/src/frr/zebra/rule_netlink.c
Line | Count | Source (jump to first uncovered line) |
1 | | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | | /* |
3 | | * Zebra Policy Based Routing (PBR) interaction with the kernel using |
4 | | * netlink. |
5 | | * Copyright (C) 2018 Cumulus Networks, Inc. |
6 | | */ |
7 | | |
8 | | #include <zebra.h> |
9 | | |
10 | | #ifdef HAVE_NETLINK |
11 | | |
12 | | #include "if.h" |
13 | | #include "prefix.h" |
14 | | #include "vrf.h" |
15 | | |
16 | | #include <linux/fib_rules.h> |
17 | | #include "zebra/zserv.h" |
18 | | #include "zebra/zebra_ns.h" |
19 | | #include "zebra/zebra_vrf.h" |
20 | | #include "zebra/rt.h" |
21 | | #include "zebra/interface.h" |
22 | | #include "zebra/debug.h" |
23 | | #include "zebra/rtadv.h" |
24 | | #include "zebra/kernel_netlink.h" |
25 | | #include "zebra/rule_netlink.h" |
26 | | #include "zebra/zebra_pbr.h" |
27 | | #include "zebra/zebra_errors.h" |
28 | | #include "zebra/zebra_dplane.h" |
29 | | #include "zebra/zebra_trace.h" |
30 | | |
31 | | /* definitions */ |
32 | | |
33 | | /* static function declarations */ |
34 | | |
35 | | /* Private functions */ |
36 | | |
37 | | |
38 | | /* |
39 | | * netlink_rule_msg_encode |
40 | | * |
41 | | * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen. |
42 | | * |
43 | | * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer |
44 | | * or the number of bytes written to buf. |
45 | | */ |
46 | | static ssize_t netlink_rule_msg_encode( |
47 | | int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm, |
48 | | uint32_t priority, uint32_t table, const struct prefix *src_ip, |
49 | | const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield, |
50 | | uint8_t ip_protocol, void *buf, size_t buflen) |
51 | 0 | { |
52 | 0 | uint8_t protocol = RTPROT_ZEBRA; |
53 | 0 | int family; |
54 | 0 | int bytelen; |
55 | 0 | struct { |
56 | 0 | struct nlmsghdr n; |
57 | 0 | struct fib_rule_hdr frh; |
58 | 0 | char buf[]; |
59 | 0 | } *req = buf; |
60 | |
|
61 | 0 | const char *ifname = dplane_ctx_rule_get_ifname(ctx); |
62 | |
|
63 | 0 | if (buflen < sizeof(*req)) |
64 | 0 | return 0; |
65 | 0 | memset(req, 0, sizeof(*req)); |
66 | | |
67 | | /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */ |
68 | 0 | if (PREFIX_FAMILY(src_ip)) |
69 | 0 | family = PREFIX_FAMILY(src_ip); |
70 | 0 | else if (PREFIX_FAMILY(dst_ip)) |
71 | 0 | family = PREFIX_FAMILY(dst_ip); |
72 | 0 | else |
73 | 0 | family = AF_INET; |
74 | |
|
75 | 0 | bytelen = (family == AF_INET ? 4 : 16); |
76 | |
|
77 | 0 | req->n.nlmsg_type = cmd; |
78 | 0 | req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg)); |
79 | 0 | req->n.nlmsg_flags = NLM_F_REQUEST; |
80 | |
|
81 | 0 | req->frh.family = family; |
82 | 0 | req->frh.action = FR_ACT_TO_TBL; |
83 | |
|
84 | 0 | if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol, |
85 | 0 | sizeof(protocol))) |
86 | 0 | return 0; |
87 | | |
88 | | /* rule's pref # */ |
89 | 0 | if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority)) |
90 | 0 | return 0; |
91 | | |
92 | | /* interface on which applied */ |
93 | 0 | if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname, |
94 | 0 | strlen(ifname) + 1)) |
95 | 0 | return 0; |
96 | | |
97 | | /* source IP, if specified */ |
98 | 0 | if (filter_bm & PBR_FILTER_SRC_IP) { |
99 | 0 | req->frh.src_len = src_ip->prefixlen; |
100 | 0 | if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix, |
101 | 0 | bytelen)) |
102 | 0 | return 0; |
103 | 0 | } |
104 | | |
105 | | /* destination IP, if specified */ |
106 | 0 | if (filter_bm & PBR_FILTER_DST_IP) { |
107 | 0 | req->frh.dst_len = dst_ip->prefixlen; |
108 | 0 | if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix, |
109 | 0 | bytelen)) |
110 | 0 | return 0; |
111 | 0 | } |
112 | | |
113 | | /* fwmark, if specified */ |
114 | 0 | if (filter_bm & PBR_FILTER_FWMARK) { |
115 | 0 | if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark)) |
116 | 0 | return 0; |
117 | 0 | } |
118 | | |
119 | | /* dsfield, if specified; mask off the ECN bits */ |
120 | 0 | if (filter_bm & PBR_FILTER_DSFIELD) |
121 | 0 | req->frh.tos = dsfield & 0xfc; |
122 | | |
123 | | /* protocol to match on */ |
124 | 0 | if (filter_bm & PBR_FILTER_IP_PROTOCOL) |
125 | 0 | nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol); |
126 | | |
127 | | /* Route table to use to forward, if filter criteria matches. */ |
128 | 0 | if (table < 256) |
129 | 0 | req->frh.table = table; |
130 | 0 | else { |
131 | 0 | req->frh.table = RT_TABLE_UNSPEC; |
132 | 0 | if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table)) |
133 | 0 | return 0; |
134 | 0 | } |
135 | | |
136 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
137 | 0 | zlog_debug( |
138 | 0 | "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u", |
139 | 0 | nl_msg_type_to_str(cmd), nl_family_to_str(family), |
140 | 0 | ifname, priority, fwmark, src_ip, dst_ip, table); |
141 | |
|
142 | 0 | return NLMSG_ALIGN(req->n.nlmsg_len); |
143 | 0 | } |
144 | | |
145 | | static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf, |
146 | | size_t buflen) |
147 | 0 | { |
148 | 0 | int cmd = RTM_NEWRULE; |
149 | |
|
150 | 0 | if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE) |
151 | 0 | cmd = RTM_DELRULE; |
152 | |
|
153 | 0 | return netlink_rule_msg_encode( |
154 | 0 | cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx), |
155 | 0 | dplane_ctx_rule_get_priority(ctx), |
156 | 0 | dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx), |
157 | 0 | dplane_ctx_rule_get_dst_ip(ctx), |
158 | 0 | dplane_ctx_rule_get_fwmark(ctx), |
159 | 0 | dplane_ctx_rule_get_dsfield(ctx), |
160 | 0 | dplane_ctx_rule_get_ipproto(ctx), buf, buflen); |
161 | 0 | } |
162 | | |
/*
 * Encoder callback that always builds an RTM_DELRULE message, using the
 * "old" rule values stored in ctx (the dplane_ctx_rule_get_old_* getters).
 * The return value is that of netlink_rule_msg_encode().
 */
static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
					   void *buf, size_t buflen)
{
	const uint32_t old_filter_bm = dplane_ctx_rule_get_old_filter_bm(ctx);
	const uint32_t old_priority = dplane_ctx_rule_get_old_priority(ctx);
	const uint32_t old_table = dplane_ctx_rule_get_old_table(ctx);
	const struct prefix *old_src = dplane_ctx_rule_get_old_src_ip(ctx);
	const struct prefix *old_dst = dplane_ctx_rule_get_old_dst_ip(ctx);
	const uint32_t old_fwmark = dplane_ctx_rule_get_old_fwmark(ctx);
	const uint8_t old_dsfield = dplane_ctx_rule_get_old_dsfield(ctx);
	const uint8_t old_ipproto = dplane_ctx_rule_get_old_ipproto(ctx);

	return netlink_rule_msg_encode(RTM_DELRULE, ctx, old_filter_bm,
				       old_priority, old_table, old_src,
				       old_dst, old_fwmark, old_dsfield,
				       old_ipproto, buf, buflen);
}
176 | | |
177 | | /* Public functions */ |
178 | | |
179 | | enum netlink_msg_status |
180 | | netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx) |
181 | 0 | { |
182 | 0 | enum dplane_op_e op; |
183 | 0 | enum netlink_msg_status ret; |
184 | |
|
185 | 0 | op = dplane_ctx_get_op(ctx); |
186 | 0 | if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE |
187 | 0 | || op == DPLANE_OP_RULE_DELETE)) { |
188 | 0 | flog_err( |
189 | 0 | EC_ZEBRA_PBR_RULE_UPDATE, |
190 | 0 | "Context received for kernel rule update with incorrect OP code (%u)", |
191 | 0 | op); |
192 | 0 | return FRR_NETLINK_ERROR; |
193 | 0 | } |
194 | | |
195 | 0 | ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false); |
196 | | |
197 | | /** |
198 | | * Delete the old one. |
199 | | * |
200 | | * Don't care about this result right? |
201 | | */ |
202 | 0 | if (op == DPLANE_OP_RULE_UPDATE) |
203 | 0 | netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder, |
204 | 0 | true); |
205 | |
|
206 | 0 | return ret; |
207 | 0 | } |
208 | | |
209 | | /* |
210 | | * Handle netlink notification informing a rule add or delete. |
211 | | * Handling of an ADD is TBD. |
212 | | * DELs are notified up, if other attributes indicate it may be a |
213 | | * notification of interest. The expectation is that if this corresponds |
214 | | * to a PBR rule added by FRR, it will be readded. |
215 | | * |
216 | | * If startup and we see a rule we created, delete it as its leftover |
217 | | * from a previous instance and should have been removed on shutdown. |
218 | | * |
219 | | */ |
220 | | int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup) |
221 | 0 | { |
222 | 0 | struct zebra_ns *zns; |
223 | 0 | struct fib_rule_hdr *frh; |
224 | 0 | struct rtattr *tb[FRA_MAX + 1]; |
225 | 0 | int len; |
226 | 0 | char *ifname; |
227 | 0 | struct zebra_pbr_rule rule = {}; |
228 | 0 | uint8_t proto = 0; |
229 | 0 | uint8_t ip_proto = 0; |
230 | |
|
231 | 0 | frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup); |
232 | | |
233 | | /* Basic validation followed by extracting attributes. */ |
234 | 0 | if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE) |
235 | 0 | return 0; |
236 | | |
237 | 0 | len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); |
238 | 0 | if (len < 0) { |
239 | 0 | zlog_err( |
240 | 0 | "%s: Message received from netlink is of a broken size: %d %zu", |
241 | 0 | __func__, h->nlmsg_len, |
242 | 0 | (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr))); |
243 | 0 | return -1; |
244 | 0 | } |
245 | | |
246 | 0 | frh = NLMSG_DATA(h); |
247 | |
|
248 | 0 | if (frh->family != AF_INET && frh->family != AF_INET6) { |
249 | 0 | if (frh->family == RTNL_FAMILY_IPMR |
250 | 0 | || frh->family == RTNL_FAMILY_IP6MR) { |
251 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
252 | 0 | zlog_debug( |
253 | 0 | "Received rule netlink that we are ignoring for family %u, rule change: %u", |
254 | 0 | frh->family, h->nlmsg_type); |
255 | 0 | return 0; |
256 | 0 | } |
257 | 0 | flog_warn( |
258 | 0 | EC_ZEBRA_NETLINK_INVALID_AF, |
259 | 0 | "Invalid address family: %u received from kernel rule change: %u", |
260 | 0 | frh->family, h->nlmsg_type); |
261 | 0 | return 0; |
262 | 0 | } |
263 | 0 | if (frh->action != FR_ACT_TO_TBL) |
264 | 0 | return 0; |
265 | | |
266 | 0 | memset(tb, 0, sizeof(tb)); |
267 | 0 | netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len); |
268 | |
|
269 | 0 | if (tb[FRA_PRIORITY]) |
270 | 0 | rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]); |
271 | |
|
272 | 0 | if (tb[FRA_SRC]) { |
273 | 0 | if (frh->family == AF_INET) |
274 | 0 | memcpy(&rule.rule.filter.src_ip.u.prefix4, |
275 | 0 | RTA_DATA(tb[FRA_SRC]), 4); |
276 | 0 | else |
277 | 0 | memcpy(&rule.rule.filter.src_ip.u.prefix6, |
278 | 0 | RTA_DATA(tb[FRA_SRC]), 16); |
279 | 0 | rule.rule.filter.src_ip.prefixlen = frh->src_len; |
280 | 0 | rule.rule.filter.src_ip.family = frh->family; |
281 | 0 | rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP; |
282 | 0 | } |
283 | |
|
284 | 0 | if (tb[FRA_DST]) { |
285 | 0 | if (frh->family == AF_INET) |
286 | 0 | memcpy(&rule.rule.filter.dst_ip.u.prefix4, |
287 | 0 | RTA_DATA(tb[FRA_DST]), 4); |
288 | 0 | else |
289 | 0 | memcpy(&rule.rule.filter.dst_ip.u.prefix6, |
290 | 0 | RTA_DATA(tb[FRA_DST]), 16); |
291 | 0 | rule.rule.filter.dst_ip.prefixlen = frh->dst_len; |
292 | 0 | rule.rule.filter.dst_ip.family = frh->family; |
293 | 0 | rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP; |
294 | 0 | } |
295 | |
|
296 | 0 | if (tb[FRA_TABLE]) |
297 | 0 | rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]); |
298 | 0 | else |
299 | 0 | rule.rule.action.table = frh->table; |
300 | | |
301 | | /* TBD: We don't care about rules not specifying an IIF. */ |
302 | 0 | if (tb[FRA_IFNAME] == NULL) |
303 | 0 | return 0; |
304 | | |
305 | 0 | if (tb[FRA_PROTOCOL]) |
306 | 0 | proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]); |
307 | |
|
308 | 0 | if (tb[FRA_IP_PROTO]) |
309 | 0 | ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]); |
310 | |
|
311 | 0 | ifname = (char *)RTA_DATA(tb[FRA_IFNAME]); |
312 | 0 | strlcpy(rule.ifname, ifname, sizeof(rule.ifname)); |
313 | |
|
314 | 0 | if (h->nlmsg_type == RTM_NEWRULE) { |
315 | | /* |
316 | | * If we see a rule at startup we created, delete it now. |
317 | | * It should have been flushed on a previous shutdown. |
318 | | */ |
319 | 0 | if (startup && proto == RTPROT_ZEBRA) { |
320 | 0 | enum zebra_dplane_result ret; |
321 | |
|
322 | 0 | ret = dplane_pbr_rule_delete(&rule); |
323 | |
|
324 | 0 | zlog_debug( |
325 | 0 | "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", |
326 | 0 | __func__, |
327 | 0 | ((ret == ZEBRA_DPLANE_REQUEST_FAILURE) |
328 | 0 | ? "Failed to remove" |
329 | 0 | : "Removed"), |
330 | 0 | nl_family_to_str(frh->family), rule.ifname, |
331 | 0 | rule.rule.priority, &rule.rule.filter.src_ip, |
332 | 0 | &rule.rule.filter.dst_ip, |
333 | 0 | rule.rule.action.table, ip_proto); |
334 | 0 | } |
335 | | |
336 | | /* TBD */ |
337 | 0 | return 0; |
338 | 0 | } |
339 | | |
340 | 0 | zns = zebra_ns_lookup(ns_id); |
341 | | |
342 | | /* If we don't know the interface, we don't care. */ |
343 | 0 | if (!if_lookup_by_name_per_ns(zns, ifname)) |
344 | 0 | return 0; |
345 | | |
346 | 0 | if (IS_ZEBRA_DEBUG_KERNEL) |
347 | 0 | zlog_debug( |
348 | 0 | "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u", |
349 | 0 | nl_msg_type_to_str(h->nlmsg_type), |
350 | 0 | nl_family_to_str(frh->family), rule.ifname, |
351 | 0 | rule.rule.priority, &rule.rule.filter.src_ip, |
352 | 0 | &rule.rule.filter.dst_ip, rule.rule.action.table, |
353 | 0 | ip_proto); |
354 | |
|
355 | 0 | return kernel_pbr_rule_del(&rule); |
356 | 0 | } |
357 | | |
358 | | /* |
359 | | * Request rules from the kernel |
360 | | */ |
361 | | static int netlink_request_rules(struct zebra_ns *zns, int family, int type) |
362 | 0 | { |
363 | 0 | struct { |
364 | 0 | struct nlmsghdr n; |
365 | 0 | struct fib_rule_hdr frh; |
366 | 0 | char buf[NL_PKT_BUF_SIZE]; |
367 | 0 | } req; |
368 | |
|
369 | 0 | memset(&req, 0, sizeof(req)); |
370 | 0 | req.n.nlmsg_type = type; |
371 | 0 | req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST; |
372 | 0 | req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)); |
373 | 0 | req.frh.family = family; |
374 | |
|
375 | 0 | return netlink_request(&zns->netlink_cmd, &req); |
376 | 0 | } |
377 | | |
378 | | /* |
379 | | * Get to know existing PBR rules in the kernel - typically called at startup. |
380 | | */ |
381 | | int netlink_rules_read(struct zebra_ns *zns) |
382 | 0 | { |
383 | 0 | int ret; |
384 | 0 | struct zebra_dplane_info dp_info; |
385 | |
|
386 | 0 | zebra_dplane_info_from_zns(&dp_info, zns, true); |
387 | |
|
388 | 0 | ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE); |
389 | 0 | if (ret < 0) |
390 | 0 | return ret; |
391 | | |
392 | 0 | ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, |
393 | 0 | &dp_info, 0, true); |
394 | 0 | if (ret < 0) |
395 | 0 | return ret; |
396 | | |
397 | 0 | ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE); |
398 | 0 | if (ret < 0) |
399 | 0 | return ret; |
400 | | |
401 | 0 | ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd, |
402 | 0 | &dp_info, 0, true); |
403 | 0 | return ret; |
404 | 0 | } |
405 | | |
406 | | #endif /* HAVE_NETLINK */ |