Coverage Report

Created: 2025-08-28 06:29

/src/frr/zebra/rule_netlink.c
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/*
3
 * Zebra Policy Based Routing (PBR) interaction with the kernel using
4
 * netlink.
5
 * Copyright (C) 2018  Cumulus Networks, Inc.
6
 */
7
8
#include <zebra.h>
9
10
#ifdef HAVE_NETLINK
11
12
#include "if.h"
13
#include "prefix.h"
14
#include "vrf.h"
15
16
#include <linux/fib_rules.h>
17
#include "zebra/zserv.h"
18
#include "zebra/zebra_ns.h"
19
#include "zebra/zebra_vrf.h"
20
#include "zebra/rt.h"
21
#include "zebra/interface.h"
22
#include "zebra/debug.h"
23
#include "zebra/rtadv.h"
24
#include "zebra/kernel_netlink.h"
25
#include "zebra/rule_netlink.h"
26
#include "zebra/zebra_pbr.h"
27
#include "zebra/zebra_errors.h"
28
#include "zebra/zebra_dplane.h"
29
#include "zebra/zebra_trace.h"
30
31
/* definitions */
32
33
/* static function declarations */
34
35
/* Private functions */
36
37
38
/*
39
 * netlink_rule_msg_encode
40
 *
41
 * Encodes netlink RTM_ADDRULE/RTM_DELRULE message to buffer buf of size buflen.
42
 *
43
 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
44
 * or the number of bytes written to buf.
45
 */
46
static ssize_t netlink_rule_msg_encode(
47
  int cmd, const struct zebra_dplane_ctx *ctx, uint32_t filter_bm,
48
  uint32_t priority, uint32_t table, const struct prefix *src_ip,
49
  const struct prefix *dst_ip, uint32_t fwmark, uint8_t dsfield,
50
  uint8_t ip_protocol, void *buf, size_t buflen)
51
0
{
52
0
  uint8_t protocol = RTPROT_ZEBRA;
53
0
  int family;
54
0
  int bytelen;
55
0
  struct {
56
0
    struct nlmsghdr n;
57
0
    struct fib_rule_hdr frh;
58
0
    char buf[];
59
0
  } *req = buf;
60
61
0
  const char *ifname = dplane_ctx_rule_get_ifname(ctx);
62
63
0
  if (buflen < sizeof(*req))
64
0
    return 0;
65
0
  memset(req, 0, sizeof(*req));
66
67
  /* Assume ipv4 if no src/dst set, we only support ipv4/ipv6 */
68
0
  if (PREFIX_FAMILY(src_ip))
69
0
    family = PREFIX_FAMILY(src_ip);
70
0
  else if (PREFIX_FAMILY(dst_ip))
71
0
    family = PREFIX_FAMILY(dst_ip);
72
0
  else
73
0
    family = AF_INET;
74
75
0
  bytelen = (family == AF_INET ? 4 : 16);
76
77
0
  req->n.nlmsg_type = cmd;
78
0
  req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
79
0
  req->n.nlmsg_flags = NLM_F_REQUEST;
80
81
0
  req->frh.family = family;
82
0
  req->frh.action = FR_ACT_TO_TBL;
83
84
0
  if (!nl_attr_put(&req->n, buflen, FRA_PROTOCOL, &protocol,
85
0
       sizeof(protocol)))
86
0
    return 0;
87
88
  /* rule's pref # */
89
0
  if (!nl_attr_put32(&req->n, buflen, FRA_PRIORITY, priority))
90
0
    return 0;
91
92
  /* interface on which applied */
93
0
  if (!nl_attr_put(&req->n, buflen, FRA_IFNAME, ifname,
94
0
       strlen(ifname) + 1))
95
0
    return 0;
96
97
  /* source IP, if specified */
98
0
  if (filter_bm & PBR_FILTER_SRC_IP) {
99
0
    req->frh.src_len = src_ip->prefixlen;
100
0
    if (!nl_attr_put(&req->n, buflen, FRA_SRC, &src_ip->u.prefix,
101
0
         bytelen))
102
0
      return 0;
103
0
  }
104
105
  /* destination IP, if specified */
106
0
  if (filter_bm & PBR_FILTER_DST_IP) {
107
0
    req->frh.dst_len = dst_ip->prefixlen;
108
0
    if (!nl_attr_put(&req->n, buflen, FRA_DST, &dst_ip->u.prefix,
109
0
         bytelen))
110
0
      return 0;
111
0
  }
112
113
  /* fwmark, if specified */
114
0
  if (filter_bm & PBR_FILTER_FWMARK) {
115
0
    if (!nl_attr_put32(&req->n, buflen, FRA_FWMARK, fwmark))
116
0
      return 0;
117
0
  }
118
119
  /* dsfield, if specified; mask off the ECN bits */
120
0
  if (filter_bm & PBR_FILTER_DSFIELD)
121
0
    req->frh.tos = dsfield & 0xfc;
122
123
  /* protocol to match on */
124
0
  if (filter_bm & PBR_FILTER_IP_PROTOCOL)
125
0
    nl_attr_put8(&req->n, buflen, FRA_IP_PROTO, ip_protocol);
126
127
  /* Route table to use to forward, if filter criteria matches. */
128
0
  if (table < 256)
129
0
    req->frh.table = table;
130
0
  else {
131
0
    req->frh.table = RT_TABLE_UNSPEC;
132
0
    if (!nl_attr_put32(&req->n, buflen, FRA_TABLE, table))
133
0
      return 0;
134
0
  }
135
136
0
  if (IS_ZEBRA_DEBUG_KERNEL)
137
0
    zlog_debug(
138
0
      "Tx %s family %s IF %s Pref %u Fwmark %u Src %pFX Dst %pFX Table %u",
139
0
      nl_msg_type_to_str(cmd), nl_family_to_str(family),
140
0
      ifname, priority, fwmark, src_ip, dst_ip, table);
141
142
0
  return NLMSG_ALIGN(req->n.nlmsg_len);
143
0
}
144
145
static ssize_t netlink_rule_msg_encoder(struct zebra_dplane_ctx *ctx, void *buf,
146
          size_t buflen)
147
0
{
148
0
  int cmd = RTM_NEWRULE;
149
150
0
  if (dplane_ctx_get_op(ctx) == DPLANE_OP_RULE_DELETE)
151
0
    cmd = RTM_DELRULE;
152
153
0
  return netlink_rule_msg_encode(
154
0
    cmd, ctx, dplane_ctx_rule_get_filter_bm(ctx),
155
0
    dplane_ctx_rule_get_priority(ctx),
156
0
    dplane_ctx_rule_get_table(ctx), dplane_ctx_rule_get_src_ip(ctx),
157
0
    dplane_ctx_rule_get_dst_ip(ctx),
158
0
    dplane_ctx_rule_get_fwmark(ctx),
159
0
    dplane_ctx_rule_get_dsfield(ctx),
160
0
    dplane_ctx_rule_get_ipproto(ctx), buf, buflen);
161
0
}
162
163
static ssize_t netlink_oldrule_msg_encoder(struct zebra_dplane_ctx *ctx,
164
             void *buf, size_t buflen)
165
0
{
166
0
  return netlink_rule_msg_encode(
167
0
    RTM_DELRULE, ctx, dplane_ctx_rule_get_old_filter_bm(ctx),
168
0
    dplane_ctx_rule_get_old_priority(ctx),
169
0
    dplane_ctx_rule_get_old_table(ctx),
170
0
    dplane_ctx_rule_get_old_src_ip(ctx),
171
0
    dplane_ctx_rule_get_old_dst_ip(ctx),
172
0
    dplane_ctx_rule_get_old_fwmark(ctx),
173
0
    dplane_ctx_rule_get_old_dsfield(ctx),
174
0
    dplane_ctx_rule_get_old_ipproto(ctx), buf, buflen);
175
0
}
176
177
/* Public functions */
178
179
enum netlink_msg_status
180
netlink_put_rule_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
181
0
{
182
0
  enum dplane_op_e op;
183
0
  enum netlink_msg_status ret;
184
185
0
  op = dplane_ctx_get_op(ctx);
186
0
  if (!(op == DPLANE_OP_RULE_ADD || op == DPLANE_OP_RULE_UPDATE
187
0
        || op == DPLANE_OP_RULE_DELETE)) {
188
0
    flog_err(
189
0
      EC_ZEBRA_PBR_RULE_UPDATE,
190
0
      "Context received for kernel rule update with incorrect OP code (%u)",
191
0
      op);
192
0
    return FRR_NETLINK_ERROR;
193
0
  }
194
195
0
  ret = netlink_batch_add_msg(bth, ctx, netlink_rule_msg_encoder, false);
196
197
  /**
198
   * Delete the old one.
199
   *
200
   * Don't care about this result right?
201
   */
202
0
  if (op == DPLANE_OP_RULE_UPDATE)
203
0
    netlink_batch_add_msg(bth, ctx, netlink_oldrule_msg_encoder,
204
0
              true);
205
206
0
  return ret;
207
0
}
208
209
/*
210
 * Handle netlink notification informing a rule add or delete.
211
 * Handling of an ADD is TBD.
212
 * DELs are notified up, if other attributes indicate it may be a
213
 * notification of interest. The expectation is that if this corresponds
214
 * to a PBR rule added by FRR, it will be readded.
215
 *
216
 * If startup and we see a rule we created, delete it as its leftover
217
 * from a previous instance and should have been removed on shutdown.
218
 *
219
 */
220
int netlink_rule_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
221
0
{
222
0
  struct zebra_ns *zns;
223
0
  struct fib_rule_hdr *frh;
224
0
  struct rtattr *tb[FRA_MAX + 1];
225
0
  int len;
226
0
  char *ifname;
227
0
  struct zebra_pbr_rule rule = {};
228
0
  uint8_t proto = 0;
229
0
  uint8_t ip_proto = 0;
230
231
0
  frrtrace(3, frr_zebra, netlink_rule_change, h, ns_id, startup);
232
233
  /* Basic validation followed by extracting attributes. */
234
0
  if (h->nlmsg_type != RTM_NEWRULE && h->nlmsg_type != RTM_DELRULE)
235
0
    return 0;
236
237
0
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
238
0
  if (len < 0) {
239
0
    zlog_err(
240
0
      "%s: Message received from netlink is of a broken size: %d %zu",
241
0
      __func__, h->nlmsg_len,
242
0
      (size_t)NLMSG_LENGTH(sizeof(struct fib_rule_hdr)));
243
0
    return -1;
244
0
  }
245
246
0
  frh = NLMSG_DATA(h);
247
248
0
  if (frh->family != AF_INET && frh->family != AF_INET6) {
249
0
    if (frh->family == RTNL_FAMILY_IPMR
250
0
        || frh->family == RTNL_FAMILY_IP6MR) {
251
0
      if (IS_ZEBRA_DEBUG_KERNEL)
252
0
        zlog_debug(
253
0
          "Received rule netlink that we are ignoring for family %u, rule change: %u",
254
0
          frh->family, h->nlmsg_type);
255
0
      return 0;
256
0
    }
257
0
    flog_warn(
258
0
      EC_ZEBRA_NETLINK_INVALID_AF,
259
0
      "Invalid address family: %u received from kernel rule change: %u",
260
0
      frh->family, h->nlmsg_type);
261
0
    return 0;
262
0
  }
263
0
  if (frh->action != FR_ACT_TO_TBL)
264
0
    return 0;
265
266
0
  memset(tb, 0, sizeof(tb));
267
0
  netlink_parse_rtattr(tb, FRA_MAX, RTM_RTA(frh), len);
268
269
0
  if (tb[FRA_PRIORITY])
270
0
    rule.rule.priority = *(uint32_t *)RTA_DATA(tb[FRA_PRIORITY]);
271
272
0
  if (tb[FRA_SRC]) {
273
0
    if (frh->family == AF_INET)
274
0
      memcpy(&rule.rule.filter.src_ip.u.prefix4,
275
0
             RTA_DATA(tb[FRA_SRC]), 4);
276
0
    else
277
0
      memcpy(&rule.rule.filter.src_ip.u.prefix6,
278
0
             RTA_DATA(tb[FRA_SRC]), 16);
279
0
    rule.rule.filter.src_ip.prefixlen = frh->src_len;
280
0
    rule.rule.filter.src_ip.family = frh->family;
281
0
    rule.rule.filter.filter_bm |= PBR_FILTER_SRC_IP;
282
0
  }
283
284
0
  if (tb[FRA_DST]) {
285
0
    if (frh->family == AF_INET)
286
0
      memcpy(&rule.rule.filter.dst_ip.u.prefix4,
287
0
             RTA_DATA(tb[FRA_DST]), 4);
288
0
    else
289
0
      memcpy(&rule.rule.filter.dst_ip.u.prefix6,
290
0
             RTA_DATA(tb[FRA_DST]), 16);
291
0
    rule.rule.filter.dst_ip.prefixlen = frh->dst_len;
292
0
    rule.rule.filter.dst_ip.family = frh->family;
293
0
    rule.rule.filter.filter_bm |= PBR_FILTER_DST_IP;
294
0
  }
295
296
0
  if (tb[FRA_TABLE])
297
0
    rule.rule.action.table = *(uint32_t *)RTA_DATA(tb[FRA_TABLE]);
298
0
  else
299
0
    rule.rule.action.table = frh->table;
300
301
  /* TBD: We don't care about rules not specifying an IIF. */
302
0
  if (tb[FRA_IFNAME] == NULL)
303
0
    return 0;
304
305
0
  if (tb[FRA_PROTOCOL])
306
0
    proto = *(uint8_t *)RTA_DATA(tb[FRA_PROTOCOL]);
307
308
0
  if (tb[FRA_IP_PROTO])
309
0
    ip_proto = *(uint8_t *)RTA_DATA(tb[FRA_IP_PROTO]);
310
311
0
  ifname = (char *)RTA_DATA(tb[FRA_IFNAME]);
312
0
  strlcpy(rule.ifname, ifname, sizeof(rule.ifname));
313
314
0
  if (h->nlmsg_type == RTM_NEWRULE) {
315
    /*
316
     * If we see a rule at startup we created, delete it now.
317
     * It should have been flushed on a previous shutdown.
318
     */
319
0
    if (startup && proto == RTPROT_ZEBRA) {
320
0
      enum zebra_dplane_result ret;
321
322
0
      ret = dplane_pbr_rule_delete(&rule);
323
324
0
      zlog_debug(
325
0
        "%s: %s leftover rule: family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
326
0
        __func__,
327
0
        ((ret == ZEBRA_DPLANE_REQUEST_FAILURE)
328
0
           ? "Failed to remove"
329
0
           : "Removed"),
330
0
        nl_family_to_str(frh->family), rule.ifname,
331
0
        rule.rule.priority, &rule.rule.filter.src_ip,
332
0
        &rule.rule.filter.dst_ip,
333
0
        rule.rule.action.table, ip_proto);
334
0
    }
335
336
    /* TBD */
337
0
    return 0;
338
0
  }
339
340
0
  zns = zebra_ns_lookup(ns_id);
341
342
  /* If we don't know the interface, we don't care. */
343
0
  if (!if_lookup_by_name_per_ns(zns, ifname))
344
0
    return 0;
345
346
0
  if (IS_ZEBRA_DEBUG_KERNEL)
347
0
    zlog_debug(
348
0
      "Rx %s family %s IF %s Pref %u Src %pFX Dst %pFX Table %u ip-proto: %u",
349
0
      nl_msg_type_to_str(h->nlmsg_type),
350
0
      nl_family_to_str(frh->family), rule.ifname,
351
0
      rule.rule.priority, &rule.rule.filter.src_ip,
352
0
      &rule.rule.filter.dst_ip, rule.rule.action.table,
353
0
      ip_proto);
354
355
0
  return kernel_pbr_rule_del(&rule);
356
0
}
357
358
/*
359
 * Request rules from the kernel
360
 */
361
static int netlink_request_rules(struct zebra_ns *zns, int family, int type)
362
0
{
363
0
  struct {
364
0
    struct nlmsghdr n;
365
0
    struct fib_rule_hdr frh;
366
0
    char buf[NL_PKT_BUF_SIZE];
367
0
  } req;
368
369
0
  memset(&req, 0, sizeof(req));
370
0
  req.n.nlmsg_type = type;
371
0
  req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
372
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct fib_rule_hdr));
373
0
  req.frh.family = family;
374
375
0
  return netlink_request(&zns->netlink_cmd, &req);
376
0
}
377
378
/*
379
 * Get to know existing PBR rules in the kernel - typically called at startup.
380
 */
381
int netlink_rules_read(struct zebra_ns *zns)
382
0
{
383
0
  int ret;
384
0
  struct zebra_dplane_info dp_info;
385
386
0
  zebra_dplane_info_from_zns(&dp_info, zns, true);
387
388
0
  ret = netlink_request_rules(zns, AF_INET, RTM_GETRULE);
389
0
  if (ret < 0)
390
0
    return ret;
391
392
0
  ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
393
0
         &dp_info, 0, true);
394
0
  if (ret < 0)
395
0
    return ret;
396
397
0
  ret = netlink_request_rules(zns, AF_INET6, RTM_GETRULE);
398
0
  if (ret < 0)
399
0
    return ret;
400
401
0
  ret = netlink_parse_info(netlink_rule_change, &zns->netlink_cmd,
402
0
         &dp_info, 0, true);
403
0
  return ret;
404
0
}
405
406
#endif /* HAVE_NETLINK */