Coverage Report

Created: 2026-01-01 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/frr/zebra/rt_netlink.c
Line
Count
Source
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/* Kernel routing table updates using netlink over GNU/Linux system.
3
 * Copyright (C) 1997, 98, 99 Kunihiro Ishiguro
4
 */
5
6
#include <zebra.h>
7
8
#ifdef HAVE_NETLINK
9
10
/* The following definition is to workaround an issue in the Linux kernel
11
 * header files with redefinition of 'struct in6_addr' in both
12
 * netinet/in.h and linux/in6.h.
13
 * Reference - https://sourceware.org/ml/libc-alpha/2013-01/msg00599.html
14
 */
15
#define _LINUX_IN6_H
16
17
#include <net/if_arp.h>
18
#include <linux/lwtunnel.h>
19
#include <linux/mpls_iptunnel.h>
20
#include <linux/seg6_iptunnel.h>
21
#include <linux/seg6_local.h>
22
#include <linux/neighbour.h>
23
#include <linux/rtnetlink.h>
24
#include <linux/nexthop.h>
25
26
/* Hack for GNU libc version 2. */
27
#ifndef MSG_TRUNC
28
#define MSG_TRUNC      0x20
29
#endif /* MSG_TRUNC */
30
31
#include "linklist.h"
32
#include "if.h"
33
#include "log.h"
34
#include "prefix.h"
35
#include "plist.h"
36
#include "plist_int.h"
37
#include "connected.h"
38
#include "table.h"
39
#include "memory.h"
40
#include "rib.h"
41
#include "frrevent.h"
42
#include "privs.h"
43
#include "nexthop.h"
44
#include "vrf.h"
45
#include "vty.h"
46
#include "mpls.h"
47
#include "vxlan.h"
48
#include "printfrr.h"
49
50
#include "zebra/zapi_msg.h"
51
#include "zebra/zebra_ns.h"
52
#include "zebra/zebra_vrf.h"
53
#include "zebra/rt.h"
54
#include "zebra/redistribute.h"
55
#include "zebra/interface.h"
56
#include "zebra/debug.h"
57
#include "zebra/rtadv.h"
58
#include "zebra/zebra_ptm.h"
59
#include "zebra/zebra_mpls.h"
60
#include "zebra/kernel_netlink.h"
61
#include "zebra/rt_netlink.h"
62
#include "zebra/zebra_nhg.h"
63
#include "zebra/zebra_mroute.h"
64
#include "zebra/zebra_vxlan.h"
65
#include "zebra/zebra_errors.h"
66
#include "zebra/zebra_evpn_mh.h"
67
#include "zebra/zebra_trace.h"
68
#include "zebra/zebra_neigh.h"
69
70
#ifndef AF_MPLS
71
#define AF_MPLS 28
72
#endif
73
74
/* Re-defining as I am unable to include <linux/if_bridge.h> which has the
75
 * UAPI for MAC sync. */
76
#ifndef _UAPI_LINUX_IF_BRIDGE_H
77
#define BR_SPH_LIST_SIZE 10
78
#endif
79
80
static vlanid_t filter_vlan = 0;
81
82
/* We capture whether the current kernel supports nexthop ids; by
83
 * default, we'll use them if possible. There's also a configuration
84
 * available to _disable_ use of kernel nexthops.
85
 */
86
static bool supports_nh;
87
88
struct gw_family_t {
89
  uint16_t filler;
90
  uint16_t family;
91
  union g_addr gate;
92
};
93
94
static const char ipv4_ll_buf[16] = "169.254.0.1";
95
static struct in_addr ipv4_ll;
96
97
/* Is this a ipv4 over ipv6 route? */
98
static bool is_route_v4_over_v6(unsigned char rtm_family,
99
        enum nexthop_types_t nexthop_type)
100
0
{
101
0
  if (rtm_family == AF_INET
102
0
      && (nexthop_type == NEXTHOP_TYPE_IPV6
103
0
    || nexthop_type == NEXTHOP_TYPE_IPV6_IFINDEX))
104
0
    return true;
105
106
0
  return false;
107
0
}
108
109
/* Helper to control use of kernel-level nexthop ids */
110
static bool kernel_nexthops_supported(void)
111
0
{
112
0
  return (supports_nh && !vrf_is_backend_netns()
113
0
    && zebra_nhg_kernel_nexthops_enabled());
114
0
}
115
116
/*
 * Some deployments only want nexthop groups created by protocols and
 * not the ones zebra creates implicitly.  This simply reports the
 * nexthop-group module's setting for that.
 */
static bool proto_nexthops_only(void)
{
  bool only_proto = zebra_nhg_proto_nexthops_only();

  return only_proto;
}
124
125
/* Is this a proto created NHG? */
126
static bool is_proto_nhg(uint32_t id, int type)
127
0
{
128
  /* If type is available, use it as the source of truth */
129
0
  if (type) {
130
0
    if (type != ZEBRA_ROUTE_NHG)
131
0
      return true;
132
0
    return false;
133
0
  }
134
135
0
  if (id >= ZEBRA_NHG_PROTO_LOWER)
136
0
    return true;
137
138
0
  return false;
139
0
}
140
141
/* Is vni mcast group */
142
static bool is_mac_vni_mcast_group(struct ethaddr *mac, vni_t vni,
143
           struct in_addr grp_addr)
144
0
{
145
0
  if (!vni)
146
0
    return false;
147
148
0
  if (!is_zero_mac(mac))
149
0
    return false;
150
151
0
  if (!IN_MULTICAST(ntohl(grp_addr.s_addr)))
152
0
    return false;
153
154
0
  return true;
155
0
}
156
157
/*
158
 * The ipv4_ll data structure is used for all 5549
159
 * additions to the kernel.  Let's figure out the
160
 * correct value one time instead for every
161
 * install/remove of a 5549 type route
162
 */
163
void rt_netlink_init(void)
164
1
{
165
1
  inet_pton(AF_INET, ipv4_ll_buf, &ipv4_ll);
166
1
}
167
168
/*
169
 * Mapping from dataplane neighbor flags to netlink flags
170
 */
171
static uint8_t neigh_flags_to_netlink(uint8_t dplane_flags)
172
0
{
173
0
  uint8_t flags = 0;
174
175
0
  if (dplane_flags & DPLANE_NTF_EXT_LEARNED)
176
0
    flags |= NTF_EXT_LEARNED;
177
0
  if (dplane_flags & DPLANE_NTF_ROUTER)
178
0
    flags |= NTF_ROUTER;
179
0
  if (dplane_flags & DPLANE_NTF_USE)
180
0
    flags |= NTF_USE;
181
182
0
  return flags;
183
0
}
184
185
/*
186
 * Mapping from dataplane neighbor state to netlink state
187
 */
188
static uint16_t neigh_state_to_netlink(uint16_t dplane_state)
189
0
{
190
0
  uint16_t state = 0;
191
192
0
  if (dplane_state & DPLANE_NUD_REACHABLE)
193
0
    state |= NUD_REACHABLE;
194
0
  if (dplane_state & DPLANE_NUD_STALE)
195
0
    state |= NUD_STALE;
196
0
  if (dplane_state & DPLANE_NUD_NOARP)
197
0
    state |= NUD_NOARP;
198
0
  if (dplane_state & DPLANE_NUD_PROBE)
199
0
    state |= NUD_PROBE;
200
0
  if (dplane_state & DPLANE_NUD_INCOMPLETE)
201
0
    state |= NUD_INCOMPLETE;
202
0
  if (dplane_state & DPLANE_NUD_PERMANENT)
203
0
    state |= NUD_PERMANENT;
204
0
  if (dplane_state & DPLANE_NUD_FAILED)
205
0
    state |= NUD_FAILED;
206
207
0
  return state;
208
0
}
209
210
211
static inline bool is_selfroute(int proto)
212
0
{
213
0
  if ((proto == RTPROT_BGP) || (proto == RTPROT_OSPF)
214
0
      || (proto == RTPROT_ZSTATIC) || (proto == RTPROT_ZEBRA)
215
0
      || (proto == RTPROT_ISIS) || (proto == RTPROT_RIPNG)
216
0
      || (proto == RTPROT_NHRP) || (proto == RTPROT_EIGRP)
217
0
      || (proto == RTPROT_LDP) || (proto == RTPROT_BABEL)
218
0
      || (proto == RTPROT_RIP) || (proto == RTPROT_SHARP)
219
0
      || (proto == RTPROT_PBR) || (proto == RTPROT_OPENFABRIC)
220
0
      || (proto == RTPROT_SRTE)) {
221
0
    return true;
222
0
  }
223
224
0
  return false;
225
0
}
226
227
int zebra2proto(int proto)
228
0
{
229
0
  switch (proto) {
230
0
  case ZEBRA_ROUTE_BABEL:
231
0
    proto = RTPROT_BABEL;
232
0
    break;
233
0
  case ZEBRA_ROUTE_BGP:
234
0
    proto = RTPROT_BGP;
235
0
    break;
236
0
  case ZEBRA_ROUTE_OSPF:
237
0
  case ZEBRA_ROUTE_OSPF6:
238
0
    proto = RTPROT_OSPF;
239
0
    break;
240
0
  case ZEBRA_ROUTE_STATIC:
241
0
    proto = RTPROT_ZSTATIC;
242
0
    break;
243
0
  case ZEBRA_ROUTE_ISIS:
244
0
    proto = RTPROT_ISIS;
245
0
    break;
246
0
  case ZEBRA_ROUTE_RIP:
247
0
    proto = RTPROT_RIP;
248
0
    break;
249
0
  case ZEBRA_ROUTE_RIPNG:
250
0
    proto = RTPROT_RIPNG;
251
0
    break;
252
0
  case ZEBRA_ROUTE_NHRP:
253
0
    proto = RTPROT_NHRP;
254
0
    break;
255
0
  case ZEBRA_ROUTE_EIGRP:
256
0
    proto = RTPROT_EIGRP;
257
0
    break;
258
0
  case ZEBRA_ROUTE_LDP:
259
0
    proto = RTPROT_LDP;
260
0
    break;
261
0
  case ZEBRA_ROUTE_SHARP:
262
0
    proto = RTPROT_SHARP;
263
0
    break;
264
0
  case ZEBRA_ROUTE_PBR:
265
0
    proto = RTPROT_PBR;
266
0
    break;
267
0
  case ZEBRA_ROUTE_OPENFABRIC:
268
0
    proto = RTPROT_OPENFABRIC;
269
0
    break;
270
0
  case ZEBRA_ROUTE_SRTE:
271
0
    proto = RTPROT_SRTE;
272
0
    break;
273
0
  case ZEBRA_ROUTE_TABLE:
274
0
  case ZEBRA_ROUTE_NHG:
275
0
    proto = RTPROT_ZEBRA;
276
0
    break;
277
0
  case ZEBRA_ROUTE_CONNECT:
278
0
  case ZEBRA_ROUTE_KERNEL:
279
0
    proto = RTPROT_KERNEL;
280
0
    break;
281
0
  default:
282
    /*
283
     * When a user adds a new protocol this will show up
284
     * to let them know to do something about it.  This
285
     * is intentionally a warn because we should see
286
     * this as part of development of a new protocol
287
     */
288
0
    zlog_debug(
289
0
      "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
290
0
      __func__, proto);
291
0
    proto = RTPROT_ZEBRA;
292
0
    break;
293
0
  }
294
295
0
  return proto;
296
0
}
297
298
static inline int proto2zebra(int proto, int family, bool is_nexthop)
299
0
{
300
0
  switch (proto) {
301
0
  case RTPROT_BABEL:
302
0
    proto = ZEBRA_ROUTE_BABEL;
303
0
    break;
304
0
  case RTPROT_BGP:
305
0
    proto = ZEBRA_ROUTE_BGP;
306
0
    break;
307
0
  case RTPROT_OSPF:
308
0
    proto = (family == AF_INET) ? ZEBRA_ROUTE_OSPF
309
0
              : ZEBRA_ROUTE_OSPF6;
310
0
    break;
311
0
  case RTPROT_ISIS:
312
0
    proto = ZEBRA_ROUTE_ISIS;
313
0
    break;
314
0
  case RTPROT_RIP:
315
0
    proto = ZEBRA_ROUTE_RIP;
316
0
    break;
317
0
  case RTPROT_RIPNG:
318
0
    proto = ZEBRA_ROUTE_RIPNG;
319
0
    break;
320
0
  case RTPROT_NHRP:
321
0
    proto = ZEBRA_ROUTE_NHRP;
322
0
    break;
323
0
  case RTPROT_EIGRP:
324
0
    proto = ZEBRA_ROUTE_EIGRP;
325
0
    break;
326
0
  case RTPROT_LDP:
327
0
    proto = ZEBRA_ROUTE_LDP;
328
0
    break;
329
0
  case RTPROT_STATIC:
330
0
  case RTPROT_ZSTATIC:
331
0
    proto = ZEBRA_ROUTE_STATIC;
332
0
    break;
333
0
  case RTPROT_SHARP:
334
0
    proto = ZEBRA_ROUTE_SHARP;
335
0
    break;
336
0
  case RTPROT_PBR:
337
0
    proto = ZEBRA_ROUTE_PBR;
338
0
    break;
339
0
  case RTPROT_OPENFABRIC:
340
0
    proto = ZEBRA_ROUTE_OPENFABRIC;
341
0
    break;
342
0
  case RTPROT_SRTE:
343
0
    proto = ZEBRA_ROUTE_SRTE;
344
0
    break;
345
0
  case RTPROT_UNSPEC:
346
0
  case RTPROT_REDIRECT:
347
0
  case RTPROT_KERNEL:
348
0
  case RTPROT_BOOT:
349
0
  case RTPROT_GATED:
350
0
  case RTPROT_RA:
351
0
  case RTPROT_MRT:
352
0
  case RTPROT_BIRD:
353
0
  case RTPROT_DNROUTED:
354
0
  case RTPROT_XORP:
355
0
  case RTPROT_NTK:
356
0
  case RTPROT_MROUTED:
357
0
  case RTPROT_KEEPALIVED:
358
0
  case RTPROT_OPENR:
359
0
    proto = ZEBRA_ROUTE_KERNEL;
360
0
    break;
361
0
  case RTPROT_ZEBRA:
362
0
    if (is_nexthop) {
363
0
      proto = ZEBRA_ROUTE_NHG;
364
0
      break;
365
0
    }
366
    /* Intentional fall thru */
367
0
  default:
368
    /*
369
     * When a user adds a new protocol this will show up
370
     * to let them know to do something about it.  This
371
     * is intentionally a warn because we should see
372
     * this as part of development of a new protocol
373
     */
374
0
    zlog_debug(
375
0
      "%s: Please add this protocol(%d) to proper rt_netlink.c handling",
376
0
      __func__, proto);
377
0
    proto = ZEBRA_ROUTE_KERNEL;
378
0
    break;
379
0
  }
380
0
  return proto;
381
0
}
382
383
/*
384
Pending: create an efficient table_id (in a tree/hash) based lookup)
385
 */
386
vrf_id_t vrf_lookup_by_table(uint32_t table_id, ns_id_t ns_id)
387
0
{
388
0
  struct vrf *vrf;
389
0
  struct zebra_vrf *zvrf;
390
391
0
  RB_FOREACH (vrf, vrf_id_head, &vrfs_by_id) {
392
0
    zvrf = vrf->info;
393
0
    if (zvrf == NULL)
394
0
      continue;
395
    /* case vrf with netns : match the netnsid */
396
0
    if (vrf_is_backend_netns()) {
397
0
      if (ns_id == zvrf_id(zvrf))
398
0
        return zvrf_id(zvrf);
399
0
    } else {
400
      /* VRF is VRF_BACKEND_VRF_LITE */
401
0
      if (zvrf->table_id != table_id)
402
0
        continue;
403
0
      return zvrf_id(zvrf);
404
0
    }
405
0
  }
406
407
0
  return VRF_DEFAULT;
408
0
}
409
410
/**
411
 * @parse_encap_mpls() - Parses encapsulated mpls attributes
412
 * @tb:         Pointer to rtattr to look for nested items in.
413
 * @labels:     Pointer to store labels in.
414
 *
415
 * Return:      Number of mpls labels found.
416
 */
417
static int parse_encap_mpls(struct rtattr *tb, mpls_label_t *labels)
418
0
{
419
0
  struct rtattr *tb_encap[MPLS_IPTUNNEL_MAX + 1] = {0};
420
0
  mpls_lse_t *lses = NULL;
421
0
  int num_labels = 0;
422
0
  uint32_t ttl = 0;
423
0
  uint32_t bos = 0;
424
0
  uint32_t exp = 0;
425
0
  mpls_label_t label = 0;
426
427
0
  netlink_parse_rtattr_nested(tb_encap, MPLS_IPTUNNEL_MAX, tb);
428
0
  lses = (mpls_lse_t *)RTA_DATA(tb_encap[MPLS_IPTUNNEL_DST]);
429
0
  while (!bos && num_labels < MPLS_MAX_LABELS) {
430
0
    mpls_lse_decode(lses[num_labels], &label, &ttl, &exp, &bos);
431
0
    labels[num_labels++] = label;
432
0
  }
433
434
0
  return num_labels;
435
0
}
436
437
static enum seg6local_action_t
438
parse_encap_seg6local(struct rtattr *tb,
439
          struct seg6local_context *ctx)
440
0
{
441
0
  struct rtattr *tb_encap[SEG6_LOCAL_MAX + 1] = {};
442
0
  enum seg6local_action_t act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
443
444
0
  netlink_parse_rtattr_nested(tb_encap, SEG6_LOCAL_MAX, tb);
445
446
0
  if (tb_encap[SEG6_LOCAL_ACTION])
447
0
    act = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_ACTION]);
448
449
0
  if (tb_encap[SEG6_LOCAL_NH4])
450
0
    ctx->nh4 = *(struct in_addr *)RTA_DATA(
451
0
        tb_encap[SEG6_LOCAL_NH4]);
452
453
0
  if (tb_encap[SEG6_LOCAL_NH6])
454
0
    ctx->nh6 = *(struct in6_addr *)RTA_DATA(
455
0
        tb_encap[SEG6_LOCAL_NH6]);
456
457
0
  if (tb_encap[SEG6_LOCAL_TABLE])
458
0
    ctx->table = *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_TABLE]);
459
460
0
  if (tb_encap[SEG6_LOCAL_VRFTABLE])
461
0
    ctx->table =
462
0
      *(uint32_t *)RTA_DATA(tb_encap[SEG6_LOCAL_VRFTABLE]);
463
464
0
  return act;
465
0
}
466
467
static int parse_encap_seg6(struct rtattr *tb, struct in6_addr *segs)
468
0
{
469
0
  struct rtattr *tb_encap[SEG6_IPTUNNEL_MAX + 1] = {};
470
0
  struct seg6_iptunnel_encap *ipt = NULL;
471
0
  struct in6_addr *segments = NULL;
472
473
0
  netlink_parse_rtattr_nested(tb_encap, SEG6_IPTUNNEL_MAX, tb);
474
475
  /*
476
   * TODO: It's not support multiple SID list.
477
   */
478
0
  if (tb_encap[SEG6_IPTUNNEL_SRH]) {
479
0
    ipt = (struct seg6_iptunnel_encap *)
480
0
      RTA_DATA(tb_encap[SEG6_IPTUNNEL_SRH]);
481
0
    segments = ipt->srh[0].segments;
482
0
    *segs = segments[0];
483
0
    return 1;
484
0
  }
485
486
0
  return 0;
487
0
}
488
489
490
static struct nexthop
491
parse_nexthop_unicast(ns_id_t ns_id, struct rtmsg *rtm, struct rtattr **tb,
492
          enum blackhole_type bh_type, int index, void *prefsrc,
493
          void *gate, afi_t afi, vrf_id_t vrf_id)
494
0
{
495
0
  struct interface *ifp = NULL;
496
0
  struct nexthop nh = {0};
497
0
  mpls_label_t labels[MPLS_MAX_LABELS] = {0};
498
0
  int num_labels = 0;
499
0
  enum seg6local_action_t seg6l_act = ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
500
0
  struct seg6local_context seg6l_ctx = {};
501
0
  struct in6_addr seg6_segs = {};
502
0
  int num_segs = 0;
503
504
0
  vrf_id_t nh_vrf_id = vrf_id;
505
0
  size_t sz = (afi == AFI_IP) ? 4 : 16;
506
507
0
  if (bh_type == BLACKHOLE_UNSPEC) {
508
0
    if (index && !gate)
509
0
      nh.type = NEXTHOP_TYPE_IFINDEX;
510
0
    else if (index && gate)
511
0
      nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4_IFINDEX
512
0
              : NEXTHOP_TYPE_IPV6_IFINDEX;
513
0
    else if (!index && gate)
514
0
      nh.type = (afi == AFI_IP) ? NEXTHOP_TYPE_IPV4
515
0
              : NEXTHOP_TYPE_IPV6;
516
0
    else {
517
0
      nh.type = NEXTHOP_TYPE_BLACKHOLE;
518
0
      nh.bh_type = bh_type;
519
0
    }
520
0
  } else {
521
0
    nh.type = NEXTHOP_TYPE_BLACKHOLE;
522
0
    nh.bh_type = bh_type;
523
0
  }
524
0
  nh.ifindex = index;
525
0
  if (prefsrc)
526
0
    memcpy(&nh.src, prefsrc, sz);
527
0
  if (gate)
528
0
    memcpy(&nh.gate, gate, sz);
529
530
0
  if (index) {
531
0
    ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), index);
532
0
    if (ifp)
533
0
      nh_vrf_id = ifp->vrf->vrf_id;
534
0
  }
535
0
  nh.vrf_id = nh_vrf_id;
536
537
0
  if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
538
0
      && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
539
0
           == LWTUNNEL_ENCAP_MPLS) {
540
0
    num_labels = parse_encap_mpls(tb[RTA_ENCAP], labels);
541
0
  }
542
0
  if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
543
0
      && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
544
0
           == LWTUNNEL_ENCAP_SEG6_LOCAL) {
545
0
    seg6l_act = parse_encap_seg6local(tb[RTA_ENCAP], &seg6l_ctx);
546
0
  }
547
0
  if (tb[RTA_ENCAP] && tb[RTA_ENCAP_TYPE]
548
0
      && *(uint16_t *)RTA_DATA(tb[RTA_ENCAP_TYPE])
549
0
           == LWTUNNEL_ENCAP_SEG6) {
550
0
    num_segs = parse_encap_seg6(tb[RTA_ENCAP], &seg6_segs);
551
0
  }
552
553
0
  if (rtm->rtm_flags & RTNH_F_ONLINK)
554
0
    SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
555
556
0
  if (rtm->rtm_flags & RTNH_F_LINKDOWN)
557
0
    SET_FLAG(nh.flags, NEXTHOP_FLAG_LINKDOWN);
558
559
0
  if (num_labels)
560
0
    nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels, labels);
561
562
0
  if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
563
0
    nexthop_add_srv6_seg6local(&nh, seg6l_act, &seg6l_ctx);
564
565
0
  if (num_segs)
566
0
    nexthop_add_srv6_seg6(&nh, &seg6_segs);
567
568
0
  return nh;
569
0
}
570
571
/*
 * Parse the RTA_MULTIPATH attribute of a unicast route message and add
 * one nexthop per struct rtnexthop entry to ng (sorted insert).
 *
 * rtnh points at the first entry of the multipath payload; tb is the
 * parsed top-level attribute table (only tb[RTA_MULTIPATH] is used, to
 * get the payload length).  prefsrc is applied to IPv4 gateway
 * nexthops.  vrf_id is the VRF used for entries without an ifindex;
 * entries with an ifindex use the interface's VRF, or VRF_DEFAULT (with
 * a warning) when the interface is unknown.
 *
 * All per-nexthop state (gateway, labels, seg6 data) is reset for every
 * entry so that an entry lacking a gateway or encap does not inherit
 * the values of the one before it.  Parsing stops at the first entry
 * whose rtnh_len is larger than the remaining payload or smaller than
 * the rtnexthop header itself.
 *
 * Returns the number of nexthops in ng, truncated to uint8_t.
 */
static uint8_t parse_multipath_nexthops_unicast(ns_id_t ns_id,
                                                struct nexthop_group *ng,
                                                struct rtmsg *rtm,
                                                struct rtnexthop *rtnh,
                                                struct rtattr **tb,
                                                void *prefsrc, vrf_id_t vrf_id)
{
  struct rtattr *rtnh_tb[RTA_MAX + 1] = {};
  int len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);

  for (;;) {
    struct nexthop *nh = NULL;
    struct interface *ifp = NULL;
    void *gate = NULL;
    /* MPLS labels */
    mpls_label_t labels[MPLS_MAX_LABELS] = {0};
    int num_labels = 0;
    enum seg6local_action_t seg6l_act =
      ZEBRA_SEG6_LOCAL_ACTION_UNSPEC;
    struct seg6local_context seg6l_ctx = {};
    struct in6_addr seg6_segs = {};
    int num_segs = 0;
    vrf_id_t nh_vrf_id = vrf_id;
    int index;

    if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
      break;

    /*
     * An entry shorter than its own header is malformed (this
     * includes rtnh_len == 0) and would also stall or mis-step
     * the walk below.
     */
    if (rtnh->rtnh_len < sizeof(*rtnh))
      break;

    index = rtnh->rtnh_ifindex;
    if (index) {
      /* The interface decides which VRF the nexthop is in. */
      ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
                                      index);
      if (ifp)
        nh_vrf_id = ifp->vrf->vrf_id;
      else {
        flog_warn(
          EC_ZEBRA_UNKNOWN_INTERFACE,
          "%s: Unknown interface %u specified, defaulting to VRF_DEFAULT",
          __func__, index);
        nh_vrf_id = VRF_DEFAULT;
      }
    }

    /* Attributes nested inside this rtnexthop entry, if any. */
    if (rtnh->rtnh_len > sizeof(*rtnh)) {
      netlink_parse_rtattr(rtnh_tb, RTA_MAX, RTNH_DATA(rtnh),
                           rtnh->rtnh_len - sizeof(*rtnh));
      if (rtnh_tb[RTA_GATEWAY])
        gate = RTA_DATA(rtnh_tb[RTA_GATEWAY]);

      if (rtnh_tb[RTA_ENCAP] && rtnh_tb[RTA_ENCAP_TYPE]) {
        switch (*(uint16_t *)RTA_DATA(
          rtnh_tb[RTA_ENCAP_TYPE])) {
        case LWTUNNEL_ENCAP_MPLS:
          num_labels = parse_encap_mpls(
            rtnh_tb[RTA_ENCAP], labels);
          break;
        case LWTUNNEL_ENCAP_SEG6_LOCAL:
          seg6l_act = parse_encap_seg6local(
            rtnh_tb[RTA_ENCAP], &seg6l_ctx);
          break;
        case LWTUNNEL_ENCAP_SEG6:
          num_segs = parse_encap_seg6(
            rtnh_tb[RTA_ENCAP], &seg6_segs);
          break;
        default:
          break;
        }
      }
    }

    if (gate && rtm->rtm_family == AF_INET) {
      if (index)
        nh = nexthop_from_ipv4_ifindex(
          gate, prefsrc, index, nh_vrf_id);
      else
        nh = nexthop_from_ipv4(gate, prefsrc,
                               nh_vrf_id);
    } else if (gate && rtm->rtm_family == AF_INET6) {
      if (index)
        nh = nexthop_from_ipv6_ifindex(
          gate, index, nh_vrf_id);
      else
        nh = nexthop_from_ipv6(gate, nh_vrf_id);
    } else
      nh = nexthop_from_ifindex(index, nh_vrf_id);

    if (nh) {
      /* Stored weight is the message's rtnh_hops plus one. */
      nh->weight = rtnh->rtnh_hops + 1;

      if (num_labels)
        nexthop_add_labels(nh, ZEBRA_LSP_STATIC,
                           num_labels, labels);

      if (seg6l_act != ZEBRA_SEG6_LOCAL_ACTION_UNSPEC)
        nexthop_add_srv6_seg6local(nh, seg6l_act,
                                   &seg6l_ctx);

      if (num_segs)
        nexthop_add_srv6_seg6(nh, &seg6_segs);

      if (rtnh->rtnh_flags & RTNH_F_ONLINK)
        SET_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK);

      /* Add to temporary list */
      nexthop_group_add_sorted(ng, nh);
    }

    len -= NLMSG_ALIGN(rtnh->rtnh_len);
    rtnh = RTNH_NEXT(rtnh);
  }

  return nexthop_group_nexthop_num(ng);
}
694
695
/* Looking up routing table by netlink interface. */
696
int netlink_route_change_read_unicast_internal(struct nlmsghdr *h,
697
                 ns_id_t ns_id, int startup,
698
                 struct zebra_dplane_ctx *ctx)
699
0
{
700
0
  int len;
701
0
  struct rtmsg *rtm;
702
0
  struct rtattr *tb[RTA_MAX + 1];
703
0
  uint32_t flags = 0;
704
0
  struct prefix p;
705
0
  struct prefix_ipv6 src_p = {};
706
0
  vrf_id_t vrf_id;
707
0
  bool selfroute;
708
709
0
  char anyaddr[16] = {0};
710
711
0
  int proto = ZEBRA_ROUTE_KERNEL;
712
0
  int index = 0;
713
0
  int table;
714
0
  int metric = 0;
715
0
  uint32_t mtu = 0;
716
0
  uint8_t distance = 0;
717
0
  route_tag_t tag = 0;
718
0
  uint32_t nhe_id = 0;
719
720
0
  void *dest = NULL;
721
0
  void *gate = NULL;
722
0
  void *prefsrc = NULL; /* IPv4 preferred source host address */
723
0
  void *src = NULL;     /* IPv6 srcdest   source prefix */
724
0
  enum blackhole_type bh_type = BLACKHOLE_UNSPEC;
725
726
0
  frrtrace(3, frr_zebra, netlink_route_change_read_unicast, h, ns_id,
727
0
     startup);
728
729
0
  rtm = NLMSG_DATA(h);
730
731
0
  if (startup && h->nlmsg_type != RTM_NEWROUTE)
732
0
    return 0;
733
0
  switch (rtm->rtm_type) {
734
0
  case RTN_UNICAST:
735
0
    break;
736
0
  case RTN_BLACKHOLE:
737
0
    bh_type = BLACKHOLE_NULL;
738
0
    break;
739
0
  case RTN_UNREACHABLE:
740
0
    bh_type = BLACKHOLE_REJECT;
741
0
    break;
742
0
  case RTN_PROHIBIT:
743
0
    bh_type = BLACKHOLE_ADMINPROHIB;
744
0
    break;
745
0
  default:
746
0
    if (IS_ZEBRA_DEBUG_KERNEL)
747
0
      zlog_debug("Route rtm_type: %s(%d) intentionally ignoring",
748
0
           nl_rttype_to_str(rtm->rtm_type),
749
0
           rtm->rtm_type);
750
0
    return 0;
751
0
  }
752
753
0
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
754
0
  if (len < 0) {
755
0
    zlog_err(
756
0
      "%s: Message received from netlink is of a broken size %d %zu",
757
0
      __func__, h->nlmsg_len,
758
0
      (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
759
0
    return -1;
760
0
  }
761
762
0
  netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
763
764
0
  if (rtm->rtm_flags & RTM_F_CLONED)
765
0
    return 0;
766
0
  if (rtm->rtm_protocol == RTPROT_REDIRECT)
767
0
    return 0;
768
0
  if (rtm->rtm_protocol == RTPROT_KERNEL)
769
0
    return 0;
770
771
0
  selfroute = is_selfroute(rtm->rtm_protocol);
772
773
0
  if (!startup && selfroute && h->nlmsg_type == RTM_NEWROUTE &&
774
0
      !zrouter.asic_offloaded && !ctx) {
775
0
    if (IS_ZEBRA_DEBUG_KERNEL)
776
0
      zlog_debug("Route type: %d Received that we think we have originated, ignoring",
777
0
           rtm->rtm_protocol);
778
0
    return 0;
779
0
  }
780
781
  /* We don't care about change notifications for the MPLS table. */
782
  /* TODO: Revisit this. */
783
0
  if (rtm->rtm_family == AF_MPLS)
784
0
    return 0;
785
786
  /* Table corresponding to route. */
787
0
  if (tb[RTA_TABLE])
788
0
    table = *(int *)RTA_DATA(tb[RTA_TABLE]);
789
0
  else
790
0
    table = rtm->rtm_table;
791
792
  /* Map to VRF */
793
0
  vrf_id = vrf_lookup_by_table(table, ns_id);
794
0
  if (vrf_id == VRF_DEFAULT) {
795
0
    if (!is_zebra_valid_kernel_table(table)
796
0
        && !is_zebra_main_routing_table(table))
797
0
      return 0;
798
0
  }
799
800
0
  if (rtm->rtm_flags & RTM_F_TRAP)
801
0
    flags |= ZEBRA_FLAG_TRAPPED;
802
0
  if (rtm->rtm_flags & RTM_F_OFFLOAD)
803
0
    flags |= ZEBRA_FLAG_OFFLOADED;
804
0
  if (rtm->rtm_flags & RTM_F_OFFLOAD_FAILED)
805
0
    flags |= ZEBRA_FLAG_OFFLOAD_FAILED;
806
807
0
  if (h->nlmsg_flags & NLM_F_APPEND)
808
0
    flags |= ZEBRA_FLAG_OUTOFSYNC;
809
810
  /* Route which inserted by Zebra. */
811
0
  if (selfroute) {
812
0
    flags |= ZEBRA_FLAG_SELFROUTE;
813
0
    proto = proto2zebra(rtm->rtm_protocol, rtm->rtm_family, false);
814
0
  }
815
0
  if (tb[RTA_OIF])
816
0
    index = *(int *)RTA_DATA(tb[RTA_OIF]);
817
818
0
  if (tb[RTA_DST])
819
0
    dest = RTA_DATA(tb[RTA_DST]);
820
0
  else
821
0
    dest = anyaddr;
822
823
0
  if (tb[RTA_SRC])
824
0
    src = RTA_DATA(tb[RTA_SRC]);
825
0
  else
826
0
    src = anyaddr;
827
828
0
  if (tb[RTA_PREFSRC])
829
0
    prefsrc = RTA_DATA(tb[RTA_PREFSRC]);
830
831
0
  if (tb[RTA_GATEWAY])
832
0
    gate = RTA_DATA(tb[RTA_GATEWAY]);
833
834
0
  if (tb[RTA_NH_ID])
835
0
    nhe_id = *(uint32_t *)RTA_DATA(tb[RTA_NH_ID]);
836
837
0
  if (tb[RTA_PRIORITY])
838
0
    metric = *(int *)RTA_DATA(tb[RTA_PRIORITY]);
839
840
#if defined(SUPPORT_REALMS)
841
  if (tb[RTA_FLOW])
842
    tag = *(uint32_t *)RTA_DATA(tb[RTA_FLOW]);
843
#endif
844
845
0
  if (tb[RTA_METRICS]) {
846
0
    struct rtattr *mxrta[RTAX_MAX + 1];
847
848
0
    netlink_parse_rtattr(mxrta, RTAX_MAX, RTA_DATA(tb[RTA_METRICS]),
849
0
             RTA_PAYLOAD(tb[RTA_METRICS]));
850
851
0
    if (mxrta[RTAX_MTU])
852
0
      mtu = *(uint32_t *)RTA_DATA(mxrta[RTAX_MTU]);
853
0
  }
854
855
0
  if (rtm->rtm_family == AF_INET) {
856
0
    p.family = AF_INET;
857
0
    if (rtm->rtm_dst_len > IPV4_MAX_BITLEN) {
858
0
      zlog_err(
859
0
        "Invalid destination prefix length: %u received from kernel route change",
860
0
        rtm->rtm_dst_len);
861
0
      return -1;
862
0
    }
863
0
    memcpy(&p.u.prefix4, dest, 4);
864
0
    p.prefixlen = rtm->rtm_dst_len;
865
866
0
    if (rtm->rtm_src_len != 0) {
867
0
      flog_warn(
868
0
        EC_ZEBRA_UNSUPPORTED_V4_SRCDEST,
869
0
        "unsupported IPv4 sourcedest route (dest %pFX vrf %u)",
870
0
        &p, vrf_id);
871
0
      return 0;
872
0
    }
873
874
    /* Force debug below to not display anything for source */
875
0
    src_p.prefixlen = 0;
876
0
  } else if (rtm->rtm_family == AF_INET6) {
877
0
    p.family = AF_INET6;
878
0
    if (rtm->rtm_dst_len > IPV6_MAX_BITLEN) {
879
0
      zlog_err(
880
0
        "Invalid destination prefix length: %u received from kernel route change",
881
0
        rtm->rtm_dst_len);
882
0
      return -1;
883
0
    }
884
0
    memcpy(&p.u.prefix6, dest, 16);
885
0
    p.prefixlen = rtm->rtm_dst_len;
886
887
0
    src_p.family = AF_INET6;
888
0
    if (rtm->rtm_src_len > IPV6_MAX_BITLEN) {
889
0
      zlog_err(
890
0
        "Invalid source prefix length: %u received from kernel route change",
891
0
        rtm->rtm_src_len);
892
0
      return -1;
893
0
    }
894
0
    memcpy(&src_p.prefix, src, 16);
895
0
    src_p.prefixlen = rtm->rtm_src_len;
896
0
  } else {
897
    /* We only handle the AFs we handle... */
898
0
    if (IS_ZEBRA_DEBUG_KERNEL)
899
0
      zlog_debug("%s: unknown address-family %u", __func__,
900
0
           rtm->rtm_family);
901
0
    return 0;
902
0
  }
903
904
  /*
905
   * For ZEBRA_ROUTE_KERNEL types:
906
   *
907
   * The metric/priority of the route received from the kernel
908
   * is a 32 bit number.  We are going to interpret the high
909
   * order byte as the Admin Distance and the low order 3 bytes
910
   * as the metric.
911
   *
912
   * This will allow us to do two things:
913
   * 1) Allow the creation of kernel routes that can be
914
   *    overridden by zebra.
915
   * 2) Allow the old behavior for 'most' kernel route types
916
   *    if a user enters 'ip route ...' v4 routes get a metric
917
   *    of 0 and v6 routes get a metric of 1024.  Both of these
918
   *    values will end up with a admin distance of 0, which
919
   *    will cause them to win for the purposes of zebra.
920
   */
921
0
  if (proto == ZEBRA_ROUTE_KERNEL) {
922
0
    distance = (metric >> 24) & 0xFF;
923
0
    metric = (metric & 0x00FFFFFF);
924
0
  }
925
926
0
  if (IS_ZEBRA_DEBUG_KERNEL) {
927
0
    char buf2[PREFIX_STRLEN];
928
929
0
    zlog_debug(
930
0
      "%s %pFX%s%s vrf %s(%u) table_id: %u metric: %d Admin Distance: %d",
931
0
      nl_msg_type_to_str(h->nlmsg_type), &p,
932
0
      src_p.prefixlen ? " from " : "",
933
0
      src_p.prefixlen ? prefix2str(&src_p, buf2, sizeof(buf2))
934
0
          : "",
935
0
      vrf_id_to_name(vrf_id), vrf_id, table, metric,
936
0
      distance);
937
0
  }
938
939
0
  afi_t afi = AFI_IP;
940
0
  if (rtm->rtm_family == AF_INET6)
941
0
    afi = AFI_IP6;
942
943
0
  if (h->nlmsg_type == RTM_NEWROUTE) {
944
0
    struct route_entry *re;
945
0
    struct nexthop_group *ng = NULL;
946
947
0
    re = zebra_rib_route_entry_new(vrf_id, proto, 0, flags, nhe_id,
948
0
                 table, metric, mtu, distance,
949
0
                 tag);
950
0
    if (!nhe_id)
951
0
      ng = nexthop_group_new();
952
953
0
    if (!tb[RTA_MULTIPATH]) {
954
0
      struct nexthop *nexthop, nh;
955
956
0
      if (!nhe_id) {
957
0
        nh = parse_nexthop_unicast(
958
0
          ns_id, rtm, tb, bh_type, index, prefsrc,
959
0
          gate, afi, vrf_id);
960
961
0
        nexthop = nexthop_new();
962
0
        *nexthop = nh;
963
0
        nexthop_group_add_sorted(ng, nexthop);
964
0
      }
965
0
    } else {
966
      /* This is a multipath route */
967
0
      struct rtnexthop *rtnh =
968
0
        (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
969
970
0
      if (!nhe_id) {
971
0
        uint8_t nhop_num;
972
973
        /* Use temporary list of nexthops; parse
974
         * message payload's nexthops.
975
         */
976
0
        nhop_num =
977
0
          parse_multipath_nexthops_unicast(
978
0
            ns_id, ng, rtm, rtnh, tb,
979
0
            prefsrc, vrf_id);
980
981
0
        zserv_nexthop_num_warn(
982
0
          __func__, (const struct prefix *)&p,
983
0
          nhop_num);
984
985
0
        if (nhop_num == 0) {
986
0
          nexthop_group_delete(&ng);
987
0
          ng = NULL;
988
0
        }
989
0
      }
990
0
    }
991
0
    if (nhe_id || ng) {
992
0
      dplane_rib_add_multipath(afi, SAFI_UNICAST, &p, &src_p,
993
0
             re, ng, startup, ctx);
994
0
      if (ng)
995
0
        nexthop_group_delete(&ng);
996
0
    } else {
997
      /*
998
       * I really don't see how this is possible
999
       * but since we are testing for it let's
1000
       * let the end user know why the route
1001
       * that was just received was swallowed
1002
       * up and forgotten
1003
       */
1004
0
      zlog_err(
1005
0
        "%s: %pFX multipath RTM_NEWROUTE has a invalid nexthop group from the kernel",
1006
0
        __func__, &p);
1007
0
      XFREE(MTYPE_RE, re);
1008
0
    }
1009
0
  } else {
1010
0
    if (ctx) {
1011
0
      zlog_err(
1012
0
        "%s: %pFX RTM_DELROUTE received but received a context as well",
1013
0
        __func__, &p);
1014
0
      return 0;
1015
0
    }
1016
1017
0
    if (nhe_id) {
1018
0
      rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0, flags,
1019
0
           &p, &src_p, NULL, nhe_id, table, metric,
1020
0
           distance, true);
1021
0
    } else {
1022
0
      if (!tb[RTA_MULTIPATH]) {
1023
0
        struct nexthop nh;
1024
1025
0
        nh = parse_nexthop_unicast(
1026
0
          ns_id, rtm, tb, bh_type, index, prefsrc,
1027
0
          gate, afi, vrf_id);
1028
0
        rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
1029
0
             flags, &p, &src_p, &nh, 0, table,
1030
0
             metric, distance, true);
1031
0
      } else {
1032
        /* XXX: need to compare the entire list of
1033
         * nexthops here for NLM_F_APPEND stupidity */
1034
0
        rib_delete(afi, SAFI_UNICAST, vrf_id, proto, 0,
1035
0
             flags, &p, &src_p, NULL, 0, table,
1036
0
             metric, distance, true);
1037
0
      }
1038
0
    }
1039
0
  }
1040
1041
0
  return 1;
1042
0
}
1043
1044
/*
 * Three-argument front end for
 * netlink_route_change_read_unicast_internal(): the message, namespace and
 * startup flag are forwarded unchanged and NULL is passed as the final
 * argument.  This signature is what netlink_route_read() hands to
 * netlink_parse_info() as the per-message callback.
 *
 * Returns whatever the internal parser returns.
 */
static int netlink_route_change_read_unicast(struct nlmsghdr *h, ns_id_t ns_id,
                                             int startup)
{
  int rc;

  rc = netlink_route_change_read_unicast_internal(h, ns_id, startup, NULL);

  return rc;
}
1050
1051
/*
 * Record that netlink_route_change_read_multicast() fills in (source, group
 * and last-used value).  That function asserts the pointer is non-NULL, so
 * it has to be set before a multicast route message is parsed; the code that
 * sets it is not part of this section of the file.
 */
static struct mcast_route_data *mroute = NULL;
1052
1053
static int netlink_route_change_read_multicast(struct nlmsghdr *h,
1054
                 ns_id_t ns_id, int startup)
1055
0
{
1056
0
  int len;
1057
0
  struct rtmsg *rtm;
1058
0
  struct rtattr *tb[RTA_MAX + 1];
1059
0
  struct mcast_route_data *m;
1060
0
  int iif = 0;
1061
0
  int count;
1062
0
  int oif[256];
1063
0
  int oif_count = 0;
1064
0
  char oif_list[256] = "\0";
1065
0
  vrf_id_t vrf;
1066
0
  int table;
1067
1068
0
  assert(mroute);
1069
0
  m = mroute;
1070
1071
0
  rtm = NLMSG_DATA(h);
1072
1073
0
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
1074
1075
0
  netlink_parse_rtattr(tb, RTA_MAX, RTM_RTA(rtm), len);
1076
1077
0
  if (tb[RTA_TABLE])
1078
0
    table = *(int *)RTA_DATA(tb[RTA_TABLE]);
1079
0
  else
1080
0
    table = rtm->rtm_table;
1081
1082
0
  vrf = vrf_lookup_by_table(table, ns_id);
1083
1084
0
  if (tb[RTA_IIF])
1085
0
    iif = *(int *)RTA_DATA(tb[RTA_IIF]);
1086
1087
0
  if (tb[RTA_SRC]) {
1088
0
    if (rtm->rtm_family == RTNL_FAMILY_IPMR)
1089
0
      m->src.ipaddr_v4 =
1090
0
        *(struct in_addr *)RTA_DATA(tb[RTA_SRC]);
1091
0
    else
1092
0
      m->src.ipaddr_v6 =
1093
0
        *(struct in6_addr *)RTA_DATA(tb[RTA_SRC]);
1094
0
  }
1095
1096
0
  if (tb[RTA_DST]) {
1097
0
    if (rtm->rtm_family == RTNL_FAMILY_IPMR)
1098
0
      m->grp.ipaddr_v4 =
1099
0
        *(struct in_addr *)RTA_DATA(tb[RTA_DST]);
1100
0
    else
1101
0
      m->grp.ipaddr_v6 =
1102
0
        *(struct in6_addr *)RTA_DATA(tb[RTA_DST]);
1103
0
  }
1104
1105
0
  if (tb[RTA_EXPIRES])
1106
0
    m->lastused = *(unsigned long long *)RTA_DATA(tb[RTA_EXPIRES]);
1107
1108
0
  if (tb[RTA_MULTIPATH]) {
1109
0
    struct rtnexthop *rtnh =
1110
0
      (struct rtnexthop *)RTA_DATA(tb[RTA_MULTIPATH]);
1111
1112
0
    len = RTA_PAYLOAD(tb[RTA_MULTIPATH]);
1113
0
    for (;;) {
1114
0
      if (len < (int)sizeof(*rtnh) || rtnh->rtnh_len > len)
1115
0
        break;
1116
1117
0
      oif[oif_count] = rtnh->rtnh_ifindex;
1118
0
      oif_count++;
1119
1120
0
      if (rtnh->rtnh_len == 0)
1121
0
        break;
1122
1123
0
      len -= NLMSG_ALIGN(rtnh->rtnh_len);
1124
0
      rtnh = RTNH_NEXT(rtnh);
1125
0
    }
1126
0
  }
1127
1128
0
  if (rtm->rtm_family == RTNL_FAMILY_IPMR) {
1129
0
    SET_IPADDR_V4(&m->src);
1130
0
    SET_IPADDR_V4(&m->grp);
1131
0
  } else if (rtm->rtm_family == RTNL_FAMILY_IP6MR) {
1132
0
    SET_IPADDR_V6(&m->src);
1133
0
    SET_IPADDR_V6(&m->grp);
1134
0
  } else {
1135
0
    zlog_warn("%s: Invalid rtm_family received", __func__);
1136
0
    return 0;
1137
0
  }
1138
1139
0
  if (IS_ZEBRA_DEBUG_KERNEL) {
1140
0
    struct interface *ifp = NULL;
1141
0
    struct zebra_vrf *zvrf = NULL;
1142
1143
0
    for (count = 0; count < oif_count; count++) {
1144
0
      ifp = if_lookup_by_index(oif[count], vrf);
1145
0
      char temp[256];
1146
1147
0
      snprintf(temp, sizeof(temp), "%s(%d) ",
1148
0
         ifp ? ifp->name : "Unknown", oif[count]);
1149
0
      strlcat(oif_list, temp, sizeof(oif_list));
1150
0
    }
1151
0
    zvrf = zebra_vrf_lookup_by_id(vrf);
1152
0
    ifp = if_lookup_by_index(iif, vrf);
1153
0
    zlog_debug(
1154
0
      "MCAST VRF: %s(%d) %s (%pIA,%pIA) IIF: %s(%d) OIF: %s jiffies: %lld",
1155
0
      zvrf_name(zvrf), vrf, nl_msg_type_to_str(h->nlmsg_type),
1156
0
      &m->src, &m->grp, ifp ? ifp->name : "Unknown", iif,
1157
0
      oif_list, m->lastused);
1158
0
  }
1159
0
  return 0;
1160
0
}
1161
1162
/*
 * Handle a route message received from the kernel.
 *
 * Only RTM_NEWROUTE and RTM_DELROUTE for AF_INET / AF_INET6 are passed on
 * to the unicast parser; multicast-family messages, RTN_MULTICAST routes
 * and unknown families are ignored.
 *
 * @param h: netlink message.
 * @param ns_id: namespace the message was received in.
 * @param startup: forwarded unchanged to the unicast parser.
 *
 * Returns -1 when the message is too short to contain a struct rtmsg and
 * 0 in every other case (the unicast parser's result is not propagated).
 */
int netlink_route_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
  int len;
  struct rtmsg *rtm;

  if (!(h->nlmsg_type == RTM_NEWROUTE || h->nlmsg_type == RTM_DELROUTE)) {
    /* Not a route add/delete message: note it in the debug log. */
    zlog_debug("Kernel message: %s NS %u",
               nl_msg_type_to_str(h->nlmsg_type), ns_id);
    return 0;
  }

  /*
   * Make sure the message can actually hold a struct rtmsg before any
   * of its fields are read.
   */
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct rtmsg));
  if (len < 0) {
    zlog_err(
      "%s: Message received from netlink is of a broken size: %d %zu",
      __func__, h->nlmsg_len,
      (size_t)NLMSG_LENGTH(sizeof(struct rtmsg)));
    return -1;
  }

  rtm = NLMSG_DATA(h);

  switch (rtm->rtm_family) {
  case AF_INET:
  case AF_INET6:
    break;

  case RTNL_FAMILY_IPMR:
  case RTNL_FAMILY_IP6MR:
    /* notifications on IPMR are irrelevant to zebra, we only care
     * about responses to RTM_GETROUTE requests we sent.
     */
    return 0;

  default:
    flog_warn(
      EC_ZEBRA_UNKNOWN_FAMILY,
      "Invalid address family: %u received from kernel route change: %s",
      rtm->rtm_family, nl_msg_type_to_str(h->nlmsg_type));
    return 0;
  }

  /* Summarize the unicast route message in the debug log. */
  if (IS_ZEBRA_DEBUG_KERNEL)
    zlog_debug("%s %s %s proto %s NS %u",
               nl_msg_type_to_str(h->nlmsg_type),
               nl_family_to_str(rtm->rtm_family),
               nl_rttype_to_str(rtm->rtm_type),
               nl_rtproto_to_str(rtm->rtm_protocol), ns_id);

  /* these are "magic" kernel-managed *unicast* routes used for
   * outputting locally generated multicast traffic (which uses unicast
   * handling on Linux because ~reasons~.
   */
  if (rtm->rtm_type == RTN_MULTICAST)
    return 0;

  netlink_route_change_read_unicast(h, ns_id, startup);
  return 0;
}
1224
1225
/* Request for specific route information from the kernel */
1226
static int netlink_request_route(struct zebra_ns *zns, int family, int type)
1227
1
{
1228
1
  struct {
1229
1
    struct nlmsghdr n;
1230
1
    struct rtmsg rtm;
1231
1
  } req;
1232
1233
  /* Form the request, specifying filter (rtattr) if needed. */
1234
1
  memset(&req, 0, sizeof(req));
1235
1
  req.n.nlmsg_type = type;
1236
1
  req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
1237
1
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
1238
1
  req.rtm.rtm_family = family;
1239
1240
1
  return netlink_request(&zns->netlink_cmd, &req);
1241
1
}
1242
1243
/* Routing table read function using netlink interface.  Only called
1244
   bootstrap time. */
1245
int netlink_route_read(struct zebra_ns *zns)
1246
1
{
1247
1
  int ret;
1248
1
  struct zebra_dplane_info dp_info;
1249
1250
1
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
1251
1252
  /* Get IPv4 routing table. */
1253
1
  ret = netlink_request_route(zns, AF_INET, RTM_GETROUTE);
1254
1
  if (ret < 0)
1255
1
    return ret;
1256
0
  ret = netlink_parse_info(netlink_route_change_read_unicast,
1257
0
         &zns->netlink_cmd, &dp_info, 0, true);
1258
0
  if (ret < 0)
1259
0
    return ret;
1260
1261
  /* Get IPv6 routing table. */
1262
0
  ret = netlink_request_route(zns, AF_INET6, RTM_GETROUTE);
1263
0
  if (ret < 0)
1264
0
    return ret;
1265
0
  ret = netlink_parse_info(netlink_route_change_read_unicast,
1266
0
         &zns->netlink_cmd, &dp_info, 0, true);
1267
0
  if (ret < 0)
1268
0
    return ret;
1269
1270
0
  return 0;
1271
0
}
1272
1273
/*
1274
 * The function returns true if the gateway info could be added
1275
 * to the message, otherwise false is returned.
1276
 */
1277
static bool _netlink_route_add_gateway_info(uint8_t route_family,
1278
              uint8_t gw_family,
1279
              struct nlmsghdr *nlmsg,
1280
              size_t req_size, int bytelen,
1281
              const struct nexthop *nexthop)
1282
0
{
1283
0
  if (route_family == AF_MPLS) {
1284
0
    struct gw_family_t gw_fam;
1285
1286
0
    gw_fam.family = gw_family;
1287
0
    if (gw_family == AF_INET)
1288
0
      memcpy(&gw_fam.gate.ipv4, &nexthop->gate.ipv4, bytelen);
1289
0
    else
1290
0
      memcpy(&gw_fam.gate.ipv6, &nexthop->gate.ipv6, bytelen);
1291
0
    if (!nl_attr_put(nlmsg, req_size, RTA_VIA, &gw_fam.family,
1292
0
         bytelen + 2))
1293
0
      return false;
1294
0
  } else {
1295
0
    if (!(nexthop->rparent
1296
0
          && IS_MAPPED_IPV6(&nexthop->rparent->gate.ipv6))) {
1297
0
      if (gw_family == AF_INET) {
1298
0
        if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1299
0
             &nexthop->gate.ipv4, bytelen))
1300
0
          return false;
1301
0
      } else {
1302
0
        if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY,
1303
0
             &nexthop->gate.ipv6, bytelen))
1304
0
          return false;
1305
0
      }
1306
0
    }
1307
0
  }
1308
1309
0
  return true;
1310
0
}
1311
1312
static int build_label_stack(struct mpls_label_stack *nh_label,
1313
           enum lsp_types_t nh_label_type,
1314
           mpls_lse_t *out_lse, char *label_buf,
1315
           size_t label_buf_size)
1316
0
{
1317
0
  char label_buf1[20];
1318
0
  int num_labels = 0;
1319
1320
0
  for (int i = 0; nh_label && i < nh_label->num_labels; i++) {
1321
0
    if (nh_label_type != ZEBRA_LSP_EVPN &&
1322
0
        nh_label->label[i] == MPLS_LABEL_IMPLICIT_NULL)
1323
0
      continue;
1324
1325
0
    if (IS_ZEBRA_DEBUG_KERNEL) {
1326
0
      if (!num_labels)
1327
0
        snprintf(label_buf, label_buf_size, "label %u",
1328
0
           nh_label->label[i]);
1329
0
      else {
1330
0
        snprintf(label_buf1, sizeof(label_buf1), "/%u",
1331
0
           nh_label->label[i]);
1332
0
        strlcat(label_buf, label_buf1, label_buf_size);
1333
0
      }
1334
0
    }
1335
1336
0
    if (nh_label_type == ZEBRA_LSP_EVPN)
1337
0
      out_lse[num_labels] = label2vni(&nh_label->label[i]);
1338
0
    else
1339
0
      out_lse[num_labels] =
1340
0
        mpls_lse_encode(nh_label->label[i], 0, 0, 0);
1341
0
    num_labels++;
1342
0
  }
1343
1344
0
  return num_labels;
1345
0
}
1346
1347
static bool _netlink_nexthop_encode_dvni_label(const struct nexthop *nexthop,
1348
                 struct nlmsghdr *nlmsg,
1349
                 mpls_lse_t *out_lse,
1350
                 size_t buflen, char *label_buf)
1351
0
{
1352
0
  struct in_addr ipv4;
1353
1354
0
  if (!nl_attr_put64(nlmsg, buflen, LWTUNNEL_IP_ID,
1355
0
         htonll((uint64_t)out_lse[0])))
1356
0
    return false;
1357
1358
0
  if (nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1359
0
    if (!nl_attr_put(nlmsg, buflen, LWTUNNEL_IP_DST,
1360
0
         &nexthop->gate.ipv4, 4))
1361
0
      return false;
1362
1363
0
  } else if (nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1364
0
    if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
1365
0
      ipv4_mapped_ipv6_to_ipv4(&nexthop->gate.ipv6, &ipv4);
1366
0
      if (!nl_attr_put(nlmsg, buflen, LWTUNNEL_IP_DST, &ipv4,
1367
0
           4))
1368
0
        return false;
1369
1370
0
    } else {
1371
0
      if (!nl_attr_put(nlmsg, buflen, LWTUNNEL_IP_DST,
1372
0
           &nexthop->gate.ipv6, 16))
1373
0
        return false;
1374
0
    }
1375
0
  } else {
1376
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1377
0
      zlog_debug(
1378
0
        "%s: nexthop %pNHv %s must NEXTHOP_TYPE_IPV*_IFINDEX to be vxlan encapped",
1379
0
        __func__, nexthop, label_buf);
1380
1381
0
    return false;
1382
0
  }
1383
1384
0
  return true;
1385
0
}
1386
1387
static bool _netlink_route_encode_label_info(const struct nexthop *nexthop,
1388
               struct nlmsghdr *nlmsg,
1389
               size_t buflen, struct rtmsg *rtmsg,
1390
               char *label_buf,
1391
               size_t label_buf_size)
1392
0
{
1393
0
  mpls_lse_t out_lse[MPLS_MAX_LABELS];
1394
0
  int num_labels;
1395
0
  struct rtattr *nest;
1396
0
  struct mpls_label_stack *nh_label;
1397
0
  enum lsp_types_t nh_label_type;
1398
1399
0
  nh_label = nexthop->nh_label;
1400
0
  nh_label_type = nexthop->nh_label_type;
1401
1402
  /*
1403
   * label_buf is *only* currently used within debugging.
1404
   * As such when we assign it we are guarding it inside
1405
   * a debug test.  If you want to change this make sure
1406
   * you fix this assumption
1407
   */
1408
0
  label_buf[0] = '\0';
1409
1410
0
  num_labels = build_label_stack(nh_label, nh_label_type, out_lse,
1411
0
               label_buf, label_buf_size);
1412
1413
0
  if (num_labels && nh_label_type == ZEBRA_LSP_EVPN) {
1414
0
    if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1415
0
           LWTUNNEL_ENCAP_IP))
1416
0
      return false;
1417
1418
0
    nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
1419
0
    if (!nest)
1420
0
      return false;
1421
1422
0
    if (_netlink_nexthop_encode_dvni_label(nexthop, nlmsg, out_lse,
1423
0
                   buflen,
1424
0
                   label_buf) == false)
1425
0
      return false;
1426
1427
0
    nl_attr_nest_end(nlmsg, nest);
1428
1429
0
  } else if (num_labels) {
1430
    /* Set the BoS bit */
1431
0
    out_lse[num_labels - 1] |= htonl(1 << MPLS_LS_S_SHIFT);
1432
1433
0
    if (rtmsg->rtm_family == AF_MPLS) {
1434
0
      if (!nl_attr_put(nlmsg, buflen, RTA_NEWDST, &out_lse,
1435
0
           num_labels * sizeof(mpls_lse_t)))
1436
0
        return false;
1437
0
    } else {
1438
0
      if (!nl_attr_put16(nlmsg, buflen, RTA_ENCAP_TYPE,
1439
0
             LWTUNNEL_ENCAP_MPLS))
1440
0
        return false;
1441
1442
0
      nest = nl_attr_nest(nlmsg, buflen, RTA_ENCAP);
1443
0
      if (!nest)
1444
0
        return false;
1445
1446
0
      if (!nl_attr_put(nlmsg, buflen, MPLS_IPTUNNEL_DST,
1447
0
           &out_lse,
1448
0
           num_labels * sizeof(mpls_lse_t)))
1449
0
        return false;
1450
0
      nl_attr_nest_end(nlmsg, nest);
1451
0
    }
1452
0
  }
1453
1454
0
  return true;
1455
0
}
1456
1457
static bool _netlink_route_encode_nexthop_src(const struct nexthop *nexthop,
1458
                int family,
1459
                struct nlmsghdr *nlmsg,
1460
                size_t buflen, int bytelen)
1461
0
{
1462
0
  if (family == AF_INET) {
1463
0
    if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
1464
0
      if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1465
0
           &nexthop->rmap_src.ipv4, bytelen))
1466
0
        return false;
1467
0
    } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
1468
0
      if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1469
0
           &nexthop->src.ipv4, bytelen))
1470
0
        return false;
1471
0
    }
1472
0
  } else if (family == AF_INET6) {
1473
0
    if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
1474
0
      if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1475
0
           &nexthop->rmap_src.ipv6, bytelen))
1476
0
        return false;
1477
0
    } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
1478
0
      if (!nl_attr_put(nlmsg, buflen, RTA_PREFSRC,
1479
0
           &nexthop->src.ipv6, bytelen))
1480
0
        return false;
1481
0
    }
1482
0
  }
1483
1484
0
  return true;
1485
0
}
1486
1487
static ssize_t fill_seg6ipt_encap(char *buffer, size_t buflen,
1488
          const struct in6_addr *seg)
1489
0
{
1490
0
  struct seg6_iptunnel_encap *ipt;
1491
0
  struct ipv6_sr_hdr *srh;
1492
0
  const size_t srhlen = 24;
1493
1494
  /*
1495
   * Caution: Support only SINGLE-SID, not MULTI-SID
1496
   * This function only supports the case where segs represents
1497
   * a single SID. If you want to extend the SRv6 functionality,
1498
   * you should improve the Boundary Check.
1499
   * Ex. In case of set a SID-List include multiple-SIDs as an
1500
   * argument of the Transit Behavior, we must support variable
1501
   * boundary check for buflen.
1502
   */
1503
0
  if (buflen < (sizeof(struct seg6_iptunnel_encap) +
1504
0
          sizeof(struct ipv6_sr_hdr) + 16))
1505
0
    return -1;
1506
1507
0
  memset(buffer, 0, buflen);
1508
1509
0
  ipt = (struct seg6_iptunnel_encap *)buffer;
1510
0
  ipt->mode = SEG6_IPTUN_MODE_ENCAP;
1511
0
  srh = ipt->srh;
1512
0
  srh->hdrlen = (srhlen >> 3) - 1;
1513
0
  srh->type = 4;
1514
0
  srh->segments_left = 0;
1515
0
  srh->first_segment = 0;
1516
0
  memcpy(&srh->segments[0], seg, sizeof(struct in6_addr));
1517
1518
0
  return srhlen + 4;
1519
0
}
1520
1521
/* This function takes a nexthop as argument and adds
1522
 * the appropriate netlink attributes to an existing
1523
 * netlink message.
1524
 *
1525
 * @param routedesc: Human readable description of route type
1526
 *                   (direct/recursive, single-/multipath)
1527
 * @param bytelen: Length of addresses in bytes.
1528
 * @param nexthop: Nexthop information
1529
 * @param nlmsg: nlmsghdr structure to fill in.
1530
 * @param req_size: The size allocated for the message.
1531
 *
1532
 * The function returns true if the nexthop could be added
1533
 * to the message, otherwise false is returned.
1534
 */
1535
static bool _netlink_route_build_singlepath(const struct prefix *p,
1536
              const char *routedesc, int bytelen,
1537
              const struct nexthop *nexthop,
1538
              struct nlmsghdr *nlmsg,
1539
              struct rtmsg *rtmsg,
1540
              size_t req_size, int cmd)
1541
0
{
1542
1543
0
  char label_buf[256];
1544
0
  struct vrf *vrf;
1545
0
  char addrstr[INET6_ADDRSTRLEN];
1546
1547
0
  assert(nexthop);
1548
1549
0
  vrf = vrf_lookup_by_id(nexthop->vrf_id);
1550
1551
0
  if (!_netlink_route_encode_label_info(nexthop, nlmsg, req_size, rtmsg,
1552
0
                label_buf, sizeof(label_buf)))
1553
0
    return false;
1554
1555
0
  if (nexthop->nh_srv6) {
1556
0
    if (nexthop->nh_srv6->seg6local_action !=
1557
0
        ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
1558
0
      struct rtattr *nest;
1559
0
      const struct seg6local_context *ctx;
1560
1561
0
      ctx = &nexthop->nh_srv6->seg6local_ctx;
1562
0
      if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1563
0
             LWTUNNEL_ENCAP_SEG6_LOCAL))
1564
0
        return false;
1565
1566
0
      nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1567
0
      if (!nest)
1568
0
        return false;
1569
1570
0
      switch (nexthop->nh_srv6->seg6local_action) {
1571
0
      case ZEBRA_SEG6_LOCAL_ACTION_END:
1572
0
        if (!nl_attr_put32(nlmsg, req_size,
1573
0
               SEG6_LOCAL_ACTION,
1574
0
               SEG6_LOCAL_ACTION_END))
1575
0
          return false;
1576
0
        break;
1577
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_X:
1578
0
        if (!nl_attr_put32(nlmsg, req_size,
1579
0
               SEG6_LOCAL_ACTION,
1580
0
               SEG6_LOCAL_ACTION_END_X))
1581
0
          return false;
1582
0
        if (!nl_attr_put(nlmsg, req_size,
1583
0
             SEG6_LOCAL_NH6, &ctx->nh6,
1584
0
             sizeof(struct in6_addr)))
1585
0
          return false;
1586
0
        break;
1587
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_T:
1588
0
        if (!nl_attr_put32(nlmsg, req_size,
1589
0
               SEG6_LOCAL_ACTION,
1590
0
               SEG6_LOCAL_ACTION_END_T))
1591
0
          return false;
1592
0
        if (!nl_attr_put32(nlmsg, req_size,
1593
0
               SEG6_LOCAL_TABLE,
1594
0
               ctx->table))
1595
0
          return false;
1596
0
        break;
1597
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_DX4:
1598
0
        if (!nl_attr_put32(nlmsg, req_size,
1599
0
               SEG6_LOCAL_ACTION,
1600
0
               SEG6_LOCAL_ACTION_END_DX4))
1601
0
          return false;
1602
0
        if (!nl_attr_put(nlmsg, req_size,
1603
0
             SEG6_LOCAL_NH4, &ctx->nh4,
1604
0
             sizeof(struct in_addr)))
1605
0
          return false;
1606
0
        break;
1607
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_DT6:
1608
0
        if (!nl_attr_put32(nlmsg, req_size,
1609
0
               SEG6_LOCAL_ACTION,
1610
0
               SEG6_LOCAL_ACTION_END_DT6))
1611
0
          return false;
1612
0
        if (!nl_attr_put32(nlmsg, req_size,
1613
0
               SEG6_LOCAL_TABLE,
1614
0
               ctx->table))
1615
0
          return false;
1616
0
        break;
1617
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_DT4:
1618
0
        if (!nl_attr_put32(nlmsg, req_size,
1619
0
               SEG6_LOCAL_ACTION,
1620
0
               SEG6_LOCAL_ACTION_END_DT4))
1621
0
          return false;
1622
0
        if (!nl_attr_put32(nlmsg, req_size,
1623
0
               SEG6_LOCAL_VRFTABLE,
1624
0
               ctx->table))
1625
0
          return false;
1626
0
        break;
1627
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_DT46:
1628
0
        if (!nl_attr_put32(nlmsg, req_size,
1629
0
               SEG6_LOCAL_ACTION,
1630
0
               SEG6_LOCAL_ACTION_END_DT46))
1631
0
          return false;
1632
0
        if (!nl_attr_put32(nlmsg, req_size,
1633
0
               SEG6_LOCAL_VRFTABLE,
1634
0
               ctx->table))
1635
0
          return false;
1636
0
        break;
1637
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_DX2:
1638
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_DX6:
1639
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_B6:
1640
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_B6_ENCAP:
1641
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_BM:
1642
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_S:
1643
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_AS:
1644
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_AM:
1645
0
      case ZEBRA_SEG6_LOCAL_ACTION_END_BPF:
1646
0
      case ZEBRA_SEG6_LOCAL_ACTION_UNSPEC:
1647
0
        zlog_err("%s: unsupport seg6local behaviour action=%u",
1648
0
           __func__,
1649
0
           nexthop->nh_srv6->seg6local_action);
1650
0
        return false;
1651
0
      }
1652
0
      nl_attr_nest_end(nlmsg, nest);
1653
0
    }
1654
1655
0
    if (!sid_zero(&nexthop->nh_srv6->seg6_segs)) {
1656
0
      char tun_buf[4096];
1657
0
      ssize_t tun_len;
1658
0
      struct rtattr *nest;
1659
1660
0
      if (!nl_attr_put16(nlmsg, req_size, RTA_ENCAP_TYPE,
1661
0
            LWTUNNEL_ENCAP_SEG6))
1662
0
        return false;
1663
0
      nest = nl_attr_nest(nlmsg, req_size, RTA_ENCAP);
1664
0
      if (!nest)
1665
0
        return false;
1666
0
      tun_len = fill_seg6ipt_encap(tun_buf, sizeof(tun_buf),
1667
0
          &nexthop->nh_srv6->seg6_segs);
1668
0
      if (tun_len < 0)
1669
0
        return false;
1670
0
      if (!nl_attr_put(nlmsg, req_size, SEG6_IPTUNNEL_SRH,
1671
0
           tun_buf, tun_len))
1672
0
        return false;
1673
0
      nl_attr_nest_end(nlmsg, nest);
1674
0
    }
1675
0
  }
1676
1677
0
  if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1678
0
    rtmsg->rtm_flags |= RTNH_F_ONLINK;
1679
1680
0
  if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1681
0
    rtmsg->rtm_flags |= RTNH_F_ONLINK;
1682
0
    if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1683
0
      return false;
1684
0
    if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1685
0
      return false;
1686
1687
0
    if (cmd == RTM_NEWROUTE) {
1688
0
      if (!_netlink_route_encode_nexthop_src(
1689
0
            nexthop, AF_INET, nlmsg, req_size, bytelen))
1690
0
        return false;
1691
0
    }
1692
1693
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1694
0
      zlog_debug("%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1695
0
           __func__, routedesc, p, ipv4_ll_buf,
1696
0
           label_buf, nexthop->ifindex,
1697
0
           VRF_LOGNAME(vrf), nexthop->vrf_id);
1698
0
    return true;
1699
0
  }
1700
1701
0
  if (nexthop->type == NEXTHOP_TYPE_IPV4
1702
0
      || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1703
    /* Send deletes to the kernel without specifying the next-hop */
1704
0
    if (cmd != RTM_DELROUTE) {
1705
0
      if (!_netlink_route_add_gateway_info(
1706
0
            rtmsg->rtm_family, AF_INET, nlmsg, req_size,
1707
0
            bytelen, nexthop))
1708
0
        return false;
1709
0
    }
1710
1711
0
    if (cmd == RTM_NEWROUTE) {
1712
0
      if (!_netlink_route_encode_nexthop_src(
1713
0
            nexthop, AF_INET, nlmsg, req_size, bytelen))
1714
0
        return false;
1715
0
    }
1716
1717
0
    if (IS_ZEBRA_DEBUG_KERNEL) {
1718
0
      inet_ntop(AF_INET, &nexthop->gate.ipv4, addrstr,
1719
0
          sizeof(addrstr));
1720
0
      zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1721
0
           __func__, routedesc, p, addrstr, label_buf,
1722
0
           nexthop->ifindex, VRF_LOGNAME(vrf),
1723
0
           nexthop->vrf_id);
1724
0
    }
1725
0
  }
1726
1727
0
  if (nexthop->type == NEXTHOP_TYPE_IPV6
1728
0
      || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1729
0
    if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1730
0
                 AF_INET6, nlmsg, req_size,
1731
0
                 bytelen, nexthop))
1732
0
      return false;
1733
1734
0
    if (cmd == RTM_NEWROUTE) {
1735
0
      if (!_netlink_route_encode_nexthop_src(
1736
0
            nexthop, AF_INET6, nlmsg, req_size,
1737
0
            bytelen))
1738
0
        return false;
1739
0
    }
1740
1741
0
    if (IS_ZEBRA_DEBUG_KERNEL) {
1742
0
      inet_ntop(AF_INET6, &nexthop->gate.ipv6, addrstr,
1743
0
          sizeof(addrstr));
1744
0
      zlog_debug("%s: (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1745
0
           __func__, routedesc, p, addrstr, label_buf,
1746
0
           nexthop->ifindex, VRF_LOGNAME(vrf),
1747
0
           nexthop->vrf_id);
1748
0
    }
1749
0
  }
1750
1751
  /*
1752
   * We have the ifindex so we should always send it
1753
   * This is especially useful if we are doing route
1754
   * leaking.
1755
   */
1756
0
  if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
1757
0
    if (!nl_attr_put32(nlmsg, req_size, RTA_OIF, nexthop->ifindex))
1758
0
      return false;
1759
0
  }
1760
1761
0
  if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1762
0
    if (cmd == RTM_NEWROUTE) {
1763
0
      if (!_netlink_route_encode_nexthop_src(
1764
0
            nexthop, AF_INET, nlmsg, req_size, bytelen))
1765
0
        return false;
1766
0
    }
1767
1768
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1769
0
      zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1770
0
           __func__, routedesc, p, nexthop->ifindex,
1771
0
           VRF_LOGNAME(vrf), nexthop->vrf_id);
1772
0
  }
1773
1774
0
  return true;
1775
0
}
1776
1777
/* This function appends tag value as rtnl flow attribute
1778
 * to the given netlink msg only if value is less than 256.
1779
 * Used only if SUPPORT_REALMS enabled.
1780
 *
1781
 * @param nlmsg: nlmsghdr structure to fill in.
1782
 * @param maxlen: The size allocated for the message.
1783
 * @param tag: The route tag.
1784
 *
1785
 * The function returns true if the flow attribute could
1786
 * be added to the message, otherwise false is returned.
1787
 */
1788
static inline bool _netlink_set_tag(struct nlmsghdr *n, unsigned int maxlen,
1789
            route_tag_t tag)
1790
0
{
1791
0
  if (tag > 0 && tag <= 255) {
1792
0
    if (!nl_attr_put32(n, maxlen, RTA_FLOW, tag))
1793
0
      return false;
1794
0
  }
1795
0
  return true;
1796
0
}
1797
1798
/* This function takes a nexthop as argument and
1799
 * appends to the given netlink msg. If the nexthop
1800
 * defines a preferred source, the src parameter
1801
 * will be modified to point to that src, otherwise
1802
 * it will be kept unmodified.
1803
 *
1804
 * @param routedesc: Human readable description of route type
1805
 *                   (direct/recursive, single-/multipath)
1806
 * @param bytelen: Length of addresses in bytes.
1807
 * @param nexthop: Nexthop information
1808
 * @param nlmsg: nlmsghdr structure to fill in.
1809
 * @param req_size: The size allocated for the message.
1810
 * @param src: pointer pointing to a location where
1811
 *             the prefsrc should be stored.
1812
 *
1813
 * The function returns true if the nexthop could be added
1814
 * to the message, otherwise false is returned.
1815
 */
1816
static bool _netlink_route_build_multipath(
1817
  const struct prefix *p, const char *routedesc, int bytelen,
1818
  const struct nexthop *nexthop, struct nlmsghdr *nlmsg, size_t req_size,
1819
  struct rtmsg *rtmsg, const union g_addr **src, route_tag_t tag)
1820
0
{
1821
0
  char label_buf[256];
1822
0
  struct vrf *vrf;
1823
0
  struct rtnexthop *rtnh;
1824
1825
0
  rtnh = nl_attr_rtnh(nlmsg, req_size);
1826
0
  if (rtnh == NULL)
1827
0
    return false;
1828
1829
0
  assert(nexthop);
1830
1831
0
  vrf = vrf_lookup_by_id(nexthop->vrf_id);
1832
1833
0
  if (!_netlink_route_encode_label_info(nexthop, nlmsg, req_size, rtmsg,
1834
0
                label_buf, sizeof(label_buf)))
1835
0
    return false;
1836
1837
0
  if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK))
1838
0
    rtnh->rtnh_flags |= RTNH_F_ONLINK;
1839
1840
0
  if (is_route_v4_over_v6(rtmsg->rtm_family, nexthop->type)) {
1841
0
    rtnh->rtnh_flags |= RTNH_F_ONLINK;
1842
0
    if (!nl_attr_put(nlmsg, req_size, RTA_GATEWAY, &ipv4_ll, 4))
1843
0
      return false;
1844
0
    rtnh->rtnh_ifindex = nexthop->ifindex;
1845
0
    if (nexthop->weight)
1846
0
      rtnh->rtnh_hops = nexthop->weight - 1;
1847
1848
0
    if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1849
0
      *src = &nexthop->rmap_src;
1850
0
    else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1851
0
      *src = &nexthop->src;
1852
1853
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1854
0
      zlog_debug(
1855
0
        "%s: 5549 (%s): %pFX nexthop via %s %s if %u vrf %s(%u)",
1856
0
        __func__, routedesc, p, ipv4_ll_buf, label_buf,
1857
0
        nexthop->ifindex, VRF_LOGNAME(vrf),
1858
0
        nexthop->vrf_id);
1859
0
    nl_attr_rtnh_end(nlmsg, rtnh);
1860
0
    return true;
1861
0
  }
1862
1863
0
  if (nexthop->type == NEXTHOP_TYPE_IPV4
1864
0
      || nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX) {
1865
0
    if (!_netlink_route_add_gateway_info(rtmsg->rtm_family, AF_INET,
1866
0
                 nlmsg, req_size, bytelen,
1867
0
                 nexthop))
1868
0
      return false;
1869
1870
0
    if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1871
0
      *src = &nexthop->rmap_src;
1872
0
    else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1873
0
      *src = &nexthop->src;
1874
1875
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1876
0
      zlog_debug("%s: (%s): %pFX nexthop via %pI4 %s if %u vrf %s(%u)",
1877
0
           __func__, routedesc, p, &nexthop->gate.ipv4,
1878
0
           label_buf, nexthop->ifindex,
1879
0
           VRF_LOGNAME(vrf), nexthop->vrf_id);
1880
0
  }
1881
0
  if (nexthop->type == NEXTHOP_TYPE_IPV6
1882
0
      || nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
1883
0
    if (!_netlink_route_add_gateway_info(rtmsg->rtm_family,
1884
0
                 AF_INET6, nlmsg, req_size,
1885
0
                 bytelen, nexthop))
1886
0
      return false;
1887
1888
0
    if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6))
1889
0
      *src = &nexthop->rmap_src;
1890
0
    else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6))
1891
0
      *src = &nexthop->src;
1892
1893
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1894
0
      zlog_debug("%s: (%s): %pFX nexthop via %pI6 %s if %u vrf %s(%u)",
1895
0
           __func__, routedesc, p, &nexthop->gate.ipv6,
1896
0
           label_buf, nexthop->ifindex,
1897
0
           VRF_LOGNAME(vrf), nexthop->vrf_id);
1898
0
  }
1899
1900
  /*
1901
   * We have figured out the ifindex so we should always send it
1902
   * This is especially useful if we are doing route
1903
   * leaking.
1904
   */
1905
0
  if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE)
1906
0
    rtnh->rtnh_ifindex = nexthop->ifindex;
1907
1908
  /* ifindex */
1909
0
  if (nexthop->type == NEXTHOP_TYPE_IFINDEX) {
1910
0
    if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY)
1911
0
      *src = &nexthop->rmap_src;
1912
0
    else if (nexthop->src.ipv4.s_addr != INADDR_ANY)
1913
0
      *src = &nexthop->src;
1914
1915
0
    if (IS_ZEBRA_DEBUG_KERNEL)
1916
0
      zlog_debug("%s: (%s): %pFX nexthop via if %u vrf %s(%u)",
1917
0
           __func__, routedesc, p, nexthop->ifindex,
1918
0
           VRF_LOGNAME(vrf), nexthop->vrf_id);
1919
0
  }
1920
1921
0
  if (nexthop->weight)
1922
0
    rtnh->rtnh_hops = nexthop->weight - 1;
1923
1924
0
  if (!_netlink_set_tag(nlmsg, req_size, tag))
1925
0
    return false;
1926
1927
0
  nl_attr_rtnh_end(nlmsg, rtnh);
1928
0
  return true;
1929
0
}
1930
1931
static inline bool
1932
_netlink_mpls_build_singlepath(const struct prefix *p, const char *routedesc,
1933
             const struct zebra_nhlfe *nhlfe,
1934
             struct nlmsghdr *nlmsg, struct rtmsg *rtmsg,
1935
             size_t req_size, int cmd)
1936
0
{
1937
0
  int bytelen;
1938
0
  uint8_t family;
1939
1940
0
  family = NHLFE_FAMILY(nhlfe);
1941
0
  bytelen = (family == AF_INET ? 4 : 16);
1942
0
  return _netlink_route_build_singlepath(p, routedesc, bytelen,
1943
0
                 nhlfe->nexthop, nlmsg, rtmsg,
1944
0
                 req_size, cmd);
1945
0
}
1946
1947
1948
static inline bool
1949
_netlink_mpls_build_multipath(const struct prefix *p, const char *routedesc,
1950
            const struct zebra_nhlfe *nhlfe,
1951
            struct nlmsghdr *nlmsg, size_t req_size,
1952
            struct rtmsg *rtmsg, const union g_addr **src)
1953
0
{
1954
0
  int bytelen;
1955
0
  uint8_t family;
1956
1957
0
  family = NHLFE_FAMILY(nhlfe);
1958
0
  bytelen = (family == AF_INET ? 4 : 16);
1959
0
  return _netlink_route_build_multipath(p, routedesc, bytelen,
1960
0
                nhlfe->nexthop, nlmsg, req_size,
1961
0
                rtmsg, src, 0);
1962
0
}
1963
1964
static void _netlink_mpls_debug(int cmd, uint32_t label, const char *routedesc)
1965
0
{
1966
0
  if (IS_ZEBRA_DEBUG_KERNEL)
1967
0
    zlog_debug("netlink_mpls_multipath_msg_encode() (%s): %s %u/20",
1968
0
         routedesc, nl_msg_type_to_str(cmd), label);
1969
0
}
1970
1971
/*
 * Build and send an RTM_NEWNEIGH / RTM_DELNEIGH request for one neighbor
 * entry.
 *
 * cmd       - RTM_NEWNEIGH or RTM_DELNEIGH
 * ifindex   - interface the neighbor entry belongs to
 * addr      - neighbor address; family2addrsize(family) bytes are copied
 * lla       - link-layer address, may be NULL (attribute is then omitted)
 * llalen    - number of bytes at lla
 * ns_id     - namespace whose netlink command socket is used
 * family    - address family stored in ndm_family
 * permanent - for RTM_NEWNEIGH: NUD_PERMANENT when true, else NUD_REACHABLE
 * protocol  - value sent as NDA_PROTOCOL
 *
 * Returns the result of netlink_talk(), or -1 if an attribute does not fit
 * into the request buffer.
 */
static int netlink_neigh_update(int cmd, int ifindex, void *addr, char *lla,
				int llalen, ns_id_t ns_id, uint8_t family,
				bool permanent, uint8_t protocol)
{
	struct {
		struct nlmsghdr n;
		struct ndmsg ndm;
		char buf[256];
	} req;

	struct zebra_ns *zns = zebra_ns_lookup(ns_id);

	memset(&req, 0, sizeof(req));

	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
	req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
	req.n.nlmsg_type = cmd; // RTM_NEWNEIGH or RTM_DELNEIGH
	req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;

	req.ndm.ndm_family = family;
	req.ndm.ndm_ifindex = ifindex;
	req.ndm.ndm_type = RTN_UNICAST;
	if (cmd == RTM_NEWNEIGH)
		req.ndm.ndm_state = permanent ? NUD_PERMANENT : NUD_REACHABLE;
	else
		req.ndm.ndm_state = NUD_FAILED;

	/*
	 * Do not send a request that silently lacks an attribute the
	 * caller asked for: fail if any attribute does not fit.
	 */
	if (!nl_attr_put(&req.n, sizeof(req), NDA_PROTOCOL, &protocol,
			 sizeof(protocol)))
		return -1;
	if (!nl_attr_put(&req.n, sizeof(req), NDA_DST, addr,
			 family2addrsize(family)))
		return -1;
	if (lla && !nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, lla, llalen))
		return -1;

	if (IS_ZEBRA_DEBUG_KERNEL) {
		char ip_str[INET6_ADDRSTRLEN + 8];
		/* zns was already resolved above; no second lookup needed */
		struct interface *ifp = if_lookup_by_index_per_ns(zns, ifindex);

		if (ifp) {
			if (family == AF_INET6)
				snprintfrr(ip_str, sizeof(ip_str), "ipv6 %pI6",
					   (struct in6_addr *)addr);
			else
				snprintfrr(ip_str, sizeof(ip_str), "ipv4 %pI4",
					   (in_addr_t *)addr);
			zlog_debug(
				"%s: %s ifname %s ifindex %u addr %s mac %pEA vrf %s(%u)",
				__func__, nl_msg_type_to_str(cmd), ifp->name,
				ifindex, ip_str, (struct ethaddr *)lla,
				vrf_id_to_name(ifp->vrf->vrf_id),
				ifp->vrf->vrf_id);
		}
	}
	return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
			    false);
}
2031
2032
static bool nexthop_set_src(const struct nexthop *nexthop, int family,
2033
          union g_addr *src)
2034
0
{
2035
0
  if (family == AF_INET) {
2036
0
    if (nexthop->rmap_src.ipv4.s_addr != INADDR_ANY) {
2037
0
      src->ipv4 = nexthop->rmap_src.ipv4;
2038
0
      return true;
2039
0
    } else if (nexthop->src.ipv4.s_addr != INADDR_ANY) {
2040
0
      src->ipv4 = nexthop->src.ipv4;
2041
0
      return true;
2042
0
    }
2043
0
  } else if (family == AF_INET6) {
2044
0
    if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->rmap_src.ipv6)) {
2045
0
      src->ipv6 = nexthop->rmap_src.ipv6;
2046
0
      return true;
2047
0
    } else if (!IN6_IS_ADDR_UNSPECIFIED(&nexthop->src.ipv6)) {
2048
0
      src->ipv6 = nexthop->src.ipv6;
2049
0
      return true;
2050
0
    }
2051
0
  }
2052
2053
0
  return false;
2054
0
}
2055
2056
/*
2057
 * The function returns true if the attribute could be added
2058
 * to the message, otherwise false is returned.
2059
 */
2060
static int netlink_route_nexthop_encap(struct nlmsghdr *n, size_t nlen,
2061
               struct nexthop *nh)
2062
0
{
2063
0
  struct rtattr *nest;
2064
2065
0
  switch (nh->nh_encap_type) {
2066
0
  case NET_VXLAN:
2067
0
    if (!nl_attr_put16(n, nlen, RTA_ENCAP_TYPE, nh->nh_encap_type))
2068
0
      return false;
2069
2070
0
    nest = nl_attr_nest(n, nlen, RTA_ENCAP);
2071
0
    if (!nest)
2072
0
      return false;
2073
2074
0
    if (!nl_attr_put32(n, nlen, 0 /* VXLAN_VNI */,
2075
0
           nh->nh_encap.vni))
2076
0
      return false;
2077
0
    nl_attr_nest_end(n, nest);
2078
0
    break;
2079
0
  }
2080
2081
0
  return true;
2082
0
}
2083
2084
/*
2085
 * Routing table change via netlink interface, using a dataplane context object
2086
 *
2087
 * Returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2088
 * otherwise the number of bytes written to buf.
2089
 */
2090
ssize_t netlink_route_multipath_msg_encode(int cmd,
2091
             struct zebra_dplane_ctx *ctx,
2092
             uint8_t *data, size_t datalen,
2093
             bool fpm, bool force_nhg)
2094
0
{
2095
0
  int bytelen;
2096
0
  struct nexthop *nexthop = NULL;
2097
0
  unsigned int nexthop_num;
2098
0
  const char *routedesc;
2099
0
  bool setsrc = false;
2100
0
  union g_addr src;
2101
0
  const struct prefix *p, *src_p;
2102
0
  uint32_t table_id;
2103
0
  struct nlsock *nl;
2104
0
  route_tag_t tag = 0;
2105
2106
0
  struct {
2107
0
    struct nlmsghdr n;
2108
0
    struct rtmsg r;
2109
0
    char buf[];
2110
0
  } *req = (void *)data;
2111
2112
0
  p = dplane_ctx_get_dest(ctx);
2113
0
  src_p = dplane_ctx_get_src(ctx);
2114
2115
0
  if (datalen < sizeof(*req))
2116
0
    return 0;
2117
2118
0
  nl = kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
2119
2120
0
  memset(req, 0, sizeof(*req));
2121
2122
0
  bytelen = (p->family == AF_INET ? 4 : 16);
2123
2124
0
  req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2125
0
  req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2126
2127
0
  if ((cmd == RTM_NEWROUTE) &&
2128
0
      ((p->family == AF_INET) || v6_rr_semantics))
2129
0
    req->n.nlmsg_flags |= NLM_F_REPLACE;
2130
2131
0
  req->n.nlmsg_type = cmd;
2132
2133
0
  req->n.nlmsg_pid = nl->snl.nl_pid;
2134
2135
0
  req->r.rtm_family = p->family;
2136
0
  req->r.rtm_dst_len = p->prefixlen;
2137
0
  req->r.rtm_src_len = src_p ? src_p->prefixlen : 0;
2138
0
  req->r.rtm_scope = RT_SCOPE_UNIVERSE;
2139
2140
0
  if (cmd == RTM_DELROUTE)
2141
0
    req->r.rtm_protocol = zebra2proto(dplane_ctx_get_old_type(ctx));
2142
0
  else
2143
0
    req->r.rtm_protocol = zebra2proto(dplane_ctx_get_type(ctx));
2144
2145
  /*
2146
   * blackhole routes are not RTN_UNICAST, they are
2147
   * RTN_ BLACKHOLE|UNREACHABLE|PROHIBIT
2148
   * so setting this value as a RTN_UNICAST would
2149
   * cause the route lookup of just the prefix
2150
   * to fail.  So no need to specify this for
2151
   * the RTM_DELROUTE case
2152
   */
2153
0
  if (cmd != RTM_DELROUTE)
2154
0
    req->r.rtm_type = RTN_UNICAST;
2155
2156
0
  if (!nl_attr_put(&req->n, datalen, RTA_DST, &p->u.prefix, bytelen))
2157
0
    return 0;
2158
0
  if (src_p) {
2159
0
    if (!nl_attr_put(&req->n, datalen, RTA_SRC, &src_p->u.prefix,
2160
0
         bytelen))
2161
0
      return 0;
2162
0
  }
2163
2164
  /* Metric. */
2165
  /* Hardcode the metric for all routes coming from zebra. Metric isn't
2166
   * used
2167
   * either by the kernel or by zebra. Its purely for calculating best
2168
   * path(s)
2169
   * by the routing protocol and for communicating with protocol peers.
2170
   */
2171
0
  if (!nl_attr_put32(&req->n, datalen, RTA_PRIORITY,
2172
0
         ROUTE_INSTALLATION_METRIC))
2173
0
    return 0;
2174
2175
#if defined(SUPPORT_REALMS)
2176
  if (cmd == RTM_DELROUTE)
2177
    tag = dplane_ctx_get_old_tag(ctx);
2178
  else
2179
    tag = dplane_ctx_get_tag(ctx);
2180
#endif
2181
2182
  /* Table corresponding to this route. */
2183
0
  table_id = dplane_ctx_get_table(ctx);
2184
0
  if (table_id < 256)
2185
0
    req->r.rtm_table = table_id;
2186
0
  else {
2187
0
    req->r.rtm_table = RT_TABLE_UNSPEC;
2188
0
    if (!nl_attr_put32(&req->n, datalen, RTA_TABLE, table_id))
2189
0
      return 0;
2190
0
  }
2191
2192
0
  if (IS_ZEBRA_DEBUG_KERNEL)
2193
0
    zlog_debug(
2194
0
      "%s: %s %pFX vrf %u(%u)", __func__,
2195
0
      nl_msg_type_to_str(cmd), p, dplane_ctx_get_vrf(ctx),
2196
0
      table_id);
2197
2198
  /*
2199
   * If we are not updating the route and we have received
2200
   * a route delete, then all we need to fill in is the
2201
   * prefix information to tell the kernel to schwack
2202
   * it.
2203
   */
2204
0
  if (cmd == RTM_DELROUTE) {
2205
0
    if (!_netlink_set_tag(&req->n, datalen, tag))
2206
0
      return 0;
2207
0
    return NLMSG_ALIGN(req->n.nlmsg_len);
2208
0
  }
2209
2210
0
  if (dplane_ctx_get_mtu(ctx) || dplane_ctx_get_nh_mtu(ctx)) {
2211
0
    struct rtattr *nest;
2212
0
    uint32_t mtu = dplane_ctx_get_mtu(ctx);
2213
0
    uint32_t nexthop_mtu = dplane_ctx_get_nh_mtu(ctx);
2214
2215
0
    if (!mtu || (nexthop_mtu && nexthop_mtu < mtu))
2216
0
      mtu = nexthop_mtu;
2217
2218
0
    nest = nl_attr_nest(&req->n, datalen, RTA_METRICS);
2219
0
    if (nest == NULL)
2220
0
      return 0;
2221
2222
0
    if (!nl_attr_put(&req->n, datalen, RTAX_MTU, &mtu, sizeof(mtu)))
2223
0
      return 0;
2224
0
    nl_attr_nest_end(&req->n, nest);
2225
0
  }
2226
2227
  /*
2228
   * Always install blackhole routes without using nexthops, because of
2229
   * the following kernel problems:
2230
   * 1. Kernel nexthops don't suport unreachable/prohibit route types.
2231
   * 2. Blackhole kernel nexthops are deleted when loopback is down.
2232
   */
2233
0
  nexthop = dplane_ctx_get_ng(ctx)->nexthop;
2234
0
  if (nexthop) {
2235
0
    if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2236
0
      nexthop = nexthop->resolved;
2237
2238
0
    if (nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
2239
0
      switch (nexthop->bh_type) {
2240
0
      case BLACKHOLE_ADMINPROHIB:
2241
0
        req->r.rtm_type = RTN_PROHIBIT;
2242
0
        break;
2243
0
      case BLACKHOLE_REJECT:
2244
0
        req->r.rtm_type = RTN_UNREACHABLE;
2245
0
        break;
2246
0
      case BLACKHOLE_UNSPEC:
2247
0
      case BLACKHOLE_NULL:
2248
0
        req->r.rtm_type = RTN_BLACKHOLE;
2249
0
        break;
2250
0
      }
2251
0
      return NLMSG_ALIGN(req->n.nlmsg_len);
2252
0
    }
2253
0
  }
2254
2255
0
  if ((!fpm && kernel_nexthops_supported()
2256
0
       && (!proto_nexthops_only()
2257
0
     || is_proto_nhg(dplane_ctx_get_nhe_id(ctx), 0)))
2258
0
      || (fpm && force_nhg)) {
2259
    /* Kernel supports nexthop objects */
2260
0
    if (IS_ZEBRA_DEBUG_KERNEL)
2261
0
      zlog_debug("%s: %pFX nhg_id is %u", __func__, p,
2262
0
           dplane_ctx_get_nhe_id(ctx));
2263
2264
0
    if (!nl_attr_put32(&req->n, datalen, RTA_NH_ID,
2265
0
           dplane_ctx_get_nhe_id(ctx)))
2266
0
      return 0;
2267
2268
    /* Have to determine src still */
2269
0
    for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2270
0
      if (setsrc)
2271
0
        break;
2272
2273
0
      setsrc = nexthop_set_src(nexthop, p->family, &src);
2274
0
    }
2275
2276
0
    if (setsrc) {
2277
0
      if (p->family == AF_INET) {
2278
0
        if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2279
0
             &src.ipv4, bytelen))
2280
0
          return 0;
2281
0
      } else if (p->family == AF_INET6) {
2282
0
        if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2283
0
             &src.ipv6, bytelen))
2284
0
          return 0;
2285
0
      }
2286
0
    }
2287
2288
0
    return NLMSG_ALIGN(req->n.nlmsg_len);
2289
0
  }
2290
2291
  /* Count overall nexthops so we can decide whether to use singlepath
2292
   * or multipath case.
2293
   */
2294
0
  nexthop_num = 0;
2295
0
  for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2296
0
    if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE))
2297
0
      continue;
2298
0
    if (!NEXTHOP_IS_ACTIVE(nexthop->flags))
2299
0
      continue;
2300
2301
0
    nexthop_num++;
2302
0
  }
2303
2304
  /* Singlepath case. */
2305
0
  if (nexthop_num == 1) {
2306
0
    nexthop_num = 0;
2307
0
    for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2308
0
      if (CHECK_FLAG(nexthop->flags,
2309
0
               NEXTHOP_FLAG_RECURSIVE)) {
2310
2311
0
        if (setsrc)
2312
0
          continue;
2313
2314
0
        setsrc = nexthop_set_src(nexthop, p->family,
2315
0
               &src);
2316
0
        continue;
2317
0
      }
2318
2319
0
      if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
2320
0
        routedesc = nexthop->rparent
2321
0
                ? "recursive, single-path"
2322
0
                : "single-path";
2323
2324
0
        if (!_netlink_set_tag(&req->n, datalen, tag))
2325
0
          return 0;
2326
2327
0
        if (!_netlink_route_build_singlepath(
2328
0
              p, routedesc, bytelen, nexthop,
2329
0
              &req->n, &req->r, datalen, cmd))
2330
0
          return 0;
2331
0
        nexthop_num++;
2332
0
        break;
2333
0
      }
2334
2335
      /*
2336
       * Add encapsulation information when installing via
2337
       * FPM.
2338
       */
2339
0
      if (fpm) {
2340
0
        if (!netlink_route_nexthop_encap(
2341
0
              &req->n, datalen, nexthop))
2342
0
          return 0;
2343
0
      }
2344
0
    }
2345
2346
0
    if (setsrc) {
2347
0
      if (p->family == AF_INET) {
2348
0
        if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2349
0
             &src.ipv4, bytelen))
2350
0
          return 0;
2351
0
      } else if (p->family == AF_INET6) {
2352
0
        if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2353
0
             &src.ipv6, bytelen))
2354
0
          return 0;
2355
0
      }
2356
0
    }
2357
0
  } else {    /* Multipath case */
2358
0
    struct rtattr *nest;
2359
0
    const union g_addr *src1 = NULL;
2360
2361
0
    nest = nl_attr_nest(&req->n, datalen, RTA_MULTIPATH);
2362
0
    if (nest == NULL)
2363
0
      return 0;
2364
2365
0
    nexthop_num = 0;
2366
0
    for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx), nexthop)) {
2367
0
      if (CHECK_FLAG(nexthop->flags,
2368
0
               NEXTHOP_FLAG_RECURSIVE)) {
2369
        /* This only works for IPv4 now */
2370
0
        if (setsrc)
2371
0
          continue;
2372
2373
0
        setsrc = nexthop_set_src(nexthop, p->family,
2374
0
               &src);
2375
0
        continue;
2376
0
      }
2377
2378
0
      if (NEXTHOP_IS_ACTIVE(nexthop->flags)) {
2379
0
        routedesc = nexthop->rparent
2380
0
                ? "recursive, multipath"
2381
0
                : "multipath";
2382
0
        nexthop_num++;
2383
2384
0
        if (!_netlink_route_build_multipath(
2385
0
              p, routedesc, bytelen, nexthop,
2386
0
              &req->n, datalen, &req->r, &src1,
2387
0
              tag))
2388
0
          return 0;
2389
2390
0
        if (!setsrc && src1) {
2391
0
          if (p->family == AF_INET)
2392
0
            src.ipv4 = src1->ipv4;
2393
0
          else if (p->family == AF_INET6)
2394
0
            src.ipv6 = src1->ipv6;
2395
2396
0
          setsrc = 1;
2397
0
        }
2398
0
      }
2399
0
    }
2400
2401
0
    nl_attr_nest_end(&req->n, nest);
2402
2403
    /*
2404
     * Add encapsulation information when installing via
2405
     * FPM.
2406
     */
2407
0
    if (fpm) {
2408
0
      for (ALL_NEXTHOPS_PTR(dplane_ctx_get_ng(ctx),
2409
0
                nexthop)) {
2410
0
        if (CHECK_FLAG(nexthop->flags,
2411
0
                 NEXTHOP_FLAG_RECURSIVE))
2412
0
          continue;
2413
0
        if (!netlink_route_nexthop_encap(
2414
0
              &req->n, datalen, nexthop))
2415
0
          return 0;
2416
0
      }
2417
0
    }
2418
2419
2420
0
    if (setsrc) {
2421
0
      if (p->family == AF_INET) {
2422
0
        if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2423
0
             &src.ipv4, bytelen))
2424
0
          return 0;
2425
0
      } else if (p->family == AF_INET6) {
2426
0
        if (!nl_attr_put(&req->n, datalen, RTA_PREFSRC,
2427
0
             &src.ipv6, bytelen))
2428
0
          return 0;
2429
0
      }
2430
0
      if (IS_ZEBRA_DEBUG_KERNEL)
2431
0
        zlog_debug("Setting source");
2432
0
    }
2433
0
  }
2434
2435
  /* If there is no useful nexthop then return. */
2436
0
  if (nexthop_num == 0) {
2437
0
    if (IS_ZEBRA_DEBUG_KERNEL)
2438
0
      zlog_debug("%s: No useful nexthop.", __func__);
2439
0
  }
2440
2441
0
  return NLMSG_ALIGN(req->n.nlmsg_len);
2442
0
}
2443
2444
int kernel_get_ipmr_sg_stats(struct zebra_vrf *zvrf, void *in)
2445
0
{
2446
0
  uint32_t actual_table;
2447
0
  int suc = 0;
2448
0
  struct mcast_route_data *mr = (struct mcast_route_data *)in;
2449
0
  struct {
2450
0
    struct nlmsghdr n;
2451
0
    struct rtmsg rtm;
2452
0
    char buf[256];
2453
0
  } req;
2454
2455
0
  mroute = mr;
2456
0
  struct zebra_ns *zns;
2457
2458
0
  zns = zvrf->zns;
2459
0
  memset(&req, 0, sizeof(req));
2460
2461
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
2462
0
  req.n.nlmsg_flags = NLM_F_REQUEST;
2463
0
  req.n.nlmsg_pid = zns->netlink_cmd.snl.nl_pid;
2464
2465
0
  req.n.nlmsg_type = RTM_GETROUTE;
2466
2467
0
  if (mroute->family == AF_INET) {
2468
0
    req.rtm.rtm_family = RTNL_FAMILY_IPMR;
2469
0
    req.rtm.rtm_dst_len = IPV4_MAX_BITLEN;
2470
0
    req.rtm.rtm_src_len = IPV4_MAX_BITLEN;
2471
2472
0
    nl_attr_put(&req.n, sizeof(req), RTA_SRC,
2473
0
          &mroute->src.ipaddr_v4,
2474
0
          sizeof(mroute->src.ipaddr_v4));
2475
0
    nl_attr_put(&req.n, sizeof(req), RTA_DST,
2476
0
          &mroute->grp.ipaddr_v4,
2477
0
          sizeof(mroute->grp.ipaddr_v4));
2478
0
  } else {
2479
0
    req.rtm.rtm_family = RTNL_FAMILY_IP6MR;
2480
0
    req.rtm.rtm_dst_len = IPV6_MAX_BITLEN;
2481
0
    req.rtm.rtm_src_len = IPV6_MAX_BITLEN;
2482
2483
0
    nl_attr_put(&req.n, sizeof(req), RTA_SRC,
2484
0
          &mroute->src.ipaddr_v6,
2485
0
          sizeof(mroute->src.ipaddr_v6));
2486
0
    nl_attr_put(&req.n, sizeof(req), RTA_DST,
2487
0
          &mroute->grp.ipaddr_v6,
2488
0
          sizeof(mroute->grp.ipaddr_v6));
2489
0
  }
2490
2491
  /*
2492
   * What?
2493
   *
2494
   * So during the namespace cleanup we started storing
2495
   * the zvrf table_id for the default table as RT_TABLE_MAIN
2496
   * which is what the normal routing table for ip routing is.
2497
   * This change caused this to break our lookups of sg data
2498
   * because prior to this change the zvrf->table_id was 0
2499
   * and when the pim multicast kernel code saw a 0,
2500
   * it was auto-translated to RT_TABLE_DEFAULT.  But since
2501
   * we are now passing in RT_TABLE_MAIN there is no auto-translation
2502
   * and the kernel goes screw you and the delicious cookies you
2503
   * are trying to give me.  So now we have this little hack.
2504
   */
2505
0
  if (mroute->family == AF_INET)
2506
0
    actual_table = (zvrf->table_id == RT_TABLE_MAIN)
2507
0
               ? RT_TABLE_DEFAULT
2508
0
               : zvrf->table_id;
2509
0
  else
2510
0
    actual_table = zvrf->table_id;
2511
2512
0
  nl_attr_put32(&req.n, sizeof(req), RTA_TABLE, actual_table);
2513
2514
0
  suc = netlink_talk(netlink_route_change_read_multicast, &req.n,
2515
0
         &zns->netlink_cmd, zns, false);
2516
2517
0
  mroute = NULL;
2518
0
  return suc;
2519
0
}
2520
2521
/* Char length to debug ID with */
2522
#define ID_LENGTH 10
2523
2524
static bool _netlink_nexthop_build_group(struct nlmsghdr *n, size_t req_size,
2525
           uint32_t id,
2526
           const struct nh_grp *z_grp,
2527
           const uint8_t count, bool resilient,
2528
           const struct nhg_resilience *nhgr)
2529
0
{
2530
0
  struct nexthop_grp grp[count];
2531
  /* Need space for max group size, "/", and null term */
2532
0
  char buf[(MULTIPATH_NUM * (ID_LENGTH + 1)) + 1];
2533
0
  char buf1[ID_LENGTH + 2];
2534
2535
0
  buf[0] = '\0';
2536
2537
0
  memset(grp, 0, sizeof(grp));
2538
2539
0
  if (count) {
2540
0
    for (int i = 0; i < count; i++) {
2541
0
      grp[i].id = z_grp[i].id;
2542
0
      grp[i].weight = z_grp[i].weight - 1;
2543
2544
0
      if (IS_ZEBRA_DEBUG_KERNEL) {
2545
0
        if (i == 0)
2546
0
          snprintf(buf, sizeof(buf1), "group %u",
2547
0
             grp[i].id);
2548
0
        else {
2549
0
          snprintf(buf1, sizeof(buf1), "/%u",
2550
0
             grp[i].id);
2551
0
          strlcat(buf, buf1, sizeof(buf));
2552
0
        }
2553
0
      }
2554
0
    }
2555
0
    if (!nl_attr_put(n, req_size, NHA_GROUP, grp,
2556
0
         count * sizeof(*grp)))
2557
0
      return false;
2558
2559
0
    if (resilient) {
2560
0
      struct rtattr *nest;
2561
2562
0
      nest = nl_attr_nest(n, req_size, NHA_RES_GROUP);
2563
2564
0
      nl_attr_put16(n, req_size, NHA_RES_GROUP_BUCKETS,
2565
0
              nhgr->buckets);
2566
0
      nl_attr_put32(n, req_size, NHA_RES_GROUP_IDLE_TIMER,
2567
0
              nhgr->idle_timer * 1000);
2568
0
      nl_attr_put32(n, req_size,
2569
0
              NHA_RES_GROUP_UNBALANCED_TIMER,
2570
0
              nhgr->unbalanced_timer * 1000);
2571
0
      nl_attr_nest_end(n, nest);
2572
2573
0
      nl_attr_put16(n, req_size, NHA_GROUP_TYPE,
2574
0
              NEXTHOP_GRP_TYPE_RES);
2575
0
    }
2576
0
  }
2577
2578
0
  if (IS_ZEBRA_DEBUG_KERNEL)
2579
0
    zlog_debug("%s: ID (%u): %s", __func__, id, buf);
2580
2581
0
  return true;
2582
0
}
2583
2584
/**
2585
 * Next hop packet encoding helper function.
2586
 *
2587
 * \param[in] cmd netlink command.
2588
 * \param[in] ctx dataplane context (information snapshot).
2589
 * \param[out] buf buffer to hold the packet.
2590
 * \param[in] buflen amount of buffer bytes.
2591
 *
2592
 * \returns -1 on failure, 0 when the msg doesn't fit entirely in the buffer
2593
 * otherwise the number of bytes written to buf.
2594
 */
2595
ssize_t netlink_nexthop_msg_encode(uint16_t cmd,
2596
           const struct zebra_dplane_ctx *ctx,
2597
           void *buf, size_t buflen, bool fpm)
2598
0
{
2599
0
  struct {
2600
0
    struct nlmsghdr n;
2601
0
    struct nhmsg nhm;
2602
0
    char buf[];
2603
0
  } *req = buf;
2604
2605
0
  mpls_lse_t out_lse[MPLS_MAX_LABELS];
2606
0
  char label_buf[256];
2607
0
  int num_labels = 0;
2608
0
  uint32_t id = dplane_ctx_get_nhe_id(ctx);
2609
0
  int type = dplane_ctx_get_nhe_type(ctx);
2610
0
  struct rtattr *nest;
2611
0
  uint16_t encap;
2612
0
  struct nlsock *nl =
2613
0
    kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
2614
2615
0
  if (!id) {
2616
0
    flog_err(
2617
0
      EC_ZEBRA_NHG_FIB_UPDATE,
2618
0
      "Failed trying to update a nexthop group in the kernel that does not have an ID");
2619
0
    return -1;
2620
0
  }
2621
2622
  /*
2623
   * Nothing to do if the kernel doesn't support nexthop objects or
2624
   * we dont want to install this type of NHG, but FPM may possible to
2625
   * handle this.
2626
   */
2627
0
  if (!fpm && !kernel_nexthops_supported()) {
2628
0
    if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2629
0
      zlog_debug(
2630
0
        "%s: nhg_id %u (%s): kernel nexthops not supported, ignoring",
2631
0
        __func__, id, zebra_route_string(type));
2632
0
    return 0;
2633
0
  }
2634
2635
0
  if (proto_nexthops_only() && !is_proto_nhg(id, type)) {
2636
0
    if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
2637
0
      zlog_debug(
2638
0
        "%s: nhg_id %u (%s): proto-based nexthops only, ignoring",
2639
0
        __func__, id, zebra_route_string(type));
2640
0
    return 0;
2641
0
  }
2642
2643
0
  label_buf[0] = '\0';
2644
2645
0
  if (buflen < sizeof(*req))
2646
0
    return 0;
2647
2648
0
  memset(req, 0, sizeof(*req));
2649
2650
0
  req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
2651
0
  req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
2652
2653
0
  if (cmd == RTM_NEWNEXTHOP)
2654
0
    req->n.nlmsg_flags |= NLM_F_REPLACE;
2655
2656
0
  req->n.nlmsg_type = cmd;
2657
0
  req->n.nlmsg_pid = nl->snl.nl_pid;
2658
2659
0
  req->nhm.nh_family = AF_UNSPEC;
2660
  /* TODO: Scope? */
2661
2662
0
  if (!nl_attr_put32(&req->n, buflen, NHA_ID, id))
2663
0
    return 0;
2664
2665
0
  if (cmd == RTM_NEWNEXTHOP) {
2666
    /*
2667
     * We distinguish between a "group", which is a collection
2668
     * of ids, and a singleton nexthop with an id. The
2669
     * group is installed as an id that just refers to a list of
2670
     * other ids.
2671
     */
2672
0
    if (dplane_ctx_get_nhe_nh_grp_count(ctx)) {
2673
0
      const struct nexthop_group *nhg;
2674
0
      const struct nhg_resilience *nhgr;
2675
2676
0
      nhg = dplane_ctx_get_nhe_ng(ctx);
2677
0
      nhgr = &nhg->nhgr;
2678
0
      if (!_netlink_nexthop_build_group(
2679
0
            &req->n, buflen, id,
2680
0
            dplane_ctx_get_nhe_nh_grp(ctx),
2681
0
            dplane_ctx_get_nhe_nh_grp_count(ctx),
2682
0
            !!nhgr->buckets, nhgr))
2683
0
        return 0;
2684
0
    } else {
2685
0
      const struct nexthop *nh =
2686
0
        dplane_ctx_get_nhe_ng(ctx)->nexthop;
2687
0
      afi_t afi = dplane_ctx_get_nhe_afi(ctx);
2688
2689
0
      if (afi == AFI_IP)
2690
0
        req->nhm.nh_family = AF_INET;
2691
0
      else if (afi == AFI_IP6)
2692
0
        req->nhm.nh_family = AF_INET6;
2693
2694
0
      switch (nh->type) {
2695
0
      case NEXTHOP_TYPE_IPV4:
2696
0
      case NEXTHOP_TYPE_IPV4_IFINDEX:
2697
0
        if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2698
0
             &nh->gate.ipv4,
2699
0
             IPV4_MAX_BYTELEN))
2700
0
          return 0;
2701
0
        break;
2702
0
      case NEXTHOP_TYPE_IPV6:
2703
0
      case NEXTHOP_TYPE_IPV6_IFINDEX:
2704
0
        if (!nl_attr_put(&req->n, buflen, NHA_GATEWAY,
2705
0
             &nh->gate.ipv6,
2706
0
             IPV6_MAX_BYTELEN))
2707
0
          return 0;
2708
0
        break;
2709
0
      case NEXTHOP_TYPE_BLACKHOLE:
2710
0
        if (!nl_attr_put(&req->n, buflen, NHA_BLACKHOLE,
2711
0
             NULL, 0))
2712
0
          return 0;
2713
        /* Blackhole shouldn't have anymore attributes
2714
         */
2715
0
        goto nexthop_done;
2716
0
      case NEXTHOP_TYPE_IFINDEX:
2717
        /* Don't need anymore info for this */
2718
0
        break;
2719
0
      }
2720
2721
0
      if (!nh->ifindex) {
2722
0
        flog_err(
2723
0
          EC_ZEBRA_NHG_FIB_UPDATE,
2724
0
          "Context received for kernel nexthop update without an interface");
2725
0
        return -1;
2726
0
      }
2727
2728
0
      if (!nl_attr_put32(&req->n, buflen, NHA_OIF,
2729
0
             nh->ifindex))
2730
0
        return 0;
2731
2732
0
      if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ONLINK))
2733
0
        req->nhm.nh_flags |= RTNH_F_ONLINK;
2734
2735
0
      num_labels = build_label_stack(
2736
0
        nh->nh_label, nh->nh_label_type, out_lse,
2737
0
        label_buf, sizeof(label_buf));
2738
2739
0
      if (num_labels && nh->nh_label_type == ZEBRA_LSP_EVPN) {
2740
0
        if (!nl_attr_put16(&req->n, buflen,
2741
0
               NHA_ENCAP_TYPE,
2742
0
               LWTUNNEL_ENCAP_IP))
2743
0
          return 0;
2744
2745
0
        nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP);
2746
0
        if (!nest)
2747
0
          return 0;
2748
2749
0
        if (_netlink_nexthop_encode_dvni_label(
2750
0
              nh, &req->n, out_lse, buflen,
2751
0
              label_buf) == false)
2752
0
          return 0;
2753
2754
0
        nl_attr_nest_end(&req->n, nest);
2755
2756
0
      } else if (num_labels) {
2757
        /* Set the BoS bit */
2758
0
        out_lse[num_labels - 1] |=
2759
0
          htonl(1 << MPLS_LS_S_SHIFT);
2760
2761
        /*
2762
         * TODO: MPLS unsupported for now in kernel.
2763
         */
2764
0
        if (req->nhm.nh_family == AF_MPLS)
2765
0
          goto nexthop_done;
2766
2767
0
        encap = LWTUNNEL_ENCAP_MPLS;
2768
0
        if (!nl_attr_put16(&req->n, buflen,
2769
0
               NHA_ENCAP_TYPE, encap))
2770
0
          return 0;
2771
0
        nest = nl_attr_nest(&req->n, buflen, NHA_ENCAP);
2772
0
        if (!nest)
2773
0
          return 0;
2774
0
        if (!nl_attr_put(
2775
0
              &req->n, buflen, MPLS_IPTUNNEL_DST,
2776
0
              &out_lse,
2777
0
              num_labels * sizeof(mpls_lse_t)))
2778
0
          return 0;
2779
2780
0
        nl_attr_nest_end(&req->n, nest);
2781
0
      }
2782
2783
0
      if (nh->nh_srv6) {
2784
0
        if (nh->nh_srv6->seg6local_action !=
2785
0
            ZEBRA_SEG6_LOCAL_ACTION_UNSPEC) {
2786
0
          uint32_t action;
2787
0
          uint16_t encap;
2788
0
          struct rtattr *nest;
2789
0
          const struct seg6local_context *ctx;
2790
2791
0
          req->nhm.nh_family = AF_INET6;
2792
0
          action = nh->nh_srv6->seg6local_action;
2793
0
          ctx = &nh->nh_srv6->seg6local_ctx;
2794
0
          encap = LWTUNNEL_ENCAP_SEG6_LOCAL;
2795
0
          if (!nl_attr_put(&req->n, buflen,
2796
0
               NHA_ENCAP_TYPE,
2797
0
               &encap,
2798
0
               sizeof(uint16_t)))
2799
0
            return 0;
2800
2801
0
          nest = nl_attr_nest(&req->n, buflen,
2802
0
            NHA_ENCAP | NLA_F_NESTED);
2803
0
          if (!nest)
2804
0
            return 0;
2805
2806
0
          switch (action) {
2807
0
          case SEG6_LOCAL_ACTION_END:
2808
0
            if (!nl_attr_put32(
2809
0
                &req->n, buflen,
2810
0
                SEG6_LOCAL_ACTION,
2811
0
                SEG6_LOCAL_ACTION_END))
2812
0
              return 0;
2813
0
            break;
2814
0
          case SEG6_LOCAL_ACTION_END_X:
2815
0
            if (!nl_attr_put32(
2816
0
                &req->n, buflen,
2817
0
                SEG6_LOCAL_ACTION,
2818
0
                SEG6_LOCAL_ACTION_END_X))
2819
0
              return 0;
2820
0
            if (!nl_attr_put(
2821
0
                &req->n, buflen,
2822
0
                SEG6_LOCAL_NH6, &ctx->nh6,
2823
0
                sizeof(struct in6_addr)))
2824
0
              return 0;
2825
0
            break;
2826
0
          case SEG6_LOCAL_ACTION_END_T:
2827
0
            if (!nl_attr_put32(
2828
0
                &req->n, buflen,
2829
0
                SEG6_LOCAL_ACTION,
2830
0
                SEG6_LOCAL_ACTION_END_T))
2831
0
              return 0;
2832
0
            if (!nl_attr_put32(
2833
0
                &req->n, buflen,
2834
0
                SEG6_LOCAL_TABLE,
2835
0
                ctx->table))
2836
0
              return 0;
2837
0
            break;
2838
0
          case SEG6_LOCAL_ACTION_END_DX4:
2839
0
            if (!nl_attr_put32(
2840
0
                &req->n, buflen,
2841
0
                SEG6_LOCAL_ACTION,
2842
0
                SEG6_LOCAL_ACTION_END_DX4))
2843
0
              return 0;
2844
0
            if (!nl_attr_put(
2845
0
                &req->n, buflen,
2846
0
                SEG6_LOCAL_NH4, &ctx->nh4,
2847
0
                sizeof(struct in_addr)))
2848
0
              return 0;
2849
0
            break;
2850
0
          case SEG6_LOCAL_ACTION_END_DT6:
2851
0
            if (!nl_attr_put32(
2852
0
                &req->n, buflen,
2853
0
                SEG6_LOCAL_ACTION,
2854
0
                SEG6_LOCAL_ACTION_END_DT6))
2855
0
              return 0;
2856
0
            if (!nl_attr_put32(
2857
0
                &req->n, buflen,
2858
0
                SEG6_LOCAL_TABLE,
2859
0
                ctx->table))
2860
0
              return 0;
2861
0
            break;
2862
0
          case SEG6_LOCAL_ACTION_END_DT4:
2863
0
            if (!nl_attr_put32(
2864
0
                  &req->n, buflen,
2865
0
                  SEG6_LOCAL_ACTION,
2866
0
                  SEG6_LOCAL_ACTION_END_DT4))
2867
0
              return 0;
2868
0
            if (!nl_attr_put32(
2869
0
                  &req->n, buflen,
2870
0
                  SEG6_LOCAL_VRFTABLE,
2871
0
                  ctx->table))
2872
0
              return 0;
2873
0
            break;
2874
0
          case SEG6_LOCAL_ACTION_END_DT46:
2875
0
            if (!nl_attr_put32(
2876
0
                  &req->n, buflen,
2877
0
                  SEG6_LOCAL_ACTION,
2878
0
                  SEG6_LOCAL_ACTION_END_DT46))
2879
0
              return 0;
2880
0
            if (!nl_attr_put32(
2881
0
                  &req->n, buflen,
2882
0
                  SEG6_LOCAL_VRFTABLE,
2883
0
                  ctx->table))
2884
0
              return 0;
2885
0
            break;
2886
0
          default:
2887
0
            zlog_err("%s: unsupport seg6local behaviour action=%u",
2888
0
               __func__, action);
2889
0
            return 0;
2890
0
          }
2891
0
          nl_attr_nest_end(&req->n, nest);
2892
0
        }
2893
2894
0
        if (!sid_zero(&nh->nh_srv6->seg6_segs)) {
2895
0
          char tun_buf[4096];
2896
0
          ssize_t tun_len;
2897
0
          struct rtattr *nest;
2898
2899
0
          if (!nl_attr_put16(&req->n, buflen,
2900
0
              NHA_ENCAP_TYPE,
2901
0
              LWTUNNEL_ENCAP_SEG6))
2902
0
            return 0;
2903
0
          nest = nl_attr_nest(&req->n, buflen,
2904
0
              NHA_ENCAP | NLA_F_NESTED);
2905
0
          if (!nest)
2906
0
            return 0;
2907
0
          tun_len = fill_seg6ipt_encap(tun_buf,
2908
0
              sizeof(tun_buf),
2909
0
              &nh->nh_srv6->seg6_segs);
2910
0
          if (tun_len < 0)
2911
0
            return 0;
2912
0
          if (!nl_attr_put(&req->n, buflen,
2913
0
               SEG6_IPTUNNEL_SRH,
2914
0
               tun_buf, tun_len))
2915
0
            return 0;
2916
0
          nl_attr_nest_end(&req->n, nest);
2917
0
        }
2918
0
      }
2919
2920
0
nexthop_done:
2921
2922
0
      if (IS_ZEBRA_DEBUG_KERNEL)
2923
0
        zlog_debug("%s: ID (%u): %pNHv(%d) vrf %s(%u) %s ",
2924
0
             __func__, id, nh, nh->ifindex,
2925
0
             vrf_id_to_name(nh->vrf_id),
2926
0
             nh->vrf_id, label_buf);
2927
0
    }
2928
2929
0
    req->nhm.nh_protocol = zebra2proto(type);
2930
2931
0
  } else if (cmd != RTM_DELNEXTHOP) {
2932
0
    flog_err(
2933
0
      EC_ZEBRA_NHG_FIB_UPDATE,
2934
0
      "Nexthop group kernel update command (%d) does not exist",
2935
0
      cmd);
2936
0
    return -1;
2937
0
  }
2938
2939
0
  if (IS_ZEBRA_DEBUG_KERNEL)
2940
0
    zlog_debug("%s: %s, id=%u", __func__, nl_msg_type_to_str(cmd),
2941
0
         id);
2942
2943
0
  return NLMSG_ALIGN(req->n.nlmsg_len);
2944
0
}
2945
2946
static ssize_t netlink_nexthop_msg_encoder(struct zebra_dplane_ctx *ctx,
2947
             void *buf, size_t buflen)
2948
0
{
2949
0
  enum dplane_op_e op;
2950
0
  int cmd = 0;
2951
2952
0
  op = dplane_ctx_get_op(ctx);
2953
0
  if (op == DPLANE_OP_NH_INSTALL || op == DPLANE_OP_NH_UPDATE)
2954
0
    cmd = RTM_NEWNEXTHOP;
2955
0
  else if (op == DPLANE_OP_NH_DELETE)
2956
0
    cmd = RTM_DELNEXTHOP;
2957
0
  else {
2958
0
    flog_err(EC_ZEBRA_NHG_FIB_UPDATE,
2959
0
       "Context received for kernel nexthop update with incorrect OP code (%u)",
2960
0
       op);
2961
0
    return -1;
2962
0
  }
2963
2964
0
  return netlink_nexthop_msg_encode(cmd, ctx, buf, buflen, false);
2965
0
}
2966
2967
enum netlink_msg_status
2968
netlink_put_nexthop_update_msg(struct nl_batch *bth,
2969
             struct zebra_dplane_ctx *ctx)
2970
0
{
2971
  /* Nothing to do if the kernel doesn't support nexthop objects */
2972
0
  if (!kernel_nexthops_supported())
2973
0
    return FRR_NETLINK_SUCCESS;
2974
2975
0
  return netlink_batch_add_msg(bth, ctx, netlink_nexthop_msg_encoder,
2976
0
             false);
2977
0
}
2978
2979
static ssize_t netlink_newroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2980
              void *buf, size_t buflen)
2981
0
{
2982
0
  return netlink_route_multipath_msg_encode(RTM_NEWROUTE, ctx, buf,
2983
0
              buflen, false, false);
2984
0
}
2985
2986
static ssize_t netlink_delroute_msg_encoder(struct zebra_dplane_ctx *ctx,
2987
              void *buf, size_t buflen)
2988
0
{
2989
0
  return netlink_route_multipath_msg_encode(RTM_DELROUTE, ctx, buf,
2990
0
              buflen, false, false);
2991
0
}
2992
2993
enum netlink_msg_status
2994
netlink_put_route_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
2995
0
{
2996
0
  int cmd;
2997
0
  const struct prefix *p = dplane_ctx_get_dest(ctx);
2998
2999
0
  if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_DELETE) {
3000
0
    cmd = RTM_DELROUTE;
3001
0
  } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_INSTALL) {
3002
0
    cmd = RTM_NEWROUTE;
3003
0
  } else if (dplane_ctx_get_op(ctx) == DPLANE_OP_ROUTE_UPDATE) {
3004
3005
0
    if (p->family == AF_INET || v6_rr_semantics) {
3006
      /* Single 'replace' operation */
3007
3008
      /*
3009
       * With route replace semantics in place
3010
       * for v4 routes and the new route is a system
3011
       * route we do not install anything.
3012
       * The problem here is that the new system
3013
       * route should cause us to withdraw from
3014
       * the kernel the old non-system route
3015
       */
3016
0
      if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx))
3017
0
          && !RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
3018
0
        return netlink_batch_add_msg(
3019
0
          bth, ctx, netlink_delroute_msg_encoder,
3020
0
          true);
3021
0
    } else {
3022
      /*
3023
       * So v6 route replace semantics are not in
3024
       * the kernel at this point as I understand it.
3025
       * so let's do a delete then an add.
3026
       * In the future once v6 route replace semantics
3027
       * are in we can figure out what to do here to
3028
       * allow working with old and new kernels.
3029
       *
3030
       * I'm also intentionally ignoring the failure case
3031
       * of the route delete.  If that happens yeah we're
3032
       * screwed.
3033
       */
3034
0
      if (!RSYSTEM_ROUTE(dplane_ctx_get_old_type(ctx)))
3035
0
        netlink_batch_add_msg(
3036
0
          bth, ctx, netlink_delroute_msg_encoder,
3037
0
          true);
3038
0
    }
3039
3040
0
    cmd = RTM_NEWROUTE;
3041
0
  } else
3042
0
    return FRR_NETLINK_ERROR;
3043
3044
0
  if (RSYSTEM_ROUTE(dplane_ctx_get_type(ctx)))
3045
0
    return FRR_NETLINK_SUCCESS;
3046
3047
0
  return netlink_batch_add_msg(bth, ctx,
3048
0
             cmd == RTM_NEWROUTE
3049
0
               ? netlink_newroute_msg_encoder
3050
0
               : netlink_delroute_msg_encoder,
3051
0
             false);
3052
0
}
3053
3054
/**
3055
 * netlink_nexthop_process_nh() - Parse the gatway/if info from a new nexthop
3056
 *
3057
 * @tb:   Netlink RTA data
3058
 * @family: Address family in the nhmsg
3059
 * @ifp:  Interface connected - this should be NULL, we fill it in
3060
 * @ns_id:  Namspace id
3061
 *
3062
 * Return:  New nexthop
3063
 */
3064
static struct nexthop netlink_nexthop_process_nh(struct rtattr **tb,
3065
             unsigned char family,
3066
             struct interface **ifp,
3067
             ns_id_t ns_id)
3068
0
{
3069
0
  struct nexthop nh = {};
3070
0
  void *gate = NULL;
3071
0
  enum nexthop_types_t type = 0;
3072
0
  int if_index = 0;
3073
0
  size_t sz = 0;
3074
0
  struct interface *ifp_lookup;
3075
3076
0
  if_index = *(int *)RTA_DATA(tb[NHA_OIF]);
3077
3078
3079
0
  if (tb[NHA_GATEWAY]) {
3080
0
    switch (family) {
3081
0
    case AF_INET:
3082
0
      type = NEXTHOP_TYPE_IPV4_IFINDEX;
3083
0
      sz = 4;
3084
0
      break;
3085
0
    case AF_INET6:
3086
0
      type = NEXTHOP_TYPE_IPV6_IFINDEX;
3087
0
      sz = 16;
3088
0
      break;
3089
0
    default:
3090
0
      flog_warn(
3091
0
        EC_ZEBRA_BAD_NHG_MESSAGE,
3092
0
        "Nexthop gateway with bad address family (%d) received from kernel",
3093
0
        family);
3094
0
      return nh;
3095
0
    }
3096
0
    gate = RTA_DATA(tb[NHA_GATEWAY]);
3097
0
  } else
3098
0
    type = NEXTHOP_TYPE_IFINDEX;
3099
3100
0
  if (type)
3101
0
    nh.type = type;
3102
3103
0
  if (gate)
3104
0
    memcpy(&(nh.gate), gate, sz);
3105
3106
0
  if (if_index)
3107
0
    nh.ifindex = if_index;
3108
3109
0
  ifp_lookup =
3110
0
    if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id), nh.ifindex);
3111
3112
0
  if (ifp)
3113
0
    *ifp = ifp_lookup;
3114
0
  if (ifp_lookup)
3115
0
    nh.vrf_id = ifp_lookup->vrf->vrf_id;
3116
0
  else {
3117
0
    flog_warn(
3118
0
      EC_ZEBRA_UNKNOWN_INTERFACE,
3119
0
      "%s: Unknown nexthop interface %u received, defaulting to VRF_DEFAULT",
3120
0
      __func__, nh.ifindex);
3121
3122
0
    nh.vrf_id = VRF_DEFAULT;
3123
0
  }
3124
3125
0
  if (tb[NHA_ENCAP] && tb[NHA_ENCAP_TYPE]) {
3126
0
    uint16_t encap_type = *(uint16_t *)RTA_DATA(tb[NHA_ENCAP_TYPE]);
3127
0
    int num_labels = 0;
3128
3129
0
    mpls_label_t labels[MPLS_MAX_LABELS] = {0};
3130
3131
0
    if (encap_type == LWTUNNEL_ENCAP_MPLS)
3132
0
      num_labels = parse_encap_mpls(tb[NHA_ENCAP], labels);
3133
3134
0
    if (num_labels)
3135
0
      nexthop_add_labels(&nh, ZEBRA_LSP_STATIC, num_labels,
3136
0
             labels);
3137
0
  }
3138
3139
0
  return nh;
3140
0
}
3141
3142
static int netlink_nexthop_process_group(struct rtattr **tb,
3143
           struct nh_grp *z_grp, int z_grp_size,
3144
           struct nhg_resilience *nhgr)
3145
0
{
3146
0
  uint8_t count = 0;
3147
  /* linux/nexthop.h group struct */
3148
0
  struct nexthop_grp *n_grp = NULL;
3149
3150
0
  n_grp = (struct nexthop_grp *)RTA_DATA(tb[NHA_GROUP]);
3151
0
  count = (RTA_PAYLOAD(tb[NHA_GROUP]) / sizeof(*n_grp));
3152
3153
0
  if (!count || (count * sizeof(*n_grp)) != RTA_PAYLOAD(tb[NHA_GROUP])) {
3154
0
    flog_warn(EC_ZEBRA_BAD_NHG_MESSAGE,
3155
0
        "Invalid nexthop group received from the kernel");
3156
0
    return count;
3157
0
  }
3158
3159
0
  for (int i = 0; ((i < count) && (i < z_grp_size)); i++) {
3160
0
    z_grp[i].id = n_grp[i].id;
3161
0
    z_grp[i].weight = n_grp[i].weight + 1;
3162
0
  }
3163
3164
0
  memset(nhgr, 0, sizeof(*nhgr));
3165
0
  if (tb[NHA_RES_GROUP]) {
3166
0
    struct rtattr *tbn[NHA_RES_GROUP_MAX + 1];
3167
0
    struct rtattr *rta;
3168
0
    struct rtattr *res_group = tb[NHA_RES_GROUP];
3169
3170
0
    netlink_parse_rtattr_nested(tbn, NHA_RES_GROUP_MAX, res_group);
3171
3172
0
    if (tbn[NHA_RES_GROUP_BUCKETS]) {
3173
0
      rta = tbn[NHA_RES_GROUP_BUCKETS];
3174
0
      nhgr->buckets = *(uint16_t *)RTA_DATA(rta);
3175
0
    }
3176
3177
0
    if (tbn[NHA_RES_GROUP_IDLE_TIMER]) {
3178
0
      rta = tbn[NHA_RES_GROUP_IDLE_TIMER];
3179
0
      nhgr->idle_timer = *(uint32_t *)RTA_DATA(rta);
3180
0
    }
3181
3182
0
    if (tbn[NHA_RES_GROUP_UNBALANCED_TIMER]) {
3183
0
      rta = tbn[NHA_RES_GROUP_UNBALANCED_TIMER];
3184
0
      nhgr->unbalanced_timer = *(uint32_t *)RTA_DATA(rta);
3185
0
    }
3186
3187
0
    if (tbn[NHA_RES_GROUP_UNBALANCED_TIME]) {
3188
0
      rta = tbn[NHA_RES_GROUP_UNBALANCED_TIME];
3189
0
      nhgr->unbalanced_time = *(uint64_t *)RTA_DATA(rta);
3190
0
    }
3191
0
  }
3192
3193
0
  return count;
3194
0
}
3195
3196
/**
3197
 * netlink_nexthop_change() - Read in change about nexthops from the kernel
3198
 *
3199
 * @h:    Netlink message header
3200
 * @ns_id:  Namspace id
3201
 * @startup:  Are we reading under startup conditions?
3202
 *
3203
 * Return:  Result status
3204
 */
3205
int netlink_nexthop_change(struct nlmsghdr *h, ns_id_t ns_id, int startup)
3206
0
{
3207
0
  int len;
3208
  /* nexthop group id */
3209
0
  uint32_t id;
3210
0
  unsigned char family;
3211
0
  int type;
3212
0
  afi_t afi = AFI_UNSPEC;
3213
0
  vrf_id_t vrf_id = VRF_DEFAULT;
3214
0
  struct interface *ifp = NULL;
3215
0
  struct nhmsg *nhm = NULL;
3216
0
  struct nexthop nh = {};
3217
0
  struct nh_grp grp[MULTIPATH_NUM] = {};
3218
  /* Count of nexthops in group array */
3219
0
  uint8_t grp_count = 0;
3220
0
  struct rtattr *tb[NHA_MAX + 1] = {};
3221
3222
0
  frrtrace(3, frr_zebra, netlink_nexthop_change, h, ns_id, startup);
3223
3224
0
  nhm = NLMSG_DATA(h);
3225
3226
0
  if (ns_id)
3227
0
    vrf_id = ns_id;
3228
3229
0
  if (startup && h->nlmsg_type != RTM_NEWNEXTHOP)
3230
0
    return 0;
3231
3232
0
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct nhmsg));
3233
0
  if (len < 0) {
3234
0
    zlog_warn(
3235
0
      "%s: Message received from netlink is of a broken size %d %zu",
3236
0
      __func__, h->nlmsg_len,
3237
0
      (size_t)NLMSG_LENGTH(sizeof(struct nhmsg)));
3238
0
    return -1;
3239
0
  }
3240
3241
0
  netlink_parse_rtattr_flags(tb, NHA_MAX, RTM_NHA(nhm), len,
3242
0
           NLA_F_NESTED);
3243
3244
3245
0
  if (!tb[NHA_ID]) {
3246
0
    flog_warn(
3247
0
      EC_ZEBRA_BAD_NHG_MESSAGE,
3248
0
      "Nexthop group without an ID received from the kernel");
3249
0
    return -1;
3250
0
  }
3251
3252
  /* We use the ID key'd nhg table for kernel updates */
3253
0
  id = *((uint32_t *)RTA_DATA(tb[NHA_ID]));
3254
3255
0
  if (zebra_evpn_mh_is_fdb_nh(id)) {
3256
    /* If this is a L2 NH just ignore it */
3257
0
    if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
3258
0
      zlog_debug("Ignore kernel update (%u) for fdb-nh 0x%x",
3259
0
          h->nlmsg_type, id);
3260
0
    }
3261
0
    return 0;
3262
0
  }
3263
3264
0
  family = nhm->nh_family;
3265
0
  afi = family2afi(family);
3266
3267
0
  type = proto2zebra(nhm->nh_protocol, 0, true);
3268
3269
0
  if (IS_ZEBRA_DEBUG_KERNEL)
3270
0
    zlog_debug("%s ID (%u) %s NS %u",
3271
0
         nl_msg_type_to_str(h->nlmsg_type), id,
3272
0
         nl_family_to_str(family), ns_id);
3273
3274
3275
0
  if (h->nlmsg_type == RTM_NEWNEXTHOP) {
3276
0
    struct nhg_resilience nhgr = {};
3277
3278
0
    if (tb[NHA_GROUP]) {
3279
      /**
3280
       * If this is a group message its only going to have
3281
       * an array of nexthop IDs associated with it
3282
       */
3283
0
      grp_count = netlink_nexthop_process_group(
3284
0
        tb, grp, array_size(grp), &nhgr);
3285
0
    } else {
3286
0
      if (tb[NHA_BLACKHOLE]) {
3287
        /**
3288
         * This nexthop is just for blackhole-ing
3289
         * traffic, it should not have an OIF, GATEWAY,
3290
         * or ENCAP
3291
         */
3292
0
        nh.type = NEXTHOP_TYPE_BLACKHOLE;
3293
0
        nh.bh_type = BLACKHOLE_UNSPEC;
3294
0
      } else if (tb[NHA_OIF])
3295
        /**
3296
         * This is a true new nexthop, so we need
3297
         * to parse the gateway and device info
3298
         */
3299
0
        nh = netlink_nexthop_process_nh(tb, family,
3300
0
                &ifp, ns_id);
3301
0
      else {
3302
3303
0
        flog_warn(
3304
0
          EC_ZEBRA_BAD_NHG_MESSAGE,
3305
0
          "Invalid Nexthop message received from the kernel with ID (%u)",
3306
0
          id);
3307
0
        return -1;
3308
0
      }
3309
0
      SET_FLAG(nh.flags, NEXTHOP_FLAG_ACTIVE);
3310
0
      if (nhm->nh_flags & RTNH_F_ONLINK)
3311
0
        SET_FLAG(nh.flags, NEXTHOP_FLAG_ONLINK);
3312
0
      vrf_id = nh.vrf_id;
3313
0
    }
3314
3315
0
    if (zebra_nhg_kernel_find(id, &nh, grp, grp_count, vrf_id, afi,
3316
0
            type, startup, &nhgr))
3317
0
      return -1;
3318
3319
0
  } else if (h->nlmsg_type == RTM_DELNEXTHOP)
3320
0
    zebra_nhg_kernel_del(id, vrf_id);
3321
3322
0
  return 0;
3323
0
}
3324
3325
/**
3326
 * netlink_request_nexthop() - Request nextop information from the kernel
3327
 * @zns:  Zebra namespace
3328
 * @family: AF_* netlink family
3329
 * @type: RTM_* route type
3330
 *
3331
 * Return:  Result status
3332
 */
3333
static int netlink_request_nexthop(struct zebra_ns *zns, int family, int type)
3334
1
{
3335
1
  struct {
3336
1
    struct nlmsghdr n;
3337
1
    struct nhmsg nhm;
3338
1
  } req;
3339
3340
  /* Form the request, specifying filter (rtattr) if needed. */
3341
1
  memset(&req, 0, sizeof(req));
3342
1
  req.n.nlmsg_type = type;
3343
1
  req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3344
1
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
3345
1
  req.nhm.nh_family = family;
3346
3347
1
  return netlink_request(&zns->netlink_cmd, &req);
3348
1
}
3349
3350
3351
/**
3352
 * netlink_nexthop_read() - Nexthop read function using netlink interface
3353
 *
3354
 * @zns:  Zebra name space
3355
 *
3356
 * Return:  Result status
3357
 * Only called at bootstrap time.
3358
 */
3359
int netlink_nexthop_read(struct zebra_ns *zns)
3360
1
{
3361
1
  int ret;
3362
1
  struct zebra_dplane_info dp_info;
3363
3364
1
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3365
3366
  /* Get nexthop objects */
3367
1
  ret = netlink_request_nexthop(zns, AF_UNSPEC, RTM_GETNEXTHOP);
3368
1
  if (ret < 0)
3369
1
    return ret;
3370
0
  ret = netlink_parse_info(netlink_nexthop_change, &zns->netlink_cmd,
3371
0
         &dp_info, 0, true);
3372
3373
0
  if (!ret)
3374
    /* If we succesfully read in nexthop objects,
3375
     * this kernel must support them.
3376
     */
3377
0
    supports_nh = true;
3378
0
  if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_NHG)
3379
0
    zlog_debug("Nexthop objects %ssupported on this kernel",
3380
0
         supports_nh ? "" : "not ");
3381
3382
0
  zebra_router_set_supports_nhgs(supports_nh);
3383
3384
0
  return ret;
3385
1
}
3386
3387
3388
int kernel_neigh_update(int add, int ifindex, void *addr, char *lla, int llalen,
3389
      ns_id_t ns_id, uint8_t family, bool permanent)
3390
0
{
3391
0
  return netlink_neigh_update(add ? RTM_NEWNEIGH : RTM_DELNEIGH, ifindex,
3392
0
            addr, lla, llalen, ns_id, family, permanent,
3393
0
            RTPROT_ZEBRA);
3394
0
}
3395
3396
/**
3397
 * netlink_neigh_update_msg_encode() - Common helper api for encoding
3398
 * evpn neighbor update as netlink messages using dataplane context object.
3399
 * Here, a neighbor refers to a bridge forwarding database entry for
3400
 * either unicast forwarding or head-end replication or an IP neighbor
3401
 * entry.
3402
 * @ctx:    Dataplane context
3403
 * @cmd:    Netlink command (RTM_NEWNEIGH or RTM_DELNEIGH)
3404
 * @lla:    A pointer to neighbor cache link layer address
3405
 * @llalen:   Length of the pointer to neighbor cache link layer
3406
 * address
3407
 * @ip:   A neighbor cache n/w layer destination address
3408
 *      In the case of bridge FDB, this represnts the remote
3409
 *      VTEP IP.
3410
 * @replace_obj:  Whether NEW request should replace existing object or
3411
 *      add to the end of the list
3412
 * @family:   AF_* netlink family
3413
 * @type:   RTN_* route type
3414
 * @flags:    NTF_* flags
3415
 * @state:    NUD_* states
3416
 * @data:   data buffer pointer
3417
 * @datalen:    total amount of data buffer space
3418
 * @protocol:   protocol information
3419
 *
3420
 * Return:    0 when the msg doesn't fit entirely in the buffer
3421
 *        otherwise the number of bytes written to buf.
3422
 */
3423
static ssize_t netlink_neigh_update_msg_encode(
3424
  const struct zebra_dplane_ctx *ctx, int cmd, const void *lla,
3425
  int llalen, const struct ipaddr *ip, bool replace_obj, uint8_t family,
3426
  uint8_t type, uint8_t flags, uint16_t state, uint32_t nhg_id, bool nfy,
3427
  uint8_t nfy_flags, bool ext, uint32_t ext_flags, void *data,
3428
  size_t datalen, uint8_t protocol)
3429
0
{
3430
0
  struct {
3431
0
    struct nlmsghdr n;
3432
0
    struct ndmsg ndm;
3433
0
    char buf[];
3434
0
  } *req = data;
3435
0
  int ipa_len;
3436
0
  enum dplane_op_e op;
3437
3438
0
  if (datalen < sizeof(*req))
3439
0
    return 0;
3440
0
  memset(req, 0, sizeof(*req));
3441
3442
0
  op = dplane_ctx_get_op(ctx);
3443
3444
0
  req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3445
0
  req->n.nlmsg_flags = NLM_F_REQUEST;
3446
0
  if (cmd == RTM_NEWNEIGH)
3447
0
    req->n.nlmsg_flags |=
3448
0
      NLM_F_CREATE
3449
0
      | (replace_obj ? NLM_F_REPLACE : NLM_F_APPEND);
3450
0
  req->n.nlmsg_type = cmd;
3451
0
  req->ndm.ndm_family = family;
3452
0
  req->ndm.ndm_type = type;
3453
0
  req->ndm.ndm_state = state;
3454
0
  req->ndm.ndm_flags = flags;
3455
0
  req->ndm.ndm_ifindex = dplane_ctx_get_ifindex(ctx);
3456
3457
0
  if (!nl_attr_put(&req->n, datalen, NDA_PROTOCOL, &protocol,
3458
0
       sizeof(protocol)))
3459
0
    return 0;
3460
3461
0
  if (lla) {
3462
0
    if (!nl_attr_put(&req->n, datalen, NDA_LLADDR, lla, llalen))
3463
0
      return 0;
3464
0
  }
3465
3466
0
  if (nfy) {
3467
0
    struct rtattr *nest;
3468
3469
0
    nest = nl_attr_nest(&req->n, datalen,
3470
0
            NDA_FDB_EXT_ATTRS | NLA_F_NESTED);
3471
0
    if (!nest)
3472
0
      return 0;
3473
3474
0
    if (!nl_attr_put(&req->n, datalen, NFEA_ACTIVITY_NOTIFY,
3475
0
         &nfy_flags, sizeof(nfy_flags)))
3476
0
      return 0;
3477
0
    if (!nl_attr_put(&req->n, datalen, NFEA_DONT_REFRESH, NULL, 0))
3478
0
      return 0;
3479
3480
0
    nl_attr_nest_end(&req->n, nest);
3481
0
  }
3482
3483
3484
0
  if (ext) {
3485
0
    if (!nl_attr_put(&req->n, datalen, NDA_EXT_FLAGS, &ext_flags,
3486
0
         sizeof(ext_flags)))
3487
0
      return 0;
3488
0
  }
3489
3490
0
  if (nhg_id) {
3491
0
    if (!nl_attr_put32(&req->n, datalen, NDA_NH_ID, nhg_id))
3492
0
      return 0;
3493
0
  } else {
3494
0
    ipa_len =
3495
0
      IS_IPADDR_V4(ip) ? IPV4_MAX_BYTELEN : IPV6_MAX_BYTELEN;
3496
0
    if (!nl_attr_put(&req->n, datalen, NDA_DST, &ip->ip.addr,
3497
0
         ipa_len))
3498
0
      return 0;
3499
0
  }
3500
3501
0
  if (op == DPLANE_OP_MAC_INSTALL || op == DPLANE_OP_MAC_DELETE) {
3502
0
    vlanid_t vid = dplane_ctx_mac_get_vlan(ctx);
3503
0
    vni_t vni = dplane_ctx_mac_get_vni(ctx);
3504
3505
0
    if (vid > 0) {
3506
0
      if (!nl_attr_put16(&req->n, datalen, NDA_VLAN, vid))
3507
0
        return 0;
3508
0
    }
3509
3510
0
    if (vni > 0) {
3511
0
      if (!nl_attr_put32(&req->n, datalen, NDA_SRC_VNI, vni))
3512
0
        return 0;
3513
0
    }
3514
3515
0
    if (!nl_attr_put32(&req->n, datalen, NDA_MASTER,
3516
0
           dplane_ctx_mac_get_br_ifindex(ctx)))
3517
0
      return 0;
3518
0
  }
3519
3520
0
  if (op == DPLANE_OP_VTEP_ADD || op == DPLANE_OP_VTEP_DELETE) {
3521
0
    vni_t vni = dplane_ctx_neigh_get_vni(ctx);
3522
3523
0
    if (vni > 0) {
3524
0
      if (!nl_attr_put32(&req->n, datalen, NDA_SRC_VNI, vni))
3525
0
        return 0;
3526
0
    }
3527
0
  }
3528
3529
0
  return NLMSG_ALIGN(req->n.nlmsg_len);
3530
0
}
3531
3532
/*
3533
 * Add remote VTEP to the flood list for this VxLAN interface (VNI). This
3534
 * is done by adding an FDB entry with a MAC of 00:00:00:00:00:00.
3535
 */
3536
static ssize_t
3537
netlink_vxlan_flood_update_ctx(const struct zebra_dplane_ctx *ctx, int cmd,
3538
             void *buf, size_t buflen)
3539
0
{
3540
0
  struct ethaddr dst_mac = {.octet = {0}};
3541
0
  int proto = RTPROT_ZEBRA;
3542
3543
0
  if (dplane_ctx_get_type(ctx) != 0)
3544
0
    proto = zebra2proto(dplane_ctx_get_type(ctx));
3545
3546
0
  return netlink_neigh_update_msg_encode(
3547
0
    ctx, cmd, (const void *)&dst_mac, ETH_ALEN,
3548
0
    dplane_ctx_neigh_get_ipaddr(ctx), false, PF_BRIDGE, 0, NTF_SELF,
3549
0
    (NUD_NOARP | NUD_PERMANENT), 0 /*nhg*/, false /*nfy*/,
3550
0
    0 /*nfy_flags*/, false /*ext*/, 0 /*ext_flags*/, buf, buflen,
3551
0
    proto);
3552
0
}
3553
3554
/*
 * Fallback used when no NDA_RTA definition is already visible: pointer to
 * the first attribute, NLMSG_ALIGN(sizeof(struct ndmsg)) bytes past the
 * start of the ndmsg 'r'.
 */
#ifndef NDA_RTA
#define NDA_RTA(r)                                                             \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
3558
3559
/*
 * Process one AF_BRIDGE neighbor (MAC FDB) message.
 *
 * @h:     Netlink message header; nlmsg_type selects add/update
 *         (RTM_NEWNEIGH) versus delete handling
 * @len:   Length of the attribute area that follows the ndmsg
 * @ns_id: Namespace used for the interface lookup
 *
 * Returns 0 when the message is ignored (EVPN disabled, missing or non-MAC
 * LLADDR, unknown or uninteresting interface, VLAN filtered, ...), otherwise
 * the result of the zebra_vxlan_* handler the message is dispatched to.
 */
static int netlink_macfdb_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
{
	struct ndmsg *ndm = NLMSG_DATA(h);
	struct rtattr *tb[NDA_MAX + 1];
	struct interface *ifp;
	struct interface *br_if;
	struct zebra_if *zif;
	struct ethaddr mac;
	struct in_addr vtep_ip;
	char vid_buf[20];
	char dst_buf[30];
	vlanid_t vid = 0;
	vni_t vni = 0;
	uint32_t nhg_id = 0;
	bool have_vid = false;
	bool have_dst = false;
	bool local_inactive;
	bool dp_static = false;
	bool sticky;
	bool vni_mcast_grp;

	/* We only process macfdb notifications if EVPN is enabled */
	if (!is_evpn_enabled())
		return 0;

	/* Parse attributes and extract fields of interest. Do basic
	 * validation of the fields.
	 */
	netlink_parse_rtattr_flags(tb, NDA_MAX, NDA_RTA(ndm), len,
				   NLA_F_NESTED);

	if (!tb[NDA_LLADDR]) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("%s AF_BRIDGE IF %u - no LLADDR",
				   nl_msg_type_to_str(h->nlmsg_type),
				   ndm->ndm_ifindex);
		return 0;
	}

	if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug(
				"%s AF_BRIDGE IF %u - LLADDR is not MAC, len %lu",
				nl_msg_type_to_str(h->nlmsg_type),
				ndm->ndm_ifindex,
				(unsigned long)RTA_PAYLOAD(tb[NDA_LLADDR]));
		return 0;
	}

	memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);

	if (tb[NDA_VLAN]) {
		have_vid = true;
		vid = *(uint16_t *)RTA_DATA(tb[NDA_VLAN]);
		snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
	}

	/* The VTEP address stays all-zero unless NDA_DST is present */
	memset(&vtep_ip, 0, sizeof(vtep_ip));
	if (tb[NDA_DST]) {
		/* TODO: Only IPv4 supported now. */
		have_dst = true;
		memcpy(&vtep_ip.s_addr, RTA_DATA(tb[NDA_DST]),
		       IPV4_MAX_BYTELEN);
		snprintfrr(dst_buf, sizeof(dst_buf), " dst %pI4", &vtep_ip);
	}

	if (tb[NDA_NH_ID])
		nhg_id = *(uint32_t *)RTA_DATA(tb[NDA_NH_ID]);

	/* A stale entry counts as locally inactive */
	local_inactive = !!(ndm->ndm_state & NUD_STALE);

	if (tb[NDA_FDB_EXT_ATTRS]) {
		struct rtattr *nfea_tb[NFEA_MAX + 1] = {0};

		netlink_parse_rtattr_nested(nfea_tb, NFEA_MAX,
					    tb[NDA_FDB_EXT_ATTRS]);
		if (nfea_tb[NFEA_ACTIVITY_NOTIFY]) {
			uint8_t nfy_flags = *(uint8_t *)RTA_DATA(
				nfea_tb[NFEA_ACTIVITY_NOTIFY]);

			if (nfy_flags & FDB_NOTIFY_BIT)
				dp_static = true;
			if (nfy_flags & FDB_NOTIFY_INACTIVE_BIT)
				local_inactive = true;
		}
	}

	if (tb[NDA_SRC_VNI])
		vni = *(vni_t *)RTA_DATA(tb[NDA_SRC_VNI]);

	if (IS_ZEBRA_DEBUG_KERNEL)
		zlog_debug(
			"Rx %s AF_BRIDGE IF %u%s st 0x%x fl 0x%x MAC %pEA%s nhg %d vni %d",
			nl_msg_type_to_str(h->nlmsg_type), ndm->ndm_ifindex,
			have_vid ? vid_buf : "", ndm->ndm_state,
			ndm->ndm_flags, &mac, have_dst ? dst_buf : "", nhg_id,
			vni);

	/* The interface should exist. */
	ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
					ndm->ndm_ifindex);
	if (!ifp || !ifp->info)
		return 0;

	/* The interface should be something we're interested in. */
	if (!IS_ZEBRA_IF_BRIDGE_SLAVE(ifp))
		return 0;

	zif = (struct zebra_if *)ifp->info;
	br_if = zif->brslave_info.br_if;
	if (br_if == NULL) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug(
				"%s AF_BRIDGE IF %s(%u) brIF %u - no bridge master",
				nl_msg_type_to_str(h->nlmsg_type), ifp->name,
				ndm->ndm_ifindex,
				zif->brslave_info.bridge_ifindex);
		return 0;
	}

	/* For per vni device, vni comes from device itself */
	if (IS_ZEBRA_IF_VXLAN(ifp) && IS_ZEBRA_VXLAN_IF_VNI(zif)) {
		struct zebra_vxlan_vni *vnip = zebra_vxlan_if_vni_find(zif, 0);

		vni = vnip->vni;
	}

	sticky = !!(ndm->ndm_flags & NTF_STICKY);

	if (filter_vlan && vid != filter_vlan) {
		if (IS_ZEBRA_DEBUG_KERNEL)
			zlog_debug("        Filtered due to filter vlan: %d",
				   filter_vlan);
		return 0;
	}

	/*
	 * Check if this is a mcast group update (svd case)
	 */
	vni_mcast_grp = is_mac_vni_mcast_group(&mac, vni, vtep_ip);

	/* If add or update, do accordingly if learnt on a "local" interface; if
	 * the notification is over VxLAN, this has to be related to
	 * multi-homing,
	 * so perform an implicit delete of any local entry (if it exists).
	 */
	if (h->nlmsg_type == RTM_NEWNEIGH) {
		/* Drop "permanent" entries. */
		if (!vni_mcast_grp && (ndm->ndm_state & NUD_PERMANENT)) {
			if (IS_ZEBRA_DEBUG_KERNEL)
				zlog_debug(
					"        Dropping entry because of NUD_PERMANENT");
			return 0;
		}

		if (!IS_ZEBRA_IF_VXLAN(ifp))
			return zebra_vxlan_local_mac_add_update(
				ifp, br_if, &mac, vid, sticky, local_inactive,
				dp_static);

		if (!have_dst)
			return 0;

		if (vni_mcast_grp)
			return zebra_vxlan_if_vni_mcast_group_add_update(
				ifp, vni, &vtep_ip);

		return zebra_vxlan_dp_network_mac_add(
			ifp, br_if, &mac, vid, vni, nhg_id, sticky,
			!!(ndm->ndm_flags & NTF_EXT_LEARNED));
	}

	/* This is a delete notification.
	 * Ignore the notification with IP dest as it may just signify that the
	 * MAC has moved from remote to local. The exception is the special
	 * all-zeros MAC that represents the BUM flooding entry; we may have
	 * to readd it. Otherwise,
	 *  1. For a MAC over VxLan, check if it needs to be refreshed(readded)
	 *  2. For a MAC over "local" interface, delete the mac
	 * Note: We will get notifications from both bridge driver and VxLAN
	 * driver.
	 */
	if (nhg_id)
		return 0;

	if (have_dst) {
		if (vni_mcast_grp)
			return zebra_vxlan_if_vni_mcast_group_del(ifp, vni,
								  &vtep_ip);

		if (is_zero_mac(&mac) && vni)
			return zebra_vxlan_check_readd_vtep(ifp, vni, vtep_ip);

		return 0;
	}

	if (IS_ZEBRA_IF_VXLAN(ifp))
		return 0;

	return zebra_vxlan_local_mac_del(ifp, br_if, &mac, vid);
}
3763
3764
/*
 * Parse callback for a MAC FDB dump: hands RTM_NEWNEIGH messages of family
 * AF_BRIDGE to netlink_macfdb_change().
 *
 * Returns 0 for messages that are skipped, -1 when the message is shorter
 * than a netlink header plus ndmsg, otherwise the handler's result.  The
 * startup argument is not used.
 */
static int netlink_macfdb_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
	struct ndmsg *ndm = NLMSG_DATA(h);
	int len;

	if (h->nlmsg_type != RTM_NEWNEIGH)
		return 0;

	/* Length validity. */
	len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
	if (len < 0)
		return -1;

	/* We are interested only in AF_BRIDGE notifications. */
	if (ndm->ndm_family != AF_BRIDGE)
		return 0;

	return netlink_macfdb_change(h, len, ns_id);
}
3784
3785
/* Request for MAC FDB information from the kernel */
3786
static int netlink_request_macs(struct nlsock *netlink_cmd, int family,
3787
        int type, ifindex_t master_ifindex)
3788
1
{
3789
1
  struct {
3790
1
    struct nlmsghdr n;
3791
1
    struct ifinfomsg ifm;
3792
1
    char buf[256];
3793
1
  } req;
3794
3795
  /* Form the request, specifying filter (rtattr) if needed. */
3796
1
  memset(&req, 0, sizeof(req));
3797
1
  req.n.nlmsg_type = type;
3798
1
  req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
3799
1
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
3800
1
  req.ifm.ifi_family = family;
3801
1
  if (master_ifindex)
3802
0
    nl_attr_put32(&req.n, sizeof(req), IFLA_MASTER, master_ifindex);
3803
3804
1
  return netlink_request(netlink_cmd, &req);
3805
1
}
3806
3807
/*
3808
 * MAC forwarding database read using netlink interface. This is invoked
3809
 * at startup.
3810
 */
3811
int netlink_macfdb_read(struct zebra_ns *zns)
3812
1
{
3813
1
  int ret;
3814
1
  struct zebra_dplane_info dp_info;
3815
3816
1
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3817
3818
  /* Get bridge FDB table. */
3819
1
  ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3820
1
           0);
3821
1
  if (ret < 0)
3822
1
    return ret;
3823
  /* We are reading entire table. */
3824
0
  filter_vlan = 0;
3825
0
  ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3826
0
         &dp_info, 0, true);
3827
3828
0
  return ret;
3829
1
}
3830
3831
/*
3832
 * MAC forwarding database read using netlink interface. This is for a
3833
 * specific bridge and matching specific access VLAN (if VLAN-aware bridge).
3834
 */
3835
int netlink_macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp,
3836
           struct interface *br_if, vlanid_t vid)
3837
0
{
3838
0
  struct zebra_if *br_zif;
3839
0
  struct zebra_dplane_info dp_info;
3840
0
  int ret = 0;
3841
3842
0
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
3843
3844
  /* Save VLAN we're filtering on, if needed. */
3845
0
  br_zif = (struct zebra_if *)br_if->info;
3846
0
  if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(br_zif))
3847
0
    filter_vlan = vid;
3848
3849
  /* Get bridge FDB table for specific bridge - we do the VLAN filtering.
3850
   */
3851
0
  ret = netlink_request_macs(&zns->netlink_cmd, AF_BRIDGE, RTM_GETNEIGH,
3852
0
           br_if->ifindex);
3853
0
  if (ret < 0)
3854
0
    return ret;
3855
0
  ret = netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
3856
0
         &dp_info, 0, false);
3857
3858
  /* Reset VLAN filter. */
3859
0
  filter_vlan = 0;
3860
0
  return ret;
3861
0
}
3862
3863
3864
/* Request for MAC FDB for a specific MAC address in VLAN from the kernel */
3865
static int netlink_request_specific_mac(struct zebra_ns *zns, int family,
3866
          int type, struct interface *ifp,
3867
          const struct ethaddr *mac, vlanid_t vid,
3868
          vni_t vni, uint8_t flags)
3869
0
{
3870
0
  struct {
3871
0
    struct nlmsghdr n;
3872
0
    struct ndmsg ndm;
3873
0
    char buf[256];
3874
0
  } req;
3875
0
  struct zebra_if *zif;
3876
3877
0
  memset(&req, 0, sizeof(req));
3878
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
3879
0
  req.n.nlmsg_type = type;  /* RTM_GETNEIGH */
3880
0
  req.n.nlmsg_flags = NLM_F_REQUEST;
3881
0
  req.ndm.ndm_family = family;  /* AF_BRIDGE */
3882
0
  req.ndm.ndm_flags = flags;
3883
  /* req.ndm.ndm_state = NUD_REACHABLE; */
3884
3885
0
  nl_attr_put(&req.n, sizeof(req), NDA_LLADDR, mac, 6);
3886
3887
0
  zif = (struct zebra_if *)ifp->info;
3888
  /* Is this a read on a VXLAN interface? */
3889
0
  if (IS_ZEBRA_IF_VXLAN(ifp)) {
3890
0
    nl_attr_put32(&req.n, sizeof(req), NDA_VNI, vni);
3891
    /* TBD: Why is ifindex not filled in the non-vxlan case? */
3892
0
    req.ndm.ndm_ifindex = ifp->ifindex;
3893
0
  } else {
3894
0
    if (IS_ZEBRA_IF_BRIDGE_VLAN_AWARE(zif) && vid > 0)
3895
0
      nl_attr_put16(&req.n, sizeof(req), NDA_VLAN, vid);
3896
0
    nl_attr_put32(&req.n, sizeof(req), NDA_MASTER, ifp->ifindex);
3897
0
  }
3898
3899
0
  if (IS_ZEBRA_DEBUG_KERNEL)
3900
0
    zlog_debug("Tx %s %s IF %s(%u) MAC %pEA vid %u vni %u",
3901
0
         nl_msg_type_to_str(type),
3902
0
         nl_family_to_str(req.ndm.ndm_family), ifp->name,
3903
0
         ifp->ifindex, mac, vid, vni);
3904
3905
0
  return netlink_request(&zns->netlink_cmd, &req);
3906
0
}
3907
3908
/*
 * Read the FDB entry for one MAC address on a bridge.
 *
 * zns   - namespace whose command socket is used
 * br_if - bridge interface the MAC is looked up on
 * mac   - MAC address to look up
 * vid   - VLAN id handed to the request helper
 *
 * Returns the (negative) result of the request when it cannot be sent,
 * otherwise the result of parsing the kernel's reply.
 */
int netlink_macfdb_read_specific_mac(struct zebra_ns *zns,
                                     struct interface *br_if,
                                     const struct ethaddr *mac, vlanid_t vid)
{
  struct zebra_dplane_info dp_info;
  int rc;

  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

  /* Request just this MAC; no VNI and no ndm flags are supplied. */
  rc = netlink_request_specific_mac(zns, AF_BRIDGE, RTM_GETNEIGH, br_if,
                                    mac, vid, 0, 0);
  if (rc < 0)
    return rc;

  return netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
                            &dp_info, 1, false);
}
3929
3930
/*
 * Read the all-zero-MAC FDB entry (used for BUM handling) of a VNI.
 *
 * zns - namespace whose command socket is used
 * ifp - interface the entry is requested on
 * vni - VNI to look up
 *
 * Returns 0 without sending anything when IS_ZEBRA_VXLAN_IF_VNI() holds for
 * the interface. Otherwise returns the (negative) result of the request when
 * it cannot be sent, or the result of parsing the kernel's reply.
 */
int netlink_macfdb_read_mcast_for_vni(struct zebra_ns *zns,
                                      struct interface *ifp, vni_t vni)
{
  struct zebra_dplane_info dp_info;
  struct ethaddr zero_mac = {.octet = {0}};
  struct zebra_if *zif = ifp->info;
  int rc;

  if (IS_ZEBRA_VXLAN_IF_VNI(zif))
    return 0;

  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);

  /* Get specific FDB entry for BUM handling, if any */
  rc = netlink_request_specific_mac(zns, AF_BRIDGE, RTM_GETNEIGH, ifp,
                                    &zero_mac, 0, vni, NTF_SELF);
  if (rc < 0)
    return rc;

  return netlink_parse_info(netlink_macfdb_table, &zns->netlink_cmd,
                            &dp_info, 1, false);
}
3955
3956
/*
3957
 * Netlink-specific handler for MAC updates using dataplane context object.
3958
 */
3959
ssize_t netlink_macfdb_update_ctx(struct zebra_dplane_ctx *ctx, void *data,
3960
          size_t datalen)
3961
0
{
3962
0
  struct ipaddr vtep_ip;
3963
0
  vlanid_t vid;
3964
0
  ssize_t total;
3965
0
  int cmd;
3966
0
  uint8_t flags;
3967
0
  uint16_t state;
3968
0
  uint32_t nhg_id;
3969
0
  uint32_t update_flags;
3970
0
  bool nfy = false;
3971
0
  uint8_t nfy_flags = 0;
3972
0
  int proto = RTPROT_ZEBRA;
3973
3974
0
  if (dplane_ctx_get_type(ctx) != 0)
3975
0
    proto = zebra2proto(dplane_ctx_get_type(ctx));
3976
3977
0
  cmd = dplane_ctx_get_op(ctx) == DPLANE_OP_MAC_INSTALL
3978
0
        ? RTM_NEWNEIGH : RTM_DELNEIGH;
3979
3980
0
  flags = NTF_MASTER;
3981
0
  state = NUD_REACHABLE;
3982
3983
0
  update_flags = dplane_ctx_mac_get_update_flags(ctx);
3984
0
  if (update_flags & DPLANE_MAC_REMOTE) {
3985
0
    flags |= NTF_SELF;
3986
0
    if (dplane_ctx_mac_is_sticky(ctx)) {
3987
      /* NUD_NOARP prevents the entry from expiring */
3988
0
      state |= NUD_NOARP;
3989
      /* sticky the entry from moving */
3990
0
      flags |= NTF_STICKY;
3991
0
    } else {
3992
0
      flags |= NTF_EXT_LEARNED;
3993
0
    }
3994
    /* if it was static-local previously we need to clear the
3995
     * notify flags on replace with remote
3996
     */
3997
0
    if (update_flags & DPLANE_MAC_WAS_STATIC)
3998
0
      nfy = true;
3999
0
  } else {
4000
    /* local mac */
4001
0
    if (update_flags & DPLANE_MAC_SET_STATIC) {
4002
0
      nfy_flags |= FDB_NOTIFY_BIT;
4003
0
      state |= NUD_NOARP;
4004
0
    }
4005
4006
0
    if (update_flags & DPLANE_MAC_SET_INACTIVE)
4007
0
      nfy_flags |= FDB_NOTIFY_INACTIVE_BIT;
4008
4009
0
    nfy = true;
4010
0
  }
4011
4012
0
  nhg_id = dplane_ctx_mac_get_nhg_id(ctx);
4013
0
  vtep_ip.ipaddr_v4 = *(dplane_ctx_mac_get_vtep_ip(ctx));
4014
0
  SET_IPADDR_V4(&vtep_ip);
4015
4016
0
  if (IS_ZEBRA_DEBUG_KERNEL) {
4017
0
    char vid_buf[20];
4018
0
    const struct ethaddr *mac = dplane_ctx_mac_get_addr(ctx);
4019
4020
0
    vid = dplane_ctx_mac_get_vlan(ctx);
4021
0
    if (vid > 0)
4022
0
      snprintf(vid_buf, sizeof(vid_buf), " VLAN %u", vid);
4023
0
    else
4024
0
      vid_buf[0] = '\0';
4025
4026
0
    zlog_debug(
4027
0
      "Tx %s family %s IF %s(%u)%s %sMAC %pEA dst %pIA nhg %u%s%s%s%s%s",
4028
0
      nl_msg_type_to_str(cmd), nl_family_to_str(AF_BRIDGE),
4029
0
      dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
4030
0
      vid_buf, dplane_ctx_mac_is_sticky(ctx) ? "sticky " : "",
4031
0
      mac, &vtep_ip, nhg_id,
4032
0
      (update_flags & DPLANE_MAC_REMOTE) ? " rem" : "",
4033
0
      (update_flags & DPLANE_MAC_WAS_STATIC) ? " clr_sync"
4034
0
                     : "",
4035
0
      (update_flags & DPLANE_MAC_SET_STATIC) ? " static" : "",
4036
0
      (update_flags & DPLANE_MAC_SET_INACTIVE) ? " inactive"
4037
0
                 : "",
4038
0
      nfy ? " nfy" : "");
4039
0
  }
4040
4041
0
  total = netlink_neigh_update_msg_encode(
4042
0
    ctx, cmd, (const void *)dplane_ctx_mac_get_addr(ctx), ETH_ALEN,
4043
0
    &vtep_ip, true, AF_BRIDGE, 0, flags, state, nhg_id, nfy,
4044
0
    nfy_flags, false /*ext*/, 0 /*ext_flags*/, data, datalen,
4045
0
    proto);
4046
4047
0
  return total;
4048
0
}
4049
4050
/*
4051
 * In the event the kernel deletes ipv4 link-local neighbor entries created for
4052
 * 5549 support, re-install them.
4053
 */
4054
static void netlink_handle_5549(struct ndmsg *ndm, struct zebra_if *zif,
4055
        struct interface *ifp, struct ipaddr *ip,
4056
        bool handle_failed)
4057
0
{
4058
0
  if (ndm->ndm_family != AF_INET)
4059
0
    return;
4060
4061
0
  if (!zif->v6_2_v4_ll_neigh_entry)
4062
0
    return;
4063
4064
0
  if (ipv4_ll.s_addr != ip->ip._v4_addr.s_addr)
4065
0
    return;
4066
4067
0
  if (handle_failed && ndm->ndm_state & NUD_FAILED) {
4068
0
    zlog_info("Neighbor Entry for %s has entered a failed state, not reinstalling",
4069
0
        ifp->name);
4070
0
    return;
4071
0
  }
4072
4073
0
  if_nbr_ipv6ll_to_ipv4ll_neigh_update(ifp, &zif->v6_2_v4_ll_addr6, true);
4074
0
}
4075
4076
/*
 * Neighbor states for which a RTM_NEWNEIGH message is processed as an add or
 * update rather than as a delete.
 */
#define NUD_VALID                                                              \
  (NUD_REACHABLE | NUD_STALE | NUD_DELAY | NUD_PROBE | NUD_NOARP               \
   | NUD_PERMANENT)

/*
 * Subset of states that still counts as locally active when only reachable
 * neighbors are to be advertised.
 */
#define NUD_LOCAL_ACTIVE (NUD_REACHABLE | NUD_NOARP | NUD_PERMANENT)
4081
4082
/*
 * Convert a netlink neighbor state into the value sent to zclients.
 *
 * The value is passed through unchanged because the two sets of constants
 * match exactly:
 * - netlink neighbor state values: NDM_XXX (see in linux/neighbour.h)
 * - zclient neighbor state values: ZEBRA_NEIGH_STATE_XXX
 *   (see in lib/zclient.h)
 */
static int netlink_nbr_entry_state_to_zclient(int nbr_state)
{
  const int zclient_state = nbr_state;

  return zclient_state;
}
4091
/*
 * Process one IPv4/IPv6 neighbor message received from the kernel.
 *
 * h     - netlink message; its payload starts with a struct ndmsg
 * len   - number of attribute bytes that follow the ndmsg header
 * ns_id - namespace used to resolve interface indexes
 *
 * The message is, in order:
 * - checked against the RFC 5549 IPv4 link-local neighbor entry, which is
 *   re-installed when the kernel removed or invalidated it;
 * - reported through zsend_nhrp_neighbor_notify() when it carries no
 *   link-layer address or one of IPv4 size;
 * - for RTM_NEWNEIGH and RTM_DELNEIGH, applied to the L3 neighbor database
 *   and, when the interface is a VLAN device or a bridge, handed to the VxLAN
 *   neighbor handlers.
 *
 * Always returns 0; messages that cannot be used are dropped.
 */
static int netlink_ipneigh_change(struct nlmsghdr *h, int len, ns_id_t ns_id)
{
  struct ndmsg *ndm;
  struct interface *ifp;
  struct zebra_if *zif;
  struct rtattr *tb[NDA_MAX + 1];
  struct interface *link_if;
  struct ethaddr mac;
  struct ipaddr ip;
  char buf[ETHER_ADDR_STRLEN];
  int mac_present = 0;
  bool is_ext;
  bool is_router;
  bool local_inactive;
  uint32_t ext_flags = 0;
  bool dp_static = false;
  int l2_len = 0;
  int cmd;

  ndm = NLMSG_DATA(h);

  /* The interface should exist. */
  ifp = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
                                  ndm->ndm_ifindex);
  if (!ifp || !ifp->info)
    return 0;

  zif = (struct zebra_if *)ifp->info;

  /* Parse attributes and extract fields of interest. */
  netlink_parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);

  if (!tb[NDA_DST]) {
    zlog_debug("%s family %s IF %s(%u) vrf %s(%u) - no DST",
               nl_msg_type_to_str(h->nlmsg_type),
               nl_family_to_str(ndm->ndm_family), ifp->name,
               ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id);
    return 0;
  }

  /*
   * The attribute length comes from the message, not from us: refuse a
   * destination that would not fit the address storage instead of
   * overrunning the stack in the copy below.
   */
  if (RTA_PAYLOAD(tb[NDA_DST]) > sizeof(ip.ip)) {
    if (IS_ZEBRA_DEBUG_KERNEL)
      zlog_debug(
        "%s family %s IF %s(%u) vrf %s(%u) - DST too long, len %lu",
        nl_msg_type_to_str(h->nlmsg_type),
        nl_family_to_str(ndm->ndm_family), ifp->name,
        ndm->ndm_ifindex, ifp->vrf->name,
        ifp->vrf->vrf_id,
        (unsigned long)RTA_PAYLOAD(tb[NDA_DST]));
    return 0;
  }

  memset(&ip, 0, sizeof(ip));
  ip.ipa_type = (ndm->ndm_family == AF_INET) ? IPADDR_V4 : IPADDR_V6;
  memcpy(&ip.ip.addr, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));

  /* if kernel deletes our rfc5549 neighbor entry, re-install it */
  if (h->nlmsg_type == RTM_DELNEIGH && (ndm->ndm_state & NUD_PERMANENT)) {
    netlink_handle_5549(ndm, zif, ifp, &ip, false);
    if (IS_ZEBRA_DEBUG_KERNEL)
      zlog_debug(
        "    Neighbor Entry Received is a 5549 entry, finished");
    return 0;
  }

  /* if kernel marks our rfc5549 neighbor entry invalid, re-install it */
  if (h->nlmsg_type == RTM_NEWNEIGH && !(ndm->ndm_state & NUD_VALID))
    netlink_handle_5549(ndm, zif, ifp, &ip, true);

  /* we send link layer information to client:
   * - nlmsg_type = RTM_DELNEIGH|NEWNEIGH|GETNEIGH
   * - struct ipaddr ( for DEL and GET)
   * - struct ethaddr mac; (for NEW)
   */
  if (h->nlmsg_type == RTM_NEWNEIGH)
    cmd = ZEBRA_NHRP_NEIGH_ADDED;
  else if (h->nlmsg_type == RTM_GETNEIGH)
    cmd = ZEBRA_NHRP_NEIGH_GET;
  else if (h->nlmsg_type == RTM_DELNEIGH)
    cmd = ZEBRA_NHRP_NEIGH_REMOVED;
  else {
    zlog_debug("%s(): unknown nlmsg type %u", __func__,
               h->nlmsg_type);
    return 0;
  }
  if (tb[NDA_LLADDR]) {
    /* copy LLADDR information */
    l2_len = RTA_PAYLOAD(tb[NDA_LLADDR]);
  }
  if (l2_len == IPV4_MAX_BYTELEN || l2_len == 0) {
    union sockunion link_layer_ipv4;

    if (l2_len) {
      sockunion_family(&link_layer_ipv4) = AF_INET;
      memcpy((void *)sockunion_get_addr(&link_layer_ipv4),
             RTA_DATA(tb[NDA_LLADDR]), l2_len);
    } else
      sockunion_family(&link_layer_ipv4) = AF_UNSPEC;
    zsend_nhrp_neighbor_notify(
      cmd, ifp, &ip,
      netlink_nbr_entry_state_to_zclient(ndm->ndm_state),
      &link_layer_ipv4);
  }

  if (h->nlmsg_type == RTM_GETNEIGH)
    return 0;

  /* The neighbor is present on an SVI. From this, we locate the
   * underlying
   * bridge because we're only interested in neighbors on a VxLAN bridge.
   * The bridge is located based on the nature of the SVI:
   * (a) In the case of a VLAN-aware bridge, the SVI is a L3 VLAN
   * interface
   * and is linked to the bridge
   * (b) In the case of a VLAN-unaware bridge, the SVI is the bridge
   * interface
   * itself
   */
  if (IS_ZEBRA_IF_VLAN(ifp)) {
    link_if = if_lookup_by_index_per_ns(zebra_ns_lookup(ns_id),
                                        zif->link_ifindex);
    if (!link_if)
      return 0;
  } else if (IS_ZEBRA_IF_BRIDGE(ifp))
    link_if = ifp;
  else {
    link_if = NULL;
    if (IS_ZEBRA_DEBUG_KERNEL)
      zlog_debug(
        "    Neighbor Entry received is not on a VLAN or a BRIDGE, ignoring");
  }

  memset(&mac, 0, sizeof(mac));
  if (h->nlmsg_type == RTM_NEWNEIGH) {
    if (tb[NDA_LLADDR]) {
      if (RTA_PAYLOAD(tb[NDA_LLADDR]) != ETH_ALEN) {
        if (IS_ZEBRA_DEBUG_KERNEL)
          zlog_debug(
            "%s family %s IF %s(%u) vrf %s(%u) - LLADDR is not MAC, len %lu",
            nl_msg_type_to_str(
              h->nlmsg_type),
            nl_family_to_str(
              ndm->ndm_family),
            ifp->name, ndm->ndm_ifindex,
            ifp->vrf->name,
            ifp->vrf->vrf_id,
            (unsigned long)RTA_PAYLOAD(
              tb[NDA_LLADDR]));
        return 0;
      }

      mac_present = 1;
      memcpy(&mac, RTA_DATA(tb[NDA_LLADDR]), ETH_ALEN);
    }

    is_ext = !!(ndm->ndm_flags & NTF_EXT_LEARNED);
    is_router = !!(ndm->ndm_flags & NTF_ROUTER);

    /*
     * Only read the extended flags when the attribute really holds a
     * full 32-bit value; a shorter payload is treated as "no flags".
     */
    if (tb[NDA_EXT_FLAGS]
        && RTA_PAYLOAD(tb[NDA_EXT_FLAGS]) >= sizeof(ext_flags)) {
      memcpy(&ext_flags, RTA_DATA(tb[NDA_EXT_FLAGS]),
             sizeof(ext_flags));
      if (ext_flags & NTF_E_MH_PEER_SYNC)
        dp_static = true;
    }

    if (IS_ZEBRA_DEBUG_KERNEL)
      zlog_debug(
        "Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA MAC %s state 0x%x flags 0x%x ext_flags 0x%x",
        nl_msg_type_to_str(h->nlmsg_type),
        nl_family_to_str(ndm->ndm_family), ifp->name,
        ndm->ndm_ifindex, ifp->vrf->name,
        ifp->vrf->vrf_id, &ip,
        mac_present
          ? prefix_mac2str(&mac, buf, sizeof(buf))
          : "",
        ndm->ndm_state, ndm->ndm_flags, ext_flags);

    /* If the neighbor state is valid for use, process as an add or
     * update
     * else process as a delete. Note that the delete handling may
     * result
     * in re-adding the neighbor if it is a valid "remote" neighbor.
     */
    if (ndm->ndm_state & NUD_VALID) {
      if (zebra_evpn_mh_do_adv_reachable_neigh_only())
        local_inactive =
          !(ndm->ndm_state & NUD_LOCAL_ACTIVE);
      else
        /* If EVPN-MH is not enabled we treat STALE
         * neighbors as locally-active and advertise
         * them
         */
        local_inactive = false;

      /* Add local neighbors to the l3 interface database */
      if (is_ext)
        zebra_neigh_del(ifp, &ip);
      else
        zebra_neigh_add(ifp, &ip, &mac);

      if (link_if)
        zebra_vxlan_handle_kernel_neigh_update(
          ifp, link_if, &ip, &mac, ndm->ndm_state,
          is_ext, is_router, local_inactive,
          dp_static);
      return 0;
    }


    zebra_neigh_del(ifp, &ip);
    if (link_if)
      zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);
    return 0;
  }

  if (IS_ZEBRA_DEBUG_KERNEL)
    zlog_debug("Rx %s family %s IF %s(%u) vrf %s(%u) IP %pIA",
               nl_msg_type_to_str(h->nlmsg_type),
               nl_family_to_str(ndm->ndm_family), ifp->name,
               ndm->ndm_ifindex, ifp->vrf->name, ifp->vrf->vrf_id,
               &ip);

  /* Process the delete - it may result in re-adding the neighbor if it is
   * a valid "remote" neighbor.
   */
  zebra_neigh_del(ifp, &ip);
  if (link_if)
    zebra_vxlan_handle_kernel_neigh_del(ifp, link_if, &ip);

  return 0;
}
4309
4310
/*
 * Parse callback for IP neighbor table dumps.
 *
 * h       - netlink message received from the kernel
 * ns_id   - namespace the message belongs to
 * startup - not used by this function
 *
 * Only RTM_NEWNEIGH messages for AF_INET or AF_INET6 are processed; any
 * other message is ignored and reported as success (0). Returns -1 when the
 * message is too short to hold a struct ndmsg, otherwise the result of
 * netlink_neigh_change().
 */
static int netlink_neigh_table(struct nlmsghdr *h, ns_id_t ns_id, int startup)
{
  int len;
  struct ndmsg *ndm;

  if (h->nlmsg_type != RTM_NEWNEIGH)
    return 0;

  /* Length validity. */
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
  if (len < 0)
    return -1;

  /* We are interested only in AF_INET or AF_INET6 notifications. */
  ndm = NLMSG_DATA(h);
  if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6)
    return 0;

  /*
   * netlink_neigh_change() takes the namespace id as its second argument
   * and works out the attribute length on its own, so the namespace - not
   * the length computed above - must be passed on.
   */
  return netlink_neigh_change(h, ns_id);
}
4330
4331
/* Request for IP neighbor information from the kernel */
4332
static int netlink_request_neigh(struct nlsock *netlink_cmd, int family,
4333
         int type, ifindex_t ifindex)
4334
1
{
4335
1
  struct {
4336
1
    struct nlmsghdr n;
4337
1
    struct ndmsg ndm;
4338
1
    char buf[256];
4339
1
  } req;
4340
4341
  /* Form the request, specifying filter (rtattr) if needed. */
4342
1
  memset(&req, 0, sizeof(req));
4343
1
  req.n.nlmsg_type = type;
4344
1
  req.n.nlmsg_flags = NLM_F_ROOT | NLM_F_MATCH | NLM_F_REQUEST;
4345
1
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4346
1
  req.ndm.ndm_family = family;
4347
1
  if (ifindex)
4348
0
    nl_attr_put32(&req.n, sizeof(req), NDA_IFINDEX, ifindex);
4349
4350
1
  return netlink_request(netlink_cmd, &req);
4351
1
}
4352
4353
/*
4354
 * IP Neighbor table read using netlink interface. This is invoked
4355
 * at startup.
4356
 */
4357
int netlink_neigh_read(struct zebra_ns *zns)
4358
1
{
4359
1
  int ret;
4360
1
  struct zebra_dplane_info dp_info;
4361
4362
1
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4363
4364
  /* Get IP neighbor table. */
4365
1
  ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
4366
1
            0);
4367
1
  if (ret < 0)
4368
1
    return ret;
4369
0
  ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4370
0
         &dp_info, 0, true);
4371
4372
0
  return ret;
4373
1
}
4374
4375
/*
4376
 * IP Neighbor table read using netlink interface. This is for a specific
4377
 * VLAN device.
4378
 */
4379
int netlink_neigh_read_for_vlan(struct zebra_ns *zns, struct interface *vlan_if)
4380
0
{
4381
0
  int ret = 0;
4382
0
  struct zebra_dplane_info dp_info;
4383
4384
0
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4385
4386
0
  ret = netlink_request_neigh(&zns->netlink_cmd, AF_UNSPEC, RTM_GETNEIGH,
4387
0
            vlan_if->ifindex);
4388
0
  if (ret < 0)
4389
0
    return ret;
4390
0
  ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4391
0
         &dp_info, 0, false);
4392
4393
0
  return ret;
4394
0
}
4395
4396
/*
4397
 * Request for a specific IP in VLAN (SVI) device from IP Neighbor table,
4398
 * read using netlink interface.
4399
 */
4400
static int netlink_request_specific_neigh_in_vlan(struct zebra_ns *zns,
4401
              int type,
4402
              const struct ipaddr *ip,
4403
              ifindex_t ifindex)
4404
0
{
4405
0
  struct {
4406
0
    struct nlmsghdr n;
4407
0
    struct ndmsg ndm;
4408
0
    char buf[256];
4409
0
  } req;
4410
0
  int ipa_len;
4411
4412
  /* Form the request, specifying filter (rtattr) if needed. */
4413
0
  memset(&req, 0, sizeof(req));
4414
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
4415
0
  req.n.nlmsg_flags = NLM_F_REQUEST;
4416
0
  req.n.nlmsg_type = type; /* RTM_GETNEIGH */
4417
0
  req.ndm.ndm_ifindex = ifindex;
4418
4419
0
  if (IS_IPADDR_V4(ip)) {
4420
0
    ipa_len = IPV4_MAX_BYTELEN;
4421
0
    req.ndm.ndm_family = AF_INET;
4422
4423
0
  } else {
4424
0
    ipa_len = IPV6_MAX_BYTELEN;
4425
0
    req.ndm.ndm_family = AF_INET6;
4426
0
  }
4427
4428
0
  nl_attr_put(&req.n, sizeof(req), NDA_DST, &ip->ip.addr, ipa_len);
4429
4430
0
  if (IS_ZEBRA_DEBUG_KERNEL)
4431
0
    zlog_debug("%s: Tx %s family %s IF %u IP %pIA flags 0x%x",
4432
0
         __func__, nl_msg_type_to_str(type),
4433
0
         nl_family_to_str(req.ndm.ndm_family), ifindex, ip,
4434
0
         req.n.nlmsg_flags);
4435
4436
0
  return netlink_request(&zns->netlink_cmd, &req);
4437
0
}
4438
4439
int netlink_neigh_read_specific_ip(const struct ipaddr *ip,
4440
           struct interface *vlan_if)
4441
0
{
4442
0
  int ret = 0;
4443
0
  struct zebra_ns *zns;
4444
0
  struct zebra_vrf *zvrf = vlan_if->vrf->info;
4445
0
  struct zebra_dplane_info dp_info;
4446
4447
0
  zns = zvrf->zns;
4448
4449
0
  zebra_dplane_info_from_zns(&dp_info, zns, true /*is_cmd*/);
4450
4451
0
  if (IS_ZEBRA_DEBUG_KERNEL)
4452
0
    zlog_debug("%s: neigh request IF %s(%u) IP %pIA vrf %s(%u)",
4453
0
         __func__, vlan_if->name, vlan_if->ifindex, ip,
4454
0
         vlan_if->vrf->name, vlan_if->vrf->vrf_id);
4455
4456
0
  ret = netlink_request_specific_neigh_in_vlan(zns, RTM_GETNEIGH, ip,
4457
0
              vlan_if->ifindex);
4458
0
  if (ret < 0)
4459
0
    return ret;
4460
4461
0
  ret = netlink_parse_info(netlink_neigh_table, &zns->netlink_cmd,
4462
0
         &dp_info, 1, false);
4463
4464
0
  return ret;
4465
0
}
4466
4467
/*
 * Dispatch a neighbor message from the kernel to the MAC FDB handler or the
 * IP neighbor handler.
 *
 * h     - netlink message
 * ns_id - namespace the message belongs to
 *
 * Messages other than RTM_NEWNEIGH, RTM_DELNEIGH and RTM_GETNEIGH are
 * ignored. Returns -1 when the message is too short to hold a struct ndmsg,
 * the handler's result when one is invoked, and 0 otherwise.
 */
int netlink_neigh_change(struct nlmsghdr *h, ns_id_t ns_id)
{
  struct ndmsg *ndm;
  int len;

  switch (h->nlmsg_type) {
  case RTM_NEWNEIGH:
  case RTM_DELNEIGH:
  case RTM_GETNEIGH:
    break;
  default:
    return 0;
  }

  /* Length validity. */
  len = h->nlmsg_len - NLMSG_LENGTH(sizeof(struct ndmsg));
  if (len < 0) {
    zlog_err(
      "%s: Message received from netlink is of a broken size %d %zu",
      __func__, h->nlmsg_len,
      (size_t)NLMSG_LENGTH(sizeof(struct ndmsg)));
    return -1;
  }

  /* Is this a notification for the MAC FDB or IP neighbor table? */
  ndm = NLMSG_DATA(h);
  if (ndm->ndm_family == AF_BRIDGE)
    return netlink_macfdb_change(h, len, ns_id);

  /* Beyond the FDB, only unicast entries are of interest. */
  if (ndm->ndm_type != RTN_UNICAST)
    return 0;

  if (ndm->ndm_family != AF_INET && ndm->ndm_family != AF_INET6) {
    flog_warn(
      EC_ZEBRA_UNKNOWN_FAMILY,
      "Invalid address family: %u received from kernel neighbor change: %s",
      ndm->ndm_family, nl_msg_type_to_str(h->nlmsg_type));
    return 0;
  }

  return netlink_ipneigh_change(h, len, ns_id);
}
4506
4507
/*
4508
 * Utility neighbor-update function, using info from dplane context.
4509
 */
4510
static ssize_t netlink_neigh_update_ctx(const struct zebra_dplane_ctx *ctx,
4511
          int cmd, void *buf, size_t buflen)
4512
0
{
4513
0
  const struct ipaddr *ip;
4514
0
  const struct ethaddr *mac = NULL;
4515
0
  const struct ipaddr *link_ip = NULL;
4516
0
  const void *link_ptr = NULL;
4517
0
  char buf2[ETHER_ADDR_STRLEN];
4518
4519
0
  int llalen;
4520
0
  uint8_t flags;
4521
0
  uint16_t state;
4522
0
  uint8_t family;
4523
0
  uint32_t update_flags;
4524
0
  uint32_t ext_flags = 0;
4525
0
  bool ext = false;
4526
0
  int proto = RTPROT_ZEBRA;
4527
4528
0
  if (dplane_ctx_get_type(ctx) != 0)
4529
0
    proto = zebra2proto(dplane_ctx_get_type(ctx));
4530
4531
0
  ip = dplane_ctx_neigh_get_ipaddr(ctx);
4532
4533
0
  if (dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_INSTALL
4534
0
      || dplane_ctx_get_op(ctx) == DPLANE_OP_NEIGH_IP_DELETE) {
4535
0
    link_ip = dplane_ctx_neigh_get_link_ip(ctx);
4536
0
    llalen = IPADDRSZ(link_ip);
4537
0
    link_ptr = (const void *)&(link_ip->ip.addr);
4538
0
    ipaddr2str(link_ip, buf2, sizeof(buf2));
4539
0
  } else {
4540
0
    mac = dplane_ctx_neigh_get_mac(ctx);
4541
0
    llalen = ETH_ALEN;
4542
0
    link_ptr = (const void *)mac;
4543
0
    if (is_zero_mac(mac))
4544
0
      mac = NULL;
4545
0
    if (mac)
4546
0
      prefix_mac2str(mac, buf2, sizeof(buf2));
4547
0
    else
4548
0
      snprintf(buf2, sizeof(buf2), "null");
4549
0
  }
4550
0
  update_flags = dplane_ctx_neigh_get_update_flags(ctx);
4551
0
  flags = neigh_flags_to_netlink(dplane_ctx_neigh_get_flags(ctx));
4552
0
  state = neigh_state_to_netlink(dplane_ctx_neigh_get_state(ctx));
4553
4554
0
  family = IS_IPADDR_V4(ip) ? AF_INET : AF_INET6;
4555
4556
0
  if (update_flags & DPLANE_NEIGH_REMOTE) {
4557
0
    flags |= NTF_EXT_LEARNED;
4558
    /* if it was static-local previously we need to clear the
4559
     * ext flags on replace with remote
4560
     */
4561
0
    if (update_flags & DPLANE_NEIGH_WAS_STATIC)
4562
0
      ext = true;
4563
0
  } else if (!(update_flags & DPLANE_NEIGH_NO_EXTENSION)) {
4564
0
    ext = true;
4565
    /* local neigh */
4566
0
    if (update_flags & DPLANE_NEIGH_SET_STATIC)
4567
0
      ext_flags |= NTF_E_MH_PEER_SYNC;
4568
0
  }
4569
0
  if (IS_ZEBRA_DEBUG_KERNEL)
4570
0
    zlog_debug(
4571
0
      "Tx %s family %s IF %s(%u) Neigh %pIA %s %s flags 0x%x state 0x%x %sext_flags 0x%x",
4572
0
      nl_msg_type_to_str(cmd), nl_family_to_str(family),
4573
0
      dplane_ctx_get_ifname(ctx), dplane_ctx_get_ifindex(ctx),
4574
0
      ip, link_ip ? "Link" : "MAC", buf2, flags, state,
4575
0
      ext ? "ext " : "", ext_flags);
4576
4577
0
  return netlink_neigh_update_msg_encode(
4578
0
    ctx, cmd, link_ptr, llalen, ip, true, family, RTN_UNICAST,
4579
0
    flags, state, 0 /*nhg*/, false /*nfy*/, 0 /*nfy_flags*/, ext,
4580
0
    ext_flags, buf, buflen, proto);
4581
0
}
4582
4583
static int netlink_neigh_table_update_ctx(const struct zebra_dplane_ctx *ctx,
4584
            void *data, size_t datalen)
4585
0
{
4586
0
  struct {
4587
0
    struct nlmsghdr n;
4588
0
    struct ndtmsg ndtm;
4589
0
    char buf[];
4590
0
  } *req = data;
4591
0
  struct rtattr *nest;
4592
0
  uint8_t family;
4593
0
  ifindex_t idx;
4594
0
  uint32_t val;
4595
4596
0
  if (datalen < sizeof(*req))
4597
0
    return 0;
4598
0
  memset(req, 0, sizeof(*req));
4599
0
  family = dplane_ctx_neightable_get_family(ctx);
4600
0
  idx = dplane_ctx_get_ifindex(ctx);
4601
4602
0
  req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndtmsg));
4603
0
  req->n.nlmsg_flags = NLM_F_REQUEST | NLM_F_REPLACE;
4604
0
  req->n.nlmsg_type = RTM_SETNEIGHTBL;
4605
0
  req->ndtm.ndtm_family = family;
4606
4607
0
  nl_attr_put(&req->n, datalen, NDTA_NAME,
4608
0
        family == AF_INET ? "arp_cache" : "ndisc_cache", 10);
4609
0
  nest = nl_attr_nest(&req->n, datalen, NDTA_PARMS);
4610
0
  if (nest == NULL)
4611
0
    return 0;
4612
0
  if (!nl_attr_put(&req->n, datalen, NDTPA_IFINDEX, &idx, sizeof(idx)))
4613
0
    return 0;
4614
0
  val = dplane_ctx_neightable_get_app_probes(ctx);
4615
0
  if (!nl_attr_put(&req->n, datalen, NDTPA_APP_PROBES, &val, sizeof(val)))
4616
0
    return 0;
4617
0
  val = dplane_ctx_neightable_get_mcast_probes(ctx);
4618
0
  if (!nl_attr_put(&req->n, datalen, NDTPA_MCAST_PROBES, &val,
4619
0
       sizeof(val)))
4620
0
    return 0;
4621
0
  val = dplane_ctx_neightable_get_ucast_probes(ctx);
4622
0
  if (!nl_attr_put(&req->n, datalen, NDTPA_UCAST_PROBES, &val,
4623
0
       sizeof(val)))
4624
0
    return 0;
4625
0
  nl_attr_nest_end(&req->n, nest);
4626
4627
0
  return NLMSG_ALIGN(req->n.nlmsg_len);
4628
0
}
4629
4630
static ssize_t netlink_neigh_msg_encoder(struct zebra_dplane_ctx *ctx,
4631
           void *buf, size_t buflen)
4632
0
{
4633
0
  ssize_t ret = 0;
4634
4635
0
  switch (dplane_ctx_get_op(ctx)) {
4636
0
  case DPLANE_OP_NEIGH_INSTALL:
4637
0
  case DPLANE_OP_NEIGH_UPDATE:
4638
0
  case DPLANE_OP_NEIGH_DISCOVER:
4639
0
  case DPLANE_OP_NEIGH_IP_INSTALL:
4640
0
    ret = netlink_neigh_update_ctx(ctx, RTM_NEWNEIGH, buf, buflen);
4641
0
    break;
4642
0
  case DPLANE_OP_NEIGH_DELETE:
4643
0
  case DPLANE_OP_NEIGH_IP_DELETE:
4644
0
    ret = netlink_neigh_update_ctx(ctx, RTM_DELNEIGH, buf, buflen);
4645
0
    break;
4646
0
  case DPLANE_OP_VTEP_ADD:
4647
0
    ret = netlink_vxlan_flood_update_ctx(ctx, RTM_NEWNEIGH, buf,
4648
0
                 buflen);
4649
0
    break;
4650
0
  case DPLANE_OP_VTEP_DELETE:
4651
0
    ret = netlink_vxlan_flood_update_ctx(ctx, RTM_DELNEIGH, buf,
4652
0
                 buflen);
4653
0
    break;
4654
0
  case DPLANE_OP_NEIGH_TABLE_UPDATE:
4655
0
    ret = netlink_neigh_table_update_ctx(ctx, buf, buflen);
4656
0
    break;
4657
0
  case DPLANE_OP_ROUTE_INSTALL:
4658
0
  case DPLANE_OP_ROUTE_UPDATE:
4659
0
  case DPLANE_OP_ROUTE_DELETE:
4660
0
  case DPLANE_OP_ROUTE_NOTIFY:
4661
0
  case DPLANE_OP_NH_INSTALL:
4662
0
  case DPLANE_OP_NH_UPDATE:
4663
0
  case DPLANE_OP_NH_DELETE:
4664
0
  case DPLANE_OP_LSP_INSTALL:
4665
0
  case DPLANE_OP_LSP_UPDATE:
4666
0
  case DPLANE_OP_LSP_DELETE:
4667
0
  case DPLANE_OP_LSP_NOTIFY:
4668
0
  case DPLANE_OP_PW_INSTALL:
4669
0
  case DPLANE_OP_PW_UNINSTALL:
4670
0
  case DPLANE_OP_SYS_ROUTE_ADD:
4671
0
  case DPLANE_OP_SYS_ROUTE_DELETE:
4672
0
  case DPLANE_OP_ADDR_INSTALL:
4673
0
  case DPLANE_OP_ADDR_UNINSTALL:
4674
0
  case DPLANE_OP_MAC_INSTALL:
4675
0
  case DPLANE_OP_MAC_DELETE:
4676
0
  case DPLANE_OP_RULE_ADD:
4677
0
  case DPLANE_OP_RULE_DELETE:
4678
0
  case DPLANE_OP_RULE_UPDATE:
4679
0
  case DPLANE_OP_BR_PORT_UPDATE:
4680
0
  case DPLANE_OP_IPTABLE_ADD:
4681
0
  case DPLANE_OP_IPTABLE_DELETE:
4682
0
  case DPLANE_OP_IPSET_ADD:
4683
0
  case DPLANE_OP_IPSET_DELETE:
4684
0
  case DPLANE_OP_IPSET_ENTRY_ADD:
4685
0
  case DPLANE_OP_IPSET_ENTRY_DELETE:
4686
0
  case DPLANE_OP_GRE_SET:
4687
0
  case DPLANE_OP_INTF_ADDR_ADD:
4688
0
  case DPLANE_OP_INTF_ADDR_DEL:
4689
0
  case DPLANE_OP_INTF_NETCONFIG:
4690
0
  case DPLANE_OP_INTF_INSTALL:
4691
0
  case DPLANE_OP_INTF_UPDATE:
4692
0
  case DPLANE_OP_INTF_DELETE:
4693
0
  case DPLANE_OP_TC_QDISC_INSTALL:
4694
0
  case DPLANE_OP_TC_QDISC_UNINSTALL:
4695
0
  case DPLANE_OP_TC_CLASS_ADD:
4696
0
  case DPLANE_OP_TC_CLASS_DELETE:
4697
0
  case DPLANE_OP_TC_CLASS_UPDATE:
4698
0
  case DPLANE_OP_TC_FILTER_ADD:
4699
0
  case DPLANE_OP_TC_FILTER_DELETE:
4700
0
  case DPLANE_OP_TC_FILTER_UPDATE:
4701
0
  case DPLANE_OP_NONE:
4702
0
    ret = -1;
4703
0
  }
4704
4705
0
  return ret;
4706
0
}
4707
4708
/*
4709
 * Update MAC, using dataplane context object.
4710
 */
4711
4712
enum netlink_msg_status netlink_put_mac_update_msg(struct nl_batch *bth,
4713
               struct zebra_dplane_ctx *ctx)
4714
0
{
4715
0
  return netlink_batch_add_msg(bth, ctx, netlink_macfdb_update_ctx,
4716
0
             false);
4717
0
}
4718
4719
enum netlink_msg_status
4720
netlink_put_neigh_update_msg(struct nl_batch *bth, struct zebra_dplane_ctx *ctx)
4721
0
{
4722
0
  return netlink_batch_add_msg(bth, ctx, netlink_neigh_msg_encoder,
4723
0
             false);
4724
0
}
4725
4726
/*
4727
 * MPLS label forwarding table change via netlink interface, using dataplane
4728
 * context information.
4729
 */
4730
ssize_t netlink_mpls_multipath_msg_encode(int cmd, struct zebra_dplane_ctx *ctx,
4731
            void *buf, size_t buflen)
4732
0
{
4733
0
  mpls_lse_t lse;
4734
0
  const struct nhlfe_list_head *head;
4735
0
  const struct zebra_nhlfe *nhlfe;
4736
0
  struct nexthop *nexthop = NULL;
4737
0
  unsigned int nexthop_num;
4738
0
  const char *routedesc;
4739
0
  int route_type;
4740
0
  struct prefix p = {0};
4741
0
  struct nlsock *nl =
4742
0
    kernel_netlink_nlsock_lookup(dplane_ctx_get_ns_sock(ctx));
4743
4744
0
  struct {
4745
0
    struct nlmsghdr n;
4746
0
    struct rtmsg r;
4747
0
    char buf[0];
4748
0
  } *req = buf;
4749
4750
0
  if (buflen < sizeof(*req))
4751
0
    return 0;
4752
4753
0
  memset(req, 0, sizeof(*req));
4754
4755
  /*
4756
   * Count # nexthops so we can decide whether to use singlepath
4757
   * or multipath case.
4758
   */
4759
0
  nexthop_num = 0;
4760
0
  head = dplane_ctx_get_nhlfe_list(ctx);
4761
0
  frr_each(nhlfe_list_const, head, nhlfe) {
4762
0
    nexthop = nhlfe->nexthop;
4763
0
    if (!nexthop)
4764
0
      continue;
4765
0
    if (cmd == RTM_NEWROUTE) {
4766
      /* Count all selected NHLFEs */
4767
0
      if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4768
0
          && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE))
4769
0
        nexthop_num++;
4770
0
    } else { /* DEL */
4771
      /* Count all installed NHLFEs */
4772
0
      if (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_INSTALLED)
4773
0
          && CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_FIB))
4774
0
        nexthop_num++;
4775
0
    }
4776
0
  }
4777
4778
0
  if ((nexthop_num == 0) ||
4779
0
      (!dplane_ctx_get_best_nhlfe(ctx) && (cmd != RTM_DELROUTE)))
4780
0
    return 0;
4781
4782
0
  req->n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
4783
0
  req->n.nlmsg_flags = NLM_F_CREATE | NLM_F_REQUEST;
4784
0
  req->n.nlmsg_type = cmd;
4785
0
  req->n.nlmsg_pid = nl->snl.nl_pid;
4786
4787
0
  req->r.rtm_family = AF_MPLS;
4788
0
  req->r.rtm_table = RT_TABLE_MAIN;
4789
0
  req->r.rtm_dst_len = MPLS_LABEL_LEN_BITS;
4790
0
  req->r.rtm_scope = RT_SCOPE_UNIVERSE;
4791
0
  req->r.rtm_type = RTN_UNICAST;
4792
4793
0
  if (cmd == RTM_NEWROUTE) {
4794
    /* We do a replace to handle update. */
4795
0
    req->n.nlmsg_flags |= NLM_F_REPLACE;
4796
4797
    /* set the protocol value if installing */
4798
0
    route_type = re_type_from_lsp_type(
4799
0
      dplane_ctx_get_best_nhlfe(ctx)->type);
4800
0
    req->r.rtm_protocol = zebra2proto(route_type);
4801
0
  }
4802
4803
  /* Fill destination */
4804
0
  lse = mpls_lse_encode(dplane_ctx_get_in_label(ctx), 0, 0, 1);
4805
0
  if (!nl_attr_put(&req->n, buflen, RTA_DST, &lse, sizeof(mpls_lse_t)))
4806
0
    return 0;
4807
4808
  /* Fill nexthops (paths) based on single-path or multipath. The paths
4809
   * chosen depend on the operation.
4810
   */
4811
0
  if (nexthop_num == 1) {
4812
0
    routedesc = "single-path";
4813
0
    _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4814
0
            routedesc);
4815
4816
0
    nexthop_num = 0;
4817
0
    frr_each(nhlfe_list_const, head, nhlfe) {
4818
0
      nexthop = nhlfe->nexthop;
4819
0
      if (!nexthop)
4820
0
        continue;
4821
4822
0
      if ((cmd == RTM_NEWROUTE
4823
0
           && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4824
0
         && CHECK_FLAG(nexthop->flags,
4825
0
                 NEXTHOP_FLAG_ACTIVE)))
4826
0
          || (cmd == RTM_DELROUTE
4827
0
        && (CHECK_FLAG(nhlfe->flags,
4828
0
                 NHLFE_FLAG_INSTALLED)
4829
0
            && CHECK_FLAG(nexthop->flags,
4830
0
              NEXTHOP_FLAG_FIB)))) {
4831
        /* Add the gateway */
4832
0
        if (!_netlink_mpls_build_singlepath(
4833
0
              &p, routedesc, nhlfe, &req->n,
4834
0
              &req->r, buflen, cmd))
4835
0
          return false;
4836
4837
0
        nexthop_num++;
4838
0
        break;
4839
0
      }
4840
0
    }
4841
0
  } else { /* Multipath case */
4842
0
    struct rtattr *nest;
4843
0
    const union g_addr *src1 = NULL;
4844
4845
0
    nest = nl_attr_nest(&req->n, buflen, RTA_MULTIPATH);
4846
0
    if (!nest)
4847
0
      return 0;
4848
4849
0
    routedesc = "multipath";
4850
0
    _netlink_mpls_debug(cmd, dplane_ctx_get_in_label(ctx),
4851
0
            routedesc);
4852
4853
0
    nexthop_num = 0;
4854
0
    frr_each(nhlfe_list_const, head, nhlfe) {
4855
0
      nexthop = nhlfe->nexthop;
4856
0
      if (!nexthop)
4857
0
        continue;
4858
4859
0
      if ((cmd == RTM_NEWROUTE
4860
0
           && (CHECK_FLAG(nhlfe->flags, NHLFE_FLAG_SELECTED)
4861
0
         && CHECK_FLAG(nexthop->flags,
4862
0
                 NEXTHOP_FLAG_ACTIVE)))
4863
0
          || (cmd == RTM_DELROUTE
4864
0
        && (CHECK_FLAG(nhlfe->flags,
4865
0
                 NHLFE_FLAG_INSTALLED)
4866
0
            && CHECK_FLAG(nexthop->flags,
4867
0
              NEXTHOP_FLAG_FIB)))) {
4868
0
        nexthop_num++;
4869
4870
        /* Build the multipath */
4871
0
        if (!_netlink_mpls_build_multipath(
4872
0
              &p, routedesc, nhlfe, &req->n,
4873
0
              buflen, &req->r, &src1))
4874
0
          return 0;
4875
0
      }
4876
0
    }
4877
4878
    /* Add the multipath */
4879
0
    nl_attr_nest_end(&req->n, nest);
4880
0
  }
4881
4882
0
  return NLMSG_ALIGN(req->n.nlmsg_len);
4883
0
}
4884
4885
/****************************************************************************
4886
* This code was developed in a branch that didn't have dplane APIs for
4887
* MAC updates. Hence the use of the legacy style. It will be moved to
4888
* the new dplane style pre-merge to master. XXX
4889
*/
4890
static int netlink_fdb_nh_update(uint32_t nh_id, struct in_addr vtep_ip)
4891
0
{
4892
0
  struct {
4893
0
    struct nlmsghdr n;
4894
0
    struct nhmsg nhm;
4895
0
    char buf[256];
4896
0
  } req;
4897
0
  int cmd = RTM_NEWNEXTHOP;
4898
0
  struct zebra_vrf *zvrf;
4899
0
  struct zebra_ns *zns;
4900
4901
0
  zvrf = zebra_vrf_get_evpn();
4902
0
  zns = zvrf->zns;
4903
4904
0
  memset(&req, 0, sizeof(req));
4905
4906
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4907
0
  req.n.nlmsg_flags = NLM_F_REQUEST;
4908
0
  req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4909
0
  req.n.nlmsg_type = cmd;
4910
0
  req.nhm.nh_family = AF_INET;
4911
4912
0
  if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4913
0
    return -1;
4914
0
  if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4915
0
    return -1;
4916
0
  if (!nl_attr_put(&req.n, sizeof(req), NHA_GATEWAY,
4917
0
      &vtep_ip, IPV4_MAX_BYTELEN))
4918
0
    return -1;
4919
4920
0
  if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4921
0
    zlog_debug("Tx %s fdb-nh 0x%x %pI4",
4922
0
         nl_msg_type_to_str(cmd), nh_id, &vtep_ip);
4923
0
  }
4924
4925
0
  return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4926
0
          false);
4927
0
}
4928
4929
static int netlink_fdb_nh_del(uint32_t nh_id)
4930
0
{
4931
0
  struct {
4932
0
    struct nlmsghdr n;
4933
0
    struct nhmsg nhm;
4934
0
    char buf[256];
4935
0
  } req;
4936
0
  int cmd = RTM_DELNEXTHOP;
4937
0
  struct zebra_vrf *zvrf;
4938
0
  struct zebra_ns *zns;
4939
4940
0
  zvrf = zebra_vrf_get_evpn();
4941
0
  zns = zvrf->zns;
4942
4943
0
  memset(&req, 0, sizeof(req));
4944
4945
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4946
0
  req.n.nlmsg_flags = NLM_F_REQUEST;
4947
0
  req.n.nlmsg_type = cmd;
4948
0
  req.nhm.nh_family = AF_UNSPEC;
4949
4950
0
  if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nh_id))
4951
0
    return -1;
4952
4953
0
  if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
4954
0
    zlog_debug("Tx %s fdb-nh 0x%x",
4955
0
         nl_msg_type_to_str(cmd), nh_id);
4956
0
  }
4957
4958
0
  return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
4959
0
          false);
4960
0
}
4961
4962
static int netlink_fdb_nhg_update(uint32_t nhg_id, uint32_t nh_cnt,
4963
    struct nh_grp *nh_ids)
4964
0
{
4965
0
  struct {
4966
0
    struct nlmsghdr n;
4967
0
    struct nhmsg nhm;
4968
0
    char buf[256];
4969
0
  } req;
4970
0
  int cmd = RTM_NEWNEXTHOP;
4971
0
  struct zebra_vrf *zvrf;
4972
0
  struct zebra_ns *zns;
4973
0
  struct nexthop_grp grp[nh_cnt];
4974
0
  uint32_t i;
4975
4976
0
  zvrf = zebra_vrf_get_evpn();
4977
0
  zns = zvrf->zns;
4978
4979
0
  memset(&req, 0, sizeof(req));
4980
4981
0
  req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg));
4982
0
  req.n.nlmsg_flags = NLM_F_REQUEST;
4983
0
  req.n.nlmsg_flags |= (NLM_F_CREATE | NLM_F_REPLACE);
4984
0
  req.n.nlmsg_type = cmd;
4985
0
  req.nhm.nh_family = AF_UNSPEC;
4986
4987
0
  if (!nl_attr_put32(&req.n, sizeof(req), NHA_ID, nhg_id))
4988
0
    return -1;
4989
0
  if (!nl_attr_put(&req.n, sizeof(req), NHA_FDB, NULL, 0))
4990
0
    return -1;
4991
0
  memset(&grp, 0, sizeof(grp));
4992
0
  for (i = 0; i < nh_cnt; ++i) {
4993
0
    grp[i].id = nh_ids[i].id;
4994
0
    grp[i].weight = nh_ids[i].weight;
4995
0
  }
4996
0
  if (!nl_attr_put(&req.n, sizeof(req), NHA_GROUP,
4997
0
      grp, nh_cnt * sizeof(struct nexthop_grp)))
4998
0
    return -1;
4999
5000
5001
0
  if (IS_ZEBRA_DEBUG_KERNEL || IS_ZEBRA_DEBUG_EVPN_MH_NH) {
5002
0
    char vtep_str[ES_VTEP_LIST_STR_SZ];
5003
0
    char nh_buf[16];
5004
5005
0
    vtep_str[0] = '\0';
5006
0
    for (i = 0; i < nh_cnt; ++i) {
5007
0
      snprintf(nh_buf, sizeof(nh_buf), "%u ",
5008
0
          grp[i].id);
5009
0
      strlcat(vtep_str, nh_buf, sizeof(vtep_str));
5010
0
    }
5011
5012
0
    zlog_debug("Tx %s fdb-nhg 0x%x %s",
5013
0
         nl_msg_type_to_str(cmd), nhg_id, vtep_str);
5014
0
  }
5015
5016
0
  return netlink_talk(netlink_talk_filter, &req.n, &zns->netlink_cmd, zns,
5017
0
          false);
5018
0
}
5019
5020
/*
 * Delete an FDB nexthop group. The group id is simply handed to
 * netlink_fdb_nh_del(), whose result is returned.
 */
static int netlink_fdb_nhg_del(uint32_t nhg_id)
{
	int rc;

	rc = netlink_fdb_nh_del(nhg_id);

	return rc;
}
5024
5025
/*
 * Install or update the MAC nexthop 'nh_id' pointing at 'vtep_ip'.
 * Forwards to netlink_fdb_nh_update() and returns its result.
 */
int kernel_upd_mac_nh(uint32_t nh_id, struct in_addr vtep_ip)
{
	int rc;

	rc = netlink_fdb_nh_update(nh_id, vtep_ip);

	return rc;
}
5029
5030
/*
 * Remove the MAC nexthop 'nh_id'.
 * Forwards to netlink_fdb_nh_del() and returns its result.
 */
int kernel_del_mac_nh(uint32_t nh_id)
{
	int rc;

	rc = netlink_fdb_nh_del(nh_id);

	return rc;
}
5034
5035
/*
 * Install or update the MAC nexthop group 'nhg_id' with the 'nh_cnt'
 * members listed in 'nh_ids'.
 * Forwards to netlink_fdb_nhg_update() and returns its result.
 */
int kernel_upd_mac_nhg(uint32_t nhg_id, uint32_t nh_cnt,
		       struct nh_grp *nh_ids)
{
	int rc;

	rc = netlink_fdb_nhg_update(nhg_id, nh_cnt, nh_ids);

	return rc;
}
5040
5041
/*
 * Remove the MAC nexthop group 'nhg_id'.
 * Forwards to netlink_fdb_nhg_del() and returns its result.
 */
int kernel_del_mac_nhg(uint32_t nhg_id)
{
	int rc;

	rc = netlink_fdb_nhg_del(nhg_id);

	return rc;
}
5045
5046
#endif /* HAVE_NETLINK */