Coverage Report

Created: 2025-08-28 06:29

/src/frr/bgpd/bgp_nht.c
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: GPL-2.0-or-later
2
/* BGP Nexthop tracking
3
 * Copyright (C) 2013 Cumulus Networks, Inc.
4
 */
5
6
#include <zebra.h>
7
8
#include "command.h"
9
#include "frrevent.h"
10
#include "prefix.h"
11
#include "zclient.h"
12
#include "stream.h"
13
#include "network.h"
14
#include "log.h"
15
#include "memory.h"
16
#include "nexthop.h"
17
#include "vrf.h"
18
#include "filter.h"
19
#include "nexthop_group.h"
20
21
#include "bgpd/bgpd.h"
22
#include "bgpd/bgp_table.h"
23
#include "bgpd/bgp_route.h"
24
#include "bgpd/bgp_attr.h"
25
#include "bgpd/bgp_nexthop.h"
26
#include "bgpd/bgp_debug.h"
27
#include "bgpd/bgp_errors.h"
28
#include "bgpd/bgp_nht.h"
29
#include "bgpd/bgp_fsm.h"
30
#include "bgpd/bgp_zebra.h"
31
#include "bgpd/bgp_flowspec_util.h"
32
#include "bgpd/bgp_evpn.h"
33
#include "bgpd/bgp_rd.h"
34
#include "bgpd/bgp_mplsvpn.h"
35
36
extern struct zclient *zclient;
37
38
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
39
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
40
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
41
static void bgp_nht_ifp_initial(struct event *thread);
42
43
static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
44
0
{
45
0
  return (bgp_zebra_num_connects() == 0
46
0
    || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
47
0
        && bnc->nexthop_num > 0));
48
0
}
49
50
static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
51
          struct bgp_path_info *path)
52
0
{
53
0
  struct interface *ifp = NULL;
54
0
  struct nexthop *nexthop;
55
0
  struct bgp_interface *iifp;
56
0
  struct peer *peer;
57
58
0
  if (!path->extra || !path->extra->peer_orig)
59
0
    return false;
60
61
0
  peer = path->extra->peer_orig;
62
63
  /* only connected ebgp peers are valid */
64
0
  if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
65
0
      CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
66
0
      CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
67
0
    return false;
68
69
0
  for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
70
0
    if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
71
0
        nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
72
0
        nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
73
0
      ifp = if_lookup_by_index(
74
0
        bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
75
0
        bnc->bgp->vrf_id);
76
0
    }
77
0
    if (!ifp)
78
0
      continue;
79
0
    iifp = ifp->info;
80
0
    if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
81
0
      return true;
82
0
  }
83
0
  return false;
84
0
}
85
86
static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
87
                 struct bgp_path_info *path)
88
0
{
89
0
  struct interface *ifp = NULL;
90
0
  struct nexthop *nexthop;
91
92
0
  for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
93
0
    if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
94
0
      ifp = if_lookup_by_index(
95
0
        bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
96
0
        bnc->bgp->vrf_id);
97
0
      if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
98
0
            ifp->ll_type == ZEBRA_LLT_IP6GRE))
99
0
        break;
100
0
    }
101
0
  }
102
0
  if (!ifp)
103
0
    return false;
104
105
0
  if (CHECK_FLAG(path->attr->rmap_change_flags,
106
0
           BATTR_RMAP_L3VPN_ACCEPT_GRE))
107
0
    return true;
108
109
0
  return false;
110
0
}
111
112
static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
113
          struct bgp_path_info *path)
114
0
{
115
  /*
116
   * - In the case of MPLS-VPN, the label is learned from LDP or other
117
   * protocols, and nexthop tracking is enabled for the label.
118
   * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
119
   * - In the case of SRv6-VPN, we need to track the reachability to the
120
   * SID (in other words, IPv6 address). As in MPLS, we need to record
121
   * the value as BGP_NEXTHOP_SID_VALID. However, this function is
122
   * currently not implemented, and this function assumes that all
123
   * Transit routes for SRv6-VPN are valid.
124
   * - Otherwise check for mpls-gre acceptance
125
   */
126
0
  return (bgp_zebra_num_connects() == 0 ||
127
0
    (bnc && (bnc->nexthop_num > 0 &&
128
0
       (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
129
0
        CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
130
0
        bnc->bgp->srv6_enabled ||
131
0
        bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
132
0
        bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
133
0
}
134
135
static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
136
0
{
137
0
  if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
138
0
    if (BGP_DEBUG(nht, NHT))
139
0
      zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
140
0
           &bnc->prefix, bnc->ifindex, bnc->srte_color,
141
0
           bnc->bgp->name_pretty);
142
    /* only unregister if this is the last nh for this prefix*/
143
0
    if (!bnc_existing_for_prefix(bnc))
144
0
      unregister_zebra_rnh(bnc);
145
0
    bnc_free(bnc);
146
0
  }
147
0
}
148
149
void bgp_unlink_nexthop(struct bgp_path_info *path)
150
0
{
151
0
  struct bgp_nexthop_cache *bnc = path->nexthop;
152
153
0
  bgp_mplsvpn_path_nh_label_unlink(path);
154
155
0
  if (!bnc)
156
0
    return;
157
158
0
  path_nh_map(path, NULL, false);
159
160
0
  bgp_unlink_nexthop_check(bnc);
161
0
}
162
163
void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
164
0
{
165
0
  struct prefix pp;
166
0
  struct prefix pt;
167
0
  struct bgp_nexthop_cache *bncp, *bnct;
168
0
  afi_t afi;
169
0
  ifindex_t ifindex = 0;
170
171
0
  if (!sockunion2hostprefix(&from->su, &pp))
172
0
    return;
173
174
  /*
175
   * Gather the ifindex for if up/down events to be
176
   * tagged into this fun
177
   */
178
0
  if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
179
0
    ifindex = from->su.sin6.sin6_scope_id;
180
181
0
  afi = family2afi(pp.family);
182
0
  bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
183
184
0
  if (!sockunion2hostprefix(&to->su, &pt))
185
0
    return;
186
187
  /*
188
   * Gather the ifindex for if up/down events to be
189
   * tagged into this fun
190
   */
191
0
  ifindex = 0;
192
0
  if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
193
0
    ifindex = to->su.sin6.sin6_scope_id;
194
0
  bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
195
196
0
  if (bnct != bncp)
197
0
    return;
198
199
0
  if (bnct)
200
0
    bnct->nht_info = to;
201
0
}
202
203
/*
204
 * Returns the bnc whose bnc->nht_info matches the LL peer by
205
 * looping through the IPv6 nexthop table
206
 */
207
static struct bgp_nexthop_cache *
208
bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
209
0
{
210
0
  struct bgp_nexthop_cache *bnc;
211
212
0
  frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
213
0
      bnc) {
214
0
    if (bnc->nht_info == peer) {
215
0
      if (BGP_DEBUG(nht, NHT)) {
216
0
        zlog_debug(
217
0
          "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
218
0
          &bnc->prefix, bnc->ifindex,
219
0
          bnc->srte_color, bnc, peer->host,
220
0
          peer->bgp->name_pretty, peer);
221
0
      }
222
0
      return bnc;
223
0
    }
224
0
  }
225
226
0
  if (BGP_DEBUG(nht, NHT))
227
0
    zlog_debug(
228
0
      "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
229
0
      peer->host, peer->bgp->name_pretty, peer);
230
231
0
  return NULL;
232
0
}
233
234
void bgp_unlink_nexthop_by_peer(struct peer *peer)
235
0
{
236
0
  struct prefix p;
237
0
  struct bgp_nexthop_cache *bnc;
238
0
  afi_t afi = family2afi(peer->su.sa.sa_family);
239
0
  ifindex_t ifindex = 0;
240
241
0
  if (!sockunion2hostprefix(&peer->su, &p)) {
242
    /*
243
     * In scenarios where unnumbered BGP session is brought
244
     * down by shutting down the interface before unconfiguring
245
     * the BGP neighbor, neighbor information in peer->su.sa
246
     * will be cleared when the interface is shutdown. So
247
     * during the deletion of unnumbered bgp peer, above check
248
     * will return true. Therefore, in this case,BGP needs to
249
     * find the bnc whose bnc->nht_info matches the
250
     * peer being deleted and free it.
251
     */
252
0
    bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
253
0
  } else {
254
    /*
255
     * Gather the ifindex for if up/down events to be
256
     * tagged into this fun
257
     */
258
0
    if (afi == AFI_IP6 &&
259
0
        IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
260
0
      ifindex = peer->su.sin6.sin6_scope_id;
261
0
    bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
262
0
             ifindex);
263
0
  }
264
265
0
  if (!bnc)
266
0
    return;
267
268
  /* cleanup the peer reference */
269
0
  bnc->nht_info = NULL;
270
271
0
  bgp_unlink_nexthop_check(bnc);
272
0
}
273
274
/*
275
 * A route and its nexthop might belong to different VRFs. Therefore,
276
 * we need both the bgp_route and bgp_nexthop pointers.
277
 */
278
int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
279
          afi_t afi, safi_t safi, struct bgp_path_info *pi,
280
          struct peer *peer, int connected,
281
          const struct prefix *orig_prefix)
282
0
{
283
0
  struct bgp_nexthop_cache_head *tree = NULL;
284
0
  struct bgp_nexthop_cache *bnc;
285
0
  struct bgp_path_info *bpi_ultimate;
286
0
  struct prefix p;
287
0
  uint32_t srte_color = 0;
288
0
  int is_bgp_static_route = 0;
289
0
  ifindex_t ifindex = 0;
290
291
0
  if (pi) {
292
0
    is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
293
0
               && (pi->sub_type == BGP_ROUTE_STATIC))
294
0
                ? 1
295
0
                : 0;
296
297
    /* Since Extended Next-hop Encoding (RFC5549) support, we want
298
       to derive
299
       address-family from the next-hop. */
300
0
    if (!is_bgp_static_route)
301
0
      afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
302
0
                    : AFI_IP;
303
304
    /* Validation for the ipv4 mapped ipv6 nexthop. */
305
0
    if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
306
0
      afi = AFI_IP;
307
0
    }
308
309
    /* This will return true if the global IPv6 NH is a link local
310
     * addr */
311
0
    if (make_prefix(afi, pi, &p) < 0)
312
0
      return 1;
313
314
0
    if (!is_bgp_static_route && orig_prefix
315
0
        && prefix_same(&p, orig_prefix)) {
316
0
      if (BGP_DEBUG(nht, NHT)) {
317
0
        zlog_debug(
318
0
          "%s(%pFX): prefix loops through itself",
319
0
          __func__, &p);
320
0
      }
321
0
      return 0;
322
0
    }
323
324
0
    srte_color = pi->attr->srte_color;
325
0
  } else if (peer) {
326
    /*
327
     * Gather the ifindex for if up/down events to be
328
     * tagged into this fun
329
     */
330
0
    if (afi == AFI_IP6 && peer->conf_if &&
331
0
        IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
332
0
      ifindex = peer->su.sin6.sin6_scope_id;
333
0
      if (ifindex == 0) {
334
0
        if (BGP_DEBUG(nht, NHT)) {
335
0
          zlog_debug(
336
0
            "%s: Unable to locate ifindex, waiting till we have one",
337
0
            peer->conf_if);
338
0
        }
339
0
        return 0;
340
0
      }
341
0
    }
342
343
0
    if (!sockunion2hostprefix(&peer->su, &p)) {
344
0
      if (BGP_DEBUG(nht, NHT)) {
345
0
        zlog_debug(
346
0
          "%s: Attempting to register with unknown AFI %d (not %d or %d)",
347
0
          __func__, afi, AFI_IP, AFI_IP6);
348
0
      }
349
0
      return 0;
350
0
    }
351
0
  } else
352
0
    return 0;
353
354
0
  if (is_bgp_static_route)
355
0
    tree = &bgp_nexthop->import_check_table[afi];
356
0
  else
357
0
    tree = &bgp_nexthop->nexthop_cache_table[afi];
358
359
0
  bnc = bnc_find(tree, &p, srte_color, ifindex);
360
0
  if (!bnc) {
361
0
    bnc = bnc_new(tree, &p, srte_color, ifindex);
362
0
    bnc->bgp = bgp_nexthop;
363
0
    if (BGP_DEBUG(nht, NHT))
364
0
      zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
365
0
           &bnc->prefix, bnc->ifindex, bnc->srte_color,
366
0
           bnc->bgp->name_pretty, peer);
367
0
  } else {
368
0
    if (BGP_DEBUG(nht, NHT))
369
0
      zlog_debug(
370
0
        "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
371
0
        &bnc->prefix, bnc->ifindex,
372
0
        bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
373
0
        bnc->path_count, bnc->nht_info);
374
0
  }
375
376
0
  if (pi && is_route_parent_evpn(pi))
377
0
    bnc->is_evpn_gwip_nexthop = true;
378
379
0
  if (is_bgp_static_route) {
380
0
    SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
381
382
    /* If we're toggling the type, re-register */
383
0
    if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
384
0
        && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
385
0
      SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
386
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
387
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
388
0
    } else if ((!CHECK_FLAG(bgp_route->flags,
389
0
          BGP_FLAG_IMPORT_CHECK))
390
0
         && CHECK_FLAG(bnc->flags,
391
0
           BGP_STATIC_ROUTE_EXACT_MATCH)) {
392
0
      UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
393
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
394
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
395
0
    }
396
0
  }
397
  /* When nexthop is already known, but now requires 'connected'
398
   * resolution,
399
   * re-register it. The reverse scenario where the nexthop currently
400
   * requires
401
   * 'connected' resolution does not need a re-register (i.e., we treat
402
   * 'connected-required' as an override) except in the scenario where
403
   * this
404
   * is actually a case of tracking a peer for connectivity (e.g., after
405
   * disable connected-check).
406
   * NOTE: We don't track the number of paths separately for 'connected-
407
   * required' vs 'connected-not-required' as this change is not a common
408
   * scenario.
409
   */
410
0
  else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
411
0
    SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
412
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
413
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
414
0
  } else if (peer && !connected
415
0
       && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
416
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
417
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
418
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
419
0
  }
420
0
  if (peer && (bnc->ifindex != ifindex)) {
421
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
422
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
423
0
    bnc->ifindex = ifindex;
424
0
  }
425
0
  if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
426
0
    SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
427
0
    SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
428
0
  } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
429
0
       && !is_default_host_route(&bnc->prefix))
430
0
    register_zebra_rnh(bnc);
431
432
0
  if (pi && pi->nexthop != bnc) {
433
    /* Unlink from existing nexthop cache, if any. This will also
434
     * free
435
     * the nexthop cache entry, if appropriate.
436
     */
437
0
    bgp_unlink_nexthop(pi);
438
439
    /* updates NHT pi list reference */
440
0
    path_nh_map(pi, bnc, true);
441
442
0
    bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
443
0
    if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
444
0
      (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
445
0
        bnc->metric;
446
0
    else if (bpi_ultimate->extra)
447
0
      bpi_ultimate->extra->igpmetric = 0;
448
0
  } else if (peer) {
449
    /*
450
     * Let's not accidentally save the peer data for a peer
451
     * we are going to throw away in a second or so.
452
     * When we come back around we'll fix up this
453
     * data properly in replace_nexthop_by_peer
454
     */
455
0
    if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
456
0
      bnc->nht_info = (void *)peer; /* NHT peer reference */
457
0
  }
458
459
  /*
460
   * We are cheating here.  Views have no associated underlying
461
   * ability to detect nexthops.  So when we have a view
462
   * just tell everyone the nexthop is valid
463
   */
464
0
  if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
465
0
    return 1;
466
0
  else if (safi == SAFI_UNICAST && pi &&
467
0
     pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
468
0
     pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
469
0
    return bgp_isvalid_nexthop_for_mpls(bnc, pi);
470
0
  else
471
0
    return (bgp_isvalid_nexthop(bnc));
472
0
}
473
474
/*
 * Remove the peer reference from the nexthop cache entry that tracks a
 * peer, and free the entry when no path uses it any more.
 *
 * afi:  address family of the peer session; used only to decide
 *       whether the link-local interface index is part of the key.
 * peer: peer whose tracking entry is to be released; NULL is a no-op.
 *
 * Nothing happens when the peer address cannot be converted to a
 * prefix, when no entry is found, or when the entry found is owned by
 * a different peer.
 */
void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
{
	struct bgp_nexthop_cache *bnc;
	struct prefix p;
	ifindex_t ifindex = 0;

	if (!peer)
		return;

	/*
	 * If the conversion below fails and the bnc has not been freed
	 * by this point, something similar to
	 * bgp_unlink_nexthop_by_peer() might be needed. That function
	 * loops through the IPv6 nexthop cache to find the bnc, so it is
	 * currently not called here.
	 */
	if (!sockunion2hostprefix(&peer->su, &p))
		return;

	/*
	 * Link-local peers are keyed by interface index as well, so that
	 * interface up/down events can be matched to the entry.
	 */
	if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
		ifindex = peer->su.sin6.sin6_scope_id;

	bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
		       &p, 0, ifindex);
	if (!bnc) {
		if (BGP_DEBUG(nht, NHT))
			zlog_debug(
				"Cannot find connected NHT node for peer %s(%s)",
				peer->host, peer->bgp->name_pretty);
		return;
	}

	/* Leave entries that belong to another peer structure alone. */
	if (bnc->nht_info != peer) {
		if (BGP_DEBUG(nht, NHT))
			zlog_debug(
				"Connected NHT %p node for peer %s(%s) points to %p",
				bnc, peer->host, bnc->bgp->name_pretty,
				bnc->nht_info);
		return;
	}

	bnc->nht_info = NULL;

	if (LIST_EMPTY(&(bnc->paths))) {
		if (BGP_DEBUG(nht, NHT))
			zlog_debug(
				"Freeing connected NHT node %p for peer %s(%s)",
				bnc, peer->host, bnc->bgp->name_pretty);

		/*
		 * Unregister only when this is the last cache entry for
		 * the prefix, as bgp_unlink_nexthop_check() does, so that
		 * another entry for the same prefix keeps its zebra
		 * registration.
		 */
		if (!bnc_existing_for_prefix(bnc))
			unregister_zebra_rnh(bnc);
		bnc_free(bnc);
	}
}
530
531
static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
532
               struct zapi_route *nhr,
533
               bool import_check)
534
0
{
535
0
  struct nexthop *nexthop;
536
0
  struct nexthop *oldnh;
537
0
  struct nexthop *nhlist_head = NULL;
538
0
  struct nexthop *nhlist_tail = NULL;
539
0
  int i;
540
0
  bool evpn_resolved = false;
541
542
0
  bnc->last_update = monotime(NULL);
543
0
  bnc->change_flags = 0;
544
545
  /* debug print the input */
546
0
  if (BGP_DEBUG(nht, NHT)) {
547
0
    char bnc_buf[BNC_FLAG_DUMP_SIZE];
548
549
0
    zlog_debug(
550
0
      "%s(%u): Rcvd NH update %pFX(%u)(%u) - metric %d/%d #nhops %d/%d flags %s",
551
0
      bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
552
0
      bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
553
0
      nhr->nexthop_num, bnc->nexthop_num,
554
0
      bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
555
0
               sizeof(bnc_buf)));
556
0
  }
557
558
0
  if (nhr->metric != bnc->metric)
559
0
    bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
560
561
0
  if (nhr->nexthop_num != bnc->nexthop_num)
562
0
    bnc->change_flags |= BGP_NEXTHOP_CHANGED;
563
564
0
  if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
565
0
           !prefix_same(&bnc->prefix, &nhr->prefix))) {
566
0
    SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
567
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
568
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
569
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
570
571
0
    bnc_nexthop_free(bnc);
572
0
    bnc->nexthop = NULL;
573
574
0
    if (BGP_DEBUG(nht, NHT))
575
0
      zlog_debug(
576
0
        "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
577
0
        __func__, &bnc->prefix, &nhr->prefix);
578
0
  } else if (nhr->nexthop_num) {
579
0
    struct peer *peer = bnc->nht_info;
580
581
    /* notify bgp fsm if nbr ip goes from invalid->valid */
582
0
    if (!bnc->nexthop_num)
583
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
584
585
0
    if (!bnc->is_evpn_gwip_nexthop)
586
0
      bnc->flags |= BGP_NEXTHOP_VALID;
587
0
    bnc->metric = nhr->metric;
588
0
    bnc->nexthop_num = nhr->nexthop_num;
589
590
0
    bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
591
592
0
    for (i = 0; i < nhr->nexthop_num; i++) {
593
0
      int num_labels = 0;
594
595
0
      nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
596
597
      /*
598
       * Turn on RA for the v6 nexthops
599
       * we receive from bgp.  This is to allow us
600
       * to work with v4 routing over v6 nexthops
601
       */
602
0
      if (peer && !peer->ifp
603
0
          && CHECK_FLAG(peer->flags,
604
0
            PEER_FLAG_CAPABILITY_ENHE)
605
0
          && nhr->prefix.family == AF_INET6
606
0
          && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
607
0
        struct interface *ifp;
608
609
0
        ifp = if_lookup_by_index(nexthop->ifindex,
610
0
               nexthop->vrf_id);
611
0
        if (ifp)
612
0
          zclient_send_interface_radv_req(
613
0
            zclient, nexthop->vrf_id, ifp,
614
0
            true,
615
0
            BGP_UNNUM_DEFAULT_RA_INTERVAL);
616
0
      }
617
      /* There is at least one label-switched path */
618
0
      if (nexthop->nh_label &&
619
0
        nexthop->nh_label->num_labels) {
620
621
0
        bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
622
0
        num_labels = nexthop->nh_label->num_labels;
623
0
      }
624
625
0
      if (BGP_DEBUG(nht, NHT)) {
626
0
        char buf[NEXTHOP_STRLEN];
627
0
        zlog_debug(
628
0
          "    nhop via %s (%d labels)",
629
0
          nexthop2str(nexthop, buf, sizeof(buf)),
630
0
          num_labels);
631
0
      }
632
633
0
      if (nhlist_tail) {
634
0
        nhlist_tail->next = nexthop;
635
0
        nhlist_tail = nexthop;
636
0
      } else {
637
0
        nhlist_tail = nexthop;
638
0
        nhlist_head = nexthop;
639
0
      }
640
641
      /* No need to evaluate the nexthop if we have already
642
       * determined
643
       * that there has been a change.
644
       */
645
0
      if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
646
0
        continue;
647
648
0
      for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
649
0
        if (nexthop_same(oldnh, nexthop))
650
0
          break;
651
652
0
      if (!oldnh)
653
0
        bnc->change_flags |= BGP_NEXTHOP_CHANGED;
654
0
    }
655
0
    bnc_nexthop_free(bnc);
656
0
    bnc->nexthop = nhlist_head;
657
658
    /*
659
     * Gateway IP nexthop is L3 reachable. Mark it as
660
     * BGP_NEXTHOP_VALID only if it is recursively resolved with a
661
     * remote EVPN RT-2.
662
     * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
663
     * When its mapping with EVPN RT-2 is established, unset
664
     * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
665
     */
666
0
    if (bnc->is_evpn_gwip_nexthop) {
667
0
      evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
668
669
0
      if (BGP_DEBUG(nht, NHT))
670
0
        zlog_debug(
671
0
          "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
672
0
          &bnc->prefix,
673
0
          (evpn_resolved ? "successful"
674
0
                   : "failed"));
675
676
0
      if (evpn_resolved) {
677
0
        bnc->flags |= BGP_NEXTHOP_VALID;
678
0
        bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
679
0
        bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
680
0
      } else {
681
0
        bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
682
0
        bnc->flags &= ~BGP_NEXTHOP_VALID;
683
0
      }
684
0
    }
685
0
  } else {
686
0
    bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
687
0
    bnc->flags &= ~BGP_NEXTHOP_VALID;
688
0
    bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
689
0
    bnc->nexthop_num = nhr->nexthop_num;
690
691
    /* notify bgp fsm if nbr ip goes from valid->invalid */
692
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
693
694
0
    bnc_nexthop_free(bnc);
695
0
    bnc->nexthop = NULL;
696
0
  }
697
698
0
  evaluate_paths(bnc);
699
0
}
700
701
/*
 * Apply an interface up/down event to every entry of one cache table
 * that is bound to that interface.
 *
 * bgp:   owning instance (not used by the body).
 * table: cache table to walk.
 * ifp:   interface that changed state.
 * up:    true when the interface came up, false when it went down.
 *
 * Each matching entry is marked changed, gets its validity and nexthop
 * count set according to 'up', and has its paths re-evaluated.
 */
static void bgp_nht_ifp_table_handle(struct bgp *bgp,
				     struct bgp_nexthop_cache_head *table,
				     struct interface *ifp, bool up)
{
	struct bgp_nexthop_cache *entry;

	frr_each (bgp_nexthop_cache, table, entry) {
		if (entry->ifindex != ifp->ifindex)
			continue;

		entry->last_update = monotime(NULL);
		entry->change_flags = 0;

		/*
		 * For interface based routes (such as the v6 LL routes
		 * this was written for) the metric received for the
		 * connected route is 0, not 1.
		 */
		entry->metric = 0;

		if (!up)
			UNSET_FLAG(entry->flags, BGP_NEXTHOP_PEER_NOTIFIED);

		if (up)
			SET_FLAG(entry->flags, BGP_NEXTHOP_VALID);
		else
			UNSET_FLAG(entry->flags, BGP_NEXTHOP_VALID);

		SET_FLAG(entry->change_flags, BGP_NEXTHOP_CHANGED);
		entry->nexthop_num = up ? 1 : 0;

		evaluate_paths(entry);
	}
}
734
/*
 * Propagate an interface state change to the nexthop cache and
 * import-check tables, IPv4 then IPv6, of the BGP instance attached to
 * the interface's VRF. Does nothing when the VRF has no BGP instance.
 */
static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
{
	static const afi_t families[] = { AFI_IP, AFI_IP6 };
	struct bgp *bgp = ifp->vrf->info;
	size_t n;

	if (!bgp)
		return;

	for (n = 0; n < sizeof(families) / sizeof(families[0]); n++) {
		afi_t afi = families[n];

		bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[afi],
					 ifp, up);
		bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[afi],
					 ifp, up);
	}
}
751
752
void bgp_nht_ifp_up(struct interface *ifp)
753
0
{
754
0
  bgp_nht_ifp_handle(ifp, true);
755
0
}
756
757
void bgp_nht_ifp_down(struct interface *ifp)
758
0
{
759
0
  bgp_nht_ifp_handle(ifp, false);
760
0
}
761
762
static void bgp_nht_ifp_initial(struct event *thread)
763
0
{
764
0
  ifindex_t ifindex = EVENT_VAL(thread);
765
0
  struct bgp *bgp = EVENT_ARG(thread);
766
0
  struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
767
0
768
0
  if (!ifp)
769
0
    return;
770
0
771
0
  if (BGP_DEBUG(nht, NHT))
772
0
    zlog_debug(
773
0
      "Handle NHT initial update for Intf %s(%d) status %s",
774
0
      ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
775
0
776
0
  if (if_is_up(ifp))
777
0
    bgp_nht_ifp_up(ifp);
778
0
  else
779
0
    bgp_nht_ifp_down(ifp);
780
0
}
781
782
/*
783
 * So the bnc code has the ability to handle interface up/down
784
 * events to properly handle v6 LL peering.
785
 * What is happening here:
786
 * The event system for peering expects the nht code to
787
 * report on the tracking events after we move to active
788
 * So let's give the system a chance to report on that event
789
 * in a manner that is expected.
790
 */
791
void bgp_nht_interface_events(struct peer *peer)
792
0
{
793
0
  struct bgp *bgp = peer->bgp;
794
0
  struct bgp_nexthop_cache_head *table;
795
0
  struct bgp_nexthop_cache *bnc;
796
0
  struct prefix p;
797
0
  ifindex_t ifindex = 0;
798
799
0
  if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
800
0
    return;
801
802
0
  if (!sockunion2hostprefix(&peer->su, &p))
803
0
    return;
804
  /*
805
   * Gather the ifindex for if up/down events to be
806
   * tagged into this fun
807
   */
808
0
  if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
809
0
    ifindex = peer->su.sin6.sin6_scope_id;
810
811
0
  table = &bgp->nexthop_cache_table[AFI_IP6];
812
0
  bnc = bnc_find(table, &p, 0, ifindex);
813
0
  if (!bnc)
814
0
    return;
815
816
0
  if (bnc->ifindex)
817
0
    event_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
818
0
        bnc->ifindex, NULL);
819
0
}
820
821
/*
 * Decode a nexthop update from the zclient input buffer and apply it
 * to the matching entries of the given VRF's BGP instance.
 *
 * command: message type (not used by the body).
 * vrf_id:  VRF the update belongs to.
 *
 * The update is applied to the nexthop cache entry and to the
 * import-check entry matching the decoded prefix and SR-TE color, when
 * they exist. Errors are logged and the update is dropped when no BGP
 * instance exists for the VRF or the message cannot be decoded.
 */
void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
{
	struct bgp_nexthop_cache *bnc_nhc;
	struct bgp_nexthop_cache *bnc_import;
	struct bgp_nexthop_cache *colored;
	struct zapi_route nhr;
	struct prefix match;
	struct bgp *bgp;
	afi_t afi;

	bgp = bgp_lookup_by_vrf_id(vrf_id);
	if (!bgp) {
		flog_err(
			EC_BGP_NH_UPD,
			"parse nexthop update: instance not found for vrf_id %u",
			vrf_id);
		return;
	}

	if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
		zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
			 bgp->name_pretty);
		return;
	}

	afi = family2afi(match.family);

	/* Regular nexthop tracking entry. */
	bnc_nhc = bnc_find(&bgp->nexthop_cache_table[afi], &match,
			   nhr.srte_color, 0);
	if (bnc_nhc)
		bgp_process_nexthop_update(bnc_nhc, &nhr, false);
	else if (BGP_DEBUG(nht, NHT))
		zlog_debug(
			"parse nexthop update %pFX(%u)(%s): bnc info not found for nexthop cache",
			&nhr.prefix, nhr.srte_color, bgp->name_pretty);

	/* Import-check entry. */
	bnc_import = bnc_find(&bgp->import_check_table[afi], &match,
			      nhr.srte_color, 0);
	if (bnc_import)
		bgp_process_nexthop_update(bnc_import, &nhr, true);
	else if (BGP_DEBUG(nht, NHT))
		zlog_debug(
			"parse nexthop update %pFX(%u)(%s): bnc info not found for import check",
			&nhr.prefix, nhr.srte_color, bgp->name_pretty);

	/*
	 * HACK: if any BGP route is dependent on an SR-policy that doesn't
	 * exist, zebra will never send NH updates relative to that policy.
	 * In that case, whenever we receive an update about a colorless
	 * NH, update the corresponding colorful NHs that share the same
	 * endpoint but that are inactive. This ugly hack should work
	 * around the problem at the cost of a performance penalty. Long
	 * term, what should be done is to make zebra's RNH subsystem aware
	 * of SR-TE colors (like bgpd is), which should provide a better
	 * infrastructure to solve this issue in a more efficient and
	 * elegant way.
	 */
	if (nhr.srte_color != 0 || !bnc_nhc)
		return;

	frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi], colored) {
		/* Only inactive, colored entries for the same endpoint. */
		if (prefix_same(&bnc_nhc->prefix, &colored->prefix) &&
		    colored->srte_color != 0 &&
		    !CHECK_FLAG(colored->flags, BGP_NEXTHOP_VALID))
			bgp_process_nexthop_update(colored, &nhr, false);
	}
}
893
894
/*
895
 * Cleanup nexthop registration and status information for BGP nexthops
896
 * pertaining to this VRF. This is invoked upon VRF deletion.
897
 */
898
void bgp_cleanup_nexthops(struct bgp *bgp)
899
0
{
900
0
  for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
901
0
    struct bgp_nexthop_cache *bnc;
902
903
0
    frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
904
0
        bnc) {
905
      /* Clear relevant flags. */
906
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
907
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
908
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
909
0
      UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
910
0
    }
911
0
  }
912
0
}
913
914
/**
915
 * make_prefix - make a prefix structure from the path (essentially
916
 * path's node.
917
 */
918
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
919
0
{
920
921
0
  int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
922
0
           && (pi->sub_type == BGP_ROUTE_STATIC))
923
0
            ? 1
924
0
            : 0;
925
0
  struct bgp_dest *net = pi->net;
926
0
  const struct prefix *p_orig = bgp_dest_get_prefix(net);
927
0
  struct in_addr ipv4;
928
929
0
  if (p_orig->family == AF_FLOWSPEC) {
930
0
    if (!pi->peer)
931
0
      return -1;
932
0
    return bgp_flowspec_get_first_nh(pi->peer->bgp,
933
0
             pi, p, afi);
934
0
  }
935
0
  memset(p, 0, sizeof(struct prefix));
936
0
  switch (afi) {
937
0
  case AFI_IP:
938
0
    p->family = AF_INET;
939
0
    if (is_bgp_static) {
940
0
      p->u.prefix4 = p_orig->u.prefix4;
941
0
      p->prefixlen = p_orig->prefixlen;
942
0
    } else {
943
0
      if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
944
0
        ipv4_mapped_ipv6_to_ipv4(
945
0
          &pi->attr->mp_nexthop_global, &ipv4);
946
0
        p->u.prefix4 = ipv4;
947
0
        p->prefixlen = IPV4_MAX_BITLEN;
948
0
      } else {
949
0
        if (p_orig->family == AF_EVPN)
950
0
          p->u.prefix4 =
951
0
            pi->attr->mp_nexthop_global_in;
952
0
        else
953
0
          p->u.prefix4 = pi->attr->nexthop;
954
0
        p->prefixlen = IPV4_MAX_BITLEN;
955
0
      }
956
0
    }
957
0
    break;
958
0
  case AFI_IP6:
959
0
    p->family = AF_INET6;
960
961
0
    if (is_bgp_static) {
962
0
      p->u.prefix6 = p_orig->u.prefix6;
963
0
      p->prefixlen = p_orig->prefixlen;
964
0
    } else {
965
      /* If we receive MP_REACH nexthop with ::(LL)
966
       * or LL(LL), use LL address as nexthop cache.
967
       */
968
0
      if (pi->attr->mp_nexthop_len
969
0
            == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
970
0
          && (IN6_IS_ADDR_UNSPECIFIED(
971
0
          &pi->attr->mp_nexthop_global)
972
0
        || IN6_IS_ADDR_LINKLOCAL(
973
0
          &pi->attr->mp_nexthop_global)))
974
0
        p->u.prefix6 = pi->attr->mp_nexthop_local;
975
      /* If we receive MR_REACH with (GA)::(LL)
976
       * then check for route-map to choose GA or LL
977
       */
978
0
      else if (pi->attr->mp_nexthop_len
979
0
         == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
980
0
        if (pi->attr->mp_nexthop_prefer_global)
981
0
          p->u.prefix6 =
982
0
            pi->attr->mp_nexthop_global;
983
0
        else
984
0
          p->u.prefix6 =
985
0
            pi->attr->mp_nexthop_local;
986
0
      } else
987
0
        p->u.prefix6 = pi->attr->mp_nexthop_global;
988
0
      p->prefixlen = IPV6_MAX_BITLEN;
989
0
    }
990
0
    break;
991
0
  default:
992
0
    if (BGP_DEBUG(nht, NHT)) {
993
0
      zlog_debug(
994
0
        "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
995
0
        __func__, afi, AFI_IP, AFI_IP6);
996
0
    }
997
0
    break;
998
0
  }
999
0
  return 0;
1000
0
}
1001
1002
/**
1003
 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1004
 *   command to Zebra.
1005
 * ARGUMENTS:
1006
 *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
1007
 *   int command -- command to send to zebra
1008
 * RETURNS:
1009
 *   void.
1010
 */
1011
static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1012
0
{
1013
0
  bool exact_match = false;
1014
0
  bool resolve_via_default = false;
1015
0
  int ret;
1016
1017
0
  if (!zclient)
1018
0
    return;
1019
1020
  /* Don't try to register if Zebra doesn't know of this instance. */
1021
0
  if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1022
0
    if (BGP_DEBUG(zebra, ZEBRA))
1023
0
      zlog_debug(
1024
0
        "%s: No zebra instance to talk to, not installing NHT entry",
1025
0
        __func__);
1026
0
    return;
1027
0
  }
1028
1029
0
  if (!bgp_zebra_num_connects()) {
1030
0
    if (BGP_DEBUG(zebra, ZEBRA))
1031
0
      zlog_debug(
1032
0
        "%s: We have not connected yet, cannot send nexthops",
1033
0
        __func__);
1034
0
  }
1035
0
  if (command == ZEBRA_NEXTHOP_REGISTER) {
1036
0
    if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1037
0
      exact_match = true;
1038
0
    if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1039
0
      resolve_via_default = true;
1040
0
  }
1041
1042
0
  if (BGP_DEBUG(zebra, ZEBRA))
1043
0
    zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1044
0
         zserv_command_string(command), &bnc->prefix,
1045
0
         bnc->bgp->name_pretty);
1046
1047
0
  ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1048
0
             exact_match, resolve_via_default,
1049
0
             bnc->bgp->vrf_id);
1050
0
  if (ret == ZCLIENT_SEND_FAILURE) {
1051
0
    flog_warn(EC_BGP_ZEBRA_SEND,
1052
0
        "sendmsg_nexthop: zclient_send_message() failed");
1053
0
    return;
1054
0
  }
1055
1056
0
  if (command == ZEBRA_NEXTHOP_REGISTER)
1057
0
    SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1058
0
  else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1059
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1060
0
  return;
1061
0
}
1062
1063
/**
1064
 * register_zebra_rnh - register a NH/route with Zebra for notification
1065
 *    when the route or the route to the nexthop changes.
1066
 * ARGUMENTS:
1067
 *   struct bgp_nexthop_cache *bnc
1068
 * RETURNS:
1069
 *   void.
1070
 */
1071
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1072
0
{
1073
  /* Check if we have already registered */
1074
0
  if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1075
0
    return;
1076
1077
0
  if (bnc->ifindex) {
1078
0
    SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1079
0
    return;
1080
0
  }
1081
1082
0
  sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1083
0
}
1084
1085
/**
1086
 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1087
 * ARGUMENTS:
1088
 *   struct bgp_nexthop_cache *bnc
1089
 * RETURNS:
1090
 *   void.
1091
 */
1092
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1093
0
{
1094
  /* Check if we have already registered */
1095
0
  if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1096
0
    return;
1097
1098
0
  if (bnc->ifindex) {
1099
0
    UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1100
0
    return;
1101
0
  }
1102
1103
0
  sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1104
0
}
1105
1106
/**
1107
 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1108
 * ARGUMENTS:
1109
 *   struct bgp_nexthop_cache *bnc -- the nexthop structure.
1110
 * RETURNS:
1111
 *   void.
1112
 */
1113
void evaluate_paths(struct bgp_nexthop_cache *bnc)
1114
0
{
1115
0
  struct bgp_dest *dest;
1116
0
  struct bgp_path_info *path;
1117
0
  struct bgp_path_info *bpi_ultimate;
1118
0
  int afi;
1119
0
  struct peer *peer = (struct peer *)bnc->nht_info;
1120
0
  struct bgp_table *table;
1121
0
  safi_t safi;
1122
0
  struct bgp *bgp_path;
1123
0
  const struct prefix *p;
1124
1125
0
  if (BGP_DEBUG(nht, NHT)) {
1126
0
    char bnc_buf[BNC_FLAG_DUMP_SIZE];
1127
0
    char chg_buf[BNC_FLAG_DUMP_SIZE];
1128
1129
0
    zlog_debug(
1130
0
      "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1131
0
      &bnc->prefix, bnc->ifindex, bnc->srte_color,
1132
0
      bnc->bgp->name_pretty,
1133
0
      bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1134
0
               sizeof(bnc_buf)),
1135
0
      bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1136
0
                sizeof(bnc_buf)));
1137
0
  }
1138
1139
0
  LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1140
0
    if (path->type == ZEBRA_ROUTE_BGP &&
1141
0
        (path->sub_type == BGP_ROUTE_NORMAL ||
1142
0
         path->sub_type == BGP_ROUTE_STATIC ||
1143
0
         path->sub_type == BGP_ROUTE_IMPORTED))
1144
      /* evaluate the path */
1145
0
      ;
1146
0
    else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE) {
1147
      /* evaluate the path for redistributed routes
1148
       * except those from VNC
1149
       */
1150
0
      if ((path->type == ZEBRA_ROUTE_VNC) ||
1151
0
          (path->type == ZEBRA_ROUTE_VNC_DIRECT))
1152
0
        continue;
1153
0
    } else
1154
      /* don't evaluate the path */
1155
0
      continue;
1156
1157
0
    dest = path->net;
1158
0
    assert(dest && bgp_dest_table(dest));
1159
0
    p = bgp_dest_get_prefix(dest);
1160
0
    afi = family2afi(p->family);
1161
0
    table = bgp_dest_table(dest);
1162
0
    safi = table->safi;
1163
1164
    /*
1165
     * handle routes from other VRFs (they can have a
1166
     * nexthop in THIS VRF). bgp_path is the bgp instance
1167
     * that owns the route referencing this nexthop.
1168
     */
1169
0
    bgp_path = table->bgp;
1170
1171
    /*
1172
     * Path becomes valid/invalid depending on whether the nexthop
1173
     * reachable/unreachable.
1174
     *
1175
     * In case of unicast routes that were imported from vpn
1176
     * and that have labels, they are valid only if there are
1177
     * nexthops with labels
1178
     *
1179
     * If the nexthop is EVPN gateway-IP,
1180
     * do not check for a valid label.
1181
     */
1182
1183
0
    bool bnc_is_valid_nexthop = false;
1184
0
    bool path_valid = false;
1185
1186
0
    if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1187
0
        && path->extra && path->extra->num_labels
1188
0
        && (path->attr->evpn_overlay.type
1189
0
      != OVERLAY_INDEX_GATEWAY_IP)) {
1190
0
      bnc_is_valid_nexthop =
1191
0
        bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1192
0
                  : false;
1193
0
    } else {
1194
0
      if (bgp_update_martian_nexthop(
1195
0
            bnc->bgp, afi, safi, path->type,
1196
0
            path->sub_type, path->attr, dest)) {
1197
0
        if (BGP_DEBUG(nht, NHT))
1198
0
          zlog_debug(
1199
0
            "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1200
0
            __func__, dest, bgp_path->name);
1201
0
      } else
1202
0
        bnc_is_valid_nexthop =
1203
0
          bgp_isvalid_nexthop(bnc) ? true : false;
1204
0
    }
1205
1206
0
    if (BGP_DEBUG(nht, NHT)) {
1207
1208
0
      if (dest->pdest) {
1209
0
        char rd_buf[RD_ADDRSTRLEN];
1210
1211
0
        prefix_rd2str(
1212
0
          (struct prefix_rd *)bgp_dest_get_prefix(
1213
0
            dest->pdest),
1214
0
          rd_buf, sizeof(rd_buf),
1215
0
          bgp_get_asnotation(bnc->bgp));
1216
0
        zlog_debug(
1217
0
          "... eval path %d/%d %pBD RD %s %s flags 0x%x",
1218
0
          afi, safi, dest, rd_buf,
1219
0
          bgp_path->name_pretty, path->flags);
1220
0
      } else
1221
0
        zlog_debug(
1222
0
          "... eval path %d/%d %pBD %s flags 0x%x",
1223
0
          afi, safi, dest, bgp_path->name_pretty,
1224
0
          path->flags);
1225
0
    }
1226
1227
    /* Skip paths marked for removal or as history. */
1228
0
    if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1229
0
        || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1230
0
      continue;
1231
1232
    /* Copy the metric to the path. Will be used for bestpath
1233
     * computation */
1234
0
    bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
1235
0
    if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1236
0
      (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
1237
0
        bnc->metric;
1238
0
    else if (bpi_ultimate->extra)
1239
0
      bpi_ultimate->extra->igpmetric = 0;
1240
1241
0
    if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1242
0
        || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1243
0
        || path->attr->srte_color != 0)
1244
0
      SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1245
1246
0
    path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1247
0
    if (path->type == ZEBRA_ROUTE_BGP &&
1248
0
        path->sub_type == BGP_ROUTE_STATIC &&
1249
0
        !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
1250
      /* static routes with 'no bgp network import-check' are
1251
       * always valid. if nht is called with static routes,
1252
       * the vpn exportation needs to be triggered
1253
       */
1254
0
      vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
1255
0
             path);
1256
0
    else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE &&
1257
0
       safi == SAFI_UNICAST &&
1258
0
       (bgp_path->inst_type == BGP_INSTANCE_TYPE_VRF ||
1259
0
        bgp_path->inst_type == BGP_INSTANCE_TYPE_DEFAULT))
1260
      /* redistribute routes are always valid
1261
       * if nht is called with redistribute routes, the vpn
1262
       * exportation needs to be triggered
1263
       */
1264
0
      vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
1265
0
             path);
1266
0
    else if (path_valid != bnc_is_valid_nexthop) {
1267
0
      if (path_valid) {
1268
        /* No longer valid, clear flag; also for EVPN
1269
         * routes, unimport from VRFs if needed.
1270
         */
1271
0
        bgp_aggregate_decrement(bgp_path, p, path, afi,
1272
0
              safi);
1273
0
        bgp_path_info_unset_flag(dest, path,
1274
0
               BGP_PATH_VALID);
1275
0
        if (safi == SAFI_EVPN &&
1276
0
            bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1277
0
          bgp_evpn_unimport_route(bgp_path,
1278
0
            afi, safi, bgp_dest_get_prefix(dest), path);
1279
0
        if (safi == SAFI_UNICAST &&
1280
0
            (bgp_path->inst_type !=
1281
0
             BGP_INSTANCE_TYPE_VIEW))
1282
0
          vpn_leak_from_vrf_withdraw(
1283
0
            bgp_get_default(), bgp_path,
1284
0
            path);
1285
0
      } else {
1286
        /* Path becomes valid, set flag; also for EVPN
1287
         * routes, import from VRFs if needed.
1288
         */
1289
0
        bgp_path_info_set_flag(dest, path,
1290
0
                   BGP_PATH_VALID);
1291
0
        bgp_aggregate_increment(bgp_path, p, path, afi,
1292
0
              safi);
1293
0
        if (safi == SAFI_EVPN &&
1294
0
            bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1295
0
          bgp_evpn_import_route(bgp_path,
1296
0
            afi, safi, bgp_dest_get_prefix(dest), path);
1297
0
        if (safi == SAFI_UNICAST &&
1298
0
            (bgp_path->inst_type !=
1299
0
             BGP_INSTANCE_TYPE_VIEW))
1300
0
          vpn_leak_from_vrf_update(
1301
0
            bgp_get_default(), bgp_path,
1302
0
            path);
1303
0
      }
1304
0
    }
1305
1306
0
    bgp_process(bgp_path, dest, afi, safi);
1307
0
  }
1308
1309
0
  if (peer) {
1310
0
    int valid_nexthops = bgp_isvalid_nexthop(bnc);
1311
1312
0
    if (valid_nexthops) {
1313
      /*
1314
       * Peering cannot occur across a blackhole nexthop
1315
       */
1316
0
      if (bnc->nexthop_num == 1 && bnc->nexthop
1317
0
          && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1318
0
        peer->last_reset = PEER_DOWN_WAITING_NHT;
1319
0
        valid_nexthops = 0;
1320
0
      } else
1321
0
        peer->last_reset = PEER_DOWN_WAITING_OPEN;
1322
0
    } else
1323
0
      peer->last_reset = PEER_DOWN_WAITING_NHT;
1324
1325
0
    if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1326
0
      if (BGP_DEBUG(nht, NHT))
1327
0
        zlog_debug(
1328
0
          "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1329
0
          __func__, peer->host,
1330
0
          peer->bgp->name_pretty,
1331
0
          !!valid_nexthops);
1332
0
      bgp_fsm_nht_update(peer, !!valid_nexthops);
1333
0
      SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1334
0
    }
1335
0
  }
1336
1337
0
  RESET_FLAG(bnc->change_flags);
1338
0
}
1339
1340
/**
1341
 * path_nh_map - make or break path-to-nexthop association.
1342
 * ARGUMENTS:
1343
 *   path - pointer to the path structure
1344
 *   bnc - pointer to the nexthop structure
1345
 *   make - if set, make the association. if unset, just break the existing
1346
 *          association.
1347
 */
1348
void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1349
     bool make)
1350
0
{
1351
0
  if (path->nexthop) {
1352
0
    LIST_REMOVE(path, nh_thread);
1353
0
    path->nexthop->path_count--;
1354
0
    path->nexthop = NULL;
1355
0
  }
1356
0
  if (make) {
1357
0
    LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1358
0
    path->nexthop = bnc;
1359
0
    path->nexthop->path_count++;
1360
0
  }
1361
0
}
1362
1363
/*
1364
 * This function is called to register nexthops to zebra
1365
 * as that we may have tried to install the nexthops
1366
 * before we actually have a zebra connection
1367
 */
1368
void bgp_nht_register_nexthops(struct bgp *bgp)
1369
0
{
1370
0
  for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1371
0
    struct bgp_nexthop_cache *bnc;
1372
1373
0
    frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1374
0
        bnc) {
1375
0
      register_zebra_rnh(bnc);
1376
0
    }
1377
0
  }
1378
0
}
1379
1380
void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1381
0
{
1382
0
  struct bgp *bgp;
1383
0
  struct bgp_nexthop_cache *bnc;
1384
0
  struct nexthop *nhop;
1385
0
  struct interface *ifp;
1386
0
  struct prefix p;
1387
0
  ifindex_t ifindex = 0;
1388
1389
0
  if (peer->ifp)
1390
0
    return;
1391
1392
0
  bgp = peer->bgp;
1393
0
  if (!sockunion2hostprefix(&peer->su, &p)) {
1394
0
    zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1395
0
        __func__, peer->host);
1396
0
    return;
1397
0
  }
1398
1399
0
  if (p.family != AF_INET6)
1400
0
    return;
1401
  /*
1402
   * Gather the ifindex for if up/down events to be
1403
   * tagged into this fun
1404
   */
1405
0
  if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1406
0
    ifindex = peer->su.sin6.sin6_scope_id;
1407
1408
0
  bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1409
0
  if (!bnc)
1410
0
    return;
1411
1412
0
  if (peer != bnc->nht_info)
1413
0
    return;
1414
1415
0
  for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1416
0
    ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1417
1418
0
    if (!ifp)
1419
0
      continue;
1420
1421
0
    zclient_send_interface_radv_req(zclient,
1422
0
            nhop->vrf_id,
1423
0
            ifp, true,
1424
0
            BGP_UNNUM_DEFAULT_RA_INTERVAL);
1425
0
  }
1426
0
}
1427
1428
void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1429
0
{
1430
0
  struct bgp *bgp;
1431
0
  struct bgp_nexthop_cache *bnc;
1432
0
  struct nexthop *nhop;
1433
0
  struct interface *ifp;
1434
0
  struct prefix p;
1435
0
  ifindex_t ifindex = 0;
1436
1437
0
  if (peer->ifp)
1438
0
    return;
1439
1440
0
  bgp = peer->bgp;
1441
1442
0
  if (!sockunion2hostprefix(&peer->su, &p)) {
1443
0
    zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1444
0
        __func__, peer->host);
1445
0
    return;
1446
0
  }
1447
1448
0
  if (p.family != AF_INET6)
1449
0
    return;
1450
  /*
1451
   * Gather the ifindex for if up/down events to be
1452
   * tagged into this fun
1453
   */
1454
0
  if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1455
0
    ifindex = peer->su.sin6.sin6_scope_id;
1456
1457
0
  bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1458
0
  if (!bnc)
1459
0
    return;
1460
1461
0
  if (peer != bnc->nht_info)
1462
0
    return;
1463
1464
0
  for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1465
0
    ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1466
1467
0
    if (!ifp)
1468
0
      continue;
1469
1470
0
    zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1471
0
            0);
1472
0
  }
1473
0
}
1474
1475
/****************************************************************************
1476
 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1477
 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1478
 * left to the application using it.
1479
 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1480
 * failover of remote ES links.
1481
 ***************************************************************************/
1482
static bitfield_t bgp_nh_id_bitmap;
1483
static uint32_t bgp_l3nhg_start;
1484
1485
/*
 * XXX - currently we do nothing on the callbacks. They are the handlers
 * handed to nexthop_group_init() by bgp_l3nhg_zebra_init().
 */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* intentionally empty */
}

static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	/* intentionally empty */
}

static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

static void bgp_l3nhg_del_cb(const char *name)
{
	/* intentionally empty */
}
1507
1508
static void bgp_l3nhg_zebra_init(void)
1509
0
{
1510
0
  static bool bgp_l3nhg_zebra_inited;
1511
0
  if (bgp_l3nhg_zebra_inited)
1512
0
    return;
1513
1514
0
  bgp_l3nhg_zebra_inited = true;
1515
0
  bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1516
0
  nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1517
0
         bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1518
0
         bgp_l3nhg_del_cb);
1519
0
}
1520
1521
1522
void bgp_l3nhg_init(void)
1523
0
{
1524
0
  uint32_t id_max;
1525
1526
0
  id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1527
0
  bf_init(bgp_nh_id_bitmap, id_max);
1528
0
  bf_assign_zero_index(bgp_nh_id_bitmap);
1529
1530
0
  if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1531
0
    zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1532
0
         bgp_l3nhg_start + id_max);
1533
0
}
1534
1535
void bgp_l3nhg_finish(void)
1536
0
{
1537
0
  bf_free(bgp_nh_id_bitmap);
1538
0
}
1539
1540
uint32_t bgp_l3nhg_id_alloc(void)
1541
0
{
1542
0
  uint32_t nhg_id = 0;
1543
1544
0
  bgp_l3nhg_zebra_init();
1545
0
  bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1546
0
  if (nhg_id)
1547
0
    nhg_id += bgp_l3nhg_start;
1548
1549
0
  return nhg_id;
1550
0
}
1551
1552
void bgp_l3nhg_id_free(uint32_t nhg_id)
1553
0
{
1554
0
  if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1555
0
    return;
1556
1557
0
  nhg_id -= bgp_l3nhg_start;
1558
1559
0
  bf_release_index(bgp_nh_id_bitmap, nhg_id);
1560
0
}