1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 58 #include <vm/uma.h> 59 60 /* 61 * This file contains control plane routing tables functions. 62 * 63 * All functions assumes they are called in net epoch. 64 */ 65 66 struct rib_subscription { 67 CK_STAILQ_ENTRY(rib_subscription) next; 68 rib_subscription_cb_t *func; 69 void *arg; 70 struct rib_head *rnh; 71 enum rib_subscription_type type; 72 struct epoch_context epoch_ctx; 73 }; 74 75 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 76 struct rib_cmd_info *rc); 77 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 78 struct rt_addrinfo *info, struct route_nhop_data *rnd, 79 struct rib_cmd_info *rc); 80 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 81 struct rib_cmd_info *rc); 82 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 84 85 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, 86 struct rib_cmd_info *rc); 87 88 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 89 struct rib_cmd_info *rc); 90 91 static void destroy_subscription_epoch(epoch_context_t ctx); 92 #ifdef ROUTE_MPATH 93 static bool rib_can_multipath(struct rib_head *rh); 94 #endif 95 96 /* Per-vnet multipath routing configuration */ 97 SYSCTL_DECL(_net_route); 98 #define V_rib_route_multipath VNET(rib_route_multipath) 99 #ifdef ROUTE_MPATH 100 #define _MP_FLAGS CTLFLAG_RW 101 #else 102 #define _MP_FLAGS CTLFLAG_RD 103 #endif 104 VNET_DEFINE(u_int, rib_route_multipath) = 1; 105 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 106 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 107 #undef _MP_FLAGS 108 109 #if defined(INET) && defined(INET6) 110 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops"); 111 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop) 112 VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1; 113 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET, 114 &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address"); 115 #endif 116 117 /* Routing table UMA zone */ 118 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 119 #define V_rtzone VNET(rtzone) 120 121 void 122 vnet_rtzone_init() 123 { 124 125 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 126 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 127 } 128 129 #ifdef VIMAGE 130 void 131 vnet_rtzone_destroy() 132 { 133 134 uma_zdestroy(V_rtzone); 135 } 136 #endif 137 138 static void 139 destroy_rtentry(struct rtentry *rt) 140 { 141 #ifdef VIMAGE 142 struct nhop_object *nh = rt->rt_nhop; 143 144 /* 145 * At this moment rnh, nh_control may be already freed. 146 * nhop interface may have been migrated to a different vnet. 147 * Use vnet stored in the nexthop to delete the entry. 148 */ 149 #ifdef ROUTE_MPATH 150 if (NH_IS_NHGRP(nh)) { 151 struct weightened_nhop *wn; 152 uint32_t num_nhops; 153 wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops); 154 nh = wn[0].nh; 155 } 156 #endif 157 CURVNET_SET(nhop_get_vnet(nh)); 158 #endif 159 160 /* Unreference nexthop */ 161 nhop_free_any(rt->rt_nhop); 162 163 uma_zfree(V_rtzone, rt); 164 165 CURVNET_RESTORE(); 166 } 167 168 /* 169 * Epoch callback indicating rtentry is safe to destroy 170 */ 171 static void 172 destroy_rtentry_epoch(epoch_context_t ctx) 173 { 174 struct rtentry *rt; 175 176 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 177 178 destroy_rtentry(rt); 179 } 180 181 /* 182 * Schedule rtentry deletion 183 */ 184 static void 185 rtfree(struct rtentry *rt) 186 { 187 188 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 189 190 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 191 &rt->rt_epoch_ctx); 192 } 193 194 static struct rib_head * 195 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 196 { 197 struct rib_head *rnh; 198 struct sockaddr *dst; 199 200 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 201 202 dst = info->rti_info[RTAX_DST]; 203 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 204 205 return (rnh); 206 } 207 208 #if defined(INET) && defined(INET6) 209 static bool 210 rib_can_ipv6_nexthop_address(struct rib_head *rh) 211 { 212 int result; 213 214 CURVNET_SET(rh->rib_vnet); 215 result = !!V_rib_route_ipv6_nexthop; 216 CURVNET_RESTORE(); 217 218 return (result); 219 } 220 #endif 221 222 #ifdef ROUTE_MPATH 223 static bool 224 rib_can_multipath(struct rib_head *rh) 225 { 226 int result; 227 228 CURVNET_SET(rh->rib_vnet); 229 result = !!V_rib_route_multipath; 230 CURVNET_RESTORE(); 231 232 return (result); 233 } 234 235 /* 236 * Check is nhop is multipath-eligible. 237 * Avoid nhops without gateways and redirects. 238 * 239 * Returns 1 for multipath-eligible nexthop, 240 * 0 otherwise. 241 */ 242 bool 243 nhop_can_multipath(const struct nhop_object *nh) 244 { 245 246 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 247 return (1); 248 if ((nh->nh_flags & NHF_GATEWAY) == 0) 249 return (0); 250 if ((nh->nh_flags & NHF_REDIRECT) != 0) 251 return (0); 252 253 return (1); 254 } 255 #endif 256 257 static int 258 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 259 { 260 uint32_t weight; 261 262 if (info->rti_mflags & RTV_WEIGHT) 263 weight = info->rti_rmx->rmx_weight; 264 else 265 weight = default_weight; 266 /* Keep upper 1 byte for adm distance purposes */ 267 if (weight > RT_MAX_WEIGHT) 268 weight = RT_MAX_WEIGHT; 269 270 return (weight); 271 } 272 273 bool 274 rt_is_host(const struct rtentry *rt) 275 { 276 277 return (rt->rte_flags & RTF_HOST); 278 } 279 280 sa_family_t 281 rt_get_family(const struct rtentry *rt) 282 { 283 const struct sockaddr *dst; 284 285 dst = (const struct sockaddr *)rt_key_const(rt); 286 287 return (dst->sa_family); 288 } 289 290 /* 291 * Returns pointer to nexthop or nexthop group 292 * associated with @rt 293 */ 294 struct nhop_object * 295 rt_get_raw_nhop(const struct rtentry *rt) 296 { 297 298 return (rt->rt_nhop); 299 } 300 301 #ifdef INET 302 /* 303 * Stores IPv4 address and prefix length of @rt inside 304 * @paddr and @plen. 305 * @pscopeid is currently always set to 0. 306 */ 307 void 308 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, 309 int *plen, uint32_t *pscopeid) 310 { 311 const struct sockaddr_in *dst; 312 313 dst = (const struct sockaddr_in *)rt_key_const(rt); 314 KASSERT((dst->sin_family == AF_INET), 315 ("rt family is %d, not inet", dst->sin_family)); 316 *paddr = dst->sin_addr; 317 dst = (const struct sockaddr_in *)rt_mask_const(rt); 318 if (dst == NULL) 319 *plen = 32; 320 else 321 *plen = bitcount32(dst->sin_addr.s_addr); 322 *pscopeid = 0; 323 } 324 325 /* 326 * Stores IPv4 address and prefix mask of @rt inside 327 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes. 328 * @pscopeid is currently always set to 0. 329 */ 330 void 331 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, 332 struct in_addr *pmask, uint32_t *pscopeid) 333 { 334 const struct sockaddr_in *dst; 335 336 dst = (const struct sockaddr_in *)rt_key_const(rt); 337 KASSERT((dst->sin_family == AF_INET), 338 ("rt family is %d, not inet", dst->sin_family)); 339 *paddr = dst->sin_addr; 340 dst = (const struct sockaddr_in *)rt_mask_const(rt); 341 if (dst == NULL) 342 pmask->s_addr = INADDR_BROADCAST; 343 else 344 *pmask = dst->sin_addr; 345 *pscopeid = 0; 346 } 347 #endif 348 349 #ifdef INET6 350 static int 351 inet6_get_plen(const struct in6_addr *addr) 352 { 353 354 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + 355 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); 356 } 357 358 /* 359 * Stores IPv6 address and prefix length of @rt inside 360 * @paddr and @plen. Addresses are returned in de-embedded form. 361 * Scopeid is set to 0 for non-LL addresses. 362 */ 363 void 364 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, 365 int *plen, uint32_t *pscopeid) 366 { 367 const struct sockaddr_in6 *dst; 368 369 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 370 KASSERT((dst->sin6_family == AF_INET6), 371 ("rt family is %d, not inet6", dst->sin6_family)); 372 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 373 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 374 else 375 *paddr = dst->sin6_addr; 376 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 377 if (dst == NULL) 378 *plen = 128; 379 else 380 *plen = inet6_get_plen(&dst->sin6_addr); 381 } 382 383 /* 384 * Stores IPv6 address and prefix mask of @rt inside 385 * @paddr and @pmask. Addresses are returned in de-embedded form. 386 * Scopeid is set to 0 for non-LL addresses. 387 */ 388 void 389 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 390 struct in6_addr *pmask, uint32_t *pscopeid) 391 { 392 const struct sockaddr_in6 *dst; 393 394 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 395 KASSERT((dst->sin6_family == AF_INET6), 396 ("rt family is %d, not inet", dst->sin6_family)); 397 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 398 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 399 else 400 *paddr = dst->sin6_addr; 401 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 402 if (dst == NULL) 403 memset(pmask, 0xFF, sizeof(struct in6_addr)); 404 else 405 *pmask = dst->sin6_addr; 406 } 407 #endif 408 409 static void 410 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 411 { 412 413 /* Kernel -> userland timebase conversion. */ 414 if (info->rti_mflags & RTV_EXPIRE) 415 rt->rt_expire = info->rti_rmx->rmx_expire ? 416 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 417 } 418 419 /* 420 * Check if specified @gw matches gw data in the nexthop @nh. 421 * 422 * Returns true if matches, false otherwise. 423 */ 424 bool 425 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 426 { 427 428 if (nh->gw_sa.sa_family != gw->sa_family) 429 return (false); 430 431 switch (gw->sa_family) { 432 case AF_INET: 433 return (nh->gw4_sa.sin_addr.s_addr == 434 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 435 case AF_INET6: 436 { 437 const struct sockaddr_in6 *gw6; 438 gw6 = (const struct sockaddr_in6 *)gw; 439 440 /* 441 * Currently (2020-09) IPv6 gws in kernel have their 442 * scope embedded. Once this becomes false, this code 443 * has to be revisited. 444 */ 445 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 446 &gw6->sin6_addr)) 447 return (true); 448 return (false); 449 } 450 case AF_LINK: 451 { 452 const struct sockaddr_dl *sdl; 453 sdl = (const struct sockaddr_dl *)gw; 454 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 455 } 456 default: 457 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 458 } 459 460 /* NOTREACHED */ 461 return (false); 462 } 463 464 /* 465 * Checks if data in @info matches nexhop @nh. 466 * 467 * Returns 0 on success, 468 * ESRCH if not matched, 469 * ENOENT if filter function returned false 470 */ 471 int 472 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 473 const struct nhop_object *nh) 474 { 475 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 476 477 if (info->rti_filter != NULL) { 478 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 479 return (ENOENT); 480 else 481 return (0); 482 } 483 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 484 return (ESRCH); 485 486 return (0); 487 } 488 489 /* 490 * Checks if nexhop @nh can be rewritten by data in @info because 491 * of higher "priority". Currently the only case for such scenario 492 * is kernel installing interface routes, marked by RTF_PINNED flag. 493 * 494 * Returns: 495 * 1 if @info data has higher priority 496 * 0 if priority is the same 497 * -1 if priority is lower 498 */ 499 int 500 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 501 { 502 503 if (info->rti_flags & RTF_PINNED) { 504 return (NH_IS_PINNED(nh)) ? 0 : 1; 505 } else { 506 return (NH_IS_PINNED(nh)) ? -1 : 0; 507 } 508 } 509 510 /* 511 * Runs exact prefix match based on @dst and @netmask. 512 * Returns matched @rtentry if found or NULL. 513 * If rtentry was found, saves nexthop / weight value into @rnd. 514 */ 515 static struct rtentry * 516 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 517 const struct sockaddr *netmask, struct route_nhop_data *rnd) 518 { 519 struct rtentry *rt; 520 521 RIB_LOCK_ASSERT(rnh); 522 523 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 524 __DECONST(void *, netmask), &rnh->head); 525 if (rt != NULL) { 526 rnd->rnd_nhop = rt->rt_nhop; 527 rnd->rnd_weight = rt->rt_weight; 528 } else { 529 rnd->rnd_nhop = NULL; 530 rnd->rnd_weight = 0; 531 } 532 533 return (rt); 534 } 535 536 /* 537 * Runs exact prefix match based on dst/netmask from @info. 538 * Assumes RIB lock is held. 539 * Returns matched @rtentry if found or NULL. 540 * If rtentry was found, saves nexthop / weight value into @rnd. 541 */ 542 struct rtentry * 543 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 544 struct route_nhop_data *rnd) 545 { 546 struct rtentry *rt; 547 548 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 549 info->rti_info[RTAX_NETMASK], rnd); 550 551 return (rt); 552 } 553 554 /* 555 * Adds route defined by @info into the kernel table specified by @fibnum and 556 * sa_family in @info->rti_info[RTAX_DST]. 557 * 558 * Returns 0 on success and fills in operation metadata into @rc. 559 */ 560 int 561 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 562 struct rib_cmd_info *rc) 563 { 564 struct rib_head *rnh; 565 int error; 566 567 NET_EPOCH_ASSERT(); 568 569 rnh = get_rnh(fibnum, info); 570 if (rnh == NULL) 571 return (EAFNOSUPPORT); 572 573 /* 574 * Check consistency between RTF_HOST flag and netmask 575 * existence. 576 */ 577 if (info->rti_flags & RTF_HOST) 578 info->rti_info[RTAX_NETMASK] = NULL; 579 else if (info->rti_info[RTAX_NETMASK] == NULL) 580 return (EINVAL); 581 582 bzero(rc, sizeof(struct rib_cmd_info)); 583 rc->rc_cmd = RTM_ADD; 584 585 error = add_route(rnh, info, rc); 586 if (error == 0) 587 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 588 589 return (error); 590 } 591 592 /* 593 * Checks if @dst and @gateway is valid combination. 594 * 595 * Returns true if is valid, false otherwise. 596 */ 597 static bool 598 check_gateway(struct rib_head *rnh, struct sockaddr *dst, 599 struct sockaddr *gateway) 600 { 601 if (dst->sa_family == gateway->sa_family) 602 return (true); 603 else if (gateway->sa_family == AF_UNSPEC) 604 return (true); 605 else if (gateway->sa_family == AF_LINK) 606 return (true); 607 #if defined(INET) && defined(INET6) 608 else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 && 609 rib_can_ipv6_nexthop_address(rnh)) 610 return (true); 611 #endif 612 else 613 return (false); 614 } 615 616 /* 617 * Creates rtentry and nexthop based on @info data. 618 * Return 0 and fills in rtentry into @prt on success, 619 * return errno otherwise. 620 */ 621 static int 622 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 623 struct rtentry **prt) 624 { 625 struct sockaddr *dst, *ndst, *gateway, *netmask; 626 struct rtentry *rt; 627 struct nhop_object *nh; 628 struct ifaddr *ifa; 629 int error, flags; 630 631 dst = info->rti_info[RTAX_DST]; 632 gateway = info->rti_info[RTAX_GATEWAY]; 633 netmask = info->rti_info[RTAX_NETMASK]; 634 flags = info->rti_flags; 635 636 if ((flags & RTF_GATEWAY) && !gateway) 637 return (EINVAL); 638 if (dst && gateway && !check_gateway(rnh, dst, gateway)) 639 return (EINVAL); 640 641 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 642 return (EINVAL); 643 644 if (info->rti_ifa == NULL) { 645 error = rt_getifa_fib(info, rnh->rib_fibnum); 646 if (error) 647 return (error); 648 } 649 650 error = nhop_create_from_info(rnh, info, &nh); 651 if (error != 0) 652 return (error); 653 654 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 655 if (rt == NULL) { 656 nhop_free(nh); 657 return (ENOBUFS); 658 } 659 rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK; 660 rt->rt_nhop = nh; 661 662 /* Fill in dst */ 663 memcpy(&rt->rt_dst, dst, dst->sa_len); 664 rt_key(rt) = &rt->rt_dst; 665 666 /* 667 * point to the (possibly newly malloc'd) dest address. 668 */ 669 ndst = (struct sockaddr *)rt_key(rt); 670 671 /* 672 * make sure it contains the value we want (masked if needed). 673 */ 674 if (netmask) { 675 rt_maskedcopy(dst, ndst, netmask); 676 } else 677 bcopy(dst, ndst, dst->sa_len); 678 679 /* 680 * We use the ifa reference returned by rt_getifa_fib(). 681 * This moved from below so that rnh->rnh_addaddr() can 682 * examine the ifa and ifa->ifa_ifp if it so desires. 683 */ 684 ifa = info->rti_ifa; 685 rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 686 rt_set_expire_info(rt, info); 687 688 *prt = rt; 689 return (0); 690 } 691 692 static int 693 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 694 struct rib_cmd_info *rc) 695 { 696 struct nhop_object *nh_orig; 697 struct route_nhop_data rnd_orig, rnd_add; 698 struct nhop_object *nh; 699 struct rtentry *rt, *rt_orig; 700 int error; 701 702 error = create_rtentry(rnh, info, &rt); 703 if (error != 0) 704 return (error); 705 706 rnd_add.rnd_nhop = rt->rt_nhop; 707 rnd_add.rnd_weight = rt->rt_weight; 708 nh = rt->rt_nhop; 709 710 RIB_WLOCK(rnh); 711 error = add_route_nhop(rnh, rt, info, &rnd_add, rc); 712 if (error == 0) { 713 RIB_WUNLOCK(rnh); 714 return (0); 715 } 716 717 /* addition failed. Lookup prefix in the rib to determine the cause */ 718 rt_orig = lookup_prefix(rnh, info, &rnd_orig); 719 if (rt_orig == NULL) { 720 /* No prefix -> rnh_addaddr() failed to allocate memory */ 721 RIB_WUNLOCK(rnh); 722 nhop_free(nh); 723 uma_zfree(V_rtzone, rt); 724 return (ENOMEM); 725 } 726 727 /* We have existing route in the RIB. */ 728 nh_orig = rnd_orig.rnd_nhop; 729 /* Check if new route has higher preference */ 730 if (can_override_nhop(info, nh_orig) > 0) { 731 /* Update nexthop to the new route */ 732 change_route_nhop(rnh, rt_orig, info, &rnd_add, rc); 733 RIB_WUNLOCK(rnh); 734 uma_zfree(V_rtzone, rt); 735 nhop_free(nh_orig); 736 return (0); 737 } 738 739 RIB_WUNLOCK(rnh); 740 741 #ifdef ROUTE_MPATH 742 if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) && 743 nhop_can_multipath(rnd_orig.rnd_nhop)) 744 error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc); 745 else 746 #endif 747 /* Unable to add - another route with the same preference exists */ 748 error = EEXIST; 749 750 /* 751 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt. 752 * ROUTE_MPATH enabled: original nhop reference is unused in any case, 753 * free rt only if not _adding_ new route to rib (e.g. the case 754 * when initial lookup returned existing route, but then it got 755 * deleted prior to multipath group insertion, leading to a simple 756 * non-multipath add as a result). 757 */ 758 nhop_free(nh); 759 if ((error != 0) || rc->rc_cmd != RTM_ADD) 760 uma_zfree(V_rtzone, rt); 761 762 return (error); 763 } 764 765 /* 766 * Removes route defined by @info from the kernel table specified by @fibnum and 767 * sa_family in @info->rti_info[RTAX_DST]. 768 * 769 * Returns 0 on success and fills in operation metadata into @rc. 770 */ 771 int 772 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 773 { 774 struct rib_head *rnh; 775 struct sockaddr *dst_orig, *netmask; 776 struct sockaddr_storage mdst; 777 int error; 778 779 NET_EPOCH_ASSERT(); 780 781 rnh = get_rnh(fibnum, info); 782 if (rnh == NULL) 783 return (EAFNOSUPPORT); 784 785 bzero(rc, sizeof(struct rib_cmd_info)); 786 rc->rc_cmd = RTM_DELETE; 787 788 dst_orig = info->rti_info[RTAX_DST]; 789 netmask = info->rti_info[RTAX_NETMASK]; 790 791 if (netmask != NULL) { 792 /* Ensure @dst is always properly masked */ 793 if (dst_orig->sa_len > sizeof(mdst)) 794 return (EINVAL); 795 rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask); 796 info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst; 797 } 798 error = del_route(rnh, info, rc); 799 info->rti_info[RTAX_DST] = dst_orig; 800 801 return (error); 802 } 803 804 /* 805 * Conditionally unlinks rtentry matching data inside @info from @rnh. 806 * Returns 0 on success with operation result stored in @rc. 807 * On error, returns: 808 * ESRCH - if prefix was not found, 809 * EADDRINUSE - if trying to delete higher priority route. 810 * ENOENT - if supplied filter function returned 0 (not matched). 811 */ 812 static int 813 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc) 814 { 815 struct rtentry *rt; 816 struct nhop_object *nh; 817 struct radix_node *rn; 818 struct route_nhop_data rnd; 819 int error; 820 821 rt = lookup_prefix(rnh, info, &rnd); 822 if (rt == NULL) 823 return (ESRCH); 824 825 nh = rt->rt_nhop; 826 #ifdef ROUTE_MPATH 827 if (NH_IS_NHGRP(nh)) { 828 error = del_route_mpath(rnh, info, rt, 829 (struct nhgrp_object *)nh, rc); 830 return (error); 831 } 832 #endif 833 error = check_info_match_nhop(info, rt, nh); 834 if (error != 0) 835 return (error); 836 837 if (can_override_nhop(info, nh) < 0) 838 return (EADDRINUSE); 839 840 /* 841 * Remove the item from the tree and return it. 842 * Complain if it is not there and do no more processing. 843 */ 844 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 845 info->rti_info[RTAX_NETMASK], &rnh->head); 846 if (rn == NULL) 847 return (ESRCH); 848 849 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 850 panic ("rtrequest delete"); 851 852 rt = RNTORT(rn); 853 rt->rte_flags &= ~RTF_UP; 854 855 /* Finalize notification */ 856 rib_bump_gen(rnh); 857 rnh->rnh_prefixes--; 858 859 rc->rc_cmd = RTM_DELETE; 860 rc->rc_rt = rt; 861 rc->rc_nh_old = rt->rt_nhop; 862 rc->rc_nh_weight = rt->rt_weight; 863 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 864 865 return (0); 866 } 867 868 static int 869 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 870 struct rib_cmd_info *rc) 871 { 872 int error; 873 874 RIB_WLOCK(rnh); 875 error = rt_unlinkrte(rnh, info, rc); 876 RIB_WUNLOCK(rnh); 877 if (error != 0) 878 return (error); 879 880 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 881 882 /* 883 * If the caller wants it, then it can have it, 884 * the entry will be deleted after the end of the current epoch. 885 */ 886 if (rc->rc_cmd == RTM_DELETE) 887 rtfree(rc->rc_rt); 888 #ifdef ROUTE_MPATH 889 else { 890 /* 891 * Deleting 1 path may result in RTM_CHANGE to 892 * a different mpath group/nhop. 893 * Free old mpath group. 894 */ 895 nhop_free_any(rc->rc_nh_old); 896 } 897 #endif 898 899 return (0); 900 } 901 902 int 903 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 904 struct rib_cmd_info *rc) 905 { 906 RIB_RLOCK_TRACKER; 907 struct route_nhop_data rnd_orig; 908 struct rib_head *rnh; 909 struct rtentry *rt; 910 int error; 911 912 NET_EPOCH_ASSERT(); 913 914 rnh = get_rnh(fibnum, info); 915 if (rnh == NULL) 916 return (EAFNOSUPPORT); 917 918 bzero(rc, sizeof(struct rib_cmd_info)); 919 rc->rc_cmd = RTM_CHANGE; 920 921 /* Check if updated gateway exists */ 922 if ((info->rti_flags & RTF_GATEWAY) && 923 (info->rti_info[RTAX_GATEWAY] == NULL)) { 924 925 /* 926 * route(8) adds RTF_GATEWAY flag if -interface is not set. 927 * Remove RTF_GATEWAY to enforce consistency and maintain 928 * compatibility.. 929 */ 930 info->rti_flags &= ~RTF_GATEWAY; 931 } 932 933 /* 934 * route change is done in multiple steps, with dropping and 935 * reacquiring lock. In the situations with multiple processes 936 * changes the same route in can lead to the case when route 937 * is changed between the steps. Address it by retrying the operation 938 * multiple times before failing. 939 */ 940 941 RIB_RLOCK(rnh); 942 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 943 info->rti_info[RTAX_NETMASK], &rnh->head); 944 945 if (rt == NULL) { 946 RIB_RUNLOCK(rnh); 947 return (ESRCH); 948 } 949 950 rnd_orig.rnd_nhop = rt->rt_nhop; 951 rnd_orig.rnd_weight = rt->rt_weight; 952 953 RIB_RUNLOCK(rnh); 954 955 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 956 error = change_route(rnh, info, &rnd_orig, rc); 957 if (error != EAGAIN) 958 break; 959 } 960 961 return (error); 962 } 963 964 static int 965 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 966 struct nhop_object *nh_orig, struct nhop_object **nh_new) 967 { 968 int error; 969 970 /* 971 * New gateway could require new ifaddr, ifp; 972 * flags may also be different; ifp may be specified 973 * by ll sockaddr when protocol address is ambiguous 974 */ 975 if (((nh_orig->nh_flags & NHF_GATEWAY) && 976 info->rti_info[RTAX_GATEWAY] != NULL) || 977 info->rti_info[RTAX_IFP] != NULL || 978 (info->rti_info[RTAX_IFA] != NULL && 979 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 980 error = rt_getifa_fib(info, rnh->rib_fibnum); 981 982 if (error != 0) { 983 info->rti_ifa = NULL; 984 return (error); 985 } 986 } 987 988 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 989 info->rti_ifa = NULL; 990 991 return (error); 992 } 993 994 #ifdef ROUTE_MPATH 995 static int 996 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 997 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 998 { 999 int error = 0; 1000 struct nhop_object *nh, *nh_orig, *nh_new; 1001 struct route_nhop_data rnd_new; 1002 1003 nh = NULL; 1004 nh_orig = rnd_orig->rnd_nhop; 1005 1006 struct weightened_nhop *wn = NULL, *wn_new; 1007 uint32_t num_nhops; 1008 1009 wn = nhgrp_get_nhops((struct nhgrp_object *)nh_orig, &num_nhops); 1010 nh_orig = NULL; 1011 for (int i = 0; i < num_nhops; i++) { 1012 if (check_info_match_nhop(info, NULL, wn[i].nh)) { 1013 nh_orig = wn[i].nh; 1014 break; 1015 } 1016 } 1017 1018 if (nh_orig == NULL) 1019 return (ESRCH); 1020 1021 error = change_nhop(rnh, info, nh_orig, &nh_new); 1022 if (error != 0) 1023 return (error); 1024 1025 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 1026 M_TEMP, M_NOWAIT | M_ZERO); 1027 if (wn_new == NULL) { 1028 nhop_free(nh_new); 1029 return (EAGAIN); 1030 } 1031 1032 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 1033 for (int i = 0; i < num_nhops; i++) { 1034 if (wn[i].nh == nh_orig) { 1035 wn[i].nh = nh_new; 1036 wn[i].weight = get_info_weight(info, rnd_orig->rnd_weight); 1037 break; 1038 } 1039 } 1040 1041 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 1042 nhop_free(nh_new); 1043 free(wn_new, M_TEMP); 1044 1045 if (error != 0) 1046 return (error); 1047 1048 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1049 1050 return (error); 1051 } 1052 #endif 1053 1054 static int 1055 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1056 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1057 { 1058 int error = 0; 1059 struct nhop_object *nh, *nh_orig; 1060 struct route_nhop_data rnd_new; 1061 1062 nh = NULL; 1063 nh_orig = rnd_orig->rnd_nhop; 1064 if (nh_orig == NULL) 1065 return (ESRCH); 1066 1067 #ifdef ROUTE_MPATH 1068 if (NH_IS_NHGRP(nh_orig)) 1069 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1070 #endif 1071 1072 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1073 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1074 if (error != 0) 1075 return (error); 1076 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1077 1078 return (error); 1079 } 1080 1081 /* 1082 * Insert @rt with nhop data from @rnd_new to @rnh. 1083 * Returns 0 on success and stores operation results in @rc. 1084 */ 1085 static int 1086 add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1087 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1088 struct rib_cmd_info *rc) 1089 { 1090 struct sockaddr *ndst, *netmask; 1091 struct radix_node *rn; 1092 int error = 0; 1093 1094 RIB_WLOCK_ASSERT(rnh); 1095 1096 ndst = (struct sockaddr *)rt_key(rt); 1097 netmask = info->rti_info[RTAX_NETMASK]; 1098 1099 rt->rt_nhop = rnd->rnd_nhop; 1100 rt->rt_weight = rnd->rnd_weight; 1101 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1102 1103 if (rn != NULL) { 1104 if (rt->rt_expire > 0) 1105 tmproutes_update(rnh, rt); 1106 1107 /* Finalize notification */ 1108 rib_bump_gen(rnh); 1109 rnh->rnh_prefixes++; 1110 1111 rc->rc_cmd = RTM_ADD; 1112 rc->rc_rt = rt; 1113 rc->rc_nh_old = NULL; 1114 rc->rc_nh_new = rnd->rnd_nhop; 1115 rc->rc_nh_weight = rnd->rnd_weight; 1116 1117 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1118 } else { 1119 /* Existing route or memory allocation failure */ 1120 error = EEXIST; 1121 } 1122 1123 return (error); 1124 } 1125 1126 /* 1127 * Switch @rt nhop/weigh to the ones specified in @rnd. 1128 * Conditionally set rt_expire if set in @info. 1129 * Returns 0 on success. 1130 */ 1131 int 1132 change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1133 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1134 struct rib_cmd_info *rc) 1135 { 1136 struct nhop_object *nh_orig; 1137 1138 RIB_WLOCK_ASSERT(rnh); 1139 1140 nh_orig = rt->rt_nhop; 1141 1142 if (rnd->rnd_nhop != NULL) { 1143 /* Changing expiration & nexthop & weight to a new one */ 1144 rt_set_expire_info(rt, info); 1145 rt->rt_nhop = rnd->rnd_nhop; 1146 rt->rt_weight = rnd->rnd_weight; 1147 if (rt->rt_expire > 0) 1148 tmproutes_update(rnh, rt); 1149 } else { 1150 /* Route deletion requested. */ 1151 struct sockaddr *ndst, *netmask; 1152 struct radix_node *rn; 1153 1154 ndst = (struct sockaddr *)rt_key(rt); 1155 netmask = info->rti_info[RTAX_NETMASK]; 1156 rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head); 1157 if (rn == NULL) 1158 return (ESRCH); 1159 rt = RNTORT(rn); 1160 rt->rte_flags &= ~RTF_UP; 1161 } 1162 1163 /* Finalize notification */ 1164 rib_bump_gen(rnh); 1165 if (rnd->rnd_nhop == NULL) 1166 rnh->rnh_prefixes--; 1167 1168 rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE; 1169 rc->rc_rt = rt; 1170 rc->rc_nh_old = nh_orig; 1171 rc->rc_nh_new = rnd->rnd_nhop; 1172 rc->rc_nh_weight = rnd->rnd_weight; 1173 1174 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1175 1176 return (0); 1177 } 1178 1179 /* 1180 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1181 * consistent with the current route data. 1182 * Nexthop in @nhd_new is consumed. 1183 */ 1184 int 1185 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1186 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1187 struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) 1188 { 1189 struct rtentry *rt_new; 1190 int error = 0; 1191 1192 RIB_WLOCK(rnh); 1193 1194 rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1195 info->rti_info[RTAX_NETMASK], &rnh->head); 1196 1197 if (rt_new == NULL) { 1198 if (rnd_orig->rnd_nhop == NULL) 1199 error = add_route_nhop(rnh, rt, info, rnd_new, rc); 1200 else { 1201 /* 1202 * Prefix does not exist, which was not our assumption. 1203 * Update @rnd_orig with the new data and return 1204 */ 1205 rnd_orig->rnd_nhop = NULL; 1206 rnd_orig->rnd_weight = 0; 1207 error = EAGAIN; 1208 } 1209 } else { 1210 /* Prefix exists, try to update */ 1211 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1212 /* 1213 * Nhop/mpath group hasn't changed. Flip 1214 * to the new precalculated one and return 1215 */ 1216 error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); 1217 } else { 1218 /* Update and retry */ 1219 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1220 rnd_orig->rnd_weight = rt_new->rt_weight; 1221 error = EAGAIN; 1222 } 1223 } 1224 1225 RIB_WUNLOCK(rnh); 1226 1227 if (error == 0) { 1228 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1229 1230 if (rnd_orig->rnd_nhop != NULL) 1231 nhop_free_any(rnd_orig->rnd_nhop); 1232 1233 } else { 1234 if (rnd_new->rnd_nhop != NULL) 1235 nhop_free_any(rnd_new->rnd_nhop); 1236 } 1237 1238 return (error); 1239 } 1240 1241 /* 1242 * Performs modification of routing table specificed by @action. 1243 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1244 * Needs to be run in network epoch. 1245 * 1246 * Returns 0 on success and fills in @rc with action result. 1247 */ 1248 int 1249 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1250 struct rib_cmd_info *rc) 1251 { 1252 int error; 1253 1254 switch (action) { 1255 case RTM_ADD: 1256 error = rib_add_route(fibnum, info, rc); 1257 break; 1258 case RTM_DELETE: 1259 error = rib_del_route(fibnum, info, rc); 1260 break; 1261 case RTM_CHANGE: 1262 error = rib_change_route(fibnum, info, rc); 1263 break; 1264 default: 1265 error = ENOTSUP; 1266 } 1267 1268 return (error); 1269 } 1270 1271 struct rt_delinfo 1272 { 1273 struct rt_addrinfo info; 1274 struct rib_head *rnh; 1275 struct rtentry *head; 1276 struct rib_cmd_info rc; 1277 }; 1278 1279 /* 1280 * Conditionally unlinks @rn from radix tree based 1281 * on info data passed in @arg. 1282 */ 1283 static int 1284 rt_checkdelroute(struct radix_node *rn, void *arg) 1285 { 1286 struct rt_delinfo *di; 1287 struct rt_addrinfo *info; 1288 struct rtentry *rt; 1289 1290 di = (struct rt_delinfo *)arg; 1291 rt = (struct rtentry *)rn; 1292 info = &di->info; 1293 1294 info->rti_info[RTAX_DST] = rt_key(rt); 1295 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 1296 1297 if (rt_unlinkrte(di->rnh, info, &di->rc) != 0) 1298 return (0); 1299 1300 /* 1301 * Add deleted rtentries to the list to GC them 1302 * after dropping the lock. 1303 * 1304 * XXX: Delayed notifications not implemented 1305 * for nexthop updates. 1306 */ 1307 if (di->rc.rc_cmd == RTM_DELETE) { 1308 /* Add to the list and return */ 1309 rt->rt_chain = di->head; 1310 di->head = rt; 1311 #ifdef ROUTE_MPATH 1312 } else { 1313 /* 1314 * RTM_CHANGE to a diferent nexthop or nexthop group. 1315 * Free old multipath group. 1316 */ 1317 nhop_free_any(di->rc.rc_nh_old); 1318 #endif 1319 } 1320 1321 return (0); 1322 } 1323 1324 /* 1325 * Iterates over a routing table specified by @fibnum and @family and 1326 * deletes elements marked by @filter_f. 1327 * @fibnum: rtable id 1328 * @family: AF_ address family 1329 * @filter_f: function returning non-zero value for items to delete 1330 * @arg: data to pass to the @filter_f function 1331 * @report: true if rtsock notification is needed. 1332 */ 1333 void 1334 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report) 1335 { 1336 struct rib_head *rnh; 1337 struct rt_delinfo di; 1338 struct rtentry *rt; 1339 struct nhop_object *nh; 1340 struct epoch_tracker et; 1341 1342 rnh = rt_tables_get_rnh(fibnum, family); 1343 if (rnh == NULL) 1344 return; 1345 1346 bzero(&di, sizeof(di)); 1347 di.info.rti_filter = filter_f; 1348 di.info.rti_filterdata = arg; 1349 di.rnh = rnh; 1350 di.rc.rc_cmd = RTM_DELETE; 1351 1352 NET_EPOCH_ENTER(et); 1353 1354 RIB_WLOCK(rnh); 1355 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1356 RIB_WUNLOCK(rnh); 1357 1358 /* We might have something to reclaim. */ 1359 bzero(&di.rc, sizeof(di.rc)); 1360 di.rc.rc_cmd = RTM_DELETE; 1361 while (di.head != NULL) { 1362 rt = di.head; 1363 di.head = rt->rt_chain; 1364 rt->rt_chain = NULL; 1365 nh = rt->rt_nhop; 1366 1367 di.rc.rc_rt = rt; 1368 di.rc.rc_nh_old = nh; 1369 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1370 1371 /* TODO std rt -> rt_addrinfo export */ 1372 di.info.rti_info[RTAX_DST] = rt_key(rt); 1373 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1374 1375 if (report) { 1376 #ifdef ROUTE_MPATH 1377 struct nhgrp_object *nhg; 1378 struct weightened_nhop *wn; 1379 uint32_t num_nhops; 1380 if (NH_IS_NHGRP(nh)) { 1381 nhg = (struct nhgrp_object *)nh; 1382 wn = nhgrp_get_nhops(nhg, &num_nhops); 1383 for (int i = 0; i < num_nhops; i++) 1384 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1385 } else 1386 #endif 1387 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1388 } 1389 rtfree(rt); 1390 } 1391 1392 NET_EPOCH_EXIT(et); 1393 } 1394 1395 static int 1396 rt_delete_unconditional(struct radix_node *rn, void *arg) 1397 { 1398 struct rtentry *rt = RNTORT(rn); 1399 struct rib_head *rnh = (struct rib_head *)arg; 1400 1401 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1402 if (RNTORT(rn) == rt) 1403 rtfree(rt); 1404 1405 return (0); 1406 } 1407 1408 /* 1409 * Removes all routes from the routing table without executing notifications. 1410 * rtentres will be removed after the end of a current epoch. 1411 */ 1412 static void 1413 rib_flush_routes(struct rib_head *rnh) 1414 { 1415 RIB_WLOCK(rnh); 1416 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1417 RIB_WUNLOCK(rnh); 1418 } 1419 1420 void 1421 rib_flush_routes_family(int family) 1422 { 1423 struct rib_head *rnh; 1424 1425 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1426 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1427 rib_flush_routes(rnh); 1428 } 1429 } 1430 1431 static void 1432 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1433 struct rib_cmd_info *rc) 1434 { 1435 struct rib_subscription *rs; 1436 1437 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1438 if (rs->type == type) 1439 rs->func(rnh, rc, rs->arg); 1440 } 1441 } 1442 1443 static struct rib_subscription * 1444 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1445 enum rib_subscription_type type, bool waitok) 1446 { 1447 struct rib_subscription *rs; 1448 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1449 1450 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1451 if (rs == NULL) 1452 return (NULL); 1453 1454 rs->func = f; 1455 rs->arg = arg; 1456 rs->type = type; 1457 1458 return (rs); 1459 } 1460 1461 /* 1462 * Subscribe for the changes in the routing table specified by @fibnum and 1463 * @family. 1464 * 1465 * Returns pointer to the subscription structure on success. 1466 */ 1467 struct rib_subscription * 1468 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1469 enum rib_subscription_type type, bool waitok) 1470 { 1471 struct rib_head *rnh; 1472 struct epoch_tracker et; 1473 1474 NET_EPOCH_ENTER(et); 1475 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1476 rnh = rt_tables_get_rnh(fibnum, family); 1477 NET_EPOCH_EXIT(et); 1478 1479 return (rib_subscribe_internal(rnh, f, arg, type, waitok)); 1480 } 1481 1482 struct rib_subscription * 1483 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1484 enum rib_subscription_type type, bool waitok) 1485 { 1486 struct rib_subscription *rs; 1487 struct epoch_tracker et; 1488 1489 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1490 return (NULL); 1491 rs->rnh = rnh; 1492 1493 NET_EPOCH_ENTER(et); 1494 RIB_WLOCK(rnh); 1495 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1496 RIB_WUNLOCK(rnh); 1497 NET_EPOCH_EXIT(et); 1498 1499 return (rs); 1500 } 1501 1502 struct rib_subscription * 1503 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1504 enum rib_subscription_type type) 1505 { 1506 struct rib_subscription *rs; 1507 1508 NET_EPOCH_ASSERT(); 1509 RIB_WLOCK_ASSERT(rnh); 1510 1511 if ((rs = allocate_subscription(f, arg, type, false)) == NULL) 1512 return (NULL); 1513 rs->rnh = rnh; 1514 1515 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1516 1517 return (rs); 1518 } 1519 1520 /* 1521 * Remove rtable subscription @rs from the routing table. 1522 * Needs to be run in network epoch. 1523 */ 1524 void 1525 rib_unsubscribe(struct rib_subscription *rs) 1526 { 1527 struct rib_head *rnh = rs->rnh; 1528 1529 NET_EPOCH_ASSERT(); 1530 1531 RIB_WLOCK(rnh); 1532 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1533 RIB_WUNLOCK(rnh); 1534 1535 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1536 &rs->epoch_ctx); 1537 } 1538 1539 void 1540 rib_unsubscribe_locked(struct rib_subscription *rs) 1541 { 1542 struct rib_head *rnh = rs->rnh; 1543 1544 NET_EPOCH_ASSERT(); 1545 RIB_WLOCK_ASSERT(rnh); 1546 1547 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1548 1549 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1550 &rs->epoch_ctx); 1551 } 1552 1553 /* 1554 * Epoch callback indicating subscription is safe to destroy 1555 */ 1556 static void 1557 destroy_subscription_epoch(epoch_context_t ctx) 1558 { 1559 struct rib_subscription *rs; 1560 1561 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1562 1563 free(rs, M_RTABLE); 1564 } 1565 1566 void 1567 rib_init_subscriptions(struct rib_head *rnh) 1568 { 1569 1570 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1571 } 1572 1573 void 1574 rib_destroy_subscriptions(struct rib_head *rnh) 1575 { 1576 struct rib_subscription *rs; 1577 struct epoch_tracker et; 1578 1579 NET_EPOCH_ENTER(et); 1580 RIB_WLOCK(rnh); 1581 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1582 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1583 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1584 &rs->epoch_ctx); 1585 } 1586 RIB_WUNLOCK(rnh); 1587 NET_EPOCH_EXIT(et); 1588 } 1589