1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 58 #include <vm/uma.h> 59 60 /* 61 * This file contains control plane routing tables functions. 62 * 63 * All functions assumes they are called in net epoch. 64 */ 65 66 struct rib_subscription { 67 CK_STAILQ_ENTRY(rib_subscription) next; 68 rib_subscription_cb_t *func; 69 void *arg; 70 struct rib_head *rnh; 71 enum rib_subscription_type type; 72 struct epoch_context epoch_ctx; 73 }; 74 75 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 76 struct rib_cmd_info *rc); 77 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 78 struct rt_addrinfo *info, struct route_nhop_data *rnd, 79 struct rib_cmd_info *rc); 80 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 81 struct rib_cmd_info *rc); 82 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 84 85 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, 86 struct rib_cmd_info *rc); 87 88 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 89 struct rib_cmd_info *rc); 90 91 static void destroy_subscription_epoch(epoch_context_t ctx); 92 #ifdef ROUTE_MPATH 93 static bool rib_can_multipath(struct rib_head *rh); 94 #endif 95 96 /* Per-vnet multipath routing configuration */ 97 SYSCTL_DECL(_net_route); 98 #define V_rib_route_multipath VNET(rib_route_multipath) 99 #ifdef ROUTE_MPATH 100 #define _MP_FLAGS CTLFLAG_RW 101 #else 102 #define _MP_FLAGS CTLFLAG_RD 103 #endif 104 VNET_DEFINE(u_int, rib_route_multipath) = 0; 105 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 106 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 107 #undef _MP_FLAGS 108 109 /* Routing table UMA zone */ 110 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 111 #define V_rtzone VNET(rtzone) 112 113 void 114 vnet_rtzone_init() 115 { 116 117 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 118 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 119 } 120 121 #ifdef VIMAGE 122 void 123 vnet_rtzone_destroy() 124 { 125 126 uma_zdestroy(V_rtzone); 127 } 128 #endif 129 130 static void 131 destroy_rtentry(struct rtentry *rt) 132 { 133 134 /* 135 * At this moment rnh, nh_control may be already freed. 136 * nhop interface may have been migrated to a different vnet. 137 * Use vnet stored in the nexthop to delete the entry. 138 */ 139 CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); 140 141 /* Unreference nexthop */ 142 nhop_free_any(rt->rt_nhop); 143 144 uma_zfree(V_rtzone, rt); 145 146 CURVNET_RESTORE(); 147 } 148 149 /* 150 * Epoch callback indicating rtentry is safe to destroy 151 */ 152 static void 153 destroy_rtentry_epoch(epoch_context_t ctx) 154 { 155 struct rtentry *rt; 156 157 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 158 159 destroy_rtentry(rt); 160 } 161 162 /* 163 * Schedule rtentry deletion 164 */ 165 static void 166 rtfree(struct rtentry *rt) 167 { 168 169 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 170 171 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 172 &rt->rt_epoch_ctx); 173 } 174 175 static struct rib_head * 176 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 177 { 178 struct rib_head *rnh; 179 struct sockaddr *dst; 180 181 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 182 183 dst = info->rti_info[RTAX_DST]; 184 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 185 186 return (rnh); 187 } 188 189 #ifdef ROUTE_MPATH 190 static bool 191 rib_can_multipath(struct rib_head *rh) 192 { 193 int result; 194 195 CURVNET_SET(rh->rib_vnet); 196 result = !!V_rib_route_multipath; 197 CURVNET_RESTORE(); 198 199 return (result); 200 } 201 202 /* 203 * Check is nhop is multipath-eligible. 204 * Avoid nhops without gateways and redirects. 205 * 206 * Returns 1 for multipath-eligible nexthop, 207 * 0 otherwise. 208 */ 209 bool 210 nhop_can_multipath(const struct nhop_object *nh) 211 { 212 213 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 214 return (1); 215 if ((nh->nh_flags & NHF_GATEWAY) == 0) 216 return (0); 217 if ((nh->nh_flags & NHF_REDIRECT) != 0) 218 return (0); 219 220 return (1); 221 } 222 #endif 223 224 static int 225 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 226 { 227 uint32_t weight; 228 229 if (info->rti_mflags & RTV_WEIGHT) 230 weight = info->rti_rmx->rmx_weight; 231 else 232 weight = default_weight; 233 /* Keep upper 1 byte for adm distance purposes */ 234 if (weight > RT_MAX_WEIGHT) 235 weight = RT_MAX_WEIGHT; 236 237 return (weight); 238 } 239 240 bool 241 rt_is_host(const struct rtentry *rt) 242 { 243 244 return (rt->rte_flags & RTF_HOST); 245 } 246 247 sa_family_t 248 rt_get_family(const struct rtentry *rt) 249 { 250 const struct sockaddr *dst; 251 252 dst = (const struct sockaddr *)rt_key_const(rt); 253 254 return (dst->sa_family); 255 } 256 257 /* 258 * Returns pointer to nexthop or nexthop group 259 * associated with @rt 260 */ 261 struct nhop_object * 262 rt_get_raw_nhop(const struct rtentry *rt) 263 { 264 265 return (rt->rt_nhop); 266 } 267 268 #ifdef INET 269 /* 270 * Stores IPv4 address and prefix length of @rt inside 271 * @paddr and @plen. 272 * @pscopeid is currently always set to 0. 273 */ 274 void 275 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, 276 int *plen, uint32_t *pscopeid) 277 { 278 const struct sockaddr_in *dst; 279 280 dst = (const struct sockaddr_in *)rt_key_const(rt); 281 KASSERT((dst->sin_family == AF_INET), 282 ("rt family is %d, not inet", dst->sin_family)); 283 *paddr = dst->sin_addr; 284 dst = (const struct sockaddr_in *)rt_mask_const(rt); 285 if (dst == NULL) 286 *plen = 32; 287 else 288 *plen = bitcount32(dst->sin_addr.s_addr); 289 *pscopeid = 0; 290 } 291 292 /* 293 * Stores IPv4 address and prefix mask of @rt inside 294 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes. 295 * @pscopeid is currently always set to 0. 296 */ 297 void 298 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, 299 struct in_addr *pmask, uint32_t *pscopeid) 300 { 301 const struct sockaddr_in *dst; 302 303 dst = (const struct sockaddr_in *)rt_key_const(rt); 304 KASSERT((dst->sin_family == AF_INET), 305 ("rt family is %d, not inet", dst->sin_family)); 306 *paddr = dst->sin_addr; 307 dst = (const struct sockaddr_in *)rt_mask_const(rt); 308 if (dst == NULL) 309 pmask->s_addr = INADDR_BROADCAST; 310 else 311 *pmask = dst->sin_addr; 312 *pscopeid = 0; 313 } 314 #endif 315 316 #ifdef INET6 317 static int 318 inet6_get_plen(const struct in6_addr *addr) 319 { 320 321 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + 322 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); 323 } 324 325 /* 326 * Stores IPv6 address and prefix length of @rt inside 327 * @paddr and @plen. Addresses are returned in de-embedded form. 328 * Scopeid is set to 0 for non-LL addresses. 329 */ 330 void 331 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, 332 int *plen, uint32_t *pscopeid) 333 { 334 const struct sockaddr_in6 *dst; 335 336 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 337 KASSERT((dst->sin6_family == AF_INET6), 338 ("rt family is %d, not inet6", dst->sin6_family)); 339 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 340 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 341 else 342 *paddr = dst->sin6_addr; 343 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 344 if (dst == NULL) 345 *plen = 128; 346 else 347 *plen = inet6_get_plen(&dst->sin6_addr); 348 } 349 350 /* 351 * Stores IPv6 address and prefix mask of @rt inside 352 * @paddr and @pmask. Addresses are returned in de-embedded form. 353 * Scopeid is set to 0 for non-LL addresses. 354 */ 355 void 356 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 357 struct in6_addr *pmask, uint32_t *pscopeid) 358 { 359 const struct sockaddr_in6 *dst; 360 361 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 362 KASSERT((dst->sin6_family == AF_INET6), 363 ("rt family is %d, not inet", dst->sin6_family)); 364 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 365 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 366 else 367 *paddr = dst->sin6_addr; 368 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 369 if (dst == NULL) 370 memset(pmask, 0xFF, sizeof(struct in6_addr)); 371 else 372 *pmask = dst->sin6_addr; 373 } 374 #endif 375 376 static void 377 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 378 { 379 380 /* Kernel -> userland timebase conversion. */ 381 if (info->rti_mflags & RTV_EXPIRE) 382 rt->rt_expire = info->rti_rmx->rmx_expire ? 383 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 384 } 385 386 /* 387 * Check if specified @gw matches gw data in the nexthop @nh. 388 * 389 * Returns true if matches, false otherwise. 390 */ 391 bool 392 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 393 { 394 395 if (nh->gw_sa.sa_family != gw->sa_family) 396 return (false); 397 398 switch (gw->sa_family) { 399 case AF_INET: 400 return (nh->gw4_sa.sin_addr.s_addr == 401 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 402 case AF_INET6: 403 { 404 const struct sockaddr_in6 *gw6; 405 gw6 = (const struct sockaddr_in6 *)gw; 406 407 /* 408 * Currently (2020-09) IPv6 gws in kernel have their 409 * scope embedded. Once this becomes false, this code 410 * has to be revisited. 411 */ 412 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 413 &gw6->sin6_addr)) 414 return (true); 415 return (false); 416 } 417 case AF_LINK: 418 { 419 const struct sockaddr_dl *sdl; 420 sdl = (const struct sockaddr_dl *)gw; 421 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 422 } 423 default: 424 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 425 } 426 427 /* NOTREACHED */ 428 return (false); 429 } 430 431 /* 432 * Checks if data in @info matches nexhop @nh. 433 * 434 * Returns 0 on success, 435 * ESRCH if not matched, 436 * ENOENT if filter function returned false 437 */ 438 int 439 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 440 const struct nhop_object *nh) 441 { 442 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 443 444 if (info->rti_filter != NULL) { 445 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 446 return (ENOENT); 447 else 448 return (0); 449 } 450 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 451 return (ESRCH); 452 453 return (0); 454 } 455 456 /* 457 * Checks if nexhop @nh can be rewritten by data in @info because 458 * of higher "priority". Currently the only case for such scenario 459 * is kernel installing interface routes, marked by RTF_PINNED flag. 460 * 461 * Returns: 462 * 1 if @info data has higher priority 463 * 0 if priority is the same 464 * -1 if priority is lower 465 */ 466 int 467 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 468 { 469 470 if (info->rti_flags & RTF_PINNED) { 471 return (NH_IS_PINNED(nh)) ? 0 : 1; 472 } else { 473 return (NH_IS_PINNED(nh)) ? -1 : 0; 474 } 475 } 476 477 /* 478 * Runs exact prefix match based on @dst and @netmask. 479 * Returns matched @rtentry if found or NULL. 480 * If rtentry was found, saves nexthop / weight value into @rnd. 481 */ 482 static struct rtentry * 483 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 484 const struct sockaddr *netmask, struct route_nhop_data *rnd) 485 { 486 struct rtentry *rt; 487 488 RIB_LOCK_ASSERT(rnh); 489 490 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 491 __DECONST(void *, netmask), &rnh->head); 492 if (rt != NULL) { 493 rnd->rnd_nhop = rt->rt_nhop; 494 rnd->rnd_weight = rt->rt_weight; 495 } else { 496 rnd->rnd_nhop = NULL; 497 rnd->rnd_weight = 0; 498 } 499 500 return (rt); 501 } 502 503 /* 504 * Runs exact prefix match based on dst/netmask from @info. 505 * Assumes RIB lock is held. 506 * Returns matched @rtentry if found or NULL. 507 * If rtentry was found, saves nexthop / weight value into @rnd. 508 */ 509 struct rtentry * 510 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 511 struct route_nhop_data *rnd) 512 { 513 struct rtentry *rt; 514 515 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 516 info->rti_info[RTAX_NETMASK], rnd); 517 518 return (rt); 519 } 520 521 /* 522 * Adds route defined by @info into the kernel table specified by @fibnum and 523 * sa_family in @info->rti_info[RTAX_DST]. 524 * 525 * Returns 0 on success and fills in operation metadata into @rc. 526 */ 527 int 528 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 529 struct rib_cmd_info *rc) 530 { 531 struct rib_head *rnh; 532 int error; 533 534 NET_EPOCH_ASSERT(); 535 536 rnh = get_rnh(fibnum, info); 537 if (rnh == NULL) 538 return (EAFNOSUPPORT); 539 540 /* 541 * Check consistency between RTF_HOST flag and netmask 542 * existence. 543 */ 544 if (info->rti_flags & RTF_HOST) 545 info->rti_info[RTAX_NETMASK] = NULL; 546 else if (info->rti_info[RTAX_NETMASK] == NULL) 547 return (EINVAL); 548 549 bzero(rc, sizeof(struct rib_cmd_info)); 550 rc->rc_cmd = RTM_ADD; 551 552 error = add_route(rnh, info, rc); 553 if (error == 0) 554 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 555 556 return (error); 557 } 558 559 /* 560 * Creates rtentry and nexthop based on @info data. 561 * Return 0 and fills in rtentry into @prt on success, 562 * return errno otherwise. 563 */ 564 static int 565 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 566 struct rtentry **prt) 567 { 568 struct sockaddr *dst, *ndst, *gateway, *netmask; 569 struct rtentry *rt; 570 struct nhop_object *nh; 571 struct ifaddr *ifa; 572 int error, flags; 573 574 dst = info->rti_info[RTAX_DST]; 575 gateway = info->rti_info[RTAX_GATEWAY]; 576 netmask = info->rti_info[RTAX_NETMASK]; 577 flags = info->rti_flags; 578 579 if ((flags & RTF_GATEWAY) && !gateway) 580 return (EINVAL); 581 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 582 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 583 return (EINVAL); 584 585 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 586 return (EINVAL); 587 588 if (info->rti_ifa == NULL) { 589 error = rt_getifa_fib(info, rnh->rib_fibnum); 590 if (error) 591 return (error); 592 } else { 593 ifa_ref(info->rti_ifa); 594 } 595 596 error = nhop_create_from_info(rnh, info, &nh); 597 if (error != 0) { 598 ifa_free(info->rti_ifa); 599 return (error); 600 } 601 602 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 603 if (rt == NULL) { 604 ifa_free(info->rti_ifa); 605 nhop_free(nh); 606 return (ENOBUFS); 607 } 608 rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK; 609 rt->rt_nhop = nh; 610 611 /* Fill in dst */ 612 memcpy(&rt->rt_dst, dst, dst->sa_len); 613 rt_key(rt) = &rt->rt_dst; 614 615 /* 616 * point to the (possibly newly malloc'd) dest address. 617 */ 618 ndst = (struct sockaddr *)rt_key(rt); 619 620 /* 621 * make sure it contains the value we want (masked if needed). 622 */ 623 if (netmask) { 624 rt_maskedcopy(dst, ndst, netmask); 625 } else 626 bcopy(dst, ndst, dst->sa_len); 627 628 /* 629 * We use the ifa reference returned by rt_getifa_fib(). 630 * This moved from below so that rnh->rnh_addaddr() can 631 * examine the ifa and ifa->ifa_ifp if it so desires. 632 */ 633 ifa = info->rti_ifa; 634 rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 635 rt_set_expire_info(rt, info); 636 637 *prt = rt; 638 return (0); 639 } 640 641 static int 642 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 643 struct rib_cmd_info *rc) 644 { 645 struct nhop_object *nh_orig; 646 struct route_nhop_data rnd_orig, rnd_add; 647 struct nhop_object *nh; 648 struct rtentry *rt, *rt_orig; 649 int error; 650 651 error = create_rtentry(rnh, info, &rt); 652 if (error != 0) 653 return (error); 654 655 rnd_add.rnd_nhop = rt->rt_nhop; 656 rnd_add.rnd_weight = rt->rt_weight; 657 nh = rt->rt_nhop; 658 659 RIB_WLOCK(rnh); 660 error = add_route_nhop(rnh, rt, info, &rnd_add, rc); 661 if (error == 0) { 662 RIB_WUNLOCK(rnh); 663 return (0); 664 } 665 666 /* addition failed. Lookup prefix in the rib to determine the cause */ 667 rt_orig = lookup_prefix(rnh, info, &rnd_orig); 668 if (rt_orig == NULL) { 669 /* No prefix -> rnh_addaddr() failed to allocate memory */ 670 RIB_WUNLOCK(rnh); 671 nhop_free(nh); 672 uma_zfree(V_rtzone, rt); 673 return (ENOMEM); 674 } 675 676 /* We have existing route in the RIB. */ 677 nh_orig = rnd_orig.rnd_nhop; 678 /* Check if new route has higher preference */ 679 if (can_override_nhop(info, nh_orig) > 0) { 680 /* Update nexthop to the new route */ 681 change_route_nhop(rnh, rt_orig, info, &rnd_add, rc); 682 RIB_WUNLOCK(rnh); 683 uma_zfree(V_rtzone, rt); 684 nhop_free(nh_orig); 685 return (0); 686 } 687 688 RIB_WUNLOCK(rnh); 689 690 #ifdef ROUTE_MPATH 691 if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) && 692 nhop_can_multipath(rnd_orig.rnd_nhop)) 693 error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc); 694 else 695 #endif 696 /* Unable to add - another route with the same preference exists */ 697 error = EEXIST; 698 699 /* 700 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt. 701 * ROUTE_MPATH enabled: original nhop reference is unused in any case, 702 * free rt only if not _adding_ new route to rib (e.g. the case 703 * when initial lookup returned existing route, but then it got 704 * deleted prior to multipath group insertion, leading to a simple 705 * non-multipath add as a result). 706 */ 707 nhop_free(nh); 708 if ((error != 0) || rc->rc_cmd != RTM_ADD) 709 uma_zfree(V_rtzone, rt); 710 711 return (error); 712 } 713 714 /* 715 * Removes route defined by @info from the kernel table specified by @fibnum and 716 * sa_family in @info->rti_info[RTAX_DST]. 717 * 718 * Returns 0 on success and fills in operation metadata into @rc. 719 */ 720 int 721 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 722 { 723 struct rib_head *rnh; 724 struct sockaddr *dst_orig, *netmask; 725 struct sockaddr_storage mdst; 726 int error; 727 728 NET_EPOCH_ASSERT(); 729 730 rnh = get_rnh(fibnum, info); 731 if (rnh == NULL) 732 return (EAFNOSUPPORT); 733 734 bzero(rc, sizeof(struct rib_cmd_info)); 735 rc->rc_cmd = RTM_DELETE; 736 737 dst_orig = info->rti_info[RTAX_DST]; 738 netmask = info->rti_info[RTAX_NETMASK]; 739 740 if (netmask != NULL) { 741 /* Ensure @dst is always properly masked */ 742 if (dst_orig->sa_len > sizeof(mdst)) 743 return (EINVAL); 744 rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask); 745 info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst; 746 } 747 error = del_route(rnh, info, rc); 748 info->rti_info[RTAX_DST] = dst_orig; 749 750 return (error); 751 } 752 753 /* 754 * Conditionally unlinks rtentry matching data inside @info from @rnh. 755 * Returns 0 on success with operation result stored in @rc. 756 * On error, returns: 757 * ESRCH - if prefix was not found, 758 * EADDRINUSE - if trying to delete higher priority route. 759 * ENOENT - if supplied filter function returned 0 (not matched). 760 */ 761 static int 762 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc) 763 { 764 struct rtentry *rt; 765 struct nhop_object *nh; 766 struct radix_node *rn; 767 struct route_nhop_data rnd; 768 int error; 769 770 rt = lookup_prefix(rnh, info, &rnd); 771 if (rt == NULL) 772 return (ESRCH); 773 774 nh = rt->rt_nhop; 775 #ifdef ROUTE_MPATH 776 if (NH_IS_NHGRP(nh)) { 777 error = del_route_mpath(rnh, info, rt, 778 (struct nhgrp_object *)nh, rc); 779 return (error); 780 } 781 #endif 782 error = check_info_match_nhop(info, rt, nh); 783 if (error != 0) 784 return (error); 785 786 if (can_override_nhop(info, nh) < 0) 787 return (EADDRINUSE); 788 789 /* 790 * Remove the item from the tree and return it. 791 * Complain if it is not there and do no more processing. 792 */ 793 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 794 info->rti_info[RTAX_NETMASK], &rnh->head); 795 if (rn == NULL) 796 return (ESRCH); 797 798 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 799 panic ("rtrequest delete"); 800 801 rt = RNTORT(rn); 802 rt->rte_flags &= ~RTF_UP; 803 804 /* Finalize notification */ 805 rnh->rnh_gen++; 806 rnh->rnh_prefixes--; 807 808 rc->rc_cmd = RTM_DELETE; 809 rc->rc_rt = rt; 810 rc->rc_nh_old = rt->rt_nhop; 811 rc->rc_nh_weight = rt->rt_weight; 812 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 813 814 return (0); 815 } 816 817 static int 818 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 819 struct rib_cmd_info *rc) 820 { 821 int error; 822 823 RIB_WLOCK(rnh); 824 error = rt_unlinkrte(rnh, info, rc); 825 RIB_WUNLOCK(rnh); 826 if (error != 0) 827 return (error); 828 829 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 830 831 /* 832 * If the caller wants it, then it can have it, 833 * the entry will be deleted after the end of the current epoch. 834 */ 835 if (rc->rc_cmd == RTM_DELETE) 836 rtfree(rc->rc_rt); 837 #ifdef ROUTE_MPATH 838 else { 839 /* 840 * Deleting 1 path may result in RTM_CHANGE to 841 * a different mpath group/nhop. 842 * Free old mpath group. 843 */ 844 nhop_free_any(rc->rc_nh_old); 845 } 846 #endif 847 848 return (0); 849 } 850 851 int 852 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 853 struct rib_cmd_info *rc) 854 { 855 RIB_RLOCK_TRACKER; 856 struct route_nhop_data rnd_orig; 857 struct rib_head *rnh; 858 struct rtentry *rt; 859 int error; 860 861 NET_EPOCH_ASSERT(); 862 863 rnh = get_rnh(fibnum, info); 864 if (rnh == NULL) 865 return (EAFNOSUPPORT); 866 867 bzero(rc, sizeof(struct rib_cmd_info)); 868 rc->rc_cmd = RTM_CHANGE; 869 870 /* Check if updated gateway exists */ 871 if ((info->rti_flags & RTF_GATEWAY) && 872 (info->rti_info[RTAX_GATEWAY] == NULL)) { 873 874 /* 875 * route(8) adds RTF_GATEWAY flag if -interface is not set. 876 * Remove RTF_GATEWAY to enforce consistency and maintain 877 * compatibility.. 878 */ 879 info->rti_flags &= ~RTF_GATEWAY; 880 } 881 882 /* 883 * route change is done in multiple steps, with dropping and 884 * reacquiring lock. In the situations with multiple processes 885 * changes the same route in can lead to the case when route 886 * is changed between the steps. Address it by retrying the operation 887 * multiple times before failing. 888 */ 889 890 RIB_RLOCK(rnh); 891 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 892 info->rti_info[RTAX_NETMASK], &rnh->head); 893 894 if (rt == NULL) { 895 RIB_RUNLOCK(rnh); 896 return (ESRCH); 897 } 898 899 rnd_orig.rnd_nhop = rt->rt_nhop; 900 rnd_orig.rnd_weight = rt->rt_weight; 901 902 RIB_RUNLOCK(rnh); 903 904 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 905 error = change_route(rnh, info, &rnd_orig, rc); 906 if (error != EAGAIN) 907 break; 908 } 909 910 return (error); 911 } 912 913 static int 914 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 915 struct nhop_object *nh_orig, struct nhop_object **nh_new) 916 { 917 int free_ifa = 0; 918 int error; 919 920 /* 921 * New gateway could require new ifaddr, ifp; 922 * flags may also be different; ifp may be specified 923 * by ll sockaddr when protocol address is ambiguous 924 */ 925 if (((nh_orig->nh_flags & NHF_GATEWAY) && 926 info->rti_info[RTAX_GATEWAY] != NULL) || 927 info->rti_info[RTAX_IFP] != NULL || 928 (info->rti_info[RTAX_IFA] != NULL && 929 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 930 error = rt_getifa_fib(info, rnh->rib_fibnum); 931 if (info->rti_ifa != NULL) 932 free_ifa = 1; 933 934 if (error != 0) { 935 if (free_ifa) { 936 ifa_free(info->rti_ifa); 937 info->rti_ifa = NULL; 938 } 939 940 return (error); 941 } 942 } 943 944 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 945 if (free_ifa) { 946 ifa_free(info->rti_ifa); 947 info->rti_ifa = NULL; 948 } 949 950 return (error); 951 } 952 953 #ifdef ROUTE_MPATH 954 static int 955 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 956 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 957 { 958 int error = 0; 959 struct nhop_object *nh, *nh_orig, *nh_new; 960 struct route_nhop_data rnd_new; 961 962 nh = NULL; 963 nh_orig = rnd_orig->rnd_nhop; 964 965 struct weightened_nhop *wn = NULL, *wn_new; 966 uint32_t num_nhops; 967 968 wn = nhgrp_get_nhops((struct nhgrp_object *)nh_orig, &num_nhops); 969 nh_orig = NULL; 970 for (int i = 0; i < num_nhops; i++) { 971 if (check_info_match_nhop(info, NULL, wn[i].nh)) { 972 nh_orig = wn[i].nh; 973 break; 974 } 975 } 976 977 if (nh_orig == NULL) 978 return (ESRCH); 979 980 error = change_nhop(rnh, info, nh_orig, &nh_new); 981 if (error != 0) 982 return (error); 983 984 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 985 M_TEMP, M_NOWAIT | M_ZERO); 986 if (wn_new == NULL) { 987 nhop_free(nh_new); 988 return (EAGAIN); 989 } 990 991 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 992 for (int i = 0; i < num_nhops; i++) { 993 if (wn[i].nh == nh_orig) { 994 wn[i].nh = nh_new; 995 wn[i].weight = get_info_weight(info, rnd_orig->rnd_weight); 996 break; 997 } 998 } 999 1000 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 1001 nhop_free(nh_new); 1002 free(wn_new, M_TEMP); 1003 1004 if (error != 0) 1005 return (error); 1006 1007 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1008 1009 return (error); 1010 } 1011 #endif 1012 1013 static int 1014 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1015 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1016 { 1017 int error = 0; 1018 struct nhop_object *nh, *nh_orig; 1019 struct route_nhop_data rnd_new; 1020 1021 nh = NULL; 1022 nh_orig = rnd_orig->rnd_nhop; 1023 if (nh_orig == NULL) 1024 return (ESRCH); 1025 1026 #ifdef ROUTE_MPATH 1027 if (NH_IS_NHGRP(nh_orig)) 1028 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1029 #endif 1030 1031 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1032 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1033 if (error != 0) 1034 return (error); 1035 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1036 1037 return (error); 1038 } 1039 1040 /* 1041 * Insert @rt with nhop data from @rnd_new to @rnh. 1042 * Returns 0 on success and stores operation results in @rc. 1043 */ 1044 static int 1045 add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1046 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1047 struct rib_cmd_info *rc) 1048 { 1049 struct sockaddr *ndst, *netmask; 1050 struct radix_node *rn; 1051 int error = 0; 1052 1053 RIB_WLOCK_ASSERT(rnh); 1054 1055 ndst = (struct sockaddr *)rt_key(rt); 1056 netmask = info->rti_info[RTAX_NETMASK]; 1057 1058 rt->rt_nhop = rnd->rnd_nhop; 1059 rt->rt_weight = rnd->rnd_weight; 1060 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1061 1062 if (rn != NULL) { 1063 if (rt->rt_expire > 0) 1064 tmproutes_update(rnh, rt); 1065 1066 /* Finalize notification */ 1067 rnh->rnh_gen++; 1068 rnh->rnh_prefixes++; 1069 1070 rc->rc_cmd = RTM_ADD; 1071 rc->rc_rt = rt; 1072 rc->rc_nh_old = NULL; 1073 rc->rc_nh_new = rnd->rnd_nhop; 1074 rc->rc_nh_weight = rnd->rnd_weight; 1075 1076 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1077 } else { 1078 /* Existing route or memory allocation failure */ 1079 error = EEXIST; 1080 } 1081 1082 return (error); 1083 } 1084 1085 /* 1086 * Switch @rt nhop/weigh to the ones specified in @rnd. 1087 * Conditionally set rt_expire if set in @info. 1088 * Returns 0 on success. 1089 */ 1090 int 1091 change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1092 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1093 struct rib_cmd_info *rc) 1094 { 1095 struct nhop_object *nh_orig; 1096 1097 RIB_WLOCK_ASSERT(rnh); 1098 1099 nh_orig = rt->rt_nhop; 1100 1101 if (rnd->rnd_nhop != NULL) { 1102 /* Changing expiration & nexthop & weight to a new one */ 1103 rt_set_expire_info(rt, info); 1104 rt->rt_nhop = rnd->rnd_nhop; 1105 rt->rt_weight = rnd->rnd_weight; 1106 if (rt->rt_expire > 0) 1107 tmproutes_update(rnh, rt); 1108 } else { 1109 /* Route deletion requested. */ 1110 struct sockaddr *ndst, *netmask; 1111 struct radix_node *rn; 1112 1113 ndst = (struct sockaddr *)rt_key(rt); 1114 netmask = info->rti_info[RTAX_NETMASK]; 1115 rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head); 1116 if (rn == NULL) 1117 return (ESRCH); 1118 rt = RNTORT(rn); 1119 rt->rte_flags &= ~RTF_UP; 1120 } 1121 1122 /* Finalize notification */ 1123 rnh->rnh_gen++; 1124 if (rnd->rnd_nhop == NULL) 1125 rnh->rnh_prefixes--; 1126 1127 rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE; 1128 rc->rc_rt = rt; 1129 rc->rc_nh_old = nh_orig; 1130 rc->rc_nh_new = rnd->rnd_nhop; 1131 rc->rc_nh_weight = rnd->rnd_weight; 1132 1133 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1134 1135 return (0); 1136 } 1137 1138 /* 1139 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1140 * consistent with the current route data. 1141 * Nexthop in @nhd_new is consumed. 1142 */ 1143 int 1144 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1145 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1146 struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) 1147 { 1148 struct rtentry *rt_new; 1149 int error = 0; 1150 1151 RIB_WLOCK(rnh); 1152 1153 rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1154 info->rti_info[RTAX_NETMASK], &rnh->head); 1155 1156 if (rt_new == NULL) { 1157 if (rnd_orig->rnd_nhop == NULL) 1158 error = add_route_nhop(rnh, rt, info, rnd_new, rc); 1159 else { 1160 /* 1161 * Prefix does not exist, which was not our assumption. 1162 * Update @rnd_orig with the new data and return 1163 */ 1164 rnd_orig->rnd_nhop = NULL; 1165 rnd_orig->rnd_weight = 0; 1166 error = EAGAIN; 1167 } 1168 } else { 1169 /* Prefix exists, try to update */ 1170 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1171 /* 1172 * Nhop/mpath group hasn't changed. Flip 1173 * to the new precalculated one and return 1174 */ 1175 error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); 1176 } else { 1177 /* Update and retry */ 1178 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1179 rnd_orig->rnd_weight = rt_new->rt_weight; 1180 error = EAGAIN; 1181 } 1182 } 1183 1184 RIB_WUNLOCK(rnh); 1185 1186 if (error == 0) { 1187 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1188 1189 if (rnd_orig->rnd_nhop != NULL) 1190 nhop_free_any(rnd_orig->rnd_nhop); 1191 1192 } else { 1193 if (rnd_new->rnd_nhop != NULL) 1194 nhop_free_any(rnd_new->rnd_nhop); 1195 } 1196 1197 return (error); 1198 } 1199 1200 /* 1201 * Performs modification of routing table specificed by @action. 1202 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1203 * Needs to be run in network epoch. 1204 * 1205 * Returns 0 on success and fills in @rc with action result. 1206 */ 1207 int 1208 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1209 struct rib_cmd_info *rc) 1210 { 1211 int error; 1212 1213 switch (action) { 1214 case RTM_ADD: 1215 error = rib_add_route(fibnum, info, rc); 1216 break; 1217 case RTM_DELETE: 1218 error = rib_del_route(fibnum, info, rc); 1219 break; 1220 case RTM_CHANGE: 1221 error = rib_change_route(fibnum, info, rc); 1222 break; 1223 default: 1224 error = ENOTSUP; 1225 } 1226 1227 return (error); 1228 } 1229 1230 struct rt_delinfo 1231 { 1232 struct rt_addrinfo info; 1233 struct rib_head *rnh; 1234 struct rtentry *head; 1235 struct rib_cmd_info rc; 1236 }; 1237 1238 /* 1239 * Conditionally unlinks @rn from radix tree based 1240 * on info data passed in @arg. 1241 */ 1242 static int 1243 rt_checkdelroute(struct radix_node *rn, void *arg) 1244 { 1245 struct rt_delinfo *di; 1246 struct rt_addrinfo *info; 1247 struct rtentry *rt; 1248 int error; 1249 1250 di = (struct rt_delinfo *)arg; 1251 rt = (struct rtentry *)rn; 1252 info = &di->info; 1253 1254 info->rti_info[RTAX_DST] = rt_key(rt); 1255 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 1256 1257 error = rt_unlinkrte(di->rnh, info, &di->rc); 1258 1259 /* 1260 * Add deleted rtentries to the list to GC them 1261 * after dropping the lock. 1262 * 1263 * XXX: Delayed notifications not implemented 1264 * for nexthop updates. 1265 */ 1266 if ((error == 0) && (di->rc.rc_cmd == RTM_DELETE)) { 1267 /* Add to the list and return */ 1268 rt->rt_chain = di->head; 1269 di->head = rt; 1270 } 1271 1272 return (0); 1273 } 1274 1275 /* 1276 * Iterates over a routing table specified by @fibnum and @family and 1277 * deletes elements marked by @filter_f. 1278 * @fibnum: rtable id 1279 * @family: AF_ address family 1280 * @filter_f: function returning non-zero value for items to delete 1281 * @arg: data to pass to the @filter_f function 1282 * @report: true if rtsock notification is needed. 1283 */ 1284 void 1285 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report) 1286 { 1287 struct rib_head *rnh; 1288 struct rt_delinfo di; 1289 struct rtentry *rt; 1290 struct nhop_object *nh; 1291 struct epoch_tracker et; 1292 1293 rnh = rt_tables_get_rnh(fibnum, family); 1294 if (rnh == NULL) 1295 return; 1296 1297 bzero(&di, sizeof(di)); 1298 di.info.rti_filter = filter_f; 1299 di.info.rti_filterdata = arg; 1300 di.rnh = rnh; 1301 di.rc.rc_cmd = RTM_DELETE; 1302 1303 NET_EPOCH_ENTER(et); 1304 1305 RIB_WLOCK(rnh); 1306 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1307 RIB_WUNLOCK(rnh); 1308 1309 /* We might have something to reclaim. */ 1310 bzero(&di.rc, sizeof(di.rc)); 1311 di.rc.rc_cmd = RTM_DELETE; 1312 while (di.head != NULL) { 1313 rt = di.head; 1314 di.head = rt->rt_chain; 1315 rt->rt_chain = NULL; 1316 nh = rt->rt_nhop; 1317 1318 di.rc.rc_rt = rt; 1319 di.rc.rc_nh_old = nh; 1320 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1321 1322 /* TODO std rt -> rt_addrinfo export */ 1323 di.info.rti_info[RTAX_DST] = rt_key(rt); 1324 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1325 1326 if (report) { 1327 #ifdef ROUTE_MPATH 1328 struct nhgrp_object *nhg; 1329 struct weightened_nhop *wn; 1330 uint32_t num_nhops; 1331 if (NH_IS_NHGRP(nh)) { 1332 nhg = (struct nhgrp_object *)nh; 1333 wn = nhgrp_get_nhops(nhg, &num_nhops); 1334 for (int i = 0; i < num_nhops; i++) 1335 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1336 } else 1337 #endif 1338 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1339 } 1340 rtfree(rt); 1341 } 1342 1343 NET_EPOCH_EXIT(et); 1344 } 1345 1346 static void 1347 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1348 struct rib_cmd_info *rc) 1349 { 1350 struct rib_subscription *rs; 1351 1352 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1353 if (rs->type == type) 1354 rs->func(rnh, rc, rs->arg); 1355 } 1356 } 1357 1358 static struct rib_subscription * 1359 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1360 enum rib_subscription_type type, bool waitok) 1361 { 1362 struct rib_subscription *rs; 1363 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1364 1365 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1366 if (rs == NULL) 1367 return (NULL); 1368 1369 rs->func = f; 1370 rs->arg = arg; 1371 rs->type = type; 1372 1373 return (rs); 1374 } 1375 1376 /* 1377 * Subscribe for the changes in the routing table specified by @fibnum and 1378 * @family. 1379 * 1380 * Returns pointer to the subscription structure on success. 1381 */ 1382 struct rib_subscription * 1383 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1384 enum rib_subscription_type type, bool waitok) 1385 { 1386 struct rib_head *rnh; 1387 struct epoch_tracker et; 1388 1389 NET_EPOCH_ENTER(et); 1390 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1391 rnh = rt_tables_get_rnh(fibnum, family); 1392 NET_EPOCH_EXIT(et); 1393 1394 return (rib_subscribe_internal(rnh, f, arg, type, waitok)); 1395 } 1396 1397 struct rib_subscription * 1398 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1399 enum rib_subscription_type type, bool waitok) 1400 { 1401 struct rib_subscription *rs; 1402 struct epoch_tracker et; 1403 1404 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1405 return (NULL); 1406 rs->rnh = rnh; 1407 1408 NET_EPOCH_ENTER(et); 1409 RIB_WLOCK(rnh); 1410 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1411 RIB_WUNLOCK(rnh); 1412 NET_EPOCH_EXIT(et); 1413 1414 return (rs); 1415 } 1416 1417 struct rib_subscription * 1418 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1419 enum rib_subscription_type type) 1420 { 1421 struct rib_subscription *rs; 1422 1423 NET_EPOCH_ASSERT(); 1424 RIB_WLOCK_ASSERT(rnh); 1425 1426 if ((rs = allocate_subscription(f, arg, type, false)) == NULL) 1427 return (NULL); 1428 rs->rnh = rnh; 1429 1430 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1431 1432 return (rs); 1433 } 1434 1435 /* 1436 * Remove rtable subscription @rs from the routing table. 1437 * Needs to be run in network epoch. 1438 */ 1439 void 1440 rib_unsibscribe(struct rib_subscription *rs) 1441 { 1442 struct rib_head *rnh = rs->rnh; 1443 1444 NET_EPOCH_ASSERT(); 1445 1446 RIB_WLOCK(rnh); 1447 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1448 RIB_WUNLOCK(rnh); 1449 1450 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1451 &rs->epoch_ctx); 1452 } 1453 1454 void 1455 rib_unsibscribe_locked(struct rib_subscription *rs) 1456 { 1457 struct rib_head *rnh = rs->rnh; 1458 1459 NET_EPOCH_ASSERT(); 1460 RIB_WLOCK_ASSERT(rnh); 1461 1462 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1463 1464 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1465 &rs->epoch_ctx); 1466 } 1467 1468 /* 1469 * Epoch callback indicating subscription is safe to destroy 1470 */ 1471 static void 1472 destroy_subscription_epoch(epoch_context_t ctx) 1473 { 1474 struct rib_subscription *rs; 1475 1476 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1477 1478 free(rs, M_RTABLE); 1479 } 1480 1481 void 1482 rib_init_subscriptions(struct rib_head *rnh) 1483 { 1484 1485 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1486 } 1487 1488 void 1489 rib_destroy_subscriptions(struct rib_head *rnh) 1490 { 1491 struct rib_subscription *rs; 1492 struct epoch_tracker et; 1493 1494 NET_EPOCH_ENTER(et); 1495 RIB_WLOCK(rnh); 1496 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1497 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1498 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1499 &rs->epoch_ctx); 1500 } 1501 RIB_WUNLOCK(rnh); 1502 NET_EPOCH_EXIT(et); 1503 } 1504