1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 58 #include <vm/uma.h> 59 60 /* 61 * This file contains control plane routing tables functions. 62 * 63 * All functions assumes they are called in net epoch. 64 */ 65 66 struct rib_subscription { 67 CK_STAILQ_ENTRY(rib_subscription) next; 68 rib_subscription_cb_t *func; 69 void *arg; 70 struct rib_head *rnh; 71 enum rib_subscription_type type; 72 struct epoch_context epoch_ctx; 73 }; 74 75 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 76 struct rib_cmd_info *rc); 77 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 78 struct rt_addrinfo *info, struct route_nhop_data *rnd, 79 struct rib_cmd_info *rc); 80 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 81 struct rib_cmd_info *rc); 82 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 84 85 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, 86 struct rib_cmd_info *rc); 87 88 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 89 struct rib_cmd_info *rc); 90 91 static void destroy_subscription_epoch(epoch_context_t ctx); 92 #ifdef ROUTE_MPATH 93 static bool rib_can_multipath(struct rib_head *rh); 94 #endif 95 96 /* Per-vnet multipath routing configuration */ 97 SYSCTL_DECL(_net_route); 98 #define V_rib_route_multipath VNET(rib_route_multipath) 99 #ifdef ROUTE_MPATH 100 #define _MP_FLAGS CTLFLAG_RW 101 #else 102 #define _MP_FLAGS CTLFLAG_RD 103 #endif 104 VNET_DEFINE(u_int, rib_route_multipath) = 1; 105 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 106 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 107 #undef _MP_FLAGS 108 109 /* Routing table UMA zone */ 110 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 111 #define V_rtzone VNET(rtzone) 112 113 void 114 vnet_rtzone_init() 115 { 116 117 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 118 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 119 } 120 121 #ifdef VIMAGE 122 void 123 vnet_rtzone_destroy() 124 { 125 126 uma_zdestroy(V_rtzone); 127 } 128 #endif 129 130 static void 131 destroy_rtentry(struct rtentry *rt) 132 { 133 134 /* 135 * At this moment rnh, nh_control may be already freed. 136 * nhop interface may have been migrated to a different vnet. 137 * Use vnet stored in the nexthop to delete the entry. 138 */ 139 CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); 140 141 /* Unreference nexthop */ 142 nhop_free_any(rt->rt_nhop); 143 144 uma_zfree(V_rtzone, rt); 145 146 CURVNET_RESTORE(); 147 } 148 149 /* 150 * Epoch callback indicating rtentry is safe to destroy 151 */ 152 static void 153 destroy_rtentry_epoch(epoch_context_t ctx) 154 { 155 struct rtentry *rt; 156 157 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 158 159 destroy_rtentry(rt); 160 } 161 162 /* 163 * Schedule rtentry deletion 164 */ 165 static void 166 rtfree(struct rtentry *rt) 167 { 168 169 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 170 171 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 172 &rt->rt_epoch_ctx); 173 } 174 175 static struct rib_head * 176 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 177 { 178 struct rib_head *rnh; 179 struct sockaddr *dst; 180 181 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 182 183 dst = info->rti_info[RTAX_DST]; 184 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 185 186 return (rnh); 187 } 188 189 #ifdef ROUTE_MPATH 190 static bool 191 rib_can_multipath(struct rib_head *rh) 192 { 193 int result; 194 195 CURVNET_SET(rh->rib_vnet); 196 result = !!V_rib_route_multipath; 197 CURVNET_RESTORE(); 198 199 return (result); 200 } 201 202 /* 203 * Check is nhop is multipath-eligible. 204 * Avoid nhops without gateways and redirects. 205 * 206 * Returns 1 for multipath-eligible nexthop, 207 * 0 otherwise. 208 */ 209 bool 210 nhop_can_multipath(const struct nhop_object *nh) 211 { 212 213 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 214 return (1); 215 if ((nh->nh_flags & NHF_GATEWAY) == 0) 216 return (0); 217 if ((nh->nh_flags & NHF_REDIRECT) != 0) 218 return (0); 219 220 return (1); 221 } 222 #endif 223 224 static int 225 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 226 { 227 uint32_t weight; 228 229 if (info->rti_mflags & RTV_WEIGHT) 230 weight = info->rti_rmx->rmx_weight; 231 else 232 weight = default_weight; 233 /* Keep upper 1 byte for adm distance purposes */ 234 if (weight > RT_MAX_WEIGHT) 235 weight = RT_MAX_WEIGHT; 236 237 return (weight); 238 } 239 240 bool 241 rt_is_host(const struct rtentry *rt) 242 { 243 244 return (rt->rte_flags & RTF_HOST); 245 } 246 247 sa_family_t 248 rt_get_family(const struct rtentry *rt) 249 { 250 const struct sockaddr *dst; 251 252 dst = (const struct sockaddr *)rt_key_const(rt); 253 254 return (dst->sa_family); 255 } 256 257 /* 258 * Returns pointer to nexthop or nexthop group 259 * associated with @rt 260 */ 261 struct nhop_object * 262 rt_get_raw_nhop(const struct rtentry *rt) 263 { 264 265 return (rt->rt_nhop); 266 } 267 268 #ifdef INET 269 /* 270 * Stores IPv4 address and prefix length of @rt inside 271 * @paddr and @plen. 272 * @pscopeid is currently always set to 0. 273 */ 274 void 275 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, 276 int *plen, uint32_t *pscopeid) 277 { 278 const struct sockaddr_in *dst; 279 280 dst = (const struct sockaddr_in *)rt_key_const(rt); 281 KASSERT((dst->sin_family == AF_INET), 282 ("rt family is %d, not inet", dst->sin_family)); 283 *paddr = dst->sin_addr; 284 dst = (const struct sockaddr_in *)rt_mask_const(rt); 285 if (dst == NULL) 286 *plen = 32; 287 else 288 *plen = bitcount32(dst->sin_addr.s_addr); 289 *pscopeid = 0; 290 } 291 292 /* 293 * Stores IPv4 address and prefix mask of @rt inside 294 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes. 295 * @pscopeid is currently always set to 0. 296 */ 297 void 298 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, 299 struct in_addr *pmask, uint32_t *pscopeid) 300 { 301 const struct sockaddr_in *dst; 302 303 dst = (const struct sockaddr_in *)rt_key_const(rt); 304 KASSERT((dst->sin_family == AF_INET), 305 ("rt family is %d, not inet", dst->sin_family)); 306 *paddr = dst->sin_addr; 307 dst = (const struct sockaddr_in *)rt_mask_const(rt); 308 if (dst == NULL) 309 pmask->s_addr = INADDR_BROADCAST; 310 else 311 *pmask = dst->sin_addr; 312 *pscopeid = 0; 313 } 314 #endif 315 316 #ifdef INET6 317 static int 318 inet6_get_plen(const struct in6_addr *addr) 319 { 320 321 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + 322 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); 323 } 324 325 /* 326 * Stores IPv6 address and prefix length of @rt inside 327 * @paddr and @plen. Addresses are returned in de-embedded form. 328 * Scopeid is set to 0 for non-LL addresses. 329 */ 330 void 331 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, 332 int *plen, uint32_t *pscopeid) 333 { 334 const struct sockaddr_in6 *dst; 335 336 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 337 KASSERT((dst->sin6_family == AF_INET6), 338 ("rt family is %d, not inet6", dst->sin6_family)); 339 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 340 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 341 else 342 *paddr = dst->sin6_addr; 343 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 344 if (dst == NULL) 345 *plen = 128; 346 else 347 *plen = inet6_get_plen(&dst->sin6_addr); 348 } 349 350 /* 351 * Stores IPv6 address and prefix mask of @rt inside 352 * @paddr and @pmask. Addresses are returned in de-embedded form. 353 * Scopeid is set to 0 for non-LL addresses. 354 */ 355 void 356 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 357 struct in6_addr *pmask, uint32_t *pscopeid) 358 { 359 const struct sockaddr_in6 *dst; 360 361 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 362 KASSERT((dst->sin6_family == AF_INET6), 363 ("rt family is %d, not inet", dst->sin6_family)); 364 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 365 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 366 else 367 *paddr = dst->sin6_addr; 368 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 369 if (dst == NULL) 370 memset(pmask, 0xFF, sizeof(struct in6_addr)); 371 else 372 *pmask = dst->sin6_addr; 373 } 374 #endif 375 376 static void 377 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 378 { 379 380 /* Kernel -> userland timebase conversion. */ 381 if (info->rti_mflags & RTV_EXPIRE) 382 rt->rt_expire = info->rti_rmx->rmx_expire ? 383 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 384 } 385 386 /* 387 * Check if specified @gw matches gw data in the nexthop @nh. 388 * 389 * Returns true if matches, false otherwise. 390 */ 391 bool 392 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 393 { 394 395 if (nh->gw_sa.sa_family != gw->sa_family) 396 return (false); 397 398 switch (gw->sa_family) { 399 case AF_INET: 400 return (nh->gw4_sa.sin_addr.s_addr == 401 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 402 case AF_INET6: 403 { 404 const struct sockaddr_in6 *gw6; 405 gw6 = (const struct sockaddr_in6 *)gw; 406 407 /* 408 * Currently (2020-09) IPv6 gws in kernel have their 409 * scope embedded. Once this becomes false, this code 410 * has to be revisited. 411 */ 412 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 413 &gw6->sin6_addr)) 414 return (true); 415 return (false); 416 } 417 case AF_LINK: 418 { 419 const struct sockaddr_dl *sdl; 420 sdl = (const struct sockaddr_dl *)gw; 421 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 422 } 423 default: 424 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 425 } 426 427 /* NOTREACHED */ 428 return (false); 429 } 430 431 /* 432 * Checks if data in @info matches nexhop @nh. 433 * 434 * Returns 0 on success, 435 * ESRCH if not matched, 436 * ENOENT if filter function returned false 437 */ 438 int 439 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 440 const struct nhop_object *nh) 441 { 442 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 443 444 if (info->rti_filter != NULL) { 445 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 446 return (ENOENT); 447 else 448 return (0); 449 } 450 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 451 return (ESRCH); 452 453 return (0); 454 } 455 456 /* 457 * Checks if nexhop @nh can be rewritten by data in @info because 458 * of higher "priority". Currently the only case for such scenario 459 * is kernel installing interface routes, marked by RTF_PINNED flag. 460 * 461 * Returns: 462 * 1 if @info data has higher priority 463 * 0 if priority is the same 464 * -1 if priority is lower 465 */ 466 int 467 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 468 { 469 470 if (info->rti_flags & RTF_PINNED) { 471 return (NH_IS_PINNED(nh)) ? 0 : 1; 472 } else { 473 return (NH_IS_PINNED(nh)) ? -1 : 0; 474 } 475 } 476 477 /* 478 * Runs exact prefix match based on @dst and @netmask. 479 * Returns matched @rtentry if found or NULL. 480 * If rtentry was found, saves nexthop / weight value into @rnd. 481 */ 482 static struct rtentry * 483 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 484 const struct sockaddr *netmask, struct route_nhop_data *rnd) 485 { 486 struct rtentry *rt; 487 488 RIB_LOCK_ASSERT(rnh); 489 490 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 491 __DECONST(void *, netmask), &rnh->head); 492 if (rt != NULL) { 493 rnd->rnd_nhop = rt->rt_nhop; 494 rnd->rnd_weight = rt->rt_weight; 495 } else { 496 rnd->rnd_nhop = NULL; 497 rnd->rnd_weight = 0; 498 } 499 500 return (rt); 501 } 502 503 /* 504 * Runs exact prefix match based on dst/netmask from @info. 505 * Assumes RIB lock is held. 506 * Returns matched @rtentry if found or NULL. 507 * If rtentry was found, saves nexthop / weight value into @rnd. 508 */ 509 struct rtentry * 510 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 511 struct route_nhop_data *rnd) 512 { 513 struct rtentry *rt; 514 515 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 516 info->rti_info[RTAX_NETMASK], rnd); 517 518 return (rt); 519 } 520 521 /* 522 * Adds route defined by @info into the kernel table specified by @fibnum and 523 * sa_family in @info->rti_info[RTAX_DST]. 524 * 525 * Returns 0 on success and fills in operation metadata into @rc. 526 */ 527 int 528 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 529 struct rib_cmd_info *rc) 530 { 531 struct rib_head *rnh; 532 int error; 533 534 NET_EPOCH_ASSERT(); 535 536 rnh = get_rnh(fibnum, info); 537 if (rnh == NULL) 538 return (EAFNOSUPPORT); 539 540 /* 541 * Check consistency between RTF_HOST flag and netmask 542 * existence. 543 */ 544 if (info->rti_flags & RTF_HOST) 545 info->rti_info[RTAX_NETMASK] = NULL; 546 else if (info->rti_info[RTAX_NETMASK] == NULL) 547 return (EINVAL); 548 549 bzero(rc, sizeof(struct rib_cmd_info)); 550 rc->rc_cmd = RTM_ADD; 551 552 error = add_route(rnh, info, rc); 553 if (error == 0) 554 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 555 556 return (error); 557 } 558 559 /* 560 * Creates rtentry and nexthop based on @info data. 561 * Return 0 and fills in rtentry into @prt on success, 562 * return errno otherwise. 563 */ 564 static int 565 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 566 struct rtentry **prt) 567 { 568 struct sockaddr *dst, *ndst, *gateway, *netmask; 569 struct rtentry *rt; 570 struct nhop_object *nh; 571 struct ifaddr *ifa; 572 int error, flags; 573 574 dst = info->rti_info[RTAX_DST]; 575 gateway = info->rti_info[RTAX_GATEWAY]; 576 netmask = info->rti_info[RTAX_NETMASK]; 577 flags = info->rti_flags; 578 579 if ((flags & RTF_GATEWAY) && !gateway) 580 return (EINVAL); 581 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 582 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 583 return (EINVAL); 584 585 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 586 return (EINVAL); 587 588 if (info->rti_ifa == NULL) { 589 error = rt_getifa_fib(info, rnh->rib_fibnum); 590 if (error) 591 return (error); 592 } else { 593 ifa_ref(info->rti_ifa); 594 } 595 596 error = nhop_create_from_info(rnh, info, &nh); 597 ifa_free(info->rti_ifa); 598 if (error != 0) 599 return (error); 600 601 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 602 if (rt == NULL) { 603 nhop_free(nh); 604 return (ENOBUFS); 605 } 606 rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK; 607 rt->rt_nhop = nh; 608 609 /* Fill in dst */ 610 memcpy(&rt->rt_dst, dst, dst->sa_len); 611 rt_key(rt) = &rt->rt_dst; 612 613 /* 614 * point to the (possibly newly malloc'd) dest address. 615 */ 616 ndst = (struct sockaddr *)rt_key(rt); 617 618 /* 619 * make sure it contains the value we want (masked if needed). 620 */ 621 if (netmask) { 622 rt_maskedcopy(dst, ndst, netmask); 623 } else 624 bcopy(dst, ndst, dst->sa_len); 625 626 /* 627 * We use the ifa reference returned by rt_getifa_fib(). 628 * This moved from below so that rnh->rnh_addaddr() can 629 * examine the ifa and ifa->ifa_ifp if it so desires. 630 */ 631 ifa = info->rti_ifa; 632 rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 633 rt_set_expire_info(rt, info); 634 635 *prt = rt; 636 return (0); 637 } 638 639 static int 640 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 641 struct rib_cmd_info *rc) 642 { 643 struct nhop_object *nh_orig; 644 struct route_nhop_data rnd_orig, rnd_add; 645 struct nhop_object *nh; 646 struct rtentry *rt, *rt_orig; 647 int error; 648 649 error = create_rtentry(rnh, info, &rt); 650 if (error != 0) 651 return (error); 652 653 rnd_add.rnd_nhop = rt->rt_nhop; 654 rnd_add.rnd_weight = rt->rt_weight; 655 nh = rt->rt_nhop; 656 657 RIB_WLOCK(rnh); 658 error = add_route_nhop(rnh, rt, info, &rnd_add, rc); 659 if (error == 0) { 660 RIB_WUNLOCK(rnh); 661 return (0); 662 } 663 664 /* addition failed. Lookup prefix in the rib to determine the cause */ 665 rt_orig = lookup_prefix(rnh, info, &rnd_orig); 666 if (rt_orig == NULL) { 667 /* No prefix -> rnh_addaddr() failed to allocate memory */ 668 RIB_WUNLOCK(rnh); 669 nhop_free(nh); 670 uma_zfree(V_rtzone, rt); 671 return (ENOMEM); 672 } 673 674 /* We have existing route in the RIB. */ 675 nh_orig = rnd_orig.rnd_nhop; 676 /* Check if new route has higher preference */ 677 if (can_override_nhop(info, nh_orig) > 0) { 678 /* Update nexthop to the new route */ 679 change_route_nhop(rnh, rt_orig, info, &rnd_add, rc); 680 RIB_WUNLOCK(rnh); 681 uma_zfree(V_rtzone, rt); 682 nhop_free(nh_orig); 683 return (0); 684 } 685 686 RIB_WUNLOCK(rnh); 687 688 #ifdef ROUTE_MPATH 689 if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) && 690 nhop_can_multipath(rnd_orig.rnd_nhop)) 691 error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc); 692 else 693 #endif 694 /* Unable to add - another route with the same preference exists */ 695 error = EEXIST; 696 697 /* 698 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt. 699 * ROUTE_MPATH enabled: original nhop reference is unused in any case, 700 * free rt only if not _adding_ new route to rib (e.g. the case 701 * when initial lookup returned existing route, but then it got 702 * deleted prior to multipath group insertion, leading to a simple 703 * non-multipath add as a result). 704 */ 705 nhop_free(nh); 706 if ((error != 0) || rc->rc_cmd != RTM_ADD) 707 uma_zfree(V_rtzone, rt); 708 709 return (error); 710 } 711 712 /* 713 * Removes route defined by @info from the kernel table specified by @fibnum and 714 * sa_family in @info->rti_info[RTAX_DST]. 715 * 716 * Returns 0 on success and fills in operation metadata into @rc. 717 */ 718 int 719 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 720 { 721 struct rib_head *rnh; 722 struct sockaddr *dst_orig, *netmask; 723 struct sockaddr_storage mdst; 724 int error; 725 726 NET_EPOCH_ASSERT(); 727 728 rnh = get_rnh(fibnum, info); 729 if (rnh == NULL) 730 return (EAFNOSUPPORT); 731 732 bzero(rc, sizeof(struct rib_cmd_info)); 733 rc->rc_cmd = RTM_DELETE; 734 735 dst_orig = info->rti_info[RTAX_DST]; 736 netmask = info->rti_info[RTAX_NETMASK]; 737 738 if (netmask != NULL) { 739 /* Ensure @dst is always properly masked */ 740 if (dst_orig->sa_len > sizeof(mdst)) 741 return (EINVAL); 742 rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask); 743 info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst; 744 } 745 error = del_route(rnh, info, rc); 746 info->rti_info[RTAX_DST] = dst_orig; 747 748 return (error); 749 } 750 751 /* 752 * Conditionally unlinks rtentry matching data inside @info from @rnh. 753 * Returns 0 on success with operation result stored in @rc. 754 * On error, returns: 755 * ESRCH - if prefix was not found, 756 * EADDRINUSE - if trying to delete higher priority route. 757 * ENOENT - if supplied filter function returned 0 (not matched). 758 */ 759 static int 760 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc) 761 { 762 struct rtentry *rt; 763 struct nhop_object *nh; 764 struct radix_node *rn; 765 struct route_nhop_data rnd; 766 int error; 767 768 rt = lookup_prefix(rnh, info, &rnd); 769 if (rt == NULL) 770 return (ESRCH); 771 772 nh = rt->rt_nhop; 773 #ifdef ROUTE_MPATH 774 if (NH_IS_NHGRP(nh)) { 775 error = del_route_mpath(rnh, info, rt, 776 (struct nhgrp_object *)nh, rc); 777 return (error); 778 } 779 #endif 780 error = check_info_match_nhop(info, rt, nh); 781 if (error != 0) 782 return (error); 783 784 if (can_override_nhop(info, nh) < 0) 785 return (EADDRINUSE); 786 787 /* 788 * Remove the item from the tree and return it. 789 * Complain if it is not there and do no more processing. 790 */ 791 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 792 info->rti_info[RTAX_NETMASK], &rnh->head); 793 if (rn == NULL) 794 return (ESRCH); 795 796 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 797 panic ("rtrequest delete"); 798 799 rt = RNTORT(rn); 800 rt->rte_flags &= ~RTF_UP; 801 802 /* Finalize notification */ 803 rnh->rnh_gen++; 804 rnh->rnh_prefixes--; 805 806 rc->rc_cmd = RTM_DELETE; 807 rc->rc_rt = rt; 808 rc->rc_nh_old = rt->rt_nhop; 809 rc->rc_nh_weight = rt->rt_weight; 810 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 811 812 return (0); 813 } 814 815 static int 816 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 817 struct rib_cmd_info *rc) 818 { 819 int error; 820 821 RIB_WLOCK(rnh); 822 error = rt_unlinkrte(rnh, info, rc); 823 RIB_WUNLOCK(rnh); 824 if (error != 0) 825 return (error); 826 827 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 828 829 /* 830 * If the caller wants it, then it can have it, 831 * the entry will be deleted after the end of the current epoch. 832 */ 833 if (rc->rc_cmd == RTM_DELETE) 834 rtfree(rc->rc_rt); 835 #ifdef ROUTE_MPATH 836 else { 837 /* 838 * Deleting 1 path may result in RTM_CHANGE to 839 * a different mpath group/nhop. 840 * Free old mpath group. 841 */ 842 nhop_free_any(rc->rc_nh_old); 843 } 844 #endif 845 846 return (0); 847 } 848 849 int 850 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 851 struct rib_cmd_info *rc) 852 { 853 RIB_RLOCK_TRACKER; 854 struct route_nhop_data rnd_orig; 855 struct rib_head *rnh; 856 struct rtentry *rt; 857 int error; 858 859 NET_EPOCH_ASSERT(); 860 861 rnh = get_rnh(fibnum, info); 862 if (rnh == NULL) 863 return (EAFNOSUPPORT); 864 865 bzero(rc, sizeof(struct rib_cmd_info)); 866 rc->rc_cmd = RTM_CHANGE; 867 868 /* Check if updated gateway exists */ 869 if ((info->rti_flags & RTF_GATEWAY) && 870 (info->rti_info[RTAX_GATEWAY] == NULL)) { 871 872 /* 873 * route(8) adds RTF_GATEWAY flag if -interface is not set. 874 * Remove RTF_GATEWAY to enforce consistency and maintain 875 * compatibility.. 876 */ 877 info->rti_flags &= ~RTF_GATEWAY; 878 } 879 880 /* 881 * route change is done in multiple steps, with dropping and 882 * reacquiring lock. In the situations with multiple processes 883 * changes the same route in can lead to the case when route 884 * is changed between the steps. Address it by retrying the operation 885 * multiple times before failing. 886 */ 887 888 RIB_RLOCK(rnh); 889 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 890 info->rti_info[RTAX_NETMASK], &rnh->head); 891 892 if (rt == NULL) { 893 RIB_RUNLOCK(rnh); 894 return (ESRCH); 895 } 896 897 rnd_orig.rnd_nhop = rt->rt_nhop; 898 rnd_orig.rnd_weight = rt->rt_weight; 899 900 RIB_RUNLOCK(rnh); 901 902 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 903 error = change_route(rnh, info, &rnd_orig, rc); 904 if (error != EAGAIN) 905 break; 906 } 907 908 return (error); 909 } 910 911 static int 912 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 913 struct nhop_object *nh_orig, struct nhop_object **nh_new) 914 { 915 int free_ifa = 0; 916 int error; 917 918 /* 919 * New gateway could require new ifaddr, ifp; 920 * flags may also be different; ifp may be specified 921 * by ll sockaddr when protocol address is ambiguous 922 */ 923 if (((nh_orig->nh_flags & NHF_GATEWAY) && 924 info->rti_info[RTAX_GATEWAY] != NULL) || 925 info->rti_info[RTAX_IFP] != NULL || 926 (info->rti_info[RTAX_IFA] != NULL && 927 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 928 error = rt_getifa_fib(info, rnh->rib_fibnum); 929 if (info->rti_ifa != NULL) 930 free_ifa = 1; 931 932 if (error != 0) { 933 if (free_ifa) { 934 ifa_free(info->rti_ifa); 935 info->rti_ifa = NULL; 936 } 937 938 return (error); 939 } 940 } 941 942 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 943 if (free_ifa) { 944 ifa_free(info->rti_ifa); 945 info->rti_ifa = NULL; 946 } 947 948 return (error); 949 } 950 951 #ifdef ROUTE_MPATH 952 static int 953 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 954 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 955 { 956 int error = 0; 957 struct nhop_object *nh, *nh_orig, *nh_new; 958 struct route_nhop_data rnd_new; 959 960 nh = NULL; 961 nh_orig = rnd_orig->rnd_nhop; 962 963 struct weightened_nhop *wn = NULL, *wn_new; 964 uint32_t num_nhops; 965 966 wn = nhgrp_get_nhops((struct nhgrp_object *)nh_orig, &num_nhops); 967 nh_orig = NULL; 968 for (int i = 0; i < num_nhops; i++) { 969 if (check_info_match_nhop(info, NULL, wn[i].nh)) { 970 nh_orig = wn[i].nh; 971 break; 972 } 973 } 974 975 if (nh_orig == NULL) 976 return (ESRCH); 977 978 error = change_nhop(rnh, info, nh_orig, &nh_new); 979 if (error != 0) 980 return (error); 981 982 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 983 M_TEMP, M_NOWAIT | M_ZERO); 984 if (wn_new == NULL) { 985 nhop_free(nh_new); 986 return (EAGAIN); 987 } 988 989 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 990 for (int i = 0; i < num_nhops; i++) { 991 if (wn[i].nh == nh_orig) { 992 wn[i].nh = nh_new; 993 wn[i].weight = get_info_weight(info, rnd_orig->rnd_weight); 994 break; 995 } 996 } 997 998 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 999 nhop_free(nh_new); 1000 free(wn_new, M_TEMP); 1001 1002 if (error != 0) 1003 return (error); 1004 1005 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1006 1007 return (error); 1008 } 1009 #endif 1010 1011 static int 1012 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1013 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1014 { 1015 int error = 0; 1016 struct nhop_object *nh, *nh_orig; 1017 struct route_nhop_data rnd_new; 1018 1019 nh = NULL; 1020 nh_orig = rnd_orig->rnd_nhop; 1021 if (nh_orig == NULL) 1022 return (ESRCH); 1023 1024 #ifdef ROUTE_MPATH 1025 if (NH_IS_NHGRP(nh_orig)) 1026 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1027 #endif 1028 1029 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1030 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1031 if (error != 0) 1032 return (error); 1033 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1034 1035 return (error); 1036 } 1037 1038 /* 1039 * Insert @rt with nhop data from @rnd_new to @rnh. 1040 * Returns 0 on success and stores operation results in @rc. 1041 */ 1042 static int 1043 add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1044 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1045 struct rib_cmd_info *rc) 1046 { 1047 struct sockaddr *ndst, *netmask; 1048 struct radix_node *rn; 1049 int error = 0; 1050 1051 RIB_WLOCK_ASSERT(rnh); 1052 1053 ndst = (struct sockaddr *)rt_key(rt); 1054 netmask = info->rti_info[RTAX_NETMASK]; 1055 1056 rt->rt_nhop = rnd->rnd_nhop; 1057 rt->rt_weight = rnd->rnd_weight; 1058 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1059 1060 if (rn != NULL) { 1061 if (rt->rt_expire > 0) 1062 tmproutes_update(rnh, rt); 1063 1064 /* Finalize notification */ 1065 rnh->rnh_gen++; 1066 rnh->rnh_prefixes++; 1067 1068 rc->rc_cmd = RTM_ADD; 1069 rc->rc_rt = rt; 1070 rc->rc_nh_old = NULL; 1071 rc->rc_nh_new = rnd->rnd_nhop; 1072 rc->rc_nh_weight = rnd->rnd_weight; 1073 1074 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1075 } else { 1076 /* Existing route or memory allocation failure */ 1077 error = EEXIST; 1078 } 1079 1080 return (error); 1081 } 1082 1083 /* 1084 * Switch @rt nhop/weigh to the ones specified in @rnd. 1085 * Conditionally set rt_expire if set in @info. 1086 * Returns 0 on success. 1087 */ 1088 int 1089 change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1090 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1091 struct rib_cmd_info *rc) 1092 { 1093 struct nhop_object *nh_orig; 1094 1095 RIB_WLOCK_ASSERT(rnh); 1096 1097 nh_orig = rt->rt_nhop; 1098 1099 if (rnd->rnd_nhop != NULL) { 1100 /* Changing expiration & nexthop & weight to a new one */ 1101 rt_set_expire_info(rt, info); 1102 rt->rt_nhop = rnd->rnd_nhop; 1103 rt->rt_weight = rnd->rnd_weight; 1104 if (rt->rt_expire > 0) 1105 tmproutes_update(rnh, rt); 1106 } else { 1107 /* Route deletion requested. */ 1108 struct sockaddr *ndst, *netmask; 1109 struct radix_node *rn; 1110 1111 ndst = (struct sockaddr *)rt_key(rt); 1112 netmask = info->rti_info[RTAX_NETMASK]; 1113 rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head); 1114 if (rn == NULL) 1115 return (ESRCH); 1116 rt = RNTORT(rn); 1117 rt->rte_flags &= ~RTF_UP; 1118 } 1119 1120 /* Finalize notification */ 1121 rnh->rnh_gen++; 1122 if (rnd->rnd_nhop == NULL) 1123 rnh->rnh_prefixes--; 1124 1125 rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE; 1126 rc->rc_rt = rt; 1127 rc->rc_nh_old = nh_orig; 1128 rc->rc_nh_new = rnd->rnd_nhop; 1129 rc->rc_nh_weight = rnd->rnd_weight; 1130 1131 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1132 1133 return (0); 1134 } 1135 1136 /* 1137 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1138 * consistent with the current route data. 1139 * Nexthop in @nhd_new is consumed. 1140 */ 1141 int 1142 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1143 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1144 struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) 1145 { 1146 struct rtentry *rt_new; 1147 int error = 0; 1148 1149 RIB_WLOCK(rnh); 1150 1151 rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1152 info->rti_info[RTAX_NETMASK], &rnh->head); 1153 1154 if (rt_new == NULL) { 1155 if (rnd_orig->rnd_nhop == NULL) 1156 error = add_route_nhop(rnh, rt, info, rnd_new, rc); 1157 else { 1158 /* 1159 * Prefix does not exist, which was not our assumption. 1160 * Update @rnd_orig with the new data and return 1161 */ 1162 rnd_orig->rnd_nhop = NULL; 1163 rnd_orig->rnd_weight = 0; 1164 error = EAGAIN; 1165 } 1166 } else { 1167 /* Prefix exists, try to update */ 1168 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1169 /* 1170 * Nhop/mpath group hasn't changed. Flip 1171 * to the new precalculated one and return 1172 */ 1173 error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); 1174 } else { 1175 /* Update and retry */ 1176 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1177 rnd_orig->rnd_weight = rt_new->rt_weight; 1178 error = EAGAIN; 1179 } 1180 } 1181 1182 RIB_WUNLOCK(rnh); 1183 1184 if (error == 0) { 1185 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1186 1187 if (rnd_orig->rnd_nhop != NULL) 1188 nhop_free_any(rnd_orig->rnd_nhop); 1189 1190 } else { 1191 if (rnd_new->rnd_nhop != NULL) 1192 nhop_free_any(rnd_new->rnd_nhop); 1193 } 1194 1195 return (error); 1196 } 1197 1198 /* 1199 * Performs modification of routing table specificed by @action. 1200 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1201 * Needs to be run in network epoch. 1202 * 1203 * Returns 0 on success and fills in @rc with action result. 1204 */ 1205 int 1206 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1207 struct rib_cmd_info *rc) 1208 { 1209 int error; 1210 1211 switch (action) { 1212 case RTM_ADD: 1213 error = rib_add_route(fibnum, info, rc); 1214 break; 1215 case RTM_DELETE: 1216 error = rib_del_route(fibnum, info, rc); 1217 break; 1218 case RTM_CHANGE: 1219 error = rib_change_route(fibnum, info, rc); 1220 break; 1221 default: 1222 error = ENOTSUP; 1223 } 1224 1225 return (error); 1226 } 1227 1228 struct rt_delinfo 1229 { 1230 struct rt_addrinfo info; 1231 struct rib_head *rnh; 1232 struct rtentry *head; 1233 struct rib_cmd_info rc; 1234 }; 1235 1236 /* 1237 * Conditionally unlinks @rn from radix tree based 1238 * on info data passed in @arg. 1239 */ 1240 static int 1241 rt_checkdelroute(struct radix_node *rn, void *arg) 1242 { 1243 struct rt_delinfo *di; 1244 struct rt_addrinfo *info; 1245 struct rtentry *rt; 1246 int error; 1247 1248 di = (struct rt_delinfo *)arg; 1249 rt = (struct rtentry *)rn; 1250 info = &di->info; 1251 1252 info->rti_info[RTAX_DST] = rt_key(rt); 1253 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 1254 1255 error = rt_unlinkrte(di->rnh, info, &di->rc); 1256 1257 /* 1258 * Add deleted rtentries to the list to GC them 1259 * after dropping the lock. 1260 * 1261 * XXX: Delayed notifications not implemented 1262 * for nexthop updates. 1263 */ 1264 if ((error == 0) && (di->rc.rc_cmd == RTM_DELETE)) { 1265 /* Add to the list and return */ 1266 rt->rt_chain = di->head; 1267 di->head = rt; 1268 } 1269 1270 return (0); 1271 } 1272 1273 /* 1274 * Iterates over a routing table specified by @fibnum and @family and 1275 * deletes elements marked by @filter_f. 1276 * @fibnum: rtable id 1277 * @family: AF_ address family 1278 * @filter_f: function returning non-zero value for items to delete 1279 * @arg: data to pass to the @filter_f function 1280 * @report: true if rtsock notification is needed. 1281 */ 1282 void 1283 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report) 1284 { 1285 struct rib_head *rnh; 1286 struct rt_delinfo di; 1287 struct rtentry *rt; 1288 struct nhop_object *nh; 1289 struct epoch_tracker et; 1290 1291 rnh = rt_tables_get_rnh(fibnum, family); 1292 if (rnh == NULL) 1293 return; 1294 1295 bzero(&di, sizeof(di)); 1296 di.info.rti_filter = filter_f; 1297 di.info.rti_filterdata = arg; 1298 di.rnh = rnh; 1299 di.rc.rc_cmd = RTM_DELETE; 1300 1301 NET_EPOCH_ENTER(et); 1302 1303 RIB_WLOCK(rnh); 1304 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1305 RIB_WUNLOCK(rnh); 1306 1307 /* We might have something to reclaim. */ 1308 bzero(&di.rc, sizeof(di.rc)); 1309 di.rc.rc_cmd = RTM_DELETE; 1310 while (di.head != NULL) { 1311 rt = di.head; 1312 di.head = rt->rt_chain; 1313 rt->rt_chain = NULL; 1314 nh = rt->rt_nhop; 1315 1316 di.rc.rc_rt = rt; 1317 di.rc.rc_nh_old = nh; 1318 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1319 1320 /* TODO std rt -> rt_addrinfo export */ 1321 di.info.rti_info[RTAX_DST] = rt_key(rt); 1322 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1323 1324 if (report) { 1325 #ifdef ROUTE_MPATH 1326 struct nhgrp_object *nhg; 1327 struct weightened_nhop *wn; 1328 uint32_t num_nhops; 1329 if (NH_IS_NHGRP(nh)) { 1330 nhg = (struct nhgrp_object *)nh; 1331 wn = nhgrp_get_nhops(nhg, &num_nhops); 1332 for (int i = 0; i < num_nhops; i++) 1333 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1334 } else 1335 #endif 1336 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1337 } 1338 rtfree(rt); 1339 } 1340 1341 NET_EPOCH_EXIT(et); 1342 } 1343 1344 static void 1345 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1346 struct rib_cmd_info *rc) 1347 { 1348 struct rib_subscription *rs; 1349 1350 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1351 if (rs->type == type) 1352 rs->func(rnh, rc, rs->arg); 1353 } 1354 } 1355 1356 static struct rib_subscription * 1357 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1358 enum rib_subscription_type type, bool waitok) 1359 { 1360 struct rib_subscription *rs; 1361 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1362 1363 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1364 if (rs == NULL) 1365 return (NULL); 1366 1367 rs->func = f; 1368 rs->arg = arg; 1369 rs->type = type; 1370 1371 return (rs); 1372 } 1373 1374 /* 1375 * Subscribe for the changes in the routing table specified by @fibnum and 1376 * @family. 1377 * 1378 * Returns pointer to the subscription structure on success. 1379 */ 1380 struct rib_subscription * 1381 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1382 enum rib_subscription_type type, bool waitok) 1383 { 1384 struct rib_head *rnh; 1385 struct epoch_tracker et; 1386 1387 NET_EPOCH_ENTER(et); 1388 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1389 rnh = rt_tables_get_rnh(fibnum, family); 1390 NET_EPOCH_EXIT(et); 1391 1392 return (rib_subscribe_internal(rnh, f, arg, type, waitok)); 1393 } 1394 1395 struct rib_subscription * 1396 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1397 enum rib_subscription_type type, bool waitok) 1398 { 1399 struct rib_subscription *rs; 1400 struct epoch_tracker et; 1401 1402 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1403 return (NULL); 1404 rs->rnh = rnh; 1405 1406 NET_EPOCH_ENTER(et); 1407 RIB_WLOCK(rnh); 1408 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1409 RIB_WUNLOCK(rnh); 1410 NET_EPOCH_EXIT(et); 1411 1412 return (rs); 1413 } 1414 1415 struct rib_subscription * 1416 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1417 enum rib_subscription_type type) 1418 { 1419 struct rib_subscription *rs; 1420 1421 NET_EPOCH_ASSERT(); 1422 RIB_WLOCK_ASSERT(rnh); 1423 1424 if ((rs = allocate_subscription(f, arg, type, false)) == NULL) 1425 return (NULL); 1426 rs->rnh = rnh; 1427 1428 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1429 1430 return (rs); 1431 } 1432 1433 /* 1434 * Remove rtable subscription @rs from the routing table. 1435 * Needs to be run in network epoch. 1436 */ 1437 void 1438 rib_unsibscribe(struct rib_subscription *rs) 1439 { 1440 struct rib_head *rnh = rs->rnh; 1441 1442 NET_EPOCH_ASSERT(); 1443 1444 RIB_WLOCK(rnh); 1445 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1446 RIB_WUNLOCK(rnh); 1447 1448 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1449 &rs->epoch_ctx); 1450 } 1451 1452 void 1453 rib_unsibscribe_locked(struct rib_subscription *rs) 1454 { 1455 struct rib_head *rnh = rs->rnh; 1456 1457 NET_EPOCH_ASSERT(); 1458 RIB_WLOCK_ASSERT(rnh); 1459 1460 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1461 1462 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1463 &rs->epoch_ctx); 1464 } 1465 1466 /* 1467 * Epoch callback indicating subscription is safe to destroy 1468 */ 1469 static void 1470 destroy_subscription_epoch(epoch_context_t ctx) 1471 { 1472 struct rib_subscription *rs; 1473 1474 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1475 1476 free(rs, M_RTABLE); 1477 } 1478 1479 void 1480 rib_init_subscriptions(struct rib_head *rnh) 1481 { 1482 1483 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1484 } 1485 1486 void 1487 rib_destroy_subscriptions(struct rib_head *rnh) 1488 { 1489 struct rib_subscription *rs; 1490 struct epoch_tracker et; 1491 1492 NET_EPOCH_ENTER(et); 1493 RIB_WLOCK(rnh); 1494 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1495 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1496 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1497 &rs->epoch_ctx); 1498 } 1499 RIB_WUNLOCK(rnh); 1500 NET_EPOCH_EXIT(et); 1501 } 1502