1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 58 #include <vm/uma.h> 59 60 /* 61 * This file contains control plane routing tables functions. 62 * 63 * All functions assumes they are called in net epoch. 64 */ 65 66 struct rib_subscription { 67 CK_STAILQ_ENTRY(rib_subscription) next; 68 rib_subscription_cb_t *func; 69 void *arg; 70 struct rib_head *rnh; 71 enum rib_subscription_type type; 72 struct epoch_context epoch_ctx; 73 }; 74 75 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 76 struct rib_cmd_info *rc); 77 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 78 struct rt_addrinfo *info, struct route_nhop_data *rnd, 79 struct rib_cmd_info *rc); 80 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 81 struct rib_cmd_info *rc); 82 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 84 85 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, 86 struct rib_cmd_info *rc); 87 88 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 89 struct rib_cmd_info *rc); 90 91 static void destroy_subscription_epoch(epoch_context_t ctx); 92 #ifdef ROUTE_MPATH 93 static bool rib_can_multipath(struct rib_head *rh); 94 #endif 95 96 /* Per-vnet multipath routing configuration */ 97 SYSCTL_DECL(_net_route); 98 #define V_rib_route_multipath VNET(rib_route_multipath) 99 #ifdef ROUTE_MPATH 100 #define _MP_FLAGS CTLFLAG_RW 101 #else 102 #define _MP_FLAGS CTLFLAG_RD 103 #endif 104 VNET_DEFINE(u_int, rib_route_multipath) = 1; 105 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 106 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 107 #undef _MP_FLAGS 108 109 /* Routing table UMA zone */ 110 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 111 #define V_rtzone VNET(rtzone) 112 113 void 114 vnet_rtzone_init() 115 { 116 117 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 118 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 119 } 120 121 #ifdef VIMAGE 122 void 123 vnet_rtzone_destroy() 124 { 125 126 uma_zdestroy(V_rtzone); 127 } 128 #endif 129 130 static void 131 destroy_rtentry(struct rtentry *rt) 132 { 133 134 /* 135 * At this moment rnh, nh_control may be already freed. 136 * nhop interface may have been migrated to a different vnet. 137 * Use vnet stored in the nexthop to delete the entry. 138 */ 139 CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); 140 141 /* Unreference nexthop */ 142 nhop_free_any(rt->rt_nhop); 143 144 uma_zfree(V_rtzone, rt); 145 146 CURVNET_RESTORE(); 147 } 148 149 /* 150 * Epoch callback indicating rtentry is safe to destroy 151 */ 152 static void 153 destroy_rtentry_epoch(epoch_context_t ctx) 154 { 155 struct rtentry *rt; 156 157 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 158 159 destroy_rtentry(rt); 160 } 161 162 /* 163 * Schedule rtentry deletion 164 */ 165 static void 166 rtfree(struct rtentry *rt) 167 { 168 169 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 170 171 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 172 &rt->rt_epoch_ctx); 173 } 174 175 static struct rib_head * 176 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 177 { 178 struct rib_head *rnh; 179 struct sockaddr *dst; 180 181 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 182 183 dst = info->rti_info[RTAX_DST]; 184 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 185 186 return (rnh); 187 } 188 189 #ifdef ROUTE_MPATH 190 static bool 191 rib_can_multipath(struct rib_head *rh) 192 { 193 int result; 194 195 CURVNET_SET(rh->rib_vnet); 196 result = !!V_rib_route_multipath; 197 CURVNET_RESTORE(); 198 199 return (result); 200 } 201 202 /* 203 * Check is nhop is multipath-eligible. 204 * Avoid nhops without gateways and redirects. 205 * 206 * Returns 1 for multipath-eligible nexthop, 207 * 0 otherwise. 208 */ 209 bool 210 nhop_can_multipath(const struct nhop_object *nh) 211 { 212 213 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 214 return (1); 215 if ((nh->nh_flags & NHF_GATEWAY) == 0) 216 return (0); 217 if ((nh->nh_flags & NHF_REDIRECT) != 0) 218 return (0); 219 220 return (1); 221 } 222 #endif 223 224 static int 225 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 226 { 227 uint32_t weight; 228 229 if (info->rti_mflags & RTV_WEIGHT) 230 weight = info->rti_rmx->rmx_weight; 231 else 232 weight = default_weight; 233 /* Keep upper 1 byte for adm distance purposes */ 234 if (weight > RT_MAX_WEIGHT) 235 weight = RT_MAX_WEIGHT; 236 237 return (weight); 238 } 239 240 bool 241 rt_is_host(const struct rtentry *rt) 242 { 243 244 return (rt->rte_flags & RTF_HOST); 245 } 246 247 sa_family_t 248 rt_get_family(const struct rtentry *rt) 249 { 250 const struct sockaddr *dst; 251 252 dst = (const struct sockaddr *)rt_key_const(rt); 253 254 return (dst->sa_family); 255 } 256 257 /* 258 * Returns pointer to nexthop or nexthop group 259 * associated with @rt 260 */ 261 struct nhop_object * 262 rt_get_raw_nhop(const struct rtentry *rt) 263 { 264 265 return (rt->rt_nhop); 266 } 267 268 #ifdef INET 269 /* 270 * Stores IPv4 address and prefix length of @rt inside 271 * @paddr and @plen. 272 * @pscopeid is currently always set to 0. 273 */ 274 void 275 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, 276 int *plen, uint32_t *pscopeid) 277 { 278 const struct sockaddr_in *dst; 279 280 dst = (const struct sockaddr_in *)rt_key_const(rt); 281 KASSERT((dst->sin_family == AF_INET), 282 ("rt family is %d, not inet", dst->sin_family)); 283 *paddr = dst->sin_addr; 284 dst = (const struct sockaddr_in *)rt_mask_const(rt); 285 if (dst == NULL) 286 *plen = 32; 287 else 288 *plen = bitcount32(dst->sin_addr.s_addr); 289 *pscopeid = 0; 290 } 291 292 /* 293 * Stores IPv4 address and prefix mask of @rt inside 294 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes. 295 * @pscopeid is currently always set to 0. 296 */ 297 void 298 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, 299 struct in_addr *pmask, uint32_t *pscopeid) 300 { 301 const struct sockaddr_in *dst; 302 303 dst = (const struct sockaddr_in *)rt_key_const(rt); 304 KASSERT((dst->sin_family == AF_INET), 305 ("rt family is %d, not inet", dst->sin_family)); 306 *paddr = dst->sin_addr; 307 dst = (const struct sockaddr_in *)rt_mask_const(rt); 308 if (dst == NULL) 309 pmask->s_addr = INADDR_BROADCAST; 310 else 311 *pmask = dst->sin_addr; 312 *pscopeid = 0; 313 } 314 #endif 315 316 #ifdef INET6 317 static int 318 inet6_get_plen(const struct in6_addr *addr) 319 { 320 321 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + 322 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); 323 } 324 325 /* 326 * Stores IPv6 address and prefix length of @rt inside 327 * @paddr and @plen. Addresses are returned in de-embedded form. 328 * Scopeid is set to 0 for non-LL addresses. 329 */ 330 void 331 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, 332 int *plen, uint32_t *pscopeid) 333 { 334 const struct sockaddr_in6 *dst; 335 336 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 337 KASSERT((dst->sin6_family == AF_INET6), 338 ("rt family is %d, not inet6", dst->sin6_family)); 339 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 340 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 341 else 342 *paddr = dst->sin6_addr; 343 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 344 if (dst == NULL) 345 *plen = 128; 346 else 347 *plen = inet6_get_plen(&dst->sin6_addr); 348 } 349 350 /* 351 * Stores IPv6 address and prefix mask of @rt inside 352 * @paddr and @pmask. Addresses are returned in de-embedded form. 353 * Scopeid is set to 0 for non-LL addresses. 354 */ 355 void 356 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 357 struct in6_addr *pmask, uint32_t *pscopeid) 358 { 359 const struct sockaddr_in6 *dst; 360 361 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 362 KASSERT((dst->sin6_family == AF_INET6), 363 ("rt family is %d, not inet", dst->sin6_family)); 364 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 365 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 366 else 367 *paddr = dst->sin6_addr; 368 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 369 if (dst == NULL) 370 memset(pmask, 0xFF, sizeof(struct in6_addr)); 371 else 372 *pmask = dst->sin6_addr; 373 } 374 #endif 375 376 static void 377 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 378 { 379 380 /* Kernel -> userland timebase conversion. */ 381 if (info->rti_mflags & RTV_EXPIRE) 382 rt->rt_expire = info->rti_rmx->rmx_expire ? 383 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 384 } 385 386 /* 387 * Check if specified @gw matches gw data in the nexthop @nh. 388 * 389 * Returns true if matches, false otherwise. 390 */ 391 bool 392 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 393 { 394 395 if (nh->gw_sa.sa_family != gw->sa_family) 396 return (false); 397 398 switch (gw->sa_family) { 399 case AF_INET: 400 return (nh->gw4_sa.sin_addr.s_addr == 401 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 402 case AF_INET6: 403 { 404 const struct sockaddr_in6 *gw6; 405 gw6 = (const struct sockaddr_in6 *)gw; 406 407 /* 408 * Currently (2020-09) IPv6 gws in kernel have their 409 * scope embedded. Once this becomes false, this code 410 * has to be revisited. 411 */ 412 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 413 &gw6->sin6_addr)) 414 return (true); 415 return (false); 416 } 417 case AF_LINK: 418 { 419 const struct sockaddr_dl *sdl; 420 sdl = (const struct sockaddr_dl *)gw; 421 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 422 } 423 default: 424 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 425 } 426 427 /* NOTREACHED */ 428 return (false); 429 } 430 431 /* 432 * Checks if data in @info matches nexhop @nh. 433 * 434 * Returns 0 on success, 435 * ESRCH if not matched, 436 * ENOENT if filter function returned false 437 */ 438 int 439 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 440 const struct nhop_object *nh) 441 { 442 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 443 444 if (info->rti_filter != NULL) { 445 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 446 return (ENOENT); 447 else 448 return (0); 449 } 450 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 451 return (ESRCH); 452 453 return (0); 454 } 455 456 /* 457 * Checks if nexhop @nh can be rewritten by data in @info because 458 * of higher "priority". Currently the only case for such scenario 459 * is kernel installing interface routes, marked by RTF_PINNED flag. 460 * 461 * Returns: 462 * 1 if @info data has higher priority 463 * 0 if priority is the same 464 * -1 if priority is lower 465 */ 466 int 467 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 468 { 469 470 if (info->rti_flags & RTF_PINNED) { 471 return (NH_IS_PINNED(nh)) ? 0 : 1; 472 } else { 473 return (NH_IS_PINNED(nh)) ? -1 : 0; 474 } 475 } 476 477 /* 478 * Runs exact prefix match based on @dst and @netmask. 479 * Returns matched @rtentry if found or NULL. 480 * If rtentry was found, saves nexthop / weight value into @rnd. 481 */ 482 static struct rtentry * 483 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 484 const struct sockaddr *netmask, struct route_nhop_data *rnd) 485 { 486 struct rtentry *rt; 487 488 RIB_LOCK_ASSERT(rnh); 489 490 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 491 __DECONST(void *, netmask), &rnh->head); 492 if (rt != NULL) { 493 rnd->rnd_nhop = rt->rt_nhop; 494 rnd->rnd_weight = rt->rt_weight; 495 } else { 496 rnd->rnd_nhop = NULL; 497 rnd->rnd_weight = 0; 498 } 499 500 return (rt); 501 } 502 503 /* 504 * Runs exact prefix match based on dst/netmask from @info. 505 * Assumes RIB lock is held. 506 * Returns matched @rtentry if found or NULL. 507 * If rtentry was found, saves nexthop / weight value into @rnd. 508 */ 509 struct rtentry * 510 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 511 struct route_nhop_data *rnd) 512 { 513 struct rtentry *rt; 514 515 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 516 info->rti_info[RTAX_NETMASK], rnd); 517 518 return (rt); 519 } 520 521 /* 522 * Adds route defined by @info into the kernel table specified by @fibnum and 523 * sa_family in @info->rti_info[RTAX_DST]. 524 * 525 * Returns 0 on success and fills in operation metadata into @rc. 526 */ 527 int 528 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 529 struct rib_cmd_info *rc) 530 { 531 struct rib_head *rnh; 532 int error; 533 534 NET_EPOCH_ASSERT(); 535 536 rnh = get_rnh(fibnum, info); 537 if (rnh == NULL) 538 return (EAFNOSUPPORT); 539 540 /* 541 * Check consistency between RTF_HOST flag and netmask 542 * existence. 543 */ 544 if (info->rti_flags & RTF_HOST) 545 info->rti_info[RTAX_NETMASK] = NULL; 546 else if (info->rti_info[RTAX_NETMASK] == NULL) 547 return (EINVAL); 548 549 bzero(rc, sizeof(struct rib_cmd_info)); 550 rc->rc_cmd = RTM_ADD; 551 552 error = add_route(rnh, info, rc); 553 if (error == 0) 554 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 555 556 return (error); 557 } 558 559 /* 560 * Creates rtentry and nexthop based on @info data. 561 * Return 0 and fills in rtentry into @prt on success, 562 * return errno otherwise. 563 */ 564 static int 565 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 566 struct rtentry **prt) 567 { 568 struct sockaddr *dst, *ndst, *gateway, *netmask; 569 struct rtentry *rt; 570 struct nhop_object *nh; 571 struct ifaddr *ifa; 572 int error, flags; 573 574 dst = info->rti_info[RTAX_DST]; 575 gateway = info->rti_info[RTAX_GATEWAY]; 576 netmask = info->rti_info[RTAX_NETMASK]; 577 flags = info->rti_flags; 578 579 if ((flags & RTF_GATEWAY) && !gateway) 580 return (EINVAL); 581 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 582 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 583 return (EINVAL); 584 585 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 586 return (EINVAL); 587 588 if (info->rti_ifa == NULL) { 589 error = rt_getifa_fib(info, rnh->rib_fibnum); 590 if (error) 591 return (error); 592 } 593 594 error = nhop_create_from_info(rnh, info, &nh); 595 if (error != 0) 596 return (error); 597 598 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 599 if (rt == NULL) { 600 nhop_free(nh); 601 return (ENOBUFS); 602 } 603 rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK; 604 rt->rt_nhop = nh; 605 606 /* Fill in dst */ 607 memcpy(&rt->rt_dst, dst, dst->sa_len); 608 rt_key(rt) = &rt->rt_dst; 609 610 /* 611 * point to the (possibly newly malloc'd) dest address. 612 */ 613 ndst = (struct sockaddr *)rt_key(rt); 614 615 /* 616 * make sure it contains the value we want (masked if needed). 617 */ 618 if (netmask) { 619 rt_maskedcopy(dst, ndst, netmask); 620 } else 621 bcopy(dst, ndst, dst->sa_len); 622 623 /* 624 * We use the ifa reference returned by rt_getifa_fib(). 625 * This moved from below so that rnh->rnh_addaddr() can 626 * examine the ifa and ifa->ifa_ifp if it so desires. 627 */ 628 ifa = info->rti_ifa; 629 rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 630 rt_set_expire_info(rt, info); 631 632 *prt = rt; 633 return (0); 634 } 635 636 static int 637 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 638 struct rib_cmd_info *rc) 639 { 640 struct nhop_object *nh_orig; 641 struct route_nhop_data rnd_orig, rnd_add; 642 struct nhop_object *nh; 643 struct rtentry *rt, *rt_orig; 644 int error; 645 646 error = create_rtentry(rnh, info, &rt); 647 if (error != 0) 648 return (error); 649 650 rnd_add.rnd_nhop = rt->rt_nhop; 651 rnd_add.rnd_weight = rt->rt_weight; 652 nh = rt->rt_nhop; 653 654 RIB_WLOCK(rnh); 655 error = add_route_nhop(rnh, rt, info, &rnd_add, rc); 656 if (error == 0) { 657 RIB_WUNLOCK(rnh); 658 return (0); 659 } 660 661 /* addition failed. Lookup prefix in the rib to determine the cause */ 662 rt_orig = lookup_prefix(rnh, info, &rnd_orig); 663 if (rt_orig == NULL) { 664 /* No prefix -> rnh_addaddr() failed to allocate memory */ 665 RIB_WUNLOCK(rnh); 666 nhop_free(nh); 667 uma_zfree(V_rtzone, rt); 668 return (ENOMEM); 669 } 670 671 /* We have existing route in the RIB. */ 672 nh_orig = rnd_orig.rnd_nhop; 673 /* Check if new route has higher preference */ 674 if (can_override_nhop(info, nh_orig) > 0) { 675 /* Update nexthop to the new route */ 676 change_route_nhop(rnh, rt_orig, info, &rnd_add, rc); 677 RIB_WUNLOCK(rnh); 678 uma_zfree(V_rtzone, rt); 679 nhop_free(nh_orig); 680 return (0); 681 } 682 683 RIB_WUNLOCK(rnh); 684 685 #ifdef ROUTE_MPATH 686 if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) && 687 nhop_can_multipath(rnd_orig.rnd_nhop)) 688 error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc); 689 else 690 #endif 691 /* Unable to add - another route with the same preference exists */ 692 error = EEXIST; 693 694 /* 695 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt. 696 * ROUTE_MPATH enabled: original nhop reference is unused in any case, 697 * free rt only if not _adding_ new route to rib (e.g. the case 698 * when initial lookup returned existing route, but then it got 699 * deleted prior to multipath group insertion, leading to a simple 700 * non-multipath add as a result). 701 */ 702 nhop_free(nh); 703 if ((error != 0) || rc->rc_cmd != RTM_ADD) 704 uma_zfree(V_rtzone, rt); 705 706 return (error); 707 } 708 709 /* 710 * Removes route defined by @info from the kernel table specified by @fibnum and 711 * sa_family in @info->rti_info[RTAX_DST]. 712 * 713 * Returns 0 on success and fills in operation metadata into @rc. 714 */ 715 int 716 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 717 { 718 struct rib_head *rnh; 719 struct sockaddr *dst_orig, *netmask; 720 struct sockaddr_storage mdst; 721 int error; 722 723 NET_EPOCH_ASSERT(); 724 725 rnh = get_rnh(fibnum, info); 726 if (rnh == NULL) 727 return (EAFNOSUPPORT); 728 729 bzero(rc, sizeof(struct rib_cmd_info)); 730 rc->rc_cmd = RTM_DELETE; 731 732 dst_orig = info->rti_info[RTAX_DST]; 733 netmask = info->rti_info[RTAX_NETMASK]; 734 735 if (netmask != NULL) { 736 /* Ensure @dst is always properly masked */ 737 if (dst_orig->sa_len > sizeof(mdst)) 738 return (EINVAL); 739 rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask); 740 info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst; 741 } 742 error = del_route(rnh, info, rc); 743 info->rti_info[RTAX_DST] = dst_orig; 744 745 return (error); 746 } 747 748 /* 749 * Conditionally unlinks rtentry matching data inside @info from @rnh. 750 * Returns 0 on success with operation result stored in @rc. 751 * On error, returns: 752 * ESRCH - if prefix was not found, 753 * EADDRINUSE - if trying to delete higher priority route. 754 * ENOENT - if supplied filter function returned 0 (not matched). 755 */ 756 static int 757 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc) 758 { 759 struct rtentry *rt; 760 struct nhop_object *nh; 761 struct radix_node *rn; 762 struct route_nhop_data rnd; 763 int error; 764 765 rt = lookup_prefix(rnh, info, &rnd); 766 if (rt == NULL) 767 return (ESRCH); 768 769 nh = rt->rt_nhop; 770 #ifdef ROUTE_MPATH 771 if (NH_IS_NHGRP(nh)) { 772 error = del_route_mpath(rnh, info, rt, 773 (struct nhgrp_object *)nh, rc); 774 return (error); 775 } 776 #endif 777 error = check_info_match_nhop(info, rt, nh); 778 if (error != 0) 779 return (error); 780 781 if (can_override_nhop(info, nh) < 0) 782 return (EADDRINUSE); 783 784 /* 785 * Remove the item from the tree and return it. 786 * Complain if it is not there and do no more processing. 787 */ 788 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 789 info->rti_info[RTAX_NETMASK], &rnh->head); 790 if (rn == NULL) 791 return (ESRCH); 792 793 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 794 panic ("rtrequest delete"); 795 796 rt = RNTORT(rn); 797 rt->rte_flags &= ~RTF_UP; 798 799 /* Finalize notification */ 800 rnh->rnh_gen++; 801 rnh->rnh_prefixes--; 802 803 rc->rc_cmd = RTM_DELETE; 804 rc->rc_rt = rt; 805 rc->rc_nh_old = rt->rt_nhop; 806 rc->rc_nh_weight = rt->rt_weight; 807 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 808 809 return (0); 810 } 811 812 static int 813 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 814 struct rib_cmd_info *rc) 815 { 816 int error; 817 818 RIB_WLOCK(rnh); 819 error = rt_unlinkrte(rnh, info, rc); 820 RIB_WUNLOCK(rnh); 821 if (error != 0) 822 return (error); 823 824 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 825 826 /* 827 * If the caller wants it, then it can have it, 828 * the entry will be deleted after the end of the current epoch. 829 */ 830 if (rc->rc_cmd == RTM_DELETE) 831 rtfree(rc->rc_rt); 832 #ifdef ROUTE_MPATH 833 else { 834 /* 835 * Deleting 1 path may result in RTM_CHANGE to 836 * a different mpath group/nhop. 837 * Free old mpath group. 838 */ 839 nhop_free_any(rc->rc_nh_old); 840 } 841 #endif 842 843 return (0); 844 } 845 846 int 847 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 848 struct rib_cmd_info *rc) 849 { 850 RIB_RLOCK_TRACKER; 851 struct route_nhop_data rnd_orig; 852 struct rib_head *rnh; 853 struct rtentry *rt; 854 int error; 855 856 NET_EPOCH_ASSERT(); 857 858 rnh = get_rnh(fibnum, info); 859 if (rnh == NULL) 860 return (EAFNOSUPPORT); 861 862 bzero(rc, sizeof(struct rib_cmd_info)); 863 rc->rc_cmd = RTM_CHANGE; 864 865 /* Check if updated gateway exists */ 866 if ((info->rti_flags & RTF_GATEWAY) && 867 (info->rti_info[RTAX_GATEWAY] == NULL)) { 868 869 /* 870 * route(8) adds RTF_GATEWAY flag if -interface is not set. 871 * Remove RTF_GATEWAY to enforce consistency and maintain 872 * compatibility.. 873 */ 874 info->rti_flags &= ~RTF_GATEWAY; 875 } 876 877 /* 878 * route change is done in multiple steps, with dropping and 879 * reacquiring lock. In the situations with multiple processes 880 * changes the same route in can lead to the case when route 881 * is changed between the steps. Address it by retrying the operation 882 * multiple times before failing. 883 */ 884 885 RIB_RLOCK(rnh); 886 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 887 info->rti_info[RTAX_NETMASK], &rnh->head); 888 889 if (rt == NULL) { 890 RIB_RUNLOCK(rnh); 891 return (ESRCH); 892 } 893 894 rnd_orig.rnd_nhop = rt->rt_nhop; 895 rnd_orig.rnd_weight = rt->rt_weight; 896 897 RIB_RUNLOCK(rnh); 898 899 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 900 error = change_route(rnh, info, &rnd_orig, rc); 901 if (error != EAGAIN) 902 break; 903 } 904 905 return (error); 906 } 907 908 static int 909 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 910 struct nhop_object *nh_orig, struct nhop_object **nh_new) 911 { 912 int error; 913 914 /* 915 * New gateway could require new ifaddr, ifp; 916 * flags may also be different; ifp may be specified 917 * by ll sockaddr when protocol address is ambiguous 918 */ 919 if (((nh_orig->nh_flags & NHF_GATEWAY) && 920 info->rti_info[RTAX_GATEWAY] != NULL) || 921 info->rti_info[RTAX_IFP] != NULL || 922 (info->rti_info[RTAX_IFA] != NULL && 923 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 924 error = rt_getifa_fib(info, rnh->rib_fibnum); 925 926 if (error != 0) { 927 info->rti_ifa = NULL; 928 return (error); 929 } 930 } 931 932 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 933 info->rti_ifa = NULL; 934 935 return (error); 936 } 937 938 #ifdef ROUTE_MPATH 939 static int 940 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 941 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 942 { 943 int error = 0; 944 struct nhop_object *nh, *nh_orig, *nh_new; 945 struct route_nhop_data rnd_new; 946 947 nh = NULL; 948 nh_orig = rnd_orig->rnd_nhop; 949 950 struct weightened_nhop *wn = NULL, *wn_new; 951 uint32_t num_nhops; 952 953 wn = nhgrp_get_nhops((struct nhgrp_object *)nh_orig, &num_nhops); 954 nh_orig = NULL; 955 for (int i = 0; i < num_nhops; i++) { 956 if (check_info_match_nhop(info, NULL, wn[i].nh)) { 957 nh_orig = wn[i].nh; 958 break; 959 } 960 } 961 962 if (nh_orig == NULL) 963 return (ESRCH); 964 965 error = change_nhop(rnh, info, nh_orig, &nh_new); 966 if (error != 0) 967 return (error); 968 969 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 970 M_TEMP, M_NOWAIT | M_ZERO); 971 if (wn_new == NULL) { 972 nhop_free(nh_new); 973 return (EAGAIN); 974 } 975 976 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 977 for (int i = 0; i < num_nhops; i++) { 978 if (wn[i].nh == nh_orig) { 979 wn[i].nh = nh_new; 980 wn[i].weight = get_info_weight(info, rnd_orig->rnd_weight); 981 break; 982 } 983 } 984 985 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 986 nhop_free(nh_new); 987 free(wn_new, M_TEMP); 988 989 if (error != 0) 990 return (error); 991 992 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 993 994 return (error); 995 } 996 #endif 997 998 static int 999 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1000 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1001 { 1002 int error = 0; 1003 struct nhop_object *nh, *nh_orig; 1004 struct route_nhop_data rnd_new; 1005 1006 nh = NULL; 1007 nh_orig = rnd_orig->rnd_nhop; 1008 if (nh_orig == NULL) 1009 return (ESRCH); 1010 1011 #ifdef ROUTE_MPATH 1012 if (NH_IS_NHGRP(nh_orig)) 1013 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1014 #endif 1015 1016 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1017 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1018 if (error != 0) 1019 return (error); 1020 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1021 1022 return (error); 1023 } 1024 1025 /* 1026 * Insert @rt with nhop data from @rnd_new to @rnh. 1027 * Returns 0 on success and stores operation results in @rc. 1028 */ 1029 static int 1030 add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1031 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1032 struct rib_cmd_info *rc) 1033 { 1034 struct sockaddr *ndst, *netmask; 1035 struct radix_node *rn; 1036 int error = 0; 1037 1038 RIB_WLOCK_ASSERT(rnh); 1039 1040 ndst = (struct sockaddr *)rt_key(rt); 1041 netmask = info->rti_info[RTAX_NETMASK]; 1042 1043 rt->rt_nhop = rnd->rnd_nhop; 1044 rt->rt_weight = rnd->rnd_weight; 1045 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1046 1047 if (rn != NULL) { 1048 if (rt->rt_expire > 0) 1049 tmproutes_update(rnh, rt); 1050 1051 /* Finalize notification */ 1052 rnh->rnh_gen++; 1053 rnh->rnh_prefixes++; 1054 1055 rc->rc_cmd = RTM_ADD; 1056 rc->rc_rt = rt; 1057 rc->rc_nh_old = NULL; 1058 rc->rc_nh_new = rnd->rnd_nhop; 1059 rc->rc_nh_weight = rnd->rnd_weight; 1060 1061 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1062 } else { 1063 /* Existing route or memory allocation failure */ 1064 error = EEXIST; 1065 } 1066 1067 return (error); 1068 } 1069 1070 /* 1071 * Switch @rt nhop/weigh to the ones specified in @rnd. 1072 * Conditionally set rt_expire if set in @info. 1073 * Returns 0 on success. 1074 */ 1075 int 1076 change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1077 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1078 struct rib_cmd_info *rc) 1079 { 1080 struct nhop_object *nh_orig; 1081 1082 RIB_WLOCK_ASSERT(rnh); 1083 1084 nh_orig = rt->rt_nhop; 1085 1086 if (rnd->rnd_nhop != NULL) { 1087 /* Changing expiration & nexthop & weight to a new one */ 1088 rt_set_expire_info(rt, info); 1089 rt->rt_nhop = rnd->rnd_nhop; 1090 rt->rt_weight = rnd->rnd_weight; 1091 if (rt->rt_expire > 0) 1092 tmproutes_update(rnh, rt); 1093 } else { 1094 /* Route deletion requested. */ 1095 struct sockaddr *ndst, *netmask; 1096 struct radix_node *rn; 1097 1098 ndst = (struct sockaddr *)rt_key(rt); 1099 netmask = info->rti_info[RTAX_NETMASK]; 1100 rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head); 1101 if (rn == NULL) 1102 return (ESRCH); 1103 rt = RNTORT(rn); 1104 rt->rte_flags &= ~RTF_UP; 1105 } 1106 1107 /* Finalize notification */ 1108 rnh->rnh_gen++; 1109 if (rnd->rnd_nhop == NULL) 1110 rnh->rnh_prefixes--; 1111 1112 rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE; 1113 rc->rc_rt = rt; 1114 rc->rc_nh_old = nh_orig; 1115 rc->rc_nh_new = rnd->rnd_nhop; 1116 rc->rc_nh_weight = rnd->rnd_weight; 1117 1118 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1119 1120 return (0); 1121 } 1122 1123 /* 1124 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1125 * consistent with the current route data. 1126 * Nexthop in @nhd_new is consumed. 1127 */ 1128 int 1129 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1130 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1131 struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) 1132 { 1133 struct rtentry *rt_new; 1134 int error = 0; 1135 1136 RIB_WLOCK(rnh); 1137 1138 rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1139 info->rti_info[RTAX_NETMASK], &rnh->head); 1140 1141 if (rt_new == NULL) { 1142 if (rnd_orig->rnd_nhop == NULL) 1143 error = add_route_nhop(rnh, rt, info, rnd_new, rc); 1144 else { 1145 /* 1146 * Prefix does not exist, which was not our assumption. 1147 * Update @rnd_orig with the new data and return 1148 */ 1149 rnd_orig->rnd_nhop = NULL; 1150 rnd_orig->rnd_weight = 0; 1151 error = EAGAIN; 1152 } 1153 } else { 1154 /* Prefix exists, try to update */ 1155 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1156 /* 1157 * Nhop/mpath group hasn't changed. Flip 1158 * to the new precalculated one and return 1159 */ 1160 error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); 1161 } else { 1162 /* Update and retry */ 1163 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1164 rnd_orig->rnd_weight = rt_new->rt_weight; 1165 error = EAGAIN; 1166 } 1167 } 1168 1169 RIB_WUNLOCK(rnh); 1170 1171 if (error == 0) { 1172 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1173 1174 if (rnd_orig->rnd_nhop != NULL) 1175 nhop_free_any(rnd_orig->rnd_nhop); 1176 1177 } else { 1178 if (rnd_new->rnd_nhop != NULL) 1179 nhop_free_any(rnd_new->rnd_nhop); 1180 } 1181 1182 return (error); 1183 } 1184 1185 /* 1186 * Performs modification of routing table specificed by @action. 1187 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1188 * Needs to be run in network epoch. 1189 * 1190 * Returns 0 on success and fills in @rc with action result. 1191 */ 1192 int 1193 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1194 struct rib_cmd_info *rc) 1195 { 1196 int error; 1197 1198 switch (action) { 1199 case RTM_ADD: 1200 error = rib_add_route(fibnum, info, rc); 1201 break; 1202 case RTM_DELETE: 1203 error = rib_del_route(fibnum, info, rc); 1204 break; 1205 case RTM_CHANGE: 1206 error = rib_change_route(fibnum, info, rc); 1207 break; 1208 default: 1209 error = ENOTSUP; 1210 } 1211 1212 return (error); 1213 } 1214 1215 struct rt_delinfo 1216 { 1217 struct rt_addrinfo info; 1218 struct rib_head *rnh; 1219 struct rtentry *head; 1220 struct rib_cmd_info rc; 1221 }; 1222 1223 /* 1224 * Conditionally unlinks @rn from radix tree based 1225 * on info data passed in @arg. 1226 */ 1227 static int 1228 rt_checkdelroute(struct radix_node *rn, void *arg) 1229 { 1230 struct rt_delinfo *di; 1231 struct rt_addrinfo *info; 1232 struct rtentry *rt; 1233 int error; 1234 1235 di = (struct rt_delinfo *)arg; 1236 rt = (struct rtentry *)rn; 1237 info = &di->info; 1238 1239 info->rti_info[RTAX_DST] = rt_key(rt); 1240 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 1241 1242 error = rt_unlinkrte(di->rnh, info, &di->rc); 1243 1244 /* 1245 * Add deleted rtentries to the list to GC them 1246 * after dropping the lock. 1247 * 1248 * XXX: Delayed notifications not implemented 1249 * for nexthop updates. 1250 */ 1251 if ((error == 0) && (di->rc.rc_cmd == RTM_DELETE)) { 1252 /* Add to the list and return */ 1253 rt->rt_chain = di->head; 1254 di->head = rt; 1255 } 1256 1257 return (0); 1258 } 1259 1260 /* 1261 * Iterates over a routing table specified by @fibnum and @family and 1262 * deletes elements marked by @filter_f. 1263 * @fibnum: rtable id 1264 * @family: AF_ address family 1265 * @filter_f: function returning non-zero value for items to delete 1266 * @arg: data to pass to the @filter_f function 1267 * @report: true if rtsock notification is needed. 1268 */ 1269 void 1270 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report) 1271 { 1272 struct rib_head *rnh; 1273 struct rt_delinfo di; 1274 struct rtentry *rt; 1275 struct nhop_object *nh; 1276 struct epoch_tracker et; 1277 1278 rnh = rt_tables_get_rnh(fibnum, family); 1279 if (rnh == NULL) 1280 return; 1281 1282 bzero(&di, sizeof(di)); 1283 di.info.rti_filter = filter_f; 1284 di.info.rti_filterdata = arg; 1285 di.rnh = rnh; 1286 di.rc.rc_cmd = RTM_DELETE; 1287 1288 NET_EPOCH_ENTER(et); 1289 1290 RIB_WLOCK(rnh); 1291 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1292 RIB_WUNLOCK(rnh); 1293 1294 /* We might have something to reclaim. */ 1295 bzero(&di.rc, sizeof(di.rc)); 1296 di.rc.rc_cmd = RTM_DELETE; 1297 while (di.head != NULL) { 1298 rt = di.head; 1299 di.head = rt->rt_chain; 1300 rt->rt_chain = NULL; 1301 nh = rt->rt_nhop; 1302 1303 di.rc.rc_rt = rt; 1304 di.rc.rc_nh_old = nh; 1305 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1306 1307 /* TODO std rt -> rt_addrinfo export */ 1308 di.info.rti_info[RTAX_DST] = rt_key(rt); 1309 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1310 1311 if (report) { 1312 #ifdef ROUTE_MPATH 1313 struct nhgrp_object *nhg; 1314 struct weightened_nhop *wn; 1315 uint32_t num_nhops; 1316 if (NH_IS_NHGRP(nh)) { 1317 nhg = (struct nhgrp_object *)nh; 1318 wn = nhgrp_get_nhops(nhg, &num_nhops); 1319 for (int i = 0; i < num_nhops; i++) 1320 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1321 } else 1322 #endif 1323 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1324 } 1325 rtfree(rt); 1326 } 1327 1328 NET_EPOCH_EXIT(et); 1329 } 1330 1331 static void 1332 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1333 struct rib_cmd_info *rc) 1334 { 1335 struct rib_subscription *rs; 1336 1337 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1338 if (rs->type == type) 1339 rs->func(rnh, rc, rs->arg); 1340 } 1341 } 1342 1343 static struct rib_subscription * 1344 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1345 enum rib_subscription_type type, bool waitok) 1346 { 1347 struct rib_subscription *rs; 1348 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1349 1350 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1351 if (rs == NULL) 1352 return (NULL); 1353 1354 rs->func = f; 1355 rs->arg = arg; 1356 rs->type = type; 1357 1358 return (rs); 1359 } 1360 1361 /* 1362 * Subscribe for the changes in the routing table specified by @fibnum and 1363 * @family. 1364 * 1365 * Returns pointer to the subscription structure on success. 1366 */ 1367 struct rib_subscription * 1368 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1369 enum rib_subscription_type type, bool waitok) 1370 { 1371 struct rib_head *rnh; 1372 struct epoch_tracker et; 1373 1374 NET_EPOCH_ENTER(et); 1375 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1376 rnh = rt_tables_get_rnh(fibnum, family); 1377 NET_EPOCH_EXIT(et); 1378 1379 return (rib_subscribe_internal(rnh, f, arg, type, waitok)); 1380 } 1381 1382 struct rib_subscription * 1383 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1384 enum rib_subscription_type type, bool waitok) 1385 { 1386 struct rib_subscription *rs; 1387 struct epoch_tracker et; 1388 1389 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1390 return (NULL); 1391 rs->rnh = rnh; 1392 1393 NET_EPOCH_ENTER(et); 1394 RIB_WLOCK(rnh); 1395 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1396 RIB_WUNLOCK(rnh); 1397 NET_EPOCH_EXIT(et); 1398 1399 return (rs); 1400 } 1401 1402 struct rib_subscription * 1403 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1404 enum rib_subscription_type type) 1405 { 1406 struct rib_subscription *rs; 1407 1408 NET_EPOCH_ASSERT(); 1409 RIB_WLOCK_ASSERT(rnh); 1410 1411 if ((rs = allocate_subscription(f, arg, type, false)) == NULL) 1412 return (NULL); 1413 rs->rnh = rnh; 1414 1415 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1416 1417 return (rs); 1418 } 1419 1420 /* 1421 * Remove rtable subscription @rs from the routing table. 1422 * Needs to be run in network epoch. 1423 */ 1424 void 1425 rib_unsibscribe(struct rib_subscription *rs) 1426 { 1427 struct rib_head *rnh = rs->rnh; 1428 1429 NET_EPOCH_ASSERT(); 1430 1431 RIB_WLOCK(rnh); 1432 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1433 RIB_WUNLOCK(rnh); 1434 1435 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1436 &rs->epoch_ctx); 1437 } 1438 1439 void 1440 rib_unsibscribe_locked(struct rib_subscription *rs) 1441 { 1442 struct rib_head *rnh = rs->rnh; 1443 1444 NET_EPOCH_ASSERT(); 1445 RIB_WLOCK_ASSERT(rnh); 1446 1447 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1448 1449 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1450 &rs->epoch_ctx); 1451 } 1452 1453 /* 1454 * Epoch callback indicating subscription is safe to destroy 1455 */ 1456 static void 1457 destroy_subscription_epoch(epoch_context_t ctx) 1458 { 1459 struct rib_subscription *rs; 1460 1461 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1462 1463 free(rs, M_RTABLE); 1464 } 1465 1466 void 1467 rib_init_subscriptions(struct rib_head *rnh) 1468 { 1469 1470 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1471 } 1472 1473 void 1474 rib_destroy_subscriptions(struct rib_head *rnh) 1475 { 1476 struct rib_subscription *rs; 1477 struct epoch_tracker et; 1478 1479 NET_EPOCH_ENTER(et); 1480 RIB_WLOCK(rnh); 1481 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1482 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1483 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1484 &rs->epoch_ctx); 1485 } 1486 RIB_WUNLOCK(rnh); 1487 NET_EPOCH_EXIT(et); 1488 } 1489