/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>

#include <vm/uma.h>

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called within the network epoch.
 */

/* Registered subscription for routing table change notifications. */
struct rib_subscription {
	CK_STAILQ_ENTRY(rib_subscription) next;	/* linkage on rib subscr. list */
	rib_subscription_cb_t	*func;		/* callback to invoke */
	void			*arg;		/* opaque callback argument */
	struct rib_head		*rnh;		/* table this entry belongs to */
	enum rib_subscription_type type;	/* immediate vs delayed */
	struct epoch_context	epoch_ctx;	/* deferred destruction context */
};

static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc);
static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);

static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);

static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc);

static void destroy_subscription_epoch(epoch_context_t ctx);
#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
/* Sysctl is writable only when multipath support is compiled in. */
#ifdef ROUTE_MPATH
#define _MP_FLAGS	CTLFLAG_RW
#else
#define _MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)

/*
 * Creates the per-vnet UMA zone used to allocate struct rtentry.
 */
void
vnet_rtzone_init()
{

	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

#ifdef VIMAGE
/*
 * Tears down the per-vnet rtentry zone on vnet destruction.
 */
void
vnet_rtzone_destroy()
{

	uma_zdestroy(V_rtzone);
}
#endif

/*
 * Releases the nexthop reference held by @rt and frees @rt itself.
 * Runs from an epoch callback, after all readers are guaranteed gone.
 */
static void
destroy_rtentry(struct rtentry *rt)
{
#ifdef VIMAGE
	struct nhop_object *nh = rt->rt_nhop;

	/*
	 * At this moment rnh, nh_control may be already freed.
	 * nhop interface may have been migrated to a different vnet.
	 * Use vnet stored in the nexthop to delete the entry.
	 */
#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		/* Any member of the group lives in the right vnet; use the first. */
		struct weightened_nhop *wn;
		uint32_t num_nhops;
		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
		nh = wn[0].nh;
	}
#endif
	CURVNET_SET(nhop_get_vnet(nh));
#endif

	/* Unreference nexthop */
	nhop_free_any(rt->rt_nhop);

	uma_zfree(V_rtzone, rt);

	CURVNET_RESTORE();
}

/*
 * Epoch callback indicating rtentry is safe to destroy
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
	struct rtentry *rt;

	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

	destroy_rtentry(rt);
}

/*
 * Schedule rtentry deletion
 */
static void
rtfree(struct rtentry *rt)
{

	KASSERT(rt != NULL, ("%s: NULL rt", __func__));

	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
	    &rt->rt_epoch_ctx);
}

/*
 * Returns the routing table head for fib @fibnum and the address family
 * of RTAX_DST in @info, or NULL if the family is not supported.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#ifdef ROUTE_MPATH
/*
 * Returns true if multipath routing is enabled in the vnet owning @rh.
 */
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Checks if a nexthop is multipath-eligible.
 * Avoid nhops without gateways and redirects.
 *
 * Returns 1 for multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

/*
 * Returns route weight from @info if RTV_WEIGHT is set,
 * @default_weight otherwise, clamped to RT_MAX_WEIGHT.
 */
static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep upper 1 byte for adm distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;

	return (weight);
}

/* Returns true if @rt is a host route (RTF_HOST set, no netmask). */
bool
rt_is_host(const struct rtentry *rt)
{

	return (rt->rte_flags & RTF_HOST);
}

/* Returns the address family of @rt destination. */
sa_family_t
rt_get_family(const struct rtentry *rt)
{
	const struct sockaddr *dst;

	dst = (const struct sockaddr *)rt_key_const(rt);

	return (dst->sa_family);
}

/*
 * Returns pointer to nexthop or nexthop group
 * associated with @rt
 */
struct nhop_object *
rt_get_raw_nhop(const struct rtentry *rt)
{

	return (rt->rt_nhop);
}

#ifdef INET
/*
 * Stores IPv4 address and prefix length of @rt inside
 * @paddr and @plen.
 * @pscopeid is currently always set to 0.
 */
void
rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
    int *plen, uint32_t *pscopeid)
{
	const struct sockaddr_in *dst;

	dst = (const struct sockaddr_in *)rt_key_const(rt);
	KASSERT((dst->sin_family == AF_INET),
	    ("rt family is %d, not inet", dst->sin_family));
	*paddr = dst->sin_addr;
	dst = (const struct sockaddr_in *)rt_mask_const(rt);
	if (dst == NULL)
		*plen = 32;	/* host route: full-length prefix */
	else
		*plen = bitcount32(dst->sin_addr.s_addr);
	*pscopeid = 0;
}

/*
 * Stores IPv4 address and prefix mask of @rt inside
 * @paddr and @pmask.  Sets mask to INADDR_BROADCAST (all-ones)
 * for host routes.
 * @pscopeid is currently always set to 0.
 */
void
rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
    struct in_addr *pmask, uint32_t *pscopeid)
{
	const struct sockaddr_in *dst;

	dst = (const struct sockaddr_in *)rt_key_const(rt);
	KASSERT((dst->sin_family == AF_INET),
	    ("rt family is %d, not inet", dst->sin_family));
	*paddr = dst->sin_addr;
	dst = (const struct sockaddr_in *)rt_mask_const(rt);
	if (dst == NULL)
		pmask->s_addr = INADDR_BROADCAST;
	else
		*pmask = dst->sin_addr;
	*pscopeid = 0;
}
#endif

#ifdef INET6
/*
 * Returns prefix length of (contiguous) IPv6 mask @addr
 * by counting set bits across all four 32-bit words.
 */
static int
inet6_get_plen(const struct in6_addr *addr)
{

	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
}

/*
 * Stores IPv6 address and prefix length of @rt inside
 * @paddr and @plen. Addresses are returned in de-embedded form.
 * Scopeid is set to 0 for non-LL addresses.
 */
void
rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
    int *plen, uint32_t *pscopeid)
{
	const struct sockaddr_in6 *dst;

	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
	KASSERT((dst->sin6_family == AF_INET6),
	    ("rt family is %d, not inet6", dst->sin6_family));
	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
	else
		*paddr = dst->sin6_addr;
	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
	if (dst == NULL)
		*plen = 128;	/* host route: full-length prefix */
	else
		*plen = inet6_get_plen(&dst->sin6_addr);
}

/*
 * Stores IPv6 address and prefix mask of @rt inside
 * @paddr and @pmask. Addresses are returned in de-embedded form.
 * Scopeid is set to 0 for non-LL addresses.
365 */ 366 void 367 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 368 struct in6_addr *pmask, uint32_t *pscopeid) 369 { 370 const struct sockaddr_in6 *dst; 371 372 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 373 KASSERT((dst->sin6_family == AF_INET6), 374 ("rt family is %d, not inet", dst->sin6_family)); 375 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 376 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 377 else 378 *paddr = dst->sin6_addr; 379 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 380 if (dst == NULL) 381 memset(pmask, 0xFF, sizeof(struct in6_addr)); 382 else 383 *pmask = dst->sin6_addr; 384 } 385 #endif 386 387 static void 388 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 389 { 390 391 /* Kernel -> userland timebase conversion. */ 392 if (info->rti_mflags & RTV_EXPIRE) 393 rt->rt_expire = info->rti_rmx->rmx_expire ? 394 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 395 } 396 397 /* 398 * Check if specified @gw matches gw data in the nexthop @nh. 399 * 400 * Returns true if matches, false otherwise. 401 */ 402 bool 403 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 404 { 405 406 if (nh->gw_sa.sa_family != gw->sa_family) 407 return (false); 408 409 switch (gw->sa_family) { 410 case AF_INET: 411 return (nh->gw4_sa.sin_addr.s_addr == 412 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 413 case AF_INET6: 414 { 415 const struct sockaddr_in6 *gw6; 416 gw6 = (const struct sockaddr_in6 *)gw; 417 418 /* 419 * Currently (2020-09) IPv6 gws in kernel have their 420 * scope embedded. Once this becomes false, this code 421 * has to be revisited. 
422 */ 423 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 424 &gw6->sin6_addr)) 425 return (true); 426 return (false); 427 } 428 case AF_LINK: 429 { 430 const struct sockaddr_dl *sdl; 431 sdl = (const struct sockaddr_dl *)gw; 432 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 433 } 434 default: 435 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 436 } 437 438 /* NOTREACHED */ 439 return (false); 440 } 441 442 /* 443 * Checks if data in @info matches nexhop @nh. 444 * 445 * Returns 0 on success, 446 * ESRCH if not matched, 447 * ENOENT if filter function returned false 448 */ 449 int 450 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 451 const struct nhop_object *nh) 452 { 453 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 454 455 if (info->rti_filter != NULL) { 456 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 457 return (ENOENT); 458 else 459 return (0); 460 } 461 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 462 return (ESRCH); 463 464 return (0); 465 } 466 467 /* 468 * Checks if nexhop @nh can be rewritten by data in @info because 469 * of higher "priority". Currently the only case for such scenario 470 * is kernel installing interface routes, marked by RTF_PINNED flag. 471 * 472 * Returns: 473 * 1 if @info data has higher priority 474 * 0 if priority is the same 475 * -1 if priority is lower 476 */ 477 int 478 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 479 { 480 481 if (info->rti_flags & RTF_PINNED) { 482 return (NH_IS_PINNED(nh)) ? 0 : 1; 483 } else { 484 return (NH_IS_PINNED(nh)) ? -1 : 0; 485 } 486 } 487 488 /* 489 * Runs exact prefix match based on @dst and @netmask. 490 * Returns matched @rtentry if found or NULL. 491 * If rtentry was found, saves nexthop / weight value into @rnd. 
492 */ 493 static struct rtentry * 494 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 495 const struct sockaddr *netmask, struct route_nhop_data *rnd) 496 { 497 struct rtentry *rt; 498 499 RIB_LOCK_ASSERT(rnh); 500 501 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 502 __DECONST(void *, netmask), &rnh->head); 503 if (rt != NULL) { 504 rnd->rnd_nhop = rt->rt_nhop; 505 rnd->rnd_weight = rt->rt_weight; 506 } else { 507 rnd->rnd_nhop = NULL; 508 rnd->rnd_weight = 0; 509 } 510 511 return (rt); 512 } 513 514 /* 515 * Runs exact prefix match based on dst/netmask from @info. 516 * Assumes RIB lock is held. 517 * Returns matched @rtentry if found or NULL. 518 * If rtentry was found, saves nexthop / weight value into @rnd. 519 */ 520 struct rtentry * 521 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 522 struct route_nhop_data *rnd) 523 { 524 struct rtentry *rt; 525 526 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 527 info->rti_info[RTAX_NETMASK], rnd); 528 529 return (rt); 530 } 531 532 /* 533 * Adds route defined by @info into the kernel table specified by @fibnum and 534 * sa_family in @info->rti_info[RTAX_DST]. 535 * 536 * Returns 0 on success and fills in operation metadata into @rc. 537 */ 538 int 539 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 540 struct rib_cmd_info *rc) 541 { 542 struct rib_head *rnh; 543 int error; 544 545 NET_EPOCH_ASSERT(); 546 547 rnh = get_rnh(fibnum, info); 548 if (rnh == NULL) 549 return (EAFNOSUPPORT); 550 551 /* 552 * Check consistency between RTF_HOST flag and netmask 553 * existence. 
554 */ 555 if (info->rti_flags & RTF_HOST) 556 info->rti_info[RTAX_NETMASK] = NULL; 557 else if (info->rti_info[RTAX_NETMASK] == NULL) 558 return (EINVAL); 559 560 bzero(rc, sizeof(struct rib_cmd_info)); 561 rc->rc_cmd = RTM_ADD; 562 563 error = add_route(rnh, info, rc); 564 if (error == 0) 565 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 566 567 return (error); 568 } 569 570 /* 571 * Checks if @dst and @gateway is valid combination. 572 * 573 * Returns true if is valid, false otherwise. 574 */ 575 static bool 576 check_gateway(struct rib_head *rnh, struct sockaddr *dst, 577 struct sockaddr *gateway) 578 { 579 if (dst->sa_family == gateway->sa_family) 580 return (true); 581 else if (gateway->sa_family == AF_UNSPEC) 582 return (true); 583 else if (gateway->sa_family == AF_LINK) 584 return (true); 585 return (false); 586 } 587 588 /* 589 * Creates rtentry and nexthop based on @info data. 590 * Return 0 and fills in rtentry into @prt on success, 591 * return errno otherwise. 592 */ 593 static int 594 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 595 struct rtentry **prt) 596 { 597 struct sockaddr *dst, *ndst, *gateway, *netmask; 598 struct rtentry *rt; 599 struct nhop_object *nh; 600 struct ifaddr *ifa; 601 int error, flags; 602 603 dst = info->rti_info[RTAX_DST]; 604 gateway = info->rti_info[RTAX_GATEWAY]; 605 netmask = info->rti_info[RTAX_NETMASK]; 606 flags = info->rti_flags; 607 608 if ((flags & RTF_GATEWAY) && !gateway) 609 return (EINVAL); 610 if (dst && gateway && !check_gateway(rnh, dst, gateway)) 611 return (EINVAL); 612 613 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 614 return (EINVAL); 615 616 if (info->rti_ifa == NULL) { 617 error = rt_getifa_fib(info, rnh->rib_fibnum); 618 if (error) 619 return (error); 620 } 621 622 error = nhop_create_from_info(rnh, info, &nh); 623 if (error != 0) 624 return (error); 625 626 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 627 if (rt == NULL) { 628 nhop_free(nh); 629 return (ENOBUFS); 630 } 
631 rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK; 632 rt->rt_nhop = nh; 633 634 /* Fill in dst */ 635 memcpy(&rt->rt_dst, dst, dst->sa_len); 636 rt_key(rt) = &rt->rt_dst; 637 638 /* 639 * point to the (possibly newly malloc'd) dest address. 640 */ 641 ndst = (struct sockaddr *)rt_key(rt); 642 643 /* 644 * make sure it contains the value we want (masked if needed). 645 */ 646 if (netmask) { 647 rt_maskedcopy(dst, ndst, netmask); 648 } else 649 bcopy(dst, ndst, dst->sa_len); 650 651 /* 652 * We use the ifa reference returned by rt_getifa_fib(). 653 * This moved from below so that rnh->rnh_addaddr() can 654 * examine the ifa and ifa->ifa_ifp if it so desires. 655 */ 656 ifa = info->rti_ifa; 657 rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 658 rt_set_expire_info(rt, info); 659 660 *prt = rt; 661 return (0); 662 } 663 664 static int 665 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 666 struct rib_cmd_info *rc) 667 { 668 struct nhop_object *nh_orig; 669 struct route_nhop_data rnd_orig, rnd_add; 670 struct nhop_object *nh; 671 struct rtentry *rt, *rt_orig; 672 int error; 673 674 error = create_rtentry(rnh, info, &rt); 675 if (error != 0) 676 return (error); 677 678 rnd_add.rnd_nhop = rt->rt_nhop; 679 rnd_add.rnd_weight = rt->rt_weight; 680 nh = rt->rt_nhop; 681 682 RIB_WLOCK(rnh); 683 error = add_route_nhop(rnh, rt, info, &rnd_add, rc); 684 if (error == 0) { 685 RIB_WUNLOCK(rnh); 686 return (0); 687 } 688 689 /* addition failed. Lookup prefix in the rib to determine the cause */ 690 rt_orig = lookup_prefix(rnh, info, &rnd_orig); 691 if (rt_orig == NULL) { 692 /* No prefix -> rnh_addaddr() failed to allocate memory */ 693 RIB_WUNLOCK(rnh); 694 nhop_free(nh); 695 uma_zfree(V_rtzone, rt); 696 return (ENOMEM); 697 } 698 699 /* We have existing route in the RIB. 
*/ 700 nh_orig = rnd_orig.rnd_nhop; 701 /* Check if new route has higher preference */ 702 if (can_override_nhop(info, nh_orig) > 0) { 703 /* Update nexthop to the new route */ 704 change_route_nhop(rnh, rt_orig, info, &rnd_add, rc); 705 RIB_WUNLOCK(rnh); 706 uma_zfree(V_rtzone, rt); 707 nhop_free(nh_orig); 708 return (0); 709 } 710 711 RIB_WUNLOCK(rnh); 712 713 #ifdef ROUTE_MPATH 714 if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) && 715 nhop_can_multipath(rnd_orig.rnd_nhop)) 716 error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc); 717 else 718 #endif 719 /* Unable to add - another route with the same preference exists */ 720 error = EEXIST; 721 722 /* 723 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt. 724 * ROUTE_MPATH enabled: original nhop reference is unused in any case, 725 * free rt only if not _adding_ new route to rib (e.g. the case 726 * when initial lookup returned existing route, but then it got 727 * deleted prior to multipath group insertion, leading to a simple 728 * non-multipath add as a result). 729 */ 730 nhop_free(nh); 731 if ((error != 0) || rc->rc_cmd != RTM_ADD) 732 uma_zfree(V_rtzone, rt); 733 734 return (error); 735 } 736 737 /* 738 * Removes route defined by @info from the kernel table specified by @fibnum and 739 * sa_family in @info->rti_info[RTAX_DST]. 740 * 741 * Returns 0 on success and fills in operation metadata into @rc. 
742 */ 743 int 744 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 745 { 746 struct rib_head *rnh; 747 struct sockaddr *dst_orig, *netmask; 748 struct sockaddr_storage mdst; 749 int error; 750 751 NET_EPOCH_ASSERT(); 752 753 rnh = get_rnh(fibnum, info); 754 if (rnh == NULL) 755 return (EAFNOSUPPORT); 756 757 bzero(rc, sizeof(struct rib_cmd_info)); 758 rc->rc_cmd = RTM_DELETE; 759 760 dst_orig = info->rti_info[RTAX_DST]; 761 netmask = info->rti_info[RTAX_NETMASK]; 762 763 if (netmask != NULL) { 764 /* Ensure @dst is always properly masked */ 765 if (dst_orig->sa_len > sizeof(mdst)) 766 return (EINVAL); 767 rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask); 768 info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst; 769 } 770 error = del_route(rnh, info, rc); 771 info->rti_info[RTAX_DST] = dst_orig; 772 773 return (error); 774 } 775 776 /* 777 * Conditionally unlinks rtentry matching data inside @info from @rnh. 778 * Returns 0 on success with operation result stored in @rc. 779 * On error, returns: 780 * ESRCH - if prefix was not found, 781 * EADDRINUSE - if trying to delete higher priority route. 782 * ENOENT - if supplied filter function returned 0 (not matched). 783 */ 784 static int 785 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc) 786 { 787 struct rtentry *rt; 788 struct nhop_object *nh; 789 struct radix_node *rn; 790 struct route_nhop_data rnd; 791 int error; 792 793 rt = lookup_prefix(rnh, info, &rnd); 794 if (rt == NULL) 795 return (ESRCH); 796 797 nh = rt->rt_nhop; 798 #ifdef ROUTE_MPATH 799 if (NH_IS_NHGRP(nh)) { 800 error = del_route_mpath(rnh, info, rt, 801 (struct nhgrp_object *)nh, rc); 802 return (error); 803 } 804 #endif 805 error = check_info_match_nhop(info, rt, nh); 806 if (error != 0) 807 return (error); 808 809 if (can_override_nhop(info, nh) < 0) 810 return (EADDRINUSE); 811 812 /* 813 * Remove the item from the tree and return it. 
814 * Complain if it is not there and do no more processing. 815 */ 816 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 817 info->rti_info[RTAX_NETMASK], &rnh->head); 818 if (rn == NULL) 819 return (ESRCH); 820 821 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 822 panic ("rtrequest delete"); 823 824 rt = RNTORT(rn); 825 rt->rte_flags &= ~RTF_UP; 826 827 /* Finalize notification */ 828 rib_bump_gen(rnh); 829 rnh->rnh_prefixes--; 830 831 rc->rc_cmd = RTM_DELETE; 832 rc->rc_rt = rt; 833 rc->rc_nh_old = rt->rt_nhop; 834 rc->rc_nh_weight = rt->rt_weight; 835 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 836 837 return (0); 838 } 839 840 static int 841 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 842 struct rib_cmd_info *rc) 843 { 844 int error; 845 846 RIB_WLOCK(rnh); 847 error = rt_unlinkrte(rnh, info, rc); 848 RIB_WUNLOCK(rnh); 849 if (error != 0) 850 return (error); 851 852 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 853 854 /* 855 * If the caller wants it, then it can have it, 856 * the entry will be deleted after the end of the current epoch. 857 */ 858 if (rc->rc_cmd == RTM_DELETE) 859 rtfree(rc->rc_rt); 860 #ifdef ROUTE_MPATH 861 else { 862 /* 863 * Deleting 1 path may result in RTM_CHANGE to 864 * a different mpath group/nhop. 865 * Free old mpath group. 
866 */ 867 nhop_free_any(rc->rc_nh_old); 868 } 869 #endif 870 871 return (0); 872 } 873 874 int 875 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 876 struct rib_cmd_info *rc) 877 { 878 RIB_RLOCK_TRACKER; 879 struct route_nhop_data rnd_orig; 880 struct rib_head *rnh; 881 struct rtentry *rt; 882 int error; 883 884 NET_EPOCH_ASSERT(); 885 886 rnh = get_rnh(fibnum, info); 887 if (rnh == NULL) 888 return (EAFNOSUPPORT); 889 890 bzero(rc, sizeof(struct rib_cmd_info)); 891 rc->rc_cmd = RTM_CHANGE; 892 893 /* Check if updated gateway exists */ 894 if ((info->rti_flags & RTF_GATEWAY) && 895 (info->rti_info[RTAX_GATEWAY] == NULL)) { 896 897 /* 898 * route(8) adds RTF_GATEWAY flag if -interface is not set. 899 * Remove RTF_GATEWAY to enforce consistency and maintain 900 * compatibility.. 901 */ 902 info->rti_flags &= ~RTF_GATEWAY; 903 } 904 905 /* 906 * route change is done in multiple steps, with dropping and 907 * reacquiring lock. In the situations with multiple processes 908 * changes the same route in can lead to the case when route 909 * is changed between the steps. Address it by retrying the operation 910 * multiple times before failing. 
911 */ 912 913 RIB_RLOCK(rnh); 914 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 915 info->rti_info[RTAX_NETMASK], &rnh->head); 916 917 if (rt == NULL) { 918 RIB_RUNLOCK(rnh); 919 return (ESRCH); 920 } 921 922 rnd_orig.rnd_nhop = rt->rt_nhop; 923 rnd_orig.rnd_weight = rt->rt_weight; 924 925 RIB_RUNLOCK(rnh); 926 927 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 928 error = change_route(rnh, info, &rnd_orig, rc); 929 if (error != EAGAIN) 930 break; 931 } 932 933 return (error); 934 } 935 936 static int 937 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 938 struct nhop_object *nh_orig, struct nhop_object **nh_new) 939 { 940 int error; 941 942 /* 943 * New gateway could require new ifaddr, ifp; 944 * flags may also be different; ifp may be specified 945 * by ll sockaddr when protocol address is ambiguous 946 */ 947 if (((nh_orig->nh_flags & NHF_GATEWAY) && 948 info->rti_info[RTAX_GATEWAY] != NULL) || 949 info->rti_info[RTAX_IFP] != NULL || 950 (info->rti_info[RTAX_IFA] != NULL && 951 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 952 error = rt_getifa_fib(info, rnh->rib_fibnum); 953 954 if (error != 0) { 955 info->rti_ifa = NULL; 956 return (error); 957 } 958 } 959 960 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 961 info->rti_ifa = NULL; 962 963 return (error); 964 } 965 966 #ifdef ROUTE_MPATH 967 static int 968 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 969 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 970 { 971 int error = 0; 972 struct nhop_object *nh, *nh_orig, *nh_new; 973 struct route_nhop_data rnd_new; 974 975 nh = NULL; 976 nh_orig = rnd_orig->rnd_nhop; 977 978 struct weightened_nhop *wn = NULL, *wn_new; 979 uint32_t num_nhops; 980 981 wn = nhgrp_get_nhops((struct nhgrp_object *)nh_orig, &num_nhops); 982 nh_orig = NULL; 983 for (int i = 0; i < num_nhops; i++) { 984 if (check_info_match_nhop(info, NULL, wn[i].nh)) { 985 nh_orig = wn[i].nh; 986 break; 987 } 988 
} 989 990 if (nh_orig == NULL) 991 return (ESRCH); 992 993 error = change_nhop(rnh, info, nh_orig, &nh_new); 994 if (error != 0) 995 return (error); 996 997 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 998 M_TEMP, M_NOWAIT | M_ZERO); 999 if (wn_new == NULL) { 1000 nhop_free(nh_new); 1001 return (EAGAIN); 1002 } 1003 1004 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 1005 for (int i = 0; i < num_nhops; i++) { 1006 if (wn[i].nh == nh_orig) { 1007 wn[i].nh = nh_new; 1008 wn[i].weight = get_info_weight(info, rnd_orig->rnd_weight); 1009 break; 1010 } 1011 } 1012 1013 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 1014 nhop_free(nh_new); 1015 free(wn_new, M_TEMP); 1016 1017 if (error != 0) 1018 return (error); 1019 1020 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1021 1022 return (error); 1023 } 1024 #endif 1025 1026 static int 1027 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1028 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1029 { 1030 int error = 0; 1031 struct nhop_object *nh, *nh_orig; 1032 struct route_nhop_data rnd_new; 1033 1034 nh = NULL; 1035 nh_orig = rnd_orig->rnd_nhop; 1036 if (nh_orig == NULL) 1037 return (ESRCH); 1038 1039 #ifdef ROUTE_MPATH 1040 if (NH_IS_NHGRP(nh_orig)) 1041 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1042 #endif 1043 1044 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1045 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1046 if (error != 0) 1047 return (error); 1048 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1049 1050 return (error); 1051 } 1052 1053 /* 1054 * Insert @rt with nhop data from @rnd_new to @rnh. 1055 * Returns 0 on success and stores operation results in @rc. 
 */
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct radix_node *rn;
	int error = 0;

	RIB_WLOCK_ASSERT(rnh);

	ndst = (struct sockaddr *)rt_key(rt);
	netmask = info->rti_info[RTAX_NETMASK];

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		/* Track expiring routes so they can be garbage-collected. */
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else {
		/*
		 * Existing route or memory allocation failure; the caller
		 * (add_route()) disambiguates by re-looking up the prefix.
		 */
		error = EEXIST;
	}

	return (error);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Conditionally set rt_expire if set in @info.
 * Returns 0 on success.
 */
int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop != NULL) {
		/* Changing expiration & nexthop & weight to a new one */
		rt_set_expire_info(rt, info);
		rt->rt_nhop = rnd->rnd_nhop;
		rt->rt_weight = rnd->rnd_weight;
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);
	} else {
		/* Route deletion requested. */
		struct sockaddr *ndst, *netmask;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
		if (rn == NULL)
			return (ESRCH);
		rt = RNTORT(rn);
		rt->rte_flags &= ~RTF_UP;
	}

	/* Finalize notification */
	rib_bump_gen(rnh);
	if (rnd->rnd_nhop == NULL)
		rnh->rnh_prefixes--;

	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @nhd_orig is
 * consistent with the current route data.
 * Nexthop in @nhd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	RIB_WLOCK(rnh);

	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		/* Success: drop the reference on the replaced nexthop. */
		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failure: the precalculated new nexthop is unused. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/* State shared across rt_checkdelroute() callbacks during a table walk. */
struct rt_delinfo
{
	struct rt_addrinfo info;	/* matching criteria */
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked entries to GC */
	struct rib_cmd_info rc;		/* per-entry operation result */
};

/*
 * Conditionally unlinks @rn from radix tree based
 * on info data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di;
	struct rt_addrinfo *info;
	struct rtentry *rt;

	di = (struct rt_delinfo *)arg;
	rt = (struct rtentry *)rn;
	info = &di->info;

	/* Export the entry's key/mask so the filter can inspect them. */
	info->rti_info[RTAX_DST] = rt_key(rt);
	info->rti_info[RTAX_NETMASK] = rt_mask(rt);

	/* Non-zero means "not unlinked"; keep walking either way. */
	if (rt_unlinkrte(di->rnh, info, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report)
{
	struct rib_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;
	di.rnh = rnh;
	di.rc.rc_cmd = RTM_DELETE;

	NET_EPOCH_ENTER(et);

	/* Pass 1: unlink matching entries under the RIB write lock. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	/* Pass 2: notify subscribers and free entries with the lock dropped. */
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		/* TODO std rt -> rt_addrinfo export */
		di.info.rti_info[RTAX_DST] = rt_key(rt);
		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			struct weightened_nhop *wn;
			uint32_t num_nhops;
			/* One rtsock message per nexthop in the group. */
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rtfree(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * Walk callback: unconditionally removes @rn from the radix tree.
 * Always returns 0 so the walk continues over the whole table.
 */
static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	/* Free only if deladdr returned the entry we asked to delete. */
	if (RNTORT(rn) == rt)
		rtfree(rt);

	return (0);
}

/*
 * Removes all routes from the routing
table without executing notifications. 1382 * rtentres will be removed after the end of a current epoch. 1383 */ 1384 static void 1385 rib_flush_routes(struct rib_head *rnh) 1386 { 1387 RIB_WLOCK(rnh); 1388 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1389 RIB_WUNLOCK(rnh); 1390 } 1391 1392 void 1393 rib_flush_routes_family(int family) 1394 { 1395 struct rib_head *rnh; 1396 1397 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1398 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1399 rib_flush_routes(rnh); 1400 } 1401 } 1402 1403 static void 1404 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1405 struct rib_cmd_info *rc) 1406 { 1407 struct rib_subscription *rs; 1408 1409 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1410 if (rs->type == type) 1411 rs->func(rnh, rc, rs->arg); 1412 } 1413 } 1414 1415 static struct rib_subscription * 1416 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1417 enum rib_subscription_type type, bool waitok) 1418 { 1419 struct rib_subscription *rs; 1420 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1421 1422 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1423 if (rs == NULL) 1424 return (NULL); 1425 1426 rs->func = f; 1427 rs->arg = arg; 1428 rs->type = type; 1429 1430 return (rs); 1431 } 1432 1433 /* 1434 * Subscribe for the changes in the routing table specified by @fibnum and 1435 * @family. 1436 * 1437 * Returns pointer to the subscription structure on success. 
 */
struct rib_subscription *
rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_head *rnh;
	struct epoch_tracker et;

	/* Table pointer lookup needs to happen inside the net epoch. */
	NET_EPOCH_ENTER(et);
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);
	NET_EPOCH_EXIT(et);

	return (rib_subscribe_internal(rnh, f, arg, type, waitok));
}

/*
 * Subscribes to the routing table pointed to by @rnh directly.
 * Allocation sleeps iff @waitok is true.
 * Returns pointer to the subscription structure on success, NULL on
 * allocation failure.
 */
struct rib_subscription *
rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);
	rs->rnh = rnh;

	/* Insertion into the subscriber list requires epoch + write lock. */
	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

/*
 * Same as rib_subscribe_internal(), but the caller already holds the
 * RIB write lock and runs inside the net epoch; allocation never sleeps.
 */
struct rib_subscription *
rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type)
{
	struct rib_subscription *rs;

	NET_EPOCH_ASSERT();
	RIB_WLOCK_ASSERT(rnh);

	if ((rs = allocate_subscription(f, arg, type, false)) == NULL)
		return (NULL);
	rs->rnh = rnh;

	CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next);

	return (rs);
}

/*
 * Remove rtable subscription @rs from the routing table.
 * Needs to be run in network epoch.
 */
void
rib_unsubscribe(struct rib_subscription *rs)
{
	struct rib_head *rnh = rs->rnh;

	NET_EPOCH_ASSERT();

	RIB_WLOCK(rnh);
	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
	RIB_WUNLOCK(rnh);

	/* Defer the free until current epoch readers are done with @rs. */
	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);
}

/*
 * Same as rib_unsubscribe(), but the caller already holds the
 * RIB write lock.
 */
void
rib_unsubscribe_locked(struct rib_subscription *rs)
{
	struct rib_head *rnh = rs->rnh;

	NET_EPOCH_ASSERT();
	RIB_WLOCK_ASSERT(rnh);

	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);

	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);
}

/*
 * Epoch callback indicating subscription is safe to destroy
 */
static void
destroy_subscription_epoch(epoch_context_t ctx)
{
	struct rib_subscription *rs;

	rs = __containerof(ctx, struct rib_subscription, epoch_ctx);

	free(rs, M_RTABLE);
}

/* Initializes the subscriber list of a freshly-created @rnh. */
void
rib_init_subscriptions(struct rib_head *rnh)
{

	CK_STAILQ_INIT(&rnh->rnh_subscribers);
}

/*
 * Unlinks all subscriptions of @rnh and schedules their destruction
 * after the current epoch ends; used on table teardown.
 */
void
rib_destroy_subscriptions(struct rib_head *rnh)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) {
		CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
		epoch_call(net_epoch_preempt, destroy_subscription_epoch,
		    &rs->epoch_ctx);
	}
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);
}