1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 58 #include <vm/uma.h> 59 60 /* 61 * This file contains control plane routing tables functions. 62 * 63 * All functions assumes they are called in net epoch. 64 */ 65 66 struct rib_subscription { 67 CK_STAILQ_ENTRY(rib_subscription) next; 68 rib_subscription_cb_t *func; 69 void *arg; 70 struct rib_head *rnh; 71 enum rib_subscription_type type; 72 struct epoch_context epoch_ctx; 73 }; 74 75 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 76 struct rib_cmd_info *rc); 77 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 78 struct rt_addrinfo *info, struct route_nhop_data *rnd, 79 struct rib_cmd_info *rc); 80 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 81 struct rib_cmd_info *rc); 82 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 84 85 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, 86 struct rib_cmd_info *rc); 87 88 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 89 struct rib_cmd_info *rc); 90 91 static void destroy_subscription_epoch(epoch_context_t ctx); 92 #ifdef ROUTE_MPATH 93 static bool rib_can_multipath(struct rib_head *rh); 94 #endif 95 96 /* Per-vnet multipath routing configuration */ 97 SYSCTL_DECL(_net_route); 98 #define V_rib_route_multipath VNET(rib_route_multipath) 99 #ifdef ROUTE_MPATH 100 #define _MP_FLAGS CTLFLAG_RW 101 #else 102 #define _MP_FLAGS CTLFLAG_RD 103 #endif 104 VNET_DEFINE(u_int, rib_route_multipath) = 1; 105 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 106 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 107 #undef _MP_FLAGS 108 109 /* Routing table UMA zone */ 110 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 111 #define V_rtzone VNET(rtzone) 112 113 void 114 vnet_rtzone_init() 115 { 116 117 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 118 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 119 } 120 121 #ifdef VIMAGE 122 void 123 vnet_rtzone_destroy() 124 { 125 126 uma_zdestroy(V_rtzone); 127 } 128 #endif 129 130 static void 131 destroy_rtentry(struct rtentry *rt) 132 { 133 #ifdef VIMAGE 134 struct nhop_object *nh = rt->rt_nhop; 135 136 /* 137 * At this moment rnh, nh_control may be already freed. 138 * nhop interface may have been migrated to a different vnet. 139 * Use vnet stored in the nexthop to delete the entry. 140 */ 141 #ifdef ROUTE_MPATH 142 if (NH_IS_NHGRP(nh)) { 143 struct weightened_nhop *wn; 144 uint32_t num_nhops; 145 wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops); 146 nh = wn[0].nh; 147 } 148 #endif 149 CURVNET_SET(nhop_get_vnet(nh)); 150 #endif 151 152 /* Unreference nexthop */ 153 nhop_free_any(rt->rt_nhop); 154 155 uma_zfree(V_rtzone, rt); 156 157 CURVNET_RESTORE(); 158 } 159 160 /* 161 * Epoch callback indicating rtentry is safe to destroy 162 */ 163 static void 164 destroy_rtentry_epoch(epoch_context_t ctx) 165 { 166 struct rtentry *rt; 167 168 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 169 170 destroy_rtentry(rt); 171 } 172 173 /* 174 * Schedule rtentry deletion 175 */ 176 static void 177 rtfree(struct rtentry *rt) 178 { 179 180 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 181 182 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 183 &rt->rt_epoch_ctx); 184 } 185 186 static struct rib_head * 187 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 188 { 189 struct rib_head *rnh; 190 struct sockaddr *dst; 191 192 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 193 194 dst = info->rti_info[RTAX_DST]; 195 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 196 197 return (rnh); 198 } 199 200 #ifdef ROUTE_MPATH 201 static bool 202 rib_can_multipath(struct rib_head *rh) 203 { 204 int result; 205 206 CURVNET_SET(rh->rib_vnet); 207 result = !!V_rib_route_multipath; 208 CURVNET_RESTORE(); 209 210 return (result); 211 } 212 213 /* 214 * Check is nhop is multipath-eligible. 215 * Avoid nhops without gateways and redirects. 216 * 217 * Returns 1 for multipath-eligible nexthop, 218 * 0 otherwise. 219 */ 220 bool 221 nhop_can_multipath(const struct nhop_object *nh) 222 { 223 224 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 225 return (1); 226 if ((nh->nh_flags & NHF_GATEWAY) == 0) 227 return (0); 228 if ((nh->nh_flags & NHF_REDIRECT) != 0) 229 return (0); 230 231 return (1); 232 } 233 #endif 234 235 static int 236 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 237 { 238 uint32_t weight; 239 240 if (info->rti_mflags & RTV_WEIGHT) 241 weight = info->rti_rmx->rmx_weight; 242 else 243 weight = default_weight; 244 /* Keep upper 1 byte for adm distance purposes */ 245 if (weight > RT_MAX_WEIGHT) 246 weight = RT_MAX_WEIGHT; 247 248 return (weight); 249 } 250 251 bool 252 rt_is_host(const struct rtentry *rt) 253 { 254 255 return (rt->rte_flags & RTF_HOST); 256 } 257 258 sa_family_t 259 rt_get_family(const struct rtentry *rt) 260 { 261 const struct sockaddr *dst; 262 263 dst = (const struct sockaddr *)rt_key_const(rt); 264 265 return (dst->sa_family); 266 } 267 268 /* 269 * Returns pointer to nexthop or nexthop group 270 * associated with @rt 271 */ 272 struct nhop_object * 273 rt_get_raw_nhop(const struct rtentry *rt) 274 { 275 276 return (rt->rt_nhop); 277 } 278 279 #ifdef INET 280 /* 281 * Stores IPv4 address and prefix length of @rt inside 282 * @paddr and @plen. 283 * @pscopeid is currently always set to 0. 284 */ 285 void 286 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, 287 int *plen, uint32_t *pscopeid) 288 { 289 const struct sockaddr_in *dst; 290 291 dst = (const struct sockaddr_in *)rt_key_const(rt); 292 KASSERT((dst->sin_family == AF_INET), 293 ("rt family is %d, not inet", dst->sin_family)); 294 *paddr = dst->sin_addr; 295 dst = (const struct sockaddr_in *)rt_mask_const(rt); 296 if (dst == NULL) 297 *plen = 32; 298 else 299 *plen = bitcount32(dst->sin_addr.s_addr); 300 *pscopeid = 0; 301 } 302 303 /* 304 * Stores IPv4 address and prefix mask of @rt inside 305 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes. 306 * @pscopeid is currently always set to 0. 307 */ 308 void 309 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, 310 struct in_addr *pmask, uint32_t *pscopeid) 311 { 312 const struct sockaddr_in *dst; 313 314 dst = (const struct sockaddr_in *)rt_key_const(rt); 315 KASSERT((dst->sin_family == AF_INET), 316 ("rt family is %d, not inet", dst->sin_family)); 317 *paddr = dst->sin_addr; 318 dst = (const struct sockaddr_in *)rt_mask_const(rt); 319 if (dst == NULL) 320 pmask->s_addr = INADDR_BROADCAST; 321 else 322 *pmask = dst->sin_addr; 323 *pscopeid = 0; 324 } 325 #endif 326 327 #ifdef INET6 328 static int 329 inet6_get_plen(const struct in6_addr *addr) 330 { 331 332 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + 333 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); 334 } 335 336 /* 337 * Stores IPv6 address and prefix length of @rt inside 338 * @paddr and @plen. Addresses are returned in de-embedded form. 339 * Scopeid is set to 0 for non-LL addresses. 340 */ 341 void 342 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, 343 int *plen, uint32_t *pscopeid) 344 { 345 const struct sockaddr_in6 *dst; 346 347 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 348 KASSERT((dst->sin6_family == AF_INET6), 349 ("rt family is %d, not inet6", dst->sin6_family)); 350 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 351 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 352 else 353 *paddr = dst->sin6_addr; 354 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 355 if (dst == NULL) 356 *plen = 128; 357 else 358 *plen = inet6_get_plen(&dst->sin6_addr); 359 } 360 361 /* 362 * Stores IPv6 address and prefix mask of @rt inside 363 * @paddr and @pmask. Addresses are returned in de-embedded form. 364 * Scopeid is set to 0 for non-LL addresses. 365 */ 366 void 367 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 368 struct in6_addr *pmask, uint32_t *pscopeid) 369 { 370 const struct sockaddr_in6 *dst; 371 372 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 373 KASSERT((dst->sin6_family == AF_INET6), 374 ("rt family is %d, not inet", dst->sin6_family)); 375 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 376 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 377 else 378 *paddr = dst->sin6_addr; 379 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 380 if (dst == NULL) 381 memset(pmask, 0xFF, sizeof(struct in6_addr)); 382 else 383 *pmask = dst->sin6_addr; 384 } 385 #endif 386 387 static void 388 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 389 { 390 391 /* Kernel -> userland timebase conversion. */ 392 if (info->rti_mflags & RTV_EXPIRE) 393 rt->rt_expire = info->rti_rmx->rmx_expire ? 394 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 395 } 396 397 /* 398 * Check if specified @gw matches gw data in the nexthop @nh. 399 * 400 * Returns true if matches, false otherwise. 401 */ 402 bool 403 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 404 { 405 406 if (nh->gw_sa.sa_family != gw->sa_family) 407 return (false); 408 409 switch (gw->sa_family) { 410 case AF_INET: 411 return (nh->gw4_sa.sin_addr.s_addr == 412 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 413 case AF_INET6: 414 { 415 const struct sockaddr_in6 *gw6; 416 gw6 = (const struct sockaddr_in6 *)gw; 417 418 /* 419 * Currently (2020-09) IPv6 gws in kernel have their 420 * scope embedded. Once this becomes false, this code 421 * has to be revisited. 422 */ 423 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 424 &gw6->sin6_addr)) 425 return (true); 426 return (false); 427 } 428 case AF_LINK: 429 { 430 const struct sockaddr_dl *sdl; 431 sdl = (const struct sockaddr_dl *)gw; 432 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 433 } 434 default: 435 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 436 } 437 438 /* NOTREACHED */ 439 return (false); 440 } 441 442 /* 443 * Checks if data in @info matches nexhop @nh. 444 * 445 * Returns 0 on success, 446 * ESRCH if not matched, 447 * ENOENT if filter function returned false 448 */ 449 int 450 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 451 const struct nhop_object *nh) 452 { 453 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 454 455 if (info->rti_filter != NULL) { 456 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 457 return (ENOENT); 458 else 459 return (0); 460 } 461 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 462 return (ESRCH); 463 464 return (0); 465 } 466 467 /* 468 * Checks if nexhop @nh can be rewritten by data in @info because 469 * of higher "priority". Currently the only case for such scenario 470 * is kernel installing interface routes, marked by RTF_PINNED flag. 471 * 472 * Returns: 473 * 1 if @info data has higher priority 474 * 0 if priority is the same 475 * -1 if priority is lower 476 */ 477 int 478 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 479 { 480 481 if (info->rti_flags & RTF_PINNED) { 482 return (NH_IS_PINNED(nh)) ? 0 : 1; 483 } else { 484 return (NH_IS_PINNED(nh)) ? -1 : 0; 485 } 486 } 487 488 /* 489 * Runs exact prefix match based on @dst and @netmask. 490 * Returns matched @rtentry if found or NULL. 491 * If rtentry was found, saves nexthop / weight value into @rnd. 492 */ 493 static struct rtentry * 494 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 495 const struct sockaddr *netmask, struct route_nhop_data *rnd) 496 { 497 struct rtentry *rt; 498 499 RIB_LOCK_ASSERT(rnh); 500 501 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 502 __DECONST(void *, netmask), &rnh->head); 503 if (rt != NULL) { 504 rnd->rnd_nhop = rt->rt_nhop; 505 rnd->rnd_weight = rt->rt_weight; 506 } else { 507 rnd->rnd_nhop = NULL; 508 rnd->rnd_weight = 0; 509 } 510 511 return (rt); 512 } 513 514 /* 515 * Runs exact prefix match based on dst/netmask from @info. 516 * Assumes RIB lock is held. 517 * Returns matched @rtentry if found or NULL. 518 * If rtentry was found, saves nexthop / weight value into @rnd. 519 */ 520 struct rtentry * 521 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 522 struct route_nhop_data *rnd) 523 { 524 struct rtentry *rt; 525 526 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 527 info->rti_info[RTAX_NETMASK], rnd); 528 529 return (rt); 530 } 531 532 /* 533 * Adds route defined by @info into the kernel table specified by @fibnum and 534 * sa_family in @info->rti_info[RTAX_DST]. 535 * 536 * Returns 0 on success and fills in operation metadata into @rc. 537 */ 538 int 539 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 540 struct rib_cmd_info *rc) 541 { 542 struct rib_head *rnh; 543 int error; 544 545 NET_EPOCH_ASSERT(); 546 547 rnh = get_rnh(fibnum, info); 548 if (rnh == NULL) 549 return (EAFNOSUPPORT); 550 551 /* 552 * Check consistency between RTF_HOST flag and netmask 553 * existence. 554 */ 555 if (info->rti_flags & RTF_HOST) 556 info->rti_info[RTAX_NETMASK] = NULL; 557 else if (info->rti_info[RTAX_NETMASK] == NULL) 558 return (EINVAL); 559 560 bzero(rc, sizeof(struct rib_cmd_info)); 561 rc->rc_cmd = RTM_ADD; 562 563 error = add_route(rnh, info, rc); 564 if (error == 0) 565 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 566 567 return (error); 568 } 569 570 /* 571 * Creates rtentry and nexthop based on @info data. 572 * Return 0 and fills in rtentry into @prt on success, 573 * return errno otherwise. 574 */ 575 static int 576 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 577 struct rtentry **prt) 578 { 579 struct sockaddr *dst, *ndst, *gateway, *netmask; 580 struct rtentry *rt; 581 struct nhop_object *nh; 582 struct ifaddr *ifa; 583 int error, flags; 584 585 dst = info->rti_info[RTAX_DST]; 586 gateway = info->rti_info[RTAX_GATEWAY]; 587 netmask = info->rti_info[RTAX_NETMASK]; 588 flags = info->rti_flags; 589 590 if ((flags & RTF_GATEWAY) && !gateway) 591 return (EINVAL); 592 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 593 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 594 return (EINVAL); 595 596 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 597 return (EINVAL); 598 599 if (info->rti_ifa == NULL) { 600 error = rt_getifa_fib(info, rnh->rib_fibnum); 601 if (error) 602 return (error); 603 } 604 605 error = nhop_create_from_info(rnh, info, &nh); 606 if (error != 0) 607 return (error); 608 609 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 610 if (rt == NULL) { 611 nhop_free(nh); 612 return (ENOBUFS); 613 } 614 rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK; 615 rt->rt_nhop = nh; 616 617 /* Fill in dst */ 618 memcpy(&rt->rt_dst, dst, dst->sa_len); 619 rt_key(rt) = &rt->rt_dst; 620 621 /* 622 * point to the (possibly newly malloc'd) dest address. 623 */ 624 ndst = (struct sockaddr *)rt_key(rt); 625 626 /* 627 * make sure it contains the value we want (masked if needed). 628 */ 629 if (netmask) { 630 rt_maskedcopy(dst, ndst, netmask); 631 } else 632 bcopy(dst, ndst, dst->sa_len); 633 634 /* 635 * We use the ifa reference returned by rt_getifa_fib(). 636 * This moved from below so that rnh->rnh_addaddr() can 637 * examine the ifa and ifa->ifa_ifp if it so desires. 638 */ 639 ifa = info->rti_ifa; 640 rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 641 rt_set_expire_info(rt, info); 642 643 *prt = rt; 644 return (0); 645 } 646 647 static int 648 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 649 struct rib_cmd_info *rc) 650 { 651 struct nhop_object *nh_orig; 652 struct route_nhop_data rnd_orig, rnd_add; 653 struct nhop_object *nh; 654 struct rtentry *rt, *rt_orig; 655 int error; 656 657 error = create_rtentry(rnh, info, &rt); 658 if (error != 0) 659 return (error); 660 661 rnd_add.rnd_nhop = rt->rt_nhop; 662 rnd_add.rnd_weight = rt->rt_weight; 663 nh = rt->rt_nhop; 664 665 RIB_WLOCK(rnh); 666 error = add_route_nhop(rnh, rt, info, &rnd_add, rc); 667 if (error == 0) { 668 RIB_WUNLOCK(rnh); 669 return (0); 670 } 671 672 /* addition failed. Lookup prefix in the rib to determine the cause */ 673 rt_orig = lookup_prefix(rnh, info, &rnd_orig); 674 if (rt_orig == NULL) { 675 /* No prefix -> rnh_addaddr() failed to allocate memory */ 676 RIB_WUNLOCK(rnh); 677 nhop_free(nh); 678 uma_zfree(V_rtzone, rt); 679 return (ENOMEM); 680 } 681 682 /* We have existing route in the RIB. */ 683 nh_orig = rnd_orig.rnd_nhop; 684 /* Check if new route has higher preference */ 685 if (can_override_nhop(info, nh_orig) > 0) { 686 /* Update nexthop to the new route */ 687 change_route_nhop(rnh, rt_orig, info, &rnd_add, rc); 688 RIB_WUNLOCK(rnh); 689 uma_zfree(V_rtzone, rt); 690 nhop_free(nh_orig); 691 return (0); 692 } 693 694 RIB_WUNLOCK(rnh); 695 696 #ifdef ROUTE_MPATH 697 if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) && 698 nhop_can_multipath(rnd_orig.rnd_nhop)) 699 error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc); 700 else 701 #endif 702 /* Unable to add - another route with the same preference exists */ 703 error = EEXIST; 704 705 /* 706 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt. 707 * ROUTE_MPATH enabled: original nhop reference is unused in any case, 708 * free rt only if not _adding_ new route to rib (e.g. the case 709 * when initial lookup returned existing route, but then it got 710 * deleted prior to multipath group insertion, leading to a simple 711 * non-multipath add as a result). 712 */ 713 nhop_free(nh); 714 if ((error != 0) || rc->rc_cmd != RTM_ADD) 715 uma_zfree(V_rtzone, rt); 716 717 return (error); 718 } 719 720 /* 721 * Removes route defined by @info from the kernel table specified by @fibnum and 722 * sa_family in @info->rti_info[RTAX_DST]. 723 * 724 * Returns 0 on success and fills in operation metadata into @rc. 725 */ 726 int 727 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 728 { 729 struct rib_head *rnh; 730 struct sockaddr *dst_orig, *netmask; 731 struct sockaddr_storage mdst; 732 int error; 733 734 NET_EPOCH_ASSERT(); 735 736 rnh = get_rnh(fibnum, info); 737 if (rnh == NULL) 738 return (EAFNOSUPPORT); 739 740 bzero(rc, sizeof(struct rib_cmd_info)); 741 rc->rc_cmd = RTM_DELETE; 742 743 dst_orig = info->rti_info[RTAX_DST]; 744 netmask = info->rti_info[RTAX_NETMASK]; 745 746 if (netmask != NULL) { 747 /* Ensure @dst is always properly masked */ 748 if (dst_orig->sa_len > sizeof(mdst)) 749 return (EINVAL); 750 rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask); 751 info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst; 752 } 753 error = del_route(rnh, info, rc); 754 info->rti_info[RTAX_DST] = dst_orig; 755 756 return (error); 757 } 758 759 /* 760 * Conditionally unlinks rtentry matching data inside @info from @rnh. 761 * Returns 0 on success with operation result stored in @rc. 762 * On error, returns: 763 * ESRCH - if prefix was not found, 764 * EADDRINUSE - if trying to delete higher priority route. 765 * ENOENT - if supplied filter function returned 0 (not matched). 766 */ 767 static int 768 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc) 769 { 770 struct rtentry *rt; 771 struct nhop_object *nh; 772 struct radix_node *rn; 773 struct route_nhop_data rnd; 774 int error; 775 776 rt = lookup_prefix(rnh, info, &rnd); 777 if (rt == NULL) 778 return (ESRCH); 779 780 nh = rt->rt_nhop; 781 #ifdef ROUTE_MPATH 782 if (NH_IS_NHGRP(nh)) { 783 error = del_route_mpath(rnh, info, rt, 784 (struct nhgrp_object *)nh, rc); 785 return (error); 786 } 787 #endif 788 error = check_info_match_nhop(info, rt, nh); 789 if (error != 0) 790 return (error); 791 792 if (can_override_nhop(info, nh) < 0) 793 return (EADDRINUSE); 794 795 /* 796 * Remove the item from the tree and return it. 797 * Complain if it is not there and do no more processing. 798 */ 799 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 800 info->rti_info[RTAX_NETMASK], &rnh->head); 801 if (rn == NULL) 802 return (ESRCH); 803 804 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 805 panic ("rtrequest delete"); 806 807 rt = RNTORT(rn); 808 rt->rte_flags &= ~RTF_UP; 809 810 /* Finalize notification */ 811 rib_bump_gen(rnh); 812 rnh->rnh_prefixes--; 813 814 rc->rc_cmd = RTM_DELETE; 815 rc->rc_rt = rt; 816 rc->rc_nh_old = rt->rt_nhop; 817 rc->rc_nh_weight = rt->rt_weight; 818 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 819 820 return (0); 821 } 822 823 static int 824 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 825 struct rib_cmd_info *rc) 826 { 827 int error; 828 829 RIB_WLOCK(rnh); 830 error = rt_unlinkrte(rnh, info, rc); 831 RIB_WUNLOCK(rnh); 832 if (error != 0) 833 return (error); 834 835 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 836 837 /* 838 * If the caller wants it, then it can have it, 839 * the entry will be deleted after the end of the current epoch. 840 */ 841 if (rc->rc_cmd == RTM_DELETE) 842 rtfree(rc->rc_rt); 843 #ifdef ROUTE_MPATH 844 else { 845 /* 846 * Deleting 1 path may result in RTM_CHANGE to 847 * a different mpath group/nhop. 848 * Free old mpath group. 849 */ 850 nhop_free_any(rc->rc_nh_old); 851 } 852 #endif 853 854 return (0); 855 } 856 857 int 858 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 859 struct rib_cmd_info *rc) 860 { 861 RIB_RLOCK_TRACKER; 862 struct route_nhop_data rnd_orig; 863 struct rib_head *rnh; 864 struct rtentry *rt; 865 int error; 866 867 NET_EPOCH_ASSERT(); 868 869 rnh = get_rnh(fibnum, info); 870 if (rnh == NULL) 871 return (EAFNOSUPPORT); 872 873 bzero(rc, sizeof(struct rib_cmd_info)); 874 rc->rc_cmd = RTM_CHANGE; 875 876 /* Check if updated gateway exists */ 877 if ((info->rti_flags & RTF_GATEWAY) && 878 (info->rti_info[RTAX_GATEWAY] == NULL)) { 879 880 /* 881 * route(8) adds RTF_GATEWAY flag if -interface is not set. 882 * Remove RTF_GATEWAY to enforce consistency and maintain 883 * compatibility.. 884 */ 885 info->rti_flags &= ~RTF_GATEWAY; 886 } 887 888 /* 889 * route change is done in multiple steps, with dropping and 890 * reacquiring lock. In the situations with multiple processes 891 * changes the same route in can lead to the case when route 892 * is changed between the steps. Address it by retrying the operation 893 * multiple times before failing. 894 */ 895 896 RIB_RLOCK(rnh); 897 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 898 info->rti_info[RTAX_NETMASK], &rnh->head); 899 900 if (rt == NULL) { 901 RIB_RUNLOCK(rnh); 902 return (ESRCH); 903 } 904 905 rnd_orig.rnd_nhop = rt->rt_nhop; 906 rnd_orig.rnd_weight = rt->rt_weight; 907 908 RIB_RUNLOCK(rnh); 909 910 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 911 error = change_route(rnh, info, &rnd_orig, rc); 912 if (error != EAGAIN) 913 break; 914 } 915 916 return (error); 917 } 918 919 static int 920 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 921 struct nhop_object *nh_orig, struct nhop_object **nh_new) 922 { 923 int error; 924 925 /* 926 * New gateway could require new ifaddr, ifp; 927 * flags may also be different; ifp may be specified 928 * by ll sockaddr when protocol address is ambiguous 929 */ 930 if (((nh_orig->nh_flags & NHF_GATEWAY) && 931 info->rti_info[RTAX_GATEWAY] != NULL) || 932 info->rti_info[RTAX_IFP] != NULL || 933 (info->rti_info[RTAX_IFA] != NULL && 934 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 935 error = rt_getifa_fib(info, rnh->rib_fibnum); 936 937 if (error != 0) { 938 info->rti_ifa = NULL; 939 return (error); 940 } 941 } 942 943 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 944 info->rti_ifa = NULL; 945 946 return (error); 947 } 948 949 #ifdef ROUTE_MPATH 950 static int 951 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 952 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 953 { 954 int error = 0; 955 struct nhop_object *nh, *nh_orig, *nh_new; 956 struct route_nhop_data rnd_new; 957 958 nh = NULL; 959 nh_orig = rnd_orig->rnd_nhop; 960 961 struct weightened_nhop *wn = NULL, *wn_new; 962 uint32_t num_nhops; 963 964 wn = nhgrp_get_nhops((struct nhgrp_object *)nh_orig, &num_nhops); 965 nh_orig = NULL; 966 for (int i = 0; i < num_nhops; i++) { 967 if (check_info_match_nhop(info, NULL, wn[i].nh)) { 968 nh_orig = wn[i].nh; 969 break; 970 } 971 } 972 973 if (nh_orig == NULL) 974 return (ESRCH); 975 976 error = change_nhop(rnh, info, nh_orig, &nh_new); 977 if (error != 0) 978 return (error); 979 980 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 981 M_TEMP, M_NOWAIT | M_ZERO); 982 if (wn_new == NULL) { 983 nhop_free(nh_new); 984 return (EAGAIN); 985 } 986 987 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 988 for (int i = 0; i < num_nhops; i++) { 989 if (wn[i].nh == nh_orig) { 990 wn[i].nh = nh_new; 991 wn[i].weight = get_info_weight(info, rnd_orig->rnd_weight); 992 break; 993 } 994 } 995 996 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 997 nhop_free(nh_new); 998 free(wn_new, M_TEMP); 999 1000 if (error != 0) 1001 return (error); 1002 1003 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1004 1005 return (error); 1006 } 1007 #endif 1008 1009 static int 1010 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1011 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1012 { 1013 int error = 0; 1014 struct nhop_object *nh, *nh_orig; 1015 struct route_nhop_data rnd_new; 1016 1017 nh = NULL; 1018 nh_orig = rnd_orig->rnd_nhop; 1019 if (nh_orig == NULL) 1020 return (ESRCH); 1021 1022 #ifdef ROUTE_MPATH 1023 if (NH_IS_NHGRP(nh_orig)) 1024 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1025 #endif 1026 1027 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1028 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1029 if (error != 0) 1030 return (error); 1031 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1032 1033 return (error); 1034 } 1035 1036 /* 1037 * Insert @rt with nhop data from @rnd_new to @rnh. 1038 * Returns 0 on success and stores operation results in @rc. 1039 */ 1040 static int 1041 add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1042 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1043 struct rib_cmd_info *rc) 1044 { 1045 struct sockaddr *ndst, *netmask; 1046 struct radix_node *rn; 1047 int error = 0; 1048 1049 RIB_WLOCK_ASSERT(rnh); 1050 1051 ndst = (struct sockaddr *)rt_key(rt); 1052 netmask = info->rti_info[RTAX_NETMASK]; 1053 1054 rt->rt_nhop = rnd->rnd_nhop; 1055 rt->rt_weight = rnd->rnd_weight; 1056 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 1057 1058 if (rn != NULL) { 1059 if (rt->rt_expire > 0) 1060 tmproutes_update(rnh, rt); 1061 1062 /* Finalize notification */ 1063 rib_bump_gen(rnh); 1064 rnh->rnh_prefixes++; 1065 1066 rc->rc_cmd = RTM_ADD; 1067 rc->rc_rt = rt; 1068 rc->rc_nh_old = NULL; 1069 rc->rc_nh_new = rnd->rnd_nhop; 1070 rc->rc_nh_weight = rnd->rnd_weight; 1071 1072 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1073 } else { 1074 /* Existing route or memory allocation failure */ 1075 error = EEXIST; 1076 } 1077 1078 return (error); 1079 } 1080 1081 /* 1082 * Switch @rt nhop/weigh to the ones specified in @rnd. 1083 * Conditionally set rt_expire if set in @info. 1084 * Returns 0 on success. 1085 */ 1086 int 1087 change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 1088 struct rt_addrinfo *info, struct route_nhop_data *rnd, 1089 struct rib_cmd_info *rc) 1090 { 1091 struct nhop_object *nh_orig; 1092 1093 RIB_WLOCK_ASSERT(rnh); 1094 1095 nh_orig = rt->rt_nhop; 1096 1097 if (rnd->rnd_nhop != NULL) { 1098 /* Changing expiration & nexthop & weight to a new one */ 1099 rt_set_expire_info(rt, info); 1100 rt->rt_nhop = rnd->rnd_nhop; 1101 rt->rt_weight = rnd->rnd_weight; 1102 if (rt->rt_expire > 0) 1103 tmproutes_update(rnh, rt); 1104 } else { 1105 /* Route deletion requested. */ 1106 struct sockaddr *ndst, *netmask; 1107 struct radix_node *rn; 1108 1109 ndst = (struct sockaddr *)rt_key(rt); 1110 netmask = info->rti_info[RTAX_NETMASK]; 1111 rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head); 1112 if (rn == NULL) 1113 return (ESRCH); 1114 rt = RNTORT(rn); 1115 rt->rte_flags &= ~RTF_UP; 1116 } 1117 1118 /* Finalize notification */ 1119 rib_bump_gen(rnh); 1120 if (rnd->rnd_nhop == NULL) 1121 rnh->rnh_prefixes--; 1122 1123 rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE; 1124 rc->rc_rt = rt; 1125 rc->rc_nh_old = nh_orig; 1126 rc->rc_nh_new = rnd->rnd_nhop; 1127 rc->rc_nh_weight = rnd->rnd_weight; 1128 1129 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1130 1131 return (0); 1132 } 1133 1134 /* 1135 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1136 * consistent with the current route data. 1137 * Nexthop in @nhd_new is consumed. 1138 */ 1139 int 1140 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1141 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1142 struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) 1143 { 1144 struct rtentry *rt_new; 1145 int error = 0; 1146 1147 RIB_WLOCK(rnh); 1148 1149 rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1150 info->rti_info[RTAX_NETMASK], &rnh->head); 1151 1152 if (rt_new == NULL) { 1153 if (rnd_orig->rnd_nhop == NULL) 1154 error = add_route_nhop(rnh, rt, info, rnd_new, rc); 1155 else { 1156 /* 1157 * Prefix does not exist, which was not our assumption. 1158 * Update @rnd_orig with the new data and return 1159 */ 1160 rnd_orig->rnd_nhop = NULL; 1161 rnd_orig->rnd_weight = 0; 1162 error = EAGAIN; 1163 } 1164 } else { 1165 /* Prefix exists, try to update */ 1166 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1167 /* 1168 * Nhop/mpath group hasn't changed. Flip 1169 * to the new precalculated one and return 1170 */ 1171 error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); 1172 } else { 1173 /* Update and retry */ 1174 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1175 rnd_orig->rnd_weight = rt_new->rt_weight; 1176 error = EAGAIN; 1177 } 1178 } 1179 1180 RIB_WUNLOCK(rnh); 1181 1182 if (error == 0) { 1183 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1184 1185 if (rnd_orig->rnd_nhop != NULL) 1186 nhop_free_any(rnd_orig->rnd_nhop); 1187 1188 } else { 1189 if (rnd_new->rnd_nhop != NULL) 1190 nhop_free_any(rnd_new->rnd_nhop); 1191 } 1192 1193 return (error); 1194 } 1195 1196 /* 1197 * Performs modification of routing table specificed by @action. 1198 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1199 * Needs to be run in network epoch. 1200 * 1201 * Returns 0 on success and fills in @rc with action result. 1202 */ 1203 int 1204 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1205 struct rib_cmd_info *rc) 1206 { 1207 int error; 1208 1209 switch (action) { 1210 case RTM_ADD: 1211 error = rib_add_route(fibnum, info, rc); 1212 break; 1213 case RTM_DELETE: 1214 error = rib_del_route(fibnum, info, rc); 1215 break; 1216 case RTM_CHANGE: 1217 error = rib_change_route(fibnum, info, rc); 1218 break; 1219 default: 1220 error = ENOTSUP; 1221 } 1222 1223 return (error); 1224 } 1225 1226 struct rt_delinfo 1227 { 1228 struct rt_addrinfo info; 1229 struct rib_head *rnh; 1230 struct rtentry *head; 1231 struct rib_cmd_info rc; 1232 }; 1233 1234 /* 1235 * Conditionally unlinks @rn from radix tree based 1236 * on info data passed in @arg. 1237 */ 1238 static int 1239 rt_checkdelroute(struct radix_node *rn, void *arg) 1240 { 1241 struct rt_delinfo *di; 1242 struct rt_addrinfo *info; 1243 struct rtentry *rt; 1244 1245 di = (struct rt_delinfo *)arg; 1246 rt = (struct rtentry *)rn; 1247 info = &di->info; 1248 1249 info->rti_info[RTAX_DST] = rt_key(rt); 1250 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 1251 1252 if (rt_unlinkrte(di->rnh, info, &di->rc) != 0) 1253 return (0); 1254 1255 /* 1256 * Add deleted rtentries to the list to GC them 1257 * after dropping the lock. 1258 * 1259 * XXX: Delayed notifications not implemented 1260 * for nexthop updates. 1261 */ 1262 if (di->rc.rc_cmd == RTM_DELETE) { 1263 /* Add to the list and return */ 1264 rt->rt_chain = di->head; 1265 di->head = rt; 1266 #ifdef ROUTE_MPATH 1267 } else { 1268 /* 1269 * RTM_CHANGE to a diferent nexthop or nexthop group. 1270 * Free old multipath group. 1271 */ 1272 nhop_free_any(di->rc.rc_nh_old); 1273 #endif 1274 } 1275 1276 return (0); 1277 } 1278 1279 /* 1280 * Iterates over a routing table specified by @fibnum and @family and 1281 * deletes elements marked by @filter_f. 1282 * @fibnum: rtable id 1283 * @family: AF_ address family 1284 * @filter_f: function returning non-zero value for items to delete 1285 * @arg: data to pass to the @filter_f function 1286 * @report: true if rtsock notification is needed. 1287 */ 1288 void 1289 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report) 1290 { 1291 struct rib_head *rnh; 1292 struct rt_delinfo di; 1293 struct rtentry *rt; 1294 struct nhop_object *nh; 1295 struct epoch_tracker et; 1296 1297 rnh = rt_tables_get_rnh(fibnum, family); 1298 if (rnh == NULL) 1299 return; 1300 1301 bzero(&di, sizeof(di)); 1302 di.info.rti_filter = filter_f; 1303 di.info.rti_filterdata = arg; 1304 di.rnh = rnh; 1305 di.rc.rc_cmd = RTM_DELETE; 1306 1307 NET_EPOCH_ENTER(et); 1308 1309 RIB_WLOCK(rnh); 1310 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1311 RIB_WUNLOCK(rnh); 1312 1313 /* We might have something to reclaim. */ 1314 bzero(&di.rc, sizeof(di.rc)); 1315 di.rc.rc_cmd = RTM_DELETE; 1316 while (di.head != NULL) { 1317 rt = di.head; 1318 di.head = rt->rt_chain; 1319 rt->rt_chain = NULL; 1320 nh = rt->rt_nhop; 1321 1322 di.rc.rc_rt = rt; 1323 di.rc.rc_nh_old = nh; 1324 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1325 1326 /* TODO std rt -> rt_addrinfo export */ 1327 di.info.rti_info[RTAX_DST] = rt_key(rt); 1328 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1329 1330 if (report) { 1331 #ifdef ROUTE_MPATH 1332 struct nhgrp_object *nhg; 1333 struct weightened_nhop *wn; 1334 uint32_t num_nhops; 1335 if (NH_IS_NHGRP(nh)) { 1336 nhg = (struct nhgrp_object *)nh; 1337 wn = nhgrp_get_nhops(nhg, &num_nhops); 1338 for (int i = 0; i < num_nhops; i++) 1339 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1340 } else 1341 #endif 1342 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1343 } 1344 rtfree(rt); 1345 } 1346 1347 NET_EPOCH_EXIT(et); 1348 } 1349 1350 static int 1351 rt_delete_unconditional(struct radix_node *rn, void *arg) 1352 { 1353 struct rtentry *rt = RNTORT(rn); 1354 struct rib_head *rnh = (struct rib_head *)arg; 1355 1356 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1357 if (RNTORT(rn) == rt) 1358 rtfree(rt); 1359 1360 return (0); 1361 } 1362 1363 /* 1364 * Removes all routes from the routing table without executing notifications. 1365 * rtentres will be removed after the end of a current epoch. 1366 */ 1367 static void 1368 rib_flush_routes(struct rib_head *rnh) 1369 { 1370 RIB_WLOCK(rnh); 1371 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1372 RIB_WUNLOCK(rnh); 1373 } 1374 1375 void 1376 rib_flush_routes_family(int family) 1377 { 1378 struct rib_head *rnh; 1379 1380 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1381 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1382 rib_flush_routes(rnh); 1383 } 1384 } 1385 1386 static void 1387 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1388 struct rib_cmd_info *rc) 1389 { 1390 struct rib_subscription *rs; 1391 1392 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1393 if (rs->type == type) 1394 rs->func(rnh, rc, rs->arg); 1395 } 1396 } 1397 1398 static struct rib_subscription * 1399 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1400 enum rib_subscription_type type, bool waitok) 1401 { 1402 struct rib_subscription *rs; 1403 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1404 1405 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1406 if (rs == NULL) 1407 return (NULL); 1408 1409 rs->func = f; 1410 rs->arg = arg; 1411 rs->type = type; 1412 1413 return (rs); 1414 } 1415 1416 /* 1417 * Subscribe for the changes in the routing table specified by @fibnum and 1418 * @family. 1419 * 1420 * Returns pointer to the subscription structure on success. 1421 */ 1422 struct rib_subscription * 1423 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1424 enum rib_subscription_type type, bool waitok) 1425 { 1426 struct rib_head *rnh; 1427 struct epoch_tracker et; 1428 1429 NET_EPOCH_ENTER(et); 1430 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1431 rnh = rt_tables_get_rnh(fibnum, family); 1432 NET_EPOCH_EXIT(et); 1433 1434 return (rib_subscribe_internal(rnh, f, arg, type, waitok)); 1435 } 1436 1437 struct rib_subscription * 1438 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1439 enum rib_subscription_type type, bool waitok) 1440 { 1441 struct rib_subscription *rs; 1442 struct epoch_tracker et; 1443 1444 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1445 return (NULL); 1446 rs->rnh = rnh; 1447 1448 NET_EPOCH_ENTER(et); 1449 RIB_WLOCK(rnh); 1450 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1451 RIB_WUNLOCK(rnh); 1452 NET_EPOCH_EXIT(et); 1453 1454 return (rs); 1455 } 1456 1457 struct rib_subscription * 1458 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1459 enum rib_subscription_type type) 1460 { 1461 struct rib_subscription *rs; 1462 1463 NET_EPOCH_ASSERT(); 1464 RIB_WLOCK_ASSERT(rnh); 1465 1466 if ((rs = allocate_subscription(f, arg, type, false)) == NULL) 1467 return (NULL); 1468 rs->rnh = rnh; 1469 1470 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1471 1472 return (rs); 1473 } 1474 1475 /* 1476 * Remove rtable subscription @rs from the routing table. 1477 * Needs to be run in network epoch. 1478 */ 1479 void 1480 rib_unsubscribe(struct rib_subscription *rs) 1481 { 1482 struct rib_head *rnh = rs->rnh; 1483 1484 NET_EPOCH_ASSERT(); 1485 1486 RIB_WLOCK(rnh); 1487 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1488 RIB_WUNLOCK(rnh); 1489 1490 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1491 &rs->epoch_ctx); 1492 } 1493 1494 void 1495 rib_unsubscribe_locked(struct rib_subscription *rs) 1496 { 1497 struct rib_head *rnh = rs->rnh; 1498 1499 NET_EPOCH_ASSERT(); 1500 RIB_WLOCK_ASSERT(rnh); 1501 1502 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1503 1504 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1505 &rs->epoch_ctx); 1506 } 1507 1508 /* 1509 * Epoch callback indicating subscription is safe to destroy 1510 */ 1511 static void 1512 destroy_subscription_epoch(epoch_context_t ctx) 1513 { 1514 struct rib_subscription *rs; 1515 1516 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1517 1518 free(rs, M_RTABLE); 1519 } 1520 1521 void 1522 rib_init_subscriptions(struct rib_head *rnh) 1523 { 1524 1525 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1526 } 1527 1528 void 1529 rib_destroy_subscriptions(struct rib_head *rnh) 1530 { 1531 struct rib_subscription *rs; 1532 struct epoch_tracker et; 1533 1534 NET_EPOCH_ENTER(et); 1535 RIB_WLOCK(rnh); 1536 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1537 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1538 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1539 &rs->epoch_ctx); 1540 } 1541 RIB_WUNLOCK(rnh); 1542 NET_EPOCH_EXIT(et); 1543 } 1544