/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>

#include <vm/uma.h>

#define	DEBUG_MOD_NAME	route_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called within the net epoch.
 */
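/*
 * For example, a typical consumer is expected to wrap calls into this
 * file like the sketch below (illustrative only, not part of the
 * original sources; @fibnum and @info are assumed to be set up by the
 * caller):
 *
 *	struct rib_cmd_info rc;
 *	struct epoch_tracker et;
 *	int error;
 *
 *	NET_EPOCH_ENTER(et);
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);
 *	NET_EPOCH_EXIT(et);
 */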

struct rib_subscription {
	CK_STAILQ_ENTRY(rib_subscription)	next;
	rib_subscription_cb_t			*func;
	void					*arg;
	struct rib_head				*rnh;
	enum rib_subscription_type		type;
	struct epoch_context			epoch_ctx;
};

static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc);
static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);

static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);

static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc);

static void destroy_subscription_epoch(epoch_context_t ctx);
#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define _MP_FLAGS	CTLFLAG_RW
#else
#define _MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

void
vnet_rtzone_init(void)
{

	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

#ifdef VIMAGE
void
vnet_rtzone_destroy(void)
{

	uma_zdestroy(V_rtzone);
}
#endif

static void
destroy_rtentry(struct rtentry *rt)
{
#ifdef VIMAGE
	struct nhop_object *nh = rt->rt_nhop;

	/*
	 * At this moment rnh, nh_control may be already freed.
	 * nhop interface may have been migrated to a different vnet.
	 * Use vnet stored in the nexthop to delete the entry.
	 */
#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		struct weightened_nhop *wn;
		uint32_t num_nhops;
		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
		nh = wn[0].nh;
	}
#endif
	CURVNET_SET(nhop_get_vnet(nh));
#endif

	/* Unreference nexthop */
	nhop_free_any(rt->rt_nhop);

	uma_zfree(V_rtzone, rt);

	CURVNET_RESTORE();
}

/*
 * Epoch callback indicating rtentry is safe to destroy
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
	struct rtentry *rt;

	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

	destroy_rtentry(rt);
}

/*
 * Schedule rtentry deletion
 */
static void
rtfree(struct rtentry *rt)
{

	KASSERT(rt != NULL, ("%s: NULL rt", __func__));

	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
	    &rt->rt_epoch_ctx);
}

static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
static bool
rib_can_ipv6_nexthop_address(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_ipv6_nexthop;
	CURVNET_RESTORE();

	return (result);
}
#endif

#ifdef ROUTE_MPATH
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Checks if nhop is multipath-eligible.
 * Avoids nhops without gateways and redirects.
 *
 * Returns 1 for a multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep the upper byte for admin distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}

bool
rt_is_host(const struct rtentry *rt)
{

	return (rt->rte_flags & RTF_HOST);
}

sa_family_t
rt_get_family(const struct rtentry *rt)
{
	const struct sockaddr *dst;

	dst = (const struct sockaddr *)rt_key_const(rt);

	return (dst->sa_family);
}

/*
 * Returns pointer to nexthop or nexthop group
 * associated with @rt
 */
struct nhop_object *
rt_get_raw_nhop(const struct rtentry *rt)
{

	return (rt->rt_nhop);
}

#ifdef INET
/*
 * Stores IPv4 address and prefix length of @rt inside
 * @paddr and @plen.
 * @pscopeid is currently always set to 0.
 */
void
rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
    int *plen, uint32_t *pscopeid)
{
	const struct sockaddr_in *dst;

	dst = (const struct sockaddr_in *)rt_key_const(rt);
	KASSERT((dst->sin_family == AF_INET),
	    ("rt family is %d, not inet", dst->sin_family));
	*paddr = dst->sin_addr;
	dst = (const struct sockaddr_in *)rt_mask_const(rt);
	if (dst == NULL)
		*plen = 32;
	else
		*plen = bitcount32(dst->sin_addr.s_addr);
	*pscopeid = 0;
}

/*
 * Stores IPv4 address and prefix mask of @rt inside
 * @paddr and @pmask. Sets mask to INADDR_BROADCAST for host routes.
 * @pscopeid is currently always set to 0.
 */
void
rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
    struct in_addr *pmask, uint32_t *pscopeid)
{
	const struct sockaddr_in *dst;

	dst = (const struct sockaddr_in *)rt_key_const(rt);
	KASSERT((dst->sin_family == AF_INET),
	    ("rt family is %d, not inet", dst->sin_family));
	*paddr = dst->sin_addr;
	dst = (const struct sockaddr_in *)rt_mask_const(rt);
	if (dst == NULL)
		pmask->s_addr = INADDR_BROADCAST;
	else
		*pmask = dst->sin_addr;
	*pscopeid = 0;
}
#endif

#ifdef INET6
static int
inet6_get_plen(const struct in6_addr *addr)
{

	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
}

/*
 * Stores IPv6 address and prefix length of @rt inside
 * @paddr and @plen. Addresses are returned in de-embedded form.
 * Scopeid is set to 0 for non-LL addresses.
 */
void
rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
    int *plen, uint32_t *pscopeid)
{
	const struct sockaddr_in6 *dst;

	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
	KASSERT((dst->sin6_family == AF_INET6),
	    ("rt family is %d, not inet6", dst->sin6_family));
	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
	else
		*paddr = dst->sin6_addr;
	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
	if (dst == NULL)
		*plen = 128;
	else
		*plen = inet6_get_plen(&dst->sin6_addr);
}

/*
 * Stores IPv6 address and prefix mask of @rt inside
 * @paddr and @pmask. Addresses are returned in de-embedded form.
 * Scopeid is set to 0 for non-LL addresses.
 */
void
rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
    struct in6_addr *pmask, uint32_t *pscopeid)
{
	const struct sockaddr_in6 *dst;

	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
	KASSERT((dst->sin6_family == AF_INET6),
	    ("rt family is %d, not inet6", dst->sin6_family));
	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
	else
		*paddr = dst->sin6_addr;
	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
	if (dst == NULL)
		memset(pmask, 0xFF, sizeof(struct in6_addr));
	else
		*pmask = dst->sin6_addr;
}
#endif
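/*
 * For illustration, a reader of an IPv4 rtentry would use the accessors
 * above as follows (sketch only; @rt is assumed to be a valid AF_INET
 * entry obtained under the net epoch):
 *
 *	struct in_addr addr;
 *	uint32_t scopeid;
 *	int plen;
 *
 *	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
 *	// addr/plen now describe the prefix, e.g. 192.0.2.0/24
 */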
static void
rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info)
{

	/*
	 * Userland -> kernel timebase conversion:
	 * rmx_expire is a wall-clock (time_second based) value, while
	 * rt_expire is kept relative to the system uptime (time_uptime).
	 * E.g. rmx_expire == time_second + 600 yields
	 * rt_expire == time_uptime + 600.
	 */
	if (info->rti_mflags & RTV_EXPIRE)
		rt->rt_expire = info->rti_rmx->rmx_expire ?
		    info->rti_rmx->rmx_expire - time_second + time_uptime : 0;
}

/*
 * Checks if the specified @gw matches the gw data in nexthop @nh.
 *
 * Returns true if it matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if the filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Checks if nexthop @nh can be rewritten by data in @info because
 * of higher "priority". Currently the only case for such a scenario
 * is the kernel installing interface routes, marked by the RTF_PINNED flag.
 *
 * Returns:
 * 1 if @info data has higher priority
 * 0 if priority is the same
 * -1 if priority is lower
 */
int
can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh)
{

	if (info->rti_flags & RTF_PINNED) {
		return (NH_IS_PINNED(nh)) ? 0 : 1;
	} else {
		return (NH_IS_PINNED(nh)) ? -1 : 0;
	}
}

/*
 * Runs an exact prefix match based on @dst and @netmask.
 * Returns the matched @rtentry if found or NULL.
 * If an rtentry was found, saves the nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst),
	    __DECONST(void *, netmask), &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

/*
 * Runs an exact prefix match based on dst/netmask from @info.
 * Assumes the RIB lock is held.
 * Returns the matched @rtentry if found or NULL.
 * If an rtentry was found, saves the nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

/*
 * Adds a route defined by @info into the kernel table specified by @fibnum
 * and sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}

/*
 * Checks if @dst and @gateway are a valid combination.
 *
 * Returns true if valid, false otherwise.
 */
static bool
check_gateway(struct rib_head *rnh, struct sockaddr *dst,
    struct sockaddr *gateway)
{
	if (dst->sa_family == gateway->sa_family)
		return (true);
	else if (gateway->sa_family == AF_UNSPEC)
		return (true);
	else if (gateway->sa_family == AF_LINK)
		return (true);
#if defined(INET) && defined(INET6)
	else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
	    rib_can_ipv6_nexthop_address(rnh))
		return (true);
#endif
	else
		return (false);
}
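/*
 * A minimal sketch of how a kernel consumer might prepare @info for
 * rib_add_route() (illustrative only; the dst/mask/gw sockaddrs, @fibnum
 * and the surrounding epoch section are assumed to be set up by the
 * caller):
 *
 *	struct rt_addrinfo info;
 *	struct rib_cmd_info rc;
 *
 *	bzero(&info, sizeof(info));
 *	info.rti_flags = RTF_GATEWAY | RTF_STATIC;
 *	info.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
 *	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
 *	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
 *	error = rib_add_route(fibnum, &info, &rc);
 */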
/*
 * Creates a rtentry and nexthop based on @info data.
 * Returns 0 and fills in the rtentry into @prt on success;
 * returns errno otherwise.
 */
static int
create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rtentry **prt)
{
	struct sockaddr *dst, *ndst, *gateway, *netmask;
	struct rtentry *rt;
	struct nhop_object *nh;
	int error, flags;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];
	flags = info->rti_flags;

	if ((flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !check_gateway(rnh, dst, gateway)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0)
		return (error);

	rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
	if (rt == NULL) {
		nhop_free(nh);
		return (ENOBUFS);
	}
	rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK;
	rt->rt_nhop = nh;

	/* Fill in dst */
	memcpy(&rt->rt_dst, dst, dst->sa_len);
	rt_key(rt) = &rt->rt_dst;

	/*
	 * Point to the (possibly newly malloc'd) dest address.
	 */
	ndst = (struct sockaddr *)rt_key(rt);

	/*
	 * Make sure it contains the value we want (masked if needed).
	 */
	if (netmask) {
		rt_maskedcopy(dst, ndst, netmask);
	} else
		bcopy(dst, ndst, dst->sa_len);

	/*
	 * We use the ifa reference returned by rt_getifa_fib().
	 * This moved from below so that rnh->rnh_addaddr() can
	 * examine the ifa and ifa->ifa_ifp if it so desires.
	 */
	rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
	rt_set_expire_info(rt, info);

	*prt = rt;
	return (0);
}

static int
add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_orig, rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt, *rt_orig;
	int error;

	error = create_rtentry(rnh, info, &rt);
	if (error != 0)
		return (error);

	rnd_add.rnd_nhop = rt->rt_nhop;
	rnd_add.rnd_weight = rt->rt_weight;
	nh = rt->rt_nhop;

	RIB_WLOCK(rnh);
	error = add_route_nhop(rnh, rt, info, &rnd_add, rc);
	if (error == 0) {
		RIB_WUNLOCK(rnh);
		return (0);
	}

	/* Addition failed. Look up the prefix in the rib to determine the cause. */
	rt_orig = lookup_prefix(rnh, info, &rnd_orig);
	if (rt_orig == NULL) {
		/* No prefix -> rnh_addaddr() failed to allocate memory */
		RIB_WUNLOCK(rnh);
		nhop_free(nh);
		uma_zfree(V_rtzone, rt);
		return (ENOMEM);
	}

	/* We have an existing route in the RIB. */
	nh_orig = rnd_orig.rnd_nhop;
	/* Check if the new route has higher preference */
	if (can_override_nhop(info, nh_orig) > 0) {
		/* Update the nexthop to the new route */
		change_route_nhop(rnh, rt_orig, info, &rnd_add, rc);
		RIB_WUNLOCK(rnh);
		uma_zfree(V_rtzone, rt);
		nhop_free(nh_orig);
		return (0);
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop))
		error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc);
	else
#endif
	/* Unable to add - another route with the same preference exists */
	error = EEXIST;

	/*
	 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt.
	 * ROUTE_MPATH enabled: original nhop reference is unused in any case,
	 *  free rt only if not _adding_ a new route to the rib (e.g. the case
	 *  when the initial lookup returned an existing route, but then it got
	 *  deleted prior to multipath group insertion, leading to a simple
	 *  non-multipath add as a result).
	 */
	nhop_free(nh);
	if ((error != 0) || rc->rc_cmd != RTM_ADD)
		uma_zfree(V_rtzone, rt);

	return (error);
}

/*
 * Removes a route defined by @info from the kernel table specified by @fibnum
 * and sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst_orig, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst_orig = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst_orig->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask);
		info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst;
	}
	error = del_route(rnh, info, rc);
	info->rti_info[RTAX_DST] = dst_orig;

	return (error);
}

/*
 * Conditionally unlinks the rtentry matching data inside @info from @rnh.
 * Returns 0 on success with the operation result stored in @rc.
 * On error, returns:
 * ESRCH - if the prefix was not found,
 * EADDRINUSE - if trying to delete a higher priority route.
 * ENOENT - if the supplied filter function returned 0 (not matched).
 */
static int
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rtentry *rt;
	struct nhop_object *nh;
	struct radix_node *rn;
	struct route_nhop_data rnd;
	int error;

	rt = lookup_prefix(rnh, info, &rnd);
	if (rt == NULL)
		return (ESRCH);

	nh = rt->rt_nhop;
#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		error = del_route_mpath(rnh, info, rt,
		    (struct nhgrp_object *)nh, rc);
		return (error);
	}
#endif
	error = check_info_match_nhop(info, rt, nh);
	if (error != 0)
		return (error);

	if (can_override_nhop(info, nh) < 0)
		return (EADDRINUSE);

	/*
	 * Remove the item from the tree and return it.
	 * Complain if it is not there and do no more processing.
	 */
864 */ 865 rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST], 866 info->rti_info[RTAX_NETMASK], &rnh->head); 867 if (rn == NULL) 868 return (ESRCH); 869 870 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 871 panic ("rtrequest delete"); 872 873 rt = RNTORT(rn); 874 rt->rte_flags &= ~RTF_UP; 875 876 /* Finalize notification */ 877 rib_bump_gen(rnh); 878 rnh->rnh_prefixes--; 879 880 rc->rc_cmd = RTM_DELETE; 881 rc->rc_rt = rt; 882 rc->rc_nh_old = rt->rt_nhop; 883 rc->rc_nh_weight = rt->rt_weight; 884 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 885 886 return (0); 887 } 888 889 static int 890 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 891 struct rib_cmd_info *rc) 892 { 893 int error; 894 895 RIB_WLOCK(rnh); 896 error = rt_unlinkrte(rnh, info, rc); 897 RIB_WUNLOCK(rnh); 898 if (error != 0) 899 return (error); 900 901 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 902 903 /* 904 * If the caller wants it, then it can have it, 905 * the entry will be deleted after the end of the current epoch. 906 */ 907 if (rc->rc_cmd == RTM_DELETE) 908 rtfree(rc->rc_rt); 909 #ifdef ROUTE_MPATH 910 else { 911 /* 912 * Deleting 1 path may result in RTM_CHANGE to 913 * a different mpath group/nhop. 914 * Free old mpath group. 915 */ 916 nhop_free_any(rc->rc_nh_old); 917 } 918 #endif 919 920 return (0); 921 } 922 923 int 924 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 925 struct rib_cmd_info *rc) 926 { 927 RIB_RLOCK_TRACKER; 928 struct route_nhop_data rnd_orig; 929 struct rib_head *rnh; 930 struct rtentry *rt; 931 int error; 932 933 NET_EPOCH_ASSERT(); 934 935 rnh = get_rnh(fibnum, info); 936 if (rnh == NULL) 937 return (EAFNOSUPPORT); 938 939 bzero(rc, sizeof(struct rib_cmd_info)); 940 rc->rc_cmd = RTM_CHANGE; 941 942 /* Check if updated gateway exists */ 943 if ((info->rti_flags & RTF_GATEWAY) && 944 (info->rti_info[RTAX_GATEWAY] == NULL)) { 945 946 /* 947 * route(8) adds RTF_GATEWAY flag if -interface is not set. 948 * Remove RTF_GATEWAY to enforce consistency and maintain 949 * compatibility.. 950 */ 951 info->rti_flags &= ~RTF_GATEWAY; 952 } 953 954 /* 955 * route change is done in multiple steps, with dropping and 956 * reacquiring lock. In the situations with multiple processes 957 * changes the same route in can lead to the case when route 958 * is changed between the steps. Address it by retrying the operation 959 * multiple times before failing. 
960 */ 961 962 RIB_RLOCK(rnh); 963 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 964 info->rti_info[RTAX_NETMASK], &rnh->head); 965 966 if (rt == NULL) { 967 RIB_RUNLOCK(rnh); 968 return (ESRCH); 969 } 970 971 rnd_orig.rnd_nhop = rt->rt_nhop; 972 rnd_orig.rnd_weight = rt->rt_weight; 973 974 RIB_RUNLOCK(rnh); 975 976 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 977 error = change_route(rnh, info, &rnd_orig, rc); 978 if (error != EAGAIN) 979 break; 980 } 981 982 return (error); 983 } 984 985 static int 986 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 987 struct nhop_object *nh_orig, struct nhop_object **nh_new) 988 { 989 int error; 990 991 /* 992 * New gateway could require new ifaddr, ifp; 993 * flags may also be different; ifp may be specified 994 * by ll sockaddr when protocol address is ambiguous 995 */ 996 if (((nh_orig->nh_flags & NHF_GATEWAY) && 997 info->rti_info[RTAX_GATEWAY] != NULL) || 998 info->rti_info[RTAX_IFP] != NULL || 999 (info->rti_info[RTAX_IFA] != NULL && 1000 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 1001 error = rt_getifa_fib(info, rnh->rib_fibnum); 1002 1003 if (error != 0) { 1004 info->rti_ifa = NULL; 1005 return (error); 1006 } 1007 } 1008 1009 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 1010 info->rti_ifa = NULL; 1011 1012 return (error); 1013 } 1014 1015 #ifdef ROUTE_MPATH 1016 static int 1017 change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info, 1018 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1019 { 1020 int error = 0, found_idx = 0; 1021 struct nhop_object *nh_orig = NULL, *nh_new; 1022 struct route_nhop_data rnd_new; 1023 struct weightened_nhop *wn = NULL, *wn_new; 1024 uint32_t num_nhops; 1025 1026 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops); 1027 for (int i = 0; i < num_nhops; i++) { 1028 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) { 1029 nh_orig = wn[i].nh; 1030 found_idx = i; 1031 break; 1032 } 1033 } 1034 1035 if (nh_orig == NULL) 1036 return (ESRCH); 1037 1038 error = change_nhop(rnh, info, nh_orig, &nh_new); 1039 if (error != 0) 1040 return (error); 1041 1042 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 1043 M_TEMP, M_NOWAIT | M_ZERO); 1044 if (wn_new == NULL) { 1045 nhop_free(nh_new); 1046 return (EAGAIN); 1047 } 1048 1049 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 1050 wn_new[found_idx].nh = nh_new; 1051 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight); 1052 1053 error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new); 1054 nhop_free(nh_new); 1055 free(wn_new, M_TEMP); 1056 1057 if (error != 0) 1058 return (error); 1059 1060 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1061 1062 return (error); 1063 } 1064 #endif 1065 1066 static int 1067 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 1068 struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc) 1069 { 1070 int error = 0; 1071 struct nhop_object *nh_orig; 1072 struct route_nhop_data rnd_new; 1073 1074 nh_orig = rnd_orig->rnd_nhop; 1075 if (nh_orig == NULL) 1076 return (ESRCH); 1077 1078 #ifdef ROUTE_MPATH 1079 if (NH_IS_NHGRP(nh_orig)) 1080 return (change_mpath_route(rnh, info, rnd_orig, rc)); 1081 #endif 1082 1083 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1084 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1085 if (error != 0) 1086 return (error); 1087 error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc); 1088 1089 
	return (error);
}

/*
 * Inserts @rt with nhop data from @rnd into @rnh.
 * Returns 0 on success and stores the operation results in @rc.
 */
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct radix_node *rn;
	int error = 0;

	RIB_WLOCK_ASSERT(rnh);

	ndst = (struct sockaddr *)rt_key(rt);
	netmask = info->rti_info[RTAX_NETMASK];

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else {
		/* Existing route or memory allocation failure */
		error = EEXIST;
	}

	return (error);
}

/*
 * Switches @rt nhop/weight to the ones specified in @rnd.
 * Conditionally sets rt_expire if set in @info.
 * Returns 0 on success.
 */
int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop != NULL) {
		/* Changing expiration & nexthop & weight to a new one */
		rt_set_expire_info(rt, info);
		rt->rt_nhop = rnd->rnd_nhop;
		rt->rt_weight = rnd->rnd_weight;
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);
	} else {
		/* Route deletion requested. */
		struct sockaddr *ndst, *netmask;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
		if (rn == NULL)
			return (ESRCH);
		rt = RNTORT(rn);
		rt->rte_flags &= ~RTF_UP;
	}

	/* Finalize notification */
	rib_bump_gen(rnh);
	if (rnd->rnd_nhop == NULL)
		rnh->rnh_prefixes--;

	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally updates the route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * The nexthop in @rnd_new is consumed.
 */
1194 */ 1195 int 1196 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1197 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1198 struct route_nhop_data *rnd_new, struct rib_cmd_info *rc) 1199 { 1200 struct rtentry *rt_new; 1201 int error = 0; 1202 1203 #if DEBUG_MAX_LEVEL >= LOG_DEBUG2 1204 { 1205 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE]; 1206 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE); 1207 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE); 1208 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family, 1209 "trying change %s -> %s", buf_old, buf_new); 1210 } 1211 #endif 1212 RIB_WLOCK(rnh); 1213 1214 rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1215 info->rti_info[RTAX_NETMASK], &rnh->head); 1216 1217 if (rt_new == NULL) { 1218 if (rnd_orig->rnd_nhop == NULL) 1219 error = add_route_nhop(rnh, rt, info, rnd_new, rc); 1220 else { 1221 /* 1222 * Prefix does not exist, which was not our assumption. 1223 * Update @rnd_orig with the new data and return 1224 */ 1225 rnd_orig->rnd_nhop = NULL; 1226 rnd_orig->rnd_weight = 0; 1227 error = EAGAIN; 1228 } 1229 } else { 1230 /* Prefix exists, try to update */ 1231 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1232 /* 1233 * Nhop/mpath group hasn't changed. Flip 1234 * to the new precalculated one and return 1235 */ 1236 error = change_route_nhop(rnh, rt_new, info, rnd_new, rc); 1237 } else { 1238 /* Update and retry */ 1239 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1240 rnd_orig->rnd_weight = rt_new->rt_weight; 1241 error = EAGAIN; 1242 } 1243 } 1244 1245 RIB_WUNLOCK(rnh); 1246 1247 if (error == 0) { 1248 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1249 1250 if (rnd_orig->rnd_nhop != NULL) 1251 nhop_free_any(rnd_orig->rnd_nhop); 1252 1253 } else { 1254 if (rnd_new->rnd_nhop != NULL) 1255 nhop_free_any(rnd_new->rnd_nhop); 1256 } 1257 1258 return (error); 1259 } 1260 1261 /* 1262 * Performs modification of routing table specificed by @action. 1263 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1264 * Needs to be run in network epoch. 1265 * 1266 * Returns 0 on success and fills in @rc with action result. 1267 */ 1268 int 1269 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1270 struct rib_cmd_info *rc) 1271 { 1272 int error; 1273 1274 switch (action) { 1275 case RTM_ADD: 1276 error = rib_add_route(fibnum, info, rc); 1277 break; 1278 case RTM_DELETE: 1279 error = rib_del_route(fibnum, info, rc); 1280 break; 1281 case RTM_CHANGE: 1282 error = rib_change_route(fibnum, info, rc); 1283 break; 1284 default: 1285 error = ENOTSUP; 1286 } 1287 1288 return (error); 1289 } 1290 1291 struct rt_delinfo 1292 { 1293 struct rt_addrinfo info; 1294 struct rib_head *rnh; 1295 struct rtentry *head; 1296 struct rib_cmd_info rc; 1297 }; 1298 1299 /* 1300 * Conditionally unlinks @rn from radix tree based 1301 * on info data passed in @arg. 1302 */ 1303 static int 1304 rt_checkdelroute(struct radix_node *rn, void *arg) 1305 { 1306 struct rt_delinfo *di; 1307 struct rt_addrinfo *info; 1308 struct rtentry *rt; 1309 1310 di = (struct rt_delinfo *)arg; 1311 rt = (struct rtentry *)rn; 1312 info = &di->info; 1313 1314 info->rti_info[RTAX_DST] = rt_key(rt); 1315 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 1316 1317 if (rt_unlinkrte(di->rnh, info, &di->rc) != 0) 1318 return (0); 1319 1320 /* 1321 * Add deleted rtentries to the list to GC them 1322 * after dropping the lock. 
1323 * 1324 * XXX: Delayed notifications not implemented 1325 * for nexthop updates. 1326 */ 1327 if (di->rc.rc_cmd == RTM_DELETE) { 1328 /* Add to the list and return */ 1329 rt->rt_chain = di->head; 1330 di->head = rt; 1331 #ifdef ROUTE_MPATH 1332 } else { 1333 /* 1334 * RTM_CHANGE to a diferent nexthop or nexthop group. 1335 * Free old multipath group. 1336 */ 1337 nhop_free_any(di->rc.rc_nh_old); 1338 #endif 1339 } 1340 1341 return (0); 1342 } 1343 1344 /* 1345 * Iterates over a routing table specified by @fibnum and @family and 1346 * deletes elements marked by @filter_f. 1347 * @fibnum: rtable id 1348 * @family: AF_ address family 1349 * @filter_f: function returning non-zero value for items to delete 1350 * @arg: data to pass to the @filter_f function 1351 * @report: true if rtsock notification is needed. 1352 */ 1353 void 1354 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report) 1355 { 1356 struct rib_head *rnh; 1357 struct rt_delinfo di; 1358 struct rtentry *rt; 1359 struct nhop_object *nh; 1360 struct epoch_tracker et; 1361 1362 rnh = rt_tables_get_rnh(fibnum, family); 1363 if (rnh == NULL) 1364 return; 1365 1366 bzero(&di, sizeof(di)); 1367 di.info.rti_filter = filter_f; 1368 di.info.rti_filterdata = arg; 1369 di.rnh = rnh; 1370 di.rc.rc_cmd = RTM_DELETE; 1371 1372 NET_EPOCH_ENTER(et); 1373 1374 RIB_WLOCK(rnh); 1375 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1376 RIB_WUNLOCK(rnh); 1377 1378 /* We might have something to reclaim. */ 1379 bzero(&di.rc, sizeof(di.rc)); 1380 di.rc.rc_cmd = RTM_DELETE; 1381 while (di.head != NULL) { 1382 rt = di.head; 1383 di.head = rt->rt_chain; 1384 rt->rt_chain = NULL; 1385 nh = rt->rt_nhop; 1386 1387 di.rc.rc_rt = rt; 1388 di.rc.rc_nh_old = nh; 1389 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1390 1391 /* TODO std rt -> rt_addrinfo export */ 1392 di.info.rti_info[RTAX_DST] = rt_key(rt); 1393 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1394 1395 if (report) { 1396 #ifdef ROUTE_MPATH 1397 struct nhgrp_object *nhg; 1398 struct weightened_nhop *wn; 1399 uint32_t num_nhops; 1400 if (NH_IS_NHGRP(nh)) { 1401 nhg = (struct nhgrp_object *)nh; 1402 wn = nhgrp_get_nhops(nhg, &num_nhops); 1403 for (int i = 0; i < num_nhops; i++) 1404 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1405 } else 1406 #endif 1407 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1408 } 1409 rtfree(rt); 1410 } 1411 1412 NET_EPOCH_EXIT(et); 1413 } 1414 1415 static int 1416 rt_delete_unconditional(struct radix_node *rn, void *arg) 1417 { 1418 struct rtentry *rt = RNTORT(rn); 1419 struct rib_head *rnh = (struct rib_head *)arg; 1420 1421 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1422 if (RNTORT(rn) == rt) 1423 rtfree(rt); 1424 1425 return (0); 1426 } 1427 1428 /* 1429 * Removes all routes from the routing table without executing notifications. 1430 * rtentres will be removed after the end of a current epoch. 
1431 */ 1432 static void 1433 rib_flush_routes(struct rib_head *rnh) 1434 { 1435 RIB_WLOCK(rnh); 1436 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1437 RIB_WUNLOCK(rnh); 1438 } 1439 1440 void 1441 rib_flush_routes_family(int family) 1442 { 1443 struct rib_head *rnh; 1444 1445 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1446 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1447 rib_flush_routes(rnh); 1448 } 1449 } 1450 1451 const char * 1452 rib_print_family(int family) 1453 { 1454 switch (family) { 1455 case AF_INET: 1456 return ("inet"); 1457 case AF_INET6: 1458 return ("inet6"); 1459 case AF_LINK: 1460 return ("link"); 1461 } 1462 return ("unknown"); 1463 } 1464 1465 static void 1466 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1467 struct rib_cmd_info *rc) 1468 { 1469 struct rib_subscription *rs; 1470 1471 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1472 if (rs->type == type) 1473 rs->func(rnh, rc, rs->arg); 1474 } 1475 } 1476 1477 static struct rib_subscription * 1478 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1479 enum rib_subscription_type type, bool waitok) 1480 { 1481 struct rib_subscription *rs; 1482 int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT); 1483 1484 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1485 if (rs == NULL) 1486 return (NULL); 1487 1488 rs->func = f; 1489 rs->arg = arg; 1490 rs->type = type; 1491 1492 return (rs); 1493 } 1494 1495 /* 1496 * Subscribe for the changes in the routing table specified by @fibnum and 1497 * @family. 1498 * 1499 * Returns pointer to the subscription structure on success. 1500 */ 1501 struct rib_subscription * 1502 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1503 enum rib_subscription_type type, bool waitok) 1504 { 1505 struct rib_head *rnh; 1506 struct epoch_tracker et; 1507 1508 NET_EPOCH_ENTER(et); 1509 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1510 rnh = rt_tables_get_rnh(fibnum, family); 1511 NET_EPOCH_EXIT(et); 1512 1513 return (rib_subscribe_internal(rnh, f, arg, type, waitok)); 1514 } 1515 1516 struct rib_subscription * 1517 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1518 enum rib_subscription_type type, bool waitok) 1519 { 1520 struct rib_subscription *rs; 1521 struct epoch_tracker et; 1522 1523 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1524 return (NULL); 1525 rs->rnh = rnh; 1526 1527 NET_EPOCH_ENTER(et); 1528 RIB_WLOCK(rnh); 1529 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1530 RIB_WUNLOCK(rnh); 1531 NET_EPOCH_EXIT(et); 1532 1533 return (rs); 1534 } 1535 1536 struct rib_subscription * 1537 rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1538 enum rib_subscription_type type) 1539 { 1540 struct rib_subscription *rs; 1541 1542 NET_EPOCH_ASSERT(); 1543 RIB_WLOCK_ASSERT(rnh); 1544 1545 if ((rs = allocate_subscription(f, arg, type, false)) == NULL) 1546 return (NULL); 1547 rs->rnh = rnh; 1548 1549 CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next); 1550 1551 return (rs); 1552 } 1553 1554 /* 1555 * Remove rtable subscription @rs from the routing table. 1556 * Needs to be run in network epoch. 
1557 */ 1558 void 1559 rib_unsubscribe(struct rib_subscription *rs) 1560 { 1561 struct rib_head *rnh = rs->rnh; 1562 1563 NET_EPOCH_ASSERT(); 1564 1565 RIB_WLOCK(rnh); 1566 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1567 RIB_WUNLOCK(rnh); 1568 1569 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1570 &rs->epoch_ctx); 1571 } 1572 1573 void 1574 rib_unsubscribe_locked(struct rib_subscription *rs) 1575 { 1576 struct rib_head *rnh = rs->rnh; 1577 1578 NET_EPOCH_ASSERT(); 1579 RIB_WLOCK_ASSERT(rnh); 1580 1581 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1582 1583 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1584 &rs->epoch_ctx); 1585 } 1586 1587 /* 1588 * Epoch callback indicating subscription is safe to destroy 1589 */ 1590 static void 1591 destroy_subscription_epoch(epoch_context_t ctx) 1592 { 1593 struct rib_subscription *rs; 1594 1595 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1596 1597 free(rs, M_RTABLE); 1598 } 1599 1600 void 1601 rib_init_subscriptions(struct rib_head *rnh) 1602 { 1603 1604 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1605 } 1606 1607 void 1608 rib_destroy_subscriptions(struct rib_head *rnh) 1609 { 1610 struct rib_subscription *rs; 1611 struct epoch_tracker et; 1612 1613 NET_EPOCH_ENTER(et); 1614 RIB_WLOCK(rnh); 1615 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1616 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1617 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1618 &rs->epoch_ctx); 1619 } 1620 RIB_WUNLOCK(rnh); 1621 NET_EPOCH_EXIT(et); 1622 } 1623