/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>

#include <vm/uma.h>

#define	DEBUG_MOD_NAME	route_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called within the net epoch.
 */
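
/*
 * Example (illustrative): a typical caller enters the net epoch around
 * any of the calls below; fibnum and info are caller-supplied.
 *
 *	struct epoch_tracker et;
 *	struct rib_cmd_info rc;
 *	int error;
 *
 *	NET_EPOCH_ENTER(et);
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);
 *	NET_EPOCH_EXIT(et);
 */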

struct rib_subscription {
	CK_STAILQ_ENTRY(rib_subscription) next;
	rib_subscription_cb_t *func;
	void *arg;
	struct rib_head *rnh;
	enum rib_subscription_type type;
	struct epoch_context epoch_ctx;
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);

static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc);

static void destroy_subscription_epoch(epoch_context_t ctx);
#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
#else
#define	_MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define	V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif
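
/*
 * Example (illustrative): the knobs above are exposed to userland as
 * net.route.multipath and net.route.ipv6_nexthop, e.g.:
 *
 *	# sysctl net.route.multipath=0
 *
 * Note that net.route.multipath is read-only unless the kernel is built
 * with ROUTE_MPATH (see _MP_FLAGS above).
 */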

/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

void
vnet_rtzone_init(void)
{

	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

#ifdef VIMAGE
void
vnet_rtzone_destroy(void)
{

	uma_zdestroy(V_rtzone);
}
#endif

static void
destroy_rtentry(struct rtentry *rt)
{
#ifdef VIMAGE
	struct nhop_object *nh = rt->rt_nhop;

	/*
	 * At this moment rnh, nh_control may be already freed.
	 * nhop interface may have been migrated to a different vnet.
	 * Use vnet stored in the nexthop to delete the entry.
	 */
#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		const struct weightened_nhop *wn;
		uint32_t num_nhops;
		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
		nh = wn[0].nh;
	}
#endif
	CURVNET_SET(nhop_get_vnet(nh));
#endif

	/* Unreference nexthop */
	nhop_free_any(rt->rt_nhop);

	uma_zfree(V_rtzone, rt);

	CURVNET_RESTORE();
}

/*
 * Epoch callback indicating rtentry is safe to destroy
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
	struct rtentry *rt;

	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

	destroy_rtentry(rt);
}

/*
 * Schedule rtentry deletion
 */
static void
rtfree(struct rtentry *rt)
{

	KASSERT(rt != NULL, ("%s: NULL rt", __func__));

	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
	    &rt->rt_epoch_ctx);
}

static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
static bool
rib_can_ipv6_nexthop_address(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_ipv6_nexthop;
	CURVNET_RESTORE();

	return (result);
}
#endif

#ifdef ROUTE_MPATH
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
 * Avoid nhops without gateways and redirects.
 *
 * Returns 1 for multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep the upper byte for administrative distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}
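
/*
 * Example (illustrative): a caller requesting a specific weight fills in
 * the metrics as below; values above RT_MAX_WEIGHT are clamped and a zero
 * weight falls back to @default_weight.
 *
 *	struct rt_metrics rmx = { .rmx_weight = 10 };
 *
 *	info.rti_rmx = &rmx;
 *	info.rti_mflags |= RTV_WEIGHT;
 */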

bool
rt_is_host(const struct rtentry *rt)
{

	return (rt->rte_flags & RTF_HOST);
}

sa_family_t
rt_get_family(const struct rtentry *rt)
{
	const struct sockaddr *dst;

	dst = (const struct sockaddr *)rt_key_const(rt);

	return (dst->sa_family);
}

/*
 * Returns pointer to nexthop or nexthop group
 * associated with @rt
 */
struct nhop_object *
rt_get_raw_nhop(const struct rtentry *rt)
{

	return (rt->rt_nhop);
}

#ifdef INET
/*
 * Stores IPv4 address and prefix length of @rt inside
 * @paddr and @plen.
 * @pscopeid is currently always set to 0.
 */
void
rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr,
    int *plen, uint32_t *pscopeid)
{
	const struct sockaddr_in *dst;

	dst = (const struct sockaddr_in *)rt_key_const(rt);
	KASSERT((dst->sin_family == AF_INET),
	    ("rt family is %d, not inet", dst->sin_family));
	*paddr = dst->sin_addr;
	dst = (const struct sockaddr_in *)rt_mask_const(rt);
	if (dst == NULL)
		*plen = 32;
	else
		*plen = bitcount32(dst->sin_addr.s_addr);
	*pscopeid = 0;
}

/*
 * Stores IPv4 address and prefix mask of @rt inside
 * @paddr and @pmask. Sets mask to INADDR_BROADCAST for host routes.
 * @pscopeid is currently always set to 0.
 */
void
rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr,
    struct in_addr *pmask, uint32_t *pscopeid)
{
	const struct sockaddr_in *dst;

	dst = (const struct sockaddr_in *)rt_key_const(rt);
	KASSERT((dst->sin_family == AF_INET),
	    ("rt family is %d, not inet", dst->sin_family));
	*paddr = dst->sin_addr;
	dst = (const struct sockaddr_in *)rt_mask_const(rt);
	if (dst == NULL)
		pmask->s_addr = INADDR_BROADCAST;
	else
		*pmask = dst->sin_addr;
	*pscopeid = 0;
}
#endif
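
/*
 * Example (illustrative): for a route to 192.0.2.0/24 the helpers above
 * yield paddr == 192.0.2.0 and plen == 24, as the stored mask
 * 255.255.255.0 contains 24 set bits.
 *
 *	struct in_addr addr;
 *	int plen;
 *	uint32_t scopeid;
 *
 *	rt_get_inet_prefix_plen(rt, &addr, &plen, &scopeid);
 */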

#ifdef INET6
static int
inet6_get_plen(const struct in6_addr *addr)
{

	return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) +
	    bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3]));
}

/*
 * Stores IPv6 address and prefix length of @rt inside
 * @paddr and @plen. Addresses are returned in de-embedded form.
 * Scopeid is set to 0 for non-LL addresses.
 */
void
rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr,
    int *plen, uint32_t *pscopeid)
{
	const struct sockaddr_in6 *dst;

	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
	KASSERT((dst->sin6_family == AF_INET6),
	    ("rt family is %d, not inet6", dst->sin6_family));
	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
	else
		*paddr = dst->sin6_addr;
	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
	if (dst == NULL)
		*plen = 128;
	else
		*plen = inet6_get_plen(&dst->sin6_addr);
}

/*
 * Stores IPv6 address and prefix mask of @rt inside
 * @paddr and @pmask. Addresses are returned in de-embedded form.
 * Scopeid is set to 0 for non-LL addresses.
 */
void
rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr,
    struct in6_addr *pmask, uint32_t *pscopeid)
{
	const struct sockaddr_in6 *dst;

	dst = (const struct sockaddr_in6 *)rt_key_const(rt);
	KASSERT((dst->sin6_family == AF_INET6),
	    ("rt family is %d, not inet6", dst->sin6_family));
	if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr))
		in6_splitscope(&dst->sin6_addr, paddr, pscopeid);
	else
		*paddr = dst->sin6_addr;
	dst = (const struct sockaddr_in6 *)rt_mask_const(rt);
	if (dst == NULL)
		memset(pmask, 0xFF, sizeof(struct in6_addr));
	else
		*pmask = dst->sin6_addr;
}
#endif

/*
 * Check if specified @gw matches gw data in the nexthop @nh.
 *
 * Returns true if matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Checks if nexthop @nh can be rewritten by data in @info because
 * of higher "priority". Currently the only case for such scenario
 * is kernel installing interface routes, marked by RTF_PINNED flag.
 *
 * Returns:
 * 1 if @info data has higher priority
 * 0 if priority is the same
 * -1 if priority is lower
 */
int
can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh)
{

	if (info->rti_flags & RTF_PINNED) {
		return (NH_IS_PINNED(nh)) ? 0 : 1;
	} else {
		return (NH_IS_PINNED(nh)) ? -1 : 0;
	}
}
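
/*
 * Example (illustrative): when the kernel installs an interface route it
 * sets RTF_PINNED in @info, so can_override_nhop() returns 1 against an
 * unpinned nexthop and the route may be overridden; a userland request
 * without RTF_PINNED targeting a pinned nexthop yields -1 and is rejected
 * with EADDRINUSE by rt_unlinkrte() below.
 */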

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route_byinfo(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}
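
/*
 * Example (illustrative): a minimal in-kernel caller adding a gateway
 * route from within the net epoch; dst, mask and gw are caller-provided
 * sockaddr_in structures.
 *
 *	struct rt_addrinfo info;
 *	struct rib_cmd_info rc;
 *	int error;
 *
 *	bzero(&info, sizeof(info));
 *	info.rti_flags = RTF_GATEWAY;
 *	info.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
 *	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
 *	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw;
 *	error = rib_add_route(RT_DEFAULT_FIB, &info, &rc);
 */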

/*
 * Checks if @dst and @gateway form a valid combination.
 *
 * Returns true if valid, false otherwise.
 */
static bool
check_gateway(struct rib_head *rnh, struct sockaddr *dst,
    struct sockaddr *gateway)
{
	if (dst->sa_family == gateway->sa_family)
		return (true);
	else if (gateway->sa_family == AF_UNSPEC)
		return (true);
	else if (gateway->sa_family == AF_LINK)
		return (true);
#if defined(INET) && defined(INET6)
	else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
	    rib_can_ipv6_nexthop_address(rnh))
		return (true);
#endif
	else
		return (false);
}

/*
 * Creates rtentry and nexthop based on @info data.
 * Returns 0 and fills in rtentry into @prt on success,
 * returns errno otherwise.
 * Note: rtentry mask will be set to RTAX_NETMASK from @info, thus its
 * pointer is required to be stable till the end of the operation
 * (radix rt insertion/change/removal).
 */
static int
create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rtentry **prt)
{
	struct sockaddr *dst, *ndst, *gateway, *netmask;
	struct rtentry *rt;
	struct nhop_object *nh;
	int error, flags;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];
	flags = info->rti_flags;

	if ((flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !check_gateway(rnh, dst, gateway)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0)
		return (error);

	rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
	if (rt == NULL) {
		nhop_free(nh);
		return (ENOBUFS);
	}
	rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK;
	rt->rt_nhop = nh;

	/* Fill in dst */
	memcpy(&rt->rt_dst, dst, dst->sa_len);
	rt_key(rt) = &rt->rt_dst;

	/*
	 * point to the (possibly newly malloc'd) dest address.
	 */
	ndst = (struct sockaddr *)rt_key(rt);

	/*
	 * make sure it contains the value we want (masked if needed).
	 */
	if (netmask) {
		rt_maskedcopy(dst, ndst, netmask);
	} else
		bcopy(dst, ndst, dst->sa_len);
	/* Set netmask to the storage from info. It will be updated upon insertion */
	rt_mask(rt) = netmask;

	/*
	 * We use the ifa reference returned by rt_getifa_fib().
	 * This moved from below so that rnh->rnh_addaddr() can
	 * examine the ifa and ifa->ifa_ifp if it so desires.
	 */
	rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	*prt = rt;
	return (0);
}
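
/*
 * Note (summary of the flow below): add_route_byinfo() optimistically
 * creates the rtentry/nexthop pair and tries to insert it. On collision
 * it re-examines the RIB under the lock: a pinned-route override rewrites
 * the existing entry in place, a multipath-capable pair is merged into a
 * nexthop group (ROUTE_MPATH), and everything else fails with EEXIST.
 */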

static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_orig, rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt, *rt_orig;
	int error;

	error = create_rtentry(rnh, info, &rt);
	if (error != 0)
		return (error);

	rnd_add.rnd_nhop = rt->rt_nhop;
	rnd_add.rnd_weight = rt->rt_weight;
	nh = rt->rt_nhop;

	RIB_WLOCK(rnh);
	error = add_route(rnh, rt, &rnd_add, rc);
	if (error == 0) {
		RIB_WUNLOCK(rnh);
		return (0);
	}

	/* addition failed. Lookup prefix in the rib to determine the cause */
	rt_orig = lookup_prefix(rnh, info, &rnd_orig);
	if (rt_orig == NULL) {
		/* No prefix -> rnh_addaddr() failed to allocate memory */
		RIB_WUNLOCK(rnh);
		nhop_free(nh);
		uma_zfree(V_rtzone, rt);
		return (ENOMEM);
	}

	/* We have existing route in the RIB. */
	nh_orig = rnd_orig.rnd_nhop;
	/* Check if new route has higher preference */
	if (can_override_nhop(info, nh_orig) > 0) {
		/* Update nexthop to the new route */
		change_route(rnh, rt_orig, &rnd_add, rc);
		RIB_WUNLOCK(rnh);
		uma_zfree(V_rtzone, rt);
		nhop_free(nh_orig);
		return (0);
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop))
		error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc);
	else
#endif
		/* Unable to add - another route with the same preference exists */
		error = EEXIST;

	/*
	 * ROUTE_MPATH disabled: failed to add route, free both nhop and rt.
	 * ROUTE_MPATH enabled: original nhop reference is unused in any case,
	 *  free rt only if not _adding_ new route to rib (e.g. the case
	 *  when initial lookup returned existing route, but then it got
	 *  deleted prior to multipath group insertion, leading to a simple
	 *  non-multipath add as a result).
	 */
	nhop_free(nh);
	if ((error != 0) || rc->rc_cmd != RTM_ADD)
		uma_zfree(V_rtzone, rt);

	return (error);
}

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst_orig, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst_orig = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst_orig->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask);
		info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst;
	}

	RIB_WLOCK(rnh);
	error = rt_unlinkrte(rnh, info, rc);
	RIB_WUNLOCK(rnh);

	info->rti_info[RTAX_DST] = dst_orig;

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rtfree(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}
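
/*
 * Example (illustrative): rt_unlinkrte() below honours an optional
 * caller-supplied filter. A hypothetical rib_filter_f_t callback that
 * matches only routes using a particular nexthop could look like:
 *
 *	static int
 *	match_nh_filter(const struct rtentry *rt, const struct nhop_object *nh,
 *	    void *arg)
 *	{
 *		return (nh == (const struct nhop_object *)arg);
 *	}
 *
 * installed via info.rti_filter = match_nh_filter and
 * info.rti_filterdata = nh before calling rib_del_route().
 */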

/*
 * Conditionally unlinks rtentry matching data inside @info from @rnh.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found,
 * EADDRINUSE - if trying to delete higher priority route.
 * ENOENT - if supplied filter function returned 0 (not matched).
 */
static int
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rtentry *rt;
	struct nhop_object *nh;
	struct route_nhop_data rnd;
	int error;

	rt = lookup_prefix(rnh, info, &rnd);
	if (rt == NULL)
		return (ESRCH);

	nh = rt->rt_nhop;
#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		error = del_route_mpath(rnh, info, rt,
		    (struct nhgrp_object *)nh, rc);
		return (error);
	}
#endif
	error = check_info_match_nhop(info, rt, nh);
	if (error != 0)
		return (error);

	if (can_override_nhop(info, nh) < 0)
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}
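
/*
 * Example (illustrative): changing the gateway of an existing prefix via
 * rib_change_route() below; dst/mask identify the prefix, new_gw is the
 * replacement gateway.
 *
 *	bzero(&info, sizeof(info));
 *	info.rti_flags = RTF_GATEWAY;
 *	info.rti_info[RTAX_DST] = (struct sockaddr *)&dst;
 *	info.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask;
 *	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&new_gw;
 *	error = rib_change_route(fibnum, &info, &rc);
 */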

int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with the lock dropped
	 * and reacquired in between. When multiple processes change the
	 * same route concurrently, the route can be changed between the
	 * steps. Address it by retrying the operation multiple times
	 * before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd into @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct radix_node *rn;

	RIB_WLOCK_ASSERT(rnh);

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
			tmproutes_update(rnh, rt, rnd->rnd_nhop);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		return (0);
	}

	/* Existing route or memory allocation failure. */
	return (EEXIST);
}

/*
 * Unconditionally deletes @rt from @rnh.
 */
static int
delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
{
	RIB_WLOCK_ASSERT(rnh);

	/* Route deletion requested. */
	struct radix_node *rn;

	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
	if (rn == NULL)
		return (ESRCH);
	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	rib_bump_gen(rnh);
	rnh->rnh_prefixes--;

	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_new = NULL;
	rc->rc_nh_weight = rt->rt_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Returns 0 on success.
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}
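
/*
 * Note (illustrative): change_route_conditional() below implements a
 * compare-and-swap style update. The caller snapshots the current route
 * data into @rnd_orig and prepares @rnd_new; the swap happens only if the
 * RIB still contains the snapshotted nexthop. Otherwise @rnd_orig is
 * refreshed and EAGAIN tells callers such as rib_change_route() to retry,
 * up to RIB_MAX_RETRIES times.
 */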

/*
 * Conditionally update route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * Nexthop in @rnd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
	{
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
#endif
	RIB_WLOCK(rnh);

	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of the routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in the network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

struct rt_delinfo
{
	struct rt_addrinfo info;
	struct rib_head *rnh;
	struct rtentry *head;
	struct rib_cmd_info rc;
};

/*
 * Conditionally unlinks @rn from the radix tree based
 * on info data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di;
	struct rt_addrinfo *info;
	struct rtentry *rt;

	di = (struct rt_delinfo *)arg;
	rt = (struct rtentry *)rn;
	info = &di->info;

	info->rti_info[RTAX_DST] = rt_key(rt);
	info->rti_info[RTAX_NETMASK] = rt_mask(rt);

	if (rt_unlinkrte(di->rnh, info, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 *  after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 *  for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *arg, bool report)
{
	struct rib_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;
	di.rnh = rnh;
	di.rc.rc_cmd = RTM_DELETE;

	NET_EPOCH_ENTER(et);

	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		/* TODO std rt -> rt_addrinfo export */
		di.info.rti_info[RTAX_DST] = rt_key(rt);
		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rtfree(rt);
	}

	NET_EPOCH_EXIT(et);
}
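
/*
 * Example (illustrative): purging every IPv4 route that resolves through
 * a given interface, with rtsock reports enabled; if_match_cb is a
 * hypothetical rib_filter_f_t comparing the nexthop interface against
 * @arg.
 *
 *	rib_walk_del(fibnum, AF_INET, if_match_cb, ifp, true);
 */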

static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	if (RNTORT(rn) == rt)
		rtfree(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of the current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}

static void
rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc)
{
	struct rib_subscription *rs;

	CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) {
		if (rs->type == type)
			rs->func(rnh, rc, rs->arg);
	}
}

static struct rib_subscription *
allocate_subscription(rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	int flags = M_ZERO | (waitok ? M_WAITOK : M_NOWAIT);

	rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
	if (rs == NULL)
		return (NULL);

	rs->func = f;
	rs->arg = arg;
	rs->type = type;

	return (rs);
}

/*
 * Subscribe for the changes in the routing table specified by @fibnum and
 * @family.
 *
 * Returns pointer to the subscription structure on success.
 */
struct rib_subscription *
rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_head *rnh;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);
	NET_EPOCH_EXIT(et);

	return (rib_subscribe_internal(rnh, f, arg, type, waitok));
}

struct rib_subscription *
rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);
	rs->rnh = rnh;

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

struct rib_subscription *
rib_subscribe_locked(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type)
{
	struct rib_subscription *rs;

	NET_EPOCH_ASSERT();
	RIB_WLOCK_ASSERT(rnh);

	if ((rs = allocate_subscription(f, arg, type, false)) == NULL)
		return (NULL);
	rs->rnh = rnh;

	CK_STAILQ_INSERT_HEAD(&rnh->rnh_subscribers, rs, next);

	return (rs);
}
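
/*
 * Example (illustrative): a consumer tracking committed route changes
 * registers a callback matching rib_subscription_cb_t and keeps the
 * returned pointer for a later rib_unsubscribe():
 *
 *	static void
 *	my_route_cb(struct rib_head *rnh, struct rib_cmd_info *rc, void *arg)
 *	{
 *		// inspect rc->rc_cmd / rc->rc_nh_new here
 *	}
 *	...
 *	rs = rib_subscribe(fibnum, AF_INET, my_route_cb, NULL,
 *	    RIB_NOTIFY_DELAYED, true);
 */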

/*
 * Removes rtable subscription @rs from the routing table.
 * Needs to be run in the network epoch.
 */
void
rib_unsubscribe(struct rib_subscription *rs)
{
	struct rib_head *rnh = rs->rnh;

	NET_EPOCH_ASSERT();

	RIB_WLOCK(rnh);
	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
	RIB_WUNLOCK(rnh);

	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);
}

void
rib_unsubscribe_locked(struct rib_subscription *rs)
{
	struct rib_head *rnh = rs->rnh;

	NET_EPOCH_ASSERT();
	RIB_WLOCK_ASSERT(rnh);

	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);

	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);
}

/*
 * Epoch callback indicating subscription is safe to destroy
 */
static void
destroy_subscription_epoch(epoch_context_t ctx)
{
	struct rib_subscription *rs;

	rs = __containerof(ctx, struct rib_subscription, epoch_ctx);

	free(rs, M_RTABLE);
}

void
rib_init_subscriptions(struct rib_head *rnh)
{

	CK_STAILQ_INIT(&rnh->rnh_subscribers);
}

void
rib_destroy_subscriptions(struct rib_head *rnh)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) {
		CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
		epoch_call(net_epoch_preempt, destroy_subscription_epoch,
		    &rs->epoch_ctx);
	}
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);
}