1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_mpath.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 57 #ifdef RADIX_MPATH 58 #include <net/radix_mpath.h> 59 #endif 60 61 #include <vm/uma.h> 62 63 /* 64 * This file contains control plane routing tables functions. 65 * 66 * All functions assumes they are called in net epoch. 67 */ 68 69 struct rib_subscription { 70 CK_STAILQ_ENTRY(rib_subscription) next; 71 rib_subscription_cb_t *func; 72 void *arg; 73 enum rib_subscription_type type; 74 struct epoch_context epoch_ctx; 75 }; 76 77 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 78 struct rib_cmd_info *rc); 79 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 80 struct rt_addrinfo *info, struct route_nhop_data *rnd, 81 struct rib_cmd_info *rc); 82 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct rib_cmd_info *rc); 84 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 85 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 86 static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 87 struct rt_addrinfo *info, struct route_nhop_data *rnd, 88 struct rib_cmd_info *rc); 89 90 static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, 91 struct rib_cmd_info *rc); 92 93 static void rib_notify(struct rib_head *rnh, enum 
rib_subscription_type type, 94 struct rib_cmd_info *rc); 95 96 static void destroy_subscription_epoch(epoch_context_t ctx); 97 98 /* Routing table UMA zone */ 99 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 100 #define V_rtzone VNET(rtzone) 101 102 void 103 vnet_rtzone_init() 104 { 105 106 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 107 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 108 } 109 110 #ifdef VIMAGE 111 void 112 vnet_rtzone_destroy() 113 { 114 115 uma_zdestroy(V_rtzone); 116 } 117 #endif 118 119 static void 120 destroy_rtentry(struct rtentry *rt) 121 { 122 123 /* 124 * At this moment rnh, nh_control may be already freed. 125 * nhop interface may have been migrated to a different vnet. 126 * Use vnet stored in the nexthop to delete the entry. 127 */ 128 CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); 129 130 /* Unreference nexthop */ 131 nhop_free(rt->rt_nhop); 132 133 uma_zfree(V_rtzone, rt); 134 135 CURVNET_RESTORE(); 136 } 137 138 /* 139 * Epoch callback indicating rtentry is safe to destroy 140 */ 141 static void 142 destroy_rtentry_epoch(epoch_context_t ctx) 143 { 144 struct rtentry *rt; 145 146 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 147 148 destroy_rtentry(rt); 149 } 150 151 /* 152 * Schedule rtentry deletion 153 */ 154 static void 155 rtfree(struct rtentry *rt) 156 { 157 158 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 159 160 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 161 &rt->rt_epoch_ctx); 162 } 163 164 static struct rib_head * 165 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 166 { 167 struct rib_head *rnh; 168 struct sockaddr *dst; 169 170 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 171 172 dst = info->rti_info[RTAX_DST]; 173 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 174 175 return (rnh); 176 } 177 178 static int 179 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 180 { 181 uint32_t weight; 182 183 if (info->rti_mflags & RTV_WEIGHT) 184 weight = 
info->rti_rmx->rmx_weight; 185 else 186 weight = default_weight; 187 /* Keep upper 1 byte for adm distance purposes */ 188 if (weight > RT_MAX_WEIGHT) 189 weight = RT_MAX_WEIGHT; 190 191 return (weight); 192 } 193 194 static void 195 rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info) 196 { 197 198 /* Kernel -> userland timebase conversion. */ 199 if (info->rti_mflags & RTV_EXPIRE) 200 rt->rt_expire = info->rti_rmx->rmx_expire ? 201 info->rti_rmx->rmx_expire - time_second + time_uptime : 0; 202 } 203 204 /* 205 * Check if specified @gw matches gw data in the nexthop @nh. 206 * 207 * Returns true if matches, false otherwise. 208 */ 209 static bool 210 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 211 { 212 213 if (nh->gw_sa.sa_family != gw->sa_family) 214 return (false); 215 216 switch (gw->sa_family) { 217 case AF_INET: 218 return (nh->gw4_sa.sin_addr.s_addr == 219 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 220 case AF_INET6: 221 { 222 const struct sockaddr_in6 *gw6; 223 gw6 = (const struct sockaddr_in6 *)gw; 224 225 /* 226 * Currently (2020-09) IPv6 gws in kernel have their 227 * scope embedded. Once this becomes false, this code 228 * has to be revisited. 229 */ 230 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 231 &gw6->sin6_addr)) 232 return (true); 233 return (false); 234 } 235 case AF_LINK: 236 { 237 const struct sockaddr_dl *sdl; 238 sdl = (const struct sockaddr_dl *)gw; 239 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 240 } 241 default: 242 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 243 } 244 245 /* NOTREACHED */ 246 return (false); 247 } 248 249 /* 250 * Checks if data in @info matches nexhop @nh. 
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	/* A caller-supplied filter, when present, overrides gateway matching. */
	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Checks if nexthop @nh can be rewritten by data in @info because
 * of higher "priority". Currently the only case for such scenario
 * is kernel installing interface routes, marked by RTF_PINNED flag.
 *
 * Returns:
 * 1 if @info data has higher priority
 * 0 if priority is the same
 * -1 if priority is lower
 */
int
can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh)
{

	if (info->rti_flags & RTF_PINNED) {
		return (NH_IS_PINNED(nh)) ? 0 : 1;
	} else {
		return (NH_IS_PINNED(nh)) ? -1 : 0;
	}
}

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst),
	    __DECONST(void *, netmask), &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		/* Not found: report an empty nexthop/weight pair. */
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL)
		return (EINVAL);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}

/*
 * Creates rtentry and nexthop based on @info data.
 * Return 0 and fills in rtentry into @prt on success,
 * return errno otherwise.
 */
static int
create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rtentry **prt)
{
	struct sockaddr *dst, *ndst, *gateway, *netmask;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct ifaddr *ifa;
	int error, flags;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];
	flags = info->rti_flags;

	/* Gateway routes must carry a gateway of a compatible family. */
	if ((flags & RTF_GATEWAY) && !gateway)
		return (EINVAL);
	if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
	    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
		return (EINVAL);

	/* Destination must fit in the rtentry-embedded dst buffer. */
	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb))
		return (EINVAL);

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	} else {
		/* Take our own reference to the caller-provided ifa. */
		ifa_ref(info->rti_ifa);
	}

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		ifa_free(info->rti_ifa);
		return (error);
	}

	rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
	if (rt == NULL) {
		ifa_free(info->rti_ifa);
		nhop_free(nh);
		return (ENOBUFS);
	}
	rt->rte_flags = (RTF_UP | flags) & RTE_RT_FLAG_MASK;
	rt->rt_nhop = nh;

	/* Fill in dst */
	memcpy(&rt->rt_dst, dst, dst->sa_len);
	rt_key(rt) = &rt->rt_dst;

	/*
	 * point to the (possibly newly malloc'd) dest address.
	 */
	ndst = (struct sockaddr *)rt_key(rt);

	/*
	 * make sure it contains the value we want (masked if needed).
	 */
	if (netmask) {
		rt_maskedcopy(dst, ndst, netmask);
	} else
		bcopy(dst, ndst, dst->sa_len);

	/*
	 * We use the ifa reference returned by rt_getifa_fib().
	 * This moved from below so that rnh->rnh_addaddr() can
	 * examine the ifa and ifa->ifa_ifp if it so desires.
	 */
	ifa = info->rti_ifa;
	rt->rt_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);
	rt_set_expire_info(rt, info);

	/* Ownership of rt (and its nexthop reference) passes to the caller. */
	*prt = rt;
	return (0);
}

/*
 * Inserts the route created from @info into @rnh.
 * On collision, either overrides a lower-priority (non-pinned) route
 * or fails with EEXIST.  Returns 0 on success.
 */
static int
add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd;
	struct nhop_object *nh;
	struct rtentry *rt, *rt_orig;
	int error;

	error = create_rtentry(rnh, info, &rt);
	if (error != 0)
		return (error);

	rnd.rnd_nhop = rt->rt_nhop;
	rnd.rnd_weight = rt->rt_weight;
	nh = rt->rt_nhop;

	RIB_WLOCK(rnh);
#ifdef RADIX_MPATH
	struct sockaddr *netmask;
	netmask = info->rti_info[RTAX_NETMASK];
	/* do not permit exactly the same dst/mask/gw pair */
	if (rt_mpath_capable(rnh) &&
	    rt_mpath_conflict(rnh, rt, netmask)) {
		RIB_WUNLOCK(rnh);

		nhop_free(nh);
		uma_zfree(V_rtzone, rt);
		return (EEXIST);
	}
#endif
	error = add_route_nhop(rnh, rt, info, &rnd, rc);
	if (error == 0) {
		RIB_WUNLOCK(rnh);
		return (0);
	}

	/* addition failed. Lookup prefix in the rib to determine the cause */
	rt_orig = lookup_prefix(rnh, info, &rnd);
	if (rt_orig == NULL) {
		/* No prefix -> rnh_addaddr() failed to allocate memory */
		RIB_WUNLOCK(rnh);
		nhop_free(nh);
		uma_zfree(V_rtzone, rt);
		return (ENOMEM);
	}

	/* We have existing route in the RIB. */
	nh_orig = rnd.rnd_nhop;
	/* Check if new route has higher preference */
	if (can_override_nhop(info, nh_orig) > 0) {
		/*
		 * Update nexthop to the new route.
		 * The new nexthop reference moves into rt_orig;
		 * only the spare rtentry and the old nexthop are freed.
		 */
		change_route_nhop(rnh, rt_orig, info, &rnd, rc);
		RIB_WUNLOCK(rnh);
		uma_zfree(V_rtzone, rt);
		nhop_free(nh_orig);
		return (0);
	}

	RIB_WUNLOCK(rnh);

	/* Unable to add - another route with the same preference exists */
	error = EEXIST;

	nhop_free(nh);
	uma_zfree(V_rtzone, rt);

	return (error);
}

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst_orig, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst_orig = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst_orig->sa_len > sizeof(mdst))
			return (EINVAL);
		rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask);
		info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst;
	}
	error = del_route(rnh, info, rc);
	/* Restore caller's dst pointer (mdst is stack-local). */
	info->rti_info[RTAX_DST] = dst_orig;

	return (error);
}

/*
 * Conditionally unlinks rtentry matching data inside @info from @rnh.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found,
 * EADDRINUSE - if trying to delete higher priority route.
 * ENOENT - if supplied filter function returned 0 (not matched).
 */
static int
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rtentry *rt;
	struct nhop_object *nh;
	struct radix_node *rn;
	struct route_nhop_data rnd;
	int error;

	/* Called with the RIB write lock held (see del_route/rt_checkdelroute). */
	rt = lookup_prefix(rnh, info, &rnd);
	if (rt == NULL)
		return (ESRCH);

	nh = rt->rt_nhop;

	error = check_info_match_nhop(info, rt, nh);
	if (error != 0)
		return (error);

	if (can_override_nhop(info, nh) < 0)
		return (EADDRINUSE);

	/*
	 * Remove the item from the tree and return it.
	 * Complain if it is not there and do no more processing.
	 */
#ifdef RADIX_MPATH
	info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
	if (rt_mpath_capable(rnh)) {
		rn = rt_mpath_unlink(rnh, info, rt, &error);
		if (error != 0)
			return (error);
	} else
#endif
	rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);
	if (rn == NULL)
		return (ESRCH);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic ("rtrequest delete");

	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	/* Finalize notification */
	rnh->rnh_gen++;
	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_weight = rt->rt_weight;
	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Unlinks the route described by @info under the RIB write lock and
 * schedules its destruction.  Returns 0 on success, errno otherwise.
 */
static int
del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	RIB_WLOCK(rnh);
	error = rt_unlinkrte(rnh, info, rc);
	RIB_WUNLOCK(rnh);
	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/*
	 * If the caller wants it, then it can have it,
	 * the entry will be deleted after the end of the current epoch.
	 */
	rtfree(rc->rc_rt);

	return (0);
}

/*
 * Changes the route described by @info in table @fibnum.
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL))
		return (EINVAL);

	/*
	 * route change is done in multiple steps, with dropping and
	 * reacquiring lock. In the situations with multiple processes
	 * changes the same route in can lead to the case when route
	 * is changed between the steps. Address it by retrying the operation
	 * multiple times before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

#ifdef RADIX_MPATH
	/*
	 * If we got multipath routes,
	 * we require users to specify a matching RTAX_GATEWAY.
	 */
	if (rt_mpath_capable(rnh)) {
		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (rt == NULL) {
			RIB_RUNLOCK(rnh);
			return (ESRCH);
		}
	}
#endif
	/* Snapshot current nhop/weight; change_route() revalidates later. */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route(rnh, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Builds a new nexthop from @info/@rnd_orig and attempts the
 * conditional swap.  Returns EAGAIN when the route changed underneath
 * us (caller retries), 0 on success, errno otherwise.
 */
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
	int error = 0;
	int free_ifa = 0;
	struct nhop_object *nh, *nh_orig;
	struct route_nhop_data rnd_new;

	nh = NULL;
	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		/* rt_getifa_fib() may set rti_ifa even on error; track it. */
		if (info->rti_ifa != NULL)
			free_ifa = 1;

		if (error != 0) {
			if (free_ifa) {
				ifa_free(info->rti_ifa);
				info->rti_ifa = NULL;
			}

			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
	if (free_ifa) {
		ifa_free(info->rti_ifa);
		info->rti_ifa = NULL;
	}
	if (error != 0)
		return (error);

	rnd_new.rnd_nhop = nh;
	if (info->rti_mflags & RTV_WEIGHT)
		rnd_new.rnd_weight = info->rti_rmx->rmx_weight;
	else
		rnd_new.rnd_weight = rnd_orig->rnd_weight;

	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert
@rt with nhop data from @rnd_new to @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct radix_node *rn;
	int error = 0;

	RIB_WLOCK_ASSERT(rnh);

	ndst = (struct sockaddr *)rt_key(rt);
	netmask = info->rti_info[RTAX_NETMASK];

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rnh->rnh_gen++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else {
		/* Existing route or memory allocation failure */
		error = EEXIST;
	}

	return (error);
}

/*
 * Switch @rt nhop/weigh to the ones specified in @rnd.
 * Conditionally set rt_expire if set in @info.
 * Returns 0 on success.
 */
static int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop != NULL) {
		/* Changing expiration & nexthop & weight to a new one */
		rt_set_expire_info(rt, info);
		rt->rt_nhop = rnd->rnd_nhop;
		rt->rt_weight = rnd->rnd_weight;
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);
	} else {
		/* Route deletion requested. */
		struct sockaddr *ndst, *netmask;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
		if (rn == NULL)
			return (ESRCH);
	}

	/* Finalize notification */
	rnh->rnh_gen++;

	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @nhd_orig is
 * consistent with the current route data.
 * Nexthop in @nhd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	RIB_WLOCK(rnh);

	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		/* Success: the old nexthop reference is released here. */
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failure: release the (unused) new nexthop reference. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specificed by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/* Walker state for rib_walk_del() / rt_checkdelroute(). */
struct rt_delinfo
{
	struct rt_addrinfo info;	/* filter + per-entry addresses */
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked entries to GC */
	struct rib_cmd_info rc;		/* scratch command result */
};

/*
 * Conditionally unlinks @rn from radix tree based
 * on info data passed in @arg.
977 */ 978 static int 979 rt_checkdelroute(struct radix_node *rn, void *arg) 980 { 981 struct rt_delinfo *di; 982 struct rt_addrinfo *info; 983 struct rtentry *rt; 984 int error; 985 986 di = (struct rt_delinfo *)arg; 987 rt = (struct rtentry *)rn; 988 info = &di->info; 989 990 info->rti_info[RTAX_DST] = rt_key(rt); 991 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 992 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 993 994 error = rt_unlinkrte(di->rnh, info, &di->rc); 995 996 /* 997 * Add deleted rtentries to the list to GC them 998 * after dropping the lock. 999 * 1000 * XXX: Delayed notifications not implemented 1001 * for nexthop updates. 1002 */ 1003 if (error == 0) { 1004 /* Add to the list and return */ 1005 rt->rt_chain = di->head; 1006 di->head = rt; 1007 } 1008 1009 return (0); 1010 } 1011 1012 /* 1013 * Iterates over a routing table specified by @fibnum and @family and 1014 * deletes elements marked by @filter_f. 1015 * @fibnum: rtable id 1016 * @family: AF_ address family 1017 * @filter_f: function returning non-zero value for items to delete 1018 * @arg: data to pass to the @filter_f function 1019 * @report: true if rtsock notification is needed. 1020 */ 1021 void 1022 rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) 1023 { 1024 struct rib_head *rnh; 1025 struct rt_delinfo di; 1026 struct rtentry *rt; 1027 struct epoch_tracker et; 1028 1029 rnh = rt_tables_get_rnh(fibnum, family); 1030 if (rnh == NULL) 1031 return; 1032 1033 bzero(&di, sizeof(di)); 1034 di.info.rti_filter = filter_f; 1035 di.info.rti_filterdata = arg; 1036 di.rnh = rnh; 1037 di.rc.rc_cmd = RTM_DELETE; 1038 1039 NET_EPOCH_ENTER(et); 1040 1041 RIB_WLOCK(rnh); 1042 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1043 RIB_WUNLOCK(rnh); 1044 1045 /* We might have something to reclaim. 
*/ 1046 bzero(&di.rc, sizeof(di.rc)); 1047 di.rc.rc_cmd = RTM_DELETE; 1048 while (di.head != NULL) { 1049 rt = di.head; 1050 di.head = rt->rt_chain; 1051 rt->rt_chain = NULL; 1052 1053 di.rc.rc_rt = rt; 1054 di.rc.rc_nh_old = rt->rt_nhop; 1055 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1056 1057 /* TODO std rt -> rt_addrinfo export */ 1058 di.info.rti_info[RTAX_DST] = rt_key(rt); 1059 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 1060 1061 if (report) 1062 rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, 1063 fibnum); 1064 rtfree(rt); 1065 } 1066 1067 NET_EPOCH_EXIT(et); 1068 } 1069 1070 static void 1071 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 1072 struct rib_cmd_info *rc) 1073 { 1074 struct rib_subscription *rs; 1075 1076 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 1077 if (rs->type == type) 1078 rs->func(rnh, rc, rs->arg); 1079 } 1080 } 1081 1082 static struct rib_subscription * 1083 allocate_subscription(rib_subscription_cb_t *f, void *arg, 1084 enum rib_subscription_type type, bool waitok) 1085 { 1086 struct rib_subscription *rs; 1087 int flags = M_ZERO | (waitok ? M_WAITOK : 0); 1088 1089 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 1090 if (rs == NULL) 1091 return (NULL); 1092 1093 rs->func = f; 1094 rs->arg = arg; 1095 rs->type = type; 1096 1097 return (rs); 1098 } 1099 1100 /* 1101 * Subscribe for the changes in the routing table specified by @fibnum and 1102 * @family. 1103 * 1104 * Returns pointer to the subscription structure on success. 
 */
struct rib_subscription *
rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_head *rnh;
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);

	NET_EPOCH_ENTER(et);
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	/*
	 * NOTE(review): rnh is not NULL-checked before RIB_WLOCK() below —
	 * an unsupported @family would dereference NULL here; confirm all
	 * callers pass only supported families.
	 */
	rnh = rt_tables_get_rnh(fibnum, family);

	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

/*
 * Same as rib_subscribe(), but attaches directly to an already-resolved
 * table head @rnh.  Returns the subscription, or NULL on allocation failure.
 */
struct rib_subscription *
rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

/*
 * Remove rtable subscription @rs from the table specified by @fibnum
 * and @family.
 * Needs to be run in network epoch.
 *
 * Returns 0 on success.
 *
 * NOTE(review): "unsibscribe" is a historical misspelling; the symbol name
 * is part of the external interface and is kept as-is for callers.
 */
int
rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);

	if (rnh == NULL)
		return (ENOENT);

	RIB_WLOCK(rnh);
	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
	RIB_WUNLOCK(rnh);

	/* Defer the free until readers in the current epoch are done. */
	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);

	return (0);
}

/*
 * Epoch callback indicating subscription is safe to destroy
 */
static void
destroy_subscription_epoch(epoch_context_t ctx)
{
	struct rib_subscription *rs;

	rs = __containerof(ctx, struct rib_subscription, epoch_ctx);

	free(rs, M_RTABLE);
}

/* Initializes the subscriber list of a freshly-created table head. */
void
rib_init_subscriptions(struct rib_head *rnh)
{

	CK_STAILQ_INIT(&rnh->rnh_subscribers);
}

/* Unlinks and schedules destruction of all subscribers of @rnh. */
void
rib_destroy_subscriptions(struct rib_head *rnh)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) {
		CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
		epoch_call(net_epoch_preempt, destroy_subscription_epoch,
		    &rs->epoch_ctx);
	}
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);
}