1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mpath.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>

#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif

#include <vm/uma.h>

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assumes they are called in net epoch.
 */

/*
 * Single registered subscriber for rib change callbacks.
 * Linked into rib_head's rnh_subscribers list; freed via an epoch
 * callback once no list traversal can still observe it.
 */
struct rib_subscription {
	CK_STAILQ_ENTRY(rib_subscription)	next;		/* rnh_subscribers linkage */
	rib_subscription_cb_t			*func;		/* user callback */
	void					*arg;		/* opaque callback argument */
	enum rib_subscription_type		type;		/* immediate or delayed */
	struct epoch_context			epoch_ctx;	/* deferred-free context */
};

static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc);
static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc);

static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);

static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc);

static void destroy_subscription_epoch(epoch_context_t ctx);

/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)

/*
 * Creates the per-vnet UMA zone backing struct rtentry allocations.
 */
void
vnet_rtzone_init()
{

	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

#ifdef VIMAGE
/*
 * Destroys the per-vnet rtentry zone on vnet teardown.
 */
void
vnet_rtzone_destroy()
{

	uma_zdestroy(V_rtzone);
}
#endif

/*
 * Final destruction of @rt: drops the nexthop reference and returns
 * the entry to the UMA zone.  Runs from an epoch callback, after all
 * epoch readers are guaranteed to have stopped referencing the entry.
 */
static void
destroy_rtentry(struct rtentry *rt)
{

	/*
	 * At this moment rnh, nh_control may be already freed.
	 * nhop interface may have been migrated to a different vnet.
	 * Use vnet stored in the nexthop to delete the entry.
	 */
	CURVNET_SET(nhop_get_vnet(rt->rt_nhop));

	/* Unreference nexthop */
	nhop_free(rt->rt_nhop);

	uma_zfree(V_rtzone, rt);

	CURVNET_RESTORE();
}

/*
 * Epoch callback indicating rtentry is safe to destroy
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
	struct rtentry *rt;

	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

	destroy_rtentry(rt);
}

/*
 * Schedule rtentry deletion
 */
static void
rtfree(struct rtentry *rt)
{

	KASSERT(rt != NULL, ("%s: NULL rt", __func__));

	/* Defer the actual free until the current net epoch drains. */
	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
	    &rt->rt_epoch_ctx);
}

/*
 * Looks up the routing table head for @fibnum and the address family
 * of @info's destination sockaddr.
 * Returns NULL if the family has no table in this fib.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

/*
 * Check if specified @gw matches gw data in the nexthop @nh.
 *
 * Returns true if matches, false otherwise.
182 */ 183 static bool 184 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 185 { 186 187 if (nh->gw_sa.sa_family != gw->sa_family) 188 return (false); 189 190 switch (gw->sa_family) { 191 case AF_INET: 192 return (nh->gw4_sa.sin_addr.s_addr == 193 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 194 case AF_INET6: 195 { 196 const struct sockaddr_in6 *gw6; 197 gw6 = (const struct sockaddr_in6 *)gw; 198 199 /* 200 * Currently (2020-09) IPv6 gws in kernel have their 201 * scope embedded. Once this becomes false, this code 202 * has to be revisited. 203 */ 204 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 205 &gw6->sin6_addr)) 206 return (true); 207 return (false); 208 } 209 case AF_LINK: 210 { 211 const struct sockaddr_dl *sdl; 212 sdl = (const struct sockaddr_dl *)gw; 213 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 214 } 215 default: 216 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 217 } 218 219 /* NOTREACHED */ 220 return (false); 221 } 222 223 /* 224 * Checks if data in @info matches nexhop @nh. 225 * 226 * Returns 0 on success, 227 * ESRCH if not matched, 228 * ENOENT if filter function returned false 229 */ 230 int 231 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 232 const struct nhop_object *nh) 233 { 234 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 235 236 if (info->rti_filter != NULL) { 237 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 238 return (ENOENT); 239 else 240 return (0); 241 } 242 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 243 return (ESRCH); 244 245 return (0); 246 } 247 248 /* 249 * Checks if nexhop @nh can be rewritten by data in @info because 250 * of higher "priority". Currently the only case for such scenario 251 * is kernel installing interface routes, marked by RTF_PINNED flag. 
252 * 253 * Returns: 254 * 1 if @info data has higher priority 255 * 0 if priority is the same 256 * -1 if priority is lower 257 */ 258 int 259 can_override_nhop(const struct rt_addrinfo *info, const struct nhop_object *nh) 260 { 261 262 if (info->rti_flags & RTF_PINNED) { 263 return (NH_IS_PINNED(nh)) ? 0 : 1; 264 } else { 265 return (NH_IS_PINNED(nh)) ? -1 : 0; 266 } 267 } 268 269 /* 270 * Runs exact prefix match based on @dst and @netmask. 271 * Returns matched @rtentry if found or NULL. 272 * If rtentry was found, saves nexthop / weight value into @rnd. 273 */ 274 static struct rtentry * 275 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 276 const struct sockaddr *netmask, struct route_nhop_data *rnd) 277 { 278 struct rtentry *rt; 279 280 RIB_LOCK_ASSERT(rnh); 281 282 rt = (struct rtentry *)rnh->rnh_lookup(__DECONST(void *, dst), 283 __DECONST(void *, netmask), &rnh->head); 284 if (rt != NULL) { 285 rnd->rnd_nhop = rt->rt_nhop; 286 rnd->rnd_weight = rt->rt_weight; 287 } else { 288 rnd->rnd_nhop = NULL; 289 rnd->rnd_weight = 0; 290 } 291 292 return (rt); 293 } 294 295 /* 296 * Runs exact prefix match based on dst/netmask from @info. 297 * Assumes RIB lock is held. 298 * Returns matched @rtentry if found or NULL. 299 * If rtentry was found, saves nexthop / weight value into @rnd. 300 */ 301 struct rtentry * 302 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 303 struct route_nhop_data *rnd) 304 { 305 struct rtentry *rt; 306 307 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 308 info->rti_info[RTAX_NETMASK], rnd); 309 310 return (rt); 311 } 312 313 /* 314 * Adds route defined by @info into the kernel table specified by @fibnum and 315 * sa_family in @info->rti_info[RTAX_DST]. 316 * 317 * Returns 0 on success and fills in operation metadata into @rc. 
318 */ 319 int 320 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 321 struct rib_cmd_info *rc) 322 { 323 struct rib_head *rnh; 324 int error; 325 326 NET_EPOCH_ASSERT(); 327 328 rnh = get_rnh(fibnum, info); 329 if (rnh == NULL) 330 return (EAFNOSUPPORT); 331 332 /* 333 * Check consistency between RTF_HOST flag and netmask 334 * existence. 335 */ 336 if (info->rti_flags & RTF_HOST) 337 info->rti_info[RTAX_NETMASK] = NULL; 338 else if (info->rti_info[RTAX_NETMASK] == NULL) 339 return (EINVAL); 340 341 bzero(rc, sizeof(struct rib_cmd_info)); 342 rc->rc_cmd = RTM_ADD; 343 344 error = add_route(rnh, info, rc); 345 if (error == 0) 346 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 347 348 return (error); 349 } 350 351 /* 352 * Creates rtentry and nexthop based on @info data. 353 * Return 0 and fills in rtentry into @prt on success, 354 * return errno otherwise. 355 */ 356 static int 357 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 358 struct rtentry **prt) 359 { 360 struct sockaddr *dst, *ndst, *gateway, *netmask; 361 struct rtentry *rt; 362 struct nhop_object *nh; 363 struct ifaddr *ifa; 364 int error, flags; 365 366 dst = info->rti_info[RTAX_DST]; 367 gateway = info->rti_info[RTAX_GATEWAY]; 368 netmask = info->rti_info[RTAX_NETMASK]; 369 flags = info->rti_flags; 370 371 if ((flags & RTF_GATEWAY) && !gateway) 372 return (EINVAL); 373 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 374 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 375 return (EINVAL); 376 377 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 378 return (EINVAL); 379 380 if (info->rti_ifa == NULL) { 381 error = rt_getifa_fib(info, rnh->rib_fibnum); 382 if (error) 383 return (error); 384 } else { 385 ifa_ref(info->rti_ifa); 386 } 387 388 error = nhop_create_from_info(rnh, info, &nh); 389 if (error != 0) { 390 ifa_free(info->rti_ifa); 391 return (error); 392 } 393 394 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 395 if (rt == NULL) { 
396 ifa_free(info->rti_ifa); 397 nhop_free(nh); 398 return (ENOBUFS); 399 } 400 rt->rte_flags = RTF_UP | flags; 401 rt->rt_nhop = nh; 402 403 /* Fill in dst */ 404 memcpy(&rt->rt_dst, dst, dst->sa_len); 405 rt_key(rt) = &rt->rt_dst; 406 407 /* 408 * point to the (possibly newly malloc'd) dest address. 409 */ 410 ndst = (struct sockaddr *)rt_key(rt); 411 412 /* 413 * make sure it contains the value we want (masked if needed). 414 */ 415 if (netmask) { 416 rt_maskedcopy(dst, ndst, netmask); 417 } else 418 bcopy(dst, ndst, dst->sa_len); 419 420 /* 421 * We use the ifa reference returned by rt_getifa_fib(). 422 * This moved from below so that rnh->rnh_addaddr() can 423 * examine the ifa and ifa->ifa_ifp if it so desires. 424 */ 425 ifa = info->rti_ifa; 426 rt->rt_weight = 1; 427 428 rt_setmetrics(info, rt); 429 430 *prt = rt; 431 return (0); 432 } 433 434 static int 435 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 436 struct rib_cmd_info *rc) 437 { 438 struct nhop_object *nh_orig; 439 struct route_nhop_data rnd; 440 struct nhop_object *nh; 441 struct rtentry *rt, *rt_orig; 442 int error; 443 444 error = create_rtentry(rnh, info, &rt); 445 if (error != 0) 446 return (error); 447 448 rnd.rnd_nhop = rt->rt_nhop; 449 rnd.rnd_weight = rt->rt_weight; 450 nh = rt->rt_nhop; 451 452 RIB_WLOCK(rnh); 453 #ifdef RADIX_MPATH 454 struct sockaddr *netmask; 455 netmask = info->rti_info[RTAX_NETMASK]; 456 /* do not permit exactly the same dst/mask/gw pair */ 457 if (rt_mpath_capable(rnh) && 458 rt_mpath_conflict(rnh, rt, netmask)) { 459 RIB_WUNLOCK(rnh); 460 461 nhop_free(nh); 462 uma_zfree(V_rtzone, rt); 463 return (EEXIST); 464 } 465 #endif 466 error = add_route_nhop(rnh, rt, info, &rnd, rc); 467 if (error == 0) { 468 RIB_WUNLOCK(rnh); 469 return (0); 470 } 471 472 /* addition failed. 
Lookup prefix in the rib to determine the cause */ 473 rt_orig = lookup_prefix(rnh, info, &rnd); 474 if (rt_orig == NULL) { 475 /* No prefix -> rnh_addaddr() failed to allocate memory */ 476 RIB_WUNLOCK(rnh); 477 nhop_free(nh); 478 uma_zfree(V_rtzone, rt); 479 return (ENOMEM); 480 } 481 482 /* We have existing route in the RIB. */ 483 nh_orig = rnd.rnd_nhop; 484 /* Check if new route has higher preference */ 485 if (can_override_nhop(info, nh_orig) > 0) { 486 /* Update nexthop to the new route */ 487 change_route_nhop(rnh, rt_orig, info, &rnd, rc); 488 RIB_WUNLOCK(rnh); 489 uma_zfree(V_rtzone, rt); 490 nhop_free(nh_orig); 491 return (0); 492 } 493 494 RIB_WUNLOCK(rnh); 495 496 /* Unable to add - another route with the same preference exists */ 497 error = EEXIST; 498 499 nhop_free(nh); 500 uma_zfree(V_rtzone, rt); 501 502 return (error); 503 } 504 505 /* 506 * Removes route defined by @info from the kernel table specified by @fibnum and 507 * sa_family in @info->rti_info[RTAX_DST]. 508 * 509 * Returns 0 on success and fills in operation metadata into @rc. 
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst_orig, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst_orig = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst_orig->sa_len > sizeof(mdst))
			return (EINVAL);
		rt_maskedcopy(dst_orig, (struct sockaddr *)&mdst, netmask);
		info->rti_info[RTAX_DST] = (struct sockaddr *)&mdst;
	}
	error = del_route(rnh, info, rc);
	/* Restore the caller's dst pointer: mdst is on our stack. */
	info->rti_info[RTAX_DST] = dst_orig;

	return (error);
}

/*
 * Conditionally unlinks rtentry matching data inside @info from @rnh.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found,
 * EADDRINUSE - if trying to delete higher priority route.
 * ENOENT - if supplied filter function returned 0 (not matched).
 */
static int
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rtentry *rt;
	struct nhop_object *nh;
	struct radix_node *rn;
	struct route_nhop_data rnd;
	int error;

	rt = lookup_prefix(rnh, info, &rnd);
	if (rt == NULL)
		return (ESRCH);

	nh = rt->rt_nhop;

	/* Honour gateway/filter restrictions supplied by the caller. */
	error = check_info_match_nhop(info, rt, nh);
	if (error != 0)
		return (error);

	/* Refuse to delete a higher-priority (pinned) route. */
	if (can_override_nhop(info, nh) < 0)
		return (EADDRINUSE);

	/*
	 * Remove the item from the tree and return it.
	 * Complain if it is not there and do no more processing.
	 */
#ifdef RADIX_MPATH
	info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
	if (rt_mpath_capable(rnh)) {
		rn = rt_mpath_unlink(rnh, info, rt, &error);
		if (error != 0)
			return (error);
	} else
#endif
	rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);
	if (rn == NULL)
		return (ESRCH);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic ("rtrequest delete");

	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	/* Finalize notification */
	rnh->rnh_gen++;
	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_weight = rt->rt_weight;
	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Locked wrapper around rt_unlinkrte(): takes the RIB write lock,
 * sends the delayed notification and schedules the unlinked rtentry
 * for epoch-deferred destruction.
 */
static int
del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	RIB_WLOCK(rnh);
	error = rt_unlinkrte(rnh, info, rc);
	RIB_WUNLOCK(rnh);
	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/*
	 * If the caller wants it, then it can have it,
	 * the entry will be deleted after the end of the current epoch.
	 */
	rtfree(rc->rc_rt);

	return (0);
}

/*
 * Changes the route identified by dst/netmask in @info inside the
 * table specified by @fibnum, retrying on concurrent modification.
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL))
		return (EINVAL);

	/*
	 * route change is done in multiple steps, with dropping and
	 * reacquiring lock. In the situations with multiple processes
	 * changes the same route in can lead to the case when route
	 * is changed between the steps. Address it by retrying the operation
	 * multiple times before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

#ifdef RADIX_MPATH
	/*
	 * If we got multipath routes,
	 * we require users to specify a matching RTAX_GATEWAY.
	 */
	if (rt_mpath_capable(rnh)) {
		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (rt == NULL) {
			RIB_RUNLOCK(rnh);
			return (ESRCH);
		}
	}
#endif
	/* Snapshot current nhop/weight to detect concurrent changes. */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route(rnh, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Unlocked part of the route change: resolves a new ifa if required,
 * builds the new nexthop from @info and the existing one, then
 * attempts the conditional swap.
 * Returns EAGAIN if the route changed underneath us (caller retries).
 */
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
	int error = 0;
	int free_ifa = 0;
	struct nhop_object *nh, *nh_orig;
	struct route_nhop_data rnd_new;

	nh = NULL;
	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		/* rt_getifa_fib() may set rti_ifa even on failure. */
		if (info->rti_ifa != NULL)
			free_ifa = 1;

		if (error != 0) {
			if (free_ifa) {
				ifa_free(info->rti_ifa);
				info->rti_ifa = NULL;
			}

			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
	/* Drop the ifa reference obtained from rt_getifa_fib() above. */
	if (free_ifa) {
		ifa_free(info->rti_ifa);
		info->rti_ifa = NULL;
	}
	if (error != 0)
		return (error);

	rnd_new.rnd_nhop = nh;
	if (info->rti_mflags & RTV_WEIGHT)
		rnd_new.rnd_weight = info->rti_rmx->rmx_weight;
	else
		rnd_new.rnd_weight = rnd_orig->rnd_weight;

	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd_new to @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct radix_node *rn;
	int error = 0;

	RIB_WLOCK_ASSERT(rnh);

	ndst = (struct sockaddr *)rt_key(rt);
	netmask = info->rti_info[RTAX_NETMASK];

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		/* Track expiring (temporal) routes for the GC callout. */
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rnh->rnh_gen++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else {
		/* Existing route or memory allocation failure */
		error = EEXIST;
	}

	return (error);
}

/*
 * Switch @rt nhop/weigh to the ones specified in @rnd.
 * Conditionally set rt_expire if set in @info.
 * Returns 0 on success.
 */
static int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop != NULL) {
		/* Changing expiration & nexthop & weight to a new one */
		rt_setmetrics(info, rt);
		rt->rt_nhop = rnd->rnd_nhop;
		rt->rt_weight = rnd->rnd_weight;
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);
	} else {
		/* Route deletion requested. */
		struct sockaddr *ndst, *netmask;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
		if (rn == NULL)
			return (ESRCH);
	}

	/* Finalize notification */
	rnh->rnh_gen++;

	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @nhd_orig is
 * consistent with the current route data.
 * Nexthop in @nhd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	RIB_WLOCK(rnh);

	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		/* Success: the replaced nexthop reference is dropped here. */
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failure: the new nexthop was not consumed; release it. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specificed by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/* State shared across one rib_walk_del() tree walk. */
struct rt_delinfo
{
	struct rt_addrinfo info;	/* match conditions (filter + data) */
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked entries to GC */
	struct rib_cmd_info rc;		/* scratch result record */
};

/*
 * Conditionally unlinks @rn from radix tree based
 * on info data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di;
	struct rt_addrinfo *info;
	struct rtentry *rt;
	int error;

	di = (struct rt_delinfo *)arg;
	rt = (struct rtentry *)rn;
	info = &di->info;

	/* Describe the current entry so rt_unlinkrte() can match it. */
	info->rti_info[RTAX_DST] = rt_key(rt);
	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
	info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;

	error = rt_unlinkrte(di->rnh, info, &di->rc);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (error == 0) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
	}

	/* Always 0: a walker error would abort the tree traversal. */
	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report)
{
	struct rib_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;
	di.rnh = rnh;
	di.rc.rc_cmd = RTM_DELETE;

	NET_EPOCH_ENTER(et);

	/* Unlink all matching entries under a single write lock. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		/* TODO std rt -> rt_addrinfo export */
		di.info.rti_info[RTAX_DST] = rt_key(rt);
		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);

		if (report)
			rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0,
			    fibnum);
		/* Epoch-deferred destruction of the unlinked entry. */
		rtfree(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * Fans a rib change notification out to every subscriber registered
 * on @rnh for notification class @type.
 */
static void
rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc)
{
	struct rib_subscription *rs;

	CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) {
		if (rs->type == type)
			rs->func(rnh, rc, rs->arg);
	}
}

/*
 * Allocates and fills in a new subscription structure.
 * Returns NULL on allocation failure (only possible when @waitok is
 * false, as M_WAITOK allocations do not fail).
 */
static struct rib_subscription *
allocate_subscription(rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	int flags = M_ZERO | (waitok ? M_WAITOK : 0);

	rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
	if (rs == NULL)
		return (NULL);

	rs->func = f;
	rs->arg = arg;
	rs->type = type;

	return (rs);
}

/*
 * Subscribe for the changes in the routing table specified by @fibnum and
 * @family.
 *
 * Returns pointer to the subscription structure on success.
1080 */ 1081 struct rib_subscription * 1082 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 1083 enum rib_subscription_type type, bool waitok) 1084 { 1085 struct rib_head *rnh; 1086 struct rib_subscription *rs; 1087 struct epoch_tracker et; 1088 1089 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1090 return (NULL); 1091 1092 NET_EPOCH_ENTER(et); 1093 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1094 rnh = rt_tables_get_rnh(fibnum, family); 1095 1096 RIB_WLOCK(rnh); 1097 CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next); 1098 RIB_WUNLOCK(rnh); 1099 NET_EPOCH_EXIT(et); 1100 1101 return (rs); 1102 } 1103 1104 struct rib_subscription * 1105 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 1106 enum rib_subscription_type type, bool waitok) 1107 { 1108 struct rib_subscription *rs; 1109 struct epoch_tracker et; 1110 1111 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 1112 return (NULL); 1113 1114 NET_EPOCH_ENTER(et); 1115 RIB_WLOCK(rnh); 1116 CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next); 1117 RIB_WUNLOCK(rnh); 1118 NET_EPOCH_EXIT(et); 1119 1120 return (rs); 1121 } 1122 1123 /* 1124 * Remove rtable subscription @rs from the table specified by @fibnum 1125 * and @family. 1126 * Needs to be run in network epoch. 1127 * 1128 * Returns 0 on success. 
1129 */ 1130 int 1131 rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs) 1132 { 1133 struct rib_head *rnh; 1134 1135 NET_EPOCH_ASSERT(); 1136 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1137 rnh = rt_tables_get_rnh(fibnum, family); 1138 1139 if (rnh == NULL) 1140 return (ENOENT); 1141 1142 RIB_WLOCK(rnh); 1143 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1144 RIB_WUNLOCK(rnh); 1145 1146 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1147 &rs->epoch_ctx); 1148 1149 return (0); 1150 } 1151 1152 /* 1153 * Epoch callback indicating subscription is safe to destroy 1154 */ 1155 static void 1156 destroy_subscription_epoch(epoch_context_t ctx) 1157 { 1158 struct rib_subscription *rs; 1159 1160 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1161 1162 free(rs, M_RTABLE); 1163 } 1164 1165 void 1166 rib_init_subscriptions(struct rib_head *rnh) 1167 { 1168 1169 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1170 } 1171 1172 void 1173 rib_destroy_subscriptions(struct rib_head *rnh) 1174 { 1175 struct rib_subscription *rs; 1176 struct epoch_tracker et; 1177 1178 NET_EPOCH_ENTER(et); 1179 RIB_WLOCK(rnh); 1180 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1181 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1182 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1183 &rs->epoch_ctx); 1184 } 1185 RIB_WUNLOCK(rnh); 1186 NET_EPOCH_EXIT(et); 1187 } 1188