1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_mpath.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 57 #ifdef RADIX_MPATH 58 #include <net/radix_mpath.h> 59 #endif 60 61 #include <vm/uma.h> 62 63 /* 64 * This file contains control plane routing tables functions. 65 * 66 * All functions assumes they are called in net epoch. 67 */ 68 69 struct rib_subscription { 70 CK_STAILQ_ENTRY(rib_subscription) next; 71 rib_subscription_cb_t *func; 72 void *arg; 73 enum rib_subscription_type type; 74 struct epoch_context epoch_ctx; 75 }; 76 77 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 78 struct rib_cmd_info *rc); 79 static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt, 80 struct rt_addrinfo *info, struct route_nhop_data *rnd, 81 struct rib_cmd_info *rc); 82 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 83 struct rib_cmd_info *rc); 84 static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, 85 struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc); 86 static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt, 87 struct rt_addrinfo *info, struct route_nhop_data *rnd, 88 struct rib_cmd_info *rc); 89 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 90 struct rib_cmd_info *rc); 91 92 static void 
destroy_subscription_epoch(epoch_context_t ctx); 93 94 /* Routing table UMA zone */ 95 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 96 #define V_rtzone VNET(rtzone) 97 98 void 99 vnet_rtzone_init() 100 { 101 102 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 103 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 104 } 105 106 #ifdef VIMAGE 107 void 108 vnet_rtzone_destroy() 109 { 110 111 uma_zdestroy(V_rtzone); 112 } 113 #endif 114 115 static void 116 destroy_rtentry(struct rtentry *rt) 117 { 118 119 /* 120 * At this moment rnh, nh_control may be already freed. 121 * nhop interface may have been migrated to a different vnet. 122 * Use vnet stored in the nexthop to delete the entry. 123 */ 124 CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); 125 126 /* Unreference nexthop */ 127 nhop_free(rt->rt_nhop); 128 129 uma_zfree(V_rtzone, rt); 130 131 CURVNET_RESTORE(); 132 } 133 134 /* 135 * Epoch callback indicating rtentry is safe to destroy 136 */ 137 static void 138 destroy_rtentry_epoch(epoch_context_t ctx) 139 { 140 struct rtentry *rt; 141 142 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 143 144 destroy_rtentry(rt); 145 } 146 147 /* 148 * Schedule rtentry deletion 149 */ 150 static void 151 rtfree(struct rtentry *rt) 152 { 153 154 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 155 156 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 157 &rt->rt_epoch_ctx); 158 } 159 160 static struct rib_head * 161 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 162 { 163 struct rib_head *rnh; 164 struct sockaddr *dst; 165 166 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 167 168 dst = info->rti_info[RTAX_DST]; 169 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 170 171 return (rnh); 172 } 173 174 /* 175 * Adds route defined by @info into the kernel table specified by @fibnum and 176 * sa_family in @info->rti_info[RTAX_DST]. 177 * 178 * Returns 0 on success and fills in operation metadata into @rc. 
179 */ 180 int 181 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 182 struct rib_cmd_info *rc) 183 { 184 struct rib_head *rnh; 185 186 NET_EPOCH_ASSERT(); 187 188 rnh = get_rnh(fibnum, info); 189 if (rnh == NULL) 190 return (EAFNOSUPPORT); 191 192 /* 193 * Check consistency between RTF_HOST flag and netmask 194 * existence. 195 */ 196 if (info->rti_flags & RTF_HOST) 197 info->rti_info[RTAX_NETMASK] = NULL; 198 else if (info->rti_info[RTAX_NETMASK] == NULL) 199 return (EINVAL); 200 201 bzero(rc, sizeof(struct rib_cmd_info)); 202 rc->rc_cmd = RTM_ADD; 203 204 return (add_route(rnh, info, rc)); 205 } 206 207 /* 208 * Creates rtentry and nexthop based on @info data. 209 * Return 0 and fills in rtentry into @prt on success, 210 * return errno otherwise. 211 */ 212 static int 213 create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info, 214 struct rtentry **prt) 215 { 216 struct sockaddr *dst, *ndst, *gateway, *netmask; 217 struct rtentry *rt; 218 struct nhop_object *nh; 219 struct ifaddr *ifa; 220 int error, flags; 221 222 dst = info->rti_info[RTAX_DST]; 223 gateway = info->rti_info[RTAX_GATEWAY]; 224 netmask = info->rti_info[RTAX_NETMASK]; 225 flags = info->rti_flags; 226 227 if ((flags & RTF_GATEWAY) && !gateway) 228 return (EINVAL); 229 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 230 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 231 return (EINVAL); 232 233 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 234 return (EINVAL); 235 236 if (info->rti_ifa == NULL) { 237 error = rt_getifa_fib(info, rnh->rib_fibnum); 238 if (error) 239 return (error); 240 } else { 241 ifa_ref(info->rti_ifa); 242 } 243 244 error = nhop_create_from_info(rnh, info, &nh); 245 if (error != 0) { 246 ifa_free(info->rti_ifa); 247 return (error); 248 } 249 250 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 251 if (rt == NULL) { 252 ifa_free(info->rti_ifa); 253 nhop_free(nh); 254 return (ENOBUFS); 255 } 256 rt->rte_flags = RTF_UP 
| flags; 257 rt->rt_nhop = nh; 258 259 /* Fill in dst */ 260 memcpy(&rt->rt_dst, dst, dst->sa_len); 261 rt_key(rt) = &rt->rt_dst; 262 263 /* 264 * point to the (possibly newly malloc'd) dest address. 265 */ 266 ndst = (struct sockaddr *)rt_key(rt); 267 268 /* 269 * make sure it contains the value we want (masked if needed). 270 */ 271 if (netmask) { 272 rt_maskedcopy(dst, ndst, netmask); 273 } else 274 bcopy(dst, ndst, dst->sa_len); 275 276 /* 277 * We use the ifa reference returned by rt_getifa_fib(). 278 * This moved from below so that rnh->rnh_addaddr() can 279 * examine the ifa and ifa->ifa_ifp if it so desires. 280 */ 281 ifa = info->rti_ifa; 282 rt->rt_weight = 1; 283 284 rt_setmetrics(info, rt); 285 286 *prt = rt; 287 return (0); 288 } 289 290 static int 291 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 292 struct rib_cmd_info *rc) 293 { 294 struct sockaddr *ndst, *netmask; 295 struct route_nhop_data rnd; 296 struct nhop_object *nh; 297 struct rtentry *rt; 298 int error; 299 300 error = create_rtentry(rnh, info, &rt); 301 if (error != 0) 302 return (error); 303 304 rnd.rnd_nhop = rt->rt_nhop; 305 rnd.rnd_weight = rt->rt_weight; 306 nh = rt->rt_nhop; 307 308 RIB_WLOCK(rnh); 309 #ifdef RADIX_MPATH 310 netmask = info->rti_info[RTAX_NETMASK]; 311 /* do not permit exactly the same dst/mask/gw pair */ 312 if (rt_mpath_capable(rnh) && 313 rt_mpath_conflict(rnh, rt, netmask)) { 314 RIB_WUNLOCK(rnh); 315 316 nhop_free(nh); 317 uma_zfree(V_rtzone, rt); 318 return (EEXIST); 319 } 320 #endif 321 error = add_route_nhop(rnh, rt, info, &rnd, rc); 322 if (error == 0) { 323 rt = NULL; 324 nh = NULL; 325 } else if ((error == EEXIST) && ((info->rti_flags & RTF_PINNED) != 0)) { 326 struct rtentry *rt_orig; 327 struct nhop_object *nh_orig; 328 struct radix_node *rn; 329 330 ndst = (struct sockaddr *)rt_key(rt); 331 netmask = info->rti_info[RTAX_NETMASK]; 332 rn = rnh->rnh_lookup(ndst, netmask, &rnh->head); 333 rt_orig = (struct rtentry *)rn; 334 if (rt_orig != NULL) 
{ 335 nh_orig = rt_orig->rt_nhop; 336 if ((nhop_get_rtflags(nh_orig) & RTF_PINNED) == 0) { 337 /* Current nexhop is not PINNED, can update */ 338 error = change_route_nhop(rnh, rt_orig, 339 info, &rnd, rc); 340 if (error == 0) 341 nh = NULL; 342 } 343 } else 344 error = ENOBUFS; 345 } 346 RIB_WUNLOCK(rnh); 347 348 if (error == 0) 349 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 350 351 if (nh != NULL) 352 nhop_free(nh); 353 if (rt != NULL) 354 uma_zfree(V_rtzone, rt); 355 356 return (error); 357 } 358 359 /* 360 * Removes route defined by @info from the kernel table specified by @fibnum and 361 * sa_family in @info->rti_info[RTAX_DST]. 362 * 363 * Returns 0 on success and fills in operation metadata into @rc. 364 */ 365 int 366 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 367 { 368 struct rib_head *rnh; 369 370 NET_EPOCH_ASSERT(); 371 372 rnh = get_rnh(fibnum, info); 373 if (rnh == NULL) 374 return (EAFNOSUPPORT); 375 376 bzero(rc, sizeof(struct rib_cmd_info)); 377 rc->rc_cmd = RTM_DELETE; 378 379 return (del_route(rnh, info, rc)); 380 } 381 382 /* 383 * Conditionally unlinks rtentry matching data inside @info from @rnh. 384 * Returns unlinked, locked and referenced @rtentry on success, 385 * Returns NULL and sets @perror to: 386 * ESRCH - if prefix was not found, 387 * EADDRINUSE - if trying to delete PINNED route without appropriate flag. 388 * ENOENT - if supplied filter function returned 0 (not matched). 
 */
struct rtentry *
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
{
	struct sockaddr *dst, *netmask;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct radix_node *rn;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	/* Find the candidate entry first; may still be rejected below. */
	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt == NULL) {
		*perror = ESRCH;
		return (NULL);
	}

	nh = rt->rt_nhop;

	if ((info->rti_flags & RTF_PINNED) == 0) {
		/* Check if target route can be deleted */
		if (NH_IS_PINNED(nh)) {
			*perror = EADDRINUSE;
			return (NULL);
		}
	}

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata)==0){
			/* Not matched */
			*perror = ENOENT;
			return (NULL);
		}

		/*
		 * Filter function requested rte deletion.
		 * Ease the caller work by filling in remaining info
		 * from that particular entry.
		 */
		info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
	}

	/*
	 * Remove the item from the tree and return it.
	 * Complain if it is not there and do no more processing.
	 */
	*perror = ESRCH;
#ifdef RADIX_MPATH
	/* Multipath requires gateway-aware unlink of the right sibling. */
	if (rt_mpath_capable(rnh))
		rn = rt_mpath_unlink(rnh, info, rt, perror);
	else
#endif
	rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
	if (rn == NULL)
		return (NULL);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic ("rtrequest delete");

	rt = RNTORT(rn);
	/* Mark unlinked; actual destruction is deferred to the caller. */
	rt->rte_flags &= ~RTF_UP;

	*perror = 0;

	return (rt);
}

/*
 * Unlinks the route matching @info from @rnh under the table write lock,
 * sends immediate/delayed notifications and schedules the unlinked entry
 * for epoch-deferred destruction.
 * Returns 0 on success, errno from rt_unlinkrte() otherwise.
 */
static int
del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	struct rtentry *rt;
	int error;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	/* Lookup is keyed on the masked destination. */
	if (netmask) {
		if (dst->sa_len > sizeof(mdst))
			return (EINVAL);
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	RIB_WLOCK(rnh);
	rt = rt_unlinkrte(rnh, info, &error);
	if (rt != NULL) {
		/* Finalize notification */
		rnh->rnh_gen++;
		rc->rc_rt = rt;
		rc->rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	}
	RIB_WUNLOCK(rnh);
	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/*
	 * If the caller wants it, then it can have it,
	 * the entry will be deleted after the end of the current epoch.
	 */
	rtfree(rt);

	return (0);
}

/*
 * Changes the route matching @info in the table specified by @fibnum.
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL))
		return (EINVAL);

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock.  When multiple processes change the same
	 * route concurrently, the route may change between the steps.
	 * Address it by retrying the operation multiple times before
	 * failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

#ifdef RADIX_MPATH
	/*
	 * If we got multipath routes,
	 * we require users to specify a matching RTAX_GATEWAY.
	 */
	if (rt_mpath_capable(rnh)) {
		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (rt == NULL) {
			RIB_RUNLOCK(rnh);
			return (ESRCH);
		}
	}
#endif
	/* Snapshot the current nhop/weight to detect concurrent changes. */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route(rnh, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Builds a new nexthop from @info and the current nexthop in @rnd_orig,
 * then conditionally applies it via change_route_conditional().
 * Returns 0 on success, EAGAIN if the route changed underneath,
 * errno otherwise.
 */
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
	int error = 0;
	int free_ifa = 0;
	struct nhop_object *nh, *nh_orig;
	struct route_nhop_data rnd_new;

	nh = NULL;
	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		/* Remember whether we now own an ifa reference to drop. */
		if (info->rti_ifa != NULL)
			free_ifa = 1;

		if (error != 0) {
			if (free_ifa) {
				ifa_free(info->rti_ifa);
				info->rti_ifa = NULL;
			}

			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
	if (free_ifa) {
		ifa_free(info->rti_ifa);
		info->rti_ifa = NULL;
	}
	if (error != 0)
		return (error);

	rnd_new.rnd_nhop = nh;
	if (info->rti_mflags & RTV_WEIGHT)
		rnd_new.rnd_weight = info->rti_rmx->rmx_weight;
	else
		rnd_new.rnd_weight = rnd_orig->rnd_weight;

	error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert
 * @rt with nhop data from @rnd to @rnh.
 * Returns 0 on success.
 */
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct sockaddr *ndst, *netmask;
	struct radix_node *rn;
	int error = 0;

	RIB_WLOCK_ASSERT(rnh);

	ndst = (struct sockaddr *)rt_key(rt);
	netmask = info->rti_info[RTAX_NETMASK];

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		/* Track entries with finite lifetime for expiration. */
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rnh->rnh_gen++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else {
		/*
		 * Existing route or memory allocation failure:
		 * rnh_addaddr() returns NULL in both cases.
		 */
		error = EEXIST;
	}

	return (error);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Conditionally set rt_expire if set in @info.
 * A NULL @rnd->rnd_nhop requests deletion of @rt instead.
 * Returns 0 on success.
 */
static int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd,
    struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop != NULL) {
		/* Changing expiration & nexthop & weight to a new one */
		rt_setmetrics(info, rt);
		rt->rt_nhop = rnd->rnd_nhop;
		rt->rt_weight = rnd->rnd_weight;
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);
	} else {
		/* Route deletion requested. */
		struct sockaddr *ndst, *netmask;
		struct radix_node *rn;

		ndst = (struct sockaddr *)rt_key(rt);
		netmask = info->rti_info[RTAX_NETMASK];
		rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
		if (rn == NULL)
			return (ESRCH);
	}

	/* Finalize notification */
	rnh->rnh_gen++;

	rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @nhd_orig is
 * consistent with the current route data.
 * Nexthop in @nhd_new is consumed.
 * Returns 0 on success, EAGAIN if the route changed underneath (with
 * @rnd_orig refreshed to the current data so the caller can retry).
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	RIB_WLOCK(rnh);

	rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route_nhop(rnh, rt, info, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		/* Old nexthop was replaced; drop its reference. */
		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* New nexthop was not consumed; drop it. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of the routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/*
 * State shared between rib_walk_del() and its per-node callback
 * rt_checkdelroute().
 */
struct rt_delinfo
{
	struct rt_addrinfo info;	/* addrinfo passed to rt_unlinkrte() */
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked entries */
	struct rib_cmd_info rc;		/* notification template (RTM_DELETE) */
};

/*
 * Conditionally unlinks @rn from radix tree based
 * on info data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di;
	struct rt_addrinfo *info;
	struct rtentry *rt;
	int error;

	di = (struct rt_delinfo *)arg;
	rt = (struct rtentry *)rn;
	info = &di->info;
	error = 0;

	info->rti_info[RTAX_DST] = rt_key(rt);
	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
	info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;

	rt = rt_unlinkrte(di->rnh, info, &error);
	if (rt == NULL) {
		/* Either not allowed or not matched. Skip entry */
		return (0);
	}

	/* Entry was unlinked. Notify subscribers */
	di->rnh->rnh_gen++;
	di->rc.rc_rt = rt;
	di->rc.rc_nh_old = rt->rt_nhop;
	rib_notify(di->rnh, RIB_NOTIFY_IMMEDIATE, &di->rc);

	/* Add to the list and return */
	rt->rt_chain = di->head;
	di->head = rt;

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report)
{
	struct rib_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;
	di.rnh = rnh;
	di.rc.rc_cmd = RTM_DELETE;

	NET_EPOCH_ENTER(et);

	/* Unlink matching entries under the lock; collect them in di.head. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		/* TODO std rt -> rt_addrinfo export */
		di.info.rti_info[RTAX_DST] = rt_key(rt);
		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);

		if (report)
			rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0,
			    fibnum);
		/* Schedule epoch-deferred destruction of the entry. */
		rtfree(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * Invokes all subscribers of the given @type registered on @rnh,
 * passing them the operation result @rc.
 */
static void
rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc)
{
	struct rib_subscription *rs;

	CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) {
		if (rs->type == type)
			rs->func(rnh, rc, rs->arg);
	}
}

/*
 * Allocates and fills in a subscription structure.
 * Returns NULL on allocation failure (possible only when @waitok
 * is false; M_WAITOK malloc does not fail).
 */
static struct rib_subscription *
allocate_subscription(rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	int flags = M_ZERO | (waitok ? M_WAITOK : 0);

	rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
	if (rs == NULL)
		return (NULL);

	rs->func = f;
	rs->arg = arg;
	rs->type = type;

	return (rs);
}

/*
 * Subscribe for the changes in the routing table specified by @fibnum and
 * @family.
 *
 * Returns pointer to the subscription structure on success.
949 */ 950 struct rib_subscription * 951 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 952 enum rib_subscription_type type, bool waitok) 953 { 954 struct rib_head *rnh; 955 struct rib_subscription *rs; 956 struct epoch_tracker et; 957 958 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 959 return (NULL); 960 961 NET_EPOCH_ENTER(et); 962 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 963 rnh = rt_tables_get_rnh(fibnum, family); 964 965 RIB_WLOCK(rnh); 966 CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next); 967 RIB_WUNLOCK(rnh); 968 NET_EPOCH_EXIT(et); 969 970 return (rs); 971 } 972 973 struct rib_subscription * 974 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 975 enum rib_subscription_type type, bool waitok) 976 { 977 struct rib_subscription *rs; 978 struct epoch_tracker et; 979 980 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 981 return (NULL); 982 983 NET_EPOCH_ENTER(et); 984 RIB_WLOCK(rnh); 985 CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next); 986 RIB_WUNLOCK(rnh); 987 NET_EPOCH_EXIT(et); 988 989 return (rs); 990 } 991 992 /* 993 * Remove rtable subscription @rs from the table specified by @fibnum 994 * and @family. 995 * Needs to be run in network epoch. 996 * 997 * Returns 0 on success. 
998 */ 999 int 1000 rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs) 1001 { 1002 struct rib_head *rnh; 1003 1004 NET_EPOCH_ASSERT(); 1005 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 1006 rnh = rt_tables_get_rnh(fibnum, family); 1007 1008 if (rnh == NULL) 1009 return (ENOENT); 1010 1011 RIB_WLOCK(rnh); 1012 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 1013 RIB_WUNLOCK(rnh); 1014 1015 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1016 &rs->epoch_ctx); 1017 1018 return (0); 1019 } 1020 1021 /* 1022 * Epoch callback indicating subscription is safe to destroy 1023 */ 1024 static void 1025 destroy_subscription_epoch(epoch_context_t ctx) 1026 { 1027 struct rib_subscription *rs; 1028 1029 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 1030 1031 free(rs, M_RTABLE); 1032 } 1033 1034 void 1035 rib_init_subscriptions(struct rib_head *rnh) 1036 { 1037 1038 CK_STAILQ_INIT(&rnh->rnh_subscribers); 1039 } 1040 1041 void 1042 rib_destroy_subscriptions(struct rib_head *rnh) 1043 { 1044 struct rib_subscription *rs; 1045 struct epoch_tracker et; 1046 1047 NET_EPOCH_ENTER(et); 1048 RIB_WLOCK(rnh); 1049 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 1050 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 1051 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 1052 &rs->epoch_ctx); 1053 } 1054 RIB_WUNLOCK(rnh); 1055 NET_EPOCH_EXIT(et); 1056 } 1057