1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mpath.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/shared.h>
#include <netinet/in.h>

#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif

#include <vm/uma.h>

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assumes they are called in net epoch.
 */

/*
 * Routing table change subscriber.
 * @func is invoked with @arg for every table change whose notification
 * stage matches @type (immediate: under the RIB write lock; delayed:
 * after the lock is dropped).  Freed via epoch callback (@epoch_ctx).
 */
struct rib_subscription {
	CK_STAILQ_ENTRY(rib_subscription)	next;
	rib_subscription_cb_t	*func;
	void			*arg;
	enum rib_subscription_type	type;
	struct epoch_context	epoch_ctx;
};

static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route(struct rib_head *, struct rt_addrinfo *,
    struct rib_cmd_info *rc);
static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc);

static void destroy_subscription_epoch(epoch_context_t ctx);

/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)

/*
 * Creates the per-vnet UMA zone used to allocate struct rtentry.
 */
void
vnet_rtzone_init()
{

	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

#ifdef VIMAGE
/*
 * Tears down the per-vnet rtentry zone on vnet destruction.
 */
void
vnet_rtzone_destroy()
{

	uma_zdestroy(V_rtzone);
}
#endif

/*
 * Final destruction of @rt: releases the nexthop reference and returns
 * the entry to the UMA zone.  Runs from an epoch callback, after all
 * readers are guaranteed to be done with the entry.
 */
static void
destroy_rtentry(struct rtentry *rt)
{

	/*
	 * At this moment rnh, nh_control may be already freed.
	 * nhop interface may have been migrated to a different vnet.
	 * Use vnet stored in the nexthop to delete the entry.
	 */
	CURVNET_SET(nhop_get_vnet(rt->rt_nhop));

	/* Unreference nexthop */
	nhop_free(rt->rt_nhop);

	uma_zfree(V_rtzone, rt);

	CURVNET_RESTORE();
}

/*
 * Epoch callback indicating rtentry is safe to destroy
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{
	struct rtentry *rt;

	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);

	destroy_rtentry(rt);
}

/*
 * Schedule rtentry deletion
 *
 * @rt must be locked by the caller; the lock is dropped here and the
 * actual destruction is deferred to the end of the current net epoch.
 */
static void
rtfree(struct rtentry *rt)
{

	KASSERT(rt != NULL, ("%s: NULL rt", __func__));

	RT_LOCK_ASSERT(rt);

	RT_UNLOCK(rt);
	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
	    &rt->rt_epoch_ctx);
}

/*
 * Resolves the rib head for @fibnum and the address family of
 * @info's destination sockaddr.  Returns NULL for an unsupported family.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL)
		return (EINVAL);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	return (add_route(rnh, info, rc));
}

/*
 * Creates a new rtentry from @info and links it into @rnh.
 *
 * Reference flow: an ifa reference is obtained (either from
 * rt_getifa_fib() or via ifa_ref()) and handed to the nexthop created
 * by nhop_create_from_info(); on any failure past that point freeing
 * the nexthop releases the ifa as well.
 *
 * Returns 0 on success, EEXIST if the prefix could not be inserted,
 * or an errno from ifa/nexthop setup.  Fills in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct sockaddr *dst, *ndst, *gateway, *netmask;
	struct rtentry *rt, *rt_old;
	struct nhop_object *nh;
	struct radix_node *rn;
	struct ifaddr *ifa;
	int error, flags;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];
	flags = info->rti_flags;

	/* A gateway route must carry a gateway of a compatible family. */
	if ((flags & RTF_GATEWAY) && !gateway)
		return (EINVAL);
	if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
	    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
		return (EINVAL);

	/* Destination must fit into the rtentry-embedded dst buffer. */
	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb))
		return (EINVAL);

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	} else {
		ifa_ref(info->rti_ifa);
	}

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		ifa_free(info->rti_ifa);
		return (error);
	}

	rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
	if (rt == NULL) {
		ifa_free(info->rti_ifa);
		nhop_free(nh);
		return (ENOBUFS);
	}
	RT_LOCK_INIT(rt);
	rt->rt_flags = RTF_UP | flags;
	rt->rt_nhop = nh;

	/* Fill in dst */
	memcpy(&rt->rt_dst, dst, dst->sa_len);
	rt_key(rt) = &rt->rt_dst;

	/*
	 * point to the (possibly newly malloc'd) dest address.
	 */
	ndst = (struct sockaddr *)rt_key(rt);

	/*
	 * make sure it contains the value we want (masked if needed).
	 */
	if (netmask) {
		rt_maskedcopy(dst, ndst, netmask);
	} else
		bcopy(dst, ndst, dst->sa_len);

	/*
	 * We use the ifa reference returned by rt_getifa_fib().
	 * This moved from below so that rnh->rnh_addaddr() can
	 * examine the ifa and ifa->ifa_ifp if it so desires.
	 */
	ifa = info->rti_ifa;
	rt->rt_weight = 1;

	rt_setmetrics(info, rt);
	rt_old = NULL;

	RIB_WLOCK(rnh);
	RT_LOCK(rt);
#ifdef RADIX_MPATH
	/* do not permit exactly the same dst/mask/gw pair */
	if (rt_mpath_capable(rnh) &&
	    rt_mpath_conflict(rnh, rt, netmask)) {
		RIB_WUNLOCK(rnh);

		nhop_free(nh);
		RT_LOCK_DESTROY(rt);
		uma_zfree(V_rtzone, rt);
		return (EEXIST);
	}
#endif

	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		/* Most common usecase */
		if (rt->rt_expire > 0)
			tmproutes_update(rnh, rt);

		/* Finalize notification */
		rnh->rnh_gen++;

		rc->rc_rt = RNTORT(rn);
		rc->rc_nh_new = nh;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	} else if ((info->rti_flags & RTF_PINNED) != 0) {

		/*
		 * Force removal and re-try addition
		 * TODO: better multipath&pinned support
		 */
		struct sockaddr *info_dst = info->rti_info[RTAX_DST];
		info->rti_info[RTAX_DST] = ndst;
		/* Do not delete existing PINNED(interface) routes */
		info->rti_flags &= ~RTF_PINNED;
		rt_old = rt_unlinkrte(rnh, info, &error);
		info->rti_flags |= RTF_PINNED;
		info->rti_info[RTAX_DST] = info_dst;
		if (rt_old != NULL) {
			rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head,
			    rt->rt_nodes);

			/* Finalize notification */
			rnh->rnh_gen++;

			if (rn != NULL) {
				rc->rc_cmd = RTM_CHANGE;
				rc->rc_rt = RNTORT(rn);
				rc->rc_nh_old = rt_old->rt_nhop;
				rc->rc_nh_new = nh;
			} else {
				/*
				 * NOTE(review): rn is NULL in this branch,
				 * so rc_rt is set to NULL here; consumers of
				 * the RTM_DELETE notification presumably rely
				 * on rc_nh_old only -- confirm.
				 */
				rc->rc_cmd = RTM_DELETE;
				rc->rc_rt = RNTORT(rn);
				rc->rc_nh_old = rt_old->rt_nhop;
				rc->rc_nh_new = nh;
			}
			rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		}
	}
	RIB_WUNLOCK(rnh);

	if ((rn != NULL) || (rt_old != NULL))
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/* The displaced route (if any) is freed after the current epoch. */
	if (rt_old != NULL)
		rtfree(rt_old);

	/*
	 * If it still failed to go into the tree,
	 * then un-make it (this should be a function)
	 */
	if (rn == NULL) {
		nhop_free(nh);
		RT_LOCK_DESTROY(rt);
		uma_zfree(V_rtzone, rt);
		return (EEXIST);
	}

	RT_UNLOCK(rt);

	return (0);
}

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	return (del_route(rnh, info, rc));
}

/*
 * Conditionally unlinks rtentry matching data inside @info from @rnh.
 * Returns unlinked, locked and referenced @rtentry on success,
 * Returns NULL and sets @perror to:
 * ESRCH - if prefix was not found,
 * EADDRINUSE - if trying to delete PINNED route without appropriate flag.
 * ENOENT - if supplied filter function returned 0 (not matched).
 */
struct rtentry *
rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
{
	struct sockaddr *dst, *netmask;
	struct rtentry *rt;
	struct radix_node *rn;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt == NULL) {
		*perror = ESRCH;
		return (NULL);
	}

	if ((info->rti_flags & RTF_PINNED) == 0) {
		/* Check if target route can be deleted */
		if (rt->rt_flags & RTF_PINNED) {
			*perror = EADDRINUSE;
			return (NULL);
		}
	}

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){
			/* Not matched */
			*perror = ENOENT;
			return (NULL);
		}

		/*
		 * Filter function requested rte deletion.
		 * Ease the caller work by filling in remaining info
		 * from that particular entry.
		 */
		info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
	}

	/*
	 * Remove the item from the tree and return it.
	 * Complain if it is not there and do no more processing.
	 */
	*perror = ESRCH;
#ifdef RADIX_MPATH
	if (rt_mpath_capable(rnh))
		rn = rt_mpath_unlink(rnh, info, rt, perror);
	else
#endif
	rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
	if (rn == NULL)
		return (NULL);

	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
		panic ("rtrequest delete");

	rt = RNTORT(rn);
	/* Return the entry locked and marked down, per the contract above. */
	RT_LOCK(rt);
	rt->rt_flags &= ~RTF_UP;

	*perror = 0;

	return (rt);
}

/*
 * Unlinks the route matching @info from @rnh under the RIB write lock
 * and schedules its destruction at the end of the current epoch.
 *
 * Returns 0 on success and fills in @rc, or the error reported by
 * rt_unlinkrte().
 */
static int
del_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	struct rtentry *rt;
	int error;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	/* Lookups key on the masked destination, so mask it up front. */
	if (netmask) {
		if (dst->sa_len > sizeof(mdst))
			return (EINVAL);
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	RIB_WLOCK(rnh);
	rt = rt_unlinkrte(rnh, info, &error);
	if (rt != NULL) {
		/* Finalize notification */
		rnh->rnh_gen++;
		rc->rc_rt = rt;
		rc->rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
	}
	RIB_WUNLOCK(rnh);
	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/*
	 * If the caller wants it, then it can have it,
	 * the entry will be deleted after the end of the current epoch.
	 */
	rtfree(rt);

	return (0);
}

/*
 * Changes the route matching @info in the table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	return (change_route(rnh, info, rc));
}

/*
 * Single attempt at a route change: reads the current nexthop under the
 * read lock, builds a replacement nexthop with the lock dropped, then
 * re-checks under the write lock that the route still exists and its
 * nexthop did not change in between.
 *
 * Returns 0 on success, ESRCH if the route disappeared, EAGAIN if the
 * nexthop was swapped concurrently (caller retries), or an errno from
 * ifa/nexthop setup.
 */
static int
change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct rtentry *rt = NULL;
	int error = 0;
	int free_ifa = 0;
	struct nhop_object *nh, *nh_orig;

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

#ifdef RADIX_MPATH
	/*
	 * If we got multipath routes,
	 * we require users to specify a matching RTAX_GATEWAY.
	 */
	if (rt_mpath_capable(rnh)) {
		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
		if (rt == NULL) {
			RIB_RUNLOCK(rnh);
			return (ESRCH);
		}
	}
#endif
	nh_orig = rt->rt_nhop;

	RIB_RUNLOCK(rnh);

	/* rt/nh pointers are invalid once the lock is dropped. */
	rt = NULL;
	nh = NULL;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (info->rti_ifa != NULL)
			free_ifa = 1;

		if (error != 0) {
			if (free_ifa) {
				ifa_free(info->rti_ifa);
				info->rti_ifa = NULL;
			}

			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
	if (free_ifa) {
		ifa_free(info->rti_ifa);
		info->rti_ifa = NULL;
	}
	if (error != 0)
		return (error);

	RIB_WLOCK(rnh);

	/* Lookup rtentry once again and check if nexthop is still the same */
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_WUNLOCK(rnh);
		nhop_free(nh);
		return (ESRCH);
	}

	if (rt->rt_nhop != nh_orig) {
		/* Raced with a concurrent change; caller will retry. */
		RIB_WUNLOCK(rnh);
		nhop_free(nh);
		return (EAGAIN);
	}

	/* Proceed with the update */
	RT_LOCK(rt);

	/* Provide notification to the protocols.*/
	rt->rt_nhop = nh;
	rt_setmetrics(info, rt);

	/* Finalize notification */
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rt->rt_nhop;

	RT_UNLOCK(rt);

	/* Update generation id to reflect rtable change */
	rnh->rnh_gen++;
	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	RIB_WUNLOCK(rnh);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/* Drop the table's reference on the replaced nexthop. */
	nhop_free(nh_orig);

	return (0);
}

/*
 * Validates @info and drives change_route_one(), retrying on EAGAIN.
 */
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL))
		return (EINVAL);

	/*
	 * route change is done in multiple steps, with dropping and
	 * reacquiring lock. In the situations with multiple processes
	 * changes the same route in can lead to the case when route
	 * is changed between the steps. Address it by retrying the operation
	 * multiple times before failing.
	 */
	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_one(rnh, info, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Performs modification of routing table specificed by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/*
 * Walker state for rib_walk_del(): filter criteria in @info,
 * the table being walked in @rnh, a chain of unlinked entries in @head,
 * and the notification template in @rc.
 */
struct rt_delinfo
{
	struct rt_addrinfo info;
	struct rib_head *rnh;
	struct rtentry *head;
	struct rib_cmd_info rc;
};

/*
 * Conditionally unlinks @rn from radix tree based
 * on info data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di;
	struct rt_addrinfo *info;
	struct rtentry *rt;
	int error;

	di = (struct rt_delinfo *)arg;
	rt = (struct rtentry *)rn;
	info = &di->info;
	error = 0;

	info->rti_info[RTAX_DST] = rt_key(rt);
	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
	info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;

	rt = rt_unlinkrte(di->rnh, info, &error);
	if (rt == NULL) {
		/* Either not allowed or not matched. Skip entry */
		return (0);
	}

	/* Entry was unlinked. Notify subscribers */
	di->rnh->rnh_gen++;
	di->rc.rc_rt = rt;
	di->rc.rc_nh_old = rt->rt_nhop;
	rib_notify(di->rnh, RIB_NOTIFY_IMMEDIATE, &di->rc);

	/* Add to the list and return */
	rt->rt_chain = di->head;
	di->head = rt;

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report)
{
	struct rib_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;
	di.rnh = rnh;
	di.rc.rc_cmd = RTM_DELETE;

	NET_EPOCH_ENTER(et);

	/* Unlink all matching entries in a single pass under the lock. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = rt->rt_nhop;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		/* TODO std rt -> rt_addrinfo export */
		di.info.rti_info[RTAX_DST] = rt_key(rt);
		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);

		if (report)
			rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0,
			    fibnum);
		rtfree(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * Invokes all subscribers of @rnh registered for stage @type with @rc.
 * Callers serialize table changes; subscribers walk a CK list, so
 * traversal is epoch-safe.
 */
static void
rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
    struct rib_cmd_info *rc)
{
	struct rib_subscription *rs;

	CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) {
		if (rs->type == type)
			rs->func(rnh, rc, rs->arg);
	}
}

/*
 * Allocates and fills in a subscription structure.
 * With @waitok set the allocation sleeps and cannot fail;
 * otherwise NULL may be returned.
 */
static struct rib_subscription *
allocate_subscription(rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	int flags = M_ZERO | (waitok ? M_WAITOK : 0);

	rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags);
	if (rs == NULL)
		return (NULL);

	rs->func = f;
	rs->arg = arg;
	rs->type = type;

	return (rs);
}

/*
 * Subscribe for the changes in the routing table specified by @fibnum and
 * @family.
 *
 * Returns pointer to the subscription structure on success.
 */
struct rib_subscription *
rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_head *rnh;
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);

	NET_EPOCH_ENTER(et);
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);

	/*
	 * NOTE(review): rnh is not checked for NULL here, unlike the
	 * unsubscribe path; callers appear expected to pass a valid
	 * fibnum/family pair -- confirm.
	 */
	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

/*
 * Subscribes for changes of the table referenced directly by @rnh.
 * Same semantics as rib_subscribe() without the fibnum/family lookup.
 */
struct rib_subscription *
rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg,
    enum rib_subscription_type type, bool waitok)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL)
		return (NULL);

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next);
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);

	return (rs);
}

/*
 * Remove rtable subscription @rs from the table specified by @fibnum
 * and @family.
 * Needs to be run in network epoch.
 *
 * Returns 0 on success.
 */
/*
 * NOTE(review): function name contains a typo ("unsibscribe"); it is
 * kept as-is since it is part of the public interface used by callers.
 */
int
rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs)
{
	struct rib_head *rnh;

	NET_EPOCH_ASSERT();
	KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__));
	rnh = rt_tables_get_rnh(fibnum, family);

	if (rnh == NULL)
		return (ENOENT);

	RIB_WLOCK(rnh);
	CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next);
	RIB_WUNLOCK(rnh);

	/* Defer the free until in-flight rib_notify() walks are done. */
	epoch_call(net_epoch_preempt, destroy_subscription_epoch,
	    &rs->epoch_ctx);

	return (0);
}

/*
 * Epoch callback indicating subscription is safe to destroy
 */
static void
destroy_subscription_epoch(epoch_context_t ctx)
{
	struct rib_subscription *rs;

	rs = __containerof(ctx, struct rib_subscription, epoch_ctx);

	free(rs, M_RTABLE);
}

/*
 * Initializes the subscriber list of a freshly-created @rnh.
 */
void
rib_init_subscriptions(struct rib_head *rnh)
{

	CK_STAILQ_INIT(&rnh->rnh_subscribers);
}

/*
 * Unlinks every remaining subscriber of @rnh on table teardown and
 * schedules their destruction via epoch callbacks.
 */
void
rib_destroy_subscriptions(struct rib_head *rnh)
{
	struct rib_subscription *rs;
	struct epoch_tracker et;

	NET_EPOCH_ENTER(et);
	RIB_WLOCK(rnh);
	while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) {
		CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next);
		epoch_call(net_epoch_preempt, destroy_subscription_epoch,
		    &rs->epoch_ctx);
	}
	RIB_WUNLOCK(rnh);
	NET_EPOCH_EXIT(et);
}