/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_mpath.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <net/route/shared.h> 56 #include <netinet/in.h> 57 58 #ifdef RADIX_MPATH 59 #include <net/radix_mpath.h> 60 #endif 61 62 #include <vm/uma.h> 63 64 65 /* 66 * This file contains control plane routing tables functions. 67 * 68 * All functions assumes they are called in net epoch. 69 */ 70 71 struct rib_subscription { 72 CK_STAILQ_ENTRY(rib_subscription) next; 73 rib_subscription_cb_t *func; 74 void *arg; 75 enum rib_subscription_type type; 76 struct epoch_context epoch_ctx; 77 }; 78 79 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, 80 struct rib_cmd_info *rc); 81 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, 82 struct rib_cmd_info *rc); 83 static int change_route(struct rib_head *, struct rt_addrinfo *, 84 struct rib_cmd_info *rc); 85 static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 86 struct rib_cmd_info *rc); 87 88 static void destroy_subscription_epoch(epoch_context_t ctx); 89 90 /* Routing table UMA zone */ 91 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 92 #define V_rtzone VNET(rtzone) 93 94 void 95 vnet_rtzone_init() 96 { 97 98 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 99 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 100 } 101 102 #ifdef VIMAGE 103 void 104 
vnet_rtzone_destroy() 105 { 106 107 uma_zdestroy(V_rtzone); 108 } 109 #endif 110 111 static void 112 destroy_rtentry(struct rtentry *rt) 113 { 114 115 /* 116 * At this moment rnh, nh_control may be already freed. 117 * nhop interface may have been migrated to a different vnet. 118 * Use vnet stored in the nexthop to delete the entry. 119 */ 120 CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); 121 122 /* Unreference nexthop */ 123 nhop_free(rt->rt_nhop); 124 125 uma_zfree(V_rtzone, rt); 126 127 CURVNET_RESTORE(); 128 } 129 130 /* 131 * Epoch callback indicating rtentry is safe to destroy 132 */ 133 static void 134 destroy_rtentry_epoch(epoch_context_t ctx) 135 { 136 struct rtentry *rt; 137 138 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 139 140 destroy_rtentry(rt); 141 } 142 143 /* 144 * Schedule rtentry deletion 145 */ 146 static void 147 rtfree(struct rtentry *rt) 148 { 149 150 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 151 152 epoch_call(net_epoch_preempt, destroy_rtentry_epoch, 153 &rt->rt_epoch_ctx); 154 } 155 156 157 158 static struct rib_head * 159 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 160 { 161 struct rib_head *rnh; 162 struct sockaddr *dst; 163 164 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 165 166 dst = info->rti_info[RTAX_DST]; 167 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 168 169 return (rnh); 170 } 171 172 /* 173 * Adds route defined by @info into the kernel table specified by @fibnum and 174 * sa_family in @info->rti_info[RTAX_DST]. 175 * 176 * Returns 0 on success and fills in operation metadata into @rc. 177 */ 178 int 179 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 180 struct rib_cmd_info *rc) 181 { 182 struct rib_head *rnh; 183 184 NET_EPOCH_ASSERT(); 185 186 rnh = get_rnh(fibnum, info); 187 if (rnh == NULL) 188 return (EAFNOSUPPORT); 189 190 /* 191 * Check consistency between RTF_HOST flag and netmask 192 * existence. 
193 */ 194 if (info->rti_flags & RTF_HOST) 195 info->rti_info[RTAX_NETMASK] = NULL; 196 else if (info->rti_info[RTAX_NETMASK] == NULL) 197 return (EINVAL); 198 199 bzero(rc, sizeof(struct rib_cmd_info)); 200 rc->rc_cmd = RTM_ADD; 201 202 return (add_route(rnh, info, rc)); 203 } 204 205 static int 206 add_route(struct rib_head *rnh, struct rt_addrinfo *info, 207 struct rib_cmd_info *rc) 208 { 209 struct sockaddr *dst, *ndst, *gateway, *netmask; 210 struct rtentry *rt, *rt_old; 211 struct nhop_object *nh; 212 struct radix_node *rn; 213 struct ifaddr *ifa; 214 int error, flags; 215 216 dst = info->rti_info[RTAX_DST]; 217 gateway = info->rti_info[RTAX_GATEWAY]; 218 netmask = info->rti_info[RTAX_NETMASK]; 219 flags = info->rti_flags; 220 221 if ((flags & RTF_GATEWAY) && !gateway) 222 return (EINVAL); 223 if (dst && gateway && (dst->sa_family != gateway->sa_family) && 224 (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 225 return (EINVAL); 226 227 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) 228 return (EINVAL); 229 230 if (info->rti_ifa == NULL) { 231 error = rt_getifa_fib(info, rnh->rib_fibnum); 232 if (error) 233 return (error); 234 } else { 235 ifa_ref(info->rti_ifa); 236 } 237 238 error = nhop_create_from_info(rnh, info, &nh); 239 if (error != 0) { 240 ifa_free(info->rti_ifa); 241 return (error); 242 } 243 244 rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 245 if (rt == NULL) { 246 ifa_free(info->rti_ifa); 247 nhop_free(nh); 248 return (ENOBUFS); 249 } 250 rt->rte_flags = RTF_UP | flags; 251 rt->rt_nhop = nh; 252 253 /* Fill in dst */ 254 memcpy(&rt->rt_dst, dst, dst->sa_len); 255 rt_key(rt) = &rt->rt_dst; 256 257 /* 258 * point to the (possibly newly malloc'd) dest address. 259 */ 260 ndst = (struct sockaddr *)rt_key(rt); 261 262 /* 263 * make sure it contains the value we want (masked if needed). 
264 */ 265 if (netmask) { 266 rt_maskedcopy(dst, ndst, netmask); 267 } else 268 bcopy(dst, ndst, dst->sa_len); 269 270 /* 271 * We use the ifa reference returned by rt_getifa_fib(). 272 * This moved from below so that rnh->rnh_addaddr() can 273 * examine the ifa and ifa->ifa_ifp if it so desires. 274 */ 275 ifa = info->rti_ifa; 276 rt->rt_weight = 1; 277 278 rt_setmetrics(info, rt); 279 rt_old = NULL; 280 281 RIB_WLOCK(rnh); 282 #ifdef RADIX_MPATH 283 /* do not permit exactly the same dst/mask/gw pair */ 284 if (rt_mpath_capable(rnh) && 285 rt_mpath_conflict(rnh, rt, netmask)) { 286 RIB_WUNLOCK(rnh); 287 288 nhop_free(nh); 289 uma_zfree(V_rtzone, rt); 290 return (EEXIST); 291 } 292 #endif 293 294 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); 295 296 if (rn != NULL) { 297 /* Most common usecase */ 298 if (rt->rt_expire > 0) 299 tmproutes_update(rnh, rt); 300 301 /* Finalize notification */ 302 rnh->rnh_gen++; 303 304 rc->rc_rt = rt; 305 rc->rc_nh_new = nh; 306 rc->rc_nh_weight = rt->rt_weight; 307 308 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 309 } else if ((info->rti_flags & RTF_PINNED) != 0) { 310 311 /* 312 * Force removal and re-try addition 313 * TODO: better multipath&pinned support 314 */ 315 struct sockaddr *info_dst = info->rti_info[RTAX_DST]; 316 info->rti_info[RTAX_DST] = ndst; 317 /* Do not delete existing PINNED(interface) routes */ 318 info->rti_flags &= ~RTF_PINNED; 319 rt_old = rt_unlinkrte(rnh, info, &error); 320 info->rti_flags |= RTF_PINNED; 321 info->rti_info[RTAX_DST] = info_dst; 322 if (rt_old != NULL) { 323 rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, 324 rt->rt_nodes); 325 326 /* Finalize notification */ 327 rnh->rnh_gen++; 328 329 if (rn != NULL) { 330 rc->rc_cmd = RTM_CHANGE; 331 rc->rc_rt = rt; 332 rc->rc_nh_old = rt_old->rt_nhop; 333 rc->rc_nh_new = nh; 334 rc->rc_nh_weight = rt->rt_weight; 335 } else { 336 rc->rc_cmd = RTM_DELETE; 337 rc->rc_rt = rt_old; 338 rc->rc_nh_old = rt_old->rt_nhop; 339 rc->rc_nh_weight = 
rt_old->rt_weight; 340 } 341 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 342 } 343 } 344 RIB_WUNLOCK(rnh); 345 346 if ((rn != NULL) || (rt_old != NULL)) 347 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 348 349 if (rt_old != NULL) 350 rtfree(rt_old); 351 352 /* 353 * If it still failed to go into the tree, 354 * then un-make it (this should be a function) 355 */ 356 if (rn == NULL) { 357 nhop_free(nh); 358 uma_zfree(V_rtzone, rt); 359 return (EEXIST); 360 } 361 362 return (0); 363 } 364 365 366 /* 367 * Removes route defined by @info from the kernel table specified by @fibnum and 368 * sa_family in @info->rti_info[RTAX_DST]. 369 * 370 * Returns 0 on success and fills in operation metadata into @rc. 371 */ 372 int 373 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 374 { 375 struct rib_head *rnh; 376 377 NET_EPOCH_ASSERT(); 378 379 rnh = get_rnh(fibnum, info); 380 if (rnh == NULL) 381 return (EAFNOSUPPORT); 382 383 bzero(rc, sizeof(struct rib_cmd_info)); 384 rc->rc_cmd = RTM_DELETE; 385 386 return (del_route(rnh, info, rc)); 387 } 388 389 /* 390 * Conditionally unlinks rtentry matching data inside @info from @rnh. 391 * Returns unlinked, locked and referenced @rtentry on success, 392 * Returns NULL and sets @perror to: 393 * ESRCH - if prefix was not found, 394 * EADDRINUSE - if trying to delete PINNED route without appropriate flag. 395 * ENOENT - if supplied filter function returned 0 (not matched). 
396 */ 397 struct rtentry * 398 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) 399 { 400 struct sockaddr *dst, *netmask; 401 struct rtentry *rt; 402 struct nhop_object *nh; 403 struct radix_node *rn; 404 405 dst = info->rti_info[RTAX_DST]; 406 netmask = info->rti_info[RTAX_NETMASK]; 407 408 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 409 if (rt == NULL) { 410 *perror = ESRCH; 411 return (NULL); 412 } 413 414 nh = rt->rt_nhop; 415 416 if ((info->rti_flags & RTF_PINNED) == 0) { 417 /* Check if target route can be deleted */ 418 if (NH_IS_PINNED(nh)) { 419 *perror = EADDRINUSE; 420 return (NULL); 421 } 422 } 423 424 if (info->rti_filter != NULL) { 425 if (info->rti_filter(rt, nh, info->rti_filterdata)==0){ 426 /* Not matched */ 427 *perror = ENOENT; 428 return (NULL); 429 } 430 431 /* 432 * Filter function requested rte deletion. 433 * Ease the caller work by filling in remaining info 434 * from that particular entry. 435 */ 436 info->rti_info[RTAX_GATEWAY] = &nh->gw_sa; 437 } 438 439 /* 440 * Remove the item from the tree and return it. 441 * Complain if it is not there and do no more processing. 
442 */ 443 *perror = ESRCH; 444 #ifdef RADIX_MPATH 445 if (rt_mpath_capable(rnh)) 446 rn = rt_mpath_unlink(rnh, info, rt, perror); 447 else 448 #endif 449 rn = rnh->rnh_deladdr(dst, netmask, &rnh->head); 450 if (rn == NULL) 451 return (NULL); 452 453 if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 454 panic ("rtrequest delete"); 455 456 rt = RNTORT(rn); 457 rt->rte_flags &= ~RTF_UP; 458 459 *perror = 0; 460 461 return (rt); 462 } 463 464 static int 465 del_route(struct rib_head *rnh, struct rt_addrinfo *info, 466 struct rib_cmd_info *rc) 467 { 468 struct sockaddr *dst, *netmask; 469 struct sockaddr_storage mdst; 470 struct rtentry *rt; 471 int error; 472 473 dst = info->rti_info[RTAX_DST]; 474 netmask = info->rti_info[RTAX_NETMASK]; 475 476 if (netmask) { 477 if (dst->sa_len > sizeof(mdst)) 478 return (EINVAL); 479 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 480 dst = (struct sockaddr *)&mdst; 481 } 482 483 RIB_WLOCK(rnh); 484 rt = rt_unlinkrte(rnh, info, &error); 485 if (rt != NULL) { 486 /* Finalize notification */ 487 rnh->rnh_gen++; 488 rc->rc_rt = rt; 489 rc->rc_nh_old = rt->rt_nhop; 490 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 491 } 492 RIB_WUNLOCK(rnh); 493 if (error != 0) 494 return (error); 495 496 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 497 498 /* 499 * If the caller wants it, then it can have it, 500 * the entry will be deleted after the end of the current epoch. 
501 */ 502 rtfree(rt); 503 504 return (0); 505 } 506 507 int 508 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 509 struct rib_cmd_info *rc) 510 { 511 struct rib_head *rnh; 512 513 NET_EPOCH_ASSERT(); 514 515 rnh = get_rnh(fibnum, info); 516 if (rnh == NULL) 517 return (EAFNOSUPPORT); 518 519 bzero(rc, sizeof(struct rib_cmd_info)); 520 rc->rc_cmd = RTM_CHANGE; 521 522 return (change_route(rnh, info, rc)); 523 } 524 525 static int 526 change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, 527 struct rib_cmd_info *rc) 528 { 529 RIB_RLOCK_TRACKER; 530 struct rtentry *rt = NULL; 531 int error = 0; 532 int free_ifa = 0; 533 struct nhop_object *nh, *nh_orig; 534 535 RIB_RLOCK(rnh); 536 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 537 info->rti_info[RTAX_NETMASK], &rnh->head); 538 539 if (rt == NULL) { 540 RIB_RUNLOCK(rnh); 541 return (ESRCH); 542 } 543 544 #ifdef RADIX_MPATH 545 /* 546 * If we got multipath routes, 547 * we require users to specify a matching RTAX_GATEWAY. 
548 */ 549 if (rt_mpath_capable(rnh)) { 550 rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]); 551 if (rt == NULL) { 552 RIB_RUNLOCK(rnh); 553 return (ESRCH); 554 } 555 } 556 #endif 557 nh_orig = rt->rt_nhop; 558 559 RIB_RUNLOCK(rnh); 560 561 rt = NULL; 562 nh = NULL; 563 564 /* 565 * New gateway could require new ifaddr, ifp; 566 * flags may also be different; ifp may be specified 567 * by ll sockaddr when protocol address is ambiguous 568 */ 569 if (((nh_orig->nh_flags & NHF_GATEWAY) && 570 info->rti_info[RTAX_GATEWAY] != NULL) || 571 info->rti_info[RTAX_IFP] != NULL || 572 (info->rti_info[RTAX_IFA] != NULL && 573 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 574 error = rt_getifa_fib(info, rnh->rib_fibnum); 575 if (info->rti_ifa != NULL) 576 free_ifa = 1; 577 578 if (error != 0) { 579 if (free_ifa) { 580 ifa_free(info->rti_ifa); 581 info->rti_ifa = NULL; 582 } 583 584 return (error); 585 } 586 } 587 588 error = nhop_create_from_nhop(rnh, nh_orig, info, &nh); 589 if (free_ifa) { 590 ifa_free(info->rti_ifa); 591 info->rti_ifa = NULL; 592 } 593 if (error != 0) 594 return (error); 595 596 RIB_WLOCK(rnh); 597 598 /* Lookup rtentry once again and check if nexthop is still the same */ 599 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 600 info->rti_info[RTAX_NETMASK], &rnh->head); 601 602 if (rt == NULL) { 603 RIB_WUNLOCK(rnh); 604 nhop_free(nh); 605 return (ESRCH); 606 } 607 608 if (rt->rt_nhop != nh_orig) { 609 RIB_WUNLOCK(rnh); 610 nhop_free(nh); 611 return (EAGAIN); 612 } 613 614 /* Proceed with the update */ 615 616 /* Provide notification to the protocols.*/ 617 rt->rt_nhop = nh; 618 rt_setmetrics(info, rt); 619 620 /* Finalize notification */ 621 rnh->rnh_gen++; 622 623 rc->rc_rt = rt; 624 rc->rc_nh_old = nh_orig; 625 rc->rc_nh_new = rt->rt_nhop; 626 rc->rc_nh_weight = rt->rt_weight; 627 628 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 629 630 RIB_WUNLOCK(rnh); 631 632 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 633 634 
nhop_free(nh_orig); 635 636 return (0); 637 } 638 639 static int 640 change_route(struct rib_head *rnh, struct rt_addrinfo *info, 641 struct rib_cmd_info *rc) 642 { 643 int error; 644 645 /* Check if updated gateway exists */ 646 if ((info->rti_flags & RTF_GATEWAY) && 647 (info->rti_info[RTAX_GATEWAY] == NULL)) 648 return (EINVAL); 649 650 /* 651 * route change is done in multiple steps, with dropping and 652 * reacquiring lock. In the situations with multiple processes 653 * changes the same route in can lead to the case when route 654 * is changed between the steps. Address it by retrying the operation 655 * multiple times before failing. 656 */ 657 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 658 error = change_route_one(rnh, info, rc); 659 if (error != EAGAIN) 660 break; 661 } 662 663 return (error); 664 } 665 666 /* 667 * Performs modification of routing table specificed by @action. 668 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 669 * Needs to be run in network epoch. 670 * 671 * Returns 0 on success and fills in @rc with action result. 672 */ 673 int 674 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 675 struct rib_cmd_info *rc) 676 { 677 int error; 678 679 switch (action) { 680 case RTM_ADD: 681 error = rib_add_route(fibnum, info, rc); 682 break; 683 case RTM_DELETE: 684 error = rib_del_route(fibnum, info, rc); 685 break; 686 case RTM_CHANGE: 687 error = rib_change_route(fibnum, info, rc); 688 break; 689 default: 690 error = ENOTSUP; 691 } 692 693 return (error); 694 } 695 696 697 struct rt_delinfo 698 { 699 struct rt_addrinfo info; 700 struct rib_head *rnh; 701 struct rtentry *head; 702 struct rib_cmd_info rc; 703 }; 704 705 /* 706 * Conditionally unlinks @rn from radix tree based 707 * on info data passed in @arg. 
708 */ 709 static int 710 rt_checkdelroute(struct radix_node *rn, void *arg) 711 { 712 struct rt_delinfo *di; 713 struct rt_addrinfo *info; 714 struct rtentry *rt; 715 int error; 716 717 di = (struct rt_delinfo *)arg; 718 rt = (struct rtentry *)rn; 719 info = &di->info; 720 error = 0; 721 722 info->rti_info[RTAX_DST] = rt_key(rt); 723 info->rti_info[RTAX_NETMASK] = rt_mask(rt); 724 info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa; 725 726 rt = rt_unlinkrte(di->rnh, info, &error); 727 if (rt == NULL) { 728 /* Either not allowed or not matched. Skip entry */ 729 return (0); 730 } 731 732 /* Entry was unlinked. Notify subscribers */ 733 di->rnh->rnh_gen++; 734 di->rc.rc_rt = rt; 735 di->rc.rc_nh_old = rt->rt_nhop; 736 rib_notify(di->rnh, RIB_NOTIFY_IMMEDIATE, &di->rc); 737 738 /* Add to the list and return */ 739 rt->rt_chain = di->head; 740 di->head = rt; 741 742 return (0); 743 } 744 745 /* 746 * Iterates over a routing table specified by @fibnum and @family and 747 * deletes elements marked by @filter_f. 748 * @fibnum: rtable id 749 * @family: AF_ address family 750 * @filter_f: function returning non-zero value for items to delete 751 * @arg: data to pass to the @filter_f function 752 * @report: true if rtsock notification is needed. 753 */ 754 void 755 rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report) 756 { 757 struct rib_head *rnh; 758 struct rt_delinfo di; 759 struct rtentry *rt; 760 struct epoch_tracker et; 761 762 rnh = rt_tables_get_rnh(fibnum, family); 763 if (rnh == NULL) 764 return; 765 766 bzero(&di, sizeof(di)); 767 di.info.rti_filter = filter_f; 768 di.info.rti_filterdata = arg; 769 di.rnh = rnh; 770 di.rc.rc_cmd = RTM_DELETE; 771 772 NET_EPOCH_ENTER(et); 773 774 RIB_WLOCK(rnh); 775 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 776 RIB_WUNLOCK(rnh); 777 778 /* We might have something to reclaim. 
*/ 779 while (di.head != NULL) { 780 rt = di.head; 781 di.head = rt->rt_chain; 782 rt->rt_chain = NULL; 783 784 di.rc.rc_rt = rt; 785 di.rc.rc_nh_old = rt->rt_nhop; 786 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 787 788 /* TODO std rt -> rt_addrinfo export */ 789 di.info.rti_info[RTAX_DST] = rt_key(rt); 790 di.info.rti_info[RTAX_NETMASK] = rt_mask(rt); 791 792 if (report) 793 rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0, 794 fibnum); 795 rtfree(rt); 796 } 797 798 NET_EPOCH_EXIT(et); 799 } 800 801 static void 802 rib_notify(struct rib_head *rnh, enum rib_subscription_type type, 803 struct rib_cmd_info *rc) 804 { 805 struct rib_subscription *rs; 806 807 CK_STAILQ_FOREACH(rs, &rnh->rnh_subscribers, next) { 808 if (rs->type == type) 809 rs->func(rnh, rc, rs->arg); 810 } 811 } 812 813 static struct rib_subscription * 814 allocate_subscription(rib_subscription_cb_t *f, void *arg, 815 enum rib_subscription_type type, bool waitok) 816 { 817 struct rib_subscription *rs; 818 int flags = M_ZERO | (waitok ? M_WAITOK : 0); 819 820 rs = malloc(sizeof(struct rib_subscription), M_RTABLE, flags); 821 if (rs == NULL) 822 return (NULL); 823 824 rs->func = f; 825 rs->arg = arg; 826 rs->type = type; 827 828 return (rs); 829 } 830 831 832 /* 833 * Subscribe for the changes in the routing table specified by @fibnum and 834 * @family. 835 * 836 * Returns pointer to the subscription structure on success. 
837 */ 838 struct rib_subscription * 839 rib_subscribe(uint32_t fibnum, int family, rib_subscription_cb_t *f, void *arg, 840 enum rib_subscription_type type, bool waitok) 841 { 842 struct rib_head *rnh; 843 struct rib_subscription *rs; 844 struct epoch_tracker et; 845 846 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 847 return (NULL); 848 849 NET_EPOCH_ENTER(et); 850 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 851 rnh = rt_tables_get_rnh(fibnum, family); 852 853 RIB_WLOCK(rnh); 854 CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next); 855 RIB_WUNLOCK(rnh); 856 NET_EPOCH_EXIT(et); 857 858 return (rs); 859 } 860 861 struct rib_subscription * 862 rib_subscribe_internal(struct rib_head *rnh, rib_subscription_cb_t *f, void *arg, 863 enum rib_subscription_type type, bool waitok) 864 { 865 struct rib_subscription *rs; 866 struct epoch_tracker et; 867 868 if ((rs = allocate_subscription(f, arg, type, waitok)) == NULL) 869 return (NULL); 870 871 NET_EPOCH_ENTER(et); 872 RIB_WLOCK(rnh); 873 CK_STAILQ_INSERT_TAIL(&rnh->rnh_subscribers, rs, next); 874 RIB_WUNLOCK(rnh); 875 NET_EPOCH_EXIT(et); 876 877 return (rs); 878 } 879 880 /* 881 * Remove rtable subscription @rs from the table specified by @fibnum 882 * and @family. 883 * Needs to be run in network epoch. 884 * 885 * Returns 0 on success. 
886 */ 887 int 888 rib_unsibscribe(uint32_t fibnum, int family, struct rib_subscription *rs) 889 { 890 struct rib_head *rnh; 891 892 NET_EPOCH_ASSERT(); 893 KASSERT((fibnum < rt_numfibs), ("%s: bad fibnum", __func__)); 894 rnh = rt_tables_get_rnh(fibnum, family); 895 896 if (rnh == NULL) 897 return (ENOENT); 898 899 RIB_WLOCK(rnh); 900 CK_STAILQ_REMOVE(&rnh->rnh_subscribers, rs, rib_subscription, next); 901 RIB_WUNLOCK(rnh); 902 903 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 904 &rs->epoch_ctx); 905 906 return (0); 907 } 908 909 /* 910 * Epoch callback indicating subscription is safe to destroy 911 */ 912 static void 913 destroy_subscription_epoch(epoch_context_t ctx) 914 { 915 struct rib_subscription *rs; 916 917 rs = __containerof(ctx, struct rib_subscription, epoch_ctx); 918 919 free(rs, M_RTABLE); 920 } 921 922 void 923 rib_init_subscriptions(struct rib_head *rnh) 924 { 925 926 CK_STAILQ_INIT(&rnh->rnh_subscribers); 927 } 928 929 void 930 rib_destroy_subscriptions(struct rib_head *rnh) 931 { 932 struct rib_subscription *rs; 933 struct epoch_tracker et; 934 935 NET_EPOCH_ENTER(et); 936 RIB_WLOCK(rnh); 937 while ((rs = CK_STAILQ_FIRST(&rnh->rnh_subscribers)) != NULL) { 938 CK_STAILQ_REMOVE_HEAD(&rnh->rnh_subscribers, next); 939 epoch_call(net_epoch_preempt, destroy_subscription_epoch, 940 &rs->epoch_ctx); 941 } 942 RIB_WUNLOCK(rnh); 943 NET_EPOCH_EXIT(et); 944 } 945 946