1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_route.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/socket.h> 38 #include <sys/sysctl.h> 39 #include <sys/syslog.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/rmlock.h> 43 44 #include <net/if.h> 45 #include <net/if_var.h> 46 #include <net/if_private.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 #include <netinet6/in6_var.h> 58 59 #define DEBUG_MOD_NAME route_ctl 60 #define DEBUG_MAX_LEVEL LOG_DEBUG 61 #include <net/route/route_debug.h> 62 _DECLARE_DEBUG(LOG_INFO); 63 64 /* 65 * This file contains control plane routing tables functions. 66 * 67 * All functions assumes they are called in net epoch. 
 */

/*
 * Scratch storage large enough to hold any sockaddr handled by the RIB
 * (used for on-stack netmask construction).
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};

/* Forward declarations for the file-local add/change/delete helpers. */
static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask);
static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
/* Multipath is compiled in: allow runtime toggling, default on. */
#define _MP_FLAGS	CTLFLAG_RW
VNET_DEFINE(u_int, rib_route_multipath) = 1;
#else
/* No multipath support compiled in: expose the knob read-only, off. */
#define _MP_FLAGS	CTLFLAG_RD
VNET_DEFINE(u_int, rib_route_multipath) = 0;
#endif
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
/*
 * Set to 1 once the first multipath route is installed; read-only sysctl
 * reporting whether locally-originated packets get a flowid computed.
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections*/
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

/*
 * Returns the routing table head for fib @fibnum and the address family
 * of @info's destination sockaddr, or NULL if unsupported.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
/* Returns true if IPv4 routes with IPv6 nexthops are enabled (sysctl). */
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

#ifdef ROUTE_MPATH
/* Returns true if multipath routing is enabled in @rh's vnet. */
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	/* The sysctl is virtualized; switch to the rib's vnet to read it. */
	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
185 * Avoid nhops without gateways and redirects. 186 * 187 * Returns 1 for multipath-eligible nexthop, 188 * 0 otherwise. 189 */ 190 bool 191 nhop_can_multipath(const struct nhop_object *nh) 192 { 193 194 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 195 return (1); 196 if ((nh->nh_flags & NHF_GATEWAY) == 0) 197 return (0); 198 if ((nh->nh_flags & NHF_REDIRECT) != 0) 199 return (0); 200 201 return (1); 202 } 203 #endif 204 205 static int 206 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 207 { 208 uint32_t weight; 209 210 if (info->rti_mflags & RTV_WEIGHT) 211 weight = info->rti_rmx->rmx_weight; 212 else 213 weight = default_weight; 214 /* Keep upper 1 byte for adm distance purposes */ 215 if (weight > RT_MAX_WEIGHT) 216 weight = RT_MAX_WEIGHT; 217 else if (weight == 0) 218 weight = default_weight; 219 220 return (weight); 221 } 222 223 /* 224 * File-local concept for distingushing between the normal and 225 * RTF_PINNED routes tha can override the "normal" one. 226 */ 227 #define NH_PRIORITY_HIGH 2 228 #define NH_PRIORITY_NORMAL 1 229 static int 230 get_prio_from_info(const struct rt_addrinfo *info) 231 { 232 if (info->rti_flags & RTF_PINNED) 233 return (NH_PRIORITY_HIGH); 234 return (NH_PRIORITY_NORMAL); 235 } 236 237 static int 238 nhop_get_prio(const struct nhop_object *nh) 239 { 240 if (NH_IS_PINNED(nh)) 241 return (NH_PRIORITY_HIGH); 242 return (NH_PRIORITY_NORMAL); 243 } 244 245 /* 246 * Check if specified @gw matches gw data in the nexthop @nh. 247 * 248 * Returns true if matches, false otherwise. 
249 */ 250 bool 251 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 252 { 253 254 if (nh->gw_sa.sa_family != gw->sa_family) 255 return (false); 256 257 switch (gw->sa_family) { 258 case AF_INET: 259 return (nh->gw4_sa.sin_addr.s_addr == 260 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 261 case AF_INET6: 262 { 263 const struct sockaddr_in6 *gw6; 264 gw6 = (const struct sockaddr_in6 *)gw; 265 266 /* 267 * Currently (2020-09) IPv6 gws in kernel have their 268 * scope embedded. Once this becomes false, this code 269 * has to be revisited. 270 */ 271 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 272 &gw6->sin6_addr)) 273 return (true); 274 return (false); 275 } 276 case AF_LINK: 277 { 278 const struct sockaddr_dl *sdl; 279 sdl = (const struct sockaddr_dl *)gw; 280 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 281 } 282 default: 283 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 284 } 285 286 /* NOTREACHED */ 287 return (false); 288 } 289 290 /* 291 * Matches all nexthop with given @gw. 292 * Can be used as rib_filter_f callback. 293 */ 294 int 295 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa) 296 { 297 const struct sockaddr *gw = (const struct sockaddr *)gw_sa; 298 299 return (match_nhop_gw(nh, gw)); 300 } 301 302 struct gw_filter_data { 303 const struct sockaddr *gw; 304 int count; 305 }; 306 307 /* 308 * Matches first occurence of the gateway provided in @gwd 309 */ 310 static int 311 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 312 { 313 struct gw_filter_data *gwd = (struct gw_filter_data *)_data; 314 315 /* Return only first match to make rtsock happy */ 316 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0) 317 return (1); 318 return (0); 319 } 320 321 /* 322 * Checks if data in @info matches nexhop @nh. 
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	/* An explicit filter callback takes precedence over gateway matching */
	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	/* Any (read or write) RIB lock is sufficient for rnh_lookup() */
	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		/* Clear @rnd so callers can test rnd_nhop for existence */
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

/* Exact-prefix lookup keyed by @rt's own dst/mask; see lookup_prefix_bysa(). */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
383 */ 384 struct rtentry * 385 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 386 struct route_nhop_data *rnd) 387 { 388 struct rtentry *rt; 389 390 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 391 info->rti_info[RTAX_NETMASK], rnd); 392 393 return (rt); 394 } 395 396 const struct rtentry * 397 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen, 398 struct route_nhop_data *rnd) 399 { 400 union sockaddr_union mask_storage; 401 struct sockaddr *netmask = &mask_storage.sa; 402 403 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) 404 return (lookup_prefix_bysa(rnh, dst, netmask, rnd)); 405 return (NULL); 406 } 407 408 static bool 409 fill_pxmask_family(int family, int plen, struct sockaddr *_dst, 410 struct sockaddr **pmask) 411 { 412 if (plen == -1) { 413 *pmask = NULL; 414 return (true); 415 } 416 417 switch (family) { 418 #ifdef INET 419 case AF_INET: 420 { 421 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask); 422 struct sockaddr_in *dst= (struct sockaddr_in *)_dst; 423 424 memset(mask, 0, sizeof(*mask)); 425 mask->sin_family = family; 426 mask->sin_len = sizeof(*mask); 427 if (plen == 32) 428 *pmask = NULL; 429 else if (plen > 32 || plen < 0) 430 return (false); 431 else { 432 uint32_t daddr, maddr; 433 maddr = htonl(plen ? 
~((1 << (32 - plen)) - 1) : 0); 434 mask->sin_addr.s_addr = maddr; 435 daddr = dst->sin_addr.s_addr; 436 daddr = htonl(ntohl(daddr) & ntohl(maddr)); 437 dst->sin_addr.s_addr = daddr; 438 } 439 return (true); 440 } 441 break; 442 #endif 443 #ifdef INET6 444 case AF_INET6: 445 { 446 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask); 447 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst; 448 449 memset(mask, 0, sizeof(*mask)); 450 mask->sin6_family = family; 451 mask->sin6_len = sizeof(*mask); 452 if (plen == 128) 453 *pmask = NULL; 454 else if (plen > 128 || plen < 0) 455 return (false); 456 else { 457 ip6_writemask(&mask->sin6_addr, plen); 458 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr); 459 } 460 return (true); 461 } 462 break; 463 #endif 464 } 465 return (false); 466 } 467 468 /* 469 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd 470 * to the routing table. 471 * 472 * @fibnum: verified kernel rtable id to insert route to 473 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 474 * @plen: prefix length (or -1 if host route or not applicable for AF) 475 * @op_flags: combination of RTM_F_ flags 476 * @rc: storage to report operation result 477 * 478 * Returns 0 on success. 
479 */ 480 int 481 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 482 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc) 483 { 484 union sockaddr_union mask_storage; 485 struct sockaddr *netmask = &mask_storage.sa; 486 struct rtentry *rt = NULL; 487 488 NET_EPOCH_ASSERT(); 489 490 bzero(rc, sizeof(struct rib_cmd_info)); 491 rc->rc_cmd = RTM_ADD; 492 493 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 494 if (rnh == NULL) 495 return (EAFNOSUPPORT); 496 497 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 498 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 499 return (EINVAL); 500 } 501 502 if (op_flags & RTM_F_CREATE) { 503 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) { 504 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed"); 505 return (ENOMEM); 506 } 507 } else { 508 struct route_nhop_data rnd_tmp; 509 RIB_RLOCK_TRACKER; 510 511 RIB_RLOCK(rnh); 512 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp); 513 RIB_RUNLOCK(rnh); 514 515 if (rt == NULL) 516 return (ESRCH); 517 } 518 519 return (add_route_flags(rnh, rt, rnd, op_flags, rc)); 520 } 521 522 /* 523 * Attempts to delete @dst/plen prefix matching gateway @gw from the 524 * routing rable. 525 * 526 * @fibnum: rtable id to remove route from 527 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 528 * @plen: prefix length (or -1 if host route or not applicable for AF) 529 * @gw: gateway to match 530 * @op_flags: combination of RTM_F_ flags 531 * @rc: storage to report operation result 532 * 533 * Returns 0 on success. 
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE allows deletion of higher-priority (e.g. PINNED) paths */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Return 0 on success.
627 */ 628 int 629 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src, 630 struct rib_head *rh_dst, struct rib_cmd_info *rc) 631 { 632 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop; 633 int error; 634 635 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0); 636 637 IF_DEBUG_LEVEL(LOG_DEBUG2) { 638 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE]; 639 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf)); 640 rt_print_buf(rt, rtbuf, sizeof(rtbuf)); 641 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u", 642 rtbuf, nhbuf, nhop_get_fibnum(nh_src)); 643 } 644 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family); 645 if (nh == NULL) { 646 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop"); 647 return (ENOMEM); 648 } 649 nhop_copy(nh, rnd_src->rnd_nhop); 650 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop)); 651 nhop_set_fibnum(nh, rh_dst->rib_fibnum); 652 nh = nhop_get_nhop_internal(rh_dst, nh, &error); 653 if (error != 0) { 654 FIB_RH_LOG(LOG_INFO, rh_dst, 655 "unable to finalize new nexthop: error %d", error); 656 return (ENOMEM); 657 } 658 659 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt)); 660 if (rt_new == NULL) { 661 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry"); 662 nhop_free(nh); 663 return (ENOMEM); 664 } 665 666 struct route_nhop_data rnd = { 667 .rnd_nhop = nh, 668 .rnd_weight = rnd_src->rnd_weight 669 }; 670 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? 
RTM_F_FORCE : 0); 671 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc); 672 673 if (error != 0) { 674 IF_DEBUG_LEVEL(LOG_DEBUG2) { 675 char buf[NHOP_PRINT_BUFSIZE]; 676 rt_print_buf(rt, buf, sizeof(buf)); 677 FIB_RH_LOG(LOG_DEBUG, rh_dst, 678 "Unable to add route %s: error %d", buf, error); 679 } 680 nhop_free(nh); 681 } 682 return (error); 683 } 684 685 /* 686 * Adds route defined by @info into the kernel table specified by @fibnum and 687 * sa_family in @info->rti_info[RTAX_DST]. 688 * 689 * Returns 0 on success and fills in operation metadata into @rc. 690 */ 691 int 692 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 693 struct rib_cmd_info *rc) 694 { 695 struct rib_head *rnh; 696 int error; 697 698 NET_EPOCH_ASSERT(); 699 700 rnh = get_rnh(fibnum, info); 701 if (rnh == NULL) 702 return (EAFNOSUPPORT); 703 704 /* 705 * Check consistency between RTF_HOST flag and netmask 706 * existence. 707 */ 708 if (info->rti_flags & RTF_HOST) 709 info->rti_info[RTAX_NETMASK] = NULL; 710 else if (info->rti_info[RTAX_NETMASK] == NULL) { 711 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask"); 712 return (EINVAL); 713 } 714 715 bzero(rc, sizeof(struct rib_cmd_info)); 716 rc->rc_cmd = RTM_ADD; 717 718 error = add_route_byinfo(rnh, info, rc); 719 if (error == 0) 720 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 721 722 return (error); 723 } 724 725 static int 726 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 727 struct rib_cmd_info *rc) 728 { 729 struct route_nhop_data rnd_add; 730 struct nhop_object *nh; 731 struct rtentry *rt; 732 struct sockaddr *dst, *gateway, *netmask; 733 int error; 734 735 dst = info->rti_info[RTAX_DST]; 736 gateway = info->rti_info[RTAX_GATEWAY]; 737 netmask = info->rti_info[RTAX_NETMASK]; 738 739 if ((info->rti_flags & RTF_GATEWAY) && !gateway) { 740 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw"); 741 return (EINVAL); 742 } 743 if (dst && gateway && !nhop_check_gateway(dst->sa_family, 
gateway->sa_family)) { 744 FIB_RH_LOG(LOG_DEBUG, rnh, 745 "error: invalid dst/gateway family combination (%d, %d)", 746 dst->sa_family, gateway->sa_family); 747 return (EINVAL); 748 } 749 750 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) { 751 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d", 752 dst->sa_len); 753 return (EINVAL); 754 } 755 756 if (info->rti_ifa == NULL) { 757 error = rt_getifa_fib(info, rnh->rib_fibnum); 758 if (error) 759 return (error); 760 } 761 762 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) 763 return (ENOBUFS); 764 765 error = nhop_create_from_info(rnh, info, &nh); 766 if (error != 0) { 767 rt_free_immediate(rt); 768 return (error); 769 } 770 771 rnd_add.rnd_nhop = nh; 772 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 773 774 int op_flags = RTM_F_CREATE; 775 776 /* 777 * Set the desired action when the route already exists: 778 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath. 779 * Otherwise, append the path. 780 */ 781 op_flags |= (info->rti_flags & RTF_PINNED) ? RTM_F_REPLACE : RTM_F_APPEND; 782 783 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc)); 784 } 785 786 static int 787 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add, 788 int op_flags, struct rib_cmd_info *rc) 789 { 790 struct route_nhop_data rnd_orig; 791 struct nhop_object *nh; 792 struct rtentry *rt_orig; 793 int error = 0; 794 795 MPASS(rt != NULL); 796 797 nh = rnd_add->rnd_nhop; 798 799 RIB_WLOCK(rnh); 800 801 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig); 802 803 if (rt_orig == NULL) { 804 if (op_flags & RTM_F_CREATE) 805 error = add_route(rnh, rt, rnd_add, rc); 806 else 807 error = ESRCH; /* no entry but creation was not required */ 808 RIB_WUNLOCK(rnh); 809 if (error != 0) 810 goto out; 811 return (0); 812 } 813 814 if (op_flags & RTM_F_EXCL) { 815 /* We have existing route in the RIB but not allowed to replace. 
*/ 816 RIB_WUNLOCK(rnh); 817 error = EEXIST; 818 goto out; 819 } 820 821 /* Now either append or replace */ 822 if (op_flags & RTM_F_REPLACE) { 823 if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) { 824 /* Old path is "better" (e.g. has PINNED flag set) */ 825 RIB_WUNLOCK(rnh); 826 error = EEXIST; 827 goto out; 828 } 829 change_route(rnh, rt_orig, rnd_add, rc); 830 RIB_WUNLOCK(rnh); 831 nh = rc->rc_nh_old; 832 goto out; 833 } 834 835 RIB_WUNLOCK(rnh); 836 837 #ifdef ROUTE_MPATH 838 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) && 839 nhop_can_multipath(rnd_add->rnd_nhop) && 840 nhop_can_multipath(rnd_orig.rnd_nhop)) { 841 842 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 843 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig, 844 op_flags, rc); 845 if (error != EAGAIN) 846 break; 847 RTSTAT_INC(rts_add_retry); 848 } 849 850 /* 851 * Original nhop reference is unused in any case. 852 */ 853 nhop_free_any(rnd_add->rnd_nhop); 854 if (op_flags & RTM_F_CREATE) { 855 if (error != 0 || rc->rc_cmd != RTM_ADD) 856 rt_free_immediate(rt); 857 } 858 return (error); 859 } 860 #endif 861 /* Out of options - free state and return error */ 862 error = EEXIST; 863 out: 864 if (op_flags & RTM_F_CREATE) 865 rt_free_immediate(rt); 866 nhop_free_any(nh); 867 868 return (error); 869 } 870 871 #ifdef ROUTE_MPATH 872 static int 873 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 874 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 875 int op_flags, struct rib_cmd_info *rc) 876 { 877 RIB_RLOCK_TRACKER; 878 struct route_nhop_data rnd_new; 879 int error = 0; 880 881 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new); 882 if (error != 0) { 883 if (error == EAGAIN) { 884 /* 885 * Group creation failed, most probably because 886 * @rnd_orig data got scheduled for deletion. 887 * Refresh @rnd_orig data and retry. 
888 */ 889 RIB_RLOCK(rnh); 890 lookup_prefix_rt(rnh, rt, rnd_orig); 891 RIB_RUNLOCK(rnh); 892 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) { 893 /* In this iteration route doesn't exist */ 894 error = ENOENT; 895 } 896 } 897 return (error); 898 } 899 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 900 if (error != 0) 901 return (error); 902 903 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) { 904 /* 905 * First multipath route got installed. Enable local 906 * outbound connections hashing. 907 */ 908 if (bootverbose) 909 printf("FIB: enabled flowid calculation for locally-originated packets\n"); 910 V_fib_hash_outbound = 1; 911 } 912 913 return (0); 914 } 915 #endif 916 917 /* 918 * Removes route defined by @info from the kernel table specified by @fibnum and 919 * sa_family in @info->rti_info[RTAX_DST]. 920 * 921 * Returns 0 on success and fills in operation metadata into @rc. 922 */ 923 int 924 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 925 { 926 struct rib_head *rnh; 927 struct sockaddr *dst, *netmask; 928 struct sockaddr_storage mdst; 929 int error; 930 931 NET_EPOCH_ASSERT(); 932 933 rnh = get_rnh(fibnum, info); 934 if (rnh == NULL) 935 return (EAFNOSUPPORT); 936 937 bzero(rc, sizeof(struct rib_cmd_info)); 938 rc->rc_cmd = RTM_DELETE; 939 940 dst = info->rti_info[RTAX_DST]; 941 netmask = info->rti_info[RTAX_NETMASK]; 942 943 if (netmask != NULL) { 944 /* Ensure @dst is always properly masked */ 945 if (dst->sa_len > sizeof(mdst)) { 946 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large"); 947 return (EINVAL); 948 } 949 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 950 dst = (struct sockaddr *)&mdst; 951 } 952 953 rib_filter_f_t *filter_func = NULL; 954 void *filter_arg = NULL; 955 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] }; 956 957 if (info->rti_filter != NULL) { 958 filter_func = info->rti_filter; 959 filter_arg = info->rti_filterdata; 960 } 
else if (gwd.gw != NULL) { 961 filter_func = match_gw_one; 962 filter_arg = &gwd; 963 } 964 965 int prio = get_prio_from_info(info); 966 967 RIB_WLOCK(rnh); 968 struct route_nhop_data rnd; 969 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 970 if (rt != NULL) { 971 error = rt_delete_conditional(rnh, rt, prio, filter_func, 972 filter_arg, rc); 973 } else 974 error = ESRCH; 975 RIB_WUNLOCK(rnh); 976 977 if (error != 0) 978 return (error); 979 980 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 981 982 if (rc->rc_cmd == RTM_DELETE) 983 rt_free(rc->rc_rt); 984 #ifdef ROUTE_MPATH 985 else { 986 /* 987 * Deleting 1 path may result in RTM_CHANGE to 988 * a different mpath group/nhop. 989 * Free old mpath group. 990 */ 991 nhop_free_any(rc->rc_nh_old); 992 } 993 #endif 994 995 return (0); 996 } 997 998 /* 999 * Conditionally unlinks rtentry paths from @rnh matching @cb. 1000 * Returns 0 on success with operation result stored in @rc. 1001 * On error, returns: 1002 * ESRCH - if prefix was not found or filter function failed to match 1003 * EADDRINUSE - if trying to delete higher priority route. 1004 */ 1005 static int 1006 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 1007 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc) 1008 { 1009 struct nhop_object *nh = rt->rt_nhop; 1010 1011 #ifdef ROUTE_MPATH 1012 if (NH_IS_NHGRP(nh)) { 1013 struct nhgrp_object *nhg = (struct nhgrp_object *)nh; 1014 struct route_nhop_data rnd; 1015 int error; 1016 1017 if (cb == NULL) 1018 return (ESRCH); 1019 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd); 1020 if (error == 0) { 1021 if (rnd.rnd_nhgrp == nhg) { 1022 /* No match, unreference new group and return. 
 */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	/* Only nexthops with priority <= @prio are eligible for deletion. */
	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

/*
 * Changes the route described by @info in the routing table specified
 * by @fibnum. The operation is retried internally if the route is
 * modified concurrently between the lockless steps.
 *
 * Returns 0 on success and fills in @rc with the action result.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. When multiple processes change the same
	 * route concurrently, the route may be changed between the steps.
	 * Address it by retrying the operation multiple times before
	 * failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	/* Snapshot current nexthop data to detect concurrent modification. */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	/*
	 * On EAGAIN @rnd_orig has been refreshed with the current route
	 * data by change_route_conditional(), so simply retry.
	 */
	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Creates a new nexthop based on @nh_orig, with the attributes
 * (gateway, interface, flags) updated from @info.
 * On success stores the result in @nh_new and returns 0.
 */
static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	    !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
/*
 * Changes the member of multipath route @rt matched by @info: creates a
 * replacement nexthop, builds a new nexthop group with it and
 * conditionally (iff @rnd_orig is still current) switches the route.
 * Returns 0 on success.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	/* Find the group member that @info describes. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	/* Build a copy of the group array with the matched member replaced. */
	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		/* Transient allocation failure: EAGAIN lets the caller retry. */
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

/*
 * Changes route @rt in @rnh as described by @info, assuming its current
 * nexthop data matches @rnd_orig.
 * Returns 0 on success, EAGAIN if the route has changed in between.
 */
static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd to @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct radix_node *rn;

	RIB_WLOCK_ASSERT(rnh);

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		/* Register nexthops carrying an expiration time in tmproutes. */
		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
			tmproutes_update(rnh, rt, rnd->rnd_nhop);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		return (0);
	}

	/* Existing route or memory allocation failure. */
	return (EEXIST);
}

/*
 * Unconditionally deletes @rt from @rnh.
 * Returns 0 on success, ESRCH if the prefix was not found.
 * Expects the RIB write lock to be held.
 */
static int
delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
{
	RIB_WLOCK_ASSERT(rnh);

	/* Route deletion requested. */
	struct radix_node *rn;

	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
	if (rn == NULL)
		return (ESRCH);
	rt = RNTORT(rn);
	/* Mark the unlinked entry as no longer usable. */
	rt->rte_flags &= ~RTF_UP;

	rib_bump_gen(rnh);
	rnh->rnh_prefixes--;

	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_new = NULL;
	rc->rc_nh_weight = rt->rt_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Expects the RIB write lock to be held.
 * Returns 0 on success.
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	/* A NULL nexthop means the route has to be removed instead. */
	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * Nexthop in @rnd_new is consumed: on success the reference on the
 * replaced (old) nexthop is released, on failure the new one is freed.
 * Returns 0 on success, or EAGAIN if the route changed underneath, in
 * which case @rnd_orig is updated with the current data so the caller
 * can retry.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
	RIB_WLOCK(rnh);

	/* Re-lookup the prefix under the write lock. */
	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		/* Release the reference on the now-replaced nexthop. */
		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failed: drop the pre-created nexthop from @rnd_new. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/*
 * State shared between rt_checkdelroute() callback invocations during
 * a conditional route table walk.
 */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being modified */
	struct rtentry *head;		/* chain of unlinked entries to reclaim */
	rib_filter_f_t *filter_f;	/* selects the items to delete */
	void *filter_arg;		/* opaque argument for @filter_f */
	int prio;			/* max nexthop priority to delete */
	struct rib_cmd_info rc;		/* result of the last operation */
};

/*
 * Conditionally unlinks rtentries or paths from radix tree based
 * on the callback data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	/* Always return 0 to continue the tree walk. */
	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @filter_arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	/* Phase 1: unlink the matching entries under the write lock. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	/* Phase 2: notify and free the unlinked entries, lock dropped. */
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			if (NH_IS_NHGRP(nh)) {
				/* Announce the deletion of each group member. */
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * rnh_walktree() callback: unconditionally removes @rn from the radix
 * tree and frees the corresponding rtentry.
 */
static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	if (RNTORT(rn) == rt)
		rt_free(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of a current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

/*
 * Flushes all routes of address family @family from every fib.
 */
void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

/*
 * Returns a short human-readable name for address family @family.
 */
const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}