1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_route.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/mbuf.h> 37 #include <sys/socket.h> 38 #include <sys/sysctl.h> 39 #include <sys/syslog.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/rmlock.h> 43 44 #include <net/if.h> 45 #include <net/if_var.h> 46 #include <net/if_private.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 #include <netinet6/in6_var.h> 58 59 #define DEBUG_MOD_NAME route_ctl 60 #define DEBUG_MAX_LEVEL LOG_DEBUG 61 #include <net/route/route_debug.h> 62 _DECLARE_DEBUG(LOG_INFO); 63 64 /* 65 * This file contains control plane routing tables functions. 66 * 67 * All functions assumes they are called in net epoch. 
 */

/*
 * Scratch storage big enough to hold any supported sockaddr
 * (IPv4/IPv6); used for building netmasks on the stack.
 */
union sockaddr_union {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	char _buf[32];
};

/* Forward declarations of file-local helpers defined below. */
static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask);
static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
VNET_DEFINE(u_int, rib_route_multipath) = 1;
#else
/* Without ROUTE_MPATH the knob is read-only and permanently off. */
#define	_MP_FLAGS	CTLFLAG_RD
VNET_DEFINE(u_int, rib_route_multipath) = 0;
#endif
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

/*
 * Returns the routing table head for @fibnum and the address family of
 * @info's destination, or NULL if no table exists for that family.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
/* Reports whether IPv4-over-IPv6 (RFC 5549) nexthops are enabled. */
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

#ifdef ROUTE_MPATH
/* Reports whether multipath routing is enabled in @rh's vnet. */
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
185 * Avoid nhops without gateways and redirects. 186 * 187 * Returns 1 for multipath-eligible nexthop, 188 * 0 otherwise. 189 */ 190 bool 191 nhop_can_multipath(const struct nhop_object *nh) 192 { 193 194 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 195 return (1); 196 if ((nh->nh_flags & NHF_GATEWAY) == 0) 197 return (0); 198 if ((nh->nh_flags & NHF_REDIRECT) != 0) 199 return (0); 200 201 return (1); 202 } 203 #endif 204 205 static int 206 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 207 { 208 uint32_t weight; 209 210 if (info->rti_mflags & RTV_WEIGHT) 211 weight = info->rti_rmx->rmx_weight; 212 else 213 weight = default_weight; 214 /* Keep upper 1 byte for adm distance purposes */ 215 if (weight > RT_MAX_WEIGHT) 216 weight = RT_MAX_WEIGHT; 217 else if (weight == 0) 218 weight = default_weight; 219 220 return (weight); 221 } 222 223 /* 224 * File-local concept for distingushing between the normal and 225 * RTF_PINNED routes tha can override the "normal" one. 226 */ 227 #define NH_PRIORITY_HIGH 2 228 #define NH_PRIORITY_NORMAL 1 229 static int 230 get_prio_from_info(const struct rt_addrinfo *info) 231 { 232 if (info->rti_flags & RTF_PINNED) 233 return (NH_PRIORITY_HIGH); 234 return (NH_PRIORITY_NORMAL); 235 } 236 237 static int 238 nhop_get_prio(const struct nhop_object *nh) 239 { 240 if (NH_IS_PINNED(nh)) 241 return (NH_PRIORITY_HIGH); 242 return (NH_PRIORITY_NORMAL); 243 } 244 245 /* 246 * Check if specified @gw matches gw data in the nexthop @nh. 247 * 248 * Returns true if matches, false otherwise. 
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			/* Link-level gateways compare by interface index only. */
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		/* Unknown family: fall back to raw byte comparison. */
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Matches all nexthop with given @gw.
 * Can be used as rib_filter_f callback.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;

	return (match_nhop_gw(nh, gw));
}

/* State for match_gw_one(): gateway to match and matches seen so far. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Matches first occurrence of the gateway provided in @gwd
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;

	/* Return only first match to make rtsock happy */
	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
		return (1);
	return (0);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	/* A caller-supplied filter takes precedence over gateway matching. */
	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

/*
 * Runs exact prefix match using @rt's own key/mask.
 * Returns matched @rtentry if found or NULL; fills @rnd on success.
 */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
383 */ 384 struct rtentry * 385 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 386 struct route_nhop_data *rnd) 387 { 388 struct rtentry *rt; 389 390 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 391 info->rti_info[RTAX_NETMASK], rnd); 392 393 return (rt); 394 } 395 396 const struct rtentry * 397 rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen, 398 struct route_nhop_data *rnd) 399 { 400 union sockaddr_union mask_storage; 401 struct sockaddr *netmask = &mask_storage.sa; 402 403 if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) 404 return (lookup_prefix_bysa(rnh, dst, netmask, rnd)); 405 return (NULL); 406 } 407 408 static bool 409 fill_pxmask_family(int family, int plen, struct sockaddr *_dst, 410 struct sockaddr **pmask) 411 { 412 if (plen == -1) { 413 *pmask = NULL; 414 return (true); 415 } 416 417 switch (family) { 418 #ifdef INET 419 case AF_INET: 420 { 421 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask); 422 struct sockaddr_in *dst= (struct sockaddr_in *)_dst; 423 424 memset(mask, 0, sizeof(*mask)); 425 mask->sin_family = family; 426 mask->sin_len = sizeof(*mask); 427 if (plen == 32) 428 *pmask = NULL; 429 else if (plen > 32 || plen < 0) 430 return (false); 431 else { 432 uint32_t daddr, maddr; 433 maddr = htonl(plen ? 
~((1 << (32 - plen)) - 1) : 0); 434 mask->sin_addr.s_addr = maddr; 435 daddr = dst->sin_addr.s_addr; 436 daddr = htonl(ntohl(daddr) & ntohl(maddr)); 437 dst->sin_addr.s_addr = daddr; 438 } 439 return (true); 440 } 441 break; 442 #endif 443 #ifdef INET6 444 case AF_INET6: 445 { 446 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask); 447 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst; 448 449 memset(mask, 0, sizeof(*mask)); 450 mask->sin6_family = family; 451 mask->sin6_len = sizeof(*mask); 452 if (plen == 128) 453 *pmask = NULL; 454 else if (plen > 128 || plen < 0) 455 return (false); 456 else { 457 ip6_writemask(&mask->sin6_addr, plen); 458 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr); 459 } 460 return (true); 461 } 462 break; 463 #endif 464 } 465 return (false); 466 } 467 468 /* 469 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd 470 * to the routing table. 471 * 472 * @fibnum: verified kernel rtable id to insert route to 473 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 474 * @plen: prefix length (or -1 if host route or not applicable for AF) 475 * @op_flags: combination of RTM_F_ flags 476 * @rc: storage to report operation result 477 * 478 * Returns 0 on success. 
 */
int
rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	struct rtentry *rt = NULL;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	if (op_flags & RTM_F_CREATE) {
		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
			return (ENOMEM);
		}
	} else {
		/* No creation requested: the prefix must already exist. */
		struct route_nhop_data rnd_tmp;
		RIB_RLOCK_TRACKER;

		RIB_RLOCK(rnh);
		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
		RIB_RUNLOCK(rnh);

		if (rt == NULL)
			return (ESRCH);
	}

	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @gw: gateway to match
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE permits deletion of pinned (high-priority) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Return 0 on success.
627 */ 628 int 629 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src, 630 struct rib_head *rh_dst, struct rib_cmd_info *rc) 631 { 632 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop; 633 int error; 634 635 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0); 636 637 IF_DEBUG_LEVEL(LOG_DEBUG2) { 638 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE]; 639 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf)); 640 rt_print_buf(rt, rtbuf, sizeof(rtbuf)); 641 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u", 642 rtbuf, nhbuf, nhop_get_fibnum(nh_src)); 643 } 644 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family); 645 if (nh == NULL) { 646 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop"); 647 return (ENOMEM); 648 } 649 nhop_copy(nh, rnd_src->rnd_nhop); 650 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop)); 651 nhop_set_fibnum(nh, rh_dst->rib_fibnum); 652 nh = nhop_get_nhop_internal(rh_dst, nh, &error); 653 if (error != 0) { 654 FIB_RH_LOG(LOG_INFO, rh_dst, 655 "unable to finalize new nexthop: error %d", error); 656 return (ENOMEM); 657 } 658 659 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt)); 660 if (rt_new == NULL) { 661 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry"); 662 nhop_free(nh); 663 return (ENOMEM); 664 } 665 666 struct route_nhop_data rnd = { 667 .rnd_nhop = nh, 668 .rnd_weight = rnd_src->rnd_weight 669 }; 670 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? 
RTM_F_FORCE : 0); 671 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc); 672 673 if (error != 0) { 674 IF_DEBUG_LEVEL(LOG_DEBUG2) { 675 char buf[NHOP_PRINT_BUFSIZE]; 676 rt_print_buf(rt_new, buf, sizeof(buf)); 677 FIB_RH_LOG(LOG_DEBUG, rh_dst, 678 "Unable to add route %s: error %d", buf, error); 679 } 680 nhop_free(nh); 681 rt_free_immediate(rt_new); 682 } 683 return (error); 684 } 685 686 /* 687 * Adds route defined by @info into the kernel table specified by @fibnum and 688 * sa_family in @info->rti_info[RTAX_DST]. 689 * 690 * Returns 0 on success and fills in operation metadata into @rc. 691 */ 692 int 693 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 694 struct rib_cmd_info *rc) 695 { 696 struct rib_head *rnh; 697 int error; 698 699 NET_EPOCH_ASSERT(); 700 701 rnh = get_rnh(fibnum, info); 702 if (rnh == NULL) 703 return (EAFNOSUPPORT); 704 705 /* 706 * Check consistency between RTF_HOST flag and netmask 707 * existence. 708 */ 709 if (info->rti_flags & RTF_HOST) 710 info->rti_info[RTAX_NETMASK] = NULL; 711 else if (info->rti_info[RTAX_NETMASK] == NULL) { 712 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask"); 713 return (EINVAL); 714 } 715 716 bzero(rc, sizeof(struct rib_cmd_info)); 717 rc->rc_cmd = RTM_ADD; 718 719 error = add_route_byinfo(rnh, info, rc); 720 if (error == 0) 721 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 722 723 return (error); 724 } 725 726 static int 727 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 728 struct rib_cmd_info *rc) 729 { 730 struct route_nhop_data rnd_add; 731 struct nhop_object *nh; 732 struct rtentry *rt; 733 struct sockaddr *dst, *gateway, *netmask; 734 int error; 735 736 dst = info->rti_info[RTAX_DST]; 737 gateway = info->rti_info[RTAX_GATEWAY]; 738 netmask = info->rti_info[RTAX_NETMASK]; 739 740 if ((info->rti_flags & RTF_GATEWAY) && !gateway) { 741 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw"); 742 return (EINVAL); 743 } 744 if (dst && gateway && 
!nhop_check_gateway(dst->sa_family, gateway->sa_family)) { 745 FIB_RH_LOG(LOG_DEBUG, rnh, 746 "error: invalid dst/gateway family combination (%d, %d)", 747 dst->sa_family, gateway->sa_family); 748 return (EINVAL); 749 } 750 751 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) { 752 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d", 753 dst->sa_len); 754 return (EINVAL); 755 } 756 757 if (info->rti_ifa == NULL) { 758 error = rt_getifa_fib(info, rnh->rib_fibnum); 759 if (error) 760 return (error); 761 } 762 763 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) 764 return (ENOBUFS); 765 766 error = nhop_create_from_info(rnh, info, &nh); 767 if (error != 0) { 768 rt_free_immediate(rt); 769 return (error); 770 } 771 772 rnd_add.rnd_nhop = nh; 773 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 774 775 int op_flags = RTM_F_CREATE; 776 777 /* 778 * Set the desired action when the route already exists: 779 * If RTF_PINNED is present, assume the direct kernel routes that cannot be multipath. 780 * Otherwise, append the path. 781 */ 782 op_flags |= (info->rti_flags & RTF_PINNED) ? 
RTM_F_REPLACE : RTM_F_APPEND; 783 784 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc)); 785 } 786 787 static int 788 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add, 789 int op_flags, struct rib_cmd_info *rc) 790 { 791 struct route_nhop_data rnd_orig; 792 struct nhop_object *nh; 793 struct rtentry *rt_orig; 794 int error = 0; 795 796 MPASS(rt != NULL); 797 798 nh = rnd_add->rnd_nhop; 799 800 RIB_WLOCK(rnh); 801 802 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig); 803 804 if (rt_orig == NULL) { 805 if (op_flags & RTM_F_CREATE) 806 error = add_route(rnh, rt, rnd_add, rc); 807 else 808 error = ESRCH; /* no entry but creation was not required */ 809 RIB_WUNLOCK(rnh); 810 if (error != 0) 811 goto out; 812 return (0); 813 } 814 815 if (op_flags & RTM_F_EXCL) { 816 /* We have existing route in the RIB but not allowed to replace. */ 817 RIB_WUNLOCK(rnh); 818 error = EEXIST; 819 goto out; 820 } 821 822 /* Now either append or replace */ 823 if (op_flags & RTM_F_REPLACE) { 824 if (nhop_get_prio(rnd_orig.rnd_nhop) == NH_PRIORITY_HIGH) { 825 /* Old path is "better" (e.g. has PINNED flag set) */ 826 RIB_WUNLOCK(rnh); 827 error = EEXIST; 828 goto out; 829 } 830 change_route(rnh, rt_orig, rnd_add, rc); 831 RIB_WUNLOCK(rnh); 832 nh = rc->rc_nh_old; 833 goto out; 834 } 835 836 RIB_WUNLOCK(rnh); 837 838 #ifdef ROUTE_MPATH 839 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) && 840 nhop_can_multipath(rnd_add->rnd_nhop) && 841 nhop_can_multipath(rnd_orig.rnd_nhop)) { 842 843 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 844 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig, 845 op_flags, rc); 846 if (error != EAGAIN) 847 break; 848 RTSTAT_INC(rts_add_retry); 849 } 850 851 /* 852 * Original nhop reference is unused in any case. 
853 */ 854 nhop_free_any(rnd_add->rnd_nhop); 855 if (op_flags & RTM_F_CREATE) { 856 if (error != 0 || rc->rc_cmd != RTM_ADD) 857 rt_free_immediate(rt); 858 } 859 return (error); 860 } 861 #endif 862 /* Out of options - free state and return error */ 863 error = EEXIST; 864 out: 865 if (op_flags & RTM_F_CREATE) 866 rt_free_immediate(rt); 867 nhop_free_any(nh); 868 869 return (error); 870 } 871 872 #ifdef ROUTE_MPATH 873 static int 874 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 875 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 876 int op_flags, struct rib_cmd_info *rc) 877 { 878 RIB_RLOCK_TRACKER; 879 struct route_nhop_data rnd_new; 880 int error = 0; 881 882 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new); 883 if (error != 0) { 884 if (error == EAGAIN) { 885 /* 886 * Group creation failed, most probably because 887 * @rnd_orig data got scheduled for deletion. 888 * Refresh @rnd_orig data and retry. 889 */ 890 RIB_RLOCK(rnh); 891 lookup_prefix_rt(rnh, rt, rnd_orig); 892 RIB_RUNLOCK(rnh); 893 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) { 894 /* In this iteration route doesn't exist */ 895 error = ENOENT; 896 } 897 } 898 return (error); 899 } 900 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 901 if (error != 0) 902 return (error); 903 904 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) { 905 /* 906 * First multipath route got installed. Enable local 907 * outbound connections hashing. 908 */ 909 if (bootverbose) 910 printf("FIB: enabled flowid calculation for locally-originated packets\n"); 911 V_fib_hash_outbound = 1; 912 } 913 914 return (0); 915 } 916 #endif 917 918 /* 919 * Removes route defined by @info from the kernel table specified by @fibnum and 920 * sa_family in @info->rti_info[RTAX_DST]. 921 * 922 * Returns 0 on success and fills in operation metadata into @rc. 
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	/*
	 * Pick the path filter: an explicit filter from @info wins,
	 * otherwise fall back to matching the supplied gateway (if any).
	 */
	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Conditionally unlinks rtentry paths from @rnh matching @cb.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found or filter function failed to match
 * EADDRINUSE - if trying to delete higher priority route.
 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		/* Multipath routes require a filter to select paths. */
		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

/*
 * Changes the route matching @info in table @fibnum.
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility..
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. When multiple processes change the same
	 * route concurrently, the route may be changed between the steps.
	 * Address this by retrying the operation multiple times before
	 * failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Creates a new nexthop @nh_new based on @nh_orig with the changes
 * requested in @info (gateway, interface, flags).
 *
 * Returns 0 on success.
 */
static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
/*
 * Changes the path of a multipath route @rt that matches @info:
 * replaces the matched member nexthop with an updated one and builds
 * a new nexthop group.
 *
 * Returns 0 on success, ESRCH if no member matched, EAGAIN to retry.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	/* Find the group member matching @info's filter/gateway. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		/* Transient allocation failure: let the caller retry. */
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

/*
 * Changes route @rt as requested by @info, assuming its previous
 * nexthop data was @rnd_orig.
 *
 * Returns 0 on success, EAGAIN if the route changed underneath us.
 */
static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd_new to @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct radix_node *rn;

	RIB_WLOCK_ASSERT(rnh);

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		/* Track expiring routes so they can be garbage-collected. */
		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
			tmproutes_update(rnh, rt, rnd->rnd_nhop);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		return (0);
	}

	/* Existing route or memory allocation failure. */
	return (EEXIST);
}

/*
 * Unconditionally deletes @rt from @rnh.
 */
static int
delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
{
	RIB_WLOCK_ASSERT(rnh);

	/* Route deletion requested. */
	struct radix_node *rn;

	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
	if (rn == NULL)
		return (ESRCH);
	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	rib_bump_gen(rnh);
	rnh->rnh_prefixes--;

	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_new = NULL;
	rc->rc_nh_weight = rt->rt_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Returns 0 on success.
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	/* A NULL nexthop request means deletion. */
	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * Nexthop in @rnd_new is consumed (freed on failure, referenced by the
 * route on success).
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
	RIB_WLOCK(rnh);

	/* Re-lookup under the write lock: the tree may have changed. */
	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		/* Success: drop the reference to the replaced nexthop. */
		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failure: @rnd_new was not consumed, release it. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/* State shared between rib_walk_del() and its rt_checkdelroute() callback. */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked entries to reclaim */
	rib_filter_f_t *filter_f;	/* selects routes/paths to delete */
	void *filter_arg;		/* opaque argument for @filter_f */
	int prio;			/* routes with higher-prio nhops are kept */
	struct rib_cmd_info rc;		/* result of the last operation */
};

/*
 * Conditionally unlinks rtentries or paths from radix tree based
 * on the callback data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	/* No match or higher-priority route: keep the entry. */
	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @filter_arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	/* Remaining fields (head, rc) start out zeroed. */
	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		/* Deliver the delayed notification outside the rib lock. */
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			if (NH_IS_NHGRP(nh)) {
				/* Announce the deletion of each path. */
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * rnh_walktree() callback: unconditionally removes the entry from
 * its table and schedules it for freeing.
 */
static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	if (RNTORT(rn) == rt)
		rt_free(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of a current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

/*
 * Flushes the routing table of address family @family in every fib.
 */
void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

/*
 * Returns a short human-readable name for address family @family.
 */
const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}