1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 #include <netinet6/in6_var.h> 58 59 #define DEBUG_MOD_NAME route_ctl 60 #define DEBUG_MAX_LEVEL LOG_DEBUG 61 #include <net/route/route_debug.h> 62 _DECLARE_DEBUG(LOG_INFO); 63 64 /* 65 * This file contains control plane routing tables functions. 66 * 67 * All functions assumes they are called in net epoch. 68 */ 69 70 union sockaddr_union { 71 struct sockaddr sa; 72 struct sockaddr_in sin; 73 struct sockaddr_in6 sin6; 74 char _buf[32]; 75 }; 76 77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 78 struct rib_cmd_info *rc); 79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt, 80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig, 81 struct rib_cmd_info *rc); 82 83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt, 84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc); 85 #ifdef ROUTE_MPATH 86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 87 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 88 int op_flags, struct rib_cmd_info *rc); 89 #endif 90 91 static int add_route(struct rib_head *rnh, struct rtentry *rt, 92 struct route_nhop_data *rnd, struct rib_cmd_info *rc); 93 static int delete_route(struct rib_head *rnh, struct rtentry *rt, 94 struct rib_cmd_info *rc); 95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 96 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc); 97 98 static int get_prio_from_info(const struct rt_addrinfo *info); 99 static int nhop_get_prio(const struct nhop_object *nh); 100 101 #ifdef ROUTE_MPATH 102 static bool rib_can_multipath(struct rib_head *rh); 103 #endif 104 105 /* Per-vnet multipath routing configuration */ 106 SYSCTL_DECL(_net_route); 107 #define V_rib_route_multipath VNET(rib_route_multipath) 108 #ifdef ROUTE_MPATH 109 #define _MP_FLAGS CTLFLAG_RW 110 #else 111 #define _MP_FLAGS CTLFLAG_RD 112 #endif 113 VNET_DEFINE(u_int, rib_route_multipath) = 1; 114 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 115 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 116 #undef _MP_FLAGS 117 118 #ifdef ROUTE_MPATH 119 VNET_DEFINE(u_int, fib_hash_outbound) = 0; 120 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET, 121 &VNET_NAME(fib_hash_outbound), 0, 122 "Compute flowid for locally-originated packets"); 123 124 /* Default entropy to add to the hash calculation for the outbound connections*/ 125 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = { 126 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 127 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 128 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 129 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 130 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, 131 }; 132 #endif 133 134 #if defined(INET) && defined(INET6) 135 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops"); 136 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop) 137 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1; 138 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET, 139 &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address"); 140 #endif 141 142 /* Debug bits */ 143 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 144 145 static struct rib_head * 146 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 147 { 148 struct rib_head *rnh; 149 struct sockaddr *dst; 150 151 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 152 153 dst = info->rti_info[RTAX_DST]; 154 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 155 156 return (rnh); 157 } 158 159 #if defined(INET) && defined(INET6) 160 bool 161 rib_can_4o6_nhop(void) 162 { 163 return (!!V_rib_route_ipv6_nexthop); 164 } 165 #endif 166 167 #ifdef ROUTE_MPATH 168 static bool 169 rib_can_multipath(struct rib_head *rh) 170 { 171 int result; 172 173 CURVNET_SET(rh->rib_vnet); 174 result = !!V_rib_route_multipath; 175 CURVNET_RESTORE(); 176 177 return (result); 178 } 179 180 /* 181 * Check is nhop is multipath-eligible. 182 * Avoid nhops without gateways and redirects. 183 * 184 * Returns 1 for multipath-eligible nexthop, 185 * 0 otherwise. 186 */ 187 bool 188 nhop_can_multipath(const struct nhop_object *nh) 189 { 190 191 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 192 return (1); 193 if ((nh->nh_flags & NHF_GATEWAY) == 0) 194 return (0); 195 if ((nh->nh_flags & NHF_REDIRECT) != 0) 196 return (0); 197 198 return (1); 199 } 200 #endif 201 202 static int 203 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 204 { 205 uint32_t weight; 206 207 if (info->rti_mflags & RTV_WEIGHT) 208 weight = info->rti_rmx->rmx_weight; 209 else 210 weight = default_weight; 211 /* Keep upper 1 byte for adm distance purposes */ 212 if (weight > RT_MAX_WEIGHT) 213 weight = RT_MAX_WEIGHT; 214 else if (weight == 0) 215 weight = default_weight; 216 217 return (weight); 218 } 219 220 /* 221 * File-local concept for distingushing between the normal and 222 * RTF_PINNED routes tha can override the "normal" one. 223 */ 224 #define NH_PRIORITY_HIGH 2 225 #define NH_PRIORITY_NORMAL 1 226 static int 227 get_prio_from_info(const struct rt_addrinfo *info) 228 { 229 if (info->rti_flags & RTF_PINNED) 230 return (NH_PRIORITY_HIGH); 231 return (NH_PRIORITY_NORMAL); 232 } 233 234 static int 235 nhop_get_prio(const struct nhop_object *nh) 236 { 237 if (NH_IS_PINNED(nh)) 238 return (NH_PRIORITY_HIGH); 239 return (NH_PRIORITY_NORMAL); 240 } 241 242 /* 243 * Check if specified @gw matches gw data in the nexthop @nh. 244 * 245 * Returns true if matches, false otherwise. 246 */ 247 bool 248 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 249 { 250 251 if (nh->gw_sa.sa_family != gw->sa_family) 252 return (false); 253 254 switch (gw->sa_family) { 255 case AF_INET: 256 return (nh->gw4_sa.sin_addr.s_addr == 257 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 258 case AF_INET6: 259 { 260 const struct sockaddr_in6 *gw6; 261 gw6 = (const struct sockaddr_in6 *)gw; 262 263 /* 264 * Currently (2020-09) IPv6 gws in kernel have their 265 * scope embedded. Once this becomes false, this code 266 * has to be revisited. 267 */ 268 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 269 &gw6->sin6_addr)) 270 return (true); 271 return (false); 272 } 273 case AF_LINK: 274 { 275 const struct sockaddr_dl *sdl; 276 sdl = (const struct sockaddr_dl *)gw; 277 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 278 } 279 default: 280 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 281 } 282 283 /* NOTREACHED */ 284 return (false); 285 } 286 287 /* 288 * Matches all nexthop with given @gw. 289 * Can be used as rib_filter_f callback. 290 */ 291 int 292 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa) 293 { 294 const struct sockaddr *gw = (const struct sockaddr *)gw_sa; 295 296 return (match_nhop_gw(nh, gw)); 297 } 298 299 struct gw_filter_data { 300 const struct sockaddr *gw; 301 int count; 302 }; 303 304 /* 305 * Matches first occurence of the gateway provided in @gwd 306 */ 307 static int 308 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 309 { 310 struct gw_filter_data *gwd = (struct gw_filter_data *)_data; 311 312 /* Return only first match to make rtsock happy */ 313 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0) 314 return (1); 315 return (0); 316 } 317 318 /* 319 * Checks if data in @info matches nexhop @nh. 320 * 321 * Returns 0 on success, 322 * ESRCH if not matched, 323 * ENOENT if filter function returned false 324 */ 325 int 326 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 327 const struct nhop_object *nh) 328 { 329 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 330 331 if (info->rti_filter != NULL) { 332 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 333 return (ENOENT); 334 else 335 return (0); 336 } 337 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 338 return (ESRCH); 339 340 return (0); 341 } 342 343 /* 344 * Runs exact prefix match based on @dst and @netmask. 345 * Returns matched @rtentry if found or NULL. 346 * If rtentry was found, saves nexthop / weight value into @rnd. 347 */ 348 static struct rtentry * 349 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 350 const struct sockaddr *netmask, struct route_nhop_data *rnd) 351 { 352 struct rtentry *rt; 353 354 RIB_LOCK_ASSERT(rnh); 355 356 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 357 if (rt != NULL) { 358 rnd->rnd_nhop = rt->rt_nhop; 359 rnd->rnd_weight = rt->rt_weight; 360 } else { 361 rnd->rnd_nhop = NULL; 362 rnd->rnd_weight = 0; 363 } 364 365 return (rt); 366 } 367 368 struct rtentry * 369 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt, 370 struct route_nhop_data *rnd) 371 { 372 return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd)); 373 } 374 375 /* 376 * Runs exact prefix match based on dst/netmask from @info. 377 * Assumes RIB lock is held. 378 * Returns matched @rtentry if found or NULL. 379 * If rtentry was found, saves nexthop / weight value into @rnd. 380 */ 381 struct rtentry * 382 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 383 struct route_nhop_data *rnd) 384 { 385 struct rtentry *rt; 386 387 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 388 info->rti_info[RTAX_NETMASK], rnd); 389 390 return (rt); 391 } 392 393 static bool 394 fill_pxmask_family(int family, int plen, struct sockaddr *_dst, 395 struct sockaddr **pmask) 396 { 397 if (plen == -1) { 398 *pmask = NULL; 399 return (true); 400 } 401 402 switch (family) { 403 #ifdef INET 404 case AF_INET: 405 { 406 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask); 407 struct sockaddr_in *dst= (struct sockaddr_in *)_dst; 408 409 memset(mask, 0, sizeof(*mask)); 410 mask->sin_family = family; 411 mask->sin_len = sizeof(*mask); 412 if (plen == 32) 413 *pmask = NULL; 414 else if (plen > 32 || plen < 0) 415 return (false); 416 else { 417 uint32_t daddr, maddr; 418 maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0); 419 mask->sin_addr.s_addr = maddr; 420 daddr = dst->sin_addr.s_addr; 421 daddr = htonl(ntohl(daddr) & ntohl(maddr)); 422 dst->sin_addr.s_addr = daddr; 423 } 424 return (true); 425 } 426 break; 427 #endif 428 #ifdef INET6 429 case AF_INET6: 430 { 431 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask); 432 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst; 433 434 memset(mask, 0, sizeof(*mask)); 435 mask->sin6_family = family; 436 mask->sin6_len = sizeof(*mask); 437 if (plen == 128) 438 *pmask = NULL; 439 else if (plen > 128 || plen < 0) 440 return (false); 441 else { 442 ip6_writemask(&mask->sin6_addr, plen); 443 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr); 444 } 445 return (true); 446 } 447 break; 448 #endif 449 } 450 return (false); 451 } 452 453 /* 454 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd 455 * to the routing table. 456 * 457 * @fibnum: rtable id to insert route to 458 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 459 * @plen: prefix length (or -1 if host route or not applicable for AF) 460 * @op_flags: combination of RTM_F_ flags 461 * @rc: storage to report operation result 462 * 463 * Returns 0 on success. 464 */ 465 int 466 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 467 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc) 468 { 469 union sockaddr_union mask_storage; 470 struct sockaddr *netmask = &mask_storage.sa; 471 struct rtentry *rt = NULL; 472 473 NET_EPOCH_ASSERT(); 474 475 bzero(rc, sizeof(struct rib_cmd_info)); 476 rc->rc_cmd = RTM_ADD; 477 478 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 479 if (rnh == NULL) 480 return (EAFNOSUPPORT); 481 482 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 483 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 484 return (EINVAL); 485 } 486 487 if (op_flags & RTM_F_CREATE) { 488 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) { 489 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed"); 490 return (ENOMEM); 491 } 492 } 493 494 return (add_route_flags(rnh, rt, rnd, op_flags, rc)); 495 } 496 497 /* 498 * Attempts to delete @dst/plen prefix matching gateway @gw from the 499 * routing rable. 500 * 501 * @fibnum: rtable id to remove route from 502 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 503 * @plen: prefix length (or -1 if host route or not applicable for AF) 504 * @gw: gateway to match 505 * @op_flags: combination of RTM_F_ flags 506 * @rc: storage to report operation result 507 * 508 * Returns 0 on success. 509 */ 510 int 511 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen, 512 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc) 513 { 514 struct gw_filter_data gwd = { .gw = gw }; 515 516 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc)); 517 } 518 519 /* 520 * Attempts to delete @dst/plen prefix matching @filter_func from the 521 * routing rable. 522 * 523 * @fibnum: rtable id to remove route from 524 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 525 * @plen: prefix length (or -1 if host route or not applicable for AF) 526 * @filter_func: func to be called for each nexthop of the prefix for matching 527 * @filter_arg: argument to pass to @filter_func 528 * @op_flags: combination of RTM_F_ flags 529 * @rc: storage to report operation result 530 * 531 * Returns 0 on success. 532 */ 533 int 534 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 535 rib_filter_f_t *filter_func, void *filter_arg, int op_flags, 536 struct rib_cmd_info *rc) 537 { 538 union sockaddr_union mask_storage; 539 struct sockaddr *netmask = &mask_storage.sa; 540 int error; 541 542 NET_EPOCH_ASSERT(); 543 544 bzero(rc, sizeof(struct rib_cmd_info)); 545 rc->rc_cmd = RTM_DELETE; 546 547 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 548 if (rnh == NULL) 549 return (EAFNOSUPPORT); 550 551 if (dst->sa_len > sizeof(mask_storage)) { 552 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len); 553 return (EINVAL); 554 } 555 556 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 557 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 558 return (EINVAL); 559 } 560 561 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL; 562 563 RIB_WLOCK(rnh); 564 struct route_nhop_data rnd; 565 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 566 if (rt != NULL) { 567 error = rt_delete_conditional(rnh, rt, prio, filter_func, 568 filter_arg, rc); 569 } else 570 error = ESRCH; 571 RIB_WUNLOCK(rnh); 572 573 if (error != 0) 574 return (error); 575 576 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 577 578 if (rc->rc_cmd == RTM_DELETE) 579 rt_free(rc->rc_rt); 580 #ifdef ROUTE_MPATH 581 else { 582 /* 583 * Deleting 1 path may result in RTM_CHANGE to 584 * a different mpath group/nhop. 585 * Free old mpath group. 586 */ 587 nhop_free_any(rc->rc_nh_old); 588 } 589 #endif 590 591 return (0); 592 } 593 594 /* 595 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh. 596 * @rt: route to copy. 597 * @rnd_src: nhop and weight. Multipath routes are not supported 598 * @rh_dst: target rtable. 599 * @rc: operation result storage 600 * 601 * Return 0 on success. 602 */ 603 int 604 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src, 605 struct rib_head *rh_dst, struct rib_cmd_info *rc) 606 { 607 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop; 608 int error; 609 610 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0); 611 612 IF_DEBUG_LEVEL(LOG_DEBUG2) { 613 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE]; 614 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf)); 615 rt_print_buf(rt, rtbuf, sizeof(rtbuf)); 616 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u", 617 rtbuf, nhbuf, nhop_get_fibnum(nh_src)); 618 } 619 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family); 620 if (nh == NULL) { 621 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop"); 622 return (ENOMEM); 623 } 624 nhop_copy(nh, rnd_src->rnd_nhop); 625 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop)); 626 nhop_set_fibnum(nh, rh_dst->rib_fibnum); 627 nh = nhop_get_nhop_internal(rh_dst, nh, &error); 628 if (error != 0) { 629 FIB_RH_LOG(LOG_INFO, rh_dst, 630 "unable to finalize new nexthop: error %d", error); 631 return (ENOMEM); 632 } 633 634 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt)); 635 if (rt_new == NULL) { 636 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry"); 637 nhop_free(nh); 638 return (ENOMEM); 639 } 640 641 struct route_nhop_data rnd = { 642 .rnd_nhop = nh, 643 .rnd_weight = rnd_src->rnd_weight 644 }; 645 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0); 646 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc); 647 648 if (error != 0) { 649 IF_DEBUG_LEVEL(LOG_DEBUG2) { 650 char buf[NHOP_PRINT_BUFSIZE]; 651 rt_print_buf(rt_new, buf, sizeof(buf)); 652 FIB_RH_LOG(LOG_DEBUG, rh_dst, 653 "Unable to add route %s: error %d", buf, error); 654 } 655 nhop_free(nh); 656 rt_free_immediate(rt_new); 657 } 658 return (error); 659 } 660 661 /* 662 * Adds route defined by @info into the kernel table specified by @fibnum and 663 * sa_family in @info->rti_info[RTAX_DST]. 664 * 665 * Returns 0 on success and fills in operation metadata into @rc. 666 */ 667 int 668 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 669 struct rib_cmd_info *rc) 670 { 671 struct rib_head *rnh; 672 int error; 673 674 NET_EPOCH_ASSERT(); 675 676 rnh = get_rnh(fibnum, info); 677 if (rnh == NULL) 678 return (EAFNOSUPPORT); 679 680 /* 681 * Check consistency between RTF_HOST flag and netmask 682 * existence. 683 */ 684 if (info->rti_flags & RTF_HOST) 685 info->rti_info[RTAX_NETMASK] = NULL; 686 else if (info->rti_info[RTAX_NETMASK] == NULL) { 687 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask"); 688 return (EINVAL); 689 } 690 691 bzero(rc, sizeof(struct rib_cmd_info)); 692 rc->rc_cmd = RTM_ADD; 693 694 error = add_route_byinfo(rnh, info, rc); 695 if (error == 0) 696 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 697 698 return (error); 699 } 700 701 static int 702 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 703 struct rib_cmd_info *rc) 704 { 705 struct route_nhop_data rnd_add; 706 struct nhop_object *nh; 707 struct rtentry *rt; 708 struct sockaddr *dst, *gateway, *netmask; 709 int error; 710 711 dst = info->rti_info[RTAX_DST]; 712 gateway = info->rti_info[RTAX_GATEWAY]; 713 netmask = info->rti_info[RTAX_NETMASK]; 714 715 if ((info->rti_flags & RTF_GATEWAY) && !gateway) { 716 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw"); 717 return (EINVAL); 718 } 719 if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) { 720 FIB_RH_LOG(LOG_DEBUG, rnh, 721 "error: invalid dst/gateway family combination (%d, %d)", 722 dst->sa_family, gateway->sa_family); 723 return (EINVAL); 724 } 725 726 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) { 727 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d", 728 dst->sa_len); 729 return (EINVAL); 730 } 731 732 if (info->rti_ifa == NULL) { 733 error = rt_getifa_fib(info, rnh->rib_fibnum); 734 if (error) 735 return (error); 736 } 737 738 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) 739 return (ENOBUFS); 740 741 error = nhop_create_from_info(rnh, info, &nh); 742 if (error != 0) { 743 rt_free_immediate(rt); 744 return (error); 745 } 746 747 rnd_add.rnd_nhop = nh; 748 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 749 750 int op_flags = RTM_F_CREATE; 751 if (get_prio_from_info(info) == NH_PRIORITY_HIGH) 752 op_flags |= RTM_F_FORCE; 753 else 754 op_flags |= RTM_F_APPEND; 755 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc)); 756 757 } 758 759 static int 760 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add, 761 int op_flags, struct rib_cmd_info *rc) 762 { 763 struct route_nhop_data rnd_orig; 764 struct nhop_object *nh; 765 struct rtentry *rt_orig; 766 int error = 0; 767 768 nh = rnd_add->rnd_nhop; 769 770 RIB_WLOCK(rnh); 771 772 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig); 773 774 if (rt_orig == NULL) { 775 if (op_flags & RTM_F_CREATE) 776 error = add_route(rnh, rt, rnd_add, rc); 777 else 778 error = ESRCH; /* no entry but creation was not required */ 779 RIB_WUNLOCK(rnh); 780 if (error != 0) 781 goto out; 782 return (0); 783 } 784 785 if (op_flags & RTM_F_EXCL) { 786 /* We have existing route in the RIB but not allowed to replace. */ 787 RIB_WUNLOCK(rnh); 788 error = EEXIST; 789 goto out; 790 } 791 792 /* Now either append or replace */ 793 if (op_flags & RTM_F_REPLACE) { 794 if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) { 795 /* Old path is "better" (e.g. has PINNED flag set) */ 796 error = EEXIST; 797 goto out; 798 } 799 change_route(rnh, rt_orig, rnd_add, rc); 800 RIB_WUNLOCK(rnh); 801 nh = rc->rc_nh_old; 802 goto out; 803 } 804 805 RIB_WUNLOCK(rnh); 806 807 #ifdef ROUTE_MPATH 808 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) && 809 nhop_can_multipath(rnd_add->rnd_nhop) && 810 nhop_can_multipath(rnd_orig.rnd_nhop)) { 811 812 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 813 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig, 814 op_flags, rc); 815 if (error != EAGAIN) 816 break; 817 RTSTAT_INC(rts_add_retry); 818 } 819 820 /* 821 * Original nhop reference is unused in any case. 822 */ 823 nhop_free_any(rnd_add->rnd_nhop); 824 if (op_flags & RTM_F_CREATE) { 825 if (error != 0 || rc->rc_cmd != RTM_ADD) 826 rt_free_immediate(rt); 827 } 828 return (error); 829 } 830 #endif 831 /* Out of options - free state and return error */ 832 error = EEXIST; 833 out: 834 if (op_flags & RTM_F_CREATE) 835 rt_free_immediate(rt); 836 nhop_free_any(nh); 837 838 return (error); 839 } 840 841 #ifdef ROUTE_MPATH 842 static int 843 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 844 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 845 int op_flags, struct rib_cmd_info *rc) 846 { 847 RIB_RLOCK_TRACKER; 848 struct route_nhop_data rnd_new; 849 int error = 0; 850 851 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new); 852 if (error != 0) { 853 if (error == EAGAIN) { 854 /* 855 * Group creation failed, most probably because 856 * @rnd_orig data got scheduled for deletion. 857 * Refresh @rnd_orig data and retry. 858 */ 859 RIB_RLOCK(rnh); 860 lookup_prefix_rt(rnh, rt, rnd_orig); 861 RIB_RUNLOCK(rnh); 862 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) { 863 /* In this iteration route doesn't exist */ 864 error = ENOENT; 865 } 866 } 867 return (error); 868 } 869 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 870 if (error != 0) 871 return (error); 872 873 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) { 874 /* 875 * First multipath route got installed. Enable local 876 * outbound connections hashing. 877 */ 878 if (bootverbose) 879 printf("FIB: enabled flowid calculation for locally-originated packets\n"); 880 V_fib_hash_outbound = 1; 881 } 882 883 return (0); 884 } 885 #endif 886 887 /* 888 * Removes route defined by @info from the kernel table specified by @fibnum and 889 * sa_family in @info->rti_info[RTAX_DST]. 890 * 891 * Returns 0 on success and fills in operation metadata into @rc. 892 */ 893 int 894 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 895 { 896 struct rib_head *rnh; 897 struct sockaddr *dst, *netmask; 898 struct sockaddr_storage mdst; 899 int error; 900 901 NET_EPOCH_ASSERT(); 902 903 rnh = get_rnh(fibnum, info); 904 if (rnh == NULL) 905 return (EAFNOSUPPORT); 906 907 bzero(rc, sizeof(struct rib_cmd_info)); 908 rc->rc_cmd = RTM_DELETE; 909 910 dst = info->rti_info[RTAX_DST]; 911 netmask = info->rti_info[RTAX_NETMASK]; 912 913 if (netmask != NULL) { 914 /* Ensure @dst is always properly masked */ 915 if (dst->sa_len > sizeof(mdst)) { 916 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large"); 917 return (EINVAL); 918 } 919 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 920 dst = (struct sockaddr *)&mdst; 921 } 922 923 rib_filter_f_t *filter_func = NULL; 924 void *filter_arg = NULL; 925 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] }; 926 927 if (info->rti_filter != NULL) { 928 filter_func = info->rti_filter; 929 filter_arg = info->rti_filterdata; 930 } else if (gwd.gw != NULL) { 931 filter_func = match_gw_one; 932 filter_arg = &gwd; 933 } 934 935 int prio = get_prio_from_info(info); 936 937 RIB_WLOCK(rnh); 938 struct route_nhop_data rnd; 939 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 940 if (rt != NULL) { 941 error = rt_delete_conditional(rnh, rt, prio, filter_func, 942 filter_arg, rc); 943 } else 944 error = ESRCH; 945 RIB_WUNLOCK(rnh); 946 947 if (error != 0) 948 return (error); 949 950 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 951 952 if (rc->rc_cmd == RTM_DELETE) 953 rt_free(rc->rc_rt); 954 #ifdef ROUTE_MPATH 955 else { 956 /* 957 * Deleting 1 path may result in RTM_CHANGE to 958 * a different mpath group/nhop. 959 * Free old mpath group. 960 */ 961 nhop_free_any(rc->rc_nh_old); 962 } 963 #endif 964 965 return (0); 966 } 967 968 /* 969 * Conditionally unlinks rtentry paths from @rnh matching @cb. 970 * Returns 0 on success with operation result stored in @rc. 971 * On error, returns: 972 * ESRCH - if prefix was not found or filter function failed to match 973 * EADDRINUSE - if trying to delete higher priority route. 974 */ 975 static int 976 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 977 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc) 978 { 979 struct nhop_object *nh = rt->rt_nhop; 980 981 #ifdef ROUTE_MPATH 982 if (NH_IS_NHGRP(nh)) { 983 struct nhgrp_object *nhg = (struct nhgrp_object *)nh; 984 struct route_nhop_data rnd; 985 int error; 986 987 if (cb == NULL) 988 return (ESRCH); 989 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd); 990 if (error == 0) { 991 if (rnd.rnd_nhgrp == nhg) { 992 /* No match, unreference new group and return. */ 993 nhop_free_any(rnd.rnd_nhop); 994 return (ESRCH); 995 } 996 error = change_route(rnh, rt, &rnd, rc); 997 } 998 return (error); 999 } 1000 #endif 1001 if (cb != NULL && !cb(rt, nh, cbdata)) 1002 return (ESRCH); 1003 1004 if (prio < nhop_get_prio(nh)) 1005 return (EADDRINUSE); 1006 1007 return (delete_route(rnh, rt, rc)); 1008 } 1009 1010 int 1011 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 1012 struct rib_cmd_info *rc) 1013 { 1014 RIB_RLOCK_TRACKER; 1015 struct route_nhop_data rnd_orig; 1016 struct rib_head *rnh; 1017 struct rtentry *rt; 1018 int error; 1019 1020 NET_EPOCH_ASSERT(); 1021 1022 rnh = get_rnh(fibnum, info); 1023 if (rnh == NULL) 1024 return (EAFNOSUPPORT); 1025 1026 bzero(rc, sizeof(struct rib_cmd_info)); 1027 rc->rc_cmd = RTM_CHANGE; 1028 1029 /* Check if updated gateway exists */ 1030 if ((info->rti_flags & RTF_GATEWAY) && 1031 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1032 1033 /* 1034 * route(8) adds RTF_GATEWAY flag if -interface is not set. 1035 * Remove RTF_GATEWAY to enforce consistency and maintain 1036 * compatibility.. 1037 */ 1038 info->rti_flags &= ~RTF_GATEWAY; 1039 } 1040 1041 /* 1042 * route change is done in multiple steps, with dropping and 1043 * reacquiring lock. In the situations with multiple processes 1044 * changes the same route in can lead to the case when route 1045 * is changed between the steps. Address it by retrying the operation 1046 * multiple times before failing. 1047 */ 1048 1049 RIB_RLOCK(rnh); 1050 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1051 info->rti_info[RTAX_NETMASK], &rnh->head); 1052 1053 if (rt == NULL) { 1054 RIB_RUNLOCK(rnh); 1055 return (ESRCH); 1056 } 1057 1058 rnd_orig.rnd_nhop = rt->rt_nhop; 1059 rnd_orig.rnd_weight = rt->rt_weight; 1060 1061 RIB_RUNLOCK(rnh); 1062 1063 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 1064 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc); 1065 if (error != EAGAIN) 1066 break; 1067 } 1068 1069 return (error); 1070 } 1071 1072 static int 1073 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 1074 struct nhop_object *nh_orig, struct nhop_object **nh_new) 1075 { 1076 int error; 1077 1078 /* 1079 * New gateway could require new ifaddr, ifp; 1080 * flags may also be different; ifp may be specified 1081 * by ll sockaddr when protocol address is ambiguous 1082 */ 1083 if (((nh_orig->nh_flags & NHF_GATEWAY) && 1084 info->rti_info[RTAX_GATEWAY] != NULL) || 1085 info->rti_info[RTAX_IFP] != NULL || 1086 (info->rti_info[RTAX_IFA] != NULL && 1087 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 1088 error = rt_getifa_fib(info, rnh->rib_fibnum); 1089 1090 if (error != 0) { 1091 info->rti_ifa = NULL; 1092 return (error); 1093 } 1094 } 1095 1096 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 1097 info->rti_ifa = NULL; 1098 1099 return (error); 1100 } 1101 1102 #ifdef ROUTE_MPATH 1103 static int 1104 change_mpath_route(struct rib_head *rnh, struct rtentry *rt, 1105 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1106 struct rib_cmd_info *rc) 1107 { 1108 int error = 0, found_idx = 0; 1109 struct nhop_object *nh_orig = NULL, *nh_new; 1110 struct route_nhop_data rnd_new = {}; 1111 const struct weightened_nhop *wn = NULL; 1112 struct weightened_nhop *wn_new; 1113 uint32_t num_nhops; 1114 1115 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops); 1116 for (int i = 0; i < num_nhops; i++) { 1117 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) { 1118 nh_orig = wn[i].nh; 1119 found_idx = i; 1120 break; 1121 } 1122 } 1123 1124 if (nh_orig == NULL) 1125 return (ESRCH); 1126 1127 error = change_nhop(rnh, info, nh_orig, &nh_new); 1128 if (error != 0) 1129 return (error); 1130 1131 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 1132 M_TEMP, M_NOWAIT | M_ZERO); 1133 if (wn_new == NULL) { 1134 nhop_free(nh_new); 1135 return (EAGAIN); 1136 } 1137 1138 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 1139 wn_new[found_idx].nh = nh_new; 1140 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight); 1141 1142 error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp); 1143 nhop_free(nh_new); 1144 free(wn_new, M_TEMP); 1145 1146 if (error != 0) 1147 return (error); 1148 1149 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 1150 1151 return (error); 1152 } 1153 #endif 1154 1155 static int 1156 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt, 1157 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1158 struct rib_cmd_info *rc) 1159 { 1160 int error = 0; 1161 struct nhop_object *nh_orig; 1162 struct route_nhop_data rnd_new; 1163 1164 nh_orig = rnd_orig->rnd_nhop; 1165 if (nh_orig == NULL) 1166 return (ESRCH); 1167 1168 #ifdef ROUTE_MPATH 1169 if (NH_IS_NHGRP(nh_orig)) 1170 return (change_mpath_route(rnh, rt, info, rnd_orig, rc)); 1171 #endif 1172 1173 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1174 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1175 if (error != 0) 1176 return (error); 1177 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 1178 1179 return (error); 1180 } 1181 1182 /* 1183 * Insert @rt with nhop data from @rnd_new to @rnh. 1184 * Returns 0 on success and stores operation results in @rc. 1185 */ 1186 static int 1187 add_route(struct rib_head *rnh, struct rtentry *rt, 1188 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1189 { 1190 struct radix_node *rn; 1191 1192 RIB_WLOCK_ASSERT(rnh); 1193 1194 rt->rt_nhop = rnd->rnd_nhop; 1195 rt->rt_weight = rnd->rnd_weight; 1196 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes); 1197 1198 if (rn != NULL) { 1199 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1200 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1201 1202 /* Finalize notification */ 1203 rib_bump_gen(rnh); 1204 rnh->rnh_prefixes++; 1205 1206 rc->rc_cmd = RTM_ADD; 1207 rc->rc_rt = rt; 1208 rc->rc_nh_old = NULL; 1209 rc->rc_nh_new = rnd->rnd_nhop; 1210 rc->rc_nh_weight = rnd->rnd_weight; 1211 1212 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1213 return (0); 1214 } 1215 1216 /* Existing route or memory allocation failure. */ 1217 return (EEXIST); 1218 } 1219 1220 /* 1221 * Unconditionally deletes @rt from @rnh. 1222 */ 1223 static int 1224 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc) 1225 { 1226 RIB_WLOCK_ASSERT(rnh); 1227 1228 /* Route deletion requested. */ 1229 struct radix_node *rn; 1230 1231 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head); 1232 if (rn == NULL) 1233 return (ESRCH); 1234 rt = RNTORT(rn); 1235 rt->rte_flags &= ~RTF_UP; 1236 1237 rib_bump_gen(rnh); 1238 rnh->rnh_prefixes--; 1239 1240 rc->rc_cmd = RTM_DELETE; 1241 rc->rc_rt = rt; 1242 rc->rc_nh_old = rt->rt_nhop; 1243 rc->rc_nh_new = NULL; 1244 rc->rc_nh_weight = rt->rt_weight; 1245 1246 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1247 1248 return (0); 1249 } 1250 1251 /* 1252 * Switch @rt nhop/weigh to the ones specified in @rnd. 1253 * Returns 0 on success. 1254 */ 1255 int 1256 change_route(struct rib_head *rnh, struct rtentry *rt, 1257 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1258 { 1259 struct nhop_object *nh_orig; 1260 1261 RIB_WLOCK_ASSERT(rnh); 1262 1263 nh_orig = rt->rt_nhop; 1264 1265 if (rnd->rnd_nhop == NULL) 1266 return (delete_route(rnh, rt, rc)); 1267 1268 /* Changing nexthop & weight to a new one */ 1269 rt->rt_nhop = rnd->rnd_nhop; 1270 rt->rt_weight = rnd->rnd_weight; 1271 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1272 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1273 1274 /* Finalize notification */ 1275 rib_bump_gen(rnh); 1276 rc->rc_cmd = RTM_CHANGE; 1277 rc->rc_rt = rt; 1278 rc->rc_nh_old = nh_orig; 1279 rc->rc_nh_new = rnd->rnd_nhop; 1280 rc->rc_nh_weight = rnd->rnd_weight; 1281 1282 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1283 1284 return (0); 1285 } 1286 1287 /* 1288 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1289 * consistent with the current route data. 1290 * Nexthop in @nhd_new is consumed. 1291 */ 1292 int 1293 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1294 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new, 1295 struct rib_cmd_info *rc) 1296 { 1297 struct rtentry *rt_new; 1298 int error = 0; 1299 1300 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1301 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE]; 1302 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE); 1303 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE); 1304 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family, 1305 "trying change %s -> %s", buf_old, buf_new); 1306 } 1307 RIB_WLOCK(rnh); 1308 1309 struct route_nhop_data rnd; 1310 rt_new = lookup_prefix_rt(rnh, rt, &rnd); 1311 1312 if (rt_new == NULL) { 1313 if (rnd_orig->rnd_nhop == NULL) 1314 error = add_route(rnh, rt, rnd_new, rc); 1315 else { 1316 /* 1317 * Prefix does not exist, which was not our assumption. 1318 * Update @rnd_orig with the new data and return 1319 */ 1320 rnd_orig->rnd_nhop = NULL; 1321 rnd_orig->rnd_weight = 0; 1322 error = EAGAIN; 1323 } 1324 } else { 1325 /* Prefix exists, try to update */ 1326 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1327 /* 1328 * Nhop/mpath group hasn't changed. Flip 1329 * to the new precalculated one and return 1330 */ 1331 error = change_route(rnh, rt_new, rnd_new, rc); 1332 } else { 1333 /* Update and retry */ 1334 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1335 rnd_orig->rnd_weight = rt_new->rt_weight; 1336 error = EAGAIN; 1337 } 1338 } 1339 1340 RIB_WUNLOCK(rnh); 1341 1342 if (error == 0) { 1343 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1344 1345 if (rnd_orig->rnd_nhop != NULL) 1346 nhop_free_any(rnd_orig->rnd_nhop); 1347 1348 } else { 1349 if (rnd_new->rnd_nhop != NULL) 1350 nhop_free_any(rnd_new->rnd_nhop); 1351 } 1352 1353 return (error); 1354 } 1355 1356 /* 1357 * Performs modification of routing table specificed by @action. 1358 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1359 * Needs to be run in network epoch. 1360 * 1361 * Returns 0 on success and fills in @rc with action result. 1362 */ 1363 int 1364 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1365 struct rib_cmd_info *rc) 1366 { 1367 int error; 1368 1369 switch (action) { 1370 case RTM_ADD: 1371 error = rib_add_route(fibnum, info, rc); 1372 break; 1373 case RTM_DELETE: 1374 error = rib_del_route(fibnum, info, rc); 1375 break; 1376 case RTM_CHANGE: 1377 error = rib_change_route(fibnum, info, rc); 1378 break; 1379 default: 1380 error = ENOTSUP; 1381 } 1382 1383 return (error); 1384 } 1385 1386 struct rt_delinfo 1387 { 1388 struct rib_head *rnh; 1389 struct rtentry *head; 1390 rib_filter_f_t *filter_f; 1391 void *filter_arg; 1392 int prio; 1393 struct rib_cmd_info rc; 1394 }; 1395 1396 /* 1397 * Conditionally unlinks rtenties or paths from radix tree based 1398 * on the callback data passed in @arg. 1399 */ 1400 static int 1401 rt_checkdelroute(struct radix_node *rn, void *arg) 1402 { 1403 struct rt_delinfo *di = (struct rt_delinfo *)arg; 1404 struct rtentry *rt = (struct rtentry *)rn; 1405 1406 if (rt_delete_conditional(di->rnh, rt, di->prio, 1407 di->filter_f, di->filter_arg, &di->rc) != 0) 1408 return (0); 1409 1410 /* 1411 * Add deleted rtentries to the list to GC them 1412 * after dropping the lock. 1413 * 1414 * XXX: Delayed notifications not implemented 1415 * for nexthop updates. 1416 */ 1417 if (di->rc.rc_cmd == RTM_DELETE) { 1418 /* Add to the list and return */ 1419 rt->rt_chain = di->head; 1420 di->head = rt; 1421 #ifdef ROUTE_MPATH 1422 } else { 1423 /* 1424 * RTM_CHANGE to a different nexthop or nexthop group. 1425 * Free old multipath group. 1426 */ 1427 nhop_free_any(di->rc.rc_nh_old); 1428 #endif 1429 } 1430 1431 return (0); 1432 } 1433 1434 /* 1435 * Iterates over a routing table specified by @fibnum and @family and 1436 * deletes elements marked by @filter_f. 1437 * @fibnum: rtable id 1438 * @family: AF_ address family 1439 * @filter_f: function returning non-zero value for items to delete 1440 * @arg: data to pass to the @filter_f function 1441 * @report: true if rtsock notification is needed. 1442 */ 1443 void 1444 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg, 1445 bool report) 1446 { 1447 struct rib_head *rnh; 1448 struct rtentry *rt; 1449 struct nhop_object *nh; 1450 struct epoch_tracker et; 1451 1452 rnh = rt_tables_get_rnh(fibnum, family); 1453 if (rnh == NULL) 1454 return; 1455 1456 struct rt_delinfo di = { 1457 .rnh = rnh, 1458 .filter_f = filter_f, 1459 .filter_arg = filter_arg, 1460 .prio = NH_PRIORITY_NORMAL, 1461 }; 1462 1463 NET_EPOCH_ENTER(et); 1464 1465 RIB_WLOCK(rnh); 1466 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1467 RIB_WUNLOCK(rnh); 1468 1469 /* We might have something to reclaim. */ 1470 bzero(&di.rc, sizeof(di.rc)); 1471 di.rc.rc_cmd = RTM_DELETE; 1472 while (di.head != NULL) { 1473 rt = di.head; 1474 di.head = rt->rt_chain; 1475 rt->rt_chain = NULL; 1476 nh = rt->rt_nhop; 1477 1478 di.rc.rc_rt = rt; 1479 di.rc.rc_nh_old = nh; 1480 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1481 1482 if (report) { 1483 #ifdef ROUTE_MPATH 1484 struct nhgrp_object *nhg; 1485 const struct weightened_nhop *wn; 1486 uint32_t num_nhops; 1487 if (NH_IS_NHGRP(nh)) { 1488 nhg = (struct nhgrp_object *)nh; 1489 wn = nhgrp_get_nhops(nhg, &num_nhops); 1490 for (int i = 0; i < num_nhops; i++) 1491 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1492 } else 1493 #endif 1494 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1495 } 1496 rt_free(rt); 1497 } 1498 1499 NET_EPOCH_EXIT(et); 1500 } 1501 1502 static int 1503 rt_delete_unconditional(struct radix_node *rn, void *arg) 1504 { 1505 struct rtentry *rt = RNTORT(rn); 1506 struct rib_head *rnh = (struct rib_head *)arg; 1507 1508 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1509 if (RNTORT(rn) == rt) 1510 rt_free(rt); 1511 1512 return (0); 1513 } 1514 1515 /* 1516 * Removes all routes from the routing table without executing notifications. 1517 * rtentres will be removed after the end of a current epoch. 1518 */ 1519 static void 1520 rib_flush_routes(struct rib_head *rnh) 1521 { 1522 RIB_WLOCK(rnh); 1523 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1524 RIB_WUNLOCK(rnh); 1525 } 1526 1527 void 1528 rib_flush_routes_family(int family) 1529 { 1530 struct rib_head *rnh; 1531 1532 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1533 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1534 rib_flush_routes(rnh); 1535 } 1536 } 1537 1538 const char * 1539 rib_print_family(int family) 1540 { 1541 switch (family) { 1542 case AF_INET: 1543 return ("inet"); 1544 case AF_INET6: 1545 return ("inet6"); 1546 case AF_LINK: 1547 return ("link"); 1548 } 1549 return ("unknown"); 1550 } 1551 1552