1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_private.h> 48 #include <net/if_dl.h> 49 #include <net/vnet.h> 50 #include <net/route.h> 51 #include <net/route/route_ctl.h> 52 #include <net/route/route_var.h> 53 #include <net/route/nhop_utils.h> 54 #include <net/route/nhop.h> 55 #include <net/route/nhop_var.h> 56 #include <netinet/in.h> 57 #include <netinet6/scope6_var.h> 58 #include <netinet6/in6_var.h> 59 60 #define DEBUG_MOD_NAME route_ctl 61 #define DEBUG_MAX_LEVEL LOG_DEBUG 62 #include <net/route/route_debug.h> 63 _DECLARE_DEBUG(LOG_INFO); 64 65 /* 66 * This file contains control plane routing tables functions. 67 * 68 * All functions assumes they are called in net epoch. 
 */

/*
 * Scratch storage large enough to hold any sockaddr handled here
 * (IPv4/IPv6); used for on-stack netmask construction.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
/* Knob is writable only when multipath support is compiled in. */
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
#else
#define	_MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
/*
 * Read-only knob; flipped to 1 automatically when the first multipath
 * route gets installed (see add_route_flags_mpath()).
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections*/
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define	V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

/*
 * Returns the routing table head for @fibnum and the address family of
 * the destination sockaddr in @info, or NULL if the family has no table.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
/*
 * Reports whether IPv4 routes with IPv6 nexthops (RFC 5549) are enabled
 * via the net.route.ipv6_nexthop sysctl.
 */
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

#ifdef ROUTE_MPATH
/*
 * Reports whether multipath routing is enabled in the vnet owning @rh.
 * Reads the per-vnet net.route.multipath sysctl under CURVNET.
 */
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
 * Avoid nhops without gateways and redirects.
 *
 * Returns 1 for multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

/*
 * Returns the route weight requested in @info (RTV_WEIGHT), clamped to
 * RT_MAX_WEIGHT; falls back to @default_weight when no weight was
 * supplied or the supplied weight is 0.
 */
static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep upper 1 byte for adm distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}

/*
 * File-local concept for distinguishing between the normal and
 * RTF_PINNED routes that can override the "normal" one.
 */
#define	NH_PRIORITY_HIGH	2
#define	NH_PRIORITY_NORMAL	1

/* Returns the priority implied by the RTF_PINNED flag in @info. */
static int
get_prio_from_info(const struct rt_addrinfo *info)
{
	if (info->rti_flags & RTF_PINNED)
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

/* Returns the priority of nexthop @nh based on its pinned state. */
static int
nhop_get_prio(const struct nhop_object *nh)
{
	if (NH_IS_PINNED(nh))
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

/*
 * Check if specified @gw matches gw data in the nexthop @nh.
 *
 * Returns true if matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		/* Unknown family: fall back to a raw sockaddr compare. */
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Matches any nexthop with the given @gw.
 * Can be used as rib_filter_f callback.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;

	return (match_nhop_gw(nh, gw));
}

/* Filter state for match_gw_one(): target gateway and match counter. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Matches first occurrence of the gateway provided in @gwd
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;

	/* Return only first match to make rtsock happy */
	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
		return (1);
	return (0);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	/* An explicit filter function takes precedence over gateway match. */
	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs exact prefix match based on @dst and @netmask.
346 * Returns matched @rtentry if found or NULL. 347 * If rtentry was found, saves nexthop / weight value into @rnd. 348 */ 349 static struct rtentry * 350 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 351 const struct sockaddr *netmask, struct route_nhop_data *rnd) 352 { 353 struct rtentry *rt; 354 355 RIB_LOCK_ASSERT(rnh); 356 357 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 358 if (rt != NULL) { 359 rnd->rnd_nhop = rt->rt_nhop; 360 rnd->rnd_weight = rt->rt_weight; 361 } else { 362 rnd->rnd_nhop = NULL; 363 rnd->rnd_weight = 0; 364 } 365 366 return (rt); 367 } 368 369 struct rtentry * 370 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt, 371 struct route_nhop_data *rnd) 372 { 373 return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd)); 374 } 375 376 /* 377 * Runs exact prefix match based on dst/netmask from @info. 378 * Assumes RIB lock is held. 379 * Returns matched @rtentry if found or NULL. 380 * If rtentry was found, saves nexthop / weight value into @rnd. 381 */ 382 struct rtentry * 383 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 384 struct route_nhop_data *rnd) 385 { 386 struct rtentry *rt; 387 388 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 389 info->rti_info[RTAX_NETMASK], rnd); 390 391 return (rt); 392 } 393 394 static bool 395 fill_pxmask_family(int family, int plen, struct sockaddr *_dst, 396 struct sockaddr **pmask) 397 { 398 if (plen == -1) { 399 *pmask = NULL; 400 return (true); 401 } 402 403 switch (family) { 404 #ifdef INET 405 case AF_INET: 406 { 407 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask); 408 struct sockaddr_in *dst= (struct sockaddr_in *)_dst; 409 410 memset(mask, 0, sizeof(*mask)); 411 mask->sin_family = family; 412 mask->sin_len = sizeof(*mask); 413 if (plen == 32) 414 *pmask = NULL; 415 else if (plen > 32 || plen < 0) 416 return (false); 417 else { 418 uint32_t daddr, maddr; 419 maddr = htonl(plen ? 
~((1 << (32 - plen)) - 1) : 0); 420 mask->sin_addr.s_addr = maddr; 421 daddr = dst->sin_addr.s_addr; 422 daddr = htonl(ntohl(daddr) & ntohl(maddr)); 423 dst->sin_addr.s_addr = daddr; 424 } 425 return (true); 426 } 427 break; 428 #endif 429 #ifdef INET6 430 case AF_INET6: 431 { 432 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask); 433 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst; 434 435 memset(mask, 0, sizeof(*mask)); 436 mask->sin6_family = family; 437 mask->sin6_len = sizeof(*mask); 438 if (plen == 128) 439 *pmask = NULL; 440 else if (plen > 128 || plen < 0) 441 return (false); 442 else { 443 ip6_writemask(&mask->sin6_addr, plen); 444 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr); 445 } 446 return (true); 447 } 448 break; 449 #endif 450 } 451 return (false); 452 } 453 454 /* 455 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd 456 * to the routing table. 457 * 458 * @fibnum: rtable id to insert route to 459 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 460 * @plen: prefix length (or -1 if host route or not applicable for AF) 461 * @op_flags: combination of RTM_F_ flags 462 * @rc: storage to report operation result 463 * 464 * Returns 0 on success. 
465 */ 466 int 467 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 468 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc) 469 { 470 union sockaddr_union mask_storage; 471 struct sockaddr *netmask = &mask_storage.sa; 472 struct rtentry *rt = NULL; 473 474 NET_EPOCH_ASSERT(); 475 476 bzero(rc, sizeof(struct rib_cmd_info)); 477 rc->rc_cmd = RTM_ADD; 478 479 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 480 if (rnh == NULL) 481 return (EAFNOSUPPORT); 482 483 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 484 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 485 return (EINVAL); 486 } 487 488 if (op_flags & RTM_F_CREATE) { 489 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) { 490 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed"); 491 return (ENOMEM); 492 } 493 } 494 495 return (add_route_flags(rnh, rt, rnd, op_flags, rc)); 496 } 497 498 /* 499 * Attempts to delete @dst/plen prefix matching gateway @gw from the 500 * routing rable. 501 * 502 * @fibnum: rtable id to remove route from 503 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 504 * @plen: prefix length (or -1 if host route or not applicable for AF) 505 * @gw: gateway to match 506 * @op_flags: combination of RTM_F_ flags 507 * @rc: storage to report operation result 508 * 509 * Returns 0 on success. 510 */ 511 int 512 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen, 513 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc) 514 { 515 struct gw_filter_data gwd = { .gw = gw }; 516 517 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc)); 518 } 519 520 /* 521 * Attempts to delete @dst/plen prefix matching @filter_func from the 522 * routing rable. 
523 * 524 * @fibnum: rtable id to remove route from 525 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 526 * @plen: prefix length (or -1 if host route or not applicable for AF) 527 * @filter_func: func to be called for each nexthop of the prefix for matching 528 * @filter_arg: argument to pass to @filter_func 529 * @op_flags: combination of RTM_F_ flags 530 * @rc: storage to report operation result 531 * 532 * Returns 0 on success. 533 */ 534 int 535 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 536 rib_filter_f_t *filter_func, void *filter_arg, int op_flags, 537 struct rib_cmd_info *rc) 538 { 539 union sockaddr_union mask_storage; 540 struct sockaddr *netmask = &mask_storage.sa; 541 int error; 542 543 NET_EPOCH_ASSERT(); 544 545 bzero(rc, sizeof(struct rib_cmd_info)); 546 rc->rc_cmd = RTM_DELETE; 547 548 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 549 if (rnh == NULL) 550 return (EAFNOSUPPORT); 551 552 if (dst->sa_len > sizeof(mask_storage)) { 553 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len); 554 return (EINVAL); 555 } 556 557 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 558 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 559 return (EINVAL); 560 } 561 562 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL; 563 564 RIB_WLOCK(rnh); 565 struct route_nhop_data rnd; 566 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 567 if (rt != NULL) { 568 error = rt_delete_conditional(rnh, rt, prio, filter_func, 569 filter_arg, rc); 570 } else 571 error = ESRCH; 572 RIB_WUNLOCK(rnh); 573 574 if (error != 0) 575 return (error); 576 577 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 578 579 if (rc->rc_cmd == RTM_DELETE) 580 rt_free(rc->rc_rt); 581 #ifdef ROUTE_MPATH 582 else { 583 /* 584 * Deleting 1 path may result in RTM_CHANGE to 585 * a different mpath group/nhop. 586 * Free old mpath group. 
587 */ 588 nhop_free_any(rc->rc_nh_old); 589 } 590 #endif 591 592 return (0); 593 } 594 595 /* 596 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh. 597 * @rt: route to copy. 598 * @rnd_src: nhop and weight. Multipath routes are not supported 599 * @rh_dst: target rtable. 600 * @rc: operation result storage 601 * 602 * Return 0 on success. 603 */ 604 int 605 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src, 606 struct rib_head *rh_dst, struct rib_cmd_info *rc) 607 { 608 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop; 609 int error; 610 611 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0); 612 613 IF_DEBUG_LEVEL(LOG_DEBUG2) { 614 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE]; 615 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf)); 616 rt_print_buf(rt, rtbuf, sizeof(rtbuf)); 617 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u", 618 rtbuf, nhbuf, nhop_get_fibnum(nh_src)); 619 } 620 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family); 621 if (nh == NULL) { 622 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop"); 623 return (ENOMEM); 624 } 625 nhop_copy(nh, rnd_src->rnd_nhop); 626 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop)); 627 nhop_set_fibnum(nh, rh_dst->rib_fibnum); 628 nh = nhop_get_nhop_internal(rh_dst, nh, &error); 629 if (error != 0) { 630 FIB_RH_LOG(LOG_INFO, rh_dst, 631 "unable to finalize new nexthop: error %d", error); 632 return (ENOMEM); 633 } 634 635 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt)); 636 if (rt_new == NULL) { 637 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry"); 638 nhop_free(nh); 639 return (ENOMEM); 640 } 641 642 struct route_nhop_data rnd = { 643 .rnd_nhop = nh, 644 .rnd_weight = rnd_src->rnd_weight 645 }; 646 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? 
RTM_F_FORCE : 0); 647 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc); 648 649 if (error != 0) { 650 IF_DEBUG_LEVEL(LOG_DEBUG2) { 651 char buf[NHOP_PRINT_BUFSIZE]; 652 rt_print_buf(rt_new, buf, sizeof(buf)); 653 FIB_RH_LOG(LOG_DEBUG, rh_dst, 654 "Unable to add route %s: error %d", buf, error); 655 } 656 nhop_free(nh); 657 rt_free_immediate(rt_new); 658 } 659 return (error); 660 } 661 662 /* 663 * Adds route defined by @info into the kernel table specified by @fibnum and 664 * sa_family in @info->rti_info[RTAX_DST]. 665 * 666 * Returns 0 on success and fills in operation metadata into @rc. 667 */ 668 int 669 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 670 struct rib_cmd_info *rc) 671 { 672 struct rib_head *rnh; 673 int error; 674 675 NET_EPOCH_ASSERT(); 676 677 rnh = get_rnh(fibnum, info); 678 if (rnh == NULL) 679 return (EAFNOSUPPORT); 680 681 /* 682 * Check consistency between RTF_HOST flag and netmask 683 * existence. 684 */ 685 if (info->rti_flags & RTF_HOST) 686 info->rti_info[RTAX_NETMASK] = NULL; 687 else if (info->rti_info[RTAX_NETMASK] == NULL) { 688 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask"); 689 return (EINVAL); 690 } 691 692 bzero(rc, sizeof(struct rib_cmd_info)); 693 rc->rc_cmd = RTM_ADD; 694 695 error = add_route_byinfo(rnh, info, rc); 696 if (error == 0) 697 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 698 699 return (error); 700 } 701 702 static int 703 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 704 struct rib_cmd_info *rc) 705 { 706 struct route_nhop_data rnd_add; 707 struct nhop_object *nh; 708 struct rtentry *rt; 709 struct sockaddr *dst, *gateway, *netmask; 710 int error; 711 712 dst = info->rti_info[RTAX_DST]; 713 gateway = info->rti_info[RTAX_GATEWAY]; 714 netmask = info->rti_info[RTAX_NETMASK]; 715 716 if ((info->rti_flags & RTF_GATEWAY) && !gateway) { 717 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw"); 718 return (EINVAL); 719 } 720 if (dst && gateway && 
!nhop_check_gateway(dst->sa_family, gateway->sa_family)) { 721 FIB_RH_LOG(LOG_DEBUG, rnh, 722 "error: invalid dst/gateway family combination (%d, %d)", 723 dst->sa_family, gateway->sa_family); 724 return (EINVAL); 725 } 726 727 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) { 728 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d", 729 dst->sa_len); 730 return (EINVAL); 731 } 732 733 if (info->rti_ifa == NULL) { 734 error = rt_getifa_fib(info, rnh->rib_fibnum); 735 if (error) 736 return (error); 737 } 738 739 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) 740 return (ENOBUFS); 741 742 error = nhop_create_from_info(rnh, info, &nh); 743 if (error != 0) { 744 rt_free_immediate(rt); 745 return (error); 746 } 747 748 rnd_add.rnd_nhop = nh; 749 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 750 751 int op_flags = RTM_F_CREATE; 752 if (get_prio_from_info(info) == NH_PRIORITY_HIGH) 753 op_flags |= RTM_F_FORCE; 754 else 755 op_flags |= RTM_F_APPEND; 756 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc)); 757 758 } 759 760 static int 761 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add, 762 int op_flags, struct rib_cmd_info *rc) 763 { 764 struct route_nhop_data rnd_orig; 765 struct nhop_object *nh; 766 struct rtentry *rt_orig; 767 int error = 0; 768 769 nh = rnd_add->rnd_nhop; 770 771 RIB_WLOCK(rnh); 772 773 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig); 774 775 if (rt_orig == NULL) { 776 if (op_flags & RTM_F_CREATE) 777 error = add_route(rnh, rt, rnd_add, rc); 778 else 779 error = ESRCH; /* no entry but creation was not required */ 780 RIB_WUNLOCK(rnh); 781 if (error != 0) 782 goto out; 783 return (0); 784 } 785 786 if (op_flags & RTM_F_EXCL) { 787 /* We have existing route in the RIB but not allowed to replace. 
*/ 788 RIB_WUNLOCK(rnh); 789 error = EEXIST; 790 goto out; 791 } 792 793 /* Now either append or replace */ 794 if (op_flags & RTM_F_REPLACE) { 795 if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) { 796 /* Old path is "better" (e.g. has PINNED flag set) */ 797 RIB_WUNLOCK(rnh); 798 error = EEXIST; 799 goto out; 800 } 801 change_route(rnh, rt_orig, rnd_add, rc); 802 RIB_WUNLOCK(rnh); 803 nh = rc->rc_nh_old; 804 goto out; 805 } 806 807 RIB_WUNLOCK(rnh); 808 809 #ifdef ROUTE_MPATH 810 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) && 811 nhop_can_multipath(rnd_add->rnd_nhop) && 812 nhop_can_multipath(rnd_orig.rnd_nhop)) { 813 814 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 815 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig, 816 op_flags, rc); 817 if (error != EAGAIN) 818 break; 819 RTSTAT_INC(rts_add_retry); 820 } 821 822 /* 823 * Original nhop reference is unused in any case. 824 */ 825 nhop_free_any(rnd_add->rnd_nhop); 826 if (op_flags & RTM_F_CREATE) { 827 if (error != 0 || rc->rc_cmd != RTM_ADD) 828 rt_free_immediate(rt); 829 } 830 return (error); 831 } 832 #endif 833 /* Out of options - free state and return error */ 834 error = EEXIST; 835 out: 836 if (op_flags & RTM_F_CREATE) 837 rt_free_immediate(rt); 838 nhop_free_any(nh); 839 840 return (error); 841 } 842 843 #ifdef ROUTE_MPATH 844 static int 845 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 846 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 847 int op_flags, struct rib_cmd_info *rc) 848 { 849 RIB_RLOCK_TRACKER; 850 struct route_nhop_data rnd_new; 851 int error = 0; 852 853 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new); 854 if (error != 0) { 855 if (error == EAGAIN) { 856 /* 857 * Group creation failed, most probably because 858 * @rnd_orig data got scheduled for deletion. 859 * Refresh @rnd_orig data and retry. 
860 */ 861 RIB_RLOCK(rnh); 862 lookup_prefix_rt(rnh, rt, rnd_orig); 863 RIB_RUNLOCK(rnh); 864 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) { 865 /* In this iteration route doesn't exist */ 866 error = ENOENT; 867 } 868 } 869 return (error); 870 } 871 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 872 if (error != 0) 873 return (error); 874 875 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) { 876 /* 877 * First multipath route got installed. Enable local 878 * outbound connections hashing. 879 */ 880 if (bootverbose) 881 printf("FIB: enabled flowid calculation for locally-originated packets\n"); 882 V_fib_hash_outbound = 1; 883 } 884 885 return (0); 886 } 887 #endif 888 889 /* 890 * Removes route defined by @info from the kernel table specified by @fibnum and 891 * sa_family in @info->rti_info[RTAX_DST]. 892 * 893 * Returns 0 on success and fills in operation metadata into @rc. 894 */ 895 int 896 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 897 { 898 struct rib_head *rnh; 899 struct sockaddr *dst, *netmask; 900 struct sockaddr_storage mdst; 901 int error; 902 903 NET_EPOCH_ASSERT(); 904 905 rnh = get_rnh(fibnum, info); 906 if (rnh == NULL) 907 return (EAFNOSUPPORT); 908 909 bzero(rc, sizeof(struct rib_cmd_info)); 910 rc->rc_cmd = RTM_DELETE; 911 912 dst = info->rti_info[RTAX_DST]; 913 netmask = info->rti_info[RTAX_NETMASK]; 914 915 if (netmask != NULL) { 916 /* Ensure @dst is always properly masked */ 917 if (dst->sa_len > sizeof(mdst)) { 918 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large"); 919 return (EINVAL); 920 } 921 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 922 dst = (struct sockaddr *)&mdst; 923 } 924 925 rib_filter_f_t *filter_func = NULL; 926 void *filter_arg = NULL; 927 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] }; 928 929 if (info->rti_filter != NULL) { 930 filter_func = info->rti_filter; 931 filter_arg = info->rti_filterdata; 932 } 
else if (gwd.gw != NULL) { 933 filter_func = match_gw_one; 934 filter_arg = &gwd; 935 } 936 937 int prio = get_prio_from_info(info); 938 939 RIB_WLOCK(rnh); 940 struct route_nhop_data rnd; 941 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 942 if (rt != NULL) { 943 error = rt_delete_conditional(rnh, rt, prio, filter_func, 944 filter_arg, rc); 945 } else 946 error = ESRCH; 947 RIB_WUNLOCK(rnh); 948 949 if (error != 0) 950 return (error); 951 952 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 953 954 if (rc->rc_cmd == RTM_DELETE) 955 rt_free(rc->rc_rt); 956 #ifdef ROUTE_MPATH 957 else { 958 /* 959 * Deleting 1 path may result in RTM_CHANGE to 960 * a different mpath group/nhop. 961 * Free old mpath group. 962 */ 963 nhop_free_any(rc->rc_nh_old); 964 } 965 #endif 966 967 return (0); 968 } 969 970 /* 971 * Conditionally unlinks rtentry paths from @rnh matching @cb. 972 * Returns 0 on success with operation result stored in @rc. 973 * On error, returns: 974 * ESRCH - if prefix was not found or filter function failed to match 975 * EADDRINUSE - if trying to delete higher priority route. 976 */ 977 static int 978 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 979 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc) 980 { 981 struct nhop_object *nh = rt->rt_nhop; 982 983 #ifdef ROUTE_MPATH 984 if (NH_IS_NHGRP(nh)) { 985 struct nhgrp_object *nhg = (struct nhgrp_object *)nh; 986 struct route_nhop_data rnd; 987 int error; 988 989 if (cb == NULL) 990 return (ESRCH); 991 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd); 992 if (error == 0) { 993 if (rnd.rnd_nhgrp == nhg) { 994 /* No match, unreference new group and return. 
 */
				/*
				 * Filter matched nothing in the group:
				 * drop the reference taken above and
				 * report the path as not found.
				 */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	/* Give the optional filter callback a chance to veto the deletion. */
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	/* Refuse to delete a route backed by a higher-priority nexthop. */
	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

/*
 * Changes an existing route in the table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST], as described by @info.
 * Needs to be run in the network epoch.
 *
 * Returns 0 on success and fills in @rc with the action result.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. When multiple processes change the same
	 * route concurrently, the route may be modified between the steps.
	 * Address it by retrying the operation multiple times before
	 * failing.
	 */

	/* Snapshot the current nexthop/weight under the read lock. */
	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	/*
	 * EAGAIN means the route changed underneath us between the
	 * unlocked steps; rnd_orig has been refreshed, so just retry.
	 */
	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Creates a new nexthop (@nh_new) based on @nh_orig with the changes
 * requested in @info, re-resolving the ifaddr/ifp when the gateway,
 * interface or address changed.
 * Returns 0 on success.
 */
static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	    !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	/* Always clear rti_ifa so the caller cannot reuse a stale pointer. */
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
/*
 * Changes a single path inside the multipath group of @rt (described by
 * @rnd_orig), selected by matching @info, to a freshly-created nexthop.
 * Returns 0 on success and stores the result of the operation in @rc.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	/* Find the path inside the group that @info describes. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	/* Build the replacement nexthop for the matched path. */
	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		/* Transient allocation failure: report EAGAIN so the caller retries. */
		return (EAGAIN);
	}

	/* Copy the group, substituting the changed path and its weight. */
	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	/*
	 * Drop local references; NOTE(review): nhgrp_get_group() is assumed
	 * to take its own references on the supplied nexthops — confirm.
	 */
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	/* Install the new group only if the route is still unchanged. */
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

/*
 * Changes the nexthop data of @rt according to @info, dispatching to the
 * multipath handler when the current nexthop is a nexthop group.
 * Returns 0 on success and stores the result of the operation in @rc.
 */
static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd to @rnh.
 * Returns 0 on success and stores operation results in @rc.
1187 */ 1188 static int 1189 add_route(struct rib_head *rnh, struct rtentry *rt, 1190 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1191 { 1192 struct radix_node *rn; 1193 1194 RIB_WLOCK_ASSERT(rnh); 1195 1196 rt->rt_nhop = rnd->rnd_nhop; 1197 rt->rt_weight = rnd->rnd_weight; 1198 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes); 1199 1200 if (rn != NULL) { 1201 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1202 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1203 1204 /* Finalize notification */ 1205 rib_bump_gen(rnh); 1206 rnh->rnh_prefixes++; 1207 1208 rc->rc_cmd = RTM_ADD; 1209 rc->rc_rt = rt; 1210 rc->rc_nh_old = NULL; 1211 rc->rc_nh_new = rnd->rnd_nhop; 1212 rc->rc_nh_weight = rnd->rnd_weight; 1213 1214 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1215 return (0); 1216 } 1217 1218 /* Existing route or memory allocation failure. */ 1219 return (EEXIST); 1220 } 1221 1222 /* 1223 * Unconditionally deletes @rt from @rnh. 1224 */ 1225 static int 1226 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc) 1227 { 1228 RIB_WLOCK_ASSERT(rnh); 1229 1230 /* Route deletion requested. */ 1231 struct radix_node *rn; 1232 1233 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head); 1234 if (rn == NULL) 1235 return (ESRCH); 1236 rt = RNTORT(rn); 1237 rt->rte_flags &= ~RTF_UP; 1238 1239 rib_bump_gen(rnh); 1240 rnh->rnh_prefixes--; 1241 1242 rc->rc_cmd = RTM_DELETE; 1243 rc->rc_rt = rt; 1244 rc->rc_nh_old = rt->rt_nhop; 1245 rc->rc_nh_new = NULL; 1246 rc->rc_nh_weight = rt->rt_weight; 1247 1248 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1249 1250 return (0); 1251 } 1252 1253 /* 1254 * Switch @rt nhop/weigh to the ones specified in @rnd. 1255 * Returns 0 on success. 
1256 */ 1257 int 1258 change_route(struct rib_head *rnh, struct rtentry *rt, 1259 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1260 { 1261 struct nhop_object *nh_orig; 1262 1263 RIB_WLOCK_ASSERT(rnh); 1264 1265 nh_orig = rt->rt_nhop; 1266 1267 if (rnd->rnd_nhop == NULL) 1268 return (delete_route(rnh, rt, rc)); 1269 1270 /* Changing nexthop & weight to a new one */ 1271 rt->rt_nhop = rnd->rnd_nhop; 1272 rt->rt_weight = rnd->rnd_weight; 1273 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1274 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1275 1276 /* Finalize notification */ 1277 rib_bump_gen(rnh); 1278 rc->rc_cmd = RTM_CHANGE; 1279 rc->rc_rt = rt; 1280 rc->rc_nh_old = nh_orig; 1281 rc->rc_nh_new = rnd->rnd_nhop; 1282 rc->rc_nh_weight = rnd->rnd_weight; 1283 1284 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1285 1286 return (0); 1287 } 1288 1289 /* 1290 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1291 * consistent with the current route data. 1292 * Nexthop in @nhd_new is consumed. 1293 */ 1294 int 1295 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1296 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new, 1297 struct rib_cmd_info *rc) 1298 { 1299 struct rtentry *rt_new; 1300 int error = 0; 1301 1302 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1303 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE]; 1304 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE); 1305 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE); 1306 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family, 1307 "trying change %s -> %s", buf_old, buf_new); 1308 } 1309 RIB_WLOCK(rnh); 1310 1311 struct route_nhop_data rnd; 1312 rt_new = lookup_prefix_rt(rnh, rt, &rnd); 1313 1314 if (rt_new == NULL) { 1315 if (rnd_orig->rnd_nhop == NULL) 1316 error = add_route(rnh, rt, rnd_new, rc); 1317 else { 1318 /* 1319 * Prefix does not exist, which was not our assumption. 
1320 * Update @rnd_orig with the new data and return 1321 */ 1322 rnd_orig->rnd_nhop = NULL; 1323 rnd_orig->rnd_weight = 0; 1324 error = EAGAIN; 1325 } 1326 } else { 1327 /* Prefix exists, try to update */ 1328 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1329 /* 1330 * Nhop/mpath group hasn't changed. Flip 1331 * to the new precalculated one and return 1332 */ 1333 error = change_route(rnh, rt_new, rnd_new, rc); 1334 } else { 1335 /* Update and retry */ 1336 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1337 rnd_orig->rnd_weight = rt_new->rt_weight; 1338 error = EAGAIN; 1339 } 1340 } 1341 1342 RIB_WUNLOCK(rnh); 1343 1344 if (error == 0) { 1345 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1346 1347 if (rnd_orig->rnd_nhop != NULL) 1348 nhop_free_any(rnd_orig->rnd_nhop); 1349 1350 } else { 1351 if (rnd_new->rnd_nhop != NULL) 1352 nhop_free_any(rnd_new->rnd_nhop); 1353 } 1354 1355 return (error); 1356 } 1357 1358 /* 1359 * Performs modification of routing table specificed by @action. 1360 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1361 * Needs to be run in network epoch. 1362 * 1363 * Returns 0 on success and fills in @rc with action result. 1364 */ 1365 int 1366 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1367 struct rib_cmd_info *rc) 1368 { 1369 int error; 1370 1371 switch (action) { 1372 case RTM_ADD: 1373 error = rib_add_route(fibnum, info, rc); 1374 break; 1375 case RTM_DELETE: 1376 error = rib_del_route(fibnum, info, rc); 1377 break; 1378 case RTM_CHANGE: 1379 error = rib_change_route(fibnum, info, rc); 1380 break; 1381 default: 1382 error = ENOTSUP; 1383 } 1384 1385 return (error); 1386 } 1387 1388 struct rt_delinfo 1389 { 1390 struct rib_head *rnh; 1391 struct rtentry *head; 1392 rib_filter_f_t *filter_f; 1393 void *filter_arg; 1394 int prio; 1395 struct rib_cmd_info rc; 1396 }; 1397 1398 /* 1399 * Conditionally unlinks rtenties or paths from radix tree based 1400 * on the callback data passed in @arg. 
1401 */ 1402 static int 1403 rt_checkdelroute(struct radix_node *rn, void *arg) 1404 { 1405 struct rt_delinfo *di = (struct rt_delinfo *)arg; 1406 struct rtentry *rt = (struct rtentry *)rn; 1407 1408 if (rt_delete_conditional(di->rnh, rt, di->prio, 1409 di->filter_f, di->filter_arg, &di->rc) != 0) 1410 return (0); 1411 1412 /* 1413 * Add deleted rtentries to the list to GC them 1414 * after dropping the lock. 1415 * 1416 * XXX: Delayed notifications not implemented 1417 * for nexthop updates. 1418 */ 1419 if (di->rc.rc_cmd == RTM_DELETE) { 1420 /* Add to the list and return */ 1421 rt->rt_chain = di->head; 1422 di->head = rt; 1423 #ifdef ROUTE_MPATH 1424 } else { 1425 /* 1426 * RTM_CHANGE to a different nexthop or nexthop group. 1427 * Free old multipath group. 1428 */ 1429 nhop_free_any(di->rc.rc_nh_old); 1430 #endif 1431 } 1432 1433 return (0); 1434 } 1435 1436 /* 1437 * Iterates over a routing table specified by @fibnum and @family and 1438 * deletes elements marked by @filter_f. 1439 * @fibnum: rtable id 1440 * @family: AF_ address family 1441 * @filter_f: function returning non-zero value for items to delete 1442 * @arg: data to pass to the @filter_f function 1443 * @report: true if rtsock notification is needed. 1444 */ 1445 void 1446 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg, 1447 bool report) 1448 { 1449 struct rib_head *rnh; 1450 struct rtentry *rt; 1451 struct nhop_object *nh; 1452 struct epoch_tracker et; 1453 1454 rnh = rt_tables_get_rnh(fibnum, family); 1455 if (rnh == NULL) 1456 return; 1457 1458 struct rt_delinfo di = { 1459 .rnh = rnh, 1460 .filter_f = filter_f, 1461 .filter_arg = filter_arg, 1462 .prio = NH_PRIORITY_NORMAL, 1463 }; 1464 1465 NET_EPOCH_ENTER(et); 1466 1467 RIB_WLOCK(rnh); 1468 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1469 RIB_WUNLOCK(rnh); 1470 1471 /* We might have something to reclaim. 
*/ 1472 bzero(&di.rc, sizeof(di.rc)); 1473 di.rc.rc_cmd = RTM_DELETE; 1474 while (di.head != NULL) { 1475 rt = di.head; 1476 di.head = rt->rt_chain; 1477 rt->rt_chain = NULL; 1478 nh = rt->rt_nhop; 1479 1480 di.rc.rc_rt = rt; 1481 di.rc.rc_nh_old = nh; 1482 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1483 1484 if (report) { 1485 #ifdef ROUTE_MPATH 1486 struct nhgrp_object *nhg; 1487 const struct weightened_nhop *wn; 1488 uint32_t num_nhops; 1489 if (NH_IS_NHGRP(nh)) { 1490 nhg = (struct nhgrp_object *)nh; 1491 wn = nhgrp_get_nhops(nhg, &num_nhops); 1492 for (int i = 0; i < num_nhops; i++) 1493 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1494 } else 1495 #endif 1496 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1497 } 1498 rt_free(rt); 1499 } 1500 1501 NET_EPOCH_EXIT(et); 1502 } 1503 1504 static int 1505 rt_delete_unconditional(struct radix_node *rn, void *arg) 1506 { 1507 struct rtentry *rt = RNTORT(rn); 1508 struct rib_head *rnh = (struct rib_head *)arg; 1509 1510 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1511 if (RNTORT(rn) == rt) 1512 rt_free(rt); 1513 1514 return (0); 1515 } 1516 1517 /* 1518 * Removes all routes from the routing table without executing notifications. 1519 * rtentres will be removed after the end of a current epoch. 1520 */ 1521 static void 1522 rib_flush_routes(struct rib_head *rnh) 1523 { 1524 RIB_WLOCK(rnh); 1525 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1526 RIB_WUNLOCK(rnh); 1527 } 1528 1529 void 1530 rib_flush_routes_family(int family) 1531 { 1532 struct rib_head *rnh; 1533 1534 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1535 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1536 rib_flush_routes(rnh); 1537 } 1538 } 1539 1540 const char * 1541 rib_print_family(int family) 1542 { 1543 switch (family) { 1544 case AF_INET: 1545 return ("inet"); 1546 case AF_INET6: 1547 return ("inet6"); 1548 case AF_LINK: 1549 return ("link"); 1550 } 1551 return ("unknown"); 1552 } 1553 1554