1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_private.h> 48 #include <net/if_dl.h> 49 #include <net/vnet.h> 50 #include <net/route.h> 51 #include <net/route/route_ctl.h> 52 #include <net/route/route_var.h> 53 #include <net/route/nhop_utils.h> 54 #include <net/route/nhop.h> 55 #include <net/route/nhop_var.h> 56 #include <netinet/in.h> 57 #include <netinet6/scope6_var.h> 58 #include <netinet6/in6_var.h> 59 60 #define DEBUG_MOD_NAME route_ctl 61 #define DEBUG_MAX_LEVEL LOG_DEBUG 62 #include <net/route/route_debug.h> 63 _DECLARE_DEBUG(LOG_INFO); 64 65 /* 66 * This file contains control plane routing tables functions. 67 * 68 * All functions assumes they are called in net epoch. 
 */

/*
 * Scratch storage large enough to hold any sockaddr handled here
 * (AF_INET / AF_INET6); used for on-stack netmask construction.
 */
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
/* Sysctl is read-only when the kernel is built without ROUTE_MPATH. */
#ifdef ROUTE_MPATH
#define _MP_FLAGS	CTLFLAG_RW
#else
#define _MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
/*
 * Set to 1 lazily when the first multipath route is installed; flips on
 * flowid calculation for locally-originated traffic (see
 * add_route_flags_mpath()).
 */
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections*/
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

/*
 * Returns the routing table head for @fibnum and the address family of
 * @info's destination sockaddr, or NULL if that family has no table.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
/* Reports whether IPv4-over-IPv6 (RFC 5549) nexthops are enabled. */
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

#ifdef ROUTE_MPATH
/* Reports whether multipath is enabled in the vnet owning @rh. */
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
 * Avoid nhops without gateways and redirects.
 *
 * Returns 1 for multipath-eligible nexthop,
 * 0 otherwise.
187 */ 188 bool 189 nhop_can_multipath(const struct nhop_object *nh) 190 { 191 192 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 193 return (1); 194 if ((nh->nh_flags & NHF_GATEWAY) == 0) 195 return (0); 196 if ((nh->nh_flags & NHF_REDIRECT) != 0) 197 return (0); 198 199 return (1); 200 } 201 #endif 202 203 static int 204 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 205 { 206 uint32_t weight; 207 208 if (info->rti_mflags & RTV_WEIGHT) 209 weight = info->rti_rmx->rmx_weight; 210 else 211 weight = default_weight; 212 /* Keep upper 1 byte for adm distance purposes */ 213 if (weight > RT_MAX_WEIGHT) 214 weight = RT_MAX_WEIGHT; 215 else if (weight == 0) 216 weight = default_weight; 217 218 return (weight); 219 } 220 221 /* 222 * File-local concept for distingushing between the normal and 223 * RTF_PINNED routes tha can override the "normal" one. 224 */ 225 #define NH_PRIORITY_HIGH 2 226 #define NH_PRIORITY_NORMAL 1 227 static int 228 get_prio_from_info(const struct rt_addrinfo *info) 229 { 230 if (info->rti_flags & RTF_PINNED) 231 return (NH_PRIORITY_HIGH); 232 return (NH_PRIORITY_NORMAL); 233 } 234 235 static int 236 nhop_get_prio(const struct nhop_object *nh) 237 { 238 if (NH_IS_PINNED(nh)) 239 return (NH_PRIORITY_HIGH); 240 return (NH_PRIORITY_NORMAL); 241 } 242 243 /* 244 * Check if specified @gw matches gw data in the nexthop @nh. 245 * 246 * Returns true if matches, false otherwise. 247 */ 248 bool 249 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 250 { 251 252 if (nh->gw_sa.sa_family != gw->sa_family) 253 return (false); 254 255 switch (gw->sa_family) { 256 case AF_INET: 257 return (nh->gw4_sa.sin_addr.s_addr == 258 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 259 case AF_INET6: 260 { 261 const struct sockaddr_in6 *gw6; 262 gw6 = (const struct sockaddr_in6 *)gw; 263 264 /* 265 * Currently (2020-09) IPv6 gws in kernel have their 266 * scope embedded. 
Once this becomes false, this code 267 * has to be revisited. 268 */ 269 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 270 &gw6->sin6_addr)) 271 return (true); 272 return (false); 273 } 274 case AF_LINK: 275 { 276 const struct sockaddr_dl *sdl; 277 sdl = (const struct sockaddr_dl *)gw; 278 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 279 } 280 default: 281 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 282 } 283 284 /* NOTREACHED */ 285 return (false); 286 } 287 288 /* 289 * Matches all nexthop with given @gw. 290 * Can be used as rib_filter_f callback. 291 */ 292 int 293 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa) 294 { 295 const struct sockaddr *gw = (const struct sockaddr *)gw_sa; 296 297 return (match_nhop_gw(nh, gw)); 298 } 299 300 struct gw_filter_data { 301 const struct sockaddr *gw; 302 int count; 303 }; 304 305 /* 306 * Matches first occurence of the gateway provided in @gwd 307 */ 308 static int 309 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 310 { 311 struct gw_filter_data *gwd = (struct gw_filter_data *)_data; 312 313 /* Return only first match to make rtsock happy */ 314 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0) 315 return (1); 316 return (0); 317 } 318 319 /* 320 * Checks if data in @info matches nexhop @nh. 321 * 322 * Returns 0 on success, 323 * ESRCH if not matched, 324 * ENOENT if filter function returned false 325 */ 326 int 327 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 328 const struct nhop_object *nh) 329 { 330 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 331 332 if (info->rti_filter != NULL) { 333 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 334 return (ENOENT); 335 else 336 return (0); 337 } 338 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 339 return (ESRCH); 340 341 return (0); 342 } 343 344 /* 345 * Runs exact prefix match based on @dst and @netmask. 
346 * Returns matched @rtentry if found or NULL. 347 * If rtentry was found, saves nexthop / weight value into @rnd. 348 */ 349 static struct rtentry * 350 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 351 const struct sockaddr *netmask, struct route_nhop_data *rnd) 352 { 353 struct rtentry *rt; 354 355 RIB_LOCK_ASSERT(rnh); 356 357 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 358 if (rt != NULL) { 359 rnd->rnd_nhop = rt->rt_nhop; 360 rnd->rnd_weight = rt->rt_weight; 361 } else { 362 rnd->rnd_nhop = NULL; 363 rnd->rnd_weight = 0; 364 } 365 366 return (rt); 367 } 368 369 struct rtentry * 370 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt, 371 struct route_nhop_data *rnd) 372 { 373 return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd)); 374 } 375 376 /* 377 * Runs exact prefix match based on dst/netmask from @info. 378 * Assumes RIB lock is held. 379 * Returns matched @rtentry if found or NULL. 380 * If rtentry was found, saves nexthop / weight value into @rnd. 381 */ 382 struct rtentry * 383 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 384 struct route_nhop_data *rnd) 385 { 386 struct rtentry *rt; 387 388 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 389 info->rti_info[RTAX_NETMASK], rnd); 390 391 return (rt); 392 } 393 394 static bool 395 fill_pxmask_family(int family, int plen, struct sockaddr *_dst, 396 struct sockaddr **pmask) 397 { 398 if (plen == -1) { 399 *pmask = NULL; 400 return (true); 401 } 402 403 switch (family) { 404 #ifdef INET 405 case AF_INET: 406 { 407 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask); 408 struct sockaddr_in *dst= (struct sockaddr_in *)_dst; 409 410 memset(mask, 0, sizeof(*mask)); 411 mask->sin_family = family; 412 mask->sin_len = sizeof(*mask); 413 if (plen == 32) 414 *pmask = NULL; 415 else if (plen > 32 || plen < 0) 416 return (false); 417 else { 418 uint32_t daddr, maddr; 419 maddr = htonl(plen ? 
~((1 << (32 - plen)) - 1) : 0); 420 mask->sin_addr.s_addr = maddr; 421 daddr = dst->sin_addr.s_addr; 422 daddr = htonl(ntohl(daddr) & ntohl(maddr)); 423 dst->sin_addr.s_addr = daddr; 424 } 425 return (true); 426 } 427 break; 428 #endif 429 #ifdef INET6 430 case AF_INET6: 431 { 432 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask); 433 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst; 434 435 memset(mask, 0, sizeof(*mask)); 436 mask->sin6_family = family; 437 mask->sin6_len = sizeof(*mask); 438 if (plen == 128) 439 *pmask = NULL; 440 else if (plen > 128 || plen < 0) 441 return (false); 442 else { 443 ip6_writemask(&mask->sin6_addr, plen); 444 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr); 445 } 446 return (true); 447 } 448 break; 449 #endif 450 } 451 return (false); 452 } 453 454 /* 455 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd 456 * to the routing table. 457 * 458 * @fibnum: verified kernel rtable id to insert route to 459 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 460 * @plen: prefix length (or -1 if host route or not applicable for AF) 461 * @op_flags: combination of RTM_F_ flags 462 * @rc: storage to report operation result 463 * 464 * Returns 0 on success. 
465 */ 466 int 467 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 468 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc) 469 { 470 union sockaddr_union mask_storage; 471 struct sockaddr *netmask = &mask_storage.sa; 472 struct rtentry *rt = NULL; 473 474 NET_EPOCH_ASSERT(); 475 476 bzero(rc, sizeof(struct rib_cmd_info)); 477 rc->rc_cmd = RTM_ADD; 478 479 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 480 if (rnh == NULL) 481 return (EAFNOSUPPORT); 482 483 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 484 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 485 return (EINVAL); 486 } 487 488 if (op_flags & RTM_F_CREATE) { 489 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) { 490 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed"); 491 return (ENOMEM); 492 } 493 } else { 494 struct route_nhop_data rnd_tmp; 495 RIB_RLOCK_TRACKER; 496 497 RIB_RLOCK(rnh); 498 rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp); 499 RIB_RUNLOCK(rnh); 500 501 if (rt == NULL) 502 return (ESRCH); 503 } 504 505 return (add_route_flags(rnh, rt, rnd, op_flags, rc)); 506 } 507 508 /* 509 * Attempts to delete @dst/plen prefix matching gateway @gw from the 510 * routing rable. 511 * 512 * @fibnum: rtable id to remove route from 513 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 514 * @plen: prefix length (or -1 if host route or not applicable for AF) 515 * @gw: gateway to match 516 * @op_flags: combination of RTM_F_ flags 517 * @rc: storage to report operation result 518 * 519 * Returns 0 on success. 
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	/* Thin wrapper: delete only paths whose gateway matches @gw. */
	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE allows deleting RTF_PINNED (high-priority) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	/* Epoch-deferred free of the unlinked route. */
	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Return 0 on success.
613 */ 614 int 615 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src, 616 struct rib_head *rh_dst, struct rib_cmd_info *rc) 617 { 618 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop; 619 int error; 620 621 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0); 622 623 IF_DEBUG_LEVEL(LOG_DEBUG2) { 624 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE]; 625 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf)); 626 rt_print_buf(rt, rtbuf, sizeof(rtbuf)); 627 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u", 628 rtbuf, nhbuf, nhop_get_fibnum(nh_src)); 629 } 630 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family); 631 if (nh == NULL) { 632 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop"); 633 return (ENOMEM); 634 } 635 nhop_copy(nh, rnd_src->rnd_nhop); 636 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop)); 637 nhop_set_fibnum(nh, rh_dst->rib_fibnum); 638 nh = nhop_get_nhop_internal(rh_dst, nh, &error); 639 if (error != 0) { 640 FIB_RH_LOG(LOG_INFO, rh_dst, 641 "unable to finalize new nexthop: error %d", error); 642 return (ENOMEM); 643 } 644 645 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt)); 646 if (rt_new == NULL) { 647 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry"); 648 nhop_free(nh); 649 return (ENOMEM); 650 } 651 652 struct route_nhop_data rnd = { 653 .rnd_nhop = nh, 654 .rnd_weight = rnd_src->rnd_weight 655 }; 656 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? 
RTM_F_FORCE : 0); 657 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc); 658 659 if (error != 0) { 660 IF_DEBUG_LEVEL(LOG_DEBUG2) { 661 char buf[NHOP_PRINT_BUFSIZE]; 662 rt_print_buf(rt_new, buf, sizeof(buf)); 663 FIB_RH_LOG(LOG_DEBUG, rh_dst, 664 "Unable to add route %s: error %d", buf, error); 665 } 666 nhop_free(nh); 667 rt_free_immediate(rt_new); 668 } 669 return (error); 670 } 671 672 /* 673 * Adds route defined by @info into the kernel table specified by @fibnum and 674 * sa_family in @info->rti_info[RTAX_DST]. 675 * 676 * Returns 0 on success and fills in operation metadata into @rc. 677 */ 678 int 679 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 680 struct rib_cmd_info *rc) 681 { 682 struct rib_head *rnh; 683 int error; 684 685 NET_EPOCH_ASSERT(); 686 687 rnh = get_rnh(fibnum, info); 688 if (rnh == NULL) 689 return (EAFNOSUPPORT); 690 691 /* 692 * Check consistency between RTF_HOST flag and netmask 693 * existence. 694 */ 695 if (info->rti_flags & RTF_HOST) 696 info->rti_info[RTAX_NETMASK] = NULL; 697 else if (info->rti_info[RTAX_NETMASK] == NULL) { 698 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask"); 699 return (EINVAL); 700 } 701 702 bzero(rc, sizeof(struct rib_cmd_info)); 703 rc->rc_cmd = RTM_ADD; 704 705 error = add_route_byinfo(rnh, info, rc); 706 if (error == 0) 707 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 708 709 return (error); 710 } 711 712 static int 713 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 714 struct rib_cmd_info *rc) 715 { 716 struct route_nhop_data rnd_add; 717 struct nhop_object *nh; 718 struct rtentry *rt; 719 struct sockaddr *dst, *gateway, *netmask; 720 int error; 721 722 dst = info->rti_info[RTAX_DST]; 723 gateway = info->rti_info[RTAX_GATEWAY]; 724 netmask = info->rti_info[RTAX_NETMASK]; 725 726 if ((info->rti_flags & RTF_GATEWAY) && !gateway) { 727 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw"); 728 return (EINVAL); 729 } 730 if (dst && gateway && 
!nhop_check_gateway(dst->sa_family, gateway->sa_family)) { 731 FIB_RH_LOG(LOG_DEBUG, rnh, 732 "error: invalid dst/gateway family combination (%d, %d)", 733 dst->sa_family, gateway->sa_family); 734 return (EINVAL); 735 } 736 737 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) { 738 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d", 739 dst->sa_len); 740 return (EINVAL); 741 } 742 743 if (info->rti_ifa == NULL) { 744 error = rt_getifa_fib(info, rnh->rib_fibnum); 745 if (error) 746 return (error); 747 } 748 749 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) 750 return (ENOBUFS); 751 752 error = nhop_create_from_info(rnh, info, &nh); 753 if (error != 0) { 754 rt_free_immediate(rt); 755 return (error); 756 } 757 758 rnd_add.rnd_nhop = nh; 759 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 760 761 int op_flags = RTM_F_CREATE; 762 if (get_prio_from_info(info) == NH_PRIORITY_HIGH) 763 op_flags |= RTM_F_FORCE; 764 else 765 op_flags |= RTM_F_APPEND; 766 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc)); 767 768 } 769 770 static int 771 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add, 772 int op_flags, struct rib_cmd_info *rc) 773 { 774 struct route_nhop_data rnd_orig; 775 struct nhop_object *nh; 776 struct rtentry *rt_orig; 777 int error = 0; 778 779 MPASS(rt != NULL); 780 781 nh = rnd_add->rnd_nhop; 782 783 RIB_WLOCK(rnh); 784 785 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig); 786 787 if (rt_orig == NULL) { 788 if (op_flags & RTM_F_CREATE) 789 error = add_route(rnh, rt, rnd_add, rc); 790 else 791 error = ESRCH; /* no entry but creation was not required */ 792 RIB_WUNLOCK(rnh); 793 if (error != 0) 794 goto out; 795 return (0); 796 } 797 798 if (op_flags & RTM_F_EXCL) { 799 /* We have existing route in the RIB but not allowed to replace. 
*/ 800 RIB_WUNLOCK(rnh); 801 error = EEXIST; 802 goto out; 803 } 804 805 /* Now either append or replace */ 806 if (op_flags & RTM_F_REPLACE) { 807 if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) { 808 /* Old path is "better" (e.g. has PINNED flag set) */ 809 RIB_WUNLOCK(rnh); 810 error = EEXIST; 811 goto out; 812 } 813 change_route(rnh, rt_orig, rnd_add, rc); 814 RIB_WUNLOCK(rnh); 815 nh = rc->rc_nh_old; 816 goto out; 817 } 818 819 RIB_WUNLOCK(rnh); 820 821 #ifdef ROUTE_MPATH 822 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) && 823 nhop_can_multipath(rnd_add->rnd_nhop) && 824 nhop_can_multipath(rnd_orig.rnd_nhop)) { 825 826 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 827 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig, 828 op_flags, rc); 829 if (error != EAGAIN) 830 break; 831 RTSTAT_INC(rts_add_retry); 832 } 833 834 /* 835 * Original nhop reference is unused in any case. 836 */ 837 nhop_free_any(rnd_add->rnd_nhop); 838 if (op_flags & RTM_F_CREATE) { 839 if (error != 0 || rc->rc_cmd != RTM_ADD) 840 rt_free_immediate(rt); 841 } 842 return (error); 843 } 844 #endif 845 /* Out of options - free state and return error */ 846 error = EEXIST; 847 out: 848 if (op_flags & RTM_F_CREATE) 849 rt_free_immediate(rt); 850 nhop_free_any(nh); 851 852 return (error); 853 } 854 855 #ifdef ROUTE_MPATH 856 static int 857 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 858 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 859 int op_flags, struct rib_cmd_info *rc) 860 { 861 RIB_RLOCK_TRACKER; 862 struct route_nhop_data rnd_new; 863 int error = 0; 864 865 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new); 866 if (error != 0) { 867 if (error == EAGAIN) { 868 /* 869 * Group creation failed, most probably because 870 * @rnd_orig data got scheduled for deletion. 871 * Refresh @rnd_orig data and retry. 
872 */ 873 RIB_RLOCK(rnh); 874 lookup_prefix_rt(rnh, rt, rnd_orig); 875 RIB_RUNLOCK(rnh); 876 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) { 877 /* In this iteration route doesn't exist */ 878 error = ENOENT; 879 } 880 } 881 return (error); 882 } 883 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 884 if (error != 0) 885 return (error); 886 887 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) { 888 /* 889 * First multipath route got installed. Enable local 890 * outbound connections hashing. 891 */ 892 if (bootverbose) 893 printf("FIB: enabled flowid calculation for locally-originated packets\n"); 894 V_fib_hash_outbound = 1; 895 } 896 897 return (0); 898 } 899 #endif 900 901 /* 902 * Removes route defined by @info from the kernel table specified by @fibnum and 903 * sa_family in @info->rti_info[RTAX_DST]. 904 * 905 * Returns 0 on success and fills in operation metadata into @rc. 906 */ 907 int 908 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 909 { 910 struct rib_head *rnh; 911 struct sockaddr *dst, *netmask; 912 struct sockaddr_storage mdst; 913 int error; 914 915 NET_EPOCH_ASSERT(); 916 917 rnh = get_rnh(fibnum, info); 918 if (rnh == NULL) 919 return (EAFNOSUPPORT); 920 921 bzero(rc, sizeof(struct rib_cmd_info)); 922 rc->rc_cmd = RTM_DELETE; 923 924 dst = info->rti_info[RTAX_DST]; 925 netmask = info->rti_info[RTAX_NETMASK]; 926 927 if (netmask != NULL) { 928 /* Ensure @dst is always properly masked */ 929 if (dst->sa_len > sizeof(mdst)) { 930 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large"); 931 return (EINVAL); 932 } 933 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 934 dst = (struct sockaddr *)&mdst; 935 } 936 937 rib_filter_f_t *filter_func = NULL; 938 void *filter_arg = NULL; 939 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] }; 940 941 if (info->rti_filter != NULL) { 942 filter_func = info->rti_filter; 943 filter_arg = info->rti_filterdata; 944 } 
else if (gwd.gw != NULL) { 945 filter_func = match_gw_one; 946 filter_arg = &gwd; 947 } 948 949 int prio = get_prio_from_info(info); 950 951 RIB_WLOCK(rnh); 952 struct route_nhop_data rnd; 953 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 954 if (rt != NULL) { 955 error = rt_delete_conditional(rnh, rt, prio, filter_func, 956 filter_arg, rc); 957 } else 958 error = ESRCH; 959 RIB_WUNLOCK(rnh); 960 961 if (error != 0) 962 return (error); 963 964 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 965 966 if (rc->rc_cmd == RTM_DELETE) 967 rt_free(rc->rc_rt); 968 #ifdef ROUTE_MPATH 969 else { 970 /* 971 * Deleting 1 path may result in RTM_CHANGE to 972 * a different mpath group/nhop. 973 * Free old mpath group. 974 */ 975 nhop_free_any(rc->rc_nh_old); 976 } 977 #endif 978 979 return (0); 980 } 981 982 /* 983 * Conditionally unlinks rtentry paths from @rnh matching @cb. 984 * Returns 0 on success with operation result stored in @rc. 985 * On error, returns: 986 * ESRCH - if prefix was not found or filter function failed to match 987 * EADDRINUSE - if trying to delete higher priority route. 988 */ 989 static int 990 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 991 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc) 992 { 993 struct nhop_object *nh = rt->rt_nhop; 994 995 #ifdef ROUTE_MPATH 996 if (NH_IS_NHGRP(nh)) { 997 struct nhgrp_object *nhg = (struct nhgrp_object *)nh; 998 struct route_nhop_data rnd; 999 int error; 1000 1001 if (cb == NULL) 1002 return (ESRCH); 1003 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd); 1004 if (error == 0) { 1005 if (rnd.rnd_nhgrp == nhg) { 1006 /* No match, unreference new group and return. 
*/ 1007 nhop_free_any(rnd.rnd_nhop); 1008 return (ESRCH); 1009 } 1010 error = change_route(rnh, rt, &rnd, rc); 1011 } 1012 return (error); 1013 } 1014 #endif 1015 if (cb != NULL && !cb(rt, nh, cbdata)) 1016 return (ESRCH); 1017 1018 if (prio < nhop_get_prio(nh)) 1019 return (EADDRINUSE); 1020 1021 return (delete_route(rnh, rt, rc)); 1022 } 1023 1024 int 1025 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 1026 struct rib_cmd_info *rc) 1027 { 1028 RIB_RLOCK_TRACKER; 1029 struct route_nhop_data rnd_orig; 1030 struct rib_head *rnh; 1031 struct rtentry *rt; 1032 int error; 1033 1034 NET_EPOCH_ASSERT(); 1035 1036 rnh = get_rnh(fibnum, info); 1037 if (rnh == NULL) 1038 return (EAFNOSUPPORT); 1039 1040 bzero(rc, sizeof(struct rib_cmd_info)); 1041 rc->rc_cmd = RTM_CHANGE; 1042 1043 /* Check if updated gateway exists */ 1044 if ((info->rti_flags & RTF_GATEWAY) && 1045 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1046 1047 /* 1048 * route(8) adds RTF_GATEWAY flag if -interface is not set. 1049 * Remove RTF_GATEWAY to enforce consistency and maintain 1050 * compatibility.. 1051 */ 1052 info->rti_flags &= ~RTF_GATEWAY; 1053 } 1054 1055 /* 1056 * route change is done in multiple steps, with dropping and 1057 * reacquiring lock. In the situations with multiple processes 1058 * changes the same route in can lead to the case when route 1059 * is changed between the steps. Address it by retrying the operation 1060 * multiple times before failing. 
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	/*
	 * Snapshot the current nexthop/weight so the conditional change
	 * below can detect concurrent modifications (compare-and-swap
	 * style; see change_route_conditional()).
	 */
	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	/* Retry on EAGAIN: the route was changed under us between steps. */
	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Creates a new nexthop in @nh_new based on @nh_orig with the changes
 * requested in @info.  Looks up a new ifaddr/ifp first when the request
 * alters the gateway or interface.  Returns 0 on success.
 */
static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	    !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	/* rti_ifa is only borrowed for nexthop creation; always clear it. */
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
/*
 * Changes the single nexthop inside the multipath group of @rt that
 * matches @info, building a new group with the updated member and
 * conditionally swapping it in.  Returns 0 on success.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	/* Find the group member the request refers to. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	/* Build a copy of the member array with the changed nexthop. */
	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		/* EAGAIN lets the caller's retry loop try again. */
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	/* The group takes its own references; drop ours. */
	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

/*
 * Builds new nexthop data from @info and conditionally applies it to @rt,
 * provided the route still carries the data recorded in @rnd_orig.
 * Dispatches to change_mpath_route() for nexthop groups.
 * Returns 0 on success, EAGAIN if the route changed concurrently.
 */
static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd_new to @rnh.
 * Returns 0 on success and stores operation results in @rc.
1199 */ 1200 static int 1201 add_route(struct rib_head *rnh, struct rtentry *rt, 1202 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1203 { 1204 struct radix_node *rn; 1205 1206 RIB_WLOCK_ASSERT(rnh); 1207 1208 rt->rt_nhop = rnd->rnd_nhop; 1209 rt->rt_weight = rnd->rnd_weight; 1210 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes); 1211 1212 if (rn != NULL) { 1213 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1214 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1215 1216 /* Finalize notification */ 1217 rib_bump_gen(rnh); 1218 rnh->rnh_prefixes++; 1219 1220 rc->rc_cmd = RTM_ADD; 1221 rc->rc_rt = rt; 1222 rc->rc_nh_old = NULL; 1223 rc->rc_nh_new = rnd->rnd_nhop; 1224 rc->rc_nh_weight = rnd->rnd_weight; 1225 1226 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1227 return (0); 1228 } 1229 1230 /* Existing route or memory allocation failure. */ 1231 return (EEXIST); 1232 } 1233 1234 /* 1235 * Unconditionally deletes @rt from @rnh. 1236 */ 1237 static int 1238 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc) 1239 { 1240 RIB_WLOCK_ASSERT(rnh); 1241 1242 /* Route deletion requested. */ 1243 struct radix_node *rn; 1244 1245 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head); 1246 if (rn == NULL) 1247 return (ESRCH); 1248 rt = RNTORT(rn); 1249 rt->rte_flags &= ~RTF_UP; 1250 1251 rib_bump_gen(rnh); 1252 rnh->rnh_prefixes--; 1253 1254 rc->rc_cmd = RTM_DELETE; 1255 rc->rc_rt = rt; 1256 rc->rc_nh_old = rt->rt_nhop; 1257 rc->rc_nh_new = NULL; 1258 rc->rc_nh_weight = rt->rt_weight; 1259 1260 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1261 1262 return (0); 1263 } 1264 1265 /* 1266 * Switch @rt nhop/weigh to the ones specified in @rnd. 1267 * Returns 0 on success. 
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	/* A NULL nexthop request means "delete this prefix". */
	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * Nexthop in @rnd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
	RIB_WLOCK(rnh);

	/* Re-lookup under the write lock: the tree may have changed. */
	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		/* Success: the old nexthop reference is now ours to drop. */
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		/* Failure: consume the new nexthop as promised. */
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

/* Walker state for conditional route deletion (see rib_walk_del()). */
struct rt_delinfo
{
	struct rib_head *rnh;		/* table being walked */
	struct rtentry *head;		/* chain of unlinked rtentries to GC */
	rib_filter_f_t *filter_f;	/* callback selecting routes to delete */
	void *filter_arg;		/* opaque data for @filter_f */
	int prio;			/* nexthop priority threshold */
	struct rib_cmd_info rc;		/* result of the last operation */
};

/*
 * Conditionally unlinks rtentries or paths from radix tree based
 * on the callback data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @filter_arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	/* Remaining fields (head, rc) start zeroed. */
	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	/* Unlink matching routes under the lock, chaining them on di.head. */
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			/* Announce each member of a nexthop group separately. */
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
			rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}

/*
 * Tree walker callback: unconditionally removes the route at @rn from the
 * table in @arg, without notifications.
 */
static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	/* Only free the entry we actually removed from the tree. */
	if (RNTORT(rn) == rt)
		rt_free(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of a current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

/* Flushes every fib's routing table for address family @family. */
void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

/* Returns a human-readable name for address family @family. */
const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}
