1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_var.h>

#define	DEBUG_MOD_NAME	route_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called within the net epoch.
 */

/*
 * Scratch storage big enough for any supported sockaddr
 * (IPv4/IPv6); used for on-stack netmask construction.
 */
union sockaddr_union {
	struct sockaddr sa;
	struct sockaddr_in sin;
	struct sockaddr_in6 sin6;
	char _buf[32];
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask);
static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
#else
/* Without ROUTE_MPATH the knob stays visible but read-only. */
#define	_MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define	V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

/*
 * Returns the routing table head for @fibnum and the address family of
 * @info's destination, or NULL if the family is not supported.
 */
static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
/* Returns true if IPv4-over-IPv6 (RFC 5549) nexthops are enabled. */
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

#ifdef ROUTE_MPATH
/* Returns true if multipath routing is enabled in the vnet owning @rh. */
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Checks if nhop @nh is multipath-eligible.
 * Avoid nhops without gateways and redirects.
 *
 * Returns 1 for multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

/*
 * Returns the weight from @info if explicitly set via RTV_WEIGHT
 * (clamped to RT_MAX_WEIGHT), otherwise @default_weight.
 */
static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep upper 1 byte for adm distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}

/*
 * File-local concept for distinguishing between the normal and
 * RTF_PINNED routes that can override the "normal" one.
 */
#define	NH_PRIORITY_HIGH	2
#define	NH_PRIORITY_NORMAL	1
static int
get_prio_from_info(const struct rt_addrinfo *info)
{
	if (info->rti_flags & RTF_PINNED)
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

static int
nhop_get_prio(const struct nhop_object *nh)
{
	if (NH_IS_PINNED(nh))
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

/*
 * Check if specified @gw matches gw data in the nexthop @nh.
 *
 * Returns true if matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Matches all nexthops with given @gw.
 * Can be used as rib_filter_f callback.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;

	return (match_nhop_gw(nh, gw));
}

/* State for match_gw_one(): gateway to look for and match count so far. */
struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Matches first occurrence of the gateway provided in @gwd
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;

	/* Return only first match to make rtsock happy */
	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
		return (1);
	return (0);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	/* An explicit filter callback takes precedence over gw matching. */
	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

/* Exact-prefix lookup keyed by @rt's own dst/mask; see lookup_prefix_bysa(). */
struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

/*
 * Runs exact prefix match for @dst with prefix length @plen.
 * Returns matched rtentry or NULL; fills in @rnd on success.
 */
const struct rtentry *
rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;

	if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
		return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
	return (NULL);
}

/*
 * Builds a netmask sockaddr in *@pmask for @family/@plen and masks
 * @_dst in place so it is aligned to the prefix boundary.
 * Sets *@pmask to NULL for host routes (@plen == -1 or full-length
 * prefix).
 *
 * Returns true on success, false if @plen is invalid for @family.
 */
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
			struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin_family = family;
			mask->sin_len = sizeof(*mask);
			if (plen == 32)
				*pmask = NULL;
			else if (plen > 32 || plen < 0)
				return (false);
			else {
				uint32_t daddr, maddr;
				maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
				mask->sin_addr.s_addr = maddr;
				daddr = dst->sin_addr.s_addr;
				daddr = htonl(ntohl(daddr) & ntohl(maddr));
				dst->sin_addr.s_addr = daddr;
			}
			return (true);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin6_family = family;
			mask->sin6_len = sizeof(*mask);
			if (plen == 128)
				*pmask = NULL;
			else if (plen > 128 || plen < 0)
				return (false);
			else {
				ip6_writemask(&mask->sin6_addr, plen);
				IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
			}
			return (true);
		}
		break;
#endif
	}
	return (false);
}

/*
 * Attempts to add @dst/plen prefix with nexthop/nexthopgroup data @rnd
 * to the routing table.
 *
 * @fibnum: verified kernel rtable id to insert route to
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	struct rtentry *rt = NULL;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	if (op_flags & RTM_F_CREATE) {
		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
			return (ENOMEM);
		}
	} else {
		/* Update-only: the prefix must already exist. */
		struct route_nhop_data rnd_tmp;
		RIB_RLOCK_TRACKER;

		RIB_RLOCK(rnh);
		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
		RIB_RUNLOCK(rnh);

		if (rt == NULL)
			return (ESRCH);
	}

	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @gw: gateway to match
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	/* RTM_F_FORCE allows removal of RTF_PINNED (high-priority) routes. */
	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Return 0 on success.
 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	/* Clone the source nexthop, rebinding it to the destination fib. */
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		/*
		 * NOTE(review): the underlying error code is discarded and
		 * reported as ENOMEM; verify whether propagating @error
		 * would be more appropriate.
		 */
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	/* A pinned source route may need to override a normal one in dst fib. */
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt_new, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		/*
		 * NOTE(review): add_route_flags() error paths appear to free
		 * @rt_new (RTM_F_CREATE) and the nexthop reference themselves;
		 * verify these frees do not double-free.
		 */
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route_byinfo(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}

/*
 * Validates @info, allocates the rtentry and nexthop and inserts the
 * route. Returns 0 on success.
 */
static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway &&
	    !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	/* Pinned routes may replace normal ones; others may append (mpath). */
	int op_flags = RTM_F_CREATE;
	if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
		op_flags |= RTM_F_FORCE;
	else
		op_flags |= RTM_F_APPEND;
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));

}

/*
 * Inserts or updates prefix @rt with nexthop data @rnd_add, honouring
 * @op_flags (RTM_F_CREATE / EXCL / REPLACE / APPEND / FORCE).
 * On failure, frees @rt (when RTM_F_CREATE) and the nexthop reference.
 * Returns 0 on success.
 */
static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace.
		 */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		/* Free the displaced nexthop; new rt was not needed. */
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}

#ifdef ROUTE_MPATH
/*
 * Appends nexthop @rnd_add to multipath-eligible prefix @rt currently
 * holding @rnd_orig by building a new nexthop group.
 * Returns 0 on success, EAGAIN when @rnd_orig went stale and the
 * caller should refresh and retry.
 */
static int
add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_new;
	int error = 0;

	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
	if (error != 0) {
		if (error == EAGAIN) {
			/*
			 * Group creation failed, most probably because
			 * @rnd_orig data got scheduled for deletion.
			 * Refresh @rnd_orig data and retry.
			 */
			RIB_RLOCK(rnh);
			lookup_prefix_rt(rnh, rt, rnd_orig);
			RIB_RUNLOCK(rnh);
			/*
			 * NOTE(review): @rnd_orig is a function parameter and
			 * can never be NULL here; this check likely intends
			 * rnd_orig->rnd_nhop == NULL (prefix gone) — verify.
			 */
			if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
				/* In this iteration route doesn't exist */
				error = ENOENT;
			}
		}
		return (error);
	}
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
	if (error != 0)
		return (error);

	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
		/*
		 * First multipath route got installed. Enable local
		 * outbound connections hashing.
		 */
		if (bootverbose)
			printf("FIB: enabled flowid calculation for locally-originated packets\n");
		V_fib_hash_outbound = 1;
	}

	return (0);
}
#endif

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	/* Explicit filter takes precedence; fall back to gateway matching. */
	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Conditionally unlinks rtentry paths from @rnh matching @cb.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found or filter function failed to match
 * EADDRINUSE - if trying to delete higher priority route.
 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		/* Multipath: strip only the paths matching @cb. */
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

/*
 * Changes the route matching @info in table @fibnum.
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. With multiple processes changing the same
	 * route this can lead to the case when the route is changed
	 * between the steps. Address it by retrying the operation
	 * multiple times before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

/*
 * Creates a new nexthop based on @nh_orig with attributes updated from
 * @info, re-resolving ifa/ifp when the gateway or interface changed.
 * On success returns 0 and stores the new nexthop in @nh_new.
 */
static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
/*
 * Changes the single path of @rt's nexthop group matching @info,
 * constructing a new group with the updated nexthop in its place.
 * Returns 0 on success.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	/* Find the first group member matching @info. */
	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	/* The group holds its own references now. */
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

/* Dispatches a route change to the single-path or multipath handler. */
static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd to @rnh.
 * Returns 0 on success and stores operation results in @rc.
1212 */ 1213 static int 1214 add_route(struct rib_head *rnh, struct rtentry *rt, 1215 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1216 { 1217 struct radix_node *rn; 1218 1219 RIB_WLOCK_ASSERT(rnh); 1220 1221 rt->rt_nhop = rnd->rnd_nhop; 1222 rt->rt_weight = rnd->rnd_weight; 1223 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes); 1224 1225 if (rn != NULL) { 1226 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1227 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1228 1229 /* Finalize notification */ 1230 rib_bump_gen(rnh); 1231 rnh->rnh_prefixes++; 1232 1233 rc->rc_cmd = RTM_ADD; 1234 rc->rc_rt = rt; 1235 rc->rc_nh_old = NULL; 1236 rc->rc_nh_new = rnd->rnd_nhop; 1237 rc->rc_nh_weight = rnd->rnd_weight; 1238 1239 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1240 return (0); 1241 } 1242 1243 /* Existing route or memory allocation failure. */ 1244 return (EEXIST); 1245 } 1246 1247 /* 1248 * Unconditionally deletes @rt from @rnh. 1249 */ 1250 static int 1251 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc) 1252 { 1253 RIB_WLOCK_ASSERT(rnh); 1254 1255 /* Route deletion requested. */ 1256 struct radix_node *rn; 1257 1258 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head); 1259 if (rn == NULL) 1260 return (ESRCH); 1261 rt = RNTORT(rn); 1262 rt->rte_flags &= ~RTF_UP; 1263 1264 rib_bump_gen(rnh); 1265 rnh->rnh_prefixes--; 1266 1267 rc->rc_cmd = RTM_DELETE; 1268 rc->rc_rt = rt; 1269 rc->rc_nh_old = rt->rt_nhop; 1270 rc->rc_nh_new = NULL; 1271 rc->rc_nh_weight = rt->rt_weight; 1272 1273 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1274 1275 return (0); 1276 } 1277 1278 /* 1279 * Switch @rt nhop/weigh to the ones specified in @rnd. 1280 * Returns 0 on success. 
1281 */ 1282 int 1283 change_route(struct rib_head *rnh, struct rtentry *rt, 1284 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1285 { 1286 struct nhop_object *nh_orig; 1287 1288 RIB_WLOCK_ASSERT(rnh); 1289 1290 nh_orig = rt->rt_nhop; 1291 1292 if (rnd->rnd_nhop == NULL) 1293 return (delete_route(rnh, rt, rc)); 1294 1295 /* Changing nexthop & weight to a new one */ 1296 rt->rt_nhop = rnd->rnd_nhop; 1297 rt->rt_weight = rnd->rnd_weight; 1298 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1299 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1300 1301 /* Finalize notification */ 1302 rib_bump_gen(rnh); 1303 rc->rc_cmd = RTM_CHANGE; 1304 rc->rc_rt = rt; 1305 rc->rc_nh_old = nh_orig; 1306 rc->rc_nh_new = rnd->rnd_nhop; 1307 rc->rc_nh_weight = rnd->rnd_weight; 1308 1309 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1310 1311 return (0); 1312 } 1313 1314 /* 1315 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1316 * consistent with the current route data. 1317 * Nexthop in @nhd_new is consumed. 1318 */ 1319 int 1320 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1321 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new, 1322 struct rib_cmd_info *rc) 1323 { 1324 struct rtentry *rt_new; 1325 int error = 0; 1326 1327 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1328 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE]; 1329 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE); 1330 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE); 1331 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family, 1332 "trying change %s -> %s", buf_old, buf_new); 1333 } 1334 RIB_WLOCK(rnh); 1335 1336 struct route_nhop_data rnd; 1337 rt_new = lookup_prefix_rt(rnh, rt, &rnd); 1338 1339 if (rt_new == NULL) { 1340 if (rnd_orig->rnd_nhop == NULL) 1341 error = add_route(rnh, rt, rnd_new, rc); 1342 else { 1343 /* 1344 * Prefix does not exist, which was not our assumption. 
1345 * Update @rnd_orig with the new data and return 1346 */ 1347 rnd_orig->rnd_nhop = NULL; 1348 rnd_orig->rnd_weight = 0; 1349 error = EAGAIN; 1350 } 1351 } else { 1352 /* Prefix exists, try to update */ 1353 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1354 /* 1355 * Nhop/mpath group hasn't changed. Flip 1356 * to the new precalculated one and return 1357 */ 1358 error = change_route(rnh, rt_new, rnd_new, rc); 1359 } else { 1360 /* Update and retry */ 1361 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1362 rnd_orig->rnd_weight = rt_new->rt_weight; 1363 error = EAGAIN; 1364 } 1365 } 1366 1367 RIB_WUNLOCK(rnh); 1368 1369 if (error == 0) { 1370 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1371 1372 if (rnd_orig->rnd_nhop != NULL) 1373 nhop_free_any(rnd_orig->rnd_nhop); 1374 1375 } else { 1376 if (rnd_new->rnd_nhop != NULL) 1377 nhop_free_any(rnd_new->rnd_nhop); 1378 } 1379 1380 return (error); 1381 } 1382 1383 /* 1384 * Performs modification of routing table specificed by @action. 1385 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1386 * Needs to be run in network epoch. 1387 * 1388 * Returns 0 on success and fills in @rc with action result. 1389 */ 1390 int 1391 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1392 struct rib_cmd_info *rc) 1393 { 1394 int error; 1395 1396 switch (action) { 1397 case RTM_ADD: 1398 error = rib_add_route(fibnum, info, rc); 1399 break; 1400 case RTM_DELETE: 1401 error = rib_del_route(fibnum, info, rc); 1402 break; 1403 case RTM_CHANGE: 1404 error = rib_change_route(fibnum, info, rc); 1405 break; 1406 default: 1407 error = ENOTSUP; 1408 } 1409 1410 return (error); 1411 } 1412 1413 struct rt_delinfo 1414 { 1415 struct rib_head *rnh; 1416 struct rtentry *head; 1417 rib_filter_f_t *filter_f; 1418 void *filter_arg; 1419 int prio; 1420 struct rib_cmd_info rc; 1421 }; 1422 1423 /* 1424 * Conditionally unlinks rtenties or paths from radix tree based 1425 * on the callback data passed in @arg. 
1426 */ 1427 static int 1428 rt_checkdelroute(struct radix_node *rn, void *arg) 1429 { 1430 struct rt_delinfo *di = (struct rt_delinfo *)arg; 1431 struct rtentry *rt = (struct rtentry *)rn; 1432 1433 if (rt_delete_conditional(di->rnh, rt, di->prio, 1434 di->filter_f, di->filter_arg, &di->rc) != 0) 1435 return (0); 1436 1437 /* 1438 * Add deleted rtentries to the list to GC them 1439 * after dropping the lock. 1440 * 1441 * XXX: Delayed notifications not implemented 1442 * for nexthop updates. 1443 */ 1444 if (di->rc.rc_cmd == RTM_DELETE) { 1445 /* Add to the list and return */ 1446 rt->rt_chain = di->head; 1447 di->head = rt; 1448 #ifdef ROUTE_MPATH 1449 } else { 1450 /* 1451 * RTM_CHANGE to a different nexthop or nexthop group. 1452 * Free old multipath group. 1453 */ 1454 nhop_free_any(di->rc.rc_nh_old); 1455 #endif 1456 } 1457 1458 return (0); 1459 } 1460 1461 /* 1462 * Iterates over a routing table specified by @fibnum and @family and 1463 * deletes elements marked by @filter_f. 1464 * @fibnum: rtable id 1465 * @family: AF_ address family 1466 * @filter_f: function returning non-zero value for items to delete 1467 * @arg: data to pass to the @filter_f function 1468 * @report: true if rtsock notification is needed. 1469 */ 1470 void 1471 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg, 1472 bool report) 1473 { 1474 struct rib_head *rnh; 1475 struct rtentry *rt; 1476 struct nhop_object *nh; 1477 struct epoch_tracker et; 1478 1479 rnh = rt_tables_get_rnh(fibnum, family); 1480 if (rnh == NULL) 1481 return; 1482 1483 struct rt_delinfo di = { 1484 .rnh = rnh, 1485 .filter_f = filter_f, 1486 .filter_arg = filter_arg, 1487 .prio = NH_PRIORITY_NORMAL, 1488 }; 1489 1490 NET_EPOCH_ENTER(et); 1491 1492 RIB_WLOCK(rnh); 1493 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1494 RIB_WUNLOCK(rnh); 1495 1496 /* We might have something to reclaim. 
*/ 1497 bzero(&di.rc, sizeof(di.rc)); 1498 di.rc.rc_cmd = RTM_DELETE; 1499 while (di.head != NULL) { 1500 rt = di.head; 1501 di.head = rt->rt_chain; 1502 rt->rt_chain = NULL; 1503 nh = rt->rt_nhop; 1504 1505 di.rc.rc_rt = rt; 1506 di.rc.rc_nh_old = nh; 1507 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1508 1509 if (report) { 1510 #ifdef ROUTE_MPATH 1511 struct nhgrp_object *nhg; 1512 const struct weightened_nhop *wn; 1513 uint32_t num_nhops; 1514 if (NH_IS_NHGRP(nh)) { 1515 nhg = (struct nhgrp_object *)nh; 1516 wn = nhgrp_get_nhops(nhg, &num_nhops); 1517 for (int i = 0; i < num_nhops; i++) 1518 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1519 } else 1520 #endif 1521 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1522 } 1523 rt_free(rt); 1524 } 1525 1526 NET_EPOCH_EXIT(et); 1527 } 1528 1529 static int 1530 rt_delete_unconditional(struct radix_node *rn, void *arg) 1531 { 1532 struct rtentry *rt = RNTORT(rn); 1533 struct rib_head *rnh = (struct rib_head *)arg; 1534 1535 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1536 if (RNTORT(rn) == rt) 1537 rt_free(rt); 1538 1539 return (0); 1540 } 1541 1542 /* 1543 * Removes all routes from the routing table without executing notifications. 1544 * rtentres will be removed after the end of a current epoch. 1545 */ 1546 static void 1547 rib_flush_routes(struct rib_head *rnh) 1548 { 1549 RIB_WLOCK(rnh); 1550 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1551 RIB_WUNLOCK(rnh); 1552 } 1553 1554 void 1555 rib_flush_routes_family(int family) 1556 { 1557 struct rib_head *rnh; 1558 1559 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1560 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1561 rib_flush_routes(rnh); 1562 } 1563 } 1564 1565 const char * 1566 rib_print_family(int family) 1567 { 1568 switch (family) { 1569 case AF_INET: 1570 return ("inet"); 1571 case AF_INET6: 1572 return ("inet6"); 1573 case AF_LINK: 1574 return ("link"); 1575 } 1576 return ("unknown"); 1577 } 1578 1579