/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_var.h>

#define	DEBUG_MOD_NAME	route_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains control plane routing tables functions.
 *
 * All functions assume they are called within the net epoch.
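 *
 * A minimal caller sketch (hypothetical, not part of this file), assuming a
 * pre-filled struct rt_addrinfo @info and the usual epoch entry pattern:
 *
 *	struct epoch_tracker et;
 *	struct rib_cmd_info rc;
 *	int error;
 *
 *	NET_EPOCH_ENTER(et);
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);
 *	NET_EPOCH_EXIT(et);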
 */

union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
#else
#define	_MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define	V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
static bool
rib_can_ipv6_nexthop_address(struct rib_head *rh)
{
	int result;

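	/* The knob is per-VNET; read it in the context of the rib's own vnet. */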
	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_ipv6_nexthop;
	CURVNET_RESTORE();

	return (result);
}
#endif

#ifdef ROUTE_MPATH
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
 * Avoid nhops without gateways and redirects.
 *
 * Returns 1 for multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep upper 1 byte for adm distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}

/*
 * File-local concept for distinguishing between the normal and
 * RTF_PINNED routes that can override the "normal" one.
 */
#define	NH_PRIORITY_HIGH	2
#define	NH_PRIORITY_NORMAL	1
static int
get_prio_from_info(const struct rt_addrinfo *info)
{
	if (info->rti_flags & RTF_PINNED)
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

static int
nhop_get_prio(const struct nhop_object *nh)
{
	if (NH_IS_PINNED(nh))
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

/*
 * Check if specified @gw matches gw data in the nexthop @nh.
 *
 * Returns true if matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Matches all nexthops with the given @gw.
 * Can be used as a rib_filter_f callback.
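 *
 * Hypothetical caller sketch: remove every IPv4 route in @fibnum whose
 * nexthop points at @gw:
 *
 *	rib_walk_del(fibnum, AF_INET, rib_match_gw,
 *	    __DECONST(void *, gw), true);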
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;

	return (match_nhop_gw(nh, gw));
}

struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Matches the first occurrence of the gateway provided in @gwd.
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;

	/* Return only first match to make rtsock happy */
	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
		return (1);
	return (0);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
			struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin_family = family;
			mask->sin_len = sizeof(*mask);
			if (plen == 32)
				*pmask = NULL;
			else if (plen > 32 || plen < 0)
				return (false);
			else {
				uint32_t daddr, maddr;
				maddr = htonl(plen ?
				    ~((1 << (32 - plen)) - 1) : 0);
				mask->sin_addr.s_addr = maddr;
				daddr = dst->sin_addr.s_addr;
				daddr = htonl(ntohl(daddr) & ntohl(maddr));
				dst->sin_addr.s_addr = daddr;
			}
			return (true);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin6_family = family;
			mask->sin6_len = sizeof(*mask);
			if (plen == 128)
				*pmask = NULL;
			else if (plen > 128 || plen < 0)
				return (false);
			else {
				ip6_writemask(&mask->sin6_addr, plen);
				IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
			}
			return (true);
		}
		break;
#endif
	}
	return (false);
}

/*
 * Attempts to add @dst/plen prefix with nexthop/nexthop group data @rnd
 * to the routing table.
 *
 * @fibnum: rtable id to insert route to
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	struct rtentry *rt;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	if (op_flags & RTM_F_CREATE) {
		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
			return (ENOMEM);
		}
	} else {
		struct route_nhop_data rnd_tmp;

		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
		if (rt == NULL)
			return (ESRCH);
	}

#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
	{
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd->rnd_nhop, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rnh, "request %s -> %s", rtbuf, nhbuf);
	}
#endif
	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @gw: gateway to match
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @rh_dst.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Return 0 on success.
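 *
 * Illustrative (hypothetical) call for a non-multipath @rt, with the caller
 * inside the net epoch:
 *
 *	struct route_nhop_data rnd = {
 *		.rnd_nhop = rt->rt_nhop,
 *		.rnd_weight = rt->rt_weight,
 *	};
 *	error = rib_copy_route(rt, &rnd, rh_dst, &rc);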
 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
	char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
	nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
	rt_print_buf(rt, rtbuf, sizeof(rtbuf));
	FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
	    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
#endif
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
#if DEBUG_MAX_LEVEL >= LOG_DEBUG
		char buf[NHOP_PRINT_BUFSIZE];
		rt_print_buf(rt_new, buf, sizeof(buf));
		FIB_RH_LOG(LOG_DEBUG, rh_dst, "Unable to add route %s: error %d", buf, error);
#endif
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route_byinfo(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}

/*
 * Checks if @dst and @gateway are a valid combination.
 *
 * Returns true if valid, false otherwise.
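 *
 * A gateway of the same family as @dst, or of AF_UNSPEC/AF_LINK, is always
 * accepted; an IPv6 gateway for an IPv4 destination is accepted only when
 * the net.route.ipv6_nexthop sysctl is enabled (RFC 5549-style forwarding).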
 */
static bool
check_gateway(struct rib_head *rnh, struct sockaddr *dst,
    struct sockaddr *gateway)
{
	if (dst->sa_family == gateway->sa_family)
		return (true);
	else if (gateway->sa_family == AF_UNSPEC)
		return (true);
	else if (gateway->sa_family == AF_LINK)
		return (true);
#if defined(INET) && defined(INET6)
	else if (dst->sa_family == AF_INET && gateway->sa_family == AF_INET6 &&
	    rib_can_ipv6_nexthop_address(rnh))
		return (true);
#endif
	else
		return (false);
}

static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !check_gateway(rnh, dst, gateway)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	int op_flags = RTM_F_CREATE;
	if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
		op_flags |= RTM_F_FORCE;
	else
		op_flags |= RTM_F_APPEND;
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
}

static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ENOENT; // no entry but creation was not required
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}

#ifdef ROUTE_MPATH
static int
add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_new;
	int error = 0;

	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
	if (error != 0) {
		if (error == EAGAIN) {
			/*
			 * Group creation failed, most probably because
			 * @rnd_orig data got scheduled for deletion.
			 * Refresh @rnd_orig data and retry.
			 */
			RIB_RLOCK(rnh);
			lookup_prefix_rt(rnh, rt, rnd_orig);
			RIB_RUNLOCK(rnh);
			if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) {
				/* In this iteration route doesn't exist */
				error = ENOENT;
			}
		}
		return (error);
	}
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
	if (error != 0)
		return (error);

	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
		/*
		 * First multipath route got installed. Enable local
		 * outbound connections hashing.
		 */
		if (bootverbose)
			printf("FIB: enabled flowid calculation for locally-originated packets\n");
		V_fib_hash_outbound = 1;
	}

	return (0);
}
#endif

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
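 *
 * For multipath (ROUTE_MPATH) routes only the paths matching the
 * gateway/filter data in @info are removed; the surviving paths are
 * reported via RTM_CHANGE in @rc.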
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Conditionally unlinks rtentry paths from @rnh matching @cb.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found or filter function failed to match
 * EADDRINUSE - if trying to delete higher priority route.
 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * Route change is done in multiple steps, with dropping and
	 * reacquiring the lock. When multiple processes change the same
	 * route, the route may end up changed between the steps. Address
	 * it by retrying the operation multiple times before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	    !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd to @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct radix_node *rn;

	RIB_WLOCK_ASSERT(rnh);

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
			tmproutes_update(rnh, rt, rnd->rnd_nhop);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		return (0);
	}

	/* Existing route or memory allocation failure. */
	return (EEXIST);
}

/*
 * Unconditionally deletes @rt from @rnh.
 */
static int
delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
{
	RIB_WLOCK_ASSERT(rnh);

	/* Route deletion requested. */
	struct radix_node *rn;

	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
	if (rn == NULL)
		return (ESRCH);
	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	rib_bump_gen(rnh);
	rnh->rnh_prefixes--;

	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_new = NULL;
	rc->rc_nh_weight = rt->rt_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Returns 0 on success.
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * Nexthop in @rnd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
	{
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
#endif
	RIB_WLOCK(rnh);

	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return.
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return.
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs modification of routing table specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in the network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
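 *
 * Illustrative (hypothetical) caller sketch adding a gateway route, with
 * dst/mask/gw sockaddrs prepared by the caller:
 *
 *	struct rib_cmd_info rc;
 *	struct rt_addrinfo info = {
 *		.rti_flags = RTF_GATEWAY | RTF_STATIC,
 *		.rti_info[RTAX_DST] = (struct sockaddr *)&dst,
 *		.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask,
 *		.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw,
 *	};
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);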
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

struct rt_delinfo
{
	struct rib_head *rnh;
	struct rtentry *head;
	rib_filter_f_t *filter_f;
	void *filter_arg;
	int prio;
	struct rib_cmd_info rc;
};

/*
 * Conditionally unlinks rtentries or paths from the radix tree based
 * on the callback data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @filter_arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
			rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}

static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	if (RNTORT(rn) == rt)
		rt_free(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of a current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}