/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_var.h>

#define	DEBUG_MOD_NAME	route_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains control plane routing table functions.
 *
 * All functions assume they are called within the network epoch.
 */

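/*
 * Example (sketch, not compiled): a typical caller enters the network
 * epoch around any of the control plane calls below, e.g.:
 *
 *	struct epoch_tracker et;
 *	struct rib_cmd_info rc;
 *	int error;
 *
 *	NET_EPOCH_ENTER(et);
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);
 *	NET_EPOCH_EXIT(et);
 *
 * The "info" structure here is assumed to be a filled struct rt_addrinfo;
 * see rib_add_route()/rib_action() below.
 */
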
union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
#else
#define	_MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define	V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0,
    "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

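/*
 * For reference (sketch): the knobs defined above surface to userland as
 * sysctl(8) OIDs, e.g.:
 *
 *	# sysctl net.route.multipath=0		(disable ECMP for this vnet)
 *	# sysctl net.route.hash_outbound	(read-only state flag)
 *	# sysctl net.route.ipv6_nexthop=1	(allow IPv4 routes via IPv6 gw)
 *
 * The exact set depends on the ROUTE_MPATH/INET/INET6 kernel options.
 */
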
#ifdef ROUTE_MPATH
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Checks if @nh is multipath-eligible.
 * Avoids nexthops without gateways and redirects.
 *
 * Returns true for a multipath-eligible nexthop,
 * false otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (true);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (false);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (false);

	return (true);
}
#endif

static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep the upper byte for admin distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}

/*
 * File-local concept for distinguishing between the normal and
 * RTF_PINNED routes that can override the "normal" ones.
 */
#define	NH_PRIORITY_HIGH	2
#define	NH_PRIORITY_NORMAL	1
static int
get_prio_from_info(const struct rt_addrinfo *info)
{
	if (info->rti_flags & RTF_PINNED)
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

static int
nhop_get_prio(const struct nhop_object *nh)
{
	if (NH_IS_PINNED(nh))
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

/*
 * Checks if the specified @gw matches the gateway data in the nexthop @nh.
 *
 * Returns true if it matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in the kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Matches all nexthops with the given @gw.
 * Can be used as a rib_filter_f callback.
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;

	return (match_nhop_gw(nh, gw));
}

struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Matches the first occurrence of the gateway provided in @gwd.
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;

	/* Return only the first match to make rtsock happy */
	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
		return (1);
	return (0);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if the filter function returned false.
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs an exact prefix match based on @dst and @netmask.
 * Returns the matched @rtentry if found or NULL.
 * If an rtentry was found, saves the nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs an exact prefix match based on dst/netmask from @info.
 * Assumes the RIB lock is held.
 * Returns the matched @rtentry if found or NULL.
 * If an rtentry was found, saves the nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

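/*
 * Worked example (sketch) for the prefix/mask helper below: for AF_INET
 * with plen = 24 and dst = 192.0.2.77 (hypothetical address), the mask
 * becomes 255.255.255.0 and @_dst is rewritten in place to 192.0.2.0,
 * so the pair describes the 192.0.2.0/24 prefix.  plen == 32 (or 128 for
 * AF_INET6) yields a host route with a NULL mask, and plen == -1 leaves
 * the destination untouched.
 */
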
static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
			struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin_family = family;
			mask->sin_len = sizeof(*mask);
			if (plen == 32)
				*pmask = NULL;
			else if (plen > 32 || plen < 0)
				return (false);
			else {
				uint32_t daddr, maddr;
				maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
				mask->sin_addr.s_addr = maddr;
				daddr = dst->sin_addr.s_addr;
				daddr = htonl(ntohl(daddr) & ntohl(maddr));
				dst->sin_addr.s_addr = daddr;
			}
			return (true);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin6_family = family;
			mask->sin6_len = sizeof(*mask);
			if (plen == 128)
				*pmask = NULL;
			else if (plen > 128 || plen < 0)
				return (false);
			else {
				ip6_writemask(&mask->sin6_addr, plen);
				IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
			}
			return (true);
		}
		break;
#endif
	}
	return (false);
}

/*
 * Attempts to add the @dst/plen prefix with nexthop/nexthop group data @rnd
 * to the routing table.
 *
 * @fibnum: rtable id to insert the route into
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for the AF)
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report the operation result
 *
 * Returns 0 on success.
 */
int
rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	struct rtentry *rt = NULL;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	if (op_flags & RTM_F_CREATE) {
		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
			return (ENOMEM);
		}
	}

	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
}

/*
 * Attempts to delete the @dst/plen prefix matching gateway @gw from the
 * routing table.
 *
 * @fibnum: rtable id to remove the route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for the AF)
 * @gw: gateway to match
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report the operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

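/*
 * Example (sketch, not compiled): deleting the 192.0.2.0/24 prefix pointing
 * at gateway 198.51.100.1 from fib 0, inside the net epoch (addresses and
 * fib number are hypothetical):
 *
 *	struct sockaddr_in dst = {
 *		.sin_len = sizeof(struct sockaddr_in),
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(0xc0000200),
 *	};
 *	struct sockaddr_in gw = {
 *		.sin_len = sizeof(struct sockaddr_in),
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = htonl(0xc6336401),
 *	};
 *	struct rib_cmd_info rc;
 *	int error;
 *
 *	error = rib_del_route_px_gw(0, (struct sockaddr *)&dst, 24,
 *	    (struct sockaddr *)&gw, 0, &rc);
 *
 * 0xc0000200 is 192.0.2.0 and 0xc6336401 is 198.51.100.1; passing 0 for
 * op_flags requests a normal-priority delete (RTM_F_FORCE would be needed
 * to remove an RTF_PINNED route).
 */
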
/*
 * Attempts to delete the @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove the route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for the AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report the operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in an RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free the old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @rh_dst.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported.
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Returns 0 on success.
 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt_new, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between the RTF_HOST flag and netmask
	 * existence.
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route_byinfo(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}

static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	int op_flags = RTM_F_CREATE;
	if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
		op_flags |= RTM_F_FORCE;
	else
		op_flags |= RTM_F_APPEND;
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
}

static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* We have existing route in the RIB but not allowed to replace. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * The original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}

#ifdef ROUTE_MPATH
static int
add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_new;
	int error = 0;

	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
	if (error != 0) {
		if (error == EAGAIN) {
			/*
			 * Group creation failed, most probably because
			 * @rnd_orig data got scheduled for deletion.
			 * Refresh @rnd_orig data and retry.
			 */
			RIB_RLOCK(rnh);
			lookup_prefix_rt(rnh, rt, rnd_orig);
			RIB_RUNLOCK(rnh);
			if (rnd_orig->rnd_nhop == NULL && !(op_flags & RTM_F_CREATE)) {
				/* In this iteration the route doesn't exist */
				error = ENOENT;
			}
		}
		return (error);
	}
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
	if (error != 0)
		return (error);

	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
		/*
		 * The first multipath route got installed. Enable local
		 * outbound connections hashing.
		 */
		if (bootverbose)
			printf("FIB: enabled flowid calculation for locally-originated packets\n");
		V_fib_hash_outbound = 1;
	}

	return (0);
}
#endif

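/*
 * Example (sketch): with ROUTE_MPATH enabled and net.route.multipath=1,
 * adding the same prefix twice with different (multipath-eligible) gateways
 * ends up in add_route_flags_mpath() above: the second insertion is turned
 * into an RTM_CHANGE that replaces the prefix nexthop with a nexthop group
 * holding both paths, roughly equivalent to:
 *
 *	route add -net 192.0.2.0/24 198.51.100.1
 *	route add -net 192.0.2.0/24 198.51.100.2
 *
 * (route(8) invocations and addresses are illustrative only.)
 */
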
/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in an RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free the old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Conditionally unlinks rtentry paths from @rnh matching @cb.
 * Returns 0 on success with the operation result stored in @rc.
 * On error, returns:
 * ESRCH - if the prefix was not found or the filter function failed to match,
 * EADDRINUSE - if trying to delete a higher-priority route.
 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference the new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if the updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {
		/*
		 * route(8) adds the RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * A route change is done in multiple steps, with the lock dropped
	 * and reacquired in between. When multiple processes change the
	 * same route, it can be modified between the steps. Address this
	 * by retrying the operation multiple times before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * A new gateway could require a new ifaddr or ifp;
	 * flags may also be different; ifp may be specified
	 * by an ll sockaddr when the protocol address is ambiguous.
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Inserts @rt with the nhop data from @rnd into @rnh.
 * Returns 0 on success and stores the operation results in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct radix_node *rn;

	RIB_WLOCK_ASSERT(rnh);

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
			tmproutes_update(rnh, rt, rnd->rnd_nhop);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		return (0);
	}

	/* Existing route or memory allocation failure. */
	return (EEXIST);
}

/*
 * Unconditionally deletes @rt from @rnh.
 */
static int
delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
{
	RIB_WLOCK_ASSERT(rnh);

	/* Route deletion requested. */
	struct radix_node *rn;

	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
	if (rn == NULL)
		return (ESRCH);
	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	rib_bump_gen(rnh);
	rnh->rnh_prefixes--;

	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_new = NULL;
	rc->rc_nh_weight = rt->rt_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Switches @rt nhop/weight to the ones specified in @rnd.
 * Returns 0 on success.
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally updates the route nhop/weight IFF the data in @rnd_orig is
 * consistent with the current route data.
 * The nexthop in @rnd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
	RIB_WLOCK(rnh);

	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * The prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return.
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* The prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * The nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return.
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);
	} else {
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

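/*
 * Example (sketch, not compiled): a minimal rt_addrinfo for adding a static
 * IPv4 route via rib_action(RTM_ADD) below, assuming "dst", "mask" and "gw"
 * are properly filled struct sockaddr_in's (hypothetical local variables):
 *
 *	struct rt_addrinfo info = {
 *		.rti_flags = RTF_GATEWAY | RTF_STATIC,
 *		.rti_info[RTAX_DST] = (struct sockaddr *)&dst,
 *		.rti_info[RTAX_NETMASK] = (struct sockaddr *)&mask,
 *		.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw,
 *	};
 *
 * A weight can be requested by setting RTV_WEIGHT in rti_mflags and pointing
 * rti_rmx at a struct rt_metrics with rmx_weight filled in (see
 * get_info_weight() above).
 */
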
/*
 * Performs a modification of the routing table specified by @action.
 * The table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in the network epoch.
 *
 * Returns 0 on success and fills in @rc with the action result.
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

struct rt_delinfo
{
	struct rib_head *rnh;
	struct rtentry *head;
	rib_filter_f_t *filter_f;
	void *filter_arg;
	int prio;
	struct rib_cmd_info rc;
};

/*
 * Conditionally unlinks rtentries or paths from the radix tree based
 * on the callback data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications are not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free the old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

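/*
 * Example (sketch, hypothetical): a filter suitable for rib_walk_del() below,
 * removing every IPv4 route whose nexthop points at a given interface.
 * The nh_ifp field is assumed to identify the transmit ifp of the nexthop.
 *
 *	static int
 *	match_nhop_ifp(const struct rtentry *rt, const struct nhop_object *nh,
 *	    void *arg)
 *	{
 *		return (nh->nh_ifp == (struct ifnet *)arg);
 *	}
 *
 *	rib_walk_del(fibnum, AF_INET, match_nhop_ifp, ifp, true);
 */
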
/*
 * Iterates over the routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning a non-zero value for items to delete
 * @filter_arg: data to pass to the @filter_f function
 * @report: true if an rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
				rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}

static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	if (RNTORT(rn) == rt)
		rt_free(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of the current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}