1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_inet6.h" 32 #include "opt_route.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/malloc.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 #include <sys/sysctl.h> 40 #include <sys/syslog.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/rmlock.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_dl.h> 48 #include <net/vnet.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/route/nhop_utils.h> 53 #include <net/route/nhop.h> 54 #include <net/route/nhop_var.h> 55 #include <netinet/in.h> 56 #include <netinet6/scope6_var.h> 57 #include <netinet6/in6_var.h> 58 59 #define DEBUG_MOD_NAME route_ctl 60 #define DEBUG_MAX_LEVEL LOG_DEBUG 61 #include <net/route/route_debug.h> 62 _DECLARE_DEBUG(LOG_INFO); 63 64 /* 65 * This file contains control plane routing tables functions. 66 * 67 * All functions assumes they are called in net epoch. 68 */ 69 70 union sockaddr_union { 71 struct sockaddr sa; 72 struct sockaddr_in sin; 73 struct sockaddr_in6 sin6; 74 char _buf[32]; 75 }; 76 77 static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 78 struct rib_cmd_info *rc); 79 static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt, 80 struct rt_addrinfo *info, struct route_nhop_data *nhd_orig, 81 struct rib_cmd_info *rc); 82 83 static int add_route_flags(struct rib_head *rnh, struct rtentry *rt, 84 struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc); 85 #ifdef ROUTE_MPATH 86 static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 87 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 88 int op_flags, struct rib_cmd_info *rc); 89 #endif 90 91 static int add_route(struct rib_head *rnh, struct rtentry *rt, 92 struct route_nhop_data *rnd, struct rib_cmd_info *rc); 93 static int delete_route(struct rib_head *rnh, struct rtentry *rt, 94 struct rib_cmd_info *rc); 95 static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 96 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc); 97 98 static int get_prio_from_info(const struct rt_addrinfo *info); 99 static int nhop_get_prio(const struct nhop_object *nh); 100 101 #ifdef ROUTE_MPATH 102 static bool rib_can_multipath(struct rib_head *rh); 103 #endif 104 105 /* Per-vnet multipath routing configuration */ 106 SYSCTL_DECL(_net_route); 107 #define V_rib_route_multipath VNET(rib_route_multipath) 108 #ifdef ROUTE_MPATH 109 #define _MP_FLAGS CTLFLAG_RW 110 #else 111 #define _MP_FLAGS CTLFLAG_RD 112 #endif 113 VNET_DEFINE(u_int, rib_route_multipath) = 1; 114 SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET, 115 &VNET_NAME(rib_route_multipath), 0, "Enable route multipath"); 116 #undef _MP_FLAGS 117 118 #ifdef ROUTE_MPATH 119 VNET_DEFINE(u_int, fib_hash_outbound) = 0; 120 SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET, 121 &VNET_NAME(fib_hash_outbound), 0, 122 "Compute flowid for locally-originated packets"); 123 124 /* Default entropy to add to the hash calculation for the outbound connections*/ 125 uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = { 126 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 127 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 128 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 129 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 130 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa, 131 }; 132 #endif 133 134 #if defined(INET) && defined(INET6) 135 FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops"); 136 #define V_rib_route_ipv6_nexthop VNET(rib_route_ipv6_nexthop) 137 VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1; 138 SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET, 139 &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address"); 140 #endif 141 142 /* Debug bits */ 143 SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); 144 145 static struct rib_head * 146 get_rnh(uint32_t fibnum, const struct rt_addrinfo *info) 147 { 148 struct rib_head *rnh; 149 struct sockaddr *dst; 150 151 KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum")); 152 153 dst = info->rti_info[RTAX_DST]; 154 rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 155 156 return (rnh); 157 } 158 159 #if defined(INET) && defined(INET6) 160 bool 161 rib_can_4o6_nhop(void) 162 { 163 return (!!V_rib_route_ipv6_nexthop); 164 } 165 #endif 166 167 #ifdef ROUTE_MPATH 168 static bool 169 rib_can_multipath(struct rib_head *rh) 170 { 171 int result; 172 173 CURVNET_SET(rh->rib_vnet); 174 result = !!V_rib_route_multipath; 175 CURVNET_RESTORE(); 176 177 return (result); 178 } 179 180 /* 181 * Check is nhop is multipath-eligible. 182 * Avoid nhops without gateways and redirects. 183 * 184 * Returns 1 for multipath-eligible nexthop, 185 * 0 otherwise. 186 */ 187 bool 188 nhop_can_multipath(const struct nhop_object *nh) 189 { 190 191 if ((nh->nh_flags & NHF_MULTIPATH) != 0) 192 return (1); 193 if ((nh->nh_flags & NHF_GATEWAY) == 0) 194 return (0); 195 if ((nh->nh_flags & NHF_REDIRECT) != 0) 196 return (0); 197 198 return (1); 199 } 200 #endif 201 202 static int 203 get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight) 204 { 205 uint32_t weight; 206 207 if (info->rti_mflags & RTV_WEIGHT) 208 weight = info->rti_rmx->rmx_weight; 209 else 210 weight = default_weight; 211 /* Keep upper 1 byte for adm distance purposes */ 212 if (weight > RT_MAX_WEIGHT) 213 weight = RT_MAX_WEIGHT; 214 else if (weight == 0) 215 weight = default_weight; 216 217 return (weight); 218 } 219 220 /* 221 * File-local concept for distingushing between the normal and 222 * RTF_PINNED routes tha can override the "normal" one. 223 */ 224 #define NH_PRIORITY_HIGH 2 225 #define NH_PRIORITY_NORMAL 1 226 static int 227 get_prio_from_info(const struct rt_addrinfo *info) 228 { 229 if (info->rti_flags & RTF_PINNED) 230 return (NH_PRIORITY_HIGH); 231 return (NH_PRIORITY_NORMAL); 232 } 233 234 static int 235 nhop_get_prio(const struct nhop_object *nh) 236 { 237 if (NH_IS_PINNED(nh)) 238 return (NH_PRIORITY_HIGH); 239 return (NH_PRIORITY_NORMAL); 240 } 241 242 /* 243 * Check if specified @gw matches gw data in the nexthop @nh. 244 * 245 * Returns true if matches, false otherwise. 246 */ 247 bool 248 match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw) 249 { 250 251 if (nh->gw_sa.sa_family != gw->sa_family) 252 return (false); 253 254 switch (gw->sa_family) { 255 case AF_INET: 256 return (nh->gw4_sa.sin_addr.s_addr == 257 ((const struct sockaddr_in *)gw)->sin_addr.s_addr); 258 case AF_INET6: 259 { 260 const struct sockaddr_in6 *gw6; 261 gw6 = (const struct sockaddr_in6 *)gw; 262 263 /* 264 * Currently (2020-09) IPv6 gws in kernel have their 265 * scope embedded. Once this becomes false, this code 266 * has to be revisited. 267 */ 268 if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr, 269 &gw6->sin6_addr)) 270 return (true); 271 return (false); 272 } 273 case AF_LINK: 274 { 275 const struct sockaddr_dl *sdl; 276 sdl = (const struct sockaddr_dl *)gw; 277 return (nh->gwl_sa.sdl_index == sdl->sdl_index); 278 } 279 default: 280 return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0); 281 } 282 283 /* NOTREACHED */ 284 return (false); 285 } 286 287 /* 288 * Matches all nexthop with given @gw. 289 * Can be used as rib_filter_f callback. 290 */ 291 int 292 rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa) 293 { 294 const struct sockaddr *gw = (const struct sockaddr *)gw_sa; 295 296 return (match_nhop_gw(nh, gw)); 297 } 298 299 struct gw_filter_data { 300 const struct sockaddr *gw; 301 int count; 302 }; 303 304 /* 305 * Matches first occurence of the gateway provided in @gwd 306 */ 307 static int 308 match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data) 309 { 310 struct gw_filter_data *gwd = (struct gw_filter_data *)_data; 311 312 /* Return only first match to make rtsock happy */ 313 if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0) 314 return (1); 315 return (0); 316 } 317 318 /* 319 * Checks if data in @info matches nexhop @nh. 320 * 321 * Returns 0 on success, 322 * ESRCH if not matched, 323 * ENOENT if filter function returned false 324 */ 325 int 326 check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt, 327 const struct nhop_object *nh) 328 { 329 const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY]; 330 331 if (info->rti_filter != NULL) { 332 if (info->rti_filter(rt, nh, info->rti_filterdata) == 0) 333 return (ENOENT); 334 else 335 return (0); 336 } 337 if ((gw != NULL) && !match_nhop_gw(nh, gw)) 338 return (ESRCH); 339 340 return (0); 341 } 342 343 /* 344 * Runs exact prefix match based on @dst and @netmask. 345 * Returns matched @rtentry if found or NULL. 346 * If rtentry was found, saves nexthop / weight value into @rnd. 347 */ 348 static struct rtentry * 349 lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst, 350 const struct sockaddr *netmask, struct route_nhop_data *rnd) 351 { 352 struct rtentry *rt; 353 354 RIB_LOCK_ASSERT(rnh); 355 356 rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head); 357 if (rt != NULL) { 358 rnd->rnd_nhop = rt->rt_nhop; 359 rnd->rnd_weight = rt->rt_weight; 360 } else { 361 rnd->rnd_nhop = NULL; 362 rnd->rnd_weight = 0; 363 } 364 365 return (rt); 366 } 367 368 struct rtentry * 369 lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt, 370 struct route_nhop_data *rnd) 371 { 372 return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd)); 373 } 374 375 /* 376 * Runs exact prefix match based on dst/netmask from @info. 377 * Assumes RIB lock is held. 378 * Returns matched @rtentry if found or NULL. 379 * If rtentry was found, saves nexthop / weight value into @rnd. 380 */ 381 struct rtentry * 382 lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info, 383 struct route_nhop_data *rnd) 384 { 385 struct rtentry *rt; 386 387 rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST], 388 info->rti_info[RTAX_NETMASK], rnd); 389 390 return (rt); 391 } 392 393 static bool 394 fill_pxmask_family(int family, int plen, struct sockaddr *_dst, 395 struct sockaddr **pmask) 396 { 397 if (plen == -1) { 398 *pmask = NULL; 399 return (true); 400 } 401 402 switch (family) { 403 #ifdef INET 404 case AF_INET: 405 { 406 struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask); 407 struct sockaddr_in *dst= (struct sockaddr_in *)_dst; 408 409 memset(mask, 0, sizeof(*mask)); 410 mask->sin_family = family; 411 mask->sin_len = sizeof(*mask); 412 if (plen == 32) 413 *pmask = NULL; 414 else if (plen > 32 || plen < 0) 415 return (false); 416 else { 417 uint32_t daddr, maddr; 418 maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0); 419 mask->sin_addr.s_addr = maddr; 420 daddr = dst->sin_addr.s_addr; 421 daddr = htonl(ntohl(daddr) & ntohl(maddr)); 422 dst->sin_addr.s_addr = daddr; 423 } 424 return (true); 425 } 426 break; 427 #endif 428 #ifdef INET6 429 case AF_INET6: 430 { 431 struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask); 432 struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst; 433 434 memset(mask, 0, sizeof(*mask)); 435 mask->sin6_family = family; 436 mask->sin6_len = sizeof(*mask); 437 if (plen == 128) 438 *pmask = NULL; 439 else if (plen > 128 || plen < 0) 440 return (false); 441 else { 442 ip6_writemask(&mask->sin6_addr, plen); 443 IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr); 444 } 445 return (true); 446 } 447 break; 448 #endif 449 } 450 return (false); 451 } 452 453 /* 454 * Attempts to add @dst/plen prefix with nexthop/nexhopgroup data @rnd 455 * to the routing table. 456 * 457 * @fibnum: rtable id to insert route to 458 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 459 * @plen: prefix length (or -1 if host route or not applicable for AF) 460 * @op_flags: combination of RTM_F_ flags 461 * @rc: storage to report operation result 462 * 463 * Returns 0 on success. 464 */ 465 int 466 rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 467 struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc) 468 { 469 union sockaddr_union mask_storage; 470 struct sockaddr *netmask = &mask_storage.sa; 471 struct rtentry *rt = NULL; 472 473 NET_EPOCH_ASSERT(); 474 475 bzero(rc, sizeof(struct rib_cmd_info)); 476 rc->rc_cmd = RTM_ADD; 477 478 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 479 if (rnh == NULL) 480 return (EAFNOSUPPORT); 481 482 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 483 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 484 return (EINVAL); 485 } 486 487 if (op_flags & RTM_F_CREATE) { 488 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) { 489 FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed"); 490 return (ENOMEM); 491 } 492 } 493 494 return (add_route_flags(rnh, rt, rnd, op_flags, rc)); 495 } 496 497 /* 498 * Attempts to delete @dst/plen prefix matching gateway @gw from the 499 * routing rable. 500 * 501 * @fibnum: rtable id to remove route from 502 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 503 * @plen: prefix length (or -1 if host route or not applicable for AF) 504 * @gw: gateway to match 505 * @op_flags: combination of RTM_F_ flags 506 * @rc: storage to report operation result 507 * 508 * Returns 0 on success. 509 */ 510 int 511 rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen, 512 const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc) 513 { 514 struct gw_filter_data gwd = { .gw = gw }; 515 516 return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc)); 517 } 518 519 /* 520 * Attempts to delete @dst/plen prefix matching @filter_func from the 521 * routing rable. 522 * 523 * @fibnum: rtable id to remove route from 524 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty 525 * @plen: prefix length (or -1 if host route or not applicable for AF) 526 * @filter_func: func to be called for each nexthop of the prefix for matching 527 * @filter_arg: argument to pass to @filter_func 528 * @op_flags: combination of RTM_F_ flags 529 * @rc: storage to report operation result 530 * 531 * Returns 0 on success. 532 */ 533 int 534 rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen, 535 rib_filter_f_t *filter_func, void *filter_arg, int op_flags, 536 struct rib_cmd_info *rc) 537 { 538 union sockaddr_union mask_storage; 539 struct sockaddr *netmask = &mask_storage.sa; 540 int error; 541 542 NET_EPOCH_ASSERT(); 543 544 bzero(rc, sizeof(struct rib_cmd_info)); 545 rc->rc_cmd = RTM_DELETE; 546 547 struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 548 if (rnh == NULL) 549 return (EAFNOSUPPORT); 550 551 if (dst->sa_len > sizeof(mask_storage)) { 552 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len); 553 return (EINVAL); 554 } 555 556 if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) { 557 FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen); 558 return (EINVAL); 559 } 560 561 int prio = (op_flags & RTM_F_FORCE) ? NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL; 562 563 RIB_WLOCK(rnh); 564 struct route_nhop_data rnd; 565 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 566 if (rt != NULL) { 567 error = rt_delete_conditional(rnh, rt, prio, filter_func, 568 filter_arg, rc); 569 } else 570 error = ESRCH; 571 RIB_WUNLOCK(rnh); 572 573 if (error != 0) 574 return (error); 575 576 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 577 578 if (rc->rc_cmd == RTM_DELETE) 579 rt_free(rc->rc_rt); 580 #ifdef ROUTE_MPATH 581 else { 582 /* 583 * Deleting 1 path may result in RTM_CHANGE to 584 * a different mpath group/nhop. 585 * Free old mpath group. 586 */ 587 nhop_free_any(rc->rc_nh_old); 588 } 589 #endif 590 591 return (0); 592 } 593 594 /* 595 * Tries to copy route @rt from one rtable to the rtable specified by @dst_rh. 596 * @rt: route to copy. 597 * @rnd_src: nhop and weight. Multipath routes are not supported 598 * @rh_dst: target rtable. 599 * @rc: operation result storage 600 * 601 * Return 0 on success. 602 */ 603 int 604 rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src, 605 struct rib_head *rh_dst, struct rib_cmd_info *rc) 606 { 607 struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop; 608 int error; 609 610 MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0); 611 612 IF_DEBUG_LEVEL(LOG_DEBUG2) { 613 char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE]; 614 nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf)); 615 rt_print_buf(rt, rtbuf, sizeof(rtbuf)); 616 FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u", 617 rtbuf, nhbuf, nhop_get_fibnum(nh_src)); 618 } 619 struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family); 620 if (nh == NULL) { 621 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop"); 622 return (ENOMEM); 623 } 624 nhop_copy(nh, rnd_src->rnd_nhop); 625 nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop)); 626 nhop_set_fibnum(nh, rh_dst->rib_fibnum); 627 nh = nhop_get_nhop_internal(rh_dst, nh, &error); 628 if (error != 0) { 629 FIB_RH_LOG(LOG_INFO, rh_dst, 630 "unable to finalize new nexthop: error %d", error); 631 return (ENOMEM); 632 } 633 634 struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt)); 635 if (rt_new == NULL) { 636 FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry"); 637 nhop_free(nh); 638 return (ENOMEM); 639 } 640 641 struct route_nhop_data rnd = { 642 .rnd_nhop = nh, 643 .rnd_weight = rnd_src->rnd_weight 644 }; 645 int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0); 646 error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc); 647 648 if (error != 0) { 649 IF_DEBUG_LEVEL(LOG_DEBUG2) { 650 char buf[NHOP_PRINT_BUFSIZE]; 651 rt_print_buf(rt_new, buf, sizeof(buf)); 652 FIB_RH_LOG(LOG_DEBUG, rh_dst, 653 "Unable to add route %s: error %d", buf, error); 654 } 655 nhop_free(nh); 656 rt_free_immediate(rt_new); 657 } 658 return (error); 659 } 660 661 /* 662 * Adds route defined by @info into the kernel table specified by @fibnum and 663 * sa_family in @info->rti_info[RTAX_DST]. 664 * 665 * Returns 0 on success and fills in operation metadata into @rc. 666 */ 667 int 668 rib_add_route(uint32_t fibnum, struct rt_addrinfo *info, 669 struct rib_cmd_info *rc) 670 { 671 struct rib_head *rnh; 672 int error; 673 674 NET_EPOCH_ASSERT(); 675 676 rnh = get_rnh(fibnum, info); 677 if (rnh == NULL) 678 return (EAFNOSUPPORT); 679 680 /* 681 * Check consistency between RTF_HOST flag and netmask 682 * existence. 683 */ 684 if (info->rti_flags & RTF_HOST) 685 info->rti_info[RTAX_NETMASK] = NULL; 686 else if (info->rti_info[RTAX_NETMASK] == NULL) { 687 FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask"); 688 return (EINVAL); 689 } 690 691 bzero(rc, sizeof(struct rib_cmd_info)); 692 rc->rc_cmd = RTM_ADD; 693 694 error = add_route_byinfo(rnh, info, rc); 695 if (error == 0) 696 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 697 698 return (error); 699 } 700 701 static int 702 add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info, 703 struct rib_cmd_info *rc) 704 { 705 struct route_nhop_data rnd_add; 706 struct nhop_object *nh; 707 struct rtentry *rt; 708 struct sockaddr *dst, *gateway, *netmask; 709 int error; 710 711 dst = info->rti_info[RTAX_DST]; 712 gateway = info->rti_info[RTAX_GATEWAY]; 713 netmask = info->rti_info[RTAX_NETMASK]; 714 715 if ((info->rti_flags & RTF_GATEWAY) && !gateway) { 716 FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw"); 717 return (EINVAL); 718 } 719 if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) { 720 FIB_RH_LOG(LOG_DEBUG, rnh, 721 "error: invalid dst/gateway family combination (%d, %d)", 722 dst->sa_family, gateway->sa_family); 723 return (EINVAL); 724 } 725 726 if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) { 727 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d", 728 dst->sa_len); 729 return (EINVAL); 730 } 731 732 if (info->rti_ifa == NULL) { 733 error = rt_getifa_fib(info, rnh->rib_fibnum); 734 if (error) 735 return (error); 736 } 737 738 if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) 739 return (ENOBUFS); 740 741 error = nhop_create_from_info(rnh, info, &nh); 742 if (error != 0) { 743 rt_free_immediate(rt); 744 return (error); 745 } 746 747 rnd_add.rnd_nhop = nh; 748 rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT); 749 750 int op_flags = RTM_F_CREATE; 751 if (get_prio_from_info(info) == NH_PRIORITY_HIGH) 752 op_flags |= RTM_F_FORCE; 753 else 754 op_flags |= RTM_F_APPEND; 755 return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc)); 756 757 } 758 759 static int 760 add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add, 761 int op_flags, struct rib_cmd_info *rc) 762 { 763 struct route_nhop_data rnd_orig; 764 struct nhop_object *nh; 765 struct rtentry *rt_orig; 766 int error = 0; 767 768 nh = rnd_add->rnd_nhop; 769 770 RIB_WLOCK(rnh); 771 772 rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig); 773 774 if (rt_orig == NULL) { 775 if (op_flags & RTM_F_CREATE) 776 error = add_route(rnh, rt, rnd_add, rc); 777 else 778 error = ESRCH; /* no entry but creation was not required */ 779 RIB_WUNLOCK(rnh); 780 if (error != 0) 781 goto out; 782 return (0); 783 } 784 785 if (op_flags & RTM_F_EXCL) { 786 /* We have existing route in the RIB but not allowed to replace. */ 787 RIB_WUNLOCK(rnh); 788 error = EEXIST; 789 goto out; 790 } 791 792 /* Now either append or replace */ 793 if (op_flags & RTM_F_REPLACE) { 794 if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) { 795 /* Old path is "better" (e.g. has PINNED flag set) */ 796 RIB_WUNLOCK(rnh); 797 error = EEXIST; 798 goto out; 799 } 800 change_route(rnh, rt_orig, rnd_add, rc); 801 RIB_WUNLOCK(rnh); 802 nh = rc->rc_nh_old; 803 goto out; 804 } 805 806 RIB_WUNLOCK(rnh); 807 808 #ifdef ROUTE_MPATH 809 if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) && 810 nhop_can_multipath(rnd_add->rnd_nhop) && 811 nhop_can_multipath(rnd_orig.rnd_nhop)) { 812 813 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 814 error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig, 815 op_flags, rc); 816 if (error != EAGAIN) 817 break; 818 RTSTAT_INC(rts_add_retry); 819 } 820 821 /* 822 * Original nhop reference is unused in any case. 823 */ 824 nhop_free_any(rnd_add->rnd_nhop); 825 if (op_flags & RTM_F_CREATE) { 826 if (error != 0 || rc->rc_cmd != RTM_ADD) 827 rt_free_immediate(rt); 828 } 829 return (error); 830 } 831 #endif 832 /* Out of options - free state and return error */ 833 error = EEXIST; 834 out: 835 if (op_flags & RTM_F_CREATE) 836 rt_free_immediate(rt); 837 nhop_free_any(nh); 838 839 return (error); 840 } 841 842 #ifdef ROUTE_MPATH 843 static int 844 add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt, 845 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig, 846 int op_flags, struct rib_cmd_info *rc) 847 { 848 RIB_RLOCK_TRACKER; 849 struct route_nhop_data rnd_new; 850 int error = 0; 851 852 error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new); 853 if (error != 0) { 854 if (error == EAGAIN) { 855 /* 856 * Group creation failed, most probably because 857 * @rnd_orig data got scheduled for deletion. 858 * Refresh @rnd_orig data and retry. 859 */ 860 RIB_RLOCK(rnh); 861 lookup_prefix_rt(rnh, rt, rnd_orig); 862 RIB_RUNLOCK(rnh); 863 if (rnd_orig == NULL && !(op_flags & RTM_F_CREATE)) { 864 /* In this iteration route doesn't exist */ 865 error = ENOENT; 866 } 867 } 868 return (error); 869 } 870 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 871 if (error != 0) 872 return (error); 873 874 if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) { 875 /* 876 * First multipath route got installed. Enable local 877 * outbound connections hashing. 878 */ 879 if (bootverbose) 880 printf("FIB: enabled flowid calculation for locally-originated packets\n"); 881 V_fib_hash_outbound = 1; 882 } 883 884 return (0); 885 } 886 #endif 887 888 /* 889 * Removes route defined by @info from the kernel table specified by @fibnum and 890 * sa_family in @info->rti_info[RTAX_DST]. 891 * 892 * Returns 0 on success and fills in operation metadata into @rc. 893 */ 894 int 895 rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc) 896 { 897 struct rib_head *rnh; 898 struct sockaddr *dst, *netmask; 899 struct sockaddr_storage mdst; 900 int error; 901 902 NET_EPOCH_ASSERT(); 903 904 rnh = get_rnh(fibnum, info); 905 if (rnh == NULL) 906 return (EAFNOSUPPORT); 907 908 bzero(rc, sizeof(struct rib_cmd_info)); 909 rc->rc_cmd = RTM_DELETE; 910 911 dst = info->rti_info[RTAX_DST]; 912 netmask = info->rti_info[RTAX_NETMASK]; 913 914 if (netmask != NULL) { 915 /* Ensure @dst is always properly masked */ 916 if (dst->sa_len > sizeof(mdst)) { 917 FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large"); 918 return (EINVAL); 919 } 920 rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 921 dst = (struct sockaddr *)&mdst; 922 } 923 924 rib_filter_f_t *filter_func = NULL; 925 void *filter_arg = NULL; 926 struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] }; 927 928 if (info->rti_filter != NULL) { 929 filter_func = info->rti_filter; 930 filter_arg = info->rti_filterdata; 931 } else if (gwd.gw != NULL) { 932 filter_func = match_gw_one; 933 filter_arg = &gwd; 934 } 935 936 int prio = get_prio_from_info(info); 937 938 RIB_WLOCK(rnh); 939 struct route_nhop_data rnd; 940 struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd); 941 if (rt != NULL) { 942 error = rt_delete_conditional(rnh, rt, prio, filter_func, 943 filter_arg, rc); 944 } else 945 error = ESRCH; 946 RIB_WUNLOCK(rnh); 947 948 if (error != 0) 949 return (error); 950 951 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 952 953 if (rc->rc_cmd == RTM_DELETE) 954 rt_free(rc->rc_rt); 955 #ifdef ROUTE_MPATH 956 else { 957 /* 958 * Deleting 1 path may result in RTM_CHANGE to 959 * a different mpath group/nhop. 960 * Free old mpath group. 961 */ 962 nhop_free_any(rc->rc_nh_old); 963 } 964 #endif 965 966 return (0); 967 } 968 969 /* 970 * Conditionally unlinks rtentry paths from @rnh matching @cb. 971 * Returns 0 on success with operation result stored in @rc. 972 * On error, returns: 973 * ESRCH - if prefix was not found or filter function failed to match 974 * EADDRINUSE - if trying to delete higher priority route. 975 */ 976 static int 977 rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt, 978 int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc) 979 { 980 struct nhop_object *nh = rt->rt_nhop; 981 982 #ifdef ROUTE_MPATH 983 if (NH_IS_NHGRP(nh)) { 984 struct nhgrp_object *nhg = (struct nhgrp_object *)nh; 985 struct route_nhop_data rnd; 986 int error; 987 988 if (cb == NULL) 989 return (ESRCH); 990 error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd); 991 if (error == 0) { 992 if (rnd.rnd_nhgrp == nhg) { 993 /* No match, unreference new group and return. */ 994 nhop_free_any(rnd.rnd_nhop); 995 return (ESRCH); 996 } 997 error = change_route(rnh, rt, &rnd, rc); 998 } 999 return (error); 1000 } 1001 #endif 1002 if (cb != NULL && !cb(rt, nh, cbdata)) 1003 return (ESRCH); 1004 1005 if (prio < nhop_get_prio(nh)) 1006 return (EADDRINUSE); 1007 1008 return (delete_route(rnh, rt, rc)); 1009 } 1010 1011 int 1012 rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, 1013 struct rib_cmd_info *rc) 1014 { 1015 RIB_RLOCK_TRACKER; 1016 struct route_nhop_data rnd_orig; 1017 struct rib_head *rnh; 1018 struct rtentry *rt; 1019 int error; 1020 1021 NET_EPOCH_ASSERT(); 1022 1023 rnh = get_rnh(fibnum, info); 1024 if (rnh == NULL) 1025 return (EAFNOSUPPORT); 1026 1027 bzero(rc, sizeof(struct rib_cmd_info)); 1028 rc->rc_cmd = RTM_CHANGE; 1029 1030 /* Check if updated gateway exists */ 1031 if ((info->rti_flags & RTF_GATEWAY) && 1032 (info->rti_info[RTAX_GATEWAY] == NULL)) { 1033 1034 /* 1035 * route(8) adds RTF_GATEWAY flag if -interface is not set. 1036 * Remove RTF_GATEWAY to enforce consistency and maintain 1037 * compatibility.. 1038 */ 1039 info->rti_flags &= ~RTF_GATEWAY; 1040 } 1041 1042 /* 1043 * route change is done in multiple steps, with dropping and 1044 * reacquiring lock. In the situations with multiple processes 1045 * changes the same route in can lead to the case when route 1046 * is changed between the steps. Address it by retrying the operation 1047 * multiple times before failing. 1048 */ 1049 1050 RIB_RLOCK(rnh); 1051 rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], 1052 info->rti_info[RTAX_NETMASK], &rnh->head); 1053 1054 if (rt == NULL) { 1055 RIB_RUNLOCK(rnh); 1056 return (ESRCH); 1057 } 1058 1059 rnd_orig.rnd_nhop = rt->rt_nhop; 1060 rnd_orig.rnd_weight = rt->rt_weight; 1061 1062 RIB_RUNLOCK(rnh); 1063 1064 for (int i = 0; i < RIB_MAX_RETRIES; i++) { 1065 error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc); 1066 if (error != EAGAIN) 1067 break; 1068 } 1069 1070 return (error); 1071 } 1072 1073 static int 1074 change_nhop(struct rib_head *rnh, struct rt_addrinfo *info, 1075 struct nhop_object *nh_orig, struct nhop_object **nh_new) 1076 { 1077 int error; 1078 1079 /* 1080 * New gateway could require new ifaddr, ifp; 1081 * flags may also be different; ifp may be specified 1082 * by ll sockaddr when protocol address is ambiguous 1083 */ 1084 if (((nh_orig->nh_flags & NHF_GATEWAY) && 1085 info->rti_info[RTAX_GATEWAY] != NULL) || 1086 info->rti_info[RTAX_IFP] != NULL || 1087 (info->rti_info[RTAX_IFA] != NULL && 1088 !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) { 1089 error = rt_getifa_fib(info, rnh->rib_fibnum); 1090 1091 if (error != 0) { 1092 info->rti_ifa = NULL; 1093 return (error); 1094 } 1095 } 1096 1097 error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new); 1098 info->rti_ifa = NULL; 1099 1100 return (error); 1101 } 1102 1103 #ifdef ROUTE_MPATH 1104 static int 1105 change_mpath_route(struct rib_head *rnh, struct rtentry *rt, 1106 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1107 struct rib_cmd_info *rc) 1108 { 1109 int error = 0, found_idx = 0; 1110 struct nhop_object *nh_orig = NULL, *nh_new; 1111 struct route_nhop_data rnd_new = {}; 1112 const struct weightened_nhop *wn = NULL; 1113 struct weightened_nhop *wn_new; 1114 uint32_t num_nhops; 1115 1116 wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops); 1117 for (int i = 0; i < num_nhops; i++) { 1118 if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) { 1119 nh_orig = wn[i].nh; 1120 found_idx = i; 1121 break; 1122 } 1123 } 1124 1125 if (nh_orig == NULL) 1126 return (ESRCH); 1127 1128 error = change_nhop(rnh, info, nh_orig, &nh_new); 1129 if (error != 0) 1130 return (error); 1131 1132 wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop), 1133 M_TEMP, M_NOWAIT | M_ZERO); 1134 if (wn_new == NULL) { 1135 nhop_free(nh_new); 1136 return (EAGAIN); 1137 } 1138 1139 memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop)); 1140 wn_new[found_idx].nh = nh_new; 1141 wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight); 1142 1143 error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp); 1144 nhop_free(nh_new); 1145 free(wn_new, M_TEMP); 1146 1147 if (error != 0) 1148 return (error); 1149 1150 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 1151 1152 return (error); 1153 } 1154 #endif 1155 1156 static int 1157 change_route_byinfo(struct rib_head *rnh, struct rtentry *rt, 1158 struct rt_addrinfo *info, struct route_nhop_data *rnd_orig, 1159 struct rib_cmd_info *rc) 1160 { 1161 int error = 0; 1162 struct nhop_object *nh_orig; 1163 struct route_nhop_data rnd_new; 1164 1165 nh_orig = rnd_orig->rnd_nhop; 1166 if (nh_orig == NULL) 1167 return (ESRCH); 1168 1169 #ifdef ROUTE_MPATH 1170 if (NH_IS_NHGRP(nh_orig)) 1171 return (change_mpath_route(rnh, rt, info, rnd_orig, rc)); 1172 #endif 1173 1174 rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight); 1175 error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop); 1176 if (error != 0) 1177 return (error); 1178 error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc); 1179 1180 return (error); 1181 } 1182 1183 /* 1184 * Insert @rt with nhop data from @rnd_new to @rnh. 1185 * Returns 0 on success and stores operation results in @rc. 1186 */ 1187 static int 1188 add_route(struct rib_head *rnh, struct rtentry *rt, 1189 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1190 { 1191 struct radix_node *rn; 1192 1193 RIB_WLOCK_ASSERT(rnh); 1194 1195 rt->rt_nhop = rnd->rnd_nhop; 1196 rt->rt_weight = rnd->rnd_weight; 1197 rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes); 1198 1199 if (rn != NULL) { 1200 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1201 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1202 1203 /* Finalize notification */ 1204 rib_bump_gen(rnh); 1205 rnh->rnh_prefixes++; 1206 1207 rc->rc_cmd = RTM_ADD; 1208 rc->rc_rt = rt; 1209 rc->rc_nh_old = NULL; 1210 rc->rc_nh_new = rnd->rnd_nhop; 1211 rc->rc_nh_weight = rnd->rnd_weight; 1212 1213 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1214 return (0); 1215 } 1216 1217 /* Existing route or memory allocation failure. */ 1218 return (EEXIST); 1219 } 1220 1221 /* 1222 * Unconditionally deletes @rt from @rnh. 1223 */ 1224 static int 1225 delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc) 1226 { 1227 RIB_WLOCK_ASSERT(rnh); 1228 1229 /* Route deletion requested. */ 1230 struct radix_node *rn; 1231 1232 rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head); 1233 if (rn == NULL) 1234 return (ESRCH); 1235 rt = RNTORT(rn); 1236 rt->rte_flags &= ~RTF_UP; 1237 1238 rib_bump_gen(rnh); 1239 rnh->rnh_prefixes--; 1240 1241 rc->rc_cmd = RTM_DELETE; 1242 rc->rc_rt = rt; 1243 rc->rc_nh_old = rt->rt_nhop; 1244 rc->rc_nh_new = NULL; 1245 rc->rc_nh_weight = rt->rt_weight; 1246 1247 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1248 1249 return (0); 1250 } 1251 1252 /* 1253 * Switch @rt nhop/weigh to the ones specified in @rnd. 1254 * Returns 0 on success. 1255 */ 1256 int 1257 change_route(struct rib_head *rnh, struct rtentry *rt, 1258 struct route_nhop_data *rnd, struct rib_cmd_info *rc) 1259 { 1260 struct nhop_object *nh_orig; 1261 1262 RIB_WLOCK_ASSERT(rnh); 1263 1264 nh_orig = rt->rt_nhop; 1265 1266 if (rnd->rnd_nhop == NULL) 1267 return (delete_route(rnh, rt, rc)); 1268 1269 /* Changing nexthop & weight to a new one */ 1270 rt->rt_nhop = rnd->rnd_nhop; 1271 rt->rt_weight = rnd->rnd_weight; 1272 if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop)) 1273 tmproutes_update(rnh, rt, rnd->rnd_nhop); 1274 1275 /* Finalize notification */ 1276 rib_bump_gen(rnh); 1277 rc->rc_cmd = RTM_CHANGE; 1278 rc->rc_rt = rt; 1279 rc->rc_nh_old = nh_orig; 1280 rc->rc_nh_new = rnd->rnd_nhop; 1281 rc->rc_nh_weight = rnd->rnd_weight; 1282 1283 rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); 1284 1285 return (0); 1286 } 1287 1288 /* 1289 * Conditionally update route nhop/weight IFF data in @nhd_orig is 1290 * consistent with the current route data. 1291 * Nexthop in @nhd_new is consumed. 1292 */ 1293 int 1294 change_route_conditional(struct rib_head *rnh, struct rtentry *rt, 1295 struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new, 1296 struct rib_cmd_info *rc) 1297 { 1298 struct rtentry *rt_new; 1299 int error = 0; 1300 1301 IF_DEBUG_LEVEL(LOG_DEBUG2) { 1302 char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE]; 1303 nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE); 1304 nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE); 1305 FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family, 1306 "trying change %s -> %s", buf_old, buf_new); 1307 } 1308 RIB_WLOCK(rnh); 1309 1310 struct route_nhop_data rnd; 1311 rt_new = lookup_prefix_rt(rnh, rt, &rnd); 1312 1313 if (rt_new == NULL) { 1314 if (rnd_orig->rnd_nhop == NULL) 1315 error = add_route(rnh, rt, rnd_new, rc); 1316 else { 1317 /* 1318 * Prefix does not exist, which was not our assumption. 1319 * Update @rnd_orig with the new data and return 1320 */ 1321 rnd_orig->rnd_nhop = NULL; 1322 rnd_orig->rnd_weight = 0; 1323 error = EAGAIN; 1324 } 1325 } else { 1326 /* Prefix exists, try to update */ 1327 if (rnd_orig->rnd_nhop == rt_new->rt_nhop) { 1328 /* 1329 * Nhop/mpath group hasn't changed. Flip 1330 * to the new precalculated one and return 1331 */ 1332 error = change_route(rnh, rt_new, rnd_new, rc); 1333 } else { 1334 /* Update and retry */ 1335 rnd_orig->rnd_nhop = rt_new->rt_nhop; 1336 rnd_orig->rnd_weight = rt_new->rt_weight; 1337 error = EAGAIN; 1338 } 1339 } 1340 1341 RIB_WUNLOCK(rnh); 1342 1343 if (error == 0) { 1344 rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); 1345 1346 if (rnd_orig->rnd_nhop != NULL) 1347 nhop_free_any(rnd_orig->rnd_nhop); 1348 1349 } else { 1350 if (rnd_new->rnd_nhop != NULL) 1351 nhop_free_any(rnd_new->rnd_nhop); 1352 } 1353 1354 return (error); 1355 } 1356 1357 /* 1358 * Performs modification of routing table specificed by @action. 1359 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST]. 1360 * Needs to be run in network epoch. 1361 * 1362 * Returns 0 on success and fills in @rc with action result. 1363 */ 1364 int 1365 rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info, 1366 struct rib_cmd_info *rc) 1367 { 1368 int error; 1369 1370 switch (action) { 1371 case RTM_ADD: 1372 error = rib_add_route(fibnum, info, rc); 1373 break; 1374 case RTM_DELETE: 1375 error = rib_del_route(fibnum, info, rc); 1376 break; 1377 case RTM_CHANGE: 1378 error = rib_change_route(fibnum, info, rc); 1379 break; 1380 default: 1381 error = ENOTSUP; 1382 } 1383 1384 return (error); 1385 } 1386 1387 struct rt_delinfo 1388 { 1389 struct rib_head *rnh; 1390 struct rtentry *head; 1391 rib_filter_f_t *filter_f; 1392 void *filter_arg; 1393 int prio; 1394 struct rib_cmd_info rc; 1395 }; 1396 1397 /* 1398 * Conditionally unlinks rtenties or paths from radix tree based 1399 * on the callback data passed in @arg. 1400 */ 1401 static int 1402 rt_checkdelroute(struct radix_node *rn, void *arg) 1403 { 1404 struct rt_delinfo *di = (struct rt_delinfo *)arg; 1405 struct rtentry *rt = (struct rtentry *)rn; 1406 1407 if (rt_delete_conditional(di->rnh, rt, di->prio, 1408 di->filter_f, di->filter_arg, &di->rc) != 0) 1409 return (0); 1410 1411 /* 1412 * Add deleted rtentries to the list to GC them 1413 * after dropping the lock. 1414 * 1415 * XXX: Delayed notifications not implemented 1416 * for nexthop updates. 1417 */ 1418 if (di->rc.rc_cmd == RTM_DELETE) { 1419 /* Add to the list and return */ 1420 rt->rt_chain = di->head; 1421 di->head = rt; 1422 #ifdef ROUTE_MPATH 1423 } else { 1424 /* 1425 * RTM_CHANGE to a different nexthop or nexthop group. 1426 * Free old multipath group. 1427 */ 1428 nhop_free_any(di->rc.rc_nh_old); 1429 #endif 1430 } 1431 1432 return (0); 1433 } 1434 1435 /* 1436 * Iterates over a routing table specified by @fibnum and @family and 1437 * deletes elements marked by @filter_f. 1438 * @fibnum: rtable id 1439 * @family: AF_ address family 1440 * @filter_f: function returning non-zero value for items to delete 1441 * @arg: data to pass to the @filter_f function 1442 * @report: true if rtsock notification is needed. 1443 */ 1444 void 1445 rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg, 1446 bool report) 1447 { 1448 struct rib_head *rnh; 1449 struct rtentry *rt; 1450 struct nhop_object *nh; 1451 struct epoch_tracker et; 1452 1453 rnh = rt_tables_get_rnh(fibnum, family); 1454 if (rnh == NULL) 1455 return; 1456 1457 struct rt_delinfo di = { 1458 .rnh = rnh, 1459 .filter_f = filter_f, 1460 .filter_arg = filter_arg, 1461 .prio = NH_PRIORITY_NORMAL, 1462 }; 1463 1464 NET_EPOCH_ENTER(et); 1465 1466 RIB_WLOCK(rnh); 1467 rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di); 1468 RIB_WUNLOCK(rnh); 1469 1470 /* We might have something to reclaim. */ 1471 bzero(&di.rc, sizeof(di.rc)); 1472 di.rc.rc_cmd = RTM_DELETE; 1473 while (di.head != NULL) { 1474 rt = di.head; 1475 di.head = rt->rt_chain; 1476 rt->rt_chain = NULL; 1477 nh = rt->rt_nhop; 1478 1479 di.rc.rc_rt = rt; 1480 di.rc.rc_nh_old = nh; 1481 rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc); 1482 1483 if (report) { 1484 #ifdef ROUTE_MPATH 1485 struct nhgrp_object *nhg; 1486 const struct weightened_nhop *wn; 1487 uint32_t num_nhops; 1488 if (NH_IS_NHGRP(nh)) { 1489 nhg = (struct nhgrp_object *)nh; 1490 wn = nhgrp_get_nhops(nhg, &num_nhops); 1491 for (int i = 0; i < num_nhops; i++) 1492 rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum); 1493 } else 1494 #endif 1495 rt_routemsg(RTM_DELETE, rt, nh, fibnum); 1496 } 1497 rt_free(rt); 1498 } 1499 1500 NET_EPOCH_EXIT(et); 1501 } 1502 1503 static int 1504 rt_delete_unconditional(struct radix_node *rn, void *arg) 1505 { 1506 struct rtentry *rt = RNTORT(rn); 1507 struct rib_head *rnh = (struct rib_head *)arg; 1508 1509 rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head); 1510 if (RNTORT(rn) == rt) 1511 rt_free(rt); 1512 1513 return (0); 1514 } 1515 1516 /* 1517 * Removes all routes from the routing table without executing notifications. 1518 * rtentres will be removed after the end of a current epoch. 1519 */ 1520 static void 1521 rib_flush_routes(struct rib_head *rnh) 1522 { 1523 RIB_WLOCK(rnh); 1524 rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh); 1525 RIB_WUNLOCK(rnh); 1526 } 1527 1528 void 1529 rib_flush_routes_family(int family) 1530 { 1531 struct rib_head *rnh; 1532 1533 for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) { 1534 if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL) 1535 rib_flush_routes(rnh); 1536 } 1537 } 1538 1539 const char * 1540 rib_print_family(int family) 1541 { 1542 switch (family) { 1543 case AF_INET: 1544 return ("inet"); 1545 case AF_INET6: 1546 return ("inet6"); 1547 case AF_LINK: 1548 return ("link"); 1549 } 1550 return ("unknown"); 1551 } 1552 1553