/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rmlock.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <netinet/in.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_var.h>

#define	DEBUG_MOD_NAME	route_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains control plane routing table functions.
 *
 * All functions assume they are called in net epoch.
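 *
 * Route additions, changes and deletions below are performed under
 * RIB_WLOCK(); read-only prefix lookups take RIB_RLOCK().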
 */

union sockaddr_union {
	struct sockaddr		sa;
	struct sockaddr_in	sin;
	struct sockaddr_in6	sin6;
	char			_buf[32];
};

static int add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc);
static int change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
    struct rib_cmd_info *rc);

static int add_route_flags(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, int op_flags, struct rib_cmd_info *rc);
#ifdef ROUTE_MPATH
static int add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc);
#endif

static int add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc);
static int delete_route(struct rib_head *rnh, struct rtentry *rt,
    struct rib_cmd_info *rc);
static int rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc);

static bool fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask);
static int get_prio_from_info(const struct rt_addrinfo *info);
static int nhop_get_prio(const struct nhop_object *nh);

#ifdef ROUTE_MPATH
static bool rib_can_multipath(struct rib_head *rh);
#endif

/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define	V_rib_route_multipath	VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define	_MP_FLAGS	CTLFLAG_RW
#else
#define	_MP_FLAGS	CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
    &VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS

#ifdef ROUTE_MPATH
VNET_DEFINE(u_int, fib_hash_outbound) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, hash_outbound, CTLFLAG_RD | CTLFLAG_VNET,
    &VNET_NAME(fib_hash_outbound), 0,
    "Compute flowid for locally-originated packets");

/* Default entropy to add to the hash calculation for the outbound connections */
uint8_t mpath_entropy_key[MPATH_ENTROPY_KEY_LEN] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#endif

#if defined(INET) && defined(INET6)
FEATURE(ipv4_rfc5549_support, "Route IPv4 packets via IPv6 nexthops");
#define	V_rib_route_ipv6_nexthop	VNET(rib_route_ipv6_nexthop)
VNET_DEFINE_STATIC(u_int, rib_route_ipv6_nexthop) = 1;
SYSCTL_UINT(_net_route, OID_AUTO, ipv6_nexthop, CTLFLAG_RW | CTLFLAG_VNET,
    &VNET_NAME(rib_route_ipv6_nexthop), 0, "Enable IPv4 route via IPv6 Next Hop address");
#endif

/* Debug bits */
SYSCTL_NODE(_net_route, OID_AUTO, debug, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

static struct rib_head *
get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
{
	struct rib_head *rnh;
	struct sockaddr *dst;

	KASSERT((fibnum < rt_numfibs), ("rib_add_route: bad fibnum"));

	dst = info->rti_info[RTAX_DST];
	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);

	return (rnh);
}

#if defined(INET) && defined(INET6)
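/*
 * Returns true if IPv4 routes may use IPv6 nexthops (RFC 5549) in the
 * current vnet.
 */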
bool
rib_can_4o6_nhop(void)
{
	return (!!V_rib_route_ipv6_nexthop);
}
#endif

#ifdef ROUTE_MPATH
static bool
rib_can_multipath(struct rib_head *rh)
{
	int result;

	CURVNET_SET(rh->rib_vnet);
	result = !!V_rib_route_multipath;
	CURVNET_RESTORE();

	return (result);
}

/*
 * Check if nhop is multipath-eligible.
 * Avoid nhops without gateways as well as redirect nhops.
 *
 * Returns 1 for a multipath-eligible nexthop,
 * 0 otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{

	if ((nh->nh_flags & NHF_MULTIPATH) != 0)
		return (1);
	if ((nh->nh_flags & NHF_GATEWAY) == 0)
		return (0);
	if ((nh->nh_flags & NHF_REDIRECT) != 0)
		return (0);

	return (1);
}
#endif

static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
	uint32_t weight;

	if (info->rti_mflags & RTV_WEIGHT)
		weight = info->rti_rmx->rmx_weight;
	else
		weight = default_weight;
	/* Keep the upper byte for admin distance purposes */
	if (weight > RT_MAX_WEIGHT)
		weight = RT_MAX_WEIGHT;
	else if (weight == 0)
		weight = default_weight;

	return (weight);
}

/*
 * File-local concept for distinguishing between normal routes and
 * RTF_PINNED routes that can override the "normal" ones.
 */
#define	NH_PRIORITY_HIGH	2
#define	NH_PRIORITY_NORMAL	1
static int
get_prio_from_info(const struct rt_addrinfo *info)
{
	if (info->rti_flags & RTF_PINNED)
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

static int
nhop_get_prio(const struct nhop_object *nh)
{
	if (NH_IS_PINNED(nh))
		return (NH_PRIORITY_HIGH);
	return (NH_PRIORITY_NORMAL);
}

/*
 * Check if specified @gw matches gw data in the nexthop @nh.
 *
 * Returns true if matches, false otherwise.
 */
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{

	if (nh->gw_sa.sa_family != gw->sa_family)
		return (false);

	switch (gw->sa_family) {
	case AF_INET:
		return (nh->gw4_sa.sin_addr.s_addr ==
		    ((const struct sockaddr_in *)gw)->sin_addr.s_addr);
	case AF_INET6:
		{
			const struct sockaddr_in6 *gw6;
			gw6 = (const struct sockaddr_in6 *)gw;

			/*
			 * Currently (2020-09) IPv6 gws in kernel have their
			 * scope embedded. Once this becomes false, this code
			 * has to be revisited.
			 */
			if (IN6_ARE_ADDR_EQUAL(&nh->gw6_sa.sin6_addr,
			    &gw6->sin6_addr))
				return (true);
			return (false);
		}
	case AF_LINK:
		{
			const struct sockaddr_dl *sdl;
			sdl = (const struct sockaddr_dl *)gw;
			return (nh->gwl_sa.sdl_index == sdl->sdl_index);
		}
	default:
		return (memcmp(&nh->gw_sa, gw, nh->gw_sa.sa_len) == 0);
	}

	/* NOTREACHED */
	return (false);
}

/*
 * Matches all nexthops with the given @gw.
 * Can be used as rib_filter_f callback.
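 * Returns non-zero if the nexthop gateway matches @gw_sa.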
 */
int
rib_match_gw(const struct rtentry *rt, const struct nhop_object *nh, void *gw_sa)
{
	const struct sockaddr *gw = (const struct sockaddr *)gw_sa;

	return (match_nhop_gw(nh, gw));
}

struct gw_filter_data {
	const struct sockaddr *gw;
	int count;
};

/*
 * Matches the first occurrence of the gateway provided in @gwd
 */
static int
match_gw_one(const struct rtentry *rt, const struct nhop_object *nh, void *_data)
{
	struct gw_filter_data *gwd = (struct gw_filter_data *)_data;

	/* Return only first match to make rtsock happy */
	if (match_nhop_gw(nh, gwd->gw) && gwd->count++ == 0)
		return (1);
	return (0);
}

/*
 * Checks if data in @info matches nexthop @nh.
 *
 * Returns 0 on success,
 * ESRCH if not matched,
 * ENOENT if filter function returned false
 */
int
check_info_match_nhop(const struct rt_addrinfo *info, const struct rtentry *rt,
    const struct nhop_object *nh)
{
	const struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];

	if (info->rti_filter != NULL) {
		if (info->rti_filter(rt, nh, info->rti_filterdata) == 0)
			return (ENOENT);
		else
			return (0);
	}
	if ((gw != NULL) && !match_nhop_gw(nh, gw))
		return (ESRCH);

	return (0);
}

/*
 * Runs exact prefix match based on @dst and @netmask.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
static struct rtentry *
lookup_prefix_bysa(struct rib_head *rnh, const struct sockaddr *dst,
    const struct sockaddr *netmask, struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	RIB_LOCK_ASSERT(rnh);

	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
	if (rt != NULL) {
		rnd->rnd_nhop = rt->rt_nhop;
		rnd->rnd_weight = rt->rt_weight;
	} else {
		rnd->rnd_nhop = NULL;
		rnd->rnd_weight = 0;
	}

	return (rt);
}

struct rtentry *
lookup_prefix_rt(struct rib_head *rnh, const struct rtentry *rt,
    struct route_nhop_data *rnd)
{
	return (lookup_prefix_bysa(rnh, rt_key_const(rt), rt_mask_const(rt), rnd));
}

/*
 * Runs exact prefix match based on dst/netmask from @info.
 * Assumes RIB lock is held.
 * Returns matched @rtentry if found or NULL.
 * If rtentry was found, saves nexthop / weight value into @rnd.
 */
struct rtentry *
lookup_prefix(struct rib_head *rnh, const struct rt_addrinfo *info,
    struct route_nhop_data *rnd)
{
	struct rtentry *rt;

	rt = lookup_prefix_bysa(rnh, info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], rnd);

	return (rt);
}

const struct rtentry *
rib_lookup_prefix_plen(struct rib_head *rnh, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;

	if (fill_pxmask_family(dst->sa_family, plen, dst, &netmask))
		return (lookup_prefix_bysa(rnh, dst, netmask, rnd));
	return (NULL);
}

static bool
fill_pxmask_family(int family, int plen, struct sockaddr *_dst,
    struct sockaddr **pmask)
{
	if (plen == -1) {
		*pmask = NULL;
		return (true);
	}

	switch (family) {
#ifdef INET
	case AF_INET:
		{
			struct sockaddr_in *mask = (struct sockaddr_in *)(*pmask);
			struct sockaddr_in *dst = (struct sockaddr_in *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin_family = family;
			mask->sin_len = sizeof(*mask);
			if (plen == 32)
				*pmask = NULL;
			else if (plen > 32 || plen < 0)
				return (false);
			else {
				uint32_t daddr, maddr;
				maddr = htonl(plen ? ~((1 << (32 - plen)) - 1) : 0);
				mask->sin_addr.s_addr = maddr;
				daddr = dst->sin_addr.s_addr;
				daddr = htonl(ntohl(daddr) & ntohl(maddr));
				dst->sin_addr.s_addr = daddr;
			}
			return (true);
		}
		break;
#endif
#ifdef INET6
	case AF_INET6:
		{
			struct sockaddr_in6 *mask = (struct sockaddr_in6 *)(*pmask);
			struct sockaddr_in6 *dst = (struct sockaddr_in6 *)_dst;

			memset(mask, 0, sizeof(*mask));
			mask->sin6_family = family;
			mask->sin6_len = sizeof(*mask);
			if (plen == 128)
				*pmask = NULL;
			else if (plen > 128 || plen < 0)
				return (false);
			else {
				ip6_writemask(&mask->sin6_addr, plen);
				IN6_MASK_ADDR(&dst->sin6_addr, &mask->sin6_addr);
			}
			return (true);
		}
		break;
#endif
	}
	return (false);
}

/*
 * Attempts to add @dst/plen prefix with nexthop/nexthop group data @rnd
 * to the routing table.
 *
 * @fibnum: verified kernel rtable id to insert route to
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
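 *
 * A minimal usage sketch (illustrative only; "dst4" is a caller-prepared,
 * properly masked struct sockaddr_in and "nh" is a referenced nexthop):
 *
 *	struct route_nhop_data rnd = {
 *		.rnd_nhop = nh,
 *		.rnd_weight = RT_DEFAULT_WEIGHT,
 *	};
 *	struct rib_cmd_info rc;
 *	error = rib_add_route_px(fibnum, (struct sockaddr *)&dst4, 24,
 *	    &rnd, RTM_F_CREATE, &rc);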
 */
int
rib_add_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    struct route_nhop_data *rnd, int op_flags, struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	struct rtentry *rt = NULL;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	if (op_flags & RTM_F_CREATE) {
		if ((rt = rt_alloc(rnh, dst, netmask)) == NULL) {
			FIB_RH_LOG(LOG_INFO, rnh, "rtentry allocation failed");
			return (ENOMEM);
		}
	} else {
		struct route_nhop_data rnd_tmp;
		RIB_RLOCK_TRACKER;

		RIB_RLOCK(rnh);
		rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd_tmp);
		RIB_RUNLOCK(rnh);

		if (rt == NULL)
			return (ESRCH);
	}

	return (add_route_flags(rnh, rt, rnd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching gateway @gw from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @gw: gateway to match
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px_gw(uint32_t fibnum, struct sockaddr *dst, int plen,
    const struct sockaddr *gw, int op_flags, struct rib_cmd_info *rc)
{
	struct gw_filter_data gwd = { .gw = gw };

	return (rib_del_route_px(fibnum, dst, plen, match_gw_one, &gwd, op_flags, rc));
}

/*
 * Attempts to delete @dst/plen prefix matching @filter_func from the
 * routing table.
 *
 * @fibnum: rtable id to remove route from
 * @dst: verified kernel-originated sockaddr, can be masked if plen non-empty
 * @plen: prefix length (or -1 if host route or not applicable for AF)
 * @filter_func: func to be called for each nexthop of the prefix for matching
 * @filter_arg: argument to pass to @filter_func
 * @op_flags: combination of RTM_F_ flags
 * @rc: storage to report operation result
 *
 * Returns 0 on success.
 */
int
rib_del_route_px(uint32_t fibnum, struct sockaddr *dst, int plen,
    rib_filter_f_t *filter_func, void *filter_arg, int op_flags,
    struct rib_cmd_info *rc)
{
	union sockaddr_union mask_storage;
	struct sockaddr *netmask = &mask_storage.sa;
	int error;

	NET_EPOCH_ASSERT();

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	struct rib_head *rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	if (dst->sa_len > sizeof(mask_storage)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too big: %d", dst->sa_len);
		return (EINVAL);
	}

	if (!fill_pxmask_family(dst->sa_family, plen, dst, &netmask)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: invalid plen %d", plen);
		return (EINVAL);
	}

	int prio = (op_flags & RTM_F_FORCE) ?
	    NH_PRIORITY_HIGH : NH_PRIORITY_NORMAL;
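	/*
	 * RTM_F_FORCE allows deleting higher-priority (RTF_PINNED) routes;
	 * without it, rt_delete_conditional() rejects them with EADDRINUSE.
	 */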

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Tries to copy route @rt from one rtable to the rtable specified by @rh_dst.
 * @rt: route to copy.
 * @rnd_src: nhop and weight. Multipath routes are not supported
 * @rh_dst: target rtable.
 * @rc: operation result storage
 *
 * Returns 0 on success.
 */
int
rib_copy_route(struct rtentry *rt, const struct route_nhop_data *rnd_src,
    struct rib_head *rh_dst, struct rib_cmd_info *rc)
{
	struct nhop_object __diagused *nh_src = rnd_src->rnd_nhop;
	int error;

	MPASS((nh_src->nh_flags & NHF_MULTIPATH) == 0);

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhbuf[NHOP_PRINT_BUFSIZE], rtbuf[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(nh_src, nhbuf, sizeof(nhbuf));
		rt_print_buf(rt, rtbuf, sizeof(rtbuf));
		FIB_RH_LOG(LOG_DEBUG2, rh_dst, "copying %s -> %s from fib %u",
		    rtbuf, nhbuf, nhop_get_fibnum(nh_src));
	}
	struct nhop_object *nh = nhop_alloc(rh_dst->rib_fibnum, rh_dst->rib_family);
	if (nh == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to allocate new nexthop");
		return (ENOMEM);
	}
	nhop_copy(nh, rnd_src->rnd_nhop);
	nhop_set_origin(nh, nhop_get_origin(rnd_src->rnd_nhop));
	nhop_set_fibnum(nh, rh_dst->rib_fibnum);
	nh = nhop_get_nhop_internal(rh_dst, nh, &error);
	if (error != 0) {
		FIB_RH_LOG(LOG_INFO, rh_dst,
		    "unable to finalize new nexthop: error %d", error);
		return (ENOMEM);
	}

	struct rtentry *rt_new = rt_alloc(rh_dst, rt_key(rt), rt_mask(rt));
	if (rt_new == NULL) {
		FIB_RH_LOG(LOG_INFO, rh_dst, "unable to create new rtentry");
		nhop_free(nh);
		return (ENOMEM);
	}

	struct route_nhop_data rnd = {
		.rnd_nhop = nh,
		.rnd_weight = rnd_src->rnd_weight
	};
	int op_flags = RTM_F_CREATE | (NH_IS_PINNED(nh) ? RTM_F_FORCE : 0);
	error = add_route_flags(rh_dst, rt_new, &rnd, op_flags, rc);

	if (error != 0) {
		IF_DEBUG_LEVEL(LOG_DEBUG2) {
			char buf[NHOP_PRINT_BUFSIZE];
			rt_print_buf(rt_new, buf, sizeof(buf));
			FIB_RH_LOG(LOG_DEBUG, rh_dst,
			    "Unable to add route %s: error %d", buf, error);
		}
		nhop_free(nh);
		rt_free_immediate(rt_new);
	}
	return (error);
}

/*
 * Adds route defined by @info into the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
 */
int
rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	/*
	 * Check consistency between RTF_HOST flag and netmask
	 * existence.
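	 * RTF_HOST routes carry no netmask, while non-host routes
	 * require one.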
	 */
	if (info->rti_flags & RTF_HOST)
		info->rti_info[RTAX_NETMASK] = NULL;
	else if (info->rti_info[RTAX_NETMASK] == NULL) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: no RTF_HOST and empty netmask");
		return (EINVAL);
	}

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_ADD;

	error = add_route_byinfo(rnh, info, rc);
	if (error == 0)
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	return (error);
}

static int
add_route_byinfo(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_add;
	struct nhop_object *nh;
	struct rtentry *rt;
	struct sockaddr *dst, *gateway, *netmask;
	int error;

	dst = info->rti_info[RTAX_DST];
	gateway = info->rti_info[RTAX_GATEWAY];
	netmask = info->rti_info[RTAX_NETMASK];

	if ((info->rti_flags & RTF_GATEWAY) && !gateway) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: RTF_GATEWAY set with empty gw");
		return (EINVAL);
	}
	if (dst && gateway && !nhop_check_gateway(dst->sa_family, gateway->sa_family)) {
		FIB_RH_LOG(LOG_DEBUG, rnh,
		    "error: invalid dst/gateway family combination (%d, %d)",
		    dst->sa_family, gateway->sa_family);
		return (EINVAL);
	}

	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb)) {
		FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large: %d",
		    dst->sa_len);
		return (EINVAL);
	}

	if (info->rti_ifa == NULL) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);
		if (error)
			return (error);
	}

	if ((rt = rt_alloc(rnh, dst, netmask)) == NULL)
		return (ENOBUFS);

	error = nhop_create_from_info(rnh, info, &nh);
	if (error != 0) {
		rt_free_immediate(rt);
		return (error);
	}

	rnd_add.rnd_nhop = nh;
	rnd_add.rnd_weight = get_info_weight(info, RT_DEFAULT_WEIGHT);

	int op_flags = RTM_F_CREATE;
	if (get_prio_from_info(info) == NH_PRIORITY_HIGH)
		op_flags |= RTM_F_FORCE;
	else
		op_flags |= RTM_F_APPEND;
	return (add_route_flags(rnh, rt, &rnd_add, op_flags, rc));
}

static int
add_route_flags(struct rib_head *rnh, struct rtentry *rt, struct route_nhop_data *rnd_add,
    int op_flags, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_orig;
	struct nhop_object *nh;
	struct rtentry *rt_orig;
	int error = 0;

	MPASS(rt != NULL);

	nh = rnd_add->rnd_nhop;

	RIB_WLOCK(rnh);

	rt_orig = lookup_prefix_rt(rnh, rt, &rnd_orig);

	if (rt_orig == NULL) {
		if (op_flags & RTM_F_CREATE)
			error = add_route(rnh, rt, rnd_add, rc);
		else
			error = ESRCH; /* no entry but creation was not required */
		RIB_WUNLOCK(rnh);
		if (error != 0)
			goto out;
		return (0);
	}

	if (op_flags & RTM_F_EXCL) {
		/* An existing route is in the RIB and we are not allowed to replace it. */
		RIB_WUNLOCK(rnh);
		error = EEXIST;
		goto out;
	}

	/* Now either append or replace */
	if (op_flags & RTM_F_REPLACE) {
		if (nhop_get_prio(rnd_orig.rnd_nhop) > nhop_get_prio(rnd_add->rnd_nhop)) {
			/* Old path is "better" (e.g. has PINNED flag set) */
			RIB_WUNLOCK(rnh);
			error = EEXIST;
			goto out;
		}
		change_route(rnh, rt_orig, rnd_add, rc);
		RIB_WUNLOCK(rnh);
		nh = rc->rc_nh_old;
		goto out;
	}

	RIB_WUNLOCK(rnh);

#ifdef ROUTE_MPATH
	if ((op_flags & RTM_F_APPEND) && rib_can_multipath(rnh) &&
	    nhop_can_multipath(rnd_add->rnd_nhop) &&
	    nhop_can_multipath(rnd_orig.rnd_nhop)) {

		for (int i = 0; i < RIB_MAX_RETRIES; i++) {
			error = add_route_flags_mpath(rnh, rt_orig, rnd_add, &rnd_orig,
			    op_flags, rc);
			if (error != EAGAIN)
				break;
			RTSTAT_INC(rts_add_retry);
		}

		/*
		 * Original nhop reference is unused in any case.
		 */
		nhop_free_any(rnd_add->rnd_nhop);
		if (op_flags & RTM_F_CREATE) {
			if (error != 0 || rc->rc_cmd != RTM_ADD)
				rt_free_immediate(rt);
		}
		return (error);
	}
#endif
	/* Out of options - free state and return error */
	error = EEXIST;
out:
	if (op_flags & RTM_F_CREATE)
		rt_free_immediate(rt);
	nhop_free_any(nh);

	return (error);
}

#ifdef ROUTE_MPATH
static int
add_route_flags_mpath(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_orig,
    int op_flags, struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_new;
	int error = 0;

	error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add, &rnd_new);
	if (error != 0) {
		if (error == EAGAIN) {
			/*
			 * Group creation failed, most probably because
			 * @rnd_orig data got scheduled for deletion.
			 * Refresh @rnd_orig data and retry.
			 */
			RIB_RLOCK(rnh);
			lookup_prefix_rt(rnh, rt, rnd_orig);
			RIB_RUNLOCK(rnh);
			if (rnd_orig->rnd_nhop == NULL && !(op_flags & RTM_F_CREATE)) {
				/* In this iteration the route doesn't exist */
				error = ENOENT;
			}
		}
		return (error);
	}
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);
	if (error != 0)
		return (error);

	if (V_fib_hash_outbound == 0 && NH_IS_NHGRP(rc->rc_nh_new)) {
		/*
		 * First multipath route got installed. Enable local
		 * outbound connections hashing.
		 */
		if (bootverbose)
			printf("FIB: enabled flowid calculation for locally-originated packets\n");
		V_fib_hash_outbound = 1;
	}

	return (0);
}
#endif

/*
 * Removes route defined by @info from the kernel table specified by @fibnum and
 * sa_family in @info->rti_info[RTAX_DST].
 *
 * Returns 0 on success and fills in operation metadata into @rc.
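 *
 * If info->rti_filter is set, it selects the paths to delete; otherwise,
 * if RTAX_GATEWAY is supplied, only the first path via that gateway is
 * removed.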
 */
int
rib_del_route(uint32_t fibnum, struct rt_addrinfo *info, struct rib_cmd_info *rc)
{
	struct rib_head *rnh;
	struct sockaddr *dst, *netmask;
	struct sockaddr_storage mdst;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_DELETE;

	dst = info->rti_info[RTAX_DST];
	netmask = info->rti_info[RTAX_NETMASK];

	if (netmask != NULL) {
		/* Ensure @dst is always properly masked */
		if (dst->sa_len > sizeof(mdst)) {
			FIB_RH_LOG(LOG_DEBUG, rnh, "error: dst->sa_len too large");
			return (EINVAL);
		}
		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
		dst = (struct sockaddr *)&mdst;
	}

	rib_filter_f_t *filter_func = NULL;
	void *filter_arg = NULL;
	struct gw_filter_data gwd = { .gw = info->rti_info[RTAX_GATEWAY] };

	if (info->rti_filter != NULL) {
		filter_func = info->rti_filter;
		filter_arg = info->rti_filterdata;
	} else if (gwd.gw != NULL) {
		filter_func = match_gw_one;
		filter_arg = &gwd;
	}

	int prio = get_prio_from_info(info);

	RIB_WLOCK(rnh);
	struct route_nhop_data rnd;
	struct rtentry *rt = lookup_prefix_bysa(rnh, dst, netmask, &rnd);
	if (rt != NULL) {
		error = rt_delete_conditional(rnh, rt, prio, filter_func,
		    filter_arg, rc);
	} else
		error = ESRCH;
	RIB_WUNLOCK(rnh);

	if (error != 0)
		return (error);

	rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

	if (rc->rc_cmd == RTM_DELETE)
		rt_free(rc->rc_rt);
#ifdef ROUTE_MPATH
	else {
		/*
		 * Deleting 1 path may result in RTM_CHANGE to
		 * a different mpath group/nhop.
		 * Free old mpath group.
		 */
		nhop_free_any(rc->rc_nh_old);
	}
#endif

	return (0);
}

/*
 * Conditionally unlinks rtentry paths from @rnh matching @cb.
 * Returns 0 on success with operation result stored in @rc.
 * On error, returns:
 * ESRCH - if prefix was not found or filter function failed to match
 * EADDRINUSE - if trying to delete higher priority route.
 */
static int
rt_delete_conditional(struct rib_head *rnh, struct rtentry *rt,
    int prio, rib_filter_f_t *cb, void *cbdata, struct rib_cmd_info *rc)
{
	struct nhop_object *nh = rt->rt_nhop;

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh)) {
		struct nhgrp_object *nhg = (struct nhgrp_object *)nh;
		struct route_nhop_data rnd;
		int error;

		if (cb == NULL)
			return (ESRCH);
		error = nhgrp_get_filtered_group(rnh, rt, nhg, cb, cbdata, &rnd);
		if (error == 0) {
			if (rnd.rnd_nhgrp == nhg) {
				/* No match, unreference new group and return. */
				nhop_free_any(rnd.rnd_nhop);
				return (ESRCH);
			}
			error = change_route(rnh, rt, &rnd, rc);
		}
		return (error);
	}
#endif
	if (cb != NULL && !cb(rt, nh, cbdata))
		return (ESRCH);

	if (prio < nhop_get_prio(nh))
		return (EADDRINUSE);

	return (delete_route(rnh, rt, rc));
}

int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_orig;
	struct rib_head *rnh;
	struct rtentry *rt;
	int error;

	NET_EPOCH_ASSERT();

	rnh = get_rnh(fibnum, info);
	if (rnh == NULL)
		return (EAFNOSUPPORT);

	bzero(rc, sizeof(struct rib_cmd_info));
	rc->rc_cmd = RTM_CHANGE;

	/* Check if updated gateway exists */
	if ((info->rti_flags & RTF_GATEWAY) &&
	    (info->rti_info[RTAX_GATEWAY] == NULL)) {

		/*
		 * route(8) adds RTF_GATEWAY flag if -interface is not set.
		 * Remove RTF_GATEWAY to enforce consistency and maintain
		 * compatibility.
		 */
		info->rti_flags &= ~RTF_GATEWAY;
	}

	/*
	 * A route change is done in multiple steps, with the lock being
	 * dropped and reacquired in between. When multiple processes
	 * change the same route concurrently, the route may change between
	 * the steps. Address this by retrying the operation multiple times
	 * before failing.
	 */

	RIB_RLOCK(rnh);
	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);

	if (rt == NULL) {
		RIB_RUNLOCK(rnh);
		return (ESRCH);
	}

	rnd_orig.rnd_nhop = rt->rt_nhop;
	rnd_orig.rnd_weight = rt->rt_weight;

	RIB_RUNLOCK(rnh);

	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
		error = change_route_byinfo(rnh, rt, info, &rnd_orig, rc);
		if (error != EAGAIN)
			break;
	}

	return (error);
}

static int
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
    struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
	int error;

	/*
	 * New gateway could require new ifaddr, ifp;
	 * flags may also be different; ifp may be specified
	 * by ll sockaddr when protocol address is ambiguous
	 */
	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
	    info->rti_info[RTAX_GATEWAY] != NULL) ||
	    info->rti_info[RTAX_IFP] != NULL ||
	    (info->rti_info[RTAX_IFA] != NULL &&
	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
		error = rt_getifa_fib(info, rnh->rib_fibnum);

		if (error != 0) {
			info->rti_ifa = NULL;
			return (error);
		}
	}

	error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
	info->rti_ifa = NULL;

	return (error);
}

#ifdef ROUTE_MPATH
static int
change_mpath_route(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0, found_idx = 0;
	struct nhop_object *nh_orig = NULL, *nh_new;
	struct route_nhop_data rnd_new = {};
	const struct weightened_nhop *wn = NULL;
	struct weightened_nhop *wn_new;
	uint32_t num_nhops;

	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);
	for (int i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			found_idx = i;
			break;
		}
	}

	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[found_idx].nh = nh_new;
	wn_new[found_idx].weight = get_info_weight(info, wn[found_idx].weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, 0, &rnd_new.rnd_nhgrp);
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}
#endif

static int
change_route_byinfo(struct rib_head *rnh, struct rtentry *rt,
    struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
    struct rib_cmd_info *rc)
{
	int error = 0;
	struct nhop_object *nh_orig;
	struct route_nhop_data rnd_new;

	nh_orig = rnd_orig->rnd_nhop;
	if (nh_orig == NULL)
		return (ESRCH);

#ifdef ROUTE_MPATH
	if (NH_IS_NHGRP(nh_orig))
		return (change_mpath_route(rnh, rt, info, rnd_orig, rc));
#endif

	rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
	error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
	if (error != 0)
		return (error);
	error = change_route_conditional(rnh, rt, rnd_orig, &rnd_new, rc);

	return (error);
}

/*
 * Insert @rt with nhop data from @rnd to @rnh.
 * Returns 0 on success and stores operation results in @rc.
 */
static int
add_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct radix_node *rn;

	RIB_WLOCK_ASSERT(rnh);

	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	rn = rnh->rnh_addaddr(rt_key(rt), rt_mask_const(rt), &rnh->head, rt->rt_nodes);

	if (rn != NULL) {
		if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
			tmproutes_update(rnh, rt, rnd->rnd_nhop);

		/* Finalize notification */
		rib_bump_gen(rnh);
		rnh->rnh_prefixes++;

		rc->rc_cmd = RTM_ADD;
		rc->rc_rt = rt;
		rc->rc_nh_old = NULL;
		rc->rc_nh_new = rnd->rnd_nhop;
		rc->rc_nh_weight = rnd->rnd_weight;

		rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
		return (0);
	}

	/* Existing route or memory allocation failure. */
	return (EEXIST);
}

/*
 * Unconditionally deletes @rt from @rnh.
 */
static int
delete_route(struct rib_head *rnh, struct rtentry *rt, struct rib_cmd_info *rc)
{
	RIB_WLOCK_ASSERT(rnh);

	/* Route deletion requested. */
	struct radix_node *rn;

	rn = rnh->rnh_deladdr(rt_key_const(rt), rt_mask_const(rt), &rnh->head);
	if (rn == NULL)
		return (ESRCH);
	rt = RNTORT(rn);
	rt->rte_flags &= ~RTF_UP;

	rib_bump_gen(rnh);
	rnh->rnh_prefixes--;

	rc->rc_cmd = RTM_DELETE;
	rc->rc_rt = rt;
	rc->rc_nh_old = rt->rt_nhop;
	rc->rc_nh_new = NULL;
	rc->rc_nh_weight = rt->rt_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Switch @rt nhop/weight to the ones specified in @rnd.
 * Returns 0 on success.
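 * If @rnd->rnd_nhop is NULL, the route is deleted instead.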
 */
int
change_route(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd, struct rib_cmd_info *rc)
{
	struct nhop_object *nh_orig;

	RIB_WLOCK_ASSERT(rnh);

	nh_orig = rt->rt_nhop;

	if (rnd->rnd_nhop == NULL)
		return (delete_route(rnh, rt, rc));

	/* Changing nexthop & weight to a new one */
	rt->rt_nhop = rnd->rnd_nhop;
	rt->rt_weight = rnd->rnd_weight;
	if (!NH_IS_NHGRP(rnd->rnd_nhop) && nhop_get_expire(rnd->rnd_nhop))
		tmproutes_update(rnh, rt, rnd->rnd_nhop);

	/* Finalize notification */
	rib_bump_gen(rnh);
	rc->rc_cmd = RTM_CHANGE;
	rc->rc_rt = rt;
	rc->rc_nh_old = nh_orig;
	rc->rc_nh_new = rnd->rnd_nhop;
	rc->rc_nh_weight = rnd->rnd_weight;

	rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);

	return (0);
}

/*
 * Conditionally update route nhop/weight IFF data in @rnd_orig is
 * consistent with the current route data.
 * Nexthop in @rnd_new is consumed.
 */
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
    struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_new,
    struct rib_cmd_info *rc)
{
	struct rtentry *rt_new;
	int error = 0;

	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char buf_old[NHOP_PRINT_BUFSIZE], buf_new[NHOP_PRINT_BUFSIZE];
		nhop_print_buf_any(rnd_orig->rnd_nhop, buf_old, NHOP_PRINT_BUFSIZE);
		nhop_print_buf_any(rnd_new->rnd_nhop, buf_new, NHOP_PRINT_BUFSIZE);
		FIB_LOG(LOG_DEBUG2, rnh->rib_fibnum, rnh->rib_family,
		    "trying change %s -> %s", buf_old, buf_new);
	}
	RIB_WLOCK(rnh);

	struct route_nhop_data rnd;
	rt_new = lookup_prefix_rt(rnh, rt, &rnd);

	if (rt_new == NULL) {
		if (rnd_orig->rnd_nhop == NULL)
			error = add_route(rnh, rt, rnd_new, rc);
		else {
			/*
			 * Prefix does not exist, which was not our assumption.
			 * Update @rnd_orig with the new data and return
			 */
			rnd_orig->rnd_nhop = NULL;
			rnd_orig->rnd_weight = 0;
			error = EAGAIN;
		}
	} else {
		/* Prefix exists, try to update */
		if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
			/*
			 * Nhop/mpath group hasn't changed. Flip
			 * to the new precalculated one and return
			 */
			error = change_route(rnh, rt_new, rnd_new, rc);
		} else {
			/* Update and retry */
			rnd_orig->rnd_nhop = rt_new->rt_nhop;
			rnd_orig->rnd_weight = rt_new->rt_weight;
			error = EAGAIN;
		}
	}

	RIB_WUNLOCK(rnh);

	if (error == 0) {
		rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);

		if (rnd_orig->rnd_nhop != NULL)
			nhop_free_any(rnd_orig->rnd_nhop);

	} else {
		if (rnd_new->rnd_nhop != NULL)
			nhop_free_any(rnd_new->rnd_nhop);
	}

	return (error);
}

/*
 * Performs a modification of the routing table as specified by @action.
 * Table is specified by @fibnum and sa_family in @info->rti_info[RTAX_DST].
 * Needs to be run in network epoch.
 *
 * Returns 0 on success and fills in @rc with action result.
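 *
 * A minimal sketch (illustrative only; "dst", "netmask" and "gw" are
 * caller-prepared sockaddrs):
 *
 *	struct rt_addrinfo info = { .rti_flags = RTF_GATEWAY };
 *	struct rib_cmd_info rc;
 *	info.rti_info[RTAX_DST] = dst;
 *	info.rti_info[RTAX_NETMASK] = netmask;
 *	info.rti_info[RTAX_GATEWAY] = gw;
 *	error = rib_action(fibnum, RTM_ADD, &info, &rc);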
 */
int
rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
    struct rib_cmd_info *rc)
{
	int error;

	switch (action) {
	case RTM_ADD:
		error = rib_add_route(fibnum, info, rc);
		break;
	case RTM_DELETE:
		error = rib_del_route(fibnum, info, rc);
		break;
	case RTM_CHANGE:
		error = rib_change_route(fibnum, info, rc);
		break;
	default:
		error = ENOTSUP;
	}

	return (error);
}

struct rt_delinfo
{
	struct rib_head *rnh;
	struct rtentry *head;
	rib_filter_f_t *filter_f;
	void *filter_arg;
	int prio;
	struct rib_cmd_info rc;
};

/*
 * Conditionally unlinks rtentries or paths from the radix tree based
 * on the callback data passed in @arg.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *di = (struct rt_delinfo *)arg;
	struct rtentry *rt = (struct rtentry *)rn;

	if (rt_delete_conditional(di->rnh, rt, di->prio,
	    di->filter_f, di->filter_arg, &di->rc) != 0)
		return (0);

	/*
	 * Add deleted rtentries to the list to GC them
	 * after dropping the lock.
	 *
	 * XXX: Delayed notifications not implemented
	 * for nexthop updates.
	 */
	if (di->rc.rc_cmd == RTM_DELETE) {
		/* Add to the list and return */
		rt->rt_chain = di->head;
		di->head = rt;
#ifdef ROUTE_MPATH
	} else {
		/*
		 * RTM_CHANGE to a different nexthop or nexthop group.
		 * Free old multipath group.
		 */
		nhop_free_any(di->rc.rc_nh_old);
#endif
	}

	return (0);
}

/*
 * Iterates over a routing table specified by @fibnum and @family and
 * deletes elements marked by @filter_f.
 * @fibnum: rtable id
 * @family: AF_ address family
 * @filter_f: function returning non-zero value for items to delete
 * @filter_arg: data to pass to the @filter_f function
 * @report: true if rtsock notification is needed.
 */
void
rib_walk_del(u_int fibnum, int family, rib_filter_f_t *filter_f, void *filter_arg,
    bool report)
{
	struct rib_head *rnh;
	struct rtentry *rt;
	struct nhop_object *nh;
	struct epoch_tracker et;

	rnh = rt_tables_get_rnh(fibnum, family);
	if (rnh == NULL)
		return;

	struct rt_delinfo di = {
		.rnh = rnh,
		.filter_f = filter_f,
		.filter_arg = filter_arg,
		.prio = NH_PRIORITY_NORMAL,
	};

	NET_EPOCH_ENTER(et);

	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
	RIB_WUNLOCK(rnh);

	/* We might have something to reclaim. */
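	/*
	 * The rtentries unlinked above are announced and freed below,
	 * after the RIB lock has been dropped.
	 */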
	bzero(&di.rc, sizeof(di.rc));
	di.rc.rc_cmd = RTM_DELETE;
	while (di.head != NULL) {
		rt = di.head;
		di.head = rt->rt_chain;
		rt->rt_chain = NULL;
		nh = rt->rt_nhop;

		di.rc.rc_rt = rt;
		di.rc.rc_nh_old = nh;
		rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);

		if (report) {
#ifdef ROUTE_MPATH
			struct nhgrp_object *nhg;
			const struct weightened_nhop *wn;
			uint32_t num_nhops;
			if (NH_IS_NHGRP(nh)) {
				nhg = (struct nhgrp_object *)nh;
				wn = nhgrp_get_nhops(nhg, &num_nhops);
				for (int i = 0; i < num_nhops; i++)
					rt_routemsg(RTM_DELETE, rt, wn[i].nh, fibnum);
			} else
#endif
			rt_routemsg(RTM_DELETE, rt, nh, fibnum);
		}
		rt_free(rt);
	}

	NET_EPOCH_EXIT(et);
}

static int
rt_delete_unconditional(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = RNTORT(rn);
	struct rib_head *rnh = (struct rib_head *)arg;

	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), &rnh->head);
	if (RNTORT(rn) == rt)
		rt_free(rt);

	return (0);
}

/*
 * Removes all routes from the routing table without executing notifications.
 * rtentries will be removed after the end of the current epoch.
 */
static void
rib_flush_routes(struct rib_head *rnh)
{
	RIB_WLOCK(rnh);
	rnh->rnh_walktree(&rnh->head, rt_delete_unconditional, rnh);
	RIB_WUNLOCK(rnh);
}

void
rib_flush_routes_family(int family)
{
	struct rib_head *rnh;

	for (uint32_t fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		if ((rnh = rt_tables_get_rnh(fibnum, family)) != NULL)
			rib_flush_routes(rnh);
	}
}

const char *
rib_print_family(int family)
{
	switch (family) {
	case AF_INET:
		return ("inet");
	case AF_INET6:
		return ("inet6");
	case AF_LINK:
		return ("link");
	}
	return ("unknown");
}