1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_route.h" 32 #include <sys/types.h> 33 #include <sys/ck.h> 34 #include <sys/epoch.h> 35 #include <sys/kernel.h> 36 #include <sys/malloc.h> 37 #include <sys/rmlock.h> 38 #include <sys/socket.h> 39 40 #include <net/if.h> 41 #include <net/route.h> 42 #include <net/route/nhop.h> 43 #include <net/route/nhop_utils.h> 44 45 #include <net/route/route_ctl.h> 46 #include <net/route/route_var.h> 47 #include <netinet6/scope6_var.h> 48 #include <netlink/netlink.h> 49 #include <netlink/netlink_ctl.h> 50 #include <netlink/netlink_route.h> 51 #include <netlink/route/route_var.h> 52 53 #define DEBUG_MOD_NAME nl_nhop 54 #define DEBUG_MAX_LEVEL LOG_DEBUG3 55 #include <netlink/netlink_debug.h> 56 _DECLARE_DEBUG(LOG_INFO); 57 58 /* 59 * This file contains the logic to maintain kernel nexthops and 60 * nexhop groups based om the data provided by the user. 61 * 62 * Kernel stores (nearly) all of the routing data in the nexthops, 63 * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT). 64 * 65 * Netlink API provides higher-level abstraction for the user. Each 66 * user-created nexthop may map to multiple kernel nexthops. 67 * 68 * The following variations require separate kernel nexthop to be 69 * created: 70 * * prefix flags (NHF_HOST, NHF_DEFAULT) 71 * * using IPv6 gateway for IPv4 routes 72 * * different fibnum 73 * 74 * These kernel nexthops have the lifetime bound to the lifetime of 75 * the user_nhop object. They are not collected until user requests 76 * to delete the created user_nhop. 77 * 78 */ 79 struct user_nhop { 80 uint32_t un_idx; /* Userland-provided index */ 81 uint32_t un_fibfam; /* fibnum+af(as highest byte) */ 82 uint8_t un_protocol; /* protocol that install the record */ 83 struct nhop_object *un_nhop; /* "production" nexthop */ 84 struct nhop_object *un_nhop_src; /* nexthop to copy from */ 85 struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */ 86 uint32_t un_nhgrp_count; /* number of nexthops */ 87 struct user_nhop *un_next; /* next item in hash chain */ 88 struct user_nhop *un_nextchild; /* master -> children */ 89 struct epoch_context un_epoch_ctx; /* epoch ctl helper */ 90 }; 91 92 /* produce hash value for an object */ 93 #define unhop_hash_obj(_obj) (hash_unhop(_obj)) 94 /* compare two objects */ 95 #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two)) 96 /* next object accessor */ 97 #define unhop_next(_obj) (_obj)->un_next 98 99 CHT_SLIST_DEFINE(unhop, struct user_nhop); 100 101 struct unhop_ctl { 102 struct unhop_head un_head; 103 struct rmlock un_lock; 104 }; 105 #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl") 106 #define UN_TRACKER struct rm_priotracker un_tracker 107 #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker) 108 #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker) 109 110 #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock); 111 #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock); 112 113 VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL; 114 #define V_un_ctl VNET(un_ctl) 115 116 static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size); 117 static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b); 118 static unsigned int hash_unhop(const struct user_nhop *obj); 119 120 static void destroy_unhop(struct user_nhop *unhop); 121 static struct nhop_object *clone_unhop(const struct user_nhop *unhop, 122 uint32_t fibnum, int family, int nh_flags); 123 124 static int 125 cmp_unhop(const struct user_nhop *a, const struct user_nhop *b) 126 { 127 return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam); 128 } 129 130 /* 131 * Hash callback: calculate hash of an object 132 */ 133 static unsigned int 134 hash_unhop(const struct user_nhop *obj) 135 { 136 return (obj->un_idx ^ obj->un_fibfam); 137 } 138 139 #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0) 140 141 /* 142 * Factory interface for creating matching kernel nexthops/nexthop groups 143 * 144 * @uidx: userland nexhop index used to create the nexthop 145 * @fibnum: fibnum nexthop will be used in 146 * @family: upper family nexthop will be used in 147 * @nh_flags: desired nexthop prefix flags 148 * @perror: pointer to store error to 149 * 150 * Returns referenced nexthop linked to @fibnum/@family rib on success. 151 */ 152 struct nhop_object * 153 nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, 154 int nh_flags, int *perror) 155 { 156 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 157 UN_TRACKER; 158 159 if (__predict_false(ctl == NULL)) 160 return (NULL); 161 162 struct user_nhop key= { 163 .un_idx = uidx, 164 .un_fibfam = fibnum | ((uint32_t)family) << 24, 165 }; 166 struct user_nhop *unhop; 167 168 nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT); 169 170 if (__predict_false(family == 0)) 171 return (NULL); 172 173 UN_RLOCK(ctl); 174 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 175 if (unhop != NULL) { 176 struct nhop_object *nh = unhop->un_nhop; 177 UN_RLOCK(ctl); 178 *perror = 0; 179 nhop_ref_any(nh); 180 return (nh); 181 } 182 183 /* 184 * Exact nexthop not found. Search for template nexthop to clone from. 185 */ 186 key.un_fibfam = 0; 187 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 188 if (unhop == NULL) { 189 UN_RUNLOCK(ctl); 190 *perror = ESRCH; 191 return (NULL); 192 } 193 194 UN_RUNLOCK(ctl); 195 196 /* Create entry to insert first */ 197 struct user_nhop *un_new, *un_tmp; 198 un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); 199 if (un_new == NULL) { 200 *perror = ENOMEM; 201 return (NULL); 202 } 203 un_new->un_idx = uidx; 204 un_new->un_fibfam = fibnum | ((uint32_t)family) << 24; 205 206 /* Relying on epoch to protect unhop here */ 207 un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags); 208 if (un_new->un_nhop == NULL) { 209 free(un_new, M_NETLINK); 210 *perror = ENOMEM; 211 return (NULL); 212 } 213 214 /* Insert back and report */ 215 UN_WLOCK(ctl); 216 217 /* First, find template record once again */ 218 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 219 if (unhop == NULL) { 220 /* Someone deleted the nexthop during the call */ 221 UN_WUNLOCK(ctl); 222 *perror = ESRCH; 223 destroy_unhop(un_new); 224 return (NULL); 225 } 226 227 /* Second, check the direct match */ 228 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp); 229 struct nhop_object *nh; 230 if (un_tmp != NULL) { 231 /* Another thread already created the desired nextop, use it */ 232 nh = un_tmp->un_nhop; 233 } else { 234 /* Finally, insert the new nexthop and link it to the primary */ 235 nh = un_new->un_nhop; 236 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new); 237 un_new->un_nextchild = unhop->un_nextchild; 238 unhop->un_nextchild = un_new; 239 un_new = NULL; 240 NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh); 241 } 242 243 UN_WUNLOCK(ctl); 244 245 if (un_new != NULL) 246 destroy_unhop(un_new); 247 248 *perror = 0; 249 nhop_ref_any(nh); 250 return (nh); 251 } 252 253 static struct user_nhop * 254 nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx) 255 { 256 struct user_nhop key= { .un_idx = uidx }; 257 struct user_nhop *unhop = NULL; 258 UN_TRACKER; 259 260 UN_RLOCK(ctl); 261 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 262 UN_RUNLOCK(ctl); 263 264 return (unhop); 265 } 266 267 #define MAX_STACK_NHOPS 4 268 static struct nhop_object * 269 clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags) 270 { 271 #ifdef ROUTE_MPATH 272 const struct weightened_nhop *wn; 273 struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS]; 274 uint32_t num_nhops; 275 #endif 276 struct nhop_object *nh = NULL; 277 int error; 278 279 if (unhop->un_nhop_src != NULL) { 280 IF_DEBUG_LEVEL(LOG_DEBUG2) { 281 char nhbuf[NHOP_PRINT_BUFSIZE]; 282 nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf)); 283 FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src, 284 "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum, 285 family, nh_flags); 286 } 287 struct nhop_object *nh; 288 nh = nhop_alloc(fibnum, AF_UNSPEC); 289 if (nh == NULL) 290 return (NULL); 291 nhop_copy(nh, unhop->un_nhop_src); 292 /* Check that nexthop gateway is compatible with the new family */ 293 if (!nhop_set_upper_family(nh, family)) { 294 nhop_free(nh); 295 return (NULL); 296 } 297 nhop_set_uidx(nh, unhop->un_idx); 298 nhop_set_pxtype_flag(nh, nh_flags); 299 return (nhop_get_nhop(nh, &error)); 300 } 301 #ifdef ROUTE_MPATH 302 wn = unhop->un_nhgrp_src; 303 num_nhops = unhop->un_nhgrp_count; 304 305 if (num_nhops > MAX_STACK_NHOPS) { 306 wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT); 307 if (wn_new == NULL) 308 return (NULL); 309 } else 310 wn_new = wn_base; 311 312 for (int i = 0; i < num_nhops; i++) { 313 uint32_t uidx = nhop_get_uidx(wn[i].nh); 314 MPASS(uidx != 0); 315 wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error); 316 if (error != 0) 317 break; 318 wn_new[i].weight = wn[i].weight; 319 } 320 321 if (error == 0) { 322 struct rib_head *rh = nhop_get_rh(wn_new[0].nh); 323 struct nhgrp_object *nhg; 324 325 error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg); 326 nh = (struct nhop_object *)nhg; 327 } 328 329 if (wn_new != wn_base) 330 free(wn_new, M_TEMP); 331 #endif 332 return (nh); 333 } 334 335 static void 336 destroy_unhop(struct user_nhop *unhop) 337 { 338 if (unhop->un_nhop != NULL) 339 nhop_free_any(unhop->un_nhop); 340 if (unhop->un_nhop_src != NULL) 341 nhop_free_any(unhop->un_nhop_src); 342 free(unhop, M_NETLINK); 343 } 344 345 static void 346 destroy_unhop_epoch(epoch_context_t ctx) 347 { 348 struct user_nhop *unhop; 349 350 unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx); 351 352 destroy_unhop(unhop); 353 } 354 355 static uint32_t 356 find_spare_uidx(struct unhop_ctl *ctl) 357 { 358 struct user_nhop *unhop, key = {}; 359 uint32_t uidx = 0; 360 UN_TRACKER; 361 362 UN_RLOCK(ctl); 363 /* This should return spare uid with 75% of 65k used in ~99/100 cases */ 364 for (int i = 0; i < 16; i++) { 365 key.un_idx = (arc4random() % 65536) + 65536 * 4; 366 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 367 if (unhop == NULL) { 368 uidx = key.un_idx; 369 break; 370 } 371 } 372 UN_RUNLOCK(ctl); 373 374 return (uidx); 375 } 376 377 378 /* 379 * Actual netlink code 380 */ 381 struct netlink_walkargs { 382 struct nl_writer *nw; 383 struct nlmsghdr hdr; 384 struct nlpcb *so; 385 int family; 386 int error; 387 int count; 388 int dumped; 389 }; 390 #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem 391 392 static bool 393 dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr, 394 struct nl_writer *nw) 395 { 396 397 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) 398 goto enomem; 399 400 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); 401 nhm->nh_family = AF_UNSPEC; 402 nhm->nh_scope = 0; 403 nhm->nh_protocol = unhop->un_protocol; 404 nhm->nh_flags = 0; 405 406 nlattr_add_u32(nw, NHA_ID, unhop->un_idx); 407 nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH); 408 409 struct weightened_nhop *wn = unhop->un_nhgrp_src; 410 uint32_t num_nhops = unhop->un_nhgrp_count; 411 /* TODO: a better API? */ 412 int nla_len = sizeof(struct nlattr); 413 nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp)); 414 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 415 if (nla == NULL) 416 goto enomem; 417 nla->nla_type = NHA_GROUP; 418 nla->nla_len = nla_len; 419 for (int i = 0; i < num_nhops; i++) { 420 struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i]; 421 grp->id = nhop_get_uidx(wn[i].nh); 422 grp->weight = wn[i].weight; 423 grp->resvd1 = 0; 424 grp->resvd2 = 0; 425 } 426 427 if (nlmsg_end(nw)) 428 return (true); 429 enomem: 430 NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory"); 431 nlmsg_abort(nw); 432 return (false); 433 } 434 435 static bool 436 dump_nhop(const struct nhop_object *nh, uint32_t uidx, struct nlmsghdr *hdr, 437 struct nl_writer *nw) 438 { 439 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) 440 goto enomem; 441 442 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); 443 ENOMEM_IF_NULL(nhm); 444 nhm->nh_family = nhop_get_neigh_family(nh); 445 nhm->nh_scope = 0; // XXX: what's that? 446 nhm->nh_protocol = nhop_get_origin(nh); 447 nhm->nh_flags = 0; 448 449 if (uidx != 0) 450 nlattr_add_u32(nw, NHA_ID, uidx); 451 if (nh->nh_flags & NHF_BLACKHOLE) { 452 nlattr_add_flag(nw, NHA_BLACKHOLE); 453 goto done; 454 } 455 nlattr_add_u32(nw, NHA_OIF, if_getindex(nh->nh_ifp)); 456 457 switch (nh->gw_sa.sa_family) { 458 #ifdef INET 459 case AF_INET: 460 nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 461 break; 462 #endif 463 #ifdef INET6 464 case AF_INET6: 465 { 466 struct in6_addr addr = nh->gw6_sa.sin6_addr; 467 in6_clearscope(&addr); 468 nlattr_add(nw, NHA_GATEWAY, 16, &addr); 469 break; 470 } 471 #endif 472 } 473 474 int off = nlattr_add_nested(nw, NHA_FREEBSD); 475 if (off != 0) { 476 nlattr_add_u32(nw, NHAF_AIF, if_getindex(nh->nh_aifp)); 477 478 if (uidx == 0) { 479 nlattr_add_u32(nw, NHAF_KID, nhop_get_idx(nh)); 480 nlattr_add_u32(nw, NHAF_FAMILY, nhop_get_upper_family(nh)); 481 nlattr_add_u32(nw, NHAF_TABLE, nhop_get_fibnum(nh)); 482 } 483 484 nlattr_set_len(nw, off); 485 } 486 487 done: 488 if (nlmsg_end(nw)) 489 return (true); 490 enomem: 491 nlmsg_abort(nw); 492 return (false); 493 } 494 495 static void 496 dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, 497 struct nl_writer *nw) 498 { 499 if (unhop->un_nhop_src != NULL) 500 dump_nhop(unhop->un_nhop_src, unhop->un_idx, hdr, nw); 501 else 502 dump_nhgrp(unhop, hdr, nw); 503 } 504 505 static int 506 delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx) 507 { 508 struct user_nhop *unhop_ret, *unhop_base, *unhop_chain; 509 510 struct user_nhop key = { .un_idx = uidx }; 511 512 UN_WLOCK(ctl); 513 514 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base); 515 516 if (unhop_base != NULL) { 517 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret); 518 IF_DEBUG_LEVEL(LOG_DEBUG2) { 519 char nhbuf[NHOP_PRINT_BUFSIZE]; 520 nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf)); 521 FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop, 522 "removed base nhop %u: %s", uidx, nhbuf); 523 } 524 /* Unlink all child nexhops as well, keeping the chain intact */ 525 unhop_chain = unhop_base->un_nextchild; 526 while (unhop_chain != NULL) { 527 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain, 528 unhop_ret); 529 MPASS(unhop_chain == unhop_ret); 530 IF_DEBUG_LEVEL(LOG_DEBUG3) { 531 char nhbuf[NHOP_PRINT_BUFSIZE]; 532 nhop_print_buf_any(unhop_chain->un_nhop, 533 nhbuf, sizeof(nhbuf)); 534 FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop, 535 "removed child nhop %u: %s", uidx, nhbuf); 536 } 537 unhop_chain = unhop_chain->un_nextchild; 538 } 539 } 540 541 UN_WUNLOCK(ctl); 542 543 if (unhop_base == NULL) { 544 NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx); 545 return (ENOENT); 546 } 547 548 /* Report nexthop deletion */ 549 struct netlink_walkargs wa = { 550 .hdr.nlmsg_pid = hdr->nlmsg_pid, 551 .hdr.nlmsg_seq = hdr->nlmsg_seq, 552 .hdr.nlmsg_flags = hdr->nlmsg_flags, 553 .hdr.nlmsg_type = NL_RTM_DELNEXTHOP, 554 }; 555 556 struct nl_writer nw = {}; 557 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { 558 NL_LOG(LOG_DEBUG, "error allocating message writer"); 559 return (ENOMEM); 560 } 561 562 dump_unhop(unhop_base, &wa.hdr, &nw); 563 nlmsg_flush(&nw); 564 565 while (unhop_base != NULL) { 566 unhop_chain = unhop_base->un_nextchild; 567 NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx); 568 unhop_base = unhop_chain; 569 } 570 571 return (0); 572 } 573 574 static void 575 consider_resize(struct unhop_ctl *ctl, uint32_t new_size) 576 { 577 void *new_ptr = NULL; 578 size_t alloc_size; 579 580 if (new_size == 0) 581 return; 582 583 if (new_size != 0) { 584 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size); 585 new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); 586 if (new_ptr == NULL) 587 return; 588 } 589 590 NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size); 591 UN_WLOCK(ctl); 592 if (new_ptr != NULL) { 593 CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size); 594 } 595 UN_WUNLOCK(ctl); 596 597 598 if (new_ptr != NULL) 599 free(new_ptr, M_NETLINK); 600 } 601 602 static bool __noinline 603 vnet_init_unhops(void) 604 { 605 uint32_t num_buckets = 16; 606 size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 607 608 struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK, 609 M_NOWAIT | M_ZERO); 610 if (ctl == NULL) 611 return (false); 612 613 void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); 614 if (ptr == NULL) { 615 free(ctl, M_NETLINK); 616 return (false); 617 } 618 CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets); 619 UN_LOCK_INIT(ctl); 620 621 if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) { 622 free(ptr, M_NETLINK); 623 free(ctl, M_NETLINK); 624 } 625 626 if (atomic_load_ptr(&V_un_ctl) == NULL) 627 return (false); 628 629 NL_LOG(LOG_NOTICE, "UNHOPS init done"); 630 631 return (true); 632 } 633 634 static void 635 vnet_destroy_unhops(const void *unused __unused) 636 { 637 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 638 struct user_nhop *unhop, *tmp; 639 640 if (ctl == NULL) 641 return; 642 V_un_ctl = NULL; 643 644 /* Wait till all unhop users finish their reads */ 645 NET_EPOCH_WAIT(); 646 647 UN_WLOCK(ctl); 648 CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) { 649 destroy_unhop(unhop); 650 } CHT_SLIST_FOREACH_SAFE_END; 651 UN_WUNLOCK(ctl); 652 653 free(ctl->un_head.ptr, M_NETLINK); 654 free(ctl, M_NETLINK); 655 } 656 VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY, 657 vnet_destroy_unhops, NULL); 658 659 static int 660 nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 661 { 662 int error = 0; 663 664 /* Verify attribute correctness */ 665 struct nexthop_grp *grp = NLA_DATA(nla); 666 int data_len = NLA_DATA_LEN(nla); 667 668 int count = data_len / sizeof(*grp); 669 if (count == 0 || (count * sizeof(*grp) != data_len)) { 670 NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len); 671 return (EINVAL); 672 } 673 674 *((struct nlattr **)target) = nla; 675 return (error); 676 } 677 678 static void 679 set_scope6(struct sockaddr *sa, if_t ifp) 680 { 681 #ifdef INET6 682 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { 683 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; 684 685 if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) 686 in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); 687 } 688 #endif 689 } 690 691 struct nl_parsed_nhop { 692 uint32_t nha_id; 693 uint8_t nha_blackhole; 694 uint8_t nha_groups; 695 uint8_t nhaf_knhops; 696 uint8_t nhaf_family; 697 struct ifnet *nha_oif; 698 struct sockaddr *nha_gw; 699 struct nlattr *nha_group; 700 uint8_t nh_family; 701 uint8_t nh_protocol; 702 uint32_t nhaf_table; 703 uint32_t nhaf_kid; 704 uint32_t nhaf_aif; 705 }; 706 707 #define _IN(_field) offsetof(struct nhmsg, _field) 708 #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field) 709 static struct nlattr_parser nla_p_nh_fbsd[] = { 710 { .type = NHAF_KNHOPS, .off = _OUT(nhaf_knhops), .cb = nlattr_get_flag }, 711 { .type = NHAF_TABLE, .off = _OUT(nhaf_table), .cb = nlattr_get_uint32 }, 712 { .type = NHAF_FAMILY, .off = _OUT(nhaf_family), .cb = nlattr_get_uint8 }, 713 { .type = NHAF_KID, .off = _OUT(nhaf_kid), .cb = nlattr_get_uint32 }, 714 { .type = NHAF_AIF, .off = _OUT(nhaf_aif), .cb = nlattr_get_uint32 }, 715 }; 716 NL_DECLARE_ATTR_PARSER(nh_fbsd_parser, nla_p_nh_fbsd); 717 718 static const struct nlfield_parser nlf_p_nh[] = { 719 { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 }, 720 { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 }, 721 }; 722 723 static const struct nlattr_parser nla_p_nh[] = { 724 { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 }, 725 { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg }, 726 { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag }, 727 { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp }, 728 { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip }, 729 { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag }, 730 { .type = NHA_FREEBSD, .arg = &nh_fbsd_parser, .cb = nlattr_get_nested }, 731 }; 732 #undef _IN 733 #undef _OUT 734 735 static bool 736 post_p_nh(void *_attrs, struct nl_pstate *npt) 737 { 738 struct nl_parsed_nhop *attrs = (struct nl_parsed_nhop *)_attrs; 739 740 set_scope6(attrs->nha_gw, attrs->nha_oif); 741 return (true); 742 } 743 NL_DECLARE_PARSER_EXT(nhmsg_parser, struct nhmsg, NULL, nlf_p_nh, nla_p_nh, post_p_nh); 744 745 static bool 746 eligible_nhg(const struct nhop_object *nh) 747 { 748 return (nh->nh_flags & NHF_GATEWAY); 749 } 750 751 static int 752 newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop) 753 { 754 struct nexthop_grp *grp = NLA_DATA(attrs->nha_group); 755 int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp); 756 struct weightened_nhop *wn; 757 758 wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO); 759 if (wn == NULL) 760 return (ENOMEM); 761 762 for (int i = 0; i < count; i++) { 763 struct user_nhop *unhop; 764 unhop = nl_find_base_unhop(ctl, grp[i].id); 765 if (unhop == NULL) { 766 NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id); 767 free(wn, M_NETLINK); 768 return (ESRCH); 769 } else if (unhop->un_nhop_src == NULL) { 770 NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported", 771 grp[i].id); 772 free(wn, M_NETLINK); 773 return (ENOTSUP); 774 } else if (!eligible_nhg(unhop->un_nhop_src)) { 775 NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible", 776 grp[i].id); 777 free(wn, M_NETLINK); 778 return (ENOTSUP); 779 } 780 /* 781 * TODO: consider more rigid eligibility checks: 782 * restrict nexthops with the same gateway 783 */ 784 wn[i].nh = unhop->un_nhop_src; 785 wn[i].weight = grp[i].weight; 786 } 787 unhop->un_nhgrp_src = wn; 788 unhop->un_nhgrp_count = count; 789 return (0); 790 } 791 792 /* 793 * Sets nexthop @nh gateway specified by @gw. 794 * If gateway is IPv6 link-local, alters @gw to include scopeid equal to 795 * @ifp ifindex. 796 * Returns 0 on success or errno. 797 */ 798 int 799 nl_set_nexthop_gw(struct nhop_object *nh, struct sockaddr *gw, if_t ifp, 800 struct nl_pstate *npt) 801 { 802 #ifdef INET6 803 if (gw->sa_family == AF_INET6) { 804 struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)gw; 805 if (IN6_IS_ADDR_LINKLOCAL(&gw6->sin6_addr)) { 806 if (ifp == NULL) { 807 NLMSG_REPORT_ERR_MSG(npt, "interface not set"); 808 return (EINVAL); 809 } 810 in6_set_unicast_scopeid(&gw6->sin6_addr, if_getindex(ifp)); 811 } 812 } 813 #endif 814 nhop_set_gw(nh, gw, true); 815 return (0); 816 } 817 818 static int 819 newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop, struct nl_pstate *npt) 820 { 821 struct ifaddr *ifa = NULL; 822 struct nhop_object *nh; 823 int error; 824 825 if (!attrs->nha_blackhole) { 826 if (attrs->nha_gw == NULL) { 827 NLMSG_REPORT_ERR_MSG(npt, "missing NHA_GATEWAY"); 828 return (EINVAL); 829 } 830 if (attrs->nha_oif == NULL) { 831 NLMSG_REPORT_ERR_MSG(npt, "missing NHA_OIF"); 832 return (EINVAL); 833 } 834 if (ifa == NULL) 835 ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif); 836 if (ifa == NULL) { 837 NLMSG_REPORT_ERR_MSG(npt, "Unable to determine default source IP"); 838 return (EINVAL); 839 } 840 } 841 842 int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family; 843 844 nh = nhop_alloc(RT_DEFAULT_FIB, family); 845 if (nh == NULL) { 846 NL_LOG(LOG_DEBUG, "Unable to allocate nexthop"); 847 return (ENOMEM); 848 } 849 nhop_set_uidx(nh, attrs->nha_id); 850 nhop_set_origin(nh, attrs->nh_protocol); 851 852 if (attrs->nha_blackhole) 853 nhop_set_blackhole(nh, NHF_BLACKHOLE); 854 else { 855 error = nl_set_nexthop_gw(nh, attrs->nha_gw, attrs->nha_oif, npt); 856 if (error != 0) { 857 nhop_free(nh); 858 return (error); 859 } 860 nhop_set_transmit_ifp(nh, attrs->nha_oif); 861 nhop_set_src(nh, ifa); 862 } 863 864 error = nhop_get_unlinked(nh); 865 if (error != 0) { 866 NL_LOG(LOG_DEBUG, "unable to finalize nexthop"); 867 return (error); 868 } 869 870 IF_DEBUG_LEVEL(LOG_DEBUG2) { 871 char nhbuf[NHOP_PRINT_BUFSIZE]; 872 nhop_print_buf(nh, nhbuf, sizeof(nhbuf)); 873 NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf); 874 } 875 876 unhop->un_nhop_src = nh; 877 return (0); 878 } 879 880 static int 881 rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 882 struct nl_pstate *npt) 883 { 884 struct user_nhop *unhop; 885 int error; 886 887 if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops())) 888 return (ENOMEM); 889 struct unhop_ctl *ctl = V_un_ctl; 890 891 struct nl_parsed_nhop attrs = {}; 892 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 893 if (error != 0) 894 return (error); 895 896 /* 897 * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class 898 * citizen. 899 */ 900 if (attrs.nha_id == 0) { 901 attrs.nha_id = find_spare_uidx(ctl); 902 if (attrs.nha_id == 0) { 903 NL_LOG(LOG_DEBUG, "Unable to get spare uidx"); 904 return (ENOSPC); 905 } 906 } 907 908 NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? if_getindex(attrs.nha_oif) : 0); 909 910 unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); 911 if (unhop == NULL) { 912 NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop"); 913 return (ENOMEM); 914 } 915 unhop->un_idx = attrs.nha_id; 916 unhop->un_protocol = attrs.nh_protocol; 917 918 if (attrs.nha_group) 919 error = newnhg(ctl, &attrs, unhop); 920 else 921 error = newnhop(&attrs, unhop, npt); 922 923 if (error != 0) { 924 free(unhop, M_NETLINK); 925 return (error); 926 } 927 928 UN_WLOCK(ctl); 929 /* Check if uidx already exists */ 930 struct user_nhop *tmp = NULL; 931 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp); 932 if (tmp != NULL) { 933 UN_WUNLOCK(ctl); 934 NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id); 935 destroy_unhop(unhop); 936 return (EEXIST); 937 } 938 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop); 939 uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head); 940 UN_WUNLOCK(ctl); 941 942 /* Report addition of the next nexhop */ 943 struct netlink_walkargs wa = { 944 .hdr.nlmsg_pid = hdr->nlmsg_pid, 945 .hdr.nlmsg_seq = hdr->nlmsg_seq, 946 .hdr.nlmsg_flags = hdr->nlmsg_flags, 947 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, 948 }; 949 950 struct nl_writer nw = {}; 951 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { 952 NL_LOG(LOG_DEBUG, "error allocating message writer"); 953 return (ENOMEM); 954 } 955 956 dump_unhop(unhop, &wa.hdr, &nw); 957 nlmsg_flush(&nw); 958 959 consider_resize(ctl, num_buckets_new); 960 961 return (0); 962 } 963 964 static int 965 rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 966 struct nl_pstate *npt) 967 { 968 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 969 int error; 970 971 if (__predict_false(ctl == NULL)) 972 return (ESRCH); 973 974 struct nl_parsed_nhop attrs = {}; 975 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 976 if (error != 0) 977 return (error); 978 979 if (attrs.nha_id == 0) { 980 NL_LOG(LOG_DEBUG, "NHA_ID not set"); 981 return (EINVAL); 982 } 983 984 error = delete_unhop(ctl, hdr, attrs.nha_id); 985 986 return (error); 987 } 988 989 static bool 990 match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop) 991 { 992 if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id) 993 return (false); 994 if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL) 995 return (false); 996 if (attrs->nha_oif != NULL && 997 (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif)) 998 return (false); 999 1000 return (true); 1001 } 1002 1003 static int 1004 rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 1005 struct nl_pstate *npt) 1006 { 1007 struct user_nhop *unhop; 1008 UN_TRACKER; 1009 int error; 1010 1011 struct nl_parsed_nhop attrs = {}; 1012 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 1013 if (error != 0) 1014 return (error); 1015 1016 struct netlink_walkargs wa = { 1017 .nw = npt->nw, 1018 .hdr.nlmsg_pid = hdr->nlmsg_pid, 1019 .hdr.nlmsg_seq = hdr->nlmsg_seq, 1020 .hdr.nlmsg_flags = hdr->nlmsg_flags, 1021 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, 1022 }; 1023 1024 if (attrs.nha_id != 0) { 1025 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 1026 struct user_nhop key = { .un_idx = attrs.nha_id }; 1027 1028 if (__predict_false(ctl == NULL)) 1029 return (ESRCH); 1030 1031 NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id); 1032 UN_RLOCK(ctl); 1033 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 1034 UN_RUNLOCK(ctl); 1035 1036 if (unhop == NULL) 1037 return (ESRCH); 1038 dump_unhop(unhop, &wa.hdr, wa.nw); 1039 return (0); 1040 } else if (attrs.nhaf_kid != 0) { 1041 struct nhop_iter iter = { 1042 .fibnum = attrs.nhaf_table, 1043 .family = attrs.nhaf_family, 1044 }; 1045 int error = ESRCH; 1046 1047 NL_LOG(LOG_DEBUG2, "START table %u family %d", attrs.nhaf_table, attrs.nhaf_family); 1048 for (struct nhop_object *nh = nhops_iter_start(&iter); nh; 1049 nh = nhops_iter_next(&iter)) { 1050 NL_LOG(LOG_DEBUG3, "get %u", nhop_get_idx(nh)); 1051 if (nhop_get_idx(nh) == attrs.nhaf_kid) { 1052 dump_nhop(nh, 0, &wa.hdr, wa.nw); 1053 error = 0; 1054 break; 1055 } 1056 } 1057 nhops_iter_stop(&iter); 1058 return (error); 1059 } else if (attrs.nhaf_knhops) { 1060 struct nhop_iter iter = { 1061 .fibnum = attrs.nhaf_table, 1062 .family = attrs.nhaf_family, 1063 }; 1064 1065 NL_LOG(LOG_DEBUG2, "DUMP table %u family %d", attrs.nhaf_table, attrs.nhaf_family); 1066 wa.hdr.nlmsg_flags |= NLM_F_MULTI; 1067 for (struct nhop_object *nh = nhops_iter_start(&iter); nh; 1068 nh = nhops_iter_next(&iter)) { 1069 dump_nhop(nh, 0, &wa.hdr, wa.nw); 1070 } 1071 nhops_iter_stop(&iter); 1072 } else { 1073 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 1074 1075 if (__predict_false(ctl == NULL)) 1076 return (ESRCH); 1077 1078 NL_LOG(LOG_DEBUG2, "DUMP unhops"); 1079 UN_RLOCK(ctl); 1080 wa.hdr.nlmsg_flags |= NLM_F_MULTI; 1081 CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) { 1082 if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop)) 1083 dump_unhop(unhop, &wa.hdr, wa.nw); 1084 } CHT_SLIST_FOREACH_END; 1085 UN_RUNLOCK(ctl); 1086 } 1087 1088 if (wa.error == 0) { 1089 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) 1090 return (ENOMEM); 1091 } 1092 return (0); 1093 } 1094 1095 static const struct rtnl_cmd_handler cmd_handlers[] = { 1096 { 1097 .cmd = NL_RTM_NEWNEXTHOP, 1098 .name = "RTM_NEWNEXTHOP", 1099 .cb = &rtnl_handle_newnhop, 1100 .priv = PRIV_NET_ROUTE, 1101 }, 1102 { 1103 .cmd = NL_RTM_DELNEXTHOP, 1104 .name = "RTM_DELNEXTHOP", 1105 .cb = &rtnl_handle_delnhop, 1106 .priv = PRIV_NET_ROUTE, 1107 }, 1108 { 1109 .cmd = NL_RTM_GETNEXTHOP, 1110 .name = "RTM_GETNEXTHOP", 1111 .cb = &rtnl_handle_getnhop, 1112 } 1113 }; 1114 1115 static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser, &nh_fbsd_parser }; 1116 1117 void 1118 rtnl_nexthops_init(void) 1119 { 1120 NL_VERIFY_PARSERS(all_parsers); 1121 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1122 } 1123