1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_netlink.h" 29 30 #include <sys/cdefs.h> 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_route.h" 34 #include <sys/types.h> 35 #include <sys/ck.h> 36 #include <sys/epoch.h> 37 #include <sys/kernel.h> 38 #include <sys/malloc.h> 39 #include <sys/rmlock.h> 40 #include <sys/socket.h> 41 42 #include <net/if.h> 43 #include <net/route.h> 44 #include <net/route/nhop.h> 45 #include <net/route/nhop_utils.h> 46 47 #include <net/route/route_ctl.h> 48 #include <net/route/route_var.h> 49 #include <netinet6/scope6_var.h> 50 #include <netlink/netlink.h> 51 #include <netlink/netlink_ctl.h> 52 #include <netlink/netlink_route.h> 53 #include <netlink/route/route_var.h> 54 55 #define DEBUG_MOD_NAME nl_nhop 56 #define DEBUG_MAX_LEVEL LOG_DEBUG3 57 #include <netlink/netlink_debug.h> 58 _DECLARE_DEBUG(LOG_INFO); 59 60 /* 61 * This file contains the logic to maintain kernel nexthops and 62 * nexhop groups based om the data provided by the user. 63 * 64 * Kernel stores (nearly) all of the routing data in the nexthops, 65 * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT). 66 * 67 * Netlink API provides higher-level abstraction for the user. Each 68 * user-created nexthop may map to multiple kernel nexthops. 69 * 70 * The following variations require separate kernel nexthop to be 71 * created: 72 * * prefix flags (NHF_HOST, NHF_DEFAULT) 73 * * using IPv6 gateway for IPv4 routes 74 * * different fibnum 75 * 76 * These kernel nexthops have the lifetime bound to the lifetime of 77 * the user_nhop object. They are not collected until user requests 78 * to delete the created user_nhop. 79 * 80 */ 81 struct user_nhop { 82 uint32_t un_idx; /* Userland-provided index */ 83 uint32_t un_fibfam; /* fibnum+af(as highest byte) */ 84 uint8_t un_protocol; /* protocol that install the record */ 85 struct nhop_object *un_nhop; /* "production" nexthop */ 86 struct nhop_object *un_nhop_src; /* nexthop to copy from */ 87 struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */ 88 uint32_t un_nhgrp_count; /* number of nexthops */ 89 struct user_nhop *un_next; /* next item in hash chain */ 90 struct user_nhop *un_nextchild; /* master -> children */ 91 struct epoch_context un_epoch_ctx; /* epoch ctl helper */ 92 }; 93 94 /* produce hash value for an object */ 95 #define unhop_hash_obj(_obj) (hash_unhop(_obj)) 96 /* compare two objects */ 97 #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two)) 98 /* next object accessor */ 99 #define unhop_next(_obj) (_obj)->un_next 100 101 CHT_SLIST_DEFINE(unhop, struct user_nhop); 102 103 struct unhop_ctl { 104 struct unhop_head un_head; 105 struct rmlock un_lock; 106 }; 107 #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl") 108 #define UN_TRACKER struct rm_priotracker un_tracker 109 #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker) 110 #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker) 111 112 #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock); 113 #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock); 114 115 VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL; 116 #define V_un_ctl VNET(un_ctl) 117 118 static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size); 119 static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b); 120 static unsigned int hash_unhop(const struct user_nhop *obj); 121 122 static void destroy_unhop(struct user_nhop *unhop); 123 static struct nhop_object *clone_unhop(const struct user_nhop *unhop, 124 uint32_t fibnum, int family, int nh_flags); 125 126 static int 127 cmp_unhop(const struct user_nhop *a, const struct user_nhop *b) 128 { 129 return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam); 130 } 131 132 /* 133 * Hash callback: calculate hash of an object 134 */ 135 static unsigned int 136 hash_unhop(const struct user_nhop *obj) 137 { 138 return (obj->un_idx ^ obj->un_fibfam); 139 } 140 141 #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0) 142 143 /* 144 * Factory interface for creating matching kernel nexthops/nexthop groups 145 * 146 * @uidx: userland nexhop index used to create the nexthop 147 * @fibnum: fibnum nexthop will be used in 148 * @family: upper family nexthop will be used in 149 * @nh_flags: desired nexthop prefix flags 150 * @perror: pointer to store error to 151 * 152 * Returns referenced nexthop linked to @fibnum/@family rib on success. 153 */ 154 struct nhop_object * 155 nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, 156 int nh_flags, int *perror) 157 { 158 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 159 UN_TRACKER; 160 161 if (__predict_false(ctl == NULL)) 162 return (NULL); 163 164 struct user_nhop key= { 165 .un_idx = uidx, 166 .un_fibfam = fibnum | ((uint32_t)family) << 24, 167 }; 168 struct user_nhop *unhop; 169 170 nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT); 171 172 if (__predict_false(family == 0)) 173 return (NULL); 174 175 UN_RLOCK(ctl); 176 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 177 if (unhop != NULL) { 178 struct nhop_object *nh = unhop->un_nhop; 179 UN_RLOCK(ctl); 180 *perror = 0; 181 nhop_ref_any(nh); 182 return (nh); 183 } 184 185 /* 186 * Exact nexthop not found. Search for template nexthop to clone from. 187 */ 188 key.un_fibfam = 0; 189 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 190 if (unhop == NULL) { 191 UN_RUNLOCK(ctl); 192 *perror = ESRCH; 193 return (NULL); 194 } 195 196 UN_RUNLOCK(ctl); 197 198 /* Create entry to insert first */ 199 struct user_nhop *un_new, *un_tmp; 200 un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); 201 if (un_new == NULL) { 202 *perror = ENOMEM; 203 return (NULL); 204 } 205 un_new->un_idx = uidx; 206 un_new->un_fibfam = fibnum | ((uint32_t)family) << 24; 207 208 /* Relying on epoch to protect unhop here */ 209 un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags); 210 if (un_new->un_nhop == NULL) { 211 free(un_new, M_NETLINK); 212 *perror = ENOMEM; 213 return (NULL); 214 } 215 216 /* Insert back and report */ 217 UN_WLOCK(ctl); 218 219 /* First, find template record once again */ 220 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 221 if (unhop == NULL) { 222 /* Someone deleted the nexthop during the call */ 223 UN_WUNLOCK(ctl); 224 *perror = ESRCH; 225 destroy_unhop(un_new); 226 return (NULL); 227 } 228 229 /* Second, check the direct match */ 230 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp); 231 struct nhop_object *nh; 232 if (un_tmp != NULL) { 233 /* Another thread already created the desired nextop, use it */ 234 nh = un_tmp->un_nhop; 235 } else { 236 /* Finally, insert the new nexthop and link it to the primary */ 237 nh = un_new->un_nhop; 238 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new); 239 un_new->un_nextchild = unhop->un_nextchild; 240 unhop->un_nextchild = un_new; 241 un_new = NULL; 242 NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh); 243 } 244 245 UN_WUNLOCK(ctl); 246 247 if (un_new != NULL) 248 destroy_unhop(un_new); 249 250 *perror = 0; 251 nhop_ref_any(nh); 252 return (nh); 253 } 254 255 static struct user_nhop * 256 nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx) 257 { 258 struct user_nhop key= { .un_idx = uidx }; 259 struct user_nhop *unhop = NULL; 260 UN_TRACKER; 261 262 UN_RLOCK(ctl); 263 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 264 UN_RUNLOCK(ctl); 265 266 return (unhop); 267 } 268 269 #define MAX_STACK_NHOPS 4 270 static struct nhop_object * 271 clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags) 272 { 273 #ifdef ROUTE_MPATH 274 const struct weightened_nhop *wn; 275 struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS]; 276 uint32_t num_nhops; 277 #endif 278 struct nhop_object *nh = NULL; 279 int error; 280 281 if (unhop->un_nhop_src != NULL) { 282 IF_DEBUG_LEVEL(LOG_DEBUG2) { 283 char nhbuf[NHOP_PRINT_BUFSIZE]; 284 nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf)); 285 FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src, 286 "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum, 287 family, nh_flags); 288 } 289 struct nhop_object *nh; 290 nh = nhop_alloc(fibnum, AF_UNSPEC); 291 if (nh == NULL) 292 return (NULL); 293 nhop_copy(nh, unhop->un_nhop_src); 294 /* Check that nexthop gateway is compatible with the new family */ 295 if (!nhop_set_upper_family(nh, family)) { 296 nhop_free(nh); 297 return (NULL); 298 } 299 nhop_set_uidx(nh, unhop->un_idx); 300 nhop_set_pxtype_flag(nh, nh_flags); 301 return (nhop_get_nhop(nh, &error)); 302 } 303 #ifdef ROUTE_MPATH 304 wn = unhop->un_nhgrp_src; 305 num_nhops = unhop->un_nhgrp_count; 306 307 if (num_nhops > MAX_STACK_NHOPS) { 308 wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT); 309 if (wn_new == NULL) 310 return (NULL); 311 } else 312 wn_new = wn_base; 313 314 for (int i = 0; i < num_nhops; i++) { 315 uint32_t uidx = nhop_get_uidx(wn[i].nh); 316 MPASS(uidx != 0); 317 wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error); 318 if (error != 0) 319 break; 320 wn_new[i].weight = wn[i].weight; 321 } 322 323 if (error == 0) { 324 struct rib_head *rh = nhop_get_rh(wn_new[0].nh); 325 struct nhgrp_object *nhg; 326 327 error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg); 328 nh = (struct nhop_object *)nhg; 329 } 330 331 if (wn_new != wn_base) 332 free(wn_new, M_TEMP); 333 #endif 334 return (nh); 335 } 336 337 static void 338 destroy_unhop(struct user_nhop *unhop) 339 { 340 if (unhop->un_nhop != NULL) 341 nhop_free_any(unhop->un_nhop); 342 if (unhop->un_nhop_src != NULL) 343 nhop_free_any(unhop->un_nhop_src); 344 free(unhop, M_NETLINK); 345 } 346 347 static void 348 destroy_unhop_epoch(epoch_context_t ctx) 349 { 350 struct user_nhop *unhop; 351 352 unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx); 353 354 destroy_unhop(unhop); 355 } 356 357 static uint32_t 358 find_spare_uidx(struct unhop_ctl *ctl) 359 { 360 struct user_nhop *unhop, key = {}; 361 uint32_t uidx = 0; 362 UN_TRACKER; 363 364 UN_RLOCK(ctl); 365 /* This should return spare uid with 75% of 65k used in ~99/100 cases */ 366 for (int i = 0; i < 16; i++) { 367 key.un_idx = (arc4random() % 65536) + 65536 * 4; 368 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 369 if (unhop == NULL) { 370 uidx = key.un_idx; 371 break; 372 } 373 } 374 UN_RUNLOCK(ctl); 375 376 return (uidx); 377 } 378 379 380 /* 381 * Actual netlink code 382 */ 383 struct netlink_walkargs { 384 struct nl_writer *nw; 385 struct nlmsghdr hdr; 386 struct nlpcb *so; 387 int family; 388 int error; 389 int count; 390 int dumped; 391 }; 392 #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem 393 394 static bool 395 dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr, 396 struct nl_writer *nw) 397 { 398 399 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) 400 goto enomem; 401 402 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); 403 nhm->nh_family = AF_UNSPEC; 404 nhm->nh_scope = 0; 405 nhm->nh_protocol = unhop->un_protocol; 406 nhm->nh_flags = 0; 407 408 nlattr_add_u32(nw, NHA_ID, unhop->un_idx); 409 nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH); 410 411 struct weightened_nhop *wn = unhop->un_nhgrp_src; 412 uint32_t num_nhops = unhop->un_nhgrp_count; 413 /* TODO: a better API? */ 414 int nla_len = sizeof(struct nlattr); 415 nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp)); 416 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 417 if (nla == NULL) 418 goto enomem; 419 nla->nla_type = NHA_GROUP; 420 nla->nla_len = nla_len; 421 for (int i = 0; i < num_nhops; i++) { 422 struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i]; 423 grp->id = nhop_get_uidx(wn[i].nh); 424 grp->weight = wn[i].weight; 425 grp->resvd1 = 0; 426 grp->resvd2 = 0; 427 } 428 429 if (nlmsg_end(nw)) 430 return (true); 431 enomem: 432 NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory"); 433 nlmsg_abort(nw); 434 return (false); 435 } 436 437 static bool 438 dump_nhop(const struct nhop_object *nh, uint32_t uidx, struct nlmsghdr *hdr, 439 struct nl_writer *nw) 440 { 441 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) 442 goto enomem; 443 444 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); 445 ENOMEM_IF_NULL(nhm); 446 nhm->nh_family = nhop_get_neigh_family(nh); 447 nhm->nh_scope = 0; // XXX: what's that? 448 nhm->nh_protocol = nhop_get_origin(nh); 449 nhm->nh_flags = 0; 450 451 if (uidx != 0) 452 nlattr_add_u32(nw, NHA_ID, uidx); 453 if (nh->nh_flags & NHF_BLACKHOLE) { 454 nlattr_add_flag(nw, NHA_BLACKHOLE); 455 goto done; 456 } 457 nlattr_add_u32(nw, NHA_OIF, if_getindex(nh->nh_ifp)); 458 459 switch (nh->gw_sa.sa_family) { 460 #ifdef INET 461 case AF_INET: 462 nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 463 break; 464 #endif 465 #ifdef INET6 466 case AF_INET6: 467 { 468 struct in6_addr addr = nh->gw6_sa.sin6_addr; 469 in6_clearscope(&addr); 470 nlattr_add(nw, NHA_GATEWAY, 16, &addr); 471 break; 472 } 473 #endif 474 } 475 476 int off = nlattr_add_nested(nw, NHA_FREEBSD); 477 if (off != 0) { 478 nlattr_add_u32(nw, NHAF_AIF, if_getindex(nh->nh_aifp)); 479 480 if (uidx == 0) { 481 nlattr_add_u32(nw, NHAF_KID, nhop_get_idx(nh)); 482 nlattr_add_u32(nw, NHAF_FAMILY, nhop_get_upper_family(nh)); 483 nlattr_add_u32(nw, NHAF_TABLE, nhop_get_fibnum(nh)); 484 } 485 486 nlattr_set_len(nw, off); 487 } 488 489 done: 490 if (nlmsg_end(nw)) 491 return (true); 492 enomem: 493 nlmsg_abort(nw); 494 return (false); 495 } 496 497 static void 498 dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, 499 struct nl_writer *nw) 500 { 501 if (unhop->un_nhop_src != NULL) 502 dump_nhop(unhop->un_nhop_src, unhop->un_idx, hdr, nw); 503 else 504 dump_nhgrp(unhop, hdr, nw); 505 } 506 507 static int 508 delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx) 509 { 510 struct user_nhop *unhop_ret, *unhop_base, *unhop_chain; 511 512 struct user_nhop key = { .un_idx = uidx }; 513 514 UN_WLOCK(ctl); 515 516 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base); 517 518 if (unhop_base != NULL) { 519 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret); 520 IF_DEBUG_LEVEL(LOG_DEBUG2) { 521 char nhbuf[NHOP_PRINT_BUFSIZE]; 522 nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf)); 523 FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop, 524 "removed base nhop %u: %s", uidx, nhbuf); 525 } 526 /* Unlink all child nexhops as well, keeping the chain intact */ 527 unhop_chain = unhop_base->un_nextchild; 528 while (unhop_chain != NULL) { 529 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain, 530 unhop_ret); 531 MPASS(unhop_chain == unhop_ret); 532 IF_DEBUG_LEVEL(LOG_DEBUG3) { 533 char nhbuf[NHOP_PRINT_BUFSIZE]; 534 nhop_print_buf_any(unhop_chain->un_nhop, 535 nhbuf, sizeof(nhbuf)); 536 FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop, 537 "removed child nhop %u: %s", uidx, nhbuf); 538 } 539 unhop_chain = unhop_chain->un_nextchild; 540 } 541 } 542 543 UN_WUNLOCK(ctl); 544 545 if (unhop_base == NULL) { 546 NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx); 547 return (ENOENT); 548 } 549 550 /* Report nexthop deletion */ 551 struct netlink_walkargs wa = { 552 .hdr.nlmsg_pid = hdr->nlmsg_pid, 553 .hdr.nlmsg_seq = hdr->nlmsg_seq, 554 .hdr.nlmsg_flags = hdr->nlmsg_flags, 555 .hdr.nlmsg_type = NL_RTM_DELNEXTHOP, 556 }; 557 558 struct nl_writer nw = {}; 559 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { 560 NL_LOG(LOG_DEBUG, "error allocating message writer"); 561 return (ENOMEM); 562 } 563 564 dump_unhop(unhop_base, &wa.hdr, &nw); 565 nlmsg_flush(&nw); 566 567 while (unhop_base != NULL) { 568 unhop_chain = unhop_base->un_nextchild; 569 NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx); 570 unhop_base = unhop_chain; 571 } 572 573 return (0); 574 } 575 576 static void 577 consider_resize(struct unhop_ctl *ctl, uint32_t new_size) 578 { 579 void *new_ptr = NULL; 580 size_t alloc_size; 581 582 if (new_size == 0) 583 return; 584 585 if (new_size != 0) { 586 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size); 587 new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); 588 if (new_ptr == NULL) 589 return; 590 } 591 592 NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size); 593 UN_WLOCK(ctl); 594 if (new_ptr != NULL) { 595 CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size); 596 } 597 UN_WUNLOCK(ctl); 598 599 600 if (new_ptr != NULL) 601 free(new_ptr, M_NETLINK); 602 } 603 604 static bool __noinline 605 vnet_init_unhops(void) 606 { 607 uint32_t num_buckets = 16; 608 size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 609 610 struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK, 611 M_NOWAIT | M_ZERO); 612 if (ctl == NULL) 613 return (false); 614 615 void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); 616 if (ptr == NULL) { 617 free(ctl, M_NETLINK); 618 return (false); 619 } 620 CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets); 621 UN_LOCK_INIT(ctl); 622 623 if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) { 624 free(ptr, M_NETLINK); 625 free(ctl, M_NETLINK); 626 } 627 628 if (atomic_load_ptr(&V_un_ctl) == NULL) 629 return (false); 630 631 NL_LOG(LOG_NOTICE, "UNHOPS init done"); 632 633 return (true); 634 } 635 636 static void 637 vnet_destroy_unhops(const void *unused __unused) 638 { 639 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 640 struct user_nhop *unhop, *tmp; 641 642 if (ctl == NULL) 643 return; 644 V_un_ctl = NULL; 645 646 /* Wait till all unhop users finish their reads */ 647 NET_EPOCH_WAIT(); 648 649 UN_WLOCK(ctl); 650 CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) { 651 destroy_unhop(unhop); 652 } CHT_SLIST_FOREACH_SAFE_END; 653 UN_WUNLOCK(ctl); 654 655 free(ctl->un_head.ptr, M_NETLINK); 656 free(ctl, M_NETLINK); 657 } 658 VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY, 659 vnet_destroy_unhops, NULL); 660 661 static int 662 nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 663 { 664 int error = 0; 665 666 /* Verify attribute correctness */ 667 struct nexthop_grp *grp = NLA_DATA(nla); 668 int data_len = NLA_DATA_LEN(nla); 669 670 int count = data_len / sizeof(*grp); 671 if (count == 0 || (count * sizeof(*grp) != data_len)) { 672 NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len); 673 return (EINVAL); 674 } 675 676 *((struct nlattr **)target) = nla; 677 return (error); 678 } 679 680 static void 681 set_scope6(struct sockaddr *sa, if_t ifp) 682 { 683 #ifdef INET6 684 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { 685 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; 686 687 if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) 688 in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); 689 } 690 #endif 691 } 692 693 struct nl_parsed_nhop { 694 uint32_t nha_id; 695 uint8_t nha_blackhole; 696 uint8_t nha_groups; 697 uint8_t nhaf_knhops; 698 uint8_t nhaf_family; 699 struct ifnet *nha_oif; 700 struct sockaddr *nha_gw; 701 struct nlattr *nha_group; 702 uint8_t nh_family; 703 uint8_t nh_protocol; 704 uint32_t nhaf_table; 705 uint32_t nhaf_kid; 706 uint32_t nhaf_aif; 707 }; 708 709 #define _IN(_field) offsetof(struct nhmsg, _field) 710 #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field) 711 static struct nlattr_parser nla_p_nh_fbsd[] = { 712 { .type = NHAF_KNHOPS, .off = _OUT(nhaf_knhops), .cb = nlattr_get_flag }, 713 { .type = NHAF_TABLE, .off = _OUT(nhaf_table), .cb = nlattr_get_uint32 }, 714 { .type = NHAF_FAMILY, .off = _OUT(nhaf_family), .cb = nlattr_get_uint8 }, 715 { .type = NHAF_KID, .off = _OUT(nhaf_kid), .cb = nlattr_get_uint32 }, 716 { .type = NHAF_AIF, .off = _OUT(nhaf_aif), .cb = nlattr_get_uint32 }, 717 }; 718 NL_DECLARE_ATTR_PARSER(nh_fbsd_parser, nla_p_nh_fbsd); 719 720 static const struct nlfield_parser nlf_p_nh[] = { 721 { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 }, 722 { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 }, 723 }; 724 725 static const struct nlattr_parser nla_p_nh[] = { 726 { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 }, 727 { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg }, 728 { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag }, 729 { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp }, 730 { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip }, 731 { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag }, 732 { .type = NHA_FREEBSD, .arg = &nh_fbsd_parser, .cb = nlattr_get_nested }, 733 }; 734 #undef _IN 735 #undef _OUT 736 737 static bool 738 post_p_nh(void *_attrs, struct nl_pstate *npt) 739 { 740 struct nl_parsed_nhop *attrs = (struct nl_parsed_nhop *)_attrs; 741 742 set_scope6(attrs->nha_gw, attrs->nha_oif); 743 return (true); 744 } 745 NL_DECLARE_PARSER_EXT(nhmsg_parser, struct nhmsg, NULL, nlf_p_nh, nla_p_nh, post_p_nh); 746 747 static bool 748 eligible_nhg(const struct nhop_object *nh) 749 { 750 return (nh->nh_flags & NHF_GATEWAY); 751 } 752 753 static int 754 newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop) 755 { 756 struct nexthop_grp *grp = NLA_DATA(attrs->nha_group); 757 int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp); 758 struct weightened_nhop *wn; 759 760 wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO); 761 if (wn == NULL) 762 return (ENOMEM); 763 764 for (int i = 0; i < count; i++) { 765 struct user_nhop *unhop; 766 unhop = nl_find_base_unhop(ctl, grp[i].id); 767 if (unhop == NULL) { 768 NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id); 769 free(wn, M_NETLINK); 770 return (ESRCH); 771 } else if (unhop->un_nhop_src == NULL) { 772 NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported", 773 grp[i].id); 774 free(wn, M_NETLINK); 775 return (ENOTSUP); 776 } else if (!eligible_nhg(unhop->un_nhop_src)) { 777 NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible", 778 grp[i].id); 779 free(wn, M_NETLINK); 780 return (ENOTSUP); 781 } 782 /* 783 * TODO: consider more rigid eligibility checks: 784 * restrict nexthops with the same gateway 785 */ 786 wn[i].nh = unhop->un_nhop_src; 787 wn[i].weight = grp[i].weight; 788 } 789 unhop->un_nhgrp_src = wn; 790 unhop->un_nhgrp_count = count; 791 return (0); 792 } 793 794 /* 795 * Sets nexthop @nh gateway specified by @gw. 796 * If gateway is IPv6 link-local, alters @gw to include scopeid equal to 797 * @ifp ifindex. 798 * Returns 0 on success or errno. 799 */ 800 int 801 nl_set_nexthop_gw(struct nhop_object *nh, struct sockaddr *gw, if_t ifp, 802 struct nl_pstate *npt) 803 { 804 #ifdef INET6 805 if (gw->sa_family == AF_INET6) { 806 struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)gw; 807 if (IN6_IS_ADDR_LINKLOCAL(&gw6->sin6_addr)) { 808 if (ifp == NULL) { 809 NLMSG_REPORT_ERR_MSG(npt, "interface not set"); 810 return (EINVAL); 811 } 812 in6_set_unicast_scopeid(&gw6->sin6_addr, if_getindex(ifp)); 813 } 814 } 815 #endif 816 nhop_set_gw(nh, gw, true); 817 return (0); 818 } 819 820 static int 821 newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop, struct nl_pstate *npt) 822 { 823 struct ifaddr *ifa = NULL; 824 struct nhop_object *nh; 825 int error; 826 827 if (!attrs->nha_blackhole) { 828 if (attrs->nha_gw == NULL) { 829 NLMSG_REPORT_ERR_MSG(npt, "missing NHA_GATEWAY"); 830 return (EINVAL); 831 } 832 if (attrs->nha_oif == NULL) { 833 NLMSG_REPORT_ERR_MSG(npt, "missing NHA_OIF"); 834 return (EINVAL); 835 } 836 if (ifa == NULL) 837 ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif); 838 if (ifa == NULL) { 839 NLMSG_REPORT_ERR_MSG(npt, "Unable to determine default source IP"); 840 return (EINVAL); 841 } 842 } 843 844 int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family; 845 846 nh = nhop_alloc(RT_DEFAULT_FIB, family); 847 if (nh == NULL) { 848 NL_LOG(LOG_DEBUG, "Unable to allocate nexthop"); 849 return (ENOMEM); 850 } 851 nhop_set_uidx(nh, attrs->nha_id); 852 nhop_set_origin(nh, attrs->nh_protocol); 853 854 if (attrs->nha_blackhole) 855 nhop_set_blackhole(nh, NHF_BLACKHOLE); 856 else { 857 error = nl_set_nexthop_gw(nh, attrs->nha_gw, attrs->nha_oif, npt); 858 if (error != 0) { 859 nhop_free(nh); 860 return (error); 861 } 862 nhop_set_transmit_ifp(nh, attrs->nha_oif); 863 nhop_set_src(nh, ifa); 864 } 865 866 error = nhop_get_unlinked(nh); 867 if (error != 0) { 868 NL_LOG(LOG_DEBUG, "unable to finalize nexthop"); 869 return (error); 870 } 871 872 IF_DEBUG_LEVEL(LOG_DEBUG2) { 873 char nhbuf[NHOP_PRINT_BUFSIZE]; 874 nhop_print_buf(nh, nhbuf, sizeof(nhbuf)); 875 NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf); 876 } 877 878 unhop->un_nhop_src = nh; 879 return (0); 880 } 881 882 static int 883 rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 884 struct nl_pstate *npt) 885 { 886 struct user_nhop *unhop; 887 int error; 888 889 if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops())) 890 return (ENOMEM); 891 struct unhop_ctl *ctl = V_un_ctl; 892 893 struct nl_parsed_nhop attrs = {}; 894 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 895 if (error != 0) 896 return (error); 897 898 /* 899 * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class 900 * citizen. 901 */ 902 if (attrs.nha_id == 0) { 903 attrs.nha_id = find_spare_uidx(ctl); 904 if (attrs.nha_id == 0) { 905 NL_LOG(LOG_DEBUG, "Unable to get spare uidx"); 906 return (ENOSPC); 907 } 908 } 909 910 NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? if_getindex(attrs.nha_oif) : 0); 911 912 unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); 913 if (unhop == NULL) { 914 NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop"); 915 return (ENOMEM); 916 } 917 unhop->un_idx = attrs.nha_id; 918 unhop->un_protocol = attrs.nh_protocol; 919 920 if (attrs.nha_group) 921 error = newnhg(ctl, &attrs, unhop); 922 else 923 error = newnhop(&attrs, unhop, npt); 924 925 if (error != 0) { 926 free(unhop, M_NETLINK); 927 return (error); 928 } 929 930 UN_WLOCK(ctl); 931 /* Check if uidx already exists */ 932 struct user_nhop *tmp = NULL; 933 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp); 934 if (tmp != NULL) { 935 UN_WUNLOCK(ctl); 936 NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id); 937 destroy_unhop(unhop); 938 return (EEXIST); 939 } 940 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop); 941 uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head); 942 UN_WUNLOCK(ctl); 943 944 /* Report addition of the next nexhop */ 945 struct netlink_walkargs wa = { 946 .hdr.nlmsg_pid = hdr->nlmsg_pid, 947 .hdr.nlmsg_seq = hdr->nlmsg_seq, 948 .hdr.nlmsg_flags = hdr->nlmsg_flags, 949 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, 950 }; 951 952 struct nl_writer nw = {}; 953 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { 954 NL_LOG(LOG_DEBUG, "error allocating message writer"); 955 return (ENOMEM); 956 } 957 958 dump_unhop(unhop, &wa.hdr, &nw); 959 nlmsg_flush(&nw); 960 961 consider_resize(ctl, num_buckets_new); 962 963 return (0); 964 } 965 966 static int 967 rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 968 struct nl_pstate *npt) 969 { 970 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 971 int error; 972 973 if (__predict_false(ctl == NULL)) 974 return (ESRCH); 975 976 struct nl_parsed_nhop attrs = {}; 977 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 978 if (error != 0) 979 return (error); 980 981 if (attrs.nha_id == 0) { 982 NL_LOG(LOG_DEBUG, "NHA_ID not set"); 983 return (EINVAL); 984 } 985 986 error = delete_unhop(ctl, hdr, attrs.nha_id); 987 988 return (error); 989 } 990 991 static bool 992 match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop) 993 { 994 if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id) 995 return (false); 996 if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL) 997 return (false); 998 if (attrs->nha_oif != NULL && 999 (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif)) 1000 return (false); 1001 1002 return (true); 1003 } 1004 1005 static int 1006 rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 1007 struct nl_pstate *npt) 1008 { 1009 struct user_nhop *unhop; 1010 UN_TRACKER; 1011 int error; 1012 1013 struct nl_parsed_nhop attrs = {}; 1014 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 1015 if (error != 0) 1016 return (error); 1017 1018 struct netlink_walkargs wa = { 1019 .nw = npt->nw, 1020 .hdr.nlmsg_pid = hdr->nlmsg_pid, 1021 .hdr.nlmsg_seq = hdr->nlmsg_seq, 1022 .hdr.nlmsg_flags = hdr->nlmsg_flags, 1023 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, 1024 }; 1025 1026 if (attrs.nha_id != 0) { 1027 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 1028 struct user_nhop key = { .un_idx = attrs.nha_id }; 1029 1030 if (__predict_false(ctl == NULL)) 1031 return (ESRCH); 1032 1033 NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id); 1034 UN_RLOCK(ctl); 1035 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 1036 UN_RUNLOCK(ctl); 1037 1038 if (unhop == NULL) 1039 return (ESRCH); 1040 dump_unhop(unhop, &wa.hdr, wa.nw); 1041 return (0); 1042 } else if (attrs.nhaf_kid != 0) { 1043 struct nhop_iter iter = { 1044 .fibnum = attrs.nhaf_table, 1045 .family = attrs.nhaf_family, 1046 }; 1047 int error = ESRCH; 1048 1049 NL_LOG(LOG_DEBUG2, "START table %u family %d", attrs.nhaf_table, attrs.nhaf_family); 1050 for (struct nhop_object *nh = nhops_iter_start(&iter); nh; 1051 nh = nhops_iter_next(&iter)) { 1052 NL_LOG(LOG_DEBUG3, "get %u", nhop_get_idx(nh)); 1053 if (nhop_get_idx(nh) == attrs.nhaf_kid) { 1054 dump_nhop(nh, 0, &wa.hdr, wa.nw); 1055 error = 0; 1056 break; 1057 } 1058 } 1059 nhops_iter_stop(&iter); 1060 return (error); 1061 } else if (attrs.nhaf_knhops) { 1062 struct nhop_iter iter = { 1063 .fibnum = attrs.nhaf_table, 1064 .family = attrs.nhaf_family, 1065 }; 1066 1067 NL_LOG(LOG_DEBUG2, "DUMP table %u family %d", attrs.nhaf_table, attrs.nhaf_family); 1068 wa.hdr.nlmsg_flags |= NLM_F_MULTI; 1069 for (struct nhop_object *nh = nhops_iter_start(&iter); nh; 1070 nh = nhops_iter_next(&iter)) { 1071 dump_nhop(nh, 0, &wa.hdr, wa.nw); 1072 } 1073 nhops_iter_stop(&iter); 1074 } else { 1075 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 1076 1077 if (__predict_false(ctl == NULL)) 1078 return (ESRCH); 1079 1080 NL_LOG(LOG_DEBUG2, "DUMP unhops"); 1081 UN_RLOCK(ctl); 1082 wa.hdr.nlmsg_flags |= NLM_F_MULTI; 1083 CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) { 1084 if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop)) 1085 dump_unhop(unhop, &wa.hdr, wa.nw); 1086 } CHT_SLIST_FOREACH_END; 1087 UN_RUNLOCK(ctl); 1088 } 1089 1090 if (wa.error == 0) { 1091 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) 1092 return (ENOMEM); 1093 } 1094 return (0); 1095 } 1096 1097 static const struct rtnl_cmd_handler cmd_handlers[] = { 1098 { 1099 .cmd = NL_RTM_NEWNEXTHOP, 1100 .name = "RTM_NEWNEXTHOP", 1101 .cb = &rtnl_handle_newnhop, 1102 .priv = PRIV_NET_ROUTE, 1103 }, 1104 { 1105 .cmd = NL_RTM_DELNEXTHOP, 1106 .name = "RTM_DELNEXTHOP", 1107 .cb = &rtnl_handle_delnhop, 1108 .priv = PRIV_NET_ROUTE, 1109 }, 1110 { 1111 .cmd = NL_RTM_GETNEXTHOP, 1112 .name = "RTM_GETNEXTHOP", 1113 .cb = &rtnl_handle_getnhop, 1114 } 1115 }; 1116 1117 static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser, &nh_fbsd_parser }; 1118 1119 void 1120 rtnl_nexthops_init(void) 1121 { 1122 NL_VERIFY_PARSERS(all_parsers); 1123 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1124 } 1125