1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022 Alexander V. Chernikov <melifaro@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include "opt_netlink.h" 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 #include "opt_inet.h" 33 #include "opt_inet6.h" 34 #include "opt_route.h" 35 #include <sys/types.h> 36 #include <sys/ck.h> 37 #include <sys/epoch.h> 38 #include <sys/kernel.h> 39 #include <sys/malloc.h> 40 #include <sys/rmlock.h> 41 #include <sys/socket.h> 42 43 #include <net/if.h> 44 #include <net/route.h> 45 #include <net/route/nhop.h> 46 #include <net/route/nhop_utils.h> 47 48 #include <net/route/route_ctl.h> 49 #include <net/route/route_var.h> 50 #include <netinet6/scope6_var.h> 51 #include <netlink/netlink.h> 52 #include <netlink/netlink_ctl.h> 53 #include <netlink/netlink_route.h> 54 #include <netlink/route/route_var.h> 55 56 #define DEBUG_MOD_NAME nl_nhop 57 #define DEBUG_MAX_LEVEL LOG_DEBUG3 58 #include <netlink/netlink_debug.h> 59 _DECLARE_DEBUG(LOG_INFO); 60 61 /* 62 * This file contains the logic to maintain kernel nexthops and 63 * nexhop groups based om the data provided by the user. 64 * 65 * Kernel stores (nearly) all of the routing data in the nexthops, 66 * including the prefix-specific flags (NHF_HOST and NHF_DEFAULT). 67 * 68 * Netlink API provides higher-level abstraction for the user. Each 69 * user-created nexthop may map to multiple kernel nexthops. 70 * 71 * The following variations require separate kernel nexthop to be 72 * created: 73 * * prefix flags (NHF_HOST, NHF_DEFAULT) 74 * * using IPv6 gateway for IPv4 routes 75 * * different fibnum 76 * 77 * These kernel nexthops have the lifetime bound to the lifetime of 78 * the user_nhop object. They are not collected until user requests 79 * to delete the created user_nhop. 80 * 81 */ 82 struct user_nhop { 83 uint32_t un_idx; /* Userland-provided index */ 84 uint32_t un_fibfam; /* fibnum+af(as highest byte) */ 85 uint8_t un_protocol; /* protocol that install the record */ 86 struct nhop_object *un_nhop; /* "production" nexthop */ 87 struct nhop_object *un_nhop_src; /* nexthop to copy from */ 88 struct weightened_nhop *un_nhgrp_src; /* nexthops for nhg */ 89 uint32_t un_nhgrp_count; /* number of nexthops */ 90 struct user_nhop *un_next; /* next item in hash chain */ 91 struct user_nhop *un_nextchild; /* master -> children */ 92 struct epoch_context un_epoch_ctx; /* epoch ctl helper */ 93 }; 94 95 /* produce hash value for an object */ 96 #define unhop_hash_obj(_obj) (hash_unhop(_obj)) 97 /* compare two objects */ 98 #define unhop_cmp(_one, _two) (cmp_unhop(_one, _two)) 99 /* next object accessor */ 100 #define unhop_next(_obj) (_obj)->un_next 101 102 CHT_SLIST_DEFINE(unhop, struct user_nhop); 103 104 struct unhop_ctl { 105 struct unhop_head un_head; 106 struct rmlock un_lock; 107 }; 108 #define UN_LOCK_INIT(_ctl) rm_init(&(_ctl)->un_lock, "unhop_ctl") 109 #define UN_TRACKER struct rm_priotracker un_tracker 110 #define UN_RLOCK(_ctl) rm_rlock(&((_ctl)->un_lock), &un_tracker) 111 #define UN_RUNLOCK(_ctl) rm_runlock(&((_ctl)->un_lock), &un_tracker) 112 113 #define UN_WLOCK(_ctl) rm_wlock(&(_ctl)->un_lock); 114 #define UN_WUNLOCK(_ctl) rm_wunlock(&(_ctl)->un_lock); 115 116 VNET_DEFINE_STATIC(struct unhop_ctl *, un_ctl) = NULL; 117 #define V_un_ctl VNET(un_ctl) 118 119 static void consider_resize(struct unhop_ctl *ctl, uint32_t new_size); 120 static int cmp_unhop(const struct user_nhop *a, const struct user_nhop *b); 121 static unsigned int hash_unhop(const struct user_nhop *obj); 122 123 static void destroy_unhop(struct user_nhop *unhop); 124 static struct nhop_object *clone_unhop(const struct user_nhop *unhop, 125 uint32_t fibnum, int family, int nh_flags); 126 127 static int 128 cmp_unhop(const struct user_nhop *a, const struct user_nhop *b) 129 { 130 return (a->un_idx == b->un_idx && a->un_fibfam == b->un_fibfam); 131 } 132 133 /* 134 * Hash callback: calculate hash of an object 135 */ 136 static unsigned int 137 hash_unhop(const struct user_nhop *obj) 138 { 139 return (obj->un_idx ^ obj->un_fibfam); 140 } 141 142 #define UNHOP_IS_MASTER(_unhop) ((_unhop)->un_fibfam == 0) 143 144 /* 145 * Factory interface for creating matching kernel nexthops/nexthop groups 146 * 147 * @uidx: userland nexhop index used to create the nexthop 148 * @fibnum: fibnum nexthop will be used in 149 * @family: upper family nexthop will be used in 150 * @nh_flags: desired nexthop prefix flags 151 * @perror: pointer to store error to 152 * 153 * Returns referenced nexthop linked to @fibnum/@family rib on success. 154 */ 155 struct nhop_object * 156 nl_find_nhop(uint32_t fibnum, int family, uint32_t uidx, 157 int nh_flags, int *perror) 158 { 159 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 160 UN_TRACKER; 161 162 if (__predict_false(ctl == NULL)) 163 return (NULL); 164 165 struct user_nhop key= { 166 .un_idx = uidx, 167 .un_fibfam = fibnum | ((uint32_t)family) << 24, 168 }; 169 struct user_nhop *unhop; 170 171 nh_flags = nh_flags & (NHF_HOST | NHF_DEFAULT); 172 173 if (__predict_false(family == 0)) 174 return (NULL); 175 176 UN_RLOCK(ctl); 177 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 178 if (unhop != NULL) { 179 struct nhop_object *nh = unhop->un_nhop; 180 UN_RLOCK(ctl); 181 *perror = 0; 182 nhop_ref_any(nh); 183 return (nh); 184 } 185 186 /* 187 * Exact nexthop not found. Search for template nexthop to clone from. 188 */ 189 key.un_fibfam = 0; 190 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 191 if (unhop == NULL) { 192 UN_RUNLOCK(ctl); 193 *perror = ESRCH; 194 return (NULL); 195 } 196 197 UN_RUNLOCK(ctl); 198 199 /* Create entry to insert first */ 200 struct user_nhop *un_new, *un_tmp; 201 un_new = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); 202 if (un_new == NULL) { 203 *perror = ENOMEM; 204 return (NULL); 205 } 206 un_new->un_idx = uidx; 207 un_new->un_fibfam = fibnum | ((uint32_t)family) << 24; 208 209 /* Relying on epoch to protect unhop here */ 210 un_new->un_nhop = clone_unhop(unhop, fibnum, family, nh_flags); 211 if (un_new->un_nhop == NULL) { 212 free(un_new, M_NETLINK); 213 *perror = ENOMEM; 214 return (NULL); 215 } 216 217 /* Insert back and report */ 218 UN_WLOCK(ctl); 219 220 /* First, find template record once again */ 221 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 222 if (unhop == NULL) { 223 /* Someone deleted the nexthop during the call */ 224 UN_WUNLOCK(ctl); 225 *perror = ESRCH; 226 destroy_unhop(un_new); 227 return (NULL); 228 } 229 230 /* Second, check the direct match */ 231 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, un_new, un_tmp); 232 struct nhop_object *nh; 233 if (un_tmp != NULL) { 234 /* Another thread already created the desired nextop, use it */ 235 nh = un_tmp->un_nhop; 236 } else { 237 /* Finally, insert the new nexthop and link it to the primary */ 238 nh = un_new->un_nhop; 239 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, un_new); 240 un_new->un_nextchild = unhop->un_nextchild; 241 unhop->un_nextchild = un_new; 242 un_new = NULL; 243 NL_LOG(LOG_DEBUG2, "linked cloned nexthop %p", nh); 244 } 245 246 UN_WUNLOCK(ctl); 247 248 if (un_new != NULL) 249 destroy_unhop(un_new); 250 251 *perror = 0; 252 nhop_ref_any(nh); 253 return (nh); 254 } 255 256 static struct user_nhop * 257 nl_find_base_unhop(struct unhop_ctl *ctl, uint32_t uidx) 258 { 259 struct user_nhop key= { .un_idx = uidx }; 260 struct user_nhop *unhop = NULL; 261 UN_TRACKER; 262 263 UN_RLOCK(ctl); 264 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 265 UN_RUNLOCK(ctl); 266 267 return (unhop); 268 } 269 270 #define MAX_STACK_NHOPS 4 271 static struct nhop_object * 272 clone_unhop(const struct user_nhop *unhop, uint32_t fibnum, int family, int nh_flags) 273 { 274 #ifdef ROUTE_MPATH 275 const struct weightened_nhop *wn; 276 struct weightened_nhop *wn_new, wn_base[MAX_STACK_NHOPS]; 277 uint32_t num_nhops; 278 #endif 279 struct nhop_object *nh = NULL; 280 int error; 281 282 if (unhop->un_nhop_src != NULL) { 283 IF_DEBUG_LEVEL(LOG_DEBUG2) { 284 char nhbuf[NHOP_PRINT_BUFSIZE]; 285 nhop_print_buf_any(unhop->un_nhop_src, nhbuf, sizeof(nhbuf)); 286 FIB_NH_LOG(LOG_DEBUG2, unhop->un_nhop_src, 287 "cloning nhop %s -> %u.%u flags 0x%X", nhbuf, fibnum, 288 family, nh_flags); 289 } 290 struct nhop_object *nh; 291 nh = nhop_alloc(fibnum, AF_UNSPEC); 292 if (nh == NULL) 293 return (NULL); 294 nhop_copy(nh, unhop->un_nhop_src); 295 /* Check that nexthop gateway is compatible with the new family */ 296 if (!nhop_set_upper_family(nh, family)) { 297 nhop_free(nh); 298 return (NULL); 299 } 300 nhop_set_uidx(nh, unhop->un_idx); 301 nhop_set_pxtype_flag(nh, nh_flags); 302 return (nhop_get_nhop(nh, &error)); 303 } 304 #ifdef ROUTE_MPATH 305 wn = unhop->un_nhgrp_src; 306 num_nhops = unhop->un_nhgrp_count; 307 308 if (num_nhops > MAX_STACK_NHOPS) { 309 wn_new = malloc(num_nhops * sizeof(struct weightened_nhop), M_TEMP, M_NOWAIT); 310 if (wn_new == NULL) 311 return (NULL); 312 } else 313 wn_new = wn_base; 314 315 for (int i = 0; i < num_nhops; i++) { 316 uint32_t uidx = nhop_get_uidx(wn[i].nh); 317 MPASS(uidx != 0); 318 wn_new[i].nh = nl_find_nhop(fibnum, family, uidx, nh_flags, &error); 319 if (error != 0) 320 break; 321 wn_new[i].weight = wn[i].weight; 322 } 323 324 if (error == 0) { 325 struct rib_head *rh = nhop_get_rh(wn_new[0].nh); 326 struct nhgrp_object *nhg; 327 328 error = nhgrp_get_group(rh, wn_new, num_nhops, unhop->un_idx, &nhg); 329 nh = (struct nhop_object *)nhg; 330 } 331 332 if (wn_new != wn_base) 333 free(wn_new, M_TEMP); 334 #endif 335 return (nh); 336 } 337 338 static void 339 destroy_unhop(struct user_nhop *unhop) 340 { 341 if (unhop->un_nhop != NULL) 342 nhop_free_any(unhop->un_nhop); 343 if (unhop->un_nhop_src != NULL) 344 nhop_free_any(unhop->un_nhop_src); 345 free(unhop, M_NETLINK); 346 } 347 348 static void 349 destroy_unhop_epoch(epoch_context_t ctx) 350 { 351 struct user_nhop *unhop; 352 353 unhop = __containerof(ctx, struct user_nhop, un_epoch_ctx); 354 355 destroy_unhop(unhop); 356 } 357 358 static uint32_t 359 find_spare_uidx(struct unhop_ctl *ctl) 360 { 361 struct user_nhop *unhop, key = {}; 362 uint32_t uidx = 0; 363 UN_TRACKER; 364 365 UN_RLOCK(ctl); 366 /* This should return spare uid with 75% of 65k used in ~99/100 cases */ 367 for (int i = 0; i < 16; i++) { 368 key.un_idx = (arc4random() % 65536) + 65536 * 4; 369 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 370 if (unhop == NULL) { 371 uidx = key.un_idx; 372 break; 373 } 374 } 375 UN_RUNLOCK(ctl); 376 377 return (uidx); 378 } 379 380 381 /* 382 * Actual netlink code 383 */ 384 struct netlink_walkargs { 385 struct nl_writer *nw; 386 struct nlmsghdr hdr; 387 struct nlpcb *so; 388 int family; 389 int error; 390 int count; 391 int dumped; 392 }; 393 #define ENOMEM_IF_NULL(_v) if ((_v) == NULL) goto enomem 394 395 static bool 396 dump_nhgrp(const struct user_nhop *unhop, struct nlmsghdr *hdr, 397 struct nl_writer *nw) 398 { 399 400 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) 401 goto enomem; 402 403 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); 404 nhm->nh_family = AF_UNSPEC; 405 nhm->nh_scope = 0; 406 nhm->nh_protocol = unhop->un_protocol; 407 nhm->nh_flags = 0; 408 409 nlattr_add_u32(nw, NHA_ID, unhop->un_idx); 410 nlattr_add_u16(nw, NHA_GROUP_TYPE, NEXTHOP_GRP_TYPE_MPATH); 411 412 struct weightened_nhop *wn = unhop->un_nhgrp_src; 413 uint32_t num_nhops = unhop->un_nhgrp_count; 414 /* TODO: a better API? */ 415 int nla_len = sizeof(struct nlattr); 416 nla_len += NETLINK_ALIGN(num_nhops * sizeof(struct nexthop_grp)); 417 struct nlattr *nla = nlmsg_reserve_data(nw, nla_len, struct nlattr); 418 if (nla == NULL) 419 goto enomem; 420 nla->nla_type = NHA_GROUP; 421 nla->nla_len = nla_len; 422 for (int i = 0; i < num_nhops; i++) { 423 struct nexthop_grp *grp = &((struct nexthop_grp *)(nla + 1))[i]; 424 grp->id = nhop_get_uidx(wn[i].nh); 425 grp->weight = wn[i].weight; 426 grp->resvd1 = 0; 427 grp->resvd2 = 0; 428 } 429 430 if (nlmsg_end(nw)) 431 return (true); 432 enomem: 433 NL_LOG(LOG_DEBUG, "error: unable to allocate attribute memory"); 434 nlmsg_abort(nw); 435 return (false); 436 } 437 438 static bool 439 dump_nhop(const struct nhop_object *nh, uint32_t uidx, struct nlmsghdr *hdr, 440 struct nl_writer *nw) 441 { 442 if (!nlmsg_reply(nw, hdr, sizeof(struct nhmsg))) 443 goto enomem; 444 445 struct nhmsg *nhm = nlmsg_reserve_object(nw, struct nhmsg); 446 ENOMEM_IF_NULL(nhm); 447 nhm->nh_family = nhop_get_neigh_family(nh); 448 nhm->nh_scope = 0; // XXX: what's that? 449 nhm->nh_protocol = nhop_get_origin(nh); 450 nhm->nh_flags = 0; 451 452 if (uidx != 0) 453 nlattr_add_u32(nw, NHA_ID, uidx); 454 if (nh->nh_flags & NHF_BLACKHOLE) { 455 nlattr_add_flag(nw, NHA_BLACKHOLE); 456 goto done; 457 } 458 nlattr_add_u32(nw, NHA_OIF, if_getindex(nh->nh_ifp)); 459 460 switch (nh->gw_sa.sa_family) { 461 #ifdef INET 462 case AF_INET: 463 nlattr_add(nw, NHA_GATEWAY, 4, &nh->gw4_sa.sin_addr); 464 break; 465 #endif 466 #ifdef INET6 467 case AF_INET6: 468 { 469 struct in6_addr addr = nh->gw6_sa.sin6_addr; 470 in6_clearscope(&addr); 471 nlattr_add(nw, NHA_GATEWAY, 16, &addr); 472 break; 473 } 474 #endif 475 } 476 477 int off = nlattr_add_nested(nw, NHA_FREEBSD); 478 if (off != 0) { 479 nlattr_add_u32(nw, NHAF_AIF, if_getindex(nh->nh_aifp)); 480 481 if (uidx == 0) { 482 nlattr_add_u32(nw, NHAF_KID, nhop_get_idx(nh)); 483 nlattr_add_u32(nw, NHAF_FAMILY, nhop_get_upper_family(nh)); 484 nlattr_add_u32(nw, NHAF_TABLE, nhop_get_fibnum(nh)); 485 } 486 487 nlattr_set_len(nw, off); 488 } 489 490 done: 491 if (nlmsg_end(nw)) 492 return (true); 493 enomem: 494 nlmsg_abort(nw); 495 return (false); 496 } 497 498 static void 499 dump_unhop(const struct user_nhop *unhop, struct nlmsghdr *hdr, 500 struct nl_writer *nw) 501 { 502 if (unhop->un_nhop_src != NULL) 503 dump_nhop(unhop->un_nhop_src, unhop->un_idx, hdr, nw); 504 else 505 dump_nhgrp(unhop, hdr, nw); 506 } 507 508 static int 509 delete_unhop(struct unhop_ctl *ctl, struct nlmsghdr *hdr, uint32_t uidx) 510 { 511 struct user_nhop *unhop_ret, *unhop_base, *unhop_chain; 512 513 struct user_nhop key = { .un_idx = uidx }; 514 515 UN_WLOCK(ctl); 516 517 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop_base); 518 519 if (unhop_base != NULL) { 520 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_base, unhop_ret); 521 IF_DEBUG_LEVEL(LOG_DEBUG2) { 522 char nhbuf[NHOP_PRINT_BUFSIZE]; 523 nhop_print_buf_any(unhop_base->un_nhop, nhbuf, sizeof(nhbuf)); 524 FIB_NH_LOG(LOG_DEBUG3, unhop_base->un_nhop, 525 "removed base nhop %u: %s", uidx, nhbuf); 526 } 527 /* Unlink all child nexhops as well, keeping the chain intact */ 528 unhop_chain = unhop_base->un_nextchild; 529 while (unhop_chain != NULL) { 530 CHT_SLIST_REMOVE(&ctl->un_head, unhop, unhop_chain, 531 unhop_ret); 532 MPASS(unhop_chain == unhop_ret); 533 IF_DEBUG_LEVEL(LOG_DEBUG3) { 534 char nhbuf[NHOP_PRINT_BUFSIZE]; 535 nhop_print_buf_any(unhop_chain->un_nhop, 536 nhbuf, sizeof(nhbuf)); 537 FIB_NH_LOG(LOG_DEBUG3, unhop_chain->un_nhop, 538 "removed child nhop %u: %s", uidx, nhbuf); 539 } 540 unhop_chain = unhop_chain->un_nextchild; 541 } 542 } 543 544 UN_WUNLOCK(ctl); 545 546 if (unhop_base == NULL) { 547 NL_LOG(LOG_DEBUG, "unable to find unhop %u", uidx); 548 return (ENOENT); 549 } 550 551 /* Report nexthop deletion */ 552 struct netlink_walkargs wa = { 553 .hdr.nlmsg_pid = hdr->nlmsg_pid, 554 .hdr.nlmsg_seq = hdr->nlmsg_seq, 555 .hdr.nlmsg_flags = hdr->nlmsg_flags, 556 .hdr.nlmsg_type = NL_RTM_DELNEXTHOP, 557 }; 558 559 struct nl_writer nw = {}; 560 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { 561 NL_LOG(LOG_DEBUG, "error allocating message writer"); 562 return (ENOMEM); 563 } 564 565 dump_unhop(unhop_base, &wa.hdr, &nw); 566 nlmsg_flush(&nw); 567 568 while (unhop_base != NULL) { 569 unhop_chain = unhop_base->un_nextchild; 570 NET_EPOCH_CALL(destroy_unhop_epoch, &unhop_base->un_epoch_ctx); 571 unhop_base = unhop_chain; 572 } 573 574 return (0); 575 } 576 577 static void 578 consider_resize(struct unhop_ctl *ctl, uint32_t new_size) 579 { 580 void *new_ptr = NULL; 581 size_t alloc_size; 582 583 if (new_size == 0) 584 return; 585 586 if (new_size != 0) { 587 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_size); 588 new_ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); 589 if (new_ptr == NULL) 590 return; 591 } 592 593 NL_LOG(LOG_DEBUG, "resizing hash: %u -> %u", ctl->un_head.hash_size, new_size); 594 UN_WLOCK(ctl); 595 if (new_ptr != NULL) { 596 CHT_SLIST_RESIZE(&ctl->un_head, unhop, new_ptr, new_size); 597 } 598 UN_WUNLOCK(ctl); 599 600 601 if (new_ptr != NULL) 602 free(new_ptr, M_NETLINK); 603 } 604 605 static bool __noinline 606 vnet_init_unhops(void) 607 { 608 uint32_t num_buckets = 16; 609 size_t alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 610 611 struct unhop_ctl *ctl = malloc(sizeof(struct unhop_ctl), M_NETLINK, 612 M_NOWAIT | M_ZERO); 613 if (ctl == NULL) 614 return (false); 615 616 void *ptr = malloc(alloc_size, M_NETLINK, M_NOWAIT | M_ZERO); 617 if (ptr == NULL) { 618 free(ctl, M_NETLINK); 619 return (false); 620 } 621 CHT_SLIST_INIT(&ctl->un_head, ptr, num_buckets); 622 UN_LOCK_INIT(ctl); 623 624 if (!atomic_cmpset_ptr((uintptr_t *)&V_un_ctl, (uintptr_t)NULL, (uintptr_t)ctl)) { 625 free(ptr, M_NETLINK); 626 free(ctl, M_NETLINK); 627 } 628 629 if (atomic_load_ptr(&V_un_ctl) == NULL) 630 return (false); 631 632 NL_LOG(LOG_NOTICE, "UNHOPS init done"); 633 634 return (true); 635 } 636 637 static void 638 vnet_destroy_unhops(const void *unused __unused) 639 { 640 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 641 struct user_nhop *unhop, *tmp; 642 643 if (ctl == NULL) 644 return; 645 V_un_ctl = NULL; 646 647 /* Wait till all unhop users finish their reads */ 648 NET_EPOCH_WAIT(); 649 650 UN_WLOCK(ctl); 651 CHT_SLIST_FOREACH_SAFE(&ctl->un_head, unhop, unhop, tmp) { 652 destroy_unhop(unhop); 653 } CHT_SLIST_FOREACH_SAFE_END; 654 UN_WUNLOCK(ctl); 655 656 free(ctl->un_head.ptr, M_NETLINK); 657 free(ctl, M_NETLINK); 658 } 659 VNET_SYSUNINIT(vnet_destroy_unhops, SI_SUB_PROTO_IF, SI_ORDER_ANY, 660 vnet_destroy_unhops, NULL); 661 662 static int 663 nlattr_get_nhg(struct nlattr *nla, struct nl_pstate *npt, const void *arg, void *target) 664 { 665 int error = 0; 666 667 /* Verify attribute correctness */ 668 struct nexthop_grp *grp = NLA_DATA(nla); 669 int data_len = NLA_DATA_LEN(nla); 670 671 int count = data_len / sizeof(*grp); 672 if (count == 0 || (count * sizeof(*grp) != data_len)) { 673 NL_LOG(LOG_DEBUG, "Invalid length for RTA_GROUP: %d", data_len); 674 return (EINVAL); 675 } 676 677 *((struct nlattr **)target) = nla; 678 return (error); 679 } 680 681 static void 682 set_scope6(struct sockaddr *sa, if_t ifp) 683 { 684 #ifdef INET6 685 if (sa != NULL && sa->sa_family == AF_INET6 && ifp != NULL) { 686 struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)sa; 687 688 if (IN6_IS_ADDR_LINKLOCAL(&sa6->sin6_addr)) 689 in6_set_unicast_scopeid(&sa6->sin6_addr, if_getindex(ifp)); 690 } 691 #endif 692 } 693 694 struct nl_parsed_nhop { 695 uint32_t nha_id; 696 uint8_t nha_blackhole; 697 uint8_t nha_groups; 698 uint8_t nhaf_knhops; 699 uint8_t nhaf_family; 700 struct ifnet *nha_oif; 701 struct sockaddr *nha_gw; 702 struct nlattr *nha_group; 703 uint8_t nh_family; 704 uint8_t nh_protocol; 705 uint32_t nhaf_table; 706 uint32_t nhaf_kid; 707 uint32_t nhaf_aif; 708 }; 709 710 #define _IN(_field) offsetof(struct nhmsg, _field) 711 #define _OUT(_field) offsetof(struct nl_parsed_nhop, _field) 712 static struct nlattr_parser nla_p_nh_fbsd[] = { 713 { .type = NHAF_KNHOPS, .off = _OUT(nhaf_knhops), .cb = nlattr_get_flag }, 714 { .type = NHAF_TABLE, .off = _OUT(nhaf_table), .cb = nlattr_get_uint32 }, 715 { .type = NHAF_FAMILY, .off = _OUT(nhaf_family), .cb = nlattr_get_uint8 }, 716 { .type = NHAF_KID, .off = _OUT(nhaf_kid), .cb = nlattr_get_uint32 }, 717 { .type = NHAF_AIF, .off = _OUT(nhaf_aif), .cb = nlattr_get_uint32 }, 718 }; 719 NL_DECLARE_ATTR_PARSER(nh_fbsd_parser, nla_p_nh_fbsd); 720 721 static const struct nlfield_parser nlf_p_nh[] = { 722 { .off_in = _IN(nh_family), .off_out = _OUT(nh_family), .cb = nlf_get_u8 }, 723 { .off_in = _IN(nh_protocol), .off_out = _OUT(nh_protocol), .cb = nlf_get_u8 }, 724 }; 725 726 static const struct nlattr_parser nla_p_nh[] = { 727 { .type = NHA_ID, .off = _OUT(nha_id), .cb = nlattr_get_uint32 }, 728 { .type = NHA_GROUP, .off = _OUT(nha_group), .cb = nlattr_get_nhg }, 729 { .type = NHA_BLACKHOLE, .off = _OUT(nha_blackhole), .cb = nlattr_get_flag }, 730 { .type = NHA_OIF, .off = _OUT(nha_oif), .cb = nlattr_get_ifp }, 731 { .type = NHA_GATEWAY, .off = _OUT(nha_gw), .cb = nlattr_get_ip }, 732 { .type = NHA_GROUPS, .off = _OUT(nha_groups), .cb = nlattr_get_flag }, 733 { .type = NHA_FREEBSD, .arg = &nh_fbsd_parser, .cb = nlattr_get_nested }, 734 }; 735 #undef _IN 736 #undef _OUT 737 738 static bool 739 post_p_nh(void *_attrs, struct nl_pstate *npt) 740 { 741 struct nl_parsed_nhop *attrs = (struct nl_parsed_nhop *)_attrs; 742 743 set_scope6(attrs->nha_gw, attrs->nha_oif); 744 return (true); 745 } 746 NL_DECLARE_PARSER_EXT(nhmsg_parser, struct nhmsg, NULL, nlf_p_nh, nla_p_nh, post_p_nh); 747 748 static bool 749 eligible_nhg(const struct nhop_object *nh) 750 { 751 return (nh->nh_flags & NHF_GATEWAY); 752 } 753 754 static int 755 newnhg(struct unhop_ctl *ctl, struct nl_parsed_nhop *attrs, struct user_nhop *unhop) 756 { 757 struct nexthop_grp *grp = NLA_DATA(attrs->nha_group); 758 int count = NLA_DATA_LEN(attrs->nha_group) / sizeof(*grp); 759 struct weightened_nhop *wn; 760 761 wn = malloc(sizeof(*wn) * count, M_NETLINK, M_NOWAIT | M_ZERO); 762 if (wn == NULL) 763 return (ENOMEM); 764 765 for (int i = 0; i < count; i++) { 766 struct user_nhop *unhop; 767 unhop = nl_find_base_unhop(ctl, grp[i].id); 768 if (unhop == NULL) { 769 NL_LOG(LOG_DEBUG, "unable to find uidx %u", grp[i].id); 770 free(wn, M_NETLINK); 771 return (ESRCH); 772 } else if (unhop->un_nhop_src == NULL) { 773 NL_LOG(LOG_DEBUG, "uidx %u is a group, nested group unsupported", 774 grp[i].id); 775 free(wn, M_NETLINK); 776 return (ENOTSUP); 777 } else if (!eligible_nhg(unhop->un_nhop_src)) { 778 NL_LOG(LOG_DEBUG, "uidx %u nhop is not mpath-eligible", 779 grp[i].id); 780 free(wn, M_NETLINK); 781 return (ENOTSUP); 782 } 783 /* 784 * TODO: consider more rigid eligibility checks: 785 * restrict nexthops with the same gateway 786 */ 787 wn[i].nh = unhop->un_nhop_src; 788 wn[i].weight = grp[i].weight; 789 } 790 unhop->un_nhgrp_src = wn; 791 unhop->un_nhgrp_count = count; 792 return (0); 793 } 794 795 /* 796 * Sets nexthop @nh gateway specified by @gw. 797 * If gateway is IPv6 link-local, alters @gw to include scopeid equal to 798 * @ifp ifindex. 799 * Returns 0 on success or errno. 800 */ 801 int 802 nl_set_nexthop_gw(struct nhop_object *nh, struct sockaddr *gw, if_t ifp, 803 struct nl_pstate *npt) 804 { 805 #ifdef INET6 806 if (gw->sa_family == AF_INET6) { 807 struct sockaddr_in6 *gw6 = (struct sockaddr_in6 *)gw; 808 if (IN6_IS_ADDR_LINKLOCAL(&gw6->sin6_addr)) { 809 if (ifp == NULL) { 810 NLMSG_REPORT_ERR_MSG(npt, "interface not set"); 811 return (EINVAL); 812 } 813 in6_set_unicast_scopeid(&gw6->sin6_addr, if_getindex(ifp)); 814 } 815 } 816 #endif 817 nhop_set_gw(nh, gw, true); 818 return (0); 819 } 820 821 static int 822 newnhop(struct nl_parsed_nhop *attrs, struct user_nhop *unhop, struct nl_pstate *npt) 823 { 824 struct ifaddr *ifa = NULL; 825 struct nhop_object *nh; 826 int error; 827 828 if (!attrs->nha_blackhole) { 829 if (attrs->nha_gw == NULL) { 830 NLMSG_REPORT_ERR_MSG(npt, "missing NHA_GATEWAY"); 831 return (EINVAL); 832 } 833 if (attrs->nha_oif == NULL) { 834 NLMSG_REPORT_ERR_MSG(npt, "missing NHA_OIF"); 835 return (EINVAL); 836 } 837 if (ifa == NULL) 838 ifa = ifaof_ifpforaddr(attrs->nha_gw, attrs->nha_oif); 839 if (ifa == NULL) { 840 NLMSG_REPORT_ERR_MSG(npt, "Unable to determine default source IP"); 841 return (EINVAL); 842 } 843 } 844 845 int family = attrs->nha_gw != NULL ? attrs->nha_gw->sa_family : attrs->nh_family; 846 847 nh = nhop_alloc(RT_DEFAULT_FIB, family); 848 if (nh == NULL) { 849 NL_LOG(LOG_DEBUG, "Unable to allocate nexthop"); 850 return (ENOMEM); 851 } 852 nhop_set_uidx(nh, attrs->nha_id); 853 nhop_set_origin(nh, attrs->nh_protocol); 854 855 if (attrs->nha_blackhole) 856 nhop_set_blackhole(nh, NHF_BLACKHOLE); 857 else { 858 error = nl_set_nexthop_gw(nh, attrs->nha_gw, attrs->nha_oif, npt); 859 if (error != 0) { 860 nhop_free(nh); 861 return (error); 862 } 863 nhop_set_transmit_ifp(nh, attrs->nha_oif); 864 nhop_set_src(nh, ifa); 865 } 866 867 error = nhop_get_unlinked(nh); 868 if (error != 0) { 869 NL_LOG(LOG_DEBUG, "unable to finalize nexthop"); 870 return (error); 871 } 872 873 IF_DEBUG_LEVEL(LOG_DEBUG2) { 874 char nhbuf[NHOP_PRINT_BUFSIZE]; 875 nhop_print_buf(nh, nhbuf, sizeof(nhbuf)); 876 NL_LOG(LOG_DEBUG2, "Adding unhop %u: %s", attrs->nha_id, nhbuf); 877 } 878 879 unhop->un_nhop_src = nh; 880 return (0); 881 } 882 883 static int 884 rtnl_handle_newnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 885 struct nl_pstate *npt) 886 { 887 struct user_nhop *unhop; 888 int error; 889 890 if ((__predict_false(V_un_ctl == NULL)) && (!vnet_init_unhops())) 891 return (ENOMEM); 892 struct unhop_ctl *ctl = V_un_ctl; 893 894 struct nl_parsed_nhop attrs = {}; 895 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 896 if (error != 0) 897 return (error); 898 899 /* 900 * Get valid nha_id. Treat nha_id == 0 (auto-assignment) as a second-class 901 * citizen. 902 */ 903 if (attrs.nha_id == 0) { 904 attrs.nha_id = find_spare_uidx(ctl); 905 if (attrs.nha_id == 0) { 906 NL_LOG(LOG_DEBUG, "Unable to get spare uidx"); 907 return (ENOSPC); 908 } 909 } 910 911 NL_LOG(LOG_DEBUG, "IFINDEX %d", attrs.nha_oif ? if_getindex(attrs.nha_oif) : 0); 912 913 unhop = malloc(sizeof(struct user_nhop), M_NETLINK, M_NOWAIT | M_ZERO); 914 if (unhop == NULL) { 915 NL_LOG(LOG_DEBUG, "Unable to allocate user_nhop"); 916 return (ENOMEM); 917 } 918 unhop->un_idx = attrs.nha_id; 919 unhop->un_protocol = attrs.nh_protocol; 920 921 if (attrs.nha_group) 922 error = newnhg(ctl, &attrs, unhop); 923 else 924 error = newnhop(&attrs, unhop, npt); 925 926 if (error != 0) { 927 free(unhop, M_NETLINK); 928 return (error); 929 } 930 931 UN_WLOCK(ctl); 932 /* Check if uidx already exists */ 933 struct user_nhop *tmp = NULL; 934 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, unhop, tmp); 935 if (tmp != NULL) { 936 UN_WUNLOCK(ctl); 937 NL_LOG(LOG_DEBUG, "nhop idx %u already exists", attrs.nha_id); 938 destroy_unhop(unhop); 939 return (EEXIST); 940 } 941 CHT_SLIST_INSERT_HEAD(&ctl->un_head, unhop, unhop); 942 uint32_t num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->un_head); 943 UN_WUNLOCK(ctl); 944 945 /* Report addition of the next nexhop */ 946 struct netlink_walkargs wa = { 947 .hdr.nlmsg_pid = hdr->nlmsg_pid, 948 .hdr.nlmsg_seq = hdr->nlmsg_seq, 949 .hdr.nlmsg_flags = hdr->nlmsg_flags, 950 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, 951 }; 952 953 struct nl_writer nw = {}; 954 if (!nlmsg_get_group_writer(&nw, NLMSG_SMALL, NETLINK_ROUTE, RTNLGRP_NEXTHOP)) { 955 NL_LOG(LOG_DEBUG, "error allocating message writer"); 956 return (ENOMEM); 957 } 958 959 dump_unhop(unhop, &wa.hdr, &nw); 960 nlmsg_flush(&nw); 961 962 consider_resize(ctl, num_buckets_new); 963 964 return (0); 965 } 966 967 static int 968 rtnl_handle_delnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 969 struct nl_pstate *npt) 970 { 971 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 972 int error; 973 974 if (__predict_false(ctl == NULL)) 975 return (ESRCH); 976 977 struct nl_parsed_nhop attrs = {}; 978 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 979 if (error != 0) 980 return (error); 981 982 if (attrs.nha_id == 0) { 983 NL_LOG(LOG_DEBUG, "NHA_ID not set"); 984 return (EINVAL); 985 } 986 987 error = delete_unhop(ctl, hdr, attrs.nha_id); 988 989 return (error); 990 } 991 992 static bool 993 match_unhop(const struct nl_parsed_nhop *attrs, struct user_nhop *unhop) 994 { 995 if (attrs->nha_id != 0 && unhop->un_idx != attrs->nha_id) 996 return (false); 997 if (attrs->nha_groups != 0 && unhop->un_nhgrp_src == NULL) 998 return (false); 999 if (attrs->nha_oif != NULL && 1000 (unhop->un_nhop_src == NULL || unhop->un_nhop_src->nh_ifp != attrs->nha_oif)) 1001 return (false); 1002 1003 return (true); 1004 } 1005 1006 static int 1007 rtnl_handle_getnhop(struct nlmsghdr *hdr, struct nlpcb *nlp, 1008 struct nl_pstate *npt) 1009 { 1010 struct user_nhop *unhop; 1011 UN_TRACKER; 1012 int error; 1013 1014 struct nl_parsed_nhop attrs = {}; 1015 error = nl_parse_nlmsg(hdr, &nhmsg_parser, npt, &attrs); 1016 if (error != 0) 1017 return (error); 1018 1019 struct netlink_walkargs wa = { 1020 .nw = npt->nw, 1021 .hdr.nlmsg_pid = hdr->nlmsg_pid, 1022 .hdr.nlmsg_seq = hdr->nlmsg_seq, 1023 .hdr.nlmsg_flags = hdr->nlmsg_flags, 1024 .hdr.nlmsg_type = NL_RTM_NEWNEXTHOP, 1025 }; 1026 1027 if (attrs.nha_id != 0) { 1028 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 1029 struct user_nhop key = { .un_idx = attrs.nha_id }; 1030 1031 if (__predict_false(ctl == NULL)) 1032 return (ESRCH); 1033 1034 NL_LOG(LOG_DEBUG2, "searching for uidx %u", attrs.nha_id); 1035 UN_RLOCK(ctl); 1036 CHT_SLIST_FIND_BYOBJ(&ctl->un_head, unhop, &key, unhop); 1037 UN_RUNLOCK(ctl); 1038 1039 if (unhop == NULL) 1040 return (ESRCH); 1041 dump_unhop(unhop, &wa.hdr, wa.nw); 1042 return (0); 1043 } else if (attrs.nhaf_kid != 0) { 1044 struct nhop_iter iter = { 1045 .fibnum = attrs.nhaf_table, 1046 .family = attrs.nhaf_family, 1047 }; 1048 int error = ESRCH; 1049 1050 NL_LOG(LOG_DEBUG2, "START table %u family %d", attrs.nhaf_table, attrs.nhaf_family); 1051 for (struct nhop_object *nh = nhops_iter_start(&iter); nh; 1052 nh = nhops_iter_next(&iter)) { 1053 NL_LOG(LOG_DEBUG3, "get %u", nhop_get_idx(nh)); 1054 if (nhop_get_idx(nh) == attrs.nhaf_kid) { 1055 dump_nhop(nh, 0, &wa.hdr, wa.nw); 1056 error = 0; 1057 break; 1058 } 1059 } 1060 nhops_iter_stop(&iter); 1061 return (error); 1062 } else if (attrs.nhaf_knhops) { 1063 struct nhop_iter iter = { 1064 .fibnum = attrs.nhaf_table, 1065 .family = attrs.nhaf_family, 1066 }; 1067 1068 NL_LOG(LOG_DEBUG2, "DUMP table %u family %d", attrs.nhaf_table, attrs.nhaf_family); 1069 wa.hdr.nlmsg_flags |= NLM_F_MULTI; 1070 for (struct nhop_object *nh = nhops_iter_start(&iter); nh; 1071 nh = nhops_iter_next(&iter)) { 1072 dump_nhop(nh, 0, &wa.hdr, wa.nw); 1073 } 1074 nhops_iter_stop(&iter); 1075 } else { 1076 struct unhop_ctl *ctl = atomic_load_ptr(&V_un_ctl); 1077 1078 if (__predict_false(ctl == NULL)) 1079 return (ESRCH); 1080 1081 NL_LOG(LOG_DEBUG2, "DUMP unhops"); 1082 UN_RLOCK(ctl); 1083 wa.hdr.nlmsg_flags |= NLM_F_MULTI; 1084 CHT_SLIST_FOREACH(&ctl->un_head, unhop, unhop) { 1085 if (UNHOP_IS_MASTER(unhop) && match_unhop(&attrs, unhop)) 1086 dump_unhop(unhop, &wa.hdr, wa.nw); 1087 } CHT_SLIST_FOREACH_END; 1088 UN_RUNLOCK(ctl); 1089 } 1090 1091 if (wa.error == 0) { 1092 if (!nlmsg_end_dump(wa.nw, wa.error, &wa.hdr)) 1093 return (ENOMEM); 1094 } 1095 return (0); 1096 } 1097 1098 static const struct rtnl_cmd_handler cmd_handlers[] = { 1099 { 1100 .cmd = NL_RTM_NEWNEXTHOP, 1101 .name = "RTM_NEWNEXTHOP", 1102 .cb = &rtnl_handle_newnhop, 1103 .priv = PRIV_NET_ROUTE, 1104 }, 1105 { 1106 .cmd = NL_RTM_DELNEXTHOP, 1107 .name = "RTM_DELNEXTHOP", 1108 .cb = &rtnl_handle_delnhop, 1109 .priv = PRIV_NET_ROUTE, 1110 }, 1111 { 1112 .cmd = NL_RTM_GETNEXTHOP, 1113 .name = "RTM_GETNEXTHOP", 1114 .cb = &rtnl_handle_getnhop, 1115 } 1116 }; 1117 1118 static const struct nlhdr_parser *all_parsers[] = { &nhmsg_parser, &nh_fbsd_parser }; 1119 1120 void 1121 rtnl_nexthops_init(void) 1122 { 1123 NL_VERIFY_PARSERS(all_parsers); 1124 rtnl_register_messages(cmd_handlers, NL_ARRAY_LEN(cmd_handlers)); 1125 } 1126