/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "opt_inet.h"
#include "opt_route.h"

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/epoch.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>

#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/nhgrp_var.h>

#define	DEBUG_MOD_NAME	nhgrp_ctl
#define	DEBUG_MAX_LEVEL	LOG_DEBUG
#include <net/route/route_debug.h>
_DECLARE_DEBUG(LOG_INFO);

/*
 * This file contains the supporting functions for creating multipath groups
 * and compiling their dataplane parts.
 */

/* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
_Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
    "MPF_MULTIPATH must be the same as NHF_MULTIPATH");
/* Offset and size of flags field has to be the same for nhop/nhop groups */
CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
/* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */
CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);

static int wn_cmp_idx(const void *a, const void *b);
static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);

static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
    struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror);
static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
static void destroy_nhgrp_epoch(epoch_context_t ctx);
static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);

static int
wn_cmp_idx(const void *a, const void *b)
{
	const struct weightened_nhop *w_a = a;
	const struct weightened_nhop *w_b = b;
	uint32_t a_idx = w_a->nh->nh_priv->nh_idx;
	uint32_t b_idx = w_b->nh->nh_priv->nh_idx;

	if (a_idx < b_idx)
		return (-1);
	else if (a_idx > b_idx)
		return (1);
	else
		return (0);
}

/*
 * Perform in-place sorting for array of nexthops in @wn.
 * Sort by nexthop index ascending.
 */
static void
sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
{
	qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx);
}

/*
 * In order to determine the minimum weight difference in the array
 * of weights, create a sorted array of weights, using the spare "storage"
 * field in `struct weightened_nhop`.
 * Assume the weights to be (mostly) the same and use insertion sort to
 * make it sorted.
 */
static void
sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items)
{
	wn[0].storage = wn[0].weight;
	for (int i = 1, j = 0; i < num_items; i++) {
		uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered
		/* Move all weights > weight 1 position right */
		for (j = i - 1; j >= 0 && wn[j].storage > weight; j--)
			wn[j + 1].storage = wn[j].storage;
		wn[j + 1].storage = weight;
	}
}
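
/*
 * Worked example (illustrative comment only): for input weights
 * { 200, 100, 200 }, the pass above leaves the 'weight' fields intact
 * and fills 'storage' with the sorted sequence { 100, 200, 200 }.
 * calc_min_mpath_slots_fast() below then scans that sequence for the
 * minimum non-zero step: min(100 - 0, 200 - 100) = 100, yielding
 * 500 / 100 = 5 slots for this weight set.
 */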

/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights in the common use case where weights are "easily"
 * comparable.
 * Assumes @wn is sorted by weight ascending and each weight is > 0.
 * Returns number of slots or 0 if precise calculation failed.
 *
 * Some examples:
 * note: (i, X) pair means (nhop=i, weight=X):
 * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3, 3]
 */
static uint32_t
calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items,
    uint64_t *ptotal)
{
	uint32_t i, last, xmin;
	uint64_t total = 0;

	// Get sorted array of weights in .storage field
	sort_weightened_nhops_weights(wn, num_items);

	last = 0;
	xmin = wn[0].storage;
	for (i = 0; i < num_items; i++) {
		total += wn[i].storage;
		if ((wn[i].storage != last) &&
		    ((wn[i].storage - last < xmin) || xmin == 0)) {
			xmin = wn[i].storage - last;
		}
		last = wn[i].storage;
	}
	*ptotal = total;
	/* xmin is the minimum unit of desired capacity */
	if ((total % xmin) != 0)
		return (0);
	for (i = 0; i < num_items; i++) {
		if ((wn[i].weight % xmin) != 0)
			return (0);
	}

	return ((uint32_t)(total / xmin));
}

/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights while maintaining weight coefficients.
 *
 * Assume @wn is sorted by weight ascending and each weight is > 0.
 *
 * Tries to find the simple precise solution first and falls back to
 * RIB_MAX_MPATH_WIDTH in case of any failure.
 */
static uint32_t
calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items)
{
	uint32_t v;
	uint64_t total;

	v = calc_min_mpath_slots_fast(wn, num_items, &total);
	if (total == 0)
		return (0);
	if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH))
		v = RIB_MAX_MPATH_WIDTH;

	return (v);
}

/*
 * Nexthop group data consists of
 * 1) dataplane part, with nhgrp_object as a header followed by an
 *   arbitrary number of nexthop pointers.
 * 2) control plane part, with nhgrp_priv as a header, followed by
 *   an arbitrary number of 'struct weightened_nhop' objects.
 *
 * Given nexthop groups are (mostly) immutable, allocate all data
 * in one go.
 */
__noinline static size_t
get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
{
	size_t sz;

	sz = sizeof(struct nhgrp_object);
	sz += nhg_size * sizeof(struct nhop_object *);
	sz += sizeof(struct nhgrp_priv);
	sz += num_nhops * sizeof(struct weightened_nhop);
	return (sz);
}
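
/*
 * Illustrative layout of the single allocation sized above; field
 * placement follows get_nhgrp_alloc_size(), offsets are schematic:
 *
 *   +---------------------------+ <- returned 'struct nhgrp_object *'
 *   | struct nhgrp_object       |    dataplane header (nhg_size, nhg_flags)
 *   | nhops[nhg_size]           |    compiled nexthop pointer array
 *   +---------------------------+ <- NHGRP_PRIV(nhg)
 *   | struct nhgrp_priv         |    control plane header (refcounts, idx)
 *   | nhg_nh_weights[nh_count]  |    'struct weightened_nhop' entries
 *   +---------------------------+
 */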

/*
 * Compile the actual list of nexthops to be used by datapath from
 * the nexthop group @dst_priv.
 *
 * For example, compiling a control plane list of 2 nexthops
 * [(200, A), (100, B)] would result in the datapath array
 * [A, A, B]
 */
static void
compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
    uint32_t num_slots)
{
	struct nhgrp_object *dst;
	int i, slot_idx, remaining_slots;
	uint64_t remaining_sum, nh_weight, nh_slots;

	slot_idx = 0;
	dst = dst_priv->nhg;
	/* Calculate sum of all weights */
	remaining_sum = 0;
	for (i = 0; i < dst_priv->nhg_nh_count; i++)
		remaining_sum += x[i].weight;
	remaining_slots = num_slots;
	FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d",
	    remaining_sum, remaining_slots);
	for (i = 0; i < dst_priv->nhg_nh_count; i++) {
		/* Calculate number of slots for the current nexthop */
		if (remaining_sum > 0) {
			nh_weight = (uint64_t)x[i].weight;
			nh_slots = (nh_weight * remaining_slots / remaining_sum);
		} else
			nh_slots = 0;

		remaining_sum -= x[i].weight;
		remaining_slots -= nh_slots;

		FIB_NH_LOG(LOG_DEBUG3, x[0].nh,
		    " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d",
		    remaining_sum, remaining_slots, (int)nh_slots, slot_idx);

		KASSERT((slot_idx + nh_slots <= num_slots),
		    ("index overflow during nhg compilation"));
		while (nh_slots-- > 0)
			dst->nhops[slot_idx++] = x[i].nh;
	}
}
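
/*
 * Worked trace (illustrative): compiling [(A, 200), (B, 100)] into
 * num_slots = 3:
 *   i=0: nh_slots = 200 * 3 / 300 = 2    -> nhops[] = [A, A]
 *        remaining_sum = 100, remaining_slots = 1
 *   i=1: nh_slots = 100 * 1 / 100 = 1    -> nhops[] = [A, A, B]
 * On the last iteration remaining_sum equals the last weight, so any
 * rounding loss is absorbed and all num_slots entries get filled.
 */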

/*
 * Allocates new nexthop group for the list of weightened nexthops.
 * Assume sorted list.
 * Does NOT reference any nexthops in the group.
 * Returns group with refcount=1 or NULL.
 */
static struct nhgrp_priv *
alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
{
	uint32_t nhgrp_size;
	struct nhgrp_object *nhg;
	struct nhgrp_priv *nhg_priv;

	nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
	if (nhgrp_size == 0) {
		/* Zero weights, abort */
		return (NULL);
	}

	size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
	nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO);
	if (nhg == NULL) {
		FIB_NH_LOG(LOG_INFO, wn[0].nh,
		    "unable to allocate group with num_nhops %d (compiled %u)",
		    num_nhops, nhgrp_size);
		return (NULL);
	}

	/* Has to be the first to make NHGRP_PRIV() work */
	nhg->nhg_size = nhgrp_size;
	nhg->nhg_flags = MPF_MULTIPATH;

	nhg_priv = NHGRP_PRIV(nhg);
	nhg_priv->nhg_nh_count = num_nhops;
	refcount_init(&nhg_priv->nhg_refcount, 1);

	/* Please see nhgrp_free() comments on the initial value */
	refcount_init(&nhg_priv->nhg_linked, 2);

	nhg_priv->nhg = nhg;
	memcpy(&nhg_priv->nhg_nh_weights[0], wn,
	    num_nhops * sizeof(struct weightened_nhop));

	FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u",
	    num_nhops, nhgrp_size);

	compile_nhgrp(nhg_priv, wn, nhg->nhg_size);

	return (nhg_priv);
}

void
nhgrp_ref_object(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	u_int old __diagused;

	nhg_priv = NHGRP_PRIV(nhg);
	old = refcount_acquire(&nhg_priv->nhg_refcount);
	KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
}

void
nhgrp_free(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	struct nh_control *ctl;
	struct epoch_tracker et;

	nhg_priv = NHGRP_PRIV(nhg);

	if (!refcount_release(&nhg_priv->nhg_refcount))
		return;

	/*
	 * Group objects don't have an explicit lock attached to them.
	 * As groups are reclaimed based on reference count, it is possible
	 * that some groups will persist after the vnet destruction callback
	 * is called. Given that, handle the scenario of nhgrp_free() being
	 * called either after or simultaneously with nhgrp_ctl_unlink_all()
	 * by using another reference counter: nhg_linked.
	 *
	 * There are only 2 places where nhg_linked can be decreased:
	 *  rib destroy (nhgrp_ctl_unlink_all) and this function.
	 * nhg_linked can never be increased.
	 *
	 * Hence, use the initial value of 2 to make use of
	 *  refcount_release_if_not_last().
	 *
	 * There can be two scenarios when calling this function:
	 *
	 * 1) nhg_linked value is 2. This means that either
	 *  nhgrp_ctl_unlink_all() has not been called OR it is running,
	 *  but we are guaranteed that nh_control won't be freed in
	 *  this epoch. Hence, the group can be safely unlinked.
	 *
	 * 2) nhg_linked value is 1. In that case, nhgrp_ctl_unlink_all()
	 *  has been called and the nhgrp unlink can be skipped.
	 */

	NET_EPOCH_ENTER(et);
	if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
		ctl = nhg_priv->nh_control;
		if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
			/* Do not try to reclaim */
			RT_LOG(LOG_INFO, "Failed to unlink nexthop group %p",
			    nhg_priv);
			NET_EPOCH_EXIT(et);
			return;
		}
		MPASS((nhg_priv->nhg_idx == 0));
		MPASS((nhg_priv->nhg_refcount == 0));
	}
	NET_EPOCH_EXIT(et);

	NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx);
}
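
/*
 * Illustrative nhg_linked timelines for the scheme described above:
 *
 * 1) Normal reclamation:
 *    alloc_nhgrp()            nhg_linked = 2
 *    nhgrp_free()             2 -> 1, group unlinked here
 *    epoch callback           destroy_nhgrp()
 *
 * 2) Racing with rib/vnet teardown:
 *    nhgrp_ctl_unlink_all()   2 -> 1, group already unlinked
 *    nhgrp_free()             refcount_release_if_not_last() fails,
 *                             unlink skipped
 *    epoch callback           destroy_nhgrp()
 */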

/*
 * Destroys all local resources belonging to @nhg_priv.
 */
__noinline static void
destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
{
	free(nhg_priv->nhg, M_NHOP);
}

__noinline static void
destroy_nhgrp(struct nhgrp_priv *nhg_priv)
{
	IF_DEBUG_LEVEL(LOG_DEBUG2) {
		char nhgbuf[NHOP_PRINT_BUFSIZE] __unused;
		FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh,
		    "destroying %s", nhgrp_print_buf(nhg_priv->nhg,
		    nhgbuf, sizeof(nhgbuf)));
	}

	free_nhgrp_nhops(nhg_priv);
	destroy_nhgrp_int(nhg_priv);
}

/*
 * Epoch callback indicating group is safe to destroy
 */
static void
destroy_nhgrp_epoch(epoch_context_t ctx)
{
	struct nhgrp_priv *nhg_priv;

	nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);

	destroy_nhgrp(nhg_priv);
}

static bool
ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{
	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0)
			continue;

		/*
		 * Failed to ref the nexthop, b/c it's deleted.
		 * Need to roll back the references.
		 */
		for (int j = 0; j < i; j++)
			nhop_free(nhg_priv->nhg_nh_weights[j].nh);
		return (false);
	}

	return (true);
}

static void
free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{
	for (int i = 0; i < nhg_priv->nhg_nh_count; i++)
		nhop_free(nhg_priv->nhg_nh_weights[i].nh);
}

/*
 * Allocates a nexthop group of size @num_nhops with nexthops specified by
 * @wn. Nexthops have to be unique and match the fibnum/family of the group.
 * Returns an unlinked nhgrp object on success, or NULL and non-zero @perror.
 */
struct nhgrp_object *
nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops,
    int *perror)
{
	struct rib_head *rh = rt_tables_get_rnh(fibnum, family);
	struct nhgrp_priv *nhg_priv;
	struct nh_control *ctl;

	if (rh == NULL) {
		*perror = E2BIG;
		return (NULL);
	}

	ctl = rh->nh_control;

	if (num_nhops > RIB_MAX_MPATH_WIDTH) {
		*perror = E2BIG;
		return (NULL);
	}

	if (ctl->gr_head.hash_size == 0) {
		/* First multipath request. Bootstrap mpath datastructures. */
		if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Sort nexthops & check there are no duplicates */
	sort_weightened_nhops(wn, num_nhops);
	uint32_t last_id = 0;
	for (int i = 0; i < num_nhops; i++) {
		if (wn[i].nh->nh_priv->nh_control != ctl) {
			*perror = EINVAL;
			return (NULL);
		}
		if (wn[i].nh->nh_priv->nh_idx == last_id) {
			*perror = EEXIST;
			return (NULL);
		}
		last_id = wn[i].nh->nh_priv->nh_idx;
	}

	if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) {
		*perror = ENOMEM;
		return (NULL);
	}
	nhg_priv->nh_control = ctl;

	*perror = 0;
	return (nhg_priv->nhg);
}

/*
 * Finds an existing group matching @nhg or links @nhg to the tree.
 * Returns the referenced group or NULL and non-zero @perror.
 */
struct nhgrp_object *
nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror)
{
	struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg);
	struct nh_control *ctl = key->nh_control;

	nhg_priv = find_nhgrp(ctl, key);
	if (nhg_priv != NULL) {
		/*
		 * Free the originally-created group. As it hasn't been linked
		 * and the dependent nexthops haven't been referenced, just
		 * free the group.
		 */
		destroy_nhgrp_int(key);
		*perror = 0;
		return (nhg_priv->nhg);
	} else {
		/* No existing group, try to link the new one */
		if (!ref_nhgrp_nhops(key)) {
			/*
			 * Some of the nexthops have been scheduled for deletion.
			 * As the group hasn't been linked / no nexthops have been
			 * referenced, call the final destructor immediately.
			 */
			destroy_nhgrp_int(key);
			*perror = EAGAIN;
			return (NULL);
		}
		if (link_nhgrp(ctl, key) == 0) {
			/* Unable to allocate index? */
			*perror = EAGAIN;
			free_nhgrp_nhops(key);
			destroy_nhgrp_int(key);
			return (NULL);
		}
		*perror = 0;
		return (nhg);
	}

	/* NOTREACHED */
}

/*
 * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
 *
 * Returns referenced nhop group or NULL, passing error code in @perror.
 */
struct nhgrp_priv *
get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
    uint32_t uidx, int *perror)
{
	struct nhgrp_object *nhg;

	nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family,
	    wn, num_nhops, perror);
	if (nhg == NULL)
		return (NULL);
	nhgrp_set_uidx(nhg, uidx);
	nhg = nhgrp_get_nhgrp(nhg, perror);
	if (nhg != NULL)
		return (NHGRP_PRIV(nhg));
	return (NULL);
}

/*
 * Appends one or more nexthops denoted by @wn to the nexthop group @gr_orig.
 *
 * Returns referenced nexthop group or NULL. In the latter case, @perror is
 * filled with an error code.
 * Note that the function does NOT check whether the new nexthops already
 * exist in @gr_orig. As a result, they will be added, resulting in the
 * same nexthop being present multiple times in the new group.
 */
static struct nhgrp_priv *
append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
    struct weightened_nhop *wn, int num_nhops, int *perror)
{
	char storage[64];
	struct weightened_nhop *pnhops;
	struct nhgrp_priv *nhg_priv;
	const struct nhgrp_priv *src_priv;
	size_t sz;
	int curr_nhops;

	src_priv = NHGRP_PRIV_CONST(gr_orig);
	curr_nhops = src_priv->nhg_nh_count;

	*perror = 0;

	sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	if (sz <= sizeof(storage))
		pnhops = (struct weightened_nhop *)&storage[0];
	else {
		pnhops = malloc(sz, M_TEMP, M_NOWAIT);
		if (pnhops == NULL) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Copy nhops from original group first */
	memcpy(pnhops, src_priv->nhg_nh_weights,
	    curr_nhops * sizeof(struct weightened_nhop));
	memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop));
	curr_nhops += num_nhops;

	nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror);

	if (pnhops != (struct weightened_nhop *)&storage[0])
		free(pnhops, M_TEMP);

	if (nhg_priv == NULL)
		return (NULL);

	return (nhg_priv);
}

/*
 * Creates/finds nexthop group based on @wn and @num_nhops.
 * Returns 0 on success with referenced group in @pnhg, or
 * errno.
 *
 * If the error is EAGAIN, then the operation can be retried.
 */
int
nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
    uint32_t uidx, struct nhgrp_object **pnhg)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhgrp_priv *nhg_priv;
	int error;

	nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error);
	if (nhg_priv != NULL)
		*pnhg = nhg_priv->nhg;

	return (error);
}
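
/*
 * Usage sketch (illustrative only, not compiled in): build a referenced
 * 2-way group with a 2:1 weight split from two already-referenced
 * nexthops. 'example_make_group' and its arguments are hypothetical;
 * the nhgrp_get_group() contract is as documented above.
 */
#if 0
static int
example_make_group(struct rib_head *rh, struct nhop_object *nh_a,
    struct nhop_object *nh_b, struct nhgrp_object **pnhg)
{
	struct weightened_nhop wn[2] = {};

	wn[0].nh = nh_a;
	wn[0].weight = 200;
	wn[1].nh = nh_b;
	wn[1].weight = 100;

	/* On success *pnhg is referenced; drop with nhgrp_free() when done */
	return (nhgrp_get_group(rh, wn, 2, 0, pnhg));
}
#endif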

/*
 * Creates a new nexthop group based on the @src group, without the nexthops
 * chosen by @flt_func.
 * Returns 0 on success, storing the referenced nhop group/object in @rnd.
 */
int
nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt,
    const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data,
    struct route_nhop_data *rnd)
{
	char storage[64];
	struct nh_control *ctl = rh->nh_control;
	struct weightened_nhop *pnhops;
	const struct nhgrp_priv *mp_priv, *src_priv;
	size_t sz;
	int error, i, num_nhops;

	src_priv = NHGRP_PRIV_CONST(src);

	sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	if (sz <= sizeof(storage))
		pnhops = (struct weightened_nhop *)&storage[0];
	else {
		if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
			return (ENOMEM);
	}

	/* Filter nexthops */
	error = 0;
	num_nhops = 0;
	for (i = 0; i < src_priv->nhg_nh_count; i++) {
		if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data))
			continue;
		memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
		    sizeof(struct weightened_nhop));
	}

	if (num_nhops == 0) {
		rnd->rnd_nhgrp = NULL;
		rnd->rnd_weight = 0;
	} else if (num_nhops == 1) {
		rnd->rnd_nhop = pnhops[0].nh;
		rnd->rnd_weight = pnhops[0].weight;
		if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
			error = EAGAIN;
	} else {
		mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error);
		if (mp_priv != NULL)
			rnd->rnd_nhgrp = mp_priv->nhg;
		rnd->rnd_weight = 0;
	}

	if (pnhops != (struct weightened_nhop *)&storage[0])
		free(pnhops, M_TEMP);

	return (error);
}
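
/*
 * Illustrative filter sketch (not compiled in): a rib_filter_f_t that
 * matches nexthops egressing via the interface passed in @flt_data, so
 * nhgrp_get_filtered_group() strips them from the group. The function
 * name is hypothetical; nhop_get_ifp() is the standard nhop(9) accessor.
 */
#if 0
static int
example_filter_by_ifp(const struct rtentry *rt, const struct nhop_object *nh,
    void *flt_data)
{
	/* Non-zero return excludes @nh from the resulting group */
	return (nhop_get_ifp(nh) == (struct ifnet *)flt_data);
}
#endif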

/*
 * Creates a new multipath group based on the existing group/nhop in
 * @rnd_orig and the to-be-added nhop from @rnd_add.
 * Returns 0 on success and stores the result in @rnd_new.
 */
int
nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
    struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
{
	struct nh_control *ctl = rh->nh_control;
	struct nhgrp_priv *nhg_priv;
	struct weightened_nhop wn[2] = {};
	int error;

	if (rnd_orig->rnd_nhop == NULL) {
		/* No paths to add to, just reference the current nhop */
		*rnd_new = *rnd_add;
		if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
			return (EAGAIN);
		return (0);
	}

	wn[0].nh = rnd_add->rnd_nhop;
	wn[0].weight = rnd_add->rnd_weight;

	if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
		/* Simple merge of 2 non-multipath nexthops */
		wn[1].nh = rnd_orig->rnd_nhop;
		wn[1].weight = rnd_orig->rnd_weight;
		nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error);
	} else {
		/* Get a new nhop group with @rnd_add->rnd_nhop as an additional nhop */
		nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
		    &error);
	}

	if (nhg_priv == NULL)
		return (error);
	rnd_new->rnd_nhgrp = nhg_priv->nhg;
	rnd_new->rnd_weight = 0;

	return (0);
}

/*
 * Returns a pointer to the array of nexthops with weights for the
 * given @nhg. Stores the number of items in the array into @pnum_nhops.
 */
const struct weightened_nhop *
nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops)
{
	const struct nhgrp_priv *nhg_priv;

	KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));

	nhg_priv = NHGRP_PRIV_CONST(nhg);
	*pnum_nhops = nhg_priv->nhg_nh_count;

	return (nhg_priv->nhg_nh_weights);
}

void
nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx)
{
	struct nhgrp_priv *nhg_priv;

	KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));

	nhg_priv = NHGRP_PRIV(nhg);

	nhg_priv->nhg_uidx = uidx;
}

uint32_t
nhgrp_get_uidx(const struct nhgrp_object *nhg)
{
	const struct nhgrp_priv *nhg_priv;

	KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));

	nhg_priv = NHGRP_PRIV_CONST(nhg);
	return (nhg_priv->nhg_uidx);
}

/*
 * Prints nexthop group @nhg data into the provided @buf.
 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100]
 * Example: nhg#33/sz=5:[#1:100,#2:100,..]
 */
char *
nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize)
{
	const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg);

	int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx,
	    nhg_priv->nhg_nh_count);

	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i];
		int len = snprintf(&buf[off], bufsize - off, "#%u:%u,",
		    wn->nh->nh_priv->nh_idx, wn->weight);
		if (len + off + 3 >= bufsize) {
			int len = snprintf(&buf[off], bufsize - off, "...");
			off += len;
			break;
		}
		off += len;
	}
	if (off > 0)
		off--; // remove last ","
	if (off + 1 < bufsize)
		snprintf(&buf[off], bufsize - off, "]");
	return (buf);
}
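
/*
 * Layout of a single record emitted by dump_nhgrp_entry() below
 * (schematic, sizes as computed in the function):
 *
 *   struct rt_msghdr                  rtm_type = RTM_GET
 *   struct nhgrp_external             group index + refcount
 *   struct nhgrp_container            NHG_C_TYPE_CNHOPS (control plane)
 *     nhg_nh_count x struct nhgrp_nhop_external    (nh_idx, nh_weight)
 *   struct nhgrp_container            NHG_C_TYPE_DNHOPS (dataplane)
 *     nhg_size x struct nhgrp_nhop_external        (nh_idx, weight 0)
 */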

__noinline static int
dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
    char *buffer, size_t buffer_size, struct sysctl_req *w)
{
	struct rt_msghdr *rtm;
	struct nhgrp_external *nhge;
	struct nhgrp_container *nhgc;
	const struct nhgrp_object *nhg;
	struct nhgrp_nhop_external *ext;
	int error;
	size_t sz;

	nhg = nhg_priv->nhg;

	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
	/* controlplane nexthops */
	sz += sizeof(struct nhgrp_container);
	sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
	/* dataplane nexthops */
	sz += sizeof(struct nhgrp_container);
	sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;

	KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));

	bzero(buffer, sz);

	rtm = (struct rt_msghdr *)buffer;
	rtm->rtm_msglen = sz;
	rtm->rtm_version = RTM_VERSION;
	rtm->rtm_type = RTM_GET;

	nhge = (struct nhgrp_external *)(rtm + 1);

	nhge->nhg_idx = nhg_priv->nhg_idx;
	nhge->nhg_refcount = nhg_priv->nhg_refcount;

	/* fill in control plane nexthops first */
	nhgc = (struct nhgrp_container *)(nhge + 1);
	nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
	nhgc->nhgc_subtype = 0;
	nhgc->nhgc_len = sizeof(struct nhgrp_container);
	nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
	nhgc->nhgc_count = nhg_priv->nhg_nh_count;

	ext = (struct nhgrp_nhop_external *)(nhgc + 1);
	for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
		ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
		ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
	}

	/* fill in dataplane nexthops */
	nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
	nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
	nhgc->nhgc_subtype = 0;
	nhgc->nhgc_len = sizeof(struct nhgrp_container);
	nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
	nhgc->nhgc_count = nhg->nhg_size;

	ext = (struct nhgrp_nhop_external *)(nhgc + 1);
	for (int i = 0; i < nhg->nhg_size; i++) {
		ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
		ext[i].nh_weight = 0;
	}

	error = SYSCTL_OUT(w, buffer, sz);

	return (error);
}

uint32_t
nhgrp_get_idx(const struct nhgrp_object *nhg)
{
	const struct nhgrp_priv *nhg_priv;

	nhg_priv = NHGRP_PRIV_CONST(nhg);
	return (nhg_priv->nhg_idx);
}

uint8_t
nhgrp_get_origin(const struct nhgrp_object *nhg)
{
	return (NHGRP_PRIV_CONST(nhg)->nhg_origin);
}

void
nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin)
{
	NHGRP_PRIV(nhg)->nhg_origin = origin;
}

uint32_t
nhgrp_get_count(struct rib_head *rh)
{
	struct nh_control *ctl;
	uint32_t count;

	ctl = rh->nh_control;

	NHOPS_RLOCK(ctl);
	count = ctl->gr_head.items_count;
	NHOPS_RUNLOCK(ctl);

	return (count);
}

int
nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
{
	struct nh_control *ctl = rh->nh_control;
	struct epoch_tracker et;
	struct nhgrp_priv *nhg_priv;
	char *buffer;
	size_t sz;
	int error = 0;

	if (ctl->gr_head.items_count == 0)
		return (0);

	/* Calculate the maximum nhop group size in bytes */
	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
	sz += 2 * sizeof(struct nhgrp_container);
	sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
	buffer = malloc(sz, M_TEMP, M_NOWAIT);
	if (buffer == NULL)
		return (ENOMEM);

	NET_EPOCH_ENTER(et);
	NHOPS_RLOCK(ctl);
	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
		error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w);
		if (error != 0)
			break;
	} CHT_SLIST_FOREACH_END;
	NHOPS_RUNLOCK(ctl);
	NET_EPOCH_EXIT(et);

	free(buffer, M_TEMP);

	return (error);
}