1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 
26 */ 27 #include "opt_inet.h" 28 #include "opt_route.h" 29 30 #include <sys/param.h> 31 #include <sys/systm.h> 32 #include <sys/lock.h> 33 #include <sys/rmlock.h> 34 #include <sys/malloc.h> 35 #include <sys/mbuf.h> 36 #include <sys/refcount.h> 37 #include <sys/socket.h> 38 #include <sys/sysctl.h> 39 #include <sys/kernel.h> 40 #include <sys/epoch.h> 41 42 #include <net/if.h> 43 #include <net/if_var.h> 44 #include <net/if_private.h> 45 #include <net/route.h> 46 #include <net/route/route_ctl.h> 47 #include <net/route/route_var.h> 48 #include <net/vnet.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_var.h> 52 #include <netinet/in_fib.h> 53 54 #include <net/route/nhop_utils.h> 55 #include <net/route/nhop.h> 56 #include <net/route/nhop_var.h> 57 #include <net/route/nhgrp_var.h> 58 59 #define DEBUG_MOD_NAME nhgrp_ctl 60 #define DEBUG_MAX_LEVEL LOG_DEBUG 61 #include <net/route/route_debug.h> 62 _DECLARE_DEBUG(LOG_INFO); 63 64 /* 65 * This file contains the supporting functions for creating multipath groups 66 * and compiling their dataplane parts. 
67 */ 68 69 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */ 70 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH, 71 "MPF_MULTIPATH must be the same as NHF_MULTIPATH"); 72 /* Offset and size of flags field has to be the same for nhop/nhop groups */ 73 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags); 74 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */ 75 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64); 76 77 static int wn_cmp_idx(const void *a, const void *b); 78 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops); 79 80 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl, 81 struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror); 82 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv); 83 static void destroy_nhgrp_epoch(epoch_context_t ctx); 84 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv); 85 86 static int 87 wn_cmp_idx(const void *a, const void *b) 88 { 89 const struct weightened_nhop *w_a = a; 90 const struct weightened_nhop *w_b = b; 91 uint32_t a_idx = w_a->nh->nh_priv->nh_idx; 92 uint32_t b_idx = w_b->nh->nh_priv->nh_idx; 93 94 if (a_idx < b_idx) 95 return (-1); 96 else if (a_idx > b_idx) 97 return (1); 98 else 99 return (0); 100 } 101 102 /* 103 * Perform in-place sorting for array of nexthops in @wn. 104 * Sort by nexthop index ascending. 105 */ 106 static void 107 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops) 108 { 109 110 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx); 111 } 112 113 /* 114 * In order to determine the minimum weight difference in the array 115 * of weights, create a sorted array of weights, using spare "storage" 116 * field in the `struct weightened_nhop`. 117 * Assume weights to be (mostly) the same and use insertion sort to 118 * make it sorted. 
119 */ 120 static void 121 sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items) 122 { 123 wn[0].storage = wn[0].weight; 124 for (int i = 1, j = 0; i < num_items; i++) { 125 uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered 126 /* Move all weights > weight 1 position right */ 127 for (j = i - 1; j >= 0 && wn[j].storage > weight; j--) 128 wn[j + 1].storage = wn[j].storage; 129 wn[j + 1].storage = weight; 130 } 131 } 132 133 /* 134 * Calculate minimum number of slots required to fit the existing 135 * set of weights in the common use case where weights are "easily" 136 * comparable. 137 * Assumes @wn is sorted by weight ascending and each weight is > 0. 138 * Returns number of slots or 0 if precise calculation failed. 139 * 140 * Some examples: 141 * note: (i, X) pair means (nhop=i, weight=X): 142 * (1, 1) (2, 2) -> 3 slots [1, 2, 2] 143 * (1, 100), (2, 200) -> 3 slots [1, 2, 2] 144 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3] 145 */ 146 static uint32_t 147 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items, 148 uint64_t *ptotal) 149 { 150 uint32_t i, last, xmin; 151 uint64_t total = 0; 152 153 // Get sorted array of weights in .storage field 154 sort_weightened_nhops_weights(wn, num_items); 155 156 last = 0; 157 xmin = wn[0].storage; 158 for (i = 0; i < num_items; i++) { 159 total += wn[i].storage; 160 if ((wn[i].storage != last) && 161 ((wn[i].storage - last < xmin) || xmin == 0)) { 162 xmin = wn[i].storage - last; 163 } 164 last = wn[i].storage; 165 } 166 *ptotal = total; 167 /* xmin is the minimum unit of desired capacity */ 168 if ((total % xmin) != 0) 169 return (0); 170 for (i = 0; i < num_items; i++) { 171 if ((wn[i].weight % xmin) != 0) 172 return (0); 173 } 174 175 return ((uint32_t)(total / xmin)); 176 } 177 178 /* 179 * Calculate minimum number of slots required to fit the existing 180 * set of weights while maintaining weight coefficients. 
181 * 182 * Assume @wn is sorted by weight ascending and each weight is > 0. 183 * 184 * Tries to find simple precise solution first and falls back to 185 * RIB_MAX_MPATH_WIDTH in case of any failure. 186 */ 187 static uint32_t 188 calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items) 189 { 190 uint32_t v; 191 uint64_t total; 192 193 v = calc_min_mpath_slots_fast(wn, num_items, &total); 194 if (total == 0) 195 return (0); 196 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) 197 v = RIB_MAX_MPATH_WIDTH; 198 199 return (v); 200 } 201 202 /* 203 * Nexthop group data consists of 204 * 1) dataplane part, with nhgrp_object as a header followed by an 205 * arbitrary number of nexthop pointers. 206 * 2) control plane part, with nhgrp_priv as a header, followed by 207 * an arbirtrary number of 'struct weightened_nhop' object. 208 * 209 * Given nexthop groups are (mostly) immutable, allocate all data 210 * in one go. 211 * 212 */ 213 __noinline static size_t 214 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) 215 { 216 size_t sz; 217 218 sz = sizeof(struct nhgrp_object); 219 sz += nhg_size * sizeof(struct nhop_object *); 220 sz += sizeof(struct nhgrp_priv); 221 sz += num_nhops * sizeof(struct weightened_nhop); 222 return (sz); 223 } 224 225 /* 226 * Compile actual list of nexthops to be used by datapath from 227 * the nexthop group @dst. 
 *
 * For example, compiling control plane list of 2 nexthops
 * [(200, A), (100, B)] would result in the datapath array
 * [A, A, B]
 */
static void
compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
    uint32_t num_slots)
{
	struct nhgrp_object *dst;
	int i, slot_idx, remaining_slots;
	uint64_t remaining_sum, nh_weight, nh_slots;

	slot_idx = 0;
	dst = dst_priv->nhg;
	/* Calculate sum of all weights */
	remaining_sum = 0;
	for (i = 0; i < dst_priv->nhg_nh_count; i++)
		remaining_sum += x[i].weight;
	remaining_slots = num_slots;
	FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d",
	    remaining_sum, remaining_slots);
	for (i = 0; i < dst_priv->nhg_nh_count; i++) {
		/*
		 * Calculate number of slots for the current nexthop:
		 * floor(weight * remaining_slots / remaining_sum).
		 * Recomputing against the *remaining* sum and slot count
		 * distributes the rounding error across nexthops, so the
		 * last iteration always consumes all remaining slots.
		 */
		if (remaining_sum > 0) {
			nh_weight = (uint64_t)x[i].weight;
			nh_slots = (nh_weight * remaining_slots / remaining_sum);
		} else
			nh_slots = 0;

		remaining_sum -= x[i].weight;
		remaining_slots -= nh_slots;

		FIB_NH_LOG(LOG_DEBUG3, x[0].nh,
		    " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d",
		    remaining_sum, remaining_slots, (int)nh_slots, slot_idx);

		KASSERT((slot_idx + nh_slots <= num_slots),
		    ("index overflow during nhg compilation"));
		/* Fill the computed number of dataplane slots with this nhop */
		while (nh_slots-- > 0)
			dst->nhops[slot_idx++] = x[i].nh;
	}
}

/*
 * Allocates new nexthop group for the list of weightened nexthops.
 * Assume sorted list.
 * Does NOT reference any nexthops in the group.
 * Returns group with refcount=1 or NULL.
 */
static struct nhgrp_priv *
alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
{
	uint32_t nhgrp_size;
	struct nhgrp_object *nhg;
	struct nhgrp_priv *nhg_priv;

	nhgrp_size = calc_min_mpath_slots(wn, num_nhops);
	if (nhgrp_size == 0) {
		/* Zero weights, abort */
		return (NULL);
	}

	/* Single allocation holds both dataplane and control plane parts */
	size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops);
	nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO);
	if (nhg == NULL) {
		FIB_NH_LOG(LOG_INFO, wn[0].nh,
		    "unable to allocate group with num_nhops %d (compiled %u)",
		    num_nhops, nhgrp_size);
		return (NULL);
	}

	/* Has to be the first to make NHGRP_PRIV() work */
	nhg->nhg_size = nhgrp_size;
	nhg->nhg_flags = MPF_MULTIPATH;

	nhg_priv = NHGRP_PRIV(nhg);
	nhg_priv->nhg_nh_count = num_nhops;
	refcount_init(&nhg_priv->nhg_refcount, 1);

	/* Please see nhgrp_free() comments on the initial value */
	refcount_init(&nhg_priv->nhg_linked, 2);

	nhg_priv->nhg = nhg;
	/* Keep the control plane copy of (nhop, weight) pairs */
	memcpy(&nhg_priv->nhg_nh_weights[0], wn,
	    num_nhops * sizeof(struct weightened_nhop));

	FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u",
	    num_nhops, nhgrp_size);

	/* Expand weights into the dataplane nhop-pointer array */
	compile_nhgrp(nhg_priv, wn, nhg->nhg_size);

	return (nhg_priv);
}

/*
 * Acquires a reference on the (already referenced) group @nhg.
 */
void
nhgrp_ref_object(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	u_int old __diagused;

	nhg_priv = NHGRP_PRIV(nhg);
	old = refcount_acquire(&nhg_priv->nhg_refcount);
	KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg));
}

/*
 * Releases a reference on the group @nhg; on last release unlinks the
 * group from its nh_control and schedules epoch-deferred destruction.
 */
void
nhgrp_free(struct nhgrp_object *nhg)
{
	struct nhgrp_priv *nhg_priv;
	struct nh_control *ctl;
	struct epoch_tracker et;

	nhg_priv = NHGRP_PRIV(nhg);

	if (!refcount_release(&nhg_priv->nhg_refcount))
		return;

	/*
	 * Group objects don't have an explicit lock attached to them.
	 * As groups are reclaimed based on reference count, it is possible
	 * that some groups will persist after vnet destruction callback
	 * called. Given that, handle scenario with nhgrp_free_group() being
	 * called either after or simultaneously with nhgrp_ctl_unlink_all()
	 * by using another reference counter: nhg_linked.
	 *
	 * There are only 2 places, where nhg_linked can be decreased:
	 * rib destroy (nhgrp_ctl_unlink_all) and this function.
	 * nhg_linked can never be increased.
	 *
	 * Hence, use initial value of 2 to make use of
	 * refcount_release_if_not_last().
	 *
	 * There can be two scenarios when calling this function:
	 *
	 * 1) nhg_linked value is 2. This means that either
	 * nhgrp_ctl_unlink_all() has not been called OR it is running,
	 * but we are guaranteed that nh_control won't be freed in
	 * this epoch. Hence, nexthop can be safely unlinked.
	 *
	 * 2) nhg_linked value is 1. In that case, nhgrp_ctl_unlink_all()
	 * has been called and nhgrp unlink can be skipped.
	 */

	NET_EPOCH_ENTER(et);
	if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
		ctl = nhg_priv->nh_control;
		if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
			/* Do not try to reclaim */
			RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p",
			    nhg_priv);
			NET_EPOCH_EXIT(et);
			return;
		}
		MPASS((nhg_priv->nhg_idx == 0));
		MPASS((nhg_priv->nhg_refcount == 0));
	}
	NET_EPOCH_EXIT(et);

	/* Defer the actual free until the current epoch drains */
	NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx);
}

/*
 * Destroys all local resources belonging to @nhg_priv.
392 */ 393 __noinline static void 394 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv) 395 { 396 397 free(nhg_priv->nhg, M_NHOP); 398 } 399 400 __noinline static void 401 destroy_nhgrp(struct nhgrp_priv *nhg_priv) 402 { 403 IF_DEBUG_LEVEL(LOG_DEBUG2) { 404 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused; 405 FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh, 406 "destroying %s", nhgrp_print_buf(nhg_priv->nhg, 407 nhgbuf, sizeof(nhgbuf))); 408 } 409 410 free_nhgrp_nhops(nhg_priv); 411 destroy_nhgrp_int(nhg_priv); 412 } 413 414 /* 415 * Epoch callback indicating group is safe to destroy 416 */ 417 static void 418 destroy_nhgrp_epoch(epoch_context_t ctx) 419 { 420 struct nhgrp_priv *nhg_priv; 421 422 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx); 423 424 destroy_nhgrp(nhg_priv); 425 } 426 427 static bool 428 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 429 { 430 431 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 432 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0) 433 continue; 434 435 /* 436 * Failed to ref the nexthop, b/c it's deleted. 437 * Need to rollback references back. 438 */ 439 for (int j = 0; j < i; j++) 440 nhop_free(nhg_priv->nhg_nh_weights[j].nh); 441 return (false); 442 } 443 444 return (true); 445 } 446 447 static void 448 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 449 { 450 451 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) 452 nhop_free(nhg_priv->nhg_nh_weights[i].nh); 453 } 454 455 /* 456 * Allocate nexthop group of size @num_nhops with nexthops specified by 457 * @wn. Nexthops have to be unique and match the fibnum/family of the group. 458 * Returns unlinked nhgrp object on success or NULL and non-zero perror. 
459 */ 460 struct nhgrp_object * 461 nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops, 462 int *perror) 463 { 464 struct rib_head *rh = rt_tables_get_rnh(fibnum, family); 465 struct nhgrp_priv *nhg_priv; 466 struct nh_control *ctl; 467 468 if (rh == NULL) { 469 *perror = E2BIG; 470 return (NULL); 471 } 472 473 ctl = rh->nh_control; 474 475 if (num_nhops > RIB_MAX_MPATH_WIDTH) { 476 *perror = E2BIG; 477 return (NULL); 478 } 479 480 if (ctl->gr_head.hash_size == 0) { 481 /* First multipath request. Bootstrap mpath datastructures. */ 482 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) { 483 *perror = ENOMEM; 484 return (NULL); 485 } 486 } 487 488 /* Sort nexthops & check there are no duplicates */ 489 sort_weightened_nhops(wn, num_nhops); 490 uint32_t last_id = 0; 491 for (int i = 0; i < num_nhops; i++) { 492 if (wn[i].nh->nh_priv->nh_control != ctl) { 493 *perror = EINVAL; 494 return (NULL); 495 } 496 if (wn[i].nh->nh_priv->nh_idx == last_id) { 497 *perror = EEXIST; 498 return (NULL); 499 } 500 last_id = wn[i].nh->nh_priv->nh_idx; 501 } 502 503 if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) { 504 *perror = ENOMEM; 505 return (NULL); 506 } 507 nhg_priv->nh_control = ctl; 508 509 *perror = 0; 510 return (nhg_priv->nhg); 511 } 512 513 /* 514 * Finds an existing group matching @nhg or links @nhg to the tree. 515 * Returns the referenced group or NULL and non-zero @perror. 516 */ 517 struct nhgrp_object * 518 nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror) 519 { 520 struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg); 521 struct nh_control *ctl = key->nh_control; 522 523 nhg_priv = find_nhgrp(ctl, key); 524 if (nhg_priv != NULL) { 525 /* 526 * Free originally-created group. As it hasn't been linked 527 * and the dependent nexhops haven't been referenced, just free 528 * the group. 
529 */ 530 destroy_nhgrp_int(key); 531 *perror = 0; 532 return (nhg_priv->nhg); 533 } else { 534 /* No existing group, try to link the new one */ 535 if (!ref_nhgrp_nhops(key)) { 536 /* 537 * Some of the nexthops have been scheduled for deletion. 538 * As the group hasn't been linked / no nexhops have been 539 * referenced, call the final destructor immediately. 540 */ 541 destroy_nhgrp_int(key); 542 *perror = EAGAIN; 543 return (NULL); 544 } 545 if (link_nhgrp(ctl, key) == 0) { 546 /* Unable to allocate index? */ 547 *perror = EAGAIN; 548 free_nhgrp_nhops(key); 549 destroy_nhgrp_int(key); 550 return (NULL); 551 } 552 *perror = 0; 553 return (nhg); 554 } 555 556 /* NOTREACHED */ 557 } 558 559 /* 560 * Creates or looks up an existing nexthop group based on @wn and @num_nhops. 561 * 562 * Returns referenced nhop group or NULL, passing error code in @perror. 563 */ 564 struct nhgrp_priv * 565 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops, 566 uint32_t uidx, int *perror) 567 { 568 struct nhgrp_object *nhg; 569 570 nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family, 571 wn, num_nhops, perror); 572 if (nhg == NULL) 573 return (NULL); 574 nhgrp_set_uidx(nhg, uidx); 575 nhg = nhgrp_get_nhgrp(nhg, perror); 576 if (nhg != NULL) 577 return (NHGRP_PRIV(nhg)); 578 return (NULL); 579 } 580 581 582 /* 583 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig. 584 * 585 * Returns referenced nexthop group or NULL. In the latter case, @perror is 586 * filled with an error code. 587 * Note that function does NOT care if the next nexthops already exists 588 * in the @gr_orig. As a result, they will be added, resulting in the 589 * same nexthop being present multiple times in the new group. 
590 */ 591 static struct nhgrp_priv * 592 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig, 593 struct weightened_nhop *wn, int num_nhops, int *perror) 594 { 595 char storage[64]; 596 struct weightened_nhop *pnhops; 597 struct nhgrp_priv *nhg_priv; 598 const struct nhgrp_priv *src_priv; 599 size_t sz; 600 int curr_nhops; 601 602 src_priv = NHGRP_PRIV_CONST(gr_orig); 603 curr_nhops = src_priv->nhg_nh_count; 604 605 *perror = 0; 606 607 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop)); 608 /* optimize for <= 4 paths, each path=16 bytes */ 609 if (sz <= sizeof(storage)) 610 pnhops = (struct weightened_nhop *)&storage[0]; 611 else { 612 pnhops = malloc(sz, M_TEMP, M_NOWAIT); 613 if (pnhops == NULL) { 614 *perror = ENOMEM; 615 return (NULL); 616 } 617 } 618 619 /* Copy nhops from original group first */ 620 memcpy(pnhops, src_priv->nhg_nh_weights, 621 curr_nhops * sizeof(struct weightened_nhop)); 622 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop)); 623 curr_nhops += num_nhops; 624 625 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror); 626 627 if (pnhops != (struct weightened_nhop *)&storage[0]) 628 free(pnhops, M_TEMP); 629 630 if (nhg_priv == NULL) 631 return (NULL); 632 633 return (nhg_priv); 634 } 635 636 637 /* 638 * Creates/finds nexthop group based on @wn and @num_nhops. 639 * Returns 0 on success with referenced group in @rnd, or 640 * errno. 641 * 642 * If the error is EAGAIN, then the operation can be retried. 
643 */ 644 int 645 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, 646 uint32_t uidx, struct nhgrp_object **pnhg) 647 { 648 struct nh_control *ctl = rh->nh_control; 649 struct nhgrp_priv *nhg_priv; 650 int error; 651 652 nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error); 653 if (nhg_priv != NULL) 654 *pnhg = nhg_priv->nhg; 655 656 return (error); 657 } 658 659 /* 660 * Creates new nexthop group based on @src group without the nexthops 661 * chosen by @flt_func. 662 * Returns 0 on success, storring the reference nhop group/object in @rnd. 663 */ 664 int 665 nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt, 666 const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data, 667 struct route_nhop_data *rnd) 668 { 669 char storage[64]; 670 struct nh_control *ctl = rh->nh_control; 671 struct weightened_nhop *pnhops; 672 const struct nhgrp_priv *mp_priv, *src_priv; 673 size_t sz; 674 int error, i, num_nhops; 675 676 src_priv = NHGRP_PRIV_CONST(src); 677 678 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop)); 679 /* optimize for <= 4 paths, each path=16 bytes */ 680 if (sz <= sizeof(storage)) 681 pnhops = (struct weightened_nhop *)&storage[0]; 682 else { 683 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL) 684 return (ENOMEM); 685 } 686 687 /* Filter nexthops */ 688 error = 0; 689 num_nhops = 0; 690 for (i = 0; i < src_priv->nhg_nh_count; i++) { 691 if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data)) 692 continue; 693 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i], 694 sizeof(struct weightened_nhop)); 695 } 696 697 if (num_nhops == 0) { 698 rnd->rnd_nhgrp = NULL; 699 rnd->rnd_weight = 0; 700 } else if (num_nhops == 1) { 701 rnd->rnd_nhop = pnhops[0].nh; 702 rnd->rnd_weight = pnhops[0].weight; 703 if (nhop_try_ref_object(rnd->rnd_nhop) == 0) 704 error = EAGAIN; 705 } else { 706 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error); 707 if (mp_priv != NULL) 708 
rnd->rnd_nhgrp = mp_priv->nhg; 709 rnd->rnd_weight = 0; 710 } 711 712 if (pnhops != (struct weightened_nhop *)&storage[0]) 713 free(pnhops, M_TEMP); 714 715 return (error); 716 } 717 718 /* 719 * Creates new multipath group based on existing group/nhop in @rnd_orig and 720 * to-be-added nhop @wn_add. 721 * Returns 0 on success and stores result in @rnd_new. 722 */ 723 int 724 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig, 725 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new) 726 { 727 struct nh_control *ctl = rh->nh_control; 728 struct nhgrp_priv *nhg_priv; 729 struct weightened_nhop wn[2] = {}; 730 int error; 731 732 if (rnd_orig->rnd_nhop == NULL) { 733 /* No paths to add to, just reference current nhop */ 734 *rnd_new = *rnd_add; 735 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0) 736 return (EAGAIN); 737 return (0); 738 } 739 740 wn[0].nh = rnd_add->rnd_nhop; 741 wn[0].weight = rnd_add->rnd_weight; 742 743 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) { 744 /* Simple merge of 2 non-multipath nexthops */ 745 wn[1].nh = rnd_orig->rnd_nhop; 746 wn[1].weight = rnd_orig->rnd_weight; 747 nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error); 748 } else { 749 /* Get new nhop group with @rt->rt_nhop as an additional nhop */ 750 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1, 751 &error); 752 } 753 754 if (nhg_priv == NULL) 755 return (error); 756 rnd_new->rnd_nhgrp = nhg_priv->nhg; 757 rnd_new->rnd_weight = 0; 758 759 return (0); 760 } 761 762 /* 763 * Returns pointer to array of nexthops with weights for 764 * given @nhg. Stores number of items in the array into @pnum_nhops. 
765 */ 766 const struct weightened_nhop * 767 nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops) 768 { 769 const struct nhgrp_priv *nhg_priv; 770 771 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 772 773 nhg_priv = NHGRP_PRIV_CONST(nhg); 774 *pnum_nhops = nhg_priv->nhg_nh_count; 775 776 return (nhg_priv->nhg_nh_weights); 777 } 778 779 void 780 nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx) 781 { 782 struct nhgrp_priv *nhg_priv; 783 784 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 785 786 nhg_priv = NHGRP_PRIV(nhg); 787 788 nhg_priv->nhg_uidx = uidx; 789 } 790 791 uint32_t 792 nhgrp_get_uidx(const struct nhgrp_object *nhg) 793 { 794 const struct nhgrp_priv *nhg_priv; 795 796 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 797 798 nhg_priv = NHGRP_PRIV_CONST(nhg); 799 return (nhg_priv->nhg_uidx); 800 } 801 802 /* 803 * Prints nexhop group @nhg data in the provided @buf. 804 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100] 805 * Example: nhg#33/sz=5:[#1:100,#2:100,..] 
806 */ 807 char * 808 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize) 809 { 810 const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg); 811 812 int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx, 813 nhg_priv->nhg_nh_count); 814 815 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 816 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i]; 817 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,", 818 wn->nh->nh_priv->nh_idx, wn->weight); 819 if (len + off + 3 >= bufsize) { 820 int len = snprintf(&buf[off], bufsize - off, "..."); 821 off += len; 822 break; 823 } 824 off += len; 825 } 826 if (off > 0) 827 off--; // remove last "," 828 if (off + 1 < bufsize) 829 snprintf(&buf[off], bufsize - off, "]"); 830 return buf; 831 } 832 833 __noinline static int 834 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv, 835 char *buffer, size_t buffer_size, struct sysctl_req *w) 836 { 837 struct rt_msghdr *rtm; 838 struct nhgrp_external *nhge; 839 struct nhgrp_container *nhgc; 840 const struct nhgrp_object *nhg; 841 struct nhgrp_nhop_external *ext; 842 int error; 843 size_t sz; 844 845 nhg = nhg_priv->nhg; 846 847 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 848 /* controlplane nexthops */ 849 sz += sizeof(struct nhgrp_container); 850 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 851 /* dataplane nexthops */ 852 sz += sizeof(struct nhgrp_container); 853 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 854 855 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size")); 856 857 bzero(buffer, sz); 858 859 rtm = (struct rt_msghdr *)buffer; 860 rtm->rtm_msglen = sz; 861 rtm->rtm_version = RTM_VERSION; 862 rtm->rtm_type = RTM_GET; 863 864 nhge = (struct nhgrp_external *)(rtm + 1); 865 866 nhge->nhg_idx = nhg_priv->nhg_idx; 867 nhge->nhg_refcount = nhg_priv->nhg_refcount; 868 869 /* fill in control plane nexthops firs */ 870 nhgc = (struct nhgrp_container 
*)(nhge + 1); 871 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS; 872 nhgc->nhgc_subtype = 0; 873 nhgc->nhgc_len = sizeof(struct nhgrp_container); 874 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 875 nhgc->nhgc_count = nhg_priv->nhg_nh_count; 876 877 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 878 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 879 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx; 880 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight; 881 } 882 883 /* fill in dataplane nexthops */ 884 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]); 885 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS; 886 nhgc->nhgc_subtype = 0; 887 nhgc->nhgc_len = sizeof(struct nhgrp_container); 888 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 889 nhgc->nhgc_count = nhg->nhg_size; 890 891 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 892 for (int i = 0; i < nhg->nhg_size; i++) { 893 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx; 894 ext[i].nh_weight = 0; 895 } 896 897 error = SYSCTL_OUT(w, buffer, sz); 898 899 return (error); 900 } 901 902 uint32_t 903 nhgrp_get_idx(const struct nhgrp_object *nhg) 904 { 905 const struct nhgrp_priv *nhg_priv; 906 907 nhg_priv = NHGRP_PRIV_CONST(nhg); 908 return (nhg_priv->nhg_idx); 909 } 910 911 uint8_t 912 nhgrp_get_origin(const struct nhgrp_object *nhg) 913 { 914 return (NHGRP_PRIV_CONST(nhg)->nhg_origin); 915 } 916 917 void 918 nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin) 919 { 920 NHGRP_PRIV(nhg)->nhg_origin = origin; 921 } 922 923 uint32_t 924 nhgrp_get_count(struct rib_head *rh) 925 { 926 struct nh_control *ctl; 927 uint32_t count; 928 929 ctl = rh->nh_control; 930 931 NHOPS_RLOCK(ctl); 932 count = ctl->gr_head.items_count; 933 NHOPS_RUNLOCK(ctl); 934 935 return (count); 936 } 937 938 int 939 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) 940 { 941 struct nh_control *ctl = rh->nh_control; 942 struct epoch_tracker et; 943 struct 
nhgrp_priv *nhg_priv; 944 char *buffer; 945 size_t sz; 946 int error = 0; 947 948 if (ctl->gr_head.items_count == 0) 949 return (0); 950 951 /* Calculate the maximum nhop group size in bytes */ 952 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 953 sz += 2 * sizeof(struct nhgrp_container); 954 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH; 955 buffer = malloc(sz, M_TEMP, M_NOWAIT); 956 if (buffer == NULL) 957 return (ENOMEM); 958 959 NET_EPOCH_ENTER(et); 960 NHOPS_RLOCK(ctl); 961 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 962 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w); 963 if (error != 0) 964 break; 965 } CHT_SLIST_FOREACH_END; 966 NHOPS_RUNLOCK(ctl); 967 NET_EPOCH_EXIT(et); 968 969 free(buffer, M_TEMP); 970 971 return (error); 972 } 973