1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 #include "opt_inet.h" 30 #include "opt_route.h" 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/lock.h> 36 #include <sys/rmlock.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/refcount.h> 40 #include <sys/socket.h> 41 #include <sys/sysctl.h> 42 #include <sys/kernel.h> 43 #include <sys/epoch.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/if_private.h> 48 #include <net/route.h> 49 #include <net/route/route_ctl.h> 50 #include <net/route/route_var.h> 51 #include <net/vnet.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_var.h> 55 #include <netinet/in_fib.h> 56 57 #include <net/route/nhop_utils.h> 58 #include <net/route/nhop.h> 59 #include <net/route/nhop_var.h> 60 #include <net/route/nhgrp_var.h> 61 62 #define DEBUG_MOD_NAME nhgrp_ctl 63 #define DEBUG_MAX_LEVEL LOG_DEBUG 64 #include <net/route/route_debug.h> 65 _DECLARE_DEBUG(LOG_INFO); 66 67 /* 68 * This file contains the supporting functions for creating multipath groups 69 * and compiling their dataplane parts. 70 */ 71 72 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */ 73 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH, 74 "MPF_MULTIPATH must be the same as NHF_MULTIPATH"); 75 /* Offset and size of flags field has to be the same for nhop/nhop groups */ 76 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags); 77 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */ 78 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64); 79 80 static int wn_cmp_idx(const void *a, const void *b); 81 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops); 82 83 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl, 84 struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror); 85 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv); 86 static void destroy_nhgrp_epoch(epoch_context_t ctx); 87 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv); 88 89 static int 90 wn_cmp_idx(const void *a, const void *b) 91 { 92 const struct weightened_nhop *w_a = a; 93 const struct weightened_nhop *w_b = b; 94 uint32_t a_idx = w_a->nh->nh_priv->nh_idx; 95 uint32_t b_idx = w_b->nh->nh_priv->nh_idx; 96 97 if (a_idx < b_idx) 98 return (-1); 99 else if (a_idx > b_idx) 100 return (1); 101 else 102 return (0); 103 } 104 105 /* 106 * Perform in-place sorting for array of nexthops in @wn. 107 * Sort by nexthop index ascending. 108 */ 109 static void 110 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops) 111 { 112 113 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx); 114 } 115 116 /* 117 * In order to determine the minimum weight difference in the array 118 * of weights, create a sorted array of weights, using spare "storage" 119 * field in the `struct weightened_nhop`. 120 * Assume weights to be (mostly) the same and use insertion sort to 121 * make it sorted. 122 */ 123 static void 124 sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items) 125 { 126 wn[0].storage = wn[0].weight; 127 for (int i = 1, j = 0; i < num_items; i++) { 128 uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered 129 /* Move all weights > weight 1 position right */ 130 for (j = i - 1; j >= 0 && wn[j].storage > weight; j--) 131 wn[j + 1].storage = wn[j].storage; 132 wn[j + 1].storage = weight; 133 } 134 } 135 136 /* 137 * Calculate minimum number of slots required to fit the existing 138 * set of weights in the common use case where weights are "easily" 139 * comparable. 140 * Assumes @wn is sorted by weight ascending and each weight is > 0. 141 * Returns number of slots or 0 if precise calculation failed. 142 * 143 * Some examples: 144 * note: (i, X) pair means (nhop=i, weight=X): 145 * (1, 1) (2, 2) -> 3 slots [1, 2, 2] 146 * (1, 100), (2, 200) -> 3 slots [1, 2, 2] 147 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3] 148 */ 149 static uint32_t 150 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items, 151 uint64_t *ptotal) 152 { 153 uint32_t i, last, xmin; 154 uint64_t total = 0; 155 156 // Get sorted array of weights in .storage field 157 sort_weightened_nhops_weights(wn, num_items); 158 159 last = 0; 160 xmin = wn[0].storage; 161 for (i = 0; i < num_items; i++) { 162 total += wn[i].storage; 163 if ((wn[i].storage != last) && 164 ((wn[i].storage - last < xmin) || xmin == 0)) { 165 xmin = wn[i].storage - last; 166 } 167 last = wn[i].storage; 168 } 169 *ptotal = total; 170 /* xmin is the minimum unit of desired capacity */ 171 if ((total % xmin) != 0) 172 return (0); 173 for (i = 0; i < num_items; i++) { 174 if ((wn[i].weight % xmin) != 0) 175 return (0); 176 } 177 178 return ((uint32_t)(total / xmin)); 179 } 180 181 /* 182 * Calculate minimum number of slots required to fit the existing 183 * set of weights while maintaining weight coefficients. 184 * 185 * Assume @wn is sorted by weight ascending and each weight is > 0. 186 * 187 * Tries to find simple precise solution first and falls back to 188 * RIB_MAX_MPATH_WIDTH in case of any failure. 189 */ 190 static uint32_t 191 calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items) 192 { 193 uint32_t v; 194 uint64_t total; 195 196 v = calc_min_mpath_slots_fast(wn, num_items, &total); 197 if (total == 0) 198 return (0); 199 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) 200 v = RIB_MAX_MPATH_WIDTH; 201 202 return (v); 203 } 204 205 /* 206 * Nexthop group data consists of 207 * 1) dataplane part, with nhgrp_object as a header followed by an 208 * arbitrary number of nexthop pointers. 209 * 2) control plane part, with nhgrp_priv as a header, followed by 210 * an arbirtrary number of 'struct weightened_nhop' object. 211 * 212 * Given nexthop groups are (mostly) immutable, allocate all data 213 * in one go. 214 * 215 */ 216 __noinline static size_t 217 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) 218 { 219 size_t sz; 220 221 sz = sizeof(struct nhgrp_object); 222 sz += nhg_size * sizeof(struct nhop_object *); 223 sz += sizeof(struct nhgrp_priv); 224 sz += num_nhops * sizeof(struct weightened_nhop); 225 return (sz); 226 } 227 228 /* 229 * Compile actual list of nexthops to be used by datapath from 230 * the nexthop group @dst. 231 * 232 * For example, compiling control plane list of 2 nexthops 233 * [(200, A), (100, B)] would result in the datapath array 234 * [A, A, B] 235 */ 236 static void 237 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, 238 uint32_t num_slots) 239 { 240 struct nhgrp_object *dst; 241 int i, slot_idx, remaining_slots; 242 uint64_t remaining_sum, nh_weight, nh_slots; 243 244 slot_idx = 0; 245 dst = dst_priv->nhg; 246 /* Calculate sum of all weights */ 247 remaining_sum = 0; 248 for (i = 0; i < dst_priv->nhg_nh_count; i++) 249 remaining_sum += x[i].weight; 250 remaining_slots = num_slots; 251 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d", 252 remaining_sum, remaining_slots); 253 for (i = 0; i < dst_priv->nhg_nh_count; i++) { 254 /* Calculate number of slots for the current nexthop */ 255 if (remaining_sum > 0) { 256 nh_weight = (uint64_t)x[i].weight; 257 nh_slots = (nh_weight * remaining_slots / remaining_sum); 258 } else 259 nh_slots = 0; 260 261 remaining_sum -= x[i].weight; 262 remaining_slots -= nh_slots; 263 264 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, 265 " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d", 266 remaining_sum, remaining_slots, (int)nh_slots, slot_idx); 267 268 KASSERT((slot_idx + nh_slots <= num_slots), 269 ("index overflow during nhg compilation")); 270 while (nh_slots-- > 0) 271 dst->nhops[slot_idx++] = x[i].nh; 272 } 273 } 274 275 /* 276 * Allocates new nexthop group for the list of weightened nexthops. 277 * Assume sorted list. 278 * Does NOT reference any nexthops in the group. 279 * Returns group with refcount=1 or NULL. 280 */ 281 static struct nhgrp_priv * 282 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) 283 { 284 uint32_t nhgrp_size; 285 struct nhgrp_object *nhg; 286 struct nhgrp_priv *nhg_priv; 287 288 nhgrp_size = calc_min_mpath_slots(wn, num_nhops); 289 if (nhgrp_size == 0) { 290 /* Zero weights, abort */ 291 return (NULL); 292 } 293 294 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops); 295 nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO); 296 if (nhg == NULL) { 297 FIB_NH_LOG(LOG_INFO, wn[0].nh, 298 "unable to allocate group with num_nhops %d (compiled %u)", 299 num_nhops, nhgrp_size); 300 return (NULL); 301 } 302 303 /* Has to be the first to make NHGRP_PRIV() work */ 304 nhg->nhg_size = nhgrp_size; 305 nhg->nhg_flags = MPF_MULTIPATH; 306 307 nhg_priv = NHGRP_PRIV(nhg); 308 nhg_priv->nhg_nh_count = num_nhops; 309 refcount_init(&nhg_priv->nhg_refcount, 1); 310 311 /* Please see nhgrp_free() comments on the initial value */ 312 refcount_init(&nhg_priv->nhg_linked, 2); 313 314 nhg_priv->nhg = nhg; 315 memcpy(&nhg_priv->nhg_nh_weights[0], wn, 316 num_nhops * sizeof(struct weightened_nhop)); 317 318 FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u", 319 num_nhops, nhgrp_size); 320 321 compile_nhgrp(nhg_priv, wn, nhg->nhg_size); 322 323 return (nhg_priv); 324 } 325 326 void 327 nhgrp_ref_object(struct nhgrp_object *nhg) 328 { 329 struct nhgrp_priv *nhg_priv; 330 u_int old __diagused; 331 332 nhg_priv = NHGRP_PRIV(nhg); 333 old = refcount_acquire(&nhg_priv->nhg_refcount); 334 KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg)); 335 } 336 337 void 338 nhgrp_free(struct nhgrp_object *nhg) 339 { 340 struct nhgrp_priv *nhg_priv; 341 struct nh_control *ctl; 342 struct epoch_tracker et; 343 344 nhg_priv = NHGRP_PRIV(nhg); 345 346 if (!refcount_release(&nhg_priv->nhg_refcount)) 347 return; 348 349 /* 350 * group objects don't have an explicit lock attached to it. 351 * As groups are reclaimed based on reference count, it is possible 352 * that some groups will persist after vnet destruction callback 353 * called. Given that, handle scenario with nhgrp_free_group() being 354 * called either after or simultaneously with nhgrp_ctl_unlink_all() 355 * by using another reference counter: nhg_linked. 356 * 357 * There are only 2 places, where nhg_linked can be decreased: 358 * rib destroy (nhgrp_ctl_unlink_all) and this function. 359 * nhg_link can never be increased. 360 * 361 * Hence, use initial value of 2 to make use of 362 * refcount_release_if_not_last(). 363 * 364 * There can be two scenarious when calling this function: 365 * 366 * 1) nhg_linked value is 2. This means that either 367 * nhgrp_ctl_unlink_all() has not been called OR it is running, 368 * but we are guaranteed that nh_control won't be freed in 369 * this epoch. Hence, nexthop can be safely unlinked. 370 * 371 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all() 372 * has been called and nhgrp unlink can be skipped. 373 */ 374 375 NET_EPOCH_ENTER(et); 376 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) { 377 ctl = nhg_priv->nh_control; 378 if (unlink_nhgrp(ctl, nhg_priv) == NULL) { 379 /* Do not try to reclaim */ 380 RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p", 381 nhg_priv); 382 NET_EPOCH_EXIT(et); 383 return; 384 } 385 } 386 NET_EPOCH_EXIT(et); 387 388 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 389 NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx); 390 } 391 392 /* 393 * Destroys all local resources belonging to @nhg_priv. 394 */ 395 __noinline static void 396 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv) 397 { 398 399 free(nhg_priv->nhg, M_NHOP); 400 } 401 402 __noinline static void 403 destroy_nhgrp(struct nhgrp_priv *nhg_priv) 404 { 405 406 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0")); 407 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 408 409 IF_DEBUG_LEVEL(LOG_DEBUG2) { 410 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused; 411 FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh, 412 "destroying %s", nhgrp_print_buf(nhg_priv->nhg, 413 nhgbuf, sizeof(nhgbuf))); 414 } 415 416 free_nhgrp_nhops(nhg_priv); 417 destroy_nhgrp_int(nhg_priv); 418 } 419 420 /* 421 * Epoch callback indicating group is safe to destroy 422 */ 423 static void 424 destroy_nhgrp_epoch(epoch_context_t ctx) 425 { 426 struct nhgrp_priv *nhg_priv; 427 428 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx); 429 430 destroy_nhgrp(nhg_priv); 431 } 432 433 static bool 434 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 435 { 436 437 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 438 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0) 439 continue; 440 441 /* 442 * Failed to ref the nexthop, b/c it's deleted. 443 * Need to rollback references back. 444 */ 445 for (int j = 0; j < i; j++) 446 nhop_free(nhg_priv->nhg_nh_weights[j].nh); 447 return (false); 448 } 449 450 return (true); 451 } 452 453 static void 454 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 455 { 456 457 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) 458 nhop_free(nhg_priv->nhg_nh_weights[i].nh); 459 } 460 461 /* 462 * Allocate nexthop group of size @num_nhops with nexthops specified by 463 * @wn. Nexthops have to be unique and match the fibnum/family of the group. 464 * Returns unlinked nhgrp object on success or NULL and non-zero perror. 465 */ 466 struct nhgrp_object * 467 nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops, 468 int *perror) 469 { 470 struct rib_head *rh = rt_tables_get_rnh(fibnum, family); 471 struct nhgrp_priv *nhg_priv; 472 struct nh_control *ctl; 473 474 if (rh == NULL) { 475 *perror = E2BIG; 476 return (NULL); 477 } 478 479 ctl = rh->nh_control; 480 481 if (num_nhops > RIB_MAX_MPATH_WIDTH) { 482 *perror = E2BIG; 483 return (NULL); 484 } 485 486 if (ctl->gr_head.hash_size == 0) { 487 /* First multipath request. Bootstrap mpath datastructures. */ 488 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) { 489 *perror = ENOMEM; 490 return (NULL); 491 } 492 } 493 494 /* Sort nexthops & check there are no duplicates */ 495 sort_weightened_nhops(wn, num_nhops); 496 uint32_t last_id = 0; 497 for (int i = 0; i < num_nhops; i++) { 498 if (wn[i].nh->nh_priv->nh_control != ctl) { 499 *perror = EINVAL; 500 return (NULL); 501 } 502 if (wn[i].nh->nh_priv->nh_idx == last_id) { 503 *perror = EEXIST; 504 return (NULL); 505 } 506 last_id = wn[i].nh->nh_priv->nh_idx; 507 } 508 509 if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) { 510 *perror = ENOMEM; 511 return (NULL); 512 } 513 nhg_priv->nh_control = ctl; 514 515 *perror = 0; 516 return (nhg_priv->nhg); 517 } 518 519 /* 520 * Finds an existing group matching @nhg or links @nhg to the tree. 521 * Returns the referenced group or NULL and non-zero @perror. 522 */ 523 struct nhgrp_object * 524 nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror) 525 { 526 struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg); 527 struct nh_control *ctl = key->nh_control; 528 529 nhg_priv = find_nhgrp(ctl, key); 530 if (nhg_priv != NULL) { 531 /* 532 * Free originally-created group. As it hasn't been linked 533 * and the dependent nexhops haven't been referenced, just free 534 * the group. 535 */ 536 destroy_nhgrp_int(key); 537 *perror = 0; 538 return (nhg_priv->nhg); 539 } else { 540 /* No existing group, try to link the new one */ 541 if (!ref_nhgrp_nhops(key)) { 542 /* 543 * Some of the nexthops have been scheduled for deletion. 544 * As the group hasn't been linked / no nexhops have been 545 * referenced, call the final destructor immediately. 546 */ 547 destroy_nhgrp_int(key); 548 *perror = EAGAIN; 549 return (NULL); 550 } 551 if (link_nhgrp(ctl, key) == 0) { 552 /* Unable to allocate index? */ 553 *perror = EAGAIN; 554 free_nhgrp_nhops(key); 555 destroy_nhgrp_int(key); 556 return (NULL); 557 } 558 *perror = 0; 559 return (nhg); 560 } 561 562 /* NOTREACHED */ 563 } 564 565 /* 566 * Creates or looks up an existing nexthop group based on @wn and @num_nhops. 567 * 568 * Returns referenced nhop group or NULL, passing error code in @perror. 569 */ 570 struct nhgrp_priv * 571 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops, 572 uint32_t uidx, int *perror) 573 { 574 struct nhgrp_object *nhg; 575 576 nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family, 577 wn, num_nhops, perror); 578 if (nhg == NULL) 579 return (NULL); 580 nhgrp_set_uidx(nhg, uidx); 581 nhg = nhgrp_get_nhgrp(nhg, perror); 582 if (nhg != NULL) 583 return (NHGRP_PRIV(nhg)); 584 return (NULL); 585 } 586 587 588 /* 589 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig. 590 * 591 * Returns referenced nexthop group or NULL. In the latter case, @perror is 592 * filled with an error code. 593 * Note that function does NOT care if the next nexthops already exists 594 * in the @gr_orig. As a result, they will be added, resulting in the 595 * same nexthop being present multiple times in the new group. 596 */ 597 static struct nhgrp_priv * 598 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig, 599 struct weightened_nhop *wn, int num_nhops, int *perror) 600 { 601 char storage[64]; 602 struct weightened_nhop *pnhops; 603 struct nhgrp_priv *nhg_priv; 604 const struct nhgrp_priv *src_priv; 605 size_t sz; 606 int curr_nhops; 607 608 src_priv = NHGRP_PRIV_CONST(gr_orig); 609 curr_nhops = src_priv->nhg_nh_count; 610 611 *perror = 0; 612 613 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop)); 614 /* optimize for <= 4 paths, each path=16 bytes */ 615 if (sz <= sizeof(storage)) 616 pnhops = (struct weightened_nhop *)&storage[0]; 617 else { 618 pnhops = malloc(sz, M_TEMP, M_NOWAIT); 619 if (pnhops == NULL) { 620 *perror = ENOMEM; 621 return (NULL); 622 } 623 } 624 625 /* Copy nhops from original group first */ 626 memcpy(pnhops, src_priv->nhg_nh_weights, 627 curr_nhops * sizeof(struct weightened_nhop)); 628 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop)); 629 curr_nhops += num_nhops; 630 631 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror); 632 633 if (pnhops != (struct weightened_nhop *)&storage[0]) 634 free(pnhops, M_TEMP); 635 636 if (nhg_priv == NULL) 637 return (NULL); 638 639 return (nhg_priv); 640 } 641 642 643 /* 644 * Creates/finds nexthop group based on @wn and @num_nhops. 645 * Returns 0 on success with referenced group in @rnd, or 646 * errno. 647 * 648 * If the error is EAGAIN, then the operation can be retried. 649 */ 650 int 651 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, 652 uint32_t uidx, struct nhgrp_object **pnhg) 653 { 654 struct nh_control *ctl = rh->nh_control; 655 struct nhgrp_priv *nhg_priv; 656 int error; 657 658 nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error); 659 if (nhg_priv != NULL) 660 *pnhg = nhg_priv->nhg; 661 662 return (error); 663 } 664 665 /* 666 * Creates new nexthop group based on @src group without the nexthops 667 * chosen by @flt_func. 668 * Returns 0 on success, storring the reference nhop group/object in @rnd. 669 */ 670 int 671 nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt, 672 const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data, 673 struct route_nhop_data *rnd) 674 { 675 char storage[64]; 676 struct nh_control *ctl = rh->nh_control; 677 struct weightened_nhop *pnhops; 678 const struct nhgrp_priv *mp_priv, *src_priv; 679 size_t sz; 680 int error, i, num_nhops; 681 682 src_priv = NHGRP_PRIV_CONST(src); 683 684 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop)); 685 /* optimize for <= 4 paths, each path=16 bytes */ 686 if (sz <= sizeof(storage)) 687 pnhops = (struct weightened_nhop *)&storage[0]; 688 else { 689 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL) 690 return (ENOMEM); 691 } 692 693 /* Filter nexthops */ 694 error = 0; 695 num_nhops = 0; 696 for (i = 0; i < src_priv->nhg_nh_count; i++) { 697 if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data)) 698 continue; 699 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i], 700 sizeof(struct weightened_nhop)); 701 } 702 703 if (num_nhops == 0) { 704 rnd->rnd_nhgrp = NULL; 705 rnd->rnd_weight = 0; 706 } else if (num_nhops == 1) { 707 rnd->rnd_nhop = pnhops[0].nh; 708 rnd->rnd_weight = pnhops[0].weight; 709 if (nhop_try_ref_object(rnd->rnd_nhop) == 0) 710 error = EAGAIN; 711 } else { 712 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error); 713 if (mp_priv != NULL) 714 rnd->rnd_nhgrp = mp_priv->nhg; 715 rnd->rnd_weight = 0; 716 } 717 718 if (pnhops != (struct weightened_nhop *)&storage[0]) 719 free(pnhops, M_TEMP); 720 721 return (error); 722 } 723 724 /* 725 * Creates new multipath group based on existing group/nhop in @rnd_orig and 726 * to-be-added nhop @wn_add. 727 * Returns 0 on success and stores result in @rnd_new. 728 */ 729 int 730 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig, 731 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new) 732 { 733 struct nh_control *ctl = rh->nh_control; 734 struct nhgrp_priv *nhg_priv; 735 struct weightened_nhop wn[2] = {}; 736 int error; 737 738 if (rnd_orig->rnd_nhop == NULL) { 739 /* No paths to add to, just reference current nhop */ 740 *rnd_new = *rnd_add; 741 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0) 742 return (EAGAIN); 743 return (0); 744 } 745 746 wn[0].nh = rnd_add->rnd_nhop; 747 wn[0].weight = rnd_add->rnd_weight; 748 749 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) { 750 /* Simple merge of 2 non-multipath nexthops */ 751 wn[1].nh = rnd_orig->rnd_nhop; 752 wn[1].weight = rnd_orig->rnd_weight; 753 nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error); 754 } else { 755 /* Get new nhop group with @rt->rt_nhop as an additional nhop */ 756 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1, 757 &error); 758 } 759 760 if (nhg_priv == NULL) 761 return (error); 762 rnd_new->rnd_nhgrp = nhg_priv->nhg; 763 rnd_new->rnd_weight = 0; 764 765 return (0); 766 } 767 768 /* 769 * Returns pointer to array of nexthops with weights for 770 * given @nhg. Stores number of items in the array into @pnum_nhops. 771 */ 772 const struct weightened_nhop * 773 nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops) 774 { 775 const struct nhgrp_priv *nhg_priv; 776 777 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 778 779 nhg_priv = NHGRP_PRIV_CONST(nhg); 780 *pnum_nhops = nhg_priv->nhg_nh_count; 781 782 return (nhg_priv->nhg_nh_weights); 783 } 784 785 void 786 nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx) 787 { 788 struct nhgrp_priv *nhg_priv; 789 790 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 791 792 nhg_priv = NHGRP_PRIV(nhg); 793 794 nhg_priv->nhg_uidx = uidx; 795 } 796 797 uint32_t 798 nhgrp_get_uidx(const struct nhgrp_object *nhg) 799 { 800 const struct nhgrp_priv *nhg_priv; 801 802 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 803 804 nhg_priv = NHGRP_PRIV_CONST(nhg); 805 return (nhg_priv->nhg_uidx); 806 } 807 808 /* 809 * Prints nexhop group @nhg data in the provided @buf. 810 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100] 811 * Example: nhg#33/sz=5:[#1:100,#2:100,..] 812 */ 813 char * 814 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize) 815 { 816 const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg); 817 818 int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx, 819 nhg_priv->nhg_nh_count); 820 821 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 822 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i]; 823 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,", 824 wn->nh->nh_priv->nh_idx, wn->weight); 825 if (len + off + 3 >= bufsize) { 826 int len = snprintf(&buf[off], bufsize - off, "..."); 827 off += len; 828 break; 829 } 830 off += len; 831 } 832 if (off > 0) 833 off--; // remove last "," 834 if (off + 1 < bufsize) 835 snprintf(&buf[off], bufsize - off, "]"); 836 return buf; 837 } 838 839 __noinline static int 840 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv, 841 char *buffer, size_t buffer_size, struct sysctl_req *w) 842 { 843 struct rt_msghdr *rtm; 844 struct nhgrp_external *nhge; 845 struct nhgrp_container *nhgc; 846 const struct nhgrp_object *nhg; 847 struct nhgrp_nhop_external *ext; 848 int error; 849 size_t sz; 850 851 nhg = nhg_priv->nhg; 852 853 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 854 /* controlplane nexthops */ 855 sz += sizeof(struct nhgrp_container); 856 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 857 /* dataplane nexthops */ 858 sz += sizeof(struct nhgrp_container); 859 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 860 861 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size")); 862 863 bzero(buffer, sz); 864 865 rtm = (struct rt_msghdr *)buffer; 866 rtm->rtm_msglen = sz; 867 rtm->rtm_version = RTM_VERSION; 868 rtm->rtm_type = RTM_GET; 869 870 nhge = (struct nhgrp_external *)(rtm + 1); 871 872 nhge->nhg_idx = nhg_priv->nhg_idx; 873 nhge->nhg_refcount = nhg_priv->nhg_refcount; 874 875 /* fill in control plane nexthops firs */ 876 nhgc = (struct nhgrp_container *)(nhge + 1); 877 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS; 878 nhgc->nhgc_subtype = 0; 879 nhgc->nhgc_len = sizeof(struct nhgrp_container); 880 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 881 nhgc->nhgc_count = nhg_priv->nhg_nh_count; 882 883 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 884 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 885 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx; 886 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight; 887 } 888 889 /* fill in dataplane nexthops */ 890 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]); 891 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS; 892 nhgc->nhgc_subtype = 0; 893 nhgc->nhgc_len = sizeof(struct nhgrp_container); 894 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 895 nhgc->nhgc_count = nhg->nhg_size; 896 897 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 898 for (int i = 0; i < nhg->nhg_size; i++) { 899 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx; 900 ext[i].nh_weight = 0; 901 } 902 903 error = SYSCTL_OUT(w, buffer, sz); 904 905 return (error); 906 } 907 908 uint32_t 909 nhgrp_get_idx(const struct nhgrp_object *nhg) 910 { 911 const struct nhgrp_priv *nhg_priv; 912 913 nhg_priv = NHGRP_PRIV_CONST(nhg); 914 return (nhg_priv->nhg_idx); 915 } 916 917 uint8_t 918 nhgrp_get_origin(const struct nhgrp_object *nhg) 919 { 920 return (NHGRP_PRIV_CONST(nhg)->nhg_origin); 921 } 922 923 void 924 nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin) 925 { 926 NHGRP_PRIV(nhg)->nhg_origin = origin; 927 } 928 929 uint32_t 930 nhgrp_get_count(struct rib_head *rh) 931 { 932 struct nh_control *ctl; 933 uint32_t count; 934 935 ctl = rh->nh_control; 936 937 NHOPS_RLOCK(ctl); 938 count = ctl->gr_head.items_count; 939 NHOPS_RUNLOCK(ctl); 940 941 return (count); 942 } 943 944 int 945 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) 946 { 947 struct nh_control *ctl = rh->nh_control; 948 struct epoch_tracker et; 949 struct nhgrp_priv *nhg_priv; 950 char *buffer; 951 size_t sz; 952 int error = 0; 953 954 if (ctl->gr_head.items_count == 0) 955 return (0); 956 957 /* Calculate the maximum nhop group size in bytes */ 958 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 959 sz += 2 * sizeof(struct nhgrp_container); 960 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH; 961 buffer = malloc(sz, M_TEMP, M_NOWAIT); 962 if (buffer == NULL) 963 return (ENOMEM); 964 965 NET_EPOCH_ENTER(et); 966 NHOPS_RLOCK(ctl); 967 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 968 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w); 969 if (error != 0) 970 break; 971 } CHT_SLIST_FOREACH_END; 972 NHOPS_RUNLOCK(ctl); 973 NET_EPOCH_EXIT(et); 974 975 free(buffer, M_TEMP); 976 977 return (error); 978 } 979