1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 #include "opt_inet.h" 30 #include "opt_route.h" 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/lock.h> 36 #include <sys/rmlock.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/refcount.h> 40 #include <sys/socket.h> 41 #include <sys/sysctl.h> 42 #include <sys/kernel.h> 43 #include <sys/epoch.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/route.h> 48 #include <net/route/route_ctl.h> 49 #include <net/route/route_var.h> 50 #include <net/vnet.h> 51 52 #include <netinet/in.h> 53 #include <netinet/in_var.h> 54 #include <netinet/in_fib.h> 55 56 #include <net/route/nhop_utils.h> 57 #include <net/route/nhop.h> 58 #include <net/route/nhop_var.h> 59 #include <net/route/nhgrp_var.h> 60 61 #define DEBUG_MOD_NAME nhgrp_ctl 62 #define DEBUG_MAX_LEVEL LOG_DEBUG 63 #include <net/route/route_debug.h> 64 _DECLARE_DEBUG(LOG_INFO); 65 66 /* 67 * This file contains the supporting functions for creating multipath groups 68 * and compiling their dataplane parts. 69 */ 70 71 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */ 72 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH, 73 "MPF_MULTIPATH must be the same as NHF_MULTIPATH"); 74 /* Offset and size of flags field has to be the same for nhop/nhop groups */ 75 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags); 76 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */ 77 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64); 78 79 static int wn_cmp_idx(const void *a, const void *b); 80 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops); 81 82 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl, 83 struct weightened_nhop *wn, int num_nhops, uint32_t uidx, int *perror); 84 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv); 85 static void destroy_nhgrp_epoch(epoch_context_t ctx); 86 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv); 87 88 static int 89 wn_cmp_idx(const void *a, const void *b) 90 { 91 const struct weightened_nhop *w_a = a; 92 const struct weightened_nhop *w_b = b; 93 uint32_t a_idx = w_a->nh->nh_priv->nh_idx; 94 uint32_t b_idx = w_b->nh->nh_priv->nh_idx; 95 96 if (a_idx < b_idx) 97 return (-1); 98 else if (a_idx > b_idx) 99 return (1); 100 else 101 return (0); 102 } 103 104 /* 105 * Perform in-place sorting for array of nexthops in @wn. 106 * Sort by nexthop index ascending. 107 */ 108 static void 109 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops) 110 { 111 112 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp_idx); 113 } 114 115 /* 116 * In order to determine the minimum weight difference in the array 117 * of weights, create a sorted array of weights, using spare "storage" 118 * field in the `struct weightened_nhop`. 119 * Assume weights to be (mostly) the same and use insertion sort to 120 * make it sorted. 121 */ 122 static void 123 sort_weightened_nhops_weights(struct weightened_nhop *wn, int num_items) 124 { 125 wn[0].storage = wn[0].weight; 126 for (int i = 1, j = 0; i < num_items; i++) { 127 uint32_t weight = wn[i].weight; // read from 'weight' as it's not reordered 128 /* Move all weights > weight 1 position right */ 129 for (j = i - 1; j >= 0 && wn[j].storage > weight; j--) 130 wn[j + 1].storage = wn[j].storage; 131 wn[j + 1].storage = weight; 132 } 133 } 134 135 /* 136 * Calculate minimum number of slots required to fit the existing 137 * set of weights in the common use case where weights are "easily" 138 * comparable. 139 * Assumes @wn is sorted by weight ascending and each weight is > 0. 140 * Returns number of slots or 0 if precise calculation failed. 141 * 142 * Some examples: 143 * note: (i, X) pair means (nhop=i, weight=X): 144 * (1, 1) (2, 2) -> 3 slots [1, 2, 2] 145 * (1, 100), (2, 200) -> 3 slots [1, 2, 2] 146 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3] 147 */ 148 static uint32_t 149 calc_min_mpath_slots_fast(struct weightened_nhop *wn, size_t num_items, 150 uint64_t *ptotal) 151 { 152 uint32_t i, last, xmin; 153 uint64_t total = 0; 154 155 // Get sorted array of weights in .storage field 156 sort_weightened_nhops_weights(wn, num_items); 157 158 last = 0; 159 xmin = wn[0].storage; 160 for (i = 0; i < num_items; i++) { 161 total += wn[i].storage; 162 if ((wn[i].storage != last) && 163 ((wn[i].storage - last < xmin) || xmin == 0)) { 164 xmin = wn[i].storage - last; 165 } 166 last = wn[i].storage; 167 } 168 *ptotal = total; 169 /* xmin is the minimum unit of desired capacity */ 170 if ((total % xmin) != 0) 171 return (0); 172 for (i = 0; i < num_items; i++) { 173 if ((wn[i].weight % xmin) != 0) 174 return (0); 175 } 176 177 return ((uint32_t)(total / xmin)); 178 } 179 180 /* 181 * Calculate minimum number of slots required to fit the existing 182 * set of weights while maintaining weight coefficients. 183 * 184 * Assume @wn is sorted by weight ascending and each weight is > 0. 185 * 186 * Tries to find simple precise solution first and falls back to 187 * RIB_MAX_MPATH_WIDTH in case of any failure. 188 */ 189 static uint32_t 190 calc_min_mpath_slots(struct weightened_nhop *wn, size_t num_items) 191 { 192 uint32_t v; 193 uint64_t total; 194 195 v = calc_min_mpath_slots_fast(wn, num_items, &total); 196 if (total == 0) 197 return (0); 198 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) 199 v = RIB_MAX_MPATH_WIDTH; 200 201 return (v); 202 } 203 204 /* 205 * Nexthop group data consists of 206 * 1) dataplane part, with nhgrp_object as a header followed by an 207 * arbitrary number of nexthop pointers. 208 * 2) control plane part, with nhgrp_priv as a header, followed by 209 * an arbirtrary number of 'struct weightened_nhop' object. 210 * 211 * Given nexthop groups are (mostly) immutable, allocate all data 212 * in one go. 213 * 214 */ 215 __noinline static size_t 216 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) 217 { 218 size_t sz; 219 220 sz = sizeof(struct nhgrp_object); 221 sz += nhg_size * sizeof(struct nhop_object *); 222 sz += sizeof(struct nhgrp_priv); 223 sz += num_nhops * sizeof(struct weightened_nhop); 224 return (sz); 225 } 226 227 /* 228 * Compile actual list of nexthops to be used by datapath from 229 * the nexthop group @dst. 230 * 231 * For example, compiling control plane list of 2 nexthops 232 * [(200, A), (100, B)] would result in the datapath array 233 * [A, A, B] 234 */ 235 static void 236 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, 237 uint32_t num_slots) 238 { 239 struct nhgrp_object *dst; 240 int i, slot_idx, remaining_slots; 241 uint64_t remaining_sum, nh_weight, nh_slots; 242 243 slot_idx = 0; 244 dst = dst_priv->nhg; 245 /* Calculate sum of all weights */ 246 remaining_sum = 0; 247 for (i = 0; i < dst_priv->nhg_nh_count; i++) 248 remaining_sum += x[i].weight; 249 remaining_slots = num_slots; 250 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d", 251 remaining_sum, remaining_slots); 252 for (i = 0; i < dst_priv->nhg_nh_count; i++) { 253 /* Calculate number of slots for the current nexthop */ 254 if (remaining_sum > 0) { 255 nh_weight = (uint64_t)x[i].weight; 256 nh_slots = (nh_weight * remaining_slots / remaining_sum); 257 } else 258 nh_slots = 0; 259 260 remaining_sum -= x[i].weight; 261 remaining_slots -= nh_slots; 262 263 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, 264 " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d", 265 remaining_sum, remaining_slots, (int)nh_slots, slot_idx); 266 267 KASSERT((slot_idx + nh_slots <= num_slots), 268 ("index overflow during nhg compilation")); 269 while (nh_slots-- > 0) 270 dst->nhops[slot_idx++] = x[i].nh; 271 } 272 } 273 274 /* 275 * Allocates new nexthop group for the list of weightened nexthops. 276 * Assume sorted list. 277 * Does NOT reference any nexthops in the group. 278 * Returns group with refcount=1 or NULL. 279 */ 280 static struct nhgrp_priv * 281 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) 282 { 283 uint32_t nhgrp_size; 284 struct nhgrp_object *nhg; 285 struct nhgrp_priv *nhg_priv; 286 287 nhgrp_size = calc_min_mpath_slots(wn, num_nhops); 288 if (nhgrp_size == 0) { 289 /* Zero weights, abort */ 290 return (NULL); 291 } 292 293 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops); 294 nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO); 295 if (nhg == NULL) { 296 FIB_NH_LOG(LOG_INFO, wn[0].nh, 297 "unable to allocate group with num_nhops %d (compiled %u)", 298 num_nhops, nhgrp_size); 299 return (NULL); 300 } 301 302 /* Has to be the first to make NHGRP_PRIV() work */ 303 nhg->nhg_size = nhgrp_size; 304 nhg->nhg_flags = MPF_MULTIPATH; 305 306 nhg_priv = NHGRP_PRIV(nhg); 307 nhg_priv->nhg_nh_count = num_nhops; 308 refcount_init(&nhg_priv->nhg_refcount, 1); 309 310 /* Please see nhgrp_free() comments on the initial value */ 311 refcount_init(&nhg_priv->nhg_linked, 2); 312 313 nhg_priv->nhg = nhg; 314 memcpy(&nhg_priv->nhg_nh_weights[0], wn, 315 num_nhops * sizeof(struct weightened_nhop)); 316 317 FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u", 318 num_nhops, nhgrp_size); 319 320 compile_nhgrp(nhg_priv, wn, nhg->nhg_size); 321 322 return (nhg_priv); 323 } 324 325 void 326 nhgrp_ref_object(struct nhgrp_object *nhg) 327 { 328 struct nhgrp_priv *nhg_priv; 329 u_int old __diagused; 330 331 nhg_priv = NHGRP_PRIV(nhg); 332 old = refcount_acquire(&nhg_priv->nhg_refcount); 333 KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg)); 334 } 335 336 void 337 nhgrp_free(struct nhgrp_object *nhg) 338 { 339 struct nhgrp_priv *nhg_priv; 340 struct nh_control *ctl; 341 struct epoch_tracker et; 342 343 nhg_priv = NHGRP_PRIV(nhg); 344 345 if (!refcount_release(&nhg_priv->nhg_refcount)) 346 return; 347 348 /* 349 * group objects don't have an explicit lock attached to it. 350 * As groups are reclaimed based on reference count, it is possible 351 * that some groups will persist after vnet destruction callback 352 * called. Given that, handle scenario with nhgrp_free_group() being 353 * called either after or simultaneously with nhgrp_ctl_unlink_all() 354 * by using another reference counter: nhg_linked. 355 * 356 * There are only 2 places, where nhg_linked can be decreased: 357 * rib destroy (nhgrp_ctl_unlink_all) and this function. 358 * nhg_link can never be increased. 359 * 360 * Hence, use initial value of 2 to make use of 361 * refcount_release_if_not_last(). 362 * 363 * There can be two scenarious when calling this function: 364 * 365 * 1) nhg_linked value is 2. This means that either 366 * nhgrp_ctl_unlink_all() has not been called OR it is running, 367 * but we are guaranteed that nh_control won't be freed in 368 * this epoch. Hence, nexthop can be safely unlinked. 369 * 370 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all() 371 * has been called and nhgrp unlink can be skipped. 372 */ 373 374 NET_EPOCH_ENTER(et); 375 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) { 376 ctl = nhg_priv->nh_control; 377 if (unlink_nhgrp(ctl, nhg_priv) == NULL) { 378 /* Do not try to reclaim */ 379 RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p", 380 nhg_priv); 381 NET_EPOCH_EXIT(et); 382 return; 383 } 384 } 385 NET_EPOCH_EXIT(et); 386 387 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 388 NET_EPOCH_CALL(destroy_nhgrp_epoch, &nhg_priv->nhg_epoch_ctx); 389 } 390 391 /* 392 * Destroys all local resources belonging to @nhg_priv. 393 */ 394 __noinline static void 395 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv) 396 { 397 398 free(nhg_priv->nhg, M_NHOP); 399 } 400 401 __noinline static void 402 destroy_nhgrp(struct nhgrp_priv *nhg_priv) 403 { 404 405 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0")); 406 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 407 408 IF_DEBUG_LEVEL(LOG_DEBUG2) { 409 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused; 410 FIB_NH_LOG(LOG_DEBUG2, nhg_priv->nhg_nh_weights[0].nh, 411 "destroying %s", nhgrp_print_buf(nhg_priv->nhg, 412 nhgbuf, sizeof(nhgbuf))); 413 } 414 415 free_nhgrp_nhops(nhg_priv); 416 destroy_nhgrp_int(nhg_priv); 417 } 418 419 /* 420 * Epoch callback indicating group is safe to destroy 421 */ 422 static void 423 destroy_nhgrp_epoch(epoch_context_t ctx) 424 { 425 struct nhgrp_priv *nhg_priv; 426 427 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx); 428 429 destroy_nhgrp(nhg_priv); 430 } 431 432 static bool 433 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 434 { 435 436 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 437 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0) 438 continue; 439 440 /* 441 * Failed to ref the nexthop, b/c it's deleted. 442 * Need to rollback references back. 443 */ 444 for (int j = 0; j < i; j++) 445 nhop_free(nhg_priv->nhg_nh_weights[j].nh); 446 return (false); 447 } 448 449 return (true); 450 } 451 452 static void 453 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 454 { 455 456 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) 457 nhop_free(nhg_priv->nhg_nh_weights[i].nh); 458 } 459 460 /* 461 * Allocate nexthop group of size @num_nhops with nexthops specified by 462 * @wn. Nexthops have to be unique and match the fibnum/family of the group. 463 * Returns unlinked nhgrp object on success or NULL and non-zero perror. 464 */ 465 struct nhgrp_object * 466 nhgrp_alloc(uint32_t fibnum, int family, struct weightened_nhop *wn, int num_nhops, 467 int *perror) 468 { 469 struct rib_head *rh = rt_tables_get_rnh(fibnum, family); 470 struct nhgrp_priv *nhg_priv; 471 struct nh_control *ctl; 472 473 if (rh == NULL) { 474 *perror = E2BIG; 475 return (NULL); 476 } 477 478 ctl = rh->nh_control; 479 480 if (num_nhops > RIB_MAX_MPATH_WIDTH) { 481 *perror = E2BIG; 482 return (NULL); 483 } 484 485 if (ctl->gr_head.hash_size == 0) { 486 /* First multipath request. Bootstrap mpath datastructures. */ 487 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) { 488 *perror = ENOMEM; 489 return (NULL); 490 } 491 } 492 493 /* Sort nexthops & check there are no duplicates */ 494 sort_weightened_nhops(wn, num_nhops); 495 uint32_t last_id = 0; 496 for (int i = 0; i < num_nhops; i++) { 497 if (wn[i].nh->nh_priv->nh_control != ctl) { 498 *perror = EINVAL; 499 return (NULL); 500 } 501 if (wn[i].nh->nh_priv->nh_idx == last_id) { 502 *perror = EEXIST; 503 return (NULL); 504 } 505 last_id = wn[i].nh->nh_priv->nh_idx; 506 } 507 508 if ((nhg_priv = alloc_nhgrp(wn, num_nhops)) == NULL) { 509 *perror = ENOMEM; 510 return (NULL); 511 } 512 nhg_priv->nh_control = ctl; 513 514 *perror = 0; 515 return (nhg_priv->nhg); 516 } 517 518 /* 519 * Finds an existing group matching @nhg or links @nhg to the tree. 520 * Returns the referenced group or NULL and non-zero @perror. 521 */ 522 struct nhgrp_object * 523 nhgrp_get_nhgrp(struct nhgrp_object *nhg, int *perror) 524 { 525 struct nhgrp_priv *nhg_priv, *key = NHGRP_PRIV(nhg); 526 struct nh_control *ctl = key->nh_control; 527 528 nhg_priv = find_nhgrp(ctl, key); 529 if (nhg_priv != NULL) { 530 /* 531 * Free originally-created group. As it hasn't been linked 532 * and the dependent nexhops haven't been referenced, just free 533 * the group. 534 */ 535 destroy_nhgrp_int(key); 536 *perror = 0; 537 return (nhg_priv->nhg); 538 } else { 539 /* No existing group, try to link the new one */ 540 if (!ref_nhgrp_nhops(key)) { 541 /* 542 * Some of the nexthops have been scheduled for deletion. 543 * As the group hasn't been linked / no nexhops have been 544 * referenced, call the final destructor immediately. 545 */ 546 destroy_nhgrp_int(key); 547 *perror = EAGAIN; 548 return (NULL); 549 } 550 if (link_nhgrp(ctl, key) == 0) { 551 /* Unable to allocate index? */ 552 *perror = EAGAIN; 553 free_nhgrp_nhops(key); 554 destroy_nhgrp_int(key); 555 return (NULL); 556 } 557 *perror = 0; 558 return (nhg); 559 } 560 561 /* NOTREACHED */ 562 } 563 564 /* 565 * Creates or looks up an existing nexthop group based on @wn and @num_nhops. 566 * 567 * Returns referenced nhop group or NULL, passing error code in @perror. 568 */ 569 struct nhgrp_priv * 570 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops, 571 uint32_t uidx, int *perror) 572 { 573 struct nhgrp_object *nhg; 574 575 nhg = nhgrp_alloc(ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family, 576 wn, num_nhops, perror); 577 if (nhg == NULL) 578 return (NULL); 579 nhgrp_set_uidx(nhg, uidx); 580 nhg = nhgrp_get_nhgrp(nhg, perror); 581 if (nhg != NULL) 582 return (NHGRP_PRIV(nhg)); 583 return (NULL); 584 } 585 586 587 /* 588 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig. 589 * 590 * Returns referenced nexthop group or NULL. In the latter case, @perror is 591 * filled with an error code. 592 * Note that function does NOT care if the next nexthops already exists 593 * in the @gr_orig. As a result, they will be added, resulting in the 594 * same nexthop being present multiple times in the new group. 595 */ 596 static struct nhgrp_priv * 597 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig, 598 struct weightened_nhop *wn, int num_nhops, int *perror) 599 { 600 char storage[64]; 601 struct weightened_nhop *pnhops; 602 struct nhgrp_priv *nhg_priv; 603 const struct nhgrp_priv *src_priv; 604 size_t sz; 605 int curr_nhops; 606 607 src_priv = NHGRP_PRIV_CONST(gr_orig); 608 curr_nhops = src_priv->nhg_nh_count; 609 610 *perror = 0; 611 612 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop)); 613 /* optimize for <= 4 paths, each path=16 bytes */ 614 if (sz <= sizeof(storage)) 615 pnhops = (struct weightened_nhop *)&storage[0]; 616 else { 617 pnhops = malloc(sz, M_TEMP, M_NOWAIT); 618 if (pnhops == NULL) { 619 *perror = ENOMEM; 620 return (NULL); 621 } 622 } 623 624 /* Copy nhops from original group first */ 625 memcpy(pnhops, src_priv->nhg_nh_weights, 626 curr_nhops * sizeof(struct weightened_nhop)); 627 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop)); 628 curr_nhops += num_nhops; 629 630 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, 0, perror); 631 632 if (pnhops != (struct weightened_nhop *)&storage[0]) 633 free(pnhops, M_TEMP); 634 635 if (nhg_priv == NULL) 636 return (NULL); 637 638 return (nhg_priv); 639 } 640 641 642 /* 643 * Creates/finds nexthop group based on @wn and @num_nhops. 644 * Returns 0 on success with referenced group in @rnd, or 645 * errno. 646 * 647 * If the error is EAGAIN, then the operation can be retried. 648 */ 649 int 650 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, 651 uint32_t uidx, struct nhgrp_object **pnhg) 652 { 653 struct nh_control *ctl = rh->nh_control; 654 struct nhgrp_priv *nhg_priv; 655 int error; 656 657 nhg_priv = get_nhgrp(ctl, wn, num_nhops, uidx, &error); 658 if (nhg_priv != NULL) 659 *pnhg = nhg_priv->nhg; 660 661 return (error); 662 } 663 664 /* 665 * Creates new nexthop group based on @src group without the nexthops 666 * chosen by @flt_func. 667 * Returns 0 on success, storring the reference nhop group/object in @rnd. 668 */ 669 int 670 nhgrp_get_filtered_group(struct rib_head *rh, const struct rtentry *rt, 671 const struct nhgrp_object *src, rib_filter_f_t flt_func, void *flt_data, 672 struct route_nhop_data *rnd) 673 { 674 char storage[64]; 675 struct nh_control *ctl = rh->nh_control; 676 struct weightened_nhop *pnhops; 677 const struct nhgrp_priv *mp_priv, *src_priv; 678 size_t sz; 679 int error, i, num_nhops; 680 681 src_priv = NHGRP_PRIV_CONST(src); 682 683 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop)); 684 /* optimize for <= 4 paths, each path=16 bytes */ 685 if (sz <= sizeof(storage)) 686 pnhops = (struct weightened_nhop *)&storage[0]; 687 else { 688 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL) 689 return (ENOMEM); 690 } 691 692 /* Filter nexthops */ 693 error = 0; 694 num_nhops = 0; 695 for (i = 0; i < src_priv->nhg_nh_count; i++) { 696 if (flt_func(rt, src_priv->nhg_nh_weights[i].nh, flt_data)) 697 continue; 698 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i], 699 sizeof(struct weightened_nhop)); 700 } 701 702 if (num_nhops == 0) { 703 rnd->rnd_nhgrp = NULL; 704 rnd->rnd_weight = 0; 705 } else if (num_nhops == 1) { 706 rnd->rnd_nhop = pnhops[0].nh; 707 rnd->rnd_weight = pnhops[0].weight; 708 if (nhop_try_ref_object(rnd->rnd_nhop) == 0) 709 error = EAGAIN; 710 } else { 711 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, 0, &error); 712 if (mp_priv != NULL) 713 rnd->rnd_nhgrp = mp_priv->nhg; 714 rnd->rnd_weight = 0; 715 } 716 717 if (pnhops != (struct weightened_nhop *)&storage[0]) 718 free(pnhops, M_TEMP); 719 720 return (error); 721 } 722 723 /* 724 * Creates new multipath group based on existing group/nhop in @rnd_orig and 725 * to-be-added nhop @wn_add. 726 * Returns 0 on success and stores result in @rnd_new. 727 */ 728 int 729 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig, 730 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new) 731 { 732 struct nh_control *ctl = rh->nh_control; 733 struct nhgrp_priv *nhg_priv; 734 struct weightened_nhop wn[2] = {}; 735 int error; 736 737 if (rnd_orig->rnd_nhop == NULL) { 738 /* No paths to add to, just reference current nhop */ 739 *rnd_new = *rnd_add; 740 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0) 741 return (EAGAIN); 742 return (0); 743 } 744 745 wn[0].nh = rnd_add->rnd_nhop; 746 wn[0].weight = rnd_add->rnd_weight; 747 748 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) { 749 /* Simple merge of 2 non-multipath nexthops */ 750 wn[1].nh = rnd_orig->rnd_nhop; 751 wn[1].weight = rnd_orig->rnd_weight; 752 nhg_priv = get_nhgrp(ctl, wn, 2, 0, &error); 753 } else { 754 /* Get new nhop group with @rt->rt_nhop as an additional nhop */ 755 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1, 756 &error); 757 } 758 759 if (nhg_priv == NULL) 760 return (error); 761 rnd_new->rnd_nhgrp = nhg_priv->nhg; 762 rnd_new->rnd_weight = 0; 763 764 return (0); 765 } 766 767 /* 768 * Returns pointer to array of nexthops with weights for 769 * given @nhg. Stores number of items in the array into @pnum_nhops. 770 */ 771 const struct weightened_nhop * 772 nhgrp_get_nhops(const struct nhgrp_object *nhg, uint32_t *pnum_nhops) 773 { 774 const struct nhgrp_priv *nhg_priv; 775 776 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 777 778 nhg_priv = NHGRP_PRIV_CONST(nhg); 779 *pnum_nhops = nhg_priv->nhg_nh_count; 780 781 return (nhg_priv->nhg_nh_weights); 782 } 783 784 void 785 nhgrp_set_uidx(struct nhgrp_object *nhg, uint32_t uidx) 786 { 787 struct nhgrp_priv *nhg_priv; 788 789 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 790 791 nhg_priv = NHGRP_PRIV(nhg); 792 793 nhg_priv->nhg_uidx = uidx; 794 } 795 796 uint32_t 797 nhgrp_get_uidx(const struct nhgrp_object *nhg) 798 { 799 const struct nhgrp_priv *nhg_priv; 800 801 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 802 803 nhg_priv = NHGRP_PRIV_CONST(nhg); 804 return (nhg_priv->nhg_uidx); 805 } 806 807 /* 808 * Prints nexhop group @nhg data in the provided @buf. 809 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100] 810 * Example: nhg#33/sz=5:[#1:100,#2:100,..] 811 */ 812 char * 813 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize) 814 { 815 const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg); 816 817 int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx, 818 nhg_priv->nhg_nh_count); 819 820 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 821 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i]; 822 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,", 823 wn->nh->nh_priv->nh_idx, wn->weight); 824 if (len + off + 3 >= bufsize) { 825 int len = snprintf(&buf[off], bufsize - off, "..."); 826 off += len; 827 break; 828 } 829 off += len; 830 } 831 if (off > 0) 832 off--; // remove last "," 833 if (off + 1 < bufsize) 834 snprintf(&buf[off], bufsize - off, "]"); 835 return buf; 836 } 837 838 __noinline static int 839 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv, 840 char *buffer, size_t buffer_size, struct sysctl_req *w) 841 { 842 struct rt_msghdr *rtm; 843 struct nhgrp_external *nhge; 844 struct nhgrp_container *nhgc; 845 const struct nhgrp_object *nhg; 846 struct nhgrp_nhop_external *ext; 847 int error; 848 size_t sz; 849 850 nhg = nhg_priv->nhg; 851 852 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 853 /* controlplane nexthops */ 854 sz += sizeof(struct nhgrp_container); 855 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 856 /* dataplane nexthops */ 857 sz += sizeof(struct nhgrp_container); 858 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 859 860 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size")); 861 862 bzero(buffer, sz); 863 864 rtm = (struct rt_msghdr *)buffer; 865 rtm->rtm_msglen = sz; 866 rtm->rtm_version = RTM_VERSION; 867 rtm->rtm_type = RTM_GET; 868 869 nhge = (struct nhgrp_external *)(rtm + 1); 870 871 nhge->nhg_idx = nhg_priv->nhg_idx; 872 nhge->nhg_refcount = nhg_priv->nhg_refcount; 873 874 /* fill in control plane nexthops firs */ 875 nhgc = (struct nhgrp_container *)(nhge + 1); 876 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS; 877 nhgc->nhgc_subtype = 0; 878 nhgc->nhgc_len = sizeof(struct nhgrp_container); 879 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 880 nhgc->nhgc_count = nhg_priv->nhg_nh_count; 881 882 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 883 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 884 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx; 885 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight; 886 } 887 888 /* fill in dataplane nexthops */ 889 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]); 890 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS; 891 nhgc->nhgc_subtype = 0; 892 nhgc->nhgc_len = sizeof(struct nhgrp_container); 893 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 894 nhgc->nhgc_count = nhg->nhg_size; 895 896 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 897 for (int i = 0; i < nhg->nhg_size; i++) { 898 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx; 899 ext[i].nh_weight = 0; 900 } 901 902 error = SYSCTL_OUT(w, buffer, sz); 903 904 return (error); 905 } 906 907 uint32_t 908 nhgrp_get_idx(const struct nhgrp_object *nhg) 909 { 910 const struct nhgrp_priv *nhg_priv; 911 912 nhg_priv = NHGRP_PRIV_CONST(nhg); 913 return (nhg_priv->nhg_idx); 914 } 915 916 uint8_t 917 nhgrp_get_origin(const struct nhgrp_object *nhg) 918 { 919 return (NHGRP_PRIV_CONST(nhg)->nhg_origin); 920 } 921 922 void 923 nhgrp_set_origin(struct nhgrp_object *nhg, uint8_t origin) 924 { 925 NHGRP_PRIV(nhg)->nhg_origin = origin; 926 } 927 928 uint32_t 929 nhgrp_get_count(struct rib_head *rh) 930 { 931 struct nh_control *ctl; 932 uint32_t count; 933 934 ctl = rh->nh_control; 935 936 NHOPS_RLOCK(ctl); 937 count = ctl->gr_head.items_count; 938 NHOPS_RUNLOCK(ctl); 939 940 return (count); 941 } 942 943 int 944 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) 945 { 946 struct nh_control *ctl = rh->nh_control; 947 struct epoch_tracker et; 948 struct nhgrp_priv *nhg_priv; 949 char *buffer; 950 size_t sz; 951 int error = 0; 952 953 if (ctl->gr_head.items_count == 0) 954 return (0); 955 956 /* Calculate the maximum nhop group size in bytes */ 957 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 958 sz += 2 * sizeof(struct nhgrp_container); 959 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH; 960 buffer = malloc(sz, M_TEMP, M_NOWAIT); 961 if (buffer == NULL) 962 return (ENOMEM); 963 964 NET_EPOCH_ENTER(et); 965 NHOPS_RLOCK(ctl); 966 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 967 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w); 968 if (error != 0) 969 break; 970 } CHT_SLIST_FOREACH_END; 971 NHOPS_RUNLOCK(ctl); 972 NET_EPOCH_EXIT(et); 973 974 free(buffer, M_TEMP); 975 976 return (error); 977 } 978