/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2020 Alexander V. Chernikov
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
26 * 27 * $FreeBSD$ 28 */ 29 #include "opt_inet.h" 30 #include "opt_route.h" 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/lock.h> 36 #include <sys/rmlock.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/refcount.h> 40 #include <sys/socket.h> 41 #include <sys/sysctl.h> 42 #include <sys/kernel.h> 43 #include <sys/epoch.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/route.h> 48 #include <net/route/route_ctl.h> 49 #include <net/route/route_var.h> 50 #include <net/vnet.h> 51 52 #include <netinet/in.h> 53 #include <netinet/in_var.h> 54 #include <netinet/in_fib.h> 55 56 #include <net/route/nhop_utils.h> 57 #include <net/route/nhop.h> 58 #include <net/route/nhop_var.h> 59 #include <net/route/nhgrp_var.h> 60 61 /* 62 * This file contains the supporting functions for creating multipath groups 63 * and compiling their dataplane parts. 64 */ 65 66 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */ 67 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH, 68 "MPF_MULTIPATH must be the same as NHF_MULTIPATH"); 69 /* Offset and size of flags field has to be the same for nhop/nhop groups */ 70 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags); 71 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */ 72 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64); 73 74 static int wn_cmp(const void *a, const void *b); 75 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops); 76 77 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl, 78 struct weightened_nhop *wn, int num_nhops, int *perror); 79 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv); 80 static void destroy_nhgrp_epoch(epoch_context_t ctx); 81 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv); 82 83 static int 84 wn_cmp(const void *a, const void *b) 85 { 86 const struct weightened_nhop *wa = a; 87 const struct 
weightened_nhop *wb = b; 88 89 if (wa->weight > wb->weight) 90 return (1); 91 else if (wa->weight < wb->weight) 92 return (-1); 93 94 /* Compare nexthops by pointer */ 95 if (wa->nh > wb->nh) 96 return (1); 97 else if (wa->nh < wb->nh) 98 return (-1); 99 else 100 return (0); 101 } 102 103 /* 104 * Perform in-place sorting for array of nexthops in @wn. 105 * 106 * To avoid nh groups duplication, nexthops/weights in the 107 * @wn need to be ordered deterministically. 108 * As this sorting is needed only for the control plane functionality, 109 * there are no specific external requirements. 110 * 111 * Sort by weight first, to ease calculation of the slot sizes. 112 */ 113 static void 114 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops) 115 { 116 117 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp); 118 } 119 120 /* 121 * Calculate minimum number of slots required to fit the existing 122 * set of weights in the common use case where weights are "easily" 123 * comparable. 124 * Assumes @wn is sorted by weight ascending and each weight is > 0. 125 * Returns number of slots or 0 if precise calculation failed. 
126 * 127 * Some examples: 128 * note: (i, X) pair means (nhop=i, weight=X): 129 * (1, 1) (2, 2) -> 3 slots [1, 2, 2] 130 * (1, 100), (2, 200) -> 3 slots [1, 2, 2] 131 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3] 132 */ 133 static uint32_t 134 calc_min_mpath_slots_fast(const struct weightened_nhop *wn, size_t num_items) 135 { 136 uint32_t i, last, xmin; 137 uint64_t total = 0; 138 139 last = 0; 140 xmin = wn[0].weight; 141 for (i = 0; i < num_items; i++) { 142 total += wn[i].weight; 143 if ((wn[i].weight - last < xmin) && (wn[i].weight != last)) 144 xmin = wn[i].weight - last; 145 last = wn[i].weight; 146 } 147 /* xmin is the minimum unit of desired capacity */ 148 if ((total % xmin) != 0) 149 return (0); 150 for (i = 0; i < num_items; i++) { 151 if ((wn[i].weight % xmin) != 0) 152 return (0); 153 } 154 155 return ((uint32_t)(total / xmin)); 156 } 157 158 /* 159 * Calculate minimum number of slots required to fit the existing 160 * set of weights while maintaining weight coefficients. 161 * 162 * Assume @wn is sorted by weight ascending and each weight is > 0. 163 * 164 * Tries to find simple precise solution first and falls back to 165 * RIB_MAX_MPATH_WIDTH in case of any failure. 166 */ 167 static uint32_t 168 calc_min_mpath_slots(const struct weightened_nhop *wn, size_t num_items) 169 { 170 uint32_t v; 171 172 v = calc_min_mpath_slots_fast(wn, num_items); 173 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) 174 v = RIB_MAX_MPATH_WIDTH; 175 176 return (v); 177 } 178 179 /* 180 * Nexthop group data consists of 181 * 1) dataplane part, with nhgrp_object as a header followed by an 182 * arbitrary number of nexthop pointers. 183 * 2) control plane part, with nhgrp_priv as a header, followed by 184 * an arbirtrary number of 'struct weightened_nhop' object. 185 * 186 * Given nexthop groups are (mostly) immutable, allocate all data 187 * in one go. 
188 * 189 */ 190 __noinline static size_t 191 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) 192 { 193 size_t sz; 194 195 sz = sizeof(struct nhgrp_object); 196 sz += nhg_size * sizeof(struct nhop_object *); 197 sz += sizeof(struct nhgrp_priv); 198 sz += num_nhops * sizeof(struct weightened_nhop); 199 return (sz); 200 } 201 202 /* 203 * Compile actual list of nexthops to be used by datapath from 204 * the nexthop group @dst. 205 * 206 * For example, compiling control plane list of 2 nexthops 207 * [(200, A), (100, B)] would result in the datapath array 208 * [A, A, B] 209 */ 210 static void 211 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, 212 uint32_t num_slots) 213 { 214 struct nhgrp_object *dst; 215 int i, slot_idx, remaining_slots; 216 uint64_t remaining_sum, nh_weight, nh_slots; 217 218 slot_idx = 0; 219 dst = dst_priv->nhg; 220 /* Calculate sum of all weights */ 221 remaining_sum = 0; 222 for (i = 0; i < dst_priv->nhg_nh_count; i++) 223 remaining_sum += x[i].weight; 224 remaining_slots = num_slots; 225 DPRINTF("O: %u/%u", (uint32_t)remaining_sum, remaining_slots); 226 for (i = 0; i < dst_priv->nhg_nh_count; i++) { 227 /* Calculate number of slots for the current nexthop */ 228 if (remaining_sum > 0) { 229 nh_weight = (uint64_t)x[i].weight; 230 nh_slots = (nh_weight * remaining_slots / remaining_sum); 231 } else 232 nh_slots = 0; 233 234 remaining_sum -= x[i].weight; 235 remaining_slots -= nh_slots; 236 237 DPRINTF(" OO[%d]: %u/%u curr=%d slot_idx=%d", i, 238 (uint32_t)remaining_sum, remaining_slots, 239 (int)nh_slots, slot_idx); 240 241 KASSERT((slot_idx + nh_slots <= num_slots), 242 ("index overflow during nhg compilation")); 243 while (nh_slots-- > 0) 244 dst->nhops[slot_idx++] = x[i].nh; 245 } 246 } 247 248 /* 249 * Allocates new nexthop group for the list of weightened nexthops. 250 * Assume sorted list. 251 * Does NOT reference any nexthops in the group. 252 * Returns group with refcount=1 or NULL. 
253 */ 254 static struct nhgrp_priv * 255 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) 256 { 257 uint32_t nhgrp_size; 258 int flags = M_NOWAIT; 259 struct nhgrp_object *nhg; 260 struct nhgrp_priv *nhg_priv; 261 262 nhgrp_size = calc_min_mpath_slots(wn, num_nhops); 263 if (nhgrp_size == 0) { 264 /* Zero weights, abort */ 265 return (NULL); 266 } 267 268 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops); 269 nhg = malloc(sz, M_NHOP, flags | M_ZERO); 270 if (nhg == NULL) { 271 return (NULL); 272 } 273 274 /* Has to be the first to make NHGRP_PRIV() work */ 275 nhg->nhg_size = nhgrp_size; 276 DPRINTF("new mpath group: num_nhops: %u", (uint32_t)nhgrp_size); 277 nhg->nhg_flags = MPF_MULTIPATH; 278 279 nhg_priv = NHGRP_PRIV(nhg); 280 nhg_priv->nhg_nh_count = num_nhops; 281 refcount_init(&nhg_priv->nhg_refcount, 1); 282 283 /* Please see nhgrp_free() comments on the initial value */ 284 refcount_init(&nhg_priv->nhg_linked, 2); 285 286 nhg_priv->nhg = nhg; 287 memcpy(&nhg_priv->nhg_nh_weights[0], wn, 288 num_nhops * sizeof(struct weightened_nhop)); 289 290 compile_nhgrp(nhg_priv, wn, nhg->nhg_size); 291 292 return (nhg_priv); 293 } 294 295 void 296 nhgrp_ref_object(struct nhgrp_object *nhg) 297 { 298 struct nhgrp_priv *nhg_priv; 299 u_int old; 300 301 nhg_priv = NHGRP_PRIV(nhg); 302 old = refcount_acquire(&nhg_priv->nhg_refcount); 303 KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg)); 304 } 305 306 void 307 nhgrp_free(struct nhgrp_object *nhg) 308 { 309 struct nhgrp_priv *nhg_priv; 310 struct nh_control *ctl; 311 struct epoch_tracker et; 312 313 nhg_priv = NHGRP_PRIV(nhg); 314 315 if (!refcount_release(&nhg_priv->nhg_refcount)) 316 return; 317 318 /* 319 * group objects don't have an explicit lock attached to it. 320 * As groups are reclaimed based on reference count, it is possible 321 * that some groups will persist after vnet destruction callback 322 * called. 
Given that, handle scenario with nhgrp_free_group() being 323 * called either after or simultaneously with nhgrp_ctl_unlink_all() 324 * by using another reference counter: nhg_linked. 325 * 326 * There are only 2 places, where nhg_linked can be decreased: 327 * rib destroy (nhgrp_ctl_unlink_all) and this function. 328 * nhg_link can never be increased. 329 * 330 * Hence, use initial value of 2 to make use of 331 * refcount_release_if_not_last(). 332 * 333 * There can be two scenarious when calling this function: 334 * 335 * 1) nhg_linked value is 2. This means that either 336 * nhgrp_ctl_unlink_all() has not been called OR it is running, 337 * but we are guaranteed that nh_control won't be freed in 338 * this epoch. Hence, nexthop can be safely unlinked. 339 * 340 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all() 341 * has been called and nhgrp unlink can be skipped. 342 */ 343 344 NET_EPOCH_ENTER(et); 345 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) { 346 ctl = nhg_priv->nh_control; 347 if (unlink_nhgrp(ctl, nhg_priv) == NULL) { 348 /* Do not try to reclaim */ 349 DPRINTF("Failed to unlink nexhop group %p", nhg_priv); 350 NET_EPOCH_EXIT(et); 351 return; 352 } 353 } 354 NET_EPOCH_EXIT(et); 355 356 epoch_call(net_epoch_preempt, destroy_nhgrp_epoch, 357 &nhg_priv->nhg_epoch_ctx); 358 } 359 360 /* 361 * Destroys all local resources belonging to @nhg_priv. 
362 */ 363 __noinline static void 364 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv) 365 { 366 367 free(nhg_priv->nhg, M_NHOP); 368 } 369 370 __noinline static void 371 destroy_nhgrp(struct nhgrp_priv *nhg_priv) 372 { 373 374 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0")); 375 376 DPRINTF("DEL MPATH %p", nhg_priv); 377 378 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 379 380 free_nhgrp_nhops(nhg_priv); 381 382 destroy_nhgrp_int(nhg_priv); 383 } 384 385 /* 386 * Epoch callback indicating group is safe to destroy 387 */ 388 static void 389 destroy_nhgrp_epoch(epoch_context_t ctx) 390 { 391 struct nhgrp_priv *nhg_priv; 392 393 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx); 394 395 destroy_nhgrp(nhg_priv); 396 } 397 398 static bool 399 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 400 { 401 402 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 403 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0) 404 continue; 405 406 /* 407 * Failed to ref the nexthop, b/c it's deleted. 408 * Need to rollback references back. 409 */ 410 for (int j = 0; j < i; j++) 411 nhop_free(nhg_priv->nhg_nh_weights[j].nh); 412 return (false); 413 } 414 415 return (true); 416 } 417 418 static void 419 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 420 { 421 422 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) 423 nhop_free(nhg_priv->nhg_nh_weights[i].nh); 424 } 425 426 /* 427 * Creates or looks up an existing nexthop group based on @wn and @num_nhops. 428 * 429 * Returns referenced nhop group or NULL, passing error code in @perror. 430 */ 431 struct nhgrp_priv * 432 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops, 433 int *perror) 434 { 435 struct nhgrp_priv *key, *nhg_priv; 436 437 if (num_nhops > RIB_MAX_MPATH_WIDTH) { 438 *perror = E2BIG; 439 return (NULL); 440 } 441 442 if (ctl->gr_head.hash_size == 0) { 443 /* First multipath request. Bootstrap mpath datastructures. 
*/ 444 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) { 445 *perror = ENOMEM; 446 return (NULL); 447 } 448 } 449 450 /* Sort nexthops & check there are no duplicates */ 451 sort_weightened_nhops(wn, num_nhops); 452 uint32_t last_id = 0; 453 for (int i = 0; i < num_nhops; i++) { 454 if (wn[i].nh->nh_priv->nh_idx == last_id) { 455 *perror = EEXIST; 456 return (NULL); 457 } 458 last_id = wn[i].nh->nh_priv->nh_idx; 459 } 460 461 if ((key = alloc_nhgrp(wn, num_nhops)) == NULL) { 462 *perror = ENOMEM; 463 return (NULL); 464 } 465 466 nhg_priv = find_nhgrp(ctl, key); 467 if (nhg_priv != NULL) { 468 /* 469 * Free originally-created group. As it hasn't been linked 470 * and the dependent nexhops haven't been referenced, just free 471 * the group. 472 */ 473 destroy_nhgrp_int(key); 474 *perror = 0; 475 return (nhg_priv); 476 } else { 477 /* No existing group, try to link the new one */ 478 if (!ref_nhgrp_nhops(key)) { 479 /* 480 * Some of the nexthops have been scheduled for deletion. 481 * As the group hasn't been linked / no nexhops have been 482 * referenced, call the final destructor immediately. 483 */ 484 destroy_nhgrp_int(key); 485 *perror = EAGAIN; 486 return (NULL); 487 } 488 if (link_nhgrp(ctl, key) == 0) { 489 /* Unable to allocate index? */ 490 *perror = EAGAIN; 491 free_nhgrp_nhops(key); 492 destroy_nhgrp_int(key); 493 return (NULL); 494 } 495 *perror = 0; 496 return (key); 497 } 498 499 /* NOTREACHED */ 500 } 501 502 /* 503 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig. 504 * 505 * Returns referenced nexthop group or NULL. In the latter case, @perror is 506 * filled with an error code. 507 * Note that function does NOT care if the next nexthops already exists 508 * in the @gr_orig. As a result, they will be added, resulting in the 509 * same nexthop being present multiple times in the new group. 
510 */ 511 static struct nhgrp_priv * 512 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig, 513 struct weightened_nhop *wn, int num_nhops, int *perror) 514 { 515 char storage[64]; 516 struct weightened_nhop *pnhops; 517 struct nhgrp_priv *nhg_priv; 518 const struct nhgrp_priv *src_priv; 519 size_t sz; 520 int curr_nhops; 521 522 src_priv = NHGRP_PRIV_CONST(gr_orig); 523 curr_nhops = src_priv->nhg_nh_count; 524 525 *perror = 0; 526 527 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop)); 528 /* optimize for <= 4 paths, each path=16 bytes */ 529 if (sz <= sizeof(storage)) 530 pnhops = (struct weightened_nhop *)&storage[0]; 531 else { 532 pnhops = malloc(sz, M_TEMP, M_NOWAIT); 533 if (pnhops == NULL) { 534 *perror = ENOMEM; 535 return (NULL); 536 } 537 } 538 539 /* Copy nhops from original group first */ 540 memcpy(pnhops, src_priv->nhg_nh_weights, 541 curr_nhops * sizeof(struct weightened_nhop)); 542 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop)); 543 curr_nhops += num_nhops; 544 545 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, perror); 546 547 if (pnhops != (struct weightened_nhop *)&storage[0]) 548 free(pnhops, M_TEMP); 549 550 if (nhg_priv == NULL) 551 return (NULL); 552 553 return (nhg_priv); 554 } 555 556 557 /* 558 * Creates/finds nexthop group based on @wn and @num_nhops. 559 * Returns 0 on success with referenced group in @rnd, or 560 * errno. 561 * 562 * If the error is EAGAIN, then the operation can be retried. 
563 */ 564 int 565 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, 566 struct route_nhop_data *rnd) 567 { 568 struct nh_control *ctl = rh->nh_control; 569 struct nhgrp_priv *nhg_priv; 570 int error; 571 572 nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error); 573 if (nhg_priv != NULL) 574 rnd->rnd_nhgrp = nhg_priv->nhg; 575 rnd->rnd_weight = 0; 576 577 return (error); 578 } 579 580 /* 581 * Creates new nexthop group based on @src group without the nexthops 582 * chosen by @flt_func. 583 * Returns 0 on success, storring the reference nhop group/object in @rnd. 584 */ 585 int 586 nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src, 587 nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd) 588 { 589 char storage[64]; 590 struct nh_control *ctl = rh->nh_control; 591 struct weightened_nhop *pnhops; 592 const struct nhgrp_priv *mp_priv, *src_priv; 593 size_t sz; 594 int error, i, num_nhops; 595 596 src_priv = NHGRP_PRIV_CONST(src); 597 598 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop)); 599 /* optimize for <= 4 paths, each path=16 bytes */ 600 if (sz <= sizeof(storage)) 601 pnhops = (struct weightened_nhop *)&storage[0]; 602 else { 603 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL) 604 return (ENOMEM); 605 } 606 607 /* Filter nexthops */ 608 error = 0; 609 num_nhops = 0; 610 for (i = 0; i < src_priv->nhg_nh_count; i++) { 611 if (flt_func(src_priv->nhg_nh_weights[i].nh, flt_data)) 612 continue; 613 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i], 614 sizeof(struct weightened_nhop)); 615 } 616 617 if (num_nhops == 0) { 618 rnd->rnd_nhgrp = NULL; 619 rnd->rnd_weight = 0; 620 } else if (num_nhops == 1) { 621 rnd->rnd_nhop = pnhops[0].nh; 622 rnd->rnd_weight = pnhops[0].weight; 623 if (nhop_try_ref_object(rnd->rnd_nhop) == 0) 624 error = EAGAIN; 625 } else { 626 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, &error); 627 if (mp_priv != NULL) 628 rnd->rnd_nhgrp = mp_priv->nhg; 
629 rnd->rnd_weight = 0; 630 } 631 632 if (pnhops != (struct weightened_nhop *)&storage[0]) 633 free(pnhops, M_TEMP); 634 635 return (error); 636 } 637 638 /* 639 * Creates new multipath group based on existing group/nhop in @rnd_orig and 640 * to-be-added nhop @wn_add. 641 * Returns 0 on success and stores result in @rnd_new. 642 */ 643 int 644 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig, 645 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new) 646 { 647 struct nh_control *ctl = rh->nh_control; 648 struct nhgrp_priv *nhg_priv; 649 struct weightened_nhop wn[2] = {}; 650 int error; 651 652 if (rnd_orig->rnd_nhop == NULL) { 653 /* No paths to add to, just reference current nhop */ 654 *rnd_new = *rnd_add; 655 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0) 656 return (EAGAIN); 657 return (0); 658 } 659 660 wn[0].nh = rnd_add->rnd_nhop; 661 wn[0].weight = rnd_add->rnd_weight; 662 663 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) { 664 /* Simple merge of 2 non-multipath nexthops */ 665 wn[1].nh = rnd_orig->rnd_nhop; 666 wn[1].weight = rnd_orig->rnd_weight; 667 nhg_priv = get_nhgrp(ctl, wn, 2, &error); 668 } else { 669 /* Get new nhop group with @rt->rt_nhop as an additional nhop */ 670 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1, 671 &error); 672 } 673 674 if (nhg_priv == NULL) 675 return (error); 676 rnd_new->rnd_nhgrp = nhg_priv->nhg; 677 rnd_new->rnd_weight = 0; 678 679 return (0); 680 } 681 682 /* 683 * Returns pointer to array of nexthops with weights for 684 * given @nhg. Stores number of items in the array into @pnum_nhops. 
685 */ 686 struct weightened_nhop * 687 nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops) 688 { 689 struct nhgrp_priv *nhg_priv; 690 691 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 692 693 nhg_priv = NHGRP_PRIV(nhg); 694 *pnum_nhops = nhg_priv->nhg_nh_count; 695 696 return (nhg_priv->nhg_nh_weights); 697 } 698 699 __noinline static int 700 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv, 701 char *buffer, size_t buffer_size, struct sysctl_req *w) 702 { 703 struct rt_msghdr *rtm; 704 struct nhgrp_external *nhge; 705 struct nhgrp_container *nhgc; 706 const struct nhgrp_object *nhg; 707 struct nhgrp_nhop_external *ext; 708 int error; 709 size_t sz; 710 711 nhg = nhg_priv->nhg; 712 713 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 714 /* controlplane nexthops */ 715 sz += sizeof(struct nhgrp_container); 716 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 717 /* dataplane nexthops */ 718 sz += sizeof(struct nhgrp_container); 719 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 720 721 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size")); 722 723 bzero(buffer, sz); 724 725 rtm = (struct rt_msghdr *)buffer; 726 rtm->rtm_msglen = sz; 727 rtm->rtm_version = RTM_VERSION; 728 rtm->rtm_type = RTM_GET; 729 730 nhge = (struct nhgrp_external *)(rtm + 1); 731 732 nhge->nhg_idx = nhg_priv->nhg_idx; 733 nhge->nhg_refcount = nhg_priv->nhg_refcount; 734 735 /* fill in control plane nexthops firs */ 736 nhgc = (struct nhgrp_container *)(nhge + 1); 737 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS; 738 nhgc->nhgc_subtype = 0; 739 nhgc->nhgc_len = sizeof(struct nhgrp_container); 740 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 741 nhgc->nhgc_count = nhg_priv->nhg_nh_count; 742 743 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 744 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 745 ext[i].nh_idx = 
nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx; 746 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight; 747 } 748 749 /* fill in dataplane nexthops */ 750 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]); 751 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS; 752 nhgc->nhgc_subtype = 0; 753 nhgc->nhgc_len = sizeof(struct nhgrp_container); 754 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 755 nhgc->nhgc_count = nhg->nhg_size; 756 757 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 758 for (int i = 0; i < nhg->nhg_size; i++) { 759 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx; 760 ext[i].nh_weight = 0; 761 } 762 763 error = SYSCTL_OUT(w, buffer, sz); 764 765 return (error); 766 } 767 768 uint32_t 769 nhgrp_get_idx(const struct nhgrp_object *nhg) 770 { 771 const struct nhgrp_priv *nhg_priv; 772 773 nhg_priv = NHGRP_PRIV_CONST(nhg); 774 return (nhg_priv->nhg_idx); 775 } 776 777 uint32_t 778 nhgrp_get_count(struct rib_head *rh) 779 { 780 struct nh_control *ctl; 781 uint32_t count; 782 783 ctl = rh->nh_control; 784 785 NHOPS_RLOCK(ctl); 786 count = ctl->gr_head.items_count; 787 NHOPS_RUNLOCK(ctl); 788 789 return (count); 790 } 791 792 int 793 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) 794 { 795 struct nh_control *ctl = rh->nh_control; 796 struct epoch_tracker et; 797 struct nhgrp_priv *nhg_priv; 798 char *buffer; 799 size_t sz; 800 int error = 0; 801 802 if (ctl->gr_head.items_count == 0) 803 return (0); 804 805 /* Calculate the maximum nhop group size in bytes */ 806 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 807 sz += 2 * sizeof(struct nhgrp_container); 808 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH; 809 buffer = malloc(sz, M_TEMP, M_NOWAIT); 810 if (buffer == NULL) 811 return (ENOMEM); 812 813 NET_EPOCH_ENTER(et); 814 NHOPS_RLOCK(ctl); 815 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 816 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w); 817 if (error != 0) 818 
break; 819 } CHT_SLIST_FOREACH_END; 820 NHOPS_RUNLOCK(ctl); 821 NET_EPOCH_EXIT(et); 822 823 free(buffer, M_TEMP); 824 825 return (error); 826 } 827