1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 #define RTDEBUG 30 #include "opt_inet.h" 31 #include "opt_route.h" 32 33 #include <sys/cdefs.h> 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/lock.h> 37 #include <sys/rmlock.h> 38 #include <sys/malloc.h> 39 #include <sys/mbuf.h> 40 #include <sys/refcount.h> 41 #include <sys/socket.h> 42 #include <sys/sysctl.h> 43 #include <sys/kernel.h> 44 #include <sys/epoch.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/route.h> 49 #include <net/route/route_ctl.h> 50 #include <net/route/route_var.h> 51 #include <net/vnet.h> 52 53 #include <netinet/in.h> 54 #include <netinet/in_var.h> 55 #include <netinet/in_fib.h> 56 57 #include <net/route/nhop_utils.h> 58 #include <net/route/nhop.h> 59 #include <net/route/nhop_var.h> 60 #include <net/route/nhgrp_var.h> 61 62 /* 63 * This file contains the supporting functions for creating multipath groups 64 * and compiling their dataplane parts. 65 */ 66 67 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */ 68 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH, 69 "MPF_MULTIPATH must be the same as NHF_MULTIPATH"); 70 /* Offset and size of flags field has to be the same for nhop/nhop groups */ 71 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags); 72 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */ 73 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64); 74 75 static int wn_cmp(const void *a, const void *b); 76 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops); 77 78 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl, 79 struct weightened_nhop *wn, int num_nhops, int *perror); 80 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv); 81 static void destroy_nhgrp_epoch(epoch_context_t ctx); 82 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv); 83 84 static int 85 wn_cmp(const void *a, const void *b) 86 { 87 const struct weightened_nhop *wa = a; 88 const struct weightened_nhop *wb = b; 89 90 if (wa->weight > wb->weight) 91 return (1); 92 else if (wa->weight < wb->weight) 93 return (-1); 94 95 /* Compare nexthops by pointer */ 96 if (wa->nh > wb->nh) 97 return (1); 98 else if (wa->nh < wb->nh) 99 return (-1); 100 else 101 return (0); 102 } 103 104 /* 105 * Perform in-place sorting for array of nexthops in @wn. 106 * 107 * To avoid nh groups duplication, nexthops/weights in the 108 * @wn need to be ordered deterministically. 109 * As this sorting is needed only for the control plane functionality, 110 * there are no specific external requirements. 111 * 112 * Sort by weight first, to ease calculation of the slot sizes. 113 */ 114 static void 115 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops) 116 { 117 118 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp); 119 } 120 121 /* 122 * Calculate minimum number of slots required to fit the existing 123 * set of weights in the common use case where weights are "easily" 124 * comparable. 125 * Assumes @wn is sorted by weight ascending and each weight is > 0. 126 * Returns number of slots or 0 if precise calculation failed. 127 * 128 * Some examples: 129 * note: (i, X) pair means (nhop=i, weight=X): 130 * (1, 1) (2, 2) -> 3 slots [1, 2, 2] 131 * (1, 100), (2, 200) -> 3 slots [1, 2, 2] 132 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3] 133 */ 134 static uint32_t 135 calc_min_mpath_slots_fast(const struct weightened_nhop *wn, size_t num_items) 136 { 137 uint32_t i, last, xmin; 138 uint64_t total = 0; 139 140 last = 0; 141 xmin = wn[0].weight; 142 for (i = 0; i < num_items; i++) { 143 total += wn[i].weight; 144 if ((wn[i].weight - last < xmin) && (wn[i].weight != last)) 145 xmin = wn[i].weight - last; 146 last = wn[i].weight; 147 } 148 /* xmin is the minimum unit of desired capacity */ 149 if ((total % xmin) != 0) 150 return (0); 151 for (i = 0; i < num_items; i++) { 152 if ((wn[i].weight % xmin) != 0) 153 return (0); 154 } 155 156 return ((uint32_t)(total / xmin)); 157 } 158 159 /* 160 * Calculate minimum number of slots required to fit the existing 161 * set of weights while maintaining weight coefficients. 162 * 163 * Assume @wn is sorted by weight ascending and each weight is > 0. 164 * 165 * Tries to find simple precise solution first and falls back to 166 * RIB_MAX_MPATH_WIDTH in case of any failure. 167 */ 168 static uint32_t 169 calc_min_mpath_slots(const struct weightened_nhop *wn, size_t num_items) 170 { 171 uint32_t v; 172 173 v = calc_min_mpath_slots_fast(wn, num_items); 174 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) 175 v = RIB_MAX_MPATH_WIDTH; 176 177 return (v); 178 } 179 180 /* 181 * Nexthop group data consists of 182 * 1) dataplane part, with nhgrp_object as a header followed by an 183 * arbitrary number of nexthop pointers. 184 * 2) control plane part, with nhgrp_priv as a header, followed by 185 * an arbirtrary number of 'struct weightened_nhop' object. 186 * 187 * Given nexthop groups are (mostly) immutable, allocate all data 188 * in one go. 189 * 190 */ 191 __noinline static size_t 192 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) 193 { 194 size_t sz; 195 196 sz = sizeof(struct nhgrp_object); 197 sz += nhg_size * sizeof(struct nhop_object *); 198 sz += sizeof(struct nhgrp_priv); 199 sz += num_nhops * sizeof(struct weightened_nhop); 200 return (sz); 201 } 202 203 /* 204 * Compile actual list of nexthops to be used by datapath from 205 * the nexthop group @dst. 206 * 207 * For example, compiling control plane list of 2 nexthops 208 * [(200, A), (100, B)] would result in the datapath array 209 * [A, A, B] 210 */ 211 static void 212 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, 213 uint32_t num_slots) 214 { 215 struct nhgrp_object *dst; 216 int i, slot_idx, remaining_slots; 217 uint64_t remaining_sum, nh_weight, nh_slots; 218 219 slot_idx = 0; 220 dst = dst_priv->nhg; 221 /* Calculate sum of all weights */ 222 remaining_sum = 0; 223 for (i = 0; i < dst_priv->nhg_nh_count; i++) 224 remaining_sum += x[i].weight; 225 remaining_slots = num_slots; 226 DPRINTF("O: %u/%u", (uint32_t)remaining_sum, remaining_slots); 227 for (i = 0; i < dst_priv->nhg_nh_count; i++) { 228 /* Calculate number of slots for the current nexthop */ 229 if (remaining_sum > 0) { 230 nh_weight = (uint64_t)x[i].weight; 231 nh_slots = (nh_weight * remaining_slots / remaining_sum); 232 } else 233 nh_slots = 0; 234 235 remaining_sum -= x[i].weight; 236 remaining_slots -= nh_slots; 237 238 DPRINTF(" OO[%d]: %u/%u curr=%d slot_idx=%d", i, 239 (uint32_t)remaining_sum, remaining_slots, 240 (int)nh_slots, slot_idx); 241 242 KASSERT((slot_idx + nh_slots <= num_slots), 243 ("index overflow during nhg compilation")); 244 while (nh_slots-- > 0) 245 dst->nhops[slot_idx++] = x[i].nh; 246 } 247 } 248 249 /* 250 * Allocates new nexthop group for the list of weightened nexthops. 251 * Assume sorted list. 252 * Does NOT reference any nexthops in the group. 253 * Returns group with refcount=1 or NULL. 254 */ 255 static struct nhgrp_priv * 256 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) 257 { 258 uint32_t nhgrp_size; 259 int flags = M_NOWAIT; 260 struct nhgrp_object *nhg; 261 struct nhgrp_priv *nhg_priv; 262 263 nhgrp_size = calc_min_mpath_slots(wn, num_nhops); 264 if (nhgrp_size == 0) { 265 /* Zero weights, abort */ 266 return (NULL); 267 } 268 269 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops); 270 nhg = malloc(sz, M_NHOP, flags | M_ZERO); 271 if (nhg == NULL) { 272 return (NULL); 273 } 274 275 /* Has to be the first to make NHGRP_PRIV() work */ 276 nhg->nhg_size = nhgrp_size; 277 DPRINTF("new mpath group: num_nhops: %u", (uint32_t)nhgrp_size); 278 nhg->nhg_flags = MPF_MULTIPATH; 279 280 nhg_priv = NHGRP_PRIV(nhg); 281 nhg_priv->nhg_nh_count = num_nhops; 282 refcount_init(&nhg_priv->nhg_refcount, 1); 283 284 /* Please see nhgrp_free() comments on the initial value */ 285 refcount_init(&nhg_priv->nhg_linked, 2); 286 287 nhg_priv->nhg = nhg; 288 memcpy(&nhg_priv->nhg_nh_weights[0], wn, 289 num_nhops * sizeof(struct weightened_nhop)); 290 291 compile_nhgrp(nhg_priv, wn, nhg->nhg_size); 292 293 return (nhg_priv); 294 } 295 296 void 297 nhgrp_free(struct nhgrp_object *nhg) 298 { 299 struct nhgrp_priv *nhg_priv; 300 struct nh_control *ctl; 301 struct epoch_tracker et; 302 303 nhg_priv = NHGRP_PRIV(nhg); 304 305 if (!refcount_release(&nhg_priv->nhg_refcount)) 306 return; 307 308 /* 309 * group objects don't have an explicit lock attached to it. 310 * As groups are reclaimed based on reference count, it is possible 311 * that some groups will persist after vnet destruction callback 312 * called. Given that, handle scenario with nhgrp_free_group() being 313 * called either after or simultaneously with nhgrp_ctl_unlink_all() 314 * by using another reference counter: nhg_linked. 315 * 316 * There are only 2 places, where nhg_linked can be decreased: 317 * rib destroy (nhgrp_ctl_unlink_all) and this function. 318 * nhg_link can never be increased. 319 * 320 * Hence, use initial value of 2 to make use of 321 * refcount_release_if_not_last(). 322 * 323 * There can be two scenarious when calling this function: 324 * 325 * 1) nhg_linked value is 2. This means that either 326 * nhgrp_ctl_unlink_all() has not been called OR it is running, 327 * but we are guaranteed that nh_control won't be freed in 328 * this epoch. Hence, nexthop can be safely unlinked. 329 * 330 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all() 331 * has been called and nhgrp unlink can be skipped. 332 */ 333 334 NET_EPOCH_ENTER(et); 335 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) { 336 ctl = nhg_priv->nh_control; 337 if (unlink_nhgrp(ctl, nhg_priv) == NULL) { 338 /* Do not try to reclaim */ 339 DPRINTF("Failed to unlink nexhop group %p", nhg_priv); 340 NET_EPOCH_EXIT(et); 341 return; 342 } 343 } 344 NET_EPOCH_EXIT(et); 345 346 epoch_call(net_epoch_preempt, destroy_nhgrp_epoch, 347 &nhg_priv->nhg_epoch_ctx); 348 } 349 350 /* 351 * Destroys all local resources belonging to @nhg_priv. 352 */ 353 __noinline static void 354 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv) 355 { 356 357 free(nhg_priv->nhg, M_NHOP); 358 } 359 360 __noinline static void 361 destroy_nhgrp(struct nhgrp_priv *nhg_priv) 362 { 363 364 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0")); 365 366 DPRINTF("DEL MPATH %p", nhg_priv); 367 368 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 369 370 free_nhgrp_nhops(nhg_priv); 371 372 destroy_nhgrp_int(nhg_priv); 373 } 374 375 /* 376 * Epoch callback indicating group is safe to destroy 377 */ 378 static void 379 destroy_nhgrp_epoch(epoch_context_t ctx) 380 { 381 struct nhgrp_priv *nhg_priv; 382 383 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx); 384 385 destroy_nhgrp(nhg_priv); 386 } 387 388 static bool 389 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 390 { 391 392 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 393 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0) 394 continue; 395 396 /* 397 * Failed to ref the nexthop, b/c it's deleted. 398 * Need to rollback references back. 399 */ 400 for (int j = 0; j < i; j++) 401 nhop_free(nhg_priv->nhg_nh_weights[j].nh); 402 return (false); 403 } 404 405 return (true); 406 } 407 408 static void 409 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 410 { 411 412 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) 413 nhop_free(nhg_priv->nhg_nh_weights[i].nh); 414 } 415 416 /* 417 * Creates or looks up an existing nexthop group based on @wn and @num_nhops. 418 * 419 * Returns referenced nhop group or NULL, passing error code in @perror. 420 */ 421 struct nhgrp_priv * 422 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops, 423 int *perror) 424 { 425 struct nhgrp_priv *key, *nhg_priv; 426 427 if (num_nhops > RIB_MAX_MPATH_WIDTH) { 428 *perror = E2BIG; 429 return (NULL); 430 } 431 432 if (ctl->gr_head.hash_size == 0) { 433 /* First multipath request. Bootstrap mpath datastructures. */ 434 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) { 435 *perror = ENOMEM; 436 return (NULL); 437 } 438 } 439 440 /* Sort nexthops & check there are no duplicates */ 441 sort_weightened_nhops(wn, num_nhops); 442 uint32_t last_id = 0; 443 for (int i = 0; i < num_nhops; i++) { 444 if (wn[i].nh->nh_priv->nh_idx == last_id) { 445 *perror = EEXIST; 446 return (NULL); 447 } 448 last_id = wn[i].nh->nh_priv->nh_idx; 449 } 450 451 if ((key = alloc_nhgrp(wn, num_nhops)) == NULL) { 452 *perror = ENOMEM; 453 return (NULL); 454 } 455 456 nhg_priv = find_nhgrp(ctl, key); 457 if (nhg_priv != NULL) { 458 /* 459 * Free originally-created group. As it hasn't been linked 460 * and the dependent nexhops haven't been referenced, just free 461 * the group. 462 */ 463 destroy_nhgrp_int(key); 464 *perror = 0; 465 return (nhg_priv); 466 } else { 467 /* No existing group, try to link the new one */ 468 if (!ref_nhgrp_nhops(key)) { 469 /* 470 * Some of the nexthops have been scheduled for deletion. 471 * As the group hasn't been linked / no nexhops have been 472 * referenced, call the final destructor immediately. 473 */ 474 destroy_nhgrp_int(key); 475 *perror = EAGAIN; 476 return (NULL); 477 } 478 if (link_nhgrp(ctl, key) == 0) { 479 /* Unable to allocate index? */ 480 *perror = EAGAIN; 481 destroy_nhgrp(key); 482 } 483 *perror = 0; 484 return (key); 485 } 486 487 /* NOTREACHED */ 488 } 489 490 /* 491 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig. 492 * 493 * Returns referenced nexthop group or NULL. In the latter case, @perror is 494 * filled with an error code. 495 * Note that function does NOT care if the next nexthops already exists 496 * in the @gr_orig. As a result, they will be added, resulting in the 497 * same nexthop being present multiple times in the new group. 498 */ 499 static struct nhgrp_priv * 500 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig, 501 struct weightened_nhop *wn, int num_nhops, int *perror) 502 { 503 char storage[64]; 504 struct weightened_nhop *pnhops; 505 struct nhgrp_priv *nhg_priv; 506 const struct nhgrp_priv *src_priv; 507 size_t sz; 508 int curr_nhops; 509 510 src_priv = NHGRP_PRIV_CONST(gr_orig); 511 curr_nhops = src_priv->nhg_nh_count; 512 513 *perror = 0; 514 515 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop)); 516 /* optimize for <= 4 paths, each path=16 bytes */ 517 if (sz <= sizeof(storage)) 518 pnhops = (struct weightened_nhop *)&storage[0]; 519 else { 520 pnhops = malloc(sz, M_TEMP, M_NOWAIT); 521 if (pnhops == NULL) { 522 *perror = ENOMEM; 523 return (NULL); 524 } 525 } 526 527 /* Copy nhops from original group first */ 528 memcpy(pnhops, src_priv->nhg_nh_weights, 529 curr_nhops * sizeof(struct weightened_nhop)); 530 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop)); 531 curr_nhops += num_nhops; 532 533 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, perror); 534 535 if (pnhops != (struct weightened_nhop *)&storage[0]) 536 free(pnhops, M_TEMP); 537 538 if (nhg_priv == NULL) 539 return (NULL); 540 541 return (nhg_priv); 542 } 543 544 545 /* 546 * Creates/finds nexthop group based on @wn and @num_nhops. 547 * Returns 0 on success with referenced group in @rnd, or 548 * errno. 549 * 550 * If the error is EAGAIN, then the operation can be retried. 551 */ 552 int 553 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, 554 struct route_nhop_data *rnd) 555 { 556 struct nh_control *ctl = rh->nh_control; 557 struct nhgrp_priv *nhg_priv; 558 int error; 559 560 nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error); 561 if (nhg_priv != NULL) 562 rnd->rnd_nhgrp = nhg_priv->nhg; 563 rnd->rnd_weight = 0; 564 565 return (error); 566 } 567 568 /* 569 * Creates new nexthop group based on @src group with the nexthops defined in bitmask 570 * @nhop_mask removed. 571 * Returns referenced nexthop group or NULL on failure. 572 */ 573 int 574 nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src, 575 nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd) 576 { 577 char storage[64]; 578 struct nh_control *ctl = rh->nh_control; 579 struct weightened_nhop *pnhops; 580 const struct nhgrp_priv *mp_priv, *src_priv; 581 size_t sz; 582 int error, i, num_nhops; 583 584 src_priv = NHGRP_PRIV_CONST(src); 585 586 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop)); 587 /* optimize for <= 4 paths, each path=16 bytes */ 588 if (sz <= sizeof(storage)) 589 pnhops = (struct weightened_nhop *)&storage[0]; 590 else { 591 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL) 592 return (ENOMEM); 593 } 594 595 /* Filter nexthops */ 596 error = 0; 597 num_nhops = 0; 598 for (i = 0; i < src_priv->nhg_nh_count; i++) { 599 if (flt_func(src_priv->nhg_nh_weights[i].nh, flt_data)) 600 continue; 601 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i], 602 sizeof(struct weightened_nhop)); 603 } 604 605 if (num_nhops == 0) { 606 rnd->rnd_nhgrp = NULL; 607 rnd->rnd_weight = 0; 608 } else if (num_nhops == 1) { 609 rnd->rnd_nhop = pnhops[0].nh; 610 rnd->rnd_weight = pnhops[0].weight; 611 if (nhop_try_ref_object(rnd->rnd_nhop) == 0) 612 error = EAGAIN; 613 } else { 614 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, &error); 615 if (mp_priv != NULL) 616 rnd->rnd_nhgrp = mp_priv->nhg; 617 rnd->rnd_weight = 0; 618 } 619 620 if (pnhops != (struct weightened_nhop *)&storage[0]) 621 free(pnhops, M_TEMP); 622 623 return (error); 624 } 625 626 /* 627 * Creates new multipath group based on existing group/nhop in @rnd_orig and 628 * to-be-added nhop @wn_add. 629 * Returns 0 on success and stores result in @rnd_new. 630 */ 631 int 632 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig, 633 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new) 634 { 635 struct nh_control *ctl = rh->nh_control; 636 struct nhgrp_priv *nhg_priv; 637 struct weightened_nhop wn[2]; 638 int error; 639 640 if (rnd_orig->rnd_nhop == NULL) { 641 /* No paths to add to, just reference current nhop */ 642 *rnd_new = *rnd_add; 643 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0) 644 return (EAGAIN); 645 return (0); 646 } 647 648 wn[0].nh = rnd_add->rnd_nhop; 649 wn[0].weight = rnd_add->rnd_weight; 650 651 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) { 652 /* Simple merge of 2 non-multipath nexthops */ 653 wn[1].nh = rnd_orig->rnd_nhop; 654 wn[1].weight = rnd_orig->rnd_weight; 655 nhg_priv = get_nhgrp(ctl, wn, 2, &error); 656 } else { 657 /* Get new nhop group with @rt->rt_nhop as an additional nhop */ 658 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1, 659 &error); 660 } 661 662 if (nhg_priv == NULL) 663 return (error); 664 rnd_new->rnd_nhgrp = nhg_priv->nhg; 665 rnd_new->rnd_weight = 0; 666 667 return (0); 668 } 669 670 /* 671 * Returns pointer to array of nexthops with weights for 672 * given @nhg. Stores number of items in the array into @pnum_nhops. 673 */ 674 struct weightened_nhop * 675 nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops) 676 { 677 struct nhgrp_priv *nhg_priv; 678 679 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 680 681 nhg_priv = NHGRP_PRIV(nhg); 682 *pnum_nhops = nhg_priv->nhg_nh_count; 683 684 return (nhg_priv->nhg_nh_weights); 685 } 686 687 __noinline static int 688 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv, 689 char *buffer, size_t buffer_size, struct sysctl_req *w) 690 { 691 struct rt_msghdr *rtm; 692 struct nhgrp_external *nhge; 693 struct nhgrp_container *nhgc; 694 const struct nhgrp_object *nhg; 695 struct nhgrp_nhop_external *ext; 696 int error; 697 size_t sz; 698 699 nhg = nhg_priv->nhg; 700 701 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 702 /* controlplane nexthops */ 703 sz += sizeof(struct nhgrp_container); 704 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 705 /* dataplane nexthops */ 706 sz += sizeof(struct nhgrp_container); 707 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 708 709 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size")); 710 711 bzero(buffer, sz); 712 713 rtm = (struct rt_msghdr *)buffer; 714 rtm->rtm_msglen = sz; 715 rtm->rtm_version = RTM_VERSION; 716 rtm->rtm_type = RTM_GET; 717 718 nhge = (struct nhgrp_external *)(rtm + 1); 719 720 nhge->nhg_idx = nhg_priv->nhg_idx; 721 nhge->nhg_refcount = nhg_priv->nhg_refcount; 722 723 /* fill in control plane nexthops firs */ 724 nhgc = (struct nhgrp_container *)(nhge + 1); 725 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS; 726 nhgc->nhgc_subtype = 0; 727 nhgc->nhgc_len = sizeof(struct nhgrp_container); 728 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 729 nhgc->nhgc_count = nhg_priv->nhg_nh_count; 730 731 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 732 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 733 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx; 734 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight; 735 } 736 737 /* fill in dataplane nexthops */ 738 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]); 739 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS; 740 nhgc->nhgc_subtype = 0; 741 nhgc->nhgc_len = sizeof(struct nhgrp_container); 742 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 743 nhgc->nhgc_count = nhg->nhg_size; 744 745 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 746 for (int i = 0; i < nhg->nhg_size; i++) { 747 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx; 748 ext[i].nh_weight = 0; 749 } 750 751 error = SYSCTL_OUT(w, buffer, sz); 752 753 return (error); 754 } 755 756 int 757 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) 758 { 759 struct nh_control *ctl = rh->nh_control; 760 struct epoch_tracker et; 761 struct nhgrp_priv *nhg_priv; 762 char *buffer; 763 size_t sz; 764 int error = 0; 765 766 if (ctl->gr_head.items_count == 0) 767 return (0); 768 769 /* Calculate the maximum nhop group size in bytes */ 770 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 771 sz += 2 * sizeof(struct nhgrp_container); 772 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH; 773 buffer = malloc(sz, M_TEMP, M_WAITOK); 774 775 NET_EPOCH_ENTER(et); 776 NHOPS_RLOCK(ctl); 777 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 778 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w); 779 if (error != 0) 780 break; 781 } CHT_SLIST_FOREACH_END; 782 NHOPS_RUNLOCK(ctl); 783 NET_EPOCH_EXIT(et); 784 785 free(buffer, M_TEMP); 786 787 return (error); 788 } 789