1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 #include "opt_inet.h" 30 #include "opt_route.h" 31 32 #include <sys/cdefs.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/lock.h> 36 #include <sys/rmlock.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/refcount.h> 40 #include <sys/socket.h> 41 #include <sys/sysctl.h> 42 #include <sys/kernel.h> 43 #include <sys/epoch.h> 44 45 #include <net/if.h> 46 #include <net/if_var.h> 47 #include <net/route.h> 48 #include <net/route/route_ctl.h> 49 #include <net/route/route_var.h> 50 #include <net/vnet.h> 51 52 #include <netinet/in.h> 53 #include <netinet/in_var.h> 54 #include <netinet/in_fib.h> 55 56 #include <net/route/nhop_utils.h> 57 #include <net/route/nhop.h> 58 #include <net/route/nhop_var.h> 59 #include <net/route/nhgrp_var.h> 60 61 #define DEBUG_MOD_NAME nhgrp_ctl 62 #define DEBUG_MAX_LEVEL LOG_DEBUG 63 #include <net/route/route_debug.h> 64 _DECLARE_DEBUG(LOG_INFO); 65 66 /* 67 * This file contains the supporting functions for creating multipath groups 68 * and compiling their dataplane parts. 69 */ 70 71 /* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */ 72 _Static_assert(MPF_MULTIPATH == NHF_MULTIPATH, 73 "MPF_MULTIPATH must be the same as NHF_MULTIPATH"); 74 /* Offset and size of flags field has to be the same for nhop/nhop groups */ 75 CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags); 76 /* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */ 77 CTASSERT(RIB_MAX_MPATH_WIDTH <= 64); 78 79 static int wn_cmp(const void *a, const void *b); 80 static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops); 81 82 static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl, 83 struct weightened_nhop *wn, int num_nhops, int *perror); 84 static void destroy_nhgrp(struct nhgrp_priv *nhg_priv); 85 static void destroy_nhgrp_epoch(epoch_context_t ctx); 86 static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv); 87 88 static int 89 wn_cmp(const void *a, const void *b) 90 { 91 const struct weightened_nhop *wa = a; 92 const struct weightened_nhop *wb = b; 93 94 if (wa->weight > wb->weight) 95 return (1); 96 else if (wa->weight < wb->weight) 97 return (-1); 98 99 /* Compare nexthops by pointer */ 100 if (wa->nh > wb->nh) 101 return (1); 102 else if (wa->nh < wb->nh) 103 return (-1); 104 else 105 return (0); 106 } 107 108 /* 109 * Perform in-place sorting for array of nexthops in @wn. 110 * 111 * To avoid nh groups duplication, nexthops/weights in the 112 * @wn need to be ordered deterministically. 113 * As this sorting is needed only for the control plane functionality, 114 * there are no specific external requirements. 115 * 116 * Sort by weight first, to ease calculation of the slot sizes. 117 */ 118 static void 119 sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops) 120 { 121 122 qsort(wn, num_nhops, sizeof(struct weightened_nhop), wn_cmp); 123 } 124 125 /* 126 * Calculate minimum number of slots required to fit the existing 127 * set of weights in the common use case where weights are "easily" 128 * comparable. 129 * Assumes @wn is sorted by weight ascending and each weight is > 0. 130 * Returns number of slots or 0 if precise calculation failed. 131 * 132 * Some examples: 133 * note: (i, X) pair means (nhop=i, weight=X): 134 * (1, 1) (2, 2) -> 3 slots [1, 2, 2] 135 * (1, 100), (2, 200) -> 3 slots [1, 2, 2] 136 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3] 137 */ 138 static uint32_t 139 calc_min_mpath_slots_fast(const struct weightened_nhop *wn, size_t num_items) 140 { 141 uint32_t i, last, xmin; 142 uint64_t total = 0; 143 144 last = 0; 145 xmin = wn[0].weight; 146 for (i = 0; i < num_items; i++) { 147 total += wn[i].weight; 148 if ((wn[i].weight - last < xmin) && (wn[i].weight != last)) 149 xmin = wn[i].weight - last; 150 last = wn[i].weight; 151 } 152 /* xmin is the minimum unit of desired capacity */ 153 if ((total % xmin) != 0) 154 return (0); 155 for (i = 0; i < num_items; i++) { 156 if ((wn[i].weight % xmin) != 0) 157 return (0); 158 } 159 160 return ((uint32_t)(total / xmin)); 161 } 162 163 /* 164 * Calculate minimum number of slots required to fit the existing 165 * set of weights while maintaining weight coefficients. 166 * 167 * Assume @wn is sorted by weight ascending and each weight is > 0. 168 * 169 * Tries to find simple precise solution first and falls back to 170 * RIB_MAX_MPATH_WIDTH in case of any failure. 171 */ 172 static uint32_t 173 calc_min_mpath_slots(const struct weightened_nhop *wn, size_t num_items) 174 { 175 uint32_t v; 176 177 v = calc_min_mpath_slots_fast(wn, num_items); 178 if ((v == 0) || (v > RIB_MAX_MPATH_WIDTH)) 179 v = RIB_MAX_MPATH_WIDTH; 180 181 return (v); 182 } 183 184 /* 185 * Nexthop group data consists of 186 * 1) dataplane part, with nhgrp_object as a header followed by an 187 * arbitrary number of nexthop pointers. 188 * 2) control plane part, with nhgrp_priv as a header, followed by 189 * an arbirtrary number of 'struct weightened_nhop' object. 190 * 191 * Given nexthop groups are (mostly) immutable, allocate all data 192 * in one go. 193 * 194 */ 195 __noinline static size_t 196 get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops) 197 { 198 size_t sz; 199 200 sz = sizeof(struct nhgrp_object); 201 sz += nhg_size * sizeof(struct nhop_object *); 202 sz += sizeof(struct nhgrp_priv); 203 sz += num_nhops * sizeof(struct weightened_nhop); 204 return (sz); 205 } 206 207 /* 208 * Compile actual list of nexthops to be used by datapath from 209 * the nexthop group @dst. 210 * 211 * For example, compiling control plane list of 2 nexthops 212 * [(200, A), (100, B)] would result in the datapath array 213 * [A, A, B] 214 */ 215 static void 216 compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x, 217 uint32_t num_slots) 218 { 219 struct nhgrp_object *dst; 220 int i, slot_idx, remaining_slots; 221 uint64_t remaining_sum, nh_weight, nh_slots; 222 223 slot_idx = 0; 224 dst = dst_priv->nhg; 225 /* Calculate sum of all weights */ 226 remaining_sum = 0; 227 for (i = 0; i < dst_priv->nhg_nh_count; i++) 228 remaining_sum += x[i].weight; 229 remaining_slots = num_slots; 230 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, "sum: %lu, slots: %d", 231 remaining_sum, remaining_slots); 232 for (i = 0; i < dst_priv->nhg_nh_count; i++) { 233 /* Calculate number of slots for the current nexthop */ 234 if (remaining_sum > 0) { 235 nh_weight = (uint64_t)x[i].weight; 236 nh_slots = (nh_weight * remaining_slots / remaining_sum); 237 } else 238 nh_slots = 0; 239 240 remaining_sum -= x[i].weight; 241 remaining_slots -= nh_slots; 242 243 FIB_NH_LOG(LOG_DEBUG3, x[0].nh, 244 " rem_sum: %lu, rem_slots: %d nh_slots: %d, slot_idx: %d", 245 remaining_sum, remaining_slots, (int)nh_slots, slot_idx); 246 247 KASSERT((slot_idx + nh_slots <= num_slots), 248 ("index overflow during nhg compilation")); 249 while (nh_slots-- > 0) 250 dst->nhops[slot_idx++] = x[i].nh; 251 } 252 } 253 254 /* 255 * Allocates new nexthop group for the list of weightened nexthops. 256 * Assume sorted list. 257 * Does NOT reference any nexthops in the group. 258 * Returns group with refcount=1 or NULL. 259 */ 260 static struct nhgrp_priv * 261 alloc_nhgrp(struct weightened_nhop *wn, int num_nhops) 262 { 263 uint32_t nhgrp_size; 264 struct nhgrp_object *nhg; 265 struct nhgrp_priv *nhg_priv; 266 267 nhgrp_size = calc_min_mpath_slots(wn, num_nhops); 268 if (nhgrp_size == 0) { 269 /* Zero weights, abort */ 270 return (NULL); 271 } 272 273 size_t sz = get_nhgrp_alloc_size(nhgrp_size, num_nhops); 274 nhg = malloc(sz, M_NHOP, M_NOWAIT | M_ZERO); 275 if (nhg == NULL) { 276 FIB_NH_LOG(LOG_INFO, wn[0].nh, 277 "unable to allocate group with num_nhops %d (compiled %u)", 278 num_nhops, nhgrp_size); 279 return (NULL); 280 } 281 282 /* Has to be the first to make NHGRP_PRIV() work */ 283 nhg->nhg_size = nhgrp_size; 284 nhg->nhg_flags = MPF_MULTIPATH; 285 286 nhg_priv = NHGRP_PRIV(nhg); 287 nhg_priv->nhg_nh_count = num_nhops; 288 refcount_init(&nhg_priv->nhg_refcount, 1); 289 290 /* Please see nhgrp_free() comments on the initial value */ 291 refcount_init(&nhg_priv->nhg_linked, 2); 292 293 nhg_priv->nhg = nhg; 294 memcpy(&nhg_priv->nhg_nh_weights[0], wn, 295 num_nhops * sizeof(struct weightened_nhop)); 296 297 FIB_NH_LOG(LOG_DEBUG, wn[0].nh, "num_nhops: %d, compiled_nhop: %u", 298 num_nhops, nhgrp_size); 299 300 compile_nhgrp(nhg_priv, wn, nhg->nhg_size); 301 302 return (nhg_priv); 303 } 304 305 void 306 nhgrp_ref_object(struct nhgrp_object *nhg) 307 { 308 struct nhgrp_priv *nhg_priv; 309 u_int old __diagused; 310 311 nhg_priv = NHGRP_PRIV(nhg); 312 old = refcount_acquire(&nhg_priv->nhg_refcount); 313 KASSERT(old > 0, ("%s: nhgrp object %p has 0 refs", __func__, nhg)); 314 } 315 316 void 317 nhgrp_free(struct nhgrp_object *nhg) 318 { 319 struct nhgrp_priv *nhg_priv; 320 struct nh_control *ctl; 321 struct epoch_tracker et; 322 323 nhg_priv = NHGRP_PRIV(nhg); 324 325 if (!refcount_release(&nhg_priv->nhg_refcount)) 326 return; 327 328 /* 329 * group objects don't have an explicit lock attached to it. 330 * As groups are reclaimed based on reference count, it is possible 331 * that some groups will persist after vnet destruction callback 332 * called. Given that, handle scenario with nhgrp_free_group() being 333 * called either after or simultaneously with nhgrp_ctl_unlink_all() 334 * by using another reference counter: nhg_linked. 335 * 336 * There are only 2 places, where nhg_linked can be decreased: 337 * rib destroy (nhgrp_ctl_unlink_all) and this function. 338 * nhg_link can never be increased. 339 * 340 * Hence, use initial value of 2 to make use of 341 * refcount_release_if_not_last(). 342 * 343 * There can be two scenarious when calling this function: 344 * 345 * 1) nhg_linked value is 2. This means that either 346 * nhgrp_ctl_unlink_all() has not been called OR it is running, 347 * but we are guaranteed that nh_control won't be freed in 348 * this epoch. Hence, nexthop can be safely unlinked. 349 * 350 * 2) nh_linked value is 1. In that case, nhgrp_ctl_unlink_all() 351 * has been called and nhgrp unlink can be skipped. 352 */ 353 354 NET_EPOCH_ENTER(et); 355 if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) { 356 ctl = nhg_priv->nh_control; 357 if (unlink_nhgrp(ctl, nhg_priv) == NULL) { 358 /* Do not try to reclaim */ 359 RT_LOG(LOG_INFO, "Failed to unlink nexhop group %p", 360 nhg_priv); 361 NET_EPOCH_EXIT(et); 362 return; 363 } 364 } 365 NET_EPOCH_EXIT(et); 366 367 epoch_call(net_epoch_preempt, destroy_nhgrp_epoch, 368 &nhg_priv->nhg_epoch_ctx); 369 } 370 371 /* 372 * Destroys all local resources belonging to @nhg_priv. 373 */ 374 __noinline static void 375 destroy_nhgrp_int(struct nhgrp_priv *nhg_priv) 376 { 377 378 free(nhg_priv->nhg, M_NHOP); 379 } 380 381 __noinline static void 382 destroy_nhgrp(struct nhgrp_priv *nhg_priv) 383 { 384 385 KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0")); 386 KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0")); 387 388 #if DEBUG_MAX_LEVEL >= LOG_DEBUG 389 char nhgbuf[NHOP_PRINT_BUFSIZE]; 390 FIB_NH_LOG(LOG_DEBUG, nhg_priv->nhg_nh_weights[0].nh, 391 "destroying %s", nhgrp_print_buf(nhg_priv->nhg, 392 nhgbuf, sizeof(nhgbuf))); 393 #endif 394 395 free_nhgrp_nhops(nhg_priv); 396 destroy_nhgrp_int(nhg_priv); 397 } 398 399 /* 400 * Epoch callback indicating group is safe to destroy 401 */ 402 static void 403 destroy_nhgrp_epoch(epoch_context_t ctx) 404 { 405 struct nhgrp_priv *nhg_priv; 406 407 nhg_priv = __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx); 408 409 destroy_nhgrp(nhg_priv); 410 } 411 412 static bool 413 ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 414 { 415 416 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 417 if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[i].nh) != 0) 418 continue; 419 420 /* 421 * Failed to ref the nexthop, b/c it's deleted. 422 * Need to rollback references back. 423 */ 424 for (int j = 0; j < i; j++) 425 nhop_free(nhg_priv->nhg_nh_weights[j].nh); 426 return (false); 427 } 428 429 return (true); 430 } 431 432 static void 433 free_nhgrp_nhops(struct nhgrp_priv *nhg_priv) 434 { 435 436 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) 437 nhop_free(nhg_priv->nhg_nh_weights[i].nh); 438 } 439 440 /* 441 * Creates or looks up an existing nexthop group based on @wn and @num_nhops. 442 * 443 * Returns referenced nhop group or NULL, passing error code in @perror. 444 */ 445 struct nhgrp_priv * 446 get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops, 447 int *perror) 448 { 449 struct nhgrp_priv *key, *nhg_priv; 450 451 if (num_nhops > RIB_MAX_MPATH_WIDTH) { 452 *perror = E2BIG; 453 return (NULL); 454 } 455 456 if (ctl->gr_head.hash_size == 0) { 457 /* First multipath request. Bootstrap mpath datastructures. */ 458 if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) { 459 *perror = ENOMEM; 460 return (NULL); 461 } 462 } 463 464 /* Sort nexthops & check there are no duplicates */ 465 sort_weightened_nhops(wn, num_nhops); 466 uint32_t last_id = 0; 467 for (int i = 0; i < num_nhops; i++) { 468 if (wn[i].nh->nh_priv->nh_idx == last_id) { 469 *perror = EEXIST; 470 return (NULL); 471 } 472 last_id = wn[i].nh->nh_priv->nh_idx; 473 } 474 475 if ((key = alloc_nhgrp(wn, num_nhops)) == NULL) { 476 *perror = ENOMEM; 477 return (NULL); 478 } 479 480 nhg_priv = find_nhgrp(ctl, key); 481 if (nhg_priv != NULL) { 482 /* 483 * Free originally-created group. As it hasn't been linked 484 * and the dependent nexhops haven't been referenced, just free 485 * the group. 486 */ 487 destroy_nhgrp_int(key); 488 *perror = 0; 489 return (nhg_priv); 490 } else { 491 /* No existing group, try to link the new one */ 492 if (!ref_nhgrp_nhops(key)) { 493 /* 494 * Some of the nexthops have been scheduled for deletion. 495 * As the group hasn't been linked / no nexhops have been 496 * referenced, call the final destructor immediately. 497 */ 498 destroy_nhgrp_int(key); 499 *perror = EAGAIN; 500 return (NULL); 501 } 502 if (link_nhgrp(ctl, key) == 0) { 503 /* Unable to allocate index? */ 504 *perror = EAGAIN; 505 free_nhgrp_nhops(key); 506 destroy_nhgrp_int(key); 507 return (NULL); 508 } 509 *perror = 0; 510 return (key); 511 } 512 513 /* NOTREACHED */ 514 } 515 516 /* 517 * Appends one or more nexthops denoted by @wm to the nexthop group @gr_orig. 518 * 519 * Returns referenced nexthop group or NULL. In the latter case, @perror is 520 * filled with an error code. 521 * Note that function does NOT care if the next nexthops already exists 522 * in the @gr_orig. As a result, they will be added, resulting in the 523 * same nexthop being present multiple times in the new group. 524 */ 525 static struct nhgrp_priv * 526 append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig, 527 struct weightened_nhop *wn, int num_nhops, int *perror) 528 { 529 char storage[64]; 530 struct weightened_nhop *pnhops; 531 struct nhgrp_priv *nhg_priv; 532 const struct nhgrp_priv *src_priv; 533 size_t sz; 534 int curr_nhops; 535 536 src_priv = NHGRP_PRIV_CONST(gr_orig); 537 curr_nhops = src_priv->nhg_nh_count; 538 539 *perror = 0; 540 541 sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop)); 542 /* optimize for <= 4 paths, each path=16 bytes */ 543 if (sz <= sizeof(storage)) 544 pnhops = (struct weightened_nhop *)&storage[0]; 545 else { 546 pnhops = malloc(sz, M_TEMP, M_NOWAIT); 547 if (pnhops == NULL) { 548 *perror = ENOMEM; 549 return (NULL); 550 } 551 } 552 553 /* Copy nhops from original group first */ 554 memcpy(pnhops, src_priv->nhg_nh_weights, 555 curr_nhops * sizeof(struct weightened_nhop)); 556 memcpy(&pnhops[curr_nhops], wn, num_nhops * sizeof(struct weightened_nhop)); 557 curr_nhops += num_nhops; 558 559 nhg_priv = get_nhgrp(ctl, pnhops, curr_nhops, perror); 560 561 if (pnhops != (struct weightened_nhop *)&storage[0]) 562 free(pnhops, M_TEMP); 563 564 if (nhg_priv == NULL) 565 return (NULL); 566 567 return (nhg_priv); 568 } 569 570 571 /* 572 * Creates/finds nexthop group based on @wn and @num_nhops. 573 * Returns 0 on success with referenced group in @rnd, or 574 * errno. 575 * 576 * If the error is EAGAIN, then the operation can be retried. 577 */ 578 int 579 nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops, 580 struct route_nhop_data *rnd) 581 { 582 struct nh_control *ctl = rh->nh_control; 583 struct nhgrp_priv *nhg_priv; 584 int error; 585 586 nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error); 587 if (nhg_priv != NULL) 588 rnd->rnd_nhgrp = nhg_priv->nhg; 589 rnd->rnd_weight = 0; 590 591 return (error); 592 } 593 594 /* 595 * Creates new nexthop group based on @src group without the nexthops 596 * chosen by @flt_func. 597 * Returns 0 on success, storring the reference nhop group/object in @rnd. 598 */ 599 int 600 nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src, 601 nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd) 602 { 603 char storage[64]; 604 struct nh_control *ctl = rh->nh_control; 605 struct weightened_nhop *pnhops; 606 const struct nhgrp_priv *mp_priv, *src_priv; 607 size_t sz; 608 int error, i, num_nhops; 609 610 src_priv = NHGRP_PRIV_CONST(src); 611 612 sz = src_priv->nhg_nh_count * (sizeof(struct weightened_nhop)); 613 /* optimize for <= 4 paths, each path=16 bytes */ 614 if (sz <= sizeof(storage)) 615 pnhops = (struct weightened_nhop *)&storage[0]; 616 else { 617 if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL) 618 return (ENOMEM); 619 } 620 621 /* Filter nexthops */ 622 error = 0; 623 num_nhops = 0; 624 for (i = 0; i < src_priv->nhg_nh_count; i++) { 625 if (flt_func(src_priv->nhg_nh_weights[i].nh, flt_data)) 626 continue; 627 memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i], 628 sizeof(struct weightened_nhop)); 629 } 630 631 if (num_nhops == 0) { 632 rnd->rnd_nhgrp = NULL; 633 rnd->rnd_weight = 0; 634 } else if (num_nhops == 1) { 635 rnd->rnd_nhop = pnhops[0].nh; 636 rnd->rnd_weight = pnhops[0].weight; 637 if (nhop_try_ref_object(rnd->rnd_nhop) == 0) 638 error = EAGAIN; 639 } else { 640 mp_priv = get_nhgrp(ctl, pnhops, num_nhops, &error); 641 if (mp_priv != NULL) 642 rnd->rnd_nhgrp = mp_priv->nhg; 643 rnd->rnd_weight = 0; 644 } 645 646 if (pnhops != (struct weightened_nhop *)&storage[0]) 647 free(pnhops, M_TEMP); 648 649 return (error); 650 } 651 652 /* 653 * Creates new multipath group based on existing group/nhop in @rnd_orig and 654 * to-be-added nhop @wn_add. 655 * Returns 0 on success and stores result in @rnd_new. 656 */ 657 int 658 nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig, 659 struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new) 660 { 661 struct nh_control *ctl = rh->nh_control; 662 struct nhgrp_priv *nhg_priv; 663 struct weightened_nhop wn[2] = {}; 664 int error; 665 666 if (rnd_orig->rnd_nhop == NULL) { 667 /* No paths to add to, just reference current nhop */ 668 *rnd_new = *rnd_add; 669 if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0) 670 return (EAGAIN); 671 return (0); 672 } 673 674 wn[0].nh = rnd_add->rnd_nhop; 675 wn[0].weight = rnd_add->rnd_weight; 676 677 if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) { 678 /* Simple merge of 2 non-multipath nexthops */ 679 wn[1].nh = rnd_orig->rnd_nhop; 680 wn[1].weight = rnd_orig->rnd_weight; 681 nhg_priv = get_nhgrp(ctl, wn, 2, &error); 682 } else { 683 /* Get new nhop group with @rt->rt_nhop as an additional nhop */ 684 nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1, 685 &error); 686 } 687 688 if (nhg_priv == NULL) 689 return (error); 690 rnd_new->rnd_nhgrp = nhg_priv->nhg; 691 rnd_new->rnd_weight = 0; 692 693 return (0); 694 } 695 696 /* 697 * Returns pointer to array of nexthops with weights for 698 * given @nhg. Stores number of items in the array into @pnum_nhops. 699 */ 700 struct weightened_nhop * 701 nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops) 702 { 703 struct nhgrp_priv *nhg_priv; 704 705 KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath")); 706 707 nhg_priv = NHGRP_PRIV(nhg); 708 *pnum_nhops = nhg_priv->nhg_nh_count; 709 710 return (nhg_priv->nhg_nh_weights); 711 } 712 713 /* 714 * Prints nexhop group @nhg data in the provided @buf. 715 * Example: nhg#33/sz=3:[#1:100,#2:100,#3:100] 716 * Example: nhg#33/sz=5:[#1:100,#2:100,..] 717 */ 718 char * 719 nhgrp_print_buf(const struct nhgrp_object *nhg, char *buf, size_t bufsize) 720 { 721 const struct nhgrp_priv *nhg_priv = NHGRP_PRIV_CONST(nhg); 722 723 int off = snprintf(buf, bufsize, "nhg#%u/sz=%u:[", nhg_priv->nhg_idx, 724 nhg_priv->nhg_nh_count); 725 726 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 727 const struct weightened_nhop *wn = &nhg_priv->nhg_nh_weights[i]; 728 int len = snprintf(&buf[off], bufsize - off, "#%u:%u,", 729 wn->nh->nh_priv->nh_idx, wn->weight); 730 if (len + off + 3 >= bufsize) { 731 int len = snprintf(&buf[off], bufsize - off, "..."); 732 off += len; 733 break; 734 } 735 off += len; 736 } 737 if (off > 0) 738 off--; // remove last "," 739 if (off + 1 < bufsize) 740 snprintf(&buf[off], bufsize - off, "]"); 741 return buf; 742 } 743 744 __noinline static int 745 dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv, 746 char *buffer, size_t buffer_size, struct sysctl_req *w) 747 { 748 struct rt_msghdr *rtm; 749 struct nhgrp_external *nhge; 750 struct nhgrp_container *nhgc; 751 const struct nhgrp_object *nhg; 752 struct nhgrp_nhop_external *ext; 753 int error; 754 size_t sz; 755 756 nhg = nhg_priv->nhg; 757 758 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 759 /* controlplane nexthops */ 760 sz += sizeof(struct nhgrp_container); 761 sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 762 /* dataplane nexthops */ 763 sz += sizeof(struct nhgrp_container); 764 sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 765 766 KASSERT(sz <= buffer_size, ("increase nhgrp buffer size")); 767 768 bzero(buffer, sz); 769 770 rtm = (struct rt_msghdr *)buffer; 771 rtm->rtm_msglen = sz; 772 rtm->rtm_version = RTM_VERSION; 773 rtm->rtm_type = RTM_GET; 774 775 nhge = (struct nhgrp_external *)(rtm + 1); 776 777 nhge->nhg_idx = nhg_priv->nhg_idx; 778 nhge->nhg_refcount = nhg_priv->nhg_refcount; 779 780 /* fill in control plane nexthops firs */ 781 nhgc = (struct nhgrp_container *)(nhge + 1); 782 nhgc->nhgc_type = NHG_C_TYPE_CNHOPS; 783 nhgc->nhgc_subtype = 0; 784 nhgc->nhgc_len = sizeof(struct nhgrp_container); 785 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count; 786 nhgc->nhgc_count = nhg_priv->nhg_nh_count; 787 788 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 789 for (int i = 0; i < nhg_priv->nhg_nh_count; i++) { 790 ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx; 791 ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight; 792 } 793 794 /* fill in dataplane nexthops */ 795 nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]); 796 nhgc->nhgc_type = NHG_C_TYPE_DNHOPS; 797 nhgc->nhgc_subtype = 0; 798 nhgc->nhgc_len = sizeof(struct nhgrp_container); 799 nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size; 800 nhgc->nhgc_count = nhg->nhg_size; 801 802 ext = (struct nhgrp_nhop_external *)(nhgc + 1); 803 for (int i = 0; i < nhg->nhg_size; i++) { 804 ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx; 805 ext[i].nh_weight = 0; 806 } 807 808 error = SYSCTL_OUT(w, buffer, sz); 809 810 return (error); 811 } 812 813 uint32_t 814 nhgrp_get_idx(const struct nhgrp_object *nhg) 815 { 816 const struct nhgrp_priv *nhg_priv; 817 818 nhg_priv = NHGRP_PRIV_CONST(nhg); 819 return (nhg_priv->nhg_idx); 820 } 821 822 uint32_t 823 nhgrp_get_count(struct rib_head *rh) 824 { 825 struct nh_control *ctl; 826 uint32_t count; 827 828 ctl = rh->nh_control; 829 830 NHOPS_RLOCK(ctl); 831 count = ctl->gr_head.items_count; 832 NHOPS_RUNLOCK(ctl); 833 834 return (count); 835 } 836 837 int 838 nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w) 839 { 840 struct nh_control *ctl = rh->nh_control; 841 struct epoch_tracker et; 842 struct nhgrp_priv *nhg_priv; 843 char *buffer; 844 size_t sz; 845 int error = 0; 846 847 if (ctl->gr_head.items_count == 0) 848 return (0); 849 850 /* Calculate the maximum nhop group size in bytes */ 851 sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external); 852 sz += 2 * sizeof(struct nhgrp_container); 853 sz += 2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH; 854 buffer = malloc(sz, M_TEMP, M_NOWAIT); 855 if (buffer == NULL) 856 return (ENOMEM); 857 858 NET_EPOCH_ENTER(et); 859 NHOPS_RLOCK(ctl); 860 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 861 error = dump_nhgrp_entry(rh, nhg_priv, buffer, sz, w); 862 if (error != 0) 863 break; 864 } CHT_SLIST_FOREACH_END; 865 NHOPS_RUNLOCK(ctl); 866 NET_EPOCH_EXIT(et); 867 868 free(buffer, M_TEMP); 869 870 return (error); 871 } 872