1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 #include "opt_inet.h" 31 #include "opt_route.h" 32 33 #include <sys/cdefs.h> 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/lock.h> 37 #include <sys/rmlock.h> 38 #include <sys/rwlock.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/refcount.h> 42 #include <sys/socket.h> 43 #include <sys/sysctl.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/vnet.h> 53 54 #include <netinet/in.h> 55 #include <netinet/in_var.h> 56 #include <netinet/in_fib.h> 57 58 #include <net/route/nhop_utils.h> 59 #include <net/route/nhop.h> 60 #include <net/route/nhop_var.h> 61 #include <net/route/nhgrp_var.h> 62 63 #define DEBUG_MOD_NAME nhgrp 64 #define DEBUG_MAX_LEVEL LOG_DEBUG 65 #include <net/route/route_debug.h> 66 _DECLARE_DEBUG(LOG_INFO); 67 68 /* 69 * This file contains data structures management logic for the nexthop 70 * groups ("nhgrp") route subsystem. 71 * 72 * Nexthop groups are used to store multiple routes available for the specific 73 * prefix. Nexthop groups are immutable and can be shared across multiple 74 * prefixes. 75 * 76 * Each group consists of a control plane part and a dataplane part. 77 * Control plane is basically a collection of nexthop objects with 78 * weights and refcount. 79 * 80 * Datapath consists of a array of nexthop pointers, compiled from control 81 * plane data to support O(1) nexthop selection. 82 * 83 * For example, consider the following group: 84 * [(nh1, weight=100), (nh2, weight=200)] 85 * It will compile to the following array: 86 * [nh1, nh2, nh2] 87 * 88 */ 89 90 static void consider_resize(struct nh_control *ctl, uint32_t new_gr_buckets, 91 uint32_t new_idx_items); 92 93 static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b); 94 static unsigned int hash_nhgrp(const struct nhgrp_priv *obj); 95 96 static unsigned 97 djb_hash(const unsigned char *h, const int len) 98 { 99 unsigned int result = 0; 100 int i; 101 102 for (i = 0; i < len; i++) 103 result = 33 * result ^ h[i]; 104 105 return (result); 106 } 107 108 static int 109 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b) 110 { 111 112 /* 113 * In case of consistent hashing, there can be multiple nexthop groups 114 * with the same "control plane" list of nexthops with weights and a 115 * different set of "data plane" nexthops. 116 * For now, ignore the data plane and focus on the control plane list. 117 */ 118 if (a->nhg_nh_count != b->nhg_nh_count || a->nhg_uidx != b->nhg_uidx) 119 return (0); 120 return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights, 121 sizeof(struct weightened_nhop) * a->nhg_nh_count); 122 } 123 124 /* 125 * Hash callback: calculate hash of an object 126 */ 127 static unsigned int 128 hash_nhgrp(const struct nhgrp_priv *obj) 129 { 130 const unsigned char *key; 131 132 key = (const unsigned char *)obj->nhg_nh_weights; 133 134 return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count)); 135 } 136 137 /* 138 * Returns object referenced and unlocked 139 */ 140 struct nhgrp_priv * 141 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key) 142 { 143 struct nhgrp_priv *priv_ret; 144 145 NHOPS_RLOCK(ctl); 146 CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret); 147 if (priv_ret != NULL) { 148 if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) { 149 /* refcount is 0 -> group is being deleted */ 150 priv_ret = NULL; 151 } 152 } 153 NHOPS_RUNLOCK(ctl); 154 155 return (priv_ret); 156 } 157 158 int 159 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv) 160 { 161 uint16_t idx; 162 uint32_t new_num_buckets, new_num_items; 163 164 NHOPS_WLOCK(ctl); 165 /* Check if we need to resize hash and index */ 166 new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head); 167 new_num_items = bitmask_get_resize_items(&ctl->nh_idx_head); 168 169 if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) { 170 NHOPS_WUNLOCK(ctl); 171 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to allocate nhg index"); 172 consider_resize(ctl, new_num_buckets, new_num_items); 173 return (0); 174 } 175 176 grp_priv->nhg_idx = idx; 177 grp_priv->nh_control = ctl; 178 CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv); 179 180 NHOPS_WUNLOCK(ctl); 181 182 IF_DEBUG_LEVEL(LOG_DEBUG2) { 183 char nhgrp_buf[NHOP_PRINT_BUFSIZE] __unused; 184 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "linked %s", 185 nhgrp_print_buf(grp_priv->nhg, nhgrp_buf, sizeof(nhgrp_buf))); 186 } 187 consider_resize(ctl, new_num_buckets, new_num_items); 188 189 return (1); 190 } 191 192 struct nhgrp_priv * 193 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key) 194 { 195 struct nhgrp_priv *nhg_priv_ret; 196 int idx; 197 198 NHOPS_WLOCK(ctl); 199 200 CHT_SLIST_REMOVE(&ctl->gr_head, mpath, key, nhg_priv_ret); 201 202 if (nhg_priv_ret == NULL) { 203 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to find nhg"); 204 NHOPS_WUNLOCK(ctl); 205 return (NULL); 206 } 207 208 idx = nhg_priv_ret->nhg_idx; 209 bitmask_free_idx(&ctl->nh_idx_head, idx); 210 nhg_priv_ret->nhg_idx = 0; 211 nhg_priv_ret->nh_control = NULL; 212 213 NHOPS_WUNLOCK(ctl); 214 215 IF_DEBUG_LEVEL(LOG_DEBUG2) { 216 char nhgrp_buf[NHOP_PRINT_BUFSIZE]; 217 nhgrp_print_buf(nhg_priv_ret->nhg, nhgrp_buf, sizeof(nhgrp_buf)); 218 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "unlinked idx#%d %s", idx, 219 nhgrp_buf); 220 } 221 222 return (nhg_priv_ret); 223 } 224 225 /* 226 * Checks if hash needs resizing and performs this resize if necessary 227 * 228 */ 229 static void 230 consider_resize(struct nh_control *ctl, uint32_t new_gr_bucket, uint32_t new_idx_items) 231 { 232 void *gr_ptr, *gr_idx_ptr; 233 void *old_idx_ptr; 234 size_t alloc_size; 235 236 gr_ptr = NULL ; 237 if (new_gr_bucket != 0) { 238 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_gr_bucket); 239 gr_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 240 } 241 242 gr_idx_ptr = NULL; 243 if (new_idx_items != 0) { 244 alloc_size = bitmask_get_size(new_idx_items); 245 gr_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 246 } 247 248 if (gr_ptr == NULL && gr_idx_ptr == NULL) { 249 /* Either resize is not required or allocations have failed. */ 250 return; 251 } 252 253 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, 254 "going to resize nhg hash: [ptr:%p sz:%u] idx:[ptr:%p sz:%u]", 255 gr_ptr, new_gr_bucket, gr_idx_ptr, new_idx_items); 256 257 old_idx_ptr = NULL; 258 259 NHOPS_WLOCK(ctl); 260 if (gr_ptr != NULL) { 261 CHT_SLIST_RESIZE(&ctl->gr_head, mpath, gr_ptr, new_gr_bucket); 262 } 263 if (gr_idx_ptr != NULL) { 264 if (bitmask_copy(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items) == 0) 265 bitmask_swap(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items, &old_idx_ptr); 266 } 267 NHOPS_WUNLOCK(ctl); 268 269 if (gr_ptr != NULL) 270 free(gr_ptr, M_NHOP); 271 if (old_idx_ptr != NULL) 272 free(old_idx_ptr, M_NHOP); 273 } 274 275 /* 276 * Function allocating the necessary group data structures. 277 */ 278 bool 279 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags) 280 { 281 size_t alloc_size; 282 uint32_t num_buckets; 283 void *cht_ptr; 284 285 malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO; 286 287 num_buckets = 8; 288 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 289 cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags); 290 291 if (cht_ptr == NULL) { 292 FIB_RH_LOG(LOG_WARNING, ctl->ctl_rh, "multipath init failed"); 293 return (false); 294 } 295 296 NHOPS_WLOCK(ctl); 297 298 if (ctl->gr_head.hash_size == 0) { 299 /* Init hash and bitmask */ 300 CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets); 301 NHOPS_WUNLOCK(ctl); 302 } else { 303 /* Other thread has already initiliazed hash/bitmask */ 304 NHOPS_WUNLOCK(ctl); 305 free(cht_ptr, M_NHOP); 306 } 307 308 FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "multipath init done"); 309 310 return (true); 311 } 312 313 int 314 nhgrp_ctl_init(struct nh_control *ctl) 315 { 316 317 /* 318 * By default, do not allocate datastructures as multipath 319 * routes will not be necessarily used. 320 */ 321 CHT_SLIST_INIT(&ctl->gr_head, NULL, 0); 322 return (0); 323 } 324 325 void 326 nhgrp_ctl_free(struct nh_control *ctl) 327 { 328 if (ctl->gr_head.ptr != NULL) 329 free(ctl->gr_head.ptr, M_NHOP); 330 } 331 332 void 333 nhgrp_ctl_unlink_all(struct nh_control *ctl) 334 { 335 struct nhgrp_priv *nhg_priv; 336 337 NHOPS_WLOCK_ASSERT(ctl); 338 339 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 340 IF_DEBUG_LEVEL(LOG_DEBUG2) { 341 char nhgbuf[NHOP_PRINT_BUFSIZE] __unused; 342 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "marking %s unlinked", 343 nhgrp_print_buf(nhg_priv->nhg, nhgbuf, sizeof(nhgbuf))); 344 } 345 refcount_release(&nhg_priv->nhg_linked); 346 } CHT_SLIST_FOREACH_END; 347 } 348 349