1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 #include "opt_inet.h" 31 #include "opt_route.h" 32 33 #include <sys/cdefs.h> 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/lock.h> 37 #include <sys/rmlock.h> 38 #include <sys/rwlock.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/refcount.h> 42 #include <sys/socket.h> 43 #include <sys/sysctl.h> 44 #include <sys/kernel.h> 45 46 #include <net/if.h> 47 #include <net/if_var.h> 48 #include <net/if_dl.h> 49 #include <net/route.h> 50 #include <net/route/route_ctl.h> 51 #include <net/route/route_var.h> 52 #include <net/vnet.h> 53 54 #include <netinet/in.h> 55 #include <netinet/in_var.h> 56 #include <netinet/in_fib.h> 57 58 #include <net/route/nhop_utils.h> 59 #include <net/route/nhop.h> 60 #include <net/route/nhop_var.h> 61 #include <net/route/nhgrp_var.h> 62 63 /* 64 * This file contains data structures management logic for the nexthop 65 * groups ("nhgrp") route subsystem. 66 * 67 * Nexthop groups are used to store multiple routes available for the specific 68 * prefix. Nexthop groups are immutable and can be shared across multiple 69 * prefixes. 70 * 71 * Each group consists of a control plane part and a dataplane part. 72 * Control plane is basically a collection of nexthop objects with 73 * weights and refcount. 74 * 75 * Datapath consists of a array of nexthop pointers, compiled from control 76 * plane data to support O(1) nexthop selection. 77 * 78 * For example, consider the following group: 79 * [(nh1, weight=100), (nh2, weight=200)] 80 * It will compile to the following array: 81 * [nh1, nh2, nh2] 82 * 83 */ 84 85 static void consider_resize(struct nh_control *ctl, uint32_t new_gr_buckets, 86 uint32_t new_idx_items); 87 88 static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b); 89 static unsigned int hash_nhgrp(const struct nhgrp_priv *obj); 90 91 static unsigned 92 djb_hash(const unsigned char *h, const int len) 93 { 94 unsigned int result = 0; 95 int i; 96 97 for (i = 0; i < len; i++) 98 result = 33 * result ^ h[i]; 99 100 return (result); 101 } 102 103 static int 104 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b) 105 { 106 107 /* 108 * In case of consistent hashing, there can be multiple nexthop groups 109 * with the same "control plane" list of nexthops with weights and a 110 * different set of "data plane" nexthops. 111 * For now, ignore the data plane and focus on the control plane list. 112 */ 113 if (a->nhg_nh_count != b->nhg_nh_count) 114 return (0); 115 return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights, 116 sizeof(struct weightened_nhop) * a->nhg_nh_count); 117 } 118 119 /* 120 * Hash callback: calculate hash of an object 121 */ 122 static unsigned int 123 hash_nhgrp(const struct nhgrp_priv *obj) 124 { 125 const unsigned char *key; 126 127 key = (const unsigned char *)obj->nhg_nh_weights; 128 129 return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count)); 130 } 131 132 /* 133 * Returns object referenced and unlocked 134 */ 135 struct nhgrp_priv * 136 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key) 137 { 138 struct nhgrp_priv *priv_ret; 139 140 NHOPS_RLOCK(ctl); 141 CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret); 142 if (priv_ret != NULL) { 143 if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) { 144 /* refcount is 0 -> group is being deleted */ 145 priv_ret = NULL; 146 } 147 } 148 NHOPS_RUNLOCK(ctl); 149 150 return (priv_ret); 151 } 152 153 int 154 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv) 155 { 156 uint16_t idx; 157 uint32_t new_num_buckets, new_num_items; 158 159 NHOPS_WLOCK(ctl); 160 /* Check if we need to resize hash and index */ 161 new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head); 162 new_num_items = bitmask_get_resize_items(&ctl->gr_idx_head); 163 164 if (bitmask_alloc_idx(&ctl->gr_idx_head, &idx) != 0) { 165 NHOPS_WUNLOCK(ctl); 166 DPRINTF("Unable to allocate mpath index"); 167 consider_resize(ctl, new_num_buckets, new_num_items); 168 return (0); 169 } 170 171 grp_priv->nhg_idx = idx; 172 grp_priv->nh_control = ctl; 173 CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv); 174 175 NHOPS_WUNLOCK(ctl); 176 177 consider_resize(ctl, new_num_buckets, new_num_items); 178 179 return (1); 180 } 181 182 struct nhgrp_priv * 183 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key) 184 { 185 struct nhgrp_priv *nhg_priv_ret; 186 int ret, idx; 187 188 NHOPS_WLOCK(ctl); 189 190 CHT_SLIST_REMOVE_BYOBJ(&ctl->gr_head, mpath, key, nhg_priv_ret); 191 192 if (nhg_priv_ret == NULL) { 193 DPRINTF("Unable to find nhop group!"); 194 NHOPS_WUNLOCK(ctl); 195 return (NULL); 196 } 197 198 idx = nhg_priv_ret->nhg_idx; 199 ret = bitmask_free_idx(&ctl->gr_idx_head, idx); 200 nhg_priv_ret->nhg_idx = 0; 201 nhg_priv_ret->nh_control = NULL; 202 203 NHOPS_WUNLOCK(ctl); 204 205 return (nhg_priv_ret); 206 } 207 208 /* 209 * Checks if hash needs resizing and performs this resize if necessary 210 * 211 */ 212 static void 213 consider_resize(struct nh_control *ctl, uint32_t new_gr_bucket, uint32_t new_idx_items) 214 { 215 void *gr_ptr, *gr_idx_ptr; 216 void *old_idx_ptr; 217 size_t alloc_size; 218 219 gr_ptr = NULL ; 220 if (new_gr_bucket != 0) { 221 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_gr_bucket); 222 gr_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 223 } 224 225 gr_idx_ptr = NULL; 226 if (new_idx_items != 0) { 227 alloc_size = bitmask_get_size(new_idx_items); 228 gr_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 229 } 230 231 if (gr_ptr == NULL && gr_idx_ptr == NULL) { 232 /* Either resize is not required or allocations have failed. */ 233 return; 234 } 235 236 DPRINTF("mp: going to resize: gr:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", 237 gr_ptr, new_gr_bucket, gr_idx_ptr, new_idx_items); 238 239 old_idx_ptr = NULL; 240 241 NHOPS_WLOCK(ctl); 242 if (gr_ptr != NULL) { 243 CHT_SLIST_RESIZE(&ctl->gr_head, mpath, gr_ptr, new_gr_bucket); 244 } 245 if (gr_idx_ptr != NULL) { 246 if (bitmask_copy(&ctl->gr_idx_head, gr_idx_ptr, new_idx_items) == 0) 247 bitmask_swap(&ctl->gr_idx_head, gr_idx_ptr, new_idx_items, &old_idx_ptr); 248 } 249 NHOPS_WUNLOCK(ctl); 250 251 if (gr_ptr != NULL) 252 free(gr_ptr, M_NHOP); 253 if (old_idx_ptr != NULL) 254 free(old_idx_ptr, M_NHOP); 255 } 256 257 /* 258 * Function allocating the necessary group data structures. 259 */ 260 bool 261 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags) 262 { 263 size_t alloc_size; 264 uint32_t num_buckets, num_items; 265 void *cht_ptr, *mask_ptr; 266 267 malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO; 268 269 num_buckets = 8; 270 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 271 cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags); 272 273 if (cht_ptr == NULL) { 274 DPRINTF("mpath init failed"); 275 return (false); 276 } 277 278 /* 279 * Allocate nexthop index bitmask. 280 */ 281 num_items = 128; 282 mask_ptr = malloc(bitmask_get_size(num_items), M_NHOP, malloc_flags); 283 if (mask_ptr == NULL) { 284 DPRINTF("mpath bitmask init failed"); 285 free(cht_ptr, M_NHOP); 286 return (false); 287 } 288 289 NHOPS_WLOCK(ctl); 290 291 if (ctl->gr_head.hash_size == 0) { 292 /* Init hash and bitmask */ 293 CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets); 294 bitmask_init(&ctl->gr_idx_head, mask_ptr, num_items); 295 NHOPS_WUNLOCK(ctl); 296 } else { 297 /* Other thread has already initiliazed hash/bitmask */ 298 NHOPS_WUNLOCK(ctl); 299 free(cht_ptr, M_NHOP); 300 free(mask_ptr, M_NHOP); 301 } 302 303 DPRINTF("mpath init done for fib/af %d/%d", ctl->rh->rib_fibnum, 304 ctl->rh->rib_family); 305 306 return (true); 307 } 308 309 int 310 nhgrp_ctl_init(struct nh_control *ctl) 311 { 312 313 /* 314 * By default, do not allocate datastructures as multipath 315 * routes will not be necessarily used. 316 */ 317 CHT_SLIST_INIT(&ctl->gr_head, NULL, 0); 318 bitmask_init(&ctl->gr_idx_head, NULL, 0); 319 return (0); 320 } 321 322 void 323 nhgrp_ctl_free(struct nh_control *ctl) 324 { 325 326 if (ctl->gr_head.ptr != NULL) 327 free(ctl->gr_head.ptr, M_NHOP); 328 if (ctl->gr_idx_head.idx != NULL) 329 free(ctl->gr_idx_head.idx, M_NHOP); 330 } 331 332 void 333 nhgrp_ctl_unlink_all(struct nh_control *ctl) 334 { 335 struct nhgrp_priv *nhg_priv; 336 337 NHOPS_WLOCK_ASSERT(ctl); 338 339 CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) { 340 DPRINTF("Marking nhgrp %u unlinked", nhg_priv->nhg_idx); 341 refcount_release(&nhg_priv->nhg_linked); 342 } CHT_SLIST_FOREACH_END; 343 } 344 345