1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_route.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/lock.h> 36 #include <sys/rwlock.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/socket.h> 40 #include <sys/kernel.h> 41 42 #include <net/if.h> 43 #include <net/if_var.h> 44 #include <net/route.h> 45 #include <net/route/route_var.h> 46 #include <net/route/nhop_utils.h> 47 #include <net/route/nhop.h> 48 #include <net/route/nhop_var.h> 49 #include <net/vnet.h> 50 51 /* 52 * This file contains data structures management logic for the nexthop ("nhop") 53 * route subsystem. 54 * 55 * Nexthops in the original sense are the objects containing all the necessary 56 * information to forward the packet to the selected destination. 57 * In particular, nexthop is defined by a combination of 58 * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and 59 * NHF_DEFAULT 60 * 61 * All nexthops are stored in the resizable hash table. 62 * Additionally, each nexthop gets assigned its unique index (nexthop index) 63 * so userland programs can interact with the nexthops easier. Index allocation 64 * is backed by the bitmask array. 65 */ 66 67 static MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data"); 68 69 70 /* Hash management functions */ 71 72 int 73 nhops_init_rib(struct rib_head *rh) 74 { 75 struct nh_control *ctl; 76 size_t alloc_size; 77 uint32_t num_buckets, num_items; 78 void *ptr; 79 80 ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO); 81 82 /* 83 * Allocate nexthop hash. Start with 16 items by default (128 bytes). 84 * This will be enough for most of the cases. 85 */ 86 num_buckets = 16; 87 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 88 ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO); 89 CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets); 90 91 /* 92 * Allocate nexthop index bitmask. 93 */ 94 num_items = 128 * 8; /* 128 bytes */ 95 ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO); 96 bitmask_init(&ctl->nh_idx_head, ptr, num_items); 97 98 NHOPS_LOCK_INIT(ctl); 99 100 rh->nh_control = ctl; 101 ctl->ctl_rh = rh; 102 103 DPRINTF("NHOPS init for fib %u af %u: ctl %p rh %p", rh->rib_fibnum, 104 rh->rib_family, ctl, rh); 105 106 return (0); 107 } 108 109 static void 110 destroy_ctl(struct nh_control *ctl) 111 { 112 113 NHOPS_LOCK_DESTROY(ctl); 114 free(ctl->nh_head.ptr, M_NHOP); 115 free(ctl->nh_idx_head.idx, M_NHOP); 116 free(ctl, M_NHOP); 117 } 118 119 /* 120 * Epoch callback indicating ctl is safe to destroy 121 */ 122 static void 123 destroy_ctl_epoch(epoch_context_t ctx) 124 { 125 struct nh_control *ctl; 126 127 ctl = __containerof(ctx, struct nh_control, ctl_epoch_ctx); 128 129 destroy_ctl(ctl); 130 } 131 132 void 133 nhops_destroy_rib(struct rib_head *rh) 134 { 135 struct nh_control *ctl; 136 struct nhop_priv *nh_priv; 137 138 ctl = rh->nh_control; 139 140 /* 141 * All routes should have been deleted in rt_table_destroy(). 142 * However, TCP stack or other consumers may store referenced 143 * nexthop pointers. When these references go to zero, 144 * nhop_free() will try to unlink these records from the 145 * datastructures, most likely leading to panic. 146 * 147 * Avoid that by explicitly marking all of the remaining 148 * nexthops as unlinked by removing a reference from a special 149 * counter. Please see nhop_free() comments for more 150 * details. 151 */ 152 153 NHOPS_WLOCK(ctl); 154 CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) { 155 DPRINTF("Marking nhop %u unlinked", nh_priv->nh_idx); 156 refcount_release(&nh_priv->nh_linked); 157 } CHT_SLIST_FOREACH_END; 158 NHOPS_WUNLOCK(ctl); 159 160 /* 161 * Postpone destruction till the end of current epoch 162 * so nhop_free() can safely use nh_control pointer. 163 */ 164 epoch_call(net_epoch_preempt, destroy_ctl_epoch, 165 &ctl->ctl_epoch_ctx); 166 } 167 168 /* 169 * Nexhop hash calculation: 170 * 171 * Nexthops distribution: 172 * 2 "mandatory" nexthops per interface ("interface route", "loopback"). 173 * For direct peering: 1 nexthop for the peering router per ifp/af. 174 * For Ix-like peering: tens to hundreds nexthops of neghbors per ifp/af. 175 * IGP control plane & broadcast segment: tens of nexthops per ifp/af. 176 * 177 * Each fib/af combination has its own hash table. 178 * With that in mind, hash nexthops by the combination of the interface 179 * and GW IP address. 180 * 181 * To optimize hash calculation, ignore higher bytes of ifindex, as they 182 * give very little entropy. 183 * Similarly, use lower 4 bytes of IPv6 address to distinguish between the 184 * neighbors. 185 */ 186 struct _hash_data { 187 uint16_t ifindex; 188 uint8_t family; 189 uint8_t nh_type; 190 uint32_t gw_addr; 191 }; 192 193 static unsigned 194 djb_hash(const unsigned char *h, const int len) 195 { 196 unsigned int result = 0; 197 int i; 198 199 for (i = 0; i < len; i++) 200 result = 33 * result ^ h[i]; 201 202 return (result); 203 } 204 205 static uint32_t 206 hash_priv(const struct nhop_priv *priv) 207 { 208 struct nhop_object *nh; 209 uint16_t ifindex; 210 struct _hash_data key; 211 212 nh = priv->nh; 213 ifindex = nh->nh_ifp->if_index & 0xFFFF; 214 memset(&key, 0, sizeof(key)); 215 216 key.ifindex = ifindex; 217 key.family = nh->gw_sa.sa_family; 218 key.nh_type = priv->nh_type & 0xFF; 219 if (nh->gw_sa.sa_family == AF_INET6) 220 memcpy(&key.gw_addr, &nh->gw6_sa.sin6_addr.s6_addr32[3], 4); 221 else if (nh->gw_sa.sa_family == AF_INET) 222 memcpy(&key.gw_addr, &nh->gw4_sa.sin_addr, 4); 223 224 return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key))); 225 } 226 227 /* 228 * Checks if hash needs resizing and performs this resize if necessary 229 * 230 */ 231 static void 232 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items) 233 { 234 void *nh_ptr, *nh_idx_ptr; 235 void *old_idx_ptr; 236 size_t alloc_size; 237 238 nh_ptr = NULL; 239 if (new_nh_buckets != 0) { 240 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets); 241 nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 242 } 243 244 nh_idx_ptr = NULL; 245 if (new_idx_items != 0) { 246 alloc_size = bitmask_get_size(new_idx_items); 247 nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 248 } 249 250 if (nh_ptr == NULL && nh_idx_ptr == NULL) { 251 /* Either resize is not required or allocations have failed. */ 252 return; 253 } 254 255 DPRINTF("going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", nh_ptr, 256 new_nh_buckets, nh_idx_ptr, new_idx_items); 257 258 old_idx_ptr = NULL; 259 260 NHOPS_WLOCK(ctl); 261 if (nh_ptr != NULL) { 262 CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets); 263 } 264 if (nh_idx_ptr != NULL) { 265 if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0) 266 bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr); 267 } 268 NHOPS_WUNLOCK(ctl); 269 270 if (nh_ptr != NULL) 271 free(nh_ptr, M_NHOP); 272 if (old_idx_ptr != NULL) 273 free(old_idx_ptr, M_NHOP); 274 } 275 276 /* 277 * Links nextop @nh_priv to the nexhop hash table and allocates 278 * nexhop index. 279 * Returns allocated index or 0 on failure. 280 */ 281 int 282 link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv) 283 { 284 uint16_t idx; 285 uint32_t num_buckets_new, num_items_new; 286 287 KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated")); 288 NHOPS_WLOCK(ctl); 289 290 /* 291 * Check if we need to resize hash and index. 292 * The following 2 functions returns either new size or 0 293 * if resize is not required. 294 */ 295 num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head); 296 num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head); 297 298 if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) { 299 NHOPS_WUNLOCK(ctl); 300 DPRINTF("Unable to allocate nhop index"); 301 RTSTAT_INC(rts_nh_idx_alloc_failure); 302 consider_resize(ctl, num_buckets_new, num_items_new); 303 return (0); 304 } 305 306 nh_priv->nh_idx = idx; 307 nh_priv->nh_control = ctl; 308 309 CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv); 310 311 NHOPS_WUNLOCK(ctl); 312 313 DPRINTF("Linked nhop priv %p to %d, hash %u, ctl %p", nh_priv, idx, 314 hash_priv(nh_priv), ctl); 315 consider_resize(ctl, num_buckets_new, num_items_new); 316 317 return (idx); 318 } 319 320 /* 321 * Unlinks nexthop specified by @nh_priv data from the hash. 322 * 323 * Returns found nexthop or NULL. 324 */ 325 struct nhop_priv * 326 unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del) 327 { 328 struct nhop_priv *priv_ret; 329 int idx; 330 uint32_t num_buckets_new, num_items_new; 331 332 idx = 0; 333 334 NHOPS_WLOCK(ctl); 335 CHT_SLIST_REMOVE_BYOBJ(&ctl->nh_head, nhops, nh_priv_del, priv_ret); 336 337 if (priv_ret != NULL) { 338 idx = priv_ret->nh_idx; 339 priv_ret->nh_idx = 0; 340 341 KASSERT((idx != 0), ("bogus nhop index 0")); 342 if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) { 343 DPRINTF("Unable to remove index %d from fib %u af %d", 344 idx, ctl->ctl_rh->rib_fibnum, 345 ctl->ctl_rh->rib_family); 346 } 347 } 348 349 /* Check if hash or index needs to be resized */ 350 num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head); 351 num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head); 352 353 NHOPS_WUNLOCK(ctl); 354 355 if (priv_ret == NULL) 356 DPRINTF("Unable to unlink nhop priv %p from hash, hash %u ctl %p", 357 nh_priv_del, hash_priv(nh_priv_del), ctl); 358 else 359 DPRINTF("Unlinked nhop %p priv idx %d", priv_ret, idx); 360 361 consider_resize(ctl, num_buckets_new, num_items_new); 362 363 return (priv_ret); 364 } 365 366 /* 367 * Searches for the nexthop by data specifcied in @nh_priv. 368 * Returns referenced nexthop or NULL. 369 */ 370 struct nhop_priv * 371 find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv) 372 { 373 struct nhop_priv *nh_priv_ret; 374 375 NHOPS_RLOCK(ctl); 376 CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret); 377 if (nh_priv_ret != NULL) { 378 if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){ 379 /* refcount was 0 -> nhop is being deleted */ 380 nh_priv_ret = NULL; 381 } 382 } 383 NHOPS_RUNLOCK(ctl); 384 385 return (nh_priv_ret); 386 } 387 388