1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2020 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 #include "opt_inet.h" 31 #include "opt_route.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/lock.h> 36 #include <sys/rwlock.h> 37 #include <sys/malloc.h> 38 #include <sys/mbuf.h> 39 #include <sys/socket.h> 40 #include <sys/kernel.h> 41 42 #include <net/if.h> 43 #include <net/if_var.h> 44 #include <net/route.h> 45 #include <net/route/route_var.h> 46 #include <net/route/nhop_utils.h> 47 #include <net/route/nhop.h> 48 #include <net/route/nhop_var.h> 49 #include <net/vnet.h> 50 51 #define DEBUG_MOD_NAME nhop 52 #define DEBUG_MAX_LEVEL LOG_DEBUG 53 #include <net/route/route_debug.h> 54 _DECLARE_DEBUG(LOG_INFO); 55 56 /* 57 * This file contains data structures management logic for the nexthop ("nhop") 58 * route subsystem. 59 * 60 * Nexthops in the original sense are the objects containing all the necessary 61 * information to forward the packet to the selected destination. 62 * In particular, nexthop is defined by a combination of 63 * ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and 64 * NHF_DEFAULT 65 * 66 * All nexthops are stored in the resizable hash table. 67 * Additionally, each nexthop gets assigned its unique index (nexthop index) 68 * so userland programs can interact with the nexthops easier. Index allocation 69 * is backed by the bitmask array. 70 */ 71 72 MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data"); 73 74 /* Hash management functions */ 75 76 int 77 nhops_init_rib(struct rib_head *rh) 78 { 79 struct nh_control *ctl; 80 size_t alloc_size; 81 uint32_t num_buckets, num_items; 82 void *ptr; 83 84 ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO); 85 86 /* 87 * Allocate nexthop hash. Start with 16 items by default (128 bytes). 88 * This will be enough for most of the cases. 89 */ 90 num_buckets = 16; 91 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets); 92 ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO); 93 CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets); 94 95 /* 96 * Allocate nexthop index bitmask. 97 */ 98 num_items = 128 * 8; /* 128 bytes */ 99 ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO); 100 bitmask_init(&ctl->nh_idx_head, ptr, num_items); 101 102 NHOPS_LOCK_INIT(ctl); 103 104 rh->nh_control = ctl; 105 ctl->ctl_rh = rh; 106 107 FIB_CTL_LOG(LOG_DEBUG2, ctl, "nhops init: ctl %p rh %p", ctl, rh); 108 109 return (0); 110 } 111 112 static void 113 destroy_ctl(struct nh_control *ctl) 114 { 115 116 NHOPS_LOCK_DESTROY(ctl); 117 free(ctl->nh_head.ptr, M_NHOP); 118 free(ctl->nh_idx_head.idx, M_NHOP); 119 #ifdef ROUTE_MPATH 120 nhgrp_ctl_free(ctl); 121 #endif 122 free(ctl, M_NHOP); 123 } 124 125 /* 126 * Epoch callback indicating ctl is safe to destroy 127 */ 128 static void 129 destroy_ctl_epoch(epoch_context_t ctx) 130 { 131 struct nh_control *ctl; 132 133 ctl = __containerof(ctx, struct nh_control, ctl_epoch_ctx); 134 135 destroy_ctl(ctl); 136 } 137 138 void 139 nhops_destroy_rib(struct rib_head *rh) 140 { 141 struct nh_control *ctl; 142 struct nhop_priv *nh_priv; 143 144 ctl = rh->nh_control; 145 146 /* 147 * All routes should have been deleted in rt_table_destroy(). 148 * However, TCP stack or other consumers may store referenced 149 * nexthop pointers. When these references go to zero, 150 * nhop_free() will try to unlink these records from the 151 * datastructures, most likely leading to panic. 152 * 153 * Avoid that by explicitly marking all of the remaining 154 * nexthops as unlinked by removing a reference from a special 155 * counter. Please see nhop_free() comments for more 156 * details. 157 */ 158 159 NHOPS_WLOCK(ctl); 160 CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) { 161 FIB_RH_LOG(LOG_DEBUG3, rh, "marking nhop %u unlinked", nh_priv->nh_idx); 162 refcount_release(&nh_priv->nh_linked); 163 } CHT_SLIST_FOREACH_END; 164 #ifdef ROUTE_MPATH 165 nhgrp_ctl_unlink_all(ctl); 166 #endif 167 NHOPS_WUNLOCK(ctl); 168 169 /* 170 * Postpone destruction till the end of current epoch 171 * so nhop_free() can safely use nh_control pointer. 172 */ 173 epoch_call(net_epoch_preempt, destroy_ctl_epoch, 174 &ctl->ctl_epoch_ctx); 175 } 176 177 /* 178 * Nexhop hash calculation: 179 * 180 * Nexthops distribution: 181 * 2 "mandatory" nexthops per interface ("interface route", "loopback"). 182 * For direct peering: 1 nexthop for the peering router per ifp/af. 183 * For Ix-like peering: tens to hundreds nexthops of neghbors per ifp/af. 184 * IGP control plane & broadcast segment: tens of nexthops per ifp/af. 185 * 186 * Each fib/af combination has its own hash table. 187 * With that in mind, hash nexthops by the combination of the interface 188 * and GW IP address. 189 * 190 * To optimize hash calculation, ignore lower bits of ifnet pointer, 191 * as they give very little entropy. 192 * Similarly, use lower 4 bytes of IPv6 address to distinguish between the 193 * neighbors. 194 */ 195 struct _hash_data { 196 uint16_t ifentropy; 197 uint8_t family; 198 uint8_t nh_type; 199 uint32_t gw_addr; 200 }; 201 202 static unsigned 203 djb_hash(const unsigned char *h, const int len) 204 { 205 unsigned int result = 0; 206 int i; 207 208 for (i = 0; i < len; i++) 209 result = 33 * result ^ h[i]; 210 211 return (result); 212 } 213 214 static uint32_t 215 hash_priv(const struct nhop_priv *priv) 216 { 217 struct nhop_object *nh = priv->nh; 218 struct _hash_data key = { 219 .ifentropy = (uint16_t)((((uintptr_t)nh->nh_ifp) >> 6) & 0xFFFF), 220 .family = nh->gw_sa.sa_family, 221 .nh_type = priv->nh_type & 0xFF, 222 .gw_addr = (nh->gw_sa.sa_family == AF_INET6) ? 223 nh->gw6_sa.sin6_addr.s6_addr32[3] : 224 nh->gw4_sa.sin_addr.s_addr 225 }; 226 227 return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key))); 228 } 229 230 /* 231 * Checks if hash needs resizing and performs this resize if necessary 232 * 233 */ 234 static void 235 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items) 236 { 237 void *nh_ptr, *nh_idx_ptr; 238 void *old_idx_ptr; 239 size_t alloc_size; 240 241 nh_ptr = NULL; 242 if (new_nh_buckets != 0) { 243 alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets); 244 nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 245 } 246 247 nh_idx_ptr = NULL; 248 if (new_idx_items != 0) { 249 alloc_size = bitmask_get_size(new_idx_items); 250 nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO); 251 } 252 253 if (nh_ptr == NULL && nh_idx_ptr == NULL) { 254 /* Either resize is not required or allocations have failed. */ 255 return; 256 } 257 258 FIB_CTL_LOG(LOG_DEBUG, ctl, 259 "going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", 260 nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items); 261 262 old_idx_ptr = NULL; 263 264 NHOPS_WLOCK(ctl); 265 if (nh_ptr != NULL) { 266 CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets); 267 } 268 if (nh_idx_ptr != NULL) { 269 if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0) 270 bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr); 271 } 272 NHOPS_WUNLOCK(ctl); 273 274 if (nh_ptr != NULL) 275 free(nh_ptr, M_NHOP); 276 if (old_idx_ptr != NULL) 277 free(old_idx_ptr, M_NHOP); 278 } 279 280 /* 281 * Links nextop @nh_priv to the nexhop hash table and allocates 282 * nexhop index. 283 * Returns allocated index or 0 on failure. 284 */ 285 int 286 link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv) 287 { 288 uint16_t idx; 289 uint32_t num_buckets_new, num_items_new; 290 291 KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated")); 292 NHOPS_WLOCK(ctl); 293 294 /* 295 * Check if we need to resize hash and index. 296 * The following 2 functions returns either new size or 0 297 * if resize is not required. 298 */ 299 num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head); 300 num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head); 301 302 if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) { 303 NHOPS_WUNLOCK(ctl); 304 FIB_CTL_LOG(LOG_INFO, ctl, "Unable to allocate nhop index"); 305 RTSTAT_INC(rts_nh_idx_alloc_failure); 306 consider_resize(ctl, num_buckets_new, num_items_new); 307 return (0); 308 } 309 310 nh_priv->nh_idx = idx; 311 nh_priv->nh_control = ctl; 312 nh_priv->nh_finalized = 1; 313 314 CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv); 315 316 NHOPS_WUNLOCK(ctl); 317 318 FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, 319 "Linked nhop priv %p to %d, hash %u, ctl %p", 320 nh_priv, idx, hash_priv(nh_priv), ctl); 321 consider_resize(ctl, num_buckets_new, num_items_new); 322 323 return (idx); 324 } 325 326 /* 327 * Unlinks nexthop specified by @nh_priv data from the hash. 328 * 329 * Returns found nexthop or NULL. 330 */ 331 struct nhop_priv * 332 unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del) 333 { 334 struct nhop_priv *priv_ret; 335 int idx; 336 uint32_t num_buckets_new, num_items_new; 337 338 idx = 0; 339 340 NHOPS_WLOCK(ctl); 341 CHT_SLIST_REMOVE(&ctl->nh_head, nhops, nh_priv_del, priv_ret); 342 343 if (priv_ret != NULL) { 344 idx = priv_ret->nh_idx; 345 priv_ret->nh_idx = 0; 346 347 KASSERT((idx != 0), ("bogus nhop index 0")); 348 if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) { 349 FIB_CTL_LOG(LOG_DEBUG, ctl, 350 "Unable to remove index %d from fib %u af %d", 351 idx, ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family); 352 } 353 } 354 355 /* Check if hash or index needs to be resized */ 356 num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head); 357 num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head); 358 359 NHOPS_WUNLOCK(ctl); 360 361 if (priv_ret == NULL) { 362 FIB_CTL_LOG(LOG_INFO, ctl, 363 "Unable to unlink nhop priv %p from hash, hash %u ctl %p", 364 nh_priv_del, hash_priv(nh_priv_del), ctl); 365 } else { 366 FIB_CTL_LOG(LOG_DEBUG2, ctl, "Unlinked nhop %p priv idx %d", 367 priv_ret, idx); 368 } 369 370 consider_resize(ctl, num_buckets_new, num_items_new); 371 372 return (priv_ret); 373 } 374 375 /* 376 * Searches for the nexthop by data specifcied in @nh_priv. 377 * Returns referenced nexthop or NULL. 378 */ 379 struct nhop_priv * 380 find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv) 381 { 382 struct nhop_priv *nh_priv_ret; 383 384 NHOPS_RLOCK(ctl); 385 CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret); 386 if (nh_priv_ret != NULL) { 387 if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){ 388 /* refcount was 0 -> nhop is being deleted */ 389 nh_priv_ret = NULL; 390 } 391 } 392 NHOPS_RUNLOCK(ctl); 393 394 return (nh_priv_ret); 395 } 396