xref: /freebsd/sys/net/route/nhop.c (revision 3a3af6b2a160bea72509a9d5ef84e25906b0478a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_route.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/lock.h>
36 #include <sys/rwlock.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/socket.h>
40 #include <sys/kernel.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/route.h>
45 #include <net/route/route_var.h>
46 #include <net/route/nhop_utils.h>
47 #include <net/route/nhop.h>
48 #include <net/route/nhop_var.h>
49 #include <net/vnet.h>
50 
51 #define	DEBUG_MOD_NAME	nhop
52 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
53 #include <net/route/route_debug.h>
54 _DECLARE_DEBUG(LOG_INFO);
55 
56 /*
57  * This file contains data structures management logic for the nexthop ("nhop")
58  *   route subsystem.
59  *
60  * Nexthops in the original sense are the objects containing all the necessary
61  * information to forward the packet to the selected destination.
62  * In particular, nexthop is defined by a combination of
63  *  ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
64  *    NHF_DEFAULT
65  *
66  * All nexthops are stored in the resizable hash table.
67  * Additionally, each nexthop gets assigned its unique index (nexthop index)
68  * so userland programs can interact with the nexthops easier. Index allocation
69  * is backed by the bitmask array.
70  */
71 
/*
 * malloc(9) type used to tag the allocations made in this file: the nexthop
 * control structure, the nexthop hash array and the nexthop index bitmask.
 */
MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
73 
74 /* Hash management functions */
75 
76 int
77 nhops_init_rib(struct rib_head *rh)
78 {
79 	struct nh_control *ctl;
80 	size_t alloc_size;
81 	uint32_t num_buckets, num_items;
82 	void *ptr;
83 
84 	ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO);
85 
86 	/*
87 	 * Allocate nexthop hash. Start with 16 items by default (128 bytes).
88 	 * This will be enough for most of the cases.
89 	 */
90 	num_buckets = 16;
91 	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
92 	ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO);
93 	CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets);
94 
95 	/*
96 	 * Allocate nexthop index bitmask.
97 	 */
98 	num_items = 128 * 8; /* 128 bytes */
99 	ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO);
100 	bitmask_init(&ctl->nh_idx_head, ptr, num_items);
101 
102 	NHOPS_LOCK_INIT(ctl);
103 
104 	rh->nh_control = ctl;
105 	ctl->ctl_rh = rh;
106 
107 	FIB_CTL_LOG(LOG_DEBUG2, ctl, "nhops init: ctl %p rh %p", ctl, rh);
108 
109 	return (0);
110 }
111 
112 static void
113 destroy_ctl(struct nh_control *ctl)
114 {
115 
116 	NHOPS_LOCK_DESTROY(ctl);
117 	free(ctl->nh_head.ptr, M_NHOP);
118 	free(ctl->nh_idx_head.idx, M_NHOP);
119 #ifdef ROUTE_MPATH
120 	nhgrp_ctl_free(ctl);
121 #endif
122 	free(ctl, M_NHOP);
123 }
124 
125 /*
126  * Epoch callback indicating ctl is safe to destroy
127  */
128 static void
129 destroy_ctl_epoch(epoch_context_t ctx)
130 {
131 	struct nh_control *ctl;
132 
133 	ctl = __containerof(ctx, struct nh_control, ctl_epoch_ctx);
134 
135 	destroy_ctl(ctl);
136 }
137 
138 void
139 nhops_destroy_rib(struct rib_head *rh)
140 {
141 	struct nh_control *ctl;
142 	struct nhop_priv *nh_priv;
143 
144 	ctl = rh->nh_control;
145 
146 	/*
147 	 * All routes should have been deleted in rt_table_destroy().
148 	 * However, TCP stack or other consumers may store referenced
149 	 *  nexthop pointers. When these references go to zero,
150 	 *  nhop_free() will try to unlink these records from the
151 	 *  datastructures, most likely leading to panic.
152 	 *
153 	 * Avoid that by explicitly marking all of the remaining
154 	 *  nexthops as unlinked by removing a reference from a special
155 	 *  counter. Please see nhop_free() comments for more
156 	 *  details.
157 	 */
158 
159 	NHOPS_WLOCK(ctl);
160 	CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
161 		FIB_RH_LOG(LOG_DEBUG3, rh, "marking nhop %u unlinked", nh_priv->nh_idx);
162 		refcount_release(&nh_priv->nh_linked);
163 	} CHT_SLIST_FOREACH_END;
164 #ifdef ROUTE_MPATH
165 	nhgrp_ctl_unlink_all(ctl);
166 #endif
167 	NHOPS_WUNLOCK(ctl);
168 
169 	/*
170 	 * Postpone destruction till the end of current epoch
171 	 * so nhop_free() can safely use nh_control pointer.
172 	 */
173 	epoch_call(net_epoch_preempt, destroy_ctl_epoch,
174 	    &ctl->ctl_epoch_ctx);
175 }
176 
/*
 * Nexthop hash calculation:
 *
 * Nexthops distribution:
 * 2 "mandatory" nexthops per interface ("interface route", "loopback").
 * For direct peering: 1 nexthop for the peering router per ifp/af.
 * For IX-like peering: tens to hundreds of neighbor nexthops per ifp/af.
 * IGP control plane & broadcast segment: tens of nexthops per ifp/af.
 *
 * Each fib/af combination has its own hash table.
 * With that in mind, hash nexthops by the combination of the interface
 *  and GW IP address.
 *
 * To optimize hash calculation, ignore lower bits of ifnet pointer,
 * as they give very little entropy.
 * Similarly, use lower 4 bytes of IPv6 address to distinguish between the
 *  neighbors.
 */
struct _hash_data {
	uint16_t	ifentropy;	/* ifnet pointer bits, lowest ones dropped */
	uint8_t		family;		/* gateway address family */
	uint8_t		nh_type;	/* low 8 bits of the nexthop type */
	uint32_t	gw_addr;	/* IPv4 gateway or lower 4 bytes of IPv6 one */
};

/*
 * The key is fed to the hash function as a raw byte string, so it must not
 * contain padding: padding bytes are not guaranteed to hold any particular
 * value and would let identical keys hash to different buckets.
 */
_Static_assert(sizeof(struct _hash_data) ==
    sizeof(uint16_t) + 2 * sizeof(uint8_t) + sizeof(uint32_t),
    "struct _hash_data must not contain padding");
201 
/*
 * Hashes @len bytes starting at @h. For every input byte the accumulator
 * is multiplied by 33 and then XORed with that byte. The accumulator starts
 * at 0, which is also the result when @len is not positive.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cur;
	unsigned int acc;
	int left;

	acc = 0;
	cur = h;
	for (left = len; left > 0; left--)
		acc = (acc * 33) ^ *cur++;

	return (acc);
}
213 
214 static uint32_t
215 hash_priv(const struct nhop_priv *priv)
216 {
217 	struct nhop_object *nh = priv->nh;
218 	struct _hash_data key = {
219 	    .ifentropy = (uint16_t)((((uintptr_t)nh->nh_ifp) >> 6) & 0xFFFF),
220 	    .family = nh->gw_sa.sa_family,
221 	    .nh_type = priv->nh_type & 0xFF,
222 	    .gw_addr = (nh->gw_sa.sa_family == AF_INET6) ?
223 		nh->gw6_sa.sin6_addr.s6_addr32[3] :
224 		nh->gw4_sa.sin_addr.s_addr
225 	};
226 
227 	return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
228 }
229 
230 /*
231  * Checks if hash needs resizing and performs this resize if necessary
232  *
233  */
234 static void
235 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
236 {
237 	void *nh_ptr, *nh_idx_ptr;
238 	void *old_idx_ptr;
239 	size_t alloc_size;
240 
241 	nh_ptr = NULL;
242 	if (new_nh_buckets != 0) {
243 		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
244 		nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
245 	}
246 
247 	nh_idx_ptr = NULL;
248 	if (new_idx_items != 0) {
249 		alloc_size = bitmask_get_size(new_idx_items);
250 		nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
251 	}
252 
253 	if (nh_ptr == NULL && nh_idx_ptr == NULL) {
254 		/* Either resize is not required or allocations have failed. */
255 		return;
256 	}
257 
258 	FIB_CTL_LOG(LOG_DEBUG, ctl,
259 	    "going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
260 	    nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items);
261 
262 	old_idx_ptr = NULL;
263 
264 	NHOPS_WLOCK(ctl);
265 	if (nh_ptr != NULL) {
266 		CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets);
267 	}
268 	if (nh_idx_ptr != NULL) {
269 		if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0)
270 			bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
271 	}
272 	NHOPS_WUNLOCK(ctl);
273 
274 	if (nh_ptr != NULL)
275 		free(nh_ptr, M_NHOP);
276 	if (old_idx_ptr != NULL)
277 		free(old_idx_ptr, M_NHOP);
278 }
279 
280 /*
281  * Links nextop @nh_priv to the nexhop hash table and allocates
282  *  nexhop index.
283  * Returns allocated index or 0 on failure.
284  */
285 int
286 link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
287 {
288 	uint16_t idx;
289 	uint32_t num_buckets_new, num_items_new;
290 
291 	KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated"));
292 	NHOPS_WLOCK(ctl);
293 
294 	/*
295 	 * Check if we need to resize hash and index.
296 	 * The following 2 functions returns either new size or 0
297 	 *  if resize is not required.
298 	 */
299 	num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
300 	num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
301 
302 	if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
303 		NHOPS_WUNLOCK(ctl);
304 		FIB_CTL_LOG(LOG_INFO, ctl, "Unable to allocate nhop index");
305 		RTSTAT_INC(rts_nh_idx_alloc_failure);
306 		consider_resize(ctl, num_buckets_new, num_items_new);
307 		return (0);
308 	}
309 
310 	nh_priv->nh_idx = idx;
311 	nh_priv->nh_control = ctl;
312 	nh_priv->nh_finalized = 1;
313 
314 	CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);
315 
316 	NHOPS_WUNLOCK(ctl);
317 
318 	FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh,
319 	    "Linked nhop priv %p to %d, hash %u, ctl %p",
320 	    nh_priv, idx, hash_priv(nh_priv), ctl);
321 	consider_resize(ctl, num_buckets_new, num_items_new);
322 
323 	return (idx);
324 }
325 
326 /*
327  * Unlinks nexthop specified by @nh_priv data from the hash.
328  *
329  * Returns found nexthop or NULL.
330  */
331 struct nhop_priv *
332 unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del)
333 {
334 	struct nhop_priv *priv_ret;
335 	int idx;
336 	uint32_t num_buckets_new, num_items_new;
337 
338 	idx = 0;
339 
340 	NHOPS_WLOCK(ctl);
341 	CHT_SLIST_REMOVE(&ctl->nh_head, nhops, nh_priv_del, priv_ret);
342 
343 	if (priv_ret != NULL) {
344 		idx = priv_ret->nh_idx;
345 		priv_ret->nh_idx = 0;
346 
347 		KASSERT((idx != 0), ("bogus nhop index 0"));
348 		if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) {
349 			FIB_CTL_LOG(LOG_DEBUG, ctl,
350 			    "Unable to remove index %d from fib %u af %d",
351 			    idx, ctl->ctl_rh->rib_fibnum, ctl->ctl_rh->rib_family);
352 		}
353 	}
354 
355 	/* Check if hash or index needs to be resized */
356 	num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
357 	num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
358 
359 	NHOPS_WUNLOCK(ctl);
360 
361 	if (priv_ret == NULL) {
362 		FIB_CTL_LOG(LOG_INFO, ctl,
363 		    "Unable to unlink nhop priv %p from hash, hash %u ctl %p",
364 		    nh_priv_del, hash_priv(nh_priv_del), ctl);
365 	} else {
366 		FIB_CTL_LOG(LOG_DEBUG2, ctl, "Unlinked nhop %p priv idx %d",
367 		    priv_ret, idx);
368 	}
369 
370 	consider_resize(ctl, num_buckets_new, num_items_new);
371 
372 	return (priv_ret);
373 }
374 
375 /*
376  * Searches for the nexthop by data specifcied in @nh_priv.
377  * Returns referenced nexthop or NULL.
378  */
379 struct nhop_priv *
380 find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv)
381 {
382 	struct nhop_priv *nh_priv_ret;
383 
384 	NHOPS_RLOCK(ctl);
385 	CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret);
386 	if (nh_priv_ret != NULL) {
387 		if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){
388 			/* refcount was 0 -> nhop is being deleted */
389 			nh_priv_ret = NULL;
390 		}
391 	}
392 	NHOPS_RUNLOCK(ctl);
393 
394 	return (nh_priv_ret);
395 }
396