xref: /freebsd/sys/net/route/nhop.c (revision a624ca3dff0c2fa00728d5f50205f341554a0a10)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_route.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/lock.h>
36 #include <sys/rwlock.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/socket.h>
40 #include <sys/kernel.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/route.h>
45 #include <net/route/route_var.h>
46 #include <net/route/nhop_utils.h>
47 #include <net/route/nhop.h>
48 #include <net/route/nhop_var.h>
49 #include <net/vnet.h>
50 
51 /*
52  * This file contains data structures management logic for the nexthop ("nhop")
53  *   route subsystem.
54  *
55  * Nexthops in the original sense are the objects containing all the necessary
56  * information to forward the packet to the selected destination.
57  * In particular, nexthop is defined by a combination of
58  *  ifp, ifa, aifp, mtu, gw addr(if set), nh_type, nh_family, mask of rt_flags and
59  *    NHF_DEFAULT
60  *
61  * All nexthops are stored in the resizable hash table.
62  * Additionally, each nexthop gets assigned its unique index (nexthop index)
63  * so userland programs can interact with the nexthops easier. Index allocation
64  * is backed by the bitmask array.
65  */
66 
/*
 * malloc(9) type used for every allocation made in this file: nexthop
 * control structures, hash bucket arrays and index bitmasks.
 */
static MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
68 
69 
70 /* Hash management functions */
71 
72 int
73 nhops_init_rib(struct rib_head *rh)
74 {
75 	struct nh_control *ctl;
76 	size_t alloc_size;
77 	uint32_t num_buckets, num_items;
78 	void *ptr;
79 
80 	ctl = malloc(sizeof(struct nh_control), M_NHOP, M_WAITOK | M_ZERO);
81 
82 	/*
83 	 * Allocate nexthop hash. Start with 16 items by default (128 bytes).
84 	 * This will be enough for most of the cases.
85 	 */
86 	num_buckets = 16;
87 	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
88 	ptr = malloc(alloc_size, M_NHOP, M_WAITOK | M_ZERO);
89 	CHT_SLIST_INIT(&ctl->nh_head, ptr, num_buckets);
90 
91 	/*
92 	 * Allocate nexthop index bitmask.
93 	 */
94 	num_items = 128 * 8; /* 128 bytes */
95 	ptr = malloc(bitmask_get_size(num_items), M_NHOP, M_WAITOK | M_ZERO);
96 	bitmask_init(&ctl->nh_idx_head, ptr, num_items);
97 
98 	NHOPS_LOCK_INIT(ctl);
99 
100 	rh->nh_control = ctl;
101 	ctl->ctl_rh = rh;
102 
103 	DPRINTF("NHOPS init for fib %u af %u: ctl %p rh %p", rh->rib_fibnum,
104 	    rh->rib_family, ctl, rh);
105 
106 	return (0);
107 }
108 
109 static void
110 destroy_ctl(struct nh_control *ctl)
111 {
112 
113 	NHOPS_LOCK_DESTROY(ctl);
114 	free(ctl->nh_head.ptr, M_NHOP);
115 	free(ctl->nh_idx_head.idx, M_NHOP);
116 	free(ctl, M_NHOP);
117 }
118 
119 /*
120  * Epoch callback indicating ctl is safe to destroy
121  */
122 static void
123 destroy_ctl_epoch(epoch_context_t ctx)
124 {
125 	struct nh_control *ctl;
126 
127 	ctl = __containerof(ctx, struct nh_control, ctl_epoch_ctx);
128 
129 	destroy_ctl(ctl);
130 }
131 
132 void
133 nhops_destroy_rib(struct rib_head *rh)
134 {
135 	struct nh_control *ctl;
136 	struct nhop_priv *nh_priv;
137 
138 	ctl = rh->nh_control;
139 
140 	/*
141 	 * All routes should have been deleted in rt_table_destroy().
142 	 * However, TCP stack or other consumers may store referenced
143 	 *  nexthop pointers. When these references go to zero,
144 	 *  nhop_free() will try to unlink these records from the
145 	 *  datastructures, most likely leading to panic.
146 	 *
147 	 * Avoid that by explicitly marking all of the remaining
148 	 *  nexthops as unlinked by removing a reference from a special
149 	 *  counter. Please see nhop_free() comments for more
150 	 *  details.
151 	 */
152 
153 	NHOPS_WLOCK(ctl);
154 	CHT_SLIST_FOREACH(&ctl->nh_head, nhops, nh_priv) {
155 		DPRINTF("Marking nhop %u unlinked", nh_priv->nh_idx);
156 		refcount_release(&nh_priv->nh_linked);
157 	} CHT_SLIST_FOREACH_END;
158 	NHOPS_WUNLOCK(ctl);
159 
160 	/*
161 	 * Postpone destruction till the end of current epoch
162 	 * so nhop_free() can safely use nh_control pointer.
163 	 */
164 	epoch_call(net_epoch_preempt, destroy_ctl_epoch,
165 	    &ctl->ctl_epoch_ctx);
166 }
167 
/*
 * Nexthop hash calculation:
 *
 * Nexthops distribution:
 * 2 "mandatory" nexthops per interface ("interface route", "loopback").
 * For direct peering: 1 nexthop for the peering router per ifp/af.
 * For IX-like peering: tens to hundreds of neighbor nexthops per ifp/af.
 * IGP control plane & broadcast segment: tens of nexthops per ifp/af.
 *
 * Each fib/af combination has its own hash table.
 * With that in mind, hash nexthops by the combination of the interface
 *  and GW IP address.
 *
 * To optimize hash calculation, ignore higher bytes of ifindex, as they
 *  give very little entropy.
 * Similarly, use lower 4 bytes of IPv6 address to distinguish between the
 *  neighbors.
 *
 * The structure below is the hash key. hash_priv() zeroes it, fills it
 *  in and hashes its raw bytes, so any change to the layout changes the
 *  resulting hash values.
 */
struct _hash_data {
	uint16_t	ifindex;	/* lower 16 bits of the interface index */
	uint8_t		family;		/* sa_family of the gateway sockaddr */
	uint8_t		nh_type;	/* lower 8 bits of the nexthop type */
	uint32_t	gw_addr;	/* IPv4 gw or last 4 bytes of IPv6 gw, else 0 */
};
192 
/*
 * Hashes @len bytes starting at @h.
 *
 * Starting from 0, every input byte updates the accumulator as
 *  acc = (acc * 33) XOR byte.
 * Returns 0 when @len is not positive.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cur;
	unsigned int acc;
	int left;

	acc = 0;
	cur = h;
	for (left = len; left > 0; left--) {
		/* Multiplication binds tighter than XOR. */
		acc = (acc * 33) ^ *cur;
		cur++;
	}

	return (acc);
}
204 
205 static uint32_t
206 hash_priv(const struct nhop_priv *priv)
207 {
208 	struct nhop_object *nh;
209 	uint16_t ifindex;
210 	struct _hash_data key;
211 
212 	nh = priv->nh;
213 	ifindex = nh->nh_ifp->if_index & 0xFFFF;
214 	memset(&key, 0, sizeof(key));
215 
216 	key.ifindex = ifindex;
217 	key.family = nh->gw_sa.sa_family;
218 	key.nh_type = priv->nh_type & 0xFF;
219 	if (nh->gw_sa.sa_family == AF_INET6)
220 		memcpy(&key.gw_addr, &nh->gw6_sa.sin6_addr.s6_addr32[3], 4);
221 	else if (nh->gw_sa.sa_family == AF_INET)
222 		memcpy(&key.gw_addr, &nh->gw4_sa.sin_addr, 4);
223 
224 	return (uint32_t)(djb_hash((const unsigned char *)&key, sizeof(key)));
225 }
226 
227 /*
228  * Checks if hash needs resizing and performs this resize if necessary
229  *
230  */
231 static void
232 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
233 {
234 	void *nh_ptr, *nh_idx_ptr;
235 	void *old_idx_ptr;
236 	size_t alloc_size;
237 
238 	nh_ptr = NULL;
239 	if (new_nh_buckets != 0) {
240 		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
241 		nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
242 	}
243 
244 	nh_idx_ptr = NULL;
245 	if (new_idx_items != 0) {
246 		alloc_size = bitmask_get_size(new_idx_items);
247 		nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
248 	}
249 
250 	if (nh_ptr == NULL && nh_idx_ptr == NULL) {
251 		/* Either resize is not required or allocations have failed. */
252 		return;
253 	}
254 
255 	DPRINTF("going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]", nh_ptr,
256 	    new_nh_buckets, nh_idx_ptr, new_idx_items);
257 
258 	old_idx_ptr = NULL;
259 
260 	NHOPS_WLOCK(ctl);
261 	if (nh_ptr != NULL) {
262 		CHT_SLIST_RESIZE(&ctl->nh_head, nhops, nh_ptr, new_nh_buckets);
263 	}
264 	if (nh_idx_ptr != NULL) {
265 		if (bitmask_copy(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items) == 0)
266 			bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
267 	}
268 	NHOPS_WUNLOCK(ctl);
269 
270 	if (nh_ptr != NULL)
271 		free(nh_ptr, M_NHOP);
272 	if (old_idx_ptr != NULL)
273 		free(old_idx_ptr, M_NHOP);
274 }
275 
276 /*
277  * Links nextop @nh_priv to the nexhop hash table and allocates
278  *  nexhop index.
279  * Returns allocated index or 0 on failure.
280  */
281 int
282 link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
283 {
284 	uint16_t idx;
285 	uint32_t num_buckets_new, num_items_new;
286 
287 	KASSERT((nh_priv->nh_idx == 0), ("nhop index is already allocated"));
288 	NHOPS_WLOCK(ctl);
289 
290 	/*
291 	 * Check if we need to resize hash and index.
292 	 * The following 2 functions returns either new size or 0
293 	 *  if resize is not required.
294 	 */
295 	num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
296 	num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
297 
298 	if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
299 		NHOPS_WUNLOCK(ctl);
300 		DPRINTF("Unable to allocate nhop index");
301 		RTSTAT_INC(rts_nh_idx_alloc_failure);
302 		consider_resize(ctl, num_buckets_new, num_items_new);
303 		return (0);
304 	}
305 
306 	nh_priv->nh_idx = idx;
307 	nh_priv->nh_control = ctl;
308 
309 	CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);
310 
311 	NHOPS_WUNLOCK(ctl);
312 
313 	DPRINTF("Linked nhop priv %p to %d, hash %u, ctl %p", nh_priv, idx,
314 	    hash_priv(nh_priv), ctl);
315 	consider_resize(ctl, num_buckets_new, num_items_new);
316 
317 	return (idx);
318 }
319 
320 /*
321  * Unlinks nexthop specified by @nh_priv data from the hash.
322  *
323  * Returns found nexthop or NULL.
324  */
325 struct nhop_priv *
326 unlink_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv_del)
327 {
328 	struct nhop_priv *priv_ret;
329 	int idx;
330 	uint32_t num_buckets_new, num_items_new;
331 
332 	idx = 0;
333 
334 	NHOPS_WLOCK(ctl);
335 	CHT_SLIST_REMOVE_BYOBJ(&ctl->nh_head, nhops, nh_priv_del, priv_ret);
336 
337 	if (priv_ret != NULL) {
338 		idx = priv_ret->nh_idx;
339 		priv_ret->nh_idx = 0;
340 
341 		KASSERT((idx != 0), ("bogus nhop index 0"));
342 		if ((bitmask_free_idx(&ctl->nh_idx_head, idx)) != 0) {
343 			DPRINTF("Unable to remove index %d from fib %u af %d",
344 			    idx, ctl->ctl_rh->rib_fibnum,
345 			    ctl->ctl_rh->rib_family);
346 		}
347 	}
348 
349 	/* Check if hash or index needs to be resized */
350 	num_buckets_new = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->nh_head);
351 	num_items_new = bitmask_get_resize_items(&ctl->nh_idx_head);
352 
353 	NHOPS_WUNLOCK(ctl);
354 
355 	if (priv_ret == NULL)
356 		DPRINTF("Unable to unlink nhop priv %p from hash, hash %u ctl %p",
357 		    nh_priv_del, hash_priv(nh_priv_del), ctl);
358 	else
359 		DPRINTF("Unlinked nhop %p priv idx %d", priv_ret, idx);
360 
361 	consider_resize(ctl, num_buckets_new, num_items_new);
362 
363 	return (priv_ret);
364 }
365 
366 /*
367  * Searches for the nexthop by data specifcied in @nh_priv.
368  * Returns referenced nexthop or NULL.
369  */
370 struct nhop_priv *
371 find_nhop(struct nh_control *ctl, const struct nhop_priv *nh_priv)
372 {
373 	struct nhop_priv *nh_priv_ret;
374 
375 	NHOPS_RLOCK(ctl);
376 	CHT_SLIST_FIND_BYOBJ(&ctl->nh_head, nhops, nh_priv, nh_priv_ret);
377 	if (nh_priv_ret != NULL) {
378 		if (refcount_acquire_if_not_zero(&nh_priv_ret->nh_refcnt) == 0){
379 			/* refcount was 0 -> nhop is being deleted */
380 			nh_priv_ret = NULL;
381 		}
382 	}
383 	NHOPS_RUNLOCK(ctl);
384 
385 	return (nh_priv_ret);
386 }
387 
388