xref: /freebsd/sys/net/route/nhgrp.c (revision a4bcd20486f8c20cc875b39bc75aa0d5a047373f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 #include "opt_inet.h"
31 #include "opt_route.h"
32 
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/lock.h>
37 #include <sys/rmlock.h>
38 #include <sys/rwlock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/refcount.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/kernel.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/vnet.h>
53 
54 #include <netinet/in.h>
55 #include <netinet/in_var.h>
56 #include <netinet/in_fib.h>
57 
58 #include <net/route/nhop_utils.h>
59 #include <net/route/nhop.h>
60 #include <net/route/nhop_var.h>
61 #include <net/route/nhgrp_var.h>
62 
63 /*
64  * This file contains data structures management logic for the nexthop
65  * groups ("nhgrp") route subsystem.
66  *
67  * Nexthop groups are used to store multiple routes available for the specific
68  *  prefix. Nexthop groups are immutable and can be shared across multiple
69  *  prefixes.
70  *
71  * Each group consists of a control plane part and a dataplane part.
72  * Control plane is basically a collection of nexthop objects with
73  *  weights and refcount.
74  *
 * Datapath consists of an array of nexthop pointers, compiled from control
 *  plane data to support O(1) nexthop selection.
77  *
78  * For example, consider the following group:
79  *  [(nh1, weight=100), (nh2, weight=200)]
80  * It will compile to the following array:
81  *  [nh1, nh2, nh2]
82  *
83  */
84 
/*
 * Forward declarations for the file-local helpers defined further down.
 */
static unsigned int hash_nhgrp(const struct nhgrp_priv *obj);
static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b);

static void consider_resize(struct nh_control *ctl,
    uint32_t new_nh_buckets, uint32_t new_idx_items);
90 
/*
 * Folds the first @len bytes at @h into an unsigned hash value.
 *
 * Every step multiplies the running value by 33 and XORs in the next byte.
 * A @len of zero (or less) consumes no input and yields 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	unsigned int acc;
	int left;

	acc = 0;
	for (left = len; left > 0; left--)
		acc = (acc * 33) ^ *h++;

	return (acc);
}
102 
103 static int
104 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b)
105 {
106 
107 	/*
108 	 * In case of consistent hashing, there can be multiple nexthop groups
109 	 * with the same "control plane" list of nexthops with weights and a
110 	 * different set of "data plane" nexthops.
111 	 * For now, ignore the data plane and focus on the control plane list.
112 	 */
113 	if (a->nhg_nh_count != b->nhg_nh_count)
114 		return (0);
115 	return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights,
116 	    sizeof(struct weightened_nhop) * a->nhg_nh_count);
117 }
118 
119 /*
120  * Hash callback: calculate hash of an object
121  */
122 static unsigned int
123 hash_nhgrp(const struct nhgrp_priv *obj)
124 {
125 	const unsigned char *key;
126 
127 	key = (const unsigned char *)obj->nhg_nh_weights;
128 
129 	return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count));
130 }
131 
132 /*
133  * Returns object referenced and unlocked
134  */
135 struct nhgrp_priv *
136 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key)
137 {
138 	struct nhgrp_priv *priv_ret;
139 
140 	NHOPS_RLOCK(ctl);
141 	CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret);
142 	if (priv_ret != NULL) {
143 		if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) {
144 			/* refcount is 0 -> group is being deleted */
145 			priv_ret = NULL;
146 		}
147 	}
148 	NHOPS_RUNLOCK(ctl);
149 
150 	return (priv_ret);
151 }
152 
153 int
154 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
155 {
156 	uint16_t idx;
157 	uint32_t new_num_buckets, new_num_items;
158 
159 	NHOPS_WLOCK(ctl);
160 	/* Check if we need to resize hash and index */
161 	new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head);
162 	new_num_items = bitmask_get_resize_items(&ctl->gr_idx_head);
163 
164 	if (bitmask_alloc_idx(&ctl->gr_idx_head, &idx) != 0) {
165 		NHOPS_WUNLOCK(ctl);
166 		DPRINTF("Unable to allocate mpath index");
167 		consider_resize(ctl, new_num_buckets, new_num_items);
168 		return (0);
169 	}
170 
171 	grp_priv->nhg_idx = idx;
172 	grp_priv->nh_control = ctl;
173 	CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv);
174 
175 	NHOPS_WUNLOCK(ctl);
176 
177 	consider_resize(ctl, new_num_buckets, new_num_items);
178 
179 	return (1);
180 }
181 
182 struct nhgrp_priv *
183 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
184 {
185 	struct nhgrp_priv *nhg_priv_ret;
186 	int ret, idx;
187 
188 	NHOPS_WLOCK(ctl);
189 
190 	CHT_SLIST_REMOVE_BYOBJ(&ctl->gr_head, mpath, key, nhg_priv_ret);
191 
192 	if (nhg_priv_ret == NULL) {
193 		DPRINTF("Unable to find nhop group!");
194 		NHOPS_WUNLOCK(ctl);
195 		return (NULL);
196 	}
197 
198 	idx = nhg_priv_ret->nhg_idx;
199 	ret = bitmask_free_idx(&ctl->gr_idx_head, idx);
200 	nhg_priv_ret->nhg_idx = 0;
201 	nhg_priv_ret->nh_control = NULL;
202 
203 	NHOPS_WUNLOCK(ctl);
204 
205 	return (nhg_priv_ret);
206 }
207 
208 /*
209  * Checks if hash needs resizing and performs this resize if necessary
210  *
211  */
212 __noinline static void
213 consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
214 {
215 	void *nh_ptr, *nh_idx_ptr;
216 	void *old_idx_ptr;
217 	size_t alloc_size;
218 
219 	nh_ptr = NULL ;
220 	if (new_nh_buckets != 0) {
221 		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
222 		nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
223 	}
224 
225 	nh_idx_ptr = NULL;
226 	if (new_idx_items != 0) {
227 		alloc_size = bitmask_get_size(new_idx_items);
228 		nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
229 	}
230 
231 	if (nh_ptr == NULL && nh_idx_ptr == NULL) {
232 		/* Either resize is not required or allocations have failed. */
233 		return;
234 	}
235 
236 	DPRINTF("mp: going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
237 	    nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items);
238 
239 	old_idx_ptr = NULL;
240 
241 	NHOPS_WLOCK(ctl);
242 	if (nh_ptr != NULL) {
243 		CHT_SLIST_RESIZE(&ctl->gr_head, mpath, nh_ptr, new_nh_buckets);
244 	}
245 	if (nh_idx_ptr != NULL) {
246 		if (bitmask_copy(&ctl->gr_idx_head, nh_idx_ptr, new_idx_items))
247 			bitmask_swap(&ctl->nh_idx_head, nh_idx_ptr, new_idx_items, &old_idx_ptr);
248 	}
249 	NHOPS_WUNLOCK(ctl);
250 
251 	if (nh_ptr != NULL)
252 		free(nh_ptr, M_NHOP);
253 	if (old_idx_ptr != NULL)
254 		free(old_idx_ptr, M_NHOP);
255 }
256 
257 /*
258  * Function allocating the necessary group data structures.
259  */
260 bool
261 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags)
262 {
263 	size_t alloc_size;
264 	uint32_t num_buckets, num_items;
265 	void *cht_ptr, *mask_ptr;
266 
267 	malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO;
268 
269 	num_buckets = 8;
270 	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
271 	cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags);
272 
273 	if (cht_ptr == NULL) {
274 		DPRINTF("mpath init failed");
275 		return (false);
276 	}
277 
278 	/*
279 	 * Allocate nexthop index bitmask.
280 	 */
281 	num_items = 128;
282 	mask_ptr = malloc(bitmask_get_size(num_items), M_NHOP, malloc_flags);
283 	if (mask_ptr == NULL) {
284 		DPRINTF("mpath bitmask init failed");
285 		free(cht_ptr, M_NHOP);
286 		return (false);
287 	}
288 
289 	NHOPS_WLOCK(ctl);
290 
291 	if (ctl->gr_head.hash_size == 0) {
292 		/* Init hash and bitmask */
293 		CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets);
294 		bitmask_init(&ctl->gr_idx_head, mask_ptr, num_items);
295 		NHOPS_WUNLOCK(ctl);
296 	} else {
297 		/* Other thread has already initiliazed hash/bitmask */
298 		NHOPS_WUNLOCK(ctl);
299 		free(cht_ptr, M_NHOP);
300 		free(mask_ptr, M_NHOP);
301 	}
302 
303 	DPRINTF("mpath init done for fib/af %d/%d", ctl->rh->rib_fibnum,
304 	    ctl->rh->rib_family);
305 
306 	return (true);
307 }
308 
309 int
310 nhgrp_ctl_init(struct nh_control *ctl)
311 {
312 
313 	/*
314 	 * By default, do not allocate datastructures as multipath
315 	 * routes will not be necessarily used.
316 	 */
317 	CHT_SLIST_INIT(&ctl->gr_head, NULL, 0);
318 	bitmask_init(&ctl->gr_idx_head, NULL, 0);
319 	return (0);
320 }
321 
322 void
323 nhgrp_ctl_free(struct nh_control *ctl)
324 {
325 
326 	if (ctl->gr_head.ptr != NULL)
327 		free(ctl->gr_head.ptr, M_NHOP);
328 	if (ctl->gr_idx_head.idx != NULL)
329 		free(ctl->gr_idx_head.idx, M_NHOP);
330 }
331 
332 void
333 nhgrp_ctl_unlink_all(struct nh_control *ctl)
334 {
335 	struct nhgrp_priv *nhg_priv;
336 
337 	NHOPS_WLOCK_ASSERT(ctl);
338 
339 	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
340 		DPRINTF("Marking nhgrp %u unlinked", nhg_priv->nhg_idx);
341 		refcount_release(&nhg_priv->nhg_linked);
342 	} CHT_SLIST_FOREACH_END;
343 }
344 
345