xref: /freebsd/sys/net/route/nhgrp.c (revision a765ac11c50bb20a64905e365b05b010533f26d3)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD$
28  */
29 
30 #include "opt_inet.h"
31 #include "opt_route.h"
32 
33 #include <sys/cdefs.h>
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/lock.h>
37 #include <sys/rmlock.h>
38 #include <sys/rwlock.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/refcount.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/kernel.h>
45 
46 #include <net/if.h>
47 #include <net/if_var.h>
48 #include <net/if_dl.h>
49 #include <net/route.h>
50 #include <net/route/route_ctl.h>
51 #include <net/route/route_var.h>
52 #include <net/vnet.h>
53 
54 #include <netinet/in.h>
55 #include <netinet/in_var.h>
56 #include <netinet/in_fib.h>
57 
58 #include <net/route/nhop_utils.h>
59 #include <net/route/nhop.h>
60 #include <net/route/nhop_var.h>
61 #include <net/route/nhgrp_var.h>
62 
63 #define	DEBUG_MOD_NAME	nhgrp
64 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
65 #include <net/route/route_debug.h>
66 _DECLARE_DEBUG(LOG_INFO);
67 
68 /*
69  * This file contains data structures management logic for the nexthop
70  * groups ("nhgrp") route subsystem.
71  *
72  * Nexthop groups are used to store multiple routes available for the specific
73  *  prefix. Nexthop groups are immutable and can be shared across multiple
74  *  prefixes.
75  *
76  * Each group consists of a control plane part and a dataplane part.
77  * Control plane is basically a collection of nexthop objects with
78  *  weights and refcount.
79  *
 * Datapath consists of an array of nexthop pointers, compiled from control
81  *  plane data to support O(1) nexthop selection.
82  *
83  * For example, consider the following group:
84  *  [(nh1, weight=100), (nh2, weight=200)]
85  * It will compile to the following array:
86  *  [nh1, nh2, nh2]
87  *
88  */
89 
/*
 * Forward declaration: consider_resize() is called from link_nhgrp() ahead
 * of its definition further down in this file.
 */
static void consider_resize(struct nh_control *ctl, uint32_t new_gr_buckets,
    uint32_t new_idx_items);

/*
 * Group comparison and hashing helpers, both defined below.
 * NOTE(review): presumably these are the callbacks the CHT_SLIST_* macros
 * reach through the "mpath" prefix - confirm against nhgrp_var.h.
 */
static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b);
static unsigned int hash_nhgrp(const struct nhgrp_priv *obj);
95 
/*
 * Folds the first @len bytes at @h into an unsigned hash value.
 *
 * Starting from 0, every byte updates the accumulator as
 * (accumulator * 33) XOR byte.  A zero or negative @len yields 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	unsigned int acc = 0;
	int pos = 0;

	while (pos < len) {
		/* Multiplication binds tighter than XOR; spelled out here. */
		acc = (acc * 33) ^ h[pos];
		pos++;
	}

	return (acc);
}
107 
108 static int
109 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b)
110 {
111 
112 	/*
113 	 * In case of consistent hashing, there can be multiple nexthop groups
114 	 * with the same "control plane" list of nexthops with weights and a
115 	 * different set of "data plane" nexthops.
116 	 * For now, ignore the data plane and focus on the control plane list.
117 	 */
118 	if (a->nhg_nh_count != b->nhg_nh_count)
119 		return (0);
120 	return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights,
121 	    sizeof(struct weightened_nhop) * a->nhg_nh_count);
122 }
123 
124 /*
125  * Hash callback: calculate hash of an object
126  */
127 static unsigned int
128 hash_nhgrp(const struct nhgrp_priv *obj)
129 {
130 	const unsigned char *key;
131 
132 	key = (const unsigned char *)obj->nhg_nh_weights;
133 
134 	return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count));
135 }
136 
137 /*
138  * Returns object referenced and unlocked
139  */
140 struct nhgrp_priv *
141 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key)
142 {
143 	struct nhgrp_priv *priv_ret;
144 
145 	NHOPS_RLOCK(ctl);
146 	CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret);
147 	if (priv_ret != NULL) {
148 		if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) {
149 			/* refcount is 0 -> group is being deleted */
150 			priv_ret = NULL;
151 		}
152 	}
153 	NHOPS_RUNLOCK(ctl);
154 
155 	return (priv_ret);
156 }
157 
158 int
159 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
160 {
161 	uint16_t idx;
162 	uint32_t new_num_buckets, new_num_items;
163 
164 	NHOPS_WLOCK(ctl);
165 	/* Check if we need to resize hash and index */
166 	new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head);
167 	new_num_items = bitmask_get_resize_items(&ctl->nh_idx_head);
168 
169 	if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
170 		NHOPS_WUNLOCK(ctl);
171 		FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to allocate nhg index");
172 		consider_resize(ctl, new_num_buckets, new_num_items);
173 		return (0);
174 	}
175 
176 	grp_priv->nhg_idx = idx;
177 	grp_priv->nh_control = ctl;
178 	CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv);
179 
180 	NHOPS_WUNLOCK(ctl);
181 
182 	consider_resize(ctl, new_num_buckets, new_num_items);
183 
184 	return (1);
185 }
186 
187 struct nhgrp_priv *
188 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
189 {
190 	struct nhgrp_priv *nhg_priv_ret;
191 	int idx;
192 
193 	NHOPS_WLOCK(ctl);
194 
195 	CHT_SLIST_REMOVE(&ctl->gr_head, mpath, key, nhg_priv_ret);
196 
197 	if (nhg_priv_ret == NULL) {
198 		FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to find nhg");
199 		NHOPS_WUNLOCK(ctl);
200 		return (NULL);
201 	}
202 
203 	idx = nhg_priv_ret->nhg_idx;
204 	bitmask_free_idx(&ctl->nh_idx_head, idx);
205 	nhg_priv_ret->nhg_idx = 0;
206 	nhg_priv_ret->nh_control = NULL;
207 
208 	NHOPS_WUNLOCK(ctl);
209 
210 	return (nhg_priv_ret);
211 }
212 
213 /*
214  * Checks if hash needs resizing and performs this resize if necessary
215  *
216  */
217 static void
218 consider_resize(struct nh_control *ctl, uint32_t new_gr_bucket, uint32_t new_idx_items)
219 {
220 	void *gr_ptr, *gr_idx_ptr;
221 	void *old_idx_ptr;
222 	size_t alloc_size;
223 
224 	gr_ptr = NULL ;
225 	if (new_gr_bucket != 0) {
226 		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_gr_bucket);
227 		gr_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
228 	}
229 
230 	gr_idx_ptr = NULL;
231 	if (new_idx_items != 0) {
232 		alloc_size = bitmask_get_size(new_idx_items);
233 		gr_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
234 	}
235 
236 	if (gr_ptr == NULL && gr_idx_ptr == NULL) {
237 		/* Either resize is not required or allocations have failed. */
238 		return;
239 	}
240 
241 	FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh,
242 	    "going to resize nhg hash: [ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
243 	    gr_ptr, new_gr_bucket, gr_idx_ptr, new_idx_items);
244 
245 	old_idx_ptr = NULL;
246 
247 	NHOPS_WLOCK(ctl);
248 	if (gr_ptr != NULL) {
249 		CHT_SLIST_RESIZE(&ctl->gr_head, mpath, gr_ptr, new_gr_bucket);
250 	}
251 	if (gr_idx_ptr != NULL) {
252 		if (bitmask_copy(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items) == 0)
253 			bitmask_swap(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items, &old_idx_ptr);
254 	}
255 	NHOPS_WUNLOCK(ctl);
256 
257 	if (gr_ptr != NULL)
258 		free(gr_ptr, M_NHOP);
259 	if (old_idx_ptr != NULL)
260 		free(old_idx_ptr, M_NHOP);
261 }
262 
263 /*
264  * Function allocating the necessary group data structures.
265  */
266 bool
267 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags)
268 {
269 	size_t alloc_size;
270 	uint32_t num_buckets;
271 	void *cht_ptr;
272 
273 	malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO;
274 
275 	num_buckets = 8;
276 	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
277 	cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags);
278 
279 	if (cht_ptr == NULL) {
280 		FIB_RH_LOG(LOG_WARNING, ctl->ctl_rh, "multipath init failed");
281 		return (false);
282 	}
283 
284 	NHOPS_WLOCK(ctl);
285 
286 	if (ctl->gr_head.hash_size == 0) {
287 		/* Init hash and bitmask */
288 		CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets);
289 		NHOPS_WUNLOCK(ctl);
290 	} else {
291 		/* Other thread has already initiliazed hash/bitmask */
292 		NHOPS_WUNLOCK(ctl);
293 		free(cht_ptr, M_NHOP);
294 	}
295 
296 	FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "multipath init done");
297 
298 	return (true);
299 }
300 
301 int
302 nhgrp_ctl_init(struct nh_control *ctl)
303 {
304 
305 	/*
306 	 * By default, do not allocate datastructures as multipath
307 	 * routes will not be necessarily used.
308 	 */
309 	CHT_SLIST_INIT(&ctl->gr_head, NULL, 0);
310 	return (0);
311 }
312 
313 void
314 nhgrp_ctl_free(struct nh_control *ctl)
315 {
316 	if (ctl->gr_head.ptr != NULL)
317 		free(ctl->gr_head.ptr, M_NHOP);
318 }
319 
320 void
321 nhgrp_ctl_unlink_all(struct nh_control *ctl)
322 {
323 	struct nhgrp_priv *nhg_priv;
324 
325 	NHOPS_WLOCK_ASSERT(ctl);
326 
327 	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
328 #if DEBUG_MAX_LEVEL >= LOG_DEBUG
329 		char nhgbuf[NHOP_PRINT_BUFSIZE];
330 		FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "marking %s unlinked",
331 		    nhgrp_print_buf(nhg_priv->nhg, nhgbuf, sizeof(nhgbuf)));
332 #endif
333 		refcount_release(&nhg_priv->nhg_linked);
334 	} CHT_SLIST_FOREACH_END;
335 }
336 
337