xref: /freebsd/sys/net/route/nhgrp.c (revision 254b23eb1f540844cf2a90f2781ae4231c5701ce)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2020 Alexander V. Chernikov
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include "opt_inet.h"
29 
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/lock.h>
33 #include <sys/rmlock.h>
34 #include <sys/rwlock.h>
35 #include <sys/malloc.h>
36 #include <sys/mbuf.h>
37 #include <sys/refcount.h>
38 #include <sys/socket.h>
39 #include <sys/sysctl.h>
40 #include <sys/kernel.h>
41 
42 #include <net/if.h>
43 #include <net/if_var.h>
44 #include <net/if_dl.h>
45 #include <net/route.h>
46 #include <net/route/route_ctl.h>
47 #include <net/route/route_var.h>
48 #include <net/vnet.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_var.h>
52 #include <netinet/in_fib.h>
53 
54 #include <net/route/nhop_utils.h>
55 #include <net/route/nhop.h>
56 #include <net/route/nhop_var.h>
57 #include <net/route/nhgrp_var.h>
58 
59 #define	DEBUG_MOD_NAME	nhgrp
60 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
61 #include <net/route/route_debug.h>
62 _DECLARE_DEBUG(LOG_INFO);
63 
64 /*
65  * This file contains data structures management logic for the nexthop
66  * groups ("nhgrp") route subsystem.
67  *
68  * Nexthop groups are used to store multiple routes available for the specific
69  *  prefix. Nexthop groups are immutable and can be shared across multiple
70  *  prefixes.
71  *
72  * Each group consists of a control plane part and a dataplane part.
73  * Control plane is basically a collection of nexthop objects with
74  *  weights and refcount.
75  *
76  * Datapath consists of an array of nexthop pointers, compiled from control
77  *  plane data to support O(1) nexthop selection.
78  *
79  * For example, consider the following group:
80  *  [(nh1, weight=100), (nh2, weight=200)]
81  * It will compile to the following array:
82  *  [nh1, nh2, nh2]
83  *
84  */
85 
86 static void consider_resize(struct nh_control *ctl, uint32_t new_gr_buckets,
87     uint32_t new_idx_items);
88 
89 static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b);
90 static unsigned int hash_nhgrp(const struct nhgrp_priv *obj);
91 
/*
 * Folds the first len bytes of h into an unsigned value: for each byte
 * the accumulator is multiplied by 33 and then XOR-ed with that byte.
 * A zero or negative len leaves the result at 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cur = h;
	unsigned int acc = 0;
	int left;

	for (left = len; left > 0; left--, cur++)
		acc = (acc * 33) ^ *cur;

	return (acc);
}
103 
104 static int
cmp_nhgrp(const struct nhgrp_priv * a,const struct nhgrp_priv * b)105 cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b)
106 {
107 
108 	/*
109 	 * In case of consistent hashing, there can be multiple nexthop groups
110 	 * with the same "control plane" list of nexthops with weights and a
111 	 * different set of "data plane" nexthops.
112 	 * For now, ignore the data plane and focus on the control plane list.
113 	 */
114 	if (a->nhg_nh_count != b->nhg_nh_count || a->nhg_uidx != b->nhg_uidx)
115 		return (0);
116 	return !memcmp(a->nhg_nh_weights, b->nhg_nh_weights,
117 	    sizeof(struct weightened_nhop) * a->nhg_nh_count);
118 }
119 
120 /*
121  * Hash callback: calculate hash of an object
122  */
123 static unsigned int
hash_nhgrp(const struct nhgrp_priv * obj)124 hash_nhgrp(const struct nhgrp_priv *obj)
125 {
126 	const unsigned char *key;
127 
128 	key = (const unsigned char *)obj->nhg_nh_weights;
129 
130 	return (djb_hash(key, sizeof(struct weightened_nhop) * obj->nhg_nh_count));
131 }
132 
133 /*
134  * Returns object referenced and unlocked
135  */
136 struct nhgrp_priv *
find_nhgrp(struct nh_control * ctl,const struct nhgrp_priv * key)137 find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key)
138 {
139 	struct nhgrp_priv *priv_ret;
140 
141 	NHOPS_RLOCK(ctl);
142 	CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, priv_ret);
143 	if (priv_ret != NULL) {
144 		if (refcount_acquire_if_not_zero(&priv_ret->nhg_refcount) == 0) {
145 			/* refcount is 0 -> group is being deleted */
146 			priv_ret = NULL;
147 		}
148 	}
149 	NHOPS_RUNLOCK(ctl);
150 
151 	return (priv_ret);
152 }
153 
154 int
link_nhgrp(struct nh_control * ctl,struct nhgrp_priv * grp_priv)155 link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
156 {
157 	uint16_t idx;
158 	uint32_t new_num_buckets, new_num_items;
159 
160 	NHOPS_WLOCK(ctl);
161 	/* Check if we need to resize hash and index */
162 	new_num_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head);
163 	new_num_items = bitmask_get_resize_items(&ctl->nh_idx_head);
164 
165 	if (bitmask_alloc_idx(&ctl->nh_idx_head, &idx) != 0) {
166 		NHOPS_WUNLOCK(ctl);
167 		FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to allocate nhg index");
168 		consider_resize(ctl, new_num_buckets, new_num_items);
169 		return (0);
170 	}
171 
172 	grp_priv->nhg_idx = idx;
173 	grp_priv->nh_control = ctl;
174 	CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv);
175 
176 	NHOPS_WUNLOCK(ctl);
177 
178 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
179 		char nhgrp_buf[NHOP_PRINT_BUFSIZE] __unused;
180 		FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "linked %s",
181 		    nhgrp_print_buf(grp_priv->nhg, nhgrp_buf, sizeof(nhgrp_buf)));
182 	}
183 	consider_resize(ctl, new_num_buckets, new_num_items);
184 
185 	return (1);
186 }
187 
188 struct nhgrp_priv *
unlink_nhgrp(struct nh_control * ctl,struct nhgrp_priv * key)189 unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
190 {
191 	struct nhgrp_priv *nhg_priv_ret;
192 	int idx;
193 
194 	NHOPS_WLOCK(ctl);
195 
196 	CHT_SLIST_REMOVE(&ctl->gr_head, mpath, key, nhg_priv_ret);
197 
198 	if (nhg_priv_ret == NULL) {
199 		FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "Unable to find nhg");
200 		NHOPS_WUNLOCK(ctl);
201 		return (NULL);
202 	}
203 
204 	idx = nhg_priv_ret->nhg_idx;
205 	bitmask_free_idx(&ctl->nh_idx_head, idx);
206 	nhg_priv_ret->nhg_idx = 0;
207 	nhg_priv_ret->nh_control = NULL;
208 
209 	NHOPS_WUNLOCK(ctl);
210 
211 	IF_DEBUG_LEVEL(LOG_DEBUG2) {
212 		char nhgrp_buf[NHOP_PRINT_BUFSIZE];
213 		nhgrp_print_buf(nhg_priv_ret->nhg, nhgrp_buf, sizeof(nhgrp_buf));
214 		FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "unlinked idx#%d %s", idx,
215 		    nhgrp_buf);
216 	}
217 
218 	return (nhg_priv_ret);
219 }
220 
221 /*
222  * Checks if hash needs resizing and performs this resize if necessary
223  *
224  */
225 static void
consider_resize(struct nh_control * ctl,uint32_t new_gr_bucket,uint32_t new_idx_items)226 consider_resize(struct nh_control *ctl, uint32_t new_gr_bucket, uint32_t new_idx_items)
227 {
228 	void *gr_ptr, *gr_idx_ptr;
229 	void *old_idx_ptr;
230 	size_t alloc_size;
231 
232 	gr_ptr = NULL ;
233 	if (new_gr_bucket != 0) {
234 		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_gr_bucket);
235 		gr_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
236 	}
237 
238 	gr_idx_ptr = NULL;
239 	if (new_idx_items != 0) {
240 		alloc_size = bitmask_get_size(new_idx_items);
241 		gr_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
242 	}
243 
244 	if (gr_ptr == NULL && gr_idx_ptr == NULL) {
245 		/* Either resize is not required or allocations have failed. */
246 		return;
247 	}
248 
249 	FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh,
250 	    "going to resize nhg hash: [ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
251 	    gr_ptr, new_gr_bucket, gr_idx_ptr, new_idx_items);
252 
253 	old_idx_ptr = NULL;
254 
255 	NHOPS_WLOCK(ctl);
256 	if (gr_ptr != NULL) {
257 		CHT_SLIST_RESIZE(&ctl->gr_head, mpath, gr_ptr, new_gr_bucket);
258 	}
259 	if (gr_idx_ptr != NULL) {
260 		if (bitmask_copy(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items) == 0)
261 			bitmask_swap(&ctl->nh_idx_head, gr_idx_ptr, new_idx_items, &old_idx_ptr);
262 	}
263 	NHOPS_WUNLOCK(ctl);
264 
265 	if (gr_ptr != NULL)
266 		free(gr_ptr, M_NHOP);
267 	if (old_idx_ptr != NULL)
268 		free(old_idx_ptr, M_NHOP);
269 }
270 
271 /*
272  * Function allocating the necessary group data structures.
273  */
274 bool
nhgrp_ctl_alloc_default(struct nh_control * ctl,int malloc_flags)275 nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags)
276 {
277 	size_t alloc_size;
278 	uint32_t num_buckets;
279 	void *cht_ptr;
280 
281 	malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO;
282 
283 	num_buckets = 8;
284 	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
285 	cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags);
286 
287 	if (cht_ptr == NULL) {
288 		FIB_RH_LOG(LOG_WARNING, ctl->ctl_rh, "multipath init failed");
289 		return (false);
290 	}
291 
292 	NHOPS_WLOCK(ctl);
293 
294 	if (ctl->gr_head.hash_size == 0) {
295 		/* Init hash and bitmask */
296 		CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets);
297 		NHOPS_WUNLOCK(ctl);
298 	} else {
299 		/* Other thread has already initiliazed hash/bitmask */
300 		NHOPS_WUNLOCK(ctl);
301 		free(cht_ptr, M_NHOP);
302 	}
303 
304 	FIB_RH_LOG(LOG_DEBUG, ctl->ctl_rh, "multipath init done");
305 
306 	return (true);
307 }
308 
309 int
nhgrp_ctl_init(struct nh_control * ctl)310 nhgrp_ctl_init(struct nh_control *ctl)
311 {
312 
313 	/*
314 	 * By default, do not allocate datastructures as multipath
315 	 * routes will not be necessarily used.
316 	 */
317 	CHT_SLIST_INIT(&ctl->gr_head, NULL, 0);
318 	return (0);
319 }
320 
321 void
nhgrp_ctl_free(struct nh_control * ctl)322 nhgrp_ctl_free(struct nh_control *ctl)
323 {
324 	if (ctl->gr_head.ptr != NULL)
325 		free(ctl->gr_head.ptr, M_NHOP);
326 }
327 
328 void
nhgrp_ctl_unlink_all(struct nh_control * ctl)329 nhgrp_ctl_unlink_all(struct nh_control *ctl)
330 {
331 	struct nhgrp_priv *nhg_priv;
332 
333 	NHOPS_WLOCK_ASSERT(ctl);
334 
335 	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, nhg_priv) {
336 		IF_DEBUG_LEVEL(LOG_DEBUG2) {
337 			char nhgbuf[NHOP_PRINT_BUFSIZE] __unused;
338 			FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "marking %s unlinked",
339 			    nhgrp_print_buf(nhg_priv->nhg, nhgbuf, sizeof(nhgbuf)));
340 		}
341 		refcount_release(&nhg_priv->nhg_linked);
342 	} CHT_SLIST_FOREACH_END;
343 }
344 
345