xref: /freebsd/sys/net/route/route_tables.c (revision e64fe029e9d3ce476e77a478318e0c3cd201ff08)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /************************************************************************
32  * Note: In this file a 'fib' is a "forwarding information base"	*
33  * Which is the new name for an in kernel routing (next hop) table.	*
34  ***********************************************************************/
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 #include "opt_route.h"
39 
40 #include <sys/param.h>
41 #include <sys/socket.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/jail.h>
45 #include <sys/osd.h>
46 #include <sys/proc.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/sx.h>
52 #include <sys/domain.h>
53 #include <sys/sysproto.h>
54 
55 #include <net/vnet.h>
56 #include <net/route.h>
57 #include <net/route/route_ctl.h>
58 #include <net/route/route_var.h>
59 
/*
 * Compile-time default for the number of fibs.
 * When the ROUTETABLES kernel config option is present it has to be
 * positive and not larger than RT_MAXFIBS; it then becomes RT_NUMFIBS.
 */
#if defined(ROUTETABLES)
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* defined(ROUTETABLES) */

/* Nothing selected a value: build with a single fib. */
#if !defined(RT_NUMFIBS)
#define	RT_NUMFIBS	1
#endif
74 
75 static void grow_rtables(uint32_t num_fibs);
76 
77 VNET_DEFINE_STATIC(struct sx, rtables_lock);
78 #define	V_rtables_lock		VNET(rtables_lock)
79 #define	RTABLES_LOCK()		sx_xlock(&V_rtables_lock)
80 #define	RTABLES_UNLOCK()	sx_xunlock(&V_rtables_lock)
81 #define	RTABLES_LOCK_INIT()	sx_init(&V_rtables_lock, "rtables lock")
82 #define	RTABLES_LOCK_ASSERT()	sx_assert(&V_rtables_lock, SA_LOCKED)
83 
84 VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
85 #define	V_rt_tables	VNET(rt_tables)
86 
87 VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;
88 
89 /*
90  * Handler for net.my_fibnum.
91  * Returns current fib of the process.
92  */
93 static int
94 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
95 {
96         int fibnum;
97         int error;
98 
99         fibnum = curthread->td_proc->p_fibnum;
100         error = sysctl_handle_int(oidp, &fibnum, 0, req);
101         return (error);
102 }
103 SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
104     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
105     &sysctl_my_fibnum, "I",
106     "default FIB of caller");
107 
108 static uint32_t
109 normalize_num_rtables(uint32_t num_rtables)
110 {
111 
112 	if (num_rtables > RT_MAXFIBS)
113 		num_rtables = RT_MAXFIBS;
114 	else if (num_rtables == 0)
115 		num_rtables = 1;
116 	return (num_rtables);
117 }
118 
119 /*
120  * Sets the number of fibs in the current vnet.
121  * Function does not allow shrinking number of rtables.
122  */
123 static int
124 sysctl_fibs(SYSCTL_HANDLER_ARGS)
125 {
126 	uint32_t new_fibs;
127 	int error;
128 
129 	RTABLES_LOCK();
130 	new_fibs = V_rt_numfibs;
131 	error = sysctl_handle_32(oidp, &new_fibs, 0, req);
132 	if (error == 0) {
133 		new_fibs = normalize_num_rtables(new_fibs);
134 
135 		if (new_fibs < V_rt_numfibs)
136 			error = ENOTCAPABLE;
137 		if (new_fibs > V_rt_numfibs)
138 			grow_rtables(new_fibs);
139 	}
140 	RTABLES_UNLOCK();
141 
142 	return (error);
143 }
144 SYSCTL_PROC(_net, OID_AUTO, fibs,
145     CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
146     NULL, 0, &sysctl_fibs, "IU",
147     "set number of fibs");
148 
149 /*
150  * Sets fib of a current process.
151  */
152 int
153 sys_setfib(struct thread *td, struct setfib_args *uap)
154 {
155 	int error = 0;
156 
157 	CURVNET_SET(TD_TO_VNET(td));
158 	if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs)
159 		td->td_proc->p_fibnum = uap->fibnum;
160 	else
161 		error = EINVAL;
162 	CURVNET_RESTORE();
163 
164 	return (error);
165 }
166 
167 static int
168 rtables_check_proc_fib(void *obj, void *data)
169 {
170 	struct prison *pr = obj;
171 	struct thread *td = data;
172 	int error = 0;
173 
174 	if (TD_TO_VNET(td) != pr->pr_vnet) {
175 		/* number of fibs may be lower in a new vnet */
176 		CURVNET_SET(pr->pr_vnet);
177 		if (td->td_proc->p_fibnum >= V_rt_numfibs)
178 			error = EINVAL;
179 		CURVNET_RESTORE();
180 	}
181 	return (error);
182 }
183 
184 static void
185 rtables_prison_destructor(void *data)
186 {
187 }
188 
189 static void
190 rtables_init(void)
191 {
192 	osd_method_t methods[PR_MAXMETHOD] = {
193 	    [PR_METHOD_ATTACH] =	rtables_check_proc_fib,
194 	};
195 	osd_jail_register(rtables_prison_destructor, methods);
196 }
197 SYSINIT(rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtables_init, NULL);
198 
199 
200 /*
201  * If required, copy interface routes from existing tables to the
202  * newly-created routing table.
203  */
204 static void
205 populate_kernel_routes(struct rib_head **new_rt_tables, struct rib_head *rh)
206 {
207 	for (int i = 0; i < V_rt_numfibs; i++) {
208 		struct rib_head *rh_src = new_rt_tables[i * (AF_MAX + 1) + rh->rib_family];
209 		if ((rh_src != NULL) && (rh_src != rh))
210 			rib_copy_kernel_routes(rh_src, rh);
211 	}
212 }
213 
214 /*
215  * Grows up the number of routing tables in the current fib.
216  * Function creates new index array for all rtables and allocates
217  *  remaining routing tables.
218  */
219 static void
220 grow_rtables(uint32_t num_tables)
221 {
222 	struct domain *dom;
223 	struct rib_head **prnh, *rh;
224 	struct rib_head **new_rt_tables, **old_rt_tables;
225 	int family;
226 
227 	RTABLES_LOCK_ASSERT();
228 
229 	KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n",
230 				num_tables, V_rt_numfibs));
231 
232 	new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *),
233 	    M_RTABLE, M_WAITOK | M_ZERO);
234 
235 	if ((num_tables > 1) && (V_rt_add_addr_allfibs == 0))
236 		printf("WARNING: Adding ifaddrs to all fibs has been turned off "
237 			"by default. Consider tuning %s if needed\n",
238 			"net.add_addr_allfibs");
239 
240 #ifdef FIB_ALGO
241 	fib_grow_rtables(num_tables);
242 #endif
243 
244 	/*
245 	 * Current rt_tables layout:
246 	 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX]..
247 	 * this allows to copy existing tables data by using memcpy()
248 	 */
249 	if (V_rt_tables != NULL)
250 		memcpy(new_rt_tables, V_rt_tables,
251 		    V_rt_numfibs * (AF_MAX + 1) * sizeof(void *));
252 
253 	/* Populate the remainders */
254 	SLIST_FOREACH(dom, &domains, dom_next) {
255 		if (dom->dom_rtattach == NULL)
256 			continue;
257 		family = dom->dom_family;
258 		for (int i = 0; i < num_tables; i++) {
259 			prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
260 			if (*prnh != NULL)
261 				continue;
262 			rh = dom->dom_rtattach(i);
263 			if (rh == NULL)
264 				log(LOG_ERR, "unable to create routing table for %d.%d\n",
265 				    dom->dom_family, i);
266 			else
267 				populate_kernel_routes(new_rt_tables, rh);
268 			*prnh = rh;
269 		}
270 	}
271 
272 	/*
273 	 * Update rtables pointer.
274 	 * Ensure all writes to new_rt_tables has been completed before
275 	 *  switching pointer.
276 	 */
277 	atomic_thread_fence_rel();
278 	old_rt_tables = V_rt_tables;
279 	V_rt_tables = new_rt_tables;
280 
281 	/* Wait till all cpus see new pointers */
282 	atomic_thread_fence_rel();
283 	NET_EPOCH_WAIT();
284 
285 	/* Set number of fibs to a new value */
286 	V_rt_numfibs = num_tables;
287 
288 #ifdef FIB_ALGO
289 	/* Attach fib algo to the new rtables */
290 	SLIST_FOREACH(dom, &domains, dom_next) {
291 		if (dom->dom_rtattach != NULL)
292 			fib_setup_family(dom->dom_family, num_tables);
293 	}
294 #endif
295 
296 	if (old_rt_tables != NULL)
297 		free(old_rt_tables, M_RTABLE);
298 }
299 
300 static void
301 vnet_rtables_init(const void *unused __unused)
302 {
303 	int num_rtables_base;
304 
305 	if (IS_DEFAULT_VNET(curvnet)) {
306 		num_rtables_base = RT_NUMFIBS;
307 		TUNABLE_INT_FETCH("net.fibs", &num_rtables_base);
308 		V_rt_numfibs = normalize_num_rtables(num_rtables_base);
309 	} else
310 		V_rt_numfibs = 1;
311 
312 	vnet_rtzone_init();
313 #ifdef FIB_ALGO
314 	vnet_fib_init();
315 #endif
316 	RTABLES_LOCK_INIT();
317 
318 	RTABLES_LOCK();
319 	grow_rtables(V_rt_numfibs);
320 	RTABLES_UNLOCK();
321 }
322 VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
323     vnet_rtables_init, 0);
324 
#ifdef VIMAGE
/*
 * Vnet teardown counterpart of vnet_rtables_init(): detaches every
 * routing table of the vnet, waits for the deferred destruction
 * callbacks and releases the index array and related state.
 */
static void
rtables_destroy(const void *unused __unused)
{
	struct rib_head *rnh;
	struct domain *dom;
	int family;

	RTABLES_LOCK();
	SLIST_FOREACH(dom, &domains, dom_next) {
		if (dom->dom_rtdetach == NULL)
			continue;
		family = dom->dom_family;
		for (int i = 0; i < V_rt_numfibs; i++) {
			rnh = rt_tables_get_rnh(i, family);
			/*
			 * grow_rtables() leaves a slot NULL when the domain
			 * has no dom_rtattach hook or when the hook failed;
			 * there is nothing to detach in that case.
			 */
			if (rnh == NULL)
				continue;
			dom->dom_rtdetach(rnh);
		}
	}
	RTABLES_UNLOCK();

	/*
	 * dom_rtdetach calls rt_table_destroy(), which
	 *  schedules deletion for all rtentries, nexthops and control
	 *  structures. Wait for the destruction callbacks to fire.
	 * Note that this should result in freeing all rtentries, but
	 *  nexthops deletions will be scheduled for the next epoch run
	 *  and will be completed after vnet teardown.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();

	free(V_rt_tables, M_RTABLE);
	vnet_rtzone_destroy();
#ifdef FIB_ALGO
	vnet_fib_destroy();
#endif
}
VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
    rtables_destroy, 0);
#endif
364 
365 static inline struct rib_head *
366 rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family)
367 {
368 	struct rib_head **prnh;
369 
370 	KASSERT(table < V_rt_numfibs,
371 	    ("%s: table out of bounds (%d < %d)", __func__, table,
372 	     V_rt_numfibs));
373 	KASSERT(family < (AF_MAX + 1),
374 	    ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1));
375 
376 	/* rnh is [fib=0][af=0]. */
377 	prnh = V_rt_tables;
378 	/* Get the offset to the requested table and fam. */
379 	prnh += table * (AF_MAX + 1) + family;
380 
381 	return (*prnh);
382 }
383 
384 struct rib_head *
385 rt_tables_get_rnh(uint32_t table, sa_family_t family)
386 {
387 
388 	return (rt_tables_get_rnh_ptr(table, family));
389 }
390 
391 struct rib_head *
392 rt_tables_get_rnh_safe(uint32_t table, sa_family_t family)
393 {
394 	if (__predict_false(table >= V_rt_numfibs))
395 		return (NULL);
396 	if (__predict_false(family >= (AF_MAX + 1)))
397 		return (NULL);
398 	return (rt_tables_get_rnh_ptr(table, family));
399 }
400 
401 u_int
402 rt_tables_get_gen(uint32_t table, sa_family_t family)
403 {
404 	struct rib_head *rnh;
405 
406 	rnh = rt_tables_get_rnh_ptr(table, family);
407 	KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d",
408 	    __func__, table, family));
409 	return (rnh->rnh_gen);
410 }
411