xref: /freebsd/sys/net/route/route_tables.c (revision 13a7663c521ac3541756aa1157dcfce9f0ce78e8)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 /************************************************************************
32  * Note: In this file a 'fib' is a "forwarding information base"	*
33  * Which is the new name for an in kernel routing (next hop) table.	*
34  ***********************************************************************/
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 #include "opt_route.h"
39 
40 #include <sys/param.h>
41 #include <sys/socket.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/jail.h>
45 #include <sys/proc.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/sx.h>
51 #include <sys/domain.h>
52 #include <sys/sysproto.h>
53 
54 #include <net/vnet.h>
55 #include <net/route.h>
56 #include <net/route/route_var.h>
57 
/*
 * Compile-time default for the number of fibs (RT_NUMFIBS).
 *
 * ROUTETABLES is the kernel config option supplying the default.  When it
 * is defined it must be in the range 1..RT_MAXFIBS, otherwise the build
 * is stopped with an #error.
 */
#ifdef ROUTETABLES
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* ROUTETABLES */
/* Fall back to a single fib when ROUTETABLES did not provide a default. */
#ifndef	RT_NUMFIBS
#define	RT_NUMFIBS	1
#endif
72 
/* Forward declaration: used by the net.fibs handler before its definition. */
static void grow_rtables(uint32_t num_fibs);

/*
 * Per-vnet sx lock.  In this file it is taken exclusively around changes
 * to the number of fibs (sysctl handler, vnet init) and around the
 * detach loop at vnet teardown.
 */
VNET_DEFINE_STATIC(struct sx, rtables_lock);
#define	V_rtables_lock		VNET(rtables_lock)
#define	RTABLES_LOCK()		sx_xlock(&V_rtables_lock)
#define	RTABLES_UNLOCK()	sx_xunlock(&V_rtables_lock)
#define	RTABLES_LOCK_INIT()	sx_init(&V_rtables_lock, "rtables lock")
#define	RTABLES_LOCK_ASSERT()	sx_assert(&V_rtables_lock, SA_LOCKED)

/*
 * Per-vnet flat array of rib_head pointers.  The slot for a given
 * (fib, family) pair is at index fib * (AF_MAX + 1) + family.
 */
VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
#define	V_rt_tables	VNET(rt_tables)

/*
 * Per-vnet number of fibs, statically initialised to the compile-time
 * default.  Referenced in this file as V_rt_numfibs (accessor macro is
 * presumably provided by a header -- not visible here).
 */
VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;
86 
87 /*
88  * Handler for net.my_fibnum.
89  * Returns current fib of the process.
90  */
91 static int
92 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
93 {
94         int fibnum;
95         int error;
96 
97         fibnum = curthread->td_proc->p_fibnum;
98         error = sysctl_handle_int(oidp, &fibnum, 0, req);
99         return (error);
100 }
101 SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
102     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
103     &sysctl_my_fibnum, "I",
104     "default FIB of caller");
105 
106 static uint32_t
107 normalize_num_rtables(uint32_t num_rtables)
108 {
109 
110 	if (num_rtables > RT_MAXFIBS)
111 		num_rtables = RT_MAXFIBS;
112 	else if (num_rtables == 0)
113 		num_rtables = 1;
114 	return (num_rtables);
115 }
116 
117 /*
118  * Sets the number of fibs in the current vnet.
119  * Function does not allow shrinking number of rtables.
120  */
121 static int
122 sysctl_fibs(SYSCTL_HANDLER_ARGS)
123 {
124 	uint32_t new_fibs;
125 	int error;
126 
127 	RTABLES_LOCK();
128 	new_fibs = V_rt_numfibs;
129 	error = sysctl_handle_32(oidp, &new_fibs, 0, req);
130 	if (error == 0) {
131 		new_fibs = normalize_num_rtables(new_fibs);
132 
133 		if (new_fibs < V_rt_numfibs)
134 			error = ENOTCAPABLE;
135 		if (new_fibs > V_rt_numfibs)
136 			grow_rtables(new_fibs);
137 	}
138 	RTABLES_UNLOCK();
139 
140 	return (error);
141 }
142 SYSCTL_PROC(_net, OID_AUTO, fibs,
143     CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
144     NULL, 0, &sysctl_fibs, "IU",
145     "set number of fibs");
146 
147 /*
148  * Sets fib of a current process.
149  */
150 int
151 sys_setfib(struct thread *td, struct setfib_args *uap)
152 {
153 	int error = 0;
154 
155 	CURVNET_SET(TD_TO_VNET(td));
156 	if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs)
157 		td->td_proc->p_fibnum = uap->fibnum;
158 	else
159 		error = EINVAL;
160 	CURVNET_RESTORE();
161 
162 	return (error);
163 }
164 
/*
 * Grows the number of routing tables in the current vnet to num_tables.
 *
 * A new index array covering num_tables fibs is allocated, the existing
 * entries are copied into it, and a rib_head is attached for every
 * still-empty (fib, family) slot of each domain that provides
 * dom_rtattach.  The new array is then published in V_rt_tables,
 * V_rt_numfibs is updated, and the old array is freed.
 *
 * The caller must hold the rtables lock, and num_tables must not be
 * smaller than the current V_rt_numfibs (both are only asserted).
 */
static void
grow_rtables(uint32_t num_tables)
{
	struct domain *dom;
	struct rib_head **prnh, *rh;
	struct rib_head **new_rt_tables, **old_rt_tables;
	int family;

	RTABLES_LOCK_ASSERT();

	KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n",
				num_tables, V_rt_numfibs));

	/*
	 * One pointer slot per (fib, family) pair.  The array is zero-filled
	 * (M_ZERO), so slots never populated below stay NULL.  The result is
	 * used without a NULL check (allocation is M_WAITOK).
	 */
	new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *),
	    M_RTABLE, M_WAITOK | M_ZERO);

	if ((num_tables > 1) && (V_rt_add_addr_allfibs == 0))
		printf("WARNING: Adding ifaddrs to all fibs has been turned off "
			"by default. Consider tuning %s if needed\n",
			"net.add_addr_allfibs");

#ifdef FIB_ALGO
	fib_grow_rtables(num_tables);
#endif

	/*
	 * Current rt_tables layout:
	 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., AF_MAX]..
	 * This allows the existing tables' pointers to be carried over with
	 * a single memcpy() of the first V_rt_numfibs fibs.
	 */
	if (V_rt_tables != NULL)
		memcpy(new_rt_tables, V_rt_tables,
		    V_rt_numfibs * (AF_MAX + 1) * sizeof(void *));

	/*
	 * Populate the remaining slots: for every domain that can attach a
	 * routing table, create one for each fib whose slot is still empty.
	 */
	for (dom = domains; dom; dom = dom->dom_next) {
		if (dom->dom_rtattach == NULL)
			continue;
		family = dom->dom_family;
		for (int i = 0; i < num_tables; i++) {
			prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
			/* Slot already filled (copied from the old array). */
			if (*prnh != NULL)
				continue;
			rh = dom->dom_rtattach(i);
			/* Failure is only logged; the slot is left NULL. */
			if (rh == NULL)
				log(LOG_ERR, "unable to create routing table for %d.%d\n",
				    dom->dom_family, i);
			*prnh = rh;
		}
	}

	/*
	 * Update the rtables pointer.
	 * Ensure all writes to new_rt_tables have been completed before
	 *  switching the pointer.
	 */
	atomic_thread_fence_rel();
	old_rt_tables = V_rt_tables;
	V_rt_tables = new_rt_tables;

	/* Wait till all cpus see new pointers */
	atomic_thread_fence_rel();
	epoch_wait_preempt(net_epoch_preempt);

	/*
	 * Set number of fibs to a new value.
	 * NOTE(review): done only after the pointer switch, presumably so
	 * that the larger count is never paired with the old, smaller
	 * array -- confirm against readers of V_rt_numfibs.
	 */
	V_rt_numfibs = num_tables;

#ifdef FIB_ALGO
	/* Attach fib algo to the new rtables */
	for (dom = domains; dom; dom = dom->dom_next) {
		if (dom->dom_rtattach != NULL)
			fib_setup_family(dom->dom_family, num_tables);
	}
#endif

	/* The old index array is released only after the epoch wait above. */
	if (old_rt_tables != NULL)
		free(old_rt_tables, M_RTABLE);
}
248 
249 static void
250 vnet_rtables_init(const void *unused __unused)
251 {
252 	int num_rtables_base;
253 
254 	if (IS_DEFAULT_VNET(curvnet)) {
255 		num_rtables_base = RT_NUMFIBS;
256 		TUNABLE_INT_FETCH("net.fibs", &num_rtables_base);
257 		V_rt_numfibs = normalize_num_rtables(num_rtables_base);
258 	} else
259 		V_rt_numfibs = 1;
260 
261 	vnet_rtzone_init();
262 #ifdef FIB_ALGO
263 	vnet_fib_init();
264 #endif
265 	RTABLES_LOCK_INIT();
266 
267 	RTABLES_LOCK();
268 	grow_rtables(V_rt_numfibs);
269 	RTABLES_UNLOCK();
270 }
271 VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
272     vnet_rtables_init, 0);
273 
#ifdef VIMAGE
/*
 * Vnet teardown: detaches every routing table of the current vnet,
 * waits for the scheduled destruction callbacks, then frees the table
 * index array and the remaining per-vnet routing state.
 */
static void
rtables_destroy(const void *unused __unused)
{
	struct rib_head *rnh;
	struct domain *dom;
	int family;

	RTABLES_LOCK();
	for (dom = domains; dom; dom = dom->dom_next) {
		if (dom->dom_rtdetach == NULL)
			continue;
		family = dom->dom_family;
		for (uint32_t i = 0; i < V_rt_numfibs; i++) {
			rnh = rt_tables_get_rnh(i, family);
			/*
			 * grow_rtables() leaves a slot NULL when
			 * dom_rtattach fails (or the domain has none),
			 * so there may be nothing to detach here.
			 */
			if (rnh == NULL)
				continue;
			dom->dom_rtdetach(rnh);
		}
	}
	RTABLES_UNLOCK();

	/*
	 * dom_rtdetach calls rt_table_destroy(), which
	 *  schedules deletion for all rtentries, nexthops and control
	 *  structures. Wait for the destruction callbacks to fire.
	 * Note that this should result in freeing all rtentries, but
	 *  nexthops deletions will be scheduled for the next epoch run
	 *  and will be completed after vnet teardown.
	 */
	NET_EPOCH_DRAIN_CALLBACKS();

	free(V_rt_tables, M_RTABLE);
	vnet_rtzone_destroy();
#ifdef FIB_ALGO
	vnet_fib_destroy();
#endif
}
VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
    rtables_destroy, 0);
#endif
313 
314 static inline struct rib_head *
315 rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family)
316 {
317 	struct rib_head **prnh;
318 
319 	KASSERT(table < V_rt_numfibs,
320 	    ("%s: table out of bounds (%d < %d)", __func__, table,
321 	     V_rt_numfibs));
322 	KASSERT(family < (AF_MAX + 1),
323 	    ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1));
324 
325 	/* rnh is [fib=0][af=0]. */
326 	prnh = V_rt_tables;
327 	/* Get the offset to the requested table and fam. */
328 	prnh += table * (AF_MAX + 1) + family;
329 
330 	return (*prnh);
331 }
332 
/*
 * Exported accessor: returns the rib_head pointer for the given fib
 * (table) and address family in the current vnet, as looked up by
 * rt_tables_get_rnh_ptr().
 */
struct rib_head *
rt_tables_get_rnh(uint32_t table, sa_family_t family)
{

	return (rt_tables_get_rnh_ptr(table, family));
}
339 
340 u_int
341 rt_tables_get_gen(uint32_t table, sa_family_t family)
342 {
343 	struct rib_head *rnh;
344 
345 	rnh = rt_tables_get_rnh_ptr(table, family);
346 	KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d",
347 	    __func__, table, family));
348 	return (rnh->rnh_gen);
349 }
350