1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
/************************************************************************
 * Note: In this file a 'fib' is a "forwarding information base",	*
 * which is the new name for an in-kernel routing (next hop) table.	*
 ***********************************************************************/
35
36 #include <sys/cdefs.h>
37 #include "opt_route.h"
38
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <sys/systm.h>
42 #include <sys/malloc.h>
43 #include <sys/jail.h>
44 #include <sys/osd.h>
45 #include <sys/proc.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/sx.h>
51 #include <sys/domain.h>
52 #include <sys/sysproto.h>
53
54 #include <net/vnet.h>
55 #include <net/route.h>
56 #include <net/route/route_ctl.h>
57 #include <net/route/route_var.h>
58
/*
 * Compile-time default for the number of fibs.
 *
 * When the ROUTETABLES kernel config option is present it must lie in
 * the range 1..RT_MAXFIBS and becomes RT_NUMFIBS.  If RT_NUMFIBS is
 * still undefined afterwards, it defaults to a single routing table.
 */
#ifdef ROUTETABLES
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#elif ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* ROUTETABLES */

#ifndef RT_NUMFIBS
#define	RT_NUMFIBS	1
#endif
73
74 static void grow_rtables(uint32_t num_fibs);
75
76 VNET_DEFINE_STATIC(struct sx, rtables_lock);
77 #define V_rtables_lock VNET(rtables_lock)
78 #define RTABLES_LOCK() sx_xlock(&V_rtables_lock)
79 #define RTABLES_UNLOCK() sx_xunlock(&V_rtables_lock)
80 #define RTABLES_LOCK_INIT() sx_init(&V_rtables_lock, "rtables lock")
81 #define RTABLES_LOCK_ASSERT() sx_assert(&V_rtables_lock, SA_LOCKED)
82
83 VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
84 #define V_rt_tables VNET(rt_tables)
85
86 VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;
87
88 /*
89 * Handler for net.my_fibnum.
90 * Returns current fib of the process.
91 */
92 static int
sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)93 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
94 {
95 int fibnum;
96 int error;
97
98 fibnum = curthread->td_proc->p_fibnum;
99 error = sysctl_handle_int(oidp, &fibnum, 0, req);
100 return (error);
101 }
102 SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
103 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
104 &sysctl_my_fibnum, "I",
105 "default FIB of caller");
106
107 static uint32_t
normalize_num_rtables(uint32_t num_rtables)108 normalize_num_rtables(uint32_t num_rtables)
109 {
110
111 if (num_rtables > RT_MAXFIBS)
112 num_rtables = RT_MAXFIBS;
113 else if (num_rtables == 0)
114 num_rtables = 1;
115 return (num_rtables);
116 }
117
118 /*
119 * Sets the number of fibs in the current vnet.
120 * Function does not allow shrinking number of rtables.
121 */
122 static int
sysctl_fibs(SYSCTL_HANDLER_ARGS)123 sysctl_fibs(SYSCTL_HANDLER_ARGS)
124 {
125 uint32_t new_fibs;
126 int error;
127
128 RTABLES_LOCK();
129 new_fibs = V_rt_numfibs;
130 error = sysctl_handle_32(oidp, &new_fibs, 0, req);
131 if (error == 0) {
132 new_fibs = normalize_num_rtables(new_fibs);
133
134 if (new_fibs < V_rt_numfibs)
135 error = ENOTCAPABLE;
136 if (new_fibs > V_rt_numfibs)
137 grow_rtables(new_fibs);
138 }
139 RTABLES_UNLOCK();
140
141 return (error);
142 }
143 SYSCTL_PROC(_net, OID_AUTO, fibs,
144 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
145 NULL, 0, &sysctl_fibs, "IU",
146 "set number of fibs");
147
148 /*
149 * Sets fib of a current process.
150 */
151 int
sys_setfib(struct thread * td,struct setfib_args * uap)152 sys_setfib(struct thread *td, struct setfib_args *uap)
153 {
154 int error = 0;
155
156 CURVNET_SET(TD_TO_VNET(td));
157 if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs)
158 td->td_proc->p_fibnum = uap->fibnum;
159 else
160 error = EINVAL;
161 CURVNET_RESTORE();
162
163 return (error);
164 }
165
166 static int
rtables_check_proc_fib(void * obj,void * data)167 rtables_check_proc_fib(void *obj, void *data)
168 {
169 struct prison *pr = obj;
170 struct thread *td = data;
171 int error = 0;
172
173 if (TD_TO_VNET(td) != pr->pr_vnet) {
174 /* number of fibs may be lower in a new vnet */
175 CURVNET_SET(pr->pr_vnet);
176 if (td->td_proc->p_fibnum >= V_rt_numfibs)
177 error = EINVAL;
178 CURVNET_RESTORE();
179 }
180 return (error);
181 }
182
/*
 * Destructor handed to osd_jail_register() by rtables_init().
 * Intentionally does nothing with its argument.
 */
static void
rtables_prison_destructor(void *data)
{

	(void)data;
}
187
188 static void
rtables_init(void * dummy __unused)189 rtables_init(void *dummy __unused)
190 {
191 osd_method_t methods[PR_MAXMETHOD] = {
192 [PR_METHOD_ATTACH] = rtables_check_proc_fib,
193 };
194 osd_jail_register(rtables_prison_destructor, methods);
195 }
196 SYSINIT(rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtables_init, NULL);
197
198
199 /*
200 * If required, copy interface routes from existing tables to the
201 * newly-created routing table.
202 */
203 static void
populate_kernel_routes(struct rib_head ** new_rt_tables,struct rib_head * rh)204 populate_kernel_routes(struct rib_head **new_rt_tables, struct rib_head *rh)
205 {
206 for (int i = 0; i < V_rt_numfibs; i++) {
207 struct rib_head *rh_src = new_rt_tables[i * (AF_MAX + 1) + rh->rib_family];
208 if ((rh_src != NULL) && (rh_src != rh))
209 rib_copy_kernel_routes(rh_src, rh);
210 }
211 }
212
213 /*
214 * Grows up the number of routing tables in the current fib.
215 * Function creates new index array for all rtables and allocates
216 * remaining routing tables.
217 */
218 static void
grow_rtables(uint32_t num_tables)219 grow_rtables(uint32_t num_tables)
220 {
221 struct domain *dom;
222 struct rib_head **prnh, *rh;
223 struct rib_head **new_rt_tables, **old_rt_tables;
224 int family;
225
226 RTABLES_LOCK_ASSERT();
227
228 KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n",
229 num_tables, V_rt_numfibs));
230
231 new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *),
232 M_RTABLE, M_WAITOK | M_ZERO);
233
234 #ifdef FIB_ALGO
235 fib_grow_rtables(num_tables);
236 #endif
237
238 /*
239 * Current rt_tables layout:
240 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX]..
241 * this allows to copy existing tables data by using memcpy()
242 */
243 if (V_rt_tables != NULL)
244 memcpy(new_rt_tables, V_rt_tables,
245 V_rt_numfibs * (AF_MAX + 1) * sizeof(void *));
246
247 /* Populate the remainders */
248 SLIST_FOREACH(dom, &domains, dom_next) {
249 if (dom->dom_rtattach == NULL)
250 continue;
251 family = dom->dom_family;
252 for (int i = 0; i < num_tables; i++) {
253 prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
254 if (*prnh != NULL)
255 continue;
256 rh = dom->dom_rtattach(i);
257 if (rh == NULL)
258 log(LOG_ERR, "unable to create routing table for %d.%d\n",
259 dom->dom_family, i);
260 else
261 populate_kernel_routes(new_rt_tables, rh);
262 *prnh = rh;
263 }
264 }
265
266 /*
267 * Update rtables pointer.
268 * Ensure all writes to new_rt_tables has been completed before
269 * switching pointer.
270 */
271 atomic_thread_fence_rel();
272 old_rt_tables = V_rt_tables;
273 V_rt_tables = new_rt_tables;
274
275 /* Wait till all cpus see new pointers */
276 atomic_thread_fence_rel();
277 NET_EPOCH_WAIT();
278
279 /* Set number of fibs to a new value */
280 V_rt_numfibs = num_tables;
281
282 #ifdef FIB_ALGO
283 /* Attach fib algo to the new rtables */
284 SLIST_FOREACH(dom, &domains, dom_next) {
285 if (dom->dom_rtattach != NULL)
286 fib_setup_family(dom->dom_family, num_tables);
287 }
288 #endif
289
290 if (old_rt_tables != NULL)
291 free(old_rt_tables, M_RTABLE);
292 }
293
294 static void
vnet_rtables_init(const void * unused __unused)295 vnet_rtables_init(const void *unused __unused)
296 {
297 int num_rtables_base;
298
299 if (IS_DEFAULT_VNET(curvnet)) {
300 num_rtables_base = RT_NUMFIBS;
301 TUNABLE_INT_FETCH("net.fibs", &num_rtables_base);
302 V_rt_numfibs = normalize_num_rtables(num_rtables_base);
303 } else
304 V_rt_numfibs = 1;
305
306 vnet_rtzone_init();
307 #ifdef FIB_ALGO
308 vnet_fib_init();
309 #endif
310 RTABLES_LOCK_INIT();
311
312 RTABLES_LOCK();
313 grow_rtables(V_rt_numfibs);
314 RTABLES_UNLOCK();
315 }
316 VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
317 vnet_rtables_init, NULL);
318
319 #ifdef VIMAGE
320 static void
rtables_destroy(const void * unused __unused)321 rtables_destroy(const void *unused __unused)
322 {
323 struct rib_head *rnh;
324 struct domain *dom;
325 int family;
326
327 RTABLES_LOCK();
328 SLIST_FOREACH(dom, &domains, dom_next) {
329 if (dom->dom_rtdetach == NULL)
330 continue;
331 family = dom->dom_family;
332 for (int i = 0; i < V_rt_numfibs; i++) {
333 rnh = rt_tables_get_rnh(i, family);
334 dom->dom_rtdetach(rnh);
335 }
336 }
337 RTABLES_UNLOCK();
338
339 /*
340 * dom_rtdetach calls rt_table_destroy(), which
341 * schedules deletion for all rtentries, nexthops and control
342 * structures. Wait for the destruction callbacks to fire.
343 * Note that this should result in freeing all rtentries, but
344 * nexthops deletions will be scheduled for the next epoch run
345 * and will be completed after vnet teardown.
346 */
347 NET_EPOCH_DRAIN_CALLBACKS();
348
349 free(V_rt_tables, M_RTABLE);
350 vnet_rtzone_destroy();
351 #ifdef FIB_ALGO
352 vnet_fib_destroy();
353 #endif
354 }
355 VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
356 rtables_destroy, NULL);
357 #endif
358
359 static inline struct rib_head *
rt_tables_get_rnh_ptr(uint32_t table,sa_family_t family)360 rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family)
361 {
362 struct rib_head **prnh;
363
364 KASSERT(table < V_rt_numfibs,
365 ("%s: table out of bounds (%d < %d)", __func__, table,
366 V_rt_numfibs));
367 KASSERT(family < (AF_MAX + 1),
368 ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1));
369
370 /* rnh is [fib=0][af=0]. */
371 prnh = V_rt_tables;
372 /* Get the offset to the requested table and fam. */
373 prnh += table * (AF_MAX + 1) + family;
374
375 return (*prnh);
376 }
377
378 struct rib_head *
rt_tables_get_rnh(uint32_t table,sa_family_t family)379 rt_tables_get_rnh(uint32_t table, sa_family_t family)
380 {
381
382 return (rt_tables_get_rnh_ptr(table, family));
383 }
384
385 struct rib_head *
rt_tables_get_rnh_safe(uint32_t table,sa_family_t family)386 rt_tables_get_rnh_safe(uint32_t table, sa_family_t family)
387 {
388 if (__predict_false(table >= V_rt_numfibs))
389 return (NULL);
390 if (__predict_false(family >= (AF_MAX + 1)))
391 return (NULL);
392 return (rt_tables_get_rnh_ptr(table, family));
393 }
394
395 u_int
rt_tables_get_gen(uint32_t table,sa_family_t family)396 rt_tables_get_gen(uint32_t table, sa_family_t family)
397 {
398 struct rib_head *rnh;
399
400 rnh = rt_tables_get_rnh_ptr(table, family);
401 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d",
402 __func__, table, family));
403 return (rnh->rnh_gen);
404 }
405