1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /************************************************************************ 32 * Note: In this file a 'fib' is a "forwarding information base" * 33 * Which is the new name for an in kernel routing (next hop) table. 
 ***********************************************************************/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_route.h"

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/jail.h>
#include <sys/osd.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/domain.h>
#include <sys/sysproto.h>

#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>

/*
 * Kernel config default option.
 * When ROUTETABLES is defined it must lie in [1, RT_MAXFIBS]; it then
 * becomes the compile-time default number of fibs (RT_NUMFIBS).
 */
#ifdef ROUTETABLES
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* ROUTETABLES */
/* Initialize to default if not otherwise set. */
#ifndef	RT_NUMFIBS
#define	RT_NUMFIBS	1
#endif

/* Forward declaration: sysctl_fibs() calls this before it is defined. */
static void grow_rtables(uint32_t num_fibs);

/*
 * Per-vnet sx lock held around changes to the routing table array.
 * RTABLES_LOCK() always takes it exclusively (sx_xlock);
 * RTABLES_LOCK_ASSERT() only checks that it is held (SA_LOCKED).
 */
VNET_DEFINE_STATIC(struct sx, rtables_lock);
#define	V_rtables_lock		VNET(rtables_lock)
#define	RTABLES_LOCK()		sx_xlock(&V_rtables_lock)
#define	RTABLES_UNLOCK()	sx_xunlock(&V_rtables_lock)
#define	RTABLES_LOCK_INIT()	sx_init(&V_rtables_lock, "rtables lock")
#define	RTABLES_LOCK_ASSERT()	sx_assert(&V_rtables_lock, SA_LOCKED)

/*
 * Per-vnet flat array of rib_head pointers, indexed as
 * fib * (AF_MAX + 1) + family; replaced wholesale by grow_rtables().
 */
VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
#define	V_rt_tables		VNET(rt_tables)

/* Per-vnet number of fibs; starts at the compile-time default. */
VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;

/*
 * Handler for net.my_fibnum.
 * Returns current fib of the process.
92 */ 93 static int 94 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 95 { 96 int fibnum; 97 int error; 98 99 fibnum = curthread->td_proc->p_fibnum; 100 error = sysctl_handle_int(oidp, &fibnum, 0, req); 101 return (error); 102 } 103 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, 104 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 105 &sysctl_my_fibnum, "I", 106 "default FIB of caller"); 107 108 static uint32_t 109 normalize_num_rtables(uint32_t num_rtables) 110 { 111 112 if (num_rtables > RT_MAXFIBS) 113 num_rtables = RT_MAXFIBS; 114 else if (num_rtables == 0) 115 num_rtables = 1; 116 return (num_rtables); 117 } 118 119 /* 120 * Sets the number of fibs in the current vnet. 121 * Function does not allow shrinking number of rtables. 122 */ 123 static int 124 sysctl_fibs(SYSCTL_HANDLER_ARGS) 125 { 126 uint32_t new_fibs; 127 int error; 128 129 RTABLES_LOCK(); 130 new_fibs = V_rt_numfibs; 131 error = sysctl_handle_32(oidp, &new_fibs, 0, req); 132 if (error == 0) { 133 new_fibs = normalize_num_rtables(new_fibs); 134 135 if (new_fibs < V_rt_numfibs) 136 error = ENOTCAPABLE; 137 if (new_fibs > V_rt_numfibs) 138 grow_rtables(new_fibs); 139 } 140 RTABLES_UNLOCK(); 141 142 return (error); 143 } 144 SYSCTL_PROC(_net, OID_AUTO, fibs, 145 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 146 NULL, 0, &sysctl_fibs, "IU", 147 "set number of fibs"); 148 149 /* 150 * Sets fib of a current process. 
151 */ 152 int 153 sys_setfib(struct thread *td, struct setfib_args *uap) 154 { 155 int error = 0; 156 157 CURVNET_SET(TD_TO_VNET(td)); 158 if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs) 159 td->td_proc->p_fibnum = uap->fibnum; 160 else 161 error = EINVAL; 162 CURVNET_RESTORE(); 163 164 return (error); 165 } 166 167 static int 168 rtables_check_proc_fib(void *obj, void *data) 169 { 170 struct prison *pr = obj; 171 struct thread *td = data; 172 int error = 0; 173 174 if (TD_TO_VNET(td) != pr->pr_vnet) { 175 /* number of fibs may be lower in a new vnet */ 176 CURVNET_SET(pr->pr_vnet); 177 if (td->td_proc->p_fibnum >= V_rt_numfibs) 178 error = EINVAL; 179 CURVNET_RESTORE(); 180 } 181 return (error); 182 } 183 184 static void 185 rtables_prison_destructor(void *data) 186 { 187 } 188 189 static void 190 rtables_init(void) 191 { 192 osd_method_t methods[PR_MAXMETHOD] = { 193 [PR_METHOD_ATTACH] = rtables_check_proc_fib, 194 }; 195 osd_jail_register(rtables_prison_destructor, methods); 196 } 197 SYSINIT(rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtables_init, NULL); 198 199 200 /* 201 * If required, copy interface routes from existing tables to the 202 * newly-created routing table. 203 */ 204 static void 205 populate_kernel_routes(struct rib_head **new_rt_tables, struct rib_head *rh) 206 { 207 for (int i = 0; i < V_rt_numfibs; i++) { 208 struct rib_head *rh_src = new_rt_tables[i * (AF_MAX + 1) + rh->rib_family]; 209 if ((rh_src != NULL) && (rh_src != rh)) 210 rib_copy_kernel_routes(rh_src, rh); 211 } 212 } 213 214 /* 215 * Grows up the number of routing tables in the current fib. 216 * Function creates new index array for all rtables and allocates 217 * remaining routing tables. 
218 */ 219 static void 220 grow_rtables(uint32_t num_tables) 221 { 222 struct domain *dom; 223 struct rib_head **prnh, *rh; 224 struct rib_head **new_rt_tables, **old_rt_tables; 225 int family; 226 227 RTABLES_LOCK_ASSERT(); 228 229 KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n", 230 num_tables, V_rt_numfibs)); 231 232 new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *), 233 M_RTABLE, M_WAITOK | M_ZERO); 234 235 if ((num_tables > 1) && (V_rt_add_addr_allfibs == 0)) 236 printf("WARNING: Adding ifaddrs to all fibs has been turned off " 237 "by default. Consider tuning %s if needed\n", 238 "net.add_addr_allfibs"); 239 240 #ifdef FIB_ALGO 241 fib_grow_rtables(num_tables); 242 #endif 243 244 /* 245 * Current rt_tables layout: 246 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX].. 247 * this allows to copy existing tables data by using memcpy() 248 */ 249 if (V_rt_tables != NULL) 250 memcpy(new_rt_tables, V_rt_tables, 251 V_rt_numfibs * (AF_MAX + 1) * sizeof(void *)); 252 253 /* Populate the remainders */ 254 SLIST_FOREACH(dom, &domains, dom_next) { 255 if (dom->dom_rtattach == NULL) 256 continue; 257 family = dom->dom_family; 258 for (int i = 0; i < num_tables; i++) { 259 prnh = &new_rt_tables[i * (AF_MAX + 1) + family]; 260 if (*prnh != NULL) 261 continue; 262 rh = dom->dom_rtattach(i); 263 if (rh == NULL) 264 log(LOG_ERR, "unable to create routing table for %d.%d\n", 265 dom->dom_family, i); 266 else 267 populate_kernel_routes(new_rt_tables, rh); 268 *prnh = rh; 269 } 270 } 271 272 /* 273 * Update rtables pointer. 274 * Ensure all writes to new_rt_tables has been completed before 275 * switching pointer. 
276 */ 277 atomic_thread_fence_rel(); 278 old_rt_tables = V_rt_tables; 279 V_rt_tables = new_rt_tables; 280 281 /* Wait till all cpus see new pointers */ 282 atomic_thread_fence_rel(); 283 NET_EPOCH_WAIT(); 284 285 /* Set number of fibs to a new value */ 286 V_rt_numfibs = num_tables; 287 288 #ifdef FIB_ALGO 289 /* Attach fib algo to the new rtables */ 290 SLIST_FOREACH(dom, &domains, dom_next) { 291 if (dom->dom_rtattach != NULL) 292 fib_setup_family(dom->dom_family, num_tables); 293 } 294 #endif 295 296 if (old_rt_tables != NULL) 297 free(old_rt_tables, M_RTABLE); 298 } 299 300 static void 301 vnet_rtables_init(const void *unused __unused) 302 { 303 int num_rtables_base; 304 305 if (IS_DEFAULT_VNET(curvnet)) { 306 num_rtables_base = RT_NUMFIBS; 307 TUNABLE_INT_FETCH("net.fibs", &num_rtables_base); 308 V_rt_numfibs = normalize_num_rtables(num_rtables_base); 309 } else 310 V_rt_numfibs = 1; 311 312 vnet_rtzone_init(); 313 #ifdef FIB_ALGO 314 vnet_fib_init(); 315 #endif 316 RTABLES_LOCK_INIT(); 317 318 RTABLES_LOCK(); 319 grow_rtables(V_rt_numfibs); 320 RTABLES_UNLOCK(); 321 } 322 VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 323 vnet_rtables_init, 0); 324 325 #ifdef VIMAGE 326 static void 327 rtables_destroy(const void *unused __unused) 328 { 329 struct rib_head *rnh; 330 struct domain *dom; 331 int family; 332 333 RTABLES_LOCK(); 334 SLIST_FOREACH(dom, &domains, dom_next) { 335 if (dom->dom_rtdetach == NULL) 336 continue; 337 family = dom->dom_family; 338 for (int i = 0; i < V_rt_numfibs; i++) { 339 rnh = rt_tables_get_rnh(i, family); 340 dom->dom_rtdetach(rnh); 341 } 342 } 343 RTABLES_UNLOCK(); 344 345 /* 346 * dom_rtdetach calls rt_table_destroy(), which 347 * schedules deletion for all rtentries, nexthops and control 348 * structures. Wait for the destruction callbacks to fire. 
349 * Note that this should result in freeing all rtentries, but 350 * nexthops deletions will be scheduled for the next epoch run 351 * and will be completed after vnet teardown. 352 */ 353 NET_EPOCH_DRAIN_CALLBACKS(); 354 355 free(V_rt_tables, M_RTABLE); 356 vnet_rtzone_destroy(); 357 #ifdef FIB_ALGO 358 vnet_fib_destroy(); 359 #endif 360 } 361 VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, 362 rtables_destroy, 0); 363 #endif 364 365 static inline struct rib_head * 366 rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family) 367 { 368 struct rib_head **prnh; 369 370 KASSERT(table < V_rt_numfibs, 371 ("%s: table out of bounds (%d < %d)", __func__, table, 372 V_rt_numfibs)); 373 KASSERT(family < (AF_MAX + 1), 374 ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1)); 375 376 /* rnh is [fib=0][af=0]. */ 377 prnh = V_rt_tables; 378 /* Get the offset to the requested table and fam. */ 379 prnh += table * (AF_MAX + 1) + family; 380 381 return (*prnh); 382 } 383 384 struct rib_head * 385 rt_tables_get_rnh(uint32_t table, sa_family_t family) 386 { 387 388 return (rt_tables_get_rnh_ptr(table, family)); 389 } 390 391 struct rib_head * 392 rt_tables_get_rnh_safe(uint32_t table, sa_family_t family) 393 { 394 if (__predict_false(table >= V_rt_numfibs)) 395 return (NULL); 396 if (__predict_false(family >= (AF_MAX + 1))) 397 return (NULL); 398 return (rt_tables_get_rnh_ptr(table, family)); 399 } 400 401 u_int 402 rt_tables_get_gen(uint32_t table, sa_family_t family) 403 { 404 struct rib_head *rnh; 405 406 rnh = rt_tables_get_rnh_ptr(table, family); 407 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d", 408 __func__, table, family)); 409 return (rnh->rnh_gen); 410 } 411