1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1980, 1986, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 /************************************************************************ 32 * Note: In this file a 'fib' is a "forwarding information base" * 33 * Which is the new name for an in kernel routing (next hop) table. * 34 ***********************************************************************/ 35 36 #include <sys/cdefs.h> 37 __FBSDID("$FreeBSD$"); 38 #include "opt_route.h" 39 40 #include <sys/param.h> 41 #include <sys/socket.h> 42 #include <sys/systm.h> 43 #include <sys/malloc.h> 44 #include <sys/jail.h> 45 #include <sys/proc.h> 46 #include <sys/sysctl.h> 47 #include <sys/syslog.h> 48 #include <sys/kernel.h> 49 #include <sys/lock.h> 50 #include <sys/sx.h> 51 #include <sys/domain.h> 52 #include <sys/sysproto.h> 53 54 #include <net/vnet.h> 55 #include <net/route.h> 56 #include <net/route/route_var.h> 57 58 /* Kernel config default option. */ 59 #ifdef ROUTETABLES 60 #if ROUTETABLES <= 0 61 #error "ROUTETABLES defined too low" 62 #endif 63 #if ROUTETABLES > RT_MAXFIBS 64 #error "ROUTETABLES defined too big" 65 #endif 66 #define RT_NUMFIBS ROUTETABLES 67 #endif /* ROUTETABLES */ 68 /* Initialize to default if not otherwise set. */ 69 #ifndef RT_NUMFIBS 70 #define RT_NUMFIBS 1 71 #endif 72 73 static void grow_rtables(uint32_t num_fibs); 74 75 VNET_DEFINE_STATIC(struct sx, rtables_lock); 76 #define V_rtables_lock VNET(rtables_lock) 77 #define RTABLES_LOCK() sx_xlock(&V_rtables_lock) 78 #define RTABLES_UNLOCK() sx_xunlock(&V_rtables_lock) 79 #define RTABLES_LOCK_INIT() sx_init(&V_rtables_lock, "rtables lock") 80 #define RTABLES_LOCK_ASSERT() sx_assert(&V_rtables_lock, SA_LOCKED) 81 82 VNET_DEFINE_STATIC(struct rib_head **, rt_tables); 83 #define V_rt_tables VNET(rt_tables) 84 85 VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS; 86 87 /* 88 * Handler for net.my_fibnum. 89 * Returns current fib of the process. 90 */ 91 static int 92 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 93 { 94 int fibnum; 95 int error; 96 97 fibnum = curthread->td_proc->p_fibnum; 98 error = sysctl_handle_int(oidp, &fibnum, 0, req); 99 return (error); 100 } 101 SYSCTL_PROC(_net, OID_AUTO, my_fibnum, 102 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, 103 &sysctl_my_fibnum, "I", 104 "default FIB of caller"); 105 106 static uint32_t 107 normalize_num_rtables(uint32_t num_rtables) 108 { 109 110 if (num_rtables > RT_MAXFIBS) 111 num_rtables = RT_MAXFIBS; 112 else if (num_rtables == 0) 113 num_rtables = 1; 114 return (num_rtables); 115 } 116 117 /* 118 * Sets the number of fibs in the current vnet. 119 * Function does not allow shrinking number of rtables. 120 */ 121 static int 122 sysctl_fibs(SYSCTL_HANDLER_ARGS) 123 { 124 uint32_t new_fibs; 125 int error; 126 127 RTABLES_LOCK(); 128 new_fibs = V_rt_numfibs; 129 error = sysctl_handle_32(oidp, &new_fibs, 0, req); 130 if (error == 0) { 131 new_fibs = normalize_num_rtables(new_fibs); 132 133 if (new_fibs < V_rt_numfibs) 134 error = ENOTCAPABLE; 135 if (new_fibs > V_rt_numfibs) 136 grow_rtables(new_fibs); 137 } 138 RTABLES_UNLOCK(); 139 140 return (error); 141 } 142 SYSCTL_PROC(_net, OID_AUTO, fibs, 143 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, 144 NULL, 0, &sysctl_fibs, "IU", 145 "set number of fibs"); 146 147 /* 148 * Sets fib of a current process. 149 */ 150 int 151 sys_setfib(struct thread *td, struct setfib_args *uap) 152 { 153 int error = 0; 154 155 CURVNET_SET(TD_TO_VNET(td)); 156 if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs) 157 td->td_proc->p_fibnum = uap->fibnum; 158 else 159 error = EINVAL; 160 CURVNET_RESTORE(); 161 162 return (error); 163 } 164 165 /* 166 * If required, copy interface routes from existing tables to the 167 * newly-created routing table. 168 */ 169 static void 170 populate_kernel_routes(struct rib_head **new_rt_tables, struct rib_head *rh) 171 { 172 for (int i = 0; i < V_rt_numfibs; i++) { 173 struct rib_head *rh_src = new_rt_tables[i * (AF_MAX + 1) + rh->rib_family]; 174 if ((rh_src != NULL) && (rh_src != rh)) 175 rib_copy_kernel_routes(rh_src, rh); 176 } 177 } 178 179 /* 180 * Grows up the number of routing tables in the current fib. 181 * Function creates new index array for all rtables and allocates 182 * remaining routing tables. 183 */ 184 static void 185 grow_rtables(uint32_t num_tables) 186 { 187 struct domain *dom; 188 struct rib_head **prnh, *rh; 189 struct rib_head **new_rt_tables, **old_rt_tables; 190 int family; 191 192 RTABLES_LOCK_ASSERT(); 193 194 KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n", 195 num_tables, V_rt_numfibs)); 196 197 new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *), 198 M_RTABLE, M_WAITOK | M_ZERO); 199 200 if ((num_tables > 1) && (V_rt_add_addr_allfibs == 0)) 201 printf("WARNING: Adding ifaddrs to all fibs has been turned off " 202 "by default. Consider tuning %s if needed\n", 203 "net.add_addr_allfibs"); 204 205 #ifdef FIB_ALGO 206 fib_grow_rtables(num_tables); 207 #endif 208 209 /* 210 * Current rt_tables layout: 211 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX].. 212 * this allows to copy existing tables data by using memcpy() 213 */ 214 if (V_rt_tables != NULL) 215 memcpy(new_rt_tables, V_rt_tables, 216 V_rt_numfibs * (AF_MAX + 1) * sizeof(void *)); 217 218 /* Populate the remainders */ 219 SLIST_FOREACH(dom, &domains, dom_next) { 220 if (dom->dom_rtattach == NULL) 221 continue; 222 family = dom->dom_family; 223 for (int i = 0; i < num_tables; i++) { 224 prnh = &new_rt_tables[i * (AF_MAX + 1) + family]; 225 if (*prnh != NULL) 226 continue; 227 rh = dom->dom_rtattach(i); 228 if (rh == NULL) 229 log(LOG_ERR, "unable to create routing table for %d.%d\n", 230 dom->dom_family, i); 231 else 232 populate_kernel_routes(new_rt_tables, rh); 233 *prnh = rh; 234 } 235 } 236 237 /* 238 * Update rtables pointer. 239 * Ensure all writes to new_rt_tables has been completed before 240 * switching pointer. 241 */ 242 atomic_thread_fence_rel(); 243 old_rt_tables = V_rt_tables; 244 V_rt_tables = new_rt_tables; 245 246 /* Wait till all cpus see new pointers */ 247 atomic_thread_fence_rel(); 248 epoch_wait_preempt(net_epoch_preempt); 249 250 /* Set number of fibs to a new value */ 251 V_rt_numfibs = num_tables; 252 253 #ifdef FIB_ALGO 254 /* Attach fib algo to the new rtables */ 255 SLIST_FOREACH(dom, &domains, dom_next) { 256 if (dom->dom_rtattach != NULL) 257 fib_setup_family(dom->dom_family, num_tables); 258 } 259 #endif 260 261 if (old_rt_tables != NULL) 262 free(old_rt_tables, M_RTABLE); 263 } 264 265 static void 266 vnet_rtables_init(const void *unused __unused) 267 { 268 int num_rtables_base; 269 270 if (IS_DEFAULT_VNET(curvnet)) { 271 num_rtables_base = RT_NUMFIBS; 272 TUNABLE_INT_FETCH("net.fibs", &num_rtables_base); 273 V_rt_numfibs = normalize_num_rtables(num_rtables_base); 274 } else 275 V_rt_numfibs = 1; 276 277 vnet_rtzone_init(); 278 #ifdef FIB_ALGO 279 vnet_fib_init(); 280 #endif 281 RTABLES_LOCK_INIT(); 282 283 RTABLES_LOCK(); 284 grow_rtables(V_rt_numfibs); 285 RTABLES_UNLOCK(); 286 } 287 VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 288 vnet_rtables_init, 0); 289 290 #ifdef VIMAGE 291 static void 292 rtables_destroy(const void *unused __unused) 293 { 294 struct rib_head *rnh; 295 struct domain *dom; 296 int family; 297 298 RTABLES_LOCK(); 299 SLIST_FOREACH(dom, &domains, dom_next) { 300 if (dom->dom_rtdetach == NULL) 301 continue; 302 family = dom->dom_family; 303 for (int i = 0; i < V_rt_numfibs; i++) { 304 rnh = rt_tables_get_rnh(i, family); 305 dom->dom_rtdetach(rnh); 306 } 307 } 308 RTABLES_UNLOCK(); 309 310 /* 311 * dom_rtdetach calls rt_table_destroy(), which 312 * schedules deletion for all rtentries, nexthops and control 313 * structures. Wait for the destruction callbacks to fire. 314 * Note that this should result in freeing all rtentries, but 315 * nexthops deletions will be scheduled for the next epoch run 316 * and will be completed after vnet teardown. 317 */ 318 NET_EPOCH_DRAIN_CALLBACKS(); 319 320 free(V_rt_tables, M_RTABLE); 321 vnet_rtzone_destroy(); 322 #ifdef FIB_ALGO 323 vnet_fib_destroy(); 324 #endif 325 } 326 VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST, 327 rtables_destroy, 0); 328 #endif 329 330 static inline struct rib_head * 331 rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family) 332 { 333 struct rib_head **prnh; 334 335 KASSERT(table < V_rt_numfibs, 336 ("%s: table out of bounds (%d < %d)", __func__, table, 337 V_rt_numfibs)); 338 KASSERT(family < (AF_MAX + 1), 339 ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1)); 340 341 /* rnh is [fib=0][af=0]. */ 342 prnh = V_rt_tables; 343 /* Get the offset to the requested table and fam. */ 344 prnh += table * (AF_MAX + 1) + family; 345 346 return (*prnh); 347 } 348 349 struct rib_head * 350 rt_tables_get_rnh(uint32_t table, sa_family_t family) 351 { 352 353 return (rt_tables_get_rnh_ptr(table, family)); 354 } 355 356 u_int 357 rt_tables_get_gen(uint32_t table, sa_family_t family) 358 { 359 struct rib_head *rnh; 360 361 rnh = rt_tables_get_rnh_ptr(table, family); 362 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d", 363 __func__, table, family)); 364 return (rnh->rnh_gen); 365 } 366