1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2021-2022 Alexander V. Chernikov 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_route.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/malloc.h> 36 #include <sys/socket.h> 37 #include <sys/jail.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/rmlock.h> 41 42 #include <net/if.h> 43 #include <net/if_var.h> 44 #include <net/vnet.h> 45 #include <net/route.h> 46 #include <net/route/route_ctl.h> 47 #include <net/route/route_var.h> 48 #include <net/route/nhop.h> 49 #include <netinet/in.h> 50 #include <netinet6/scope6_var.h> 51 52 #include <vm/uma.h> 53 54 /* Routing table UMA zone */ 55 VNET_DEFINE_STATIC(uma_zone_t, rtzone); 56 #define V_rtzone VNET(rtzone) 57 58 void 59 vnet_rtzone_init(void) 60 { 61 62 V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), 63 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 64 } 65 66 #ifdef VIMAGE 67 void 68 vnet_rtzone_destroy(void) 69 { 70 71 uma_zdestroy(V_rtzone); 72 } 73 #endif 74 75 /* 76 * Creates rtentry and based on @dst/@netmask data. 77 * Return 0 and fills in rtentry into @prt on success, 78 * Note: rtentry mask ptr will be set to @netmask , thus its pointer is required 79 * to be stable till the end of the operation (radix rt insertion/change/removal). 80 */ 81 struct rtentry * 82 rt_alloc(struct rib_head *rnh, const struct sockaddr *dst, 83 struct sockaddr *netmask) 84 { 85 MPASS(dst->sa_len <= sizeof(((struct rtentry *)NULL)->rt_dstb)); 86 87 struct rtentry *rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 88 if (rt == NULL) 89 return (NULL); 90 rt->rte_flags = RTF_UP | (netmask == NULL ? RTF_HOST : 0); 91 92 /* Fill in dst, ensuring it's masked if needed. */ 93 if (netmask != NULL) { 94 rt_maskedcopy(dst, &rt->rt_dst, netmask); 95 } else 96 bcopy(dst, &rt->rt_dst, dst->sa_len); 97 rt_key(rt) = &rt->rt_dst; 98 /* Set netmask to the storage from info. It will be updated upon insertion */ 99 rt_mask(rt) = netmask; 100 101 return (rt); 102 } 103 104 static void 105 destroy_rtentry(struct rtentry *rt) 106 { 107 #ifdef VIMAGE 108 struct nhop_object *nh = rt->rt_nhop; 109 110 /* 111 * At this moment rnh, nh_control may be already freed. 112 * nhop interface may have been migrated to a different vnet. 113 * Use vnet stored in the nexthop to delete the entry. 114 */ 115 #ifdef ROUTE_MPATH 116 if (NH_IS_NHGRP(nh)) { 117 const struct weightened_nhop *wn; 118 uint32_t num_nhops; 119 wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops); 120 nh = wn[0].nh; 121 } 122 #endif 123 CURVNET_SET(nhop_get_vnet(nh)); 124 #endif 125 126 /* Unreference nexthop */ 127 nhop_free_any(rt->rt_nhop); 128 129 rt_free_immediate(rt); 130 131 CURVNET_RESTORE(); 132 } 133 134 /* 135 * Epoch callback indicating rtentry is safe to destroy 136 */ 137 static void 138 destroy_rtentry_epoch(epoch_context_t ctx) 139 { 140 struct rtentry *rt; 141 142 rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); 143 144 destroy_rtentry(rt); 145 } 146 147 /* 148 * Schedule rtentry deletion 149 */ 150 void 151 rt_free(struct rtentry *rt) 152 { 153 154 KASSERT(rt != NULL, ("%s: NULL rt", __func__)); 155 156 NET_EPOCH_CALL(destroy_rtentry_epoch, &rt->rt_epoch_ctx); 157 } 158 159 void 160 rt_free_immediate(struct rtentry *rt) 161 { 162 uma_zfree(V_rtzone, rt); 163 } 164 165 bool 166 rt_is_host(const struct rtentry *rt) 167 { 168 169 return (rt->rte_flags & RTF_HOST); 170 } 171 172 sa_family_t 173 rt_get_family(const struct rtentry *rt) 174 { 175 const struct sockaddr *dst; 176 177 dst = (const struct sockaddr *)rt_key_const(rt); 178 179 return (dst->sa_family); 180 } 181 182 /* 183 * Returns pointer to nexthop or nexthop group 184 * associated with @rt 185 */ 186 struct nhop_object * 187 rt_get_raw_nhop(const struct rtentry *rt) 188 { 189 190 return (rt->rt_nhop); 191 } 192 193 void 194 rt_get_rnd(const struct rtentry *rt, struct route_nhop_data *rnd) 195 { 196 rnd->rnd_nhop = rt->rt_nhop; 197 rnd->rnd_weight = rt->rt_weight; 198 } 199 200 /* 201 * If the process in in jail w/o VNET, export only host routes for the 202 * addresses assigned to the jail. 203 * Otherwise, allow exporting the entire table. 204 */ 205 bool 206 rt_is_exportable(const struct rtentry *rt, struct ucred *cred) 207 { 208 if (!rt_is_host(rt)) { 209 /* 210 * Performance optimisation: only host routes are allowed 211 * in the jail w/o vnet. 212 */ 213 if (jailed_without_vnet(cred)) 214 return (false); 215 } else { 216 if (prison_if(cred, rt_key_const(rt)) != 0) 217 return (false); 218 } 219 220 return (true); 221 } 222 223 #ifdef INET 224 /* 225 * Stores IPv4 address and prefix length of @rt inside 226 * @paddr and @plen. 227 * @pscopeid is currently always set to 0. 228 */ 229 void 230 rt_get_inet_prefix_plen(const struct rtentry *rt, struct in_addr *paddr, 231 int *plen, uint32_t *pscopeid) 232 { 233 const struct sockaddr_in *dst; 234 235 dst = (const struct sockaddr_in *)rt_key_const(rt); 236 KASSERT((dst->sin_family == AF_INET), 237 ("rt family is %d, not inet", dst->sin_family)); 238 *paddr = dst->sin_addr; 239 dst = (const struct sockaddr_in *)rt_mask_const(rt); 240 if (dst == NULL) 241 *plen = 32; 242 else 243 *plen = bitcount32(dst->sin_addr.s_addr); 244 *pscopeid = 0; 245 } 246 247 /* 248 * Stores IPv4 address and prefix mask of @rt inside 249 * @paddr and @pmask. Sets mask to INADDR_ANY for host routes. 250 * @pscopeid is currently always set to 0. 251 */ 252 void 253 rt_get_inet_prefix_pmask(const struct rtentry *rt, struct in_addr *paddr, 254 struct in_addr *pmask, uint32_t *pscopeid) 255 { 256 const struct sockaddr_in *dst; 257 258 dst = (const struct sockaddr_in *)rt_key_const(rt); 259 KASSERT((dst->sin_family == AF_INET), 260 ("rt family is %d, not inet", dst->sin_family)); 261 *paddr = dst->sin_addr; 262 dst = (const struct sockaddr_in *)rt_mask_const(rt); 263 if (dst == NULL) 264 pmask->s_addr = INADDR_BROADCAST; 265 else 266 *pmask = dst->sin_addr; 267 *pscopeid = 0; 268 } 269 #endif 270 271 #ifdef INET6 272 static int 273 inet6_get_plen(const struct in6_addr *addr) 274 { 275 276 return (bitcount32(addr->s6_addr32[0]) + bitcount32(addr->s6_addr32[1]) + 277 bitcount32(addr->s6_addr32[2]) + bitcount32(addr->s6_addr32[3])); 278 } 279 280 /* 281 * Stores IPv6 address and prefix length of @rt inside 282 * @paddr and @plen. Addresses are returned in de-embedded form. 283 * Scopeid is set to 0 for non-LL addresses. 284 */ 285 void 286 rt_get_inet6_prefix_plen(const struct rtentry *rt, struct in6_addr *paddr, 287 int *plen, uint32_t *pscopeid) 288 { 289 const struct sockaddr_in6 *dst; 290 291 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 292 KASSERT((dst->sin6_family == AF_INET6), 293 ("rt family is %d, not inet6", dst->sin6_family)); 294 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 295 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 296 else 297 *paddr = dst->sin6_addr; 298 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 299 if (dst == NULL) 300 *plen = 128; 301 else 302 *plen = inet6_get_plen(&dst->sin6_addr); 303 } 304 305 /* 306 * Stores IPv6 address and prefix mask of @rt inside 307 * @paddr and @pmask. Addresses are returned in de-embedded form. 308 * Scopeid is set to 0 for non-LL addresses. 309 */ 310 void 311 rt_get_inet6_prefix_pmask(const struct rtentry *rt, struct in6_addr *paddr, 312 struct in6_addr *pmask, uint32_t *pscopeid) 313 { 314 const struct sockaddr_in6 *dst; 315 316 dst = (const struct sockaddr_in6 *)rt_key_const(rt); 317 KASSERT((dst->sin6_family == AF_INET6), 318 ("rt family is %d, not inet", dst->sin6_family)); 319 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) 320 in6_splitscope(&dst->sin6_addr, paddr, pscopeid); 321 else 322 *paddr = dst->sin6_addr; 323 dst = (const struct sockaddr_in6 *)rt_mask_const(rt); 324 if (dst == NULL) 325 memset(pmask, 0xFF, sizeof(struct in6_addr)); 326 else 327 *pmask = dst->sin6_addr; 328 } 329 #endif 330 331 332