1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1980, 1986, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 2942e9e16dSRuslan Ermilov * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 30c3aac50fSPeter Wemm * $FreeBSD$ 31df8bae1dSRodney W. Grimes */ 328b07e49aSJulian Elischer /************************************************************************ 338b07e49aSJulian Elischer * Note: In this file a 'fib' is a "forwarding information base" * 348b07e49aSJulian Elischer * Which is the new name for an in kernel routing (next hop) table. * 358b07e49aSJulian Elischer ***********************************************************************/ 36df8bae1dSRodney W. Grimes 371d5e9e22SEivind Eklund #include "opt_inet.h" 38096f2786SBjoern A. Zeeb #include "opt_inet6.h" 398b07e49aSJulian Elischer #include "opt_route.h" 404bd49128SPeter Wemm #include "opt_mrouting.h" 41e440aed9SQing Li #include "opt_mpath.h" 424bd49128SPeter Wemm 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 456e6b3f7cSQing Li #include <sys/syslog.h> 464d1d4912SBruce Evans #include <sys/malloc.h> 47df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 48df8bae1dSRodney W. Grimes #include <sys/socket.h> 498b07e49aSJulian Elischer #include <sys/sysctl.h> 503120b9d4SKip Macy #include <sys/syslog.h> 518b07e49aSJulian Elischer #include <sys/sysproto.h> 528b07e49aSJulian Elischer #include <sys/proc.h> 53df8bae1dSRodney W. Grimes #include <sys/domain.h> 54cb64988fSLuoqi Chen #include <sys/kernel.h> 55df8bae1dSRodney W. Grimes 56df8bae1dSRodney W. Grimes #include <net/if.h> 5776039bc8SGleb Smirnoff #include <net/if_var.h> 586e6b3f7cSQing Li #include <net/if_dl.h> 59df8bae1dSRodney W. Grimes #include <net/route.h> 60530c0060SRobert Watson #include <net/vnet.h> 61e5c610d6SQing Li #include <net/flowtable.h> 62df8bae1dSRodney W. Grimes 63e440aed9SQing Li #ifdef RADIX_MPATH 64e440aed9SQing Li #include <net/radix_mpath.h> 65e440aed9SQing Li #endif 66e440aed9SQing Li 67df8bae1dSRodney W. Grimes #include <netinet/in.h> 68b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h> 69df8bae1dSRodney W. Grimes 702dc1d581SAndre Oppermann #include <vm/uma.h> 712dc1d581SAndre Oppermann 724871fc4aSJulian Elischer #define RT_MAXFIBS UINT16_MAX 73bfca216eSBjoern A. Zeeb 74bfca216eSBjoern A. Zeeb /* Kernel config default option. */ 75bfca216eSBjoern A. Zeeb #ifdef ROUTETABLES 76bfca216eSBjoern A. Zeeb #if ROUTETABLES <= 0 77bfca216eSBjoern A. Zeeb #error "ROUTETABLES defined too low" 78bfca216eSBjoern A. Zeeb #endif 79bfca216eSBjoern A. Zeeb #if ROUTETABLES > RT_MAXFIBS 80bfca216eSBjoern A. Zeeb #error "ROUTETABLES defined too big" 81bfca216eSBjoern A. Zeeb #endif 82bfca216eSBjoern A. Zeeb #define RT_NUMFIBS ROUTETABLES 83bfca216eSBjoern A. Zeeb #endif /* ROUTETABLES */ 84bfca216eSBjoern A. Zeeb /* Initialize to default if not otherwise set. */ 85bfca216eSBjoern A. Zeeb #ifndef RT_NUMFIBS 86bfca216eSBjoern A. Zeeb #define RT_NUMFIBS 1 87bfca216eSBjoern A. Zeeb #endif 88bfca216eSBjoern A. Zeeb 894871fc4aSJulian Elischer /* This is read-only.. */ 908b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS; 91f88910cdSMatthew D Fleming SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); 924871fc4aSJulian Elischer /* and this can be set too big but will be fixed before it is used */ 938b07e49aSJulian Elischer TUNABLE_INT("net.fibs", &rt_numfibs); 948b07e49aSJulian Elischer 9566e8505fSJulian Elischer /* 9666e8505fSJulian Elischer * By default add routes to all fibs for new interfaces. 9766e8505fSJulian Elischer * Once this is set to 0 then only allocate routes on interface 9866e8505fSJulian Elischer * changes for the FIB of the caller when adding a new set of addresses 9966e8505fSJulian Elischer * to an interface. XXX this is a shotgun aproach to a problem that needs 10066e8505fSJulian Elischer * a more fine grained solution.. that will come. 101a8498625SBjoern A. Zeeb * XXX also has the problems getting the FIB from curthread which will not 102a8498625SBjoern A. Zeeb * always work given the fib can be overridden and prefixes can be added 103a8498625SBjoern A. Zeeb * from the network stack context. 10466e8505fSJulian Elischer */ 10566e8505fSJulian Elischer u_int rt_add_addr_allfibs = 1; 106f88910cdSMatthew D Fleming SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, 10766e8505fSJulian Elischer &rt_add_addr_allfibs, 0, ""); 10866e8505fSJulian Elischer TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); 10966e8505fSJulian Elischer 110eddfbb76SRobert Watson VNET_DEFINE(struct rtstat, rtstat); 1111e77c105SRobert Watson #define V_rtstat VNET(rtstat) 112b58ea5f3SBjoern A. Zeeb 11382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct radix_node_head *, rt_tables); 11482cea7e6SBjoern A. Zeeb #define V_rt_tables VNET(rt_tables) 11582cea7e6SBjoern A. Zeeb 11682cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ 11782cea7e6SBjoern A. Zeeb #define V_rttrash VNET(rttrash) 11882cea7e6SBjoern A. Zeeb 119bfe1aba4SMarko Zec 120d6941ce9SLuigi Rizzo /* compare two sockaddr structures */ 121d6941ce9SLuigi Rizzo #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) 122d6941ce9SLuigi Rizzo 123d6941ce9SLuigi Rizzo /* 124d6941ce9SLuigi Rizzo * Convert a 'struct radix_node *' to a 'struct rtentry *'. 125d6941ce9SLuigi Rizzo * The operation can be done safely (in this code) because a 126d6941ce9SLuigi Rizzo * 'struct rtentry' starts with two 'struct radix_node''s, the first 127d6941ce9SLuigi Rizzo * one representing leaf nodes in the routing tree, which is 128d6941ce9SLuigi Rizzo * what the code in radix.c passes us as a 'struct radix_node'. 129d6941ce9SLuigi Rizzo * 130d6941ce9SLuigi Rizzo * But because there are a lot of assumptions in this conversion, 131d6941ce9SLuigi Rizzo * do not cast explicitly, but always use the macro below. 132d6941ce9SLuigi Rizzo */ 133d6941ce9SLuigi Rizzo #define RNTORT(p) ((struct rtentry *)(p)) 134d6941ce9SLuigi Rizzo 1353e288e62SDimitry Andric static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */ 13682cea7e6SBjoern A. Zeeb #define V_rtzone VNET(rtzone) 13782cea7e6SBjoern A. Zeeb 1388b07e49aSJulian Elischer /* 1398b07e49aSJulian Elischer * handler for net.my_fibnum 1408b07e49aSJulian Elischer */ 1418b07e49aSJulian Elischer static int 1428b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 143df8bae1dSRodney W. Grimes { 1448b07e49aSJulian Elischer int fibnum; 1458b07e49aSJulian Elischer int error; 1468b07e49aSJulian Elischer 1478b07e49aSJulian Elischer fibnum = curthread->td_proc->p_fibnum; 1488b07e49aSJulian Elischer error = sysctl_handle_int(oidp, &fibnum, 0, req); 1498b07e49aSJulian Elischer return (error); 150df8bae1dSRodney W. Grimes } 151df8bae1dSRodney W. Grimes 1528b07e49aSJulian Elischer SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, 1538b07e49aSJulian Elischer NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); 1542dc1d581SAndre Oppermann 155c2c2a7c1SBjoern A. Zeeb static __inline struct radix_node_head ** 156c2c2a7c1SBjoern A. Zeeb rt_tables_get_rnh_ptr(int table, int fam) 157c2c2a7c1SBjoern A. Zeeb { 158c2c2a7c1SBjoern A. Zeeb struct radix_node_head **rnh; 159c2c2a7c1SBjoern A. Zeeb 160c2c2a7c1SBjoern A. Zeeb KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", 161c2c2a7c1SBjoern A. Zeeb __func__)); 162c2c2a7c1SBjoern A. Zeeb KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", 163c2c2a7c1SBjoern A. Zeeb __func__)); 164c2c2a7c1SBjoern A. Zeeb 165c2c2a7c1SBjoern A. Zeeb /* rnh is [fib=0][af=0]. */ 166c2c2a7c1SBjoern A. Zeeb rnh = (struct radix_node_head **)V_rt_tables; 167c2c2a7c1SBjoern A. Zeeb /* Get the offset to the requested table and fam. */ 168c2c2a7c1SBjoern A. Zeeb rnh += table * (AF_MAX+1) + fam; 169c2c2a7c1SBjoern A. Zeeb 170c2c2a7c1SBjoern A. Zeeb return (rnh); 171c2c2a7c1SBjoern A. Zeeb } 172c2c2a7c1SBjoern A. Zeeb 173c2c2a7c1SBjoern A. Zeeb struct radix_node_head * 174c2c2a7c1SBjoern A. Zeeb rt_tables_get_rnh(int table, int fam) 175c2c2a7c1SBjoern A. Zeeb { 176c2c2a7c1SBjoern A. Zeeb 177c2c2a7c1SBjoern A. Zeeb return (*rt_tables_get_rnh_ptr(table, fam)); 178c2c2a7c1SBjoern A. Zeeb } 179c2c2a7c1SBjoern A. Zeeb 180d0728d71SRobert Watson /* 181d0728d71SRobert Watson * route initialization must occur before ip6_init2(), which happenas at 182d0728d71SRobert Watson * SI_ORDER_MIDDLE. 183d0728d71SRobert Watson */ 1842eb5613fSLuigi Rizzo static void 1852eb5613fSLuigi Rizzo route_init(void) 186df8bae1dSRodney W. Grimes { 1878b07e49aSJulian Elischer 1886f95a5ebSJulian Elischer /* whack the tunable ints into line. */ 1898b07e49aSJulian Elischer if (rt_numfibs > RT_MAXFIBS) 1908b07e49aSJulian Elischer rt_numfibs = RT_MAXFIBS; 1918b07e49aSJulian Elischer if (rt_numfibs == 0) 1928b07e49aSJulian Elischer rt_numfibs = 1; 1931ed81b73SMarko Zec } 194d0728d71SRobert Watson SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); 1951ed81b73SMarko Zec 196d0728d71SRobert Watson static void 197d0728d71SRobert Watson vnet_route_init(const void *unused __unused) 1981ed81b73SMarko Zec { 1991ed81b73SMarko Zec struct domain *dom; 200c2c2a7c1SBjoern A. Zeeb struct radix_node_head **rnh; 201c2c2a7c1SBjoern A. Zeeb int table; 2021ed81b73SMarko Zec int fam; 2031ed81b73SMarko Zec 204c2c2a7c1SBjoern A. Zeeb V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 205c2c2a7c1SBjoern A. Zeeb sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); 206c2c2a7c1SBjoern A. Zeeb 2071ed81b73SMarko Zec V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, 2081ed81b73SMarko Zec NULL, NULL, UMA_ALIGN_PTR, 0); 2098b07e49aSJulian Elischer for (dom = domains; dom; dom = dom->dom_next) { 210b680a383SBjoern A. Zeeb if (dom->dom_rtattach == NULL) 211b680a383SBjoern A. Zeeb continue; 212b680a383SBjoern A. Zeeb 2138b07e49aSJulian Elischer for (table = 0; table < rt_numfibs; table++) { 214b680a383SBjoern A. Zeeb fam = dom->dom_family; 215b680a383SBjoern A. Zeeb if (table != 0 && fam != AF_INET6 && fam != AF_INET) 216b680a383SBjoern A. Zeeb break; 217b680a383SBjoern A. Zeeb 218b680a383SBjoern A. Zeeb /* 219b680a383SBjoern A. Zeeb * XXX MRT rtattach will be also called from 220b680a383SBjoern A. Zeeb * vfs_export.c but the offset will be 0 (only for 221b680a383SBjoern A. Zeeb * AF_INET and AF_INET6 which don't need it anyhow). 2228b07e49aSJulian Elischer */ 223c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh_ptr(table, fam); 224c2c2a7c1SBjoern A. Zeeb if (rnh == NULL) 225c2c2a7c1SBjoern A. Zeeb panic("%s: rnh NULL", __func__); 226b680a383SBjoern A. Zeeb dom->dom_rtattach((void **)rnh, dom->dom_rtoffset); 2278b07e49aSJulian Elischer } 2288b07e49aSJulian Elischer } 2298b07e49aSJulian Elischer } 230d0728d71SRobert Watson VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 231d0728d71SRobert Watson vnet_route_init, 0); 2328b07e49aSJulian Elischer 233bc29160dSMarko Zec #ifdef VIMAGE 234d0728d71SRobert Watson static void 235d0728d71SRobert Watson vnet_route_uninit(const void *unused __unused) 236bc29160dSMarko Zec { 237bc29160dSMarko Zec int table; 238bc29160dSMarko Zec int fam; 239bc29160dSMarko Zec struct domain *dom; 240bc29160dSMarko Zec struct radix_node_head **rnh; 241bc29160dSMarko Zec 242bc29160dSMarko Zec for (dom = domains; dom; dom = dom->dom_next) { 243b680a383SBjoern A. Zeeb if (dom->dom_rtdetach == NULL) 244b680a383SBjoern A. Zeeb continue; 245b680a383SBjoern A. Zeeb 246bc29160dSMarko Zec for (table = 0; table < rt_numfibs; table++) { 247b680a383SBjoern A. Zeeb fam = dom->dom_family; 248b680a383SBjoern A. Zeeb 249b680a383SBjoern A. Zeeb if (table != 0 && fam != AF_INET6 && fam != AF_INET) 250b680a383SBjoern A. Zeeb break; 251b680a383SBjoern A. Zeeb 252bc29160dSMarko Zec rnh = rt_tables_get_rnh_ptr(table, fam); 253bc29160dSMarko Zec if (rnh == NULL) 254bc29160dSMarko Zec panic("%s: rnh NULL", __func__); 255b680a383SBjoern A. Zeeb dom->dom_rtdetach((void **)rnh, dom->dom_rtoffset); 256bc29160dSMarko Zec } 257bc29160dSMarko Zec } 2586274ce3eSCraig Rodrigues 2596274ce3eSCraig Rodrigues free(V_rt_tables, M_RTABLE); 2606274ce3eSCraig Rodrigues uma_zdestroy(V_rtzone); 261bc29160dSMarko Zec } 262d0728d71SRobert Watson VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, 263d0728d71SRobert Watson vnet_route_uninit, 0); 264bc29160dSMarko Zec #endif 265bc29160dSMarko Zec 2668b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_ 2678b07e49aSJulian Elischer struct setfib_args { 2688b07e49aSJulian Elischer int fibnum; 2698b07e49aSJulian Elischer }; 2708b07e49aSJulian Elischer #endif 2718b07e49aSJulian Elischer int 2728451d0ddSKip Macy sys_setfib(struct thread *td, struct setfib_args *uap) 2738b07e49aSJulian Elischer { 2748b07e49aSJulian Elischer if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 2758b07e49aSJulian Elischer return EINVAL; 2768b07e49aSJulian Elischer td->td_proc->p_fibnum = uap->fibnum; 2778b07e49aSJulian Elischer return (0); 278df8bae1dSRodney W. Grimes } 279df8bae1dSRodney W. Grimes 280df8bae1dSRodney W. Grimes /* 281df8bae1dSRodney W. Grimes * Packet routing routines. 282df8bae1dSRodney W. Grimes */ 283df8bae1dSRodney W. Grimes void 284d1dd20beSSam Leffler rtalloc(struct route *ro) 285df8bae1dSRodney W. Grimes { 286a8498625SBjoern A. Zeeb 287a8498625SBjoern A. Zeeb rtalloc_ign_fib(ro, 0UL, RT_DEFAULT_FIB); 2888b07e49aSJulian Elischer } 2898b07e49aSJulian Elischer 2908b07e49aSJulian Elischer void 2918b07e49aSJulian Elischer rtalloc_fib(struct route *ro, u_int fibnum) 2928b07e49aSJulian Elischer { 2938b07e49aSJulian Elischer rtalloc_ign_fib(ro, 0UL, fibnum); 294df8bae1dSRodney W. Grimes } 295df8bae1dSRodney W. Grimes 296652082e6SGarrett Wollman void 297d1dd20beSSam Leffler rtalloc_ign(struct route *ro, u_long ignore) 298652082e6SGarrett Wollman { 29968f956b8SJohn Polstra struct rtentry *rt; 30068f956b8SJohn Polstra 30168f956b8SJohn Polstra if ((rt = ro->ro_rt) != NULL) { 30268f956b8SJohn Polstra if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 30368f956b8SJohn Polstra return; 30468f956b8SJohn Polstra RTFREE(rt); 30566810dd0SYoshinobu Inoue ro->ro_rt = NULL; 30668f956b8SJohn Polstra } 307a8498625SBjoern A. Zeeb ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, RT_DEFAULT_FIB); 3088b07e49aSJulian Elischer if (ro->ro_rt) 3098b07e49aSJulian Elischer RT_UNLOCK(ro->ro_rt); 3108b07e49aSJulian Elischer } 3118b07e49aSJulian Elischer 3128b07e49aSJulian Elischer void 3138b07e49aSJulian Elischer rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) 3148b07e49aSJulian Elischer { 3158b07e49aSJulian Elischer struct rtentry *rt; 3168b07e49aSJulian Elischer 3178b07e49aSJulian Elischer if ((rt = ro->ro_rt) != NULL) { 3188b07e49aSJulian Elischer if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 3198b07e49aSJulian Elischer return; 3208b07e49aSJulian Elischer RTFREE(rt); 3218b07e49aSJulian Elischer ro->ro_rt = NULL; 3228b07e49aSJulian Elischer } 3238b07e49aSJulian Elischer ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); 324d1dd20beSSam Leffler if (ro->ro_rt) 325d1dd20beSSam Leffler RT_UNLOCK(ro->ro_rt); 326652082e6SGarrett Wollman } 327652082e6SGarrett Wollman 328b0a76b88SJulian Elischer /* 329b0a76b88SJulian Elischer * Look up the route that matches the address given 330b0a76b88SJulian Elischer * Or, at least try.. Create a cloned route if needed. 331d1dd20beSSam Leffler * 332d1dd20beSSam Leffler * The returned route, if any, is locked. 333b0a76b88SJulian Elischer */ 334df8bae1dSRodney W. Grimes struct rtentry * 335d1dd20beSSam Leffler rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 336df8bae1dSRodney W. Grimes { 337a8498625SBjoern A. Zeeb 338a8498625SBjoern A. Zeeb return (rtalloc1_fib(dst, report, ignflags, RT_DEFAULT_FIB)); 3398b07e49aSJulian Elischer } 3408b07e49aSJulian Elischer 3418b07e49aSJulian Elischer struct rtentry * 3428b07e49aSJulian Elischer rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 3438b07e49aSJulian Elischer u_int fibnum) 3448b07e49aSJulian Elischer { 3458b07e49aSJulian Elischer struct radix_node_head *rnh; 346d1dd20beSSam Leffler struct radix_node *rn; 347d1dd20beSSam Leffler struct rtentry *newrt; 348df8bae1dSRodney W. Grimes struct rt_addrinfo info; 3496e6b3f7cSQing Li int err = 0, msgtype = RTM_MISS; 3503120b9d4SKip Macy int needlock; 351df8bae1dSRodney W. Grimes 3528b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 353b680a383SBjoern A. Zeeb switch (dst->sa_family) { 354b680a383SBjoern A. Zeeb case AF_INET6: 355b680a383SBjoern A. Zeeb case AF_INET: 356b680a383SBjoern A. Zeeb /* We support multiple FIBs. */ 357b680a383SBjoern A. Zeeb break; 358b680a383SBjoern A. Zeeb default: 359b680a383SBjoern A. Zeeb fibnum = RT_DEFAULT_FIB; 360b680a383SBjoern A. Zeeb break; 361b680a383SBjoern A. Zeeb } 362c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 363e579f1c1SDmitry Chagin newrt = NULL; 364523e6002SDmitry Chagin if (rnh == NULL) 365523e6002SDmitry Chagin goto miss; 366523e6002SDmitry Chagin 367b0a76b88SJulian Elischer /* 368b0a76b88SJulian Elischer * Look up the address in the table for that Address Family 369b0a76b88SJulian Elischer */ 3703120b9d4SKip Macy needlock = !(ignflags & RTF_RNH_LOCKED); 3713120b9d4SKip Macy if (needlock) 3723120b9d4SKip Macy RADIX_NODE_HEAD_RLOCK(rnh); 3733120b9d4SKip Macy #ifdef INVARIANTS 3743120b9d4SKip Macy else 3753120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 3763120b9d4SKip Macy #endif 3773120b9d4SKip Macy rn = rnh->rnh_matchaddr(dst, rnh); 3783120b9d4SKip Macy if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 3796a873ef7SDmitry Chagin newrt = RNTORT(rn); 3803120b9d4SKip Macy RT_LOCK(newrt); 3813120b9d4SKip Macy RT_ADDREF(newrt); 3823120b9d4SKip Macy if (needlock) 3833120b9d4SKip Macy RADIX_NODE_HEAD_RUNLOCK(rnh); 3843120b9d4SKip Macy goto done; 3856e6b3f7cSQing Li 3866e6b3f7cSQing Li } else if (needlock) 3873120b9d4SKip Macy RADIX_NODE_HEAD_RUNLOCK(rnh); 3883120b9d4SKip Macy 389b0a76b88SJulian Elischer /* 390b0a76b88SJulian Elischer * Either we hit the root or couldn't find any match, 391b0a76b88SJulian Elischer * Which basically means 392b0a76b88SJulian Elischer * "caint get there frm here" 393b0a76b88SJulian Elischer */ 394956b0b65SJeffrey Hsu miss: 395523e6002SDmitry Chagin V_rtstat.rts_unreach++; 396523e6002SDmitry Chagin 3976e6b3f7cSQing Li if (report) { 398b0a76b88SJulian Elischer /* 399b0a76b88SJulian Elischer * If required, report the failure to the supervising 400b0a76b88SJulian Elischer * Authorities. 401b0a76b88SJulian Elischer * For a delete, this is not an error. (report == 0) 402b0a76b88SJulian Elischer */ 4036f5967c0SBruce Evans bzero(&info, sizeof(info)); 404df8bae1dSRodney W. Grimes info.rti_info[RTAX_DST] = dst; 405528737fdSBjoern A. Zeeb rt_missmsg_fib(msgtype, &info, 0, err, fibnum); 406df8bae1dSRodney W. Grimes } 4073120b9d4SKip Macy done: 408d1dd20beSSam Leffler if (newrt) 409d1dd20beSSam Leffler RT_LOCK_ASSERT(newrt); 410df8bae1dSRodney W. Grimes return (newrt); 411df8bae1dSRodney W. Grimes } 412df8bae1dSRodney W. Grimes 413499676dfSJulian Elischer /* 414499676dfSJulian Elischer * Remove a reference count from an rtentry. 415499676dfSJulian Elischer * If the count gets low enough, take it out of the routing table 416499676dfSJulian Elischer */ 417df8bae1dSRodney W. Grimes void 418d1dd20beSSam Leffler rtfree(struct rtentry *rt) 419df8bae1dSRodney W. Grimes { 42085911824SLuigi Rizzo struct radix_node_head *rnh; 421df8bae1dSRodney W. Grimes 422a0c0e34bSGleb Smirnoff KASSERT(rt != NULL,("%s: NULL rt", __func__)); 423c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 424a0c0e34bSGleb Smirnoff KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 425499676dfSJulian Elischer 426d1dd20beSSam Leffler RT_LOCK_ASSERT(rt); 427d1dd20beSSam Leffler 428499676dfSJulian Elischer /* 429a0c0e34bSGleb Smirnoff * The callers should use RTFREE_LOCKED() or RTFREE(), so 430a0c0e34bSGleb Smirnoff * we should come here exactly with the last reference. 431499676dfSJulian Elischer */ 4327138d65cSSam Leffler RT_REMREF(rt); 433a0c0e34bSGleb Smirnoff if (rt->rt_refcnt > 0) { 434a42ea597SQing Li log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 435d1dd20beSSam Leffler goto done; 436a0c0e34bSGleb Smirnoff } 4379c63e9dbSSam Leffler 4389c63e9dbSSam Leffler /* 4399c63e9dbSSam Leffler * On last reference give the "close method" a chance 4409c63e9dbSSam Leffler * to cleanup private state. This also permits (for 4419c63e9dbSSam Leffler * IPv4 and IPv6) a chance to decide if the routing table 4429c63e9dbSSam Leffler * entry should be purged immediately or at a later time. 4439c63e9dbSSam Leffler * When an immediate purge is to happen the close routine 4449c63e9dbSSam Leffler * typically calls rtexpunge which clears the RTF_UP flag 4459c63e9dbSSam Leffler * on the entry so that the code below reclaims the storage. 4469c63e9dbSSam Leffler */ 447d1dd20beSSam Leffler if (rt->rt_refcnt == 0 && rnh->rnh_close) 4485c2dae8eSGarrett Wollman rnh->rnh_close((struct radix_node *)rt, rnh); 449499676dfSJulian Elischer 450499676dfSJulian Elischer /* 451499676dfSJulian Elischer * If we are no longer "up" (and ref == 0) 452499676dfSJulian Elischer * then we can free the resources associated 453499676dfSJulian Elischer * with the route. 454499676dfSJulian Elischer */ 455d1dd20beSSam Leffler if ((rt->rt_flags & RTF_UP) == 0) { 456df8bae1dSRodney W. Grimes if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 457df8bae1dSRodney W. Grimes panic("rtfree 2"); 458499676dfSJulian Elischer /* 459499676dfSJulian Elischer * the rtentry must have been removed from the routing table 460499676dfSJulian Elischer * so it is represented in rttrash.. remove that now. 461499676dfSJulian Elischer */ 462603724d3SBjoern A. Zeeb V_rttrash--; 463499676dfSJulian Elischer #ifdef DIAGNOSTIC 464df8bae1dSRodney W. Grimes if (rt->rt_refcnt < 0) { 465623ae52eSPoul-Henning Kamp printf("rtfree: %p not freed (neg refs)\n", rt); 466d1dd20beSSam Leffler goto done; 467df8bae1dSRodney W. Grimes } 468499676dfSJulian Elischer #endif 469499676dfSJulian Elischer /* 470499676dfSJulian Elischer * release references on items we hold them on.. 471499676dfSJulian Elischer * e.g other routes and ifaddrs. 472499676dfSJulian Elischer */ 47319fc74fbSJeffrey Hsu if (rt->rt_ifa) 4741099f828SRobert Watson ifa_free(rt->rt_ifa); 475499676dfSJulian Elischer /* 476499676dfSJulian Elischer * The key is separatly alloc'd so free it (see rt_setgate()). 477499676dfSJulian Elischer * This also frees the gateway, as they are always malloc'd 478499676dfSJulian Elischer * together. 479499676dfSJulian Elischer */ 480df8bae1dSRodney W. Grimes Free(rt_key(rt)); 481499676dfSJulian Elischer 482499676dfSJulian Elischer /* 483499676dfSJulian Elischer * and the rtentry itself of course 484499676dfSJulian Elischer */ 485d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 4861ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 487d1dd20beSSam Leffler return; 488df8bae1dSRodney W. Grimes } 489d1dd20beSSam Leffler done: 490d1dd20beSSam Leffler RT_UNLOCK(rt); 491df8bae1dSRodney W. Grimes } 492df8bae1dSRodney W. Grimes 493df8bae1dSRodney W. Grimes 494df8bae1dSRodney W. Grimes /* 495df8bae1dSRodney W. Grimes * Force a routing table entry to the specified 496df8bae1dSRodney W. Grimes * destination to go through the given gateway. 497df8bae1dSRodney W. Grimes * Normally called as a result of a routing redirect 498df8bae1dSRodney W. Grimes * message from the network layer. 499df8bae1dSRodney W. Grimes */ 50026f9a767SRodney W. Grimes void 501d1dd20beSSam Leffler rtredirect(struct sockaddr *dst, 502d1dd20beSSam Leffler struct sockaddr *gateway, 503d1dd20beSSam Leffler struct sockaddr *netmask, 504d1dd20beSSam Leffler int flags, 505d1dd20beSSam Leffler struct sockaddr *src) 506df8bae1dSRodney W. Grimes { 507a8498625SBjoern A. Zeeb 508a8498625SBjoern A. Zeeb rtredirect_fib(dst, gateway, netmask, flags, src, RT_DEFAULT_FIB); 5098b07e49aSJulian Elischer } 5108b07e49aSJulian Elischer 5118b07e49aSJulian Elischer void 5128b07e49aSJulian Elischer rtredirect_fib(struct sockaddr *dst, 5138b07e49aSJulian Elischer struct sockaddr *gateway, 5148b07e49aSJulian Elischer struct sockaddr *netmask, 5158b07e49aSJulian Elischer int flags, 5168b07e49aSJulian Elischer struct sockaddr *src, 5178b07e49aSJulian Elischer u_int fibnum) 5188b07e49aSJulian Elischer { 5198e7e854cSKip Macy struct rtentry *rt, *rt0 = NULL; 520df8bae1dSRodney W. Grimes int error = 0; 52185911824SLuigi Rizzo short *stat = NULL; 522df8bae1dSRodney W. Grimes struct rt_addrinfo info; 523df8bae1dSRodney W. Grimes struct ifaddr *ifa; 524c2c2a7c1SBjoern A. Zeeb struct radix_node_head *rnh; 525c2c2a7c1SBjoern A. Zeeb 5268c0fec80SRobert Watson ifa = NULL; 527c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 528c2c2a7c1SBjoern A. Zeeb if (rnh == NULL) { 529c2c2a7c1SBjoern A. Zeeb error = EAFNOSUPPORT; 530c2c2a7c1SBjoern A. Zeeb goto out; 531c2c2a7c1SBjoern A. Zeeb } 532df8bae1dSRodney W. Grimes 533df8bae1dSRodney W. Grimes /* verify the gateway is directly reachable */ 5340ed6142bSQing Li if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) { 535df8bae1dSRodney W. Grimes error = ENETUNREACH; 536df8bae1dSRodney W. Grimes goto out; 537df8bae1dSRodney W. Grimes } 5388b07e49aSJulian Elischer rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ 539df8bae1dSRodney W. Grimes /* 540df8bae1dSRodney W. Grimes * If the redirect isn't from our current router for this dst, 541df8bae1dSRodney W. Grimes * it's either old or wrong. If it redirects us to ourselves, 542df8bae1dSRodney W. Grimes * we have a routing loop, perhaps as a result of an interface 543df8bae1dSRodney W. Grimes * going down recently. 544df8bae1dSRodney W. Grimes */ 545df8bae1dSRodney W. Grimes if (!(flags & RTF_DONE) && rt && 546956b0b65SJeffrey Hsu (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) 547df8bae1dSRodney W. Grimes error = EINVAL; 5488896f83aSRobert Watson else if (ifa_ifwithaddr_check(gateway)) 549df8bae1dSRodney W. Grimes error = EHOSTUNREACH; 550df8bae1dSRodney W. Grimes if (error) 551df8bae1dSRodney W. Grimes goto done; 552df8bae1dSRodney W. Grimes /* 553df8bae1dSRodney W. Grimes * Create a new entry if we just got back a wildcard entry 5546bccea7cSRebecca Cran * or the lookup failed. This is necessary for hosts 555df8bae1dSRodney W. Grimes * which use routing redirects generated by smart gateways 556df8bae1dSRodney W. Grimes * to dynamically build the routing tables. 557df8bae1dSRodney W. Grimes */ 55885911824SLuigi Rizzo if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 559df8bae1dSRodney W. Grimes goto create; 560df8bae1dSRodney W. Grimes /* 561df8bae1dSRodney W. Grimes * Don't listen to the redirect if it's 562df8bae1dSRodney W. Grimes * for a route to an interface. 563df8bae1dSRodney W. Grimes */ 564df8bae1dSRodney W. Grimes if (rt->rt_flags & RTF_GATEWAY) { 565df8bae1dSRodney W. Grimes if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 566df8bae1dSRodney W. Grimes /* 567df8bae1dSRodney W. Grimes * Changing from route to net => route to host. 568df8bae1dSRodney W. Grimes * Create new route, rather than smashing route to net. 569df8bae1dSRodney W. Grimes */ 570df8bae1dSRodney W. Grimes create: 5718e7e854cSKip Macy rt0 = rt; 5728e7e854cSKip Macy rt = NULL; 5738e7e854cSKip Macy 574df8bae1dSRodney W. Grimes flags |= RTF_GATEWAY | RTF_DYNAMIC; 5758071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 5768071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 5778071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = gateway; 5788071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 5798071913dSRuslan Ermilov info.rti_ifa = ifa; 5808071913dSRuslan Ermilov info.rti_flags = flags; 5813120b9d4SKip Macy if (rt0 != NULL) 5823120b9d4SKip Macy RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ 5838b07e49aSJulian Elischer error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 584d1dd20beSSam Leffler if (rt != NULL) { 5854de5d90cSSam Leffler RT_LOCK(rt); 5863120b9d4SKip Macy if (rt0 != NULL) 58729910a5aSKip Macy EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); 5888071913dSRuslan Ermilov flags = rt->rt_flags; 589d1dd20beSSam Leffler } 5903120b9d4SKip Macy if (rt0 != NULL) 5913120b9d4SKip Macy RTFREE(rt0); 5928e7e854cSKip Macy 593603724d3SBjoern A. Zeeb stat = &V_rtstat.rts_dynamic; 594df8bae1dSRodney W. Grimes } else { 5958e7e854cSKip Macy struct rtentry *gwrt; 5968e7e854cSKip Macy 597df8bae1dSRodney W. Grimes /* 598df8bae1dSRodney W. Grimes * Smash the current notion of the gateway to 599df8bae1dSRodney W. Grimes * this destination. Should check about netmask!!! 600df8bae1dSRodney W. Grimes */ 601df8bae1dSRodney W. Grimes rt->rt_flags |= RTF_MODIFIED; 602df8bae1dSRodney W. Grimes flags |= RTF_MODIFIED; 603603724d3SBjoern A. Zeeb stat = &V_rtstat.rts_newgateway; 604499676dfSJulian Elischer /* 605499676dfSJulian Elischer * add the key and gateway (in one malloc'd chunk). 606499676dfSJulian Elischer */ 6073120b9d4SKip Macy RT_UNLOCK(rt); 6083120b9d4SKip Macy RADIX_NODE_HEAD_LOCK(rnh); 6093120b9d4SKip Macy RT_LOCK(rt); 610df8bae1dSRodney W. Grimes rt_setgate(rt, rt_key(rt), gateway); 6113120b9d4SKip Macy gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); 6123120b9d4SKip Macy RADIX_NODE_HEAD_UNLOCK(rnh); 61329910a5aSKip Macy EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); 6148e7e854cSKip Macy RTFREE_LOCKED(gwrt); 615df8bae1dSRodney W. Grimes } 616df8bae1dSRodney W. Grimes } else 617df8bae1dSRodney W. Grimes error = EHOSTUNREACH; 618df8bae1dSRodney W. Grimes done: 619d1dd20beSSam Leffler if (rt) 6201951e633SJohn Baldwin RTFREE_LOCKED(rt); 621df8bae1dSRodney W. Grimes out: 622df8bae1dSRodney W. Grimes if (error) 623603724d3SBjoern A. Zeeb V_rtstat.rts_badredirect++; 624df8bae1dSRodney W. Grimes else if (stat != NULL) 625df8bae1dSRodney W. Grimes (*stat)++; 626df8bae1dSRodney W. Grimes bzero((caddr_t)&info, sizeof(info)); 627df8bae1dSRodney W. Grimes info.rti_info[RTAX_DST] = dst; 628df8bae1dSRodney W. Grimes info.rti_info[RTAX_GATEWAY] = gateway; 629df8bae1dSRodney W. Grimes info.rti_info[RTAX_NETMASK] = netmask; 630df8bae1dSRodney W. Grimes info.rti_info[RTAX_AUTHOR] = src; 631528737fdSBjoern A. Zeeb rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); 6328c0fec80SRobert Watson if (ifa != NULL) 6338c0fec80SRobert Watson ifa_free(ifa); 634df8bae1dSRodney W. Grimes } 635df8bae1dSRodney W. Grimes 6368b07e49aSJulian Elischer int 6378b07e49aSJulian Elischer rtioctl(u_long req, caddr_t data) 6388b07e49aSJulian Elischer { 639a8498625SBjoern A. Zeeb 640a8498625SBjoern A. Zeeb return (rtioctl_fib(req, data, RT_DEFAULT_FIB)); 6418b07e49aSJulian Elischer } 6428b07e49aSJulian Elischer 643df8bae1dSRodney W. Grimes /* 644df8bae1dSRodney W. Grimes * Routing table ioctl interface. 645df8bae1dSRodney W. Grimes */ 646df8bae1dSRodney W. Grimes int 6478b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 648df8bae1dSRodney W. Grimes { 6495090559bSChristian S.J. Peron 6505090559bSChristian S.J. Peron /* 6515090559bSChristian S.J. Peron * If more ioctl commands are added here, make sure the proper 6525090559bSChristian S.J. Peron * super-user checks are being performed because it is possible for 6535090559bSChristian S.J. Peron * prison-root to make it this far if raw sockets have been enabled 6545090559bSChristian S.J. Peron * in jails. 6555090559bSChristian S.J. Peron */ 656623ae52eSPoul-Henning Kamp #ifdef INET 657f0068c4aSGarrett Wollman /* Multicast goop, grrr... */ 6588b07e49aSJulian Elischer return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 659623ae52eSPoul-Henning Kamp #else /* INET */ 660623ae52eSPoul-Henning Kamp return ENXIO; 661623ae52eSPoul-Henning Kamp #endif /* INET */ 662df8bae1dSRodney W. Grimes } 663df8bae1dSRodney W. Grimes 6648c0fec80SRobert Watson /* 6658c0fec80SRobert Watson * For both ifa_ifwithroute() routines, 'ifa' is returned referenced. 6668c0fec80SRobert Watson */ 667df8bae1dSRodney W. Grimes struct ifaddr * 668d1dd20beSSam Leffler ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) 669df8bae1dSRodney W. Grimes { 670a8498625SBjoern A. Zeeb 671a8498625SBjoern A. Zeeb return (ifa_ifwithroute_fib(flags, dst, gateway, RT_DEFAULT_FIB)); 6728b07e49aSJulian Elischer } 6738b07e49aSJulian Elischer 6748b07e49aSJulian Elischer struct ifaddr * 6758b07e49aSJulian Elischer ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, 6768b07e49aSJulian Elischer u_int fibnum) 6778b07e49aSJulian Elischer { 678df8bae1dSRodney W. Grimes register struct ifaddr *ifa; 679e034e82cSQing Li int not_found = 0; 680d1dd20beSSam Leffler 681df8bae1dSRodney W. Grimes if ((flags & RTF_GATEWAY) == 0) { 682df8bae1dSRodney W. Grimes /* 683df8bae1dSRodney W. Grimes * If we are adding a route to an interface, 684df8bae1dSRodney W. Grimes * and the interface is a pt to pt link 685df8bae1dSRodney W. Grimes * we should search for the destination 686df8bae1dSRodney W. Grimes * as our clue to the interface. Otherwise 687df8bae1dSRodney W. Grimes * we can use the local address. 688df8bae1dSRodney W. Grimes */ 68985911824SLuigi Rizzo ifa = NULL; 69085911824SLuigi Rizzo if (flags & RTF_HOST) 691df8bae1dSRodney W. Grimes ifa = ifa_ifwithdstaddr(dst); 69285911824SLuigi Rizzo if (ifa == NULL) 693df8bae1dSRodney W. Grimes ifa = ifa_ifwithaddr(gateway); 694df8bae1dSRodney W. Grimes } else { 695df8bae1dSRodney W. Grimes /* 696df8bae1dSRodney W. Grimes * If we are adding a route to a remote net 697df8bae1dSRodney W. Grimes * or host, the gateway may still be on the 698df8bae1dSRodney W. Grimes * other end of a pt to pt link. 699df8bae1dSRodney W. Grimes */ 700df8bae1dSRodney W. Grimes ifa = ifa_ifwithdstaddr(gateway); 701df8bae1dSRodney W. Grimes } 70285911824SLuigi Rizzo if (ifa == NULL) 7030ed6142bSQing Li ifa = ifa_ifwithnet(gateway, 0); 70485911824SLuigi Rizzo if (ifa == NULL) { 7059b20205dSKip Macy struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); 70685911824SLuigi Rizzo if (rt == NULL) 70785911824SLuigi Rizzo return (NULL); 708e034e82cSQing Li /* 709e034e82cSQing Li * dismiss a gateway that is reachable only 710e034e82cSQing Li * through the default router 711e034e82cSQing Li */ 712e034e82cSQing Li switch (gateway->sa_family) { 713e034e82cSQing Li case AF_INET: 714e034e82cSQing Li if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 715e034e82cSQing Li not_found = 1; 716e034e82cSQing Li break; 717e034e82cSQing Li case AF_INET6: 718e034e82cSQing Li if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 719e034e82cSQing Li not_found = 1; 720e034e82cSQing Li break; 721e034e82cSQing Li default: 722e034e82cSQing Li break; 723e034e82cSQing Li } 7248c0fec80SRobert Watson if (!not_found && rt->rt_ifa != NULL) { 7258c0fec80SRobert Watson ifa = rt->rt_ifa; 7268c0fec80SRobert Watson ifa_ref(ifa); 7278c0fec80SRobert Watson } 7287138d65cSSam Leffler RT_REMREF(rt); 729d1dd20beSSam Leffler RT_UNLOCK(rt); 7308c0fec80SRobert Watson if (not_found || ifa == NULL) 73185911824SLuigi Rizzo return (NULL); 732df8bae1dSRodney W. Grimes } 733df8bae1dSRodney W. Grimes if (ifa->ifa_addr->sa_family != dst->sa_family) { 734df8bae1dSRodney W. Grimes struct ifaddr *oifa = ifa; 735df8bae1dSRodney W. Grimes ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 73685911824SLuigi Rizzo if (ifa == NULL) 737df8bae1dSRodney W. Grimes ifa = oifa; 7388c0fec80SRobert Watson else 7398c0fec80SRobert Watson ifa_free(oifa); 740df8bae1dSRodney W. Grimes } 741df8bae1dSRodney W. Grimes return (ifa); 742df8bae1dSRodney W. Grimes } 743df8bae1dSRodney W. Grimes 744b0a76b88SJulian Elischer /* 745b0a76b88SJulian Elischer * Do appropriate manipulations of a routing tree given 746b0a76b88SJulian Elischer * all the bits of info needed 747b0a76b88SJulian Elischer */ 748df8bae1dSRodney W. Grimes int 749d1dd20beSSam Leffler rtrequest(int req, 750d1dd20beSSam Leffler struct sockaddr *dst, 751d1dd20beSSam Leffler struct sockaddr *gateway, 752d1dd20beSSam Leffler struct sockaddr *netmask, 753d1dd20beSSam Leffler int flags, 754d1dd20beSSam Leffler struct rtentry **ret_nrt) 755df8bae1dSRodney W. Grimes { 756a8498625SBjoern A. Zeeb 757a8498625SBjoern A. Zeeb return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 758a8498625SBjoern A. Zeeb RT_DEFAULT_FIB)); 7598b07e49aSJulian Elischer } 7608b07e49aSJulian Elischer 7618b07e49aSJulian Elischer int 7628b07e49aSJulian Elischer rtrequest_fib(int req, 7638b07e49aSJulian Elischer struct sockaddr *dst, 7648b07e49aSJulian Elischer struct sockaddr *gateway, 7658b07e49aSJulian Elischer struct sockaddr *netmask, 7668b07e49aSJulian Elischer int flags, 7678b07e49aSJulian Elischer struct rtentry **ret_nrt, 7688b07e49aSJulian Elischer u_int fibnum) 7698b07e49aSJulian Elischer { 7708071913dSRuslan Ermilov struct rt_addrinfo info; 7718071913dSRuslan Ermilov 772ac4a76ebSBjoern A. Zeeb if (dst->sa_len == 0) 773ac4a76ebSBjoern A. Zeeb return(EINVAL); 774ac4a76ebSBjoern A. Zeeb 7758071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 7768071913dSRuslan Ermilov info.rti_flags = flags; 7778071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 7788071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = gateway; 7798071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 7808b07e49aSJulian Elischer return rtrequest1_fib(req, &info, ret_nrt, fibnum); 7818071913dSRuslan Ermilov } 7828071913dSRuslan Ermilov 7838071913dSRuslan Ermilov /* 7848071913dSRuslan Ermilov * These (questionable) definitions of apparent local variables apply 7858071913dSRuslan Ermilov * to the next two functions. XXXXXX!!! 7868071913dSRuslan Ermilov */ 7878071913dSRuslan Ermilov #define dst info->rti_info[RTAX_DST] 7888071913dSRuslan Ermilov #define gateway info->rti_info[RTAX_GATEWAY] 7898071913dSRuslan Ermilov #define netmask info->rti_info[RTAX_NETMASK] 7908071913dSRuslan Ermilov #define ifaaddr info->rti_info[RTAX_IFA] 7918071913dSRuslan Ermilov #define ifpaddr info->rti_info[RTAX_IFP] 7928071913dSRuslan Ermilov #define flags info->rti_flags 7938071913dSRuslan Ermilov 7948071913dSRuslan Ermilov int 795d1dd20beSSam Leffler rt_getifa(struct rt_addrinfo *info) 7968071913dSRuslan Ermilov { 797a8498625SBjoern A. Zeeb 798a8498625SBjoern A. Zeeb return (rt_getifa_fib(info, RT_DEFAULT_FIB)); 7998b07e49aSJulian Elischer } 8008b07e49aSJulian Elischer 8018c0fec80SRobert Watson /* 8028c0fec80SRobert Watson * Look up rt_addrinfo for a specific fib. Note that if rti_ifa is defined, 8038c0fec80SRobert Watson * it will be referenced so the caller must free it. 8048c0fec80SRobert Watson */ 8058b07e49aSJulian Elischer int 8068b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 8078b07e49aSJulian Elischer { 8088071913dSRuslan Ermilov struct ifaddr *ifa; 8098071913dSRuslan Ermilov int error = 0; 8108071913dSRuslan Ermilov 8118071913dSRuslan Ermilov /* 8128071913dSRuslan Ermilov * ifp may be specified by sockaddr_dl 8138071913dSRuslan Ermilov * when protocol address is ambiguous. 8148071913dSRuslan Ermilov */ 8158071913dSRuslan Ermilov if (info->rti_ifp == NULL && ifpaddr != NULL && 8168071913dSRuslan Ermilov ifpaddr->sa_family == AF_LINK && 8170ed6142bSQing Li (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) { 8188071913dSRuslan Ermilov info->rti_ifp = ifa->ifa_ifp; 8198c0fec80SRobert Watson ifa_free(ifa); 8208c0fec80SRobert Watson } 8218071913dSRuslan Ermilov if (info->rti_ifa == NULL && ifaaddr != NULL) 8228071913dSRuslan Ermilov info->rti_ifa = ifa_ifwithaddr(ifaaddr); 8238071913dSRuslan Ermilov if (info->rti_ifa == NULL) { 8248071913dSRuslan Ermilov struct sockaddr *sa; 8258071913dSRuslan Ermilov 8268071913dSRuslan Ermilov sa = ifaaddr != NULL ? ifaaddr : 8278071913dSRuslan Ermilov (gateway != NULL ? gateway : dst); 8288071913dSRuslan Ermilov if (sa != NULL && info->rti_ifp != NULL) 8298071913dSRuslan Ermilov info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 8308071913dSRuslan Ermilov else if (dst != NULL && gateway != NULL) 8318b07e49aSJulian Elischer info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, 8328b07e49aSJulian Elischer fibnum); 8338071913dSRuslan Ermilov else if (sa != NULL) 8348b07e49aSJulian Elischer info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, 8358b07e49aSJulian Elischer fibnum); 8368071913dSRuslan Ermilov } 8378071913dSRuslan Ermilov if ((ifa = info->rti_ifa) != NULL) { 8388071913dSRuslan Ermilov if (info->rti_ifp == NULL) 8398071913dSRuslan Ermilov info->rti_ifp = ifa->ifa_ifp; 8408071913dSRuslan Ermilov } else 8418071913dSRuslan Ermilov error = ENETUNREACH; 8428071913dSRuslan Ermilov return (error); 8438071913dSRuslan Ermilov } 8448071913dSRuslan Ermilov 8459c63e9dbSSam Leffler /* 8469c63e9dbSSam Leffler * Expunges references to a route that's about to be reclaimed. 8479c63e9dbSSam Leffler * The route must be locked. 8489c63e9dbSSam Leffler */ 8499c63e9dbSSam Leffler int 8509c63e9dbSSam Leffler rtexpunge(struct rtentry *rt) 8519c63e9dbSSam Leffler { 852c7ea0aa6SQing Li #if !defined(RADIX_MPATH) 8539c63e9dbSSam Leffler struct radix_node *rn; 854c7ea0aa6SQing Li #else 855c7ea0aa6SQing Li struct rt_addrinfo info; 856c7ea0aa6SQing Li int fib; 857c7ea0aa6SQing Li struct rtentry *rt0; 858c7ea0aa6SQing Li #endif 8599c63e9dbSSam Leffler struct radix_node_head *rnh; 8609c63e9dbSSam Leffler struct ifaddr *ifa; 8619c63e9dbSSam Leffler int error = 0; 8629c63e9dbSSam Leffler 8636e6b3f7cSQing Li /* 8646e6b3f7cSQing Li * Find the correct routing tree to use for this Address Family 8656e6b3f7cSQing Li */ 866c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 8679c63e9dbSSam Leffler RT_LOCK_ASSERT(rt); 8686e6b3f7cSQing Li if (rnh == NULL) 8696e6b3f7cSQing Li return (EAFNOSUPPORT); 8703120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 871c7ea0aa6SQing Li 872c7ea0aa6SQing Li #ifdef RADIX_MPATH 873c7ea0aa6SQing Li fib = rt->rt_fibnum; 874c7ea0aa6SQing Li bzero(&info, sizeof(info)); 875c7ea0aa6SQing Li info.rti_ifp = rt->rt_ifp; 876c7ea0aa6SQing Li info.rti_flags = RTF_RNH_LOCKED; 877c7ea0aa6SQing Li info.rti_info[RTAX_DST] = rt_key(rt); 878c7ea0aa6SQing Li info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr; 879c7ea0aa6SQing Li 880c7ea0aa6SQing Li RT_UNLOCK(rt); 881c7ea0aa6SQing Li error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib); 882c7ea0aa6SQing Li 883c7ea0aa6SQing Li if (error == 0 && rt0 != NULL) { 884c7ea0aa6SQing Li rt = rt0; 885c7ea0aa6SQing Li RT_LOCK(rt); 886c7ea0aa6SQing Li } else if (error != 0) { 887c7ea0aa6SQing Li RT_LOCK(rt); 888c7ea0aa6SQing Li return (error); 889c7ea0aa6SQing Li } 890c7ea0aa6SQing Li #else 8919c63e9dbSSam Leffler /* 8929c63e9dbSSam Leffler * Remove the item from the tree; it should be there, 8939c63e9dbSSam Leffler * but when callers invoke us blindly it may not (sigh). 8949c63e9dbSSam Leffler */ 8959c63e9dbSSam Leffler rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); 89685911824SLuigi Rizzo if (rn == NULL) { 8979c63e9dbSSam Leffler error = ESRCH; 8989c63e9dbSSam Leffler goto bad; 8999c63e9dbSSam Leffler } 9009c63e9dbSSam Leffler KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, 9019c63e9dbSSam Leffler ("unexpected flags 0x%x", rn->rn_flags)); 902d6941ce9SLuigi Rizzo KASSERT(rt == RNTORT(rn), 9039c63e9dbSSam Leffler ("lookup mismatch, rt %p rn %p", rt, rn)); 904c7ea0aa6SQing Li #endif /* RADIX_MPATH */ 9059c63e9dbSSam Leffler 9069c63e9dbSSam Leffler rt->rt_flags &= ~RTF_UP; 9079c63e9dbSSam Leffler 9089c63e9dbSSam Leffler /* 9099c63e9dbSSam Leffler * Give the protocol a chance to keep things in sync. 9109c63e9dbSSam Leffler */ 9119c63e9dbSSam Leffler if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { 9129c63e9dbSSam Leffler struct rt_addrinfo info; 9139c63e9dbSSam Leffler 9149c63e9dbSSam Leffler bzero((caddr_t)&info, sizeof(info)); 9159c63e9dbSSam Leffler info.rti_flags = rt->rt_flags; 9169c63e9dbSSam Leffler info.rti_info[RTAX_DST] = rt_key(rt); 9179c63e9dbSSam Leffler info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 9189c63e9dbSSam Leffler info.rti_info[RTAX_NETMASK] = rt_mask(rt); 9199c63e9dbSSam Leffler ifa->ifa_rtrequest(RTM_DELETE, rt, &info); 9209c63e9dbSSam Leffler } 9219c63e9dbSSam Leffler 9229c63e9dbSSam Leffler /* 9239c63e9dbSSam Leffler * one more rtentry floating around that is not 9249c63e9dbSSam Leffler * linked to the routing table. 9259c63e9dbSSam Leffler */ 926603724d3SBjoern A. Zeeb V_rttrash++; 927c7ea0aa6SQing Li #if !defined(RADIX_MPATH) 9289c63e9dbSSam Leffler bad: 929c7ea0aa6SQing Li #endif 9309c63e9dbSSam Leffler return (error); 9319c63e9dbSSam Leffler } 9329c63e9dbSSam Leffler 933*5a2f4cbdSAlexander V. Chernikov #if 0 934*5a2f4cbdSAlexander V. Chernikov int p_sockaddr(char *buf, int buflen, struct sockaddr *s); 935*5a2f4cbdSAlexander V. Chernikov int rt_print(char *buf, int buflen, struct rtentry *rt); 936*5a2f4cbdSAlexander V. Chernikov 937*5a2f4cbdSAlexander V. Chernikov int 938*5a2f4cbdSAlexander V. Chernikov p_sockaddr(char *buf, int buflen, struct sockaddr *s) 939*5a2f4cbdSAlexander V. Chernikov { 940*5a2f4cbdSAlexander V. Chernikov void *paddr = NULL; 941*5a2f4cbdSAlexander V. Chernikov 942*5a2f4cbdSAlexander V. Chernikov switch (s->sa_family) { 943*5a2f4cbdSAlexander V. Chernikov case AF_INET: 944*5a2f4cbdSAlexander V. Chernikov paddr = &((struct sockaddr_in *)s)->sin_addr; 945*5a2f4cbdSAlexander V. Chernikov break; 946*5a2f4cbdSAlexander V. Chernikov case AF_INET6: 947*5a2f4cbdSAlexander V. Chernikov paddr = &((struct sockaddr_in6 *)s)->sin6_addr; 948*5a2f4cbdSAlexander V. Chernikov break; 949*5a2f4cbdSAlexander V. Chernikov } 950*5a2f4cbdSAlexander V. Chernikov 951*5a2f4cbdSAlexander V. Chernikov if (paddr == NULL) 952*5a2f4cbdSAlexander V. Chernikov return (0); 953*5a2f4cbdSAlexander V. Chernikov 954*5a2f4cbdSAlexander V. Chernikov if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL) 955*5a2f4cbdSAlexander V. Chernikov return (0); 956*5a2f4cbdSAlexander V. Chernikov 957*5a2f4cbdSAlexander V. Chernikov return (strlen(buf)); 958*5a2f4cbdSAlexander V. Chernikov } 959*5a2f4cbdSAlexander V. Chernikov 960*5a2f4cbdSAlexander V. Chernikov int 961*5a2f4cbdSAlexander V. Chernikov rt_print(char *buf, int buflen, struct rtentry *rt) 962*5a2f4cbdSAlexander V. Chernikov { 963*5a2f4cbdSAlexander V. Chernikov struct sockaddr *addr, *mask; 964*5a2f4cbdSAlexander V. Chernikov int i = 0; 965*5a2f4cbdSAlexander V. Chernikov 966*5a2f4cbdSAlexander V. Chernikov addr = rt_key(rt); 967*5a2f4cbdSAlexander V. Chernikov mask = rt_mask(rt); 968*5a2f4cbdSAlexander V. Chernikov 969*5a2f4cbdSAlexander V. Chernikov i = p_sockaddr(buf, buflen, addr); 970*5a2f4cbdSAlexander V. Chernikov if (!(rt->rt_flags & RTF_HOST)) { 971*5a2f4cbdSAlexander V. Chernikov buf[i++] = '/'; 972*5a2f4cbdSAlexander V. Chernikov i += p_sockaddr(buf + i, buflen - i, mask); 973*5a2f4cbdSAlexander V. Chernikov } 974*5a2f4cbdSAlexander V. Chernikov 975*5a2f4cbdSAlexander V. Chernikov if (rt->rt_flags & RTF_GATEWAY) { 976*5a2f4cbdSAlexander V. Chernikov buf[i++] = '>'; 977*5a2f4cbdSAlexander V. Chernikov i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway); 978*5a2f4cbdSAlexander V. Chernikov } 979*5a2f4cbdSAlexander V. Chernikov 980*5a2f4cbdSAlexander V. Chernikov return (i); 981*5a2f4cbdSAlexander V. Chernikov } 982*5a2f4cbdSAlexander V. Chernikov #endif 983*5a2f4cbdSAlexander V. Chernikov 984427ac07fSKip Macy #ifdef RADIX_MPATH 985427ac07fSKip Macy static int 986427ac07fSKip Macy rn_mpath_update(int req, struct rt_addrinfo *info, 987427ac07fSKip Macy struct radix_node_head *rnh, struct rtentry **ret_nrt) 988427ac07fSKip Macy { 989427ac07fSKip Macy /* 990427ac07fSKip Macy * if we got multipath routes, we require users to specify 991427ac07fSKip Macy * a matching RTAX_GATEWAY. 992427ac07fSKip Macy */ 993427ac07fSKip Macy struct rtentry *rt, *rto = NULL; 994427ac07fSKip Macy register struct radix_node *rn; 995427ac07fSKip Macy int error = 0; 996427ac07fSKip Macy 997*5a2f4cbdSAlexander V. Chernikov rn = rnh->rnh_lookup(dst, netmask, rnh); 998427ac07fSKip Macy if (rn == NULL) 999427ac07fSKip Macy return (ESRCH); 1000427ac07fSKip Macy rto = rt = RNTORT(rn); 1001*5a2f4cbdSAlexander V. Chernikov 1002427ac07fSKip Macy rt = rt_mpath_matchgate(rt, gateway); 1003427ac07fSKip Macy if (rt == NULL) 1004427ac07fSKip Macy return (ESRCH); 1005427ac07fSKip Macy /* 1006427ac07fSKip Macy * this is the first entry in the chain 1007427ac07fSKip Macy */ 1008427ac07fSKip Macy if (rto == rt) { 1009427ac07fSKip Macy rn = rn_mpath_next((struct radix_node *)rt); 1010427ac07fSKip Macy /* 1011427ac07fSKip Macy * there is another entry, now it's active 1012427ac07fSKip Macy */ 1013427ac07fSKip Macy if (rn) { 1014427ac07fSKip Macy rto = RNTORT(rn); 1015427ac07fSKip Macy RT_LOCK(rto); 1016427ac07fSKip Macy rto->rt_flags |= RTF_UP; 1017427ac07fSKip Macy RT_UNLOCK(rto); 1018427ac07fSKip Macy } else if (rt->rt_flags & RTF_GATEWAY) { 1019427ac07fSKip Macy /* 1020427ac07fSKip Macy * For gateway routes, we need to 1021427ac07fSKip Macy * make sure that we we are deleting 1022427ac07fSKip Macy * the correct gateway. 1023427ac07fSKip Macy * rt_mpath_matchgate() does not 1024427ac07fSKip Macy * check the case when there is only 1025427ac07fSKip Macy * one route in the chain. 1026427ac07fSKip Macy */ 1027427ac07fSKip Macy if (gateway && 1028427ac07fSKip Macy (rt->rt_gateway->sa_len != gateway->sa_len || 1029427ac07fSKip Macy memcmp(rt->rt_gateway, gateway, gateway->sa_len))) 1030427ac07fSKip Macy error = ESRCH; 10316a7bff2cSKip Macy else { 10326a7bff2cSKip Macy /* 10336a7bff2cSKip Macy * remove from tree before returning it 10346a7bff2cSKip Macy * to the caller 10356a7bff2cSKip Macy */ 10366a7bff2cSKip Macy rn = rnh->rnh_deladdr(dst, netmask, rnh); 10376a7bff2cSKip Macy KASSERT(rt == RNTORT(rn), ("radix node disappeared")); 10386a7bff2cSKip Macy goto gwdelete; 10396a7bff2cSKip Macy } 10406a7bff2cSKip Macy 1041427ac07fSKip Macy } 1042427ac07fSKip Macy /* 1043427ac07fSKip Macy * use the normal delete code to remove 1044427ac07fSKip Macy * the first entry 1045427ac07fSKip Macy */ 1046427ac07fSKip Macy if (req != RTM_DELETE) 1047427ac07fSKip Macy goto nondelete; 1048427ac07fSKip Macy 1049427ac07fSKip Macy error = ENOENT; 1050427ac07fSKip Macy goto done; 1051427ac07fSKip Macy } 1052427ac07fSKip Macy 1053427ac07fSKip Macy /* 1054427ac07fSKip Macy * if the entry is 2nd and on up 1055427ac07fSKip Macy */ 1056427ac07fSKip Macy if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) 1057427ac07fSKip Macy panic ("rtrequest1: rt_mpath_deldup"); 10586a7bff2cSKip Macy gwdelete: 1059427ac07fSKip Macy RT_LOCK(rt); 1060427ac07fSKip Macy RT_ADDREF(rt); 1061427ac07fSKip Macy if (req == RTM_DELETE) { 1062427ac07fSKip Macy rt->rt_flags &= ~RTF_UP; 1063427ac07fSKip Macy /* 1064427ac07fSKip Macy * One more rtentry floating around that is not 1065427ac07fSKip Macy * linked to the routing table. rttrash will be decremented 1066427ac07fSKip Macy * when RTFREE(rt) is eventually called. 1067427ac07fSKip Macy */ 1068427ac07fSKip Macy V_rttrash++; 1069427ac07fSKip Macy } 1070427ac07fSKip Macy 1071427ac07fSKip Macy nondelete: 1072427ac07fSKip Macy if (req != RTM_DELETE) 1073427ac07fSKip Macy panic("unrecognized request %d", req); 1074427ac07fSKip Macy 1075427ac07fSKip Macy 1076427ac07fSKip Macy /* 1077427ac07fSKip Macy * If the caller wants it, then it can have it, 1078427ac07fSKip Macy * but it's up to it to free the rtentry as we won't be 1079427ac07fSKip Macy * doing it. 1080427ac07fSKip Macy */ 1081427ac07fSKip Macy if (ret_nrt) { 1082427ac07fSKip Macy *ret_nrt = rt; 1083427ac07fSKip Macy RT_UNLOCK(rt); 1084427ac07fSKip Macy } else 1085427ac07fSKip Macy RTFREE_LOCKED(rt); 1086427ac07fSKip Macy done: 1087427ac07fSKip Macy return (error); 1088427ac07fSKip Macy } 1089427ac07fSKip Macy #endif 1090427ac07fSKip Macy 10918071913dSRuslan Ermilov int 10928b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 10938b07e49aSJulian Elischer u_int fibnum) 10948b07e49aSJulian Elischer { 10953120b9d4SKip Macy int error = 0, needlock = 0; 1096df8bae1dSRodney W. Grimes register struct rtentry *rt; 1097e5c610d6SQing Li #ifdef FLOWTABLE 1098e5c610d6SQing Li register struct rtentry *rt0; 1099e5c610d6SQing Li #endif 1100df8bae1dSRodney W. Grimes register struct radix_node *rn; 1101df8bae1dSRodney W. Grimes register struct radix_node_head *rnh; 1102df8bae1dSRodney W. Grimes struct ifaddr *ifa; 1103df8bae1dSRodney W. Grimes struct sockaddr *ndst; 110446a70de2SQing Li struct sockaddr_storage mdst; 1105df8bae1dSRodney W. Grimes #define senderr(x) { error = x ; goto bad; } 1106df8bae1dSRodney W. Grimes 11078b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 1108b680a383SBjoern A. Zeeb switch (dst->sa_family) { 1109b680a383SBjoern A. Zeeb case AF_INET6: 1110b680a383SBjoern A. Zeeb case AF_INET: 1111b680a383SBjoern A. Zeeb /* We support multiple FIBs. */ 1112b680a383SBjoern A. Zeeb break; 1113b680a383SBjoern A. Zeeb default: 1114b680a383SBjoern A. Zeeb fibnum = RT_DEFAULT_FIB; 1115b680a383SBjoern A. Zeeb break; 1116b680a383SBjoern A. Zeeb } 1117b680a383SBjoern A. Zeeb 1118b0a76b88SJulian Elischer /* 1119b0a76b88SJulian Elischer * Find the correct routing tree to use for this Address Family 1120b0a76b88SJulian Elischer */ 1121c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 112285911824SLuigi Rizzo if (rnh == NULL) 1123983985c1SJeffrey Hsu return (EAFNOSUPPORT); 11243120b9d4SKip Macy needlock = ((flags & RTF_RNH_LOCKED) == 0); 11253120b9d4SKip Macy flags &= ~RTF_RNH_LOCKED; 11263120b9d4SKip Macy if (needlock) 1127956b0b65SJeffrey Hsu RADIX_NODE_HEAD_LOCK(rnh); 1128c96b8224SKip Macy else 1129c96b8224SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1130b0a76b88SJulian Elischer /* 1131b0a76b88SJulian Elischer * If we are adding a host route then we don't want to put 113266953138SRuslan Ermilov * a netmask in the tree, nor do we want to clone it. 1133b0a76b88SJulian Elischer */ 11346e6b3f7cSQing Li if (flags & RTF_HOST) 113585911824SLuigi Rizzo netmask = NULL; 11366e6b3f7cSQing Li 1137df8bae1dSRodney W. Grimes switch (req) { 1138df8bae1dSRodney W. Grimes case RTM_DELETE: 113946a70de2SQing Li if (netmask) { 114046a70de2SQing Li rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask); 114146a70de2SQing Li dst = (struct sockaddr *)&mdst; 114246a70de2SQing Li } 1143e440aed9SQing Li #ifdef RADIX_MPATH 1144e440aed9SQing Li if (rn_mpath_capable(rnh)) { 1145427ac07fSKip Macy error = rn_mpath_update(req, info, rnh, ret_nrt); 1146e440aed9SQing Li /* 1147427ac07fSKip Macy * "bad" holds true for the success case 1148427ac07fSKip Macy * as well 1149e440aed9SQing Li */ 1150427ac07fSKip Macy if (error != ENOENT) 1151427ac07fSKip Macy goto bad; 1152c7ea0aa6SQing Li error = 0; 1153e440aed9SQing Li } 1154ea9cd9f2SBjoern A. Zeeb #endif 11553034f43fSAlexander V. Chernikov if ((flags & RTF_PINNED) == 0) { 11563034f43fSAlexander V. Chernikov /* Check if target route can be deleted */ 11573034f43fSAlexander V. Chernikov rt = (struct rtentry *)rnh->rnh_lookup(dst, 11583034f43fSAlexander V. Chernikov netmask, rnh); 11593034f43fSAlexander V. Chernikov if ((rt != NULL) && (rt->rt_flags & RTF_PINNED)) 11603034f43fSAlexander V. Chernikov senderr(EADDRINUSE); 11613034f43fSAlexander V. Chernikov } 11623034f43fSAlexander V. Chernikov 1163b0a76b88SJulian Elischer /* 1164b0a76b88SJulian Elischer * Remove the item from the tree and return it. 1165b0a76b88SJulian Elischer * Complain if it is not there and do no more processing. 1166b0a76b88SJulian Elischer */ 1167d1dd20beSSam Leffler rn = rnh->rnh_deladdr(dst, netmask, rnh); 116885911824SLuigi Rizzo if (rn == NULL) 1169df8bae1dSRodney W. Grimes senderr(ESRCH); 1170df8bae1dSRodney W. Grimes if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1171df8bae1dSRodney W. Grimes panic ("rtrequest delete"); 1172d6941ce9SLuigi Rizzo rt = RNTORT(rn); 1173d1dd20beSSam Leffler RT_LOCK(rt); 11747138d65cSSam Leffler RT_ADDREF(rt); 117571eba915SRuslan Ermilov rt->rt_flags &= ~RTF_UP; 1176c2bed6a3SGarrett Wollman 1177c2bed6a3SGarrett Wollman /* 1178499676dfSJulian Elischer * give the protocol a chance to keep things in sync. 1179b0a76b88SJulian Elischer */ 1180df8bae1dSRodney W. Grimes if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) 11818071913dSRuslan Ermilov ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1182499676dfSJulian Elischer 1183b0a76b88SJulian Elischer /* 1184d6941ce9SLuigi Rizzo * One more rtentry floating around that is not 1185d6941ce9SLuigi Rizzo * linked to the routing table. rttrash will be decremented 1186d6941ce9SLuigi Rizzo * when RTFREE(rt) is eventually called. 1187499676dfSJulian Elischer */ 1188603724d3SBjoern A. Zeeb V_rttrash++; 1189499676dfSJulian Elischer 1190499676dfSJulian Elischer /* 1191499676dfSJulian Elischer * If the caller wants it, then it can have it, 1192499676dfSJulian Elischer * but it's up to it to free the rtentry as we won't be 1193499676dfSJulian Elischer * doing it. 1194b0a76b88SJulian Elischer */ 1195d1dd20beSSam Leffler if (ret_nrt) { 1196df8bae1dSRodney W. Grimes *ret_nrt = rt; 1197d1dd20beSSam Leffler RT_UNLOCK(rt); 1198d1dd20beSSam Leffler } else 1199d1dd20beSSam Leffler RTFREE_LOCKED(rt); 1200df8bae1dSRodney W. Grimes break; 1201df8bae1dSRodney W. Grimes case RTM_RESOLVE: 12026e6b3f7cSQing Li /* 12036e6b3f7cSQing Li * resolve was only used for route cloning 12046e6b3f7cSQing Li * here for compat 12056e6b3f7cSQing Li */ 12066e6b3f7cSQing Li break; 1207df8bae1dSRodney W. Grimes case RTM_ADD: 12085df72964SGarrett Wollman if ((flags & RTF_GATEWAY) && !gateway) 120916a2e0a6SQing Li senderr(EINVAL); 121016a2e0a6SQing Li if (dst && gateway && (dst->sa_family != gateway->sa_family) && 121116a2e0a6SQing Li (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 121216a2e0a6SQing Li senderr(EINVAL); 12135df72964SGarrett Wollman 12148c0fec80SRobert Watson if (info->rti_ifa == NULL) { 12158c0fec80SRobert Watson error = rt_getifa_fib(info, fibnum); 12168c0fec80SRobert Watson if (error) 12178071913dSRuslan Ermilov senderr(error); 12188c0fec80SRobert Watson } else 12198c0fec80SRobert Watson ifa_ref(info->rti_ifa); 12208071913dSRuslan Ermilov ifa = info->rti_ifa; 12211ed81b73SMarko Zec rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 12228c0fec80SRobert Watson if (rt == NULL) { 12238c0fec80SRobert Watson ifa_free(ifa); 1224df8bae1dSRodney W. Grimes senderr(ENOBUFS); 12258c0fec80SRobert Watson } 1226d1dd20beSSam Leffler RT_LOCK_INIT(rt); 1227df8bae1dSRodney W. Grimes rt->rt_flags = RTF_UP | flags; 12288b07e49aSJulian Elischer rt->rt_fibnum = fibnum; 1229499676dfSJulian Elischer /* 1230a8498625SBjoern A. Zeeb * Add the gateway. Possibly re-malloc-ing the storage for it. 1231499676dfSJulian Elischer */ 1232d1dd20beSSam Leffler RT_LOCK(rt); 1233831a80b0SMatthew Dillon if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1234d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 12358c0fec80SRobert Watson ifa_free(ifa); 12361ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1237704b0666SBill Fenner senderr(error); 1238df8bae1dSRodney W. Grimes } 1239499676dfSJulian Elischer 1240499676dfSJulian Elischer /* 1241499676dfSJulian Elischer * point to the (possibly newly malloc'd) dest address. 1242499676dfSJulian Elischer */ 1243d1dd20beSSam Leffler ndst = (struct sockaddr *)rt_key(rt); 1244499676dfSJulian Elischer 1245499676dfSJulian Elischer /* 1246499676dfSJulian Elischer * make sure it contains the value we want (masked if needed). 1247499676dfSJulian Elischer */ 1248df8bae1dSRodney W. Grimes if (netmask) { 1249df8bae1dSRodney W. Grimes rt_maskedcopy(dst, ndst, netmask); 1250df8bae1dSRodney W. Grimes } else 12511838a647SLuigi Rizzo bcopy(dst, ndst, dst->sa_len); 12528e718bb4SGarrett Wollman 12538e718bb4SGarrett Wollman /* 12548c0fec80SRobert Watson * We use the ifa reference returned by rt_getifa_fib(). 12558e718bb4SGarrett Wollman * This moved from below so that rnh->rnh_addaddr() can 1256499676dfSJulian Elischer * examine the ifa and ifa->ifa_ifp if it so desires. 12578e718bb4SGarrett Wollman */ 12588e718bb4SGarrett Wollman rt->rt_ifa = ifa; 12598e718bb4SGarrett Wollman rt->rt_ifp = ifa->ifa_ifp; 1260427ac07fSKip Macy rt->rt_rmx.rmx_weight = 1; 12618e718bb4SGarrett Wollman 1262e440aed9SQing Li #ifdef RADIX_MPATH 1263e440aed9SQing Li /* do not permit exactly the same dst/mask/gw pair */ 1264e440aed9SQing Li if (rn_mpath_capable(rnh) && 1265e440aed9SQing Li rt_mpath_conflict(rnh, rt, netmask)) { 12661099f828SRobert Watson ifa_free(rt->rt_ifa); 1267e440aed9SQing Li Free(rt_key(rt)); 1268e440aed9SQing Li RT_LOCK_DESTROY(rt); 12691ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1270e440aed9SQing Li senderr(EEXIST); 1271e440aed9SQing Li } 1272e440aed9SQing Li #endif 1273e440aed9SQing Li 1274e5c610d6SQing Li #ifdef FLOWTABLE 1275e5c610d6SQing Li rt0 = NULL; 1276096f2786SBjoern A. Zeeb /* "flow-table" only supports IPv6 and IPv4 at the moment. */ 1277096f2786SBjoern A. Zeeb switch (dst->sa_family) { 1278096f2786SBjoern A. Zeeb #ifdef INET6 1279096f2786SBjoern A. Zeeb case AF_INET6: 1280096f2786SBjoern A. Zeeb #endif 1281db44ff40SBjoern A. Zeeb #ifdef INET 1282096f2786SBjoern A. Zeeb case AF_INET: 1283096f2786SBjoern A. Zeeb #endif 1284096f2786SBjoern A. Zeeb #if defined(INET6) || defined(INET) 1285e5c610d6SQing Li rn = rnh->rnh_matchaddr(dst, rnh); 1286e5c610d6SQing Li if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 1287e5c610d6SQing Li struct sockaddr *mask; 1288e5c610d6SQing Li u_char *m, *n; 1289e5c610d6SQing Li int len; 1290e5c610d6SQing Li 1291e5c610d6SQing Li /* 1292e5c610d6SQing Li * compare mask to see if the new route is 1293e5c610d6SQing Li * more specific than the existing one 1294e5c610d6SQing Li */ 1295e5c610d6SQing Li rt0 = RNTORT(rn); 1296e5c610d6SQing Li RT_LOCK(rt0); 1297e5c610d6SQing Li RT_ADDREF(rt0); 1298e5c610d6SQing Li RT_UNLOCK(rt0); 1299e5c610d6SQing Li /* 1300e5c610d6SQing Li * A host route is already present, so 1301e5c610d6SQing Li * leave the flow-table entries as is. 1302e5c610d6SQing Li */ 1303e5c610d6SQing Li if (rt0->rt_flags & RTF_HOST) { 1304e5c610d6SQing Li RTFREE(rt0); 1305e5c610d6SQing Li rt0 = NULL; 1306e5c610d6SQing Li } else if (!(flags & RTF_HOST) && netmask) { 1307e5c610d6SQing Li mask = rt_mask(rt0); 1308e5c610d6SQing Li len = mask->sa_len; 1309e5c610d6SQing Li m = (u_char *)mask; 1310e5c610d6SQing Li n = (u_char *)netmask; 1311e5c610d6SQing Li while (len-- > 0) { 1312e5c610d6SQing Li if (*n != *m) 1313e5c610d6SQing Li break; 1314e5c610d6SQing Li n++; 1315e5c610d6SQing Li m++; 1316e5c610d6SQing Li } 1317e5c610d6SQing Li if (len == 0 || (*n < *m)) { 1318e5c610d6SQing Li RTFREE(rt0); 1319e5c610d6SQing Li rt0 = NULL; 1320e5c610d6SQing Li } 1321e5c610d6SQing Li } 1322e5c610d6SQing Li } 1323096f2786SBjoern A. Zeeb #endif/* INET6 || INET */ 1324e5c610d6SQing Li } 1325096f2786SBjoern A. Zeeb #endif /* FLOWTABLE */ 1326e5c610d6SQing Li 1327d1dd20beSSam Leffler /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 1328d1dd20beSSam Leffler rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); 1329499676dfSJulian Elischer /* 1330499676dfSJulian Elischer * If it still failed to go into the tree, 1331499676dfSJulian Elischer * then un-make it (this should be a function) 1332499676dfSJulian Elischer */ 133385911824SLuigi Rizzo if (rn == NULL) { 13341099f828SRobert Watson ifa_free(rt->rt_ifa); 1335df8bae1dSRodney W. Grimes Free(rt_key(rt)); 1336d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 13371ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1338e5c610d6SQing Li #ifdef FLOWTABLE 1339e5c610d6SQing Li if (rt0 != NULL) 1340e5c610d6SQing Li RTFREE(rt0); 1341e5c610d6SQing Li #endif 1342df8bae1dSRodney W. Grimes senderr(EEXIST); 1343df8bae1dSRodney W. Grimes } 1344e5c610d6SQing Li #ifdef FLOWTABLE 1345e5c610d6SQing Li else if (rt0 != NULL) { 1346096f2786SBjoern A. Zeeb switch (dst->sa_family) { 1347096f2786SBjoern A. Zeeb #ifdef INET6 1348096f2786SBjoern A. Zeeb case AF_INET6: 1349096f2786SBjoern A. Zeeb flowtable_route_flush(V_ip6_ft, rt0); 1350096f2786SBjoern A. Zeeb break; 1351db44ff40SBjoern A. Zeeb #endif 1352096f2786SBjoern A. Zeeb #ifdef INET 1353096f2786SBjoern A. Zeeb case AF_INET: 1354096f2786SBjoern A. Zeeb flowtable_route_flush(V_ip_ft, rt0); 1355096f2786SBjoern A. Zeeb break; 1356096f2786SBjoern A. Zeeb #endif 1357096f2786SBjoern A. Zeeb } 1358e5c610d6SQing Li RTFREE(rt0); 1359e5c610d6SQing Li } 1360e5c610d6SQing Li #endif 1361499676dfSJulian Elischer 1362499676dfSJulian Elischer /* 1363a0c0e34bSGleb Smirnoff * If this protocol has something to add to this then 1364499676dfSJulian Elischer * allow it to do that as well. 1365499676dfSJulian Elischer */ 1366df8bae1dSRodney W. Grimes if (ifa->ifa_rtrequest) 13678071913dSRuslan Ermilov ifa->ifa_rtrequest(req, rt, info); 1368499676dfSJulian Elischer 1369cd02a0b7SGarrett Wollman /* 1370499676dfSJulian Elischer * actually return a resultant rtentry and 1371499676dfSJulian Elischer * give the caller a single reference. 1372499676dfSJulian Elischer */ 1373df8bae1dSRodney W. Grimes if (ret_nrt) { 1374df8bae1dSRodney W. Grimes *ret_nrt = rt; 13757138d65cSSam Leffler RT_ADDREF(rt); 1376df8bae1dSRodney W. Grimes } 1377d1dd20beSSam Leffler RT_UNLOCK(rt); 1378df8bae1dSRodney W. Grimes break; 13798071913dSRuslan Ermilov default: 13808071913dSRuslan Ermilov error = EOPNOTSUPP; 1381df8bae1dSRodney W. Grimes } 1382df8bae1dSRodney W. Grimes bad: 13833120b9d4SKip Macy if (needlock) 1384956b0b65SJeffrey Hsu RADIX_NODE_HEAD_UNLOCK(rnh); 1385df8bae1dSRodney W. Grimes return (error); 1386d1dd20beSSam Leffler #undef senderr 1387d1dd20beSSam Leffler } 1388d1dd20beSSam Leffler 13898071913dSRuslan Ermilov #undef dst 13908071913dSRuslan Ermilov #undef gateway 13918071913dSRuslan Ermilov #undef netmask 13928071913dSRuslan Ermilov #undef ifaaddr 13938071913dSRuslan Ermilov #undef ifpaddr 13948071913dSRuslan Ermilov #undef flags 1395df8bae1dSRodney W. Grimes 1396df8bae1dSRodney W. Grimes int 1397d1dd20beSSam Leffler rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1398df8bae1dSRodney W. Grimes { 1399d1dd20beSSam Leffler /* XXX dst may be overwritten, can we move this to below */ 14006e6b3f7cSQing Li int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 14016e6b3f7cSQing Li #ifdef INVARIANTS 1402c2c2a7c1SBjoern A. Zeeb struct radix_node_head *rnh; 1403c2c2a7c1SBjoern A. Zeeb 1404c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); 14056e6b3f7cSQing Li #endif 1406d1dd20beSSam Leffler 1407d1dd20beSSam Leffler RT_LOCK_ASSERT(rt); 14083120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1409df8bae1dSRodney W. Grimes 14101db1fffaSBill Fenner /* 141185911824SLuigi Rizzo * Prepare to store the gateway in rt->rt_gateway. 141285911824SLuigi Rizzo * Both dst and gateway are stored one after the other in the same 141385911824SLuigi Rizzo * malloc'd chunk. If we have room, we can reuse the old buffer, 141485911824SLuigi Rizzo * rt_gateway already points to the right place. 141585911824SLuigi Rizzo * Otherwise, malloc a new block and update the 'dst' address. 1416499676dfSJulian Elischer */ 141785911824SLuigi Rizzo if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { 141885911824SLuigi Rizzo caddr_t new; 141985911824SLuigi Rizzo 1420df8bae1dSRodney W. Grimes R_Malloc(new, caddr_t, dlen + glen); 142185911824SLuigi Rizzo if (new == NULL) 14221db1fffaSBill Fenner return ENOBUFS; 1423499676dfSJulian Elischer /* 142485911824SLuigi Rizzo * XXX note, we copy from *dst and not *rt_key(rt) because 142585911824SLuigi Rizzo * rt_setgate() can be called to initialize a newly 142685911824SLuigi Rizzo * allocated route entry, in which case rt_key(rt) == NULL 142785911824SLuigi Rizzo * (and also rt->rt_gateway == NULL). 142885911824SLuigi Rizzo * Free()/free() handle a NULL argument just fine. 1429499676dfSJulian Elischer */ 14301838a647SLuigi Rizzo bcopy(dst, new, dlen); 143185911824SLuigi Rizzo Free(rt_key(rt)); /* free old block, if any */ 1432445e045bSAlexander Kabaev rt_key(rt) = (struct sockaddr *)new; 143385911824SLuigi Rizzo rt->rt_gateway = (struct sockaddr *)(new + dlen); 1434df8bae1dSRodney W. Grimes } 1435499676dfSJulian Elischer 1436499676dfSJulian Elischer /* 143785911824SLuigi Rizzo * Copy the new gateway value into the memory chunk. 143885911824SLuigi Rizzo */ 143985911824SLuigi Rizzo bcopy(gate, rt->rt_gateway, glen); 144085911824SLuigi Rizzo 14416e6b3f7cSQing Li return (0); 1442df8bae1dSRodney W. Grimes } 1443df8bae1dSRodney W. Grimes 1444c7ab6602SQing Li void 1445d1dd20beSSam Leffler rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1446df8bae1dSRodney W. Grimes { 1447df8bae1dSRodney W. Grimes register u_char *cp1 = (u_char *)src; 1448df8bae1dSRodney W. Grimes register u_char *cp2 = (u_char *)dst; 1449df8bae1dSRodney W. Grimes register u_char *cp3 = (u_char *)netmask; 1450df8bae1dSRodney W. Grimes u_char *cplim = cp2 + *cp3; 1451df8bae1dSRodney W. Grimes u_char *cplim2 = cp2 + *cp1; 1452df8bae1dSRodney W. Grimes 1453df8bae1dSRodney W. Grimes *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1454df8bae1dSRodney W. Grimes cp3 += 2; 1455df8bae1dSRodney W. Grimes if (cplim > cplim2) 1456df8bae1dSRodney W. Grimes cplim = cplim2; 1457df8bae1dSRodney W. Grimes while (cp2 < cplim) 1458df8bae1dSRodney W. Grimes *cp2++ = *cp1++ & *cp3++; 1459df8bae1dSRodney W. Grimes if (cp2 < cplim2) 1460df8bae1dSRodney W. Grimes bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1461df8bae1dSRodney W. Grimes } 1462df8bae1dSRodney W. Grimes 1463df8bae1dSRodney W. Grimes /* 1464df8bae1dSRodney W. Grimes * Set up a routing table entry, normally 1465df8bae1dSRodney W. Grimes * for an interface. 1466df8bae1dSRodney W. Grimes */ 14678b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 14688b07e49aSJulian Elischer static inline int 14698b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1470df8bae1dSRodney W. Grimes { 14715aca0b30SLuigi Rizzo struct sockaddr *dst; 14728071913dSRuslan Ermilov struct sockaddr *netmask; 147385911824SLuigi Rizzo struct rtentry *rt = NULL; 14748071913dSRuslan Ermilov struct rt_addrinfo info; 1475e440aed9SQing Li int error = 0; 14768b07e49aSJulian Elischer int startfib, endfib; 14778b07e49aSJulian Elischer char tempbuf[_SOCKADDR_TMPSIZE]; 14788b07e49aSJulian Elischer int didwork = 0; 14798b07e49aSJulian Elischer int a_failure = 0; 14806e6b3f7cSQing Li static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; 14813034f43fSAlexander V. Chernikov struct radix_node_head *rnh; 1482df8bae1dSRodney W. Grimes 14838071913dSRuslan Ermilov if (flags & RTF_HOST) { 14848071913dSRuslan Ermilov dst = ifa->ifa_dstaddr; 14858071913dSRuslan Ermilov netmask = NULL; 14868071913dSRuslan Ermilov } else { 14878071913dSRuslan Ermilov dst = ifa->ifa_addr; 14888071913dSRuslan Ermilov netmask = ifa->ifa_netmask; 14898071913dSRuslan Ermilov } 1490b3dd0771SBjoern A. Zeeb if (dst->sa_len == 0) 1491b3dd0771SBjoern A. Zeeb return(EINVAL); 1492b680a383SBjoern A. Zeeb switch (dst->sa_family) { 1493b680a383SBjoern A. Zeeb case AF_INET6: 1494b680a383SBjoern A. Zeeb case AF_INET: 1495b680a383SBjoern A. Zeeb /* We support multiple FIBs. */ 1496b680a383SBjoern A. Zeeb break; 1497b680a383SBjoern A. Zeeb default: 1498b680a383SBjoern A. Zeeb fibnum = RT_DEFAULT_FIB; 1499b680a383SBjoern A. Zeeb break; 1500b680a383SBjoern A. Zeeb } 15018b07e49aSJulian Elischer if (fibnum == -1) { 150266e8505fSJulian Elischer if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { 150366e8505fSJulian Elischer startfib = endfib = curthread->td_proc->p_fibnum; 150466e8505fSJulian Elischer } else { 15058b07e49aSJulian Elischer startfib = 0; 15068b07e49aSJulian Elischer endfib = rt_numfibs - 1; 150766e8505fSJulian Elischer } 15088b07e49aSJulian Elischer } else { 15098b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 15108b07e49aSJulian Elischer startfib = fibnum; 15118b07e49aSJulian Elischer endfib = fibnum; 15128b07e49aSJulian Elischer } 1513ac4a76ebSBjoern A. Zeeb 1514b0a76b88SJulian Elischer /* 15158b07e49aSJulian Elischer * If it's a delete, check that if it exists, 15168b07e49aSJulian Elischer * it's on the correct interface or we might scrub 15178b07e49aSJulian Elischer * a route to another ifa which would 1518b0a76b88SJulian Elischer * be confusing at best and possibly worse. 1519b0a76b88SJulian Elischer */ 1520df8bae1dSRodney W. Grimes if (cmd == RTM_DELETE) { 1521b0a76b88SJulian Elischer /* 1522b0a76b88SJulian Elischer * It's a delete, so it should already exist.. 1523b0a76b88SJulian Elischer * If it's a net, mask off the host bits 1524b0a76b88SJulian Elischer * (Assuming we have a mask) 15258b07e49aSJulian Elischer * XXX this is kinda inet specific.. 1526b0a76b88SJulian Elischer */ 15278071913dSRuslan Ermilov if (netmask != NULL) { 15288b07e49aSJulian Elischer rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 15298b07e49aSJulian Elischer dst = (struct sockaddr *)tempbuf; 1530df8bae1dSRodney W. Grimes } 15318b07e49aSJulian Elischer } 15328b07e49aSJulian Elischer /* 15338b07e49aSJulian Elischer * Now go through all the requested tables (fibs) and do the 15348b07e49aSJulian Elischer * requested action. Realistically, this will either be fib 0 15358b07e49aSJulian Elischer * for protocols that don't do multiple tables or all the 1536a8498625SBjoern A. Zeeb * tables for those that do. 15378b07e49aSJulian Elischer */ 15388b07e49aSJulian Elischer for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 15398b07e49aSJulian Elischer if (cmd == RTM_DELETE) { 15408b07e49aSJulian Elischer struct radix_node *rn; 1541b0a76b88SJulian Elischer /* 15428071913dSRuslan Ermilov * Look up an rtentry that is in the routing tree and 15438071913dSRuslan Ermilov * contains the correct info. 1544b0a76b88SJulian Elischer */ 1545c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1546c2c2a7c1SBjoern A. Zeeb if (rnh == NULL) 15478b07e49aSJulian Elischer /* this table doesn't exist but others might */ 15488b07e49aSJulian Elischer continue; 154914126522SAlexander V. Chernikov RADIX_NODE_HEAD_RLOCK(rnh); 1550e440aed9SQing Li #ifdef RADIX_MPATH 1551e440aed9SQing Li if (rn_mpath_capable(rnh)) { 1552e440aed9SQing Li 1553e440aed9SQing Li rn = rnh->rnh_matchaddr(dst, rnh); 1554e440aed9SQing Li if (rn == NULL) 1555e440aed9SQing Li error = ESRCH; 1556e440aed9SQing Li else { 1557e440aed9SQing Li rt = RNTORT(rn); 1558e440aed9SQing Li /* 15598b07e49aSJulian Elischer * for interface route the 15608b07e49aSJulian Elischer * rt->rt_gateway is sockaddr_intf 15618b07e49aSJulian Elischer * for cloning ARP entries, so 15628b07e49aSJulian Elischer * rt_mpath_matchgate must use the 15638b07e49aSJulian Elischer * interface address 1564e440aed9SQing Li */ 15658b07e49aSJulian Elischer rt = rt_mpath_matchgate(rt, 15668b07e49aSJulian Elischer ifa->ifa_addr); 1567e440aed9SQing Li if (!rt) 1568e440aed9SQing Li error = ESRCH; 1569e440aed9SQing Li } 1570e440aed9SQing Li } 1571e440aed9SQing Li else 1572e440aed9SQing Li #endif 15738b07e49aSJulian Elischer rn = rnh->rnh_lookup(dst, netmask, rnh); 15748b07e49aSJulian Elischer error = (rn == NULL || 15758071913dSRuslan Ermilov (rn->rn_flags & RNF_ROOT) || 1576*5a2f4cbdSAlexander V. Chernikov RNTORT(rn)->rt_ifa != ifa); 157714126522SAlexander V. Chernikov RADIX_NODE_HEAD_RUNLOCK(rnh); 1578956b0b65SJeffrey Hsu if (error) { 15798b07e49aSJulian Elischer /* this is only an error if bad on ALL tables */ 15808b07e49aSJulian Elischer continue; 1581df8bae1dSRodney W. Grimes } 1582b0a76b88SJulian Elischer } 1583b0a76b88SJulian Elischer /* 1584b0a76b88SJulian Elischer * Do the actual request 1585b0a76b88SJulian Elischer */ 15868071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 15878071913dSRuslan Ermilov info.rti_ifa = ifa; 15883034f43fSAlexander V. Chernikov info.rti_flags = flags | 15893034f43fSAlexander V. Chernikov (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 15908071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 15916e6b3f7cSQing Li /* 15926e6b3f7cSQing Li * doing this for compatibility reasons 15936e6b3f7cSQing Li */ 15946e6b3f7cSQing Li if (cmd == RTM_ADD) 15956e6b3f7cSQing Li info.rti_info[RTAX_GATEWAY] = 15966e6b3f7cSQing Li (struct sockaddr *)&null_sdl; 15976e6b3f7cSQing Li else 15988071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 15998071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 16008b07e49aSJulian Elischer error = rtrequest1_fib(cmd, &info, &rt, fibnum); 16013034f43fSAlexander V. Chernikov 16023034f43fSAlexander V. Chernikov if ((error == EEXIST) && (cmd == RTM_ADD)) { 16033034f43fSAlexander V. Chernikov /* 16043034f43fSAlexander V. Chernikov * Interface route addition failed. 16053034f43fSAlexander V. Chernikov * Atomically delete current prefix generating 16063034f43fSAlexander V. Chernikov * RTM_DELETE message, and retry adding 16073034f43fSAlexander V. Chernikov * interface prefix. 16083034f43fSAlexander V. Chernikov */ 16093034f43fSAlexander V. Chernikov rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 16103034f43fSAlexander V. Chernikov RADIX_NODE_HEAD_LOCK(rnh); 16113034f43fSAlexander V. Chernikov 16123034f43fSAlexander V. Chernikov /* Delete old prefix */ 16133034f43fSAlexander V. Chernikov info.rti_ifa = NULL; 16143034f43fSAlexander V. Chernikov info.rti_flags = RTF_RNH_LOCKED; 16153034f43fSAlexander V. Chernikov 1616d54455b0SAlexander V. Chernikov error = rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); 16173034f43fSAlexander V. Chernikov if (error == 0) { 16183034f43fSAlexander V. Chernikov info.rti_ifa = ifa; 16193034f43fSAlexander V. Chernikov info.rti_flags = flags | RTF_RNH_LOCKED | 16203034f43fSAlexander V. Chernikov (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED; 16213034f43fSAlexander V. Chernikov error = rtrequest1_fib(cmd, &info, &rt, fibnum); 16223034f43fSAlexander V. Chernikov } 16233034f43fSAlexander V. Chernikov 16243034f43fSAlexander V. Chernikov RADIX_NODE_HEAD_UNLOCK(rnh); 16253034f43fSAlexander V. Chernikov } 16263034f43fSAlexander V. Chernikov 16273034f43fSAlexander V. Chernikov 16285aca0b30SLuigi Rizzo if (error == 0 && rt != NULL) { 16298071913dSRuslan Ermilov /* 16306f99b44cSBrian Somers * notify any listening routing agents of the change 16318071913dSRuslan Ermilov */ 1632d1dd20beSSam Leffler RT_LOCK(rt); 1633e440aed9SQing Li #ifdef RADIX_MPATH 1634e440aed9SQing Li /* 1635e440aed9SQing Li * in case address alias finds the first address 16368d74af36SBjoern A. Zeeb * e.g. ifconfig bge0 192.0.2.246/24 16378d74af36SBjoern A. Zeeb * e.g. ifconfig bge0 192.0.2.247/24 16388d74af36SBjoern A. Zeeb * the address set in the route is 192.0.2.246 16398d74af36SBjoern A. Zeeb * so we need to replace it with 192.0.2.247 1640e440aed9SQing Li */ 16418b07e49aSJulian Elischer if (memcmp(rt->rt_ifa->ifa_addr, 16428b07e49aSJulian Elischer ifa->ifa_addr, ifa->ifa_addr->sa_len)) { 16431099f828SRobert Watson ifa_free(rt->rt_ifa); 16441099f828SRobert Watson ifa_ref(ifa); 1645e440aed9SQing Li rt->rt_ifp = ifa->ifa_ifp; 1646e440aed9SQing Li rt->rt_ifa = ifa; 1647e440aed9SQing Li } 1648e440aed9SQing Li #endif 16496e6b3f7cSQing Li /* 16506e6b3f7cSQing Li * doing this for compatibility reasons 16516e6b3f7cSQing Li */ 16526e6b3f7cSQing Li if (cmd == RTM_ADD) { 16536e6b3f7cSQing Li ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = 16546e6b3f7cSQing Li rt->rt_ifp->if_type; 16556e6b3f7cSQing Li ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = 16566e6b3f7cSQing Li rt->rt_ifp->if_index; 16576e6b3f7cSQing Li } 16587f279720SMichael Tuexen RT_ADDREF(rt); 16597f279720SMichael Tuexen RT_UNLOCK(rt); 1660528737fdSBjoern A. Zeeb rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum); 16617f279720SMichael Tuexen RT_LOCK(rt); 16627f279720SMichael Tuexen RT_REMREF(rt); 16638071913dSRuslan Ermilov if (cmd == RTM_DELETE) { 1664b0a76b88SJulian Elischer /* 16658b07e49aSJulian Elischer * If we are deleting, and we found an entry, 16668b07e49aSJulian Elischer * then it's been removed from the tree.. 16678b07e49aSJulian Elischer * now throw it away. 1668b0a76b88SJulian Elischer */ 1669d1dd20beSSam Leffler RTFREE_LOCKED(rt); 1670d1dd20beSSam Leffler } else { 1671d1dd20beSSam Leffler if (cmd == RTM_ADD) { 1672b0a76b88SJulian Elischer /* 16738b07e49aSJulian Elischer * We just wanted to add it.. 16748b07e49aSJulian Elischer * we don't actually need a reference. 1675b0a76b88SJulian Elischer */ 16767138d65cSSam Leffler RT_REMREF(rt); 1677df8bae1dSRodney W. Grimes } 1678d1dd20beSSam Leffler RT_UNLOCK(rt); 1679d1dd20beSSam Leffler } 16808b07e49aSJulian Elischer didwork = 1; 1681df8bae1dSRodney W. Grimes } 16828b07e49aSJulian Elischer if (error) 16838b07e49aSJulian Elischer a_failure = error; 16848b07e49aSJulian Elischer } 16858b07e49aSJulian Elischer if (cmd == RTM_DELETE) { 16868b07e49aSJulian Elischer if (didwork) { 16878b07e49aSJulian Elischer error = 0; 16888b07e49aSJulian Elischer } else { 16898b07e49aSJulian Elischer /* we only give an error if it wasn't in any table */ 16908b07e49aSJulian Elischer error = ((flags & RTF_HOST) ? 16918b07e49aSJulian Elischer EHOSTUNREACH : ENETUNREACH); 16928b07e49aSJulian Elischer } 16938b07e49aSJulian Elischer } else { 16948b07e49aSJulian Elischer if (a_failure) { 16958b07e49aSJulian Elischer /* return an error if any of them failed */ 16968b07e49aSJulian Elischer error = a_failure; 16978b07e49aSJulian Elischer } 16988b07e49aSJulian Elischer } 16993ec66d6cSDavid Greenman return (error); 17003ec66d6cSDavid Greenman } 1701cb64988fSLuoqi Chen 1702a8498625SBjoern A. Zeeb #ifndef BURN_BRIDGES 17038b07e49aSJulian Elischer /* special one for inet internal use. may not use. */ 17048b07e49aSJulian Elischer int 17058b07e49aSJulian Elischer rtinit_fib(struct ifaddr *ifa, int cmd, int flags) 17068b07e49aSJulian Elischer { 17078b07e49aSJulian Elischer return (rtinit1(ifa, cmd, flags, -1)); 17088b07e49aSJulian Elischer } 1709a8498625SBjoern A. Zeeb #endif 17108b07e49aSJulian Elischer 17118b07e49aSJulian Elischer /* 17128b07e49aSJulian Elischer * Set up a routing table entry, normally 17138b07e49aSJulian Elischer * for an interface. 17148b07e49aSJulian Elischer */ 17158b07e49aSJulian Elischer int 17168b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags) 17178b07e49aSJulian Elischer { 17188b07e49aSJulian Elischer struct sockaddr *dst; 1719a8498625SBjoern A. Zeeb int fib = RT_DEFAULT_FIB; 17208b07e49aSJulian Elischer 17218b07e49aSJulian Elischer if (flags & RTF_HOST) { 17228b07e49aSJulian Elischer dst = ifa->ifa_dstaddr; 17238b07e49aSJulian Elischer } else { 17248b07e49aSJulian Elischer dst = ifa->ifa_addr; 17258b07e49aSJulian Elischer } 17268b07e49aSJulian Elischer 1727b680a383SBjoern A. Zeeb switch (dst->sa_family) { 1728b680a383SBjoern A. Zeeb case AF_INET6: 1729b680a383SBjoern A. Zeeb case AF_INET: 1730b680a383SBjoern A. Zeeb /* We do support multiple FIBs. */ 17318b07e49aSJulian Elischer fib = -1; 1732b680a383SBjoern A. Zeeb break; 1733b680a383SBjoern A. Zeeb } 17348b07e49aSJulian Elischer return (rtinit1(ifa, cmd, flags, fib)); 17358b07e49aSJulian Elischer } 1736