1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1980, 1986, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 2942e9e16dSRuslan Ermilov * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 30c3aac50fSPeter Wemm * $FreeBSD$ 31df8bae1dSRodney W. Grimes */ 328b07e49aSJulian Elischer /************************************************************************ 338b07e49aSJulian Elischer * Note: In this file a 'fib' is a "forwarding information base" * 348b07e49aSJulian Elischer * Which is the new name for an in kernel routing (next hop) table. * 358b07e49aSJulian Elischer ***********************************************************************/ 36df8bae1dSRodney W. Grimes 371d5e9e22SEivind Eklund #include "opt_inet.h" 388b07e49aSJulian Elischer #include "opt_route.h" 394bd49128SPeter Wemm #include "opt_mrouting.h" 40e440aed9SQing Li #include "opt_mpath.h" 414bd49128SPeter Wemm 42df8bae1dSRodney W. Grimes #include <sys/param.h> 43df8bae1dSRodney W. Grimes #include <sys/systm.h> 446e6b3f7cSQing Li #include <sys/syslog.h> 454d1d4912SBruce Evans #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 488b07e49aSJulian Elischer #include <sys/sysctl.h> 493120b9d4SKip Macy #include <sys/syslog.h> 508b07e49aSJulian Elischer #include <sys/sysproto.h> 518b07e49aSJulian Elischer #include <sys/proc.h> 52df8bae1dSRodney W. Grimes #include <sys/domain.h> 53cb64988fSLuoqi Chen #include <sys/kernel.h> 54603724d3SBjoern A. Zeeb #include <sys/vimage.h> 55df8bae1dSRodney W. Grimes 56df8bae1dSRodney W. Grimes #include <net/if.h> 576e6b3f7cSQing Li #include <net/if_dl.h> 58df8bae1dSRodney W. Grimes #include <net/route.h> 59df8bae1dSRodney W. 
Grimes 60e440aed9SQing Li #ifdef RADIX_MPATH 61e440aed9SQing Li #include <net/radix_mpath.h> 62e440aed9SQing Li #endif 634b79449eSBjoern A. Zeeb #include <net/vnet.h> 64e440aed9SQing Li 65df8bae1dSRodney W. Grimes #include <netinet/in.h> 66b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h> 674b79449eSBjoern A. Zeeb #include <netinet/vinet.h> 68df8bae1dSRodney W. Grimes 692dc1d581SAndre Oppermann #include <vm/uma.h> 702dc1d581SAndre Oppermann 718b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS; 728b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); 7366e8505fSJulian Elischer /* 7466e8505fSJulian Elischer * Allow the boot code to allow LESS than RT_MAXFIBS to be used. 7566e8505fSJulian Elischer * We can't do more because storage is statically allocated for now. 7666e8505fSJulian Elischer * (for compatibility reasons.. this will change). 7766e8505fSJulian Elischer */ 788b07e49aSJulian Elischer TUNABLE_INT("net.fibs", &rt_numfibs); 798b07e49aSJulian Elischer 8066e8505fSJulian Elischer /* 8166e8505fSJulian Elischer * By default add routes to all fibs for new interfaces. 8266e8505fSJulian Elischer * Once this is set to 0 then only allocate routes on interface 8366e8505fSJulian Elischer * changes for the FIB of the caller when adding a new set of addresses 8466e8505fSJulian Elischer * to an interface. XXX this is a shotgun aproach to a problem that needs 8566e8505fSJulian Elischer * a more fine grained solution.. that will come. 8666e8505fSJulian Elischer */ 8766e8505fSJulian Elischer u_int rt_add_addr_allfibs = 1; 8866e8505fSJulian Elischer SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, 8966e8505fSJulian Elischer &rt_add_addr_allfibs, 0, ""); 9066e8505fSJulian Elischer TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); 9166e8505fSJulian Elischer 9244e33a07SMarko Zec #ifdef VIMAGE_GLOBALS 93f708ef1bSPoul-Henning Kamp static struct rtstat rtstat; 94c2c2a7c1SBjoern A. 
Zeeb struct radix_node_head *rt_tables; 9528f8db14SBruce Evans 96f708ef1bSPoul-Henning Kamp static int rttrash; /* routes not in table but not freed */ 9744e33a07SMarko Zec #endif 98df8bae1dSRodney W. Grimes 99929ddbbbSAlfred Perlstein static void rt_maskedcopy(struct sockaddr *, 100929ddbbbSAlfred Perlstein struct sockaddr *, struct sockaddr *); 1011ed81b73SMarko Zec static int vnet_route_iattach(const void *); 102f708ef1bSPoul-Henning Kamp 103bfe1aba4SMarko Zec #ifndef VIMAGE_GLOBALS 104bfe1aba4SMarko Zec static const vnet_modinfo_t vnet_rtable_modinfo = { 105bfe1aba4SMarko Zec .vmi_id = VNET_MOD_RTABLE, 106bfe1aba4SMarko Zec .vmi_name = "rtable", 107bfe1aba4SMarko Zec .vmi_iattach = vnet_route_iattach 108bfe1aba4SMarko Zec }; 109bfe1aba4SMarko Zec #endif /* !VIMAGE_GLOBALS */ 110bfe1aba4SMarko Zec 111d6941ce9SLuigi Rizzo /* compare two sockaddr structures */ 112d6941ce9SLuigi Rizzo #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) 113d6941ce9SLuigi Rizzo 114d6941ce9SLuigi Rizzo /* 115d6941ce9SLuigi Rizzo * Convert a 'struct radix_node *' to a 'struct rtentry *'. 116d6941ce9SLuigi Rizzo * The operation can be done safely (in this code) because a 117d6941ce9SLuigi Rizzo * 'struct rtentry' starts with two 'struct radix_node''s, the first 118d6941ce9SLuigi Rizzo * one representing leaf nodes in the routing tree, which is 119d6941ce9SLuigi Rizzo * what the code in radix.c passes us as a 'struct radix_node'. 120d6941ce9SLuigi Rizzo * 121d6941ce9SLuigi Rizzo * But because there are a lot of assumptions in this conversion, 122d6941ce9SLuigi Rizzo * do not cast explicitly, but always use the macro below. 123d6941ce9SLuigi Rizzo */ 124d6941ce9SLuigi Rizzo #define RNTORT(p) ((struct rtentry *)(p)) 125d6941ce9SLuigi Rizzo 1261ed81b73SMarko Zec #ifdef VIMAGE_GLOBALS 1278b07e49aSJulian Elischer static uma_zone_t rtzone; /* Routing table UMA zone. 
*/ 1281ed81b73SMarko Zec #endif 1298b07e49aSJulian Elischer 1308b07e49aSJulian Elischer #if 0 1318b07e49aSJulian Elischer /* default fib for tunnels to use */ 1328b07e49aSJulian Elischer u_int tunnel_fib = 0; 1338b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, ""); 1348b07e49aSJulian Elischer #endif 1358b07e49aSJulian Elischer 1368b07e49aSJulian Elischer /* 1378b07e49aSJulian Elischer * handler for net.my_fibnum 1388b07e49aSJulian Elischer */ 1398b07e49aSJulian Elischer static int 1408b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 141df8bae1dSRodney W. Grimes { 1428b07e49aSJulian Elischer int fibnum; 1438b07e49aSJulian Elischer int error; 1448b07e49aSJulian Elischer 1458b07e49aSJulian Elischer fibnum = curthread->td_proc->p_fibnum; 1468b07e49aSJulian Elischer error = sysctl_handle_int(oidp, &fibnum, 0, req); 1478b07e49aSJulian Elischer return (error); 148df8bae1dSRodney W. Grimes } 149df8bae1dSRodney W. Grimes 1508b07e49aSJulian Elischer SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, 1518b07e49aSJulian Elischer NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); 1522dc1d581SAndre Oppermann 153c2c2a7c1SBjoern A. Zeeb static __inline struct radix_node_head ** 154c2c2a7c1SBjoern A. Zeeb rt_tables_get_rnh_ptr(int table, int fam) 155c2c2a7c1SBjoern A. Zeeb { 156c2c2a7c1SBjoern A. Zeeb INIT_VNET_NET(curvnet); 157c2c2a7c1SBjoern A. Zeeb struct radix_node_head **rnh; 158c2c2a7c1SBjoern A. Zeeb 159c2c2a7c1SBjoern A. Zeeb KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.", 160c2c2a7c1SBjoern A. Zeeb __func__)); 161c2c2a7c1SBjoern A. Zeeb KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.", 162c2c2a7c1SBjoern A. Zeeb __func__)); 163c2c2a7c1SBjoern A. Zeeb 164c2c2a7c1SBjoern A. Zeeb /* rnh is [fib=0][af=0]. */ 165c2c2a7c1SBjoern A. Zeeb rnh = (struct radix_node_head **)V_rt_tables; 166c2c2a7c1SBjoern A. Zeeb /* Get the offset to the requested table and fam. 
*/ 167c2c2a7c1SBjoern A. Zeeb rnh += table * (AF_MAX+1) + fam; 168c2c2a7c1SBjoern A. Zeeb 169c2c2a7c1SBjoern A. Zeeb return (rnh); 170c2c2a7c1SBjoern A. Zeeb } 171c2c2a7c1SBjoern A. Zeeb 172c2c2a7c1SBjoern A. Zeeb struct radix_node_head * 173c2c2a7c1SBjoern A. Zeeb rt_tables_get_rnh(int table, int fam) 174c2c2a7c1SBjoern A. Zeeb { 175c2c2a7c1SBjoern A. Zeeb 176c2c2a7c1SBjoern A. Zeeb return (*rt_tables_get_rnh_ptr(table, fam)); 177c2c2a7c1SBjoern A. Zeeb } 178c2c2a7c1SBjoern A. Zeeb 1792eb5613fSLuigi Rizzo static void 1802eb5613fSLuigi Rizzo route_init(void) 181df8bae1dSRodney W. Grimes { 1828b07e49aSJulian Elischer 1836f95a5ebSJulian Elischer /* whack the tunable ints into line. */ 1848b07e49aSJulian Elischer if (rt_numfibs > RT_MAXFIBS) 1858b07e49aSJulian Elischer rt_numfibs = RT_MAXFIBS; 1868b07e49aSJulian Elischer if (rt_numfibs == 0) 1878b07e49aSJulian Elischer rt_numfibs = 1; 188df8bae1dSRodney W. Grimes rn_init(); /* initialize all zeroes, all ones, mask table */ 1898b07e49aSJulian Elischer 190bfe1aba4SMarko Zec #ifndef VIMAGE_GLOBALS 191bfe1aba4SMarko Zec vnet_mod_register(&vnet_rtable_modinfo); 192bfe1aba4SMarko Zec #else 1931ed81b73SMarko Zec vnet_route_iattach(NULL); 194bfe1aba4SMarko Zec #endif 1951ed81b73SMarko Zec } 1961ed81b73SMarko Zec 1971ed81b73SMarko Zec static int vnet_route_iattach(const void *unused __unused) 1981ed81b73SMarko Zec { 199093f25f8SMarko Zec INIT_VNET_NET(curvnet); 2001ed81b73SMarko Zec struct domain *dom; 201c2c2a7c1SBjoern A. Zeeb struct radix_node_head **rnh; 202c2c2a7c1SBjoern A. Zeeb int table; 2031ed81b73SMarko Zec int fam; 2041ed81b73SMarko Zec 205c2c2a7c1SBjoern A. Zeeb V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) * 206c2c2a7c1SBjoern A. Zeeb sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO); 207c2c2a7c1SBjoern A. 
Zeeb 2081ed81b73SMarko Zec V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, 2091ed81b73SMarko Zec NULL, NULL, UMA_ALIGN_PTR, 0); 2108b07e49aSJulian Elischer for (dom = domains; dom; dom = dom->dom_next) { 2118b07e49aSJulian Elischer if (dom->dom_rtattach) { 2128b07e49aSJulian Elischer for (table = 0; table < rt_numfibs; table++) { 2138b07e49aSJulian Elischer if ( (fam = dom->dom_family) == AF_INET || 2148b07e49aSJulian Elischer table == 0) { 2158b07e49aSJulian Elischer /* for now only AF_INET has > 1 table */ 2168b07e49aSJulian Elischer /* XXX MRT 2178b07e49aSJulian Elischer * rtattach will be also called 2188b07e49aSJulian Elischer * from vfs_export.c but the 2198b07e49aSJulian Elischer * offset will be 0 2208b07e49aSJulian Elischer * (only for AF_INET and AF_INET6 2218b07e49aSJulian Elischer * which don't need it anyhow) 2228b07e49aSJulian Elischer */ 223c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh_ptr(table, fam); 224c2c2a7c1SBjoern A. Zeeb if (rnh == NULL) 225c2c2a7c1SBjoern A. Zeeb panic("%s: rnh NULL", __func__); 226c2c2a7c1SBjoern A. 
Zeeb dom->dom_rtattach((void **)rnh, 2278b07e49aSJulian Elischer dom->dom_rtoffset); 2288b07e49aSJulian Elischer } else { 2298b07e49aSJulian Elischer break; 2308b07e49aSJulian Elischer } 2318b07e49aSJulian Elischer } 2328b07e49aSJulian Elischer } 2338b07e49aSJulian Elischer } 2341ed81b73SMarko Zec 2351ed81b73SMarko Zec return (0); 2368b07e49aSJulian Elischer } 2378b07e49aSJulian Elischer 2388b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_ 2398b07e49aSJulian Elischer struct setfib_args { 2408b07e49aSJulian Elischer int fibnum; 2418b07e49aSJulian Elischer }; 2428b07e49aSJulian Elischer #endif 2438b07e49aSJulian Elischer int 2448b07e49aSJulian Elischer setfib(struct thread *td, struct setfib_args *uap) 2458b07e49aSJulian Elischer { 2468b07e49aSJulian Elischer if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 2478b07e49aSJulian Elischer return EINVAL; 2488b07e49aSJulian Elischer td->td_proc->p_fibnum = uap->fibnum; 2498b07e49aSJulian Elischer return (0); 250df8bae1dSRodney W. Grimes } 251df8bae1dSRodney W. Grimes 252df8bae1dSRodney W. Grimes /* 253df8bae1dSRodney W. Grimes * Packet routing routines. 254df8bae1dSRodney W. Grimes */ 255df8bae1dSRodney W. Grimes void 256d1dd20beSSam Leffler rtalloc(struct route *ro) 257df8bae1dSRodney W. Grimes { 2588b07e49aSJulian Elischer rtalloc_ign_fib(ro, 0UL, 0); 2598b07e49aSJulian Elischer } 2608b07e49aSJulian Elischer 2618b07e49aSJulian Elischer void 2628b07e49aSJulian Elischer rtalloc_fib(struct route *ro, u_int fibnum) 2638b07e49aSJulian Elischer { 2648b07e49aSJulian Elischer rtalloc_ign_fib(ro, 0UL, fibnum); 265df8bae1dSRodney W. Grimes } 266df8bae1dSRodney W. 
Grimes 267652082e6SGarrett Wollman void 268d1dd20beSSam Leffler rtalloc_ign(struct route *ro, u_long ignore) 269652082e6SGarrett Wollman { 27068f956b8SJohn Polstra struct rtentry *rt; 27168f956b8SJohn Polstra 27268f956b8SJohn Polstra if ((rt = ro->ro_rt) != NULL) { 27368f956b8SJohn Polstra if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 27468f956b8SJohn Polstra return; 27568f956b8SJohn Polstra RTFREE(rt); 27666810dd0SYoshinobu Inoue ro->ro_rt = NULL; 27768f956b8SJohn Polstra } 2788b07e49aSJulian Elischer ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0); 2798b07e49aSJulian Elischer if (ro->ro_rt) 2808b07e49aSJulian Elischer RT_UNLOCK(ro->ro_rt); 2818b07e49aSJulian Elischer } 2828b07e49aSJulian Elischer 2838b07e49aSJulian Elischer void 2848b07e49aSJulian Elischer rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) 2858b07e49aSJulian Elischer { 2868b07e49aSJulian Elischer struct rtentry *rt; 2878b07e49aSJulian Elischer 2888b07e49aSJulian Elischer if ((rt = ro->ro_rt) != NULL) { 2898b07e49aSJulian Elischer if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 2908b07e49aSJulian Elischer return; 2918b07e49aSJulian Elischer RTFREE(rt); 2928b07e49aSJulian Elischer ro->ro_rt = NULL; 2938b07e49aSJulian Elischer } 2948b07e49aSJulian Elischer ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); 295d1dd20beSSam Leffler if (ro->ro_rt) 296d1dd20beSSam Leffler RT_UNLOCK(ro->ro_rt); 297652082e6SGarrett Wollman } 298652082e6SGarrett Wollman 299b0a76b88SJulian Elischer /* 300b0a76b88SJulian Elischer * Look up the route that matches the address given 301b0a76b88SJulian Elischer * Or, at least try.. Create a cloned route if needed. 302d1dd20beSSam Leffler * 303d1dd20beSSam Leffler * The returned route, if any, is locked. 304b0a76b88SJulian Elischer */ 305df8bae1dSRodney W. Grimes struct rtentry * 306d1dd20beSSam Leffler rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 307df8bae1dSRodney W. 
Grimes { 3088b07e49aSJulian Elischer return (rtalloc1_fib(dst, report, ignflags, 0)); 3098b07e49aSJulian Elischer } 3108b07e49aSJulian Elischer 3118b07e49aSJulian Elischer struct rtentry * 3128b07e49aSJulian Elischer rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 3138b07e49aSJulian Elischer u_int fibnum) 3148b07e49aSJulian Elischer { 3158b615593SMarko Zec INIT_VNET_NET(curvnet); 3168b07e49aSJulian Elischer struct radix_node_head *rnh; 317d1dd20beSSam Leffler struct rtentry *rt; 318d1dd20beSSam Leffler struct radix_node *rn; 319d1dd20beSSam Leffler struct rtentry *newrt; 320df8bae1dSRodney W. Grimes struct rt_addrinfo info; 3216e6b3f7cSQing Li int err = 0, msgtype = RTM_MISS; 3223120b9d4SKip Macy int needlock; 323df8bae1dSRodney W. Grimes 3248b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 3258b07e49aSJulian Elischer if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ 3268b07e49aSJulian Elischer fibnum = 0; 327c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 32885911824SLuigi Rizzo newrt = NULL; 329b0a76b88SJulian Elischer /* 330b0a76b88SJulian Elischer * Look up the address in the table for that Address Family 331b0a76b88SJulian Elischer */ 332956b0b65SJeffrey Hsu if (rnh == NULL) { 333603724d3SBjoern A. 
Zeeb V_rtstat.rts_unreach++; 3346e6b3f7cSQing Li goto miss; 335956b0b65SJeffrey Hsu } 3363120b9d4SKip Macy needlock = !(ignflags & RTF_RNH_LOCKED); 3373120b9d4SKip Macy if (needlock) 3383120b9d4SKip Macy RADIX_NODE_HEAD_RLOCK(rnh); 3393120b9d4SKip Macy #ifdef INVARIANTS 3403120b9d4SKip Macy else 3413120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 3423120b9d4SKip Macy #endif 3433120b9d4SKip Macy rn = rnh->rnh_matchaddr(dst, rnh); 3443120b9d4SKip Macy if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 345d6941ce9SLuigi Rizzo newrt = rt = RNTORT(rn); 3463120b9d4SKip Macy RT_LOCK(newrt); 3473120b9d4SKip Macy RT_ADDREF(newrt); 3483120b9d4SKip Macy if (needlock) 3493120b9d4SKip Macy RADIX_NODE_HEAD_RUNLOCK(rnh); 3503120b9d4SKip Macy goto done; 3516e6b3f7cSQing Li 3526e6b3f7cSQing Li } else if (needlock) 3533120b9d4SKip Macy RADIX_NODE_HEAD_RUNLOCK(rnh); 3543120b9d4SKip Macy 355b0a76b88SJulian Elischer /* 356b0a76b88SJulian Elischer * Either we hit the root or couldn't find any match, 357b0a76b88SJulian Elischer * Which basically means 358b0a76b88SJulian Elischer * "caint get there frm here" 359b0a76b88SJulian Elischer */ 360603724d3SBjoern A. Zeeb V_rtstat.rts_unreach++; 361956b0b65SJeffrey Hsu miss: 3626e6b3f7cSQing Li if (report) { 363b0a76b88SJulian Elischer /* 364b0a76b88SJulian Elischer * If required, report the failure to the supervising 365b0a76b88SJulian Elischer * Authorities. 366b0a76b88SJulian Elischer * For a delete, this is not an error. (report == 0) 367b0a76b88SJulian Elischer */ 3686f5967c0SBruce Evans bzero(&info, sizeof(info)); 369df8bae1dSRodney W. Grimes info.rti_info[RTAX_DST] = dst; 370df8bae1dSRodney W. Grimes rt_missmsg(msgtype, &info, 0, err); 371df8bae1dSRodney W. Grimes } 3723120b9d4SKip Macy done: 373d1dd20beSSam Leffler if (newrt) 374d1dd20beSSam Leffler RT_LOCK_ASSERT(newrt); 375df8bae1dSRodney W. Grimes return (newrt); 376df8bae1dSRodney W. Grimes } 377df8bae1dSRodney W. 
Grimes 378499676dfSJulian Elischer /* 379499676dfSJulian Elischer * Remove a reference count from an rtentry. 380499676dfSJulian Elischer * If the count gets low enough, take it out of the routing table 381499676dfSJulian Elischer */ 382df8bae1dSRodney W. Grimes void 383d1dd20beSSam Leffler rtfree(struct rtentry *rt) 384df8bae1dSRodney W. Grimes { 3858b615593SMarko Zec INIT_VNET_NET(curvnet); 38685911824SLuigi Rizzo struct radix_node_head *rnh; 387df8bae1dSRodney W. Grimes 388a0c0e34bSGleb Smirnoff KASSERT(rt != NULL,("%s: NULL rt", __func__)); 389c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 390a0c0e34bSGleb Smirnoff KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 391499676dfSJulian Elischer 392d1dd20beSSam Leffler RT_LOCK_ASSERT(rt); 393d1dd20beSSam Leffler 394499676dfSJulian Elischer /* 395a0c0e34bSGleb Smirnoff * The callers should use RTFREE_LOCKED() or RTFREE(), so 396a0c0e34bSGleb Smirnoff * we should come here exactly with the last reference. 397499676dfSJulian Elischer */ 3987138d65cSSam Leffler RT_REMREF(rt); 399a0c0e34bSGleb Smirnoff if (rt->rt_refcnt > 0) { 400a42ea597SQing Li log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 401d1dd20beSSam Leffler goto done; 402a0c0e34bSGleb Smirnoff } 4039c63e9dbSSam Leffler 4049c63e9dbSSam Leffler /* 4059c63e9dbSSam Leffler * On last reference give the "close method" a chance 4069c63e9dbSSam Leffler * to cleanup private state. This also permits (for 4079c63e9dbSSam Leffler * IPv4 and IPv6) a chance to decide if the routing table 4089c63e9dbSSam Leffler * entry should be purged immediately or at a later time. 4099c63e9dbSSam Leffler * When an immediate purge is to happen the close routine 4109c63e9dbSSam Leffler * typically calls rtexpunge which clears the RTF_UP flag 4119c63e9dbSSam Leffler * on the entry so that the code below reclaims the storage. 
4129c63e9dbSSam Leffler */ 413d1dd20beSSam Leffler if (rt->rt_refcnt == 0 && rnh->rnh_close) 4145c2dae8eSGarrett Wollman rnh->rnh_close((struct radix_node *)rt, rnh); 415499676dfSJulian Elischer 416499676dfSJulian Elischer /* 417499676dfSJulian Elischer * If we are no longer "up" (and ref == 0) 418499676dfSJulian Elischer * then we can free the resources associated 419499676dfSJulian Elischer * with the route. 420499676dfSJulian Elischer */ 421d1dd20beSSam Leffler if ((rt->rt_flags & RTF_UP) == 0) { 422df8bae1dSRodney W. Grimes if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 423df8bae1dSRodney W. Grimes panic("rtfree 2"); 424499676dfSJulian Elischer /* 425499676dfSJulian Elischer * the rtentry must have been removed from the routing table 426499676dfSJulian Elischer * so it is represented in rttrash.. remove that now. 427499676dfSJulian Elischer */ 428603724d3SBjoern A. Zeeb V_rttrash--; 429499676dfSJulian Elischer #ifdef DIAGNOSTIC 430df8bae1dSRodney W. Grimes if (rt->rt_refcnt < 0) { 431623ae52eSPoul-Henning Kamp printf("rtfree: %p not freed (neg refs)\n", rt); 432d1dd20beSSam Leffler goto done; 433df8bae1dSRodney W. Grimes } 434499676dfSJulian Elischer #endif 435499676dfSJulian Elischer /* 436499676dfSJulian Elischer * release references on items we hold them on.. 437499676dfSJulian Elischer * e.g other routes and ifaddrs. 438499676dfSJulian Elischer */ 43919fc74fbSJeffrey Hsu if (rt->rt_ifa) 44019fc74fbSJeffrey Hsu IFAFREE(rt->rt_ifa); 441499676dfSJulian Elischer /* 442499676dfSJulian Elischer * The key is separatly alloc'd so free it (see rt_setgate()). 443499676dfSJulian Elischer * This also frees the gateway, as they are always malloc'd 444499676dfSJulian Elischer * together. 445499676dfSJulian Elischer */ 446df8bae1dSRodney W. 
Grimes Free(rt_key(rt)); 447499676dfSJulian Elischer 448499676dfSJulian Elischer /* 449499676dfSJulian Elischer * and the rtentry itself of course 450499676dfSJulian Elischer */ 451d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 4521ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 453d1dd20beSSam Leffler return; 454df8bae1dSRodney W. Grimes } 455d1dd20beSSam Leffler done: 456d1dd20beSSam Leffler RT_UNLOCK(rt); 457df8bae1dSRodney W. Grimes } 458df8bae1dSRodney W. Grimes 459df8bae1dSRodney W. Grimes 460df8bae1dSRodney W. Grimes /* 461df8bae1dSRodney W. Grimes * Force a routing table entry to the specified 462df8bae1dSRodney W. Grimes * destination to go through the given gateway. 463df8bae1dSRodney W. Grimes * Normally called as a result of a routing redirect 464df8bae1dSRodney W. Grimes * message from the network layer. 465df8bae1dSRodney W. Grimes */ 46626f9a767SRodney W. Grimes void 467d1dd20beSSam Leffler rtredirect(struct sockaddr *dst, 468d1dd20beSSam Leffler struct sockaddr *gateway, 469d1dd20beSSam Leffler struct sockaddr *netmask, 470d1dd20beSSam Leffler int flags, 471d1dd20beSSam Leffler struct sockaddr *src) 472df8bae1dSRodney W. Grimes { 4738b07e49aSJulian Elischer rtredirect_fib(dst, gateway, netmask, flags, src, 0); 4748b07e49aSJulian Elischer } 4758b07e49aSJulian Elischer 4768b07e49aSJulian Elischer void 4778b07e49aSJulian Elischer rtredirect_fib(struct sockaddr *dst, 4788b07e49aSJulian Elischer struct sockaddr *gateway, 4798b07e49aSJulian Elischer struct sockaddr *netmask, 4808b07e49aSJulian Elischer int flags, 4818b07e49aSJulian Elischer struct sockaddr *src, 4828b07e49aSJulian Elischer u_int fibnum) 4838b07e49aSJulian Elischer { 4848b615593SMarko Zec INIT_VNET_NET(curvnet); 4858e7e854cSKip Macy struct rtentry *rt, *rt0 = NULL; 486df8bae1dSRodney W. Grimes int error = 0; 48785911824SLuigi Rizzo short *stat = NULL; 488df8bae1dSRodney W. Grimes struct rt_addrinfo info; 489df8bae1dSRodney W. Grimes struct ifaddr *ifa; 490c2c2a7c1SBjoern A. 
Zeeb struct radix_node_head *rnh; 491c2c2a7c1SBjoern A. Zeeb 492c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 493c2c2a7c1SBjoern A. Zeeb if (rnh == NULL) { 494c2c2a7c1SBjoern A. Zeeb error = EAFNOSUPPORT; 495c2c2a7c1SBjoern A. Zeeb goto out; 496c2c2a7c1SBjoern A. Zeeb } 497df8bae1dSRodney W. Grimes 498df8bae1dSRodney W. Grimes /* verify the gateway is directly reachable */ 49985911824SLuigi Rizzo if ((ifa = ifa_ifwithnet(gateway)) == NULL) { 500df8bae1dSRodney W. Grimes error = ENETUNREACH; 501df8bae1dSRodney W. Grimes goto out; 502df8bae1dSRodney W. Grimes } 5038b07e49aSJulian Elischer rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ 504df8bae1dSRodney W. Grimes /* 505df8bae1dSRodney W. Grimes * If the redirect isn't from our current router for this dst, 506df8bae1dSRodney W. Grimes * it's either old or wrong. If it redirects us to ourselves, 507df8bae1dSRodney W. Grimes * we have a routing loop, perhaps as a result of an interface 508df8bae1dSRodney W. Grimes * going down recently. 509df8bae1dSRodney W. Grimes */ 510df8bae1dSRodney W. Grimes if (!(flags & RTF_DONE) && rt && 511956b0b65SJeffrey Hsu (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) 512df8bae1dSRodney W. Grimes error = EINVAL; 513df8bae1dSRodney W. Grimes else if (ifa_ifwithaddr(gateway)) 514df8bae1dSRodney W. Grimes error = EHOSTUNREACH; 515df8bae1dSRodney W. Grimes if (error) 516df8bae1dSRodney W. Grimes goto done; 517df8bae1dSRodney W. Grimes /* 518df8bae1dSRodney W. Grimes * Create a new entry if we just got back a wildcard entry 519df8bae1dSRodney W. Grimes * or the the lookup failed. This is necessary for hosts 520df8bae1dSRodney W. Grimes * which use routing redirects generated by smart gateways 521df8bae1dSRodney W. Grimes * to dynamically build the routing tables. 522df8bae1dSRodney W. Grimes */ 52385911824SLuigi Rizzo if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 524df8bae1dSRodney W. Grimes goto create; 525df8bae1dSRodney W. 
Grimes /* 526df8bae1dSRodney W. Grimes * Don't listen to the redirect if it's 527df8bae1dSRodney W. Grimes * for a route to an interface. 528df8bae1dSRodney W. Grimes */ 529df8bae1dSRodney W. Grimes if (rt->rt_flags & RTF_GATEWAY) { 530df8bae1dSRodney W. Grimes if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 531df8bae1dSRodney W. Grimes /* 532df8bae1dSRodney W. Grimes * Changing from route to net => route to host. 533df8bae1dSRodney W. Grimes * Create new route, rather than smashing route to net. 534df8bae1dSRodney W. Grimes */ 535df8bae1dSRodney W. Grimes create: 5368e7e854cSKip Macy rt0 = rt; 5378e7e854cSKip Macy rt = NULL; 5388e7e854cSKip Macy 539df8bae1dSRodney W. Grimes flags |= RTF_GATEWAY | RTF_DYNAMIC; 5408071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 5418071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 5428071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = gateway; 5438071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 5448071913dSRuslan Ermilov info.rti_ifa = ifa; 5458071913dSRuslan Ermilov info.rti_flags = flags; 5463120b9d4SKip Macy if (rt0 != NULL) 5473120b9d4SKip Macy RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ 5488b07e49aSJulian Elischer error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 549d1dd20beSSam Leffler if (rt != NULL) { 5504de5d90cSSam Leffler RT_LOCK(rt); 5513120b9d4SKip Macy if (rt0 != NULL) 55229910a5aSKip Macy EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); 5538071913dSRuslan Ermilov flags = rt->rt_flags; 554d1dd20beSSam Leffler } 5553120b9d4SKip Macy if (rt0 != NULL) 5563120b9d4SKip Macy RTFREE(rt0); 5578e7e854cSKip Macy 558603724d3SBjoern A. Zeeb stat = &V_rtstat.rts_dynamic; 559df8bae1dSRodney W. Grimes } else { 5608e7e854cSKip Macy struct rtentry *gwrt; 5618e7e854cSKip Macy 562df8bae1dSRodney W. Grimes /* 563df8bae1dSRodney W. Grimes * Smash the current notion of the gateway to 564df8bae1dSRodney W. Grimes * this destination. Should check about netmask!!! 
565df8bae1dSRodney W. Grimes */ 566df8bae1dSRodney W. Grimes rt->rt_flags |= RTF_MODIFIED; 567df8bae1dSRodney W. Grimes flags |= RTF_MODIFIED; 568603724d3SBjoern A. Zeeb stat = &V_rtstat.rts_newgateway; 569499676dfSJulian Elischer /* 570499676dfSJulian Elischer * add the key and gateway (in one malloc'd chunk). 571499676dfSJulian Elischer */ 5723120b9d4SKip Macy RT_UNLOCK(rt); 5733120b9d4SKip Macy RADIX_NODE_HEAD_LOCK(rnh); 5743120b9d4SKip Macy RT_LOCK(rt); 575df8bae1dSRodney W. Grimes rt_setgate(rt, rt_key(rt), gateway); 5763120b9d4SKip Macy gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); 5773120b9d4SKip Macy RADIX_NODE_HEAD_UNLOCK(rnh); 57829910a5aSKip Macy EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); 5798e7e854cSKip Macy RTFREE_LOCKED(gwrt); 580df8bae1dSRodney W. Grimes } 581df8bae1dSRodney W. Grimes } else 582df8bae1dSRodney W. Grimes error = EHOSTUNREACH; 583df8bae1dSRodney W. Grimes done: 584d1dd20beSSam Leffler if (rt) 5851951e633SJohn Baldwin RTFREE_LOCKED(rt); 586df8bae1dSRodney W. Grimes out: 587df8bae1dSRodney W. Grimes if (error) 588603724d3SBjoern A. Zeeb V_rtstat.rts_badredirect++; 589df8bae1dSRodney W. Grimes else if (stat != NULL) 590df8bae1dSRodney W. Grimes (*stat)++; 591df8bae1dSRodney W. Grimes bzero((caddr_t)&info, sizeof(info)); 592df8bae1dSRodney W. Grimes info.rti_info[RTAX_DST] = dst; 593df8bae1dSRodney W. Grimes info.rti_info[RTAX_GATEWAY] = gateway; 594df8bae1dSRodney W. Grimes info.rti_info[RTAX_NETMASK] = netmask; 595df8bae1dSRodney W. Grimes info.rti_info[RTAX_AUTHOR] = src; 596df8bae1dSRodney W. Grimes rt_missmsg(RTM_REDIRECT, &info, flags, error); 597df8bae1dSRodney W. Grimes } 598df8bae1dSRodney W. Grimes 5998b07e49aSJulian Elischer int 6008b07e49aSJulian Elischer rtioctl(u_long req, caddr_t data) 6018b07e49aSJulian Elischer { 6028b07e49aSJulian Elischer return (rtioctl_fib(req, data, 0)); 6038b07e49aSJulian Elischer } 6048b07e49aSJulian Elischer 605df8bae1dSRodney W. Grimes /* 606df8bae1dSRodney W. 
Grimes * Routing table ioctl interface. 607df8bae1dSRodney W. Grimes */ 608df8bae1dSRodney W. Grimes int 6098b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 610df8bae1dSRodney W. Grimes { 6115090559bSChristian S.J. Peron 6125090559bSChristian S.J. Peron /* 6135090559bSChristian S.J. Peron * If more ioctl commands are added here, make sure the proper 6145090559bSChristian S.J. Peron * super-user checks are being performed because it is possible for 6155090559bSChristian S.J. Peron * prison-root to make it this far if raw sockets have been enabled 6165090559bSChristian S.J. Peron * in jails. 6175090559bSChristian S.J. Peron */ 618623ae52eSPoul-Henning Kamp #ifdef INET 619f0068c4aSGarrett Wollman /* Multicast goop, grrr... */ 6208b07e49aSJulian Elischer return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 621623ae52eSPoul-Henning Kamp #else /* INET */ 622623ae52eSPoul-Henning Kamp return ENXIO; 623623ae52eSPoul-Henning Kamp #endif /* INET */ 624df8bae1dSRodney W. Grimes } 625df8bae1dSRodney W. Grimes 626df8bae1dSRodney W. Grimes struct ifaddr * 627d1dd20beSSam Leffler ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) 628df8bae1dSRodney W. Grimes { 6298b07e49aSJulian Elischer return (ifa_ifwithroute_fib(flags, dst, gateway, 0)); 6308b07e49aSJulian Elischer } 6318b07e49aSJulian Elischer 6328b07e49aSJulian Elischer struct ifaddr * 6338b07e49aSJulian Elischer ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, 6348b07e49aSJulian Elischer u_int fibnum) 6358b07e49aSJulian Elischer { 636df8bae1dSRodney W. Grimes register struct ifaddr *ifa; 637e034e82cSQing Li int not_found = 0; 638d1dd20beSSam Leffler 639df8bae1dSRodney W. Grimes if ((flags & RTF_GATEWAY) == 0) { 640df8bae1dSRodney W. Grimes /* 641df8bae1dSRodney W. Grimes * If we are adding a route to an interface, 642df8bae1dSRodney W. Grimes * and the interface is a pt to pt link 643df8bae1dSRodney W. 
Grimes * we should search for the destination 644df8bae1dSRodney W. Grimes * as our clue to the interface. Otherwise 645df8bae1dSRodney W. Grimes * we can use the local address. 646df8bae1dSRodney W. Grimes */ 64785911824SLuigi Rizzo ifa = NULL; 64885911824SLuigi Rizzo if (flags & RTF_HOST) 649df8bae1dSRodney W. Grimes ifa = ifa_ifwithdstaddr(dst); 65085911824SLuigi Rizzo if (ifa == NULL) 651df8bae1dSRodney W. Grimes ifa = ifa_ifwithaddr(gateway); 652df8bae1dSRodney W. Grimes } else { 653df8bae1dSRodney W. Grimes /* 654df8bae1dSRodney W. Grimes * If we are adding a route to a remote net 655df8bae1dSRodney W. Grimes * or host, the gateway may still be on the 656df8bae1dSRodney W. Grimes * other end of a pt to pt link. 657df8bae1dSRodney W. Grimes */ 658df8bae1dSRodney W. Grimes ifa = ifa_ifwithdstaddr(gateway); 659df8bae1dSRodney W. Grimes } 66085911824SLuigi Rizzo if (ifa == NULL) 661df8bae1dSRodney W. Grimes ifa = ifa_ifwithnet(gateway); 66285911824SLuigi Rizzo if (ifa == NULL) { 6639b20205dSKip Macy struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); 66485911824SLuigi Rizzo if (rt == NULL) 66585911824SLuigi Rizzo return (NULL); 666e034e82cSQing Li /* 667e034e82cSQing Li * dismiss a gateway that is reachable only 668e034e82cSQing Li * through the default router 669e034e82cSQing Li */ 670e034e82cSQing Li switch (gateway->sa_family) { 671e034e82cSQing Li case AF_INET: 672e034e82cSQing Li if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 673e034e82cSQing Li not_found = 1; 674e034e82cSQing Li break; 675e034e82cSQing Li case AF_INET6: 676e034e82cSQing Li if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 677e034e82cSQing Li not_found = 1; 678e034e82cSQing Li break; 679e034e82cSQing Li default: 680e034e82cSQing Li break; 681e034e82cSQing Li } 6827138d65cSSam Leffler RT_REMREF(rt); 683d1dd20beSSam Leffler RT_UNLOCK(rt); 684e034e82cSQing Li if (not_found) 685e034e82cSQing Li return (NULL); 68685911824SLuigi Rizzo if ((ifa = rt->rt_ifa) 
== NULL) 68785911824SLuigi Rizzo return (NULL); 688df8bae1dSRodney W. Grimes } 689df8bae1dSRodney W. Grimes if (ifa->ifa_addr->sa_family != dst->sa_family) { 690df8bae1dSRodney W. Grimes struct ifaddr *oifa = ifa; 691df8bae1dSRodney W. Grimes ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 69285911824SLuigi Rizzo if (ifa == NULL) 693df8bae1dSRodney W. Grimes ifa = oifa; 694df8bae1dSRodney W. Grimes } 695df8bae1dSRodney W. Grimes return (ifa); 696df8bae1dSRodney W. Grimes } 697df8bae1dSRodney W. Grimes 698b0a76b88SJulian Elischer /* 699b0a76b88SJulian Elischer * Do appropriate manipulations of a routing tree given 700b0a76b88SJulian Elischer * all the bits of info needed 701b0a76b88SJulian Elischer */ 702df8bae1dSRodney W. Grimes int 703d1dd20beSSam Leffler rtrequest(int req, 704d1dd20beSSam Leffler struct sockaddr *dst, 705d1dd20beSSam Leffler struct sockaddr *gateway, 706d1dd20beSSam Leffler struct sockaddr *netmask, 707d1dd20beSSam Leffler int flags, 708d1dd20beSSam Leffler struct rtentry **ret_nrt) 709df8bae1dSRodney W. Grimes { 7108b07e49aSJulian Elischer return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0)); 7118b07e49aSJulian Elischer } 7128b07e49aSJulian Elischer 7138b07e49aSJulian Elischer int 7148b07e49aSJulian Elischer rtrequest_fib(int req, 7158b07e49aSJulian Elischer struct sockaddr *dst, 7168b07e49aSJulian Elischer struct sockaddr *gateway, 7178b07e49aSJulian Elischer struct sockaddr *netmask, 7188b07e49aSJulian Elischer int flags, 7198b07e49aSJulian Elischer struct rtentry **ret_nrt, 7208b07e49aSJulian Elischer u_int fibnum) 7218b07e49aSJulian Elischer { 7228071913dSRuslan Ermilov struct rt_addrinfo info; 7238071913dSRuslan Ermilov 724ac4a76ebSBjoern A. Zeeb if (dst->sa_len == 0) 725ac4a76ebSBjoern A. Zeeb return(EINVAL); 726ac4a76ebSBjoern A. 
Zeeb 7278071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 7288071913dSRuslan Ermilov info.rti_flags = flags; 7298071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 7308071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = gateway; 7318071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 7328b07e49aSJulian Elischer return rtrequest1_fib(req, &info, ret_nrt, fibnum); 7338071913dSRuslan Ermilov } 7348071913dSRuslan Ermilov 7358071913dSRuslan Ermilov /* 7368071913dSRuslan Ermilov * These (questionable) definitions of apparent local variables apply 7378071913dSRuslan Ermilov * to the next two functions. XXXXXX!!! 7388071913dSRuslan Ermilov */ 7398071913dSRuslan Ermilov #define dst info->rti_info[RTAX_DST] 7408071913dSRuslan Ermilov #define gateway info->rti_info[RTAX_GATEWAY] 7418071913dSRuslan Ermilov #define netmask info->rti_info[RTAX_NETMASK] 7428071913dSRuslan Ermilov #define ifaaddr info->rti_info[RTAX_IFA] 7438071913dSRuslan Ermilov #define ifpaddr info->rti_info[RTAX_IFP] 7448071913dSRuslan Ermilov #define flags info->rti_flags 7458071913dSRuslan Ermilov 7468071913dSRuslan Ermilov int 747d1dd20beSSam Leffler rt_getifa(struct rt_addrinfo *info) 7488071913dSRuslan Ermilov { 7498b07e49aSJulian Elischer return (rt_getifa_fib(info, 0)); 7508b07e49aSJulian Elischer } 7518b07e49aSJulian Elischer 7528b07e49aSJulian Elischer int 7538b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 7548b07e49aSJulian Elischer { 7558071913dSRuslan Ermilov struct ifaddr *ifa; 7568071913dSRuslan Ermilov int error = 0; 7578071913dSRuslan Ermilov 7588071913dSRuslan Ermilov /* 7598071913dSRuslan Ermilov * ifp may be specified by sockaddr_dl 7608071913dSRuslan Ermilov * when protocol address is ambiguous. 
7618071913dSRuslan Ermilov */ 7628071913dSRuslan Ermilov if (info->rti_ifp == NULL && ifpaddr != NULL && 7638071913dSRuslan Ermilov ifpaddr->sa_family == AF_LINK && 7648071913dSRuslan Ermilov (ifa = ifa_ifwithnet(ifpaddr)) != NULL) 7658071913dSRuslan Ermilov info->rti_ifp = ifa->ifa_ifp; 7668071913dSRuslan Ermilov if (info->rti_ifa == NULL && ifaaddr != NULL) 7678071913dSRuslan Ermilov info->rti_ifa = ifa_ifwithaddr(ifaaddr); 7688071913dSRuslan Ermilov if (info->rti_ifa == NULL) { 7698071913dSRuslan Ermilov struct sockaddr *sa; 7708071913dSRuslan Ermilov 7718071913dSRuslan Ermilov sa = ifaaddr != NULL ? ifaaddr : 7728071913dSRuslan Ermilov (gateway != NULL ? gateway : dst); 7738071913dSRuslan Ermilov if (sa != NULL && info->rti_ifp != NULL) 7748071913dSRuslan Ermilov info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 7758071913dSRuslan Ermilov else if (dst != NULL && gateway != NULL) 7768b07e49aSJulian Elischer info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, 7778b07e49aSJulian Elischer fibnum); 7788071913dSRuslan Ermilov else if (sa != NULL) 7798b07e49aSJulian Elischer info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, 7808b07e49aSJulian Elischer fibnum); 7818071913dSRuslan Ermilov } 7828071913dSRuslan Ermilov if ((ifa = info->rti_ifa) != NULL) { 7838071913dSRuslan Ermilov if (info->rti_ifp == NULL) 7848071913dSRuslan Ermilov info->rti_ifp = ifa->ifa_ifp; 7858071913dSRuslan Ermilov } else 7868071913dSRuslan Ermilov error = ENETUNREACH; 7878071913dSRuslan Ermilov return (error); 7888071913dSRuslan Ermilov } 7898071913dSRuslan Ermilov 7909c63e9dbSSam Leffler /* 7919c63e9dbSSam Leffler * Expunges references to a route that's about to be reclaimed. 7929c63e9dbSSam Leffler * The route must be locked. 
7939c63e9dbSSam Leffler */ 7949c63e9dbSSam Leffler int 7959c63e9dbSSam Leffler rtexpunge(struct rtentry *rt) 7969c63e9dbSSam Leffler { 7978b615593SMarko Zec INIT_VNET_NET(curvnet); 7989c63e9dbSSam Leffler struct radix_node *rn; 7999c63e9dbSSam Leffler struct radix_node_head *rnh; 8009c63e9dbSSam Leffler struct ifaddr *ifa; 8019c63e9dbSSam Leffler int error = 0; 8029c63e9dbSSam Leffler 8036e6b3f7cSQing Li /* 8046e6b3f7cSQing Li * Find the correct routing tree to use for this Address Family 8056e6b3f7cSQing Li */ 806c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family); 8079c63e9dbSSam Leffler RT_LOCK_ASSERT(rt); 8086e6b3f7cSQing Li if (rnh == NULL) 8096e6b3f7cSQing Li return (EAFNOSUPPORT); 8103120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 8119c63e9dbSSam Leffler #if 0 8129c63e9dbSSam Leffler /* 8139c63e9dbSSam Leffler * We cannot assume anything about the reference count 8149c63e9dbSSam Leffler * because protocols call us in many situations; often 8159c63e9dbSSam Leffler * before unwinding references to the table entry. 8169c63e9dbSSam Leffler */ 8179c63e9dbSSam Leffler KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt)); 8189c63e9dbSSam Leffler #endif 8199c63e9dbSSam Leffler /* 8209c63e9dbSSam Leffler * Remove the item from the tree; it should be there, 8219c63e9dbSSam Leffler * but when callers invoke us blindly it may not (sigh). 
8229c63e9dbSSam Leffler */ 8239c63e9dbSSam Leffler rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); 82485911824SLuigi Rizzo if (rn == NULL) { 8259c63e9dbSSam Leffler error = ESRCH; 8269c63e9dbSSam Leffler goto bad; 8279c63e9dbSSam Leffler } 8289c63e9dbSSam Leffler KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, 8299c63e9dbSSam Leffler ("unexpected flags 0x%x", rn->rn_flags)); 830d6941ce9SLuigi Rizzo KASSERT(rt == RNTORT(rn), 8319c63e9dbSSam Leffler ("lookup mismatch, rt %p rn %p", rt, rn)); 8329c63e9dbSSam Leffler 8339c63e9dbSSam Leffler rt->rt_flags &= ~RTF_UP; 8349c63e9dbSSam Leffler 8359c63e9dbSSam Leffler /* 8369c63e9dbSSam Leffler * Give the protocol a chance to keep things in sync. 8379c63e9dbSSam Leffler */ 8389c63e9dbSSam Leffler if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { 8399c63e9dbSSam Leffler struct rt_addrinfo info; 8409c63e9dbSSam Leffler 8419c63e9dbSSam Leffler bzero((caddr_t)&info, sizeof(info)); 8429c63e9dbSSam Leffler info.rti_flags = rt->rt_flags; 8439c63e9dbSSam Leffler info.rti_info[RTAX_DST] = rt_key(rt); 8449c63e9dbSSam Leffler info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 8459c63e9dbSSam Leffler info.rti_info[RTAX_NETMASK] = rt_mask(rt); 8469c63e9dbSSam Leffler ifa->ifa_rtrequest(RTM_DELETE, rt, &info); 8479c63e9dbSSam Leffler } 8489c63e9dbSSam Leffler 8499c63e9dbSSam Leffler /* 8509c63e9dbSSam Leffler * one more rtentry floating around that is not 8519c63e9dbSSam Leffler * linked to the routing table. 8529c63e9dbSSam Leffler */ 853603724d3SBjoern A. 
Zeeb V_rttrash++; 8549c63e9dbSSam Leffler bad: 8559c63e9dbSSam Leffler return (error); 8569c63e9dbSSam Leffler } 8579c63e9dbSSam Leffler 858427ac07fSKip Macy #ifdef RADIX_MPATH 859427ac07fSKip Macy static int 860427ac07fSKip Macy rn_mpath_update(int req, struct rt_addrinfo *info, 861427ac07fSKip Macy struct radix_node_head *rnh, struct rtentry **ret_nrt) 862427ac07fSKip Macy { 863427ac07fSKip Macy /* 864427ac07fSKip Macy * if we got multipath routes, we require users to specify 865427ac07fSKip Macy * a matching RTAX_GATEWAY. 866427ac07fSKip Macy */ 867427ac07fSKip Macy struct rtentry *rt, *rto = NULL; 868427ac07fSKip Macy register struct radix_node *rn; 869427ac07fSKip Macy int error = 0; 870427ac07fSKip Macy 871427ac07fSKip Macy rn = rnh->rnh_matchaddr(dst, rnh); 872427ac07fSKip Macy if (rn == NULL) 873427ac07fSKip Macy return (ESRCH); 874427ac07fSKip Macy rto = rt = RNTORT(rn); 875427ac07fSKip Macy rt = rt_mpath_matchgate(rt, gateway); 876427ac07fSKip Macy if (rt == NULL) 877427ac07fSKip Macy return (ESRCH); 878427ac07fSKip Macy /* 879427ac07fSKip Macy * this is the first entry in the chain 880427ac07fSKip Macy */ 881427ac07fSKip Macy if (rto == rt) { 882427ac07fSKip Macy rn = rn_mpath_next((struct radix_node *)rt); 883427ac07fSKip Macy /* 884427ac07fSKip Macy * there is another entry, now it's active 885427ac07fSKip Macy */ 886427ac07fSKip Macy if (rn) { 887427ac07fSKip Macy rto = RNTORT(rn); 888427ac07fSKip Macy RT_LOCK(rto); 889427ac07fSKip Macy rto->rt_flags |= RTF_UP; 890427ac07fSKip Macy RT_UNLOCK(rto); 891427ac07fSKip Macy } else if (rt->rt_flags & RTF_GATEWAY) { 892427ac07fSKip Macy /* 893427ac07fSKip Macy * For gateway routes, we need to 894427ac07fSKip Macy * make sure that we we are deleting 895427ac07fSKip Macy * the correct gateway. 896427ac07fSKip Macy * rt_mpath_matchgate() does not 897427ac07fSKip Macy * check the case when there is only 898427ac07fSKip Macy * one route in the chain. 
899427ac07fSKip Macy */ 900427ac07fSKip Macy if (gateway && 901427ac07fSKip Macy (rt->rt_gateway->sa_len != gateway->sa_len || 902427ac07fSKip Macy memcmp(rt->rt_gateway, gateway, gateway->sa_len))) 903427ac07fSKip Macy error = ESRCH; 904427ac07fSKip Macy goto done; 905427ac07fSKip Macy } 906427ac07fSKip Macy /* 907427ac07fSKip Macy * use the normal delete code to remove 908427ac07fSKip Macy * the first entry 909427ac07fSKip Macy */ 910427ac07fSKip Macy if (req != RTM_DELETE) 911427ac07fSKip Macy goto nondelete; 912427ac07fSKip Macy 913427ac07fSKip Macy error = ENOENT; 914427ac07fSKip Macy goto done; 915427ac07fSKip Macy } 916427ac07fSKip Macy 917427ac07fSKip Macy /* 918427ac07fSKip Macy * if the entry is 2nd and on up 919427ac07fSKip Macy */ 920427ac07fSKip Macy if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt)) 921427ac07fSKip Macy panic ("rtrequest1: rt_mpath_deldup"); 922427ac07fSKip Macy RT_LOCK(rt); 923427ac07fSKip Macy RT_ADDREF(rt); 924427ac07fSKip Macy if (req == RTM_DELETE) { 925427ac07fSKip Macy rt->rt_flags &= ~RTF_UP; 926427ac07fSKip Macy /* 927427ac07fSKip Macy * One more rtentry floating around that is not 928427ac07fSKip Macy * linked to the routing table. rttrash will be decremented 929427ac07fSKip Macy * when RTFREE(rt) is eventually called. 930427ac07fSKip Macy */ 931427ac07fSKip Macy V_rttrash++; 932427ac07fSKip Macy 933427ac07fSKip Macy } 934427ac07fSKip Macy 935427ac07fSKip Macy nondelete: 936427ac07fSKip Macy if (req != RTM_DELETE) 937427ac07fSKip Macy panic("unrecognized request %d", req); 938427ac07fSKip Macy 939427ac07fSKip Macy 940427ac07fSKip Macy /* 941427ac07fSKip Macy * If the caller wants it, then it can have it, 942427ac07fSKip Macy * but it's up to it to free the rtentry as we won't be 943427ac07fSKip Macy * doing it. 
944427ac07fSKip Macy */ 945427ac07fSKip Macy if (ret_nrt) { 946427ac07fSKip Macy *ret_nrt = rt; 947427ac07fSKip Macy RT_UNLOCK(rt); 948427ac07fSKip Macy } else 949427ac07fSKip Macy RTFREE_LOCKED(rt); 950427ac07fSKip Macy done: 951427ac07fSKip Macy return (error); 952427ac07fSKip Macy } 953427ac07fSKip Macy #endif 954427ac07fSKip Macy 9558071913dSRuslan Ermilov int 9568b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 9578b07e49aSJulian Elischer u_int fibnum) 9588b07e49aSJulian Elischer { 9598b615593SMarko Zec INIT_VNET_NET(curvnet); 9603120b9d4SKip Macy int error = 0, needlock = 0; 961df8bae1dSRodney W. Grimes register struct rtentry *rt; 962df8bae1dSRodney W. Grimes register struct radix_node *rn; 963df8bae1dSRodney W. Grimes register struct radix_node_head *rnh; 964df8bae1dSRodney W. Grimes struct ifaddr *ifa; 965df8bae1dSRodney W. Grimes struct sockaddr *ndst; 966df8bae1dSRodney W. Grimes #define senderr(x) { error = x ; goto bad; } 967df8bae1dSRodney W. Grimes 9688b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 9698b07e49aSJulian Elischer if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ 9708b07e49aSJulian Elischer fibnum = 0; 971b0a76b88SJulian Elischer /* 972b0a76b88SJulian Elischer * Find the correct routing tree to use for this Address Family 973b0a76b88SJulian Elischer */ 974c2c2a7c1SBjoern A. 
Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 97585911824SLuigi Rizzo if (rnh == NULL) 976983985c1SJeffrey Hsu return (EAFNOSUPPORT); 9773120b9d4SKip Macy needlock = ((flags & RTF_RNH_LOCKED) == 0); 9783120b9d4SKip Macy flags &= ~RTF_RNH_LOCKED; 9793120b9d4SKip Macy if (needlock) 980956b0b65SJeffrey Hsu RADIX_NODE_HEAD_LOCK(rnh); 981c96b8224SKip Macy else 982c96b8224SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 983b0a76b88SJulian Elischer /* 984b0a76b88SJulian Elischer * If we are adding a host route then we don't want to put 98566953138SRuslan Ermilov * a netmask in the tree, nor do we want to clone it. 986b0a76b88SJulian Elischer */ 9876e6b3f7cSQing Li if (flags & RTF_HOST) 98885911824SLuigi Rizzo netmask = NULL; 9896e6b3f7cSQing Li 990df8bae1dSRodney W. Grimes switch (req) { 991df8bae1dSRodney W. Grimes case RTM_DELETE: 992e440aed9SQing Li #ifdef RADIX_MPATH 993e440aed9SQing Li if (rn_mpath_capable(rnh)) { 994427ac07fSKip Macy error = rn_mpath_update(req, info, rnh, ret_nrt); 995e440aed9SQing Li /* 996427ac07fSKip Macy * "bad" holds true for the success case 997427ac07fSKip Macy * as well 998e440aed9SQing Li */ 999427ac07fSKip Macy if (error != ENOENT) 1000427ac07fSKip Macy goto bad; 1001e440aed9SQing Li } 1002ea9cd9f2SBjoern A. Zeeb #endif 1003b0a76b88SJulian Elischer /* 1004b0a76b88SJulian Elischer * Remove the item from the tree and return it. 1005b0a76b88SJulian Elischer * Complain if it is not there and do no more processing. 1006b0a76b88SJulian Elischer */ 1007d1dd20beSSam Leffler rn = rnh->rnh_deladdr(dst, netmask, rnh); 100885911824SLuigi Rizzo if (rn == NULL) 1009df8bae1dSRodney W. Grimes senderr(ESRCH); 1010df8bae1dSRodney W. Grimes if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 1011df8bae1dSRodney W. 
Grimes panic ("rtrequest delete"); 1012d6941ce9SLuigi Rizzo rt = RNTORT(rn); 1013d1dd20beSSam Leffler RT_LOCK(rt); 10147138d65cSSam Leffler RT_ADDREF(rt); 101571eba915SRuslan Ermilov rt->rt_flags &= ~RTF_UP; 1016c2bed6a3SGarrett Wollman 1017c2bed6a3SGarrett Wollman /* 1018499676dfSJulian Elischer * give the protocol a chance to keep things in sync. 1019b0a76b88SJulian Elischer */ 1020df8bae1dSRodney W. Grimes if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) 10218071913dSRuslan Ermilov ifa->ifa_rtrequest(RTM_DELETE, rt, info); 1022499676dfSJulian Elischer 1023b0a76b88SJulian Elischer /* 1024d6941ce9SLuigi Rizzo * One more rtentry floating around that is not 1025d6941ce9SLuigi Rizzo * linked to the routing table. rttrash will be decremented 1026d6941ce9SLuigi Rizzo * when RTFREE(rt) is eventually called. 1027499676dfSJulian Elischer */ 1028603724d3SBjoern A. Zeeb V_rttrash++; 1029499676dfSJulian Elischer 1030499676dfSJulian Elischer /* 1031499676dfSJulian Elischer * If the caller wants it, then it can have it, 1032499676dfSJulian Elischer * but it's up to it to free the rtentry as we won't be 1033499676dfSJulian Elischer * doing it. 1034b0a76b88SJulian Elischer */ 1035d1dd20beSSam Leffler if (ret_nrt) { 1036df8bae1dSRodney W. Grimes *ret_nrt = rt; 1037d1dd20beSSam Leffler RT_UNLOCK(rt); 1038d1dd20beSSam Leffler } else 1039d1dd20beSSam Leffler RTFREE_LOCKED(rt); 1040df8bae1dSRodney W. Grimes break; 1041df8bae1dSRodney W. Grimes case RTM_RESOLVE: 10426e6b3f7cSQing Li /* 10436e6b3f7cSQing Li * resolve was only used for route cloning 10446e6b3f7cSQing Li * here for compat 10456e6b3f7cSQing Li */ 10466e6b3f7cSQing Li break; 1047df8bae1dSRodney W. 
Grimes case RTM_ADD: 10485df72964SGarrett Wollman if ((flags & RTF_GATEWAY) && !gateway) 104916a2e0a6SQing Li senderr(EINVAL); 105016a2e0a6SQing Li if (dst && gateway && (dst->sa_family != gateway->sa_family) && 105116a2e0a6SQing Li (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 105216a2e0a6SQing Li senderr(EINVAL); 10535df72964SGarrett Wollman 10548b07e49aSJulian Elischer if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum))) 10558071913dSRuslan Ermilov senderr(error); 10568071913dSRuslan Ermilov ifa = info->rti_ifa; 10571ed81b73SMarko Zec rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 105885911824SLuigi Rizzo if (rt == NULL) 1059df8bae1dSRodney W. Grimes senderr(ENOBUFS); 1060d1dd20beSSam Leffler RT_LOCK_INIT(rt); 1061df8bae1dSRodney W. Grimes rt->rt_flags = RTF_UP | flags; 10628b07e49aSJulian Elischer rt->rt_fibnum = fibnum; 1063499676dfSJulian Elischer /* 1064499676dfSJulian Elischer * Add the gateway. Possibly re-malloc-ing the storage for it 10656e6b3f7cSQing Li * 1066499676dfSJulian Elischer */ 1067d1dd20beSSam Leffler RT_LOCK(rt); 1068831a80b0SMatthew Dillon if ((error = rt_setgate(rt, dst, gateway)) != 0) { 1069d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 10701ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1071704b0666SBill Fenner senderr(error); 1072df8bae1dSRodney W. Grimes } 1073499676dfSJulian Elischer 1074499676dfSJulian Elischer /* 1075499676dfSJulian Elischer * point to the (possibly newly malloc'd) dest address. 1076499676dfSJulian Elischer */ 1077d1dd20beSSam Leffler ndst = (struct sockaddr *)rt_key(rt); 1078499676dfSJulian Elischer 1079499676dfSJulian Elischer /* 1080499676dfSJulian Elischer * make sure it contains the value we want (masked if needed). 1081499676dfSJulian Elischer */ 1082df8bae1dSRodney W. Grimes if (netmask) { 1083df8bae1dSRodney W. Grimes rt_maskedcopy(dst, ndst, netmask); 1084df8bae1dSRodney W. 
Grimes } else 10851838a647SLuigi Rizzo bcopy(dst, ndst, dst->sa_len); 10868e718bb4SGarrett Wollman 10878e718bb4SGarrett Wollman /* 1088499676dfSJulian Elischer * Note that we now have a reference to the ifa. 10898e718bb4SGarrett Wollman * This moved from below so that rnh->rnh_addaddr() can 1090499676dfSJulian Elischer * examine the ifa and ifa->ifa_ifp if it so desires. 10918e718bb4SGarrett Wollman */ 109219fc74fbSJeffrey Hsu IFAREF(ifa); 10938e718bb4SGarrett Wollman rt->rt_ifa = ifa; 10948e718bb4SGarrett Wollman rt->rt_ifp = ifa->ifa_ifp; 1095427ac07fSKip Macy rt->rt_rmx.rmx_weight = 1; 10968e718bb4SGarrett Wollman 1097e440aed9SQing Li #ifdef RADIX_MPATH 1098e440aed9SQing Li /* do not permit exactly the same dst/mask/gw pair */ 1099e440aed9SQing Li if (rn_mpath_capable(rnh) && 1100e440aed9SQing Li rt_mpath_conflict(rnh, rt, netmask)) { 1101e440aed9SQing Li if (rt->rt_ifa) { 1102e440aed9SQing Li IFAFREE(rt->rt_ifa); 1103e440aed9SQing Li } 1104e440aed9SQing Li Free(rt_key(rt)); 1105e440aed9SQing Li RT_LOCK_DESTROY(rt); 11061ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1107e440aed9SQing Li senderr(EEXIST); 1108e440aed9SQing Li } 1109e440aed9SQing Li #endif 1110e440aed9SQing Li 1111d1dd20beSSam Leffler /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 1112d1dd20beSSam Leffler rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); 1113499676dfSJulian Elischer /* 1114499676dfSJulian Elischer * If it still failed to go into the tree, 1115499676dfSJulian Elischer * then un-make it (this should be a function) 1116499676dfSJulian Elischer */ 111785911824SLuigi Rizzo if (rn == NULL) { 1118d1dd20beSSam Leffler if (rt->rt_ifa) 11198e718bb4SGarrett Wollman IFAFREE(rt->rt_ifa); 1120df8bae1dSRodney W. Grimes Free(rt_key(rt)); 1121d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 11221ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1123df8bae1dSRodney W. Grimes senderr(EEXIST); 1124df8bae1dSRodney W. 
Grimes } 1125499676dfSJulian Elischer 1126499676dfSJulian Elischer /* 1127a0c0e34bSGleb Smirnoff * If this protocol has something to add to this then 1128499676dfSJulian Elischer * allow it to do that as well. 1129499676dfSJulian Elischer */ 1130df8bae1dSRodney W. Grimes if (ifa->ifa_rtrequest) 11318071913dSRuslan Ermilov ifa->ifa_rtrequest(req, rt, info); 1132499676dfSJulian Elischer 1133cd02a0b7SGarrett Wollman /* 1134499676dfSJulian Elischer * actually return a resultant rtentry and 1135499676dfSJulian Elischer * give the caller a single reference. 1136499676dfSJulian Elischer */ 1137df8bae1dSRodney W. Grimes if (ret_nrt) { 1138df8bae1dSRodney W. Grimes *ret_nrt = rt; 11397138d65cSSam Leffler RT_ADDREF(rt); 1140df8bae1dSRodney W. Grimes } 1141d1dd20beSSam Leffler RT_UNLOCK(rt); 1142df8bae1dSRodney W. Grimes break; 11438071913dSRuslan Ermilov default: 11448071913dSRuslan Ermilov error = EOPNOTSUPP; 1145df8bae1dSRodney W. Grimes } 1146df8bae1dSRodney W. Grimes bad: 11473120b9d4SKip Macy if (needlock) 1148956b0b65SJeffrey Hsu RADIX_NODE_HEAD_UNLOCK(rnh); 1149df8bae1dSRodney W. Grimes return (error); 1150d1dd20beSSam Leffler #undef senderr 1151d1dd20beSSam Leffler } 1152d1dd20beSSam Leffler 11538071913dSRuslan Ermilov #undef dst 11548071913dSRuslan Ermilov #undef gateway 11558071913dSRuslan Ermilov #undef netmask 11568071913dSRuslan Ermilov #undef ifaaddr 11578071913dSRuslan Ermilov #undef ifpaddr 11588071913dSRuslan Ermilov #undef flags 1159df8bae1dSRodney W. Grimes 1160df8bae1dSRodney W. Grimes int 1161d1dd20beSSam Leffler rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1162df8bae1dSRodney W. Grimes { 1163d1dd20beSSam Leffler /* XXX dst may be overwritten, can we move this to below */ 11646e6b3f7cSQing Li int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 11656e6b3f7cSQing Li #ifdef INVARIANTS 1166c2c2a7c1SBjoern A. Zeeb struct radix_node_head *rnh; 1167c2c2a7c1SBjoern A. Zeeb 1168c2c2a7c1SBjoern A. 
Zeeb rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family); 11696e6b3f7cSQing Li #endif 1170d1dd20beSSam Leffler 1171d1dd20beSSam Leffler RT_LOCK_ASSERT(rt); 11723120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1173df8bae1dSRodney W. Grimes 11741db1fffaSBill Fenner /* 117585911824SLuigi Rizzo * Prepare to store the gateway in rt->rt_gateway. 117685911824SLuigi Rizzo * Both dst and gateway are stored one after the other in the same 117785911824SLuigi Rizzo * malloc'd chunk. If we have room, we can reuse the old buffer, 117885911824SLuigi Rizzo * rt_gateway already points to the right place. 117985911824SLuigi Rizzo * Otherwise, malloc a new block and update the 'dst' address. 1180499676dfSJulian Elischer */ 118185911824SLuigi Rizzo if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { 118285911824SLuigi Rizzo caddr_t new; 118385911824SLuigi Rizzo 1184df8bae1dSRodney W. Grimes R_Malloc(new, caddr_t, dlen + glen); 118585911824SLuigi Rizzo if (new == NULL) 11861db1fffaSBill Fenner return ENOBUFS; 1187499676dfSJulian Elischer /* 118885911824SLuigi Rizzo * XXX note, we copy from *dst and not *rt_key(rt) because 118985911824SLuigi Rizzo * rt_setgate() can be called to initialize a newly 119085911824SLuigi Rizzo * allocated route entry, in which case rt_key(rt) == NULL 119185911824SLuigi Rizzo * (and also rt->rt_gateway == NULL). 119285911824SLuigi Rizzo * Free()/free() handle a NULL argument just fine. 1193499676dfSJulian Elischer */ 11941838a647SLuigi Rizzo bcopy(dst, new, dlen); 119585911824SLuigi Rizzo Free(rt_key(rt)); /* free old block, if any */ 1196445e045bSAlexander Kabaev rt_key(rt) = (struct sockaddr *)new; 119785911824SLuigi Rizzo rt->rt_gateway = (struct sockaddr *)(new + dlen); 1198df8bae1dSRodney W. Grimes } 1199499676dfSJulian Elischer 1200499676dfSJulian Elischer /* 120185911824SLuigi Rizzo * Copy the new gateway value into the memory chunk. 
120285911824SLuigi Rizzo */ 120385911824SLuigi Rizzo bcopy(gate, rt->rt_gateway, glen); 120485911824SLuigi Rizzo 12056e6b3f7cSQing Li return (0); 1206df8bae1dSRodney W. Grimes } 1207df8bae1dSRodney W. Grimes 1208f708ef1bSPoul-Henning Kamp static void 1209d1dd20beSSam Leffler rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1210df8bae1dSRodney W. Grimes { 1211df8bae1dSRodney W. Grimes register u_char *cp1 = (u_char *)src; 1212df8bae1dSRodney W. Grimes register u_char *cp2 = (u_char *)dst; 1213df8bae1dSRodney W. Grimes register u_char *cp3 = (u_char *)netmask; 1214df8bae1dSRodney W. Grimes u_char *cplim = cp2 + *cp3; 1215df8bae1dSRodney W. Grimes u_char *cplim2 = cp2 + *cp1; 1216df8bae1dSRodney W. Grimes 1217df8bae1dSRodney W. Grimes *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1218df8bae1dSRodney W. Grimes cp3 += 2; 1219df8bae1dSRodney W. Grimes if (cplim > cplim2) 1220df8bae1dSRodney W. Grimes cplim = cplim2; 1221df8bae1dSRodney W. Grimes while (cp2 < cplim) 1222df8bae1dSRodney W. Grimes *cp2++ = *cp1++ & *cp3++; 1223df8bae1dSRodney W. Grimes if (cp2 < cplim2) 1224df8bae1dSRodney W. Grimes bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1225df8bae1dSRodney W. Grimes } 1226df8bae1dSRodney W. Grimes 1227df8bae1dSRodney W. Grimes /* 1228df8bae1dSRodney W. Grimes * Set up a routing table entry, normally 1229df8bae1dSRodney W. Grimes * for an interface. 1230df8bae1dSRodney W. Grimes */ 12318b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 12328b07e49aSJulian Elischer static inline int 12338b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1234df8bae1dSRodney W. 
Grimes { 12355aca0b30SLuigi Rizzo struct sockaddr *dst; 12368071913dSRuslan Ermilov struct sockaddr *netmask; 123785911824SLuigi Rizzo struct rtentry *rt = NULL; 12388071913dSRuslan Ermilov struct rt_addrinfo info; 1239e440aed9SQing Li int error = 0; 12408b07e49aSJulian Elischer int startfib, endfib; 12418b07e49aSJulian Elischer char tempbuf[_SOCKADDR_TMPSIZE]; 12428b07e49aSJulian Elischer int didwork = 0; 12438b07e49aSJulian Elischer int a_failure = 0; 12446e6b3f7cSQing Li static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; 1245df8bae1dSRodney W. Grimes 12468071913dSRuslan Ermilov if (flags & RTF_HOST) { 12478071913dSRuslan Ermilov dst = ifa->ifa_dstaddr; 12488071913dSRuslan Ermilov netmask = NULL; 12498071913dSRuslan Ermilov } else { 12508071913dSRuslan Ermilov dst = ifa->ifa_addr; 12518071913dSRuslan Ermilov netmask = ifa->ifa_netmask; 12528071913dSRuslan Ermilov } 12538b07e49aSJulian Elischer if ( dst->sa_family != AF_INET) 12548b07e49aSJulian Elischer fibnum = 0; 12558b07e49aSJulian Elischer if (fibnum == -1) { 125666e8505fSJulian Elischer if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { 125766e8505fSJulian Elischer startfib = endfib = curthread->td_proc->p_fibnum; 125866e8505fSJulian Elischer } else { 12598b07e49aSJulian Elischer startfib = 0; 12608b07e49aSJulian Elischer endfib = rt_numfibs - 1; 126166e8505fSJulian Elischer } 12628b07e49aSJulian Elischer } else { 12638b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 12648b07e49aSJulian Elischer startfib = fibnum; 12658b07e49aSJulian Elischer endfib = fibnum; 12668b07e49aSJulian Elischer } 1267ac4a76ebSBjoern A. Zeeb if (dst->sa_len == 0) 1268ac4a76ebSBjoern A. Zeeb return(EINVAL); 1269ac4a76ebSBjoern A. 
Zeeb 1270b0a76b88SJulian Elischer /* 12718b07e49aSJulian Elischer * If it's a delete, check that if it exists, 12728b07e49aSJulian Elischer * it's on the correct interface or we might scrub 12738b07e49aSJulian Elischer * a route to another ifa which would 1274b0a76b88SJulian Elischer * be confusing at best and possibly worse. 1275b0a76b88SJulian Elischer */ 1276df8bae1dSRodney W. Grimes if (cmd == RTM_DELETE) { 1277b0a76b88SJulian Elischer /* 1278b0a76b88SJulian Elischer * It's a delete, so it should already exist.. 1279b0a76b88SJulian Elischer * If it's a net, mask off the host bits 1280b0a76b88SJulian Elischer * (Assuming we have a mask) 12818b07e49aSJulian Elischer * XXX this is kinda inet specific.. 1282b0a76b88SJulian Elischer */ 12838071913dSRuslan Ermilov if (netmask != NULL) { 12848b07e49aSJulian Elischer rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 12858b07e49aSJulian Elischer dst = (struct sockaddr *)tempbuf; 1286df8bae1dSRodney W. Grimes } 12878b07e49aSJulian Elischer } 12888b07e49aSJulian Elischer /* 12898b07e49aSJulian Elischer * Now go through all the requested tables (fibs) and do the 12908b07e49aSJulian Elischer * requested action. Realistically, this will either be fib 0 12918b07e49aSJulian Elischer * for protocols that don't do multiple tables or all the 12928b07e49aSJulian Elischer * tables for those that do. XXX For this version only AF_INET. 12938b07e49aSJulian Elischer * When that changes code should be refactored to protocol 12948b07e49aSJulian Elischer * independent parts and protocol dependent parts. 
12958b07e49aSJulian Elischer */ 12968b07e49aSJulian Elischer for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 12978b07e49aSJulian Elischer if (cmd == RTM_DELETE) { 12988b07e49aSJulian Elischer struct radix_node_head *rnh; 12998b07e49aSJulian Elischer struct radix_node *rn; 1300b0a76b88SJulian Elischer /* 13018071913dSRuslan Ermilov * Look up an rtentry that is in the routing tree and 13028071913dSRuslan Ermilov * contains the correct info. 1303b0a76b88SJulian Elischer */ 1304c2c2a7c1SBjoern A. Zeeb rnh = rt_tables_get_rnh(fibnum, dst->sa_family); 1305c2c2a7c1SBjoern A. Zeeb if (rnh == NULL) 13068b07e49aSJulian Elischer /* this table doesn't exist but others might */ 13078b07e49aSJulian Elischer continue; 1308956b0b65SJeffrey Hsu RADIX_NODE_HEAD_LOCK(rnh); 1309e440aed9SQing Li #ifdef RADIX_MPATH 1310e440aed9SQing Li if (rn_mpath_capable(rnh)) { 1311e440aed9SQing Li 1312e440aed9SQing Li rn = rnh->rnh_matchaddr(dst, rnh); 1313e440aed9SQing Li if (rn == NULL) 1314e440aed9SQing Li error = ESRCH; 1315e440aed9SQing Li else { 1316e440aed9SQing Li rt = RNTORT(rn); 1317e440aed9SQing Li /* 13188b07e49aSJulian Elischer * for interface route the 13198b07e49aSJulian Elischer * rt->rt_gateway is sockaddr_intf 13208b07e49aSJulian Elischer * for cloning ARP entries, so 13218b07e49aSJulian Elischer * rt_mpath_matchgate must use the 13228b07e49aSJulian Elischer * interface address 1323e440aed9SQing Li */ 13248b07e49aSJulian Elischer rt = rt_mpath_matchgate(rt, 13258b07e49aSJulian Elischer ifa->ifa_addr); 1326e440aed9SQing Li if (!rt) 1327e440aed9SQing Li error = ESRCH; 1328e440aed9SQing Li } 1329e440aed9SQing Li } 1330e440aed9SQing Li else 1331e440aed9SQing Li #endif 13328b07e49aSJulian Elischer rn = rnh->rnh_lookup(dst, netmask, rnh); 13338b07e49aSJulian Elischer error = (rn == NULL || 13348071913dSRuslan Ermilov (rn->rn_flags & RNF_ROOT) || 1335d6941ce9SLuigi Rizzo RNTORT(rn)->rt_ifa != ifa || 133685911824SLuigi Rizzo !sa_equal((struct sockaddr *)rn->rn_key, dst)); 
1337956b0b65SJeffrey Hsu RADIX_NODE_HEAD_UNLOCK(rnh); 1338956b0b65SJeffrey Hsu if (error) { 13398b07e49aSJulian Elischer /* this is only an error if bad on ALL tables */ 13408b07e49aSJulian Elischer continue; 1341df8bae1dSRodney W. Grimes } 1342b0a76b88SJulian Elischer } 1343b0a76b88SJulian Elischer /* 1344b0a76b88SJulian Elischer * Do the actual request 1345b0a76b88SJulian Elischer */ 13468071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 13478071913dSRuslan Ermilov info.rti_ifa = ifa; 13488071913dSRuslan Ermilov info.rti_flags = flags | ifa->ifa_flags; 13498071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 13506e6b3f7cSQing Li /* 13516e6b3f7cSQing Li * doing this for compatibility reasons 13526e6b3f7cSQing Li */ 13536e6b3f7cSQing Li if (cmd == RTM_ADD) 13546e6b3f7cSQing Li info.rti_info[RTAX_GATEWAY] = 13556e6b3f7cSQing Li (struct sockaddr *)&null_sdl; 13566e6b3f7cSQing Li else 13578071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 13588071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 13598b07e49aSJulian Elischer error = rtrequest1_fib(cmd, &info, &rt, fibnum); 13605aca0b30SLuigi Rizzo if (error == 0 && rt != NULL) { 13618071913dSRuslan Ermilov /* 13626f99b44cSBrian Somers * notify any listening routing agents of the change 13638071913dSRuslan Ermilov */ 1364d1dd20beSSam Leffler RT_LOCK(rt); 1365e440aed9SQing Li #ifdef RADIX_MPATH 1366e440aed9SQing Li /* 1367e440aed9SQing Li * in case address alias finds the first address 1368e440aed9SQing Li * e.g. ifconfig bge0 192.103.54.246/24 1369e440aed9SQing Li * e.g. 
ifconfig bge0 192.103.54.247/24 1370e440aed9SQing Li * the address set in the route is 192.103.54.246 1371e440aed9SQing Li * so we need to replace it with 192.103.54.247 1372e440aed9SQing Li */ 13738b07e49aSJulian Elischer if (memcmp(rt->rt_ifa->ifa_addr, 13748b07e49aSJulian Elischer ifa->ifa_addr, ifa->ifa_addr->sa_len)) { 1375e440aed9SQing Li IFAFREE(rt->rt_ifa); 1376e440aed9SQing Li IFAREF(ifa); 1377e440aed9SQing Li rt->rt_ifp = ifa->ifa_ifp; 1378e440aed9SQing Li rt->rt_ifa = ifa; 1379e440aed9SQing Li } 1380e440aed9SQing Li #endif 13816e6b3f7cSQing Li /* 13826e6b3f7cSQing Li * doing this for compatibility reasons 13836e6b3f7cSQing Li */ 13846e6b3f7cSQing Li if (cmd == RTM_ADD) { 13856e6b3f7cSQing Li ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = 13866e6b3f7cSQing Li rt->rt_ifp->if_type; 13876e6b3f7cSQing Li ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = 13886e6b3f7cSQing Li rt->rt_ifp->if_index; 13896e6b3f7cSQing Li } 13908071913dSRuslan Ermilov rt_newaddrmsg(cmd, ifa, error, rt); 13918071913dSRuslan Ermilov if (cmd == RTM_DELETE) { 1392b0a76b88SJulian Elischer /* 13938b07e49aSJulian Elischer * If we are deleting, and we found an entry, 13948b07e49aSJulian Elischer * then it's been removed from the tree.. 13958b07e49aSJulian Elischer * now throw it away. 1396b0a76b88SJulian Elischer */ 1397d1dd20beSSam Leffler RTFREE_LOCKED(rt); 1398d1dd20beSSam Leffler } else { 1399d1dd20beSSam Leffler if (cmd == RTM_ADD) { 1400b0a76b88SJulian Elischer /* 14018b07e49aSJulian Elischer * We just wanted to add it.. 14028b07e49aSJulian Elischer * we don't actually need a reference. 1403b0a76b88SJulian Elischer */ 14047138d65cSSam Leffler RT_REMREF(rt); 1405df8bae1dSRodney W. Grimes } 1406d1dd20beSSam Leffler RT_UNLOCK(rt); 1407d1dd20beSSam Leffler } 14088b07e49aSJulian Elischer didwork = 1; 1409df8bae1dSRodney W. 
Grimes } 14108b07e49aSJulian Elischer if (error) 14118b07e49aSJulian Elischer a_failure = error; 14128b07e49aSJulian Elischer } 14138b07e49aSJulian Elischer if (cmd == RTM_DELETE) { 14148b07e49aSJulian Elischer if (didwork) { 14158b07e49aSJulian Elischer error = 0; 14168b07e49aSJulian Elischer } else { 14178b07e49aSJulian Elischer /* we only give an error if it wasn't in any table */ 14188b07e49aSJulian Elischer error = ((flags & RTF_HOST) ? 14198b07e49aSJulian Elischer EHOSTUNREACH : ENETUNREACH); 14208b07e49aSJulian Elischer } 14218b07e49aSJulian Elischer } else { 14228b07e49aSJulian Elischer if (a_failure) { 14238b07e49aSJulian Elischer /* return an error if any of them failed */ 14248b07e49aSJulian Elischer error = a_failure; 14258b07e49aSJulian Elischer } 14268b07e49aSJulian Elischer } 14273ec66d6cSDavid Greenman return (error); 14283ec66d6cSDavid Greenman } 1429cb64988fSLuoqi Chen 14308b07e49aSJulian Elischer /* Special entry point for inet internal use; other code should presumably not call it (the original note here read: may not use). Unlike rtinit() it performs no address-family check and always hands rtinit1() a fib argument of -1. NOTE(review): -1 presumably selects every fib; confirm against rtinit1(). */ 14318b07e49aSJulian Elischer int 14328b07e49aSJulian Elischer rtinit_fib(struct ifaddr *ifa, int cmd, int flags) 14338b07e49aSJulian Elischer { 14348b07e49aSJulian Elischer return (rtinit1(ifa, cmd, flags, -1)); 14358b07e49aSJulian Elischer } 14368b07e49aSJulian Elischer 14378b07e49aSJulian Elischer /* 14388b07e49aSJulian Elischer * Set up a routing table entry, normally 14398b07e49aSJulian Elischer * for an interface. 
14408b07e49aSJulian Elischer */ 14418b07e49aSJulian Elischer int 14428b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags) 14438b07e49aSJulian Elischer { 14448b07e49aSJulian Elischer struct sockaddr *dst; 14458b07e49aSJulian Elischer int fib = 0; 14468b07e49aSJulian Elischer 14478b07e49aSJulian Elischer if (flags & RTF_HOST) { 14488b07e49aSJulian Elischer dst = ifa->ifa_dstaddr; 14498b07e49aSJulian Elischer } else { 14508b07e49aSJulian Elischer dst = ifa->ifa_addr; 14518b07e49aSJulian Elischer } 14528b07e49aSJulian Elischer 14538b07e49aSJulian Elischer if (dst->sa_family == AF_INET) 14548b07e49aSJulian Elischer fib = -1; 14558b07e49aSJulian Elischer return (rtinit1(ifa, cmd, flags, fib)); 14568b07e49aSJulian Elischer } 14578b07e49aSJulian Elischer 14586a800098SYoshinobu Inoue /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */ 14596a800098SYoshinobu Inoue SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); 1460