1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1980, 1986, 1991, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 2942e9e16dSRuslan Ermilov * @(#)route.c 8.3.1.1 (Berkeley) 2/23/95 30c3aac50fSPeter Wemm * $FreeBSD$ 31df8bae1dSRodney W. Grimes */ 328b07e49aSJulian Elischer /************************************************************************ 338b07e49aSJulian Elischer * Note: In this file a 'fib' is a "forwarding information base" * 348b07e49aSJulian Elischer * Which is the new name for an in kernel routing (next hop) table. * 358b07e49aSJulian Elischer ***********************************************************************/ 36df8bae1dSRodney W. Grimes 371d5e9e22SEivind Eklund #include "opt_inet.h" 388b07e49aSJulian Elischer #include "opt_route.h" 394bd49128SPeter Wemm #include "opt_mrouting.h" 40e440aed9SQing Li #include "opt_mpath.h" 414bd49128SPeter Wemm 42df8bae1dSRodney W. Grimes #include <sys/param.h> 43df8bae1dSRodney W. Grimes #include <sys/systm.h> 446e6b3f7cSQing Li #include <sys/syslog.h> 454d1d4912SBruce Evans #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47df8bae1dSRodney W. Grimes #include <sys/socket.h> 488b07e49aSJulian Elischer #include <sys/sysctl.h> 493120b9d4SKip Macy #include <sys/syslog.h> 508b07e49aSJulian Elischer #include <sys/sysproto.h> 518b07e49aSJulian Elischer #include <sys/proc.h> 52df8bae1dSRodney W. Grimes #include <sys/domain.h> 53cb64988fSLuoqi Chen #include <sys/kernel.h> 54603724d3SBjoern A. Zeeb #include <sys/vimage.h> 55df8bae1dSRodney W. Grimes 56df8bae1dSRodney W. Grimes #include <net/if.h> 576e6b3f7cSQing Li #include <net/if_dl.h> 58df8bae1dSRodney W. Grimes #include <net/route.h> 59df8bae1dSRodney W. 
Grimes 60e440aed9SQing Li #ifdef RADIX_MPATH 61e440aed9SQing Li #include <net/radix_mpath.h> 62e440aed9SQing Li #endif 634b79449eSBjoern A. Zeeb #include <net/vnet.h> 64e440aed9SQing Li 65df8bae1dSRodney W. Grimes #include <netinet/in.h> 66b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h> 674b79449eSBjoern A. Zeeb #include <netinet/vinet.h> 68df8bae1dSRodney W. Grimes 692dc1d581SAndre Oppermann #include <vm/uma.h> 702dc1d581SAndre Oppermann 718b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS; 728b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, ""); 7366e8505fSJulian Elischer /* 7466e8505fSJulian Elischer * Allow the boot code to allow LESS than RT_MAXFIBS to be used. 7566e8505fSJulian Elischer * We can't do more because storage is statically allocated for now. 7666e8505fSJulian Elischer * (for compatibility reasons.. this will change). 7766e8505fSJulian Elischer */ 788b07e49aSJulian Elischer TUNABLE_INT("net.fibs", &rt_numfibs); 798b07e49aSJulian Elischer 8066e8505fSJulian Elischer /* 8166e8505fSJulian Elischer * By default add routes to all fibs for new interfaces. 8266e8505fSJulian Elischer * Once this is set to 0 then only allocate routes on interface 8366e8505fSJulian Elischer * changes for the FIB of the caller when adding a new set of addresses 8466e8505fSJulian Elischer * to an interface. XXX this is a shotgun aproach to a problem that needs 8566e8505fSJulian Elischer * a more fine grained solution.. that will come. 
8666e8505fSJulian Elischer */ 8766e8505fSJulian Elischer u_int rt_add_addr_allfibs = 1; 8866e8505fSJulian Elischer SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW, 8966e8505fSJulian Elischer &rt_add_addr_allfibs, 0, ""); 9066e8505fSJulian Elischer TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs); 9166e8505fSJulian Elischer 9244e33a07SMarko Zec #ifdef VIMAGE_GLOBALS 93f708ef1bSPoul-Henning Kamp static struct rtstat rtstat; 948b07e49aSJulian Elischer 958b07e49aSJulian Elischer /* by default only the first 'row' of tables will be accessed. */ 968b07e49aSJulian Elischer /* 978b07e49aSJulian Elischer * XXXMRT When we fix netstat, and do this differnetly, 988b07e49aSJulian Elischer * we can allocate this dynamically. As long as we are keeping 998b07e49aSJulian Elischer * things backwards compaitble we need to allocate this 1008b07e49aSJulian Elischer * statically. 1018b07e49aSJulian Elischer */ 1028b07e49aSJulian Elischer struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1]; 10328f8db14SBruce Evans 104f708ef1bSPoul-Henning Kamp static int rttrash; /* routes not in table but not freed */ 10544e33a07SMarko Zec #endif 106df8bae1dSRodney W. Grimes 107929ddbbbSAlfred Perlstein static void rt_maskedcopy(struct sockaddr *, 108929ddbbbSAlfred Perlstein struct sockaddr *, struct sockaddr *); 1091ed81b73SMarko Zec static int vnet_route_iattach(const void *); 110f708ef1bSPoul-Henning Kamp 111d6941ce9SLuigi Rizzo /* compare two sockaddr structures */ 112d6941ce9SLuigi Rizzo #define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0) 113d6941ce9SLuigi Rizzo 114d6941ce9SLuigi Rizzo /* 115d6941ce9SLuigi Rizzo * Convert a 'struct radix_node *' to a 'struct rtentry *'. 
116d6941ce9SLuigi Rizzo * The operation can be done safely (in this code) because a 117d6941ce9SLuigi Rizzo * 'struct rtentry' starts with two 'struct radix_node''s, the first 118d6941ce9SLuigi Rizzo * one representing leaf nodes in the routing tree, which is 119d6941ce9SLuigi Rizzo * what the code in radix.c passes us as a 'struct radix_node'. 120d6941ce9SLuigi Rizzo * 121d6941ce9SLuigi Rizzo * But because there are a lot of assumptions in this conversion, 122d6941ce9SLuigi Rizzo * do not cast explicitly, but always use the macro below. 123d6941ce9SLuigi Rizzo */ 124d6941ce9SLuigi Rizzo #define RNTORT(p) ((struct rtentry *)(p)) 125d6941ce9SLuigi Rizzo 1261ed81b73SMarko Zec #ifdef VIMAGE_GLOBALS 1278b07e49aSJulian Elischer static uma_zone_t rtzone; /* Routing table UMA zone. */ 1281ed81b73SMarko Zec #endif 1298b07e49aSJulian Elischer 1308b07e49aSJulian Elischer #if 0 1318b07e49aSJulian Elischer /* default fib for tunnels to use */ 1328b07e49aSJulian Elischer u_int tunnel_fib = 0; 1338b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, ""); 1348b07e49aSJulian Elischer #endif 1358b07e49aSJulian Elischer 1368b07e49aSJulian Elischer /* 1378b07e49aSJulian Elischer * handler for net.my_fibnum 1388b07e49aSJulian Elischer */ 1398b07e49aSJulian Elischer static int 1408b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS) 141df8bae1dSRodney W. Grimes { 1428b07e49aSJulian Elischer int fibnum; 1438b07e49aSJulian Elischer int error; 1448b07e49aSJulian Elischer 1458b07e49aSJulian Elischer fibnum = curthread->td_proc->p_fibnum; 1468b07e49aSJulian Elischer error = sysctl_handle_int(oidp, &fibnum, 0, req); 1478b07e49aSJulian Elischer return (error); 148df8bae1dSRodney W. Grimes } 149df8bae1dSRodney W. 
Grimes 1508b07e49aSJulian Elischer SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD, 1518b07e49aSJulian Elischer NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller"); 1522dc1d581SAndre Oppermann 1532eb5613fSLuigi Rizzo static void 1542eb5613fSLuigi Rizzo route_init(void) 155df8bae1dSRodney W. Grimes { 1568b07e49aSJulian Elischer 1576f95a5ebSJulian Elischer /* whack the tunable ints into line. */ 1588b07e49aSJulian Elischer if (rt_numfibs > RT_MAXFIBS) 1598b07e49aSJulian Elischer rt_numfibs = RT_MAXFIBS; 1608b07e49aSJulian Elischer if (rt_numfibs == 0) 1618b07e49aSJulian Elischer rt_numfibs = 1; 162df8bae1dSRodney W. Grimes rn_init(); /* initialize all zeroes, all ones, mask table */ 1638b07e49aSJulian Elischer 1641ed81b73SMarko Zec vnet_route_iattach(NULL); 1651ed81b73SMarko Zec } 1661ed81b73SMarko Zec 1671ed81b73SMarko Zec static int vnet_route_iattach(const void *unused __unused) 1681ed81b73SMarko Zec { 1691ed81b73SMarko Zec INIT_VNET_INET(curvnet); 1701ed81b73SMarko Zec int table; 1711ed81b73SMarko Zec struct domain *dom; 1721ed81b73SMarko Zec int fam; 1731ed81b73SMarko Zec 1741ed81b73SMarko Zec V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL, 1751ed81b73SMarko Zec NULL, NULL, UMA_ALIGN_PTR, 0); 1768b07e49aSJulian Elischer for (dom = domains; dom; dom = dom->dom_next) { 1778b07e49aSJulian Elischer if (dom->dom_rtattach) { 1788b07e49aSJulian Elischer for (table = 0; table < rt_numfibs; table++) { 1798b07e49aSJulian Elischer if ( (fam = dom->dom_family) == AF_INET || 1808b07e49aSJulian Elischer table == 0) { 1818b07e49aSJulian Elischer /* for now only AF_INET has > 1 table */ 1828b07e49aSJulian Elischer /* XXX MRT 1838b07e49aSJulian Elischer * rtattach will be also called 1848b07e49aSJulian Elischer * from vfs_export.c but the 1858b07e49aSJulian Elischer * offset will be 0 1868b07e49aSJulian Elischer * (only for AF_INET and AF_INET6 1878b07e49aSJulian Elischer * which don't need it anyhow) 1888b07e49aSJulian Elischer */ 
1898b07e49aSJulian Elischer dom->dom_rtattach( 190603724d3SBjoern A. Zeeb (void **)&V_rt_tables[table][fam], 1918b07e49aSJulian Elischer dom->dom_rtoffset); 1928b07e49aSJulian Elischer } else { 1938b07e49aSJulian Elischer break; 1948b07e49aSJulian Elischer } 1958b07e49aSJulian Elischer } 1968b07e49aSJulian Elischer } 1978b07e49aSJulian Elischer } 1981ed81b73SMarko Zec 1991ed81b73SMarko Zec return (0); 2008b07e49aSJulian Elischer } 2018b07e49aSJulian Elischer 2028b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_ 2038b07e49aSJulian Elischer struct setfib_args { 2048b07e49aSJulian Elischer int fibnum; 2058b07e49aSJulian Elischer }; 2068b07e49aSJulian Elischer #endif 2078b07e49aSJulian Elischer int 2088b07e49aSJulian Elischer setfib(struct thread *td, struct setfib_args *uap) 2098b07e49aSJulian Elischer { 2108b07e49aSJulian Elischer if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs) 2118b07e49aSJulian Elischer return EINVAL; 2128b07e49aSJulian Elischer td->td_proc->p_fibnum = uap->fibnum; 2138b07e49aSJulian Elischer return (0); 214df8bae1dSRodney W. Grimes } 215df8bae1dSRodney W. Grimes 216df8bae1dSRodney W. Grimes /* 217df8bae1dSRodney W. Grimes * Packet routing routines. 218df8bae1dSRodney W. Grimes */ 219df8bae1dSRodney W. Grimes void 220d1dd20beSSam Leffler rtalloc(struct route *ro) 221df8bae1dSRodney W. Grimes { 2228b07e49aSJulian Elischer rtalloc_ign_fib(ro, 0UL, 0); 2238b07e49aSJulian Elischer } 2248b07e49aSJulian Elischer 2258b07e49aSJulian Elischer void 2268b07e49aSJulian Elischer rtalloc_fib(struct route *ro, u_int fibnum) 2278b07e49aSJulian Elischer { 2288b07e49aSJulian Elischer rtalloc_ign_fib(ro, 0UL, fibnum); 229df8bae1dSRodney W. Grimes } 230df8bae1dSRodney W. 
Grimes 231652082e6SGarrett Wollman void 232d1dd20beSSam Leffler rtalloc_ign(struct route *ro, u_long ignore) 233652082e6SGarrett Wollman { 23468f956b8SJohn Polstra struct rtentry *rt; 23568f956b8SJohn Polstra 23668f956b8SJohn Polstra if ((rt = ro->ro_rt) != NULL) { 23768f956b8SJohn Polstra if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 23868f956b8SJohn Polstra return; 23968f956b8SJohn Polstra RTFREE(rt); 24066810dd0SYoshinobu Inoue ro->ro_rt = NULL; 24168f956b8SJohn Polstra } 2428b07e49aSJulian Elischer ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0); 2438b07e49aSJulian Elischer if (ro->ro_rt) 2448b07e49aSJulian Elischer RT_UNLOCK(ro->ro_rt); 2458b07e49aSJulian Elischer } 2468b07e49aSJulian Elischer 2478b07e49aSJulian Elischer void 2488b07e49aSJulian Elischer rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) 2498b07e49aSJulian Elischer { 2508b07e49aSJulian Elischer struct rtentry *rt; 2518b07e49aSJulian Elischer 2528b07e49aSJulian Elischer if ((rt = ro->ro_rt) != NULL) { 2538b07e49aSJulian Elischer if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) 2548b07e49aSJulian Elischer return; 2558b07e49aSJulian Elischer RTFREE(rt); 2568b07e49aSJulian Elischer ro->ro_rt = NULL; 2578b07e49aSJulian Elischer } 2588b07e49aSJulian Elischer ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); 259d1dd20beSSam Leffler if (ro->ro_rt) 260d1dd20beSSam Leffler RT_UNLOCK(ro->ro_rt); 261652082e6SGarrett Wollman } 262652082e6SGarrett Wollman 263b0a76b88SJulian Elischer /* 264b0a76b88SJulian Elischer * Look up the route that matches the address given 265b0a76b88SJulian Elischer * Or, at least try.. Create a cloned route if needed. 266d1dd20beSSam Leffler * 267d1dd20beSSam Leffler * The returned route, if any, is locked. 268b0a76b88SJulian Elischer */ 269df8bae1dSRodney W. Grimes struct rtentry * 270d1dd20beSSam Leffler rtalloc1(struct sockaddr *dst, int report, u_long ignflags) 271df8bae1dSRodney W. 
Grimes { 2728b07e49aSJulian Elischer return (rtalloc1_fib(dst, report, ignflags, 0)); 2738b07e49aSJulian Elischer } 2748b07e49aSJulian Elischer 2758b07e49aSJulian Elischer struct rtentry * 2768b07e49aSJulian Elischer rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, 2778b07e49aSJulian Elischer u_int fibnum) 2788b07e49aSJulian Elischer { 2798b615593SMarko Zec INIT_VNET_NET(curvnet); 2808b07e49aSJulian Elischer struct radix_node_head *rnh; 281d1dd20beSSam Leffler struct rtentry *rt; 282d1dd20beSSam Leffler struct radix_node *rn; 283d1dd20beSSam Leffler struct rtentry *newrt; 284df8bae1dSRodney W. Grimes struct rt_addrinfo info; 2856e6b3f7cSQing Li int err = 0, msgtype = RTM_MISS; 2863120b9d4SKip Macy int needlock; 287df8bae1dSRodney W. Grimes 2888b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum")); 2898b07e49aSJulian Elischer if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ 2908b07e49aSJulian Elischer fibnum = 0; 291603724d3SBjoern A. Zeeb rnh = V_rt_tables[fibnum][dst->sa_family]; 29285911824SLuigi Rizzo newrt = NULL; 293b0a76b88SJulian Elischer /* 294b0a76b88SJulian Elischer * Look up the address in the table for that Address Family 295b0a76b88SJulian Elischer */ 296956b0b65SJeffrey Hsu if (rnh == NULL) { 297603724d3SBjoern A. 
Zeeb V_rtstat.rts_unreach++; 2986e6b3f7cSQing Li goto miss; 299956b0b65SJeffrey Hsu } 3003120b9d4SKip Macy needlock = !(ignflags & RTF_RNH_LOCKED); 3013120b9d4SKip Macy if (needlock) 3023120b9d4SKip Macy RADIX_NODE_HEAD_RLOCK(rnh); 3033120b9d4SKip Macy #ifdef INVARIANTS 3043120b9d4SKip Macy else 3053120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 3063120b9d4SKip Macy #endif 3073120b9d4SKip Macy rn = rnh->rnh_matchaddr(dst, rnh); 3083120b9d4SKip Macy if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) { 309d6941ce9SLuigi Rizzo newrt = rt = RNTORT(rn); 3103120b9d4SKip Macy RT_LOCK(newrt); 3113120b9d4SKip Macy RT_ADDREF(newrt); 3123120b9d4SKip Macy if (needlock) 3133120b9d4SKip Macy RADIX_NODE_HEAD_RUNLOCK(rnh); 3143120b9d4SKip Macy goto done; 3156e6b3f7cSQing Li 3166e6b3f7cSQing Li } else if (needlock) 3173120b9d4SKip Macy RADIX_NODE_HEAD_RUNLOCK(rnh); 3183120b9d4SKip Macy 319b0a76b88SJulian Elischer /* 320b0a76b88SJulian Elischer * Either we hit the root or couldn't find any match, 321b0a76b88SJulian Elischer * Which basically means 322b0a76b88SJulian Elischer * "caint get there frm here" 323b0a76b88SJulian Elischer */ 324603724d3SBjoern A. Zeeb V_rtstat.rts_unreach++; 325956b0b65SJeffrey Hsu miss: 3266e6b3f7cSQing Li if (report) { 327b0a76b88SJulian Elischer /* 328b0a76b88SJulian Elischer * If required, report the failure to the supervising 329b0a76b88SJulian Elischer * Authorities. 330b0a76b88SJulian Elischer * For a delete, this is not an error. (report == 0) 331b0a76b88SJulian Elischer */ 3326f5967c0SBruce Evans bzero(&info, sizeof(info)); 333df8bae1dSRodney W. Grimes info.rti_info[RTAX_DST] = dst; 334df8bae1dSRodney W. Grimes rt_missmsg(msgtype, &info, 0, err); 335df8bae1dSRodney W. Grimes } 3363120b9d4SKip Macy done: 337d1dd20beSSam Leffler if (newrt) 338d1dd20beSSam Leffler RT_LOCK_ASSERT(newrt); 339df8bae1dSRodney W. Grimes return (newrt); 340df8bae1dSRodney W. Grimes } 341df8bae1dSRodney W. 
Grimes 342499676dfSJulian Elischer /* 343499676dfSJulian Elischer * Remove a reference count from an rtentry. 344499676dfSJulian Elischer * If the count gets low enough, take it out of the routing table 345499676dfSJulian Elischer */ 346df8bae1dSRodney W. Grimes void 347d1dd20beSSam Leffler rtfree(struct rtentry *rt) 348df8bae1dSRodney W. Grimes { 3498b615593SMarko Zec INIT_VNET_NET(curvnet); 35085911824SLuigi Rizzo struct radix_node_head *rnh; 351df8bae1dSRodney W. Grimes 352a0c0e34bSGleb Smirnoff KASSERT(rt != NULL,("%s: NULL rt", __func__)); 353603724d3SBjoern A. Zeeb rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; 354a0c0e34bSGleb Smirnoff KASSERT(rnh != NULL,("%s: NULL rnh", __func__)); 355499676dfSJulian Elischer 356d1dd20beSSam Leffler RT_LOCK_ASSERT(rt); 357d1dd20beSSam Leffler 358499676dfSJulian Elischer /* 359a0c0e34bSGleb Smirnoff * The callers should use RTFREE_LOCKED() or RTFREE(), so 360a0c0e34bSGleb Smirnoff * we should come here exactly with the last reference. 361499676dfSJulian Elischer */ 3627138d65cSSam Leffler RT_REMREF(rt); 363a0c0e34bSGleb Smirnoff if (rt->rt_refcnt > 0) { 364a42ea597SQing Li log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt); 365d1dd20beSSam Leffler goto done; 366a0c0e34bSGleb Smirnoff } 3679c63e9dbSSam Leffler 3689c63e9dbSSam Leffler /* 3699c63e9dbSSam Leffler * On last reference give the "close method" a chance 3709c63e9dbSSam Leffler * to cleanup private state. This also permits (for 3719c63e9dbSSam Leffler * IPv4 and IPv6) a chance to decide if the routing table 3729c63e9dbSSam Leffler * entry should be purged immediately or at a later time. 3739c63e9dbSSam Leffler * When an immediate purge is to happen the close routine 3749c63e9dbSSam Leffler * typically calls rtexpunge which clears the RTF_UP flag 3759c63e9dbSSam Leffler * on the entry so that the code below reclaims the storage. 
3769c63e9dbSSam Leffler */ 377d1dd20beSSam Leffler if (rt->rt_refcnt == 0 && rnh->rnh_close) 3785c2dae8eSGarrett Wollman rnh->rnh_close((struct radix_node *)rt, rnh); 379499676dfSJulian Elischer 380499676dfSJulian Elischer /* 381499676dfSJulian Elischer * If we are no longer "up" (and ref == 0) 382499676dfSJulian Elischer * then we can free the resources associated 383499676dfSJulian Elischer * with the route. 384499676dfSJulian Elischer */ 385d1dd20beSSam Leffler if ((rt->rt_flags & RTF_UP) == 0) { 386df8bae1dSRodney W. Grimes if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 387df8bae1dSRodney W. Grimes panic("rtfree 2"); 388499676dfSJulian Elischer /* 389499676dfSJulian Elischer * the rtentry must have been removed from the routing table 390499676dfSJulian Elischer * so it is represented in rttrash.. remove that now. 391499676dfSJulian Elischer */ 392603724d3SBjoern A. Zeeb V_rttrash--; 393499676dfSJulian Elischer #ifdef DIAGNOSTIC 394df8bae1dSRodney W. Grimes if (rt->rt_refcnt < 0) { 395623ae52eSPoul-Henning Kamp printf("rtfree: %p not freed (neg refs)\n", rt); 396d1dd20beSSam Leffler goto done; 397df8bae1dSRodney W. Grimes } 398499676dfSJulian Elischer #endif 399499676dfSJulian Elischer /* 400499676dfSJulian Elischer * release references on items we hold them on.. 401499676dfSJulian Elischer * e.g other routes and ifaddrs. 402499676dfSJulian Elischer */ 40319fc74fbSJeffrey Hsu if (rt->rt_ifa) 40419fc74fbSJeffrey Hsu IFAFREE(rt->rt_ifa); 405499676dfSJulian Elischer /* 406499676dfSJulian Elischer * The key is separatly alloc'd so free it (see rt_setgate()). 407499676dfSJulian Elischer * This also frees the gateway, as they are always malloc'd 408499676dfSJulian Elischer * together. 409499676dfSJulian Elischer */ 410df8bae1dSRodney W. 
Grimes Free(rt_key(rt)); 411499676dfSJulian Elischer 412499676dfSJulian Elischer /* 413499676dfSJulian Elischer * and the rtentry itself of course 414499676dfSJulian Elischer */ 415d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 4161ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 417d1dd20beSSam Leffler return; 418df8bae1dSRodney W. Grimes } 419d1dd20beSSam Leffler done: 420d1dd20beSSam Leffler RT_UNLOCK(rt); 421df8bae1dSRodney W. Grimes } 422df8bae1dSRodney W. Grimes 423df8bae1dSRodney W. Grimes 424df8bae1dSRodney W. Grimes /* 425df8bae1dSRodney W. Grimes * Force a routing table entry to the specified 426df8bae1dSRodney W. Grimes * destination to go through the given gateway. 427df8bae1dSRodney W. Grimes * Normally called as a result of a routing redirect 428df8bae1dSRodney W. Grimes * message from the network layer. 429df8bae1dSRodney W. Grimes */ 43026f9a767SRodney W. Grimes void 431d1dd20beSSam Leffler rtredirect(struct sockaddr *dst, 432d1dd20beSSam Leffler struct sockaddr *gateway, 433d1dd20beSSam Leffler struct sockaddr *netmask, 434d1dd20beSSam Leffler int flags, 435d1dd20beSSam Leffler struct sockaddr *src) 436df8bae1dSRodney W. Grimes { 4378b07e49aSJulian Elischer rtredirect_fib(dst, gateway, netmask, flags, src, 0); 4388b07e49aSJulian Elischer } 4398b07e49aSJulian Elischer 4408b07e49aSJulian Elischer void 4418b07e49aSJulian Elischer rtredirect_fib(struct sockaddr *dst, 4428b07e49aSJulian Elischer struct sockaddr *gateway, 4438b07e49aSJulian Elischer struct sockaddr *netmask, 4448b07e49aSJulian Elischer int flags, 4458b07e49aSJulian Elischer struct sockaddr *src, 4468b07e49aSJulian Elischer u_int fibnum) 4478b07e49aSJulian Elischer { 4488b615593SMarko Zec INIT_VNET_NET(curvnet); 4498e7e854cSKip Macy struct rtentry *rt, *rt0 = NULL; 450df8bae1dSRodney W. Grimes int error = 0; 45185911824SLuigi Rizzo short *stat = NULL; 452df8bae1dSRodney W. Grimes struct rt_addrinfo info; 453df8bae1dSRodney W. 
Grimes struct ifaddr *ifa; 4543120b9d4SKip Macy struct radix_node_head *rnh = 4554e5fd766SBjoern A. Zeeb V_rt_tables[fibnum][dst->sa_family]; 456df8bae1dSRodney W. Grimes 457df8bae1dSRodney W. Grimes /* verify the gateway is directly reachable */ 45885911824SLuigi Rizzo if ((ifa = ifa_ifwithnet(gateway)) == NULL) { 459df8bae1dSRodney W. Grimes error = ENETUNREACH; 460df8bae1dSRodney W. Grimes goto out; 461df8bae1dSRodney W. Grimes } 4628b07e49aSJulian Elischer rt = rtalloc1_fib(dst, 0, 0UL, fibnum); /* NB: rt is locked */ 463df8bae1dSRodney W. Grimes /* 464df8bae1dSRodney W. Grimes * If the redirect isn't from our current router for this dst, 465df8bae1dSRodney W. Grimes * it's either old or wrong. If it redirects us to ourselves, 466df8bae1dSRodney W. Grimes * we have a routing loop, perhaps as a result of an interface 467df8bae1dSRodney W. Grimes * going down recently. 468df8bae1dSRodney W. Grimes */ 469df8bae1dSRodney W. Grimes if (!(flags & RTF_DONE) && rt && 470956b0b65SJeffrey Hsu (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa)) 471df8bae1dSRodney W. Grimes error = EINVAL; 472df8bae1dSRodney W. Grimes else if (ifa_ifwithaddr(gateway)) 473df8bae1dSRodney W. Grimes error = EHOSTUNREACH; 474df8bae1dSRodney W. Grimes if (error) 475df8bae1dSRodney W. Grimes goto done; 476df8bae1dSRodney W. Grimes /* 477df8bae1dSRodney W. Grimes * Create a new entry if we just got back a wildcard entry 478df8bae1dSRodney W. Grimes * or the the lookup failed. This is necessary for hosts 479df8bae1dSRodney W. Grimes * which use routing redirects generated by smart gateways 480df8bae1dSRodney W. Grimes * to dynamically build the routing tables. 481df8bae1dSRodney W. Grimes */ 48285911824SLuigi Rizzo if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2)) 483df8bae1dSRodney W. Grimes goto create; 484df8bae1dSRodney W. Grimes /* 485df8bae1dSRodney W. Grimes * Don't listen to the redirect if it's 486df8bae1dSRodney W. Grimes * for a route to an interface. 487df8bae1dSRodney W. 
Grimes */ 488df8bae1dSRodney W. Grimes if (rt->rt_flags & RTF_GATEWAY) { 489df8bae1dSRodney W. Grimes if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) { 490df8bae1dSRodney W. Grimes /* 491df8bae1dSRodney W. Grimes * Changing from route to net => route to host. 492df8bae1dSRodney W. Grimes * Create new route, rather than smashing route to net. 493df8bae1dSRodney W. Grimes */ 494df8bae1dSRodney W. Grimes create: 4958e7e854cSKip Macy rt0 = rt; 4968e7e854cSKip Macy rt = NULL; 4978e7e854cSKip Macy 498df8bae1dSRodney W. Grimes flags |= RTF_GATEWAY | RTF_DYNAMIC; 4998071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 5008071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 5018071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = gateway; 5028071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 5038071913dSRuslan Ermilov info.rti_ifa = ifa; 5048071913dSRuslan Ermilov info.rti_flags = flags; 5053120b9d4SKip Macy if (rt0 != NULL) 5063120b9d4SKip Macy RT_UNLOCK(rt0); /* drop lock to avoid LOR with RNH */ 5078b07e49aSJulian Elischer error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum); 508d1dd20beSSam Leffler if (rt != NULL) { 5094de5d90cSSam Leffler RT_LOCK(rt); 5103120b9d4SKip Macy if (rt0 != NULL) 51129910a5aSKip Macy EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst); 5128071913dSRuslan Ermilov flags = rt->rt_flags; 513d1dd20beSSam Leffler } 5143120b9d4SKip Macy if (rt0 != NULL) 5153120b9d4SKip Macy RTFREE(rt0); 5168e7e854cSKip Macy 517603724d3SBjoern A. Zeeb stat = &V_rtstat.rts_dynamic; 518df8bae1dSRodney W. Grimes } else { 5198e7e854cSKip Macy struct rtentry *gwrt; 5208e7e854cSKip Macy 521df8bae1dSRodney W. Grimes /* 522df8bae1dSRodney W. Grimes * Smash the current notion of the gateway to 523df8bae1dSRodney W. Grimes * this destination. Should check about netmask!!! 524df8bae1dSRodney W. Grimes */ 525df8bae1dSRodney W. Grimes rt->rt_flags |= RTF_MODIFIED; 526df8bae1dSRodney W. Grimes flags |= RTF_MODIFIED; 527603724d3SBjoern A. 
Zeeb stat = &V_rtstat.rts_newgateway; 528499676dfSJulian Elischer /* 529499676dfSJulian Elischer * add the key and gateway (in one malloc'd chunk). 530499676dfSJulian Elischer */ 5313120b9d4SKip Macy RT_UNLOCK(rt); 5323120b9d4SKip Macy RADIX_NODE_HEAD_LOCK(rnh); 5333120b9d4SKip Macy RT_LOCK(rt); 534df8bae1dSRodney W. Grimes rt_setgate(rt, rt_key(rt), gateway); 5353120b9d4SKip Macy gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED); 5363120b9d4SKip Macy RADIX_NODE_HEAD_UNLOCK(rnh); 53729910a5aSKip Macy EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst); 5388e7e854cSKip Macy RTFREE_LOCKED(gwrt); 539df8bae1dSRodney W. Grimes } 540df8bae1dSRodney W. Grimes } else 541df8bae1dSRodney W. Grimes error = EHOSTUNREACH; 542df8bae1dSRodney W. Grimes done: 543d1dd20beSSam Leffler if (rt) 5441951e633SJohn Baldwin RTFREE_LOCKED(rt); 545df8bae1dSRodney W. Grimes out: 546df8bae1dSRodney W. Grimes if (error) 547603724d3SBjoern A. Zeeb V_rtstat.rts_badredirect++; 548df8bae1dSRodney W. Grimes else if (stat != NULL) 549df8bae1dSRodney W. Grimes (*stat)++; 550df8bae1dSRodney W. Grimes bzero((caddr_t)&info, sizeof(info)); 551df8bae1dSRodney W. Grimes info.rti_info[RTAX_DST] = dst; 552df8bae1dSRodney W. Grimes info.rti_info[RTAX_GATEWAY] = gateway; 553df8bae1dSRodney W. Grimes info.rti_info[RTAX_NETMASK] = netmask; 554df8bae1dSRodney W. Grimes info.rti_info[RTAX_AUTHOR] = src; 555df8bae1dSRodney W. Grimes rt_missmsg(RTM_REDIRECT, &info, flags, error); 556df8bae1dSRodney W. Grimes } 557df8bae1dSRodney W. Grimes 5588b07e49aSJulian Elischer int 5598b07e49aSJulian Elischer rtioctl(u_long req, caddr_t data) 5608b07e49aSJulian Elischer { 5618b07e49aSJulian Elischer return (rtioctl_fib(req, data, 0)); 5628b07e49aSJulian Elischer } 5638b07e49aSJulian Elischer 564df8bae1dSRodney W. Grimes /* 565df8bae1dSRodney W. Grimes * Routing table ioctl interface. 566df8bae1dSRodney W. Grimes */ 567df8bae1dSRodney W. 
Grimes int 5688b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum) 569df8bae1dSRodney W. Grimes { 5705090559bSChristian S.J. Peron 5715090559bSChristian S.J. Peron /* 5725090559bSChristian S.J. Peron * If more ioctl commands are added here, make sure the proper 5735090559bSChristian S.J. Peron * super-user checks are being performed because it is possible for 5745090559bSChristian S.J. Peron * prison-root to make it this far if raw sockets have been enabled 5755090559bSChristian S.J. Peron * in jails. 5765090559bSChristian S.J. Peron */ 577623ae52eSPoul-Henning Kamp #ifdef INET 578f0068c4aSGarrett Wollman /* Multicast goop, grrr... */ 5798b07e49aSJulian Elischer return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP; 580623ae52eSPoul-Henning Kamp #else /* INET */ 581623ae52eSPoul-Henning Kamp return ENXIO; 582623ae52eSPoul-Henning Kamp #endif /* INET */ 583df8bae1dSRodney W. Grimes } 584df8bae1dSRodney W. Grimes 585df8bae1dSRodney W. Grimes struct ifaddr * 586d1dd20beSSam Leffler ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway) 587df8bae1dSRodney W. Grimes { 5888b07e49aSJulian Elischer return (ifa_ifwithroute_fib(flags, dst, gateway, 0)); 5898b07e49aSJulian Elischer } 5908b07e49aSJulian Elischer 5918b07e49aSJulian Elischer struct ifaddr * 5928b07e49aSJulian Elischer ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway, 5938b07e49aSJulian Elischer u_int fibnum) 5948b07e49aSJulian Elischer { 595df8bae1dSRodney W. Grimes register struct ifaddr *ifa; 596e034e82cSQing Li int not_found = 0; 597d1dd20beSSam Leffler 598df8bae1dSRodney W. Grimes if ((flags & RTF_GATEWAY) == 0) { 599df8bae1dSRodney W. Grimes /* 600df8bae1dSRodney W. Grimes * If we are adding a route to an interface, 601df8bae1dSRodney W. Grimes * and the interface is a pt to pt link 602df8bae1dSRodney W. Grimes * we should search for the destination 603df8bae1dSRodney W. Grimes * as our clue to the interface. 
Otherwise 604df8bae1dSRodney W. Grimes * we can use the local address. 605df8bae1dSRodney W. Grimes */ 60685911824SLuigi Rizzo ifa = NULL; 60785911824SLuigi Rizzo if (flags & RTF_HOST) 608df8bae1dSRodney W. Grimes ifa = ifa_ifwithdstaddr(dst); 60985911824SLuigi Rizzo if (ifa == NULL) 610df8bae1dSRodney W. Grimes ifa = ifa_ifwithaddr(gateway); 611df8bae1dSRodney W. Grimes } else { 612df8bae1dSRodney W. Grimes /* 613df8bae1dSRodney W. Grimes * If we are adding a route to a remote net 614df8bae1dSRodney W. Grimes * or host, the gateway may still be on the 615df8bae1dSRodney W. Grimes * other end of a pt to pt link. 616df8bae1dSRodney W. Grimes */ 617df8bae1dSRodney W. Grimes ifa = ifa_ifwithdstaddr(gateway); 618df8bae1dSRodney W. Grimes } 61985911824SLuigi Rizzo if (ifa == NULL) 620df8bae1dSRodney W. Grimes ifa = ifa_ifwithnet(gateway); 62185911824SLuigi Rizzo if (ifa == NULL) { 6229b20205dSKip Macy struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum); 62385911824SLuigi Rizzo if (rt == NULL) 62485911824SLuigi Rizzo return (NULL); 625e034e82cSQing Li /* 626e034e82cSQing Li * dismiss a gateway that is reachable only 627e034e82cSQing Li * through the default router 628e034e82cSQing Li */ 629e034e82cSQing Li switch (gateway->sa_family) { 630e034e82cSQing Li case AF_INET: 631e034e82cSQing Li if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY) 632e034e82cSQing Li not_found = 1; 633e034e82cSQing Li break; 634e034e82cSQing Li case AF_INET6: 635e034e82cSQing Li if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr)) 636e034e82cSQing Li not_found = 1; 637e034e82cSQing Li break; 638e034e82cSQing Li default: 639e034e82cSQing Li break; 640e034e82cSQing Li } 6417138d65cSSam Leffler RT_REMREF(rt); 642d1dd20beSSam Leffler RT_UNLOCK(rt); 643e034e82cSQing Li if (not_found) 644e034e82cSQing Li return (NULL); 64585911824SLuigi Rizzo if ((ifa = rt->rt_ifa) == NULL) 64685911824SLuigi Rizzo return (NULL); 647df8bae1dSRodney W. Grimes } 648df8bae1dSRodney W. 
Grimes if (ifa->ifa_addr->sa_family != dst->sa_family) { 649df8bae1dSRodney W. Grimes struct ifaddr *oifa = ifa; 650df8bae1dSRodney W. Grimes ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp); 65185911824SLuigi Rizzo if (ifa == NULL) 652df8bae1dSRodney W. Grimes ifa = oifa; 653df8bae1dSRodney W. Grimes } 654df8bae1dSRodney W. Grimes return (ifa); 655df8bae1dSRodney W. Grimes } 656df8bae1dSRodney W. Grimes 657b0a76b88SJulian Elischer /* 658b0a76b88SJulian Elischer * Do appropriate manipulations of a routing tree given 659b0a76b88SJulian Elischer * all the bits of info needed 660b0a76b88SJulian Elischer */ 661df8bae1dSRodney W. Grimes int 662d1dd20beSSam Leffler rtrequest(int req, 663d1dd20beSSam Leffler struct sockaddr *dst, 664d1dd20beSSam Leffler struct sockaddr *gateway, 665d1dd20beSSam Leffler struct sockaddr *netmask, 666d1dd20beSSam Leffler int flags, 667d1dd20beSSam Leffler struct rtentry **ret_nrt) 668df8bae1dSRodney W. Grimes { 6698b07e49aSJulian Elischer return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0)); 6708b07e49aSJulian Elischer } 6718b07e49aSJulian Elischer 6728b07e49aSJulian Elischer int 6738b07e49aSJulian Elischer rtrequest_fib(int req, 6748b07e49aSJulian Elischer struct sockaddr *dst, 6758b07e49aSJulian Elischer struct sockaddr *gateway, 6768b07e49aSJulian Elischer struct sockaddr *netmask, 6778b07e49aSJulian Elischer int flags, 6788b07e49aSJulian Elischer struct rtentry **ret_nrt, 6798b07e49aSJulian Elischer u_int fibnum) 6808b07e49aSJulian Elischer { 6818071913dSRuslan Ermilov struct rt_addrinfo info; 6828071913dSRuslan Ermilov 683ac4a76ebSBjoern A. Zeeb if (dst->sa_len == 0) 684ac4a76ebSBjoern A. Zeeb return(EINVAL); 685ac4a76ebSBjoern A. 
Zeeb 6868071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 6878071913dSRuslan Ermilov info.rti_flags = flags; 6888071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 6898071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = gateway; 6908071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 6918b07e49aSJulian Elischer return rtrequest1_fib(req, &info, ret_nrt, fibnum); 6928071913dSRuslan Ermilov } 6938071913dSRuslan Ermilov 6948071913dSRuslan Ermilov /* 6958071913dSRuslan Ermilov * These (questionable) definitions of apparent local variables apply 6968071913dSRuslan Ermilov * to the next two functions. XXXXXX!!! 6978071913dSRuslan Ermilov */ 6988071913dSRuslan Ermilov #define dst info->rti_info[RTAX_DST] 6998071913dSRuslan Ermilov #define gateway info->rti_info[RTAX_GATEWAY] 7008071913dSRuslan Ermilov #define netmask info->rti_info[RTAX_NETMASK] 7018071913dSRuslan Ermilov #define ifaaddr info->rti_info[RTAX_IFA] 7028071913dSRuslan Ermilov #define ifpaddr info->rti_info[RTAX_IFP] 7038071913dSRuslan Ermilov #define flags info->rti_flags 7048071913dSRuslan Ermilov 7058071913dSRuslan Ermilov int 706d1dd20beSSam Leffler rt_getifa(struct rt_addrinfo *info) 7078071913dSRuslan Ermilov { 7088b07e49aSJulian Elischer return (rt_getifa_fib(info, 0)); 7098b07e49aSJulian Elischer } 7108b07e49aSJulian Elischer 7118b07e49aSJulian Elischer int 7128b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum) 7138b07e49aSJulian Elischer { 7148071913dSRuslan Ermilov struct ifaddr *ifa; 7158071913dSRuslan Ermilov int error = 0; 7168071913dSRuslan Ermilov 7178071913dSRuslan Ermilov /* 7188071913dSRuslan Ermilov * ifp may be specified by sockaddr_dl 7198071913dSRuslan Ermilov * when protocol address is ambiguous. 
7208071913dSRuslan Ermilov */ 7218071913dSRuslan Ermilov if (info->rti_ifp == NULL && ifpaddr != NULL && 7228071913dSRuslan Ermilov ifpaddr->sa_family == AF_LINK && 7238071913dSRuslan Ermilov (ifa = ifa_ifwithnet(ifpaddr)) != NULL) 7248071913dSRuslan Ermilov info->rti_ifp = ifa->ifa_ifp; 7258071913dSRuslan Ermilov if (info->rti_ifa == NULL && ifaaddr != NULL) 7268071913dSRuslan Ermilov info->rti_ifa = ifa_ifwithaddr(ifaaddr); 7278071913dSRuslan Ermilov if (info->rti_ifa == NULL) { 7288071913dSRuslan Ermilov struct sockaddr *sa; 7298071913dSRuslan Ermilov 7308071913dSRuslan Ermilov sa = ifaaddr != NULL ? ifaaddr : 7318071913dSRuslan Ermilov (gateway != NULL ? gateway : dst); 7328071913dSRuslan Ermilov if (sa != NULL && info->rti_ifp != NULL) 7338071913dSRuslan Ermilov info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp); 7348071913dSRuslan Ermilov else if (dst != NULL && gateway != NULL) 7358b07e49aSJulian Elischer info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway, 7368b07e49aSJulian Elischer fibnum); 7378071913dSRuslan Ermilov else if (sa != NULL) 7388b07e49aSJulian Elischer info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa, 7398b07e49aSJulian Elischer fibnum); 7408071913dSRuslan Ermilov } 7418071913dSRuslan Ermilov if ((ifa = info->rti_ifa) != NULL) { 7428071913dSRuslan Ermilov if (info->rti_ifp == NULL) 7438071913dSRuslan Ermilov info->rti_ifp = ifa->ifa_ifp; 7448071913dSRuslan Ermilov } else 7458071913dSRuslan Ermilov error = ENETUNREACH; 7468071913dSRuslan Ermilov return (error); 7478071913dSRuslan Ermilov } 7488071913dSRuslan Ermilov 7499c63e9dbSSam Leffler /* 7509c63e9dbSSam Leffler * Expunges references to a route that's about to be reclaimed. 7519c63e9dbSSam Leffler * The route must be locked. 
7529c63e9dbSSam Leffler */ 7539c63e9dbSSam Leffler int 7549c63e9dbSSam Leffler rtexpunge(struct rtentry *rt) 7559c63e9dbSSam Leffler { 7568b615593SMarko Zec INIT_VNET_NET(curvnet); 7579c63e9dbSSam Leffler struct radix_node *rn; 7589c63e9dbSSam Leffler struct radix_node_head *rnh; 7599c63e9dbSSam Leffler struct ifaddr *ifa; 7609c63e9dbSSam Leffler int error = 0; 7619c63e9dbSSam Leffler 7626e6b3f7cSQing Li /* 7636e6b3f7cSQing Li * Find the correct routing tree to use for this Address Family 7646e6b3f7cSQing Li */ 7653120b9d4SKip Macy rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family]; 7669c63e9dbSSam Leffler RT_LOCK_ASSERT(rt); 7676e6b3f7cSQing Li if (rnh == NULL) 7686e6b3f7cSQing Li return (EAFNOSUPPORT); 7693120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 7709c63e9dbSSam Leffler #if 0 7719c63e9dbSSam Leffler /* 7729c63e9dbSSam Leffler * We cannot assume anything about the reference count 7739c63e9dbSSam Leffler * because protocols call us in many situations; often 7749c63e9dbSSam Leffler * before unwinding references to the table entry. 7759c63e9dbSSam Leffler */ 7769c63e9dbSSam Leffler KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt)); 7779c63e9dbSSam Leffler #endif 7789c63e9dbSSam Leffler /* 7799c63e9dbSSam Leffler * Remove the item from the tree; it should be there, 7809c63e9dbSSam Leffler * but when callers invoke us blindly it may not (sigh). 
7819c63e9dbSSam Leffler */ 7829c63e9dbSSam Leffler rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh); 78385911824SLuigi Rizzo if (rn == NULL) { 7849c63e9dbSSam Leffler error = ESRCH; 7859c63e9dbSSam Leffler goto bad; 7869c63e9dbSSam Leffler } 7879c63e9dbSSam Leffler KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0, 7889c63e9dbSSam Leffler ("unexpected flags 0x%x", rn->rn_flags)); 789d6941ce9SLuigi Rizzo KASSERT(rt == RNTORT(rn), 7909c63e9dbSSam Leffler ("lookup mismatch, rt %p rn %p", rt, rn)); 7919c63e9dbSSam Leffler 7929c63e9dbSSam Leffler rt->rt_flags &= ~RTF_UP; 7939c63e9dbSSam Leffler 7949c63e9dbSSam Leffler /* 7959c63e9dbSSam Leffler * Give the protocol a chance to keep things in sync. 7969c63e9dbSSam Leffler */ 7979c63e9dbSSam Leffler if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) { 7989c63e9dbSSam Leffler struct rt_addrinfo info; 7999c63e9dbSSam Leffler 8009c63e9dbSSam Leffler bzero((caddr_t)&info, sizeof(info)); 8019c63e9dbSSam Leffler info.rti_flags = rt->rt_flags; 8029c63e9dbSSam Leffler info.rti_info[RTAX_DST] = rt_key(rt); 8039c63e9dbSSam Leffler info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 8049c63e9dbSSam Leffler info.rti_info[RTAX_NETMASK] = rt_mask(rt); 8059c63e9dbSSam Leffler ifa->ifa_rtrequest(RTM_DELETE, rt, &info); 8069c63e9dbSSam Leffler } 8079c63e9dbSSam Leffler 8089c63e9dbSSam Leffler /* 8099c63e9dbSSam Leffler * one more rtentry floating around that is not 8109c63e9dbSSam Leffler * linked to the routing table. 8119c63e9dbSSam Leffler */ 812603724d3SBjoern A. Zeeb V_rttrash++; 8139c63e9dbSSam Leffler bad: 8149c63e9dbSSam Leffler return (error); 8159c63e9dbSSam Leffler } 8169c63e9dbSSam Leffler 8178071913dSRuslan Ermilov int 8188b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, 8198b07e49aSJulian Elischer u_int fibnum) 8208b07e49aSJulian Elischer { 8218b615593SMarko Zec INIT_VNET_NET(curvnet); 8223120b9d4SKip Macy int error = 0, needlock = 0; 823df8bae1dSRodney W. 
Grimes register struct rtentry *rt; 824df8bae1dSRodney W. Grimes register struct radix_node *rn; 825df8bae1dSRodney W. Grimes register struct radix_node_head *rnh; 826df8bae1dSRodney W. Grimes struct ifaddr *ifa; 827df8bae1dSRodney W. Grimes struct sockaddr *ndst; 828df8bae1dSRodney W. Grimes #define senderr(x) { error = x ; goto bad; } 829df8bae1dSRodney W. Grimes 8308b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); 8318b07e49aSJulian Elischer if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */ 8328b07e49aSJulian Elischer fibnum = 0; 833b0a76b88SJulian Elischer /* 834b0a76b88SJulian Elischer * Find the correct routing tree to use for this Address Family 835b0a76b88SJulian Elischer */ 836603724d3SBjoern A. Zeeb rnh = V_rt_tables[fibnum][dst->sa_family]; 83785911824SLuigi Rizzo if (rnh == NULL) 838983985c1SJeffrey Hsu return (EAFNOSUPPORT); 8393120b9d4SKip Macy needlock = ((flags & RTF_RNH_LOCKED) == 0); 8403120b9d4SKip Macy flags &= ~RTF_RNH_LOCKED; 8413120b9d4SKip Macy if (needlock) 842956b0b65SJeffrey Hsu RADIX_NODE_HEAD_LOCK(rnh); 843c96b8224SKip Macy else 844c96b8224SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 845b0a76b88SJulian Elischer /* 846b0a76b88SJulian Elischer * If we are adding a host route then we don't want to put 84766953138SRuslan Ermilov * a netmask in the tree, nor do we want to clone it. 848b0a76b88SJulian Elischer */ 8496e6b3f7cSQing Li if (flags & RTF_HOST) 85085911824SLuigi Rizzo netmask = NULL; 8516e6b3f7cSQing Li 852df8bae1dSRodney W. Grimes switch (req) { 853df8bae1dSRodney W. Grimes case RTM_DELETE: 854e440aed9SQing Li #ifdef RADIX_MPATH 855e440aed9SQing Li /* 856e440aed9SQing Li * if we got multipath routes, we require users to specify 857e440aed9SQing Li * a matching RTAX_GATEWAY. 
858e440aed9SQing Li */ 859e440aed9SQing Li if (rn_mpath_capable(rnh)) { 860e440aed9SQing Li struct rtentry *rto = NULL; 861e440aed9SQing Li 862e440aed9SQing Li rn = rnh->rnh_matchaddr(dst, rnh); 863e440aed9SQing Li if (rn == NULL) 864e440aed9SQing Li senderr(ESRCH); 865e440aed9SQing Li rto = rt = RNTORT(rn); 866e440aed9SQing Li rt = rt_mpath_matchgate(rt, gateway); 867e440aed9SQing Li if (!rt) 868e440aed9SQing Li senderr(ESRCH); 869e440aed9SQing Li /* 870e440aed9SQing Li * this is the first entry in the chain 871e440aed9SQing Li */ 872e440aed9SQing Li if (rto == rt) { 873e440aed9SQing Li rn = rn_mpath_next((struct radix_node *)rt); 874e440aed9SQing Li /* 875e440aed9SQing Li * there is another entry, now it's active 876e440aed9SQing Li */ 877e440aed9SQing Li if (rn) { 878e440aed9SQing Li rto = RNTORT(rn); 879e440aed9SQing Li RT_LOCK(rto); 880e440aed9SQing Li rto->rt_flags |= RTF_UP; 881e440aed9SQing Li RT_UNLOCK(rto); 882e440aed9SQing Li } else if (rt->rt_flags & RTF_GATEWAY) { 883e440aed9SQing Li /* 884e440aed9SQing Li * For gateway routes, we need to 885e440aed9SQing Li * make sure that we we are deleting 886e440aed9SQing Li * the correct gateway. 887e440aed9SQing Li * rt_mpath_matchgate() does not 888e440aed9SQing Li * check the case when there is only 889e440aed9SQing Li * one route in the chain. 
890e440aed9SQing Li */ 891e440aed9SQing Li if (gateway && 892e440aed9SQing Li (rt->rt_gateway->sa_len != gateway->sa_len || 893e440aed9SQing Li memcmp(rt->rt_gateway, gateway, gateway->sa_len))) 894e440aed9SQing Li senderr(ESRCH); 895e440aed9SQing Li } 896e440aed9SQing Li /* 897e440aed9SQing Li * use the normal delete code to remove 898e440aed9SQing Li * the first entry 899e440aed9SQing Li */ 900e440aed9SQing Li goto normal_rtdel; 901e440aed9SQing Li } 902e440aed9SQing Li /* 903e440aed9SQing Li * if the entry is 2nd and on up 904e440aed9SQing Li */ 905e440aed9SQing Li if (!rt_mpath_deldup(rto, rt)) 906e440aed9SQing Li panic ("rtrequest1: rt_mpath_deldup"); 907e440aed9SQing Li RT_LOCK(rt); 908e440aed9SQing Li RT_ADDREF(rt); 909e440aed9SQing Li rt->rt_flags &= ~RTF_UP; 910e440aed9SQing Li goto deldone; /* done with the RTM_DELETE command */ 911e440aed9SQing Li } 912e440aed9SQing Li 913e440aed9SQing Li normal_rtdel: 914ea9cd9f2SBjoern A. Zeeb #endif 915b0a76b88SJulian Elischer /* 916b0a76b88SJulian Elischer * Remove the item from the tree and return it. 917b0a76b88SJulian Elischer * Complain if it is not there and do no more processing. 918b0a76b88SJulian Elischer */ 919d1dd20beSSam Leffler rn = rnh->rnh_deladdr(dst, netmask, rnh); 92085911824SLuigi Rizzo if (rn == NULL) 921df8bae1dSRodney W. Grimes senderr(ESRCH); 922df8bae1dSRodney W. Grimes if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) 923df8bae1dSRodney W. Grimes panic ("rtrequest delete"); 924d6941ce9SLuigi Rizzo rt = RNTORT(rn); 925d1dd20beSSam Leffler RT_LOCK(rt); 9267138d65cSSam Leffler RT_ADDREF(rt); 92771eba915SRuslan Ermilov rt->rt_flags &= ~RTF_UP; 928c2bed6a3SGarrett Wollman 929c2bed6a3SGarrett Wollman /* 930499676dfSJulian Elischer * give the protocol a chance to keep things in sync. 931b0a76b88SJulian Elischer */ 932df8bae1dSRodney W. Grimes if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) 9338071913dSRuslan Ermilov ifa->ifa_rtrequest(RTM_DELETE, rt, info); 934499676dfSJulian Elischer 935ea9cd9f2SBjoern A. 
Zeeb #ifdef RADIX_MPATH 936e440aed9SQing Li deldone: 937ea9cd9f2SBjoern A. Zeeb #endif 938b0a76b88SJulian Elischer /* 939d6941ce9SLuigi Rizzo * One more rtentry floating around that is not 940d6941ce9SLuigi Rizzo * linked to the routing table. rttrash will be decremented 941d6941ce9SLuigi Rizzo * when RTFREE(rt) is eventually called. 942499676dfSJulian Elischer */ 943603724d3SBjoern A. Zeeb V_rttrash++; 944499676dfSJulian Elischer 945499676dfSJulian Elischer /* 946499676dfSJulian Elischer * If the caller wants it, then it can have it, 947499676dfSJulian Elischer * but it's up to it to free the rtentry as we won't be 948499676dfSJulian Elischer * doing it. 949b0a76b88SJulian Elischer */ 950d1dd20beSSam Leffler if (ret_nrt) { 951df8bae1dSRodney W. Grimes *ret_nrt = rt; 952d1dd20beSSam Leffler RT_UNLOCK(rt); 953d1dd20beSSam Leffler } else 954d1dd20beSSam Leffler RTFREE_LOCKED(rt); 955df8bae1dSRodney W. Grimes break; 956df8bae1dSRodney W. Grimes case RTM_RESOLVE: 9576e6b3f7cSQing Li /* 9586e6b3f7cSQing Li * resolve was only used for route cloning 9596e6b3f7cSQing Li * here for compat 9606e6b3f7cSQing Li */ 9616e6b3f7cSQing Li break; 962df8bae1dSRodney W. Grimes case RTM_ADD: 9635df72964SGarrett Wollman if ((flags & RTF_GATEWAY) && !gateway) 96416a2e0a6SQing Li senderr(EINVAL); 96516a2e0a6SQing Li if (dst && gateway && (dst->sa_family != gateway->sa_family) && 96616a2e0a6SQing Li (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK)) 96716a2e0a6SQing Li senderr(EINVAL); 9685df72964SGarrett Wollman 9698b07e49aSJulian Elischer if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum))) 9708071913dSRuslan Ermilov senderr(error); 9718071913dSRuslan Ermilov ifa = info->rti_ifa; 9721ed81b73SMarko Zec rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO); 97385911824SLuigi Rizzo if (rt == NULL) 974df8bae1dSRodney W. Grimes senderr(ENOBUFS); 975d1dd20beSSam Leffler RT_LOCK_INIT(rt); 976df8bae1dSRodney W. 
Grimes rt->rt_flags = RTF_UP | flags; 9778b07e49aSJulian Elischer rt->rt_fibnum = fibnum; 978499676dfSJulian Elischer /* 979499676dfSJulian Elischer * Add the gateway. Possibly re-malloc-ing the storage for it 9806e6b3f7cSQing Li * 981499676dfSJulian Elischer */ 982d1dd20beSSam Leffler RT_LOCK(rt); 983831a80b0SMatthew Dillon if ((error = rt_setgate(rt, dst, gateway)) != 0) { 984d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 9851ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 986704b0666SBill Fenner senderr(error); 987df8bae1dSRodney W. Grimes } 988499676dfSJulian Elischer 989499676dfSJulian Elischer /* 990499676dfSJulian Elischer * point to the (possibly newly malloc'd) dest address. 991499676dfSJulian Elischer */ 992d1dd20beSSam Leffler ndst = (struct sockaddr *)rt_key(rt); 993499676dfSJulian Elischer 994499676dfSJulian Elischer /* 995499676dfSJulian Elischer * make sure it contains the value we want (masked if needed). 996499676dfSJulian Elischer */ 997df8bae1dSRodney W. Grimes if (netmask) { 998df8bae1dSRodney W. Grimes rt_maskedcopy(dst, ndst, netmask); 999df8bae1dSRodney W. Grimes } else 10001838a647SLuigi Rizzo bcopy(dst, ndst, dst->sa_len); 10018e718bb4SGarrett Wollman 10028e718bb4SGarrett Wollman /* 1003499676dfSJulian Elischer * Note that we now have a reference to the ifa. 10048e718bb4SGarrett Wollman * This moved from below so that rnh->rnh_addaddr() can 1005499676dfSJulian Elischer * examine the ifa and ifa->ifa_ifp if it so desires. 
10068e718bb4SGarrett Wollman */ 100719fc74fbSJeffrey Hsu IFAREF(ifa); 10088e718bb4SGarrett Wollman rt->rt_ifa = ifa; 10098e718bb4SGarrett Wollman rt->rt_ifp = ifa->ifa_ifp; 10108e718bb4SGarrett Wollman 1011e440aed9SQing Li #ifdef RADIX_MPATH 1012e440aed9SQing Li /* do not permit exactly the same dst/mask/gw pair */ 1013e440aed9SQing Li if (rn_mpath_capable(rnh) && 1014e440aed9SQing Li rt_mpath_conflict(rnh, rt, netmask)) { 1015e440aed9SQing Li if (rt->rt_ifa) { 1016e440aed9SQing Li IFAFREE(rt->rt_ifa); 1017e440aed9SQing Li } 1018e440aed9SQing Li Free(rt_key(rt)); 1019e440aed9SQing Li RT_LOCK_DESTROY(rt); 10201ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1021e440aed9SQing Li senderr(EEXIST); 1022e440aed9SQing Li } 1023e440aed9SQing Li #endif 1024e440aed9SQing Li 1025d1dd20beSSam Leffler /* XXX mtu manipulation will be done in rnh_addaddr -- itojun */ 1026d1dd20beSSam Leffler rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes); 1027499676dfSJulian Elischer /* 1028499676dfSJulian Elischer * If it still failed to go into the tree, 1029499676dfSJulian Elischer * then un-make it (this should be a function) 1030499676dfSJulian Elischer */ 103185911824SLuigi Rizzo if (rn == NULL) { 1032d1dd20beSSam Leffler if (rt->rt_ifa) 10338e718bb4SGarrett Wollman IFAFREE(rt->rt_ifa); 1034df8bae1dSRodney W. Grimes Free(rt_key(rt)); 1035d1dd20beSSam Leffler RT_LOCK_DESTROY(rt); 10361ed81b73SMarko Zec uma_zfree(V_rtzone, rt); 1037df8bae1dSRodney W. Grimes senderr(EEXIST); 1038df8bae1dSRodney W. Grimes } 1039499676dfSJulian Elischer 1040499676dfSJulian Elischer /* 1041a0c0e34bSGleb Smirnoff * If this protocol has something to add to this then 1042499676dfSJulian Elischer * allow it to do that as well. 1043499676dfSJulian Elischer */ 1044df8bae1dSRodney W. 
Grimes if (ifa->ifa_rtrequest) 10458071913dSRuslan Ermilov ifa->ifa_rtrequest(req, rt, info); 1046499676dfSJulian Elischer 1047cd02a0b7SGarrett Wollman /* 1048499676dfSJulian Elischer * actually return a resultant rtentry and 1049499676dfSJulian Elischer * give the caller a single reference. 1050499676dfSJulian Elischer */ 1051df8bae1dSRodney W. Grimes if (ret_nrt) { 1052df8bae1dSRodney W. Grimes *ret_nrt = rt; 10537138d65cSSam Leffler RT_ADDREF(rt); 1054df8bae1dSRodney W. Grimes } 1055d1dd20beSSam Leffler RT_UNLOCK(rt); 1056df8bae1dSRodney W. Grimes break; 10578071913dSRuslan Ermilov default: 10588071913dSRuslan Ermilov error = EOPNOTSUPP; 1059df8bae1dSRodney W. Grimes } 1060df8bae1dSRodney W. Grimes bad: 10613120b9d4SKip Macy if (needlock) 1062956b0b65SJeffrey Hsu RADIX_NODE_HEAD_UNLOCK(rnh); 1063df8bae1dSRodney W. Grimes return (error); 1064d1dd20beSSam Leffler #undef senderr 1065d1dd20beSSam Leffler } 1066d1dd20beSSam Leffler 10678071913dSRuslan Ermilov #undef dst 10688071913dSRuslan Ermilov #undef gateway 10698071913dSRuslan Ermilov #undef netmask 10708071913dSRuslan Ermilov #undef ifaaddr 10718071913dSRuslan Ermilov #undef ifpaddr 10728071913dSRuslan Ermilov #undef flags 1073df8bae1dSRodney W. Grimes 1074df8bae1dSRodney W. Grimes int 1075d1dd20beSSam Leffler rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate) 1076df8bae1dSRodney W. Grimes { 10778b615593SMarko Zec INIT_VNET_NET(curvnet); 1078d1dd20beSSam Leffler /* XXX dst may be overwritten, can we move this to below */ 10796e6b3f7cSQing Li int dlen = SA_SIZE(dst), glen = SA_SIZE(gate); 10806e6b3f7cSQing Li #ifdef INVARIANTS 1081c7cacf27SBrooks Davis struct radix_node_head *rnh = 1082c7cacf27SBrooks Davis V_rt_tables[rt->rt_fibnum][dst->sa_family]; 10836e6b3f7cSQing Li #endif 1084d1dd20beSSam Leffler 1085d1dd20beSSam Leffler RT_LOCK_ASSERT(rt); 10863120b9d4SKip Macy RADIX_NODE_HEAD_LOCK_ASSERT(rnh); 1087df8bae1dSRodney W. 
Grimes 10881db1fffaSBill Fenner /* 108985911824SLuigi Rizzo * Prepare to store the gateway in rt->rt_gateway. 109085911824SLuigi Rizzo * Both dst and gateway are stored one after the other in the same 109185911824SLuigi Rizzo * malloc'd chunk. If we have room, we can reuse the old buffer, 109285911824SLuigi Rizzo * rt_gateway already points to the right place. 109385911824SLuigi Rizzo * Otherwise, malloc a new block and update the 'dst' address. 1094499676dfSJulian Elischer */ 109585911824SLuigi Rizzo if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) { 109685911824SLuigi Rizzo caddr_t new; 109785911824SLuigi Rizzo 1098df8bae1dSRodney W. Grimes R_Malloc(new, caddr_t, dlen + glen); 109985911824SLuigi Rizzo if (new == NULL) 11001db1fffaSBill Fenner return ENOBUFS; 1101499676dfSJulian Elischer /* 110285911824SLuigi Rizzo * XXX note, we copy from *dst and not *rt_key(rt) because 110385911824SLuigi Rizzo * rt_setgate() can be called to initialize a newly 110485911824SLuigi Rizzo * allocated route entry, in which case rt_key(rt) == NULL 110585911824SLuigi Rizzo * (and also rt->rt_gateway == NULL). 110685911824SLuigi Rizzo * Free()/free() handle a NULL argument just fine. 1107499676dfSJulian Elischer */ 11081838a647SLuigi Rizzo bcopy(dst, new, dlen); 110985911824SLuigi Rizzo Free(rt_key(rt)); /* free old block, if any */ 1110445e045bSAlexander Kabaev rt_key(rt) = (struct sockaddr *)new; 111185911824SLuigi Rizzo rt->rt_gateway = (struct sockaddr *)(new + dlen); 1112df8bae1dSRodney W. Grimes } 1113499676dfSJulian Elischer 1114499676dfSJulian Elischer /* 111585911824SLuigi Rizzo * Copy the new gateway value into the memory chunk. 111685911824SLuigi Rizzo */ 111785911824SLuigi Rizzo bcopy(gate, rt->rt_gateway, glen); 111885911824SLuigi Rizzo 11196e6b3f7cSQing Li return (0); 1120df8bae1dSRodney W. Grimes } 1121df8bae1dSRodney W. 
Grimes 1122f708ef1bSPoul-Henning Kamp static void 1123d1dd20beSSam Leffler rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask) 1124df8bae1dSRodney W. Grimes { 1125df8bae1dSRodney W. Grimes register u_char *cp1 = (u_char *)src; 1126df8bae1dSRodney W. Grimes register u_char *cp2 = (u_char *)dst; 1127df8bae1dSRodney W. Grimes register u_char *cp3 = (u_char *)netmask; 1128df8bae1dSRodney W. Grimes u_char *cplim = cp2 + *cp3; 1129df8bae1dSRodney W. Grimes u_char *cplim2 = cp2 + *cp1; 1130df8bae1dSRodney W. Grimes 1131df8bae1dSRodney W. Grimes *cp2++ = *cp1++; *cp2++ = *cp1++; /* copies sa_len & sa_family */ 1132df8bae1dSRodney W. Grimes cp3 += 2; 1133df8bae1dSRodney W. Grimes if (cplim > cplim2) 1134df8bae1dSRodney W. Grimes cplim = cplim2; 1135df8bae1dSRodney W. Grimes while (cp2 < cplim) 1136df8bae1dSRodney W. Grimes *cp2++ = *cp1++ & *cp3++; 1137df8bae1dSRodney W. Grimes if (cp2 < cplim2) 1138df8bae1dSRodney W. Grimes bzero((caddr_t)cp2, (unsigned)(cplim2 - cp2)); 1139df8bae1dSRodney W. Grimes } 1140df8bae1dSRodney W. Grimes 1141df8bae1dSRodney W. Grimes /* 1142df8bae1dSRodney W. Grimes * Set up a routing table entry, normally 1143df8bae1dSRodney W. Grimes * for an interface. 1144df8bae1dSRodney W. Grimes */ 11458b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */ 11468b07e49aSJulian Elischer static inline int 11478b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) 1148df8bae1dSRodney W. 
Grimes { 11498b615593SMarko Zec INIT_VNET_NET(curvnet); 11505aca0b30SLuigi Rizzo struct sockaddr *dst; 11518071913dSRuslan Ermilov struct sockaddr *netmask; 115285911824SLuigi Rizzo struct rtentry *rt = NULL; 11538071913dSRuslan Ermilov struct rt_addrinfo info; 1154e440aed9SQing Li int error = 0; 11558b07e49aSJulian Elischer int startfib, endfib; 11568b07e49aSJulian Elischer char tempbuf[_SOCKADDR_TMPSIZE]; 11578b07e49aSJulian Elischer int didwork = 0; 11588b07e49aSJulian Elischer int a_failure = 0; 11596e6b3f7cSQing Li static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK}; 1160df8bae1dSRodney W. Grimes 11618071913dSRuslan Ermilov if (flags & RTF_HOST) { 11628071913dSRuslan Ermilov dst = ifa->ifa_dstaddr; 11638071913dSRuslan Ermilov netmask = NULL; 11648071913dSRuslan Ermilov } else { 11658071913dSRuslan Ermilov dst = ifa->ifa_addr; 11668071913dSRuslan Ermilov netmask = ifa->ifa_netmask; 11678071913dSRuslan Ermilov } 11688b07e49aSJulian Elischer if ( dst->sa_family != AF_INET) 11698b07e49aSJulian Elischer fibnum = 0; 11708b07e49aSJulian Elischer if (fibnum == -1) { 117166e8505fSJulian Elischer if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) { 117266e8505fSJulian Elischer startfib = endfib = curthread->td_proc->p_fibnum; 117366e8505fSJulian Elischer } else { 11748b07e49aSJulian Elischer startfib = 0; 11758b07e49aSJulian Elischer endfib = rt_numfibs - 1; 117666e8505fSJulian Elischer } 11778b07e49aSJulian Elischer } else { 11788b07e49aSJulian Elischer KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum")); 11798b07e49aSJulian Elischer startfib = fibnum; 11808b07e49aSJulian Elischer endfib = fibnum; 11818b07e49aSJulian Elischer } 1182ac4a76ebSBjoern A. Zeeb if (dst->sa_len == 0) 1183ac4a76ebSBjoern A. Zeeb return(EINVAL); 1184ac4a76ebSBjoern A. 
Zeeb 1185b0a76b88SJulian Elischer /* 11868b07e49aSJulian Elischer * If it's a delete, check that if it exists, 11878b07e49aSJulian Elischer * it's on the correct interface or we might scrub 11888b07e49aSJulian Elischer * a route to another ifa which would 1189b0a76b88SJulian Elischer * be confusing at best and possibly worse. 1190b0a76b88SJulian Elischer */ 1191df8bae1dSRodney W. Grimes if (cmd == RTM_DELETE) { 1192b0a76b88SJulian Elischer /* 1193b0a76b88SJulian Elischer * It's a delete, so it should already exist.. 1194b0a76b88SJulian Elischer * If it's a net, mask off the host bits 1195b0a76b88SJulian Elischer * (Assuming we have a mask) 11968b07e49aSJulian Elischer * XXX this is kinda inet specific.. 1197b0a76b88SJulian Elischer */ 11988071913dSRuslan Ermilov if (netmask != NULL) { 11998b07e49aSJulian Elischer rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask); 12008b07e49aSJulian Elischer dst = (struct sockaddr *)tempbuf; 1201df8bae1dSRodney W. Grimes } 12028b07e49aSJulian Elischer } 12038b07e49aSJulian Elischer /* 12048b07e49aSJulian Elischer * Now go through all the requested tables (fibs) and do the 12058b07e49aSJulian Elischer * requested action. Realistically, this will either be fib 0 12068b07e49aSJulian Elischer * for protocols that don't do multiple tables or all the 12078b07e49aSJulian Elischer * tables for those that do. XXX For this version only AF_INET. 12088b07e49aSJulian Elischer * When that changes code should be refactored to protocol 12098b07e49aSJulian Elischer * independent parts and protocol dependent parts. 
12108b07e49aSJulian Elischer */ 12118b07e49aSJulian Elischer for ( fibnum = startfib; fibnum <= endfib; fibnum++) { 12128b07e49aSJulian Elischer if (cmd == RTM_DELETE) { 12138b07e49aSJulian Elischer struct radix_node_head *rnh; 12148b07e49aSJulian Elischer struct radix_node *rn; 1215b0a76b88SJulian Elischer /* 12168071913dSRuslan Ermilov * Look up an rtentry that is in the routing tree and 12178071913dSRuslan Ermilov * contains the correct info. 1218b0a76b88SJulian Elischer */ 1219603724d3SBjoern A. Zeeb if ((rnh = V_rt_tables[fibnum][dst->sa_family]) == NULL) 12208b07e49aSJulian Elischer /* this table doesn't exist but others might */ 12218b07e49aSJulian Elischer continue; 1222956b0b65SJeffrey Hsu RADIX_NODE_HEAD_LOCK(rnh); 1223e440aed9SQing Li #ifdef RADIX_MPATH 1224e440aed9SQing Li if (rn_mpath_capable(rnh)) { 1225e440aed9SQing Li 1226e440aed9SQing Li rn = rnh->rnh_matchaddr(dst, rnh); 1227e440aed9SQing Li if (rn == NULL) 1228e440aed9SQing Li error = ESRCH; 1229e440aed9SQing Li else { 1230e440aed9SQing Li rt = RNTORT(rn); 1231e440aed9SQing Li /* 12328b07e49aSJulian Elischer * for interface route the 12338b07e49aSJulian Elischer * rt->rt_gateway is sockaddr_intf 12348b07e49aSJulian Elischer * for cloning ARP entries, so 12358b07e49aSJulian Elischer * rt_mpath_matchgate must use the 12368b07e49aSJulian Elischer * interface address 1237e440aed9SQing Li */ 12388b07e49aSJulian Elischer rt = rt_mpath_matchgate(rt, 12398b07e49aSJulian Elischer ifa->ifa_addr); 1240e440aed9SQing Li if (!rt) 1241e440aed9SQing Li error = ESRCH; 1242e440aed9SQing Li } 1243e440aed9SQing Li } 1244e440aed9SQing Li else 1245e440aed9SQing Li #endif 12468b07e49aSJulian Elischer rn = rnh->rnh_lookup(dst, netmask, rnh); 12478b07e49aSJulian Elischer error = (rn == NULL || 12488071913dSRuslan Ermilov (rn->rn_flags & RNF_ROOT) || 1249d6941ce9SLuigi Rizzo RNTORT(rn)->rt_ifa != ifa || 125085911824SLuigi Rizzo !sa_equal((struct sockaddr *)rn->rn_key, dst)); 1251956b0b65SJeffrey Hsu 
RADIX_NODE_HEAD_UNLOCK(rnh); 1252956b0b65SJeffrey Hsu if (error) { 12538b07e49aSJulian Elischer /* this is only an error if bad on ALL tables */ 12548b07e49aSJulian Elischer continue; 1255df8bae1dSRodney W. Grimes } 1256b0a76b88SJulian Elischer } 1257b0a76b88SJulian Elischer /* 1258b0a76b88SJulian Elischer * Do the actual request 1259b0a76b88SJulian Elischer */ 12608071913dSRuslan Ermilov bzero((caddr_t)&info, sizeof(info)); 12618071913dSRuslan Ermilov info.rti_ifa = ifa; 12628071913dSRuslan Ermilov info.rti_flags = flags | ifa->ifa_flags; 12638071913dSRuslan Ermilov info.rti_info[RTAX_DST] = dst; 12646e6b3f7cSQing Li /* 12656e6b3f7cSQing Li * doing this for compatibility reasons 12666e6b3f7cSQing Li */ 12676e6b3f7cSQing Li if (cmd == RTM_ADD) 12686e6b3f7cSQing Li info.rti_info[RTAX_GATEWAY] = 12696e6b3f7cSQing Li (struct sockaddr *)&null_sdl; 12706e6b3f7cSQing Li else 12718071913dSRuslan Ermilov info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; 12728071913dSRuslan Ermilov info.rti_info[RTAX_NETMASK] = netmask; 12738b07e49aSJulian Elischer error = rtrequest1_fib(cmd, &info, &rt, fibnum); 12745aca0b30SLuigi Rizzo if (error == 0 && rt != NULL) { 12758071913dSRuslan Ermilov /* 12766f99b44cSBrian Somers * notify any listening routing agents of the change 12778071913dSRuslan Ermilov */ 1278d1dd20beSSam Leffler RT_LOCK(rt); 1279e440aed9SQing Li #ifdef RADIX_MPATH 1280e440aed9SQing Li /* 1281e440aed9SQing Li * in case address alias finds the first address 1282e440aed9SQing Li * e.g. ifconfig bge0 192.103.54.246/24 1283e440aed9SQing Li * e.g. 
ifconfig bge0 192.103.54.247/24 1284e440aed9SQing Li * the address set in the route is 192.103.54.246 1285e440aed9SQing Li * so we need to replace it with 192.103.54.247 1286e440aed9SQing Li */ 12878b07e49aSJulian Elischer if (memcmp(rt->rt_ifa->ifa_addr, 12888b07e49aSJulian Elischer ifa->ifa_addr, ifa->ifa_addr->sa_len)) { 1289e440aed9SQing Li IFAFREE(rt->rt_ifa); 1290e440aed9SQing Li IFAREF(ifa); 1291e440aed9SQing Li rt->rt_ifp = ifa->ifa_ifp; 1292e440aed9SQing Li rt->rt_ifa = ifa; 1293e440aed9SQing Li } 1294e440aed9SQing Li #endif 12956e6b3f7cSQing Li /* 12966e6b3f7cSQing Li * doing this for compatibility reasons 12976e6b3f7cSQing Li */ 12986e6b3f7cSQing Li if (cmd == RTM_ADD) { 12996e6b3f7cSQing Li ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type = 13006e6b3f7cSQing Li rt->rt_ifp->if_type; 13016e6b3f7cSQing Li ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index = 13026e6b3f7cSQing Li rt->rt_ifp->if_index; 13036e6b3f7cSQing Li } 13048071913dSRuslan Ermilov rt_newaddrmsg(cmd, ifa, error, rt); 13058071913dSRuslan Ermilov if (cmd == RTM_DELETE) { 1306b0a76b88SJulian Elischer /* 13078b07e49aSJulian Elischer * If we are deleting, and we found an entry, 13088b07e49aSJulian Elischer * then it's been removed from the tree.. 13098b07e49aSJulian Elischer * now throw it away. 1310b0a76b88SJulian Elischer */ 1311d1dd20beSSam Leffler RTFREE_LOCKED(rt); 1312d1dd20beSSam Leffler } else { 1313d1dd20beSSam Leffler if (cmd == RTM_ADD) { 1314b0a76b88SJulian Elischer /* 13158b07e49aSJulian Elischer * We just wanted to add it.. 13168b07e49aSJulian Elischer * we don't actually need a reference. 1317b0a76b88SJulian Elischer */ 13187138d65cSSam Leffler RT_REMREF(rt); 1319df8bae1dSRodney W. Grimes } 1320d1dd20beSSam Leffler RT_UNLOCK(rt); 1321d1dd20beSSam Leffler } 13228b07e49aSJulian Elischer didwork = 1; 1323df8bae1dSRodney W. 
Grimes } 13248b07e49aSJulian Elischer if (error) 13258b07e49aSJulian Elischer a_failure = error; 13268b07e49aSJulian Elischer } 13278b07e49aSJulian Elischer if (cmd == RTM_DELETE) { 13288b07e49aSJulian Elischer if (didwork) { 13298b07e49aSJulian Elischer error = 0; 13308b07e49aSJulian Elischer } else { 13318b07e49aSJulian Elischer /* we only give an error if it wasn't in any table */ 13328b07e49aSJulian Elischer error = ((flags & RTF_HOST) ? 13338b07e49aSJulian Elischer EHOSTUNREACH : ENETUNREACH); 13348b07e49aSJulian Elischer } 13358b07e49aSJulian Elischer } else { 13368b07e49aSJulian Elischer if (a_failure) { 13378b07e49aSJulian Elischer /* return an error if any of them failed */ 13388b07e49aSJulian Elischer error = a_failure; 13398b07e49aSJulian Elischer } 13408b07e49aSJulian Elischer } 13413ec66d6cSDavid Greenman return (error); 13423ec66d6cSDavid Greenman } 1343cb64988fSLuoqi Chen 13448b07e49aSJulian Elischer /* rtinit_fib(): special entry point for inet-internal use; the original note says "may not use", presumably meaning other code should not call it (TODO confirm). Forwards ifa, cmd and flags unchanged to rtinit1() with -1 as the fib argument and returns rtinit1()'s result. NOTE(review): -1 presumably selects every fib rather than a single table -- confirm against the start of rtinit1(). */ 13458b07e49aSJulian Elischer int 13468b07e49aSJulian Elischer rtinit_fib(struct ifaddr *ifa, int cmd, int flags) 13478b07e49aSJulian Elischer { 13488b07e49aSJulian Elischer return (rtinit1(ifa, cmd, flags, -1)); 13498b07e49aSJulian Elischer } 13508b07e49aSJulian Elischer 13518b07e49aSJulian Elischer /* 13528b07e49aSJulian Elischer * Set up a routing table entry, normally 13538b07e49aSJulian Elischer * for an interface. 
13548b07e49aSJulian Elischer */ /* rtinit(): chooses the fib argument from the address family of the route destination and hands the request to rtinit1(). ifa, cmd and flags are passed through unchanged; the return value is whatever rtinit1() returns. dst is only inspected for its sa_family here and is not itself passed on. */ 13558b07e49aSJulian Elischer int 13568b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags) 13578b07e49aSJulian Elischer { 13588b07e49aSJulian Elischer struct sockaddr *dst; 13598b07e49aSJulian Elischer int fib = 0; /* used for every family other than AF_INET */ 13608b07e49aSJulian Elischer 13618b07e49aSJulian Elischer /* with RTF_HOST set the destination is the ifaddr's ifa_dstaddr, otherwise its own ifa_addr */ if (flags & RTF_HOST) { 13628b07e49aSJulian Elischer dst = ifa->ifa_dstaddr; 13638b07e49aSJulian Elischer } else { 13648b07e49aSJulian Elischer dst = ifa->ifa_addr; 13658b07e49aSJulian Elischer } 13668b07e49aSJulian Elischer 13678b07e49aSJulian Elischer if (dst->sa_family == AF_INET) 13688b07e49aSJulian Elischer fib = -1; /* NOTE(review): -1 presumably means "all fibs" -- confirm in rtinit1() */ 13698b07e49aSJulian Elischer return (rtinit1(ifa, cmd, flags, fib)); 13708b07e49aSJulian Elischer } 13718b07e49aSJulian Elischer 13726a800098SYoshinobu Inoue /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */ 13736a800098SYoshinobu Inoue SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0); 1374