xref: /freebsd/sys/net/route.c (revision 1ed81b739e9e0f59a6195467758f5109d346ca4f)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1980, 1986, 1991, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
2942e9e16dSRuslan Ermilov  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
30c3aac50fSPeter Wemm  * $FreeBSD$
31df8bae1dSRodney W. Grimes  */
328b07e49aSJulian Elischer /************************************************************************
338b07e49aSJulian Elischer  * Note: In this file a 'fib' is a "forwarding information base"	*
348b07e49aSJulian Elischer  * Which is the new name for an in kernel routing (next hop) table.	*
358b07e49aSJulian Elischer  ***********************************************************************/
36df8bae1dSRodney W. Grimes 
371d5e9e22SEivind Eklund #include "opt_inet.h"
388b07e49aSJulian Elischer #include "opt_route.h"
394bd49128SPeter Wemm #include "opt_mrouting.h"
40e440aed9SQing Li #include "opt_mpath.h"
414bd49128SPeter Wemm 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
43df8bae1dSRodney W. Grimes #include <sys/systm.h>
446e6b3f7cSQing Li #include <sys/syslog.h>
454d1d4912SBruce Evans #include <sys/malloc.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47df8bae1dSRodney W. Grimes #include <sys/socket.h>
488b07e49aSJulian Elischer #include <sys/sysctl.h>
493120b9d4SKip Macy #include <sys/syslog.h>
508b07e49aSJulian Elischer #include <sys/sysproto.h>
518b07e49aSJulian Elischer #include <sys/proc.h>
52df8bae1dSRodney W. Grimes #include <sys/domain.h>
53cb64988fSLuoqi Chen #include <sys/kernel.h>
54603724d3SBjoern A. Zeeb #include <sys/vimage.h>
55df8bae1dSRodney W. Grimes 
56df8bae1dSRodney W. Grimes #include <net/if.h>
576e6b3f7cSQing Li #include <net/if_dl.h>
58df8bae1dSRodney W. Grimes #include <net/route.h>
59df8bae1dSRodney W. Grimes 
60e440aed9SQing Li #ifdef RADIX_MPATH
61e440aed9SQing Li #include <net/radix_mpath.h>
62e440aed9SQing Li #endif
634b79449eSBjoern A. Zeeb #include <net/vnet.h>
64e440aed9SQing Li 
65df8bae1dSRodney W. Grimes #include <netinet/in.h>
66b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h>
674b79449eSBjoern A. Zeeb #include <netinet/vinet.h>
68df8bae1dSRodney W. Grimes 
692dc1d581SAndre Oppermann #include <vm/uma.h>
702dc1d581SAndre Oppermann 
718b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS;
728b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
7366e8505fSJulian Elischer /*
7466e8505fSJulian Elischer  * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
7566e8505fSJulian Elischer  * We can't do more because storage is statically allocated for now.
7666e8505fSJulian Elischer  * (for compatibility reasons.. this will change).
7766e8505fSJulian Elischer  */
788b07e49aSJulian Elischer TUNABLE_INT("net.fibs", &rt_numfibs);
798b07e49aSJulian Elischer 
8066e8505fSJulian Elischer /*
8166e8505fSJulian Elischer  * By default add routes to all fibs for new interfaces.
8266e8505fSJulian Elischer  * Once this is set to 0 then only allocate routes on interface
8366e8505fSJulian Elischer  * changes for the FIB of the caller when adding a new set of addresses
8466e8505fSJulian Elischer  * to an interface.  XXX this is a shotgun aproach to a problem that needs
8566e8505fSJulian Elischer  * a more fine grained solution.. that will come.
8666e8505fSJulian Elischer  */
8766e8505fSJulian Elischer u_int rt_add_addr_allfibs = 1;
8866e8505fSJulian Elischer SYSCTL_INT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
8966e8505fSJulian Elischer     &rt_add_addr_allfibs, 0, "");
9066e8505fSJulian Elischer TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
9166e8505fSJulian Elischer 
#ifdef VIMAGE_GLOBALS
/* Routing statistics counters. */
static struct rtstat rtstat;

/*
 * Per-FIB, per-address-family radix tree heads.  By default only the
 * first 'row' of tables is accessed.
 *
 * XXXMRT Once netstat is fixed and this is done differently, the array
 * can be allocated dynamically.  As long as backwards compatibility is
 * being kept it has to be allocated statically.
 */
struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];

/* Count of routes that are no longer in a table but not yet freed. */
static int	rttrash;
#endif
106df8bae1dSRodney W. Grimes 
/* Forward declarations of file-local helpers. */
static void	rt_maskedcopy(struct sockaddr *, struct sockaddr *,
		    struct sockaddr *);
static int	vnet_route_iattach(const void *);
110f708ef1bSPoul-Henning Kamp 
/*
 * Compare two sockaddr structures byte-wise over the length recorded in
 * the first one (lhs->sa_len).  Evaluates to non-zero when they match.
 * Note that 'lhs' is evaluated twice.
 */
#define	sa_equal(lhs, rhs) (bcmp((lhs), (rhs), (lhs)->sa_len) == 0)
113d6941ce9SLuigi Rizzo 
/*
 * Turn a 'struct radix_node *' into the 'struct rtentry *' that contains
 * it.  This is safe (in this code) because a 'struct rtentry' begins
 * with two 'struct radix_node's, and the first of them is the leaf node
 * that the code in radix.c hands back to us as a 'struct radix_node'.
 *
 * The conversion relies on a number of assumptions, so never write the
 * cast by hand; always go through this macro.
 */
#define RNTORT(rn)	((struct rtentry *)(rn))
125d6941ce9SLuigi Rizzo 
#ifdef VIMAGE_GLOBALS
/* UMA zone from which routing table entries (struct rtentry) are allocated. */
static uma_zone_t rtzone;
#endif
1298b07e49aSJulian Elischer 
#if 0
/* Default FIB for tunnels to use (currently compiled out). */
u_int tunnel_fib = 0;
SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
#endif
1358b07e49aSJulian Elischer 
1368b07e49aSJulian Elischer /*
1378b07e49aSJulian Elischer  * handler for net.my_fibnum
1388b07e49aSJulian Elischer  */
1398b07e49aSJulian Elischer static int
1408b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
141df8bae1dSRodney W. Grimes {
1428b07e49aSJulian Elischer         int fibnum;
1438b07e49aSJulian Elischer         int error;
1448b07e49aSJulian Elischer 
1458b07e49aSJulian Elischer         fibnum = curthread->td_proc->p_fibnum;
1468b07e49aSJulian Elischer         error = sysctl_handle_int(oidp, &fibnum, 0, req);
1478b07e49aSJulian Elischer         return (error);
148df8bae1dSRodney W. Grimes }
149df8bae1dSRodney W. Grimes 
1508b07e49aSJulian Elischer SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
1518b07e49aSJulian Elischer             NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
1522dc1d581SAndre Oppermann 
1532eb5613fSLuigi Rizzo static void
1542eb5613fSLuigi Rizzo route_init(void)
155df8bae1dSRodney W. Grimes {
1568b07e49aSJulian Elischer 
1576f95a5ebSJulian Elischer 	/* whack the tunable ints into  line. */
1588b07e49aSJulian Elischer 	if (rt_numfibs > RT_MAXFIBS)
1598b07e49aSJulian Elischer 		rt_numfibs = RT_MAXFIBS;
1608b07e49aSJulian Elischer 	if (rt_numfibs == 0)
1618b07e49aSJulian Elischer 		rt_numfibs = 1;
162df8bae1dSRodney W. Grimes 	rn_init();	/* initialize all zeroes, all ones, mask table */
1638b07e49aSJulian Elischer 
1641ed81b73SMarko Zec 	vnet_route_iattach(NULL);
1651ed81b73SMarko Zec }
1661ed81b73SMarko Zec 
1671ed81b73SMarko Zec static int vnet_route_iattach(const void *unused __unused)
1681ed81b73SMarko Zec {
1691ed81b73SMarko Zec 	INIT_VNET_INET(curvnet);
1701ed81b73SMarko Zec 	int table;
1711ed81b73SMarko Zec 	struct domain *dom;
1721ed81b73SMarko Zec 	int fam;
1731ed81b73SMarko Zec 
1741ed81b73SMarko Zec 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
1751ed81b73SMarko Zec 	    NULL, NULL, UMA_ALIGN_PTR, 0);
1768b07e49aSJulian Elischer 	for (dom = domains; dom; dom = dom->dom_next) {
1778b07e49aSJulian Elischer 		if (dom->dom_rtattach)  {
1788b07e49aSJulian Elischer 			for  (table = 0; table < rt_numfibs; table++) {
1798b07e49aSJulian Elischer 				if ( (fam = dom->dom_family) == AF_INET ||
1808b07e49aSJulian Elischer 				    table == 0) {
1818b07e49aSJulian Elischer  			        	/* for now only AF_INET has > 1 table */
1828b07e49aSJulian Elischer 					/* XXX MRT
1838b07e49aSJulian Elischer 					 * rtattach will be also called
1848b07e49aSJulian Elischer 					 * from vfs_export.c but the
1858b07e49aSJulian Elischer 					 * offset will be 0
1868b07e49aSJulian Elischer 					 * (only for AF_INET and AF_INET6
1878b07e49aSJulian Elischer 					 * which don't need it anyhow)
1888b07e49aSJulian Elischer 					 */
1898b07e49aSJulian Elischer 					dom->dom_rtattach(
190603724d3SBjoern A. Zeeb 				    	    (void **)&V_rt_tables[table][fam],
1918b07e49aSJulian Elischer 				    	    dom->dom_rtoffset);
1928b07e49aSJulian Elischer 				} else {
1938b07e49aSJulian Elischer 					break;
1948b07e49aSJulian Elischer 				}
1958b07e49aSJulian Elischer 			}
1968b07e49aSJulian Elischer 		}
1978b07e49aSJulian Elischer 	}
1981ed81b73SMarko Zec 
1991ed81b73SMarko Zec 	return (0);
2008b07e49aSJulian Elischer }
2018b07e49aSJulian Elischer 
2028b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_
2038b07e49aSJulian Elischer struct setfib_args {
2048b07e49aSJulian Elischer 	int     fibnum;
2058b07e49aSJulian Elischer };
2068b07e49aSJulian Elischer #endif
2078b07e49aSJulian Elischer int
2088b07e49aSJulian Elischer setfib(struct thread *td, struct setfib_args *uap)
2098b07e49aSJulian Elischer {
2108b07e49aSJulian Elischer 	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
2118b07e49aSJulian Elischer 		return EINVAL;
2128b07e49aSJulian Elischer 	td->td_proc->p_fibnum = uap->fibnum;
2138b07e49aSJulian Elischer 	return (0);
214df8bae1dSRodney W. Grimes }
215df8bae1dSRodney W. Grimes 
/*
 * Packet routing routines.
 */

/*
 * Fill in ro->ro_rt from FIB 0 with no flags ignored; thin wrapper
 * around rtalloc_ign_fib().
 */
void
rtalloc(struct route *ro)
{

	rtalloc_ign_fib(ro, 0UL, 0);
}
2248b07e49aSJulian Elischer 
2258b07e49aSJulian Elischer void
2268b07e49aSJulian Elischer rtalloc_fib(struct route *ro, u_int fibnum)
2278b07e49aSJulian Elischer {
2288b07e49aSJulian Elischer 	rtalloc_ign_fib(ro, 0UL, fibnum);
229df8bae1dSRodney W. Grimes }
230df8bae1dSRodney W. Grimes 
231652082e6SGarrett Wollman void
232d1dd20beSSam Leffler rtalloc_ign(struct route *ro, u_long ignore)
233652082e6SGarrett Wollman {
23468f956b8SJohn Polstra 	struct rtentry *rt;
23568f956b8SJohn Polstra 
23668f956b8SJohn Polstra 	if ((rt = ro->ro_rt) != NULL) {
23768f956b8SJohn Polstra 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
23868f956b8SJohn Polstra 			return;
23968f956b8SJohn Polstra 		RTFREE(rt);
24066810dd0SYoshinobu Inoue 		ro->ro_rt = NULL;
24168f956b8SJohn Polstra 	}
2428b07e49aSJulian Elischer 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
2438b07e49aSJulian Elischer 	if (ro->ro_rt)
2448b07e49aSJulian Elischer 		RT_UNLOCK(ro->ro_rt);
2458b07e49aSJulian Elischer }
2468b07e49aSJulian Elischer 
2478b07e49aSJulian Elischer void
2488b07e49aSJulian Elischer rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
2498b07e49aSJulian Elischer {
2508b07e49aSJulian Elischer 	struct rtentry *rt;
2518b07e49aSJulian Elischer 
2528b07e49aSJulian Elischer 	if ((rt = ro->ro_rt) != NULL) {
2538b07e49aSJulian Elischer 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
2548b07e49aSJulian Elischer 			return;
2558b07e49aSJulian Elischer 		RTFREE(rt);
2568b07e49aSJulian Elischer 		ro->ro_rt = NULL;
2578b07e49aSJulian Elischer 	}
2588b07e49aSJulian Elischer 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
259d1dd20beSSam Leffler 	if (ro->ro_rt)
260d1dd20beSSam Leffler 		RT_UNLOCK(ro->ro_rt);
261652082e6SGarrett Wollman }
262652082e6SGarrett Wollman 
263b0a76b88SJulian Elischer /*
264b0a76b88SJulian Elischer  * Look up the route that matches the address given
265b0a76b88SJulian Elischer  * Or, at least try.. Create a cloned route if needed.
266d1dd20beSSam Leffler  *
267d1dd20beSSam Leffler  * The returned route, if any, is locked.
268b0a76b88SJulian Elischer  */
269df8bae1dSRodney W. Grimes struct rtentry *
270d1dd20beSSam Leffler rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
271df8bae1dSRodney W. Grimes {
2728b07e49aSJulian Elischer 	return (rtalloc1_fib(dst, report, ignflags, 0));
2738b07e49aSJulian Elischer }
2748b07e49aSJulian Elischer 
2758b07e49aSJulian Elischer struct rtentry *
2768b07e49aSJulian Elischer rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
2778b07e49aSJulian Elischer 		    u_int fibnum)
2788b07e49aSJulian Elischer {
2798b615593SMarko Zec 	INIT_VNET_NET(curvnet);
2808b07e49aSJulian Elischer 	struct radix_node_head *rnh;
281d1dd20beSSam Leffler 	struct rtentry *rt;
282d1dd20beSSam Leffler 	struct radix_node *rn;
283d1dd20beSSam Leffler 	struct rtentry *newrt;
284df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
2856e6b3f7cSQing Li 	int err = 0, msgtype = RTM_MISS;
2863120b9d4SKip Macy 	int needlock;
287df8bae1dSRodney W. Grimes 
2888b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
2898b07e49aSJulian Elischer 	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
2908b07e49aSJulian Elischer 		fibnum = 0;
291603724d3SBjoern A. Zeeb 	rnh = V_rt_tables[fibnum][dst->sa_family];
29285911824SLuigi Rizzo 	newrt = NULL;
293b0a76b88SJulian Elischer 	/*
294b0a76b88SJulian Elischer 	 * Look up the address in the table for that Address Family
295b0a76b88SJulian Elischer 	 */
296956b0b65SJeffrey Hsu 	if (rnh == NULL) {
297603724d3SBjoern A. Zeeb 		V_rtstat.rts_unreach++;
2986e6b3f7cSQing Li 		goto miss;
299956b0b65SJeffrey Hsu 	}
3003120b9d4SKip Macy 	needlock = !(ignflags & RTF_RNH_LOCKED);
3013120b9d4SKip Macy 	if (needlock)
3023120b9d4SKip Macy 		RADIX_NODE_HEAD_RLOCK(rnh);
3033120b9d4SKip Macy #ifdef INVARIANTS
3043120b9d4SKip Macy 	else
3053120b9d4SKip Macy 		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
3063120b9d4SKip Macy #endif
3073120b9d4SKip Macy 	rn = rnh->rnh_matchaddr(dst, rnh);
3083120b9d4SKip Macy 	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
309d6941ce9SLuigi Rizzo 		newrt = rt = RNTORT(rn);
3103120b9d4SKip Macy 		RT_LOCK(newrt);
3113120b9d4SKip Macy 		RT_ADDREF(newrt);
3123120b9d4SKip Macy 		if (needlock)
3133120b9d4SKip Macy 			RADIX_NODE_HEAD_RUNLOCK(rnh);
3143120b9d4SKip Macy 		goto done;
3156e6b3f7cSQing Li 
3166e6b3f7cSQing Li 	} else if (needlock)
3173120b9d4SKip Macy 		RADIX_NODE_HEAD_RUNLOCK(rnh);
3183120b9d4SKip Macy 
319b0a76b88SJulian Elischer 	/*
320b0a76b88SJulian Elischer 	 * Either we hit the root or couldn't find any match,
321b0a76b88SJulian Elischer 	 * Which basically means
322b0a76b88SJulian Elischer 	 * "caint get there frm here"
323b0a76b88SJulian Elischer 	 */
324603724d3SBjoern A. Zeeb 	V_rtstat.rts_unreach++;
325956b0b65SJeffrey Hsu miss:
3266e6b3f7cSQing Li 	if (report) {
327b0a76b88SJulian Elischer 		/*
328b0a76b88SJulian Elischer 		 * If required, report the failure to the supervising
329b0a76b88SJulian Elischer 		 * Authorities.
330b0a76b88SJulian Elischer 		 * For a delete, this is not an error. (report == 0)
331b0a76b88SJulian Elischer 		 */
3326f5967c0SBruce Evans 		bzero(&info, sizeof(info));
333df8bae1dSRodney W. Grimes 		info.rti_info[RTAX_DST] = dst;
334df8bae1dSRodney W. Grimes 		rt_missmsg(msgtype, &info, 0, err);
335df8bae1dSRodney W. Grimes 	}
3363120b9d4SKip Macy done:
337d1dd20beSSam Leffler 	if (newrt)
338d1dd20beSSam Leffler 		RT_LOCK_ASSERT(newrt);
339df8bae1dSRodney W. Grimes 	return (newrt);
340df8bae1dSRodney W. Grimes }
341df8bae1dSRodney W. Grimes 
342499676dfSJulian Elischer /*
343499676dfSJulian Elischer  * Remove a reference count from an rtentry.
344499676dfSJulian Elischer  * If the count gets low enough, take it out of the routing table
345499676dfSJulian Elischer  */
346df8bae1dSRodney W. Grimes void
347d1dd20beSSam Leffler rtfree(struct rtentry *rt)
348df8bae1dSRodney W. Grimes {
3498b615593SMarko Zec 	INIT_VNET_NET(curvnet);
35085911824SLuigi Rizzo 	struct radix_node_head *rnh;
351df8bae1dSRodney W. Grimes 
352a0c0e34bSGleb Smirnoff 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
353603724d3SBjoern A. Zeeb 	rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
354a0c0e34bSGleb Smirnoff 	KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
355499676dfSJulian Elischer 
356d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
357d1dd20beSSam Leffler 
358499676dfSJulian Elischer 	/*
359a0c0e34bSGleb Smirnoff 	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
360a0c0e34bSGleb Smirnoff 	 * we should come here exactly with the last reference.
361499676dfSJulian Elischer 	 */
3627138d65cSSam Leffler 	RT_REMREF(rt);
363a0c0e34bSGleb Smirnoff 	if (rt->rt_refcnt > 0) {
364a42ea597SQing Li 		log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt);
365d1dd20beSSam Leffler 		goto done;
366a0c0e34bSGleb Smirnoff 	}
3679c63e9dbSSam Leffler 
3689c63e9dbSSam Leffler 	/*
3699c63e9dbSSam Leffler 	 * On last reference give the "close method" a chance
3709c63e9dbSSam Leffler 	 * to cleanup private state.  This also permits (for
3719c63e9dbSSam Leffler 	 * IPv4 and IPv6) a chance to decide if the routing table
3729c63e9dbSSam Leffler 	 * entry should be purged immediately or at a later time.
3739c63e9dbSSam Leffler 	 * When an immediate purge is to happen the close routine
3749c63e9dbSSam Leffler 	 * typically calls rtexpunge which clears the RTF_UP flag
3759c63e9dbSSam Leffler 	 * on the entry so that the code below reclaims the storage.
3769c63e9dbSSam Leffler 	 */
377d1dd20beSSam Leffler 	if (rt->rt_refcnt == 0 && rnh->rnh_close)
3785c2dae8eSGarrett Wollman 		rnh->rnh_close((struct radix_node *)rt, rnh);
379499676dfSJulian Elischer 
380499676dfSJulian Elischer 	/*
381499676dfSJulian Elischer 	 * If we are no longer "up" (and ref == 0)
382499676dfSJulian Elischer 	 * then we can free the resources associated
383499676dfSJulian Elischer 	 * with the route.
384499676dfSJulian Elischer 	 */
385d1dd20beSSam Leffler 	if ((rt->rt_flags & RTF_UP) == 0) {
386df8bae1dSRodney W. Grimes 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
387df8bae1dSRodney W. Grimes 			panic("rtfree 2");
388499676dfSJulian Elischer 		/*
389499676dfSJulian Elischer 		 * the rtentry must have been removed from the routing table
390499676dfSJulian Elischer 		 * so it is represented in rttrash.. remove that now.
391499676dfSJulian Elischer 		 */
392603724d3SBjoern A. Zeeb 		V_rttrash--;
393499676dfSJulian Elischer #ifdef	DIAGNOSTIC
394df8bae1dSRodney W. Grimes 		if (rt->rt_refcnt < 0) {
395623ae52eSPoul-Henning Kamp 			printf("rtfree: %p not freed (neg refs)\n", rt);
396d1dd20beSSam Leffler 			goto done;
397df8bae1dSRodney W. Grimes 		}
398499676dfSJulian Elischer #endif
399499676dfSJulian Elischer 		/*
400499676dfSJulian Elischer 		 * release references on items we hold them on..
401499676dfSJulian Elischer 		 * e.g other routes and ifaddrs.
402499676dfSJulian Elischer 		 */
40319fc74fbSJeffrey Hsu 		if (rt->rt_ifa)
40419fc74fbSJeffrey Hsu 			IFAFREE(rt->rt_ifa);
405499676dfSJulian Elischer 		/*
406499676dfSJulian Elischer 		 * The key is separatly alloc'd so free it (see rt_setgate()).
407499676dfSJulian Elischer 		 * This also frees the gateway, as they are always malloc'd
408499676dfSJulian Elischer 		 * together.
409499676dfSJulian Elischer 		 */
410df8bae1dSRodney W. Grimes 		Free(rt_key(rt));
411499676dfSJulian Elischer 
412499676dfSJulian Elischer 		/*
413499676dfSJulian Elischer 		 * and the rtentry itself of course
414499676dfSJulian Elischer 		 */
415d1dd20beSSam Leffler 		RT_LOCK_DESTROY(rt);
4161ed81b73SMarko Zec 		uma_zfree(V_rtzone, rt);
417d1dd20beSSam Leffler 		return;
418df8bae1dSRodney W. Grimes 	}
419d1dd20beSSam Leffler done:
420d1dd20beSSam Leffler 	RT_UNLOCK(rt);
421df8bae1dSRodney W. Grimes }
422df8bae1dSRodney W. Grimes 
423df8bae1dSRodney W. Grimes 
/*
 * Force the routing table entry for the given destination to go through
 * the given gateway.  Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * This variant always operates on FIB 0; see rtredirect_fib().
 */
void
rtredirect(struct sockaddr *dst, struct sockaddr *gateway,
    struct sockaddr *netmask, int flags, struct sockaddr *src)
{

	rtredirect_fib(dst, gateway, netmask, flags, src, 0);
}
4398b07e49aSJulian Elischer 
4408b07e49aSJulian Elischer void
4418b07e49aSJulian Elischer rtredirect_fib(struct sockaddr *dst,
4428b07e49aSJulian Elischer 	struct sockaddr *gateway,
4438b07e49aSJulian Elischer 	struct sockaddr *netmask,
4448b07e49aSJulian Elischer 	int flags,
4458b07e49aSJulian Elischer 	struct sockaddr *src,
4468b07e49aSJulian Elischer 	u_int fibnum)
4478b07e49aSJulian Elischer {
4488b615593SMarko Zec 	INIT_VNET_NET(curvnet);
4498e7e854cSKip Macy 	struct rtentry *rt, *rt0 = NULL;
450df8bae1dSRodney W. Grimes 	int error = 0;
45185911824SLuigi Rizzo 	short *stat = NULL;
452df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
453df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
4543120b9d4SKip Macy 	struct radix_node_head *rnh =
4554e5fd766SBjoern A. Zeeb 	    V_rt_tables[fibnum][dst->sa_family];
456df8bae1dSRodney W. Grimes 
457df8bae1dSRodney W. Grimes 	/* verify the gateway is directly reachable */
45885911824SLuigi Rizzo 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
459df8bae1dSRodney W. Grimes 		error = ENETUNREACH;
460df8bae1dSRodney W. Grimes 		goto out;
461df8bae1dSRodney W. Grimes 	}
4628b07e49aSJulian Elischer 	rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
463df8bae1dSRodney W. Grimes 	/*
464df8bae1dSRodney W. Grimes 	 * If the redirect isn't from our current router for this dst,
465df8bae1dSRodney W. Grimes 	 * it's either old or wrong.  If it redirects us to ourselves,
466df8bae1dSRodney W. Grimes 	 * we have a routing loop, perhaps as a result of an interface
467df8bae1dSRodney W. Grimes 	 * going down recently.
468df8bae1dSRodney W. Grimes 	 */
469df8bae1dSRodney W. Grimes 	if (!(flags & RTF_DONE) && rt &&
470956b0b65SJeffrey Hsu 	     (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
471df8bae1dSRodney W. Grimes 		error = EINVAL;
472df8bae1dSRodney W. Grimes 	else if (ifa_ifwithaddr(gateway))
473df8bae1dSRodney W. Grimes 		error = EHOSTUNREACH;
474df8bae1dSRodney W. Grimes 	if (error)
475df8bae1dSRodney W. Grimes 		goto done;
476df8bae1dSRodney W. Grimes 	/*
477df8bae1dSRodney W. Grimes 	 * Create a new entry if we just got back a wildcard entry
478df8bae1dSRodney W. Grimes 	 * or the the lookup failed.  This is necessary for hosts
479df8bae1dSRodney W. Grimes 	 * which use routing redirects generated by smart gateways
480df8bae1dSRodney W. Grimes 	 * to dynamically build the routing tables.
481df8bae1dSRodney W. Grimes 	 */
48285911824SLuigi Rizzo 	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
483df8bae1dSRodney W. Grimes 		goto create;
484df8bae1dSRodney W. Grimes 	/*
485df8bae1dSRodney W. Grimes 	 * Don't listen to the redirect if it's
486df8bae1dSRodney W. Grimes 	 * for a route to an interface.
487df8bae1dSRodney W. Grimes 	 */
488df8bae1dSRodney W. Grimes 	if (rt->rt_flags & RTF_GATEWAY) {
489df8bae1dSRodney W. Grimes 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
490df8bae1dSRodney W. Grimes 			/*
491df8bae1dSRodney W. Grimes 			 * Changing from route to net => route to host.
492df8bae1dSRodney W. Grimes 			 * Create new route, rather than smashing route to net.
493df8bae1dSRodney W. Grimes 			 */
494df8bae1dSRodney W. Grimes 		create:
4958e7e854cSKip Macy 			rt0 = rt;
4968e7e854cSKip Macy 			rt = NULL;
4978e7e854cSKip Macy 
498df8bae1dSRodney W. Grimes 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
4998071913dSRuslan Ermilov 			bzero((caddr_t)&info, sizeof(info));
5008071913dSRuslan Ermilov 			info.rti_info[RTAX_DST] = dst;
5018071913dSRuslan Ermilov 			info.rti_info[RTAX_GATEWAY] = gateway;
5028071913dSRuslan Ermilov 			info.rti_info[RTAX_NETMASK] = netmask;
5038071913dSRuslan Ermilov 			info.rti_ifa = ifa;
5048071913dSRuslan Ermilov 			info.rti_flags = flags;
5053120b9d4SKip Macy 			if (rt0 != NULL)
5063120b9d4SKip Macy 				RT_UNLOCK(rt0);	/* drop lock to avoid LOR with RNH */
5078b07e49aSJulian Elischer 			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
508d1dd20beSSam Leffler 			if (rt != NULL) {
5094de5d90cSSam Leffler 				RT_LOCK(rt);
5103120b9d4SKip Macy 				if (rt0 != NULL)
51129910a5aSKip Macy 					EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
5128071913dSRuslan Ermilov 				flags = rt->rt_flags;
513d1dd20beSSam Leffler 			}
5143120b9d4SKip Macy 			if (rt0 != NULL)
5153120b9d4SKip Macy 				RTFREE(rt0);
5168e7e854cSKip Macy 
517603724d3SBjoern A. Zeeb 			stat = &V_rtstat.rts_dynamic;
518df8bae1dSRodney W. Grimes 		} else {
5198e7e854cSKip Macy 			struct rtentry *gwrt;
5208e7e854cSKip Macy 
521df8bae1dSRodney W. Grimes 			/*
522df8bae1dSRodney W. Grimes 			 * Smash the current notion of the gateway to
523df8bae1dSRodney W. Grimes 			 * this destination.  Should check about netmask!!!
524df8bae1dSRodney W. Grimes 			 */
525df8bae1dSRodney W. Grimes 			rt->rt_flags |= RTF_MODIFIED;
526df8bae1dSRodney W. Grimes 			flags |= RTF_MODIFIED;
527603724d3SBjoern A. Zeeb 			stat = &V_rtstat.rts_newgateway;
528499676dfSJulian Elischer 			/*
529499676dfSJulian Elischer 			 * add the key and gateway (in one malloc'd chunk).
530499676dfSJulian Elischer 			 */
5313120b9d4SKip Macy 			RT_UNLOCK(rt);
5323120b9d4SKip Macy 			RADIX_NODE_HEAD_LOCK(rnh);
5333120b9d4SKip Macy 			RT_LOCK(rt);
534df8bae1dSRodney W. Grimes 			rt_setgate(rt, rt_key(rt), gateway);
5353120b9d4SKip Macy 			gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
5363120b9d4SKip Macy 			RADIX_NODE_HEAD_UNLOCK(rnh);
53729910a5aSKip Macy 			EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
5388e7e854cSKip Macy 			RTFREE_LOCKED(gwrt);
539df8bae1dSRodney W. Grimes 		}
540df8bae1dSRodney W. Grimes 	} else
541df8bae1dSRodney W. Grimes 		error = EHOSTUNREACH;
542df8bae1dSRodney W. Grimes done:
543d1dd20beSSam Leffler 	if (rt)
5441951e633SJohn Baldwin 		RTFREE_LOCKED(rt);
545df8bae1dSRodney W. Grimes out:
546df8bae1dSRodney W. Grimes 	if (error)
547603724d3SBjoern A. Zeeb 		V_rtstat.rts_badredirect++;
548df8bae1dSRodney W. Grimes 	else if (stat != NULL)
549df8bae1dSRodney W. Grimes 		(*stat)++;
550df8bae1dSRodney W. Grimes 	bzero((caddr_t)&info, sizeof(info));
551df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_DST] = dst;
552df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_GATEWAY] = gateway;
553df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_NETMASK] = netmask;
554df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_AUTHOR] = src;
555df8bae1dSRodney W. Grimes 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
556df8bae1dSRodney W. Grimes }
557df8bae1dSRodney W. Grimes 
5588b07e49aSJulian Elischer int
5598b07e49aSJulian Elischer rtioctl(u_long req, caddr_t data)
5608b07e49aSJulian Elischer {
5618b07e49aSJulian Elischer 	return (rtioctl_fib(req, data, 0));
5628b07e49aSJulian Elischer }
5638b07e49aSJulian Elischer 
564df8bae1dSRodney W. Grimes /*
565df8bae1dSRodney W. Grimes  * Routing table ioctl interface.
566df8bae1dSRodney W. Grimes  */
567df8bae1dSRodney W. Grimes int
5688b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
569df8bae1dSRodney W. Grimes {
5705090559bSChristian S.J. Peron 
5715090559bSChristian S.J. Peron 	/*
5725090559bSChristian S.J. Peron 	 * If more ioctl commands are added here, make sure the proper
5735090559bSChristian S.J. Peron 	 * super-user checks are being performed because it is possible for
5745090559bSChristian S.J. Peron 	 * prison-root to make it this far if raw sockets have been enabled
5755090559bSChristian S.J. Peron 	 * in jails.
5765090559bSChristian S.J. Peron 	 */
577623ae52eSPoul-Henning Kamp #ifdef INET
578f0068c4aSGarrett Wollman 	/* Multicast goop, grrr... */
5798b07e49aSJulian Elischer 	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
580623ae52eSPoul-Henning Kamp #else /* INET */
581623ae52eSPoul-Henning Kamp 	return ENXIO;
582623ae52eSPoul-Henning Kamp #endif /* INET */
583df8bae1dSRodney W. Grimes }
584df8bae1dSRodney W. Grimes 
/*
 * FIB 0 variant of ifa_ifwithroute_fib(): passes flags, destination and
 * gateway through unchanged and returns whatever that function returns.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
{

	return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
}
5908b07e49aSJulian Elischer 
5918b07e49aSJulian Elischer struct ifaddr *
5928b07e49aSJulian Elischer ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
5938b07e49aSJulian Elischer 				u_int fibnum)
5948b07e49aSJulian Elischer {
595df8bae1dSRodney W. Grimes 	register struct ifaddr *ifa;
596e034e82cSQing Li 	int not_found = 0;
597d1dd20beSSam Leffler 
598df8bae1dSRodney W. Grimes 	if ((flags & RTF_GATEWAY) == 0) {
599df8bae1dSRodney W. Grimes 		/*
600df8bae1dSRodney W. Grimes 		 * If we are adding a route to an interface,
601df8bae1dSRodney W. Grimes 		 * and the interface is a pt to pt link
602df8bae1dSRodney W. Grimes 		 * we should search for the destination
603df8bae1dSRodney W. Grimes 		 * as our clue to the interface.  Otherwise
604df8bae1dSRodney W. Grimes 		 * we can use the local address.
605df8bae1dSRodney W. Grimes 		 */
60685911824SLuigi Rizzo 		ifa = NULL;
60785911824SLuigi Rizzo 		if (flags & RTF_HOST)
608df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithdstaddr(dst);
60985911824SLuigi Rizzo 		if (ifa == NULL)
610df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithaddr(gateway);
611df8bae1dSRodney W. Grimes 	} else {
612df8bae1dSRodney W. Grimes 		/*
613df8bae1dSRodney W. Grimes 		 * If we are adding a route to a remote net
614df8bae1dSRodney W. Grimes 		 * or host, the gateway may still be on the
615df8bae1dSRodney W. Grimes 		 * other end of a pt to pt link.
616df8bae1dSRodney W. Grimes 		 */
617df8bae1dSRodney W. Grimes 		ifa = ifa_ifwithdstaddr(gateway);
618df8bae1dSRodney W. Grimes 	}
61985911824SLuigi Rizzo 	if (ifa == NULL)
620df8bae1dSRodney W. Grimes 		ifa = ifa_ifwithnet(gateway);
62185911824SLuigi Rizzo 	if (ifa == NULL) {
6229b20205dSKip Macy 		struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
62385911824SLuigi Rizzo 		if (rt == NULL)
62485911824SLuigi Rizzo 			return (NULL);
625e034e82cSQing Li 		/*
626e034e82cSQing Li 		 * dismiss a gateway that is reachable only
627e034e82cSQing Li 		 * through the default router
628e034e82cSQing Li 		 */
629e034e82cSQing Li 		switch (gateway->sa_family) {
630e034e82cSQing Li 		case AF_INET:
631e034e82cSQing Li 			if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
632e034e82cSQing Li 				not_found = 1;
633e034e82cSQing Li 			break;
634e034e82cSQing Li 		case AF_INET6:
635e034e82cSQing Li 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
636e034e82cSQing Li 				not_found = 1;
637e034e82cSQing Li 			break;
638e034e82cSQing Li 		default:
639e034e82cSQing Li 			break;
640e034e82cSQing Li 		}
6417138d65cSSam Leffler 		RT_REMREF(rt);
642d1dd20beSSam Leffler 		RT_UNLOCK(rt);
643e034e82cSQing Li 		if (not_found)
644e034e82cSQing Li 			return (NULL);
64585911824SLuigi Rizzo 		if ((ifa = rt->rt_ifa) == NULL)
64685911824SLuigi Rizzo 			return (NULL);
647df8bae1dSRodney W. Grimes 	}
648df8bae1dSRodney W. Grimes 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
649df8bae1dSRodney W. Grimes 		struct ifaddr *oifa = ifa;
650df8bae1dSRodney W. Grimes 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
65185911824SLuigi Rizzo 		if (ifa == NULL)
652df8bae1dSRodney W. Grimes 			ifa = oifa;
653df8bae1dSRodney W. Grimes 	}
654df8bae1dSRodney W. Grimes 	return (ifa);
655df8bae1dSRodney W. Grimes }
656df8bae1dSRodney W. Grimes 
/*
 * Do appropriate manipulations of a routing tree given
 * all the bits of info needed.  This variant always operates on
 * FIB 0; see rtrequest_fib() for the general form.
 */
int
rtrequest(int req,
	struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct rtentry **ret_nrt)
{
	int error;

	error = rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0);
	return (error);
}
6718b07e49aSJulian Elischer 
6728b07e49aSJulian Elischer int
6738b07e49aSJulian Elischer rtrequest_fib(int req,
6748b07e49aSJulian Elischer 	struct sockaddr *dst,
6758b07e49aSJulian Elischer 	struct sockaddr *gateway,
6768b07e49aSJulian Elischer 	struct sockaddr *netmask,
6778b07e49aSJulian Elischer 	int flags,
6788b07e49aSJulian Elischer 	struct rtentry **ret_nrt,
6798b07e49aSJulian Elischer 	u_int fibnum)
6808b07e49aSJulian Elischer {
6818071913dSRuslan Ermilov 	struct rt_addrinfo info;
6828071913dSRuslan Ermilov 
683ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
684ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
685ac4a76ebSBjoern A. Zeeb 
6868071913dSRuslan Ermilov 	bzero((caddr_t)&info, sizeof(info));
6878071913dSRuslan Ermilov 	info.rti_flags = flags;
6888071913dSRuslan Ermilov 	info.rti_info[RTAX_DST] = dst;
6898071913dSRuslan Ermilov 	info.rti_info[RTAX_GATEWAY] = gateway;
6908071913dSRuslan Ermilov 	info.rti_info[RTAX_NETMASK] = netmask;
6918b07e49aSJulian Elischer 	return rtrequest1_fib(req, &info, ret_nrt, fibnum);
6928071913dSRuslan Ermilov }
6938071913dSRuslan Ermilov 
/*
 * These (questionable) definitions of apparent local variables remain in
 * effect until the matching #undef lines further down, so they apply to
 * every function defined in between; each one expands to a field of the
 * rt_addrinfo that the function calls 'info'.  XXXXXX!!!
 */
6988071913dSRuslan Ermilov #define	dst	info->rti_info[RTAX_DST]
6998071913dSRuslan Ermilov #define	gateway	info->rti_info[RTAX_GATEWAY]
7008071913dSRuslan Ermilov #define	netmask	info->rti_info[RTAX_NETMASK]
7018071913dSRuslan Ermilov #define	ifaaddr	info->rti_info[RTAX_IFA]
7028071913dSRuslan Ermilov #define	ifpaddr	info->rti_info[RTAX_IFP]
7038071913dSRuslan Ermilov #define	flags	info->rti_flags
7048071913dSRuslan Ermilov 
/*
 * Resolve the interface and interface address for 'info' using FIB 0;
 * see rt_getifa_fib().
 */
int
rt_getifa(struct rt_addrinfo *info)
{
	int error;

	error = rt_getifa_fib(info, 0);
	return (error);
}
7108b07e49aSJulian Elischer 
7118b07e49aSJulian Elischer int
7128b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
7138b07e49aSJulian Elischer {
7148071913dSRuslan Ermilov 	struct ifaddr *ifa;
7158071913dSRuslan Ermilov 	int error = 0;
7168071913dSRuslan Ermilov 
7178071913dSRuslan Ermilov 	/*
7188071913dSRuslan Ermilov 	 * ifp may be specified by sockaddr_dl
7198071913dSRuslan Ermilov 	 * when protocol address is ambiguous.
7208071913dSRuslan Ermilov 	 */
7218071913dSRuslan Ermilov 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
7228071913dSRuslan Ermilov 	    ifpaddr->sa_family == AF_LINK &&
7238071913dSRuslan Ermilov 	    (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
7248071913dSRuslan Ermilov 		info->rti_ifp = ifa->ifa_ifp;
7258071913dSRuslan Ermilov 	if (info->rti_ifa == NULL && ifaaddr != NULL)
7268071913dSRuslan Ermilov 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
7278071913dSRuslan Ermilov 	if (info->rti_ifa == NULL) {
7288071913dSRuslan Ermilov 		struct sockaddr *sa;
7298071913dSRuslan Ermilov 
7308071913dSRuslan Ermilov 		sa = ifaaddr != NULL ? ifaaddr :
7318071913dSRuslan Ermilov 		    (gateway != NULL ? gateway : dst);
7328071913dSRuslan Ermilov 		if (sa != NULL && info->rti_ifp != NULL)
7338071913dSRuslan Ermilov 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
7348071913dSRuslan Ermilov 		else if (dst != NULL && gateway != NULL)
7358b07e49aSJulian Elischer 			info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
7368b07e49aSJulian Elischer 							fibnum);
7378071913dSRuslan Ermilov 		else if (sa != NULL)
7388b07e49aSJulian Elischer 			info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
7398b07e49aSJulian Elischer 							fibnum);
7408071913dSRuslan Ermilov 	}
7418071913dSRuslan Ermilov 	if ((ifa = info->rti_ifa) != NULL) {
7428071913dSRuslan Ermilov 		if (info->rti_ifp == NULL)
7438071913dSRuslan Ermilov 			info->rti_ifp = ifa->ifa_ifp;
7448071913dSRuslan Ermilov 	} else
7458071913dSRuslan Ermilov 		error = ENETUNREACH;
7468071913dSRuslan Ermilov 	return (error);
7478071913dSRuslan Ermilov }
7488071913dSRuslan Ermilov 
7499c63e9dbSSam Leffler /*
7509c63e9dbSSam Leffler  * Expunges references to a route that's about to be reclaimed.
7519c63e9dbSSam Leffler  * The route must be locked.
7529c63e9dbSSam Leffler  */
7539c63e9dbSSam Leffler int
7549c63e9dbSSam Leffler rtexpunge(struct rtentry *rt)
7559c63e9dbSSam Leffler {
7568b615593SMarko Zec 	INIT_VNET_NET(curvnet);
7579c63e9dbSSam Leffler 	struct radix_node *rn;
7589c63e9dbSSam Leffler 	struct radix_node_head *rnh;
7599c63e9dbSSam Leffler 	struct ifaddr *ifa;
7609c63e9dbSSam Leffler 	int error = 0;
7619c63e9dbSSam Leffler 
7626e6b3f7cSQing Li 	/*
7636e6b3f7cSQing Li 	 * Find the correct routing tree to use for this Address Family
7646e6b3f7cSQing Li 	 */
7653120b9d4SKip Macy 	rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
7669c63e9dbSSam Leffler 	RT_LOCK_ASSERT(rt);
7676e6b3f7cSQing Li 	if (rnh == NULL)
7686e6b3f7cSQing Li 		return (EAFNOSUPPORT);
7693120b9d4SKip Macy 	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
7709c63e9dbSSam Leffler #if 0
7719c63e9dbSSam Leffler 	/*
7729c63e9dbSSam Leffler 	 * We cannot assume anything about the reference count
7739c63e9dbSSam Leffler 	 * because protocols call us in many situations; often
7749c63e9dbSSam Leffler 	 * before unwinding references to the table entry.
7759c63e9dbSSam Leffler 	 */
7769c63e9dbSSam Leffler 	KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
7779c63e9dbSSam Leffler #endif
7789c63e9dbSSam Leffler 	/*
7799c63e9dbSSam Leffler 	 * Remove the item from the tree; it should be there,
7809c63e9dbSSam Leffler 	 * but when callers invoke us blindly it may not (sigh).
7819c63e9dbSSam Leffler 	 */
7829c63e9dbSSam Leffler 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
78385911824SLuigi Rizzo 	if (rn == NULL) {
7849c63e9dbSSam Leffler 		error = ESRCH;
7859c63e9dbSSam Leffler 		goto bad;
7869c63e9dbSSam Leffler 	}
7879c63e9dbSSam Leffler 	KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
7889c63e9dbSSam Leffler 		("unexpected flags 0x%x", rn->rn_flags));
789d6941ce9SLuigi Rizzo 	KASSERT(rt == RNTORT(rn),
7909c63e9dbSSam Leffler 		("lookup mismatch, rt %p rn %p", rt, rn));
7919c63e9dbSSam Leffler 
7929c63e9dbSSam Leffler 	rt->rt_flags &= ~RTF_UP;
7939c63e9dbSSam Leffler 
7949c63e9dbSSam Leffler 	/*
7959c63e9dbSSam Leffler 	 * Give the protocol a chance to keep things in sync.
7969c63e9dbSSam Leffler 	 */
7979c63e9dbSSam Leffler 	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
7989c63e9dbSSam Leffler 		struct rt_addrinfo info;
7999c63e9dbSSam Leffler 
8009c63e9dbSSam Leffler 		bzero((caddr_t)&info, sizeof(info));
8019c63e9dbSSam Leffler 		info.rti_flags = rt->rt_flags;
8029c63e9dbSSam Leffler 		info.rti_info[RTAX_DST] = rt_key(rt);
8039c63e9dbSSam Leffler 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
8049c63e9dbSSam Leffler 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
8059c63e9dbSSam Leffler 		ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
8069c63e9dbSSam Leffler 	}
8079c63e9dbSSam Leffler 
8089c63e9dbSSam Leffler 	/*
8099c63e9dbSSam Leffler 	 * one more rtentry floating around that is not
8109c63e9dbSSam Leffler 	 * linked to the routing table.
8119c63e9dbSSam Leffler 	 */
812603724d3SBjoern A. Zeeb 	V_rttrash++;
8139c63e9dbSSam Leffler bad:
8149c63e9dbSSam Leffler 	return (error);
8159c63e9dbSSam Leffler }
8169c63e9dbSSam Leffler 
8178071913dSRuslan Ermilov int
8188b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
8198b07e49aSJulian Elischer 				u_int fibnum)
8208b07e49aSJulian Elischer {
8218b615593SMarko Zec 	INIT_VNET_NET(curvnet);
8223120b9d4SKip Macy 	int error = 0, needlock = 0;
823df8bae1dSRodney W. Grimes 	register struct rtentry *rt;
824df8bae1dSRodney W. Grimes 	register struct radix_node *rn;
825df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
826df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
827df8bae1dSRodney W. Grimes 	struct sockaddr *ndst;
828df8bae1dSRodney W. Grimes #define senderr(x) { error = x ; goto bad; }
829df8bae1dSRodney W. Grimes 
8308b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
8318b07e49aSJulian Elischer 	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
8328b07e49aSJulian Elischer 		fibnum = 0;
833b0a76b88SJulian Elischer 	/*
834b0a76b88SJulian Elischer 	 * Find the correct routing tree to use for this Address Family
835b0a76b88SJulian Elischer 	 */
836603724d3SBjoern A. Zeeb 	rnh = V_rt_tables[fibnum][dst->sa_family];
83785911824SLuigi Rizzo 	if (rnh == NULL)
838983985c1SJeffrey Hsu 		return (EAFNOSUPPORT);
8393120b9d4SKip Macy 	needlock = ((flags & RTF_RNH_LOCKED) == 0);
8403120b9d4SKip Macy 	flags &= ~RTF_RNH_LOCKED;
8413120b9d4SKip Macy 	if (needlock)
842956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_LOCK(rnh);
843c96b8224SKip Macy 	else
844c96b8224SKip Macy 		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
845b0a76b88SJulian Elischer 	/*
846b0a76b88SJulian Elischer 	 * If we are adding a host route then we don't want to put
84766953138SRuslan Ermilov 	 * a netmask in the tree, nor do we want to clone it.
848b0a76b88SJulian Elischer 	 */
8496e6b3f7cSQing Li 	if (flags & RTF_HOST)
85085911824SLuigi Rizzo 		netmask = NULL;
8516e6b3f7cSQing Li 
852df8bae1dSRodney W. Grimes 	switch (req) {
853df8bae1dSRodney W. Grimes 	case RTM_DELETE:
854e440aed9SQing Li #ifdef RADIX_MPATH
855e440aed9SQing Li 		/*
856e440aed9SQing Li 		 * if we got multipath routes, we require users to specify
857e440aed9SQing Li 		 * a matching RTAX_GATEWAY.
858e440aed9SQing Li 		 */
859e440aed9SQing Li 		if (rn_mpath_capable(rnh)) {
860e440aed9SQing Li 			struct rtentry *rto = NULL;
861e440aed9SQing Li 
862e440aed9SQing Li 			rn = rnh->rnh_matchaddr(dst, rnh);
863e440aed9SQing Li 			if (rn == NULL)
864e440aed9SQing Li 				senderr(ESRCH);
865e440aed9SQing Li  			rto = rt = RNTORT(rn);
866e440aed9SQing Li 			rt = rt_mpath_matchgate(rt, gateway);
867e440aed9SQing Li 			if (!rt)
868e440aed9SQing Li 				senderr(ESRCH);
869e440aed9SQing Li 			/*
870e440aed9SQing Li 			 * this is the first entry in the chain
871e440aed9SQing Li 			 */
872e440aed9SQing Li 			if (rto == rt) {
873e440aed9SQing Li 				rn = rn_mpath_next((struct radix_node *)rt);
874e440aed9SQing Li 				/*
875e440aed9SQing Li 				 * there is another entry, now it's active
876e440aed9SQing Li 				 */
877e440aed9SQing Li 				if (rn) {
878e440aed9SQing Li 					rto = RNTORT(rn);
879e440aed9SQing Li 					RT_LOCK(rto);
880e440aed9SQing Li 					rto->rt_flags |= RTF_UP;
881e440aed9SQing Li 					RT_UNLOCK(rto);
882e440aed9SQing Li 				} else if (rt->rt_flags & RTF_GATEWAY) {
883e440aed9SQing Li 					/*
884e440aed9SQing Li 					 * For gateway routes, we need to
885e440aed9SQing Li 					 * make sure that we we are deleting
886e440aed9SQing Li 					 * the correct gateway.
887e440aed9SQing Li 					 * rt_mpath_matchgate() does not
888e440aed9SQing Li 					 * check the case when there is only
889e440aed9SQing Li 					 * one route in the chain.
890e440aed9SQing Li 					 */
891e440aed9SQing Li 					if (gateway &&
892e440aed9SQing Li 					    (rt->rt_gateway->sa_len != gateway->sa_len ||
893e440aed9SQing Li 					    memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
894e440aed9SQing Li 						senderr(ESRCH);
895e440aed9SQing Li 				}
896e440aed9SQing Li 				/*
897e440aed9SQing Li 				 * use the normal delete code to remove
898e440aed9SQing Li 				 * the first entry
899e440aed9SQing Li 				 */
900e440aed9SQing Li 				goto normal_rtdel;
901e440aed9SQing Li 			}
902e440aed9SQing Li 			/*
903e440aed9SQing Li 			 * if the entry is 2nd and on up
904e440aed9SQing Li 			 */
905e440aed9SQing Li 			if (!rt_mpath_deldup(rto, rt))
906e440aed9SQing Li 				panic ("rtrequest1: rt_mpath_deldup");
907e440aed9SQing Li 			RT_LOCK(rt);
908e440aed9SQing Li 			RT_ADDREF(rt);
909e440aed9SQing Li 			rt->rt_flags &= ~RTF_UP;
910e440aed9SQing Li 			goto deldone;  /* done with the RTM_DELETE command */
911e440aed9SQing Li 		}
912e440aed9SQing Li 
913e440aed9SQing Li normal_rtdel:
914ea9cd9f2SBjoern A. Zeeb #endif
915b0a76b88SJulian Elischer 		/*
916b0a76b88SJulian Elischer 		 * Remove the item from the tree and return it.
917b0a76b88SJulian Elischer 		 * Complain if it is not there and do no more processing.
918b0a76b88SJulian Elischer 		 */
919d1dd20beSSam Leffler 		rn = rnh->rnh_deladdr(dst, netmask, rnh);
92085911824SLuigi Rizzo 		if (rn == NULL)
921df8bae1dSRodney W. Grimes 			senderr(ESRCH);
922df8bae1dSRodney W. Grimes 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
923df8bae1dSRodney W. Grimes 			panic ("rtrequest delete");
924d6941ce9SLuigi Rizzo 		rt = RNTORT(rn);
925d1dd20beSSam Leffler 		RT_LOCK(rt);
9267138d65cSSam Leffler 		RT_ADDREF(rt);
92771eba915SRuslan Ermilov 		rt->rt_flags &= ~RTF_UP;
928c2bed6a3SGarrett Wollman 
929c2bed6a3SGarrett Wollman 		/*
930499676dfSJulian Elischer 		 * give the protocol a chance to keep things in sync.
931b0a76b88SJulian Elischer 		 */
932df8bae1dSRodney W. Grimes 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
9338071913dSRuslan Ermilov 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
934499676dfSJulian Elischer 
935ea9cd9f2SBjoern A. Zeeb #ifdef RADIX_MPATH
936e440aed9SQing Li deldone:
937ea9cd9f2SBjoern A. Zeeb #endif
938b0a76b88SJulian Elischer 		/*
939d6941ce9SLuigi Rizzo 		 * One more rtentry floating around that is not
940d6941ce9SLuigi Rizzo 		 * linked to the routing table. rttrash will be decremented
941d6941ce9SLuigi Rizzo 		 * when RTFREE(rt) is eventually called.
942499676dfSJulian Elischer 		 */
943603724d3SBjoern A. Zeeb 		V_rttrash++;
944499676dfSJulian Elischer 
945499676dfSJulian Elischer 		/*
946499676dfSJulian Elischer 		 * If the caller wants it, then it can have it,
947499676dfSJulian Elischer 		 * but it's up to it to free the rtentry as we won't be
948499676dfSJulian Elischer 		 * doing it.
949b0a76b88SJulian Elischer 		 */
950d1dd20beSSam Leffler 		if (ret_nrt) {
951df8bae1dSRodney W. Grimes 			*ret_nrt = rt;
952d1dd20beSSam Leffler 			RT_UNLOCK(rt);
953d1dd20beSSam Leffler 		} else
954d1dd20beSSam Leffler 			RTFREE_LOCKED(rt);
955df8bae1dSRodney W. Grimes 		break;
956df8bae1dSRodney W. Grimes 	case RTM_RESOLVE:
9576e6b3f7cSQing Li 		/*
9586e6b3f7cSQing Li 		 * resolve was only used for route cloning
9596e6b3f7cSQing Li 		 * here for compat
9606e6b3f7cSQing Li 		 */
9616e6b3f7cSQing Li 		break;
962df8bae1dSRodney W. Grimes 	case RTM_ADD:
9635df72964SGarrett Wollman 		if ((flags & RTF_GATEWAY) && !gateway)
96416a2e0a6SQing Li 			senderr(EINVAL);
96516a2e0a6SQing Li 		if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
96616a2e0a6SQing Li 		    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
96716a2e0a6SQing Li 			senderr(EINVAL);
9685df72964SGarrett Wollman 
9698b07e49aSJulian Elischer 		if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum)))
9708071913dSRuslan Ermilov 			senderr(error);
9718071913dSRuslan Ermilov 		ifa = info->rti_ifa;
9721ed81b73SMarko Zec 		rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
97385911824SLuigi Rizzo 		if (rt == NULL)
974df8bae1dSRodney W. Grimes 			senderr(ENOBUFS);
975d1dd20beSSam Leffler 		RT_LOCK_INIT(rt);
976df8bae1dSRodney W. Grimes 		rt->rt_flags = RTF_UP | flags;
9778b07e49aSJulian Elischer 		rt->rt_fibnum = fibnum;
978499676dfSJulian Elischer 		/*
979499676dfSJulian Elischer 		 * Add the gateway. Possibly re-malloc-ing the storage for it
9806e6b3f7cSQing Li 		 *
981499676dfSJulian Elischer 		 */
982d1dd20beSSam Leffler 		RT_LOCK(rt);
983831a80b0SMatthew Dillon 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
984d1dd20beSSam Leffler 			RT_LOCK_DESTROY(rt);
9851ed81b73SMarko Zec 			uma_zfree(V_rtzone, rt);
986704b0666SBill Fenner 			senderr(error);
987df8bae1dSRodney W. Grimes 		}
988499676dfSJulian Elischer 
989499676dfSJulian Elischer 		/*
990499676dfSJulian Elischer 		 * point to the (possibly newly malloc'd) dest address.
991499676dfSJulian Elischer 		 */
992d1dd20beSSam Leffler 		ndst = (struct sockaddr *)rt_key(rt);
993499676dfSJulian Elischer 
994499676dfSJulian Elischer 		/*
995499676dfSJulian Elischer 		 * make sure it contains the value we want (masked if needed).
996499676dfSJulian Elischer 		 */
997df8bae1dSRodney W. Grimes 		if (netmask) {
998df8bae1dSRodney W. Grimes 			rt_maskedcopy(dst, ndst, netmask);
999df8bae1dSRodney W. Grimes 		} else
10001838a647SLuigi Rizzo 			bcopy(dst, ndst, dst->sa_len);
10018e718bb4SGarrett Wollman 
10028e718bb4SGarrett Wollman 		/*
1003499676dfSJulian Elischer 		 * Note that we now have a reference to the ifa.
10048e718bb4SGarrett Wollman 		 * This moved from below so that rnh->rnh_addaddr() can
1005499676dfSJulian Elischer 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
10068e718bb4SGarrett Wollman 		 */
100719fc74fbSJeffrey Hsu 		IFAREF(ifa);
10088e718bb4SGarrett Wollman 		rt->rt_ifa = ifa;
10098e718bb4SGarrett Wollman 		rt->rt_ifp = ifa->ifa_ifp;
10108e718bb4SGarrett Wollman 
1011e440aed9SQing Li #ifdef RADIX_MPATH
1012e440aed9SQing Li 		/* do not permit exactly the same dst/mask/gw pair */
1013e440aed9SQing Li 		if (rn_mpath_capable(rnh) &&
1014e440aed9SQing Li 			rt_mpath_conflict(rnh, rt, netmask)) {
1015e440aed9SQing Li 			if (rt->rt_ifa) {
1016e440aed9SQing Li 				IFAFREE(rt->rt_ifa);
1017e440aed9SQing Li 			}
1018e440aed9SQing Li 			Free(rt_key(rt));
1019e440aed9SQing Li 			RT_LOCK_DESTROY(rt);
10201ed81b73SMarko Zec 			uma_zfree(V_rtzone, rt);
1021e440aed9SQing Li 			senderr(EEXIST);
1022e440aed9SQing Li 		}
1023e440aed9SQing Li #endif
1024e440aed9SQing Li 
1025d1dd20beSSam Leffler 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
1026d1dd20beSSam Leffler 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
1027499676dfSJulian Elischer 		/*
1028499676dfSJulian Elischer 		 * If it still failed to go into the tree,
1029499676dfSJulian Elischer 		 * then un-make it (this should be a function)
1030499676dfSJulian Elischer 		 */
103185911824SLuigi Rizzo 		if (rn == NULL) {
1032d1dd20beSSam Leffler 			if (rt->rt_ifa)
10338e718bb4SGarrett Wollman 				IFAFREE(rt->rt_ifa);
1034df8bae1dSRodney W. Grimes 			Free(rt_key(rt));
1035d1dd20beSSam Leffler 			RT_LOCK_DESTROY(rt);
10361ed81b73SMarko Zec 			uma_zfree(V_rtzone, rt);
1037df8bae1dSRodney W. Grimes 			senderr(EEXIST);
1038df8bae1dSRodney W. Grimes 		}
1039499676dfSJulian Elischer 
1040499676dfSJulian Elischer 		/*
1041a0c0e34bSGleb Smirnoff 		 * If this protocol has something to add to this then
1042499676dfSJulian Elischer 		 * allow it to do that as well.
1043499676dfSJulian Elischer 		 */
1044df8bae1dSRodney W. Grimes 		if (ifa->ifa_rtrequest)
10458071913dSRuslan Ermilov 			ifa->ifa_rtrequest(req, rt, info);
1046499676dfSJulian Elischer 
1047cd02a0b7SGarrett Wollman 		/*
1048499676dfSJulian Elischer 		 * actually return a resultant rtentry and
1049499676dfSJulian Elischer 		 * give the caller a single reference.
1050499676dfSJulian Elischer 		 */
1051df8bae1dSRodney W. Grimes 		if (ret_nrt) {
1052df8bae1dSRodney W. Grimes 			*ret_nrt = rt;
10537138d65cSSam Leffler 			RT_ADDREF(rt);
1054df8bae1dSRodney W. Grimes 		}
1055d1dd20beSSam Leffler 		RT_UNLOCK(rt);
1056df8bae1dSRodney W. Grimes 		break;
10578071913dSRuslan Ermilov 	default:
10588071913dSRuslan Ermilov 		error = EOPNOTSUPP;
1059df8bae1dSRodney W. Grimes 	}
1060df8bae1dSRodney W. Grimes bad:
10613120b9d4SKip Macy 	if (needlock)
1062956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_UNLOCK(rnh);
1063df8bae1dSRodney W. Grimes 	return (error);
1064d1dd20beSSam Leffler #undef senderr
1065d1dd20beSSam Leffler }
1066d1dd20beSSam Leffler 
10678071913dSRuslan Ermilov #undef dst
10688071913dSRuslan Ermilov #undef gateway
10698071913dSRuslan Ermilov #undef netmask
10708071913dSRuslan Ermilov #undef ifaaddr
10718071913dSRuslan Ermilov #undef ifpaddr
10728071913dSRuslan Ermilov #undef flags
1073df8bae1dSRodney W. Grimes 
1074df8bae1dSRodney W. Grimes int
1075d1dd20beSSam Leffler rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
1076df8bae1dSRodney W. Grimes {
10778b615593SMarko Zec 	INIT_VNET_NET(curvnet);
1078d1dd20beSSam Leffler 	/* XXX dst may be overwritten, can we move this to below */
10796e6b3f7cSQing Li 	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
10806e6b3f7cSQing Li #ifdef INVARIANTS
1081c7cacf27SBrooks Davis 	struct radix_node_head *rnh =
1082c7cacf27SBrooks Davis 	    V_rt_tables[rt->rt_fibnum][dst->sa_family];
10836e6b3f7cSQing Li #endif
1084d1dd20beSSam Leffler 
1085d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
10863120b9d4SKip Macy 	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
1087df8bae1dSRodney W. Grimes 
10881db1fffaSBill Fenner 	/*
108985911824SLuigi Rizzo 	 * Prepare to store the gateway in rt->rt_gateway.
109085911824SLuigi Rizzo 	 * Both dst and gateway are stored one after the other in the same
109185911824SLuigi Rizzo 	 * malloc'd chunk. If we have room, we can reuse the old buffer,
109285911824SLuigi Rizzo 	 * rt_gateway already points to the right place.
109385911824SLuigi Rizzo 	 * Otherwise, malloc a new block and update the 'dst' address.
1094499676dfSJulian Elischer 	 */
109585911824SLuigi Rizzo 	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
109685911824SLuigi Rizzo 		caddr_t new;
109785911824SLuigi Rizzo 
1098df8bae1dSRodney W. Grimes 		R_Malloc(new, caddr_t, dlen + glen);
109985911824SLuigi Rizzo 		if (new == NULL)
11001db1fffaSBill Fenner 			return ENOBUFS;
1101499676dfSJulian Elischer 		/*
110285911824SLuigi Rizzo 		 * XXX note, we copy from *dst and not *rt_key(rt) because
110385911824SLuigi Rizzo 		 * rt_setgate() can be called to initialize a newly
110485911824SLuigi Rizzo 		 * allocated route entry, in which case rt_key(rt) == NULL
110585911824SLuigi Rizzo 		 * (and also rt->rt_gateway == NULL).
110685911824SLuigi Rizzo 		 * Free()/free() handle a NULL argument just fine.
1107499676dfSJulian Elischer 		 */
11081838a647SLuigi Rizzo 		bcopy(dst, new, dlen);
110985911824SLuigi Rizzo 		Free(rt_key(rt));	/* free old block, if any */
1110445e045bSAlexander Kabaev 		rt_key(rt) = (struct sockaddr *)new;
111185911824SLuigi Rizzo 		rt->rt_gateway = (struct sockaddr *)(new + dlen);
1112df8bae1dSRodney W. Grimes 	}
1113499676dfSJulian Elischer 
1114499676dfSJulian Elischer 	/*
111585911824SLuigi Rizzo 	 * Copy the new gateway value into the memory chunk.
111685911824SLuigi Rizzo 	 */
111785911824SLuigi Rizzo 	bcopy(gate, rt->rt_gateway, glen);
111885911824SLuigi Rizzo 
11196e6b3f7cSQing Li 	return (0);
1120df8bae1dSRodney W. Grimes }
1121df8bae1dSRodney W. Grimes 
/*
 * Copy sockaddr 'src' to 'dst', ANDing the address bytes with 'netmask'.
 *
 * The first byte of 'src' and of 'netmask' is taken as the respective
 * length.  The first two bytes (sa_len & sa_family) are copied verbatim,
 * bytes up to the shorter of the two lengths are masked, and whatever
 * remains of dst up to src's length is zero-filled.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	const unsigned char *from = (const unsigned char *)src;
	const unsigned char *mask = (const unsigned char *)netmask;
	unsigned char *to = (unsigned char *)dst;
	unsigned int masklen = mask[0];
	unsigned int srclen = from[0];
	unsigned int i;

	/* copies sa_len & sa_family */
	to[0] = from[0];
	to[1] = from[1];

	/* Never mask beyond the end of the source address. */
	if (masklen > srclen)
		masklen = srclen;
	for (i = 2; i < masklen; i++)
		to[i] = from[i] & mask[i];

	/* Bytes not covered by the mask are cleared. */
	if (i < srclen)
		bzero(to + i, srclen - i);
}
1140df8bae1dSRodney W. Grimes 
1141df8bae1dSRodney W. Grimes /*
1142df8bae1dSRodney W. Grimes  * Set up a routing table entry, normally
1143df8bae1dSRodney W. Grimes  * for an interface.
1144df8bae1dSRodney W. Grimes  */
11458b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
11468b07e49aSJulian Elischer static inline  int
11478b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
1148df8bae1dSRodney W. Grimes {
11498b615593SMarko Zec 	INIT_VNET_NET(curvnet);
11505aca0b30SLuigi Rizzo 	struct sockaddr *dst;
11518071913dSRuslan Ermilov 	struct sockaddr *netmask;
115285911824SLuigi Rizzo 	struct rtentry *rt = NULL;
11538071913dSRuslan Ermilov 	struct rt_addrinfo info;
1154e440aed9SQing Li 	int error = 0;
11558b07e49aSJulian Elischer 	int startfib, endfib;
11568b07e49aSJulian Elischer 	char tempbuf[_SOCKADDR_TMPSIZE];
11578b07e49aSJulian Elischer 	int didwork = 0;
11588b07e49aSJulian Elischer 	int a_failure = 0;
11596e6b3f7cSQing Li 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1160df8bae1dSRodney W. Grimes 
11618071913dSRuslan Ermilov 	if (flags & RTF_HOST) {
11628071913dSRuslan Ermilov 		dst = ifa->ifa_dstaddr;
11638071913dSRuslan Ermilov 		netmask = NULL;
11648071913dSRuslan Ermilov 	} else {
11658071913dSRuslan Ermilov 		dst = ifa->ifa_addr;
11668071913dSRuslan Ermilov 		netmask = ifa->ifa_netmask;
11678071913dSRuslan Ermilov 	}
11688b07e49aSJulian Elischer 	if ( dst->sa_family != AF_INET)
11698b07e49aSJulian Elischer 		fibnum = 0;
11708b07e49aSJulian Elischer 	if (fibnum == -1) {
117166e8505fSJulian Elischer 		if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
117266e8505fSJulian Elischer 			startfib = endfib = curthread->td_proc->p_fibnum;
117366e8505fSJulian Elischer 		} else {
11748b07e49aSJulian Elischer 			startfib = 0;
11758b07e49aSJulian Elischer 			endfib = rt_numfibs - 1;
117666e8505fSJulian Elischer 		}
11778b07e49aSJulian Elischer 	} else {
11788b07e49aSJulian Elischer 		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
11798b07e49aSJulian Elischer 		startfib = fibnum;
11808b07e49aSJulian Elischer 		endfib = fibnum;
11818b07e49aSJulian Elischer 	}
1182ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
1183ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
1184ac4a76ebSBjoern A. Zeeb 
1185b0a76b88SJulian Elischer 	/*
11868b07e49aSJulian Elischer 	 * If it's a delete, check that if it exists,
11878b07e49aSJulian Elischer 	 * it's on the correct interface or we might scrub
11888b07e49aSJulian Elischer 	 * a route to another ifa which would
1189b0a76b88SJulian Elischer 	 * be confusing at best and possibly worse.
1190b0a76b88SJulian Elischer 	 */
1191df8bae1dSRodney W. Grimes 	if (cmd == RTM_DELETE) {
1192b0a76b88SJulian Elischer 		/*
1193b0a76b88SJulian Elischer 		 * It's a delete, so it should already exist..
1194b0a76b88SJulian Elischer 		 * If it's a net, mask off the host bits
1195b0a76b88SJulian Elischer 		 * (Assuming we have a mask)
11968b07e49aSJulian Elischer 		 * XXX this is kinda inet specific..
1197b0a76b88SJulian Elischer 		 */
11988071913dSRuslan Ermilov 		if (netmask != NULL) {
11998b07e49aSJulian Elischer 			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
12008b07e49aSJulian Elischer 			dst = (struct sockaddr *)tempbuf;
1201df8bae1dSRodney W. Grimes 		}
12028b07e49aSJulian Elischer 	}
12038b07e49aSJulian Elischer 	/*
12048b07e49aSJulian Elischer 	 * Now go through all the requested tables (fibs) and do the
12058b07e49aSJulian Elischer 	 * requested action. Realistically, this will either be fib 0
12068b07e49aSJulian Elischer 	 * for protocols that don't do multiple tables or all the
12078b07e49aSJulian Elischer 	 * tables for those that do. XXX For this version only AF_INET.
12088b07e49aSJulian Elischer 	 * When that changes code should be refactored to protocol
12098b07e49aSJulian Elischer 	 * independent parts and protocol dependent parts.
12108b07e49aSJulian Elischer 	 */
12118b07e49aSJulian Elischer 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
12128b07e49aSJulian Elischer 		if (cmd == RTM_DELETE) {
12138b07e49aSJulian Elischer 			struct radix_node_head *rnh;
12148b07e49aSJulian Elischer 			struct radix_node *rn;
1215b0a76b88SJulian Elischer 			/*
12168071913dSRuslan Ermilov 			 * Look up an rtentry that is in the routing tree and
12178071913dSRuslan Ermilov 			 * contains the correct info.
1218b0a76b88SJulian Elischer 			 */
1219603724d3SBjoern A. Zeeb 			if ((rnh = V_rt_tables[fibnum][dst->sa_family]) == NULL)
12208b07e49aSJulian Elischer 				/* this table doesn't exist but others might */
12218b07e49aSJulian Elischer 				continue;
1222956b0b65SJeffrey Hsu 			RADIX_NODE_HEAD_LOCK(rnh);
1223e440aed9SQing Li #ifdef RADIX_MPATH
1224e440aed9SQing Li 			if (rn_mpath_capable(rnh)) {
1225e440aed9SQing Li 
1226e440aed9SQing Li 				rn = rnh->rnh_matchaddr(dst, rnh);
1227e440aed9SQing Li 				if (rn == NULL)
1228e440aed9SQing Li 					error = ESRCH;
1229e440aed9SQing Li 				else {
1230e440aed9SQing Li 					rt = RNTORT(rn);
1231e440aed9SQing Li 					/*
12328b07e49aSJulian Elischer 					 * for interface route the
12338b07e49aSJulian Elischer 					 * rt->rt_gateway is sockaddr_intf
12348b07e49aSJulian Elischer 					 * for cloning ARP entries, so
12358b07e49aSJulian Elischer 					 * rt_mpath_matchgate must use the
12368b07e49aSJulian Elischer 					 * interface address
1237e440aed9SQing Li 					 */
12388b07e49aSJulian Elischer 					rt = rt_mpath_matchgate(rt,
12398b07e49aSJulian Elischer 					    ifa->ifa_addr);
1240e440aed9SQing Li 					if (!rt)
1241e440aed9SQing Li 						error = ESRCH;
1242e440aed9SQing Li 				}
1243e440aed9SQing Li 			}
1244e440aed9SQing Li 			else
1245e440aed9SQing Li #endif
12468b07e49aSJulian Elischer 			rn = rnh->rnh_lookup(dst, netmask, rnh);
12478b07e49aSJulian Elischer 			error = (rn == NULL ||
12488071913dSRuslan Ermilov 			    (rn->rn_flags & RNF_ROOT) ||
1249d6941ce9SLuigi Rizzo 			    RNTORT(rn)->rt_ifa != ifa ||
125085911824SLuigi Rizzo 			    !sa_equal((struct sockaddr *)rn->rn_key, dst));
1251956b0b65SJeffrey Hsu 			RADIX_NODE_HEAD_UNLOCK(rnh);
1252956b0b65SJeffrey Hsu 			if (error) {
12538b07e49aSJulian Elischer 				/* this is only an error if bad on ALL tables */
12548b07e49aSJulian Elischer 				continue;
1255df8bae1dSRodney W. Grimes 			}
1256b0a76b88SJulian Elischer 		}
1257b0a76b88SJulian Elischer 		/*
1258b0a76b88SJulian Elischer 		 * Do the actual request
1259b0a76b88SJulian Elischer 		 */
12608071913dSRuslan Ermilov 		bzero((caddr_t)&info, sizeof(info));
12618071913dSRuslan Ermilov 		info.rti_ifa = ifa;
12628071913dSRuslan Ermilov 		info.rti_flags = flags | ifa->ifa_flags;
12638071913dSRuslan Ermilov 		info.rti_info[RTAX_DST] = dst;
12646e6b3f7cSQing Li 		/*
12656e6b3f7cSQing Li 		 * doing this for compatibility reasons
12666e6b3f7cSQing Li 		 */
12676e6b3f7cSQing Li 		if (cmd == RTM_ADD)
12686e6b3f7cSQing Li 			info.rti_info[RTAX_GATEWAY] =
12696e6b3f7cSQing Li 			    (struct sockaddr *)&null_sdl;
12706e6b3f7cSQing Li 		else
12718071913dSRuslan Ermilov 			info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
12728071913dSRuslan Ermilov 		info.rti_info[RTAX_NETMASK] = netmask;
12738b07e49aSJulian Elischer 		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
12745aca0b30SLuigi Rizzo 		if (error == 0 && rt != NULL) {
12758071913dSRuslan Ermilov 			/*
12766f99b44cSBrian Somers 			 * notify any listening routing agents of the change
12778071913dSRuslan Ermilov 			 */
1278d1dd20beSSam Leffler 			RT_LOCK(rt);
1279e440aed9SQing Li #ifdef RADIX_MPATH
1280e440aed9SQing Li 			/*
1281e440aed9SQing Li 			 * in case address alias finds the first address
1282e440aed9SQing Li 			 * e.g. ifconfig bge0 192.103.54.246/24
1283e440aed9SQing Li 			 * e.g. ifconfig bge0 192.103.54.247/24
1284e440aed9SQing Li 			 * the address set in the route is 192.103.54.246
1285e440aed9SQing Li 			 * so we need to replace it with 192.103.54.247
1286e440aed9SQing Li 			 */
12878b07e49aSJulian Elischer 			if (memcmp(rt->rt_ifa->ifa_addr,
12888b07e49aSJulian Elischer 			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
1289e440aed9SQing Li 				IFAFREE(rt->rt_ifa);
1290e440aed9SQing Li 				IFAREF(ifa);
1291e440aed9SQing Li 				rt->rt_ifp = ifa->ifa_ifp;
1292e440aed9SQing Li 				rt->rt_ifa = ifa;
1293e440aed9SQing Li 			}
1294e440aed9SQing Li #endif
12956e6b3f7cSQing Li 			/*
12966e6b3f7cSQing Li 			 * doing this for compatibility reasons
12976e6b3f7cSQing Li 			 */
12986e6b3f7cSQing Li 			if (cmd == RTM_ADD) {
12996e6b3f7cSQing Li 			    ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type  =
13006e6b3f7cSQing Li 				rt->rt_ifp->if_type;
13016e6b3f7cSQing Li 			    ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
13026e6b3f7cSQing Li 				rt->rt_ifp->if_index;
13036e6b3f7cSQing Li 			}
13048071913dSRuslan Ermilov 			rt_newaddrmsg(cmd, ifa, error, rt);
13058071913dSRuslan Ermilov 			if (cmd == RTM_DELETE) {
1306b0a76b88SJulian Elischer 				/*
13078b07e49aSJulian Elischer 				 * If we are deleting, and we found an entry,
13088b07e49aSJulian Elischer 				 * then it's been removed from the tree..
13098b07e49aSJulian Elischer 				 * now throw it away.
1310b0a76b88SJulian Elischer 				 */
1311d1dd20beSSam Leffler 				RTFREE_LOCKED(rt);
1312d1dd20beSSam Leffler 			} else {
1313d1dd20beSSam Leffler 				if (cmd == RTM_ADD) {
1314b0a76b88SJulian Elischer 					/*
13158b07e49aSJulian Elischer 					 * We just wanted to add it..
13168b07e49aSJulian Elischer 					 * we don't actually need a reference.
1317b0a76b88SJulian Elischer 					 */
13187138d65cSSam Leffler 					RT_REMREF(rt);
1319df8bae1dSRodney W. Grimes 				}
1320d1dd20beSSam Leffler 				RT_UNLOCK(rt);
1321d1dd20beSSam Leffler 			}
13228b07e49aSJulian Elischer 			didwork = 1;
1323df8bae1dSRodney W. Grimes 		}
13248b07e49aSJulian Elischer 		if (error)
13258b07e49aSJulian Elischer 			a_failure = error;
13268b07e49aSJulian Elischer 	}
13278b07e49aSJulian Elischer 	if (cmd == RTM_DELETE) {
13288b07e49aSJulian Elischer 		if (didwork) {
13298b07e49aSJulian Elischer 			error = 0;
13308b07e49aSJulian Elischer 		} else {
13318b07e49aSJulian Elischer 			/* we only give an error if it wasn't in any table */
13328b07e49aSJulian Elischer 			error = ((flags & RTF_HOST) ?
13338b07e49aSJulian Elischer 			    EHOSTUNREACH : ENETUNREACH);
13348b07e49aSJulian Elischer 		}
13358b07e49aSJulian Elischer 	} else {
13368b07e49aSJulian Elischer 		if (a_failure) {
13378b07e49aSJulian Elischer 			/* return an error if any of them failed */
13388b07e49aSJulian Elischer 			error = a_failure;
13398b07e49aSJulian Elischer 		}
13408b07e49aSJulian Elischer 	}
13413ec66d6cSDavid Greenman 	return (error);
13423ec66d6cSDavid Greenman }
1343cb64988fSLuoqi Chen 
/*
 * Special-purpose variant of rtinit() meant for internal use by the inet
 * code; it is not intended as a general entry point.
 *
 * Forwards ifa, cmd and flags unchanged to rtinit1() with a fib argument
 * of -1, without looking at the address family, and returns rtinit1()'s
 * result.
 * NOTE(review): -1 presumably asks rtinit1() to act on every routing
 * table -- confirm against rtinit1().
 */
int
rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
{
	int rc;

	rc = rtinit1(ifa, cmd, flags, -1);
	return (rc);
}
13508b07e49aSJulian Elischer 
13518b07e49aSJulian Elischer /*
13528b07e49aSJulian Elischer  * Set up a routing table entry, normally
13538b07e49aSJulian Elischer  * for an interface.
13548b07e49aSJulian Elischer  */
13558b07e49aSJulian Elischer int
13568b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags)
13578b07e49aSJulian Elischer {
13588b07e49aSJulian Elischer 	struct sockaddr *dst;
13598b07e49aSJulian Elischer 	int fib = 0;
13608b07e49aSJulian Elischer 
13618b07e49aSJulian Elischer 	if (flags & RTF_HOST) {
13628b07e49aSJulian Elischer 		dst = ifa->ifa_dstaddr;
13638b07e49aSJulian Elischer 	} else {
13648b07e49aSJulian Elischer 		dst = ifa->ifa_addr;
13658b07e49aSJulian Elischer 	}
13668b07e49aSJulian Elischer 
13678b07e49aSJulian Elischer 	if (dst->sa_family == AF_INET)
13688b07e49aSJulian Elischer 		fib = -1;
13698b07e49aSJulian Elischer 	return (rtinit1(ifa, cmd, flags, fib));
13708b07e49aSJulian Elischer }
13718b07e49aSJulian Elischer 
/*
 * Register route_init() with the SYSINIT mechanism under the
 * SI_SUB_PROTO_DOMAIN subsystem at order SI_ORDER_THIRD.
 * NOTE(review): route_init() is defined outside this excerpt, and the
 * ordering constraint below assumes SI_ORDER_THIRD sorts ahead of
 * SI_ORDER_MIDDLE within the same subsystem -- confirm against
 * sys/kernel.h.
 */
13726a800098SYoshinobu Inoue /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
13736a800098SYoshinobu Inoue SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
1374