xref: /freebsd/sys/net/route.c (revision 8b07e49a008c89a15e1fc4a1e3db6d945f81fab4)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1980, 1986, 1991, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
2942e9e16dSRuslan Ermilov  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
30c3aac50fSPeter Wemm  * $FreeBSD$
31df8bae1dSRodney W. Grimes  */
328b07e49aSJulian Elischer /************************************************************************
338b07e49aSJulian Elischer  * Note: In this file a 'fib' is a "forwarding information base"	*
348b07e49aSJulian Elischer  * Which is the new name for an in kernel routing (next hop) table.	*
358b07e49aSJulian Elischer  ***********************************************************************/
36df8bae1dSRodney W. Grimes 
371d5e9e22SEivind Eklund #include "opt_inet.h"
388b07e49aSJulian Elischer #include "opt_route.h"
394bd49128SPeter Wemm #include "opt_mrouting.h"
40e440aed9SQing Li #include "opt_mpath.h"
414bd49128SPeter Wemm 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
43df8bae1dSRodney W. Grimes #include <sys/systm.h>
444d1d4912SBruce Evans #include <sys/malloc.h>
45df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
46df8bae1dSRodney W. Grimes #include <sys/socket.h>
478b07e49aSJulian Elischer #include <sys/sysctl.h>
488b07e49aSJulian Elischer #include <sys/sysproto.h>
498b07e49aSJulian Elischer #include <sys/proc.h>
50df8bae1dSRodney W. Grimes #include <sys/domain.h>
51cb64988fSLuoqi Chen #include <sys/kernel.h>
52df8bae1dSRodney W. Grimes 
53df8bae1dSRodney W. Grimes #include <net/if.h>
54df8bae1dSRodney W. Grimes #include <net/route.h>
55df8bae1dSRodney W. Grimes 
56e440aed9SQing Li #ifdef RADIX_MPATH
57e440aed9SQing Li #include <net/radix_mpath.h>
58e440aed9SQing Li #endif
59e440aed9SQing Li 
60df8bae1dSRodney W. Grimes #include <netinet/in.h>
61b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h>
62df8bae1dSRodney W. Grimes 
632dc1d581SAndre Oppermann #include <vm/uma.h>
642dc1d581SAndre Oppermann 
/*
 * Derive RT_NUMFIBS, the compiled-in number of FIBs, from ROUTETABLES.
 *
 *  - ROUTETABLES undefined:        a single FIB; RT_MAXFIBS is defined
 *                                  here as 1 as well.
 *  - ROUTETABLES > RT_MAXFIBS:     build error.
 *  - ROUTETABLES == 0:             treated as 1.
 *  - otherwise:                    RT_NUMFIBS is ROUTETABLES.
 *
 * While we use 4 bits in the mbuf flags, we are limited to 16.
 * NOTE(review): when ROUTETABLES is defined, RT_MAXFIBS must already be
 * defined by an earlier header -- confirm where.
 */
#ifndef ROUTETABLES
#define	RT_NUMFIBS	1
#define	RT_MAXFIBS	1
#elif ROUTETABLES > RT_MAXFIBS
#define	RT_NUMFIBS	RT_MAXFIBS
#error "ROUTETABLES defined too big"
#elif ROUTETABLES == 0
#define	RT_NUMFIBS	1
#else
#define	RT_NUMFIBS	ROUTETABLES
#endif
838b07e49aSJulian Elischer 
848b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS;
858b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
868b07e49aSJulian Elischer /* Eventually this will be a tunable */
878b07e49aSJulian Elischer TUNABLE_INT("net.fibs", &rt_numfibs);
888b07e49aSJulian Elischer 
89f708ef1bSPoul-Henning Kamp static struct rtstat rtstat;
908b07e49aSJulian Elischer 
918b07e49aSJulian Elischer /* by default only the first 'row' of tables will be accessed. */
928b07e49aSJulian Elischer /*
938b07e49aSJulian Elischer  * XXXMRT When we fix netstat, and do this differnetly,
948b07e49aSJulian Elischer  * we can allocate this dynamically. As long as we are keeping
958b07e49aSJulian Elischer  * things backwards compaitble we need to allocate this
968b07e49aSJulian Elischer  * statically.
978b07e49aSJulian Elischer  */
988b07e49aSJulian Elischer struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];
9928f8db14SBruce Evans 
100f708ef1bSPoul-Henning Kamp static int	rttrash;		/* routes not in table but not freed */
101df8bae1dSRodney W. Grimes 
102929ddbbbSAlfred Perlstein static void rt_maskedcopy(struct sockaddr *,
103929ddbbbSAlfred Perlstein 	    struct sockaddr *, struct sockaddr *);
104f708ef1bSPoul-Henning Kamp 
105d6941ce9SLuigi Rizzo /* compare two sockaddr structures */
106d6941ce9SLuigi Rizzo #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
107d6941ce9SLuigi Rizzo 
108d6941ce9SLuigi Rizzo /*
109d6941ce9SLuigi Rizzo  * Convert a 'struct radix_node *' to a 'struct rtentry *'.
110d6941ce9SLuigi Rizzo  * The operation can be done safely (in this code) because a
111d6941ce9SLuigi Rizzo  * 'struct rtentry' starts with two 'struct radix_node''s, the first
112d6941ce9SLuigi Rizzo  * one representing leaf nodes in the routing tree, which is
113d6941ce9SLuigi Rizzo  * what the code in radix.c passes us as a 'struct radix_node'.
114d6941ce9SLuigi Rizzo  *
115d6941ce9SLuigi Rizzo  * But because there are a lot of assumptions in this conversion,
116d6941ce9SLuigi Rizzo  * do not cast explicitly, but always use the macro below.
117d6941ce9SLuigi Rizzo  */
118d6941ce9SLuigi Rizzo #define RNTORT(p)	((struct rtentry *)(p))
119d6941ce9SLuigi Rizzo 
1208b07e49aSJulian Elischer static uma_zone_t rtzone;		/* Routing table UMA zone. */
1218b07e49aSJulian Elischer 
1228b07e49aSJulian Elischer #if 0
1238b07e49aSJulian Elischer /* default fib for tunnels to use */
1248b07e49aSJulian Elischer u_int tunnel_fib = 0;
1258b07e49aSJulian Elischer SYSCTL_INT(_net, OID_AUTO, tunnelfib, CTLFLAG_RD, &tunnel_fib, 0, "");
1268b07e49aSJulian Elischer #endif
1278b07e49aSJulian Elischer 
1288b07e49aSJulian Elischer /*
1298b07e49aSJulian Elischer  * handler for net.my_fibnum
1308b07e49aSJulian Elischer  */
1318b07e49aSJulian Elischer static int
1328b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
133df8bae1dSRodney W. Grimes {
1348b07e49aSJulian Elischer         int fibnum;
1358b07e49aSJulian Elischer         int error;
1368b07e49aSJulian Elischer 
1378b07e49aSJulian Elischer         fibnum = curthread->td_proc->p_fibnum;
1388b07e49aSJulian Elischer         error = sysctl_handle_int(oidp, &fibnum, 0, req);
1398b07e49aSJulian Elischer         return (error);
140df8bae1dSRodney W. Grimes }
141df8bae1dSRodney W. Grimes 
1428b07e49aSJulian Elischer SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
1438b07e49aSJulian Elischer             NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
1442dc1d581SAndre Oppermann 
1452eb5613fSLuigi Rizzo static void
1462eb5613fSLuigi Rizzo route_init(void)
147df8bae1dSRodney W. Grimes {
1488b07e49aSJulian Elischer 	int table;
1498b07e49aSJulian Elischer 	struct domain *dom;
1508b07e49aSJulian Elischer 	int fam;
1518b07e49aSJulian Elischer 
1528b07e49aSJulian Elischer 	/* whack teh tunable ints into  line. */
1538b07e49aSJulian Elischer 	if (rt_numfibs > RT_MAXFIBS)
1548b07e49aSJulian Elischer 		rt_numfibs = RT_MAXFIBS;
1558b07e49aSJulian Elischer 	if (rt_numfibs == 0)
1568b07e49aSJulian Elischer 		rt_numfibs = 1;
1572dc1d581SAndre Oppermann 	rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
1582dc1d581SAndre Oppermann 	    NULL, NULL, UMA_ALIGN_PTR, 0);
159df8bae1dSRodney W. Grimes 	rn_init();	/* initialize all zeroes, all ones, mask table */
1608b07e49aSJulian Elischer 
1618b07e49aSJulian Elischer 	for (dom = domains; dom; dom = dom->dom_next) {
1628b07e49aSJulian Elischer 		if (dom->dom_rtattach)  {
1638b07e49aSJulian Elischer 			for  (table = 0; table < rt_numfibs; table++) {
1648b07e49aSJulian Elischer 				if ( (fam = dom->dom_family) == AF_INET ||
1658b07e49aSJulian Elischer 				    table == 0) {
1668b07e49aSJulian Elischer  			        	/* for now only AF_INET has > 1 table */
1678b07e49aSJulian Elischer 					/* XXX MRT
1688b07e49aSJulian Elischer 					 * rtattach will be also called
1698b07e49aSJulian Elischer 					 * from vfs_export.c but the
1708b07e49aSJulian Elischer 					 * offset will be 0
1718b07e49aSJulian Elischer 					 * (only for AF_INET and AF_INET6
1728b07e49aSJulian Elischer 					 * which don't need it anyhow)
1738b07e49aSJulian Elischer 					 */
1748b07e49aSJulian Elischer 					dom->dom_rtattach(
1758b07e49aSJulian Elischer 				    	    (void **)&rt_tables[table][fam],
1768b07e49aSJulian Elischer 				    	    dom->dom_rtoffset);
1778b07e49aSJulian Elischer 				} else {
1788b07e49aSJulian Elischer 					break;
1798b07e49aSJulian Elischer 				}
1808b07e49aSJulian Elischer 			}
1818b07e49aSJulian Elischer 		}
1828b07e49aSJulian Elischer 	}
1838b07e49aSJulian Elischer }
1848b07e49aSJulian Elischer 
1858b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_
1868b07e49aSJulian Elischer struct setfib_args {
1878b07e49aSJulian Elischer 	int     fibnum;
1888b07e49aSJulian Elischer };
1898b07e49aSJulian Elischer #endif
1908b07e49aSJulian Elischer int
1918b07e49aSJulian Elischer setfib(struct thread *td, struct setfib_args *uap)
1928b07e49aSJulian Elischer {
1938b07e49aSJulian Elischer 	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
1948b07e49aSJulian Elischer 		return EINVAL;
1958b07e49aSJulian Elischer 	td->td_proc->p_fibnum = uap->fibnum;
1968b07e49aSJulian Elischer 	return (0);
197df8bae1dSRodney W. Grimes }
198df8bae1dSRodney W. Grimes 
/*
 * Packet routing routines.
 */

/*
 * Fill in ro->ro_rt for ro->ro_dst using FIB 0 and no ignored flags.
 * Thin wrapper around rtalloc_ign_fib().
 */
void
rtalloc(struct route *ro)
{
	rtalloc_ign_fib(ro, 0UL, 0);
}
2078b07e49aSJulian Elischer 
2088b07e49aSJulian Elischer void
2098b07e49aSJulian Elischer rtalloc_fib(struct route *ro, u_int fibnum)
2108b07e49aSJulian Elischer {
2118b07e49aSJulian Elischer 	rtalloc_ign_fib(ro, 0UL, fibnum);
212df8bae1dSRodney W. Grimes }
213df8bae1dSRodney W. Grimes 
214652082e6SGarrett Wollman void
215d1dd20beSSam Leffler rtalloc_ign(struct route *ro, u_long ignore)
216652082e6SGarrett Wollman {
21768f956b8SJohn Polstra 	struct rtentry *rt;
21868f956b8SJohn Polstra 
21968f956b8SJohn Polstra 	if ((rt = ro->ro_rt) != NULL) {
22068f956b8SJohn Polstra 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
22168f956b8SJohn Polstra 			return;
22268f956b8SJohn Polstra 		RTFREE(rt);
22366810dd0SYoshinobu Inoue 		ro->ro_rt = NULL;
22468f956b8SJohn Polstra 	}
2258b07e49aSJulian Elischer 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
2268b07e49aSJulian Elischer 	if (ro->ro_rt)
2278b07e49aSJulian Elischer 		RT_UNLOCK(ro->ro_rt);
2288b07e49aSJulian Elischer }
2298b07e49aSJulian Elischer 
2308b07e49aSJulian Elischer void
2318b07e49aSJulian Elischer rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
2328b07e49aSJulian Elischer {
2338b07e49aSJulian Elischer 	struct rtentry *rt;
2348b07e49aSJulian Elischer 
2358b07e49aSJulian Elischer 	if ((rt = ro->ro_rt) != NULL) {
2368b07e49aSJulian Elischer 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
2378b07e49aSJulian Elischer 			return;
2388b07e49aSJulian Elischer 		RTFREE(rt);
2398b07e49aSJulian Elischer 		ro->ro_rt = NULL;
2408b07e49aSJulian Elischer 	}
2418b07e49aSJulian Elischer 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
242d1dd20beSSam Leffler 	if (ro->ro_rt)
243d1dd20beSSam Leffler 		RT_UNLOCK(ro->ro_rt);
244652082e6SGarrett Wollman }
245652082e6SGarrett Wollman 
246b0a76b88SJulian Elischer /*
247b0a76b88SJulian Elischer  * Look up the route that matches the address given
248b0a76b88SJulian Elischer  * Or, at least try.. Create a cloned route if needed.
249d1dd20beSSam Leffler  *
250d1dd20beSSam Leffler  * The returned route, if any, is locked.
251b0a76b88SJulian Elischer  */
252df8bae1dSRodney W. Grimes struct rtentry *
253d1dd20beSSam Leffler rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
254df8bae1dSRodney W. Grimes {
2558b07e49aSJulian Elischer 	return (rtalloc1_fib(dst, report, ignflags, 0));
2568b07e49aSJulian Elischer }
2578b07e49aSJulian Elischer 
2588b07e49aSJulian Elischer struct rtentry *
2598b07e49aSJulian Elischer rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
2608b07e49aSJulian Elischer 		    u_int fibnum)
2618b07e49aSJulian Elischer {
2628b07e49aSJulian Elischer 	struct radix_node_head *rnh;
263d1dd20beSSam Leffler 	struct rtentry *rt;
264d1dd20beSSam Leffler 	struct radix_node *rn;
265d1dd20beSSam Leffler 	struct rtentry *newrt;
266df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
267995add1aSGarrett Wollman 	u_long nflags;
268d1dd20beSSam Leffler 	int err = 0, msgtype = RTM_MISS;
269df8bae1dSRodney W. Grimes 
2708b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
2718b07e49aSJulian Elischer 	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
2728b07e49aSJulian Elischer 		fibnum = 0;
2738b07e49aSJulian Elischer 	rnh = rt_tables[fibnum][dst->sa_family];
27485911824SLuigi Rizzo 	newrt = NULL;
275b0a76b88SJulian Elischer 	/*
276b0a76b88SJulian Elischer 	 * Look up the address in the table for that Address Family
277b0a76b88SJulian Elischer 	 */
278956b0b65SJeffrey Hsu 	if (rnh == NULL) {
279956b0b65SJeffrey Hsu 		rtstat.rts_unreach++;
280956b0b65SJeffrey Hsu 		goto miss2;
281956b0b65SJeffrey Hsu 	}
282956b0b65SJeffrey Hsu 	RADIX_NODE_HEAD_LOCK(rnh);
283d1dd20beSSam Leffler 	if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
284d1dd20beSSam Leffler 	    (rn->rn_flags & RNF_ROOT) == 0) {
285b0a76b88SJulian Elischer 		/*
286b0a76b88SJulian Elischer 		 * If we find it and it's not the root node, then
287a0c0e34bSGleb Smirnoff 		 * get a reference on the rtentry associated.
288b0a76b88SJulian Elischer 		 */
289d6941ce9SLuigi Rizzo 		newrt = rt = RNTORT(rn);
290995add1aSGarrett Wollman 		nflags = rt->rt_flags & ~ignflags;
29126d02ca7SAndre Oppermann 		if (report && (nflags & RTF_CLONING)) {
292b0a76b88SJulian Elischer 			/*
293b0a76b88SJulian Elischer 			 * We are apparently adding (report = 0 in delete).
294b0a76b88SJulian Elischer 			 * If it requires that it be cloned, do so.
295b0a76b88SJulian Elischer 			 * (This implies it wasn't a HOST route.)
296b0a76b88SJulian Elischer 			 */
2978b07e49aSJulian Elischer 			err = rtrequest_fib(RTM_RESOLVE, dst, NULL,
2988b07e49aSJulian Elischer 					      NULL, 0, &newrt, fibnum);
299df8bae1dSRodney W. Grimes 			if (err) {
300b0a76b88SJulian Elischer 				/*
301b0a76b88SJulian Elischer 				 * If the cloning didn't succeed, maybe
302b0a76b88SJulian Elischer 				 * what we have will do. Return that.
303b0a76b88SJulian Elischer 				 */
304d1dd20beSSam Leffler 				newrt = rt;		/* existing route */
305d1dd20beSSam Leffler 				RT_LOCK(newrt);
3067138d65cSSam Leffler 				RT_ADDREF(newrt);
307df8bae1dSRodney W. Grimes 				goto miss;
308df8bae1dSRodney W. Grimes 			}
309d1dd20beSSam Leffler 			KASSERT(newrt, ("no route and no error"));
310d1dd20beSSam Leffler 			RT_LOCK(newrt);
311d1dd20beSSam Leffler 			if (newrt->rt_flags & RTF_XRESOLVE) {
312b0a76b88SJulian Elischer 				/*
313b0a76b88SJulian Elischer 				 * If the new route specifies it be
314b0a76b88SJulian Elischer 				 * externally resolved, then go do that.
315b0a76b88SJulian Elischer 				 */
316df8bae1dSRodney W. Grimes 				msgtype = RTM_RESOLVE;
317df8bae1dSRodney W. Grimes 				goto miss;
318df8bae1dSRodney W. Grimes 			}
3198071913dSRuslan Ermilov 			/* Inform listeners of the new route. */
3206f5967c0SBruce Evans 			bzero(&info, sizeof(info));
321d1dd20beSSam Leffler 			info.rti_info[RTAX_DST] = rt_key(newrt);
322d1dd20beSSam Leffler 			info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
323d1dd20beSSam Leffler 			info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
324d1dd20beSSam Leffler 			if (newrt->rt_ifp != NULL) {
3258071913dSRuslan Ermilov 				info.rti_info[RTAX_IFP] =
3264a0d6638SRuslan Ermilov 				    newrt->rt_ifp->if_addr->ifa_addr;
327d1dd20beSSam Leffler 				info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
3288071913dSRuslan Ermilov 			}
329d1dd20beSSam Leffler 			rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
330d1dd20beSSam Leffler 		} else {
331d1dd20beSSam Leffler 			RT_LOCK(newrt);
3327138d65cSSam Leffler 			RT_ADDREF(newrt);
333d1dd20beSSam Leffler 		}
334956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_UNLOCK(rnh);
335df8bae1dSRodney W. Grimes 	} else {
336b0a76b88SJulian Elischer 		/*
337b0a76b88SJulian Elischer 		 * Either we hit the root or couldn't find any match,
338b0a76b88SJulian Elischer 		 * Which basically means
339b0a76b88SJulian Elischer 		 * "caint get there frm here"
340b0a76b88SJulian Elischer 		 */
341df8bae1dSRodney W. Grimes 		rtstat.rts_unreach++;
342956b0b65SJeffrey Hsu 	miss:
343956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_UNLOCK(rnh);
344956b0b65SJeffrey Hsu 	miss2:	if (report) {
345b0a76b88SJulian Elischer 			/*
346b0a76b88SJulian Elischer 			 * If required, report the failure to the supervising
347b0a76b88SJulian Elischer 			 * Authorities.
348b0a76b88SJulian Elischer 			 * For a delete, this is not an error. (report == 0)
349b0a76b88SJulian Elischer 			 */
3506f5967c0SBruce Evans 			bzero(&info, sizeof(info));
351df8bae1dSRodney W. Grimes 			info.rti_info[RTAX_DST] = dst;
352df8bae1dSRodney W. Grimes 			rt_missmsg(msgtype, &info, 0, err);
353df8bae1dSRodney W. Grimes 		}
354df8bae1dSRodney W. Grimes 	}
355d1dd20beSSam Leffler 	if (newrt)
356d1dd20beSSam Leffler 		RT_LOCK_ASSERT(newrt);
357df8bae1dSRodney W. Grimes 	return (newrt);
358df8bae1dSRodney W. Grimes }
359df8bae1dSRodney W. Grimes 
360499676dfSJulian Elischer /*
361499676dfSJulian Elischer  * Remove a reference count from an rtentry.
362499676dfSJulian Elischer  * If the count gets low enough, take it out of the routing table
363499676dfSJulian Elischer  */
364df8bae1dSRodney W. Grimes void
365d1dd20beSSam Leffler rtfree(struct rtentry *rt)
366df8bae1dSRodney W. Grimes {
36785911824SLuigi Rizzo 	struct radix_node_head *rnh;
368df8bae1dSRodney W. Grimes 
369a0c0e34bSGleb Smirnoff 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
3708b07e49aSJulian Elischer 	rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
371a0c0e34bSGleb Smirnoff 	KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
372499676dfSJulian Elischer 
373d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
374d1dd20beSSam Leffler 
375499676dfSJulian Elischer 	/*
376a0c0e34bSGleb Smirnoff 	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
377a0c0e34bSGleb Smirnoff 	 * we should come here exactly with the last reference.
378499676dfSJulian Elischer 	 */
3797138d65cSSam Leffler 	RT_REMREF(rt);
380a0c0e34bSGleb Smirnoff 	if (rt->rt_refcnt > 0) {
381335fbc46SPoul-Henning Kamp 		printf("%s: %p has %lu refs\n", __func__, rt, rt->rt_refcnt);
382d1dd20beSSam Leffler 		goto done;
383a0c0e34bSGleb Smirnoff 	}
3849c63e9dbSSam Leffler 
3859c63e9dbSSam Leffler 	/*
3869c63e9dbSSam Leffler 	 * On last reference give the "close method" a chance
3879c63e9dbSSam Leffler 	 * to cleanup private state.  This also permits (for
3889c63e9dbSSam Leffler 	 * IPv4 and IPv6) a chance to decide if the routing table
3899c63e9dbSSam Leffler 	 * entry should be purged immediately or at a later time.
3909c63e9dbSSam Leffler 	 * When an immediate purge is to happen the close routine
3919c63e9dbSSam Leffler 	 * typically calls rtexpunge which clears the RTF_UP flag
3929c63e9dbSSam Leffler 	 * on the entry so that the code below reclaims the storage.
3939c63e9dbSSam Leffler 	 */
394d1dd20beSSam Leffler 	if (rt->rt_refcnt == 0 && rnh->rnh_close)
3955c2dae8eSGarrett Wollman 		rnh->rnh_close((struct radix_node *)rt, rnh);
396499676dfSJulian Elischer 
397499676dfSJulian Elischer 	/*
398499676dfSJulian Elischer 	 * If we are no longer "up" (and ref == 0)
399499676dfSJulian Elischer 	 * then we can free the resources associated
400499676dfSJulian Elischer 	 * with the route.
401499676dfSJulian Elischer 	 */
402d1dd20beSSam Leffler 	if ((rt->rt_flags & RTF_UP) == 0) {
403df8bae1dSRodney W. Grimes 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
404df8bae1dSRodney W. Grimes 			panic("rtfree 2");
405499676dfSJulian Elischer 		/*
406499676dfSJulian Elischer 		 * the rtentry must have been removed from the routing table
407499676dfSJulian Elischer 		 * so it is represented in rttrash.. remove that now.
408499676dfSJulian Elischer 		 */
409df8bae1dSRodney W. Grimes 		rttrash--;
410499676dfSJulian Elischer #ifdef	DIAGNOSTIC
411df8bae1dSRodney W. Grimes 		if (rt->rt_refcnt < 0) {
412623ae52eSPoul-Henning Kamp 			printf("rtfree: %p not freed (neg refs)\n", rt);
413d1dd20beSSam Leffler 			goto done;
414df8bae1dSRodney W. Grimes 		}
415499676dfSJulian Elischer #endif
416499676dfSJulian Elischer 		/*
417499676dfSJulian Elischer 		 * release references on items we hold them on..
418499676dfSJulian Elischer 		 * e.g other routes and ifaddrs.
419499676dfSJulian Elischer 		 */
42019fc74fbSJeffrey Hsu 		if (rt->rt_ifa)
42119fc74fbSJeffrey Hsu 			IFAFREE(rt->rt_ifa);
422d1dd20beSSam Leffler 		rt->rt_parent = NULL;		/* NB: no refcnt on parent */
423499676dfSJulian Elischer 
424499676dfSJulian Elischer 		/*
425499676dfSJulian Elischer 		 * The key is separatly alloc'd so free it (see rt_setgate()).
426499676dfSJulian Elischer 		 * This also frees the gateway, as they are always malloc'd
427499676dfSJulian Elischer 		 * together.
428499676dfSJulian Elischer 		 */
429df8bae1dSRodney W. Grimes 		Free(rt_key(rt));
430499676dfSJulian Elischer 
431499676dfSJulian Elischer 		/*
432499676dfSJulian Elischer 		 * and the rtentry itself of course
433499676dfSJulian Elischer 		 */
434d1dd20beSSam Leffler 		RT_LOCK_DESTROY(rt);
4352dc1d581SAndre Oppermann 		uma_zfree(rtzone, rt);
436d1dd20beSSam Leffler 		return;
437df8bae1dSRodney W. Grimes 	}
438d1dd20beSSam Leffler done:
439d1dd20beSSam Leffler 	RT_UNLOCK(rt);
440df8bae1dSRodney W. Grimes }
441df8bae1dSRodney W. Grimes 
442df8bae1dSRodney W. Grimes 
/*
 * Force a routing table entry to the specified
 * destination to go through the given gateway.
 * Normally called as a result of a routing redirect
 * message from the network layer.
 *
 * This variant always operates on FIB 0; it is a thin wrapper around
 * rtredirect_fib().
 */
void
rtredirect(struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct sockaddr *src)
{
	rtredirect_fib(dst, gateway, netmask, flags, src, 0);
}
4588b07e49aSJulian Elischer 
4598b07e49aSJulian Elischer void
4608b07e49aSJulian Elischer rtredirect_fib(struct sockaddr *dst,
4618b07e49aSJulian Elischer 	struct sockaddr *gateway,
4628b07e49aSJulian Elischer 	struct sockaddr *netmask,
4638b07e49aSJulian Elischer 	int flags,
4648b07e49aSJulian Elischer 	struct sockaddr *src,
4658b07e49aSJulian Elischer 	u_int fibnum)
4668b07e49aSJulian Elischer {
4678e7e854cSKip Macy 	struct rtentry *rt, *rt0 = NULL;
468df8bae1dSRodney W. Grimes 	int error = 0;
46985911824SLuigi Rizzo 	short *stat = NULL;
470df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
471df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
472df8bae1dSRodney W. Grimes 
473df8bae1dSRodney W. Grimes 	/* verify the gateway is directly reachable */
47485911824SLuigi Rizzo 	if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
475df8bae1dSRodney W. Grimes 		error = ENETUNREACH;
476df8bae1dSRodney W. Grimes 		goto out;
477df8bae1dSRodney W. Grimes 	}
4788b07e49aSJulian Elischer 	rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
479df8bae1dSRodney W. Grimes 	/*
480df8bae1dSRodney W. Grimes 	 * If the redirect isn't from our current router for this dst,
481df8bae1dSRodney W. Grimes 	 * it's either old or wrong.  If it redirects us to ourselves,
482df8bae1dSRodney W. Grimes 	 * we have a routing loop, perhaps as a result of an interface
483df8bae1dSRodney W. Grimes 	 * going down recently.
484df8bae1dSRodney W. Grimes 	 */
485df8bae1dSRodney W. Grimes 	if (!(flags & RTF_DONE) && rt &&
486956b0b65SJeffrey Hsu 	     (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
487df8bae1dSRodney W. Grimes 		error = EINVAL;
488df8bae1dSRodney W. Grimes 	else if (ifa_ifwithaddr(gateway))
489df8bae1dSRodney W. Grimes 		error = EHOSTUNREACH;
490df8bae1dSRodney W. Grimes 	if (error)
491df8bae1dSRodney W. Grimes 		goto done;
492df8bae1dSRodney W. Grimes 	/*
493df8bae1dSRodney W. Grimes 	 * Create a new entry if we just got back a wildcard entry
494df8bae1dSRodney W. Grimes 	 * or the the lookup failed.  This is necessary for hosts
495df8bae1dSRodney W. Grimes 	 * which use routing redirects generated by smart gateways
496df8bae1dSRodney W. Grimes 	 * to dynamically build the routing tables.
497df8bae1dSRodney W. Grimes 	 */
49885911824SLuigi Rizzo 	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
499df8bae1dSRodney W. Grimes 		goto create;
500df8bae1dSRodney W. Grimes 	/*
501df8bae1dSRodney W. Grimes 	 * Don't listen to the redirect if it's
502df8bae1dSRodney W. Grimes 	 * for a route to an interface.
503df8bae1dSRodney W. Grimes 	 */
504df8bae1dSRodney W. Grimes 	if (rt->rt_flags & RTF_GATEWAY) {
505df8bae1dSRodney W. Grimes 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
506df8bae1dSRodney W. Grimes 			/*
507df8bae1dSRodney W. Grimes 			 * Changing from route to net => route to host.
508df8bae1dSRodney W. Grimes 			 * Create new route, rather than smashing route to net.
509df8bae1dSRodney W. Grimes 			 */
510df8bae1dSRodney W. Grimes 		create:
5118e7e854cSKip Macy 			rt0 = rt;
5128e7e854cSKip Macy 			rt = NULL;
5138e7e854cSKip Macy 
514df8bae1dSRodney W. Grimes 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
5158071913dSRuslan Ermilov 			bzero((caddr_t)&info, sizeof(info));
5168071913dSRuslan Ermilov 			info.rti_info[RTAX_DST] = dst;
5178071913dSRuslan Ermilov 			info.rti_info[RTAX_GATEWAY] = gateway;
5188071913dSRuslan Ermilov 			info.rti_info[RTAX_NETMASK] = netmask;
5198071913dSRuslan Ermilov 			info.rti_ifa = ifa;
5208071913dSRuslan Ermilov 			info.rti_flags = flags;
5218b07e49aSJulian Elischer 			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
522d1dd20beSSam Leffler 			if (rt != NULL) {
5234de5d90cSSam Leffler 				RT_LOCK(rt);
52429910a5aSKip Macy 				EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
5258071913dSRuslan Ermilov 				flags = rt->rt_flags;
526d1dd20beSSam Leffler 			}
5278e7e854cSKip Macy 			if (rt0)
5288e7e854cSKip Macy 				RTFREE_LOCKED(rt0);
5298e7e854cSKip Macy 
530df8bae1dSRodney W. Grimes 			stat = &rtstat.rts_dynamic;
531df8bae1dSRodney W. Grimes 		} else {
5328e7e854cSKip Macy 			struct rtentry *gwrt;
5338e7e854cSKip Macy 
534df8bae1dSRodney W. Grimes 			/*
535df8bae1dSRodney W. Grimes 			 * Smash the current notion of the gateway to
536df8bae1dSRodney W. Grimes 			 * this destination.  Should check about netmask!!!
537df8bae1dSRodney W. Grimes 			 */
538df8bae1dSRodney W. Grimes 			rt->rt_flags |= RTF_MODIFIED;
539df8bae1dSRodney W. Grimes 			flags |= RTF_MODIFIED;
540df8bae1dSRodney W. Grimes 			stat = &rtstat.rts_newgateway;
541499676dfSJulian Elischer 			/*
542499676dfSJulian Elischer 			 * add the key and gateway (in one malloc'd chunk).
543499676dfSJulian Elischer 			 */
544df8bae1dSRodney W. Grimes 			rt_setgate(rt, rt_key(rt), gateway);
5458e7e854cSKip Macy 			gwrt = rtalloc1(gateway, 1, 0);
54629910a5aSKip Macy 			EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
5478e7e854cSKip Macy 			RTFREE_LOCKED(gwrt);
548df8bae1dSRodney W. Grimes 		}
549df8bae1dSRodney W. Grimes 	} else
550df8bae1dSRodney W. Grimes 		error = EHOSTUNREACH;
551df8bae1dSRodney W. Grimes done:
552d1dd20beSSam Leffler 	if (rt)
5531951e633SJohn Baldwin 		RTFREE_LOCKED(rt);
554df8bae1dSRodney W. Grimes out:
555df8bae1dSRodney W. Grimes 	if (error)
556df8bae1dSRodney W. Grimes 		rtstat.rts_badredirect++;
557df8bae1dSRodney W. Grimes 	else if (stat != NULL)
558df8bae1dSRodney W. Grimes 		(*stat)++;
559df8bae1dSRodney W. Grimes 	bzero((caddr_t)&info, sizeof(info));
560df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_DST] = dst;
561df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_GATEWAY] = gateway;
562df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_NETMASK] = netmask;
563df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_AUTHOR] = src;
564df8bae1dSRodney W. Grimes 	rt_missmsg(RTM_REDIRECT, &info, flags, error);
565df8bae1dSRodney W. Grimes }
566df8bae1dSRodney W. Grimes 
5678b07e49aSJulian Elischer int
5688b07e49aSJulian Elischer rtioctl(u_long req, caddr_t data)
5698b07e49aSJulian Elischer {
5708b07e49aSJulian Elischer 	return (rtioctl_fib(req, data, 0));
5718b07e49aSJulian Elischer }
5728b07e49aSJulian Elischer 
573df8bae1dSRodney W. Grimes /*
574df8bae1dSRodney W. Grimes  * Routing table ioctl interface.
575df8bae1dSRodney W. Grimes  */
576df8bae1dSRodney W. Grimes int
5778b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
578df8bae1dSRodney W. Grimes {
5795090559bSChristian S.J. Peron 
5805090559bSChristian S.J. Peron 	/*
5815090559bSChristian S.J. Peron 	 * If more ioctl commands are added here, make sure the proper
5825090559bSChristian S.J. Peron 	 * super-user checks are being performed because it is possible for
5835090559bSChristian S.J. Peron 	 * prison-root to make it this far if raw sockets have been enabled
5845090559bSChristian S.J. Peron 	 * in jails.
5855090559bSChristian S.J. Peron 	 */
586623ae52eSPoul-Henning Kamp #ifdef INET
587f0068c4aSGarrett Wollman 	/* Multicast goop, grrr... */
5888b07e49aSJulian Elischer 	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
589623ae52eSPoul-Henning Kamp #else /* INET */
590623ae52eSPoul-Henning Kamp 	return ENXIO;
591623ae52eSPoul-Henning Kamp #endif /* INET */
592df8bae1dSRodney W. Grimes }
593df8bae1dSRodney W. Grimes 
/*
 * Find the interface address to use for a route described by 'flags',
 * 'dst' and 'gateway', consulting FIB 0.  Thin wrapper around
 * ifa_ifwithroute_fib().
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
{
	return (ifa_ifwithroute_fib(flags, dst, gateway, 0));
}
5998b07e49aSJulian Elischer 
6008b07e49aSJulian Elischer struct ifaddr *
6018b07e49aSJulian Elischer ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
6028b07e49aSJulian Elischer 				u_int fibnum)
6038b07e49aSJulian Elischer {
604df8bae1dSRodney W. Grimes 	register struct ifaddr *ifa;
605e034e82cSQing Li 	int not_found = 0;
606d1dd20beSSam Leffler 
607df8bae1dSRodney W. Grimes 	if ((flags & RTF_GATEWAY) == 0) {
608df8bae1dSRodney W. Grimes 		/*
609df8bae1dSRodney W. Grimes 		 * If we are adding a route to an interface,
610df8bae1dSRodney W. Grimes 		 * and the interface is a pt to pt link
611df8bae1dSRodney W. Grimes 		 * we should search for the destination
612df8bae1dSRodney W. Grimes 		 * as our clue to the interface.  Otherwise
613df8bae1dSRodney W. Grimes 		 * we can use the local address.
614df8bae1dSRodney W. Grimes 		 */
61585911824SLuigi Rizzo 		ifa = NULL;
61685911824SLuigi Rizzo 		if (flags & RTF_HOST)
617df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithdstaddr(dst);
61885911824SLuigi Rizzo 		if (ifa == NULL)
619df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithaddr(gateway);
620df8bae1dSRodney W. Grimes 	} else {
621df8bae1dSRodney W. Grimes 		/*
622df8bae1dSRodney W. Grimes 		 * If we are adding a route to a remote net
623df8bae1dSRodney W. Grimes 		 * or host, the gateway may still be on the
624df8bae1dSRodney W. Grimes 		 * other end of a pt to pt link.
625df8bae1dSRodney W. Grimes 		 */
626df8bae1dSRodney W. Grimes 		ifa = ifa_ifwithdstaddr(gateway);
627df8bae1dSRodney W. Grimes 	}
62885911824SLuigi Rizzo 	if (ifa == NULL)
629df8bae1dSRodney W. Grimes 		ifa = ifa_ifwithnet(gateway);
63085911824SLuigi Rizzo 	if (ifa == NULL) {
6318b07e49aSJulian Elischer 		struct rtentry *rt = rtalloc1_fib(gateway, 0, 0UL, fibnum);
63285911824SLuigi Rizzo 		if (rt == NULL)
63385911824SLuigi Rizzo 			return (NULL);
634e034e82cSQing Li 		/*
635e034e82cSQing Li 		 * dismiss a gateway that is reachable only
636e034e82cSQing Li 		 * through the default router
637e034e82cSQing Li 		 */
638e034e82cSQing Li 		switch (gateway->sa_family) {
639e034e82cSQing Li 		case AF_INET:
640e034e82cSQing Li 			if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
641e034e82cSQing Li 				not_found = 1;
642e034e82cSQing Li 			break;
643e034e82cSQing Li 		case AF_INET6:
644e034e82cSQing Li 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
645e034e82cSQing Li 				not_found = 1;
646e034e82cSQing Li 			break;
647e034e82cSQing Li 		default:
648e034e82cSQing Li 			break;
649e034e82cSQing Li 		}
6507138d65cSSam Leffler 		RT_REMREF(rt);
651d1dd20beSSam Leffler 		RT_UNLOCK(rt);
652e034e82cSQing Li 		if (not_found)
653e034e82cSQing Li 			return (NULL);
65485911824SLuigi Rizzo 		if ((ifa = rt->rt_ifa) == NULL)
65585911824SLuigi Rizzo 			return (NULL);
656df8bae1dSRodney W. Grimes 	}
657df8bae1dSRodney W. Grimes 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
658df8bae1dSRodney W. Grimes 		struct ifaddr *oifa = ifa;
659df8bae1dSRodney W. Grimes 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
66085911824SLuigi Rizzo 		if (ifa == NULL)
661df8bae1dSRodney W. Grimes 			ifa = oifa;
662df8bae1dSRodney W. Grimes 	}
663df8bae1dSRodney W. Grimes 	return (ifa);
664df8bae1dSRodney W. Grimes }
665df8bae1dSRodney W. Grimes 
/* Tree-walk callbacks handed to rnh_walktree_from(). */
66685911824SLuigi Rizzo static walktree_f_t rt_fixdelete;
66785911824SLuigi Rizzo static walktree_f_t rt_fixchange;
668cd02a0b7SGarrett Wollman 
/*
 * Argument bundle passed (as the opaque pointer) to rt_fixchange() when
 * walking the tree after a route has been added.
 */
669cd02a0b7SGarrett Wollman struct rtfc_arg {
670cd02a0b7SGarrett Wollman 	struct rtentry *rt0;	/* the route the walk is performed on behalf of */
671cd02a0b7SGarrett Wollman 	struct radix_node_head *rnh;	/* radix tree head being walked */
672cd02a0b7SGarrett Wollman };
67318e1f1f1SGarrett Wollman 
674b0a76b88SJulian Elischer /*
675b0a76b88SJulian Elischer  * Do appropriate manipulations of a routing tree given
676b0a76b88SJulian Elischer  * all the bits of info needed
677b0a76b88SJulian Elischer  */
678df8bae1dSRodney W. Grimes int
679d1dd20beSSam Leffler rtrequest(int req,
680d1dd20beSSam Leffler 	struct sockaddr *dst,
681d1dd20beSSam Leffler 	struct sockaddr *gateway,
682d1dd20beSSam Leffler 	struct sockaddr *netmask,
683d1dd20beSSam Leffler 	int flags,
684d1dd20beSSam Leffler 	struct rtentry **ret_nrt)
685df8bae1dSRodney W. Grimes {
6868b07e49aSJulian Elischer 	return (rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0));
6878b07e49aSJulian Elischer }
6888b07e49aSJulian Elischer 
6898b07e49aSJulian Elischer int
6908b07e49aSJulian Elischer rtrequest_fib(int req,
6918b07e49aSJulian Elischer 	struct sockaddr *dst,
6928b07e49aSJulian Elischer 	struct sockaddr *gateway,
6938b07e49aSJulian Elischer 	struct sockaddr *netmask,
6948b07e49aSJulian Elischer 	int flags,
6958b07e49aSJulian Elischer 	struct rtentry **ret_nrt,
6968b07e49aSJulian Elischer 	u_int fibnum)
6978b07e49aSJulian Elischer {
6988071913dSRuslan Ermilov 	struct rt_addrinfo info;
6998071913dSRuslan Ermilov 
700ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
701ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
702ac4a76ebSBjoern A. Zeeb 
7038071913dSRuslan Ermilov 	bzero((caddr_t)&info, sizeof(info));
7048071913dSRuslan Ermilov 	info.rti_flags = flags;
7058071913dSRuslan Ermilov 	info.rti_info[RTAX_DST] = dst;
7068071913dSRuslan Ermilov 	info.rti_info[RTAX_GATEWAY] = gateway;
7078071913dSRuslan Ermilov 	info.rti_info[RTAX_NETMASK] = netmask;
7088b07e49aSJulian Elischer 	return rtrequest1_fib(req, &info, ret_nrt, fibnum);
7098071913dSRuslan Ermilov }
7108071913dSRuslan Ermilov 
/*
 * These (questionable) definitions of apparent local variables remain in
 * effect for every function from here down to the matching #undef lines;
 * rt_getifa_fib() and rtrequest1_fib() rely on them.  XXXXXX!!!
 */
7158071913dSRuslan Ermilov #define	dst	info->rti_info[RTAX_DST]
7168071913dSRuslan Ermilov #define	gateway	info->rti_info[RTAX_GATEWAY]
7178071913dSRuslan Ermilov #define	netmask	info->rti_info[RTAX_NETMASK]
7188071913dSRuslan Ermilov #define	ifaaddr	info->rti_info[RTAX_IFA]
7198071913dSRuslan Ermilov #define	ifpaddr	info->rti_info[RTAX_IFP]
7208071913dSRuslan Ermilov #define	flags	info->rti_flags
7218071913dSRuslan Ermilov 
7228071913dSRuslan Ermilov int
723d1dd20beSSam Leffler rt_getifa(struct rt_addrinfo *info)
7248071913dSRuslan Ermilov {
7258b07e49aSJulian Elischer 	return (rt_getifa_fib(info, 0));
7268b07e49aSJulian Elischer }
7278b07e49aSJulian Elischer 
7288b07e49aSJulian Elischer int
7298b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
7308b07e49aSJulian Elischer {
7318071913dSRuslan Ermilov 	struct ifaddr *ifa;
7328071913dSRuslan Ermilov 	int error = 0;
7338071913dSRuslan Ermilov 
7348071913dSRuslan Ermilov 	/*
7358071913dSRuslan Ermilov 	 * ifp may be specified by sockaddr_dl
7368071913dSRuslan Ermilov 	 * when protocol address is ambiguous.
7378071913dSRuslan Ermilov 	 */
7388071913dSRuslan Ermilov 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
7398071913dSRuslan Ermilov 	    ifpaddr->sa_family == AF_LINK &&
7408071913dSRuslan Ermilov 	    (ifa = ifa_ifwithnet(ifpaddr)) != NULL)
7418071913dSRuslan Ermilov 		info->rti_ifp = ifa->ifa_ifp;
7428071913dSRuslan Ermilov 	if (info->rti_ifa == NULL && ifaaddr != NULL)
7438071913dSRuslan Ermilov 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
7448071913dSRuslan Ermilov 	if (info->rti_ifa == NULL) {
7458071913dSRuslan Ermilov 		struct sockaddr *sa;
7468071913dSRuslan Ermilov 
7478071913dSRuslan Ermilov 		sa = ifaaddr != NULL ? ifaaddr :
7488071913dSRuslan Ermilov 		    (gateway != NULL ? gateway : dst);
7498071913dSRuslan Ermilov 		if (sa != NULL && info->rti_ifp != NULL)
7508071913dSRuslan Ermilov 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
7518071913dSRuslan Ermilov 		else if (dst != NULL && gateway != NULL)
7528b07e49aSJulian Elischer 			info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
7538b07e49aSJulian Elischer 							fibnum);
7548071913dSRuslan Ermilov 		else if (sa != NULL)
7558b07e49aSJulian Elischer 			info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
7568b07e49aSJulian Elischer 							fibnum);
7578071913dSRuslan Ermilov 	}
7588071913dSRuslan Ermilov 	if ((ifa = info->rti_ifa) != NULL) {
7598071913dSRuslan Ermilov 		if (info->rti_ifp == NULL)
7608071913dSRuslan Ermilov 			info->rti_ifp = ifa->ifa_ifp;
7618071913dSRuslan Ermilov 	} else
7628071913dSRuslan Ermilov 		error = ENETUNREACH;
7638071913dSRuslan Ermilov 	return (error);
7648071913dSRuslan Ermilov }
7658071913dSRuslan Ermilov 
7669c63e9dbSSam Leffler /*
7679c63e9dbSSam Leffler  * Expunges references to a route that's about to be reclaimed.
7689c63e9dbSSam Leffler  * The route must be locked.
7699c63e9dbSSam Leffler  */
7709c63e9dbSSam Leffler int
7719c63e9dbSSam Leffler rtexpunge(struct rtentry *rt)
7729c63e9dbSSam Leffler {
7739c63e9dbSSam Leffler 	struct radix_node *rn;
7749c63e9dbSSam Leffler 	struct radix_node_head *rnh;
7759c63e9dbSSam Leffler 	struct ifaddr *ifa;
7769c63e9dbSSam Leffler 	int error = 0;
7779c63e9dbSSam Leffler 
7789c63e9dbSSam Leffler 	RT_LOCK_ASSERT(rt);
7799c63e9dbSSam Leffler #if 0
7809c63e9dbSSam Leffler 	/*
7819c63e9dbSSam Leffler 	 * We cannot assume anything about the reference count
7829c63e9dbSSam Leffler 	 * because protocols call us in many situations; often
7839c63e9dbSSam Leffler 	 * before unwinding references to the table entry.
7849c63e9dbSSam Leffler 	 */
7859c63e9dbSSam Leffler 	KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
7869c63e9dbSSam Leffler #endif
7879c63e9dbSSam Leffler 	/*
7889c63e9dbSSam Leffler 	 * Find the correct routing tree to use for this Address Family
7899c63e9dbSSam Leffler 	 */
7908b07e49aSJulian Elischer 	rnh = rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
79185911824SLuigi Rizzo 	if (rnh == NULL)
7929c63e9dbSSam Leffler 		return (EAFNOSUPPORT);
7939c63e9dbSSam Leffler 
7949c63e9dbSSam Leffler 	RADIX_NODE_HEAD_LOCK(rnh);
7959c63e9dbSSam Leffler 
7969c63e9dbSSam Leffler 	/*
7979c63e9dbSSam Leffler 	 * Remove the item from the tree; it should be there,
7989c63e9dbSSam Leffler 	 * but when callers invoke us blindly it may not (sigh).
7999c63e9dbSSam Leffler 	 */
8009c63e9dbSSam Leffler 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
80185911824SLuigi Rizzo 	if (rn == NULL) {
8029c63e9dbSSam Leffler 		error = ESRCH;
8039c63e9dbSSam Leffler 		goto bad;
8049c63e9dbSSam Leffler 	}
8059c63e9dbSSam Leffler 	KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
8069c63e9dbSSam Leffler 		("unexpected flags 0x%x", rn->rn_flags));
807d6941ce9SLuigi Rizzo 	KASSERT(rt == RNTORT(rn),
8089c63e9dbSSam Leffler 		("lookup mismatch, rt %p rn %p", rt, rn));
8099c63e9dbSSam Leffler 
8109c63e9dbSSam Leffler 	rt->rt_flags &= ~RTF_UP;
8119c63e9dbSSam Leffler 
8129c63e9dbSSam Leffler 	/*
8139c63e9dbSSam Leffler 	 * Now search what's left of the subtree for any cloned
8149c63e9dbSSam Leffler 	 * routes which might have been formed from this node.
8159c63e9dbSSam Leffler 	 */
81626d02ca7SAndre Oppermann 	if ((rt->rt_flags & RTF_CLONING) && rt_mask(rt))
8179c63e9dbSSam Leffler 		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
8189c63e9dbSSam Leffler 				       rt_fixdelete, rt);
8199c63e9dbSSam Leffler 
8209c63e9dbSSam Leffler 	/*
8219c63e9dbSSam Leffler 	 * Remove any external references we may have.
8229c63e9dbSSam Leffler 	 * This might result in another rtentry being freed if
8239c63e9dbSSam Leffler 	 * we held its last reference.
8249c63e9dbSSam Leffler 	 */
8259c63e9dbSSam Leffler 	if (rt->rt_gwroute) {
826d6941ce9SLuigi Rizzo 		RTFREE(rt->rt_gwroute);
82785911824SLuigi Rizzo 		rt->rt_gwroute = NULL;
8289c63e9dbSSam Leffler 	}
8299c63e9dbSSam Leffler 
8309c63e9dbSSam Leffler 	/*
8319c63e9dbSSam Leffler 	 * Give the protocol a chance to keep things in sync.
8329c63e9dbSSam Leffler 	 */
8339c63e9dbSSam Leffler 	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
8349c63e9dbSSam Leffler 		struct rt_addrinfo info;
8359c63e9dbSSam Leffler 
8369c63e9dbSSam Leffler 		bzero((caddr_t)&info, sizeof(info));
8379c63e9dbSSam Leffler 		info.rti_flags = rt->rt_flags;
8389c63e9dbSSam Leffler 		info.rti_info[RTAX_DST] = rt_key(rt);
8399c63e9dbSSam Leffler 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
8409c63e9dbSSam Leffler 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
8419c63e9dbSSam Leffler 		ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
8429c63e9dbSSam Leffler 	}
8439c63e9dbSSam Leffler 
8449c63e9dbSSam Leffler 	/*
8459c63e9dbSSam Leffler 	 * one more rtentry floating around that is not
8469c63e9dbSSam Leffler 	 * linked to the routing table.
8479c63e9dbSSam Leffler 	 */
8489c63e9dbSSam Leffler 	rttrash++;
8499c63e9dbSSam Leffler bad:
8509c63e9dbSSam Leffler 	RADIX_NODE_HEAD_UNLOCK(rnh);
8519c63e9dbSSam Leffler 	return (error);
8529c63e9dbSSam Leffler }
8539c63e9dbSSam Leffler 
8548071913dSRuslan Ermilov int
855d1dd20beSSam Leffler rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
8568071913dSRuslan Ermilov {
8578b07e49aSJulian Elischer 	return (rtrequest1_fib(req, info, ret_nrt, 0));
8588b07e49aSJulian Elischer }
8598b07e49aSJulian Elischer 
/*
 * Carry out a routing-table request (RTM_DELETE, RTM_RESOLVE or RTM_ADD)
 * on the radix tree selected by fibnum and by the address family of the
 * destination in info.  Any other request yields EOPNOTSUPP.
 *
 * dst, gateway, netmask and flags used below are file-level macros that
 * expand to fields of *info, so assigning to them modifies the caller's
 * rt_addrinfo.
 *
 * ret_nrt, when non-NULL:
 *   RTM_DELETE  - receives the removed entry; the caller must free it.
 *   RTM_RESOLVE - on entry points at the route to clone from (required);
 *                 on success receives the new entry.
 *   RTM_ADD     - on success receives the new entry.
 * For RTM_RESOLVE/RTM_ADD the returned entry carries one reference for the
 * caller.
 *
 * Returns 0 on success, otherwise an errno value: EAFNOSUPPORT, ESRCH,
 * EINVAL, ENOBUFS, EEXIST, EOPNOTSUPP, or whatever rt_getifa_fib() or
 * rt_setgate() reported.  The radix node head lock is taken right after
 * the table lookup and released at the "bad" label before returning.
 */
8608b07e49aSJulian Elischer int
8618b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
8628b07e49aSJulian Elischer 				u_int fibnum)
8638b07e49aSJulian Elischer {
864d1dd20beSSam Leffler 	int error = 0;
865df8bae1dSRodney W. Grimes 	register struct rtentry *rt;
866df8bae1dSRodney W. Grimes 	register struct radix_node *rn;
867df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
868df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
869df8bae1dSRodney W. Grimes 	struct sockaddr *ndst;
	/* Record an error and leave through the common unlock path at "bad". */
870df8bae1dSRodney W. Grimes #define senderr(x) { error = x ; goto bad; }
871df8bae1dSRodney W. Grimes 
8728b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
8738b07e49aSJulian Elischer 	if (dst->sa_family != AF_INET)	/* Only INET supports > 1 fib now */
8748b07e49aSJulian Elischer 		fibnum = 0;
875b0a76b88SJulian Elischer 	/*
876b0a76b88SJulian Elischer 	 * Find the correct routing tree to use for this Address Family
877b0a76b88SJulian Elischer 	 */
8788b07e49aSJulian Elischer 	rnh = rt_tables[fibnum][dst->sa_family];
87985911824SLuigi Rizzo 	if (rnh == NULL)
880983985c1SJeffrey Hsu 		return (EAFNOSUPPORT);
881956b0b65SJeffrey Hsu 	RADIX_NODE_HEAD_LOCK(rnh);
882b0a76b88SJulian Elischer 	/*
883b0a76b88SJulian Elischer 	 * If we are adding a host route then we don't want to put
88466953138SRuslan Ermilov 	 * a netmask in the tree, nor do we want to clone it.
885b0a76b88SJulian Elischer 	 */
88666953138SRuslan Ermilov 	if (flags & RTF_HOST) {
88785911824SLuigi Rizzo 		netmask = NULL;
88826d02ca7SAndre Oppermann 		flags &= ~RTF_CLONING;
88966953138SRuslan Ermilov 	}
890df8bae1dSRodney W. Grimes 	switch (req) {
891df8bae1dSRodney W. Grimes 	case RTM_DELETE:
892e440aed9SQing Li #ifdef RADIX_MPATH
893e440aed9SQing Li 		/*
894e440aed9SQing Li 		 * if we got multipath routes, we require users to specify
895e440aed9SQing Li 		 * a matching RTAX_GATEWAY.
896e440aed9SQing Li 		 */
897e440aed9SQing Li 		if (rn_mpath_capable(rnh)) {
898e440aed9SQing Li 			struct rtentry *rto = NULL;
899e440aed9SQing Li 
900e440aed9SQing Li 			rn = rnh->rnh_matchaddr(dst, rnh);
901e440aed9SQing Li 			if (rn == NULL)
902e440aed9SQing Li 				senderr(ESRCH);
903e440aed9SQing Li  			rto = rt = RNTORT(rn);
904e440aed9SQing Li 			rt = rt_mpath_matchgate(rt, gateway);
905e440aed9SQing Li 			if (!rt)
906e440aed9SQing Li 				senderr(ESRCH);
907e440aed9SQing Li 			/*
908e440aed9SQing Li 			 * this is the first entry in the chain
909e440aed9SQing Li 			 */
910e440aed9SQing Li 			if (rto == rt) {
911e440aed9SQing Li 				rn = rn_mpath_next((struct radix_node *)rt);
912e440aed9SQing Li 				/*
913e440aed9SQing Li 				 * there is another entry, now it's active
914e440aed9SQing Li 				 */
915e440aed9SQing Li 				if (rn) {
916e440aed9SQing Li 					rto = RNTORT(rn);
917e440aed9SQing Li 					RT_LOCK(rto);
918e440aed9SQing Li 					rto->rt_flags |= RTF_UP;
919e440aed9SQing Li 					RT_UNLOCK(rto);
920e440aed9SQing Li 				} else if (rt->rt_flags & RTF_GATEWAY) {
921e440aed9SQing Li 					/*
922e440aed9SQing Li 					 * For gateway routes, we need to
923e440aed9SQing Li 					 * make sure that we are deleting
924e440aed9SQing Li 					 * the correct gateway.
925e440aed9SQing Li 					 * rt_mpath_matchgate() does not
926e440aed9SQing Li 					 * check the case when there is only
927e440aed9SQing Li 					 * one route in the chain.
928e440aed9SQing Li 					 */
929e440aed9SQing Li 					if (gateway &&
930e440aed9SQing Li 					    (rt->rt_gateway->sa_len != gateway->sa_len ||
931e440aed9SQing Li 					    memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
932e440aed9SQing Li 						senderr(ESRCH);
933e440aed9SQing Li 				}
934e440aed9SQing Li 				/*
935e440aed9SQing Li 				 * use the normal delete code to remove
936e440aed9SQing Li 				 * the first entry
937e440aed9SQing Li 				 */
938e440aed9SQing Li 				goto normal_rtdel;
939e440aed9SQing Li 			}
940e440aed9SQing Li 			/*
941e440aed9SQing Li 			 * if the entry is 2nd and on up
942e440aed9SQing Li 			 */
943e440aed9SQing Li 			if (!rt_mpath_deldup(rto, rt))
944e440aed9SQing Li 				panic ("rtrequest1: rt_mpath_deldup");
945e440aed9SQing Li 			RT_LOCK(rt);
946e440aed9SQing Li 			RT_ADDREF(rt);
947e440aed9SQing Li 			rt->rt_flags &= ~RTF_UP;
948e440aed9SQing Li 			goto deldone;  /* done with the RTM_DELETE command */
949e440aed9SQing Li 		}
950e440aed9SQing Li 
951e440aed9SQing Li normal_rtdel:
952ea9cd9f2SBjoern A. Zeeb #endif
953b0a76b88SJulian Elischer 		/*
954b0a76b88SJulian Elischer 		 * Remove the item from the tree and return it.
955b0a76b88SJulian Elischer 		 * Complain if it is not there and do no more processing.
956b0a76b88SJulian Elischer 		 */
957d1dd20beSSam Leffler 		rn = rnh->rnh_deladdr(dst, netmask, rnh);
95885911824SLuigi Rizzo 		if (rn == NULL)
959df8bae1dSRodney W. Grimes 			senderr(ESRCH);
960df8bae1dSRodney W. Grimes 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
961df8bae1dSRodney W. Grimes 			panic ("rtrequest delete");
962d6941ce9SLuigi Rizzo 		rt = RNTORT(rn);
963d1dd20beSSam Leffler 		RT_LOCK(rt);
9647138d65cSSam Leffler 		RT_ADDREF(rt);
96571eba915SRuslan Ermilov 		rt->rt_flags &= ~RTF_UP;
966c2bed6a3SGarrett Wollman 
967c2bed6a3SGarrett Wollman 		/*
968c2bed6a3SGarrett Wollman 		 * Now search what's left of the subtree for any cloned
969c2bed6a3SGarrett Wollman 		 * routes which might have been formed from this node.
970c2bed6a3SGarrett Wollman 		 */
97126d02ca7SAndre Oppermann 		if ((rt->rt_flags & RTF_CLONING) &&
972089cdfadSRuslan Ermilov 		    rt_mask(rt)) {
973089cdfadSRuslan Ermilov 			rnh->rnh_walktree_from(rnh, dst, rt_mask(rt),
974c2bed6a3SGarrett Wollman 					       rt_fixdelete, rt);
975c2bed6a3SGarrett Wollman 		}
9763545b048SGarrett Wollman 
977b0a76b88SJulian Elischer 		/*
978b0a76b88SJulian Elischer 		 * Remove any external references we may have.
979b0a76b88SJulian Elischer 		 * This might result in another rtentry being freed if
980dc733423SDag-Erling Smørgrav 		 * we held its last reference.
981b0a76b88SJulian Elischer 		 */
9826ac3b69dSBill Fenner 		if (rt->rt_gwroute) {
983d6941ce9SLuigi Rizzo 			RTFREE(rt->rt_gwroute);
98485911824SLuigi Rizzo 			rt->rt_gwroute = NULL;
9856ac3b69dSBill Fenner 		}
9866ac3b69dSBill Fenner 
9873545b048SGarrett Wollman 		/*
988499676dfSJulian Elischer 		 * give the protocol a chance to keep things in sync.
989b0a76b88SJulian Elischer 		 */
990df8bae1dSRodney W. Grimes 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
9918071913dSRuslan Ermilov 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
992499676dfSJulian Elischer 
993ea9cd9f2SBjoern A. Zeeb #ifdef RADIX_MPATH
994e440aed9SQing Li deldone:
995ea9cd9f2SBjoern A. Zeeb #endif
996b0a76b88SJulian Elischer 		/*
997d6941ce9SLuigi Rizzo 		 * One more rtentry floating around that is not
998d6941ce9SLuigi Rizzo 		 * linked to the routing table. rttrash will be decremented
999d6941ce9SLuigi Rizzo 		 * when RTFREE(rt) is eventually called.
1000499676dfSJulian Elischer 		 */
1001499676dfSJulian Elischer 		rttrash++;
1002499676dfSJulian Elischer 
1003499676dfSJulian Elischer 		/*
1004499676dfSJulian Elischer 		 * If the caller wants it, then it can have it,
1005499676dfSJulian Elischer 		 * but it's up to it to free the rtentry as we won't be
1006499676dfSJulian Elischer 		 * doing it.
1007b0a76b88SJulian Elischer 		 */
1008d1dd20beSSam Leffler 		if (ret_nrt) {
1009df8bae1dSRodney W. Grimes 			*ret_nrt = rt;
1010d1dd20beSSam Leffler 			RT_UNLOCK(rt);
1011d1dd20beSSam Leffler 		} else
1012d1dd20beSSam Leffler 			RTFREE_LOCKED(rt);
1013df8bae1dSRodney W. Grimes 		break;
1014df8bae1dSRodney W. Grimes 
1015df8bae1dSRodney W. Grimes 	case RTM_RESOLVE:
		/* *ret_nrt is the route to clone from; it is mandatory here. */
101685911824SLuigi Rizzo 		if (ret_nrt == NULL || (rt = *ret_nrt) == NULL)
1017df8bae1dSRodney W. Grimes 			senderr(EINVAL);
1018df8bae1dSRodney W. Grimes 		ifa = rt->rt_ifa;
1019d1dd20beSSam Leffler 		/* XXX locking? */
10203682d2baSDavid Greenman 		flags = rt->rt_flags &
102126d02ca7SAndre Oppermann 		    ~(RTF_CLONING | RTF_STATIC);
1022995add1aSGarrett Wollman 		flags |= RTF_WASCLONED;
1023df8bae1dSRodney W. Grimes 		gateway = rt->rt_gateway;
102485911824SLuigi Rizzo 		if ((netmask = rt->rt_genmask) == NULL)
1025df8bae1dSRodney W. Grimes 			flags |= RTF_HOST;
1026df8bae1dSRodney W. Grimes 		goto makeroute;
1027df8bae1dSRodney W. Grimes 
1028df8bae1dSRodney W. Grimes 	case RTM_ADD:
10295df72964SGarrett Wollman 		if ((flags & RTF_GATEWAY) && !gateway)
103016a2e0a6SQing Li 			senderr(EINVAL);
103116a2e0a6SQing Li 		if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
103216a2e0a6SQing Li 		    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
103316a2e0a6SQing Li 			senderr(EINVAL);
10345df72964SGarrett Wollman 
10358b07e49aSJulian Elischer 		if (info->rti_ifa == NULL && (error = rt_getifa_fib(info, fibnum)))
10368071913dSRuslan Ermilov 			senderr(error);
10378071913dSRuslan Ermilov 		ifa = info->rti_ifa;
10385df72964SGarrett Wollman 
1039df8bae1dSRodney W. Grimes 	makeroute:
10402dc1d581SAndre Oppermann 		rt = uma_zalloc(rtzone, M_NOWAIT | M_ZERO);
104185911824SLuigi Rizzo 		if (rt == NULL)
1042df8bae1dSRodney W. Grimes 			senderr(ENOBUFS);
1043d1dd20beSSam Leffler 		RT_LOCK_INIT(rt);
1044df8bae1dSRodney W. Grimes 		rt->rt_flags = RTF_UP | flags;
10458b07e49aSJulian Elischer 		rt->rt_fibnum = fibnum;
1046499676dfSJulian Elischer 		/*
1047499676dfSJulian Elischer 		 * Add the gateway. Possibly re-malloc-ing the storage for it
1048499676dfSJulian Elischer 		 * also add the rt_gwroute if possible.
1049499676dfSJulian Elischer 		 */
1050d1dd20beSSam Leffler 		RT_LOCK(rt);
1051831a80b0SMatthew Dillon 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
1052d1dd20beSSam Leffler 			RT_LOCK_DESTROY(rt);
10532dc1d581SAndre Oppermann 			uma_zfree(rtzone, rt);
1054704b0666SBill Fenner 			senderr(error);
1055df8bae1dSRodney W. Grimes 		}
1056499676dfSJulian Elischer 
1057499676dfSJulian Elischer 		/*
1058499676dfSJulian Elischer 		 * point to the (possibly newly malloc'd) dest address.
1059499676dfSJulian Elischer 		 */
1060d1dd20beSSam Leffler 		ndst = (struct sockaddr *)rt_key(rt);
1061499676dfSJulian Elischer 
1062499676dfSJulian Elischer 		/*
1063499676dfSJulian Elischer 		 * make sure it contains the value we want (masked if needed).
1064499676dfSJulian Elischer 		 */
1065df8bae1dSRodney W. Grimes 		if (netmask) {
1066df8bae1dSRodney W. Grimes 			rt_maskedcopy(dst, ndst, netmask);
1067df8bae1dSRodney W. Grimes 		} else
10681838a647SLuigi Rizzo 			bcopy(dst, ndst, dst->sa_len);
10698e718bb4SGarrett Wollman 
10708e718bb4SGarrett Wollman 		/*
1071499676dfSJulian Elischer 		 * Note that we now have a reference to the ifa.
10728e718bb4SGarrett Wollman 		 * This moved from below so that rnh->rnh_addaddr() can
1073499676dfSJulian Elischer 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
10748e718bb4SGarrett Wollman 		 */
107519fc74fbSJeffrey Hsu 		IFAREF(ifa);
10768e718bb4SGarrett Wollman 		rt->rt_ifa = ifa;
10778e718bb4SGarrett Wollman 		rt->rt_ifp = ifa->ifa_ifp;
10788e718bb4SGarrett Wollman 
1079e440aed9SQing Li #ifdef RADIX_MPATH
1080e440aed9SQing Li 		/* do not permit exactly the same dst/mask/gw pair */
1081e440aed9SQing Li 		if (rn_mpath_capable(rnh) &&
1082e440aed9SQing Li 			rt_mpath_conflict(rnh, rt, netmask)) {
1083e440aed9SQing Li 			if (rt->rt_gwroute)
1084e440aed9SQing Li 				RTFREE(rt->rt_gwroute);
1085e440aed9SQing Li 			if (rt->rt_ifa) {
1086e440aed9SQing Li 				IFAFREE(rt->rt_ifa);
1087e440aed9SQing Li 			}
1088e440aed9SQing Li 			Free(rt_key(rt));
1089e440aed9SQing Li 			RT_LOCK_DESTROY(rt);
1090e440aed9SQing Li 			uma_zfree(rtzone, rt);
1091e440aed9SQing Li 			senderr(EEXIST);
1092e440aed9SQing Li 		}
1093e440aed9SQing Li #endif
1094e440aed9SQing Li 
1095d1dd20beSSam Leffler 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
1096d1dd20beSSam Leffler 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
109785911824SLuigi Rizzo 		if (rn == NULL) {
1098aca1a47cSGarrett Wollman 			struct rtentry *rt2;
1099aca1a47cSGarrett Wollman 			/*
1100aca1a47cSGarrett Wollman 			 * Uh-oh, we already have one of these in the tree.
1101aca1a47cSGarrett Wollman 			 * We do a special hack: if the route that's already
110226d02ca7SAndre Oppermann 			 * there was generated by the cloning mechanism
110326d02ca7SAndre Oppermann 			 * then we just blow it away and retry the insertion
110426d02ca7SAndre Oppermann 			 * of the new one.
1105aca1a47cSGarrett Wollman 			 */
11068b07e49aSJulian Elischer 			rt2 = rtalloc1_fib(dst, 0, 0, fibnum);
1107aca1a47cSGarrett Wollman 			if (rt2 && rt2->rt_parent) {
11089c63e9dbSSam Leffler 				rtexpunge(rt2);
11099c63e9dbSSam Leffler 				RT_UNLOCK(rt2);
1110d1dd20beSSam Leffler 				rn = rnh->rnh_addaddr(ndst, netmask,
1111aca1a47cSGarrett Wollman 						      rnh, rt->rt_nodes);
1112fde327d6SGarrett Wollman 			} else if (rt2) {
1113499676dfSJulian Elischer 				/* undo the extra ref we got */
1114d1dd20beSSam Leffler 				RTFREE_LOCKED(rt2);
1115aca1a47cSGarrett Wollman 			}
1116aca1a47cSGarrett Wollman 		}
1117aca1a47cSGarrett Wollman 
1118499676dfSJulian Elischer 		/*
1119499676dfSJulian Elischer 		 * If it still failed to go into the tree,
1120499676dfSJulian Elischer 		 * then un-make it (this should be a function)
1121499676dfSJulian Elischer 		 */
112285911824SLuigi Rizzo 		if (rn == NULL) {
1123df8bae1dSRodney W. Grimes 			if (rt->rt_gwroute)
1124d1dd20beSSam Leffler 				RTFREE(rt->rt_gwroute);
1125d1dd20beSSam Leffler 			if (rt->rt_ifa)
11268e718bb4SGarrett Wollman 				IFAFREE(rt->rt_ifa);
1127df8bae1dSRodney W. Grimes 			Free(rt_key(rt));
1128d1dd20beSSam Leffler 			RT_LOCK_DESTROY(rt);
11292dc1d581SAndre Oppermann 			uma_zfree(rtzone, rt);
1130df8bae1dSRodney W. Grimes 			senderr(EEXIST);
1131df8bae1dSRodney W. Grimes 		}
1132499676dfSJulian Elischer 
113385911824SLuigi Rizzo 		rt->rt_parent = NULL;
1134771edb14SGarrett Wollman 
1135499676dfSJulian Elischer 		/*
1136499676dfSJulian Elischer 		 * If we got here from RESOLVE, then we are cloning
1137499676dfSJulian Elischer 		 * so clone the rest, and note that we
1138499676dfSJulian Elischer 		 * are a clone (and increment the parent's references)
1139499676dfSJulian Elischer 		 */
1140c2bed6a3SGarrett Wollman 		if (req == RTM_RESOLVE) {
1141d1dd20beSSam Leffler 			KASSERT(ret_nrt && *ret_nrt,
1142d1dd20beSSam Leffler 				("no route to clone from"));
1143df8bae1dSRodney W. Grimes 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
114454e84abbSMike Silbersack 			rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
114526d02ca7SAndre Oppermann 			if ((*ret_nrt)->rt_flags & RTF_CLONING) {
1146d1dd20beSSam Leffler 				/*
1147d1dd20beSSam Leffler 				 * NB: We do not bump the refcnt on the parent
1148d1dd20beSSam Leffler 				 * entry under the assumption that it will
1149d1dd20beSSam Leffler 				 * remain so long as we do.  This is
1150d1dd20beSSam Leffler 				 * important when deleting the parent route
1151d1dd20beSSam Leffler 				 * as this operation requires traversing
1152d1dd20beSSam Leffler 				 * the tree to delete all clones and futzing
1153d1dd20beSSam Leffler 				 * with refcnts requires us to double-lock
1154d1dd20beSSam Leffler 				 * parent through this back reference.
1155d1dd20beSSam Leffler 				 */
1156d1dd20beSSam Leffler 				rt->rt_parent = *ret_nrt;
1157771edb14SGarrett Wollman 			}
115818e1f1f1SGarrett Wollman 		}
1159499676dfSJulian Elischer 
1160499676dfSJulian Elischer 		/*
1161a0c0e34bSGleb Smirnoff 		 * If this protocol has something to add to this then
1162499676dfSJulian Elischer 		 * allow it to do that as well.
1163499676dfSJulian Elischer 		 */
1164df8bae1dSRodney W. Grimes 		if (ifa->ifa_rtrequest)
11658071913dSRuslan Ermilov 			ifa->ifa_rtrequest(req, rt, info);
1166499676dfSJulian Elischer 
1167cd02a0b7SGarrett Wollman 		/*
1168cd02a0b7SGarrett Wollman 		 * We repeat the same procedure from rt_setgate() here because
1169cd02a0b7SGarrett Wollman 		 * it doesn't fire when we call it there because the node
1170cd02a0b7SGarrett Wollman 		 * hasn't been added to the tree yet.
1171cd02a0b7SGarrett Wollman 		 */
117236fea5deSRuslan Ermilov 		if (req == RTM_ADD &&
117385911824SLuigi Rizzo 		    !(rt->rt_flags & RTF_HOST) && rt_mask(rt) != NULL) {
1174cd02a0b7SGarrett Wollman 			struct rtfc_arg arg;
1175cd02a0b7SGarrett Wollman 			arg.rnh = rnh;
1176cd02a0b7SGarrett Wollman 			arg.rt0 = rt;
1177cd02a0b7SGarrett Wollman 			rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
1178cd02a0b7SGarrett Wollman 					       rt_fixchange, &arg);
1179cd02a0b7SGarrett Wollman 		}
1180cd02a0b7SGarrett Wollman 
1181499676dfSJulian Elischer 		/*
1182499676dfSJulian Elischer 		 * actually return a resultant rtentry and
1183499676dfSJulian Elischer 		 * give the caller a single reference.
1184499676dfSJulian Elischer 		 */
1185df8bae1dSRodney W. Grimes 		if (ret_nrt) {
1186df8bae1dSRodney W. Grimes 			*ret_nrt = rt;
11877138d65cSSam Leffler 			RT_ADDREF(rt);
1188df8bae1dSRodney W. Grimes 		}
1189d1dd20beSSam Leffler 		RT_UNLOCK(rt);
1190df8bae1dSRodney W. Grimes 		break;
11918071913dSRuslan Ermilov 	default:
11928071913dSRuslan Ermilov 		error = EOPNOTSUPP;
1193df8bae1dSRodney W. Grimes 	}
1194df8bae1dSRodney W. Grimes bad:
1195956b0b65SJeffrey Hsu 	RADIX_NODE_HEAD_UNLOCK(rnh);
1196df8bae1dSRodney W. Grimes 	return (error);
1197d1dd20beSSam Leffler #undef senderr
1198d1dd20beSSam Leffler }
1199d1dd20beSSam Leffler 
12008071913dSRuslan Ermilov #undef dst
12018071913dSRuslan Ermilov #undef gateway
12028071913dSRuslan Ermilov #undef netmask
12038071913dSRuslan Ermilov #undef ifaaddr
12048071913dSRuslan Ermilov #undef ifpaddr
12058071913dSRuslan Ermilov #undef flags
1206df8bae1dSRodney W. Grimes 
120718e1f1f1SGarrett Wollman /*
120818e1f1f1SGarrett Wollman  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
120918e1f1f1SGarrett Wollman  * (i.e., the routes related to it by the operation of cloning).  This
1210c2bed6a3SGarrett Wollman  * routine is iterated over all potential former-child-routes by way of
1211c2bed6a3SGarrett Wollman  * rnh->rnh_walktree_from() above, and those that actually are children of
1212c2bed6a3SGarrett Wollman  * the late parent (passed in as VP here) are themselves deleted.
121318e1f1f1SGarrett Wollman  */
1214c2bed6a3SGarrett Wollman static int
1215d1dd20beSSam Leffler rt_fixdelete(struct radix_node *rn, void *vp)
121618e1f1f1SGarrett Wollman {
1217d6941ce9SLuigi Rizzo 	struct rtentry *rt = RNTORT(rn);
1218c2bed6a3SGarrett Wollman 	struct rtentry *rt0 = vp;
121918e1f1f1SGarrett Wollman 
122036fea5deSRuslan Ermilov 	if (rt->rt_parent == rt0 &&
122126d02ca7SAndre Oppermann 	    !(rt->rt_flags & (RTF_PINNED | RTF_CLONING))) {
12228b07e49aSJulian Elischer 		return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL, rt_mask(rt),
12238b07e49aSJulian Elischer 				 rt->rt_flags, NULL, rt->rt_fibnum);
122418e1f1f1SGarrett Wollman 	}
1225c2bed6a3SGarrett Wollman 	return 0;
122618e1f1f1SGarrett Wollman }
122718e1f1f1SGarrett Wollman 
1228cd02a0b7SGarrett Wollman /*
1229cd02a0b7SGarrett Wollman  * This routine is called from rt_setgate() to do the analogous thing for
1230cd02a0b7SGarrett Wollman  * adds and changes.  There is the added complication in this case of a
1231cd02a0b7SGarrett Wollman  * middle insert; i.e., insertion of a new network route between an older
1232cd02a0b7SGarrett Wollman  * network route and (cloned) host routes.  For this reason, a simple check
1233cd02a0b7SGarrett Wollman  * of rt->rt_parent is insufficient; each candidate route must be tested
1234cd02a0b7SGarrett Wollman  * against the (mask, value) of the new route (passed as before in vp)
12359a701516SHajimu UMEMOTO  * to see if the new route matches it.
1236cd02a0b7SGarrett Wollman  *
1237cd02a0b7SGarrett Wollman  * XXX - it may be possible to do fixdelete() for changes and reserve this
1238cd02a0b7SGarrett Wollman  * routine just for adds.  I'm not sure why I thought it was necessary to do
1239cd02a0b7SGarrett Wollman  * changes this way.
1240cd02a0b7SGarrett Wollman  */
1241cd02a0b7SGarrett Wollman 
1242cd02a0b7SGarrett Wollman static int
1243d1dd20beSSam Leffler rt_fixchange(struct radix_node *rn, void *vp)
1244cd02a0b7SGarrett Wollman {
1245d6941ce9SLuigi Rizzo 	struct rtentry *rt = RNTORT(rn);
1246cd02a0b7SGarrett Wollman 	struct rtfc_arg *ap = vp;
1247cd02a0b7SGarrett Wollman 	struct rtentry *rt0 = ap->rt0;
1248cd02a0b7SGarrett Wollman 	struct radix_node_head *rnh = ap->rnh;
12499a701516SHajimu UMEMOTO 	u_char *xk1, *xm1, *xk2, *xmp;
12509a701516SHajimu UMEMOTO 	int i, len, mlen;
1251cd02a0b7SGarrett Wollman 
125285911824SLuigi Rizzo 	/* make sure we have a parent, and route is not pinned or cloning */
125336fea5deSRuslan Ermilov 	if (!rt->rt_parent ||
125485911824SLuigi Rizzo 	    (rt->rt_flags & (RTF_PINNED | RTF_CLONING)))
1255cd02a0b7SGarrett Wollman 		return 0;
1256cd02a0b7SGarrett Wollman 
125785911824SLuigi Rizzo 	if (rt->rt_parent == rt0)	/* parent match */
125885911824SLuigi Rizzo 		goto delete_rt;
1259cd02a0b7SGarrett Wollman 	/*
1260cd02a0b7SGarrett Wollman 	 * There probably is a function somewhere which does this...
1261cd02a0b7SGarrett Wollman 	 * if not, there should be.
1262cd02a0b7SGarrett Wollman 	 */
1263d1dd20beSSam Leffler 	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
1264cd02a0b7SGarrett Wollman 
1265cd02a0b7SGarrett Wollman 	xk1 = (u_char *)rt_key(rt0);
1266cd02a0b7SGarrett Wollman 	xm1 = (u_char *)rt_mask(rt0);
1267cd02a0b7SGarrett Wollman 	xk2 = (u_char *)rt_key(rt);
1268cd02a0b7SGarrett Wollman 
12699a701516SHajimu UMEMOTO 	/* avoid applying a less specific route */
12709a701516SHajimu UMEMOTO 	xmp = (u_char *)rt_mask(rt->rt_parent);
1271d1dd20beSSam Leffler 	mlen = rt_key(rt->rt_parent)->sa_len;
127285911824SLuigi Rizzo 	if (mlen > rt_key(rt0)->sa_len)		/* less specific route */
12739a701516SHajimu UMEMOTO 		return 0;
127485911824SLuigi Rizzo 	for (i = rnh->rnh_treetop->rn_offset; i < mlen; i++)
127585911824SLuigi Rizzo 		if ((xmp[i] & ~(xmp[i] ^ xm1[i])) != xmp[i])
127685911824SLuigi Rizzo 			return 0;	/* less specific route */
12779a701516SHajimu UMEMOTO 
127885911824SLuigi Rizzo 	for (i = rnh->rnh_treetop->rn_offset; i < len; i++)
127985911824SLuigi Rizzo 		if ((xk2[i] & xm1[i]) != xk1[i])
128085911824SLuigi Rizzo 			return 0;	/* no match */
1281cd02a0b7SGarrett Wollman 
1282cd02a0b7SGarrett Wollman 	/*
1283cd02a0b7SGarrett Wollman 	 * OK, this node is a clone, and matches the node currently being
1284cd02a0b7SGarrett Wollman 	 * changed/added under the node's mask.  So, get rid of it.
1285cd02a0b7SGarrett Wollman 	 */
128685911824SLuigi Rizzo delete_rt:
12878b07e49aSJulian Elischer 	return rtrequest_fib(RTM_DELETE, rt_key(rt), NULL,
12888b07e49aSJulian Elischer 			 rt_mask(rt), rt->rt_flags, NULL, rt->rt_fibnum);
1289cd02a0b7SGarrett Wollman }
1290cd02a0b7SGarrett Wollman 
1291df8bae1dSRodney W. Grimes int
1292d1dd20beSSam Leffler rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
1293df8bae1dSRodney W. Grimes {
1294d1dd20beSSam Leffler 	/* XXX dst may be overwritten, can we move this to below */
12958b07e49aSJulian Elischer 	struct radix_node_head *rnh = rt_tables[rt->rt_fibnum][dst->sa_family];
1296e74642dfSLuigi Rizzo 	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
1297d1dd20beSSam Leffler 
1298f321ff15SMaxime Henrion again:
1299d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
1300df8bae1dSRodney W. Grimes 
13011db1fffaSBill Fenner 	/*
13021db1fffaSBill Fenner 	 * A host route with the destination equal to the gateway
13031db1fffaSBill Fenner 	 * will interfere with keeping LLINFO in the routing
13041db1fffaSBill Fenner 	 * table, so disallow it.
13051db1fffaSBill Fenner 	 */
1306d1dd20beSSam Leffler 	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
13071db1fffaSBill Fenner 					(RTF_HOST|RTF_GATEWAY)) &&
1308d1dd20beSSam Leffler 	    dst->sa_len == gate->sa_len &&
1309d1dd20beSSam Leffler 	    bcmp(dst, gate, dst->sa_len) == 0) {
13101db1fffaSBill Fenner 		/*
13111db1fffaSBill Fenner 		 * The route might already exist if this is an RTM_CHANGE
13121db1fffaSBill Fenner 		 * or a routing redirect, so try to delete it.
13131db1fffaSBill Fenner 		 */
1314d1dd20beSSam Leffler 		if (rt_key(rt))
13159c63e9dbSSam Leffler 			rtexpunge(rt);
13161db1fffaSBill Fenner 		return EADDRNOTAVAIL;
13171db1fffaSBill Fenner 	}
13181db1fffaSBill Fenner 
1319499676dfSJulian Elischer 	/*
13202d7e9eadSGleb Smirnoff 	 * Cloning loop avoidance in case of bad configuration.
13212d7e9eadSGleb Smirnoff 	 */
13222d7e9eadSGleb Smirnoff 	if (rt->rt_flags & RTF_GATEWAY) {
13232d7e9eadSGleb Smirnoff 		struct rtentry *gwrt;
13242d7e9eadSGleb Smirnoff 
13252d7e9eadSGleb Smirnoff 		RT_UNLOCK(rt);		/* XXX workaround LOR */
13268b07e49aSJulian Elischer 		gwrt = rtalloc1_fib(gate, 1, 0, rt->rt_fibnum);
13272d7e9eadSGleb Smirnoff 		if (gwrt == rt) {
13282d7e9eadSGleb Smirnoff 			RT_REMREF(rt);
13292d7e9eadSGleb Smirnoff 			return (EADDRINUSE); /* failure */
13302d7e9eadSGleb Smirnoff 		}
1331f321ff15SMaxime Henrion 		/*
1332f321ff15SMaxime Henrion 		 * Try to reacquire the lock on rt, and if it fails,
1333f321ff15SMaxime Henrion 		 * clean state and restart from scratch.
1334f321ff15SMaxime Henrion 		 */
1335f321ff15SMaxime Henrion 		if (!RT_TRYLOCK(rt)) {
1336f321ff15SMaxime Henrion 			RTFREE_LOCKED(gwrt);
13372d7e9eadSGleb Smirnoff 			RT_LOCK(rt);
1338f321ff15SMaxime Henrion 			goto again;
1339f321ff15SMaxime Henrion 		}
13402d7e9eadSGleb Smirnoff 		/*
13412d7e9eadSGleb Smirnoff 		 * If there is already a gwroute, then drop it. If we
13422d7e9eadSGleb Smirnoff 		 * are asked to replace route with itself, then do
13432d7e9eadSGleb Smirnoff 		 * not leak its refcounter.
13442d7e9eadSGleb Smirnoff 		 */
13452d7e9eadSGleb Smirnoff 		if (rt->rt_gwroute != NULL) {
13462d7e9eadSGleb Smirnoff 			if (rt->rt_gwroute == gwrt) {
13472d7e9eadSGleb Smirnoff 				RT_REMREF(rt->rt_gwroute);
13482d7e9eadSGleb Smirnoff 			} else
13492d7e9eadSGleb Smirnoff 				RTFREE(rt->rt_gwroute);
13502d7e9eadSGleb Smirnoff 		}
13512d7e9eadSGleb Smirnoff 
13522d7e9eadSGleb Smirnoff 		if ((rt->rt_gwroute = gwrt) != NULL)
13532d7e9eadSGleb Smirnoff 			RT_UNLOCK(rt->rt_gwroute);
13542d7e9eadSGleb Smirnoff 	}
13552d7e9eadSGleb Smirnoff 
13562d7e9eadSGleb Smirnoff 	/*
135785911824SLuigi Rizzo 	 * Prepare to store the gateway in rt->rt_gateway.
135885911824SLuigi Rizzo 	 * Both dst and gateway are stored one after the other in the same
135985911824SLuigi Rizzo 	 * malloc'd chunk. If we have room, we can reuse the old buffer,
136085911824SLuigi Rizzo 	 * rt_gateway already points to the right place.
136185911824SLuigi Rizzo 	 * Otherwise, malloc a new block and update the 'dst' address.
1362499676dfSJulian Elischer 	 */
136385911824SLuigi Rizzo 	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
136485911824SLuigi Rizzo 		caddr_t new;
136585911824SLuigi Rizzo 
1366df8bae1dSRodney W. Grimes 		R_Malloc(new, caddr_t, dlen + glen);
136785911824SLuigi Rizzo 		if (new == NULL)
13681db1fffaSBill Fenner 			return ENOBUFS;
1369499676dfSJulian Elischer 		/*
137085911824SLuigi Rizzo 		 * XXX note, we copy from *dst and not *rt_key(rt) because
137185911824SLuigi Rizzo 		 * rt_setgate() can be called to initialize a newly
137285911824SLuigi Rizzo 		 * allocated route entry, in which case rt_key(rt) == NULL
137385911824SLuigi Rizzo 		 * (and also rt->rt_gateway == NULL).
137485911824SLuigi Rizzo 		 * Free()/free() handle a NULL argument just fine.
1375499676dfSJulian Elischer 		 */
13761838a647SLuigi Rizzo 		bcopy(dst, new, dlen);
137785911824SLuigi Rizzo 		Free(rt_key(rt));	/* free old block, if any */
1378445e045bSAlexander Kabaev 		rt_key(rt) = (struct sockaddr *)new;
137985911824SLuigi Rizzo 		rt->rt_gateway = (struct sockaddr *)(new + dlen);
1380df8bae1dSRodney W. Grimes 	}
1381499676dfSJulian Elischer 
1382499676dfSJulian Elischer 	/*
138385911824SLuigi Rizzo 	 * Copy the new gateway value into the memory chunk.
138485911824SLuigi Rizzo 	 */
138585911824SLuigi Rizzo 	bcopy(gate, rt->rt_gateway, glen);
138685911824SLuigi Rizzo 
138785911824SLuigi Rizzo 	/*
1388cd02a0b7SGarrett Wollman 	 * This isn't going to do anything useful for host routes, so
1389cd02a0b7SGarrett Wollman 	 * don't bother.  Also make sure we have a reasonable mask
1390cd02a0b7SGarrett Wollman 	 * (we don't yet have one during adds).
1391cd02a0b7SGarrett Wollman 	 */
1392cd02a0b7SGarrett Wollman 	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
1393cd02a0b7SGarrett Wollman 		struct rtfc_arg arg;
1394d1dd20beSSam Leffler 
1395cd02a0b7SGarrett Wollman 		arg.rnh = rnh;
1396cd02a0b7SGarrett Wollman 		arg.rt0 = rt;
1397e21afc60SSam Leffler 		RT_UNLOCK(rt);		/* XXX workaround LOR */
1398956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_LOCK(rnh);
139972b9c8c9SSam Leffler 		RT_LOCK(rt);
1400cd02a0b7SGarrett Wollman 		rnh->rnh_walktree_from(rnh, rt_key(rt), rt_mask(rt),
1401cd02a0b7SGarrett Wollman 				       rt_fixchange, &arg);
1402956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_UNLOCK(rnh);
1403cd02a0b7SGarrett Wollman 	}
1404cd02a0b7SGarrett Wollman 
1405df8bae1dSRodney W. Grimes 	return 0;
1406df8bae1dSRodney W. Grimes }
1407df8bae1dSRodney W. Grimes 
/*
 * Copy src into dst with netmask applied.
 *
 * The first byte of each argument is read as its length.  The first two
 * bytes of src (sa_len and sa_family) are copied verbatim.  The following
 * bytes are ANDed with the corresponding netmask bytes, up to the shorter
 * of the two lengths.  Whatever remains of dst, up to src's length, is
 * zeroed.
 */
static void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	const unsigned char *from = (const unsigned char *)src;
	const unsigned char *mask = (const unsigned char *)netmask;
	unsigned char *to = (unsigned char *)dst;
	unsigned char *mask_end = to + mask[0];	/* end of masked region */
	unsigned char *dst_end = to + from[0];	/* end of the whole copy */

	/* sa_len and sa_family are copied unmasked. */
	*to++ = *from++;
	*to++ = *from++;
	mask += 2;

	/* Never mask past the end of the source address. */
	if (mask_end > dst_end)
		mask_end = dst_end;

	for (; to < mask_end; to++)
		*to = *from++ & *mask++;

	/* Bytes not covered by the netmask become zero. */
	if (to < dst_end)
		bzero(to, (unsigned)(dst_end - to));
}
1426df8bae1dSRodney W. Grimes 
1427df8bae1dSRodney W. Grimes /*
1428df8bae1dSRodney W. Grimes  * Set up a routing table entry, normally
1429df8bae1dSRodney W. Grimes  * for an interface.
1430df8bae1dSRodney W. Grimes  */
14318b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
14328b07e49aSJulian Elischer static inline  int
14338b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
1434df8bae1dSRodney W. Grimes {
14355aca0b30SLuigi Rizzo 	struct sockaddr *dst;
14368071913dSRuslan Ermilov 	struct sockaddr *netmask;
143785911824SLuigi Rizzo 	struct rtentry *rt = NULL;
14388071913dSRuslan Ermilov 	struct rt_addrinfo info;
1439e440aed9SQing Li 	int error = 0;
14408b07e49aSJulian Elischer 	int startfib, endfib;
14418b07e49aSJulian Elischer 	char tempbuf[_SOCKADDR_TMPSIZE];
14428b07e49aSJulian Elischer 	int didwork = 0;
14438b07e49aSJulian Elischer 	int a_failure = 0;
1444df8bae1dSRodney W. Grimes 
14458071913dSRuslan Ermilov 	if (flags & RTF_HOST) {
14468071913dSRuslan Ermilov 		dst = ifa->ifa_dstaddr;
14478071913dSRuslan Ermilov 		netmask = NULL;
14488071913dSRuslan Ermilov 	} else {
14498071913dSRuslan Ermilov 		dst = ifa->ifa_addr;
14508071913dSRuslan Ermilov 		netmask = ifa->ifa_netmask;
14518071913dSRuslan Ermilov 	}
14528b07e49aSJulian Elischer 	if ( dst->sa_family != AF_INET)
14538b07e49aSJulian Elischer 		fibnum = 0;
14548b07e49aSJulian Elischer 	if (fibnum == -1) {
14558b07e49aSJulian Elischer 		startfib = 0;
14568b07e49aSJulian Elischer 		endfib = rt_numfibs - 1;
14578b07e49aSJulian Elischer 	} else {
14588b07e49aSJulian Elischer 		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
14598b07e49aSJulian Elischer 		startfib = fibnum;
14608b07e49aSJulian Elischer 		endfib = fibnum;
14618b07e49aSJulian Elischer 	}
1462ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
1463ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
1464ac4a76ebSBjoern A. Zeeb 
1465b0a76b88SJulian Elischer 	/*
14668b07e49aSJulian Elischer 	 * If it's a delete, check that if it exists,
14678b07e49aSJulian Elischer 	 * it's on the correct interface or we might scrub
14688b07e49aSJulian Elischer 	 * a route to another ifa which would
1469b0a76b88SJulian Elischer 	 * be confusing at best and possibly worse.
1470b0a76b88SJulian Elischer 	 */
1471df8bae1dSRodney W. Grimes 	if (cmd == RTM_DELETE) {
1472b0a76b88SJulian Elischer 		/*
1473b0a76b88SJulian Elischer 		 * It's a delete, so it should already exist..
1474b0a76b88SJulian Elischer 		 * If it's a net, mask off the host bits
1475b0a76b88SJulian Elischer 		 * (Assuming we have a mask)
14768b07e49aSJulian Elischer 		 * XXX this is kinda inet specific..
1477b0a76b88SJulian Elischer 		 */
14788071913dSRuslan Ermilov 		if (netmask != NULL) {
14798b07e49aSJulian Elischer 			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
14808b07e49aSJulian Elischer 			dst = (struct sockaddr *)tempbuf;
1481df8bae1dSRodney W. Grimes 		}
14828b07e49aSJulian Elischer 	}
14838b07e49aSJulian Elischer 	/*
14848b07e49aSJulian Elischer 	 * Now go through all the requested tables (fibs) and do the
14858b07e49aSJulian Elischer 	 * requested action. Realistically, this will either be fib 0
14868b07e49aSJulian Elischer 	 * for protocols that don't do multiple tables or all the
14878b07e49aSJulian Elischer 	 * tables for those that do. XXX For this version only AF_INET.
14888b07e49aSJulian Elischer 	 * When that changes code should be refactored to protocol
14898b07e49aSJulian Elischer 	 * independent parts and protocol dependent parts.
14908b07e49aSJulian Elischer 	 */
14918b07e49aSJulian Elischer 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
14928b07e49aSJulian Elischer 		if (cmd == RTM_DELETE) {
14938b07e49aSJulian Elischer 			struct radix_node_head *rnh;
14948b07e49aSJulian Elischer 			struct radix_node *rn;
1495b0a76b88SJulian Elischer 			/*
14968071913dSRuslan Ermilov 			 * Look up an rtentry that is in the routing tree and
14978071913dSRuslan Ermilov 			 * contains the correct info.
1498b0a76b88SJulian Elischer 			 */
14998b07e49aSJulian Elischer 			if ((rnh = rt_tables[fibnum][dst->sa_family]) == NULL)
15008b07e49aSJulian Elischer 				/* this table doesn't exist but others might */
15018b07e49aSJulian Elischer 				continue;
1502956b0b65SJeffrey Hsu 			RADIX_NODE_HEAD_LOCK(rnh);
1503e440aed9SQing Li #ifdef RADIX_MPATH
1504e440aed9SQing Li 			if (rn_mpath_capable(rnh)) {
1505e440aed9SQing Li 
1506e440aed9SQing Li 				rn = rnh->rnh_matchaddr(dst, rnh);
1507e440aed9SQing Li 				if (rn == NULL)
1508e440aed9SQing Li 					error = ESRCH;
1509e440aed9SQing Li 				else {
1510e440aed9SQing Li 					rt = RNTORT(rn);
1511e440aed9SQing Li 					/*
15128b07e49aSJulian Elischer 					 * for interface route the
15138b07e49aSJulian Elischer 					 * rt->rt_gateway is sockaddr_intf
15148b07e49aSJulian Elischer 					 * for cloning ARP entries, so
15158b07e49aSJulian Elischer 					 * rt_mpath_matchgate must use the
15168b07e49aSJulian Elischer 					 * interface address
1517e440aed9SQing Li 					 */
15188b07e49aSJulian Elischer 					rt = rt_mpath_matchgate(rt,
15198b07e49aSJulian Elischer 					    ifa->ifa_addr);
1520e440aed9SQing Li 					if (!rt)
1521e440aed9SQing Li 						error = ESRCH;
1522e440aed9SQing Li 				}
1523e440aed9SQing Li 			}
1524e440aed9SQing Li 			else
1525e440aed9SQing Li #endif
15268b07e49aSJulian Elischer 			rn = rnh->rnh_lookup(dst, netmask, rnh);
15278b07e49aSJulian Elischer 			error = (rn == NULL ||
15288071913dSRuslan Ermilov 			    (rn->rn_flags & RNF_ROOT) ||
1529d6941ce9SLuigi Rizzo 			    RNTORT(rn)->rt_ifa != ifa ||
153085911824SLuigi Rizzo 			    !sa_equal((struct sockaddr *)rn->rn_key, dst));
1531956b0b65SJeffrey Hsu 			RADIX_NODE_HEAD_UNLOCK(rnh);
1532956b0b65SJeffrey Hsu 			if (error) {
15338b07e49aSJulian Elischer 				/* this is only an error if bad on ALL tables */
15348b07e49aSJulian Elischer 				continue;
1535df8bae1dSRodney W. Grimes 			}
1536b0a76b88SJulian Elischer 		}
1537b0a76b88SJulian Elischer 		/*
1538b0a76b88SJulian Elischer 		 * Do the actual request
1539b0a76b88SJulian Elischer 		 */
15408071913dSRuslan Ermilov 		bzero((caddr_t)&info, sizeof(info));
15418071913dSRuslan Ermilov 		info.rti_ifa = ifa;
15428071913dSRuslan Ermilov 		info.rti_flags = flags | ifa->ifa_flags;
15438071913dSRuslan Ermilov 		info.rti_info[RTAX_DST] = dst;
15448071913dSRuslan Ermilov 		info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
15458071913dSRuslan Ermilov 		info.rti_info[RTAX_NETMASK] = netmask;
15468b07e49aSJulian Elischer 		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
15475aca0b30SLuigi Rizzo 		if (error == 0 && rt != NULL) {
15488071913dSRuslan Ermilov 			/*
15496f99b44cSBrian Somers 			 * notify any listening routing agents of the change
15508071913dSRuslan Ermilov 			 */
1551d1dd20beSSam Leffler 			RT_LOCK(rt);
1552e440aed9SQing Li #ifdef RADIX_MPATH
1553e440aed9SQing Li 			/*
1554e440aed9SQing Li 			 * in case address alias finds the first address
1555e440aed9SQing Li 			 * e.g. ifconfig bge0 192.103.54.246/24
1556e440aed9SQing Li 			 * e.g. ifconfig bge0 192.103.54.247/24
1557e440aed9SQing Li 			 * the address set in the route is 192.103.54.246
1558e440aed9SQing Li 			 * so we need to replace it with 192.103.54.247
1559e440aed9SQing Li 			 */
15608b07e49aSJulian Elischer 			if (memcmp(rt->rt_ifa->ifa_addr,
15618b07e49aSJulian Elischer 			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
1562e440aed9SQing Li 				IFAFREE(rt->rt_ifa);
1563e440aed9SQing Li 				IFAREF(ifa);
1564e440aed9SQing Li 				rt->rt_ifp = ifa->ifa_ifp;
1565e440aed9SQing Li 				rt->rt_ifa = ifa;
1566e440aed9SQing Li 			}
1567e440aed9SQing Li #endif
15688071913dSRuslan Ermilov 			rt_newaddrmsg(cmd, ifa, error, rt);
15698071913dSRuslan Ermilov 			if (cmd == RTM_DELETE) {
1570b0a76b88SJulian Elischer 				/*
15718b07e49aSJulian Elischer 				 * If we are deleting, and we found an entry,
15728b07e49aSJulian Elischer 				 * then it's been removed from the tree..
15738b07e49aSJulian Elischer 				 * now throw it away.
1574b0a76b88SJulian Elischer 				 */
1575d1dd20beSSam Leffler 				RTFREE_LOCKED(rt);
1576d1dd20beSSam Leffler 			} else {
1577d1dd20beSSam Leffler 				if (cmd == RTM_ADD) {
1578b0a76b88SJulian Elischer 					/*
15798b07e49aSJulian Elischer 					 * We just wanted to add it..
15808b07e49aSJulian Elischer 					 * we don't actually need a reference.
1581b0a76b88SJulian Elischer 					 */
15827138d65cSSam Leffler 					RT_REMREF(rt);
1583df8bae1dSRodney W. Grimes 				}
1584d1dd20beSSam Leffler 				RT_UNLOCK(rt);
1585d1dd20beSSam Leffler 			}
15868b07e49aSJulian Elischer 			didwork = 1;
1587df8bae1dSRodney W. Grimes 		}
15888b07e49aSJulian Elischer 		if (error)
15898b07e49aSJulian Elischer 			a_failure = error;
15908b07e49aSJulian Elischer 	}
15918b07e49aSJulian Elischer 	if (cmd == RTM_DELETE) {
15928b07e49aSJulian Elischer 		if (didwork) {
15938b07e49aSJulian Elischer 			error = 0;
15948b07e49aSJulian Elischer 		} else {
15958b07e49aSJulian Elischer 			/* we only give an error if it wasn't in any table */
15968b07e49aSJulian Elischer 			error = ((flags & RTF_HOST) ?
15978b07e49aSJulian Elischer 			    EHOSTUNREACH : ENETUNREACH);
15988b07e49aSJulian Elischer 		}
15998b07e49aSJulian Elischer 	} else {
16008b07e49aSJulian Elischer 		if (a_failure) {
16018b07e49aSJulian Elischer 			/* return an error if any of them failed */
16028b07e49aSJulian Elischer 			error = a_failure;
16038b07e49aSJulian Elischer 		}
16048b07e49aSJulian Elischer 	}
16053ec66d6cSDavid Greenman 	return (error);
16063ec66d6cSDavid Greenman }
1607cb64988fSLuoqi Chen 
/*
 * Special variant of rtinit() for internal use by inet code; other callers
 * should not use it.  It always hands a fib number of -1 to rtinit1(),
 * which is the "every routing table" request.
 */
int
rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
{
	int error;

	error = rtinit1(ifa, cmd, flags, -1);
	return (error);
}
16148b07e49aSJulian Elischer 
16158b07e49aSJulian Elischer /*
16168b07e49aSJulian Elischer  * Set up a routing table entry, normally
16178b07e49aSJulian Elischer  * for an interface.
16188b07e49aSJulian Elischer  */
16198b07e49aSJulian Elischer int
16208b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags)
16218b07e49aSJulian Elischer {
16228b07e49aSJulian Elischer 	struct sockaddr *dst;
16238b07e49aSJulian Elischer 	int fib = 0;
16248b07e49aSJulian Elischer 
16258b07e49aSJulian Elischer 	if (flags & RTF_HOST) {
16268b07e49aSJulian Elischer 		dst = ifa->ifa_dstaddr;
16278b07e49aSJulian Elischer 	} else {
16288b07e49aSJulian Elischer 		dst = ifa->ifa_addr;
16298b07e49aSJulian Elischer 	}
16308b07e49aSJulian Elischer 
16318b07e49aSJulian Elischer 	if (dst->sa_family == AF_INET)
16328b07e49aSJulian Elischer 		fib = -1;
16338b07e49aSJulian Elischer 	return (rtinit1(ifa, cmd, flags, fib));
16348b07e49aSJulian Elischer }
16358b07e49aSJulian Elischer 
/*
 * rt_check() runs on every layer 2 output path, just before an outbound
 * packet is encapsulated.  It is a wrapper that calls rt_check_fib() with
 * fib number 0.
 *
 * Its main purpose is to find a routing entry for the gateway, which in
 * some protocol families may also point to the link-level address of the
 * gateway itself.  (Revalidating the route to the destination as a side
 * effect is rather pointless here: the pr_output() routine did that a
 * moment earlier in order to pick the ifp and gateway.)
 *
 * Once the layer-3 to layer-2 mapping tables are removed from the routing
 * table, this function can go away.
 *
 * === On input ===
 *   *dst is the address of the NEXT HOP (identical to the final
 *	destination when that is directly reachable);
 *   *lrt0 points to the cached route to the final destination;
 *   *lrt is not meaningful;
 *    fibnum (rt_check_fib() only) is the index of the network fib to use
 *	for this packet.
 *
 * === Operation ===
 * If the route is marked down, try to find a new route.  If the route to
 * the gateway is gone, try to set up a new one.  Otherwise, if the route
 * is marked for packets to be rejected, enforce that.
 *
 * === On return ===
 *   *dst is unchanged;
 *   *lrt0 points to the (possibly new) route to the final destination;
 *   *lrt points to the route to the next hop.
 *
 * These values are meaningful ONLY if no error is returned.
 */
int
rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
{
	int error;

	error = rt_check_fib(lrt, lrt0, dst, 0);
	return (error);
}
16748b07e49aSJulian Elischer 
16758b07e49aSJulian Elischer int
16768b07e49aSJulian Elischer rt_check_fib(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst,
16778b07e49aSJulian Elischer 		u_int fibnum)
16788b07e49aSJulian Elischer {
16797f760c48SMatthew N. Dodd 	struct rtentry *rt;
16807f760c48SMatthew N. Dodd 	struct rtentry *rt0;
16817f760c48SMatthew N. Dodd 	int error;
16827f760c48SMatthew N. Dodd 
1683530f95fcSGleb Smirnoff 	KASSERT(*lrt0 != NULL, ("rt_check"));
1684530f95fcSGleb Smirnoff 	rt = rt0 = *lrt0;
1685530f95fcSGleb Smirnoff 
1686d1dd20beSSam Leffler 	/* NB: the locking here is tortuous... */
1687d1dd20beSSam Leffler 	RT_LOCK(rt);
1688d1dd20beSSam Leffler 	if ((rt->rt_flags & RTF_UP) == 0) {
1689d1dd20beSSam Leffler 		RT_UNLOCK(rt);
16908b07e49aSJulian Elischer 		rt = rtalloc1_fib(dst, 1, 0UL, fibnum);
16917f760c48SMatthew N. Dodd 		if (rt != NULL) {
16927138d65cSSam Leffler 			RT_REMREF(rt);
1693d4b2657fSSam Leffler 			/* XXX what about if change? */
1694d1dd20beSSam Leffler 		} else
1695a0c0e34bSGleb Smirnoff 			return (EHOSTUNREACH);
1696d1dd20beSSam Leffler 		rt0 = rt;
16977f760c48SMatthew N. Dodd 	}
1698d1dd20beSSam Leffler 	/* XXX BSD/OS checks dst->sa_family != AF_NS */
16997f760c48SMatthew N. Dodd 	if (rt->rt_flags & RTF_GATEWAY) {
170085911824SLuigi Rizzo 		if (rt->rt_gwroute == NULL)
17017f760c48SMatthew N. Dodd 			goto lookup;
17027f760c48SMatthew N. Dodd 		rt = rt->rt_gwroute;
1703d1dd20beSSam Leffler 		RT_LOCK(rt);		/* NB: gwroute */
17047f760c48SMatthew N. Dodd 		if ((rt->rt_flags & RTF_UP) == 0) {
1705a0c0e34bSGleb Smirnoff 			RTFREE_LOCKED(rt);	/* unlock gwroute */
17067f760c48SMatthew N. Dodd 			rt = rt0;
170721b415b2SJohn Baldwin 			rt0->rt_gwroute = NULL;
17087f760c48SMatthew N. Dodd 		lookup:
1709d1dd20beSSam Leffler 			RT_UNLOCK(rt0);
17108b07e49aSJulian Elischer /* XXX MRT link level looked up in table 0 */
17118b07e49aSJulian Elischer 			rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0);
17121a41f910SQing Li 			if (rt == rt0) {
17131a41f910SQing Li 				RT_REMREF(rt0);
17141a41f910SQing Li 				RT_UNLOCK(rt0);
1715a0c0e34bSGleb Smirnoff 				return (ENETUNREACH);
17161a41f910SQing Li 			}
1717d1dd20beSSam Leffler 			RT_LOCK(rt0);
171821b415b2SJohn Baldwin 			if (rt0->rt_gwroute != NULL)
171921b415b2SJohn Baldwin 				RTFREE(rt0->rt_gwroute);
1720d1dd20beSSam Leffler 			rt0->rt_gwroute = rt;
172185911824SLuigi Rizzo 			if (rt == NULL) {
1722d1dd20beSSam Leffler 				RT_UNLOCK(rt0);
1723a0c0e34bSGleb Smirnoff 				return (EHOSTUNREACH);
17247f760c48SMatthew N. Dodd 			}
17257f760c48SMatthew N. Dodd 		}
1726d1dd20beSSam Leffler 		RT_UNLOCK(rt0);
1727d1dd20beSSam Leffler 	}
1728d1dd20beSSam Leffler 	/* XXX why are we inspecting rmx_expire? */
1729d1dd20beSSam Leffler 	error = (rt->rt_flags & RTF_REJECT) &&
1730d1dd20beSSam Leffler 		(rt->rt_rmx.rmx_expire == 0 ||
1731fe53256dSAndre Oppermann 			time_uptime < rt->rt_rmx.rmx_expire);
17329bd8ca30SGleb Smirnoff 	if (error) {
1733d1dd20beSSam Leffler 		RT_UNLOCK(rt);
1734a0c0e34bSGleb Smirnoff 		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
17357f760c48SMatthew N. Dodd 	}
1736530f95fcSGleb Smirnoff 
17379bd8ca30SGleb Smirnoff 	*lrt = rt;
17387f760c48SMatthew N. Dodd 	*lrt0 = rt0;
1739d1dd20beSSam Leffler 	return (0);
17407f760c48SMatthew N. Dodd }
17417f760c48SMatthew N. Dodd 
17426a800098SYoshinobu Inoue /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */
17436a800098SYoshinobu Inoue SYSINIT(route, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
1744