xref: /freebsd/sys/net/route.c (revision 096f27864f5d3059ffd82c3b1c5c4ef56cb627f6)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1980, 1986, 1991, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
2942e9e16dSRuslan Ermilov  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
30c3aac50fSPeter Wemm  * $FreeBSD$
31df8bae1dSRodney W. Grimes  */
328b07e49aSJulian Elischer /************************************************************************
338b07e49aSJulian Elischer  * Note: In this file a 'fib' is a "forwarding information base"	*
348b07e49aSJulian Elischer  * Which is the new name for an in kernel routing (next hop) table.	*
358b07e49aSJulian Elischer  ***********************************************************************/
36df8bae1dSRodney W. Grimes 
371d5e9e22SEivind Eklund #include "opt_inet.h"
38*096f2786SBjoern A. Zeeb #include "opt_inet6.h"
398b07e49aSJulian Elischer #include "opt_route.h"
404bd49128SPeter Wemm #include "opt_mrouting.h"
41e440aed9SQing Li #include "opt_mpath.h"
424bd49128SPeter Wemm 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
456e6b3f7cSQing Li #include <sys/syslog.h>
464d1d4912SBruce Evans #include <sys/malloc.h>
47df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
48df8bae1dSRodney W. Grimes #include <sys/socket.h>
498b07e49aSJulian Elischer #include <sys/sysctl.h>
503120b9d4SKip Macy #include <sys/syslog.h>
518b07e49aSJulian Elischer #include <sys/sysproto.h>
528b07e49aSJulian Elischer #include <sys/proc.h>
53df8bae1dSRodney W. Grimes #include <sys/domain.h>
54cb64988fSLuoqi Chen #include <sys/kernel.h>
55df8bae1dSRodney W. Grimes 
56df8bae1dSRodney W. Grimes #include <net/if.h>
576e6b3f7cSQing Li #include <net/if_dl.h>
58df8bae1dSRodney W. Grimes #include <net/route.h>
59530c0060SRobert Watson #include <net/vnet.h>
60e5c610d6SQing Li #include <net/flowtable.h>
61df8bae1dSRodney W. Grimes 
62e440aed9SQing Li #ifdef RADIX_MPATH
63e440aed9SQing Li #include <net/radix_mpath.h>
64e440aed9SQing Li #endif
65e440aed9SQing Li 
66df8bae1dSRodney W. Grimes #include <netinet/in.h>
67b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h>
68df8bae1dSRodney W. Grimes 
692dc1d581SAndre Oppermann #include <vm/uma.h>
702dc1d581SAndre Oppermann 
718b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS;
72f88910cdSMatthew D Fleming SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RD, &rt_numfibs, 0, "");
7366e8505fSJulian Elischer /*
7466e8505fSJulian Elischer  * Allow the boot code to allow LESS than RT_MAXFIBS to be used.
7566e8505fSJulian Elischer  * We can't do more because storage is statically allocated for now.
7666e8505fSJulian Elischer  * (for compatibility reasons.. this will change).
7766e8505fSJulian Elischer  */
788b07e49aSJulian Elischer TUNABLE_INT("net.fibs", &rt_numfibs);
798b07e49aSJulian Elischer 
8066e8505fSJulian Elischer /*
8166e8505fSJulian Elischer  * By default add routes to all fibs for new interfaces.
8266e8505fSJulian Elischer  * Once this is set to 0 then only allocate routes on interface
8366e8505fSJulian Elischer  * changes for the FIB of the caller when adding a new set of addresses
8466e8505fSJulian Elischer  * to an interface.  XXX this is a shotgun aproach to a problem that needs
8566e8505fSJulian Elischer  * a more fine grained solution.. that will come.
8666e8505fSJulian Elischer  */
8766e8505fSJulian Elischer u_int rt_add_addr_allfibs = 1;
88f88910cdSMatthew D Fleming SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RW,
8966e8505fSJulian Elischer     &rt_add_addr_allfibs, 0, "");
9066e8505fSJulian Elischer TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
9166e8505fSJulian Elischer 
92eddfbb76SRobert Watson VNET_DEFINE(struct rtstat, rtstat);
931e77c105SRobert Watson #define	V_rtstat	VNET(rtstat)
94b58ea5f3SBjoern A. Zeeb 
9582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct radix_node_head *, rt_tables);
9682cea7e6SBjoern A. Zeeb #define	V_rt_tables	VNET(rt_tables)
9782cea7e6SBjoern A. Zeeb 
9882cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rttrash);		/* routes not in table but not freed */
9982cea7e6SBjoern A. Zeeb #define	V_rttrash	VNET(rttrash)
10082cea7e6SBjoern A. Zeeb 
101bfe1aba4SMarko Zec 
/*
 * Byte-wise comparison of two sockaddr structures; the length compared
 * is taken from the first argument's sa_len.  Note that a1 is evaluated
 * twice.
 */
#define	sa_equal(a1, a2) (!bcmp((a1), (a2), (a1)->sa_len))

/*
 * Turn a 'struct radix_node *' into a 'struct rtentry *'.
 *
 * This is safe (in this code) because a 'struct rtentry' begins with two
 * 'struct radix_node's, and the first of them is the leaf node that the
 * code in radix.c hands back to us as a 'struct radix_node'.
 *
 * Since the conversion rests on many assumptions, never write the cast
 * out by hand; always go through this macro.
 */
#define RNTORT(p)	((struct rtentry *)(p))
116d6941ce9SLuigi Rizzo 
/* Per-vnet UMA zone that struct rtentry objects are returned to. */
static VNET_DEFINE(uma_zone_t, rtzone);
#define	V_rtzone	VNET(rtzone)
11982cea7e6SBjoern A. Zeeb 
1208b07e49aSJulian Elischer /*
1218b07e49aSJulian Elischer  * handler for net.my_fibnum
1228b07e49aSJulian Elischer  */
1238b07e49aSJulian Elischer static int
1248b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
125df8bae1dSRodney W. Grimes {
1268b07e49aSJulian Elischer         int fibnum;
1278b07e49aSJulian Elischer         int error;
1288b07e49aSJulian Elischer 
1298b07e49aSJulian Elischer         fibnum = curthread->td_proc->p_fibnum;
1308b07e49aSJulian Elischer         error = sysctl_handle_int(oidp, &fibnum, 0, req);
1318b07e49aSJulian Elischer         return (error);
132df8bae1dSRodney W. Grimes }
133df8bae1dSRodney W. Grimes 
1348b07e49aSJulian Elischer SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
1358b07e49aSJulian Elischer             NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
1362dc1d581SAndre Oppermann 
137c2c2a7c1SBjoern A. Zeeb static __inline struct radix_node_head **
138c2c2a7c1SBjoern A. Zeeb rt_tables_get_rnh_ptr(int table, int fam)
139c2c2a7c1SBjoern A. Zeeb {
140c2c2a7c1SBjoern A. Zeeb 	struct radix_node_head **rnh;
141c2c2a7c1SBjoern A. Zeeb 
142c2c2a7c1SBjoern A. Zeeb 	KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
143c2c2a7c1SBjoern A. Zeeb 	    __func__));
144c2c2a7c1SBjoern A. Zeeb 	KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
145c2c2a7c1SBjoern A. Zeeb 	    __func__));
146c2c2a7c1SBjoern A. Zeeb 
147c2c2a7c1SBjoern A. Zeeb 	/* rnh is [fib=0][af=0]. */
148c2c2a7c1SBjoern A. Zeeb 	rnh = (struct radix_node_head **)V_rt_tables;
149c2c2a7c1SBjoern A. Zeeb 	/* Get the offset to the requested table and fam. */
150c2c2a7c1SBjoern A. Zeeb 	rnh += table * (AF_MAX+1) + fam;
151c2c2a7c1SBjoern A. Zeeb 
152c2c2a7c1SBjoern A. Zeeb 	return (rnh);
153c2c2a7c1SBjoern A. Zeeb }
154c2c2a7c1SBjoern A. Zeeb 
/*
 * Return the radix head stored for (table, fam); this is NULL when no
 * head has been attached to that slot.
 */
struct radix_node_head *
rt_tables_get_rnh(int table, int fam)
{
	struct radix_node_head **slot;

	slot = rt_tables_get_rnh_ptr(table, fam);
	return (*slot);
}
161c2c2a7c1SBjoern A. Zeeb 
162d0728d71SRobert Watson /*
163d0728d71SRobert Watson  * route initialization must occur before ip6_init2(), which happenas at
164d0728d71SRobert Watson  * SI_ORDER_MIDDLE.
165d0728d71SRobert Watson  */
1662eb5613fSLuigi Rizzo static void
1672eb5613fSLuigi Rizzo route_init(void)
168df8bae1dSRodney W. Grimes {
169614cb839SLuigi Rizzo 	struct domain *dom;
170614cb839SLuigi Rizzo 	int max_keylen = 0;
1718b07e49aSJulian Elischer 
1726f95a5ebSJulian Elischer 	/* whack the tunable ints into  line. */
1738b07e49aSJulian Elischer 	if (rt_numfibs > RT_MAXFIBS)
1748b07e49aSJulian Elischer 		rt_numfibs = RT_MAXFIBS;
1758b07e49aSJulian Elischer 	if (rt_numfibs == 0)
1768b07e49aSJulian Elischer 		rt_numfibs = 1;
177614cb839SLuigi Rizzo 
178614cb839SLuigi Rizzo 	for (dom = domains; dom; dom = dom->dom_next)
179614cb839SLuigi Rizzo 		if (dom->dom_maxrtkey > max_keylen)
180614cb839SLuigi Rizzo 			max_keylen = dom->dom_maxrtkey;
181614cb839SLuigi Rizzo 
182614cb839SLuigi Rizzo 	rn_init(max_keylen);	/* init all zeroes, all ones, mask table */
1831ed81b73SMarko Zec }
184d0728d71SRobert Watson SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, 0);
1851ed81b73SMarko Zec 
186d0728d71SRobert Watson static void
187d0728d71SRobert Watson vnet_route_init(const void *unused __unused)
1881ed81b73SMarko Zec {
1891ed81b73SMarko Zec 	struct domain *dom;
190c2c2a7c1SBjoern A. Zeeb 	struct radix_node_head **rnh;
191c2c2a7c1SBjoern A. Zeeb 	int table;
1921ed81b73SMarko Zec 	int fam;
1931ed81b73SMarko Zec 
194c2c2a7c1SBjoern A. Zeeb 	V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
195c2c2a7c1SBjoern A. Zeeb 	    sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
196c2c2a7c1SBjoern A. Zeeb 
1971ed81b73SMarko Zec 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
1981ed81b73SMarko Zec 	    NULL, NULL, UMA_ALIGN_PTR, 0);
1998b07e49aSJulian Elischer 	for (dom = domains; dom; dom = dom->dom_next) {
200b680a383SBjoern A. Zeeb 		if (dom->dom_rtattach == NULL)
201b680a383SBjoern A. Zeeb 			continue;
202b680a383SBjoern A. Zeeb 
2038b07e49aSJulian Elischer 		for  (table = 0; table < rt_numfibs; table++) {
204b680a383SBjoern A. Zeeb 			fam = dom->dom_family;
205b680a383SBjoern A. Zeeb 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
206b680a383SBjoern A. Zeeb 				break;
207b680a383SBjoern A. Zeeb 
208b680a383SBjoern A. Zeeb 			/*
209b680a383SBjoern A. Zeeb 			 * XXX MRT rtattach will be also called from
210b680a383SBjoern A. Zeeb 			 * vfs_export.c but the offset will be 0 (only for
211b680a383SBjoern A. Zeeb 			 * AF_INET and AF_INET6 which don't need it anyhow).
2128b07e49aSJulian Elischer 			 */
213c2c2a7c1SBjoern A. Zeeb 			rnh = rt_tables_get_rnh_ptr(table, fam);
214c2c2a7c1SBjoern A. Zeeb 			if (rnh == NULL)
215c2c2a7c1SBjoern A. Zeeb 				panic("%s: rnh NULL", __func__);
216b680a383SBjoern A. Zeeb 			dom->dom_rtattach((void **)rnh, dom->dom_rtoffset);
2178b07e49aSJulian Elischer 		}
2188b07e49aSJulian Elischer 	}
2198b07e49aSJulian Elischer }
220d0728d71SRobert Watson VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
221d0728d71SRobert Watson     vnet_route_init, 0);
2228b07e49aSJulian Elischer 
#ifdef VIMAGE
/*
 * Per-vnet routing teardown: call dom_rtdetach for every (fib, family)
 * slot that vnet_route_init() would have attached.
 *
 * NOTE(review): V_rt_tables and V_rtzone are not released in this
 * function -- confirm whether that is handled elsewhere.
 */
static void
vnet_route_uninit(const void *unused __unused)
{
	struct radix_node_head **slot;
	struct domain *d;
	int af;
	int fib;

	for (d = domains; d != NULL; d = d->dom_next) {
		if (d->dom_rtdetach == NULL)
			continue;

		af = d->dom_family;
		for (fib = 0; fib < rt_numfibs; fib++) {
			/* Only AF_INET and AF_INET6 have tables beyond FIB 0. */
			if (fib > 0 && af != AF_INET && af != AF_INET6)
				break;

			slot = rt_tables_get_rnh_ptr(fib, af);
			if (slot == NULL)
				panic("%s: rnh NULL", __func__);
			d->dom_rtdetach((void **)slot, d->dom_rtoffset);
		}
	}
}
VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
    vnet_route_uninit, 0);
#endif
252bc29160dSMarko Zec 
2538b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_
2548b07e49aSJulian Elischer struct setfib_args {
2558b07e49aSJulian Elischer 	int     fibnum;
2568b07e49aSJulian Elischer };
2578b07e49aSJulian Elischer #endif
2588b07e49aSJulian Elischer int
2598451d0ddSKip Macy sys_setfib(struct thread *td, struct setfib_args *uap)
2608b07e49aSJulian Elischer {
2618b07e49aSJulian Elischer 	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
2628b07e49aSJulian Elischer 		return EINVAL;
2638b07e49aSJulian Elischer 	td->td_proc->p_fibnum = uap->fibnum;
2648b07e49aSJulian Elischer 	return (0);
265df8bae1dSRodney W. Grimes }
266df8bae1dSRodney W. Grimes 
267df8bae1dSRodney W. Grimes /*
268df8bae1dSRodney W. Grimes  * Packet routing routines.
269df8bae1dSRodney W. Grimes  */
270df8bae1dSRodney W. Grimes void
271d1dd20beSSam Leffler rtalloc(struct route *ro)
272df8bae1dSRodney W. Grimes {
2738b07e49aSJulian Elischer 	rtalloc_ign_fib(ro, 0UL, 0);
2748b07e49aSJulian Elischer }
2758b07e49aSJulian Elischer 
2768b07e49aSJulian Elischer void
2778b07e49aSJulian Elischer rtalloc_fib(struct route *ro, u_int fibnum)
2788b07e49aSJulian Elischer {
2798b07e49aSJulian Elischer 	rtalloc_ign_fib(ro, 0UL, fibnum);
280df8bae1dSRodney W. Grimes }
281df8bae1dSRodney W. Grimes 
282652082e6SGarrett Wollman void
283d1dd20beSSam Leffler rtalloc_ign(struct route *ro, u_long ignore)
284652082e6SGarrett Wollman {
28568f956b8SJohn Polstra 	struct rtentry *rt;
28668f956b8SJohn Polstra 
28768f956b8SJohn Polstra 	if ((rt = ro->ro_rt) != NULL) {
28868f956b8SJohn Polstra 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
28968f956b8SJohn Polstra 			return;
29068f956b8SJohn Polstra 		RTFREE(rt);
29166810dd0SYoshinobu Inoue 		ro->ro_rt = NULL;
29268f956b8SJohn Polstra 	}
2938b07e49aSJulian Elischer 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, 0);
2948b07e49aSJulian Elischer 	if (ro->ro_rt)
2958b07e49aSJulian Elischer 		RT_UNLOCK(ro->ro_rt);
2968b07e49aSJulian Elischer }
2978b07e49aSJulian Elischer 
2988b07e49aSJulian Elischer void
2998b07e49aSJulian Elischer rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
3008b07e49aSJulian Elischer {
3018b07e49aSJulian Elischer 	struct rtentry *rt;
3028b07e49aSJulian Elischer 
3038b07e49aSJulian Elischer 	if ((rt = ro->ro_rt) != NULL) {
3048b07e49aSJulian Elischer 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
3058b07e49aSJulian Elischer 			return;
3068b07e49aSJulian Elischer 		RTFREE(rt);
3078b07e49aSJulian Elischer 		ro->ro_rt = NULL;
3088b07e49aSJulian Elischer 	}
3098b07e49aSJulian Elischer 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
310d1dd20beSSam Leffler 	if (ro->ro_rt)
311d1dd20beSSam Leffler 		RT_UNLOCK(ro->ro_rt);
312652082e6SGarrett Wollman }
313652082e6SGarrett Wollman 
314b0a76b88SJulian Elischer /*
315b0a76b88SJulian Elischer  * Look up the route that matches the address given
316b0a76b88SJulian Elischer  * Or, at least try.. Create a cloned route if needed.
317d1dd20beSSam Leffler  *
318d1dd20beSSam Leffler  * The returned route, if any, is locked.
319b0a76b88SJulian Elischer  */
320df8bae1dSRodney W. Grimes struct rtentry *
321d1dd20beSSam Leffler rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
322df8bae1dSRodney W. Grimes {
3238b07e49aSJulian Elischer 	return (rtalloc1_fib(dst, report, ignflags, 0));
3248b07e49aSJulian Elischer }
3258b07e49aSJulian Elischer 
3268b07e49aSJulian Elischer struct rtentry *
3278b07e49aSJulian Elischer rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
3288b07e49aSJulian Elischer 		    u_int fibnum)
3298b07e49aSJulian Elischer {
3308b07e49aSJulian Elischer 	struct radix_node_head *rnh;
331d1dd20beSSam Leffler 	struct radix_node *rn;
332d1dd20beSSam Leffler 	struct rtentry *newrt;
333df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
3346e6b3f7cSQing Li 	int err = 0, msgtype = RTM_MISS;
3353120b9d4SKip Macy 	int needlock;
336df8bae1dSRodney W. Grimes 
3378b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
338b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
339b680a383SBjoern A. Zeeb 	case AF_INET6:
340b680a383SBjoern A. Zeeb 	case AF_INET:
341b680a383SBjoern A. Zeeb 		/* We support multiple FIBs. */
342b680a383SBjoern A. Zeeb 		break;
343b680a383SBjoern A. Zeeb 	default:
344b680a383SBjoern A. Zeeb 		fibnum = RT_DEFAULT_FIB;
345b680a383SBjoern A. Zeeb 		break;
346b680a383SBjoern A. Zeeb 	}
347c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
348e579f1c1SDmitry Chagin 	newrt = NULL;
349523e6002SDmitry Chagin 	if (rnh == NULL)
350523e6002SDmitry Chagin 		goto miss;
351523e6002SDmitry Chagin 
352b0a76b88SJulian Elischer 	/*
353b0a76b88SJulian Elischer 	 * Look up the address in the table for that Address Family
354b0a76b88SJulian Elischer 	 */
3553120b9d4SKip Macy 	needlock = !(ignflags & RTF_RNH_LOCKED);
3563120b9d4SKip Macy 	if (needlock)
3573120b9d4SKip Macy 		RADIX_NODE_HEAD_RLOCK(rnh);
3583120b9d4SKip Macy #ifdef INVARIANTS
3593120b9d4SKip Macy 	else
3603120b9d4SKip Macy 		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
3613120b9d4SKip Macy #endif
3623120b9d4SKip Macy 	rn = rnh->rnh_matchaddr(dst, rnh);
3633120b9d4SKip Macy 	if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
3646a873ef7SDmitry Chagin 		newrt = RNTORT(rn);
3653120b9d4SKip Macy 		RT_LOCK(newrt);
3663120b9d4SKip Macy 		RT_ADDREF(newrt);
3673120b9d4SKip Macy 		if (needlock)
3683120b9d4SKip Macy 			RADIX_NODE_HEAD_RUNLOCK(rnh);
3693120b9d4SKip Macy 		goto done;
3706e6b3f7cSQing Li 
3716e6b3f7cSQing Li 	} else if (needlock)
3723120b9d4SKip Macy 		RADIX_NODE_HEAD_RUNLOCK(rnh);
3733120b9d4SKip Macy 
374b0a76b88SJulian Elischer 	/*
375b0a76b88SJulian Elischer 	 * Either we hit the root or couldn't find any match,
376b0a76b88SJulian Elischer 	 * Which basically means
377b0a76b88SJulian Elischer 	 * "caint get there frm here"
378b0a76b88SJulian Elischer 	 */
379956b0b65SJeffrey Hsu miss:
380523e6002SDmitry Chagin 	V_rtstat.rts_unreach++;
381523e6002SDmitry Chagin 
3826e6b3f7cSQing Li 	if (report) {
383b0a76b88SJulian Elischer 		/*
384b0a76b88SJulian Elischer 		 * If required, report the failure to the supervising
385b0a76b88SJulian Elischer 		 * Authorities.
386b0a76b88SJulian Elischer 		 * For a delete, this is not an error. (report == 0)
387b0a76b88SJulian Elischer 		 */
3886f5967c0SBruce Evans 		bzero(&info, sizeof(info));
389df8bae1dSRodney W. Grimes 		info.rti_info[RTAX_DST] = dst;
390528737fdSBjoern A. Zeeb 		rt_missmsg_fib(msgtype, &info, 0, err, fibnum);
391df8bae1dSRodney W. Grimes 	}
3923120b9d4SKip Macy done:
393d1dd20beSSam Leffler 	if (newrt)
394d1dd20beSSam Leffler 		RT_LOCK_ASSERT(newrt);
395df8bae1dSRodney W. Grimes 	return (newrt);
396df8bae1dSRodney W. Grimes }
397df8bae1dSRodney W. Grimes 
398499676dfSJulian Elischer /*
399499676dfSJulian Elischer  * Remove a reference count from an rtentry.
400499676dfSJulian Elischer  * If the count gets low enough, take it out of the routing table
401499676dfSJulian Elischer  */
402df8bae1dSRodney W. Grimes void
403d1dd20beSSam Leffler rtfree(struct rtentry *rt)
404df8bae1dSRodney W. Grimes {
40585911824SLuigi Rizzo 	struct radix_node_head *rnh;
406df8bae1dSRodney W. Grimes 
407a0c0e34bSGleb Smirnoff 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
408c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
409a0c0e34bSGleb Smirnoff 	KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
410499676dfSJulian Elischer 
411d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
412d1dd20beSSam Leffler 
413499676dfSJulian Elischer 	/*
414a0c0e34bSGleb Smirnoff 	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
415a0c0e34bSGleb Smirnoff 	 * we should come here exactly with the last reference.
416499676dfSJulian Elischer 	 */
4177138d65cSSam Leffler 	RT_REMREF(rt);
418a0c0e34bSGleb Smirnoff 	if (rt->rt_refcnt > 0) {
419a42ea597SQing Li 		log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt);
420d1dd20beSSam Leffler 		goto done;
421a0c0e34bSGleb Smirnoff 	}
4229c63e9dbSSam Leffler 
4239c63e9dbSSam Leffler 	/*
4249c63e9dbSSam Leffler 	 * On last reference give the "close method" a chance
4259c63e9dbSSam Leffler 	 * to cleanup private state.  This also permits (for
4269c63e9dbSSam Leffler 	 * IPv4 and IPv6) a chance to decide if the routing table
4279c63e9dbSSam Leffler 	 * entry should be purged immediately or at a later time.
4289c63e9dbSSam Leffler 	 * When an immediate purge is to happen the close routine
4299c63e9dbSSam Leffler 	 * typically calls rtexpunge which clears the RTF_UP flag
4309c63e9dbSSam Leffler 	 * on the entry so that the code below reclaims the storage.
4319c63e9dbSSam Leffler 	 */
432d1dd20beSSam Leffler 	if (rt->rt_refcnt == 0 && rnh->rnh_close)
4335c2dae8eSGarrett Wollman 		rnh->rnh_close((struct radix_node *)rt, rnh);
434499676dfSJulian Elischer 
435499676dfSJulian Elischer 	/*
436499676dfSJulian Elischer 	 * If we are no longer "up" (and ref == 0)
437499676dfSJulian Elischer 	 * then we can free the resources associated
438499676dfSJulian Elischer 	 * with the route.
439499676dfSJulian Elischer 	 */
440d1dd20beSSam Leffler 	if ((rt->rt_flags & RTF_UP) == 0) {
441df8bae1dSRodney W. Grimes 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
442df8bae1dSRodney W. Grimes 			panic("rtfree 2");
443499676dfSJulian Elischer 		/*
444499676dfSJulian Elischer 		 * the rtentry must have been removed from the routing table
445499676dfSJulian Elischer 		 * so it is represented in rttrash.. remove that now.
446499676dfSJulian Elischer 		 */
447603724d3SBjoern A. Zeeb 		V_rttrash--;
448499676dfSJulian Elischer #ifdef	DIAGNOSTIC
449df8bae1dSRodney W. Grimes 		if (rt->rt_refcnt < 0) {
450623ae52eSPoul-Henning Kamp 			printf("rtfree: %p not freed (neg refs)\n", rt);
451d1dd20beSSam Leffler 			goto done;
452df8bae1dSRodney W. Grimes 		}
453499676dfSJulian Elischer #endif
454499676dfSJulian Elischer 		/*
455499676dfSJulian Elischer 		 * release references on items we hold them on..
456499676dfSJulian Elischer 		 * e.g other routes and ifaddrs.
457499676dfSJulian Elischer 		 */
45819fc74fbSJeffrey Hsu 		if (rt->rt_ifa)
4591099f828SRobert Watson 			ifa_free(rt->rt_ifa);
460499676dfSJulian Elischer 		/*
461499676dfSJulian Elischer 		 * The key is separatly alloc'd so free it (see rt_setgate()).
462499676dfSJulian Elischer 		 * This also frees the gateway, as they are always malloc'd
463499676dfSJulian Elischer 		 * together.
464499676dfSJulian Elischer 		 */
465df8bae1dSRodney W. Grimes 		Free(rt_key(rt));
466499676dfSJulian Elischer 
467499676dfSJulian Elischer 		/*
468499676dfSJulian Elischer 		 * and the rtentry itself of course
469499676dfSJulian Elischer 		 */
470d1dd20beSSam Leffler 		RT_LOCK_DESTROY(rt);
4711ed81b73SMarko Zec 		uma_zfree(V_rtzone, rt);
472d1dd20beSSam Leffler 		return;
473df8bae1dSRodney W. Grimes 	}
474d1dd20beSSam Leffler done:
475d1dd20beSSam Leffler 	RT_UNLOCK(rt);
476df8bae1dSRodney W. Grimes }
477df8bae1dSRodney W. Grimes 
478df8bae1dSRodney W. Grimes 
479df8bae1dSRodney W. Grimes /*
480df8bae1dSRodney W. Grimes  * Force a routing table entry to the specified
481df8bae1dSRodney W. Grimes  * destination to go through the given gateway.
482df8bae1dSRodney W. Grimes  * Normally called as a result of a routing redirect
483df8bae1dSRodney W. Grimes  * message from the network layer.
484df8bae1dSRodney W. Grimes  */
48526f9a767SRodney W. Grimes void
486d1dd20beSSam Leffler rtredirect(struct sockaddr *dst,
487d1dd20beSSam Leffler 	struct sockaddr *gateway,
488d1dd20beSSam Leffler 	struct sockaddr *netmask,
489d1dd20beSSam Leffler 	int flags,
490d1dd20beSSam Leffler 	struct sockaddr *src)
491df8bae1dSRodney W. Grimes {
4928b07e49aSJulian Elischer 	rtredirect_fib(dst, gateway, netmask, flags, src, 0);
4938b07e49aSJulian Elischer }
4948b07e49aSJulian Elischer 
4958b07e49aSJulian Elischer void
4968b07e49aSJulian Elischer rtredirect_fib(struct sockaddr *dst,
4978b07e49aSJulian Elischer 	struct sockaddr *gateway,
4988b07e49aSJulian Elischer 	struct sockaddr *netmask,
4998b07e49aSJulian Elischer 	int flags,
5008b07e49aSJulian Elischer 	struct sockaddr *src,
5018b07e49aSJulian Elischer 	u_int fibnum)
5028b07e49aSJulian Elischer {
5038e7e854cSKip Macy 	struct rtentry *rt, *rt0 = NULL;
504df8bae1dSRodney W. Grimes 	int error = 0;
50585911824SLuigi Rizzo 	short *stat = NULL;
506df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
507df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
508c2c2a7c1SBjoern A. Zeeb 	struct radix_node_head *rnh;
509c2c2a7c1SBjoern A. Zeeb 
5108c0fec80SRobert Watson 	ifa = NULL;
511c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
512c2c2a7c1SBjoern A. Zeeb 	if (rnh == NULL) {
513c2c2a7c1SBjoern A. Zeeb 		error = EAFNOSUPPORT;
514c2c2a7c1SBjoern A. Zeeb 		goto out;
515c2c2a7c1SBjoern A. Zeeb 	}
516df8bae1dSRodney W. Grimes 
517df8bae1dSRodney W. Grimes 	/* verify the gateway is directly reachable */
5180ed6142bSQing Li 	if ((ifa = ifa_ifwithnet(gateway, 0)) == NULL) {
519df8bae1dSRodney W. Grimes 		error = ENETUNREACH;
520df8bae1dSRodney W. Grimes 		goto out;
521df8bae1dSRodney W. Grimes 	}
5228b07e49aSJulian Elischer 	rt = rtalloc1_fib(dst, 0, 0UL, fibnum);	/* NB: rt is locked */
523df8bae1dSRodney W. Grimes 	/*
524df8bae1dSRodney W. Grimes 	 * If the redirect isn't from our current router for this dst,
525df8bae1dSRodney W. Grimes 	 * it's either old or wrong.  If it redirects us to ourselves,
526df8bae1dSRodney W. Grimes 	 * we have a routing loop, perhaps as a result of an interface
527df8bae1dSRodney W. Grimes 	 * going down recently.
528df8bae1dSRodney W. Grimes 	 */
529df8bae1dSRodney W. Grimes 	if (!(flags & RTF_DONE) && rt &&
530956b0b65SJeffrey Hsu 	     (!sa_equal(src, rt->rt_gateway) || rt->rt_ifa != ifa))
531df8bae1dSRodney W. Grimes 		error = EINVAL;
5328896f83aSRobert Watson 	else if (ifa_ifwithaddr_check(gateway))
533df8bae1dSRodney W. Grimes 		error = EHOSTUNREACH;
534df8bae1dSRodney W. Grimes 	if (error)
535df8bae1dSRodney W. Grimes 		goto done;
536df8bae1dSRodney W. Grimes 	/*
537df8bae1dSRodney W. Grimes 	 * Create a new entry if we just got back a wildcard entry
5386bccea7cSRebecca Cran 	 * or the lookup failed.  This is necessary for hosts
539df8bae1dSRodney W. Grimes 	 * which use routing redirects generated by smart gateways
540df8bae1dSRodney W. Grimes 	 * to dynamically build the routing tables.
541df8bae1dSRodney W. Grimes 	 */
54285911824SLuigi Rizzo 	if (rt == NULL || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
543df8bae1dSRodney W. Grimes 		goto create;
544df8bae1dSRodney W. Grimes 	/*
545df8bae1dSRodney W. Grimes 	 * Don't listen to the redirect if it's
546df8bae1dSRodney W. Grimes 	 * for a route to an interface.
547df8bae1dSRodney W. Grimes 	 */
548df8bae1dSRodney W. Grimes 	if (rt->rt_flags & RTF_GATEWAY) {
549df8bae1dSRodney W. Grimes 		if (((rt->rt_flags & RTF_HOST) == 0) && (flags & RTF_HOST)) {
550df8bae1dSRodney W. Grimes 			/*
551df8bae1dSRodney W. Grimes 			 * Changing from route to net => route to host.
552df8bae1dSRodney W. Grimes 			 * Create new route, rather than smashing route to net.
553df8bae1dSRodney W. Grimes 			 */
554df8bae1dSRodney W. Grimes 		create:
5558e7e854cSKip Macy 			rt0 = rt;
5568e7e854cSKip Macy 			rt = NULL;
5578e7e854cSKip Macy 
558df8bae1dSRodney W. Grimes 			flags |=  RTF_GATEWAY | RTF_DYNAMIC;
5598071913dSRuslan Ermilov 			bzero((caddr_t)&info, sizeof(info));
5608071913dSRuslan Ermilov 			info.rti_info[RTAX_DST] = dst;
5618071913dSRuslan Ermilov 			info.rti_info[RTAX_GATEWAY] = gateway;
5628071913dSRuslan Ermilov 			info.rti_info[RTAX_NETMASK] = netmask;
5638071913dSRuslan Ermilov 			info.rti_ifa = ifa;
5648071913dSRuslan Ermilov 			info.rti_flags = flags;
5653120b9d4SKip Macy 			if (rt0 != NULL)
5663120b9d4SKip Macy 				RT_UNLOCK(rt0);	/* drop lock to avoid LOR with RNH */
5678b07e49aSJulian Elischer 			error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
568d1dd20beSSam Leffler 			if (rt != NULL) {
5694de5d90cSSam Leffler 				RT_LOCK(rt);
5703120b9d4SKip Macy 				if (rt0 != NULL)
57129910a5aSKip Macy 					EVENTHANDLER_INVOKE(route_redirect_event, rt0, rt, dst);
5728071913dSRuslan Ermilov 				flags = rt->rt_flags;
573d1dd20beSSam Leffler 			}
5743120b9d4SKip Macy 			if (rt0 != NULL)
5753120b9d4SKip Macy 				RTFREE(rt0);
5768e7e854cSKip Macy 
577603724d3SBjoern A. Zeeb 			stat = &V_rtstat.rts_dynamic;
578df8bae1dSRodney W. Grimes 		} else {
5798e7e854cSKip Macy 			struct rtentry *gwrt;
5808e7e854cSKip Macy 
581df8bae1dSRodney W. Grimes 			/*
582df8bae1dSRodney W. Grimes 			 * Smash the current notion of the gateway to
583df8bae1dSRodney W. Grimes 			 * this destination.  Should check about netmask!!!
584df8bae1dSRodney W. Grimes 			 */
585df8bae1dSRodney W. Grimes 			rt->rt_flags |= RTF_MODIFIED;
586df8bae1dSRodney W. Grimes 			flags |= RTF_MODIFIED;
587603724d3SBjoern A. Zeeb 			stat = &V_rtstat.rts_newgateway;
588499676dfSJulian Elischer 			/*
589499676dfSJulian Elischer 			 * add the key and gateway (in one malloc'd chunk).
590499676dfSJulian Elischer 			 */
5913120b9d4SKip Macy 			RT_UNLOCK(rt);
5923120b9d4SKip Macy 			RADIX_NODE_HEAD_LOCK(rnh);
5933120b9d4SKip Macy 			RT_LOCK(rt);
594df8bae1dSRodney W. Grimes 			rt_setgate(rt, rt_key(rt), gateway);
5953120b9d4SKip Macy 			gwrt = rtalloc1(gateway, 1, RTF_RNH_LOCKED);
5963120b9d4SKip Macy 			RADIX_NODE_HEAD_UNLOCK(rnh);
59729910a5aSKip Macy 			EVENTHANDLER_INVOKE(route_redirect_event, rt, gwrt, dst);
5988e7e854cSKip Macy 			RTFREE_LOCKED(gwrt);
599df8bae1dSRodney W. Grimes 		}
600df8bae1dSRodney W. Grimes 	} else
601df8bae1dSRodney W. Grimes 		error = EHOSTUNREACH;
602df8bae1dSRodney W. Grimes done:
603d1dd20beSSam Leffler 	if (rt)
6041951e633SJohn Baldwin 		RTFREE_LOCKED(rt);
605df8bae1dSRodney W. Grimes out:
606df8bae1dSRodney W. Grimes 	if (error)
607603724d3SBjoern A. Zeeb 		V_rtstat.rts_badredirect++;
608df8bae1dSRodney W. Grimes 	else if (stat != NULL)
609df8bae1dSRodney W. Grimes 		(*stat)++;
610df8bae1dSRodney W. Grimes 	bzero((caddr_t)&info, sizeof(info));
611df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_DST] = dst;
612df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_GATEWAY] = gateway;
613df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_NETMASK] = netmask;
614df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_AUTHOR] = src;
615528737fdSBjoern A. Zeeb 	rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
6168c0fec80SRobert Watson 	if (ifa != NULL)
6178c0fec80SRobert Watson 		ifa_free(ifa);
618df8bae1dSRodney W. Grimes }
619df8bae1dSRodney W. Grimes 
6208b07e49aSJulian Elischer int
6218b07e49aSJulian Elischer rtioctl(u_long req, caddr_t data)
6228b07e49aSJulian Elischer {
6238b07e49aSJulian Elischer 	return (rtioctl_fib(req, data, 0));
6248b07e49aSJulian Elischer }
6258b07e49aSJulian Elischer 
626df8bae1dSRodney W. Grimes /*
627df8bae1dSRodney W. Grimes  * Routing table ioctl interface.
628df8bae1dSRodney W. Grimes  */
629df8bae1dSRodney W. Grimes int
6308b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
631df8bae1dSRodney W. Grimes {
6325090559bSChristian S.J. Peron 
6335090559bSChristian S.J. Peron 	/*
6345090559bSChristian S.J. Peron 	 * If more ioctl commands are added here, make sure the proper
6355090559bSChristian S.J. Peron 	 * super-user checks are being performed because it is possible for
6365090559bSChristian S.J. Peron 	 * prison-root to make it this far if raw sockets have been enabled
6375090559bSChristian S.J. Peron 	 * in jails.
6385090559bSChristian S.J. Peron 	 */
639623ae52eSPoul-Henning Kamp #ifdef INET
640f0068c4aSGarrett Wollman 	/* Multicast goop, grrr... */
6418b07e49aSJulian Elischer 	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
642623ae52eSPoul-Henning Kamp #else /* INET */
643623ae52eSPoul-Henning Kamp 	return ENXIO;
644623ae52eSPoul-Henning Kamp #endif /* INET */
645df8bae1dSRodney W. Grimes }
646df8bae1dSRodney W. Grimes 
/*
 * Find the interface address to use for a route; this variant searches
 * FIB number 0 by delegating to ifa_ifwithroute_fib().
 *
 * For both ifa_ifwithroute() routines, 'ifa' is returned referenced.
 */
struct ifaddr *
ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
{
	struct ifaddr *ifa;

	ifa = ifa_ifwithroute_fib(flags, dst, gateway, 0);
	return (ifa);
}
6558b07e49aSJulian Elischer 
6568b07e49aSJulian Elischer struct ifaddr *
6578b07e49aSJulian Elischer ifa_ifwithroute_fib(int flags, struct sockaddr *dst, struct sockaddr *gateway,
6588b07e49aSJulian Elischer 				u_int fibnum)
6598b07e49aSJulian Elischer {
660df8bae1dSRodney W. Grimes 	register struct ifaddr *ifa;
661e034e82cSQing Li 	int not_found = 0;
662d1dd20beSSam Leffler 
663df8bae1dSRodney W. Grimes 	if ((flags & RTF_GATEWAY) == 0) {
664df8bae1dSRodney W. Grimes 		/*
665df8bae1dSRodney W. Grimes 		 * If we are adding a route to an interface,
666df8bae1dSRodney W. Grimes 		 * and the interface is a pt to pt link
667df8bae1dSRodney W. Grimes 		 * we should search for the destination
668df8bae1dSRodney W. Grimes 		 * as our clue to the interface.  Otherwise
669df8bae1dSRodney W. Grimes 		 * we can use the local address.
670df8bae1dSRodney W. Grimes 		 */
67185911824SLuigi Rizzo 		ifa = NULL;
67285911824SLuigi Rizzo 		if (flags & RTF_HOST)
673df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithdstaddr(dst);
67485911824SLuigi Rizzo 		if (ifa == NULL)
675df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithaddr(gateway);
676df8bae1dSRodney W. Grimes 	} else {
677df8bae1dSRodney W. Grimes 		/*
678df8bae1dSRodney W. Grimes 		 * If we are adding a route to a remote net
679df8bae1dSRodney W. Grimes 		 * or host, the gateway may still be on the
680df8bae1dSRodney W. Grimes 		 * other end of a pt to pt link.
681df8bae1dSRodney W. Grimes 		 */
682df8bae1dSRodney W. Grimes 		ifa = ifa_ifwithdstaddr(gateway);
683df8bae1dSRodney W. Grimes 	}
68485911824SLuigi Rizzo 	if (ifa == NULL)
6850ed6142bSQing Li 		ifa = ifa_ifwithnet(gateway, 0);
68685911824SLuigi Rizzo 	if (ifa == NULL) {
6879b20205dSKip Macy 		struct rtentry *rt = rtalloc1_fib(gateway, 0, RTF_RNH_LOCKED, fibnum);
68885911824SLuigi Rizzo 		if (rt == NULL)
68985911824SLuigi Rizzo 			return (NULL);
690e034e82cSQing Li 		/*
691e034e82cSQing Li 		 * dismiss a gateway that is reachable only
692e034e82cSQing Li 		 * through the default router
693e034e82cSQing Li 		 */
694e034e82cSQing Li 		switch (gateway->sa_family) {
695e034e82cSQing Li 		case AF_INET:
696e034e82cSQing Li 			if (satosin(rt_key(rt))->sin_addr.s_addr == INADDR_ANY)
697e034e82cSQing Li 				not_found = 1;
698e034e82cSQing Li 			break;
699e034e82cSQing Li 		case AF_INET6:
700e034e82cSQing Li 			if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(rt_key(rt))->sin6_addr))
701e034e82cSQing Li 				not_found = 1;
702e034e82cSQing Li 			break;
703e034e82cSQing Li 		default:
704e034e82cSQing Li 			break;
705e034e82cSQing Li 		}
7068c0fec80SRobert Watson 		if (!not_found && rt->rt_ifa != NULL) {
7078c0fec80SRobert Watson 			ifa = rt->rt_ifa;
7088c0fec80SRobert Watson 			ifa_ref(ifa);
7098c0fec80SRobert Watson 		}
7107138d65cSSam Leffler 		RT_REMREF(rt);
711d1dd20beSSam Leffler 		RT_UNLOCK(rt);
7128c0fec80SRobert Watson 		if (not_found || ifa == NULL)
71385911824SLuigi Rizzo 			return (NULL);
714df8bae1dSRodney W. Grimes 	}
715df8bae1dSRodney W. Grimes 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
716df8bae1dSRodney W. Grimes 		struct ifaddr *oifa = ifa;
717df8bae1dSRodney W. Grimes 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
71885911824SLuigi Rizzo 		if (ifa == NULL)
719df8bae1dSRodney W. Grimes 			ifa = oifa;
7208c0fec80SRobert Watson 		else
7218c0fec80SRobert Watson 			ifa_free(oifa);
722df8bae1dSRodney W. Grimes 	}
723df8bae1dSRodney W. Grimes 	return (ifa);
724df8bae1dSRodney W. Grimes }
725df8bae1dSRodney W. Grimes 
/*
 * Do appropriate manipulations of a routing tree given
 * all the bits of info needed.  This variant operates on FIB number 0
 * and simply forwards to rtrequest_fib().
 */
int
rtrequest(int req,
	struct sockaddr *dst,
	struct sockaddr *gateway,
	struct sockaddr *netmask,
	int flags,
	struct rtentry **ret_nrt)
{
	int error;

	error = rtrequest_fib(req, dst, gateway, netmask, flags, ret_nrt, 0);
	return (error);
}
7408b07e49aSJulian Elischer 
7418b07e49aSJulian Elischer int
7428b07e49aSJulian Elischer rtrequest_fib(int req,
7438b07e49aSJulian Elischer 	struct sockaddr *dst,
7448b07e49aSJulian Elischer 	struct sockaddr *gateway,
7458b07e49aSJulian Elischer 	struct sockaddr *netmask,
7468b07e49aSJulian Elischer 	int flags,
7478b07e49aSJulian Elischer 	struct rtentry **ret_nrt,
7488b07e49aSJulian Elischer 	u_int fibnum)
7498b07e49aSJulian Elischer {
7508071913dSRuslan Ermilov 	struct rt_addrinfo info;
7518071913dSRuslan Ermilov 
752ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
753ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
754ac4a76ebSBjoern A. Zeeb 
7558071913dSRuslan Ermilov 	bzero((caddr_t)&info, sizeof(info));
7568071913dSRuslan Ermilov 	info.rti_flags = flags;
7578071913dSRuslan Ermilov 	info.rti_info[RTAX_DST] = dst;
7588071913dSRuslan Ermilov 	info.rti_info[RTAX_GATEWAY] = gateway;
7598071913dSRuslan Ermilov 	info.rti_info[RTAX_NETMASK] = netmask;
7608b07e49aSJulian Elischer 	return rtrequest1_fib(req, &info, ret_nrt, fibnum);
7618071913dSRuslan Ermilov }
7628071913dSRuslan Ermilov 
/*
 * These (questionable) definitions of apparent local variables stay in
 * effect for every function from here down to the matching #undef lines
 * that follow rtrequest1_fib().  Each one expands to a field of the
 * function's 'struct rt_addrinfo *info' argument, so they are assignable
 * and only work where a pointer with exactly that name is in scope.
 * XXXXXX!!!
 */
#define	dst	(info->rti_info[RTAX_DST])
#define	gateway	(info->rti_info[RTAX_GATEWAY])
#define	netmask	(info->rti_info[RTAX_NETMASK])
#define	ifaaddr	(info->rti_info[RTAX_IFA])
#define	ifpaddr	(info->rti_info[RTAX_IFP])
#define	flags	(info->rti_flags)
7738071913dSRuslan Ermilov 
/*
 * Resolve the interface and interface address for 'info' using FIB
 * number 0; see rt_getifa_fib().
 */
int
rt_getifa(struct rt_addrinfo *info)
{
	int error;

	error = rt_getifa_fib(info, 0);
	return (error);
}
7798b07e49aSJulian Elischer 
7808c0fec80SRobert Watson /*
7818c0fec80SRobert Watson  * Look up rt_addrinfo for a specific fib.  Note that if rti_ifa is defined,
7828c0fec80SRobert Watson  * it will be referenced so the caller must free it.
7838c0fec80SRobert Watson  */
7848b07e49aSJulian Elischer int
7858b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
7868b07e49aSJulian Elischer {
7878071913dSRuslan Ermilov 	struct ifaddr *ifa;
7888071913dSRuslan Ermilov 	int error = 0;
7898071913dSRuslan Ermilov 
7908071913dSRuslan Ermilov 	/*
7918071913dSRuslan Ermilov 	 * ifp may be specified by sockaddr_dl
7928071913dSRuslan Ermilov 	 * when protocol address is ambiguous.
7938071913dSRuslan Ermilov 	 */
7948071913dSRuslan Ermilov 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
7958071913dSRuslan Ermilov 	    ifpaddr->sa_family == AF_LINK &&
7960ed6142bSQing Li 	    (ifa = ifa_ifwithnet(ifpaddr, 0)) != NULL) {
7978071913dSRuslan Ermilov 		info->rti_ifp = ifa->ifa_ifp;
7988c0fec80SRobert Watson 		ifa_free(ifa);
7998c0fec80SRobert Watson 	}
8008071913dSRuslan Ermilov 	if (info->rti_ifa == NULL && ifaaddr != NULL)
8018071913dSRuslan Ermilov 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
8028071913dSRuslan Ermilov 	if (info->rti_ifa == NULL) {
8038071913dSRuslan Ermilov 		struct sockaddr *sa;
8048071913dSRuslan Ermilov 
8058071913dSRuslan Ermilov 		sa = ifaaddr != NULL ? ifaaddr :
8068071913dSRuslan Ermilov 		    (gateway != NULL ? gateway : dst);
8078071913dSRuslan Ermilov 		if (sa != NULL && info->rti_ifp != NULL)
8088071913dSRuslan Ermilov 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
8098071913dSRuslan Ermilov 		else if (dst != NULL && gateway != NULL)
8108b07e49aSJulian Elischer 			info->rti_ifa = ifa_ifwithroute_fib(flags, dst, gateway,
8118b07e49aSJulian Elischer 							fibnum);
8128071913dSRuslan Ermilov 		else if (sa != NULL)
8138b07e49aSJulian Elischer 			info->rti_ifa = ifa_ifwithroute_fib(flags, sa, sa,
8148b07e49aSJulian Elischer 							fibnum);
8158071913dSRuslan Ermilov 	}
8168071913dSRuslan Ermilov 	if ((ifa = info->rti_ifa) != NULL) {
8178071913dSRuslan Ermilov 		if (info->rti_ifp == NULL)
8188071913dSRuslan Ermilov 			info->rti_ifp = ifa->ifa_ifp;
8198071913dSRuslan Ermilov 	} else
8208071913dSRuslan Ermilov 		error = ENETUNREACH;
8218071913dSRuslan Ermilov 	return (error);
8228071913dSRuslan Ermilov }
8238071913dSRuslan Ermilov 
8249c63e9dbSSam Leffler /*
8259c63e9dbSSam Leffler  * Expunges references to a route that's about to be reclaimed.
8269c63e9dbSSam Leffler  * The route must be locked.
8279c63e9dbSSam Leffler  */
8289c63e9dbSSam Leffler int
8299c63e9dbSSam Leffler rtexpunge(struct rtentry *rt)
8309c63e9dbSSam Leffler {
831c7ea0aa6SQing Li #if !defined(RADIX_MPATH)
8329c63e9dbSSam Leffler 	struct radix_node *rn;
833c7ea0aa6SQing Li #else
834c7ea0aa6SQing Li 	struct rt_addrinfo info;
835c7ea0aa6SQing Li 	int fib;
836c7ea0aa6SQing Li 	struct rtentry *rt0;
837c7ea0aa6SQing Li #endif
8389c63e9dbSSam Leffler 	struct radix_node_head *rnh;
8399c63e9dbSSam Leffler 	struct ifaddr *ifa;
8409c63e9dbSSam Leffler 	int error = 0;
8419c63e9dbSSam Leffler 
8426e6b3f7cSQing Li 	/*
8436e6b3f7cSQing Li 	 * Find the correct routing tree to use for this Address Family
8446e6b3f7cSQing Li 	 */
845c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
8469c63e9dbSSam Leffler 	RT_LOCK_ASSERT(rt);
8476e6b3f7cSQing Li 	if (rnh == NULL)
8486e6b3f7cSQing Li 		return (EAFNOSUPPORT);
8493120b9d4SKip Macy 	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
850c7ea0aa6SQing Li 
851c7ea0aa6SQing Li #ifdef RADIX_MPATH
852c7ea0aa6SQing Li 	fib = rt->rt_fibnum;
853c7ea0aa6SQing Li 	bzero(&info, sizeof(info));
854c7ea0aa6SQing Li 	info.rti_ifp = rt->rt_ifp;
855c7ea0aa6SQing Li 	info.rti_flags = RTF_RNH_LOCKED;
856c7ea0aa6SQing Li 	info.rti_info[RTAX_DST] = rt_key(rt);
857c7ea0aa6SQing Li 	info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
858c7ea0aa6SQing Li 
859c7ea0aa6SQing Li 	RT_UNLOCK(rt);
860c7ea0aa6SQing Li 	error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
861c7ea0aa6SQing Li 
862c7ea0aa6SQing Li 	if (error == 0 && rt0 != NULL) {
863c7ea0aa6SQing Li 		rt = rt0;
864c7ea0aa6SQing Li 		RT_LOCK(rt);
865c7ea0aa6SQing Li 	} else if (error != 0) {
866c7ea0aa6SQing Li 		RT_LOCK(rt);
867c7ea0aa6SQing Li 		return (error);
868c7ea0aa6SQing Li 	}
869c7ea0aa6SQing Li #else
8709c63e9dbSSam Leffler 	/*
8719c63e9dbSSam Leffler 	 * Remove the item from the tree; it should be there,
8729c63e9dbSSam Leffler 	 * but when callers invoke us blindly it may not (sigh).
8739c63e9dbSSam Leffler 	 */
8749c63e9dbSSam Leffler 	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
87585911824SLuigi Rizzo 	if (rn == NULL) {
8769c63e9dbSSam Leffler 		error = ESRCH;
8779c63e9dbSSam Leffler 		goto bad;
8789c63e9dbSSam Leffler 	}
8799c63e9dbSSam Leffler 	KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
8809c63e9dbSSam Leffler 		("unexpected flags 0x%x", rn->rn_flags));
881d6941ce9SLuigi Rizzo 	KASSERT(rt == RNTORT(rn),
8829c63e9dbSSam Leffler 		("lookup mismatch, rt %p rn %p", rt, rn));
883c7ea0aa6SQing Li #endif /* RADIX_MPATH */
8849c63e9dbSSam Leffler 
8859c63e9dbSSam Leffler 	rt->rt_flags &= ~RTF_UP;
8869c63e9dbSSam Leffler 
8879c63e9dbSSam Leffler 	/*
8889c63e9dbSSam Leffler 	 * Give the protocol a chance to keep things in sync.
8899c63e9dbSSam Leffler 	 */
8909c63e9dbSSam Leffler 	if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
8919c63e9dbSSam Leffler 		struct rt_addrinfo info;
8929c63e9dbSSam Leffler 
8939c63e9dbSSam Leffler 		bzero((caddr_t)&info, sizeof(info));
8949c63e9dbSSam Leffler 		info.rti_flags = rt->rt_flags;
8959c63e9dbSSam Leffler 		info.rti_info[RTAX_DST] = rt_key(rt);
8969c63e9dbSSam Leffler 		info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
8979c63e9dbSSam Leffler 		info.rti_info[RTAX_NETMASK] = rt_mask(rt);
8989c63e9dbSSam Leffler 		ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
8999c63e9dbSSam Leffler 	}
9009c63e9dbSSam Leffler 
9019c63e9dbSSam Leffler 	/*
9029c63e9dbSSam Leffler 	 * one more rtentry floating around that is not
9039c63e9dbSSam Leffler 	 * linked to the routing table.
9049c63e9dbSSam Leffler 	 */
905603724d3SBjoern A. Zeeb 	V_rttrash++;
906c7ea0aa6SQing Li #if !defined(RADIX_MPATH)
9079c63e9dbSSam Leffler bad:
908c7ea0aa6SQing Li #endif
9099c63e9dbSSam Leffler 	return (error);
9109c63e9dbSSam Leffler }
9119c63e9dbSSam Leffler 
#ifdef RADIX_MPATH
/*
 * Multipath-aware part of route deletion.
 *
 * Looks up the chain of routes matching the destination in 'info' and
 * selects the member whose gateway matches info's RTAX_GATEWAY.  Only
 * RTM_DELETE is supported; any other request ends in panic().
 *
 * Return values as seen by the caller:
 *   0       the entry was unlinked here; it is handed back through
 *           *ret_nrt (referenced, unlocked) or released if ret_nrt is NULL;
 *   ENOENT  the entry is the first one in its chain and was NOT unlinked;
 *           the caller is expected to run its normal delete code;
 *   ESRCH   no matching destination or gateway was found.
 */
static int
rn_mpath_update(int req, struct rt_addrinfo *info,
    struct radix_node_head *rnh, struct rtentry **ret_nrt)
{
	/*
	 * if we got multipath routes, we require users to specify
	 * a matching RTAX_GATEWAY.
	 */
	struct sockaddr *key = info->rti_info[RTAX_DST];
	struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
	struct sockaddr *mask = info->rti_info[RTAX_NETMASK];
	struct rtentry *head, *rt;
	struct radix_node *rn;
	int error = 0;

	rn = rnh->rnh_matchaddr(key, rnh);
	if (rn == NULL)
		return (ESRCH);
	head = RNTORT(rn);
	rt = rt_mpath_matchgate(head, gw);
	if (rt == NULL)
		return (ESRCH);

	if (rt == head) {
		/*
		 * this is the first entry in the chain
		 */
		rn = rn_mpath_next((struct radix_node *)rt);
		if (rn != NULL) {
			/*
			 * there is another entry, now it's active
			 */
			head = RNTORT(rn);
			RT_LOCK(head);
			head->rt_flags |= RTF_UP;
			RT_UNLOCK(head);
		} else if (rt->rt_flags & RTF_GATEWAY) {
			/*
			 * For gateway routes, we need to make sure that
			 * we are deleting the correct gateway.
			 * rt_mpath_matchgate() does not check the case
			 * when there is only one route in the chain.
			 */
			if (gw != NULL &&
			    (rt->rt_gateway->sa_len != gw->sa_len ||
			    memcmp(rt->rt_gateway, gw, gw->sa_len) != 0)) {
				/*
				 * NOTE(review): this ESRCH is always replaced
				 * by ENOENT a few lines below, so a gateway
				 * mismatch still falls through to the caller's
				 * normal delete -- confirm whether that is
				 * intended before tightening it.
				 */
				error = ESRCH;
			} else {
				/*
				 * remove from tree before returning it
				 * to the caller
				 */
				rn = rnh->rnh_deladdr(key, mask, rnh);
				KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
				goto gwdelete;
			}
		}

		/*
		 * use the normal delete code to remove
		 * the first entry
		 */
		if (req != RTM_DELETE)
			goto nondelete;

		error = ENOENT;
		goto done;
	}

	/*
	 * if the entry is 2nd and on up
	 */
	if (req == RTM_DELETE && !rt_mpath_deldup(head, rt))
		panic ("rtrequest1: rt_mpath_deldup");

gwdelete:
	RT_LOCK(rt);
	RT_ADDREF(rt);
	if (req == RTM_DELETE) {
		rt->rt_flags &= ~RTF_UP;
		/*
		 * One more rtentry floating around that is not
		 * linked to the routing table. rttrash will be decremented
		 * when RTFREE(rt) is eventually called.
		 */
		V_rttrash++;
	}

nondelete:
	/* Nothing but RTM_DELETE is implemented here. */
	if (req != RTM_DELETE)
		panic("unrecognized request %d", req);

	/*
	 * If the caller wants it, then it can have it,
	 * but it's up to it to free the rtentry as we won't be
	 * doing it.
	 */
	if (ret_nrt != NULL) {
		*ret_nrt = rt;
		RT_UNLOCK(rt);
	} else {
		RTFREE_LOCKED(rt);
	}
done:
	return (error);
}
#endif
1017427ac07fSKip Macy 
10188071913dSRuslan Ermilov int
10198b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
10208b07e49aSJulian Elischer 				u_int fibnum)
10218b07e49aSJulian Elischer {
10223120b9d4SKip Macy 	int error = 0, needlock = 0;
1023df8bae1dSRodney W. Grimes 	register struct rtentry *rt;
1024e5c610d6SQing Li #ifdef FLOWTABLE
1025e5c610d6SQing Li 	register struct rtentry *rt0;
1026e5c610d6SQing Li #endif
1027df8bae1dSRodney W. Grimes 	register struct radix_node *rn;
1028df8bae1dSRodney W. Grimes 	register struct radix_node_head *rnh;
1029df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
1030df8bae1dSRodney W. Grimes 	struct sockaddr *ndst;
103146a70de2SQing Li 	struct sockaddr_storage mdst;
1032df8bae1dSRodney W. Grimes #define senderr(x) { error = x ; goto bad; }
1033df8bae1dSRodney W. Grimes 
10348b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
1035b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
1036b680a383SBjoern A. Zeeb 	case AF_INET6:
1037b680a383SBjoern A. Zeeb 	case AF_INET:
1038b680a383SBjoern A. Zeeb 		/* We support multiple FIBs. */
1039b680a383SBjoern A. Zeeb 		break;
1040b680a383SBjoern A. Zeeb 	default:
1041b680a383SBjoern A. Zeeb 		fibnum = RT_DEFAULT_FIB;
1042b680a383SBjoern A. Zeeb 		break;
1043b680a383SBjoern A. Zeeb 	}
1044b680a383SBjoern A. Zeeb 
1045b0a76b88SJulian Elischer 	/*
1046b0a76b88SJulian Elischer 	 * Find the correct routing tree to use for this Address Family
1047b0a76b88SJulian Elischer 	 */
1048c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
104985911824SLuigi Rizzo 	if (rnh == NULL)
1050983985c1SJeffrey Hsu 		return (EAFNOSUPPORT);
10513120b9d4SKip Macy 	needlock = ((flags & RTF_RNH_LOCKED) == 0);
10523120b9d4SKip Macy 	flags &= ~RTF_RNH_LOCKED;
10533120b9d4SKip Macy 	if (needlock)
1054956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_LOCK(rnh);
1055c96b8224SKip Macy 	else
1056c96b8224SKip Macy 		RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
1057b0a76b88SJulian Elischer 	/*
1058b0a76b88SJulian Elischer 	 * If we are adding a host route then we don't want to put
105966953138SRuslan Ermilov 	 * a netmask in the tree, nor do we want to clone it.
1060b0a76b88SJulian Elischer 	 */
10616e6b3f7cSQing Li 	if (flags & RTF_HOST)
106285911824SLuigi Rizzo 		netmask = NULL;
10636e6b3f7cSQing Li 
1064df8bae1dSRodney W. Grimes 	switch (req) {
1065df8bae1dSRodney W. Grimes 	case RTM_DELETE:
106646a70de2SQing Li 		if (netmask) {
106746a70de2SQing Li 			rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
106846a70de2SQing Li 			dst = (struct sockaddr *)&mdst;
106946a70de2SQing Li 		}
1070e440aed9SQing Li #ifdef RADIX_MPATH
1071e440aed9SQing Li 		if (rn_mpath_capable(rnh)) {
1072427ac07fSKip Macy 			error = rn_mpath_update(req, info, rnh, ret_nrt);
1073e440aed9SQing Li 			/*
1074427ac07fSKip Macy 			 * "bad" holds true for the success case
1075427ac07fSKip Macy 			 * as well
1076e440aed9SQing Li 			 */
1077427ac07fSKip Macy 			if (error != ENOENT)
1078427ac07fSKip Macy 				goto bad;
1079c7ea0aa6SQing Li 			error = 0;
1080e440aed9SQing Li 		}
1081ea9cd9f2SBjoern A. Zeeb #endif
1082b0a76b88SJulian Elischer 		/*
1083b0a76b88SJulian Elischer 		 * Remove the item from the tree and return it.
1084b0a76b88SJulian Elischer 		 * Complain if it is not there and do no more processing.
1085b0a76b88SJulian Elischer 		 */
1086d1dd20beSSam Leffler 		rn = rnh->rnh_deladdr(dst, netmask, rnh);
108785911824SLuigi Rizzo 		if (rn == NULL)
1088df8bae1dSRodney W. Grimes 			senderr(ESRCH);
1089df8bae1dSRodney W. Grimes 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
1090df8bae1dSRodney W. Grimes 			panic ("rtrequest delete");
1091d6941ce9SLuigi Rizzo 		rt = RNTORT(rn);
1092d1dd20beSSam Leffler 		RT_LOCK(rt);
10937138d65cSSam Leffler 		RT_ADDREF(rt);
109471eba915SRuslan Ermilov 		rt->rt_flags &= ~RTF_UP;
1095c2bed6a3SGarrett Wollman 
1096c2bed6a3SGarrett Wollman 		/*
1097499676dfSJulian Elischer 		 * give the protocol a chance to keep things in sync.
1098b0a76b88SJulian Elischer 		 */
1099df8bae1dSRodney W. Grimes 		if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
11008071913dSRuslan Ermilov 			ifa->ifa_rtrequest(RTM_DELETE, rt, info);
1101499676dfSJulian Elischer 
1102b0a76b88SJulian Elischer 		/*
1103d6941ce9SLuigi Rizzo 		 * One more rtentry floating around that is not
1104d6941ce9SLuigi Rizzo 		 * linked to the routing table. rttrash will be decremented
1105d6941ce9SLuigi Rizzo 		 * when RTFREE(rt) is eventually called.
1106499676dfSJulian Elischer 		 */
1107603724d3SBjoern A. Zeeb 		V_rttrash++;
1108499676dfSJulian Elischer 
1109499676dfSJulian Elischer 		/*
1110499676dfSJulian Elischer 		 * If the caller wants it, then it can have it,
1111499676dfSJulian Elischer 		 * but it's up to it to free the rtentry as we won't be
1112499676dfSJulian Elischer 		 * doing it.
1113b0a76b88SJulian Elischer 		 */
1114d1dd20beSSam Leffler 		if (ret_nrt) {
1115df8bae1dSRodney W. Grimes 			*ret_nrt = rt;
1116d1dd20beSSam Leffler 			RT_UNLOCK(rt);
1117d1dd20beSSam Leffler 		} else
1118d1dd20beSSam Leffler 			RTFREE_LOCKED(rt);
1119df8bae1dSRodney W. Grimes 		break;
1120df8bae1dSRodney W. Grimes 	case RTM_RESOLVE:
11216e6b3f7cSQing Li 		/*
11226e6b3f7cSQing Li 		 * resolve was only used for route cloning
11236e6b3f7cSQing Li 		 * here for compat
11246e6b3f7cSQing Li 		 */
11256e6b3f7cSQing Li 		break;
1126df8bae1dSRodney W. Grimes 	case RTM_ADD:
11275df72964SGarrett Wollman 		if ((flags & RTF_GATEWAY) && !gateway)
112816a2e0a6SQing Li 			senderr(EINVAL);
112916a2e0a6SQing Li 		if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
113016a2e0a6SQing Li 		    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
113116a2e0a6SQing Li 			senderr(EINVAL);
11325df72964SGarrett Wollman 
11338c0fec80SRobert Watson 		if (info->rti_ifa == NULL) {
11348c0fec80SRobert Watson 			error = rt_getifa_fib(info, fibnum);
11358c0fec80SRobert Watson 			if (error)
11368071913dSRuslan Ermilov 				senderr(error);
11378c0fec80SRobert Watson 		} else
11388c0fec80SRobert Watson 			ifa_ref(info->rti_ifa);
11398071913dSRuslan Ermilov 		ifa = info->rti_ifa;
11401ed81b73SMarko Zec 		rt = uma_zalloc(V_rtzone, M_NOWAIT | M_ZERO);
11418c0fec80SRobert Watson 		if (rt == NULL) {
11428c0fec80SRobert Watson 			ifa_free(ifa);
1143df8bae1dSRodney W. Grimes 			senderr(ENOBUFS);
11448c0fec80SRobert Watson 		}
1145d1dd20beSSam Leffler 		RT_LOCK_INIT(rt);
1146df8bae1dSRodney W. Grimes 		rt->rt_flags = RTF_UP | flags;
11478b07e49aSJulian Elischer 		rt->rt_fibnum = fibnum;
1148499676dfSJulian Elischer 		/*
1149499676dfSJulian Elischer 		 * Add the gateway. Possibly re-malloc-ing the storage for it
11506e6b3f7cSQing Li 		 *
1151499676dfSJulian Elischer 		 */
1152d1dd20beSSam Leffler 		RT_LOCK(rt);
1153831a80b0SMatthew Dillon 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
1154d1dd20beSSam Leffler 			RT_LOCK_DESTROY(rt);
11558c0fec80SRobert Watson 			ifa_free(ifa);
11561ed81b73SMarko Zec 			uma_zfree(V_rtzone, rt);
1157704b0666SBill Fenner 			senderr(error);
1158df8bae1dSRodney W. Grimes 		}
1159499676dfSJulian Elischer 
1160499676dfSJulian Elischer 		/*
1161499676dfSJulian Elischer 		 * point to the (possibly newly malloc'd) dest address.
1162499676dfSJulian Elischer 		 */
1163d1dd20beSSam Leffler 		ndst = (struct sockaddr *)rt_key(rt);
1164499676dfSJulian Elischer 
1165499676dfSJulian Elischer 		/*
1166499676dfSJulian Elischer 		 * make sure it contains the value we want (masked if needed).
1167499676dfSJulian Elischer 		 */
1168df8bae1dSRodney W. Grimes 		if (netmask) {
1169df8bae1dSRodney W. Grimes 			rt_maskedcopy(dst, ndst, netmask);
1170df8bae1dSRodney W. Grimes 		} else
11711838a647SLuigi Rizzo 			bcopy(dst, ndst, dst->sa_len);
11728e718bb4SGarrett Wollman 
11738e718bb4SGarrett Wollman 		/*
11748c0fec80SRobert Watson 		 * We use the ifa reference returned by rt_getifa_fib().
11758e718bb4SGarrett Wollman 		 * This moved from below so that rnh->rnh_addaddr() can
1176499676dfSJulian Elischer 		 * examine the ifa and  ifa->ifa_ifp if it so desires.
11778e718bb4SGarrett Wollman 		 */
11788e718bb4SGarrett Wollman 		rt->rt_ifa = ifa;
11798e718bb4SGarrett Wollman 		rt->rt_ifp = ifa->ifa_ifp;
1180427ac07fSKip Macy 		rt->rt_rmx.rmx_weight = 1;
11818e718bb4SGarrett Wollman 
1182e440aed9SQing Li #ifdef RADIX_MPATH
1183e440aed9SQing Li 		/* do not permit exactly the same dst/mask/gw pair */
1184e440aed9SQing Li 		if (rn_mpath_capable(rnh) &&
1185e440aed9SQing Li 			rt_mpath_conflict(rnh, rt, netmask)) {
11861099f828SRobert Watson 			ifa_free(rt->rt_ifa);
1187e440aed9SQing Li 			Free(rt_key(rt));
1188e440aed9SQing Li 			RT_LOCK_DESTROY(rt);
11891ed81b73SMarko Zec 			uma_zfree(V_rtzone, rt);
1190e440aed9SQing Li 			senderr(EEXIST);
1191e440aed9SQing Li 		}
1192e440aed9SQing Li #endif
1193e440aed9SQing Li 
1194e5c610d6SQing Li #ifdef FLOWTABLE
1195e5c610d6SQing Li 		rt0 = NULL;
1196*096f2786SBjoern A. Zeeb 		/* "flow-table" only supports IPv6 and IPv4 at the moment. */
1197*096f2786SBjoern A. Zeeb 		switch (dst->sa_family) {
1198*096f2786SBjoern A. Zeeb #ifdef INET6
1199*096f2786SBjoern A. Zeeb 		case AF_INET6:
1200*096f2786SBjoern A. Zeeb #endif
1201db44ff40SBjoern A. Zeeb #ifdef INET
1202*096f2786SBjoern A. Zeeb 		case AF_INET:
1203*096f2786SBjoern A. Zeeb #endif
1204*096f2786SBjoern A. Zeeb #if defined(INET6) || defined(INET)
1205e5c610d6SQing Li 			rn = rnh->rnh_matchaddr(dst, rnh);
1206e5c610d6SQing Li 			if (rn && ((rn->rn_flags & RNF_ROOT) == 0)) {
1207e5c610d6SQing Li 				struct sockaddr *mask;
1208e5c610d6SQing Li 				u_char *m, *n;
1209e5c610d6SQing Li 				int len;
1210e5c610d6SQing Li 
1211e5c610d6SQing Li 				/*
1212e5c610d6SQing Li 				 * compare mask to see if the new route is
1213e5c610d6SQing Li 				 * more specific than the existing one
1214e5c610d6SQing Li 				 */
1215e5c610d6SQing Li 				rt0 = RNTORT(rn);
1216e5c610d6SQing Li 				RT_LOCK(rt0);
1217e5c610d6SQing Li 				RT_ADDREF(rt0);
1218e5c610d6SQing Li 				RT_UNLOCK(rt0);
1219e5c610d6SQing Li 				/*
1220e5c610d6SQing Li 				 * A host route is already present, so
1221e5c610d6SQing Li 				 * leave the flow-table entries as is.
1222e5c610d6SQing Li 				 */
1223e5c610d6SQing Li 				if (rt0->rt_flags & RTF_HOST) {
1224e5c610d6SQing Li 					RTFREE(rt0);
1225e5c610d6SQing Li 					rt0 = NULL;
1226e5c610d6SQing Li 				} else if (!(flags & RTF_HOST) && netmask) {
1227e5c610d6SQing Li 					mask = rt_mask(rt0);
1228e5c610d6SQing Li 					len = mask->sa_len;
1229e5c610d6SQing Li 					m = (u_char *)mask;
1230e5c610d6SQing Li 					n = (u_char *)netmask;
1231e5c610d6SQing Li 					while (len-- > 0) {
1232e5c610d6SQing Li 						if (*n != *m)
1233e5c610d6SQing Li 							break;
1234e5c610d6SQing Li 						n++;
1235e5c610d6SQing Li 						m++;
1236e5c610d6SQing Li 					}
1237e5c610d6SQing Li 					if (len == 0 || (*n < *m)) {
1238e5c610d6SQing Li 						RTFREE(rt0);
1239e5c610d6SQing Li 						rt0 = NULL;
1240e5c610d6SQing Li 					}
1241e5c610d6SQing Li 				}
1242e5c610d6SQing Li 			}
1243*096f2786SBjoern A. Zeeb #endif/* INET6 || INET */
1244e5c610d6SQing Li 		}
1245*096f2786SBjoern A. Zeeb #endif /* FLOWTABLE */
1246e5c610d6SQing Li 
1247d1dd20beSSam Leffler 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
1248d1dd20beSSam Leffler 		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
1249499676dfSJulian Elischer 		/*
1250499676dfSJulian Elischer 		 * If it still failed to go into the tree,
1251499676dfSJulian Elischer 		 * then un-make it (this should be a function)
1252499676dfSJulian Elischer 		 */
125385911824SLuigi Rizzo 		if (rn == NULL) {
12541099f828SRobert Watson 			ifa_free(rt->rt_ifa);
1255df8bae1dSRodney W. Grimes 			Free(rt_key(rt));
1256d1dd20beSSam Leffler 			RT_LOCK_DESTROY(rt);
12571ed81b73SMarko Zec 			uma_zfree(V_rtzone, rt);
1258e5c610d6SQing Li #ifdef FLOWTABLE
1259e5c610d6SQing Li 			if (rt0 != NULL)
1260e5c610d6SQing Li 				RTFREE(rt0);
1261e5c610d6SQing Li #endif
1262df8bae1dSRodney W. Grimes 			senderr(EEXIST);
1263df8bae1dSRodney W. Grimes 		}
1264e5c610d6SQing Li #ifdef FLOWTABLE
1265e5c610d6SQing Li 		else if (rt0 != NULL) {
1266*096f2786SBjoern A. Zeeb 			switch (dst->sa_family) {
1267*096f2786SBjoern A. Zeeb #ifdef INET6
1268*096f2786SBjoern A. Zeeb 			case AF_INET6:
1269*096f2786SBjoern A. Zeeb 				flowtable_route_flush(V_ip6_ft, rt0);
1270*096f2786SBjoern A. Zeeb 				break;
1271db44ff40SBjoern A. Zeeb #endif
1272*096f2786SBjoern A. Zeeb #ifdef INET
1273*096f2786SBjoern A. Zeeb 			case AF_INET:
1274*096f2786SBjoern A. Zeeb 				flowtable_route_flush(V_ip_ft, rt0);
1275*096f2786SBjoern A. Zeeb 				break;
1276*096f2786SBjoern A. Zeeb #endif
1277*096f2786SBjoern A. Zeeb 			}
1278e5c610d6SQing Li 			RTFREE(rt0);
1279e5c610d6SQing Li 		}
1280e5c610d6SQing Li #endif
1281499676dfSJulian Elischer 
1282499676dfSJulian Elischer 		/*
1283a0c0e34bSGleb Smirnoff 		 * If this protocol has something to add to this then
1284499676dfSJulian Elischer 		 * allow it to do that as well.
1285499676dfSJulian Elischer 		 */
1286df8bae1dSRodney W. Grimes 		if (ifa->ifa_rtrequest)
12878071913dSRuslan Ermilov 			ifa->ifa_rtrequest(req, rt, info);
1288499676dfSJulian Elischer 
1289cd02a0b7SGarrett Wollman 		/*
1290499676dfSJulian Elischer 		 * actually return a resultant rtentry and
1291499676dfSJulian Elischer 		 * give the caller a single reference.
1292499676dfSJulian Elischer 		 */
1293df8bae1dSRodney W. Grimes 		if (ret_nrt) {
1294df8bae1dSRodney W. Grimes 			*ret_nrt = rt;
12957138d65cSSam Leffler 			RT_ADDREF(rt);
1296df8bae1dSRodney W. Grimes 		}
1297d1dd20beSSam Leffler 		RT_UNLOCK(rt);
1298df8bae1dSRodney W. Grimes 		break;
12998071913dSRuslan Ermilov 	default:
13008071913dSRuslan Ermilov 		error = EOPNOTSUPP;
1301df8bae1dSRodney W. Grimes 	}
1302df8bae1dSRodney W. Grimes bad:
13033120b9d4SKip Macy 	if (needlock)
1304956b0b65SJeffrey Hsu 		RADIX_NODE_HEAD_UNLOCK(rnh);
1305df8bae1dSRodney W. Grimes 	return (error);
1306d1dd20beSSam Leffler #undef senderr
1307d1dd20beSSam Leffler }
1308d1dd20beSSam Leffler 
13098071913dSRuslan Ermilov #undef dst
13108071913dSRuslan Ermilov #undef gateway
13118071913dSRuslan Ermilov #undef netmask
13128071913dSRuslan Ermilov #undef ifaaddr
13138071913dSRuslan Ermilov #undef ifpaddr
13148071913dSRuslan Ermilov #undef flags
1315df8bae1dSRodney W. Grimes 
1316df8bae1dSRodney W. Grimes int
1317d1dd20beSSam Leffler rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
1318df8bae1dSRodney W. Grimes {
1319d1dd20beSSam Leffler 	/* XXX dst may be overwritten, can we move this to below */
13206e6b3f7cSQing Li 	int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
13216e6b3f7cSQing Li #ifdef INVARIANTS
1322c2c2a7c1SBjoern A. Zeeb 	struct radix_node_head *rnh;
1323c2c2a7c1SBjoern A. Zeeb 
1324c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
13256e6b3f7cSQing Li #endif
1326d1dd20beSSam Leffler 
1327d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
13283120b9d4SKip Macy 	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
1329df8bae1dSRodney W. Grimes 
13301db1fffaSBill Fenner 	/*
133185911824SLuigi Rizzo 	 * Prepare to store the gateway in rt->rt_gateway.
133285911824SLuigi Rizzo 	 * Both dst and gateway are stored one after the other in the same
133385911824SLuigi Rizzo 	 * malloc'd chunk. If we have room, we can reuse the old buffer,
133485911824SLuigi Rizzo 	 * rt_gateway already points to the right place.
133585911824SLuigi Rizzo 	 * Otherwise, malloc a new block and update the 'dst' address.
1336499676dfSJulian Elischer 	 */
133785911824SLuigi Rizzo 	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
133885911824SLuigi Rizzo 		caddr_t new;
133985911824SLuigi Rizzo 
1340df8bae1dSRodney W. Grimes 		R_Malloc(new, caddr_t, dlen + glen);
134185911824SLuigi Rizzo 		if (new == NULL)
13421db1fffaSBill Fenner 			return ENOBUFS;
1343499676dfSJulian Elischer 		/*
134485911824SLuigi Rizzo 		 * XXX note, we copy from *dst and not *rt_key(rt) because
134585911824SLuigi Rizzo 		 * rt_setgate() can be called to initialize a newly
134685911824SLuigi Rizzo 		 * allocated route entry, in which case rt_key(rt) == NULL
134785911824SLuigi Rizzo 		 * (and also rt->rt_gateway == NULL).
134885911824SLuigi Rizzo 		 * Free()/free() handle a NULL argument just fine.
1349499676dfSJulian Elischer 		 */
13501838a647SLuigi Rizzo 		bcopy(dst, new, dlen);
135185911824SLuigi Rizzo 		Free(rt_key(rt));	/* free old block, if any */
1352445e045bSAlexander Kabaev 		rt_key(rt) = (struct sockaddr *)new;
135385911824SLuigi Rizzo 		rt->rt_gateway = (struct sockaddr *)(new + dlen);
1354df8bae1dSRodney W. Grimes 	}
1355499676dfSJulian Elischer 
1356499676dfSJulian Elischer 	/*
135785911824SLuigi Rizzo 	 * Copy the new gateway value into the memory chunk.
135885911824SLuigi Rizzo 	 */
135985911824SLuigi Rizzo 	bcopy(gate, rt->rt_gateway, glen);
136085911824SLuigi Rizzo 
13616e6b3f7cSQing Li 	return (0);
1362df8bae1dSRodney W. Grimes }
1363df8bae1dSRodney W. Grimes 
/*
 * Copy sockaddr 'src' into 'dst', AND-ing the address bytes with 'netmask'.
 *
 * The first byte of 'src' and of 'netmask' is used as that object's length.
 * The two leading bytes of 'src' (sa_len and sa_family) are copied
 * verbatim.  Bytes from offset 2 up to the shorter of the two lengths are
 * copied through the mask; whatever remains of 'dst' up to the length of
 * 'src' is zero-filled.  'dst' must have room for that many bytes.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	unsigned char *from = (unsigned char *)src;
	unsigned char *to = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	unsigned char *mask_end = to + *mask;	/* dst + length of netmask */
	unsigned char *dst_end = to + *from;	/* dst + length of src */

	/* Header bytes (sa_len & sa_family) are never masked. */
	*to++ = *from++;
	*to++ = *from++;
	mask += 2;

	/* Never mask beyond the end of the source address. */
	if (mask_end > dst_end)
		mask_end = dst_end;

	for (; to < mask_end; to++, from++, mask++)
		*to = *from & *mask;

	/* Bytes not covered by the mask are treated as masked out. */
	if (to < dst_end)
		bzero(to, (size_t)(dst_end - to));
}
1382df8bae1dSRodney W. Grimes 
1383df8bae1dSRodney W. Grimes /*
1384df8bae1dSRodney W. Grimes  * Set up a routing table entry, normally
1385df8bae1dSRodney W. Grimes  * for an interface.
1386df8bae1dSRodney W. Grimes  */
13878b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
13888b07e49aSJulian Elischer static inline  int
13898b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
1390df8bae1dSRodney W. Grimes {
13915aca0b30SLuigi Rizzo 	struct sockaddr *dst;
13928071913dSRuslan Ermilov 	struct sockaddr *netmask;
139385911824SLuigi Rizzo 	struct rtentry *rt = NULL;
13948071913dSRuslan Ermilov 	struct rt_addrinfo info;
1395e440aed9SQing Li 	int error = 0;
13968b07e49aSJulian Elischer 	int startfib, endfib;
13978b07e49aSJulian Elischer 	char tempbuf[_SOCKADDR_TMPSIZE];
13988b07e49aSJulian Elischer 	int didwork = 0;
13998b07e49aSJulian Elischer 	int a_failure = 0;
14006e6b3f7cSQing Li 	static struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
1401df8bae1dSRodney W. Grimes 
14028071913dSRuslan Ermilov 	if (flags & RTF_HOST) {
14038071913dSRuslan Ermilov 		dst = ifa->ifa_dstaddr;
14048071913dSRuslan Ermilov 		netmask = NULL;
14058071913dSRuslan Ermilov 	} else {
14068071913dSRuslan Ermilov 		dst = ifa->ifa_addr;
14078071913dSRuslan Ermilov 		netmask = ifa->ifa_netmask;
14088071913dSRuslan Ermilov 	}
1409b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
1410b680a383SBjoern A. Zeeb 	case AF_INET6:
1411b680a383SBjoern A. Zeeb 	case AF_INET:
1412b680a383SBjoern A. Zeeb 		/* We support multiple FIBs. */
1413b680a383SBjoern A. Zeeb 		break;
1414b680a383SBjoern A. Zeeb 	default:
1415b680a383SBjoern A. Zeeb 		fibnum = RT_DEFAULT_FIB;
1416b680a383SBjoern A. Zeeb 		break;
1417b680a383SBjoern A. Zeeb 	}
14188b07e49aSJulian Elischer 	if (fibnum == -1) {
141966e8505fSJulian Elischer 		if (rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD) {
142066e8505fSJulian Elischer 			startfib = endfib = curthread->td_proc->p_fibnum;
142166e8505fSJulian Elischer 		} else {
14228b07e49aSJulian Elischer 			startfib = 0;
14238b07e49aSJulian Elischer 			endfib = rt_numfibs - 1;
142466e8505fSJulian Elischer 		}
14258b07e49aSJulian Elischer 	} else {
14268b07e49aSJulian Elischer 		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
14278b07e49aSJulian Elischer 		startfib = fibnum;
14288b07e49aSJulian Elischer 		endfib = fibnum;
14298b07e49aSJulian Elischer 	}
1430ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
1431ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
1432ac4a76ebSBjoern A. Zeeb 
1433b0a76b88SJulian Elischer 	/*
14348b07e49aSJulian Elischer 	 * If it's a delete, check that if it exists,
14358b07e49aSJulian Elischer 	 * it's on the correct interface or we might scrub
14368b07e49aSJulian Elischer 	 * a route to another ifa which would
1437b0a76b88SJulian Elischer 	 * be confusing at best and possibly worse.
1438b0a76b88SJulian Elischer 	 */
1439df8bae1dSRodney W. Grimes 	if (cmd == RTM_DELETE) {
1440b0a76b88SJulian Elischer 		/*
1441b0a76b88SJulian Elischer 		 * It's a delete, so it should already exist..
1442b0a76b88SJulian Elischer 		 * If it's a net, mask off the host bits
1443b0a76b88SJulian Elischer 		 * (Assuming we have a mask)
14448b07e49aSJulian Elischer 		 * XXX this is kinda inet specific..
1445b0a76b88SJulian Elischer 		 */
14468071913dSRuslan Ermilov 		if (netmask != NULL) {
14478b07e49aSJulian Elischer 			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
14488b07e49aSJulian Elischer 			dst = (struct sockaddr *)tempbuf;
1449df8bae1dSRodney W. Grimes 		}
14508b07e49aSJulian Elischer 	}
14518b07e49aSJulian Elischer 	/*
14528b07e49aSJulian Elischer 	 * Now go through all the requested tables (fibs) and do the
14538b07e49aSJulian Elischer 	 * requested action. Realistically, this will either be fib 0
14548b07e49aSJulian Elischer 	 * for protocols that don't do multiple tables or all the
14558b07e49aSJulian Elischer 	 * tables for those that do. XXX For this version only AF_INET.
14568b07e49aSJulian Elischer 	 * When that changes code should be refactored to protocol
14578b07e49aSJulian Elischer 	 * independent parts and protocol dependent parts.
14588b07e49aSJulian Elischer 	 */
14598b07e49aSJulian Elischer 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
14608b07e49aSJulian Elischer 		if (cmd == RTM_DELETE) {
14618b07e49aSJulian Elischer 			struct radix_node_head *rnh;
14628b07e49aSJulian Elischer 			struct radix_node *rn;
1463b0a76b88SJulian Elischer 			/*
14648071913dSRuslan Ermilov 			 * Look up an rtentry that is in the routing tree and
14658071913dSRuslan Ermilov 			 * contains the correct info.
1466b0a76b88SJulian Elischer 			 */
1467c2c2a7c1SBjoern A. Zeeb 			rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
1468c2c2a7c1SBjoern A. Zeeb 			if (rnh == NULL)
14698b07e49aSJulian Elischer 				/* this table doesn't exist but others might */
14708b07e49aSJulian Elischer 				continue;
1471956b0b65SJeffrey Hsu 			RADIX_NODE_HEAD_LOCK(rnh);
1472e440aed9SQing Li #ifdef RADIX_MPATH
1473e440aed9SQing Li 			if (rn_mpath_capable(rnh)) {
1474e440aed9SQing Li 
1475e440aed9SQing Li 				rn = rnh->rnh_matchaddr(dst, rnh);
1476e440aed9SQing Li 				if (rn == NULL)
1477e440aed9SQing Li 					error = ESRCH;
1478e440aed9SQing Li 				else {
1479e440aed9SQing Li 					rt = RNTORT(rn);
1480e440aed9SQing Li 					/*
14818b07e49aSJulian Elischer 					 * for interface route the
14828b07e49aSJulian Elischer 					 * rt->rt_gateway is sockaddr_intf
14838b07e49aSJulian Elischer 					 * for cloning ARP entries, so
14848b07e49aSJulian Elischer 					 * rt_mpath_matchgate must use the
14858b07e49aSJulian Elischer 					 * interface address
1486e440aed9SQing Li 					 */
14878b07e49aSJulian Elischer 					rt = rt_mpath_matchgate(rt,
14888b07e49aSJulian Elischer 					    ifa->ifa_addr);
1489e440aed9SQing Li 					if (!rt)
1490e440aed9SQing Li 						error = ESRCH;
1491e440aed9SQing Li 				}
1492e440aed9SQing Li 			}
1493e440aed9SQing Li 			else
1494e440aed9SQing Li #endif
14958b07e49aSJulian Elischer 			rn = rnh->rnh_lookup(dst, netmask, rnh);
14968b07e49aSJulian Elischer 			error = (rn == NULL ||
14978071913dSRuslan Ermilov 			    (rn->rn_flags & RNF_ROOT) ||
1498d6941ce9SLuigi Rizzo 			    RNTORT(rn)->rt_ifa != ifa ||
149985911824SLuigi Rizzo 			    !sa_equal((struct sockaddr *)rn->rn_key, dst));
1500956b0b65SJeffrey Hsu 			RADIX_NODE_HEAD_UNLOCK(rnh);
1501956b0b65SJeffrey Hsu 			if (error) {
15028b07e49aSJulian Elischer 				/* this is only an error if bad on ALL tables */
15038b07e49aSJulian Elischer 				continue;
1504df8bae1dSRodney W. Grimes 			}
1505b0a76b88SJulian Elischer 		}
1506b0a76b88SJulian Elischer 		/*
1507b0a76b88SJulian Elischer 		 * Do the actual request
1508b0a76b88SJulian Elischer 		 */
15098071913dSRuslan Ermilov 		bzero((caddr_t)&info, sizeof(info));
15108071913dSRuslan Ermilov 		info.rti_ifa = ifa;
1511e9ff3d45SKevin Lo 		info.rti_flags = flags | (ifa->ifa_flags & ~IFA_RTSELF);
15128071913dSRuslan Ermilov 		info.rti_info[RTAX_DST] = dst;
15136e6b3f7cSQing Li 		/*
15146e6b3f7cSQing Li 		 * doing this for compatibility reasons
15156e6b3f7cSQing Li 		 */
15166e6b3f7cSQing Li 		if (cmd == RTM_ADD)
15176e6b3f7cSQing Li 			info.rti_info[RTAX_GATEWAY] =
15186e6b3f7cSQing Li 			    (struct sockaddr *)&null_sdl;
15196e6b3f7cSQing Li 		else
15208071913dSRuslan Ermilov 			info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
15218071913dSRuslan Ermilov 		info.rti_info[RTAX_NETMASK] = netmask;
15228b07e49aSJulian Elischer 		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
15235aca0b30SLuigi Rizzo 		if (error == 0 && rt != NULL) {
15248071913dSRuslan Ermilov 			/*
15256f99b44cSBrian Somers 			 * notify any listening routing agents of the change
15268071913dSRuslan Ermilov 			 */
1527d1dd20beSSam Leffler 			RT_LOCK(rt);
1528e440aed9SQing Li #ifdef RADIX_MPATH
1529e440aed9SQing Li 			/*
1530e440aed9SQing Li 			 * in case address alias finds the first address
15318d74af36SBjoern A. Zeeb 			 * e.g. ifconfig bge0 192.0.2.246/24
15328d74af36SBjoern A. Zeeb 			 * e.g. ifconfig bge0 192.0.2.247/24
15338d74af36SBjoern A. Zeeb 			 * the address set in the route is 192.0.2.246
15348d74af36SBjoern A. Zeeb 			 * so we need to replace it with 192.0.2.247
1535e440aed9SQing Li 			 */
15368b07e49aSJulian Elischer 			if (memcmp(rt->rt_ifa->ifa_addr,
15378b07e49aSJulian Elischer 			    ifa->ifa_addr, ifa->ifa_addr->sa_len)) {
15381099f828SRobert Watson 				ifa_free(rt->rt_ifa);
15391099f828SRobert Watson 				ifa_ref(ifa);
1540e440aed9SQing Li 				rt->rt_ifp = ifa->ifa_ifp;
1541e440aed9SQing Li 				rt->rt_ifa = ifa;
1542e440aed9SQing Li 			}
1543e440aed9SQing Li #endif
15446e6b3f7cSQing Li 			/*
15456e6b3f7cSQing Li 			 * doing this for compatibility reasons
15466e6b3f7cSQing Li 			 */
15476e6b3f7cSQing Li 			if (cmd == RTM_ADD) {
15486e6b3f7cSQing Li 			    ((struct sockaddr_dl *)rt->rt_gateway)->sdl_type  =
15496e6b3f7cSQing Li 				rt->rt_ifp->if_type;
15506e6b3f7cSQing Li 			    ((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
15516e6b3f7cSQing Li 				rt->rt_ifp->if_index;
15526e6b3f7cSQing Li 			}
15537f279720SMichael Tuexen 			RT_ADDREF(rt);
15547f279720SMichael Tuexen 			RT_UNLOCK(rt);
1555528737fdSBjoern A. Zeeb 			rt_newaddrmsg_fib(cmd, ifa, error, rt, fibnum);
15567f279720SMichael Tuexen 			RT_LOCK(rt);
15577f279720SMichael Tuexen 			RT_REMREF(rt);
15588071913dSRuslan Ermilov 			if (cmd == RTM_DELETE) {
1559b0a76b88SJulian Elischer 				/*
15608b07e49aSJulian Elischer 				 * If we are deleting, and we found an entry,
15618b07e49aSJulian Elischer 				 * then it's been removed from the tree..
15628b07e49aSJulian Elischer 				 * now throw it away.
1563b0a76b88SJulian Elischer 				 */
1564d1dd20beSSam Leffler 				RTFREE_LOCKED(rt);
1565d1dd20beSSam Leffler 			} else {
1566d1dd20beSSam Leffler 				if (cmd == RTM_ADD) {
1567b0a76b88SJulian Elischer 					/*
15688b07e49aSJulian Elischer 					 * We just wanted to add it..
15698b07e49aSJulian Elischer 					 * we don't actually need a reference.
1570b0a76b88SJulian Elischer 					 */
15717138d65cSSam Leffler 					RT_REMREF(rt);
1572df8bae1dSRodney W. Grimes 				}
1573d1dd20beSSam Leffler 				RT_UNLOCK(rt);
1574d1dd20beSSam Leffler 			}
15758b07e49aSJulian Elischer 			didwork = 1;
1576df8bae1dSRodney W. Grimes 		}
15778b07e49aSJulian Elischer 		if (error)
15788b07e49aSJulian Elischer 			a_failure = error;
15798b07e49aSJulian Elischer 	}
15808b07e49aSJulian Elischer 	if (cmd == RTM_DELETE) {
15818b07e49aSJulian Elischer 		if (didwork) {
15828b07e49aSJulian Elischer 			error = 0;
15838b07e49aSJulian Elischer 		} else {
15848b07e49aSJulian Elischer 			/* we only give an error if it wasn't in any table */
15858b07e49aSJulian Elischer 			error = ((flags & RTF_HOST) ?
15868b07e49aSJulian Elischer 			    EHOSTUNREACH : ENETUNREACH);
15878b07e49aSJulian Elischer 		}
15888b07e49aSJulian Elischer 	} else {
15898b07e49aSJulian Elischer 		if (a_failure) {
15908b07e49aSJulian Elischer 			/* return an error if any of them failed */
15918b07e49aSJulian Elischer 			error = a_failure;
15928b07e49aSJulian Elischer 		}
15938b07e49aSJulian Elischer 	}
15943ec66d6cSDavid Greenman 	return (error);
15953ec66d6cSDavid Greenman }
1596cb64988fSLuoqi Chen 
/*
 * special one for inet internal use. may not use.
 *
 * Same as calling rtinit1() with a FIB number of -1, i.e. without
 * naming one particular FIB.  Returns whatever rtinit1() returns.
 */
int
rtinit_fib(struct ifaddr *ifa, int cmd, int flags)
{
	const int unspecified_fib = -1;

	return (rtinit1(ifa, cmd, flags, unspecified_fib));
}
16038b07e49aSJulian Elischer 
16048b07e49aSJulian Elischer /*
16058b07e49aSJulian Elischer  * Set up a routing table entry, normally
16068b07e49aSJulian Elischer  * for an interface.
16078b07e49aSJulian Elischer  */
16088b07e49aSJulian Elischer int
16098b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags)
16108b07e49aSJulian Elischer {
16118b07e49aSJulian Elischer 	struct sockaddr *dst;
16128b07e49aSJulian Elischer 	int fib = 0;
16138b07e49aSJulian Elischer 
16148b07e49aSJulian Elischer 	if (flags & RTF_HOST) {
16158b07e49aSJulian Elischer 		dst = ifa->ifa_dstaddr;
16168b07e49aSJulian Elischer 	} else {
16178b07e49aSJulian Elischer 		dst = ifa->ifa_addr;
16188b07e49aSJulian Elischer 	}
16198b07e49aSJulian Elischer 
1620b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
1621b680a383SBjoern A. Zeeb 	case AF_INET6:
1622b680a383SBjoern A. Zeeb 	case AF_INET:
1623b680a383SBjoern A. Zeeb 		/* We do support multiple FIBs. */
16248b07e49aSJulian Elischer 		fib = -1;
1625b680a383SBjoern A. Zeeb 		break;
1626b680a383SBjoern A. Zeeb 	}
16278b07e49aSJulian Elischer 	return (rtinit1(ifa, cmd, flags, fib));
16288b07e49aSJulian Elischer }
1629