xref: /freebsd/sys/net/route.c (revision da187ddb3d23836e0e079b6f7172a778a29a5732)
1c398230bSWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1980, 1986, 1991, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
15fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  *
3142e9e16dSRuslan Ermilov  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
32c3aac50fSPeter Wemm  * $FreeBSD$
33df8bae1dSRodney W. Grimes  */
348b07e49aSJulian Elischer /************************************************************************
358b07e49aSJulian Elischer  * Note: In this file a 'fib' is a "forwarding information base"	*
368b07e49aSJulian Elischer  * Which is the new name for an in kernel routing (next hop) table.	*
378b07e49aSJulian Elischer  ***********************************************************************/
38df8bae1dSRodney W. Grimes 
391d5e9e22SEivind Eklund #include "opt_inet.h"
40096f2786SBjoern A. Zeeb #include "opt_inet6.h"
414bd49128SPeter Wemm #include "opt_mrouting.h"
42e440aed9SQing Li #include "opt_mpath.h"
43d6e23cf0SMichael Tuexen #include "opt_route.h"
444bd49128SPeter Wemm 
45df8bae1dSRodney W. Grimes #include <sys/param.h>
46df8bae1dSRodney W. Grimes #include <sys/systm.h>
474d1d4912SBruce Evans #include <sys/malloc.h>
48df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
508b07e49aSJulian Elischer #include <sys/sysctl.h>
513120b9d4SKip Macy #include <sys/syslog.h>
528b07e49aSJulian Elischer #include <sys/sysproto.h>
538b07e49aSJulian Elischer #include <sys/proc.h>
54df8bae1dSRodney W. Grimes #include <sys/domain.h>
5569104ebeSMichael Tuexen #include <sys/eventhandler.h>
56cb64988fSLuoqi Chen #include <sys/kernel.h>
5720efcfc6SAndrey V. Elsukov #include <sys/lock.h>
5820efcfc6SAndrey V. Elsukov #include <sys/rmlock.h>
59df8bae1dSRodney W. Grimes 
60df8bae1dSRodney W. Grimes #include <net/if.h>
6176039bc8SGleb Smirnoff #include <net/if_var.h>
626e6b3f7cSQing Li #include <net/if_dl.h>
63df8bae1dSRodney W. Grimes #include <net/route.h>
64*da187ddbSAlexander V. Chernikov #include <net/route/route_ctl.h>
65e7d8af4fSAlexander V. Chernikov #include <net/route/route_var.h>
66a6663252SAlexander V. Chernikov #include <net/route/nhop.h>
67a6663252SAlexander V. Chernikov #include <net/route/shared.h>
68530c0060SRobert Watson #include <net/vnet.h>
69df8bae1dSRodney W. Grimes 
70e440aed9SQing Li #ifdef RADIX_MPATH
71e440aed9SQing Li #include <net/radix_mpath.h>
72e440aed9SQing Li #endif
73e440aed9SQing Li 
74df8bae1dSRodney W. Grimes #include <netinet/in.h>
75b5e8ce9fSBruce Evans #include <netinet/ip_mroute.h>
76df8bae1dSRodney W. Grimes 
772dc1d581SAndre Oppermann #include <vm/uma.h>
782dc1d581SAndre Oppermann 
794871fc4aSJulian Elischer #define	RT_MAXFIBS	UINT16_MAX
80bfca216eSBjoern A. Zeeb 
81bfca216eSBjoern A. Zeeb /* Kernel config default option. */
82bfca216eSBjoern A. Zeeb #ifdef ROUTETABLES
83bfca216eSBjoern A. Zeeb #if ROUTETABLES <= 0
84bfca216eSBjoern A. Zeeb #error "ROUTETABLES defined too low"
85bfca216eSBjoern A. Zeeb #endif
86bfca216eSBjoern A. Zeeb #if ROUTETABLES > RT_MAXFIBS
87bfca216eSBjoern A. Zeeb #error "ROUTETABLES defined too big"
88bfca216eSBjoern A. Zeeb #endif
89bfca216eSBjoern A. Zeeb #define	RT_NUMFIBS	ROUTETABLES
90bfca216eSBjoern A. Zeeb #endif /* ROUTETABLES */
91bfca216eSBjoern A. Zeeb /* Initialize to default if not otherwise set. */
92bfca216eSBjoern A. Zeeb #ifndef	RT_NUMFIBS
93bfca216eSBjoern A. Zeeb #define	RT_NUMFIBS	1
94bfca216eSBjoern A. Zeeb #endif
95bfca216eSBjoern A. Zeeb 
964871fc4aSJulian Elischer /* This is read-only.. */
978b07e49aSJulian Elischer u_int rt_numfibs = RT_NUMFIBS;
98af3b2549SHans Petter Selasky SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
998b07e49aSJulian Elischer 
/*
 * By default add routes to all fibs for new interfaces.
 * Once this is set to 0 then only allocate routes on interface
 * changes for the FIB of the caller when adding a new set of addresses
 * to an interface.  XXX this is a shotgun approach to a problem that needs
 * a more fine grained solution.. that will come.
 * XXX also has the problem of getting the FIB from curthread, which will
 * not always work given the fib can be overridden and prefixes can be added
 * from the network stack context.
 */
110ee0bd4b9SHiroki Sato VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
111ee0bd4b9SHiroki Sato SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
112ee0bd4b9SHiroki Sato     &VNET_NAME(rt_add_addr_allfibs), 0, "");
11366e8505fSJulian Elischer 
114a6663252SAlexander V. Chernikov VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat);
115185c3d2bSGleb Smirnoff 
116185c3d2bSGleb Smirnoff VNET_PCPUSTAT_SYSINIT(rtstat);
117185c3d2bSGleb Smirnoff #ifdef VIMAGE
118185c3d2bSGleb Smirnoff VNET_PCPUSTAT_SYSUNINIT(rtstat);
119185c3d2bSGleb Smirnoff #endif
120b58ea5f3SBjoern A. Zeeb 
12161eee0e2SAlexander V. Chernikov VNET_DEFINE(struct rib_head *, rt_tables);
12282cea7e6SBjoern A. Zeeb #define	V_rt_tables	VNET(rt_tables)
12382cea7e6SBjoern A. Zeeb 
124bfe1aba4SMarko Zec 
1254d2c2509SAlexander V. Chernikov VNET_DEFINE(uma_zone_t, rtzone);		/* Routing table UMA zone. */
12682cea7e6SBjoern A. Zeeb #define	V_rtzone	VNET(rtzone)
12782cea7e6SBjoern A. Zeeb 
128d6e23cf0SMichael Tuexen EVENTHANDLER_LIST_DEFINE(rt_addrmsg);
129d6e23cf0SMichael Tuexen 
130539642a2SAlexander V. Chernikov static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
131539642a2SAlexander V. Chernikov     void *arg);
1322bbab0afSAlexander V. Chernikov static void destroy_rtentry_epoch(epoch_context_t ctx);
1339a1b64d5SAlexander V. Chernikov static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
1349a1b64d5SAlexander V. Chernikov     int flags);
135c77462ddSAlexander V. Chernikov 
1368b07e49aSJulian Elischer /*
1378b07e49aSJulian Elischer  * handler for net.my_fibnum
1388b07e49aSJulian Elischer  */
1398b07e49aSJulian Elischer static int
1408b07e49aSJulian Elischer sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
141df8bae1dSRodney W. Grimes {
1428b07e49aSJulian Elischer         int fibnum;
1438b07e49aSJulian Elischer         int error;
1448b07e49aSJulian Elischer 
1458b07e49aSJulian Elischer         fibnum = curthread->td_proc->p_fibnum;
1468b07e49aSJulian Elischer         error = sysctl_handle_int(oidp, &fibnum, 0, req);
1478b07e49aSJulian Elischer         return (error);
148df8bae1dSRodney W. Grimes }
149df8bae1dSRodney W. Grimes 
1507029da5cSPawel Biernacki SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
1517029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
1527029da5cSPawel Biernacki     &sysctl_my_fibnum, "I",
1537029da5cSPawel Biernacki     "default FIB of caller");
1542dc1d581SAndre Oppermann 
15561eee0e2SAlexander V. Chernikov static __inline struct rib_head **
156c2c2a7c1SBjoern A. Zeeb rt_tables_get_rnh_ptr(int table, int fam)
157c2c2a7c1SBjoern A. Zeeb {
15861eee0e2SAlexander V. Chernikov 	struct rib_head **rnh;
159c2c2a7c1SBjoern A. Zeeb 
16034a5582cSAlexander V. Chernikov 	KASSERT(table >= 0 && table < rt_numfibs,
16134a5582cSAlexander V. Chernikov 	    ("%s: table out of bounds (0 <= %d < %d)", __func__, table,
16234a5582cSAlexander V. Chernikov 	     rt_numfibs));
16334a5582cSAlexander V. Chernikov 	KASSERT(fam >= 0 && fam < (AF_MAX + 1),
16434a5582cSAlexander V. Chernikov 	    ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1));
165c2c2a7c1SBjoern A. Zeeb 
166c2c2a7c1SBjoern A. Zeeb 	/* rnh is [fib=0][af=0]. */
16761eee0e2SAlexander V. Chernikov 	rnh = (struct rib_head **)V_rt_tables;
168c2c2a7c1SBjoern A. Zeeb 	/* Get the offset to the requested table and fam. */
169c2c2a7c1SBjoern A. Zeeb 	rnh += table * (AF_MAX+1) + fam;
170c2c2a7c1SBjoern A. Zeeb 
171c2c2a7c1SBjoern A. Zeeb 	return (rnh);
172c2c2a7c1SBjoern A. Zeeb }
173c2c2a7c1SBjoern A. Zeeb 
/*
 * Return the rib_head stored for (table, fam); the stored pointer is
 * returned as-is and may be NULL.
 */
struct rib_head *
rt_tables_get_rnh(int table, int fam)
{
	struct rib_head **slot;

	slot = rt_tables_get_rnh_ptr(table, fam);
	return (*slot);
}
180c2c2a7c1SBjoern A. Zeeb 
1814f321dbdSBjoern A. Zeeb u_int
18284cc0778SGeorge V. Neville-Neil rt_tables_get_gen(int table, int fam)
18384cc0778SGeorge V. Neville-Neil {
18484cc0778SGeorge V. Neville-Neil 	struct rib_head *rnh;
18584cc0778SGeorge V. Neville-Neil 
18684cc0778SGeorge V. Neville-Neil 	rnh = *rt_tables_get_rnh_ptr(table, fam);
1876d768226SGeorge V. Neville-Neil 	KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d",
1886d768226SGeorge V. Neville-Neil 	    __func__, table, fam));
18984cc0778SGeorge V. Neville-Neil 	return (rnh->rnh_gen);
19084cc0778SGeorge V. Neville-Neil }
19184cc0778SGeorge V. Neville-Neil 
19284cc0778SGeorge V. Neville-Neil 
/*
 * route initialization must occur before ip6_init2(), which happens at
 * SI_ORDER_MIDDLE.
 */
1972eb5613fSLuigi Rizzo static void
1982eb5613fSLuigi Rizzo route_init(void)
199df8bae1dSRodney W. Grimes {
2008b07e49aSJulian Elischer 
2016f95a5ebSJulian Elischer 	/* whack the tunable ints into  line. */
2028b07e49aSJulian Elischer 	if (rt_numfibs > RT_MAXFIBS)
2038b07e49aSJulian Elischer 		rt_numfibs = RT_MAXFIBS;
2048b07e49aSJulian Elischer 	if (rt_numfibs == 0)
2058b07e49aSJulian Elischer 		rt_numfibs = 1;
206a6663252SAlexander V. Chernikov 	nhops_init();
2071ed81b73SMarko Zec }
208891cf3edSEd Maste SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL);
2091ed81b73SMarko Zec 
/*
 * Init callback passed to uma_zcreate() for the rtentry zone: sets up
 * the entry's lock.  'size' and 'how' are unused.  Always returns 0.
 */
static int
rtentry_zinit(void *mem, int size, int how)
{
	struct rtentry *entry;

	entry = mem;
	RT_LOCK_INIT(entry);

	return (0);
}
219e3a7aa6fSGleb Smirnoff 
/*
 * Fini callback passed to uma_zcreate() for the rtentry zone: tears
 * down the lock set up by rtentry_zinit().  'size' is unused.
 */
static void
rtentry_zfini(void *mem, int size)
{
	struct rtentry *entry;

	entry = mem;
	RT_LOCK_DESTROY(entry);
}
227e3a7aa6fSGleb Smirnoff 
228e3a7aa6fSGleb Smirnoff static int
229e3a7aa6fSGleb Smirnoff rtentry_ctor(void *mem, int size, void *arg, int how)
230e3a7aa6fSGleb Smirnoff {
231e3a7aa6fSGleb Smirnoff 	struct rtentry *rt = mem;
232e3a7aa6fSGleb Smirnoff 
233e3a7aa6fSGleb Smirnoff 	bzero(rt, offsetof(struct rtentry, rt_endzero));
234e8b0643eSAlexander V. Chernikov 	rt->rt_chain = NULL;
235e3a7aa6fSGleb Smirnoff 
236e3a7aa6fSGleb Smirnoff 	return (0);
237e3a7aa6fSGleb Smirnoff }
238e3a7aa6fSGleb Smirnoff 
/*
 * Destructor callback passed to uma_zcreate() for the rtentry zone:
 * applies RT_UNLOCK_COND() to the entry being released.
 * 'size' and 'arg' are unused.
 */
static void
rtentry_dtor(void *mem, int size, void *arg)
{
	struct rtentry *entry;

	entry = mem;
	RT_UNLOCK_COND(entry);
}
246256ea2abSGleb Smirnoff 
247256ea2abSGleb Smirnoff static void
248d0728d71SRobert Watson vnet_route_init(const void *unused __unused)
2491ed81b73SMarko Zec {
2501ed81b73SMarko Zec 	struct domain *dom;
25161eee0e2SAlexander V. Chernikov 	struct rib_head **rnh;
252c2c2a7c1SBjoern A. Zeeb 	int table;
2531ed81b73SMarko Zec 	int fam;
2541ed81b73SMarko Zec 
255c2c2a7c1SBjoern A. Zeeb 	V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
25661eee0e2SAlexander V. Chernikov 	    sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO);
257c2c2a7c1SBjoern A. Zeeb 
258e3a7aa6fSGleb Smirnoff 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
259256ea2abSGleb Smirnoff 	    rtentry_ctor, rtentry_dtor,
260e3a7aa6fSGleb Smirnoff 	    rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
2618b07e49aSJulian Elischer 	for (dom = domains; dom; dom = dom->dom_next) {
262b680a383SBjoern A. Zeeb 		if (dom->dom_rtattach == NULL)
263b680a383SBjoern A. Zeeb 			continue;
264b680a383SBjoern A. Zeeb 
2658b07e49aSJulian Elischer 		for  (table = 0; table < rt_numfibs; table++) {
266b680a383SBjoern A. Zeeb 			fam = dom->dom_family;
267b680a383SBjoern A. Zeeb 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
268b680a383SBjoern A. Zeeb 				break;
269b680a383SBjoern A. Zeeb 
270c2c2a7c1SBjoern A. Zeeb 			rnh = rt_tables_get_rnh_ptr(table, fam);
271c2c2a7c1SBjoern A. Zeeb 			if (rnh == NULL)
272c2c2a7c1SBjoern A. Zeeb 				panic("%s: rnh NULL", __func__);
273ead85fe4SAlexander V. Chernikov 			dom->dom_rtattach((void **)rnh, 0, table);
2748b07e49aSJulian Elischer 		}
2758b07e49aSJulian Elischer 	}
2768b07e49aSJulian Elischer }
277d0728d71SRobert Watson VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
278d0728d71SRobert Watson     vnet_route_init, 0);
2798b07e49aSJulian Elischer 
280bc29160dSMarko Zec #ifdef VIMAGE
281d0728d71SRobert Watson static void
282d0728d71SRobert Watson vnet_route_uninit(const void *unused __unused)
283bc29160dSMarko Zec {
284bc29160dSMarko Zec 	int table;
285bc29160dSMarko Zec 	int fam;
286bc29160dSMarko Zec 	struct domain *dom;
28761eee0e2SAlexander V. Chernikov 	struct rib_head **rnh;
288bc29160dSMarko Zec 
289bc29160dSMarko Zec 	for (dom = domains; dom; dom = dom->dom_next) {
290b680a383SBjoern A. Zeeb 		if (dom->dom_rtdetach == NULL)
291b680a383SBjoern A. Zeeb 			continue;
292b680a383SBjoern A. Zeeb 
293bc29160dSMarko Zec 		for (table = 0; table < rt_numfibs; table++) {
294b680a383SBjoern A. Zeeb 			fam = dom->dom_family;
295b680a383SBjoern A. Zeeb 
296b680a383SBjoern A. Zeeb 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
297b680a383SBjoern A. Zeeb 				break;
298b680a383SBjoern A. Zeeb 
299bc29160dSMarko Zec 			rnh = rt_tables_get_rnh_ptr(table, fam);
300bc29160dSMarko Zec 			if (rnh == NULL)
301bc29160dSMarko Zec 				panic("%s: rnh NULL", __func__);
30257c3556bSAlexander V. Chernikov 			dom->dom_rtdetach((void **)rnh, 0);
303bc29160dSMarko Zec 		}
304bc29160dSMarko Zec 	}
3056274ce3eSCraig Rodrigues 
3062bbab0afSAlexander V. Chernikov 	/*
3072bbab0afSAlexander V. Chernikov 	 * dom_rtdetach calls rt_table_destroy(), which
3082bbab0afSAlexander V. Chernikov 	 *  schedules deletion for all rtentries, nexthops and control
3092bbab0afSAlexander V. Chernikov 	 *  structures. Wait for the destruction callbacks to fire.
3102bbab0afSAlexander V. Chernikov 	 * Note that this should result in freeing all rtentries, but
3112bbab0afSAlexander V. Chernikov 	 *  nexthops deletions will be scheduled for the next epoch run
3122bbab0afSAlexander V. Chernikov 	 *  and will be completed after vnet teardown.
3132bbab0afSAlexander V. Chernikov 	 */
3142bbab0afSAlexander V. Chernikov 	epoch_drain_callbacks(net_epoch_preempt);
3152bbab0afSAlexander V. Chernikov 
3166274ce3eSCraig Rodrigues 	free(V_rt_tables, M_RTABLE);
3176274ce3eSCraig Rodrigues 	uma_zdestroy(V_rtzone);
318bc29160dSMarko Zec }
31989856f7eSBjoern A. Zeeb VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
320d0728d71SRobert Watson     vnet_route_uninit, 0);
321bc29160dSMarko Zec #endif
322bc29160dSMarko Zec 
32361eee0e2SAlexander V. Chernikov struct rib_head *
324ead85fe4SAlexander V. Chernikov rt_table_init(int offset, int family, u_int fibnum)
32561eee0e2SAlexander V. Chernikov {
32661eee0e2SAlexander V. Chernikov 	struct rib_head *rh;
32761eee0e2SAlexander V. Chernikov 
32861eee0e2SAlexander V. Chernikov 	rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO);
32961eee0e2SAlexander V. Chernikov 
33061eee0e2SAlexander V. Chernikov 	/* TODO: These details should be hidded inside radix.c */
33161eee0e2SAlexander V. Chernikov 	/* Init masks tree */
33261eee0e2SAlexander V. Chernikov 	rn_inithead_internal(&rh->head, rh->rnh_nodes, offset);
33361eee0e2SAlexander V. Chernikov 	rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0);
33461eee0e2SAlexander V. Chernikov 	rh->head.rnh_masks = &rh->rmhead;
33561eee0e2SAlexander V. Chernikov 
336ead85fe4SAlexander V. Chernikov 	/* Save metadata associated with this routing table. */
337ead85fe4SAlexander V. Chernikov 	rh->rib_family = family;
338ead85fe4SAlexander V. Chernikov 	rh->rib_fibnum = fibnum;
339ead85fe4SAlexander V. Chernikov #ifdef VIMAGE
340ead85fe4SAlexander V. Chernikov 	rh->rib_vnet = curvnet;
341ead85fe4SAlexander V. Chernikov #endif
342ead85fe4SAlexander V. Chernikov 
34334a5582cSAlexander V. Chernikov 	tmproutes_init(rh);
34434a5582cSAlexander V. Chernikov 
34561eee0e2SAlexander V. Chernikov 	/* Init locks */
346abe95d87SAndrey V. Elsukov 	RIB_LOCK_INIT(rh);
34761eee0e2SAlexander V. Chernikov 
348a6663252SAlexander V. Chernikov 	nhops_init_rib(rh);
349a6663252SAlexander V. Chernikov 
350*da187ddbSAlexander V. Chernikov 	/* Init subscription system */
351*da187ddbSAlexander V. Chernikov 	CK_STAILQ_INIT(&rh->rnh_subscribers);
352*da187ddbSAlexander V. Chernikov 
35361eee0e2SAlexander V. Chernikov 	/* Finally, set base callbacks */
35461eee0e2SAlexander V. Chernikov 	rh->rnh_addaddr = rn_addroute;
35561eee0e2SAlexander V. Chernikov 	rh->rnh_deladdr = rn_delete;
35661eee0e2SAlexander V. Chernikov 	rh->rnh_matchaddr = rn_match;
35761eee0e2SAlexander V. Chernikov 	rh->rnh_lookup = rn_lookup;
35861eee0e2SAlexander V. Chernikov 	rh->rnh_walktree = rn_walktree;
35961eee0e2SAlexander V. Chernikov 	rh->rnh_walktree_from = rn_walktree_from;
36061eee0e2SAlexander V. Chernikov 
36161eee0e2SAlexander V. Chernikov 	return (rh);
36261eee0e2SAlexander V. Chernikov }
36361eee0e2SAlexander V. Chernikov 
364a5243af2SBjoern A. Zeeb static int
365a5243af2SBjoern A. Zeeb rt_freeentry(struct radix_node *rn, void *arg)
366a5243af2SBjoern A. Zeeb {
367a5243af2SBjoern A. Zeeb 	struct radix_head * const rnh = arg;
368a5243af2SBjoern A. Zeeb 	struct radix_node *x;
369a5243af2SBjoern A. Zeeb 
370a5243af2SBjoern A. Zeeb 	x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
371a5243af2SBjoern A. Zeeb 	if (x != NULL)
372a5243af2SBjoern A. Zeeb 		R_Free(x);
373a5243af2SBjoern A. Zeeb 	return (0);
374a5243af2SBjoern A. Zeeb }
375a5243af2SBjoern A. Zeeb 
37661eee0e2SAlexander V. Chernikov void
37761eee0e2SAlexander V. Chernikov rt_table_destroy(struct rib_head *rh)
37861eee0e2SAlexander V. Chernikov {
37961eee0e2SAlexander V. Chernikov 
38034a5582cSAlexander V. Chernikov 	tmproutes_destroy(rh);
38134a5582cSAlexander V. Chernikov 
382a5243af2SBjoern A. Zeeb 	rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
383a5243af2SBjoern A. Zeeb 
384a6663252SAlexander V. Chernikov 	nhops_destroy_rib(rh);
385a6663252SAlexander V. Chernikov 
38661eee0e2SAlexander V. Chernikov 	/* Assume table is already empty */
387abe95d87SAndrey V. Elsukov 	RIB_LOCK_DESTROY(rh);
38861eee0e2SAlexander V. Chernikov 	free(rh, M_RTABLE);
38961eee0e2SAlexander V. Chernikov }
39061eee0e2SAlexander V. Chernikov 
39161eee0e2SAlexander V. Chernikov 
3928b07e49aSJulian Elischer #ifndef _SYS_SYSPROTO_H_
3938b07e49aSJulian Elischer struct setfib_args {
3948b07e49aSJulian Elischer 	int     fibnum;
3958b07e49aSJulian Elischer };
3968b07e49aSJulian Elischer #endif
3978b07e49aSJulian Elischer int
3988451d0ddSKip Macy sys_setfib(struct thread *td, struct setfib_args *uap)
3998b07e49aSJulian Elischer {
4008b07e49aSJulian Elischer 	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
4018b07e49aSJulian Elischer 		return EINVAL;
4028b07e49aSJulian Elischer 	td->td_proc->p_fibnum = uap->fibnum;
4038b07e49aSJulian Elischer 	return (0);
404df8bae1dSRodney W. Grimes }
405df8bae1dSRodney W. Grimes 
406df8bae1dSRodney W. Grimes /*
407499676dfSJulian Elischer  * Remove a reference count from an rtentry.
408499676dfSJulian Elischer  * If the count gets low enough, take it out of the routing table
409499676dfSJulian Elischer  */
410df8bae1dSRodney W. Grimes void
411d1dd20beSSam Leffler rtfree(struct rtentry *rt)
412df8bae1dSRodney W. Grimes {
413df8bae1dSRodney W. Grimes 
414a0c0e34bSGleb Smirnoff 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
415499676dfSJulian Elischer 
416d1dd20beSSam Leffler 	RT_LOCK_ASSERT(rt);
417d1dd20beSSam Leffler 
418cb86ca48SAlexander V. Chernikov 	RT_UNLOCK(rt);
4192bbab0afSAlexander V. Chernikov 	epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
4202bbab0afSAlexander V. Chernikov 	    &rt->rt_epoch_ctx);
421df8bae1dSRodney W. Grimes }
422df8bae1dSRodney W. Grimes 
4232bbab0afSAlexander V. Chernikov static void
4242bbab0afSAlexander V. Chernikov destroy_rtentry(struct rtentry *rt)
425dd4776f0SAlexander V. Chernikov {
426dd4776f0SAlexander V. Chernikov 
4272bbab0afSAlexander V. Chernikov 	/*
4282bbab0afSAlexander V. Chernikov 	 * At this moment rnh, nh_control may be already freed.
4292bbab0afSAlexander V. Chernikov 	 * nhop interface may have been migrated to a different vnet.
4302bbab0afSAlexander V. Chernikov 	 * Use vnet stored in the nexthop to delete the entry.
4312bbab0afSAlexander V. Chernikov 	 */
4322bbab0afSAlexander V. Chernikov 	CURVNET_SET(nhop_get_vnet(rt->rt_nhop));
4332bbab0afSAlexander V. Chernikov 
4342bbab0afSAlexander V. Chernikov 	/* Unreference nexthop */
4352bbab0afSAlexander V. Chernikov 	nhop_free(rt->rt_nhop);
4362bbab0afSAlexander V. Chernikov 
4372bbab0afSAlexander V. Chernikov 	uma_zfree(V_rtzone, rt);
4382bbab0afSAlexander V. Chernikov 
4392bbab0afSAlexander V. Chernikov 	CURVNET_RESTORE();
4402bbab0afSAlexander V. Chernikov }
4412bbab0afSAlexander V. Chernikov 
4422bbab0afSAlexander V. Chernikov /*
4432bbab0afSAlexander V. Chernikov  * Epoch callback indicating rtentry is safe to destroy
4442bbab0afSAlexander V. Chernikov  */
4452bbab0afSAlexander V. Chernikov static void
4462bbab0afSAlexander V. Chernikov destroy_rtentry_epoch(epoch_context_t ctx)
4472bbab0afSAlexander V. Chernikov {
4482bbab0afSAlexander V. Chernikov 	struct rtentry *rt;
4492bbab0afSAlexander V. Chernikov 
4502bbab0afSAlexander V. Chernikov 	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);
4512bbab0afSAlexander V. Chernikov 
4522bbab0afSAlexander V. Chernikov 	destroy_rtentry(rt);
453dd4776f0SAlexander V. Chernikov }
454dd4776f0SAlexander V. Chernikov 
/*
 * Adds a temporary redirect entry to the routing table.
 * @fibnum: fib number
 * @dst: destination to install redirect to
 * @gateway: gateway to go via
 * @author: sockaddr of originating router, can be NULL
 * @ifp: interface to use for the redirected route
 * @flags: set of flags to add. Allowed: RTF_GATEWAY
 * @lifetime_sec: time in seconds to expire this redirect.
 *
 * Returns 0 on success, errno otherwise.
 */
46734a5582cSAlexander V. Chernikov int
46834a5582cSAlexander V. Chernikov rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
46934a5582cSAlexander V. Chernikov     struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec)
4708b07e49aSJulian Elischer {
471f2b2e77aSAlexander V. Chernikov 	struct rtentry *rt;
47234a5582cSAlexander V. Chernikov 	int error;
473df8bae1dSRodney W. Grimes 	struct rt_addrinfo info;
47434a5582cSAlexander V. Chernikov 	struct rt_metrics rti_rmx;
475df8bae1dSRodney W. Grimes 	struct ifaddr *ifa;
476c2c2a7c1SBjoern A. Zeeb 
477b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
478b8a6e03fSGleb Smirnoff 
47934a5582cSAlexander V. Chernikov 	if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL)
48034a5582cSAlexander V. Chernikov 		return (EAFNOSUPPORT);
4818e7e854cSKip Macy 
48234a5582cSAlexander V. Chernikov 	/* Verify the allowed flag mask. */
48334a5582cSAlexander V. Chernikov 	KASSERT(((flags & ~(RTF_GATEWAY)) == 0),
48434a5582cSAlexander V. Chernikov 	    ("invalid redirect flags: %x", flags));
48534a5582cSAlexander V. Chernikov 
48634a5582cSAlexander V. Chernikov 	/* Get the best ifa for the given interface and gateway. */
48734a5582cSAlexander V. Chernikov 	if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL)
48834a5582cSAlexander V. Chernikov 		return (ENETUNREACH);
48934a5582cSAlexander V. Chernikov 	ifa_ref(ifa);
49034a5582cSAlexander V. Chernikov 
49134a5582cSAlexander V. Chernikov 	bzero(&info, sizeof(info));
4928071913dSRuslan Ermilov 	info.rti_info[RTAX_DST] = dst;
4938071913dSRuslan Ermilov 	info.rti_info[RTAX_GATEWAY] = gateway;
4948071913dSRuslan Ermilov 	info.rti_ifa = ifa;
49534a5582cSAlexander V. Chernikov 	info.rti_ifp = ifp;
496ea277332SAlexander V. Chernikov 	info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC;
49734a5582cSAlexander V. Chernikov 
49834a5582cSAlexander V. Chernikov 	/* Setup route metrics to define expire time. */
49934a5582cSAlexander V. Chernikov 	bzero(&rti_rmx, sizeof(rti_rmx));
50034a5582cSAlexander V. Chernikov 	/* Set expire time as absolute. */
50134a5582cSAlexander V. Chernikov 	rti_rmx.rmx_expire = lifetime_sec + time_second;
50234a5582cSAlexander V. Chernikov 	info.rti_mflags |= RTV_EXPIRE;
50334a5582cSAlexander V. Chernikov 	info.rti_rmx = &rti_rmx;
50434a5582cSAlexander V. Chernikov 
5058b07e49aSJulian Elischer 	error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
50634a5582cSAlexander V. Chernikov 	ifa_free(ifa);
50734a5582cSAlexander V. Chernikov 
50834a5582cSAlexander V. Chernikov 	if (error != 0) {
50934a5582cSAlexander V. Chernikov 		/* TODO: add per-fib redirect stats. */
51034a5582cSAlexander V. Chernikov 		return (error);
51134a5582cSAlexander V. Chernikov 	}
51234a5582cSAlexander V. Chernikov 
5134de5d90cSSam Leffler 	RT_LOCK(rt);
5148071913dSRuslan Ermilov 	flags = rt->rt_flags;
5152bbab0afSAlexander V. Chernikov 	RT_UNLOCK(rt);
51634a5582cSAlexander V. Chernikov 
51734a5582cSAlexander V. Chernikov 	RTSTAT_INC(rts_dynamic);
51834a5582cSAlexander V. Chernikov 
51934a5582cSAlexander V. Chernikov 	/* Send notification of a route addition to userland. */
52034a5582cSAlexander V. Chernikov 	bzero(&info, sizeof(info));
521df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_DST] = dst;
522df8bae1dSRodney W. Grimes 	info.rti_info[RTAX_GATEWAY] = gateway;
52334a5582cSAlexander V. Chernikov 	info.rti_info[RTAX_AUTHOR] = author;
524528737fdSBjoern A. Zeeb 	rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
52534a5582cSAlexander V. Chernikov 
52634a5582cSAlexander V. Chernikov 	return (0);
527df8bae1dSRodney W. Grimes }
528df8bae1dSRodney W. Grimes 
529df8bae1dSRodney W. Grimes /*
530df8bae1dSRodney W. Grimes  * Routing table ioctl interface.
531df8bae1dSRodney W. Grimes  */
532df8bae1dSRodney W. Grimes int
5338b07e49aSJulian Elischer rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
534df8bae1dSRodney W. Grimes {
5355090559bSChristian S.J. Peron 
5365090559bSChristian S.J. Peron 	/*
5375090559bSChristian S.J. Peron 	 * If more ioctl commands are added here, make sure the proper
5385090559bSChristian S.J. Peron 	 * super-user checks are being performed because it is possible for
5395090559bSChristian S.J. Peron 	 * prison-root to make it this far if raw sockets have been enabled
5405090559bSChristian S.J. Peron 	 * in jails.
5415090559bSChristian S.J. Peron 	 */
542623ae52eSPoul-Henning Kamp #ifdef INET
543f0068c4aSGarrett Wollman 	/* Multicast goop, grrr... */
5448b07e49aSJulian Elischer 	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
545623ae52eSPoul-Henning Kamp #else /* INET */
546623ae52eSPoul-Henning Kamp 	return ENXIO;
547623ae52eSPoul-Henning Kamp #endif /* INET */
548df8bae1dSRodney W. Grimes }
549df8bae1dSRodney W. Grimes 
550df8bae1dSRodney W. Grimes struct ifaddr *
5514d2c2509SAlexander V. Chernikov ifa_ifwithroute(int flags, const struct sockaddr *dst,
5524d2c2509SAlexander V. Chernikov     const struct sockaddr *gateway, u_int fibnum)
5538b07e49aSJulian Elischer {
554f59c6cb0SAlexander V. Chernikov 	struct ifaddr *ifa;
555d1dd20beSSam Leffler 
55697168be8SGleb Smirnoff 	NET_EPOCH_ASSERT();
557df8bae1dSRodney W. Grimes 	if ((flags & RTF_GATEWAY) == 0) {
558df8bae1dSRodney W. Grimes 		/*
559df8bae1dSRodney W. Grimes 		 * If we are adding a route to an interface,
560df8bae1dSRodney W. Grimes 		 * and the interface is a pt to pt link
561df8bae1dSRodney W. Grimes 		 * we should search for the destination
562df8bae1dSRodney W. Grimes 		 * as our clue to the interface.  Otherwise
563df8bae1dSRodney W. Grimes 		 * we can use the local address.
564df8bae1dSRodney W. Grimes 		 */
56585911824SLuigi Rizzo 		ifa = NULL;
56685911824SLuigi Rizzo 		if (flags & RTF_HOST)
5674f8585e0SAlan Somers 			ifa = ifa_ifwithdstaddr(dst, fibnum);
56885911824SLuigi Rizzo 		if (ifa == NULL)
569df8bae1dSRodney W. Grimes 			ifa = ifa_ifwithaddr(gateway);
570df8bae1dSRodney W. Grimes 	} else {
571df8bae1dSRodney W. Grimes 		/*
572df8bae1dSRodney W. Grimes 		 * If we are adding a route to a remote net
573df8bae1dSRodney W. Grimes 		 * or host, the gateway may still be on the
574df8bae1dSRodney W. Grimes 		 * other end of a pt to pt link.
575df8bae1dSRodney W. Grimes 		 */
5764f8585e0SAlan Somers 		ifa = ifa_ifwithdstaddr(gateway, fibnum);
577df8bae1dSRodney W. Grimes 	}
57885911824SLuigi Rizzo 	if (ifa == NULL)
5794f8585e0SAlan Somers 		ifa = ifa_ifwithnet(gateway, 0, fibnum);
58085911824SLuigi Rizzo 	if (ifa == NULL) {
581682b902dSAlexander V. Chernikov 		struct nhop_object *nh;
582b83aa367SAndrey V. Elsukov 
583682b902dSAlexander V. Chernikov 		nh = rib_lookup(fibnum, gateway, NHR_NONE, 0);
584682b902dSAlexander V. Chernikov 
585e034e82cSQing Li 		/*
586e034e82cSQing Li 		 * dismiss a gateway that is reachable only
587e034e82cSQing Li 		 * through the default router
588e034e82cSQing Li 		 */
589682b902dSAlexander V. Chernikov 		if ((nh == NULL) || (nh->nh_flags & NHF_DEFAULT))
590682b902dSAlexander V. Chernikov 			return (NULL);
591682b902dSAlexander V. Chernikov 		ifa = nh->nh_ifa;
592df8bae1dSRodney W. Grimes 	}
593df8bae1dSRodney W. Grimes 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
594df8bae1dSRodney W. Grimes 		struct ifaddr *oifa = ifa;
595df8bae1dSRodney W. Grimes 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
59685911824SLuigi Rizzo 		if (ifa == NULL)
597df8bae1dSRodney W. Grimes 			ifa = oifa;
598df8bae1dSRodney W. Grimes 	}
599682b902dSAlexander V. Chernikov 
600df8bae1dSRodney W. Grimes 	return (ifa);
601df8bae1dSRodney W. Grimes }
602df8bae1dSRodney W. Grimes 
603b0a76b88SJulian Elischer /*
604b0a76b88SJulian Elischer  * Do appropriate manipulations of a routing tree given
605b0a76b88SJulian Elischer  * all the bits of info needed
606b0a76b88SJulian Elischer  */
607df8bae1dSRodney W. Grimes int
6088b07e49aSJulian Elischer rtrequest_fib(int req,
6098b07e49aSJulian Elischer 	struct sockaddr *dst,
6108b07e49aSJulian Elischer 	struct sockaddr *gateway,
6118b07e49aSJulian Elischer 	struct sockaddr *netmask,
6128b07e49aSJulian Elischer 	int flags,
6138b07e49aSJulian Elischer 	struct rtentry **ret_nrt,
6148b07e49aSJulian Elischer 	u_int fibnum)
6158b07e49aSJulian Elischer {
6168071913dSRuslan Ermilov 	struct rt_addrinfo info;
6178071913dSRuslan Ermilov 
618ac4a76ebSBjoern A. Zeeb 	if (dst->sa_len == 0)
619ac4a76ebSBjoern A. Zeeb 		return(EINVAL);
620ac4a76ebSBjoern A. Zeeb 
6218071913dSRuslan Ermilov 	bzero((caddr_t)&info, sizeof(info));
6228071913dSRuslan Ermilov 	info.rti_flags = flags;
6238071913dSRuslan Ermilov 	info.rti_info[RTAX_DST] = dst;
6248071913dSRuslan Ermilov 	info.rti_info[RTAX_GATEWAY] = gateway;
6258071913dSRuslan Ermilov 	info.rti_info[RTAX_NETMASK] = netmask;
6268b07e49aSJulian Elischer 	return rtrequest1_fib(req, &info, ret_nrt, fibnum);
6278071913dSRuslan Ermilov }
6288071913dSRuslan Ermilov 
6294bdf0b6aSAlexander V. Chernikov 
6302caee4beSAlexander V. Chernikov /*
6319a1b64d5SAlexander V. Chernikov  * Copy most of @rt data into @info.
6329a1b64d5SAlexander V. Chernikov  *
6339a1b64d5SAlexander V. Chernikov  * If @flags contains NHR_COPY, copies dst,netmask and gw to the
6349a1b64d5SAlexander V. Chernikov  * pointers specified by @info structure. Assume such pointers
6359a1b64d5SAlexander V. Chernikov  * are zeroed sockaddr-like structures with sa_len field initialized
6369a1b64d5SAlexander V. Chernikov  * to reflect size of the provided buffer. if no NHR_COPY is specified,
6379a1b64d5SAlexander V. Chernikov  * point dst,netmask and gw @info fields to appropriate @rt values.
6389a1b64d5SAlexander V. Chernikov  *
6396b5d8e30SMark Johnston  * if @flags contains NHR_REF, do refcouting on rt_ifp and rt_ifa.
6409a1b64d5SAlexander V. Chernikov  *
6419a1b64d5SAlexander V. Chernikov  * Returns 0 on success.
6429a1b64d5SAlexander V. Chernikov  */
6439a1b64d5SAlexander V. Chernikov int
6449a1b64d5SAlexander V. Chernikov rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
6459a1b64d5SAlexander V. Chernikov {
6469a1b64d5SAlexander V. Chernikov 	struct rt_metrics *rmx;
6479a1b64d5SAlexander V. Chernikov 	struct sockaddr *src, *dst;
648aaad3c4fSAlexander V. Chernikov 	struct nhop_object *nh;
6499a1b64d5SAlexander V. Chernikov 	int sa_len;
6509a1b64d5SAlexander V. Chernikov 
6519a1b64d5SAlexander V. Chernikov 	if (flags & NHR_COPY) {
6529a1b64d5SAlexander V. Chernikov 		/* Copy destination if dst is non-zero */
6539a1b64d5SAlexander V. Chernikov 		src = rt_key(rt);
6549a1b64d5SAlexander V. Chernikov 		dst = info->rti_info[RTAX_DST];
6559a1b64d5SAlexander V. Chernikov 		sa_len = src->sa_len;
65616703ea8SAlexander V. Chernikov 		if (dst != NULL) {
6579a1b64d5SAlexander V. Chernikov 			if (src->sa_len > dst->sa_len)
6589a1b64d5SAlexander V. Chernikov 				return (ENOMEM);
6599a1b64d5SAlexander V. Chernikov 			memcpy(dst, src, src->sa_len);
6609a1b64d5SAlexander V. Chernikov 			info->rti_addrs |= RTA_DST;
6619a1b64d5SAlexander V. Chernikov 		}
6629a1b64d5SAlexander V. Chernikov 
6639a1b64d5SAlexander V. Chernikov 		/* Copy mask if set && dst is non-zero */
6649a1b64d5SAlexander V. Chernikov 		src = rt_mask(rt);
6659a1b64d5SAlexander V. Chernikov 		dst = info->rti_info[RTAX_NETMASK];
6669a1b64d5SAlexander V. Chernikov 		if (src != NULL && dst != NULL) {
6679a1b64d5SAlexander V. Chernikov 
6689a1b64d5SAlexander V. Chernikov 			/*
6699a1b64d5SAlexander V. Chernikov 			 * Radix stores different value in sa_len,
6709a1b64d5SAlexander V. Chernikov 			 * assume rt_mask() to have the same length
6719a1b64d5SAlexander V. Chernikov 			 * as rt_key()
6729a1b64d5SAlexander V. Chernikov 			 */
6739a1b64d5SAlexander V. Chernikov 			if (sa_len > dst->sa_len)
6749a1b64d5SAlexander V. Chernikov 				return (ENOMEM);
6759a1b64d5SAlexander V. Chernikov 			memcpy(dst, src, src->sa_len);
6769a1b64d5SAlexander V. Chernikov 			info->rti_addrs |= RTA_NETMASK;
6779a1b64d5SAlexander V. Chernikov 		}
6789a1b64d5SAlexander V. Chernikov 
6799a1b64d5SAlexander V. Chernikov 		/* Copy gateway is set && dst is non-zero */
680aaad3c4fSAlexander V. Chernikov 		src = &rt->rt_nhop->gw_sa;
6819a1b64d5SAlexander V. Chernikov 		dst = info->rti_info[RTAX_GATEWAY];
6829a1b64d5SAlexander V. Chernikov 		if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
6839a1b64d5SAlexander V. Chernikov 			if (src->sa_len > dst->sa_len)
6849a1b64d5SAlexander V. Chernikov 				return (ENOMEM);
6859a1b64d5SAlexander V. Chernikov 			memcpy(dst, src, src->sa_len);
6869a1b64d5SAlexander V. Chernikov 			info->rti_addrs |= RTA_GATEWAY;
6879a1b64d5SAlexander V. Chernikov 		}
6889a1b64d5SAlexander V. Chernikov 	} else {
6899a1b64d5SAlexander V. Chernikov 		info->rti_info[RTAX_DST] = rt_key(rt);
6909a1b64d5SAlexander V. Chernikov 		info->rti_addrs |= RTA_DST;
6919a1b64d5SAlexander V. Chernikov 		if (rt_mask(rt) != NULL) {
6929a1b64d5SAlexander V. Chernikov 			info->rti_info[RTAX_NETMASK] = rt_mask(rt);
6939a1b64d5SAlexander V. Chernikov 			info->rti_addrs |= RTA_NETMASK;
6949a1b64d5SAlexander V. Chernikov 		}
6959a1b64d5SAlexander V. Chernikov 		if (rt->rt_flags & RTF_GATEWAY) {
696aaad3c4fSAlexander V. Chernikov 			info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
6979a1b64d5SAlexander V. Chernikov 			info->rti_addrs |= RTA_GATEWAY;
6989a1b64d5SAlexander V. Chernikov 		}
6999a1b64d5SAlexander V. Chernikov 	}
7009a1b64d5SAlexander V. Chernikov 
701aaad3c4fSAlexander V. Chernikov 	nh = rt->rt_nhop;
7029a1b64d5SAlexander V. Chernikov 	rmx = info->rti_rmx;
7039a1b64d5SAlexander V. Chernikov 	if (rmx != NULL) {
7049a1b64d5SAlexander V. Chernikov 		info->rti_mflags |= RTV_MTU;
705aaad3c4fSAlexander V. Chernikov 		rmx->rmx_mtu = nh->nh_mtu;
7069a1b64d5SAlexander V. Chernikov 	}
7079a1b64d5SAlexander V. Chernikov 
708aaad3c4fSAlexander V. Chernikov 	info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh);
709aaad3c4fSAlexander V. Chernikov 	info->rti_ifp = nh->nh_ifp;
710aaad3c4fSAlexander V. Chernikov 	info->rti_ifa = nh->nh_ifa;
7119a1b64d5SAlexander V. Chernikov 	if (flags & NHR_REF) {
7129a1b64d5SAlexander V. Chernikov 		if_ref(info->rti_ifp);
7136b5d8e30SMark Johnston 		ifa_ref(info->rti_ifa);
7149a1b64d5SAlexander V. Chernikov 	}
7159a1b64d5SAlexander V. Chernikov 
7169a1b64d5SAlexander V. Chernikov 	return (0);
7179a1b64d5SAlexander V. Chernikov }
7189a1b64d5SAlexander V. Chernikov 
7199a1b64d5SAlexander V. Chernikov /*
7209a1b64d5SAlexander V. Chernikov  * Lookups up route entry for @dst in RIB database for fib @fibnum.
7219a1b64d5SAlexander V. Chernikov  * Exports entry data to @info using rt_exportinfo().
7229a1b64d5SAlexander V. Chernikov  *
7236b5d8e30SMark Johnston  * If @flags contains NHR_REF, refcouting is performed on rt_ifp and rt_ifa.
7246b5d8e30SMark Johnston  * All references can be released later by calling rib_free_info().
7259a1b64d5SAlexander V. Chernikov  *
7269a1b64d5SAlexander V. Chernikov  * Returns 0 on success.
7279a1b64d5SAlexander V. Chernikov  * Returns ENOENT for lookup failure, ENOMEM for export failure.
7289a1b64d5SAlexander V. Chernikov  */
7299a1b64d5SAlexander V. Chernikov int
7309a1b64d5SAlexander V. Chernikov rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
7319a1b64d5SAlexander V. Chernikov     uint32_t flowid, struct rt_addrinfo *info)
7329a1b64d5SAlexander V. Chernikov {
73320efcfc6SAndrey V. Elsukov 	RIB_RLOCK_TRACKER;
73461eee0e2SAlexander V. Chernikov 	struct rib_head *rh;
7359a1b64d5SAlexander V. Chernikov 	struct radix_node *rn;
7369a1b64d5SAlexander V. Chernikov 	struct rtentry *rt;
7379a1b64d5SAlexander V. Chernikov 	int error;
7389a1b64d5SAlexander V. Chernikov 
7399a1b64d5SAlexander V. Chernikov 	KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum"));
7409a1b64d5SAlexander V. Chernikov 	rh = rt_tables_get_rnh(fibnum, dst->sa_family);
7419a1b64d5SAlexander V. Chernikov 	if (rh == NULL)
7429a1b64d5SAlexander V. Chernikov 		return (ENOENT);
7439a1b64d5SAlexander V. Chernikov 
74461eee0e2SAlexander V. Chernikov 	RIB_RLOCK(rh);
74561eee0e2SAlexander V. Chernikov 	rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head);
7469a1b64d5SAlexander V. Chernikov 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
7479a1b64d5SAlexander V. Chernikov 		rt = RNTORT(rn);
7489a1b64d5SAlexander V. Chernikov 		/* Ensure route & ifp is UP */
7498c61eb21SAlexander V. Chernikov 		if (RT_LINK_IS_UP(rt->rt_nhop->nh_ifp)) {
7509a1b64d5SAlexander V. Chernikov 			flags = (flags & NHR_REF) | NHR_COPY;
7519a1b64d5SAlexander V. Chernikov 			error = rt_exportinfo(rt, info, flags);
75261eee0e2SAlexander V. Chernikov 			RIB_RUNLOCK(rh);
7539a1b64d5SAlexander V. Chernikov 
7549a1b64d5SAlexander V. Chernikov 			return (error);
7559a1b64d5SAlexander V. Chernikov 		}
7569a1b64d5SAlexander V. Chernikov 	}
75761eee0e2SAlexander V. Chernikov 	RIB_RUNLOCK(rh);
7589a1b64d5SAlexander V. Chernikov 
7599a1b64d5SAlexander V. Chernikov 	return (ENOENT);
7609a1b64d5SAlexander V. Chernikov }
7619a1b64d5SAlexander V. Chernikov 
7629a1b64d5SAlexander V. Chernikov /*
7639a1b64d5SAlexander V. Chernikov  * Releases all references acquired by rib_lookup_info() when
7649a1b64d5SAlexander V. Chernikov  * called with NHR_REF flags.
7659a1b64d5SAlexander V. Chernikov  */
7669a1b64d5SAlexander V. Chernikov void
7679a1b64d5SAlexander V. Chernikov rib_free_info(struct rt_addrinfo *info)
7689a1b64d5SAlexander V. Chernikov {
7699a1b64d5SAlexander V. Chernikov 
7706b5d8e30SMark Johnston 	ifa_free(info->rti_ifa);
7719a1b64d5SAlexander V. Chernikov 	if_rele(info->rti_ifp);
7729a1b64d5SAlexander V. Chernikov }
7739a1b64d5SAlexander V. Chernikov 
7749a1b64d5SAlexander V. Chernikov /*
7752caee4beSAlexander V. Chernikov  * Iterates over all existing fibs in system calling
7762caee4beSAlexander V. Chernikov  *  @setwa_f function prior to traversing each fib.
7772caee4beSAlexander V. Chernikov  *  Calls @wa_f function for each element in current fib.
7782caee4beSAlexander V. Chernikov  * If af is not AF_UNSPEC, iterates over fibs in particular
7792caee4beSAlexander V. Chernikov  * address family.
7802caee4beSAlexander V. Chernikov  */
7814bdf0b6aSAlexander V. Chernikov void
7822caee4beSAlexander V. Chernikov rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
7832caee4beSAlexander V. Chernikov     void *arg)
7844bdf0b6aSAlexander V. Chernikov {
78561eee0e2SAlexander V. Chernikov 	struct rib_head *rnh;
7864bdf0b6aSAlexander V. Chernikov 	uint32_t fibnum;
7874bdf0b6aSAlexander V. Chernikov 	int i;
7884bdf0b6aSAlexander V. Chernikov 
7894bdf0b6aSAlexander V. Chernikov 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
7904bdf0b6aSAlexander V. Chernikov 		/* Do we want some specific family? */
7914bdf0b6aSAlexander V. Chernikov 		if (af != AF_UNSPEC) {
7924bdf0b6aSAlexander V. Chernikov 			rnh = rt_tables_get_rnh(fibnum, af);
7934bdf0b6aSAlexander V. Chernikov 			if (rnh == NULL)
7944bdf0b6aSAlexander V. Chernikov 				continue;
7954bdf0b6aSAlexander V. Chernikov 			if (setwa_f != NULL)
796e4790abfSAlexander V. Chernikov 				setwa_f(rnh, fibnum, af, arg);
7974bdf0b6aSAlexander V. Chernikov 
79861eee0e2SAlexander V. Chernikov 			RIB_WLOCK(rnh);
79961eee0e2SAlexander V. Chernikov 			rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
80061eee0e2SAlexander V. Chernikov 			RIB_WUNLOCK(rnh);
8014bdf0b6aSAlexander V. Chernikov 			continue;
8024bdf0b6aSAlexander V. Chernikov 		}
8034bdf0b6aSAlexander V. Chernikov 
8044bdf0b6aSAlexander V. Chernikov 		for (i = 1; i <= AF_MAX; i++) {
8054bdf0b6aSAlexander V. Chernikov 			rnh = rt_tables_get_rnh(fibnum, i);
8064bdf0b6aSAlexander V. Chernikov 			if (rnh == NULL)
8074bdf0b6aSAlexander V. Chernikov 				continue;
8084bdf0b6aSAlexander V. Chernikov 			if (setwa_f != NULL)
8094bdf0b6aSAlexander V. Chernikov 				setwa_f(rnh, fibnum, i, arg);
8104bdf0b6aSAlexander V. Chernikov 
81161eee0e2SAlexander V. Chernikov 			RIB_WLOCK(rnh);
81261eee0e2SAlexander V. Chernikov 			rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
81361eee0e2SAlexander V. Chernikov 			RIB_WUNLOCK(rnh);
8144bdf0b6aSAlexander V. Chernikov 		}
8154bdf0b6aSAlexander V. Chernikov 	}
8164bdf0b6aSAlexander V. Chernikov }
8174bdf0b6aSAlexander V. Chernikov 
81834a5582cSAlexander V. Chernikov /*
81934a5582cSAlexander V. Chernikov  * Iterates over all existing fibs in system and deletes each element
82034a5582cSAlexander V. Chernikov  *  for which @filter_f function returns non-zero value.
82134a5582cSAlexander V. Chernikov  * If @family is not AF_UNSPEC, iterates over fibs in particular
82234a5582cSAlexander V. Chernikov  * address family.
82334a5582cSAlexander V. Chernikov  */
82434a5582cSAlexander V. Chernikov void
82534a5582cSAlexander V. Chernikov rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg)
82634a5582cSAlexander V. Chernikov {
82734a5582cSAlexander V. Chernikov 	u_int fibnum;
82834a5582cSAlexander V. Chernikov 	int i, start, end;
82934a5582cSAlexander V. Chernikov 
83034a5582cSAlexander V. Chernikov 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
83134a5582cSAlexander V. Chernikov 		/* Do we want some specific family? */
83234a5582cSAlexander V. Chernikov 		if (family != AF_UNSPEC) {
83334a5582cSAlexander V. Chernikov 			start = family;
83434a5582cSAlexander V. Chernikov 			end = family;
83534a5582cSAlexander V. Chernikov 		} else {
83634a5582cSAlexander V. Chernikov 			start = 1;
83734a5582cSAlexander V. Chernikov 			end = AF_MAX;
83834a5582cSAlexander V. Chernikov 		}
83934a5582cSAlexander V. Chernikov 
84034a5582cSAlexander V. Chernikov 		for (i = start; i <= end; i++) {
84134a5582cSAlexander V. Chernikov 			if (rt_tables_get_rnh(fibnum, i) == NULL)
84234a5582cSAlexander V. Chernikov 				continue;
84334a5582cSAlexander V. Chernikov 
84434a5582cSAlexander V. Chernikov 			rib_walk_del(fibnum, i, filter_f, arg, 0);
845e8b0643eSAlexander V. Chernikov 		}
846e8b0643eSAlexander V. Chernikov 	}
847e8b0643eSAlexander V. Chernikov }
848e8b0643eSAlexander V. Chernikov 
8494bdf0b6aSAlexander V. Chernikov /*
8504bdf0b6aSAlexander V. Chernikov  * Delete Routes for a Network Interface
8514bdf0b6aSAlexander V. Chernikov  *
8524bdf0b6aSAlexander V. Chernikov  * Called for each routing entry via the rnh->rnh_walktree() call above
8534bdf0b6aSAlexander V. Chernikov  * to delete all route entries referencing a detaching network interface.
8544bdf0b6aSAlexander V. Chernikov  *
8554bdf0b6aSAlexander V. Chernikov  * Arguments:
8564bdf0b6aSAlexander V. Chernikov  *	rt	pointer to rtentry
857539642a2SAlexander V. Chernikov  *	nh	pointer to nhop
8584bdf0b6aSAlexander V. Chernikov  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
8594bdf0b6aSAlexander V. Chernikov  *
8604bdf0b6aSAlexander V. Chernikov  * Returns:
8614bdf0b6aSAlexander V. Chernikov  *	0	successful
8624bdf0b6aSAlexander V. Chernikov  *	errno	failed - reason indicated
8634bdf0b6aSAlexander V. Chernikov  */
8644bdf0b6aSAlexander V. Chernikov static int
865539642a2SAlexander V. Chernikov rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg)
8664bdf0b6aSAlexander V. Chernikov {
8674bdf0b6aSAlexander V. Chernikov 	struct ifnet	*ifp = arg;
8684bdf0b6aSAlexander V. Chernikov 
869539642a2SAlexander V. Chernikov 	if (nh->nh_ifp != ifp)
8704bdf0b6aSAlexander V. Chernikov 		return (0);
8714bdf0b6aSAlexander V. Chernikov 
8724bdf0b6aSAlexander V. Chernikov 	/*
8734bdf0b6aSAlexander V. Chernikov 	 * Protect (sorta) against walktree recursion problems
8744bdf0b6aSAlexander V. Chernikov 	 * with cloned routes
8754bdf0b6aSAlexander V. Chernikov 	 */
8764bdf0b6aSAlexander V. Chernikov 	if ((rt->rt_flags & RTF_UP) == 0)
8774bdf0b6aSAlexander V. Chernikov 		return (0);
8784bdf0b6aSAlexander V. Chernikov 
879e8b0643eSAlexander V. Chernikov 	return (1);
8804bdf0b6aSAlexander V. Chernikov }
8814bdf0b6aSAlexander V. Chernikov 
8824bdf0b6aSAlexander V. Chernikov /*
8834bdf0b6aSAlexander V. Chernikov  * Delete all remaining routes using this interface
8844bdf0b6aSAlexander V. Chernikov  * Unfortuneatly the only way to do this is to slog through
8854bdf0b6aSAlexander V. Chernikov  * the entire routing table looking for routes which point
8864bdf0b6aSAlexander V. Chernikov  * to this interface...oh well...
8874bdf0b6aSAlexander V. Chernikov  */
8884bdf0b6aSAlexander V. Chernikov void
88980ae8d60SBjoern A. Zeeb rt_flushifroutes_af(struct ifnet *ifp, int af)
89080ae8d60SBjoern A. Zeeb {
89180ae8d60SBjoern A. Zeeb 	KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d",
89280ae8d60SBjoern A. Zeeb 	    __func__, af, AF_MAX));
89380ae8d60SBjoern A. Zeeb 
89480ae8d60SBjoern A. Zeeb 	rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp);
89580ae8d60SBjoern A. Zeeb }
89680ae8d60SBjoern A. Zeeb 
89780ae8d60SBjoern A. Zeeb void
8984bdf0b6aSAlexander V. Chernikov rt_flushifroutes(struct ifnet *ifp)
8994bdf0b6aSAlexander V. Chernikov {
9004bdf0b6aSAlexander V. Chernikov 
901e8b0643eSAlexander V. Chernikov 	rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
9024bdf0b6aSAlexander V. Chernikov }
9034bdf0b6aSAlexander V. Chernikov 
9048071913dSRuslan Ermilov /*
9058c0fec80SRobert Watson  * Look up rt_addrinfo for a specific fib.  Note that if rti_ifa is defined,
9068c0fec80SRobert Watson  * it will be referenced so the caller must free it.
9072ad7ed6eSAlexander V. Chernikov  *
9082ad7ed6eSAlexander V. Chernikov  * Assume basic consistency checks are executed by callers:
9092ad7ed6eSAlexander V. Chernikov  * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
9108c0fec80SRobert Watson  */
9118b07e49aSJulian Elischer int
9128b07e49aSJulian Elischer rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
9138b07e49aSJulian Elischer {
9144d2c2509SAlexander V. Chernikov 	const struct sockaddr *dst, *gateway, *ifpaddr, *ifaaddr;
915a68cc388SGleb Smirnoff 	struct epoch_tracker et;
9164d2c2509SAlexander V. Chernikov 	int needref, error, flags;
9174d2c2509SAlexander V. Chernikov 
9184d2c2509SAlexander V. Chernikov 	dst = info->rti_info[RTAX_DST];
9194d2c2509SAlexander V. Chernikov 	gateway = info->rti_info[RTAX_GATEWAY];
9204d2c2509SAlexander V. Chernikov 	ifpaddr = info->rti_info[RTAX_IFP];
9214d2c2509SAlexander V. Chernikov 	ifaaddr = info->rti_info[RTAX_IFA];
9224d2c2509SAlexander V. Chernikov 	flags = info->rti_flags;
9238071913dSRuslan Ermilov 
9248071913dSRuslan Ermilov 	/*
9258071913dSRuslan Ermilov 	 * ifp may be specified by sockaddr_dl
9268071913dSRuslan Ermilov 	 * when protocol address is ambiguous.
9278071913dSRuslan Ermilov 	 */
9281ebec5faSMatt Macy 	error = 0;
9291ebec5faSMatt Macy 	needref = (info->rti_ifa == NULL);
930a68cc388SGleb Smirnoff 	NET_EPOCH_ENTER(et);
9312ad7ed6eSAlexander V. Chernikov 
9322ad7ed6eSAlexander V. Chernikov 	/* If we have interface specified by the ifindex in the address, use it */
9338071913dSRuslan Ermilov 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
9342ad7ed6eSAlexander V. Chernikov 	    ifpaddr->sa_family == AF_LINK) {
9352ad7ed6eSAlexander V. Chernikov 	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr;
9362ad7ed6eSAlexander V. Chernikov 	    if (sdl->sdl_index != 0)
937270b83b9SHans Petter Selasky 		    info->rti_ifp = ifnet_byindex(sdl->sdl_index);
9388c0fec80SRobert Watson 	}
9392ad7ed6eSAlexander V. Chernikov 	/*
9402ad7ed6eSAlexander V. Chernikov 	 * If we have source address specified, try to find it
9412ad7ed6eSAlexander V. Chernikov 	 * TODO: avoid enumerating all ifas on all interfaces.
9422ad7ed6eSAlexander V. Chernikov 	 */
9438071913dSRuslan Ermilov 	if (info->rti_ifa == NULL && ifaaddr != NULL)
9448071913dSRuslan Ermilov 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
9458071913dSRuslan Ermilov 	if (info->rti_ifa == NULL) {
9464d2c2509SAlexander V. Chernikov 		const struct sockaddr *sa;
9478071913dSRuslan Ermilov 
9482ad7ed6eSAlexander V. Chernikov 		/*
9492ad7ed6eSAlexander V. Chernikov 		 * Most common use case for the userland-supplied routes.
9502ad7ed6eSAlexander V. Chernikov 		 *
9512ad7ed6eSAlexander V. Chernikov 		 * Choose sockaddr to select ifa.
9522ad7ed6eSAlexander V. Chernikov 		 * -- if ifp is set --
9532ad7ed6eSAlexander V. Chernikov 		 * Order of preference:
9542ad7ed6eSAlexander V. Chernikov 		 * 1) IFA address
9552ad7ed6eSAlexander V. Chernikov 		 * 2) gateway address
9562ad7ed6eSAlexander V. Chernikov 		 *   Note: for interface routes link-level gateway address
9572ad7ed6eSAlexander V. Chernikov 		 *     is specified to indicate the interface index without
9582ad7ed6eSAlexander V. Chernikov 		 *     specifying RTF_GATEWAY. In this case, ignore gateway
9592ad7ed6eSAlexander V. Chernikov 		 *   Note: gateway AF may be different from dst AF. In this case,
9602ad7ed6eSAlexander V. Chernikov 		 *   ignore gateway
9612ad7ed6eSAlexander V. Chernikov 		 * 3) final destination.
9622ad7ed6eSAlexander V. Chernikov 		 * 4) if all of these fails, try to get at least link-level ifa.
9632ad7ed6eSAlexander V. Chernikov 		 * -- else --
9642ad7ed6eSAlexander V. Chernikov 		 * try to lookup gateway or dst in the routing table to get ifa
9652ad7ed6eSAlexander V. Chernikov 		 */
9662ad7ed6eSAlexander V. Chernikov 		if (info->rti_info[RTAX_IFA] != NULL)
9672ad7ed6eSAlexander V. Chernikov 			sa = info->rti_info[RTAX_IFA];
9682ad7ed6eSAlexander V. Chernikov 		else if ((info->rti_flags & RTF_GATEWAY) != 0 &&
9692ad7ed6eSAlexander V. Chernikov 		    gateway->sa_family == dst->sa_family)
9702ad7ed6eSAlexander V. Chernikov 			sa = gateway;
9712ad7ed6eSAlexander V. Chernikov 		else
9722ad7ed6eSAlexander V. Chernikov 			sa = dst;
9732ad7ed6eSAlexander V. Chernikov 		if (info->rti_ifp != NULL) {
9748071913dSRuslan Ermilov 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
9752ad7ed6eSAlexander V. Chernikov 			/* Case 4 */
9762ad7ed6eSAlexander V. Chernikov 			if (info->rti_ifa == NULL && gateway != NULL)
9772ad7ed6eSAlexander V. Chernikov 				info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp);
9782ad7ed6eSAlexander V. Chernikov 		} else if (dst != NULL && gateway != NULL)
9794f8585e0SAlan Somers 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
9808b07e49aSJulian Elischer 							fibnum);
9818071913dSRuslan Ermilov 		else if (sa != NULL)
9824f8585e0SAlan Somers 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
9838b07e49aSJulian Elischer 							fibnum);
9848071913dSRuslan Ermilov 	}
9851ebec5faSMatt Macy 	if (needref && info->rti_ifa != NULL) {
9868071913dSRuslan Ermilov 		if (info->rti_ifp == NULL)
987134804c8SMatt Macy 			info->rti_ifp = info->rti_ifa->ifa_ifp;
9884f6c66ccSMatt Macy 		ifa_ref(info->rti_ifa);
9898071913dSRuslan Ermilov 	} else
9908071913dSRuslan Ermilov 		error = ENETUNREACH;
991a68cc388SGleb Smirnoff 	NET_EPOCH_EXIT(et);
9928071913dSRuslan Ermilov 	return (error);
9938071913dSRuslan Ermilov }
9948071913dSRuslan Ermilov 
9957f948f12SAlexander V. Chernikov void
9967f948f12SAlexander V. Chernikov rt_updatemtu(struct ifnet *ifp)
9977f948f12SAlexander V. Chernikov {
99861eee0e2SAlexander V. Chernikov 	struct rib_head *rnh;
9999e022295SAlexander V. Chernikov 	int mtu;
10007f948f12SAlexander V. Chernikov 	int i, j;
10017f948f12SAlexander V. Chernikov 
10027f948f12SAlexander V. Chernikov 	/*
10037f948f12SAlexander V. Chernikov 	 * Try to update rt_mtu for all routes using this interface
10047f948f12SAlexander V. Chernikov 	 * Unfortunately the only way to do this is to traverse all
10057f948f12SAlexander V. Chernikov 	 * routing tables in all fibs/domains.
10067f948f12SAlexander V. Chernikov 	 */
10077f948f12SAlexander V. Chernikov 	for (i = 1; i <= AF_MAX; i++) {
10089e022295SAlexander V. Chernikov 		mtu = if_getmtu_family(ifp, i);
10097f948f12SAlexander V. Chernikov 		for (j = 0; j < rt_numfibs; j++) {
10107f948f12SAlexander V. Chernikov 			rnh = rt_tables_get_rnh(j, i);
10117f948f12SAlexander V. Chernikov 			if (rnh == NULL)
10127f948f12SAlexander V. Chernikov 				continue;
10139e022295SAlexander V. Chernikov 			nhops_update_ifmtu(rnh, ifp, mtu);
10147f948f12SAlexander V. Chernikov 		}
10157f948f12SAlexander V. Chernikov 	}
10167f948f12SAlexander V. Chernikov }
10177f948f12SAlexander V. Chernikov 
10187f948f12SAlexander V. Chernikov 
10195a2f4cbdSAlexander V. Chernikov #if 0
10205a2f4cbdSAlexander V. Chernikov int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
10215a2f4cbdSAlexander V. Chernikov int rt_print(char *buf, int buflen, struct rtentry *rt);
10225a2f4cbdSAlexander V. Chernikov 
/*
 * Debugging helper: formats the address held in @s as text into @buf,
 * which is @buflen bytes long.  Only AF_INET and AF_INET6 sockaddrs are
 * handled.
 *
 * Returns the length of the resulting string (excluding the terminating
 * NUL), or 0 if nothing was produced: @s or @buf is NULL, @buflen is
 * not positive, the address family is unsupported, or inet_ntop()
 * failed (e.g. the buffer is too small).
 */
int
p_sockaddr(char *buf, int buflen, struct sockaddr *s)
{
	void *paddr = NULL;

	/*
	 * Reject unusable arguments up front; a non-positive buflen must
	 * never reach inet_ntop(), whose size parameter is unsigned.
	 */
	if (buf == NULL || buflen <= 0 || s == NULL)
		return (0);

	switch (s->sa_family) {
	case AF_INET:
		paddr = &((struct sockaddr_in *)s)->sin_addr;
		break;
	case AF_INET6:
		paddr = &((struct sockaddr_in6 *)s)->sin6_addr;
		break;
	default:
		break;
	}

	if (paddr == NULL)
		return (0);

	if (inet_ntop(s->sa_family, paddr, buf, buflen) == NULL)
		return (0);

	return ((int)strlen(buf));
}
10455a2f4cbdSAlexander V. Chernikov 
10465a2f4cbdSAlexander V. Chernikov int
10475a2f4cbdSAlexander V. Chernikov rt_print(char *buf, int buflen, struct rtentry *rt)
10485a2f4cbdSAlexander V. Chernikov {
10495a2f4cbdSAlexander V. Chernikov 	struct sockaddr *addr, *mask;
10505a2f4cbdSAlexander V. Chernikov 	int i = 0;
10515a2f4cbdSAlexander V. Chernikov 
10525a2f4cbdSAlexander V. Chernikov 	addr = rt_key(rt);
10535a2f4cbdSAlexander V. Chernikov 	mask = rt_mask(rt);
10545a2f4cbdSAlexander V. Chernikov 
10555a2f4cbdSAlexander V. Chernikov 	i = p_sockaddr(buf, buflen, addr);
10565a2f4cbdSAlexander V. Chernikov 	if (!(rt->rt_flags & RTF_HOST)) {
10575a2f4cbdSAlexander V. Chernikov 		buf[i++] = '/';
10585a2f4cbdSAlexander V. Chernikov 		i += p_sockaddr(buf + i, buflen - i, mask);
10595a2f4cbdSAlexander V. Chernikov 	}
10605a2f4cbdSAlexander V. Chernikov 
10615a2f4cbdSAlexander V. Chernikov 	if (rt->rt_flags & RTF_GATEWAY) {
10625a2f4cbdSAlexander V. Chernikov 		buf[i++] = '>';
1063aaad3c4fSAlexander V. Chernikov 		i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa);
10645a2f4cbdSAlexander V. Chernikov 	}
10655a2f4cbdSAlexander V. Chernikov 
10665a2f4cbdSAlexander V. Chernikov 	return (i);
10675a2f4cbdSAlexander V. Chernikov }
10685a2f4cbdSAlexander V. Chernikov #endif
10695a2f4cbdSAlexander V. Chernikov 
1070427ac07fSKip Macy #ifdef RADIX_MPATH
1071e8b0643eSAlexander V. Chernikov /*
1072e8b0643eSAlexander V. Chernikov  * Deletes key for single-path routes, unlinks rtentry with
1073e8b0643eSAlexander V. Chernikov  * gateway specified in @info from multi-path routes.
1074e8b0643eSAlexander V. Chernikov  *
1075e8b0643eSAlexander V. Chernikov  * Returnes unlinked entry. In case of failure, returns NULL
1076e8b0643eSAlexander V. Chernikov  * and sets @perror to ESRCH.
1077e8b0643eSAlexander V. Chernikov  */
10784d2c2509SAlexander V. Chernikov struct radix_node *
107961eee0e2SAlexander V. Chernikov rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
1080e8b0643eSAlexander V. Chernikov     struct rtentry *rto, int *perror)
1081427ac07fSKip Macy {
1082427ac07fSKip Macy 	/*
1083427ac07fSKip Macy 	 * if we got multipath routes, we require users to specify
1084427ac07fSKip Macy 	 * a matching RTAX_GATEWAY.
1085427ac07fSKip Macy 	 */
1086e8b0643eSAlexander V. Chernikov 	struct rtentry *rt; // *rto = NULL;
1087f59c6cb0SAlexander V. Chernikov 	struct radix_node *rn;
1088e8b0643eSAlexander V. Chernikov 	struct sockaddr *gw;
1089427ac07fSKip Macy 
1090e8b0643eSAlexander V. Chernikov 	gw = info->rti_info[RTAX_GATEWAY];
1091e8b0643eSAlexander V. Chernikov 	rt = rt_mpath_matchgate(rto, gw);
1092e8b0643eSAlexander V. Chernikov 	if (rt == NULL) {
1093e8b0643eSAlexander V. Chernikov 		*perror = ESRCH;
1094e8b0643eSAlexander V. Chernikov 		return (NULL);
1095e8b0643eSAlexander V. Chernikov 	}
10965a2f4cbdSAlexander V. Chernikov 
1097427ac07fSKip Macy 	/*
1098427ac07fSKip Macy 	 * this is the first entry in the chain
1099427ac07fSKip Macy 	 */
1100427ac07fSKip Macy 	if (rto == rt) {
1101427ac07fSKip Macy 		rn = rn_mpath_next((struct radix_node *)rt);
1102427ac07fSKip Macy 		/*
1103427ac07fSKip Macy 		 * there is another entry, now it's active
1104427ac07fSKip Macy 		 */
1105427ac07fSKip Macy 		if (rn) {
1106427ac07fSKip Macy 			rto = RNTORT(rn);
1107427ac07fSKip Macy 			RT_LOCK(rto);
1108427ac07fSKip Macy 			rto->rt_flags |= RTF_UP;
1109427ac07fSKip Macy 			RT_UNLOCK(rto);
1110427ac07fSKip Macy 		} else if (rt->rt_flags & RTF_GATEWAY) {
1111427ac07fSKip Macy 			/*
1112427ac07fSKip Macy 			 * For gateway routes, we need to
1113427ac07fSKip Macy 			 * make sure that we we are deleting
1114427ac07fSKip Macy 			 * the correct gateway.
1115427ac07fSKip Macy 			 * rt_mpath_matchgate() does not
1116427ac07fSKip Macy 			 * check the case when there is only
1117427ac07fSKip Macy 			 * one route in the chain.
1118427ac07fSKip Macy 			 */
1119e8b0643eSAlexander V. Chernikov 			if (gw &&
1120aaad3c4fSAlexander V. Chernikov 			    (rt->rt_nhop->gw_sa.sa_len != gw->sa_len ||
1121aaad3c4fSAlexander V. Chernikov 				memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len))) {
1122e8b0643eSAlexander V. Chernikov 				*perror = ESRCH;
1123e8b0643eSAlexander V. Chernikov 				return (NULL);
1124e8b0643eSAlexander V. Chernikov 			}
11256a7bff2cSKip Macy 		}
11266a7bff2cSKip Macy 
1127427ac07fSKip Macy 		/*
1128427ac07fSKip Macy 		 * use the normal delete code to remove
1129427ac07fSKip Macy 		 * the first entry
1130427ac07fSKip Macy 		 */
11314d2c2509SAlexander V. Chernikov 		rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
11324d2c2509SAlexander V. Chernikov 					info->rti_info[RTAX_NETMASK],
11334d2c2509SAlexander V. Chernikov 					&rnh->head);
1134e8b0643eSAlexander V. Chernikov 		*perror = 0;
1135e8b0643eSAlexander V. Chernikov 		return (rn);
1136427ac07fSKip Macy 	}
1137427ac07fSKip Macy 
1138427ac07fSKip Macy 	/*
1139427ac07fSKip Macy 	 * if the entry is 2nd and on up
1140427ac07fSKip Macy 	 */
1141e8b0643eSAlexander V. Chernikov 	if (rt_mpath_deldup(rto, rt) == 0)
1142427ac07fSKip Macy 		panic ("rtrequest1: rt_mpath_deldup");
1143e8b0643eSAlexander V. Chernikov 	*perror = 0;
1144e8b0643eSAlexander V. Chernikov 	rn = (struct radix_node *)rt;
1145e8b0643eSAlexander V. Chernikov 	return (rn);
1146427ac07fSKip Macy }
1147427ac07fSKip Macy #endif
1148427ac07fSKip Macy 
11498071913dSRuslan Ermilov int
11508b07e49aSJulian Elischer rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
11518b07e49aSJulian Elischer 				u_int fibnum)
11528b07e49aSJulian Elischer {
1153aef2d5fbSAlexander V. Chernikov 	const struct sockaddr *dst;
115461eee0e2SAlexander V. Chernikov 	struct rib_head *rnh;
1155*da187ddbSAlexander V. Chernikov 	struct rib_cmd_info rc;
1156aef2d5fbSAlexander V. Chernikov 	int error;
1157df8bae1dSRodney W. Grimes 
11588b07e49aSJulian Elischer 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
1159aef2d5fbSAlexander V. Chernikov 	KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
11602bbab0afSAlexander V. Chernikov 	NET_EPOCH_ASSERT();
1161aef2d5fbSAlexander V. Chernikov 
1162aef2d5fbSAlexander V. Chernikov 	dst = info->rti_info[RTAX_DST];
1163aef2d5fbSAlexander V. Chernikov 
1164b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
1165b680a383SBjoern A. Zeeb 	case AF_INET6:
1166b680a383SBjoern A. Zeeb 	case AF_INET:
1167b680a383SBjoern A. Zeeb 		/* We support multiple FIBs. */
1168b680a383SBjoern A. Zeeb 		break;
1169b680a383SBjoern A. Zeeb 	default:
1170b680a383SBjoern A. Zeeb 		fibnum = RT_DEFAULT_FIB;
1171b680a383SBjoern A. Zeeb 		break;
1172b680a383SBjoern A. Zeeb 	}
1173b680a383SBjoern A. Zeeb 
1174b0a76b88SJulian Elischer 	/*
1175b0a76b88SJulian Elischer 	 * Find the correct routing tree to use for this Address Family
1176b0a76b88SJulian Elischer 	 */
1177c2c2a7c1SBjoern A. Zeeb 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
117885911824SLuigi Rizzo 	if (rnh == NULL)
1179983985c1SJeffrey Hsu 		return (EAFNOSUPPORT);
1180048738b5SAlexander V. Chernikov 
1181b0a76b88SJulian Elischer 	/*
1182b0a76b88SJulian Elischer 	 * If we are adding a host route then we don't want to put
118366953138SRuslan Ermilov 	 * a netmask in the tree, nor do we want to clone it.
1184b0a76b88SJulian Elischer 	 */
1185aef2d5fbSAlexander V. Chernikov 	if (info->rti_flags & RTF_HOST)
1186aef2d5fbSAlexander V. Chernikov 		info->rti_info[RTAX_NETMASK] = NULL;
11876e6b3f7cSQing Li 
1188*da187ddbSAlexander V. Chernikov 	bzero(&rc, sizeof(struct rib_cmd_info));
1189aef2d5fbSAlexander V. Chernikov 	error = 0;
1190df8bae1dSRodney W. Grimes 	switch (req) {
1191df8bae1dSRodney W. Grimes 	case RTM_DELETE:
1192*da187ddbSAlexander V. Chernikov 		error = del_route(rnh, info, &rc);
1193df8bae1dSRodney W. Grimes 		break;
1194df8bae1dSRodney W. Grimes 	case RTM_RESOLVE:
11956e6b3f7cSQing Li 		/*
11966e6b3f7cSQing Li 		 * resolve was only used for route cloning
11976e6b3f7cSQing Li 		 * here for compat
11986e6b3f7cSQing Li 		 */
11996e6b3f7cSQing Li 		break;
1200df8bae1dSRodney W. Grimes 	case RTM_ADD:
1201*da187ddbSAlexander V. Chernikov 		error = add_route(rnh, info, &rc);
1202aef2d5fbSAlexander V. Chernikov 		break;
1203aef2d5fbSAlexander V. Chernikov 	case RTM_CHANGE:
1204*da187ddbSAlexander V. Chernikov 		error = change_route(rnh, info, &rc);
1205aef2d5fbSAlexander V. Chernikov 		break;
1206aef2d5fbSAlexander V. Chernikov 	default:
1207aef2d5fbSAlexander V. Chernikov 		error = EOPNOTSUPP;
1208aef2d5fbSAlexander V. Chernikov 	}
1209aef2d5fbSAlexander V. Chernikov 
1210*da187ddbSAlexander V. Chernikov 	if (ret_nrt != NULL)
1211*da187ddbSAlexander V. Chernikov 		*ret_nrt = rc.rc_rt;
1212*da187ddbSAlexander V. Chernikov 
1213aef2d5fbSAlexander V. Chernikov 	return (error);
1214aef2d5fbSAlexander V. Chernikov }
1215aef2d5fbSAlexander V. Chernikov 
12164d2c2509SAlexander V. Chernikov void
12170fb9298dSAlexander V. Chernikov rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt)
12180fb9298dSAlexander V. Chernikov {
12190fb9298dSAlexander V. Chernikov 
12200fb9298dSAlexander V. Chernikov 	if (info->rti_mflags & RTV_WEIGHT)
12210fb9298dSAlexander V. Chernikov 		rt->rt_weight = info->rti_rmx->rmx_weight;
12220fb9298dSAlexander V. Chernikov 	/* Kernel -> userland timebase conversion. */
12230fb9298dSAlexander V. Chernikov 	if (info->rti_mflags & RTV_EXPIRE)
12240fb9298dSAlexander V. Chernikov 		rt->rt_expire = info->rti_rmx->rmx_expire ?
12250fb9298dSAlexander V. Chernikov 		    info->rti_rmx->rmx_expire - time_second + time_uptime : 0;
12260fb9298dSAlexander V. Chernikov }
12270fb9298dSAlexander V. Chernikov 
/*
 * Copy @src into @dst, AND-ing the address bytes with @netmask.
 *
 * All three arguments are treated as raw byte strings whose first byte is
 * the total length.  The first two bytes of @src (sa_len and sa_family)
 * are copied unmasked.  Bytes from offset 2 up to the shorter of the two
 * lengths are masked; any remaining bytes of @dst up to the length of @src
 * are zeroed.  @dst must have room for the length recorded in @src (and
 * for at least the two header bytes).
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	unsigned char *in = (unsigned char *)src;
	unsigned char *out = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	/* Both limits are taken before anything is written to dst. */
	unsigned char *mask_end = out + mask[0];
	unsigned char *out_end = out + in[0];

	/* Header bytes: sa_len & sa_family, copied as-is. */
	out[0] = in[0];
	out[1] = in[1];
	in += 2;
	out += 2;
	mask += 2;

	/* Never mask past the end of the source address. */
	if (mask_end > out_end)
		mask_end = out_end;

	for (; out < mask_end; out++, in++, mask++)
		*out = *in & *mask;

	/* The mask was shorter than the address: clear the tail. */
	if (out < out_end)
		bzero(out, (unsigned)(out_end - out));
}
1246df8bae1dSRodney W. Grimes 
1247df8bae1dSRodney W. Grimes /*
1248df8bae1dSRodney W. Grimes  * Set up a routing table entry, normally
1249df8bae1dSRodney W. Grimes  * for an interface.
1250df8bae1dSRodney W. Grimes  */
12518b07e49aSJulian Elischer #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
12528b07e49aSJulian Elischer static inline  int
12538b07e49aSJulian Elischer rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
1254df8bae1dSRodney W. Grimes {
125520efcfc6SAndrey V. Elsukov 	RIB_RLOCK_TRACKER;
12562bbab0afSAlexander V. Chernikov 	struct epoch_tracker et;
12575aca0b30SLuigi Rizzo 	struct sockaddr *dst;
12588071913dSRuslan Ermilov 	struct sockaddr *netmask;
125985911824SLuigi Rizzo 	struct rtentry *rt = NULL;
12608071913dSRuslan Ermilov 	struct rt_addrinfo info;
1261e440aed9SQing Li 	int error = 0;
12628b07e49aSJulian Elischer 	int startfib, endfib;
12638b07e49aSJulian Elischer 	char tempbuf[_SOCKADDR_TMPSIZE];
12648b07e49aSJulian Elischer 	int didwork = 0;
12658b07e49aSJulian Elischer 	int a_failure = 0;
1266aaad3c4fSAlexander V. Chernikov 	struct sockaddr_dl_short *sdl = NULL;
126761eee0e2SAlexander V. Chernikov 	struct rib_head *rnh;
1268df8bae1dSRodney W. Grimes 
12698071913dSRuslan Ermilov 	if (flags & RTF_HOST) {
12708071913dSRuslan Ermilov 		dst = ifa->ifa_dstaddr;
12718071913dSRuslan Ermilov 		netmask = NULL;
12728071913dSRuslan Ermilov 	} else {
12738071913dSRuslan Ermilov 		dst = ifa->ifa_addr;
12748071913dSRuslan Ermilov 		netmask = ifa->ifa_netmask;
12758071913dSRuslan Ermilov 	}
1276b3dd0771SBjoern A. Zeeb 	if (dst->sa_len == 0)
1277b3dd0771SBjoern A. Zeeb 		return(EINVAL);
1278b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
1279b680a383SBjoern A. Zeeb 	case AF_INET6:
1280b680a383SBjoern A. Zeeb 	case AF_INET:
1281b680a383SBjoern A. Zeeb 		/* We support multiple FIBs. */
1282b680a383SBjoern A. Zeeb 		break;
1283b680a383SBjoern A. Zeeb 	default:
1284b680a383SBjoern A. Zeeb 		fibnum = RT_DEFAULT_FIB;
1285b680a383SBjoern A. Zeeb 		break;
1286b680a383SBjoern A. Zeeb 	}
12877d9b6df1SAlexander V. Chernikov 	if (fibnum == RT_ALL_FIBS) {
1288ee0bd4b9SHiroki Sato 		if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD)
12890489b891SAlan Somers 			startfib = endfib = ifa->ifa_ifp->if_fib;
1290ee0bd4b9SHiroki Sato 		else {
12918b07e49aSJulian Elischer 			startfib = 0;
12928b07e49aSJulian Elischer 			endfib = rt_numfibs - 1;
129366e8505fSJulian Elischer 		}
12948b07e49aSJulian Elischer 	} else {
12958b07e49aSJulian Elischer 		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
12968b07e49aSJulian Elischer 		startfib = fibnum;
12978b07e49aSJulian Elischer 		endfib = fibnum;
12988b07e49aSJulian Elischer 	}
1299ac4a76ebSBjoern A. Zeeb 
1300b0a76b88SJulian Elischer 	/*
13018b07e49aSJulian Elischer 	 * If it's a delete, check that if it exists,
13028b07e49aSJulian Elischer 	 * it's on the correct interface or we might scrub
13038b07e49aSJulian Elischer 	 * a route to another ifa which would
1304b0a76b88SJulian Elischer 	 * be confusing at best and possibly worse.
1305b0a76b88SJulian Elischer 	 */
1306df8bae1dSRodney W. Grimes 	if (cmd == RTM_DELETE) {
1307b0a76b88SJulian Elischer 		/*
1308b0a76b88SJulian Elischer 		 * It's a delete, so it should already exist..
1309b0a76b88SJulian Elischer 		 * If it's a net, mask off the host bits
1310b0a76b88SJulian Elischer 		 * (Assuming we have a mask)
13118b07e49aSJulian Elischer 		 * XXX this is kinda inet specific..
1312b0a76b88SJulian Elischer 		 */
13138071913dSRuslan Ermilov 		if (netmask != NULL) {
13148b07e49aSJulian Elischer 			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
13158b07e49aSJulian Elischer 			dst = (struct sockaddr *)tempbuf;
1316df8bae1dSRodney W. Grimes 		}
1317563ab4e4SAlexander V. Chernikov 	} else if (cmd == RTM_ADD) {
1318aaad3c4fSAlexander V. Chernikov 		sdl = (struct sockaddr_dl_short *)tempbuf;
1319aaad3c4fSAlexander V. Chernikov 		bzero(sdl, sizeof(struct sockaddr_dl_short));
1320563ab4e4SAlexander V. Chernikov 		sdl->sdl_family = AF_LINK;
1321aaad3c4fSAlexander V. Chernikov 		sdl->sdl_len = sizeof(struct sockaddr_dl_short);
1322563ab4e4SAlexander V. Chernikov 		sdl->sdl_type = ifa->ifa_ifp->if_type;
1323563ab4e4SAlexander V. Chernikov 		sdl->sdl_index = ifa->ifa_ifp->if_index;
13248b07e49aSJulian Elischer         }
13258b07e49aSJulian Elischer 	/*
13268b07e49aSJulian Elischer 	 * Now go through all the requested tables (fibs) and do the
13278b07e49aSJulian Elischer 	 * requested action. Realistically, this will either be fib 0
13288b07e49aSJulian Elischer 	 * for protocols that don't do multiple tables or all the
1329a8498625SBjoern A. Zeeb 	 * tables for those that do.
13308b07e49aSJulian Elischer 	 */
13318b07e49aSJulian Elischer 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
13328b07e49aSJulian Elischer 		if (cmd == RTM_DELETE) {
13338b07e49aSJulian Elischer 			struct radix_node *rn;
1334b0a76b88SJulian Elischer 			/*
13358071913dSRuslan Ermilov 			 * Look up an rtentry that is in the routing tree and
13368071913dSRuslan Ermilov 			 * contains the correct info.
1337b0a76b88SJulian Elischer 			 */
1338c2c2a7c1SBjoern A. Zeeb 			rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
1339c2c2a7c1SBjoern A. Zeeb 			if (rnh == NULL)
13408b07e49aSJulian Elischer 				/* this table doesn't exist but others might */
13418b07e49aSJulian Elischer 				continue;
134261eee0e2SAlexander V. Chernikov 			RIB_RLOCK(rnh);
134361eee0e2SAlexander V. Chernikov 			rn = rnh->rnh_lookup(dst, netmask, &rnh->head);
1344e440aed9SQing Li #ifdef RADIX_MPATH
134561eee0e2SAlexander V. Chernikov 			if (rt_mpath_capable(rnh)) {
1346e440aed9SQing Li 
1347e440aed9SQing Li 				if (rn == NULL)
1348e440aed9SQing Li 					error = ESRCH;
1349e440aed9SQing Li 				else {
1350e440aed9SQing Li 					rt = RNTORT(rn);
1351e440aed9SQing Li 					/*
13529e022295SAlexander V. Chernikov 					 * for interface route the gateway
13539e022295SAlexander V. Chernikov 					 * gateway is sockaddr_dl, so
13548b07e49aSJulian Elischer 					 * rt_mpath_matchgate must use the
13558b07e49aSJulian Elischer 					 * interface address
1356e440aed9SQing Li 					 */
13578b07e49aSJulian Elischer 					rt = rt_mpath_matchgate(rt,
13588b07e49aSJulian Elischer 					    ifa->ifa_addr);
1359034c09ffSAlexander V. Chernikov 					if (rt == NULL)
1360e440aed9SQing Li 						error = ESRCH;
1361e440aed9SQing Li 				}
1362e440aed9SQing Li 			}
1363e440aed9SQing Li #endif
13648b07e49aSJulian Elischer 			error = (rn == NULL ||
13658071913dSRuslan Ermilov 			    (rn->rn_flags & RNF_ROOT) ||
13668c61eb21SAlexander V. Chernikov 			    RNTORT(rn)->rt_nhop->nh_ifa != ifa);
136761eee0e2SAlexander V. Chernikov 			RIB_RUNLOCK(rnh);
1368956b0b65SJeffrey Hsu 			if (error) {
13698b07e49aSJulian Elischer 				/* this is only an error if bad on ALL tables */
13708b07e49aSJulian Elischer 				continue;
1371df8bae1dSRodney W. Grimes 			}
1372b0a76b88SJulian Elischer 		}
1373b0a76b88SJulian Elischer 		/*
1374b0a76b88SJulian Elischer 		 * Do the actual request
1375b0a76b88SJulian Elischer 		 */
13768071913dSRuslan Ermilov 		bzero((caddr_t)&info, sizeof(info));
13778071913dSRuslan Ermilov 		info.rti_ifa = ifa;
13783034f43fSAlexander V. Chernikov 		info.rti_flags = flags |
13793034f43fSAlexander V. Chernikov 		    (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
13808071913dSRuslan Ermilov 		info.rti_info[RTAX_DST] = dst;
13816e6b3f7cSQing Li 		/*
13826e6b3f7cSQing Li 		 * doing this for compatibility reasons
13836e6b3f7cSQing Li 		 */
13846e6b3f7cSQing Li 		if (cmd == RTM_ADD)
1385563ab4e4SAlexander V. Chernikov 			info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl;
13866e6b3f7cSQing Li 		else
13878071913dSRuslan Ermilov 			info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
13888071913dSRuslan Ermilov 		info.rti_info[RTAX_NETMASK] = netmask;
13892bbab0afSAlexander V. Chernikov 		NET_EPOCH_ENTER(et);
13908b07e49aSJulian Elischer 		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
13915aca0b30SLuigi Rizzo 		if (error == 0 && rt != NULL) {
13928071913dSRuslan Ermilov 			/*
13936f99b44cSBrian Somers 			 * notify any listening routing agents of the change
13948071913dSRuslan Ermilov 			 */
13959e022295SAlexander V. Chernikov 
13969e022295SAlexander V. Chernikov 			/* TODO: interface routes/aliases */
1397e02d3fe7SAlexander V. Chernikov 			rt_newaddrmsg_fib(cmd, ifa, rt, fibnum);
13988b07e49aSJulian Elischer 			didwork = 1;
1399df8bae1dSRodney W. Grimes 		}
14002bbab0afSAlexander V. Chernikov 		NET_EPOCH_EXIT(et);
14018b07e49aSJulian Elischer 		if (error)
14028b07e49aSJulian Elischer 			a_failure = error;
14038b07e49aSJulian Elischer 	}
14048b07e49aSJulian Elischer 	if (cmd == RTM_DELETE) {
14058b07e49aSJulian Elischer 		if (didwork) {
14068b07e49aSJulian Elischer 			error = 0;
14078b07e49aSJulian Elischer 		} else {
14088b07e49aSJulian Elischer 			/* we only give an error if it wasn't in any table */
14098b07e49aSJulian Elischer 			error = ((flags & RTF_HOST) ?
14108b07e49aSJulian Elischer 			    EHOSTUNREACH : ENETUNREACH);
14118b07e49aSJulian Elischer 		}
14128b07e49aSJulian Elischer 	} else {
14138b07e49aSJulian Elischer 		if (a_failure) {
14148b07e49aSJulian Elischer 			/* return an error if any of them failed */
14158b07e49aSJulian Elischer 			error = a_failure;
14168b07e49aSJulian Elischer 		}
14178b07e49aSJulian Elischer 	}
14183ec66d6cSDavid Greenman 	return (error);
14193ec66d6cSDavid Greenman }
1420cb64988fSLuoqi Chen 
14218b07e49aSJulian Elischer /*
14228b07e49aSJulian Elischer  * Set up a routing table entry, normally
14238b07e49aSJulian Elischer  * for an interface.
14248b07e49aSJulian Elischer  */
14258b07e49aSJulian Elischer int
14268b07e49aSJulian Elischer rtinit(struct ifaddr *ifa, int cmd, int flags)
14278b07e49aSJulian Elischer {
14288b07e49aSJulian Elischer 	struct sockaddr *dst;
1429a8498625SBjoern A. Zeeb 	int fib = RT_DEFAULT_FIB;
14308b07e49aSJulian Elischer 
14318b07e49aSJulian Elischer 	if (flags & RTF_HOST) {
14328b07e49aSJulian Elischer 		dst = ifa->ifa_dstaddr;
14338b07e49aSJulian Elischer 	} else {
14348b07e49aSJulian Elischer 		dst = ifa->ifa_addr;
14358b07e49aSJulian Elischer 	}
14368b07e49aSJulian Elischer 
1437b680a383SBjoern A. Zeeb 	switch (dst->sa_family) {
1438b680a383SBjoern A. Zeeb 	case AF_INET6:
1439b680a383SBjoern A. Zeeb 	case AF_INET:
1440b680a383SBjoern A. Zeeb 		/* We do support multiple FIBs. */
14417d9b6df1SAlexander V. Chernikov 		fib = RT_ALL_FIBS;
1442b680a383SBjoern A. Zeeb 		break;
1443b680a383SBjoern A. Zeeb 	}
14448b07e49aSJulian Elischer 	return (rtinit1(ifa, cmd, flags, fib));
14458b07e49aSJulian Elischer }
14464cbac30bSAlexander V. Chernikov 
14474cbac30bSAlexander V. Chernikov /*
14484cbac30bSAlexander V. Chernikov  * Announce interface address arrival/withdraw
14494cbac30bSAlexander V. Chernikov  * Returns 0 on success.
14504cbac30bSAlexander V. Chernikov  */
14514cbac30bSAlexander V. Chernikov int
14524cbac30bSAlexander V. Chernikov rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
14534cbac30bSAlexander V. Chernikov {
14544cbac30bSAlexander V. Chernikov 
14554cbac30bSAlexander V. Chernikov 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1456d375edc9SAlexander V. Chernikov 	    ("unexpected cmd %d", cmd));
1457d375edc9SAlexander V. Chernikov 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1458d375edc9SAlexander V. Chernikov 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
14594cbac30bSAlexander V. Chernikov 
1460d6e23cf0SMichael Tuexen 	EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd);
14614cbac30bSAlexander V. Chernikov 	return (rtsock_addrmsg(cmd, ifa, fibnum));
14624cbac30bSAlexander V. Chernikov }
14634cbac30bSAlexander V. Chernikov 
14644cbac30bSAlexander V. Chernikov /*
1465e02d3fe7SAlexander V. Chernikov  * Announce kernel-originated route addition/removal to rtsock based on @rt data.
1466e02d3fe7SAlexander V. Chernikov  * cmd: RTM_ cmd
1467e02d3fe7SAlexander V. Chernikov  * @rt: valid rtentry
1468e02d3fe7SAlexander V. Chernikov  * @ifp: target route interface
1469e02d3fe7SAlexander V. Chernikov  * @fibnum: fib id or RT_ALL_FIBS
1470e02d3fe7SAlexander V. Chernikov  *
14714cbac30bSAlexander V. Chernikov  * Returns 0 on success.
14724cbac30bSAlexander V. Chernikov  */
14734cbac30bSAlexander V. Chernikov int
1474e02d3fe7SAlexander V. Chernikov rt_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs,
14754cbac30bSAlexander V. Chernikov     int fibnum)
14764cbac30bSAlexander V. Chernikov {
14774cbac30bSAlexander V. Chernikov 
14784cbac30bSAlexander V. Chernikov 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1479d375edc9SAlexander V. Chernikov 	    ("unexpected cmd %d", cmd));
14804cbac30bSAlexander V. Chernikov 
1481d375edc9SAlexander V. Chernikov 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1482d375edc9SAlexander V. Chernikov 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
14834cbac30bSAlexander V. Chernikov 
14844cbac30bSAlexander V. Chernikov 	KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__));
14854cbac30bSAlexander V. Chernikov 
1486e02d3fe7SAlexander V. Chernikov 	return (rtsock_routemsg(cmd, rt, ifp, 0, fibnum));
14874cbac30bSAlexander V. Chernikov }
14884cbac30bSAlexander V. Chernikov 
1489e02d3fe7SAlexander V. Chernikov /*
1490e02d3fe7SAlexander V. Chernikov  * Announce kernel-originated route addition/removal to rtsock based on @rt data.
1491e02d3fe7SAlexander V. Chernikov  * cmd: RTM_ cmd
1492e02d3fe7SAlexander V. Chernikov  * @info: addrinfo structure with valid data.
1493e02d3fe7SAlexander V. Chernikov  * @fibnum: fib id or RT_ALL_FIBS
1494e02d3fe7SAlexander V. Chernikov  *
1495e02d3fe7SAlexander V. Chernikov  * Returns 0 on success.
1496e02d3fe7SAlexander V. Chernikov  */
1497e02d3fe7SAlexander V. Chernikov int
1498e02d3fe7SAlexander V. Chernikov rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
14994cbac30bSAlexander V. Chernikov {
15004cbac30bSAlexander V. Chernikov 
1501e02d3fe7SAlexander V. Chernikov 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE,
1502e02d3fe7SAlexander V. Chernikov 	    ("unexpected cmd %d", cmd));
1503e02d3fe7SAlexander V. Chernikov 
1504e02d3fe7SAlexander V. Chernikov 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1505e02d3fe7SAlexander V. Chernikov 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
1506e02d3fe7SAlexander V. Chernikov 
1507e02d3fe7SAlexander V. Chernikov 	KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__));
1508e02d3fe7SAlexander V. Chernikov 
1509e02d3fe7SAlexander V. Chernikov 	return (rtsock_routemsg_info(cmd, info, fibnum));
15104cbac30bSAlexander V. Chernikov }
15114cbac30bSAlexander V. Chernikov 
1512e02d3fe7SAlexander V. Chernikov 
15134cbac30bSAlexander V. Chernikov /*
15144cbac30bSAlexander V. Chernikov  * This is called to generate messages from the routing socket
15154cbac30bSAlexander V. Chernikov  * indicating a network interface has had addresses associated with it.
15164cbac30bSAlexander V. Chernikov  */
15174cbac30bSAlexander V. Chernikov void
1518e02d3fe7SAlexander V. Chernikov rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, struct rtentry *rt, int fibnum)
15194cbac30bSAlexander V. Chernikov {
15204cbac30bSAlexander V. Chernikov 
15214cbac30bSAlexander V. Chernikov 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
15224cbac30bSAlexander V. Chernikov 		("unexpected cmd %u", cmd));
1523d375edc9SAlexander V. Chernikov 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
1524d375edc9SAlexander V. Chernikov 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
15254cbac30bSAlexander V. Chernikov 
15264cbac30bSAlexander V. Chernikov 	if (cmd == RTM_ADD) {
15274cbac30bSAlexander V. Chernikov 		rt_addrmsg(cmd, ifa, fibnum);
15284cbac30bSAlexander V. Chernikov 		if (rt != NULL)
1529e02d3fe7SAlexander V. Chernikov 			rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum);
15304cbac30bSAlexander V. Chernikov 	} else {
15314cbac30bSAlexander V. Chernikov 		if (rt != NULL)
1532e02d3fe7SAlexander V. Chernikov 			rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum);
15334cbac30bSAlexander V. Chernikov 		rt_addrmsg(cmd, ifa, fibnum);
15344cbac30bSAlexander V. Chernikov 	}
15354cbac30bSAlexander V. Chernikov }
15364cbac30bSAlexander V. Chernikov 
1537