xref: /freebsd/sys/net/route.c (revision d65cd7a57bf0600b722afc770838a5d0c1c3a8e1)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1980, 1986, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *	@(#)route.c	8.3.1.1 (Berkeley) 2/23/95
32  * $FreeBSD$
33  */
34 /************************************************************************
35  * Note: In this file a 'fib' is a "forwarding information base"	*
36  * Which is the new name for an in kernel routing (next hop) table.	*
37  ***********************************************************************/
38 
39 #include "opt_inet.h"
40 #include "opt_inet6.h"
41 #include "opt_mrouting.h"
42 #include "opt_mpath.h"
43 #include "opt_route.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/socket.h>
50 #include <sys/sysctl.h>
51 #include <sys/syslog.h>
52 #include <sys/sysproto.h>
53 #include <sys/proc.h>
54 #include <sys/domain.h>
55 #include <sys/eventhandler.h>
56 #include <sys/kernel.h>
57 #include <sys/lock.h>
58 #include <sys/rmlock.h>
59 
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_dl.h>
63 #include <net/route.h>
64 #include <net/route/route_var.h>
65 #include <net/route/nhop.h>
66 #include <net/route/shared.h>
67 #include <net/vnet.h>
68 
69 #ifdef RADIX_MPATH
70 #include <net/radix_mpath.h>
71 #endif
72 
73 #include <netinet/in.h>
74 #include <netinet/ip_mroute.h>
75 
76 #include <vm/uma.h>
77 
/* Upper bound on the number of FIBs; route_init() clamps rt_numfibs to it. */
78 #define	RT_MAXFIBS	UINT16_MAX
79 
80 /* Kernel config default option. */
81 #ifdef ROUTETABLES
82 #if ROUTETABLES <= 0
83 #error "ROUTETABLES defined too low"
84 #endif
85 #if ROUTETABLES > RT_MAXFIBS
86 #error "ROUTETABLES defined too big"
87 #endif
88 #define	RT_NUMFIBS	ROUTETABLES
89 #endif /* ROUTETABLES */
90 /* Initialize to default if not otherwise set. */
91 #ifndef	RT_NUMFIBS
92 #define	RT_NUMFIBS	1
93 #endif
94 
95 /*
 * Number of FIBs.  Starts at the compile-time RT_NUMFIBS value and is
 * exported through the net.fibs sysctl with CTLFLAG_RDTUN; route_init()
 * forces it into the range 1..RT_MAXFIBS.
 */
96 u_int rt_numfibs = RT_NUMFIBS;
97 SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
98 
99 /*
100  * By default add routes to all fibs for new interfaces.
101  * Once this is set to 0 then only allocate routes on interface
102  * changes for the FIB of the caller when adding a new set of addresses
103  * to an interface.  XXX this is a shotgun approach to a problem that needs
104  * a more fine grained solution.. that will come.
105  * XXX also has the problems getting the FIB from curthread which will not
106  * always work given the fib can be overridden and prefixes can be added
107  * from the network stack context.
108  */
109 VNET_DEFINE(u_int, rt_add_addr_allfibs) = 1;
110 SYSCTL_UINT(_net, OID_AUTO, add_addr_allfibs, CTLFLAG_RWTUN | CTLFLAG_VNET,
111     &VNET_NAME(rt_add_addr_allfibs), 0, "");
112 
/* Per-VNET, per-CPU routing statistics (struct rtstat). */
113 VNET_PCPUSTAT_DEFINE(struct rtstat, rtstat);
114 
115 VNET_PCPUSTAT_SYSINIT(rtstat);
116 #ifdef VIMAGE
117 VNET_PCPUSTAT_SYSUNINIT(rtstat);
118 #endif
119 
/*
 * Per-VNET table of 'struct rib_head *' slots.  vnet_route_init()
 * allocates rt_numfibs * (AF_MAX + 1) slots and rt_tables_get_rnh_ptr()
 * indexes them as [fib][af]; the variable is accessed through a cast
 * to 'struct rib_head **'.
 */
120 VNET_DEFINE(struct rib_head *, rt_tables);
121 #define	V_rt_tables	VNET(rt_tables)
122 
123 
124 /*
125  * Convert a 'struct radix_node *' to a 'struct rtentry *'.
126  * The operation can be done safely (in this code) because a
127  * 'struct rtentry' starts with two 'struct radix_node''s, the first
128  * one representing leaf nodes in the routing tree, which is
129  * what the code in radix.c passes us as a 'struct radix_node'.
130  *
131  * But because there are a lot of assumptions in this conversion,
132  * do not cast explicitly, but always use the macro below.
133  */
134 #define RNTORT(p)	((struct rtentry *)(p))
135 
136 VNET_DEFINE_STATIC(uma_zone_t, rtzone);		/* Routing table UMA zone. */
137 #define	V_rtzone	VNET(rtzone)
138 
/* Event handler list named rt_addrmsg; its users are outside this part of the file. */
139 EVENTHANDLER_LIST_DEFINE(rt_addrmsg);
140 
/* Forward declarations of helpers defined further down in this file. */
141 static int rt_getifa_fib(struct rt_addrinfo *, u_int);
142 static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
143 static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
144     void *arg);
145 static struct rtentry *rt_unlinkrte(struct rib_head *rnh,
146     struct rt_addrinfo *info, int *perror);
147 static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
148 static void destroy_rtentry_epoch(epoch_context_t ctx);
/* Multipath unlink helper; only declared when RADIX_MPATH is configured. */
149 #ifdef RADIX_MPATH
150 static struct radix_node *rt_mpath_unlink(struct rib_head *rnh,
151     struct rt_addrinfo *info, struct rtentry *rto, int *perror);
152 #endif
153 static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
154     int flags);
155 
/*
 * Per-operation back-ends for adding, deleting and changing a route.
 * NOTE(review): presumably dispatched from rtrequest1_fib(), which is not
 * visible in this part of the file -- confirm.
 */
156 static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
157     struct rtentry **ret_nrt);
158 static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
159     struct rtentry **ret_nrt);
160 static int change_route(struct rib_head *, struct rt_addrinfo *,
161     struct rtentry **);
162 
163 /*
164  * handler for net.my_fibnum
165  */
166 static int
167 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
168 {
169         int fibnum;
170         int error;
171 
172         fibnum = curthread->td_proc->p_fibnum;
173         error = sysctl_handle_int(oidp, &fibnum, 0, req);
174         return (error);
175 }
176 
177 SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
178     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
179     &sysctl_my_fibnum, "I",
180     "default FIB of caller");
181 
182 static __inline struct rib_head **
183 rt_tables_get_rnh_ptr(int table, int fam)
184 {
185 	struct rib_head **rnh;
186 
187 	KASSERT(table >= 0 && table < rt_numfibs,
188 	    ("%s: table out of bounds (0 <= %d < %d)", __func__, table,
189 	     rt_numfibs));
190 	KASSERT(fam >= 0 && fam < (AF_MAX + 1),
191 	    ("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1));
192 
193 	/* rnh is [fib=0][af=0]. */
194 	rnh = (struct rib_head **)V_rt_tables;
195 	/* Get the offset to the requested table and fam. */
196 	rnh += table * (AF_MAX+1) + fam;
197 
198 	return (rnh);
199 }
200 
/*
 * Return the rib_head stored for (@table, @fam); the stored value is
 * returned as is and may be NULL.
 */
struct rib_head *
rt_tables_get_rnh(int table, int fam)
{
	struct rib_head **slot;

	slot = rt_tables_get_rnh_ptr(table, fam);
	return (*slot);
}
207 
208 u_int
209 rt_tables_get_gen(int table, int fam)
210 {
211 	struct rib_head *rnh;
212 
213 	rnh = *rt_tables_get_rnh_ptr(table, fam);
214 	KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d",
215 	    __func__, table, fam));
216 	return (rnh->rnh_gen);
217 }
218 
219 
220 /*
221  * route initialization must occur before ip6_init2(), which happens at
222  * SI_ORDER_MIDDLE.
223  */
224 static void
225 route_init(void)
226 {
227 
228 	/* whack the tunable ints into  line. */
229 	if (rt_numfibs > RT_MAXFIBS)
230 		rt_numfibs = RT_MAXFIBS;
231 	if (rt_numfibs == 0)
232 		rt_numfibs = 1;
233 	nhops_init();
234 }
235 SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL);
236 
/* UMA zone init hook for rtentry items: initializes the per-entry lock. */
static int
rtentry_zinit(void *mem, int size, int how)
{
	struct rtentry *rte;

	rte = mem;
	RT_LOCK_INIT(rte);

	return (0);
}
246 
/* UMA zone fini hook for rtentry items: destroys the per-entry lock. */
static void
rtentry_zfini(void *mem, int size)
{
	struct rtentry *rte;

	rte = mem;
	RT_LOCK_DESTROY(rte);
}
254 
255 static int
256 rtentry_ctor(void *mem, int size, void *arg, int how)
257 {
258 	struct rtentry *rt = mem;
259 
260 	bzero(rt, offsetof(struct rtentry, rt_endzero));
261 	rt->rt_chain = NULL;
262 
263 	return (0);
264 }
265 
/* UMA destructor for rtentry items: applies RT_UNLOCK_COND() to the entry. */
static void
rtentry_dtor(void *mem, int size, void *arg)
{
	struct rtentry *rte;

	rte = mem;
	RT_UNLOCK_COND(rte);
}
273 
274 static void
275 vnet_route_init(const void *unused __unused)
276 {
277 	struct domain *dom;
278 	struct rib_head **rnh;
279 	int table;
280 	int fam;
281 
282 	V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
283 	    sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO);
284 
285 	V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry),
286 	    rtentry_ctor, rtentry_dtor,
287 	    rtentry_zinit, rtentry_zfini, UMA_ALIGN_PTR, 0);
288 	for (dom = domains; dom; dom = dom->dom_next) {
289 		if (dom->dom_rtattach == NULL)
290 			continue;
291 
292 		for  (table = 0; table < rt_numfibs; table++) {
293 			fam = dom->dom_family;
294 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
295 				break;
296 
297 			rnh = rt_tables_get_rnh_ptr(table, fam);
298 			if (rnh == NULL)
299 				panic("%s: rnh NULL", __func__);
300 			dom->dom_rtattach((void **)rnh, 0, table);
301 		}
302 	}
303 }
304 VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
305     vnet_route_init, 0);
306 
307 #ifdef VIMAGE
308 static void
309 vnet_route_uninit(const void *unused __unused)
310 {
311 	int table;
312 	int fam;
313 	struct domain *dom;
314 	struct rib_head **rnh;
315 
316 	for (dom = domains; dom; dom = dom->dom_next) {
317 		if (dom->dom_rtdetach == NULL)
318 			continue;
319 
320 		for (table = 0; table < rt_numfibs; table++) {
321 			fam = dom->dom_family;
322 
323 			if (table != 0 && fam != AF_INET6 && fam != AF_INET)
324 				break;
325 
326 			rnh = rt_tables_get_rnh_ptr(table, fam);
327 			if (rnh == NULL)
328 				panic("%s: rnh NULL", __func__);
329 			dom->dom_rtdetach((void **)rnh, 0);
330 		}
331 	}
332 
333 	/*
334 	 * dom_rtdetach calls rt_table_destroy(), which
335 	 *  schedules deletion for all rtentries, nexthops and control
336 	 *  structures. Wait for the destruction callbacks to fire.
337 	 * Note that this should result in freeing all rtentries, but
338 	 *  nexthops deletions will be scheduled for the next epoch run
339 	 *  and will be completed after vnet teardown.
340 	 */
341 	epoch_drain_callbacks(net_epoch_preempt);
342 
343 	free(V_rt_tables, M_RTABLE);
344 	uma_zdestroy(V_rtzone);
345 }
346 VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
347     vnet_route_uninit, 0);
348 #endif
349 
350 struct rib_head *
351 rt_table_init(int offset, int family, u_int fibnum)
352 {
353 	struct rib_head *rh;
354 
355 	rh = malloc(sizeof(struct rib_head), M_RTABLE, M_WAITOK | M_ZERO);
356 
357 	/* TODO: These details should be hidded inside radix.c */
358 	/* Init masks tree */
359 	rn_inithead_internal(&rh->head, rh->rnh_nodes, offset);
360 	rn_inithead_internal(&rh->rmhead.head, rh->rmhead.mask_nodes, 0);
361 	rh->head.rnh_masks = &rh->rmhead;
362 
363 	/* Save metadata associated with this routing table. */
364 	rh->rib_family = family;
365 	rh->rib_fibnum = fibnum;
366 #ifdef VIMAGE
367 	rh->rib_vnet = curvnet;
368 #endif
369 
370 	tmproutes_init(rh);
371 
372 	/* Init locks */
373 	RIB_LOCK_INIT(rh);
374 
375 	nhops_init_rib(rh);
376 
377 	/* Finally, set base callbacks */
378 	rh->rnh_addaddr = rn_addroute;
379 	rh->rnh_deladdr = rn_delete;
380 	rh->rnh_matchaddr = rn_match;
381 	rh->rnh_lookup = rn_lookup;
382 	rh->rnh_walktree = rn_walktree;
383 	rh->rnh_walktree_from = rn_walktree_from;
384 
385 	return (rh);
386 }
387 
388 static int
389 rt_freeentry(struct radix_node *rn, void *arg)
390 {
391 	struct radix_head * const rnh = arg;
392 	struct radix_node *x;
393 
394 	x = (struct radix_node *)rn_delete(rn + 2, NULL, rnh);
395 	if (x != NULL)
396 		R_Free(x);
397 	return (0);
398 }
399 
400 void
401 rt_table_destroy(struct rib_head *rh)
402 {
403 
404 	tmproutes_destroy(rh);
405 
406 	rn_walktree(&rh->rmhead.head, rt_freeentry, &rh->rmhead.head);
407 
408 	nhops_destroy_rib(rh);
409 
410 	/* Assume table is already empty */
411 	RIB_LOCK_DESTROY(rh);
412 	free(rh, M_RTABLE);
413 }
414 
415 
416 #ifndef _SYS_SYSPROTO_H_
417 struct setfib_args {
418 	int     fibnum;
419 };
420 #endif
421 int
422 sys_setfib(struct thread *td, struct setfib_args *uap)
423 {
424 	if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
425 		return EINVAL;
426 	td->td_proc->p_fibnum = uap->fibnum;
427 	return (0);
428 }
429 
430 /*
431  * Remove a reference count from an rtentry.
432  * If the count gets low enough, take it out of the routing table
433  */
434 void
435 rtfree(struct rtentry *rt)
436 {
437 
438 	KASSERT(rt != NULL,("%s: NULL rt", __func__));
439 
440 	RT_LOCK_ASSERT(rt);
441 
442 	/*
443 	 * The callers should use RTFREE_LOCKED() or RTFREE(), so
444 	 * we should come here exactly with the last reference.
445 	 */
446 	RT_REMREF(rt);
447 	if (rt->rt_refcnt > 0) {
448 		log(LOG_DEBUG, "%s: %p has %d refs\n", __func__, rt, rt->rt_refcnt);
449 		goto done;
450 	}
451 
452 	/*
453 	 * If we are no longer "up" (and ref == 0)
454 	 * then we can free the resources associated
455 	 * with the route.
456 	 */
457 	if ((rt->rt_flags & RTF_UP) == 0) {
458 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
459 			panic("rtfree 2");
460 #ifdef	DIAGNOSTIC
461 		if (rt->rt_refcnt < 0) {
462 			printf("rtfree: %p not freed (neg refs)\n", rt);
463 			goto done;
464 		}
465 #endif
466 		epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
467 		    &rt->rt_epoch_ctx);
468 
469 		/*
470 		 * FALLTHROUGH to RT_UNLOCK() so the reporting functions
471 		 * have consistent behaviour of operating on unlocked entry.
472 		 */
473 	}
474 done:
475 	RT_UNLOCK(rt);
476 }
477 
478 static void
479 destroy_rtentry(struct rtentry *rt)
480 {
481 
482 	/*
483 	 * At this moment rnh, nh_control may be already freed.
484 	 * nhop interface may have been migrated to a different vnet.
485 	 * Use vnet stored in the nexthop to delete the entry.
486 	 */
487 	CURVNET_SET(nhop_get_vnet(rt->rt_nhop));
488 
489 	/* Unreference nexthop */
490 	nhop_free(rt->rt_nhop);
491 
492 	uma_zfree(V_rtzone, rt);
493 
494 	CURVNET_RESTORE();
495 }
496 
497 /*
498  * Epoch callback indicating rtentry is safe to destroy
499  */
500 static void
501 destroy_rtentry_epoch(epoch_context_t ctx)
502 {
503 	struct rtentry *rt;
504 
505 	rt = __containerof(ctx, struct rtentry, rt_epoch_ctx);
506 
507 	destroy_rtentry(rt);
508 }
509 
510 /*
511  * Adds a temporary redirect entry to the routing table.
512  * @fibnum: fib number
513  * @dst: destination to install redirect to
514  * @gateway: gateway to go via
515  * @author: sockaddr of originating router, can be NULL
516  * @ifp: interface to use for the redirected route
517  * @flags: set of flags to add. Allowed: RTF_GATEWAY
518  * @lifetime_sec: time in seconds to expire this redirect.
519  *
520  * Returns 0 on success, errno otherwise.
521  */
522 int
523 rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
524     struct sockaddr *author, struct ifnet *ifp, int flags, int lifetime_sec)
525 {
526 	struct rtentry *rt;
527 	int error;
528 	struct rt_addrinfo info;
529 	struct rt_metrics rti_rmx;
530 	struct ifaddr *ifa;
531 
532 	NET_EPOCH_ASSERT();
533 
534 	if (rt_tables_get_rnh(fibnum, dst->sa_family) == NULL)
535 		return (EAFNOSUPPORT);
536 
537 	/* Verify the allowed flag mask. */
538 	KASSERT(((flags & ~(RTF_GATEWAY)) == 0),
539 	    ("invalid redirect flags: %x", flags));
540 
541 	/* Get the best ifa for the given interface and gateway. */
542 	if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL)
543 		return (ENETUNREACH);
544 	ifa_ref(ifa);
545 
546 	bzero(&info, sizeof(info));
547 	info.rti_info[RTAX_DST] = dst;
548 	info.rti_info[RTAX_GATEWAY] = gateway;
549 	info.rti_ifa = ifa;
550 	info.rti_ifp = ifp;
551 	info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC;
552 
553 	/* Setup route metrics to define expire time. */
554 	bzero(&rti_rmx, sizeof(rti_rmx));
555 	/* Set expire time as absolute. */
556 	rti_rmx.rmx_expire = lifetime_sec + time_second;
557 	info.rti_mflags |= RTV_EXPIRE;
558 	info.rti_rmx = &rti_rmx;
559 
560 	error = rtrequest1_fib(RTM_ADD, &info, &rt, fibnum);
561 	ifa_free(ifa);
562 
563 	if (error != 0) {
564 		/* TODO: add per-fib redirect stats. */
565 		return (error);
566 	}
567 
568 	RT_LOCK(rt);
569 	flags = rt->rt_flags;
570 	RT_UNLOCK(rt);
571 
572 	RTSTAT_INC(rts_dynamic);
573 
574 	/* Send notification of a route addition to userland. */
575 	bzero(&info, sizeof(info));
576 	info.rti_info[RTAX_DST] = dst;
577 	info.rti_info[RTAX_GATEWAY] = gateway;
578 	info.rti_info[RTAX_AUTHOR] = author;
579 	rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
580 
581 	return (0);
582 }
583 
584 /*
585  * Routing table ioctl interface.
586  */
587 int
588 rtioctl_fib(u_long req, caddr_t data, u_int fibnum)
589 {
590 
591 	/*
592 	 * If more ioctl commands are added here, make sure the proper
593 	 * super-user checks are being performed because it is possible for
594 	 * prison-root to make it this far if raw sockets have been enabled
595 	 * in jails.
596 	 */
597 #ifdef INET
598 	/* Multicast goop, grrr... */
599 	return mrt_ioctl ? mrt_ioctl(req, data, fibnum) : EOPNOTSUPP;
600 #else /* INET */
601 	return ENXIO;
602 #endif /* INET */
603 }
604 
605 struct ifaddr *
606 ifa_ifwithroute(int flags, const struct sockaddr *dst, struct sockaddr *gateway,
607 				u_int fibnum)
608 {
609 	struct ifaddr *ifa;
610 
611 	NET_EPOCH_ASSERT();
612 	if ((flags & RTF_GATEWAY) == 0) {
613 		/*
614 		 * If we are adding a route to an interface,
615 		 * and the interface is a pt to pt link
616 		 * we should search for the destination
617 		 * as our clue to the interface.  Otherwise
618 		 * we can use the local address.
619 		 */
620 		ifa = NULL;
621 		if (flags & RTF_HOST)
622 			ifa = ifa_ifwithdstaddr(dst, fibnum);
623 		if (ifa == NULL)
624 			ifa = ifa_ifwithaddr(gateway);
625 	} else {
626 		/*
627 		 * If we are adding a route to a remote net
628 		 * or host, the gateway may still be on the
629 		 * other end of a pt to pt link.
630 		 */
631 		ifa = ifa_ifwithdstaddr(gateway, fibnum);
632 	}
633 	if (ifa == NULL)
634 		ifa = ifa_ifwithnet(gateway, 0, fibnum);
635 	if (ifa == NULL) {
636 		struct nhop_object *nh;
637 
638 		nh = rib_lookup(fibnum, gateway, NHR_NONE, 0);
639 
640 		/*
641 		 * dismiss a gateway that is reachable only
642 		 * through the default router
643 		 */
644 		if ((nh == NULL) || (nh->nh_flags & NHF_DEFAULT))
645 			return (NULL);
646 		ifa = nh->nh_ifa;
647 	}
648 	if (ifa->ifa_addr->sa_family != dst->sa_family) {
649 		struct ifaddr *oifa = ifa;
650 		ifa = ifaof_ifpforaddr(dst, ifa->ifa_ifp);
651 		if (ifa == NULL)
652 			ifa = oifa;
653 	}
654 
655 	return (ifa);
656 }
657 
658 /*
659  * Do appropriate manipulations of a routing tree given
660  * all the bits of info needed
661  */
662 int
663 rtrequest_fib(int req,
664 	struct sockaddr *dst,
665 	struct sockaddr *gateway,
666 	struct sockaddr *netmask,
667 	int flags,
668 	struct rtentry **ret_nrt,
669 	u_int fibnum)
670 {
671 	struct rt_addrinfo info;
672 
673 	if (dst->sa_len == 0)
674 		return(EINVAL);
675 
676 	bzero((caddr_t)&info, sizeof(info));
677 	info.rti_flags = flags;
678 	info.rti_info[RTAX_DST] = dst;
679 	info.rti_info[RTAX_GATEWAY] = gateway;
680 	info.rti_info[RTAX_NETMASK] = netmask;
681 	return rtrequest1_fib(req, &info, ret_nrt, fibnum);
682 }
683 
684 
685 /*
686  * Copy most of @rt data into @info.
687  *
688  * If @flags contains NHR_COPY, copies dst,netmask and gw to the
689  * pointers specified by @info structure. Assume such pointers
690  * are zeroed sockaddr-like structures with sa_len field initialized
691  * to reflect size of the provided buffer. if no NHR_COPY is specified,
692  * point dst,netmask and gw @info fields to appropriate @rt values.
693  *
694  * if @flags contains NHR_REF, do refcounting on rt_ifp and rt_ifa.
695  *
696  * Returns 0 on success.
697  */
698 int
699 rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
700 {
701 	struct rt_metrics *rmx;
702 	struct sockaddr *src, *dst;
703 	struct nhop_object *nh;
704 	int sa_len;
705 
706 	if (flags & NHR_COPY) {
707 		/* Copy destination if dst is non-zero */
708 		src = rt_key(rt);
709 		dst = info->rti_info[RTAX_DST];
710 		sa_len = src->sa_len;
711 		if (dst != NULL) {
712 			if (src->sa_len > dst->sa_len)
713 				return (ENOMEM);
714 			memcpy(dst, src, src->sa_len);
715 			info->rti_addrs |= RTA_DST;
716 		}
717 
718 		/* Copy mask if set && dst is non-zero */
719 		src = rt_mask(rt);
720 		dst = info->rti_info[RTAX_NETMASK];
721 		if (src != NULL && dst != NULL) {
722 
723 			/*
724 			 * Radix stores different value in sa_len,
725 			 * assume rt_mask() to have the same length
726 			 * as rt_key()
727 			 */
728 			if (sa_len > dst->sa_len)
729 				return (ENOMEM);
730 			memcpy(dst, src, src->sa_len);
731 			info->rti_addrs |= RTA_NETMASK;
732 		}
733 
734 		/* Copy gateway is set && dst is non-zero */
735 		src = &rt->rt_nhop->gw_sa;
736 		dst = info->rti_info[RTAX_GATEWAY];
737 		if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
738 			if (src->sa_len > dst->sa_len)
739 				return (ENOMEM);
740 			memcpy(dst, src, src->sa_len);
741 			info->rti_addrs |= RTA_GATEWAY;
742 		}
743 	} else {
744 		info->rti_info[RTAX_DST] = rt_key(rt);
745 		info->rti_addrs |= RTA_DST;
746 		if (rt_mask(rt) != NULL) {
747 			info->rti_info[RTAX_NETMASK] = rt_mask(rt);
748 			info->rti_addrs |= RTA_NETMASK;
749 		}
750 		if (rt->rt_flags & RTF_GATEWAY) {
751 			info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
752 			info->rti_addrs |= RTA_GATEWAY;
753 		}
754 	}
755 
756 	nh = rt->rt_nhop;
757 	rmx = info->rti_rmx;
758 	if (rmx != NULL) {
759 		info->rti_mflags |= RTV_MTU;
760 		rmx->rmx_mtu = nh->nh_mtu;
761 	}
762 
763 	info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh);
764 	info->rti_ifp = nh->nh_ifp;
765 	info->rti_ifa = nh->nh_ifa;
766 	if (flags & NHR_REF) {
767 		if_ref(info->rti_ifp);
768 		ifa_ref(info->rti_ifa);
769 	}
770 
771 	return (0);
772 }
773 
774 /*
775  * Looks up the route entry for @dst in the RIB database for fib @fibnum.
776  * Exports entry data to @info using rt_exportinfo().
777  *
778  * If @flags contains NHR_REF, refcounting is performed on rt_ifp and rt_ifa.
779  * All references can be released later by calling rib_free_info().
780  *
781  * Returns 0 on success.
782  * Returns ENOENT for lookup failure, ENOMEM for export failure.
783  */
784 int
785 rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
786     uint32_t flowid, struct rt_addrinfo *info)
787 {
788 	RIB_RLOCK_TRACKER;
789 	struct rib_head *rh;
790 	struct radix_node *rn;
791 	struct rtentry *rt;
792 	int error;
793 
794 	KASSERT((fibnum < rt_numfibs), ("rib_lookup_rte: bad fibnum"));
795 	rh = rt_tables_get_rnh(fibnum, dst->sa_family);
796 	if (rh == NULL)
797 		return (ENOENT);
798 
799 	RIB_RLOCK(rh);
800 	rn = rh->rnh_matchaddr(__DECONST(void *, dst), &rh->head);
801 	if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
802 		rt = RNTORT(rn);
803 		/* Ensure route & ifp is UP */
804 		if (RT_LINK_IS_UP(rt->rt_nhop->nh_ifp)) {
805 			flags = (flags & NHR_REF) | NHR_COPY;
806 			error = rt_exportinfo(rt, info, flags);
807 			RIB_RUNLOCK(rh);
808 
809 			return (error);
810 		}
811 	}
812 	RIB_RUNLOCK(rh);
813 
814 	return (ENOENT);
815 }
816 
817 /*
818  * Releases all references acquired by rib_lookup_info() when
819  * called with NHR_REF flags.
820  */
821 void
822 rib_free_info(struct rt_addrinfo *info)
823 {
824 
825 	ifa_free(info->rti_ifa);
826 	if_rele(info->rti_ifp);
827 }
828 
829 /*
830  * Iterates over all existing fibs in system calling
831  *  @setwa_f function prior to traversing each fib.
832  *  Calls @wa_f function for each element in current fib.
833  * If af is not AF_UNSPEC, iterates over fibs in particular
834  * address family.
835  */
836 void
837 rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
838     void *arg)
839 {
840 	struct rib_head *rnh;
841 	uint32_t fibnum;
842 	int i;
843 
844 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
845 		/* Do we want some specific family? */
846 		if (af != AF_UNSPEC) {
847 			rnh = rt_tables_get_rnh(fibnum, af);
848 			if (rnh == NULL)
849 				continue;
850 			if (setwa_f != NULL)
851 				setwa_f(rnh, fibnum, af, arg);
852 
853 			RIB_WLOCK(rnh);
854 			rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
855 			RIB_WUNLOCK(rnh);
856 			continue;
857 		}
858 
859 		for (i = 1; i <= AF_MAX; i++) {
860 			rnh = rt_tables_get_rnh(fibnum, i);
861 			if (rnh == NULL)
862 				continue;
863 			if (setwa_f != NULL)
864 				setwa_f(rnh, fibnum, i, arg);
865 
866 			RIB_WLOCK(rnh);
867 			rnh->rnh_walktree(&rnh->head, (walktree_f_t *)wa_f,arg);
868 			RIB_WUNLOCK(rnh);
869 		}
870 	}
871 }
872 
/* State shared between rib_walk_del() and its callback rt_checkdelroute(). */
873 struct rt_delinfo
874 {
875 	struct rt_addrinfo info;	/* filter + per-entry addresses handed to rt_unlinkrte() */
876 	struct rib_head *rnh;		/* table being walked */
877 	struct rtentry *head;		/* unlinked entries, chained through rt_chain */
878 };
879 
880 /*
881  * Conditionally unlinks @rn from radix tree based
882  * on info data passed in @arg.
883  */
884 static int
885 rt_checkdelroute(struct radix_node *rn, void *arg)
886 {
887 	struct rt_delinfo *di;
888 	struct rt_addrinfo *info;
889 	struct rtentry *rt;
890 	int error;
891 
892 	di = (struct rt_delinfo *)arg;
893 	rt = (struct rtentry *)rn;
894 	info = &di->info;
895 	error = 0;
896 
897 	info->rti_info[RTAX_DST] = rt_key(rt);
898 	info->rti_info[RTAX_NETMASK] = rt_mask(rt);
899 	info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
900 
901 	rt = rt_unlinkrte(di->rnh, info, &error);
902 	if (rt == NULL) {
903 		/* Either not allowed or not matched. Skip entry */
904 		return (0);
905 	}
906 
907 	/* Entry was unlinked. Add to the list and return */
908 	rt->rt_chain = di->head;
909 	di->head = rt;
910 
911 	return (0);
912 }
913 
914 /*
915  * Iterates over a routing table specified by @fibnum and @family and
916  *  deletes elements marked by @filter_f.
917  * @fibnum: rtable id
918  * @family: AF_ address family
919  * @filter_f: function returning non-zero value for items to delete
920  * @arg: data to pass to the @filter_f function
921  * @report: true if rtsock notification is needed.
922  */
923 void
924 rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool report)
925 {
926 	struct rib_head *rnh;
927 	struct rt_delinfo di;
928 	struct rtentry *rt;
929 
930 	rnh = rt_tables_get_rnh(fibnum, family);
931 	if (rnh == NULL)
932 		return;
933 
934 	bzero(&di, sizeof(di));
935 	di.info.rti_filter = filter_f;
936 	di.info.rti_filterdata = arg;
937 	di.rnh = rnh;
938 
939 	RIB_WLOCK(rnh);
940 	rnh->rnh_walktree(&rnh->head, rt_checkdelroute, &di);
941 	RIB_WUNLOCK(rnh);
942 
943 	if (di.head == NULL)
944 		return;
945 
946 	/* We might have something to reclaim. */
947 	while (di.head != NULL) {
948 		rt = di.head;
949 		di.head = rt->rt_chain;
950 		rt->rt_chain = NULL;
951 
952 		/* TODO std rt -> rt_addrinfo export */
953 		di.info.rti_info[RTAX_DST] = rt_key(rt);
954 		di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
955 
956 		rt_notifydelete(rt, &di.info);
957 
958 		if (report)
959 			rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0,
960 			    fibnum);
961 		RTFREE_LOCKED(rt);
962 	}
963 }
964 
965 /*
966  * Iterates over all existing fibs in system and deletes each element
967  *  for which @filter_f function returns non-zero value.
968  * If @family is not AF_UNSPEC, iterates over fibs in particular
969  * address family.
970  */
971 void
972 rt_foreach_fib_walk_del(int family, rt_filter_f_t *filter_f, void *arg)
973 {
974 	u_int fibnum;
975 	int i, start, end;
976 
977 	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
978 		/* Do we want some specific family? */
979 		if (family != AF_UNSPEC) {
980 			start = family;
981 			end = family;
982 		} else {
983 			start = 1;
984 			end = AF_MAX;
985 		}
986 
987 		for (i = start; i <= end; i++) {
988 			if (rt_tables_get_rnh(fibnum, i) == NULL)
989 				continue;
990 
991 			rib_walk_del(fibnum, i, filter_f, arg, 0);
992 		}
993 	}
994 }
995 
996 /*
997  * Delete Routes for a Network Interface
998  *
999  * Called for each routing entry via the rnh->rnh_walktree() call above
1000  * to delete all route entries referencing a detaching network interface.
1001  *
1002  * Arguments:
1003  *	rt	pointer to rtentry
1004  *	nh	pointer to nhop
1005  *	arg	argument passed to rnh->rnh_walktree() - detaching interface
1006  *
1007  * Returns:
1008  *	0	successful
1009  *	errno	failed - reason indicated
1010  */
1011 static int
1012 rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *nh, void *arg)
1013 {
1014 	struct ifnet	*ifp = arg;
1015 
1016 	if (nh->nh_ifp != ifp)
1017 		return (0);
1018 
1019 	/*
1020 	 * Protect (sorta) against walktree recursion problems
1021 	 * with cloned routes
1022 	 */
1023 	if ((rt->rt_flags & RTF_UP) == 0)
1024 		return (0);
1025 
1026 	return (1);
1027 }
1028 
1029 /*
1030  * Delete all remaining routes using this interface
1031  * Unfortunately the only way to do this is to slog through
1032  * the entire routing table looking for routes which point
1033  * to this interface...oh well...
1034  */
1035 void
1036 rt_flushifroutes_af(struct ifnet *ifp, int af)
1037 {
1038 	KASSERT((af >= 1 && af <= AF_MAX), ("%s: af %d not >= 1 and <= %d",
1039 	    __func__, af, AF_MAX));
1040 
1041 	rt_foreach_fib_walk_del(af, rt_ifdelroute, ifp);
1042 }
1043 
1044 void
1045 rt_flushifroutes(struct ifnet *ifp)
1046 {
1047 
1048 	rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
1049 }
1050 
1051 /*
1052  * Conditionally unlinks rtentry matching data inside @info from @rnh.
1053  * Returns unlinked, locked and referenced @rtentry on success,
1054  * Returns NULL and sets @perror to:
1055  * ESRCH - if prefix was not found,
1056  * EADDRINUSE - if trying to delete PINNED route without appropriate flag.
1057  * ENOENT - if supplied filter function returned 0 (not matched).
1058  */
1059 static struct rtentry *
1060 rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
1061 {
1062 	struct sockaddr *dst, *netmask;
1063 	struct rtentry *rt;
1064 	struct radix_node *rn;
1065 
1066 	dst = info->rti_info[RTAX_DST];
1067 	netmask = info->rti_info[RTAX_NETMASK];
1068 
1069 	rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, &rnh->head);
1070 	if (rt == NULL) {
1071 		*perror = ESRCH;
1072 		return (NULL);
1073 	}
1074 
1075 	if ((info->rti_flags & RTF_PINNED) == 0) {
1076 		/* Check if target route can be deleted */
1077 		if (rt->rt_flags & RTF_PINNED) {
1078 			*perror = EADDRINUSE;
1079 			return (NULL);
1080 		}
1081 	}
1082 
1083 	if (info->rti_filter != NULL) {
1084 		if (info->rti_filter(rt, rt->rt_nhop, info->rti_filterdata)==0){
1085 			/* Not matched */
1086 			*perror = ENOENT;
1087 			return (NULL);
1088 		}
1089 
1090 		/*
1091 		 * Filter function requested rte deletion.
1092 		 * Ease the caller work by filling in remaining info
1093 		 * from that particular entry.
1094 		 */
1095 		info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
1096 	}
1097 
1098 	/*
1099 	 * Remove the item from the tree and return it.
1100 	 * Complain if it is not there and do no more processing.
1101 	 */
1102 	*perror = ESRCH;
1103 #ifdef RADIX_MPATH
1104 	if (rt_mpath_capable(rnh))
1105 		rn = rt_mpath_unlink(rnh, info, rt, perror);
1106 	else
1107 #endif
1108 	rn = rnh->rnh_deladdr(dst, netmask, &rnh->head);
1109 	if (rn == NULL)
1110 		return (NULL);
1111 
1112 	if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
1113 		panic ("rtrequest delete");
1114 
1115 	rt = RNTORT(rn);
1116 	RT_LOCK(rt);
1117 	RT_ADDREF(rt);
1118 	rt->rt_flags &= ~RTF_UP;
1119 
1120 	*perror = 0;
1121 
1122 	return (rt);
1123 }
1124 
1125 static void
1126 rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
1127 {
1128 	struct ifaddr *ifa;
1129 
1130 	/*
1131 	 * give the protocol a chance to keep things in sync.
1132 	 */
1133 	ifa = rt->rt_nhop->nh_ifa;
1134 	if (ifa != NULL && ifa->ifa_rtrequest != NULL)
1135 		ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info);
1136 }
1137 
1138 
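/*
 * These (questionable) macros masquerade as local variables of any
 * function that has a parameter or local named "info".  They remain
 * defined until the matching #undef lines further down and therefore
 * affect every function in between.  XXXXXX!!!
 */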
1143 #define	dst	info->rti_info[RTAX_DST]
1144 #define	gateway	info->rti_info[RTAX_GATEWAY]
1145 #define	netmask	info->rti_info[RTAX_NETMASK]
1146 #define	ifaaddr	info->rti_info[RTAX_IFA]
1147 #define	ifpaddr	info->rti_info[RTAX_IFP]
1148 #define	flags	info->rti_flags
1149 
1150 /*
1151  * Look up rt_addrinfo for a specific fib.  Note that if rti_ifa is defined,
1152  * it will be referenced so the caller must free it.
1153  *
1154  * Assume basic consistency checks are executed by callers:
1155  * RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.
1156  */
1157 int
1158 rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
1159 {
1160 	struct epoch_tracker et;
1161 	int needref, error;
1162 
1163 	/*
1164 	 * ifp may be specified by sockaddr_dl
1165 	 * when protocol address is ambiguous.
1166 	 */
1167 	error = 0;
1168 	needref = (info->rti_ifa == NULL);
1169 	NET_EPOCH_ENTER(et);
1170 
1171 	/* If we have interface specified by the ifindex in the address, use it */
1172 	if (info->rti_ifp == NULL && ifpaddr != NULL &&
1173 	    ifpaddr->sa_family == AF_LINK) {
1174 	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)ifpaddr;
1175 	    if (sdl->sdl_index != 0)
1176 		    info->rti_ifp = ifnet_byindex(sdl->sdl_index);
1177 	}
1178 	/*
1179 	 * If we have source address specified, try to find it
1180 	 * TODO: avoid enumerating all ifas on all interfaces.
1181 	 */
1182 	if (info->rti_ifa == NULL && ifaaddr != NULL)
1183 		info->rti_ifa = ifa_ifwithaddr(ifaaddr);
1184 	if (info->rti_ifa == NULL) {
1185 		struct sockaddr *sa;
1186 
1187 		/*
1188 		 * Most common use case for the userland-supplied routes.
1189 		 *
1190 		 * Choose sockaddr to select ifa.
1191 		 * -- if ifp is set --
1192 		 * Order of preference:
1193 		 * 1) IFA address
1194 		 * 2) gateway address
1195 		 *   Note: for interface routes link-level gateway address
1196 		 *     is specified to indicate the interface index without
1197 		 *     specifying RTF_GATEWAY. In this case, ignore gateway
1198 		 *   Note: gateway AF may be different from dst AF. In this case,
1199 		 *   ignore gateway
1200 		 * 3) final destination.
1201 		 * 4) if all of these fails, try to get at least link-level ifa.
1202 		 * -- else --
1203 		 * try to lookup gateway or dst in the routing table to get ifa
1204 		 */
1205 		if (info->rti_info[RTAX_IFA] != NULL)
1206 			sa = info->rti_info[RTAX_IFA];
1207 		else if ((info->rti_flags & RTF_GATEWAY) != 0 &&
1208 		    gateway->sa_family == dst->sa_family)
1209 			sa = gateway;
1210 		else
1211 			sa = dst;
1212 		if (info->rti_ifp != NULL) {
1213 			info->rti_ifa = ifaof_ifpforaddr(sa, info->rti_ifp);
1214 			/* Case 4 */
1215 			if (info->rti_ifa == NULL && gateway != NULL)
1216 				info->rti_ifa = ifaof_ifpforaddr(gateway, info->rti_ifp);
1217 		} else if (dst != NULL && gateway != NULL)
1218 			info->rti_ifa = ifa_ifwithroute(flags, dst, gateway,
1219 							fibnum);
1220 		else if (sa != NULL)
1221 			info->rti_ifa = ifa_ifwithroute(flags, sa, sa,
1222 							fibnum);
1223 	}
1224 	if (needref && info->rti_ifa != NULL) {
1225 		if (info->rti_ifp == NULL)
1226 			info->rti_ifp = info->rti_ifa->ifa_ifp;
1227 		ifa_ref(info->rti_ifa);
1228 	} else
1229 		error = ENETUNREACH;
1230 	NET_EPOCH_EXIT(et);
1231 	return (error);
1232 }
1233 
1234 void
1235 rt_updatemtu(struct ifnet *ifp)
1236 {
1237 	struct rib_head *rnh;
1238 	int mtu;
1239 	int i, j;
1240 
1241 	/*
1242 	 * Try to update rt_mtu for all routes using this interface
1243 	 * Unfortunately the only way to do this is to traverse all
1244 	 * routing tables in all fibs/domains.
1245 	 */
1246 	for (i = 1; i <= AF_MAX; i++) {
1247 		mtu = if_getmtu_family(ifp, i);
1248 		for (j = 0; j < rt_numfibs; j++) {
1249 			rnh = rt_tables_get_rnh(j, i);
1250 			if (rnh == NULL)
1251 				continue;
1252 			nhops_update_ifmtu(rnh, ifp, mtu);
1253 		}
1254 	}
1255 }
1256 
1257 
1258 #if 0
1259 int p_sockaddr(char *buf, int buflen, struct sockaddr *s);
1260 int rt_print(char *buf, int buflen, struct rtentry *rt);
1261 
/*
 * Format the address held in @s into @buf (at most @buflen bytes) using
 * inet_ntop().  Only AF_INET and AF_INET6 are understood.
 *
 * Returns the length of the resulting string, or 0 if the family is not
 * supported or the conversion failed.
 */
int
p_sockaddr(char *buf, int buflen, struct sockaddr *s)
{
	void *raw;

	if (s->sa_family == AF_INET)
		raw = &((struct sockaddr_in *)s)->sin_addr;
	else if (s->sa_family == AF_INET6)
		raw = &((struct sockaddr_in6 *)s)->sin6_addr;
	else
		return (0);

	if (inet_ntop(s->sa_family, raw, buf, buflen) == NULL)
		return (0);

	return (strlen(buf));
}
1284 
1285 int
1286 rt_print(char *buf, int buflen, struct rtentry *rt)
1287 {
1288 	struct sockaddr *addr, *mask;
1289 	int i = 0;
1290 
1291 	addr = rt_key(rt);
1292 	mask = rt_mask(rt);
1293 
1294 	i = p_sockaddr(buf, buflen, addr);
1295 	if (!(rt->rt_flags & RTF_HOST)) {
1296 		buf[i++] = '/';
1297 		i += p_sockaddr(buf + i, buflen - i, mask);
1298 	}
1299 
1300 	if (rt->rt_flags & RTF_GATEWAY) {
1301 		buf[i++] = '>';
1302 		i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa);
1303 	}
1304 
1305 	return (i);
1306 }
1307 #endif
1308 
#ifdef RADIX_MPATH
/*
 * Deletes the key for single-path routes; for multi-path routes unlinks
 * the rtentry whose gateway matches the one specified in @info.
 *
 * @rto is the first entry of the chain found for the prefix.
 *
 * Returns the unlinked radix node and sets @perror to 0.  In case of
 * failure, returns NULL and sets @perror to ESRCH.
 */
static struct radix_node *
rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rtentry *rto, int *perror)
{
	struct rtentry *rt, *promoted;
	struct radix_node *rn;
	struct sockaddr *gw;

	/*
	 * If we got multipath routes, we require users to specify
	 * a matching RTAX_GATEWAY.
	 */
	gw = info->rti_info[RTAX_GATEWAY];
	rt = rt_mpath_matchgate(rto, gw);
	if (rt == NULL) {
		*perror = ESRCH;
		return (NULL);
	}

	if (rt != rto) {
		/*
		 * The match is the 2nd or a later entry of the chain:
		 * take it out of the duplicate list, the key stays.
		 */
		if (rt_mpath_deldup(rto, rt) == 0)
			panic ("rtrequest1: rt_mpath_deldup");
		*perror = 0;
		return ((struct radix_node *)rt);
	}

	/*
	 * The match is the first entry in the chain.
	 */
	rn = rn_mpath_next((struct radix_node *)rt);
	if (rn != NULL) {
		/* There is another entry, now it's the active one. */
		promoted = RNTORT(rn);
		RT_LOCK(promoted);
		promoted->rt_flags |= RTF_UP;
		RT_UNLOCK(promoted);
	} else if (rt->rt_flags & RTF_GATEWAY) {
		/*
		 * For gateway routes, we need to make sure that we are
		 * deleting the correct gateway.  rt_mpath_matchgate()
		 * does not check the case when there is only one route
		 * in the chain.
		 */
		if (gw != NULL &&
		    (rt->rt_nhop->gw_sa.sa_len != gw->sa_len ||
		    memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len) != 0)) {
			*perror = ESRCH;
			return (NULL);
		}
	}

	/*
	 * Use the normal delete code to remove the first entry.  Only
	 * report success if the tree really gave the node up; a NULL
	 * result paired with a zero error would mislead the caller.
	 */
	rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
	    info->rti_info[RTAX_NETMASK], &rnh->head);
	*perror = (rn != NULL) ? 0 : ESRCH;
	return (rn);
}
#endif
1385 
1386 #undef dst
1387 #undef gateway
1388 #undef netmask
1389 #undef ifaaddr
1390 #undef ifpaddr
1391 #undef flags
1392 
1393 int
1394 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
1395 				u_int fibnum)
1396 {
1397 	const struct sockaddr *dst;
1398 	struct rib_head *rnh;
1399 	int error;
1400 
1401 	KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
1402 	KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
1403 	NET_EPOCH_ASSERT();
1404 
1405 	dst = info->rti_info[RTAX_DST];
1406 
1407 	switch (dst->sa_family) {
1408 	case AF_INET6:
1409 	case AF_INET:
1410 		/* We support multiple FIBs. */
1411 		break;
1412 	default:
1413 		fibnum = RT_DEFAULT_FIB;
1414 		break;
1415 	}
1416 
1417 	/*
1418 	 * Find the correct routing tree to use for this Address Family
1419 	 */
1420 	rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
1421 	if (rnh == NULL)
1422 		return (EAFNOSUPPORT);
1423 
1424 	/*
1425 	 * If we are adding a host route then we don't want to put
1426 	 * a netmask in the tree, nor do we want to clone it.
1427 	 */
1428 	if (info->rti_flags & RTF_HOST)
1429 		info->rti_info[RTAX_NETMASK] = NULL;
1430 
1431 	error = 0;
1432 	switch (req) {
1433 	case RTM_DELETE:
1434 		error = del_route(rnh, info, ret_nrt);
1435 		break;
1436 	case RTM_RESOLVE:
1437 		/*
1438 		 * resolve was only used for route cloning
1439 		 * here for compat
1440 		 */
1441 		break;
1442 	case RTM_ADD:
1443 		error = add_route(rnh, info, ret_nrt);
1444 		break;
1445 	case RTM_CHANGE:
1446 		error = change_route(rnh, info, ret_nrt);
1447 		break;
1448 	default:
1449 		error = EOPNOTSUPP;
1450 	}
1451 
1452 	return (error);
1453 }
1454 
1455 static int
1456 add_route(struct rib_head *rnh, struct rt_addrinfo *info,
1457     struct rtentry **ret_nrt)
1458 {
1459 	struct sockaddr *dst, *ndst, *gateway, *netmask;
1460 	struct rtentry *rt, *rt_old;
1461 	struct nhop_object *nh;
1462 	struct radix_node *rn;
1463 	struct ifaddr *ifa;
1464 	int error, flags;
1465 	struct epoch_tracker et;
1466 
1467 	dst = info->rti_info[RTAX_DST];
1468 	gateway = info->rti_info[RTAX_GATEWAY];
1469 	netmask = info->rti_info[RTAX_NETMASK];
1470 	flags = info->rti_flags;
1471 
1472 	if ((flags & RTF_GATEWAY) && !gateway)
1473 		return (EINVAL);
1474 	if (dst && gateway && (dst->sa_family != gateway->sa_family) &&
1475 	    (gateway->sa_family != AF_UNSPEC) && (gateway->sa_family != AF_LINK))
1476 		return (EINVAL);
1477 
1478 	if (dst->sa_len > sizeof(((struct rtentry *)NULL)->rt_dstb))
1479 		return (EINVAL);
1480 
1481 	if (info->rti_ifa == NULL) {
1482 		error = rt_getifa_fib(info, rnh->rib_fibnum);
1483 		if (error)
1484 			return (error);
1485 	} else {
1486 		ifa_ref(info->rti_ifa);
1487 	}
1488 
1489 	NET_EPOCH_ENTER(et);
1490 	error = nhop_create_from_info(rnh, info, &nh);
1491 	NET_EPOCH_EXIT(et);
1492 	if (error != 0) {
1493 		ifa_free(info->rti_ifa);
1494 		return (error);
1495 	}
1496 
1497 	rt = uma_zalloc(V_rtzone, M_NOWAIT);
1498 	if (rt == NULL) {
1499 		ifa_free(info->rti_ifa);
1500 		nhop_free(nh);
1501 		return (ENOBUFS);
1502 	}
1503 	rt->rt_flags = RTF_UP | flags;
1504 	rt->rt_nhop = nh;
1505 
1506 	/* Fill in dst */
1507 	memcpy(&rt->rt_dst, dst, dst->sa_len);
1508 	rt_key(rt) = &rt->rt_dst;
1509 
1510 	/*
1511 	 * point to the (possibly newly malloc'd) dest address.
1512 	 */
1513 	ndst = (struct sockaddr *)rt_key(rt);
1514 
1515 	/*
1516 	 * make sure it contains the value we want (masked if needed).
1517 	 */
1518 	if (netmask) {
1519 		rt_maskedcopy(dst, ndst, netmask);
1520 	} else
1521 		bcopy(dst, ndst, dst->sa_len);
1522 
1523 	/*
1524 	 * We use the ifa reference returned by rt_getifa_fib().
1525 	 * This moved from below so that rnh->rnh_addaddr() can
1526 	 * examine the ifa and  ifa->ifa_ifp if it so desires.
1527 	 */
1528 	ifa = info->rti_ifa;
1529 	rt->rt_weight = 1;
1530 
1531 	rt_setmetrics(info, rt);
1532 
1533 	RIB_WLOCK(rnh);
1534 	RT_LOCK(rt);
1535 #ifdef RADIX_MPATH
1536 	/* do not permit exactly the same dst/mask/gw pair */
1537 	if (rt_mpath_capable(rnh) &&
1538 		rt_mpath_conflict(rnh, rt, netmask)) {
1539 		RIB_WUNLOCK(rnh);
1540 
1541 		nhop_free(nh);
1542 		uma_zfree(V_rtzone, rt);
1543 		return (EEXIST);
1544 	}
1545 #endif
1546 
1547 	rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
1548 
1549 	if (rn != NULL && rt->rt_expire > 0)
1550 		tmproutes_update(rnh, rt);
1551 
1552 	rt_old = NULL;
1553 	if (rn == NULL && (info->rti_flags & RTF_PINNED) != 0) {
1554 
1555 		/*
1556 		 * Force removal and re-try addition
1557 		 * TODO: better multipath&pinned support
1558 		 */
1559 		struct sockaddr *info_dst = info->rti_info[RTAX_DST];
1560 		info->rti_info[RTAX_DST] = ndst;
1561 		/* Do not delete existing PINNED(interface) routes */
1562 		info->rti_flags &= ~RTF_PINNED;
1563 		rt_old = rt_unlinkrte(rnh, info, &error);
1564 		info->rti_flags |= RTF_PINNED;
1565 		info->rti_info[RTAX_DST] = info_dst;
1566 		if (rt_old != NULL)
1567 			rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head,
1568 			    rt->rt_nodes);
1569 	}
1570 	RIB_WUNLOCK(rnh);
1571 
1572 	if (rt_old != NULL)
1573 		RT_UNLOCK(rt_old);
1574 
1575 	/*
1576 	 * If it still failed to go into the tree,
1577 	 * then un-make it (this should be a function)
1578 	 */
1579 	if (rn == NULL) {
1580 		nhop_free(nh);
1581 		uma_zfree(V_rtzone, rt);
1582 		return (EEXIST);
1583 	}
1584 
1585 	if (rt_old != NULL) {
1586 		rt_notifydelete(rt_old, info);
1587 		RTFREE(rt_old);
1588 	}
1589 
1590 	/*
1591 	 * If this protocol has something to add to this then
1592 	 * allow it to do that as well.
1593 	 */
1594 	if (ifa->ifa_rtrequest)
1595 		ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info);
1596 
1597 	/*
1598 	 * actually return a resultant rtentry
1599 	 */
1600 	if (ret_nrt)
1601 		*ret_nrt = rt;
1602 	rnh->rnh_gen++;		/* Routing table updated */
1603 	RT_UNLOCK(rt);
1604 
1605 	return (0);
1606 }
1607 
1608 static int
1609 del_route(struct rib_head *rnh, struct rt_addrinfo *info,
1610     struct rtentry **ret_nrt)
1611 {
1612 	struct sockaddr *dst, *netmask;
1613 	struct sockaddr_storage mdst;
1614 	struct rtentry *rt;
1615 	int error;
1616 
1617 	dst = info->rti_info[RTAX_DST];
1618 	netmask = info->rti_info[RTAX_NETMASK];
1619 
1620 	if (netmask) {
1621 		if (dst->sa_len > sizeof(mdst))
1622 			return (EINVAL);
1623 		rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
1624 		dst = (struct sockaddr *)&mdst;
1625 	}
1626 
1627 	RIB_WLOCK(rnh);
1628 	rt = rt_unlinkrte(rnh, info, &error);
1629 	RIB_WUNLOCK(rnh);
1630 	if (error != 0)
1631 		return (error);
1632 
1633 	rt_notifydelete(rt, info);
1634 
1635 	/*
1636 	 * If the caller wants it, then it can have it,
1637 	 * the entry will be deleted after the end of the current epoch.
1638 	 */
1639 	if (ret_nrt)
1640 		*ret_nrt = rt;
1641 
1642 	RTFREE_LOCKED(rt);
1643 
1644 	return (0);
1645 }
1646 
1647 static int
1648 change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
1649     struct rtentry **ret_nrt)
1650 {
1651 	RIB_RLOCK_TRACKER;
1652 	struct rtentry *rt = NULL;
1653 	int error = 0;
1654 	int free_ifa = 0;
1655 	struct nhop_object *nh, *nh_orig;
1656 
1657 	RIB_RLOCK(rnh);
1658 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1659 	    info->rti_info[RTAX_NETMASK], &rnh->head);
1660 
1661 	if (rt == NULL) {
1662 		RIB_RUNLOCK(rnh);
1663 		return (ESRCH);
1664 	}
1665 
1666 #ifdef RADIX_MPATH
1667 	/*
1668 	 * If we got multipath routes,
1669 	 * we require users to specify a matching RTAX_GATEWAY.
1670 	 */
1671 	if (rt_mpath_capable(rnh)) {
1672 		rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
1673 		if (rt == NULL) {
1674 			RIB_RUNLOCK(rnh);
1675 			return (ESRCH);
1676 		}
1677 	}
1678 #endif
1679 	nh_orig = rt->rt_nhop;
1680 
1681 	RIB_RUNLOCK(rnh);
1682 
1683 	rt = NULL;
1684 	nh = NULL;
1685 
1686 	/*
1687 	 * New gateway could require new ifaddr, ifp;
1688 	 * flags may also be different; ifp may be specified
1689 	 * by ll sockaddr when protocol address is ambiguous
1690 	 */
1691 	if (((nh_orig->nh_flags & NHF_GATEWAY) &&
1692 	    info->rti_info[RTAX_GATEWAY] != NULL) ||
1693 	    info->rti_info[RTAX_IFP] != NULL ||
1694 	    (info->rti_info[RTAX_IFA] != NULL &&
1695 	     !sa_equal(info->rti_info[RTAX_IFA], nh_orig->nh_ifa->ifa_addr))) {
1696 		error = rt_getifa_fib(info, rnh->rib_fibnum);
1697 		if (info->rti_ifa != NULL)
1698 			free_ifa = 1;
1699 
1700 		if (error != 0) {
1701 			if (free_ifa) {
1702 				ifa_free(info->rti_ifa);
1703 				info->rti_ifa = NULL;
1704 			}
1705 
1706 			return (error);
1707 		}
1708 	}
1709 
1710 	error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
1711 	if (free_ifa) {
1712 		ifa_free(info->rti_ifa);
1713 		info->rti_ifa = NULL;
1714 	}
1715 	if (error != 0)
1716 		return (error);
1717 
1718 	RIB_WLOCK(rnh);
1719 
1720 	/* Lookup rtentry once again and check if nexthop is still the same */
1721 	rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
1722 	    info->rti_info[RTAX_NETMASK], &rnh->head);
1723 
1724 	if (rt == NULL) {
1725 		RIB_WUNLOCK(rnh);
1726 		nhop_free(nh);
1727 		return (ESRCH);
1728 	}
1729 
1730 	if (rt->rt_nhop != nh_orig) {
1731 		RIB_WUNLOCK(rnh);
1732 		nhop_free(nh);
1733 		return (EAGAIN);
1734 	}
1735 
1736 	/* Proceed with the update */
1737 	RT_LOCK(rt);
1738 
1739 	/* Provide notification to the protocols.*/
1740 	if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest)
1741 		nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info);
1742 
1743 	rt->rt_nhop = nh;
1744 	rt_setmetrics(info, rt);
1745 
1746 	if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest)
1747 		nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info);
1748 
1749 	if (ret_nrt != NULL)
1750 		*ret_nrt = rt;
1751 
1752 	RT_UNLOCK(rt);
1753 
1754 	/* Update generation id to reflect rtable change */
1755 	rnh->rnh_gen++;
1756 
1757 	RIB_WUNLOCK(rnh);
1758 
1759 	nhop_free(nh_orig);
1760 
1761 	return (0);
1762 }
1763 
1764 static int
1765 change_route(struct rib_head *rnh, struct rt_addrinfo *info,
1766     struct rtentry **ret_nrt)
1767 {
1768 	int error;
1769 
1770 	/* Check if updated gateway exists */
1771 	if ((info->rti_flags & RTF_GATEWAY) &&
1772 	    (info->rti_info[RTAX_GATEWAY] == NULL))
1773 		return (EINVAL);
1774 
1775 	/*
1776 	 * route change is done in multiple steps, with dropping and
1777 	 * reacquiring lock. In the situations with multiple processes
1778 	 * changes the same route in can lead to the case when route
1779 	 * is changed between the steps. Address it by retrying the operation
1780 	 * multiple times before failing.
1781 	 */
1782 	for (int i = 0; i < RIB_MAX_RETRIES; i++) {
1783 		error = change_route_one(rnh, info, ret_nrt);
1784 		if (error != EAGAIN)
1785 			break;
1786 	}
1787 
1788 	return (error);
1789 }
1790 
1791 
1792 static void
1793 rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt)
1794 {
1795 
1796 	if (info->rti_mflags & RTV_WEIGHT)
1797 		rt->rt_weight = info->rti_rmx->rmx_weight;
1798 	/* Kernel -> userland timebase conversion. */
1799 	if (info->rti_mflags & RTV_EXPIRE)
1800 		rt->rt_expire = info->rti_rmx->rmx_expire ?
1801 		    info->rti_rmx->rmx_expire - time_second + time_uptime : 0;
1802 }
1803 
/*
 * Copy @src to @dst, AND-ing the address bytes with @netmask.
 *
 * All three are treated as raw byte strings whose first byte is the total
 * length.  The first two bytes (sa_len and sa_family) are copied verbatim
 * from @src.  The following bytes are masked for as long as both @src and
 * @netmask have data; whatever remains up to the length of @src is
 * zeroed.  Exactly the length of @src is written whenever that length is
 * at least two.
 */
void
rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netmask)
{
	unsigned char *from = (unsigned char *)src;
	unsigned char *to = (unsigned char *)dst;
	unsigned char *mask = (unsigned char *)netmask;
	unsigned char *mask_end = to + mask[0];
	unsigned char *src_end = to + from[0];

	/* sa_len and sa_family are never masked. */
	to[0] = from[0];
	to[1] = from[1];
	to += 2;
	from += 2;
	mask += 2;

	/* Never mask beyond the end of the source. */
	if (mask_end > src_end)
		mask_end = src_end;

	for (; to < mask_end; to++, from++, mask++)
		*to = *from & *mask;

	/* A short mask implies zeroes for the rest of the address. */
	if (to < src_end)
		memset(to, 0, (unsigned)(src_end - to));
}
1822 
1823 /*
1824  * Set up a routing table entry, normally
1825  * for an interface.
1826  */
1827 #define _SOCKADDR_TMPSIZE 128 /* Not too big.. kernel stack size is limited */
1828 static inline  int
1829 rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
1830 {
1831 	RIB_RLOCK_TRACKER;
1832 	struct epoch_tracker et;
1833 	struct sockaddr *dst;
1834 	struct sockaddr *netmask;
1835 	struct rtentry *rt = NULL;
1836 	struct rt_addrinfo info;
1837 	int error = 0;
1838 	int startfib, endfib;
1839 	char tempbuf[_SOCKADDR_TMPSIZE];
1840 	int didwork = 0;
1841 	int a_failure = 0;
1842 	struct sockaddr_dl_short *sdl = NULL;
1843 	struct rib_head *rnh;
1844 
1845 	if (flags & RTF_HOST) {
1846 		dst = ifa->ifa_dstaddr;
1847 		netmask = NULL;
1848 	} else {
1849 		dst = ifa->ifa_addr;
1850 		netmask = ifa->ifa_netmask;
1851 	}
1852 	if (dst->sa_len == 0)
1853 		return(EINVAL);
1854 	switch (dst->sa_family) {
1855 	case AF_INET6:
1856 	case AF_INET:
1857 		/* We support multiple FIBs. */
1858 		break;
1859 	default:
1860 		fibnum = RT_DEFAULT_FIB;
1861 		break;
1862 	}
1863 	if (fibnum == RT_ALL_FIBS) {
1864 		if (V_rt_add_addr_allfibs == 0 && cmd == (int)RTM_ADD)
1865 			startfib = endfib = ifa->ifa_ifp->if_fib;
1866 		else {
1867 			startfib = 0;
1868 			endfib = rt_numfibs - 1;
1869 		}
1870 	} else {
1871 		KASSERT((fibnum < rt_numfibs), ("rtinit1: bad fibnum"));
1872 		startfib = fibnum;
1873 		endfib = fibnum;
1874 	}
1875 
1876 	/*
1877 	 * If it's a delete, check that if it exists,
1878 	 * it's on the correct interface or we might scrub
1879 	 * a route to another ifa which would
1880 	 * be confusing at best and possibly worse.
1881 	 */
1882 	if (cmd == RTM_DELETE) {
1883 		/*
1884 		 * It's a delete, so it should already exist..
1885 		 * If it's a net, mask off the host bits
1886 		 * (Assuming we have a mask)
1887 		 * XXX this is kinda inet specific..
1888 		 */
1889 		if (netmask != NULL) {
1890 			rt_maskedcopy(dst, (struct sockaddr *)tempbuf, netmask);
1891 			dst = (struct sockaddr *)tempbuf;
1892 		}
1893 	} else if (cmd == RTM_ADD) {
1894 		sdl = (struct sockaddr_dl_short *)tempbuf;
1895 		bzero(sdl, sizeof(struct sockaddr_dl_short));
1896 		sdl->sdl_family = AF_LINK;
1897 		sdl->sdl_len = sizeof(struct sockaddr_dl_short);
1898 		sdl->sdl_type = ifa->ifa_ifp->if_type;
1899 		sdl->sdl_index = ifa->ifa_ifp->if_index;
1900         }
1901 	/*
1902 	 * Now go through all the requested tables (fibs) and do the
1903 	 * requested action. Realistically, this will either be fib 0
1904 	 * for protocols that don't do multiple tables or all the
1905 	 * tables for those that do.
1906 	 */
1907 	for ( fibnum = startfib; fibnum <= endfib; fibnum++) {
1908 		if (cmd == RTM_DELETE) {
1909 			struct radix_node *rn;
1910 			/*
1911 			 * Look up an rtentry that is in the routing tree and
1912 			 * contains the correct info.
1913 			 */
1914 			rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
1915 			if (rnh == NULL)
1916 				/* this table doesn't exist but others might */
1917 				continue;
1918 			RIB_RLOCK(rnh);
1919 			rn = rnh->rnh_lookup(dst, netmask, &rnh->head);
1920 #ifdef RADIX_MPATH
1921 			if (rt_mpath_capable(rnh)) {
1922 
1923 				if (rn == NULL)
1924 					error = ESRCH;
1925 				else {
1926 					rt = RNTORT(rn);
1927 					/*
1928 					 * for interface route the gateway
1929 					 * gateway is sockaddr_dl, so
1930 					 * rt_mpath_matchgate must use the
1931 					 * interface address
1932 					 */
1933 					rt = rt_mpath_matchgate(rt,
1934 					    ifa->ifa_addr);
1935 					if (rt == NULL)
1936 						error = ESRCH;
1937 				}
1938 			}
1939 #endif
1940 			error = (rn == NULL ||
1941 			    (rn->rn_flags & RNF_ROOT) ||
1942 			    RNTORT(rn)->rt_nhop->nh_ifa != ifa);
1943 			RIB_RUNLOCK(rnh);
1944 			if (error) {
1945 				/* this is only an error if bad on ALL tables */
1946 				continue;
1947 			}
1948 		}
1949 		/*
1950 		 * Do the actual request
1951 		 */
1952 		bzero((caddr_t)&info, sizeof(info));
1953 		info.rti_ifa = ifa;
1954 		info.rti_flags = flags |
1955 		    (ifa->ifa_flags & ~IFA_RTSELF) | RTF_PINNED;
1956 		info.rti_info[RTAX_DST] = dst;
1957 		/*
1958 		 * doing this for compatibility reasons
1959 		 */
1960 		if (cmd == RTM_ADD)
1961 			info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)sdl;
1962 		else
1963 			info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
1964 		info.rti_info[RTAX_NETMASK] = netmask;
1965 		NET_EPOCH_ENTER(et);
1966 		error = rtrequest1_fib(cmd, &info, &rt, fibnum);
1967 		if (error == 0 && rt != NULL) {
1968 			/*
1969 			 * notify any listening routing agents of the change
1970 			 */
1971 
1972 			/* TODO: interface routes/aliases */
1973 			rt_newaddrmsg_fib(cmd, ifa, rt, fibnum);
1974 			didwork = 1;
1975 		}
1976 		NET_EPOCH_EXIT(et);
1977 		if (error)
1978 			a_failure = error;
1979 	}
1980 	if (cmd == RTM_DELETE) {
1981 		if (didwork) {
1982 			error = 0;
1983 		} else {
1984 			/* we only give an error if it wasn't in any table */
1985 			error = ((flags & RTF_HOST) ?
1986 			    EHOSTUNREACH : ENETUNREACH);
1987 		}
1988 	} else {
1989 		if (a_failure) {
1990 			/* return an error if any of them failed */
1991 			error = a_failure;
1992 		}
1993 	}
1994 	return (error);
1995 }
1996 
1997 /*
1998  * Set up a routing table entry, normally
1999  * for an interface.
2000  */
2001 int
2002 rtinit(struct ifaddr *ifa, int cmd, int flags)
2003 {
2004 	struct sockaddr *dst;
2005 	int fib = RT_DEFAULT_FIB;
2006 
2007 	if (flags & RTF_HOST) {
2008 		dst = ifa->ifa_dstaddr;
2009 	} else {
2010 		dst = ifa->ifa_addr;
2011 	}
2012 
2013 	switch (dst->sa_family) {
2014 	case AF_INET6:
2015 	case AF_INET:
2016 		/* We do support multiple FIBs. */
2017 		fib = RT_ALL_FIBS;
2018 		break;
2019 	}
2020 	return (rtinit1(ifa, cmd, flags, fib));
2021 }
2022 
2023 /*
2024  * Announce interface address arrival/withdraw
2025  * Returns 0 on success.
2026  */
2027 int
2028 rt_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
2029 {
2030 
2031 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
2032 	    ("unexpected cmd %d", cmd));
2033 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
2034 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
2035 
2036 	EVENTHANDLER_DIRECT_INVOKE(rt_addrmsg, ifa, cmd);
2037 	return (rtsock_addrmsg(cmd, ifa, fibnum));
2038 }
2039 
2040 /*
2041  * Announce kernel-originated route addition/removal to rtsock based on @rt data.
2042  * cmd: RTM_ cmd
2043  * @rt: valid rtentry
2044  * @ifp: target route interface
2045  * @fibnum: fib id or RT_ALL_FIBS
2046  *
2047  * Returns 0 on success.
2048  */
2049 int
2050 rt_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs,
2051     int fibnum)
2052 {
2053 
2054 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
2055 	    ("unexpected cmd %d", cmd));
2056 
2057 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
2058 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
2059 
2060 	KASSERT(rt_key(rt) != NULL, (":%s: rt_key must be supplied", __func__));
2061 
2062 	return (rtsock_routemsg(cmd, rt, ifp, 0, fibnum));
2063 }
2064 
2065 /*
2066  * Announce kernel-originated route addition/removal to rtsock based on @rt data.
2067  * cmd: RTM_ cmd
2068  * @info: addrinfo structure with valid data.
2069  * @fibnum: fib id or RT_ALL_FIBS
2070  *
2071  * Returns 0 on success.
2072  */
2073 int
2074 rt_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
2075 {
2076 
2077 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE || cmd == RTM_CHANGE,
2078 	    ("unexpected cmd %d", cmd));
2079 
2080 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
2081 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
2082 
2083 	KASSERT(info->rti_info[RTAX_DST] != NULL, (":%s: RTAX_DST must be supplied", __func__));
2084 
2085 	return (rtsock_routemsg_info(cmd, info, fibnum));
2086 }
2087 
2088 
2089 /*
2090  * This is called to generate messages from the routing socket
2091  * indicating a network interface has had addresses associated with it.
2092  */
2093 void
2094 rt_newaddrmsg_fib(int cmd, struct ifaddr *ifa, struct rtentry *rt, int fibnum)
2095 {
2096 
2097 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
2098 		("unexpected cmd %u", cmd));
2099 	KASSERT(fibnum == RT_ALL_FIBS || (fibnum >= 0 && fibnum < rt_numfibs),
2100 	    ("%s: fib out of range 0 <=%d<%d", __func__, fibnum, rt_numfibs));
2101 
2102 	if (cmd == RTM_ADD) {
2103 		rt_addrmsg(cmd, ifa, fibnum);
2104 		if (rt != NULL)
2105 			rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum);
2106 	} else {
2107 		if (rt != NULL)
2108 			rt_routemsg(cmd, rt, ifa->ifa_ifp, 0, fibnum);
2109 		rt_addrmsg(cmd, ifa, fibnum);
2110 	}
2111 }
2112 
2113