xref: /freebsd/sys/net/rtsock.c (revision 54ebdd631db8c0bba2baab0155f603a8b5cf014a)
1 /*-
2  * Copyright (c) 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
30  * $FreeBSD$
31  */
32 #include "opt_sctp.h"
33 #include "opt_mpath.h"
34 #include "opt_inet.h"
35 #include "opt_inet6.h"
36 
37 #include <sys/param.h>
38 #include <sys/domain.h>
39 #include <sys/jail.h>
40 #include <sys/kernel.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/protosw.h>
47 #include <sys/rwlock.h>
48 #include <sys/signalvar.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/vimage.h>
54 
55 #include <net/if.h>
56 #include <net/netisr.h>
57 #include <net/raw_cb.h>
58 #include <net/route.h>
59 #include <net/vnet.h>
60 
61 #include <netinet/in.h>
62 #ifdef INET6
63 #include <netinet6/scope6_var.h>
64 #endif
65 
66 #ifdef SCTP
67 extern void sctp_addr_change(struct ifaddr *ifa, int cmd);
68 #endif /* SCTP */
69 
70 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
71 
72 /* NB: these are not modified */
73 static struct	sockaddr route_src = { 2, PF_ROUTE, };
74 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
75 
/*
 * Number of attached routing sockets, broken down by the protocol value
 * each socket was created with.  rts_attach() and rts_detach() adjust
 * these counters while holding RTSOCK_LOCK().
 */
static struct {
	int	ip_count;	/* sockets attached with AF_INET */
	int	ip6_count;	/* sockets attached with AF_INET6 */
	int	ipx_count;	/* sockets attached with AF_IPX */
	int	any_count;	/* every attached socket, any protocol */
} route_cb;
82 
83 struct mtx rtsock_mtx;
84 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
85 
86 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
87 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
88 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
89 
90 static struct	ifqueue rtsintrq;
91 
92 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD, 0, "");
93 SYSCTL_INT(_net_route, OID_AUTO, netisr_maxqlen, CTLFLAG_RW,
94     &rtsintrq.ifq_maxlen, 0, "maximum routing socket dispatch queue length");
95 
96 struct walkarg {
97 	int	w_tmemsize;
98 	int	w_op, w_arg;
99 	caddr_t	w_tmem;
100 	struct sysctl_req *w_req;
101 };
102 
103 static void	rts_input(struct mbuf *m);
104 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
105 static int	rt_msg2(int type, struct rt_addrinfo *rtinfo,
106 			caddr_t cp, struct walkarg *w);
107 static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
108 			struct rt_addrinfo *rtinfo);
109 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
110 static int	sysctl_iflist(int af, struct walkarg *w);
111 static int	sysctl_ifmalist(int af, struct walkarg *w);
112 static int	route_output(struct mbuf *m, struct socket *so);
113 static void	rt_setmetrics(u_long which, const struct rt_metrics *in,
114 			struct rt_metrics_lite *out);
115 static void	rt_getmetrics(const struct rt_metrics_lite *in,
116 			struct rt_metrics *out);
117 static void	rt_dispatch(struct mbuf *, const struct sockaddr *);
118 
119 static void
120 rts_init(void)
121 {
122 	int tmp;
123 
124 	rtsintrq.ifq_maxlen = 256;
125 	if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
126 		rtsintrq.ifq_maxlen = tmp;
127 	mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF);
128 	netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, 0);
129 }
130 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0);
131 
132 static void
133 rts_input(struct mbuf *m)
134 {
135 	struct sockproto route_proto;
136 	unsigned short *family;
137 	struct m_tag *tag;
138 
139 	route_proto.sp_family = PF_ROUTE;
140 	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
141 	if (tag != NULL) {
142 		family = (unsigned short *)(tag + 1);
143 		route_proto.sp_protocol = *family;
144 		m_tag_delete(m, tag);
145 	} else
146 		route_proto.sp_protocol = 0;
147 
148 	raw_input(m, &route_proto, &route_src);
149 }
150 
151 /*
152  * It really doesn't make any sense at all for this code to share much
153  * with raw_usrreq.c, since its functionality is so restricted.  XXX
154  */
155 static void
156 rts_abort(struct socket *so)
157 {
158 
159 	raw_usrreqs.pru_abort(so);
160 }
161 
162 static void
163 rts_close(struct socket *so)
164 {
165 
166 	raw_usrreqs.pru_close(so);
167 }
168 
169 /* pru_accept is EOPNOTSUPP */
170 
171 static int
172 rts_attach(struct socket *so, int proto, struct thread *td)
173 {
174 	struct rawcb *rp;
175 	int s, error;
176 
177 	KASSERT(so->so_pcb == NULL, ("rts_attach: so_pcb != NULL"));
178 
179 	/* XXX */
180 	rp = malloc(sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
181 	if (rp == NULL)
182 		return ENOBUFS;
183 
184 	/*
185 	 * The splnet() is necessary to block protocols from sending
186 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
187 	 * this PCB is extant but incompletely initialized.
188 	 * Probably we should try to do more of this work beforehand and
189 	 * eliminate the spl.
190 	 */
191 	s = splnet();
192 	so->so_pcb = (caddr_t)rp;
193 	so->so_fibnum = td->td_proc->p_fibnum;
194 	error = raw_attach(so, proto);
195 	rp = sotorawcb(so);
196 	if (error) {
197 		splx(s);
198 		so->so_pcb = NULL;
199 		free(rp, M_PCB);
200 		return error;
201 	}
202 	RTSOCK_LOCK();
203 	switch(rp->rcb_proto.sp_protocol) {
204 	case AF_INET:
205 		route_cb.ip_count++;
206 		break;
207 	case AF_INET6:
208 		route_cb.ip6_count++;
209 		break;
210 	case AF_IPX:
211 		route_cb.ipx_count++;
212 		break;
213 	}
214 	route_cb.any_count++;
215 	RTSOCK_UNLOCK();
216 	soisconnected(so);
217 	so->so_options |= SO_USELOOPBACK;
218 	splx(s);
219 	return 0;
220 }
221 
222 static int
223 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
224 {
225 
226 	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
227 }
228 
229 static int
230 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
231 {
232 
233 	return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
234 }
235 
236 /* pru_connect2 is EOPNOTSUPP */
237 /* pru_control is EOPNOTSUPP */
238 
239 static void
240 rts_detach(struct socket *so)
241 {
242 	struct rawcb *rp = sotorawcb(so);
243 
244 	KASSERT(rp != NULL, ("rts_detach: rp == NULL"));
245 
246 	RTSOCK_LOCK();
247 	switch(rp->rcb_proto.sp_protocol) {
248 	case AF_INET:
249 		route_cb.ip_count--;
250 		break;
251 	case AF_INET6:
252 		route_cb.ip6_count--;
253 		break;
254 	case AF_IPX:
255 		route_cb.ipx_count--;
256 		break;
257 	}
258 	route_cb.any_count--;
259 	RTSOCK_UNLOCK();
260 	raw_usrreqs.pru_detach(so);
261 }
262 
263 static int
264 rts_disconnect(struct socket *so)
265 {
266 
267 	return (raw_usrreqs.pru_disconnect(so));
268 }
269 
270 /* pru_listen is EOPNOTSUPP */
271 
272 static int
273 rts_peeraddr(struct socket *so, struct sockaddr **nam)
274 {
275 
276 	return (raw_usrreqs.pru_peeraddr(so, nam));
277 }
278 
279 /* pru_rcvd is EOPNOTSUPP */
280 /* pru_rcvoob is EOPNOTSUPP */
281 
282 static int
283 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
284 	 struct mbuf *control, struct thread *td)
285 {
286 
287 	return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
288 }
289 
290 /* pru_sense is null */
291 
292 static int
293 rts_shutdown(struct socket *so)
294 {
295 
296 	return (raw_usrreqs.pru_shutdown(so));
297 }
298 
299 static int
300 rts_sockaddr(struct socket *so, struct sockaddr **nam)
301 {
302 
303 	return (raw_usrreqs.pru_sockaddr(so, nam));
304 }
305 
306 static struct pr_usrreqs route_usrreqs = {
307 	.pru_abort =		rts_abort,
308 	.pru_attach =		rts_attach,
309 	.pru_bind =		rts_bind,
310 	.pru_connect =		rts_connect,
311 	.pru_detach =		rts_detach,
312 	.pru_disconnect =	rts_disconnect,
313 	.pru_peeraddr =		rts_peeraddr,
314 	.pru_send =		rts_send,
315 	.pru_shutdown =		rts_shutdown,
316 	.pru_sockaddr =		rts_sockaddr,
317 	.pru_close =		rts_close,
318 };
319 
320 #ifndef _SOCKADDR_UNION_DEFINED
321 #define	_SOCKADDR_UNION_DEFINED
322 /*
323  * The union of all possible address formats we handle.
324  */
325 union sockaddr_union {
326 	struct sockaddr		sa;
327 	struct sockaddr_in	sin;
328 	struct sockaddr_in6	sin6;
329 };
330 #endif /* _SOCKADDR_UNION_DEFINED */
331 
332 static int
333 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
334     struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
335 {
336 
337 	switch (info->rti_info[RTAX_DST]->sa_family) {
338 #ifdef INET
339 	case AF_INET:
340 	{
341 		struct in_addr ia;
342 
343 		/*
344 		 * 1. Check if the returned address is part of the jail.
345 		 */
346 		ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->sin_addr;
347 		if (prison_check_ip4(cred, &ia) != 0) {
348 			info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
349 
350 		} else {
351 			struct ifaddr *ifa;
352 			int found;
353 
354 			found = 0;
355 
356 			/*
357 			 * 2. Try to find an address on the given outgoing
358 			 *    interface that belongs to the jail.
359 			 */
360 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
361 				struct sockaddr *sa;
362 				sa = ifa->ifa_addr;
363 				if (sa->sa_family != AF_INET)
364 					continue;
365 				ia = ((struct sockaddr_in *)sa)->sin_addr;
366 				if (prison_check_ip4(cred, &ia) != 0) {
367 					found = 1;
368 					break;
369 				}
370 			}
371 			if (!found) {
372 				/*
373 				 * 3. As a last resort return the 'default'
374 				 * jail address.
375 				 */
376 				if (prison_getip4(cred, &ia) != 0)
377 					return (ESRCH);
378 			}
379 			bzero(&saun->sin, sizeof(struct sockaddr_in));
380 			saun->sin.sin_len = sizeof(struct sockaddr_in);
381 			saun->sin.sin_family = AF_INET;
382 			saun->sin.sin_addr.s_addr = ia.s_addr;
383 			info->rti_info[RTAX_IFA] =
384 			    (struct sockaddr *)&saun->sin;
385 		}
386 		break;
387 	}
388 #endif
389 #ifdef INET6
390 	case AF_INET6:
391 	{
392 		struct in6_addr ia6;
393 
394 		/*
395 		 * 1. Check if the returned address is part of the jail.
396 		 */
397 		bcopy(&((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->sin6_addr,
398 		    &ia6, sizeof(struct in6_addr));
399 		if (prison_check_ip6(cred, &ia6) != 0) {
400 			info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
401 		} else {
402 			struct ifaddr *ifa;
403 			int found;
404 
405 			found = 0;
406 
407 			/*
408 			 * 2. Try to find an address on the given outgoing
409 			 *    interface that belongs to the jail.
410 			 */
411 			TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
412 				struct sockaddr *sa;
413 				sa = ifa->ifa_addr;
414 				if (sa->sa_family != AF_INET6)
415 					continue;
416 				bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
417 				    &ia6, sizeof(struct in6_addr));
418 				if (prison_check_ip6(cred, &ia6) != 0) {
419 					found = 1;
420 					break;
421 				}
422 			}
423 			if (!found) {
424 				/*
425 				 * 3. As a last resort return the 'default'
426 				 * jail address.
427 				 */
428 				if (prison_getip6(cred, &ia6) != 0)
429 					return (ESRCH);
430 			}
431 			bzero(&saun->sin6, sizeof(struct sockaddr_in6));
432 			saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
433 			saun->sin6.sin6_family = AF_INET6;
434 			bcopy(&ia6, &saun->sin6.sin6_addr,
435 			    sizeof(struct in6_addr));
436 			if (sa6_recoverscope(&saun->sin6) != 0)
437 				return (ESRCH);
438 			info->rti_info[RTAX_IFA] =
439 			    (struct sockaddr *)&saun->sin6;
440 		}
441 		break;
442 	}
443 #endif
444 	default:
445 		return (ESRCH);
446 	}
447 	return (0);
448 }
449 
450 /*ARGSUSED*/
451 static int
452 route_output(struct mbuf *m, struct socket *so)
453 {
454 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
455 	INIT_VNET_NET(so->so_vnet);
456 	struct rt_msghdr *rtm = NULL;
457 	struct rtentry *rt = NULL;
458 	struct radix_node_head *rnh;
459 	struct rt_addrinfo info;
460 	int len, error = 0;
461 	struct ifnet *ifp = NULL;
462 	union sockaddr_union saun;
463 
464 #define senderr(e) { error = e; goto flush;}
465 	if (m == NULL || ((m->m_len < sizeof(long)) &&
466 		       (m = m_pullup(m, sizeof(long))) == NULL))
467 		return (ENOBUFS);
468 	if ((m->m_flags & M_PKTHDR) == 0)
469 		panic("route_output");
470 	len = m->m_pkthdr.len;
471 	if (len < sizeof(*rtm) ||
472 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
473 		info.rti_info[RTAX_DST] = NULL;
474 		senderr(EINVAL);
475 	}
476 	R_Malloc(rtm, struct rt_msghdr *, len);
477 	if (rtm == NULL) {
478 		info.rti_info[RTAX_DST] = NULL;
479 		senderr(ENOBUFS);
480 	}
481 	m_copydata(m, 0, len, (caddr_t)rtm);
482 	if (rtm->rtm_version != RTM_VERSION) {
483 		info.rti_info[RTAX_DST] = NULL;
484 		senderr(EPROTONOSUPPORT);
485 	}
486 	rtm->rtm_pid = curproc->p_pid;
487 	bzero(&info, sizeof(info));
488 	info.rti_addrs = rtm->rtm_addrs;
489 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
490 		info.rti_info[RTAX_DST] = NULL;
491 		senderr(EINVAL);
492 	}
493 	info.rti_flags = rtm->rtm_flags;
494 	if (info.rti_info[RTAX_DST] == NULL ||
495 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
496 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
497 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
498 		senderr(EINVAL);
499 	if (info.rti_info[RTAX_GENMASK]) {
500 		struct radix_node *t;
501 		t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
502 		if (t != NULL &&
503 		    bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
504 		    (char *)(void *)t->rn_key + 1,
505 		    ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
506 			info.rti_info[RTAX_GENMASK] =
507 			    (struct sockaddr *)t->rn_key;
508 		else
509 			senderr(ENOBUFS);
510 	}
511 
512 	/*
513 	 * Verify that the caller has the appropriate privilege; RTM_GET
514 	 * is the only operation the non-superuser is allowed.
515 	 */
516 	if (rtm->rtm_type != RTM_GET) {
517 		error = priv_check(curthread, PRIV_NET_ROUTE);
518 		if (error)
519 			senderr(error);
520 	}
521 
522 	switch (rtm->rtm_type) {
523 		struct rtentry *saved_nrt;
524 
525 	case RTM_ADD:
526 		if (info.rti_info[RTAX_GATEWAY] == NULL)
527 			senderr(EINVAL);
528 		saved_nrt = NULL;
529 		error = rtrequest1_fib(RTM_ADD, &info, &saved_nrt,
530 		    so->so_fibnum);
531 		if (error == 0 && saved_nrt) {
532 			RT_LOCK(saved_nrt);
533 			rt_setmetrics(rtm->rtm_inits,
534 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
535 			rtm->rtm_index = saved_nrt->rt_ifp->if_index;
536 			RT_REMREF(saved_nrt);
537 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
538 			RT_UNLOCK(saved_nrt);
539 		}
540 		break;
541 
542 	case RTM_DELETE:
543 		saved_nrt = NULL;
544 		error = rtrequest1_fib(RTM_DELETE, &info, &saved_nrt,
545 		    so->so_fibnum);
546 		if (error == 0) {
547 			RT_LOCK(saved_nrt);
548 			rt = saved_nrt;
549 			goto report;
550 		}
551 		break;
552 
553 	case RTM_GET:
554 	case RTM_CHANGE:
555 	case RTM_LOCK:
556 		rnh = V_rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
557 		if (rnh == NULL)
558 			senderr(EAFNOSUPPORT);
559 		RADIX_NODE_HEAD_RLOCK(rnh);
560 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
561 			info.rti_info[RTAX_NETMASK], rnh);
562 		if (rt == NULL) {	/* XXX looks bogus */
563 			RADIX_NODE_HEAD_RUNLOCK(rnh);
564 			senderr(ESRCH);
565 		}
566 #ifdef RADIX_MPATH
567 		/*
568 		 * for RTM_CHANGE/LOCK, if we got multipath routes,
569 		 * we require users to specify a matching RTAX_GATEWAY.
570 		 *
571 		 * for RTM_GET, gate is optional even with multipath.
572 		 * if gate == NULL the first match is returned.
573 		 * (no need to call rt_mpath_matchgate if gate == NULL)
574 		 */
575 		if (rn_mpath_capable(rnh) &&
576 		    (rtm->rtm_type != RTM_GET || info.rti_info[RTAX_GATEWAY])) {
577 			rt = rt_mpath_matchgate(rt, info.rti_info[RTAX_GATEWAY]);
578 			if (!rt) {
579 				RADIX_NODE_HEAD_RUNLOCK(rnh);
580 				senderr(ESRCH);
581 			}
582 		}
583 #endif
584 		RT_LOCK(rt);
585 		RT_ADDREF(rt);
586 		RADIX_NODE_HEAD_RUNLOCK(rnh);
587 
588 		/*
589 		 * Fix for PR: 82974
590 		 *
591 		 * RTM_CHANGE/LOCK need a perfect match, rn_lookup()
592 		 * returns a perfect match in case a netmask is
593 		 * specified.  For host routes only a longest prefix
594 		 * match is returned so it is necessary to compare the
595 		 * existence of the netmask.  If both have a netmask
596 		 * rnh_lookup() did a perfect match and if none of them
597 		 * have a netmask both are host routes which is also a
598 		 * perfect match.
599 		 */
600 
601 		if (rtm->rtm_type != RTM_GET &&
602 		    (!rt_mask(rt) != !info.rti_info[RTAX_NETMASK])) {
603 			RT_UNLOCK(rt);
604 			senderr(ESRCH);
605 		}
606 
607 		switch(rtm->rtm_type) {
608 
609 		case RTM_GET:
610 		report:
611 			RT_LOCK_ASSERT(rt);
612 			info.rti_info[RTAX_DST] = rt_key(rt);
613 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
614 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
615 			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
616 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
617 				ifp = rt->rt_ifp;
618 				if (ifp) {
619 					info.rti_info[RTAX_IFP] =
620 					    ifp->if_addr->ifa_addr;
621 					if (jailed(so->so_cred)) {
622 						error = rtm_get_jailed(
623 						    &info, ifp, rt, &saun,
624 						    so->so_cred);
625 						if (error != 0) {
626 							RT_UNLOCK(rt);
627 							senderr(ESRCH);
628 						}
629 					} else {
630 						info.rti_info[RTAX_IFA] =
631 						    rt->rt_ifa->ifa_addr;
632 					}
633 					if (ifp->if_flags & IFF_POINTOPOINT)
634 						info.rti_info[RTAX_BRD] =
635 						    rt->rt_ifa->ifa_dstaddr;
636 					rtm->rtm_index = ifp->if_index;
637 				} else {
638 					info.rti_info[RTAX_IFP] = NULL;
639 					info.rti_info[RTAX_IFA] = NULL;
640 				}
641 			} else if ((ifp = rt->rt_ifp) != NULL) {
642 				rtm->rtm_index = ifp->if_index;
643 			}
644 			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
645 			if (len > rtm->rtm_msglen) {
646 				struct rt_msghdr *new_rtm;
647 				R_Malloc(new_rtm, struct rt_msghdr *, len);
648 				if (new_rtm == NULL) {
649 					RT_UNLOCK(rt);
650 					senderr(ENOBUFS);
651 				}
652 				bcopy(rtm, new_rtm, rtm->rtm_msglen);
653 				Free(rtm); rtm = new_rtm;
654 			}
655 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
656 			rtm->rtm_flags = rt->rt_flags;
657 			rtm->rtm_use = 0;
658 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
659 			rtm->rtm_addrs = info.rti_addrs;
660 			break;
661 
662 		case RTM_CHANGE:
663 			/*
664 			 * New gateway could require new ifaddr, ifp;
665 			 * flags may also be different; ifp may be specified
666 			 * by ll sockaddr when protocol address is ambiguous
667 			 */
668 			if (((rt->rt_flags & RTF_GATEWAY) &&
669 			     info.rti_info[RTAX_GATEWAY] != NULL) ||
670 			    info.rti_info[RTAX_IFP] != NULL ||
671 			    (info.rti_info[RTAX_IFA] != NULL &&
672 			     !sa_equal(info.rti_info[RTAX_IFA],
673 				       rt->rt_ifa->ifa_addr))) {
674 				RT_UNLOCK(rt);
675 				RADIX_NODE_HEAD_LOCK(rnh);
676 				error = rt_getifa_fib(&info, rt->rt_fibnum);
677 				RADIX_NODE_HEAD_UNLOCK(rnh);
678 				if (error != 0)
679 					senderr(error);
680 				RT_LOCK(rt);
681 			}
682 			if (info.rti_ifa != NULL &&
683 			    info.rti_ifa != rt->rt_ifa &&
684 			    rt->rt_ifa != NULL &&
685 			    rt->rt_ifa->ifa_rtrequest != NULL) {
686 				rt->rt_ifa->ifa_rtrequest(RTM_DELETE, rt,
687 				    &info);
688 				IFAFREE(rt->rt_ifa);
689 			}
690 			if (info.rti_info[RTAX_GATEWAY] != NULL) {
691 				RT_UNLOCK(rt);
692 				RADIX_NODE_HEAD_LOCK(rnh);
693 				RT_LOCK(rt);
694 
695 				error = rt_setgate(rt, rt_key(rt),
696 				    info.rti_info[RTAX_GATEWAY]);
697 				RADIX_NODE_HEAD_UNLOCK(rnh);
698 				if (error != 0) {
699 					RT_UNLOCK(rt);
700 					senderr(error);
701 				}
702 				if (!(rt->rt_flags & RTF_LLINFO))
703 					rt->rt_flags |= RTF_GATEWAY;
704 			}
705 			if (info.rti_ifa != NULL &&
706 			    info.rti_ifa != rt->rt_ifa) {
707 				IFAREF(info.rti_ifa);
708 				rt->rt_ifa = info.rti_ifa;
709 				rt->rt_ifp = info.rti_ifp;
710 			}
711 			/* Allow some flags to be toggled on change. */
712 			if (rtm->rtm_fmask & RTF_FMASK)
713 				rt->rt_flags = (rt->rt_flags &
714 				    ~rtm->rtm_fmask) |
715 				    (rtm->rtm_flags & rtm->rtm_fmask);
716 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
717 					&rt->rt_rmx);
718 			rtm->rtm_index = rt->rt_ifp->if_index;
719 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
720 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
721 			if (info.rti_info[RTAX_GENMASK])
722 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
723 			/* FALLTHROUGH */
724 		case RTM_LOCK:
725 			/* We don't support locks anymore */
726 			break;
727 		}
728 		RT_UNLOCK(rt);
729 		break;
730 
731 	default:
732 		senderr(EOPNOTSUPP);
733 	}
734 
735 flush:
736 	if (rtm) {
737 		if (error)
738 			rtm->rtm_errno = error;
739 		else
740 			rtm->rtm_flags |= RTF_DONE;
741 	}
742 	if (rt)		/* XXX can this be true? */
743 		RTFREE(rt);
744     {
745 	struct rawcb *rp = NULL;
746 	/*
747 	 * Check to see if we don't want our own messages.
748 	 */
749 	if ((so->so_options & SO_USELOOPBACK) == 0) {
750 		if (route_cb.any_count <= 1) {
751 			if (rtm)
752 				Free(rtm);
753 			m_freem(m);
754 			return (error);
755 		}
756 		/* There is another listener, so construct message */
757 		rp = sotorawcb(so);
758 	}
759 	if (rtm) {
760 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
761 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
762 			m_freem(m);
763 			m = NULL;
764 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
765 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
766 		Free(rtm);
767 	}
768 	if (m) {
769 		if (rp) {
770 			/*
771 			 * XXX insure we don't get a copy by
772 			 * invalidating our protocol
773 			 */
774 			unsigned short family = rp->rcb_proto.sp_family;
775 			rp->rcb_proto.sp_family = 0;
776 			rt_dispatch(m, info.rti_info[RTAX_DST]);
777 			rp->rcb_proto.sp_family = family;
778 		} else
779 			rt_dispatch(m, info.rti_info[RTAX_DST]);
780 	}
781     }
782 	return (error);
783 #undef	sa_equal
784 }
785 
786 static void
787 rt_setmetrics(u_long which, const struct rt_metrics *in,
788 	struct rt_metrics_lite *out)
789 {
790 #define metric(f, e) if (which & (f)) out->e = in->e;
791 	/*
792 	 * Only these are stored in the routing entry since introduction
793 	 * of tcp hostcache. The rest is ignored.
794 	 */
795 	metric(RTV_MTU, rmx_mtu);
796 	/* Userland -> kernel timebase conversion. */
797 	if (which & RTV_EXPIRE)
798 		out->rmx_expire = in->rmx_expire ?
799 		    in->rmx_expire - time_second + time_uptime : 0;
800 #undef metric
801 }
802 
803 static void
804 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
805 {
806 #define metric(e) out->e = in->e;
807 	bzero(out, sizeof(*out));
808 	metric(rmx_mtu);
809 	/* Kernel -> userland timebase conversion. */
810 	out->rmx_expire = in->rmx_expire ?
811 	    in->rmx_expire - time_uptime + time_second : 0;
812 #undef metric
813 }
814 
815 /*
816  * Extract the addresses of the passed sockaddrs.
817  * Do a little sanity checking so as to avoid bad memory references.
818  * This data is derived straight from userland.
819  */
820 static int
821 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
822 {
823 	struct sockaddr *sa;
824 	int i;
825 
826 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
827 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
828 			continue;
829 		sa = (struct sockaddr *)cp;
830 		/*
831 		 * It won't fit.
832 		 */
833 		if (cp + sa->sa_len > cplim)
834 			return (EINVAL);
835 		/*
836 		 * there are no more.. quit now
837 		 * If there are more bits, they are in error.
838 		 * I've seen this. route(1) can evidently generate these.
839 		 * This causes kernel to core dump.
840 		 * for compatibility, If we see this, point to a safe address.
841 		 */
842 		if (sa->sa_len == 0) {
843 			rtinfo->rti_info[i] = &sa_zero;
844 			return (0); /* should be EINVAL but for compat */
845 		}
846 		/* accept it */
847 		rtinfo->rti_info[i] = sa;
848 		cp += SA_SIZE(sa);
849 	}
850 	return (0);
851 }
852 
853 static struct mbuf *
854 rt_msg1(int type, struct rt_addrinfo *rtinfo)
855 {
856 	struct rt_msghdr *rtm;
857 	struct mbuf *m;
858 	int i;
859 	struct sockaddr *sa;
860 	int len, dlen;
861 
862 	switch (type) {
863 
864 	case RTM_DELADDR:
865 	case RTM_NEWADDR:
866 		len = sizeof(struct ifa_msghdr);
867 		break;
868 
869 	case RTM_DELMADDR:
870 	case RTM_NEWMADDR:
871 		len = sizeof(struct ifma_msghdr);
872 		break;
873 
874 	case RTM_IFINFO:
875 		len = sizeof(struct if_msghdr);
876 		break;
877 
878 	case RTM_IFANNOUNCE:
879 	case RTM_IEEE80211:
880 		len = sizeof(struct if_announcemsghdr);
881 		break;
882 
883 	default:
884 		len = sizeof(struct rt_msghdr);
885 	}
886 	if (len > MCLBYTES)
887 		panic("rt_msg1");
888 	m = m_gethdr(M_DONTWAIT, MT_DATA);
889 	if (m && len > MHLEN) {
890 		MCLGET(m, M_DONTWAIT);
891 		if ((m->m_flags & M_EXT) == 0) {
892 			m_free(m);
893 			m = NULL;
894 		}
895 	}
896 	if (m == NULL)
897 		return (m);
898 	m->m_pkthdr.len = m->m_len = len;
899 	m->m_pkthdr.rcvif = NULL;
900 	rtm = mtod(m, struct rt_msghdr *);
901 	bzero((caddr_t)rtm, len);
902 	for (i = 0; i < RTAX_MAX; i++) {
903 		if ((sa = rtinfo->rti_info[i]) == NULL)
904 			continue;
905 		rtinfo->rti_addrs |= (1 << i);
906 		dlen = SA_SIZE(sa);
907 		m_copyback(m, len, dlen, (caddr_t)sa);
908 		len += dlen;
909 	}
910 	if (m->m_pkthdr.len != len) {
911 		m_freem(m);
912 		return (NULL);
913 	}
914 	rtm->rtm_msglen = len;
915 	rtm->rtm_version = RTM_VERSION;
916 	rtm->rtm_type = type;
917 	return (m);
918 }
919 
920 static int
921 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
922 {
923 	int i;
924 	int len, dlen, second_time = 0;
925 	caddr_t cp0;
926 
927 	rtinfo->rti_addrs = 0;
928 again:
929 	switch (type) {
930 
931 	case RTM_DELADDR:
932 	case RTM_NEWADDR:
933 		len = sizeof(struct ifa_msghdr);
934 		break;
935 
936 	case RTM_IFINFO:
937 		len = sizeof(struct if_msghdr);
938 		break;
939 
940 	case RTM_NEWMADDR:
941 		len = sizeof(struct ifma_msghdr);
942 		break;
943 
944 	default:
945 		len = sizeof(struct rt_msghdr);
946 	}
947 	cp0 = cp;
948 	if (cp0)
949 		cp += len;
950 	for (i = 0; i < RTAX_MAX; i++) {
951 		struct sockaddr *sa;
952 
953 		if ((sa = rtinfo->rti_info[i]) == NULL)
954 			continue;
955 		rtinfo->rti_addrs |= (1 << i);
956 		dlen = SA_SIZE(sa);
957 		if (cp) {
958 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
959 			cp += dlen;
960 		}
961 		len += dlen;
962 	}
963 	len = ALIGN(len);
964 	if (cp == NULL && w != NULL && !second_time) {
965 		struct walkarg *rw = w;
966 
967 		if (rw->w_req) {
968 			if (rw->w_tmemsize < len) {
969 				if (rw->w_tmem)
970 					free(rw->w_tmem, M_RTABLE);
971 				rw->w_tmem = (caddr_t)
972 					malloc(len, M_RTABLE, M_NOWAIT);
973 				if (rw->w_tmem)
974 					rw->w_tmemsize = len;
975 			}
976 			if (rw->w_tmem) {
977 				cp = rw->w_tmem;
978 				second_time = 1;
979 				goto again;
980 			}
981 		}
982 	}
983 	if (cp) {
984 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
985 
986 		rtm->rtm_version = RTM_VERSION;
987 		rtm->rtm_type = type;
988 		rtm->rtm_msglen = len;
989 	}
990 	return (len);
991 }
992 
993 /*
994  * This routine is called to generate a message from the routing
995  * socket indicating that a redirect has occured, a routing lookup
996  * has failed, or that a protocol has detected timeouts to a particular
997  * destination.
998  */
999 void
1000 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1001 {
1002 	struct rt_msghdr *rtm;
1003 	struct mbuf *m;
1004 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1005 
1006 	if (route_cb.any_count == 0)
1007 		return;
1008 	m = rt_msg1(type, rtinfo);
1009 	if (m == NULL)
1010 		return;
1011 	rtm = mtod(m, struct rt_msghdr *);
1012 	rtm->rtm_flags = RTF_DONE | flags;
1013 	rtm->rtm_errno = error;
1014 	rtm->rtm_addrs = rtinfo->rti_addrs;
1015 	rt_dispatch(m, sa);
1016 }
1017 
1018 /*
1019  * This routine is called to generate a message from the routing
1020  * socket indicating that the status of a network interface has changed.
1021  */
1022 void
1023 rt_ifmsg(struct ifnet *ifp)
1024 {
1025 	struct if_msghdr *ifm;
1026 	struct mbuf *m;
1027 	struct rt_addrinfo info;
1028 
1029 	if (route_cb.any_count == 0)
1030 		return;
1031 	bzero((caddr_t)&info, sizeof(info));
1032 	m = rt_msg1(RTM_IFINFO, &info);
1033 	if (m == NULL)
1034 		return;
1035 	ifm = mtod(m, struct if_msghdr *);
1036 	ifm->ifm_index = ifp->if_index;
1037 	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1038 	ifm->ifm_data = ifp->if_data;
1039 	ifm->ifm_addrs = 0;
1040 	rt_dispatch(m, NULL);
1041 }
1042 
1043 /*
1044  * This is called to generate messages from the routing socket
1045  * indicating a network interface has had addresses associated with it.
1046  * if we ever reverse the logic and replace messages TO the routing
1047  * socket indicate a request to configure interfaces, then it will
1048  * be unnecessary as the routing socket will automatically generate
1049  * copies of it.
1050  */
1051 void
1052 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
1053 {
1054 	struct rt_addrinfo info;
1055 	struct sockaddr *sa = NULL;
1056 	int pass;
1057 	struct mbuf *m = NULL;
1058 	struct ifnet *ifp = ifa->ifa_ifp;
1059 
1060 	KASSERT(cmd == RTM_ADD || cmd == RTM_DELETE,
1061 		("unexpected cmd %u", cmd));
1062 #ifdef SCTP
1063 	/*
1064 	 * notify the SCTP stack
1065 	 * this will only get called when an address is added/deleted
1066 	 * XXX pass the ifaddr struct instead if ifa->ifa_addr...
1067 	 */
1068 	sctp_addr_change(ifa, cmd);
1069 #endif /* SCTP */
1070 	if (route_cb.any_count == 0)
1071 		return;
1072 	for (pass = 1; pass < 3; pass++) {
1073 		bzero((caddr_t)&info, sizeof(info));
1074 		if ((cmd == RTM_ADD && pass == 1) ||
1075 		    (cmd == RTM_DELETE && pass == 2)) {
1076 			struct ifa_msghdr *ifam;
1077 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
1078 
1079 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
1080 			info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
1081 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1082 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1083 			if ((m = rt_msg1(ncmd, &info)) == NULL)
1084 				continue;
1085 			ifam = mtod(m, struct ifa_msghdr *);
1086 			ifam->ifam_index = ifp->if_index;
1087 			ifam->ifam_metric = ifa->ifa_metric;
1088 			ifam->ifam_flags = ifa->ifa_flags;
1089 			ifam->ifam_addrs = info.rti_addrs;
1090 		}
1091 		if ((cmd == RTM_ADD && pass == 2) ||
1092 		    (cmd == RTM_DELETE && pass == 1)) {
1093 			struct rt_msghdr *rtm;
1094 
1095 			if (rt == NULL)
1096 				continue;
1097 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1098 			info.rti_info[RTAX_DST] = sa = rt_key(rt);
1099 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1100 			if ((m = rt_msg1(cmd, &info)) == NULL)
1101 				continue;
1102 			rtm = mtod(m, struct rt_msghdr *);
1103 			rtm->rtm_index = ifp->if_index;
1104 			rtm->rtm_flags |= rt->rt_flags;
1105 			rtm->rtm_errno = error;
1106 			rtm->rtm_addrs = info.rti_addrs;
1107 		}
1108 		rt_dispatch(m, sa);
1109 	}
1110 }
1111 
1112 /*
1113  * This is the analogue to the rt_newaddrmsg which performs the same
1114  * function but for multicast group memberhips.  This is easier since
1115  * there is no route state to worry about.
1116  */
1117 void
1118 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
1119 {
1120 	struct rt_addrinfo info;
1121 	struct mbuf *m = NULL;
1122 	struct ifnet *ifp = ifma->ifma_ifp;
1123 	struct ifma_msghdr *ifmam;
1124 
1125 	if (route_cb.any_count == 0)
1126 		return;
1127 
1128 	bzero((caddr_t)&info, sizeof(info));
1129 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1130 	info.rti_info[RTAX_IFP] = ifp ? ifp->if_addr->ifa_addr : NULL;
1131 	/*
1132 	 * If a link-layer address is present, present it as a ``gateway''
1133 	 * (similarly to how ARP entries, e.g., are presented).
1134 	 */
1135 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1136 	m = rt_msg1(cmd, &info);
1137 	if (m == NULL)
1138 		return;
1139 	ifmam = mtod(m, struct ifma_msghdr *);
1140 	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
1141 	    __func__));
1142 	ifmam->ifmam_index = ifp->if_index;
1143 	ifmam->ifmam_addrs = info.rti_addrs;
1144 	rt_dispatch(m, ifma->ifma_addr);
1145 }
1146 
1147 static struct mbuf *
1148 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
1149 	struct rt_addrinfo *info)
1150 {
1151 	struct if_announcemsghdr *ifan;
1152 	struct mbuf *m;
1153 
1154 	if (route_cb.any_count == 0)
1155 		return NULL;
1156 	bzero((caddr_t)info, sizeof(*info));
1157 	m = rt_msg1(type, info);
1158 	if (m != NULL) {
1159 		ifan = mtod(m, struct if_announcemsghdr *);
1160 		ifan->ifan_index = ifp->if_index;
1161 		strlcpy(ifan->ifan_name, ifp->if_xname,
1162 			sizeof(ifan->ifan_name));
1163 		ifan->ifan_what = what;
1164 	}
1165 	return m;
1166 }
1167 
1168 /*
1169  * This is called to generate routing socket messages indicating
1170  * IEEE80211 wireless events.
1171  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
1172  */
1173 void
1174 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
1175 {
1176 	struct mbuf *m;
1177 	struct rt_addrinfo info;
1178 
1179 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info);
1180 	if (m != NULL) {
1181 		/*
1182 		 * Append the ieee80211 data.  Try to stick it in the
1183 		 * mbuf containing the ifannounce msg; otherwise allocate
1184 		 * a new mbuf and append.
1185 		 *
1186 		 * NB: we assume m is a single mbuf.
1187 		 */
1188 		if (data_len > M_TRAILINGSPACE(m)) {
1189 			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
1190 			if (n == NULL) {
1191 				m_freem(m);
1192 				return;
1193 			}
1194 			bcopy(data, mtod(n, void *), data_len);
1195 			n->m_len = data_len;
1196 			m->m_next = n;
1197 		} else if (data_len > 0) {
1198 			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
1199 			m->m_len += data_len;
1200 		}
1201 		if (m->m_flags & M_PKTHDR)
1202 			m->m_pkthdr.len += data_len;
1203 		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
1204 		rt_dispatch(m, NULL);
1205 	}
1206 }
1207 
1208 /*
1209  * This is called to generate routing socket messages indicating
1210  * network interface arrival and departure.
1211  */
1212 void
1213 rt_ifannouncemsg(struct ifnet *ifp, int what)
1214 {
1215 	struct mbuf *m;
1216 	struct rt_addrinfo info;
1217 
1218 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info);
1219 	if (m != NULL)
1220 		rt_dispatch(m, NULL);
1221 }
1222 
1223 static void
1224 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
1225 {
1226 	INIT_VNET_NET(curvnet);
1227 	struct m_tag *tag;
1228 
1229 	/*
1230 	 * Preserve the family from the sockaddr, if any, in an m_tag for
1231 	 * use when injecting the mbuf into the routing socket buffer from
1232 	 * the netisr.
1233 	 */
1234 	if (sa != NULL) {
1235 		tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
1236 		    M_NOWAIT);
1237 		if (tag == NULL) {
1238 			m_freem(m);
1239 			return;
1240 		}
1241 		*(unsigned short *)(tag + 1) = sa->sa_family;
1242 		m_tag_prepend(m, tag);
1243 	}
1244 	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
1245 }
1246 
1247 /*
1248  * This is used in dumping the kernel table via sysctl().
1249  */
1250 static int
1251 sysctl_dumpentry(struct radix_node *rn, void *vw)
1252 {
1253 	struct walkarg *w = vw;
1254 	struct rtentry *rt = (struct rtentry *)rn;
1255 	int error = 0, size;
1256 	struct rt_addrinfo info;
1257 
1258 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
1259 		return 0;
1260 	bzero((caddr_t)&info, sizeof(info));
1261 	info.rti_info[RTAX_DST] = rt_key(rt);
1262 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
1263 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
1264 	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
1265 	if (rt->rt_ifp) {
1266 		info.rti_info[RTAX_IFP] = rt->rt_ifp->if_addr->ifa_addr;
1267 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
1268 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1269 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1270 	}
1271 	size = rt_msg2(RTM_GET, &info, NULL, w);
1272 	if (w->w_req && w->w_tmem) {
1273 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1274 
1275 		rtm->rtm_flags = rt->rt_flags;
1276 		rtm->rtm_use = rt->rt_rmx.rmx_pksent;
1277 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1278 		rtm->rtm_index = rt->rt_ifp->if_index;
1279 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1280 		rtm->rtm_addrs = info.rti_addrs;
1281 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1282 		return (error);
1283 	}
1284 	return (error);
1285 }
1286 
1287 static int
1288 sysctl_iflist(int af, struct walkarg *w)
1289 {
1290 	INIT_VNET_NET(curvnet);
1291 	struct ifnet *ifp;
1292 	struct ifaddr *ifa;
1293 	struct rt_addrinfo info;
1294 	int len, error = 0;
1295 
1296 	bzero((caddr_t)&info, sizeof(info));
1297 	IFNET_RLOCK();
1298 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1299 		if (w->w_arg && w->w_arg != ifp->if_index)
1300 			continue;
1301 		ifa = ifp->if_addr;
1302 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
1303 		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
1304 		info.rti_info[RTAX_IFP] = NULL;
1305 		if (w->w_req && w->w_tmem) {
1306 			struct if_msghdr *ifm;
1307 
1308 			ifm = (struct if_msghdr *)w->w_tmem;
1309 			ifm->ifm_index = ifp->if_index;
1310 			ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1311 			ifm->ifm_data = ifp->if_data;
1312 			ifm->ifm_addrs = info.rti_addrs;
1313 			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
1314 			if (error)
1315 				goto done;
1316 		}
1317 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
1318 			if (af && af != ifa->ifa_addr->sa_family)
1319 				continue;
1320 			if (jailed(curthread->td_ucred) &&
1321 			    !prison_if(curthread->td_ucred, ifa->ifa_addr))
1322 				continue;
1323 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1324 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1325 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1326 			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
1327 			if (w->w_req && w->w_tmem) {
1328 				struct ifa_msghdr *ifam;
1329 
1330 				ifam = (struct ifa_msghdr *)w->w_tmem;
1331 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1332 				ifam->ifam_flags = ifa->ifa_flags;
1333 				ifam->ifam_metric = ifa->ifa_metric;
1334 				ifam->ifam_addrs = info.rti_addrs;
1335 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1336 				if (error)
1337 					goto done;
1338 			}
1339 		}
1340 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1341 			info.rti_info[RTAX_BRD] = NULL;
1342 	}
1343 done:
1344 	IFNET_RUNLOCK();
1345 	return (error);
1346 }
1347 
1348 int
1349 sysctl_ifmalist(int af, struct walkarg *w)
1350 {
1351 	INIT_VNET_NET(curvnet);
1352 	struct ifnet *ifp;
1353 	struct ifmultiaddr *ifma;
1354 	struct	rt_addrinfo info;
1355 	int	len, error = 0;
1356 	struct ifaddr *ifa;
1357 
1358 	bzero((caddr_t)&info, sizeof(info));
1359 	IFNET_RLOCK();
1360 	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
1361 		if (w->w_arg && w->w_arg != ifp->if_index)
1362 			continue;
1363 		ifa = ifp->if_addr;
1364 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1365 		IF_ADDR_LOCK(ifp);
1366 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1367 			if (af && af != ifma->ifma_addr->sa_family)
1368 				continue;
1369 			if (jailed(curproc->p_ucred) &&
1370 			    !prison_if(curproc->p_ucred, ifma->ifma_addr))
1371 				continue;
1372 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1373 			info.rti_info[RTAX_GATEWAY] =
1374 			    (ifma->ifma_addr->sa_family != AF_LINK) ?
1375 			    ifma->ifma_lladdr : NULL;
1376 			len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1377 			if (w->w_req && w->w_tmem) {
1378 				struct ifma_msghdr *ifmam;
1379 
1380 				ifmam = (struct ifma_msghdr *)w->w_tmem;
1381 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1382 				ifmam->ifmam_flags = 0;
1383 				ifmam->ifmam_addrs = info.rti_addrs;
1384 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1385 				if (error) {
1386 					IF_ADDR_UNLOCK(ifp);
1387 					goto done;
1388 				}
1389 			}
1390 		}
1391 		IF_ADDR_UNLOCK(ifp);
1392 	}
1393 done:
1394 	IFNET_RUNLOCK();
1395 	return (error);
1396 }
1397 
1398 static int
1399 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1400 {
1401 	INIT_VNET_NET(curvnet);
1402 	int	*name = (int *)arg1;
1403 	u_int	namelen = arg2;
1404 	struct radix_node_head *rnh;
1405 	int	i, lim, error = EINVAL;
1406 	u_char	af;
1407 	struct	walkarg w;
1408 
1409 	name ++;
1410 	namelen--;
1411 	if (req->newptr)
1412 		return (EPERM);
1413 	if (namelen != 3)
1414 		return ((namelen < 3) ? EISDIR : ENOTDIR);
1415 	af = name[0];
1416 	if (af > AF_MAX)
1417 		return (EINVAL);
1418 	bzero(&w, sizeof(w));
1419 	w.w_op = name[1];
1420 	w.w_arg = name[2];
1421 	w.w_req = req;
1422 
1423 	error = sysctl_wire_old_buffer(req, 0);
1424 	if (error)
1425 		return (error);
1426 	switch (w.w_op) {
1427 
1428 	case NET_RT_DUMP:
1429 	case NET_RT_FLAGS:
1430 		if (af == 0) {			/* dump all tables */
1431 			i = 1;
1432 			lim = AF_MAX;
1433 		} else				/* dump only one table */
1434 			i = lim = af;
1435 		for (error = 0; error == 0 && i <= lim; i++)
1436 			if ((rnh = V_rt_tables[curthread->td_proc->p_fibnum][i]) != NULL) {
1437 				RADIX_NODE_HEAD_LOCK(rnh);
1438 			    	error = rnh->rnh_walktree(rnh,
1439 				    sysctl_dumpentry, &w);
1440 				RADIX_NODE_HEAD_UNLOCK(rnh);
1441 			} else if (af != 0)
1442 				error = EAFNOSUPPORT;
1443 		break;
1444 
1445 	case NET_RT_IFLIST:
1446 		error = sysctl_iflist(af, &w);
1447 		break;
1448 
1449 	case NET_RT_IFMALIST:
1450 		error = sysctl_ifmalist(af, &w);
1451 		break;
1452 	}
1453 	if (w.w_tmem)
1454 		free(w.w_tmem, M_RTABLE);
1455 	return (error);
1456 }
1457 
1458 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1459 
1460 /*
1461  * Definitions of protocols supported in the ROUTE domain.
1462  */
1463 
1464 static struct domain routedomain;		/* or at least forward */
1465 
1466 static struct protosw routesw[] = {
1467 {
1468 	.pr_type =		SOCK_RAW,
1469 	.pr_domain =		&routedomain,
1470 	.pr_flags =		PR_ATOMIC|PR_ADDR,
1471 	.pr_output =		route_output,
1472 	.pr_ctlinput =		raw_ctlinput,
1473 	.pr_init =		raw_init,
1474 	.pr_usrreqs =		&route_usrreqs
1475 }
1476 };
1477 
1478 static struct domain routedomain = {
1479 	.dom_family =		PF_ROUTE,
1480 	.dom_name =		 "route",
1481 	.dom_protosw =		routesw,
1482 	.dom_protoswNPROTOSW =	&routesw[sizeof(routesw)/sizeof(routesw[0])]
1483 };
1484 
1485 DOMAIN_SET(route);
1486