xref: /freebsd/sys/net/rtsock.c (revision 2546665afcaf0d53dc2c7058fee96354b3680f5a)
1 /*
2  * Copyright (c) 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
30  * $FreeBSD$
31  */
32 
33 #include <sys/param.h>
34 #include <sys/domain.h>
35 #include <sys/kernel.h>
36 #include <sys/jail.h>
37 #include <sys/malloc.h>
38 #include <sys/mbuf.h>
39 #include <sys/proc.h>
40 #include <sys/protosw.h>
41 #include <sys/signalvar.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 
47 #include <net/if.h>
48 #include <net/netisr.h>
49 #include <net/raw_cb.h>
50 #include <net/route.h>
51 
52 #include <netinet/in.h>
53 
54 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
55 
/*
 * Constant socket addresses used by this file.
 * NB: these are not modified.
 *
 * route_src and route_dst are handed to raw_input() by rts_input() for
 * every routing message; route_src is also stored in rcb_faddr of each
 * routing socket by rts_attach().  sa_zero is the placeholder that
 * rt_xaddrs() substitutes when userland supplies a zero-length sockaddr.
 * NOTE(review): the leading "2" is presumably sa_len -- confirm against
 * the struct sockaddr layout.
 */
static struct	sockaddr route_dst = { 2, PF_ROUTE, };
static struct	sockaddr route_src = { 2, PF_ROUTE, };
static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
60 
/*
 * Counts of attached routing sockets, keyed by the protocol each socket
 * was created with.  Incremented in rts_attach() and decremented in
 * rts_detach(), both under RTSOCK_LOCK(); any_count is also read (without
 * the lock) by the message generators to skip work when nobody listens.
 */
static struct {
	int	ip_count;	/* attached w/ AF_INET */
	int	ip6_count;	/* attached w/ AF_INET6 */
	int	ipx_count;	/* attached w/ AF_IPX */
	int	any_count;	/* total attached */
} route_cb;
67 
/* Mutex taken around updates of the route_cb counters. */
struct mtx rtsock_mtx;
MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);

/* Convenience wrappers around rtsock_mtx. */
#define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
#define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
#define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)

/* Queue registered with the NETISR_ROUTE netisr in rts_init(). */
static struct	ifqueue rtsintrq;
76 
/*
 * State carried through one sysctl dump request (see sysctl_rtsock()).
 */
struct walkarg {
	int	w_tmemsize;	/* size in bytes of the w_tmem buffer */
	int	w_op, w_arg;	/* NET_RT_* operation and its argument */
	caddr_t	w_tmem;		/* scratch buffer grown by rt_msg2() */
	struct sysctl_req *w_req;	/* request that SYSCTL_OUT() writes to */
};
83 
/* Forward declarations of the file-local helpers defined below. */
static void	rts_input(struct mbuf *m);
static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
static int	rt_msg2(int type, struct rt_addrinfo *rtinfo,
			caddr_t cp, struct walkarg *w);
static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
			struct rt_addrinfo *rtinfo);
static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
static int	sysctl_iflist(int af, struct walkarg *w);
static int	sysctl_ifmalist(int af, struct walkarg *w);
static int	route_output(struct mbuf *m, struct socket *so);
static void	rt_setmetrics(u_long which, const struct rt_metrics *in,
			struct rt_metrics_lite *out);
static void	rt_getmetrics(const struct rt_metrics_lite *in,
			struct rt_metrics *out);
static void	rt_dispatch(struct mbuf *, const struct sockaddr *);
99 
100 static void
101 rts_init(void)
102 {
103 
104 	rtsintrq.ifq_maxlen = IFQ_MAXLEN;
105 	mtx_init(&rtsintrq.ifq_mtx, "rts_inq", NULL, MTX_DEF);
106 	netisr_register(NETISR_ROUTE, rts_input, &rtsintrq, NETISR_MPSAFE);
107 }
108 SYSINIT(rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rts_init, 0)
109 
110 static void
111 rts_input(struct mbuf *m)
112 {
113 	struct sockproto route_proto;
114 	unsigned short *family;
115 	struct m_tag *tag;
116 
117 	route_proto.sp_family = PF_ROUTE;
118 	tag = m_tag_find(m, PACKET_TAG_RTSOCKFAM, NULL);
119 	if (tag != NULL) {
120 		family = (unsigned short *)(tag + 1);
121 		route_proto.sp_protocol = *family;
122 		m_tag_delete(m, tag);
123 	} else
124 		route_proto.sp_protocol = 0;
125 
126 	raw_input(m, &route_proto, &route_src, &route_dst);
127 }
128 
129 /*
130  * It really doesn't make any sense at all for this code to share much
131  * with raw_usrreq.c, since its functionality is so restricted.  XXX
132  */
133 static int
134 rts_abort(struct socket *so)
135 {
136 	int s, error;
137 	s = splnet();
138 	error = raw_usrreqs.pru_abort(so);
139 	splx(s);
140 	return error;
141 }
142 
143 /* pru_accept is EOPNOTSUPP */
144 
145 static int
146 rts_attach(struct socket *so, int proto, struct thread *td)
147 {
148 	struct rawcb *rp;
149 	int s, error;
150 
151 	if (sotorawcb(so) != NULL)
152 		return EISCONN;	/* XXX panic? */
153 	/* XXX */
154 	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
155 	if (rp == NULL)
156 		return ENOBUFS;
157 
158 	/*
159 	 * The splnet() is necessary to block protocols from sending
160 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
161 	 * this PCB is extant but incompletely initialized.
162 	 * Probably we should try to do more of this work beforehand and
163 	 * eliminate the spl.
164 	 */
165 	s = splnet();
166 	so->so_pcb = (caddr_t)rp;
167 	error = raw_attach(so, proto);
168 	rp = sotorawcb(so);
169 	if (error) {
170 		splx(s);
171 		so->so_pcb = NULL;
172 		free(rp, M_PCB);
173 		return error;
174 	}
175 	RTSOCK_LOCK();
176 	switch(rp->rcb_proto.sp_protocol) {
177 	case AF_INET:
178 		route_cb.ip_count++;
179 		break;
180 	case AF_INET6:
181 		route_cb.ip6_count++;
182 		break;
183 	case AF_IPX:
184 		route_cb.ipx_count++;
185 		break;
186 	}
187 	rp->rcb_faddr = &route_src;
188 	route_cb.any_count++;
189 	RTSOCK_UNLOCK();
190 	soisconnected(so);
191 	so->so_options |= SO_USELOOPBACK;
192 	splx(s);
193 	return 0;
194 }
195 
196 static int
197 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
198 {
199 	int s, error;
200 	s = splnet();
201 	error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */
202 	splx(s);
203 	return error;
204 }
205 
206 static int
207 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
208 {
209 	int s, error;
210 	s = splnet();
211 	error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
212 	splx(s);
213 	return error;
214 }
215 
216 /* pru_connect2 is EOPNOTSUPP */
217 /* pru_control is EOPNOTSUPP */
218 
219 static int
220 rts_detach(struct socket *so)
221 {
222 	struct rawcb *rp = sotorawcb(so);
223 	int s, error;
224 
225 	s = splnet();
226 	if (rp != NULL) {
227 		RTSOCK_LOCK();
228 		switch(rp->rcb_proto.sp_protocol) {
229 		case AF_INET:
230 			route_cb.ip_count--;
231 			break;
232 		case AF_INET6:
233 			route_cb.ip6_count--;
234 			break;
235 		case AF_IPX:
236 			route_cb.ipx_count--;
237 			break;
238 		}
239 		route_cb.any_count--;
240 		RTSOCK_UNLOCK();
241 	}
242 	error = raw_usrreqs.pru_detach(so);
243 	splx(s);
244 	return error;
245 }
246 
247 static int
248 rts_disconnect(struct socket *so)
249 {
250 	int s, error;
251 	s = splnet();
252 	error = raw_usrreqs.pru_disconnect(so);
253 	splx(s);
254 	return error;
255 }
256 
257 /* pru_listen is EOPNOTSUPP */
258 
259 static int
260 rts_peeraddr(struct socket *so, struct sockaddr **nam)
261 {
262 	int s, error;
263 	s = splnet();
264 	error = raw_usrreqs.pru_peeraddr(so, nam);
265 	splx(s);
266 	return error;
267 }
268 
269 /* pru_rcvd is EOPNOTSUPP */
270 /* pru_rcvoob is EOPNOTSUPP */
271 
272 static int
273 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
274 	 struct mbuf *control, struct thread *td)
275 {
276 	int s, error;
277 	s = splnet();
278 	error = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
279 	splx(s);
280 	return error;
281 }
282 
283 /* pru_sense is null */
284 
285 static int
286 rts_shutdown(struct socket *so)
287 {
288 	int s, error;
289 	s = splnet();
290 	error = raw_usrreqs.pru_shutdown(so);
291 	splx(s);
292 	return error;
293 }
294 
295 static int
296 rts_sockaddr(struct socket *so, struct sockaddr **nam)
297 {
298 	int s, error;
299 	s = splnet();
300 	error = raw_usrreqs.pru_sockaddr(so, nam);
301 	splx(s);
302 	return error;
303 }
304 
/*
 * User-request switch for routing sockets, referenced from routesw[].
 * The initializer is positional: the rts_* wrappers defined above fill
 * the operations this file implements, and the remaining slots use the
 * generic pru_*_notsupp / pru_*_null stubs and the so* socket routines.
 * NOTE(review): slot order must match struct pr_usrreqs, whose definition
 * is not in this file -- confirm before reordering.
 */
static struct pr_usrreqs route_usrreqs = {
	rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect,
	pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect,
	pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
	rts_send, pru_sense_null, rts_shutdown, rts_sockaddr,
	sosend, soreceive, sopoll, pru_sosetlabel_null
};
312 
313 /*ARGSUSED*/
314 static int
315 route_output(struct mbuf *m, struct socket *so)
316 {
317 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
318 	struct rt_msghdr *rtm = NULL;
319 	struct rtentry *rt = NULL;
320 	struct radix_node_head *rnh;
321 	struct rt_addrinfo info;
322 	int len, error = 0;
323 	struct ifnet *ifp = NULL;
324 	struct ifaddr *ifa = NULL;
325 	struct sockaddr_in jail;
326 
327 #define senderr(e) { error = e; goto flush;}
328 	if (m == NULL || ((m->m_len < sizeof(long)) &&
329 		       (m = m_pullup(m, sizeof(long))) == NULL))
330 		return (ENOBUFS);
331 	if ((m->m_flags & M_PKTHDR) == 0)
332 		panic("route_output");
333 	len = m->m_pkthdr.len;
334 	if (len < sizeof(*rtm) ||
335 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
336 		info.rti_info[RTAX_DST] = NULL;
337 		senderr(EINVAL);
338 	}
339 	R_Malloc(rtm, struct rt_msghdr *, len);
340 	if (rtm == NULL) {
341 		info.rti_info[RTAX_DST] = NULL;
342 		senderr(ENOBUFS);
343 	}
344 	m_copydata(m, 0, len, (caddr_t)rtm);
345 	if (rtm->rtm_version != RTM_VERSION) {
346 		info.rti_info[RTAX_DST] = NULL;
347 		senderr(EPROTONOSUPPORT);
348 	}
349 	rtm->rtm_pid = curproc->p_pid;
350 	bzero(&info, sizeof(info));
351 	info.rti_addrs = rtm->rtm_addrs;
352 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
353 		info.rti_info[RTAX_DST] = NULL;
354 		senderr(EINVAL);
355 	}
356 	info.rti_flags = rtm->rtm_flags;
357 	if (info.rti_info[RTAX_DST] == NULL ||
358 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
359 	    (info.rti_info[RTAX_GATEWAY] != NULL &&
360 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
361 		senderr(EINVAL);
362 	if (info.rti_info[RTAX_GENMASK]) {
363 		struct radix_node *t;
364 		t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
365 		if (t != NULL &&
366 		    bcmp((char *)(void *)info.rti_info[RTAX_GENMASK] + 1,
367 		    (char *)(void *)t->rn_key + 1,
368 		    ((struct sockaddr *)t->rn_key)->sa_len - 1) == 0)
369 			info.rti_info[RTAX_GENMASK] =
370 			    (struct sockaddr *)t->rn_key;
371 		else
372 			senderr(ENOBUFS);
373 	}
374 
375 	/*
376 	 * Verify that the caller has the appropriate privilege; RTM_GET
377 	 * is the only operation the non-superuser is allowed.
378 	 */
379 	if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
380 		senderr(error);
381 
382 	switch (rtm->rtm_type) {
383 		struct rtentry *saved_nrt;
384 
385 	case RTM_ADD:
386 		if (info.rti_info[RTAX_GATEWAY] == NULL)
387 			senderr(EINVAL);
388 		saved_nrt = NULL;
389 		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
390 		if (error == 0 && saved_nrt) {
391 			RT_LOCK(saved_nrt);
392 			rt_setmetrics(rtm->rtm_inits,
393 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
394 			RT_REMREF(saved_nrt);
395 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
396 			RT_UNLOCK(saved_nrt);
397 		}
398 		break;
399 
400 	case RTM_DELETE:
401 		saved_nrt = NULL;
402 		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
403 		if (error == 0) {
404 			RT_LOCK(saved_nrt);
405 			rt = saved_nrt;
406 			goto report;
407 		}
408 		break;
409 
410 	case RTM_GET:
411 	case RTM_CHANGE:
412 	case RTM_LOCK:
413 		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
414 		if (rnh == NULL)
415 			senderr(EAFNOSUPPORT);
416 		RADIX_NODE_HEAD_LOCK(rnh);
417 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
418 			info.rti_info[RTAX_NETMASK], rnh);
419 		RADIX_NODE_HEAD_UNLOCK(rnh);
420 		if (rt == NULL)		/* XXX looks bogus */
421 			senderr(ESRCH);
422 		RT_LOCK(rt);
423 		RT_ADDREF(rt);
424 
425 		switch(rtm->rtm_type) {
426 
427 		case RTM_GET:
428 		report:
429 			RT_LOCK_ASSERT(rt);
430 			info.rti_info[RTAX_DST] = rt_key(rt);
431 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
432 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
433 			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
434 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
435 				ifp = rt->rt_ifp;
436 				if (ifp) {
437 					info.rti_info[RTAX_IFP] =
438 					    ifaddr_byindex(ifp->if_index)->ifa_addr;
439 					if (jailed(so->so_cred)) {
440 						bzero(&jail, sizeof(jail));
441 						jail.sin_family = PF_INET;
442 						jail.sin_len = sizeof(jail);
443 						jail.sin_addr.s_addr =
444 						htonl(prison_getip(so->so_cred));
445 						info.rti_info[RTAX_IFA] =
446 						    (struct sockaddr *)&jail;
447 					} else
448 						info.rti_info[RTAX_IFA] =
449 						    rt->rt_ifa->ifa_addr;
450 					if (ifp->if_flags & IFF_POINTOPOINT)
451 						info.rti_info[RTAX_BRD] =
452 						    rt->rt_ifa->ifa_dstaddr;
453 					rtm->rtm_index = ifp->if_index;
454 				} else {
455 					info.rti_info[RTAX_IFP] = NULL;
456 					info.rti_info[RTAX_IFA] = NULL;
457 				}
458 			}
459 			len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
460 			if (len > rtm->rtm_msglen) {
461 				struct rt_msghdr *new_rtm;
462 				R_Malloc(new_rtm, struct rt_msghdr *, len);
463 				if (new_rtm == NULL) {
464 					RT_UNLOCK(rt);
465 					senderr(ENOBUFS);
466 				}
467 				bcopy(rtm, new_rtm, rtm->rtm_msglen);
468 				Free(rtm); rtm = new_rtm;
469 			}
470 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
471 			rtm->rtm_flags = rt->rt_flags;
472 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
473 			rtm->rtm_addrs = info.rti_addrs;
474 			break;
475 
476 		case RTM_CHANGE:
477 			/*
478 			 * New gateway could require new ifaddr, ifp;
479 			 * flags may also be different; ifp may be specified
480 			 * by ll sockaddr when protocol address is ambiguous
481 			 */
482 			if (((rt->rt_flags & RTF_GATEWAY) &&
483 			     info.rti_info[RTAX_GATEWAY] != NULL) ||
484 			    info.rti_info[RTAX_IFP] != NULL ||
485 			    (info.rti_info[RTAX_IFA] != NULL &&
486 			     !sa_equal(info.rti_info[RTAX_IFA],
487 				       rt->rt_ifa->ifa_addr))) {
488 				if ((error = rt_getifa(&info)) != 0) {
489 					RT_UNLOCK(rt);
490 					senderr(error);
491 				}
492 			}
493 			if (info.rti_info[RTAX_GATEWAY] != NULL &&
494 			    (error = rt_setgate(rt, rt_key(rt),
495 					info.rti_info[RTAX_GATEWAY])) != 0) {
496 				RT_UNLOCK(rt);
497 				senderr(error);
498 			}
499 			if ((ifa = info.rti_ifa) != NULL) {
500 				struct ifaddr *oifa = rt->rt_ifa;
501 				if (oifa != ifa) {
502 					if (oifa) {
503 						if (oifa->ifa_rtrequest)
504 							oifa->ifa_rtrequest(
505 								RTM_DELETE, rt,
506 								&info);
507 						IFAFREE(oifa);
508 					}
509 				        IFAREF(ifa);
510 				        rt->rt_ifa = ifa;
511 				        rt->rt_ifp = info.rti_ifp;
512 				}
513 			}
514 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
515 					&rt->rt_rmx);
516 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
517 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
518 			if (info.rti_info[RTAX_GENMASK])
519 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
520 			/* FALLTHROUGH */
521 		case RTM_LOCK:
522 			/* We don't support locks anymore */
523 			break;
524 		}
525 		RT_UNLOCK(rt);
526 		break;
527 
528 	default:
529 		senderr(EOPNOTSUPP);
530 	}
531 
532 flush:
533 	if (rtm) {
534 		if (error)
535 			rtm->rtm_errno = error;
536 		else
537 			rtm->rtm_flags |= RTF_DONE;
538 	}
539 	if (rt)		/* XXX can this be true? */
540 		RTFREE(rt);
541     {
542 	struct rawcb *rp = NULL;
543 	/*
544 	 * Check to see if we don't want our own messages.
545 	 */
546 	if ((so->so_options & SO_USELOOPBACK) == 0) {
547 		if (route_cb.any_count <= 1) {
548 			if (rtm)
549 				Free(rtm);
550 			m_freem(m);
551 			return (error);
552 		}
553 		/* There is another listener, so construct message */
554 		rp = sotorawcb(so);
555 	}
556 	if (rtm) {
557 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
558 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
559 			m_freem(m);
560 			m = NULL;
561 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
562 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
563 		Free(rtm);
564 	}
565 	if (m) {
566 		if (rp) {
567 			/*
568 			 * XXX insure we don't get a copy by
569 			 * invalidating our protocol
570 			 */
571 			unsigned short family = rp->rcb_proto.sp_family;
572 			rp->rcb_proto.sp_family = 0;
573 			rt_dispatch(m, info.rti_info[RTAX_DST]);
574 			rp->rcb_proto.sp_family = family;
575 		} else
576 			rt_dispatch(m, info.rti_info[RTAX_DST]);
577 	}
578     }
579 	return (error);
580 #undef	sa_equal
581 }
582 
583 static void
584 rt_setmetrics(u_long which, const struct rt_metrics *in,
585 	struct rt_metrics_lite *out)
586 {
587 #define metric(f, e) if (which & (f)) out->e = in->e;
588 	/*
589 	 * Only these are stored in the routing entry since introduction
590 	 * of tcp hostcache. The rest is ignored.
591 	 */
592 	metric(RTV_MTU, rmx_mtu);
593 	metric(RTV_EXPIRE, rmx_expire);
594 #undef metric
595 }
596 
597 static void
598 rt_getmetrics(const struct rt_metrics_lite *in, struct rt_metrics *out)
599 {
600 #define metric(e) out->e = in->e;
601 	bzero(out, sizeof(*out));
602 	metric(rmx_mtu);
603 	metric(rmx_expire);
604 #undef metric
605 }
606 
607 /*
608  * Extract the addresses of the passed sockaddrs.
609  * Do a little sanity checking so as to avoid bad memory references.
610  * This data is derived straight from userland.
611  */
612 static int
613 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
614 {
615 	struct sockaddr *sa;
616 	int i;
617 
618 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
619 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
620 			continue;
621 		sa = (struct sockaddr *)cp;
622 		/*
623 		 * It won't fit.
624 		 */
625 		if (cp + sa->sa_len > cplim)
626 			return (EINVAL);
627 		/*
628 		 * there are no more.. quit now
629 		 * If there are more bits, they are in error.
630 		 * I've seen this. route(1) can evidently generate these.
631 		 * This causes kernel to core dump.
632 		 * for compatibility, If we see this, point to a safe address.
633 		 */
634 		if (sa->sa_len == 0) {
635 			rtinfo->rti_info[i] = &sa_zero;
636 			return (0); /* should be EINVAL but for compat */
637 		}
638 		/* accept it */
639 		rtinfo->rti_info[i] = sa;
640 		cp += SA_SIZE(sa);
641 	}
642 	return (0);
643 }
644 
645 static struct mbuf *
646 rt_msg1(int type, struct rt_addrinfo *rtinfo)
647 {
648 	struct rt_msghdr *rtm;
649 	struct mbuf *m;
650 	int i;
651 	struct sockaddr *sa;
652 	int len, dlen;
653 
654 	switch (type) {
655 
656 	case RTM_DELADDR:
657 	case RTM_NEWADDR:
658 		len = sizeof(struct ifa_msghdr);
659 		break;
660 
661 	case RTM_DELMADDR:
662 	case RTM_NEWMADDR:
663 		len = sizeof(struct ifma_msghdr);
664 		break;
665 
666 	case RTM_IFINFO:
667 		len = sizeof(struct if_msghdr);
668 		break;
669 
670 	case RTM_IFANNOUNCE:
671 		len = sizeof(struct if_announcemsghdr);
672 		break;
673 
674 	default:
675 		len = sizeof(struct rt_msghdr);
676 	}
677 	if (len > MCLBYTES)
678 		panic("rt_msg1");
679 	m = m_gethdr(M_DONTWAIT, MT_DATA);
680 	if (m && len > MHLEN) {
681 		MCLGET(m, M_DONTWAIT);
682 		if ((m->m_flags & M_EXT) == 0) {
683 			m_free(m);
684 			m = NULL;
685 		}
686 	}
687 	if (m == NULL)
688 		return (m);
689 	m->m_pkthdr.len = m->m_len = len;
690 	m->m_pkthdr.rcvif = NULL;
691 	rtm = mtod(m, struct rt_msghdr *);
692 	bzero((caddr_t)rtm, len);
693 	for (i = 0; i < RTAX_MAX; i++) {
694 		if ((sa = rtinfo->rti_info[i]) == NULL)
695 			continue;
696 		rtinfo->rti_addrs |= (1 << i);
697 		dlen = SA_SIZE(sa);
698 		m_copyback(m, len, dlen, (caddr_t)sa);
699 		len += dlen;
700 	}
701 	if (m->m_pkthdr.len != len) {
702 		m_freem(m);
703 		return (NULL);
704 	}
705 	rtm->rtm_msglen = len;
706 	rtm->rtm_version = RTM_VERSION;
707 	rtm->rtm_type = type;
708 	return (m);
709 }
710 
711 static int
712 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
713 {
714 	int i;
715 	int len, dlen, second_time = 0;
716 	caddr_t cp0;
717 
718 	rtinfo->rti_addrs = 0;
719 again:
720 	switch (type) {
721 
722 	case RTM_DELADDR:
723 	case RTM_NEWADDR:
724 		len = sizeof(struct ifa_msghdr);
725 		break;
726 
727 	case RTM_IFINFO:
728 		len = sizeof(struct if_msghdr);
729 		break;
730 
731 	case RTM_NEWMADDR:
732 		len = sizeof(struct ifma_msghdr);
733 		break;
734 
735 	default:
736 		len = sizeof(struct rt_msghdr);
737 	}
738 	cp0 = cp;
739 	if (cp0)
740 		cp += len;
741 	for (i = 0; i < RTAX_MAX; i++) {
742 		struct sockaddr *sa;
743 
744 		if ((sa = rtinfo->rti_info[i]) == NULL)
745 			continue;
746 		rtinfo->rti_addrs |= (1 << i);
747 		dlen = SA_SIZE(sa);
748 		if (cp) {
749 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
750 			cp += dlen;
751 		}
752 		len += dlen;
753 	}
754 	len = ALIGN(len);
755 	if (cp == NULL && w != NULL && !second_time) {
756 		struct walkarg *rw = w;
757 
758 		if (rw->w_req) {
759 			if (rw->w_tmemsize < len) {
760 				if (rw->w_tmem)
761 					free(rw->w_tmem, M_RTABLE);
762 				rw->w_tmem = (caddr_t)
763 					malloc(len, M_RTABLE, M_NOWAIT);
764 				if (rw->w_tmem)
765 					rw->w_tmemsize = len;
766 			}
767 			if (rw->w_tmem) {
768 				cp = rw->w_tmem;
769 				second_time = 1;
770 				goto again;
771 			}
772 		}
773 	}
774 	if (cp) {
775 		struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
776 
777 		rtm->rtm_version = RTM_VERSION;
778 		rtm->rtm_type = type;
779 		rtm->rtm_msglen = len;
780 	}
781 	return (len);
782 }
783 
784 /*
785  * This routine is called to generate a message from the routing
786  * socket indicating that a redirect has occured, a routing lookup
787  * has failed, or that a protocol has detected timeouts to a particular
788  * destination.
789  */
790 void
791 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
792 {
793 	struct rt_msghdr *rtm;
794 	struct mbuf *m;
795 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
796 
797 	if (route_cb.any_count == 0)
798 		return;
799 	m = rt_msg1(type, rtinfo);
800 	if (m == NULL)
801 		return;
802 	rtm = mtod(m, struct rt_msghdr *);
803 	rtm->rtm_flags = RTF_DONE | flags;
804 	rtm->rtm_errno = error;
805 	rtm->rtm_addrs = rtinfo->rti_addrs;
806 	rt_dispatch(m, sa);
807 }
808 
809 /*
810  * This routine is called to generate a message from the routing
811  * socket indicating that the status of a network interface has changed.
812  */
813 void
814 rt_ifmsg(struct ifnet *ifp)
815 {
816 	struct if_msghdr *ifm;
817 	struct mbuf *m;
818 	struct rt_addrinfo info;
819 
820 	if (route_cb.any_count == 0)
821 		return;
822 	bzero((caddr_t)&info, sizeof(info));
823 	m = rt_msg1(RTM_IFINFO, &info);
824 	if (m == NULL)
825 		return;
826 	ifm = mtod(m, struct if_msghdr *);
827 	ifm->ifm_index = ifp->if_index;
828 	ifm->ifm_flags = ifp->if_flags;
829 	ifm->ifm_data = ifp->if_data;
830 	ifm->ifm_addrs = 0;
831 	rt_dispatch(m, NULL);
832 }
833 
834 /*
835  * This is called to generate messages from the routing socket
836  * indicating a network interface has had addresses associated with it.
837  * if we ever reverse the logic and replace messages TO the routing
838  * socket indicate a request to configure interfaces, then it will
839  * be unnecessary as the routing socket will automatically generate
840  * copies of it.
841  */
842 void
843 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
844 {
845 	struct rt_addrinfo info;
846 	struct sockaddr *sa = NULL;
847 	int pass;
848 	struct mbuf *m = NULL;
849 	struct ifnet *ifp = ifa->ifa_ifp;
850 
851 	if (route_cb.any_count == 0)
852 		return;
853 	for (pass = 1; pass < 3; pass++) {
854 		bzero((caddr_t)&info, sizeof(info));
855 		if ((cmd == RTM_ADD && pass == 1) ||
856 		    (cmd == RTM_DELETE && pass == 2)) {
857 			struct ifa_msghdr *ifam;
858 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
859 
860 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
861 			info.rti_info[RTAX_IFP] =
862 			    ifaddr_byindex(ifp->if_index)->ifa_addr;
863 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
864 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
865 			if ((m = rt_msg1(ncmd, &info)) == NULL)
866 				continue;
867 			ifam = mtod(m, struct ifa_msghdr *);
868 			ifam->ifam_index = ifp->if_index;
869 			ifam->ifam_metric = ifa->ifa_metric;
870 			ifam->ifam_flags = ifa->ifa_flags;
871 			ifam->ifam_addrs = info.rti_addrs;
872 		}
873 		if ((cmd == RTM_ADD && pass == 2) ||
874 		    (cmd == RTM_DELETE && pass == 1)) {
875 			struct rt_msghdr *rtm;
876 
877 			if (rt == NULL)
878 				continue;
879 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
880 			info.rti_info[RTAX_DST] = sa = rt_key(rt);
881 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
882 			if ((m = rt_msg1(cmd, &info)) == NULL)
883 				continue;
884 			rtm = mtod(m, struct rt_msghdr *);
885 			rtm->rtm_index = ifp->if_index;
886 			rtm->rtm_flags |= rt->rt_flags;
887 			rtm->rtm_errno = error;
888 			rtm->rtm_addrs = info.rti_addrs;
889 		}
890 		rt_dispatch(m, sa);
891 	}
892 }
893 
894 /*
895  * This is the analogue to the rt_newaddrmsg which performs the same
896  * function but for multicast group memberhips.  This is easier since
897  * there is no route state to worry about.
898  */
899 void
900 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
901 {
902 	struct rt_addrinfo info;
903 	struct mbuf *m = NULL;
904 	struct ifnet *ifp = ifma->ifma_ifp;
905 	struct ifma_msghdr *ifmam;
906 
907 	if (route_cb.any_count == 0)
908 		return;
909 
910 	bzero((caddr_t)&info, sizeof(info));
911 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
912 	info.rti_info[RTAX_IFP] =
913 	    ifp ? ifaddr_byindex(ifp->if_index)->ifa_addr : NULL;
914 	/*
915 	 * If a link-layer address is present, present it as a ``gateway''
916 	 * (similarly to how ARP entries, e.g., are presented).
917 	 */
918 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
919 	m = rt_msg1(cmd, &info);
920 	if (m == NULL)
921 		return;
922 	ifmam = mtod(m, struct ifma_msghdr *);
923 	ifmam->ifmam_index = ifp->if_index;
924 	ifmam->ifmam_addrs = info.rti_addrs;
925 	rt_dispatch(m, ifma->ifma_addr);
926 }
927 
928 /*
929  * This is called to generate routing socket messages indicating
930  * network interface arrival and departure.
931  */
932 void
933 rt_ifannouncemsg(struct ifnet *ifp, int what)
934 {
935 	struct if_announcemsghdr *ifan;
936 	struct mbuf *m;
937 	struct rt_addrinfo info;
938 
939 	if (route_cb.any_count == 0)
940 		return;
941 	bzero((caddr_t)&info, sizeof(info));
942 	m = rt_msg1(RTM_IFANNOUNCE, &info);
943 	if (m == NULL)
944 		return;
945 	ifan = mtod(m, struct if_announcemsghdr *);
946 	ifan->ifan_index = ifp->if_index;
947 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
948 	ifan->ifan_what = what;
949 	rt_dispatch(m, NULL);
950  }
951 
952 static void
953 rt_dispatch(struct mbuf *m, const struct sockaddr *sa)
954 {
955 	unsigned short *family;
956 	struct m_tag *tag;
957 
958 	/*
959 	 * Preserve the family from the sockaddr, if any, in an m_tag for
960 	 * use when injecting the mbuf into the routing socket buffer from
961 	 * the netisr.
962 	 */
963 	if (sa != NULL) {
964 		tag = m_tag_get(PACKET_TAG_RTSOCKFAM, sizeof(unsigned short),
965 		    M_NOWAIT);
966 		if (tag == NULL) {
967 			m_freem(m);
968 			return;
969 		}
970 		family = (unsigned short *)(tag + 1);
971 		*family = sa ? sa->sa_family : 0;
972 		m_tag_prepend(m, tag);
973 	}
974 	netisr_queue(NETISR_ROUTE, m);
975 }
976 
977 /*
978  * This is used in dumping the kernel table via sysctl().
979  */
980 static int
981 sysctl_dumpentry(struct radix_node *rn, void *vw)
982 {
983 	struct walkarg *w = vw;
984 	struct rtentry *rt = (struct rtentry *)rn;
985 	int error = 0, size;
986 	struct rt_addrinfo info;
987 
988 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
989 		return 0;
990 	bzero((caddr_t)&info, sizeof(info));
991 	info.rti_info[RTAX_DST] = rt_key(rt);
992 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
993 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
994 	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
995 	if (rt->rt_ifp) {
996 		info.rti_info[RTAX_IFP] =
997 		    ifaddr_byindex(rt->rt_ifp->if_index)->ifa_addr;
998 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
999 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
1000 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
1001 	}
1002 	size = rt_msg2(RTM_GET, &info, NULL, w);
1003 	if (w->w_req && w->w_tmem) {
1004 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
1005 
1006 		rtm->rtm_flags = rt->rt_flags;
1007 		rtm->rtm_use = rt->rt_rmx.rmx_pksent;
1008 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
1009 		rtm->rtm_index = rt->rt_ifp->if_index;
1010 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
1011 		rtm->rtm_addrs = info.rti_addrs;
1012 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
1013 		return (error);
1014 	}
1015 	return (error);
1016 }
1017 
/*
 * Emit the interface list for the NET_RT_IFLIST sysctl.
 *
 * af - address family filter for the per-address messages; 0 means all.
 * w  - walk state; a non-zero w_arg restricts output to the interface
 *      with that index.
 *
 * For each selected interface one RTM_IFINFO message is written, followed
 * by one RTM_NEWADDR message for every address after the first on the
 * list reached from ifaddr_byindex().  Addresses rejected by prison_if()
 * are skipped for jailed callers.  Messages are built in w->w_tmem by
 * rt_msg2(); when there is no request or no buffer nothing is written.
 * Returns 0 or the first error from SYSCTL_OUT().
 */
static int
sysctl_iflist(int af, struct walkarg *w)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct rt_addrinfo info;
	int len, error = 0;

	bzero((caddr_t)&info, sizeof(info));
	/* IFNET_RLOCK(); */		/* could sleep XXX */
	TAILQ_FOREACH(ifp, &ifnet, if_link) {
		if (w->w_arg && w->w_arg != ifp->if_index)
			continue;
		ifa = ifaddr_byindex(ifp->if_index);
		/* The interface message carries only the IFP sockaddr. */
		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
		len = rt_msg2(RTM_IFINFO, &info, NULL, w);
		/* Cleared so the address messages below do not repeat it. */
		info.rti_info[RTAX_IFP] = NULL;
		if (w->w_req && w->w_tmem) {
			struct if_msghdr *ifm;

			ifm = (struct if_msghdr *)w->w_tmem;
			ifm->ifm_index = ifp->if_index;
			ifm->ifm_flags = ifp->if_flags;
			ifm->ifm_data = ifp->if_data;
			ifm->ifm_addrs = info.rti_addrs;
			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
			if (error)
				goto done;
		}
		/* Walk the addresses that follow the first list entry. */
		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != NULL) {
			if (af && af != ifa->ifa_addr->sa_family)
				continue;
			if (jailed(curthread->td_ucred) &&
			    prison_if(curthread->td_ucred, ifa->ifa_addr))
				continue;
			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
			len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
			if (w->w_req && w->w_tmem) {
				struct ifa_msghdr *ifam;

				ifam = (struct ifa_msghdr *)w->w_tmem;
				ifam->ifam_index = ifa->ifa_ifp->if_index;
				ifam->ifam_flags = ifa->ifa_flags;
				ifam->ifam_metric = ifa->ifa_metric;
				ifam->ifam_addrs = info.rti_addrs;
				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
				if (error)
					goto done;
			}
		}
		/* Reset the per-address slots before the next interface. */
		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
			info.rti_info[RTAX_BRD] = NULL;
	}
done:
	/* IFNET_RUNLOCK(); */ /* XXX */
	return (error);
}
1077 
1078 int
1079 sysctl_ifmalist(int af, struct walkarg *w)
1080 {
1081 	struct ifnet *ifp;
1082 	struct ifmultiaddr *ifma;
1083 	struct	rt_addrinfo info;
1084 	int	len, error = 0;
1085 	struct ifaddr *ifa;
1086 
1087 	bzero((caddr_t)&info, sizeof(info));
1088 	/* IFNET_RLOCK(); */		/* could sleep XXX */
1089 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1090 		if (w->w_arg && w->w_arg != ifp->if_index)
1091 			continue;
1092 		ifa = ifaddr_byindex(ifp->if_index);
1093 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
1094 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1095 			if (af && af != ifma->ifma_addr->sa_family)
1096 				continue;
1097 			if (jailed(curproc->p_ucred) &&
1098 			    prison_if(curproc->p_ucred, ifma->ifma_addr))
1099 				continue;
1100 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1101 			info.rti_info[RTAX_GATEWAY] =
1102 			    (ifma->ifma_addr->sa_family != AF_LINK) ?
1103 			    ifma->ifma_lladdr : NULL;
1104 			len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
1105 			if (w->w_req && w->w_tmem) {
1106 				struct ifma_msghdr *ifmam;
1107 
1108 				ifmam = (struct ifma_msghdr *)w->w_tmem;
1109 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1110 				ifmam->ifmam_flags = 0;
1111 				ifmam->ifmam_addrs = info.rti_addrs;
1112 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1113 				if (error)
1114 					goto done;
1115 			}
1116 		}
1117 	}
1118 done:
1119 	/* IFNET_RUNLOCK(); */ /* XXX */
1120 	return (error);
1121 }
1122 
1123 static int
1124 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1125 {
1126 	int	*name = (int *)arg1;
1127 	u_int	namelen = arg2;
1128 	struct radix_node_head *rnh;
1129 	int	i, lim, s, error = EINVAL;
1130 	u_char	af;
1131 	struct	walkarg w;
1132 
1133 	name ++;
1134 	namelen--;
1135 	if (req->newptr)
1136 		return (EPERM);
1137 	if (namelen != 3)
1138 		return ((namelen < 3) ? EISDIR : ENOTDIR);
1139 	af = name[0];
1140 	if (af > AF_MAX)
1141 		return (EINVAL);
1142 	bzero(&w, sizeof(w));
1143 	w.w_op = name[1];
1144 	w.w_arg = name[2];
1145 	w.w_req = req;
1146 
1147 	s = splnet();
1148 	switch (w.w_op) {
1149 
1150 	case NET_RT_DUMP:
1151 	case NET_RT_FLAGS:
1152 		if (af == 0) {			/* dump all tables */
1153 			i = 1;
1154 			lim = AF_MAX;
1155 		} else				/* dump only one table */
1156 			i = lim = af;
1157 		for (error = 0; error == 0 && i <= lim; i++)
1158 			if ((rnh = rt_tables[i]) != NULL) {
1159 				/* RADIX_NODE_HEAD_LOCK(rnh); */
1160 			    	error = rnh->rnh_walktree(rnh,
1161 				    sysctl_dumpentry, &w);/* could sleep XXX */
1162 				/* RADIX_NODE_HEAD_UNLOCK(rnh); */
1163 			} else if (af != 0)
1164 				error = EAFNOSUPPORT;
1165 		break;
1166 
1167 	case NET_RT_IFLIST:
1168 		error = sysctl_iflist(af, &w);
1169 		break;
1170 
1171 	case NET_RT_IFMALIST:
1172 		error = sysctl_ifmalist(af, &w);
1173 		break;
1174 	}
1175 	splx(s);
1176 	if (w.w_tmem)
1177 		free(w.w_tmem, M_RTABLE);
1178 	return (error);
1179 }
1180 
1181 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1182 
1183 /*
1184  * Definitions of protocols supported in the ROUTE domain.
1185  */
1186 
1187 extern struct domain routedomain;		/* or at least forward */
1188 
1189 static struct protosw routesw[] = {
1190 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1191   0,		route_output,	raw_ctlinput,	0,
1192   0,
1193   raw_init,	0,		0,		0,
1194   &route_usrreqs
1195 }
1196 };
1197 
1198 static struct domain routedomain =
1199     { PF_ROUTE, "route", 0, 0, 0,
1200       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1201 
1202 DOMAIN_SET(route);
1203