xref: /freebsd/sys/net/rtsock.c (revision 0fa02ea5f786ef02befd46f8f083f48c8cd9630b)
1 /*
2  * Copyright (c) 1988, 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)rtsock.c	8.7 (Berkeley) 10/12/95
34  * $FreeBSD$
35  */
36 
37 #include <sys/param.h>
38 #include <sys/domain.h>
39 #include <sys/kernel.h>
40 #include <sys/jail.h>
41 #include <sys/malloc.h>
42 #include <sys/mbuf.h>
43 #include <sys/proc.h>
44 #include <sys/protosw.h>
45 #include <sys/signalvar.h>
46 #include <sys/socket.h>
47 #include <sys/socketvar.h>
48 #include <sys/sysctl.h>
49 #include <sys/systm.h>
50 
51 #include <net/if.h>
52 #include <net/raw_cb.h>
53 #include <net/route.h>
54 
55 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
56 
57 /* NB: these are not modified */
58 static struct	sockaddr route_dst = { 2, PF_ROUTE, };
59 static struct	sockaddr route_src = { 2, PF_ROUTE, };
60 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
61 
/*
 * Number of routing sockets currently attached, broken down by the
 * protocol selector given at attach time.  The counters are adjusted
 * under RTSOCK_LOCK() by rts_attach() and rts_detach().
 */
static struct {
	int	ip_count;	/* attached w/ AF_INET */
	int	ip6_count;	/* attached w/ AF_INET6 */
	int	ipx_count;	/* attached w/ AF_IPX */
	int	any_count;	/* total attached */
} route_cb;
68 
69 struct mtx rtsock_mtx;
70 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
71 
72 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
73 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
74 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
75 
76 struct walkarg {
77 	int	w_tmemsize;
78 	int	w_op, w_arg;
79 	caddr_t	w_tmem;
80 	struct sysctl_req *w_req;
81 };
82 
83 static struct mbuf *rt_msg1(int, struct rt_addrinfo *);
84 static int	rt_msg2(int, struct rt_addrinfo *, caddr_t, struct walkarg *);
85 static int	rt_xaddrs(caddr_t, caddr_t, struct rt_addrinfo *);
86 static int	sysctl_dumpentry(struct radix_node *rn, void *vw);
87 static int	sysctl_iflist(int af, struct walkarg *w);
88 static int	sysctl_ifmalist(int af, struct walkarg *w);
89 static int	route_output(struct mbuf *, struct socket *);
90 static void	rt_setmetrics(u_long, struct rt_metrics *, struct rt_metrics_lite *);
91 static void	rt_getmetrics(struct rt_metrics_lite *, struct rt_metrics *);
92 static void	rt_dispatch(struct mbuf *, struct sockaddr *);
93 
94 /*
95  * It really doesn't make any sense at all for this code to share much
96  * with raw_usrreq.c, since its functionality is so restricted.  XXX
97  */
98 static int
99 rts_abort(struct socket *so)
100 {
101 	int s, error;
102 	s = splnet();
103 	error = raw_usrreqs.pru_abort(so);
104 	splx(s);
105 	return error;
106 }
107 
108 /* pru_accept is EOPNOTSUPP */
109 
110 static int
111 rts_attach(struct socket *so, int proto, struct thread *td)
112 {
113 	struct rawcb *rp;
114 	int s, error;
115 
116 	if (sotorawcb(so) != 0)
117 		return EISCONN;	/* XXX panic? */
118 	/* XXX */
119 	MALLOC(rp, struct rawcb *, sizeof *rp, M_PCB, M_WAITOK | M_ZERO);
120 	if (rp == 0)
121 		return ENOBUFS;
122 
123 	/*
124 	 * The splnet() is necessary to block protocols from sending
125 	 * error notifications (like RTM_REDIRECT or RTM_LOSING) while
126 	 * this PCB is extant but incompletely initialized.
127 	 * Probably we should try to do more of this work beforehand and
128 	 * eliminate the spl.
129 	 */
130 	s = splnet();
131 	so->so_pcb = (caddr_t)rp;
132 	error = raw_attach(so, proto);
133 	rp = sotorawcb(so);
134 	if (error) {
135 		splx(s);
136 		so->so_pcb = NULL;
137 		free(rp, M_PCB);
138 		return error;
139 	}
140 	RTSOCK_LOCK();
141 	switch(rp->rcb_proto.sp_protocol) {
142 	case AF_INET:
143 		route_cb.ip_count++;
144 		break;
145 	case AF_INET6:
146 		route_cb.ip6_count++;
147 		break;
148 	case AF_IPX:
149 		route_cb.ipx_count++;
150 		break;
151 	}
152 	rp->rcb_faddr = &route_src;
153 	route_cb.any_count++;
154 	RTSOCK_UNLOCK();
155 	soisconnected(so);
156 	so->so_options |= SO_USELOOPBACK;
157 	splx(s);
158 	return 0;
159 }
160 
161 static int
162 rts_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
163 {
164 	int s, error;
165 	s = splnet();
166 	error = raw_usrreqs.pru_bind(so, nam, td); /* xxx just EINVAL */
167 	splx(s);
168 	return error;
169 }
170 
171 static int
172 rts_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
173 {
174 	int s, error;
175 	s = splnet();
176 	error = raw_usrreqs.pru_connect(so, nam, td); /* XXX just EINVAL */
177 	splx(s);
178 	return error;
179 }
180 
181 /* pru_connect2 is EOPNOTSUPP */
182 /* pru_control is EOPNOTSUPP */
183 
184 static int
185 rts_detach(struct socket *so)
186 {
187 	struct rawcb *rp = sotorawcb(so);
188 	int s, error;
189 
190 	s = splnet();
191 	if (rp != 0) {
192 		RTSOCK_LOCK();
193 		switch(rp->rcb_proto.sp_protocol) {
194 		case AF_INET:
195 			route_cb.ip_count--;
196 			break;
197 		case AF_INET6:
198 			route_cb.ip6_count--;
199 			break;
200 		case AF_IPX:
201 			route_cb.ipx_count--;
202 			break;
203 		}
204 		route_cb.any_count--;
205 		RTSOCK_UNLOCK();
206 	}
207 	error = raw_usrreqs.pru_detach(so);
208 	splx(s);
209 	return error;
210 }
211 
212 static int
213 rts_disconnect(struct socket *so)
214 {
215 	int s, error;
216 	s = splnet();
217 	error = raw_usrreqs.pru_disconnect(so);
218 	splx(s);
219 	return error;
220 }
221 
222 /* pru_listen is EOPNOTSUPP */
223 
224 static int
225 rts_peeraddr(struct socket *so, struct sockaddr **nam)
226 {
227 	int s, error;
228 	s = splnet();
229 	error = raw_usrreqs.pru_peeraddr(so, nam);
230 	splx(s);
231 	return error;
232 }
233 
234 /* pru_rcvd is EOPNOTSUPP */
235 /* pru_rcvoob is EOPNOTSUPP */
236 
237 static int
238 rts_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
239 	 struct mbuf *control, struct thread *td)
240 {
241 	int s, error;
242 	s = splnet();
243 	error = raw_usrreqs.pru_send(so, flags, m, nam, control, td);
244 	splx(s);
245 	return error;
246 }
247 
248 /* pru_sense is null */
249 
250 static int
251 rts_shutdown(struct socket *so)
252 {
253 	int s, error;
254 	s = splnet();
255 	error = raw_usrreqs.pru_shutdown(so);
256 	splx(s);
257 	return error;
258 }
259 
260 static int
261 rts_sockaddr(struct socket *so, struct sockaddr **nam)
262 {
263 	int s, error;
264 	s = splnet();
265 	error = raw_usrreqs.pru_sockaddr(so, nam);
266 	splx(s);
267 	return error;
268 }
269 
270 static struct pr_usrreqs route_usrreqs = {
271 	rts_abort, pru_accept_notsupp, rts_attach, rts_bind, rts_connect,
272 	pru_connect2_notsupp, pru_control_notsupp, rts_detach, rts_disconnect,
273 	pru_listen_notsupp, rts_peeraddr, pru_rcvd_notsupp, pru_rcvoob_notsupp,
274 	rts_send, pru_sense_null, rts_shutdown, rts_sockaddr,
275 	sosend, soreceive, sopoll, pru_sosetlabel_null
276 };
277 
278 /*ARGSUSED*/
279 static int
280 route_output(m, so)
281 	register struct mbuf *m;
282 	struct socket *so;
283 {
284 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
285 	register struct rt_msghdr *rtm = 0;
286 	register struct rtentry *rt = 0;
287 	struct radix_node_head *rnh;
288 	struct rt_addrinfo info;
289 	int len, error = 0;
290 	struct ifnet *ifp = 0;
291 	struct ifaddr *ifa = 0;
292 
293 #define senderr(e) { error = e; goto flush;}
294 	if (m == 0 || ((m->m_len < sizeof(long)) &&
295 		       (m = m_pullup(m, sizeof(long))) == 0))
296 		return (ENOBUFS);
297 	if ((m->m_flags & M_PKTHDR) == 0)
298 		panic("route_output");
299 	len = m->m_pkthdr.len;
300 	if (len < sizeof(*rtm) ||
301 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen) {
302 		info.rti_info[RTAX_DST] = 0;
303 		senderr(EINVAL);
304 	}
305 	R_Malloc(rtm, struct rt_msghdr *, len);
306 	if (rtm == 0) {
307 		info.rti_info[RTAX_DST] = 0;
308 		senderr(ENOBUFS);
309 	}
310 	m_copydata(m, 0, len, (caddr_t)rtm);
311 	if (rtm->rtm_version != RTM_VERSION) {
312 		info.rti_info[RTAX_DST] = 0;
313 		senderr(EPROTONOSUPPORT);
314 	}
315 	rtm->rtm_pid = curproc->p_pid;
316 	bzero(&info, sizeof(info));
317 	info.rti_addrs = rtm->rtm_addrs;
318 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, &info)) {
319 		info.rti_info[RTAX_DST] = 0;
320 		senderr(EINVAL);
321 	}
322 	info.rti_flags = rtm->rtm_flags;
323 	if (info.rti_info[RTAX_DST] == 0 ||
324 	    info.rti_info[RTAX_DST]->sa_family >= AF_MAX ||
325 	    (info.rti_info[RTAX_GATEWAY] != 0 &&
326 	     info.rti_info[RTAX_GATEWAY]->sa_family >= AF_MAX))
327 		senderr(EINVAL);
328 	if (info.rti_info[RTAX_GENMASK]) {
329 		struct radix_node *t;
330 		t = rn_addmask((caddr_t) info.rti_info[RTAX_GENMASK], 0, 1);
331 		if (t && Bcmp((caddr_t *) info.rti_info[RTAX_GENMASK] + 1,
332 			      (caddr_t *)t->rn_key + 1,
333 			      *(u_char *)t->rn_key - 1) == 0)
334 			info.rti_info[RTAX_GENMASK] =
335 				(struct sockaddr *)(t->rn_key);
336 		else
337 			senderr(ENOBUFS);
338 	}
339 
340 	/*
341 	 * Verify that the caller has the appropriate privilege; RTM_GET
342 	 * is the only operation the non-superuser is allowed.
343 	 */
344 	if (rtm->rtm_type != RTM_GET && (error = suser(curthread)) != 0)
345 		senderr(error);
346 
347 	switch (rtm->rtm_type) {
348 		struct rtentry *saved_nrt;
349 
350 	case RTM_ADD:
351 		if (info.rti_info[RTAX_GATEWAY] == 0)
352 			senderr(EINVAL);
353 		saved_nrt = 0;
354 		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
355 		if (error == 0 && saved_nrt) {
356 			RT_LOCK(saved_nrt);
357 			rt_setmetrics(rtm->rtm_inits,
358 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
359 			RT_REMREF(saved_nrt);
360 			saved_nrt->rt_genmask = info.rti_info[RTAX_GENMASK];
361 			RT_UNLOCK(saved_nrt);
362 		}
363 		break;
364 
365 	case RTM_DELETE:
366 		saved_nrt = 0;
367 		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
368 		if (error == 0) {
369 			RT_LOCK(saved_nrt);
370 			rt = saved_nrt;
371 			goto report;
372 		}
373 		break;
374 
375 	case RTM_GET:
376 	case RTM_CHANGE:
377 	case RTM_LOCK:
378 		rnh = rt_tables[info.rti_info[RTAX_DST]->sa_family];
379 		if (rnh == 0)
380 			senderr(EAFNOSUPPORT);
381 		RADIX_NODE_HEAD_LOCK(rnh);
382 		rt = (struct rtentry *) rnh->rnh_lookup(info.rti_info[RTAX_DST],
383 			info.rti_info[RTAX_NETMASK], rnh);
384 		RADIX_NODE_HEAD_UNLOCK(rnh);
385 		if (rt == NULL)		/* XXX looks bogus */
386 			senderr(ESRCH);
387 		RT_LOCK(rt);
388 		RT_ADDREF(rt);
389 
390 		switch(rtm->rtm_type) {
391 
392 		case RTM_GET:
393 		report:
394 			RT_LOCK_ASSERT(rt);
395 			info.rti_info[RTAX_DST] = rt_key(rt);
396 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
397 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
398 			info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
399 			if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
400 				ifp = rt->rt_ifp;
401 				if (ifp) {
402 					info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
403 					info.rti_info[RTAX_IFA] =
404 						rt->rt_ifa->ifa_addr;
405 					if (ifp->if_flags & IFF_POINTOPOINT)
406 						 info.rti_info[RTAX_BRD] =
407 							rt->rt_ifa->ifa_dstaddr;
408 					rtm->rtm_index = ifp->if_index;
409 				} else {
410 					info.rti_info[RTAX_IFP] = 0;
411 					info.rti_info[RTAX_IFA] = 0;
412 				}
413 			}
414 			len = rt_msg2(rtm->rtm_type, &info, (caddr_t)0,
415 				(struct walkarg *)0);
416 			if (len > rtm->rtm_msglen) {
417 				struct rt_msghdr *new_rtm;
418 				R_Malloc(new_rtm, struct rt_msghdr *, len);
419 				if (new_rtm == 0) {
420 					RT_UNLOCK(rt);
421 					senderr(ENOBUFS);
422 				}
423 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
424 				Free(rtm); rtm = new_rtm;
425 			}
426 			(void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm,
427 				(struct walkarg *)0);
428 			rtm->rtm_flags = rt->rt_flags;
429 			rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
430 			rtm->rtm_addrs = info.rti_addrs;
431 			break;
432 
433 		case RTM_CHANGE:
434 			/*
435 			 * New gateway could require new ifaddr, ifp;
436 			 * flags may also be different; ifp may be specified
437 			 * by ll sockaddr when protocol address is ambiguous
438 			 */
439 			if (((rt->rt_flags & RTF_GATEWAY) &&
440 			     info.rti_info[RTAX_GATEWAY] != NULL) ||
441 			    info.rti_info[RTAX_IFP] != NULL ||
442 			    (info.rti_info[RTAX_IFA] != NULL &&
443 			     !sa_equal(info.rti_info[RTAX_IFA],
444 				       rt->rt_ifa->ifa_addr))) {
445 				if ((error = rt_getifa(&info)) != 0) {
446 					RT_UNLOCK(rt);
447 					senderr(error);
448 				}
449 			}
450 			if (info.rti_info[RTAX_GATEWAY] != NULL &&
451 			    (error = rt_setgate(rt, rt_key(rt),
452 					info.rti_info[RTAX_GATEWAY])) != 0) {
453 				RT_UNLOCK(rt);
454 				senderr(error);
455 			}
456 			if ((ifa = info.rti_ifa) != NULL) {
457 				struct ifaddr *oifa = rt->rt_ifa;
458 				if (oifa != ifa) {
459 					if (oifa) {
460 						if (oifa->ifa_rtrequest)
461 							oifa->ifa_rtrequest(
462 								RTM_DELETE, rt,
463 								&info);
464 						IFAFREE(oifa);
465 					}
466 				        IFAREF(ifa);
467 				        rt->rt_ifa = ifa;
468 				        rt->rt_ifp = info.rti_ifp;
469 				}
470 			}
471 			rt_setmetrics(rtm->rtm_inits, &rtm->rtm_rmx,
472 					&rt->rt_rmx);
473 			if (rt->rt_ifa && rt->rt_ifa->ifa_rtrequest)
474 			       rt->rt_ifa->ifa_rtrequest(RTM_ADD, rt, &info);
475 			if (info.rti_info[RTAX_GENMASK])
476 				rt->rt_genmask = info.rti_info[RTAX_GENMASK];
477 			/* FALLTHROUGH */
478 		case RTM_LOCK:
479 			/* We don't support locks anymore */
480 			break;
481 		}
482 		RT_UNLOCK(rt);
483 		break;
484 
485 	default:
486 		senderr(EOPNOTSUPP);
487 	}
488 
489 flush:
490 	if (rtm) {
491 		if (error)
492 			rtm->rtm_errno = error;
493 		else
494 			rtm->rtm_flags |= RTF_DONE;
495 	}
496 	if (rt)		/* XXX can this be true? */
497 		RTFREE(rt);
498     {
499 	register struct rawcb *rp = 0;
500 	/*
501 	 * Check to see if we don't want our own messages.
502 	 */
503 	if ((so->so_options & SO_USELOOPBACK) == 0) {
504 		if (route_cb.any_count <= 1) {
505 			if (rtm)
506 				Free(rtm);
507 			m_freem(m);
508 			return (error);
509 		}
510 		/* There is another listener, so construct message */
511 		rp = sotorawcb(so);
512 	}
513 	if (rtm) {
514 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
515 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
516 			m_freem(m);
517 			m = NULL;
518 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
519 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
520 		Free(rtm);
521 	}
522 	if (m) {
523 		if (rp) {
524 			/*
525 			 * XXX insure we don't get a copy by
526 			 * invalidating our protocol
527 			 */
528 			unsigned short family = rp->rcb_proto.sp_family;
529 			rp->rcb_proto.sp_family = 0;
530 			rt_dispatch(m, info.rti_info[RTAX_DST]);
531 			rp->rcb_proto.sp_family = family;
532 		} else
533 			rt_dispatch(m, info.rti_info[RTAX_DST]);
534 	}
535     }
536 	return (error);
537 #undef	sa_equal
538 }
539 
540 static void
541 rt_setmetrics(u_long which, struct rt_metrics *in, struct rt_metrics_lite *out)
542 {
543 #define metric(f, e) if (which & (f)) out->e = in->e;
544 	/*
545 	 * Only these are stored in the routing entry since introduction
546 	 * of tcp hostcache. The rest is ignored.
547 	 */
548 	metric(RTV_MTU, rmx_mtu);
549 	metric(RTV_EXPIRE, rmx_expire);
550 #undef metric
551 }
552 
553 static void
554 rt_getmetrics(struct rt_metrics_lite *in, struct rt_metrics *out)
555 {
556 #define metric(e) out->e = in->e;
557 	bzero(out, sizeof(*out));
558 	metric(rmx_mtu);
559 	metric(rmx_expire);
560 #undef metric
561 }
562 
/*
 * Round a length up to the next multiple of sizeof(long); lengths that
 * are zero or negative yield sizeof(long).  Evaluates its argument more
 * than once.
 */
#define ROUNDUP(a) \
	((a) > 0 ? (((a) + sizeof(long) - 1) & ~(sizeof(long) - 1)) : \
	    sizeof(long))
565 
566 /*
567  * Extract the addresses of the passed sockaddrs.
568  * Do a little sanity checking so as to avoid bad memory references.
569  * This data is derived straight from userland.
570  */
571 static int
572 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
573 {
574 #define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))
575 	register struct sockaddr *sa;
576 	register int i;
577 
578 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
579 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
580 			continue;
581 		sa = (struct sockaddr *)cp;
582 		/*
583 		 * It won't fit.
584 		 */
585 		if (cp + sa->sa_len > cplim)
586 			return (EINVAL);
587 		/*
588 		 * there are no more.. quit now
589 		 * If there are more bits, they are in error.
590 		 * I've seen this. route(1) can evidently generate these.
591 		 * This causes kernel to core dump.
592 		 * for compatibility, If we see this, point to a safe address.
593 		 */
594 		if (sa->sa_len == 0) {
595 			rtinfo->rti_info[i] = &sa_zero;
596 			return (0); /* should be EINVAL but for compat */
597 		}
598 		/* accept it */
599 		rtinfo->rti_info[i] = sa;
600 		ADVANCE(cp, sa);
601 	}
602 	return (0);
603 #undef ADVANCE
604 }
605 
606 static struct mbuf *
607 rt_msg1(int type, struct rt_addrinfo *rtinfo)
608 {
609 	register struct rt_msghdr *rtm;
610 	register struct mbuf *m;
611 	register int i;
612 	register struct sockaddr *sa;
613 	int len, dlen;
614 
615 	switch (type) {
616 
617 	case RTM_DELADDR:
618 	case RTM_NEWADDR:
619 		len = sizeof(struct ifa_msghdr);
620 		break;
621 
622 	case RTM_DELMADDR:
623 	case RTM_NEWMADDR:
624 		len = sizeof(struct ifma_msghdr);
625 		break;
626 
627 	case RTM_IFINFO:
628 		len = sizeof(struct if_msghdr);
629 		break;
630 
631 	case RTM_IFANNOUNCE:
632 		len = sizeof(struct if_announcemsghdr);
633 		break;
634 
635 	default:
636 		len = sizeof(struct rt_msghdr);
637 	}
638 	if (len > MCLBYTES)
639 		panic("rt_msg1");
640 	m = m_gethdr(M_DONTWAIT, MT_DATA);
641 	if (m && len > MHLEN) {
642 		MCLGET(m, M_DONTWAIT);
643 		if ((m->m_flags & M_EXT) == 0) {
644 			m_free(m);
645 			m = NULL;
646 		}
647 	}
648 	if (m == 0)
649 		return (m);
650 	m->m_pkthdr.len = m->m_len = len;
651 	m->m_pkthdr.rcvif = 0;
652 	rtm = mtod(m, struct rt_msghdr *);
653 	bzero((caddr_t)rtm, len);
654 	for (i = 0; i < RTAX_MAX; i++) {
655 		if ((sa = rtinfo->rti_info[i]) == NULL)
656 			continue;
657 		rtinfo->rti_addrs |= (1 << i);
658 		dlen = ROUNDUP(sa->sa_len);
659 		m_copyback(m, len, dlen, (caddr_t)sa);
660 		len += dlen;
661 	}
662 	if (m->m_pkthdr.len != len) {
663 		m_freem(m);
664 		return (NULL);
665 	}
666 	rtm->rtm_msglen = len;
667 	rtm->rtm_version = RTM_VERSION;
668 	rtm->rtm_type = type;
669 	return (m);
670 }
671 
672 static int
673 rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
674 {
675 	register int i;
676 	int len, dlen, second_time = 0;
677 	caddr_t cp0;
678 
679 	rtinfo->rti_addrs = 0;
680 again:
681 	switch (type) {
682 
683 	case RTM_DELADDR:
684 	case RTM_NEWADDR:
685 		len = sizeof(struct ifa_msghdr);
686 		break;
687 
688 	case RTM_IFINFO:
689 		len = sizeof(struct if_msghdr);
690 		break;
691 
692 	case RTM_NEWMADDR:
693 		len = sizeof(struct ifma_msghdr);
694 		break;
695 
696 	default:
697 		len = sizeof(struct rt_msghdr);
698 	}
699 	cp0 = cp;
700 	if (cp0)
701 		cp += len;
702 	for (i = 0; i < RTAX_MAX; i++) {
703 		register struct sockaddr *sa;
704 
705 		if ((sa = rtinfo->rti_info[i]) == 0)
706 			continue;
707 		rtinfo->rti_addrs |= (1 << i);
708 		dlen = ROUNDUP(sa->sa_len);
709 		if (cp) {
710 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
711 			cp += dlen;
712 		}
713 		len += dlen;
714 	}
715 	len = ALIGN(len);
716 	if (cp == 0 && w != NULL && !second_time) {
717 		register struct walkarg *rw = w;
718 
719 		if (rw->w_req) {
720 			if (rw->w_tmemsize < len) {
721 				if (rw->w_tmem)
722 					free(rw->w_tmem, M_RTABLE);
723 				rw->w_tmem = (caddr_t)
724 					malloc(len, M_RTABLE, M_NOWAIT);
725 				if (rw->w_tmem)
726 					rw->w_tmemsize = len;
727 			}
728 			if (rw->w_tmem) {
729 				cp = rw->w_tmem;
730 				second_time = 1;
731 				goto again;
732 			}
733 		}
734 	}
735 	if (cp) {
736 		register struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
737 
738 		rtm->rtm_version = RTM_VERSION;
739 		rtm->rtm_type = type;
740 		rtm->rtm_msglen = len;
741 	}
742 	return (len);
743 }
744 
745 /*
746  * This routine is called to generate a message from the routing
747  * socket indicating that a redirect has occured, a routing lookup
748  * has failed, or that a protocol has detected timeouts to a particular
749  * destination.
750  */
751 void
752 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
753 {
754 	struct rt_msghdr *rtm;
755 	struct mbuf *m;
756 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
757 
758 	if (route_cb.any_count == 0)
759 		return;
760 	m = rt_msg1(type, rtinfo);
761 	if (m == 0)
762 		return;
763 	rtm = mtod(m, struct rt_msghdr *);
764 	rtm->rtm_flags = RTF_DONE | flags;
765 	rtm->rtm_errno = error;
766 	rtm->rtm_addrs = rtinfo->rti_addrs;
767 	rt_dispatch(m, sa);
768 }
769 
770 /*
771  * This routine is called to generate a message from the routing
772  * socket indicating that the status of a network interface has changed.
773  */
774 void
775 rt_ifmsg(struct ifnet *ifp)
776 {
777 	struct if_msghdr *ifm;
778 	struct mbuf *m;
779 	struct rt_addrinfo info;
780 
781 	if (route_cb.any_count == 0)
782 		return;
783 	bzero((caddr_t)&info, sizeof(info));
784 	m = rt_msg1(RTM_IFINFO, &info);
785 	if (m == 0)
786 		return;
787 	ifm = mtod(m, struct if_msghdr *);
788 	ifm->ifm_index = ifp->if_index;
789 	ifm->ifm_flags = ifp->if_flags;
790 	ifm->ifm_data = ifp->if_data;
791 	ifm->ifm_addrs = 0;
792 	rt_dispatch(m, NULL);
793 }
794 
795 /*
796  * This is called to generate messages from the routing socket
797  * indicating a network interface has had addresses associated with it.
798  * if we ever reverse the logic and replace messages TO the routing
799  * socket indicate a request to configure interfaces, then it will
800  * be unnecessary as the routing socket will automatically generate
801  * copies of it.
802  */
803 void
804 rt_newaddrmsg(int cmd, struct ifaddr *ifa, int error, struct rtentry *rt)
805 {
806 	struct rt_addrinfo info;
807 	struct sockaddr *sa = 0;
808 	int pass;
809 	struct mbuf *m = 0;
810 	struct ifnet *ifp = ifa->ifa_ifp;
811 
812 	if (route_cb.any_count == 0)
813 		return;
814 	for (pass = 1; pass < 3; pass++) {
815 		bzero((caddr_t)&info, sizeof(info));
816 		if ((cmd == RTM_ADD && pass == 1) ||
817 		    (cmd == RTM_DELETE && pass == 2)) {
818 			register struct ifa_msghdr *ifam;
819 			int ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
820 
821 			info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
822 			info.rti_info[RTAX_IFP] = TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
823 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
824 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
825 			if ((m = rt_msg1(ncmd, &info)) == NULL)
826 				continue;
827 			ifam = mtod(m, struct ifa_msghdr *);
828 			ifam->ifam_index = ifp->if_index;
829 			ifam->ifam_metric = ifa->ifa_metric;
830 			ifam->ifam_flags = ifa->ifa_flags;
831 			ifam->ifam_addrs = info.rti_addrs;
832 		}
833 		if ((cmd == RTM_ADD && pass == 2) ||
834 		    (cmd == RTM_DELETE && pass == 1)) {
835 			register struct rt_msghdr *rtm;
836 
837 			if (rt == 0)
838 				continue;
839 			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
840 			info.rti_info[RTAX_DST] = sa = rt_key(rt);
841 			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
842 			if ((m = rt_msg1(cmd, &info)) == NULL)
843 				continue;
844 			rtm = mtod(m, struct rt_msghdr *);
845 			rtm->rtm_index = ifp->if_index;
846 			rtm->rtm_flags |= rt->rt_flags;
847 			rtm->rtm_errno = error;
848 			rtm->rtm_addrs = info.rti_addrs;
849 		}
850 		rt_dispatch(m, sa);
851 	}
852 }
853 
854 /*
855  * This is the analogue to the rt_newaddrmsg which performs the same
856  * function but for multicast group memberhips.  This is easier since
857  * there is no route state to worry about.
858  */
859 void
860 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
861 {
862 	struct rt_addrinfo info;
863 	struct mbuf *m = 0;
864 	struct ifnet *ifp = ifma->ifma_ifp;
865 	struct ifma_msghdr *ifmam;
866 
867 	if (route_cb.any_count == 0)
868 		return;
869 
870 	bzero((caddr_t)&info, sizeof(info));
871 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
872 	if (ifp && TAILQ_FIRST(&ifp->if_addrhead))
873 		info.rti_info[RTAX_IFP] =
874 			TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
875 	else
876 		info.rti_info[RTAX_IFP] = NULL;
877 	/*
878 	 * If a link-layer address is present, present it as a ``gateway''
879 	 * (similarly to how ARP entries, e.g., are presented).
880 	 */
881 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
882 	m = rt_msg1(cmd, &info);
883 	if (m == NULL)
884 		return;
885 	ifmam = mtod(m, struct ifma_msghdr *);
886 	ifmam->ifmam_index = ifp->if_index;
887 	ifmam->ifmam_addrs = info.rti_addrs;
888 	rt_dispatch(m, ifma->ifma_addr);
889 }
890 
891 /*
892  * This is called to generate routing socket messages indicating
893  * network interface arrival and departure.
894  */
895 void
896 rt_ifannouncemsg(struct ifnet *ifp, int what)
897 {
898 	struct if_announcemsghdr *ifan;
899 	struct mbuf *m;
900 	struct rt_addrinfo info;
901 
902 	if (route_cb.any_count == 0)
903 		return;
904 	bzero((caddr_t)&info, sizeof(info));
905 	m = rt_msg1(RTM_IFANNOUNCE, &info);
906 	if (m == NULL)
907 		return;
908 	ifan = mtod(m, struct if_announcemsghdr *);
909 	ifan->ifan_index = ifp->if_index;
910 	strlcpy(ifan->ifan_name, ifp->if_xname, sizeof(ifan->ifan_name));
911 	ifan->ifan_what = what;
912 	rt_dispatch(m, NULL);
913  }
914 
915 static void
916 rt_dispatch(struct mbuf *m, struct sockaddr *sa)
917 {
918 	struct sockproto route_proto;
919 
920 	route_proto.sp_family = PF_ROUTE;
921 	route_proto.sp_protocol = sa ?  sa->sa_family : 0;
922 	raw_input(m, &route_proto, &route_src, &route_dst);
923 }
924 
925 /*
926  * This is used in dumping the kernel table via sysctl().
927  */
928 static int
929 sysctl_dumpentry(struct radix_node *rn, void *vw)
930 {
931 	struct walkarg *w = vw;
932 	struct rtentry *rt = (struct rtentry *)rn;
933 	int error = 0, size;
934 	struct rt_addrinfo info;
935 
936 	if (w->w_op == NET_RT_FLAGS && !(rt->rt_flags & w->w_arg))
937 		return 0;
938 	bzero((caddr_t)&info, sizeof(info));
939 	info.rti_info[RTAX_DST] = rt_key(rt);
940 	info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
941 	info.rti_info[RTAX_NETMASK] = rt_mask(rt);
942 	info.rti_info[RTAX_GENMASK] = rt->rt_genmask;
943 	if (rt->rt_ifp) {
944 		info.rti_info[RTAX_IFP] =
945 			TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
946 		info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
947 		if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
948 			info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
949 	}
950 	size = rt_msg2(RTM_GET, &info, 0, w);
951 	if (w->w_req && w->w_tmem) {
952 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
953 
954 		rtm->rtm_flags = rt->rt_flags;
955 		rtm->rtm_use = rt->rt_rmx.rmx_pksent;
956 		rt_getmetrics(&rt->rt_rmx, &rtm->rtm_rmx);
957 		rtm->rtm_index = rt->rt_ifp->if_index;
958 		rtm->rtm_errno = rtm->rtm_pid = rtm->rtm_seq = 0;
959 		rtm->rtm_addrs = info.rti_addrs;
960 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
961 		return (error);
962 	}
963 	return (error);
964 }
965 
966 static int
967 sysctl_iflist(int af, struct walkarg *w)
968 {
969 	struct ifnet *ifp;
970 	struct ifaddr *ifa;
971 	struct rt_addrinfo info;
972 	int len, error = 0;
973 
974 	bzero((caddr_t)&info, sizeof(info));
975 	/* IFNET_RLOCK(); */		/* could sleep XXX */
976 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
977 		if (w->w_arg && w->w_arg != ifp->if_index)
978 			continue;
979 		ifa = TAILQ_FIRST(&ifp->if_addrhead);
980 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
981 		len = rt_msg2(RTM_IFINFO, &info, (caddr_t)0, w);
982 		info.rti_info[RTAX_IFP] = 0;
983 		if (w->w_req && w->w_tmem) {
984 			struct if_msghdr *ifm;
985 
986 			ifm = (struct if_msghdr *)w->w_tmem;
987 			ifm->ifm_index = ifp->if_index;
988 			ifm->ifm_flags = ifp->if_flags;
989 			ifm->ifm_data = ifp->if_data;
990 			ifm->ifm_addrs = info.rti_addrs;
991 			error = SYSCTL_OUT(w->w_req,(caddr_t)ifm, len);
992 			if (error)
993 				goto done;
994 		}
995 		while ((ifa = TAILQ_NEXT(ifa, ifa_link)) != 0) {
996 			if (af && af != ifa->ifa_addr->sa_family)
997 				continue;
998 			if (jailed(curthread->td_ucred) &&
999 			    prison_if(curthread->td_ucred, ifa->ifa_addr))
1000 				continue;
1001 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
1002 			info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
1003 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
1004 			len = rt_msg2(RTM_NEWADDR, &info, 0, w);
1005 			if (w->w_req && w->w_tmem) {
1006 				struct ifa_msghdr *ifam;
1007 
1008 				ifam = (struct ifa_msghdr *)w->w_tmem;
1009 				ifam->ifam_index = ifa->ifa_ifp->if_index;
1010 				ifam->ifam_flags = ifa->ifa_flags;
1011 				ifam->ifam_metric = ifa->ifa_metric;
1012 				ifam->ifam_addrs = info.rti_addrs;
1013 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1014 				if (error)
1015 					goto done;
1016 			}
1017 		}
1018 		info.rti_info[RTAX_IFA] = info.rti_info[RTAX_NETMASK] =
1019 			info.rti_info[RTAX_BRD] = 0;
1020 	}
1021 done:
1022 	/* IFNET_RUNLOCK(); */ /* XXX */
1023 	return (error);
1024 }
1025 
1026 int
1027 sysctl_ifmalist(af, w)
1028 	int	af;
1029 	register struct	walkarg *w;
1030 {
1031 	register struct ifnet *ifp;
1032 	struct ifmultiaddr *ifma;
1033 	struct	rt_addrinfo info;
1034 	int	len, error = 0;
1035 
1036 	bzero((caddr_t)&info, sizeof(info));
1037 	/* IFNET_RLOCK(); */		/* could sleep XXX */
1038 	TAILQ_FOREACH(ifp, &ifnet, if_link) {
1039 		if (w->w_arg && w->w_arg != ifp->if_index)
1040 			continue;
1041 		TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
1042 			if (af && af != ifma->ifma_addr->sa_family)
1043 				continue;
1044 			if (jailed(curproc->p_ucred) &&
1045 			    prison_if(curproc->p_ucred, ifma->ifma_addr))
1046 				continue;
1047 			info.rti_addrs = RTA_IFA;
1048 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
1049 			if (TAILQ_FIRST(&ifp->if_addrhead)) {
1050 				info.rti_addrs |= RTA_IFP;
1051 				info.rti_info[RTAX_IFP] =
1052 				    TAILQ_FIRST(&ifp->if_addrhead)->ifa_addr;
1053 			} else
1054 				info.rti_info[RTAX_IFP] = NULL;
1055 
1056 			if (ifma->ifma_addr->sa_family != AF_LINK) {
1057 				info.rti_addrs |= RTA_GATEWAY;
1058 				info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
1059 			} else
1060 				info.rti_info[RTAX_GATEWAY] = NULL;
1061 
1062 			len = rt_msg2(RTM_NEWMADDR, &info, 0, w);
1063 			if (w->w_req && w->w_tmem) {
1064 				register struct ifma_msghdr *ifmam;
1065 
1066 				ifmam = (struct ifma_msghdr *)w->w_tmem;
1067 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
1068 				ifmam->ifmam_flags = 0;
1069 				ifmam->ifmam_addrs = info.rti_addrs;
1070 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
1071 				if (error)
1072 					goto done;
1073 			}
1074 		}
1075 	}
1076 done:
1077 	/* IFNET_RUNLOCK(); */ /* XXX */
1078 	return (error);
1079 }
1080 
1081 static int
1082 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
1083 {
1084 	int	*name = (int *)arg1;
1085 	u_int	namelen = arg2;
1086 	struct radix_node_head *rnh;
1087 	int	i, s, error = EINVAL;
1088 	u_char  af;
1089 	struct	walkarg w;
1090 
1091 	name ++;
1092 	namelen--;
1093 	if (req->newptr)
1094 		return (EPERM);
1095 	if (namelen != 3)
1096 		return ((namelen < 3) ? EISDIR : ENOTDIR);
1097 	af = name[0];
1098 	if (af > AF_MAX)
1099 		return (EINVAL);
1100 	Bzero(&w, sizeof(w));
1101 	w.w_op = name[1];
1102 	w.w_arg = name[2];
1103 	w.w_req = req;
1104 
1105 	s = splnet();
1106 	switch (w.w_op) {
1107 
1108 	case NET_RT_DUMP:
1109 	case NET_RT_FLAGS:
1110 		if (af != 0) {
1111 			if ((rnh = rt_tables[af]) != NULL) {
1112 				/* RADIX_NODE_HEAD_LOCK(rnh); */
1113 			    	error = rnh->rnh_walktree(rnh,
1114 				    sysctl_dumpentry, &w);/* could sleep XXX */
1115 				/* RADIX_NODE_HEAD_UNLOCK(rnh); */
1116 			} else
1117 				error = EAFNOSUPPORT;
1118 		} else {
1119 			for (i = 1; i <= AF_MAX; i++)
1120 				if ((rnh = rt_tables[i]) != NULL) {
1121 					/* RADIX_NODE_HEAD_LOCK(rnh); */
1122 					error = rnh->rnh_walktree(rnh,
1123 					    sysctl_dumpentry, &w);
1124 					/* RADIX_NODE_HEAD_UNLOCK(rnh); */
1125 					if (error)
1126 						break;
1127 				}
1128 		}
1129 		break;
1130 
1131 	case NET_RT_IFLIST:
1132 		error = sysctl_iflist(af, &w);
1133 		break;
1134 
1135 	case NET_RT_IFMALIST:
1136 		error = sysctl_ifmalist(af, &w);
1137 		break;
1138 	}
1139 	splx(s);
1140 	if (w.w_tmem)
1141 		free(w.w_tmem, M_RTABLE);
1142 	return (error);
1143 }
1144 
1145 SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD, sysctl_rtsock, "");
1146 
1147 /*
1148  * Definitions of protocols supported in the ROUTE domain.
1149  */
1150 
1151 extern struct domain routedomain;		/* or at least forward */
1152 
1153 static struct protosw routesw[] = {
1154 { SOCK_RAW,	&routedomain,	0,		PR_ATOMIC|PR_ADDR,
1155   0,		route_output,	raw_ctlinput,	0,
1156   0,
1157   raw_init,	0,		0,		0,
1158   &route_usrreqs
1159 }
1160 };
1161 
1162 static struct domain routedomain =
1163     { PF_ROUTE, "route", 0, 0, 0,
1164       routesw, &routesw[sizeof(routesw)/sizeof(routesw[0])] };
1165 
1166 DOMAIN_SET(route);
1167