xref: /freebsd/sys/net/rtsock.c (revision 349fcf079ca32d5c93e45366d2b27638747affeb)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1988, 1991, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 #include "opt_ddb.h"
32 #include "opt_route.h"
33 #include "opt_inet.h"
34 #include "opt_inet6.h"
35 
36 #include <sys/param.h>
37 #include <sys/jail.h>
38 #include <sys/kernel.h>
39 #include <sys/eventhandler.h>
40 #include <sys/domain.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/priv.h>
45 #include <sys/proc.h>
46 #include <sys/protosw.h>
47 #include <sys/rmlock.h>
48 #include <sys/rwlock.h>
49 #include <sys/signalvar.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 
55 #include <net/if.h>
56 #include <net/if_var.h>
57 #include <net/if_private.h>
58 #include <net/if_dl.h>
59 #include <net/if_llatbl.h>
60 #include <net/if_types.h>
61 #include <net/netisr.h>
62 #include <net/route.h>
63 #include <net/route/route_ctl.h>
64 #include <net/route/route_var.h>
65 #include <net/vnet.h>
66 
67 #include <netinet/in.h>
68 #include <netinet/if_ether.h>
69 #include <netinet/ip_carp.h>
70 #ifdef INET6
71 #include <netinet6/in6_var.h>
72 #include <netinet6/ip6_var.h>
73 #include <netinet6/scope6_var.h>
74 #endif
75 #include <net/route/nhop.h>
76 
77 #define	DEBUG_MOD_NAME	rtsock
78 #define	DEBUG_MAX_LEVEL	LOG_DEBUG
79 #include <net/route/route_debug.h>
80 _DECLARE_DEBUG(LOG_INFO);
81 
82 #ifdef COMPAT_FREEBSD32
83 #include <sys/mount.h>
84 #include <compat/freebsd32/freebsd32.h>
85 
86 struct if_msghdr32 {
87 	uint16_t ifm_msglen;
88 	uint8_t	ifm_version;
89 	uint8_t	ifm_type;
90 	int32_t	ifm_addrs;
91 	int32_t	ifm_flags;
92 	uint16_t ifm_index;
93 	uint16_t _ifm_spare1;
94 	struct	if_data ifm_data;
95 };
96 
97 struct if_msghdrl32 {
98 	uint16_t ifm_msglen;
99 	uint8_t	ifm_version;
100 	uint8_t	ifm_type;
101 	int32_t	ifm_addrs;
102 	int32_t	ifm_flags;
103 	uint16_t ifm_index;
104 	uint16_t _ifm_spare1;
105 	uint16_t ifm_len;
106 	uint16_t ifm_data_off;
107 	uint32_t _ifm_spare2;
108 	struct	if_data ifm_data;
109 };
110 
111 struct ifa_msghdrl32 {
112 	uint16_t ifam_msglen;
113 	uint8_t	ifam_version;
114 	uint8_t	ifam_type;
115 	int32_t	ifam_addrs;
116 	int32_t	ifam_flags;
117 	uint16_t ifam_index;
118 	uint16_t _ifam_spare1;
119 	uint16_t ifam_len;
120 	uint16_t ifam_data_off;
121 	int32_t	ifam_metric;
122 	struct	if_data ifam_data;
123 };
124 
/*
 * Size of sockaddr @sa with sa_len rounded up to the next multiple of
 * sizeof(int); a sockaddr whose sa_len is zero still counts as sizeof(int).
 * Note that @sa is evaluated more than once.
 */
#define SA_SIZE32(sa)							\
	((((struct sockaddr *)(sa))->sa_len == 0) ?			\
	    sizeof(int) :						\
	    1 + ((((struct sockaddr *)(sa))->sa_len - 1) |		\
	    (sizeof(int) - 1)))
129 
130 #endif /* COMPAT_FREEBSD32 */
131 
132 struct linear_buffer {
133 	char		*base;	/* Base allocated memory pointer */
134 	uint32_t	offset;	/* Currently used offset */
135 	uint32_t	size;	/* Total buffer size */
136 };
137 #define	SCRATCH_BUFFER_SIZE	1024
138 
/*
 * Log through RT_LOG_<level>, prefixing the message with the pid of the
 * current process (0 when curproc is NULL).
 */
#define	RTS_PID_LOG(_l, _fmt, ...)					\
	RT_LOG_##_l(_l, "PID %d: " _fmt,				\
	    curproc ? curproc->p_pid : 0, ## __VA_ARGS__)
142 
143 MALLOC_DEFINE(M_RTABLE, "routetbl", "routing tables");
144 
145 /* NB: these are not modified */
146 static struct	sockaddr route_src = { 2, PF_ROUTE, };
147 static struct	sockaddr sa_zero   = { sizeof(sa_zero), AF_INET, };
148 
/* External hook for CARP. */
150 int	(*carp_get_vhid_p)(struct ifaddr *);
151 
152 /*
153  * Used by rtsock callback code to decide whether to filter the update
154  * notification to a socket bound to a particular FIB.
155  */
156 #define	RTS_FILTER_FIB	M_PROTO8
157 /*
158  * Used to store address family of the notification.
159  */
160 #define	m_rtsock_family	m_pkthdr.PH_loc.eight[0]
161 
162 struct rcb {
163 	LIST_ENTRY(rcb) list;
164 	struct socket	*rcb_socket;
165 	sa_family_t	rcb_family;
166 };
167 
168 typedef struct {
169 	LIST_HEAD(, rcb)	cblist;
170 	int	ip_count;	/* attached w/ AF_INET */
171 	int	ip6_count;	/* attached w/ AF_INET6 */
172 	int	any_count;	/* total attached */
173 } route_cb_t;
174 VNET_DEFINE_STATIC(route_cb_t, route_cb);
175 #define	V_route_cb VNET(route_cb)
176 
177 struct mtx rtsock_mtx;
178 MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
179 
180 #define	RTSOCK_LOCK()	mtx_lock(&rtsock_mtx)
181 #define	RTSOCK_UNLOCK()	mtx_unlock(&rtsock_mtx)
182 #define	RTSOCK_LOCK_ASSERT()	mtx_assert(&rtsock_mtx, MA_OWNED)
183 
184 SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
185 
186 struct walkarg {
187 	int	family;
188 	int	w_tmemsize;
189 	int	w_op, w_arg;
190 	caddr_t	w_tmem;
191 	struct sysctl_req *w_req;
192 	struct sockaddr *dst;
193 	struct sockaddr *mask;
194 };
195 
196 static void	rts_input(struct mbuf *m);
197 static struct mbuf *rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo);
198 static int	rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
199 			struct walkarg *w, int *plen);
200 static int	rt_xaddrs(caddr_t cp, caddr_t cplim,
201 			struct rt_addrinfo *rtinfo);
202 static int	cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb);
203 static int	sysctl_dumpentry(struct rtentry *rt, void *vw);
204 static int	sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh,
205 			uint32_t weight, struct walkarg *w);
206 static int	sysctl_iflist(int af, struct walkarg *w);
207 static int	sysctl_ifmalist(int af, struct walkarg *w);
208 static void	rt_getmetrics(const struct rtentry *rt,
209 			const struct nhop_object *nh, struct rt_metrics *out);
210 static void	rt_dispatch(struct mbuf *, sa_family_t);
211 static void	rt_ifannouncemsg(struct ifnet *, int, const char *);
212 static int	handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
213 			struct rt_msghdr *rtm, struct rib_cmd_info *rc);
214 static int	update_rtm_from_rc(struct rt_addrinfo *info,
215 			struct rt_msghdr **prtm, int alloc_len,
216 			struct rib_cmd_info *rc, struct nhop_object *nh);
217 static void	send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
218 			struct mbuf *m, sa_family_t saf, u_int fibnum,
219 			int rtm_errno);
220 static void	rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc);
221 static void	rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask);
222 
223 static struct netisr_handler rtsock_nh = {
224 	.nh_name = "rtsock",
225 	.nh_handler = rts_input,
226 	.nh_proto = NETISR_ROUTE,
227 	.nh_policy = NETISR_POLICY_SOURCE,
228 };
229 
230 static int
sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)231 sysctl_route_netisr_maxqlen(SYSCTL_HANDLER_ARGS)
232 {
233 	int error, qlimit;
234 
235 	netisr_getqlimit(&rtsock_nh, &qlimit);
236 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
237         if (error || !req->newptr)
238                 return (error);
239 	if (qlimit < 1)
240 		return (EINVAL);
241 	return (netisr_setqlimit(&rtsock_nh, qlimit));
242 }
243 SYSCTL_PROC(_net_route, OID_AUTO, netisr_maxqlen,
244     CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
245     0, 0, sysctl_route_netisr_maxqlen, "I",
246     "maximum routing socket dispatch queue length");
247 
248 static void
vnet_rts_init(void)249 vnet_rts_init(void)
250 {
251 	int tmp;
252 
253 	if (IS_DEFAULT_VNET(curvnet)) {
254 		if (TUNABLE_INT_FETCH("net.route.netisr_maxqlen", &tmp))
255 			rtsock_nh.nh_qlimit = tmp;
256 		netisr_register(&rtsock_nh);
257 	}
258 #ifdef VIMAGE
259 	 else
260 		netisr_register_vnet(&rtsock_nh);
261 #endif
262 }
263 VNET_SYSINIT(vnet_rtsock, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
264     vnet_rts_init, NULL);
265 
266 #ifdef VIMAGE
267 static void
vnet_rts_uninit(void)268 vnet_rts_uninit(void)
269 {
270 
271 	netisr_unregister_vnet(&rtsock_nh);
272 }
273 VNET_SYSUNINIT(vnet_rts_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
274     vnet_rts_uninit, NULL);
275 #endif
276 
277 static void
report_route_event(const struct rib_cmd_info * rc,void * _cbdata)278 report_route_event(const struct rib_cmd_info *rc, void *_cbdata)
279 {
280 	uint32_t fibnum = (uint32_t)(uintptr_t)_cbdata;
281 	struct nhop_object *nh;
282 
283 	nh = rc->rc_cmd == RTM_DELETE ? rc->rc_nh_old : rc->rc_nh_new;
284 	rt_routemsg(rc->rc_cmd, rc->rc_rt, nh, fibnum);
285 }
286 
287 static void
rts_handle_route_event(uint32_t fibnum,const struct rib_cmd_info * rc)288 rts_handle_route_event(uint32_t fibnum, const struct rib_cmd_info *rc)
289 {
290 #ifdef ROUTE_MPATH
291 	if ((rc->rc_nh_new && NH_IS_NHGRP(rc->rc_nh_new)) ||
292 	    (rc->rc_nh_old && NH_IS_NHGRP(rc->rc_nh_old))) {
293 		rib_decompose_notification(rc, report_route_event,
294 		    (void *)(uintptr_t)fibnum);
295 	} else
296 #endif
297 		report_route_event(rc, (void *)(uintptr_t)fibnum);
298 }
299 static struct rtbridge rtsbridge = {
300 	.route_f = rts_handle_route_event,
301 	.ifmsg_f = rtsock_ifmsg,
302 };
303 static struct rtbridge *rtsbridge_orig_p;
304 
305 static void
rtsock_notify_event(uint32_t fibnum,const struct rib_cmd_info * rc)306 rtsock_notify_event(uint32_t fibnum, const struct rib_cmd_info *rc)
307 {
308 	netlink_callback_p->route_f(fibnum, rc);
309 }
310 
311 static void
rtsock_init(void * dummy __unused)312 rtsock_init(void *dummy __unused)
313 {
314 	rtsbridge_orig_p = rtsock_callback_p;
315 	rtsock_callback_p = &rtsbridge;
316 }
317 SYSINIT(rtsock_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, rtsock_init, NULL);
318 
319 static void
rts_ifnet_attached(void * arg __unused,struct ifnet * ifp)320 rts_ifnet_attached(void *arg __unused, struct ifnet *ifp)
321 {
322 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL, NULL);
323 }
324 EVENTHANDLER_DEFINE(ifnet_attached_event, rts_ifnet_attached, NULL, 0);
325 
326 static void
rts_handle_ifnet_departure(void * arg __unused,struct ifnet * ifp)327 rts_handle_ifnet_departure(void *arg __unused, struct ifnet *ifp)
328 {
329 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE, NULL);
330 }
331 EVENTHANDLER_DEFINE(ifnet_departure_event, rts_handle_ifnet_departure, NULL, 0);
332 
333 static void
rts_handle_ifnet_rename(void * arg __unused,struct ifnet * ifp,const char * old_name)334 rts_handle_ifnet_rename(void *arg __unused, struct ifnet *ifp,
335     const char *old_name)
336 {
337 	rt_ifannouncemsg(ifp, IFAN_DEPARTURE, old_name);
338 	rt_ifannouncemsg(ifp, IFAN_ARRIVAL, NULL);
339 }
340 EVENTHANDLER_DEFINE(ifnet_rename_event, rts_handle_ifnet_rename, NULL, 0);
341 
342 static void
rts_append_data(struct socket * so,struct mbuf * m)343 rts_append_data(struct socket *so, struct mbuf *m)
344 {
345 
346 	if (sbappendaddr(&so->so_rcv, &route_src, m, NULL) == 0) {
347 		soroverflow(so);
348 		m_freem(m);
349 	} else
350 		sorwakeup(so);
351 }
352 
353 static void
rts_input(struct mbuf * m)354 rts_input(struct mbuf *m)
355 {
356 	struct rcb *rcb;
357 	struct socket *last;
358 
359 	last = NULL;
360 	RTSOCK_LOCK();
361 	LIST_FOREACH(rcb, &V_route_cb.cblist, list) {
362 		if (rcb->rcb_family != AF_UNSPEC &&
363 		    rcb->rcb_family != m->m_rtsock_family)
364 			continue;
365 		if ((m->m_flags & RTS_FILTER_FIB) &&
366 		    M_GETFIB(m) != rcb->rcb_socket->so_fibnum)
367 			continue;
368 		if (last != NULL) {
369 			struct mbuf *n;
370 
371 			n = m_copym(m, 0, M_COPYALL, M_NOWAIT);
372 			if (n != NULL)
373 				rts_append_data(last, n);
374 		}
375 		last = rcb->rcb_socket;
376 	}
377 	if (last != NULL)
378 		rts_append_data(last, m);
379 	else
380 		m_freem(m);
381 	RTSOCK_UNLOCK();
382 }
383 
/* Close handler: marks the routing socket as disconnected. */
static void
rts_close(struct socket *so)
{
	soisdisconnected(so);
}
390 
391 static SYSCTL_NODE(_net, OID_AUTO, rtsock, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
392     "Routing socket infrastructure");
393 static u_long rts_sendspace = 8192;
394 SYSCTL_ULONG(_net_rtsock, OID_AUTO, sendspace, CTLFLAG_RW, &rts_sendspace, 0,
395     "Default routing socket send space");
396 static u_long rts_recvspace = 8192;
397 SYSCTL_ULONG(_net_rtsock, OID_AUTO, recvspace, CTLFLAG_RW, &rts_recvspace, 0,
398     "Default routing socket receive space");
399 
400 static int
rts_attach(struct socket * so,int proto,struct thread * td)401 rts_attach(struct socket *so, int proto, struct thread *td)
402 {
403 	struct rcb *rcb;
404 	int error;
405 
406 	error = soreserve(so, rts_sendspace, rts_recvspace);
407 	if (error)
408 		return (error);
409 
410 	rcb = malloc(sizeof(*rcb), M_PCB, M_WAITOK);
411 	rcb->rcb_socket = so;
412 	rcb->rcb_family = proto;
413 
414 	so->so_pcb = rcb;
415 	so->so_fibnum = td->td_proc->p_fibnum;
416 	so->so_options |= SO_USELOOPBACK;
417 
418 	RTSOCK_LOCK();
419 	LIST_INSERT_HEAD(&V_route_cb.cblist, rcb, list);
420 	switch (proto) {
421 	case AF_INET:
422 		V_route_cb.ip_count++;
423 		break;
424 	case AF_INET6:
425 		V_route_cb.ip6_count++;
426 		break;
427 	}
428 	V_route_cb.any_count++;
429 	RTSOCK_UNLOCK();
430 	soisconnected(so);
431 
432 	return (0);
433 }
434 
435 static int
rts_ctloutput(struct socket * so,struct sockopt * sopt)436 rts_ctloutput(struct socket *so, struct sockopt *sopt)
437 {
438 	int error, optval;
439 
440 	error = ENOPROTOOPT;
441 	if (sopt->sopt_dir == SOPT_SET) {
442 		switch (sopt->sopt_level) {
443 		case SOL_SOCKET:
444 			switch (sopt->sopt_name) {
445 			case SO_SETFIB:
446 				error = sooptcopyin(sopt, &optval,
447 				    sizeof(optval), sizeof(optval));
448 				if (error != 0)
449 					break;
450 				error = sosetfib(so, optval);
451 				break;
452 			}
453 			break;
454 		}
455 	}
456 	return (error);
457 }
458 
459 static void
rts_detach(struct socket * so)460 rts_detach(struct socket *so)
461 {
462 	struct rcb *rcb = so->so_pcb;
463 
464 	RTSOCK_LOCK();
465 	LIST_REMOVE(rcb, list);
466 	switch(rcb->rcb_family) {
467 	case AF_INET:
468 		V_route_cb.ip_count--;
469 		break;
470 	case AF_INET6:
471 		V_route_cb.ip6_count--;
472 		break;
473 	}
474 	V_route_cb.any_count--;
475 	RTSOCK_UNLOCK();
476 	free(rcb, M_PCB);
477 	so->so_pcb = NULL;
478 }
479 
/* Disconnect handler: always fails with ENOTCONN; @so is not examined. */
static int
rts_disconnect(struct socket *so)
{
	return (ENOTCONN);
}
486 
487 static int
rts_shutdown(struct socket * so,enum shutdown_how how)488 rts_shutdown(struct socket *so, enum shutdown_how how)
489 {
490 	/*
491 	 * Note: route socket marks itself as connected through its lifetime.
492 	 */
493 	switch (how) {
494 	case SHUT_RD:
495 		sorflush(so);
496 		break;
497 	case SHUT_RDWR:
498 		sorflush(so);
499 		/* FALLTHROUGH */
500 	case SHUT_WR:
501 		socantsendmore(so);
502 	}
503 
504 	return (0);
505 }
506 
507 #ifndef _SOCKADDR_UNION_DEFINED
508 #define	_SOCKADDR_UNION_DEFINED
509 /*
510  * The union of all possible address formats we handle.
511  */
512 union sockaddr_union {
513 	struct sockaddr		sa;
514 	struct sockaddr_in	sin;
515 	struct sockaddr_in6	sin6;
516 };
517 #endif /* _SOCKADDR_UNION_DEFINED */
518 
519 static int
rtm_get_jailed(struct rt_addrinfo * info,struct ifnet * ifp,struct nhop_object * nh,union sockaddr_union * saun,struct ucred * cred)520 rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
521     struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
522 {
523 #if defined(INET) || defined(INET6)
524 	struct epoch_tracker et;
525 #endif
526 
527 	/* First, see if the returned address is part of the jail. */
528 	if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
529 		info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
530 		return (0);
531 	}
532 
533 	switch (info->rti_info[RTAX_DST]->sa_family) {
534 #ifdef INET
535 	case AF_INET:
536 	{
537 		struct in_addr ia;
538 		struct ifaddr *ifa;
539 		int found;
540 
541 		found = 0;
542 		/*
543 		 * Try to find an address on the given outgoing interface
544 		 * that belongs to the jail.
545 		 */
546 		NET_EPOCH_ENTER(et);
547 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
548 			struct sockaddr *sa;
549 			sa = ifa->ifa_addr;
550 			if (sa->sa_family != AF_INET)
551 				continue;
552 			ia = ((struct sockaddr_in *)sa)->sin_addr;
553 			if (prison_check_ip4(cred, &ia) == 0) {
554 				found = 1;
555 				break;
556 			}
557 		}
558 		NET_EPOCH_EXIT(et);
559 		if (!found) {
560 			/*
561 			 * As a last resort return the 'default' jail address.
562 			 */
563 			ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
564 			    sin_addr;
565 			if (prison_get_ip4(cred, &ia) != 0)
566 				return (ESRCH);
567 		}
568 		bzero(&saun->sin, sizeof(struct sockaddr_in));
569 		saun->sin.sin_len = sizeof(struct sockaddr_in);
570 		saun->sin.sin_family = AF_INET;
571 		saun->sin.sin_addr.s_addr = ia.s_addr;
572 		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin;
573 		break;
574 	}
575 #endif
576 #ifdef INET6
577 	case AF_INET6:
578 	{
579 		struct in6_addr ia6;
580 		struct ifaddr *ifa;
581 		int found;
582 
583 		found = 0;
584 		/*
585 		 * Try to find an address on the given outgoing interface
586 		 * that belongs to the jail.
587 		 */
588 		NET_EPOCH_ENTER(et);
589 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
590 			struct sockaddr *sa;
591 			sa = ifa->ifa_addr;
592 			if (sa->sa_family != AF_INET6)
593 				continue;
594 			bcopy(&((struct sockaddr_in6 *)sa)->sin6_addr,
595 			    &ia6, sizeof(struct in6_addr));
596 			if (prison_check_ip6(cred, &ia6) == 0) {
597 				found = 1;
598 				break;
599 			}
600 		}
601 		NET_EPOCH_EXIT(et);
602 		if (!found) {
603 			/*
604 			 * As a last resort return the 'default' jail address.
605 			 */
606 			ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
607 			    sin6_addr;
608 			if (prison_get_ip6(cred, &ia6) != 0)
609 				return (ESRCH);
610 		}
611 		bzero(&saun->sin6, sizeof(struct sockaddr_in6));
612 		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
613 		saun->sin6.sin6_family = AF_INET6;
614 		bcopy(&ia6, &saun->sin6.sin6_addr, sizeof(struct in6_addr));
615 		if (sa6_recoverscope(&saun->sin6) != 0)
616 			return (ESRCH);
617 		info->rti_info[RTAX_IFA] = (struct sockaddr *)&saun->sin6;
618 		break;
619 	}
620 #endif
621 	default:
622 		return (ESRCH);
623 	}
624 	return (0);
625 }
626 
627 static int
fill_blackholeinfo(struct rt_addrinfo * info,union sockaddr_union * saun)628 fill_blackholeinfo(struct rt_addrinfo *info, union sockaddr_union *saun)
629 {
630 	struct ifaddr *ifa;
631 	sa_family_t saf;
632 
633 	if (V_loif == NULL) {
634 		RTS_PID_LOG(LOG_INFO, "Unable to add blackhole/reject nhop without loopback");
635 		return (ENOTSUP);
636 	}
637 	info->rti_ifp = V_loif;
638 
639 	saf = info->rti_info[RTAX_DST]->sa_family;
640 
641 	CK_STAILQ_FOREACH(ifa, &info->rti_ifp->if_addrhead, ifa_link) {
642 		if (ifa->ifa_addr->sa_family == saf) {
643 			info->rti_ifa = ifa;
644 			break;
645 		}
646 	}
647 	if (info->rti_ifa == NULL) {
648 		RTS_PID_LOG(LOG_INFO, "Unable to find ifa for blackhole/reject nhop");
649 		return (ENOTSUP);
650 	}
651 
652 	bzero(saun, sizeof(union sockaddr_union));
653 	switch (saf) {
654 #ifdef INET
655 	case AF_INET:
656 		saun->sin.sin_family = AF_INET;
657 		saun->sin.sin_len = sizeof(struct sockaddr_in);
658 		saun->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
659 		break;
660 #endif
661 #ifdef INET6
662 	case AF_INET6:
663 		saun->sin6.sin6_family = AF_INET6;
664 		saun->sin6.sin6_len = sizeof(struct sockaddr_in6);
665 		saun->sin6.sin6_addr = in6addr_loopback;
666 		break;
667 #endif
668 	default:
669 		RTS_PID_LOG(LOG_INFO, "unsupported family: %d", saf);
670 		return (ENOTSUP);
671 	}
672 	info->rti_info[RTAX_GATEWAY] = &saun->sa;
673 	info->rti_flags |= RTF_GATEWAY;
674 
675 	return (0);
676 }
677 
678 /*
679  * Fills in @info based on userland-provided @rtm message.
680  *
681  * Returns 0 on success.
682  */
683 static int
fill_addrinfo(struct rt_msghdr * rtm,int len,struct linear_buffer * lb,u_int fibnum,struct rt_addrinfo * info)684 fill_addrinfo(struct rt_msghdr *rtm, int len, struct linear_buffer *lb, u_int fibnum,
685     struct rt_addrinfo *info)
686 {
687 	int error;
688 
689 	rtm->rtm_pid = curproc->p_pid;
690 	info->rti_addrs = rtm->rtm_addrs;
691 
692 	info->rti_mflags = rtm->rtm_inits;
693 	info->rti_rmx = &rtm->rtm_rmx;
694 
695 	/*
696 	 * rt_xaddrs() performs s6_addr[2] := sin6_scope_id for AF_INET6
697 	 * link-local address because rtrequest requires addresses with
698 	 * embedded scope id.
699 	 */
700 	if (rt_xaddrs((caddr_t)(rtm + 1), len + (caddr_t)rtm, info))
701 		return (EINVAL);
702 
703 	info->rti_flags = rtm->rtm_flags;
704 	error = cleanup_xaddrs(info, lb);
705 	if (error != 0)
706 		return (error);
707 	/*
708 	 * Verify that the caller has the appropriate privilege; RTM_GET
709 	 * is the only operation the non-superuser is allowed.
710 	 */
711 	if (rtm->rtm_type != RTM_GET) {
712 		error = priv_check(curthread, PRIV_NET_ROUTE);
713 		if (error != 0)
714 			return (error);
715 	}
716 
717 	/*
718 	 * The given gateway address may be an interface address.
719 	 * For example, issuing a "route change" command on a route
720 	 * entry that was created from a tunnel, and the gateway
721 	 * address given is the local end point. In this case the
722 	 * RTF_GATEWAY flag must be cleared or the destination will
723 	 * not be reachable even though there is no error message.
724 	 */
725 	if (info->rti_info[RTAX_GATEWAY] != NULL &&
726 	    info->rti_info[RTAX_GATEWAY]->sa_family != AF_LINK) {
727 		struct nhop_object *nh;
728 
729 		/*
730 		 * A host route through the loopback interface is
731 		 * installed for each interface address. In pre 8.0
732 		 * releases the interface address of a PPP link type
733 		 * is not reachable locally. This behavior is fixed as
734 		 * part of the new L2/L3 redesign and rewrite work. The
735 		 * signature of this interface address route is the
736 		 * AF_LINK sa_family type of the gateway, and the
737 		 * rt_ifp has the IFF_LOOPBACK flag set.
738 		 */
739 		nh = rib_lookup(fibnum, info->rti_info[RTAX_GATEWAY], NHR_NONE, 0);
740 		if (nh != NULL && nh->gw_sa.sa_family == AF_LINK &&
741 		    nh->nh_ifp->if_flags & IFF_LOOPBACK) {
742 				info->rti_flags &= ~RTF_GATEWAY;
743 				info->rti_flags |= RTF_GWFLAG_COMPAT;
744 		}
745 	}
746 
747 	return (0);
748 }
749 
750 static struct nhop_object *
select_nhop(struct nhop_object * nh,const struct sockaddr * gw)751 select_nhop(struct nhop_object *nh, const struct sockaddr *gw)
752 {
753 	if (!NH_IS_NHGRP(nh))
754 		return (nh);
755 #ifdef ROUTE_MPATH
756 	const struct weightened_nhop *wn;
757 	uint32_t num_nhops;
758 	wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
759 	if (gw == NULL)
760 		return (wn[0].nh);
761 	for (int i = 0; i < num_nhops; i++) {
762 		if (match_nhop_gw(wn[i].nh, gw))
763 			return (wn[i].nh);
764 	}
765 #endif
766 	return (NULL);
767 }
768 
769 /*
770  * Handles RTM_GET message from routing socket, returning matching rt.
771  *
772  * Returns:
773  * 0 on success, with locked and referenced matching rt in @rt_nrt
774  * errno of failure
775  */
776 static int
handle_rtm_get(struct rt_addrinfo * info,u_int fibnum,struct rt_msghdr * rtm,struct rib_cmd_info * rc)777 handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
778     struct rt_msghdr *rtm, struct rib_cmd_info *rc)
779 {
780 	RIB_RLOCK_TRACKER;
781 	struct rib_head *rnh;
782 	struct nhop_object *nh;
783 	sa_family_t saf;
784 
785 	saf = info->rti_info[RTAX_DST]->sa_family;
786 
787 	rnh = rt_tables_get_rnh(fibnum, saf);
788 	if (rnh == NULL)
789 		return (EAFNOSUPPORT);
790 
791 	RIB_RLOCK(rnh);
792 
793 	/*
794 	 * By (implicit) convention host route (one without netmask)
795 	 * means longest-prefix-match request and the route with netmask
796 	 * means exact-match lookup.
797 	 * As cleanup_xaddrs() cleans up info flags&addrs for the /32,/128
798 	 * prefixes, use original data to check for the netmask presence.
799 	 */
800 	if ((rtm->rtm_addrs & RTA_NETMASK) == 0) {
801 		/*
802 		 * Provide longest prefix match for
803 		 * address lookup (no mask).
804 		 * 'route -n get addr'
805 		 */
806 		rc->rc_rt = (struct rtentry *) rnh->rnh_matchaddr(
807 		    info->rti_info[RTAX_DST], &rnh->head);
808 	} else
809 		rc->rc_rt = (struct rtentry *) rnh->rnh_lookup(
810 		    info->rti_info[RTAX_DST],
811 		    info->rti_info[RTAX_NETMASK], &rnh->head);
812 
813 	if (rc->rc_rt == NULL) {
814 		RIB_RUNLOCK(rnh);
815 		return (ESRCH);
816 	}
817 
818 	nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
819 	if (nh == NULL) {
820 		RIB_RUNLOCK(rnh);
821 		return (ESRCH);
822 	}
823 	/*
824 	 * If performing proxied L2 entry insertion, and
825 	 * the actual PPP host entry is found, perform
826 	 * another search to retrieve the prefix route of
827 	 * the local end point of the PPP link.
828 	 * TODO: move this logic to userland.
829 	 */
830 	if (rtm->rtm_flags & RTF_ANNOUNCE) {
831 		struct sockaddr_storage laddr;
832 
833 		if (nh->nh_ifp != NULL &&
834 		    nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
835 			struct ifaddr *ifa;
836 
837 			ifa = ifa_ifwithnet(info->rti_info[RTAX_DST], 1,
838 					RT_ALL_FIBS);
839 			if (ifa != NULL)
840 				rt_maskedcopy(ifa->ifa_addr,
841 					      (struct sockaddr *)&laddr,
842 					      ifa->ifa_netmask);
843 		} else
844 			rt_maskedcopy(nh->nh_ifa->ifa_addr,
845 				      (struct sockaddr *)&laddr,
846 				      nh->nh_ifa->ifa_netmask);
847 		/*
848 		 * refactor rt and no lock operation necessary
849 		 */
850 		rc->rc_rt = (struct rtentry *)rnh->rnh_matchaddr(
851 		    (struct sockaddr *)&laddr, &rnh->head);
852 		if (rc->rc_rt == NULL) {
853 			RIB_RUNLOCK(rnh);
854 			return (ESRCH);
855 		}
856 		nh = select_nhop(rt_get_raw_nhop(rc->rc_rt), info->rti_info[RTAX_GATEWAY]);
857 		if (nh == NULL) {
858 			RIB_RUNLOCK(rnh);
859 			return (ESRCH);
860 		}
861 	}
862 	rc->rc_nh_new = nh;
863 	rc->rc_nh_weight = rc->rc_rt->rt_weight;
864 	RIB_RUNLOCK(rnh);
865 
866 	return (0);
867 }
868 
/*
 * Zeroes @dst and @mask and stamps them with the length and family of
 * @family.  Only AF_INET and AF_INET6 (when compiled in) are handled;
 * for any other family both buffers are left untouched.
 */
static void
init_sockaddrs_family(int family, struct sockaddr *dst, struct sockaddr *mask)
{
#ifdef INET
	if (family == AF_INET) {
		struct sockaddr_in *d4, *m4;

		d4 = (struct sockaddr_in *)dst;
		m4 = (struct sockaddr_in *)mask;

		bzero(d4, sizeof(struct sockaddr_in));
		bzero(m4, sizeof(struct sockaddr_in));

		d4->sin_len = sizeof(struct sockaddr_in);
		d4->sin_family = AF_INET;
		m4->sin_len = sizeof(struct sockaddr_in);
		m4->sin_family = AF_INET;
	}
#endif
#ifdef INET6
	if (family == AF_INET6) {
		struct sockaddr_in6 *d6, *m6;

		d6 = (struct sockaddr_in6 *)dst;
		m6 = (struct sockaddr_in6 *)mask;

		bzero(d6, sizeof(struct sockaddr_in6));
		bzero(m6, sizeof(struct sockaddr_in6));

		d6->sin6_len = sizeof(struct sockaddr_in6);
		d6->sin6_family = AF_INET6;
		m6->sin6_len = sizeof(struct sockaddr_in6);
		m6->sin6_family = AF_INET6;
	}
#endif
}
901 
/*
 * Fills the address parts of the pre-initialized @dst and @mask with the
 * prefix and mask of @rt.  The family is taken from dst->sa_family; for
 * AF_INET6 the scope id reported for the prefix is stored in @dst as
 * well.  Other families are left untouched.
 */
static void
export_rtaddrs(const struct rtentry *rt, struct sockaddr *dst,
    struct sockaddr *mask)
{
#ifdef INET
	if (dst->sa_family == AF_INET) {
		struct sockaddr_in *d4, *m4;
		uint32_t scope;

		d4 = (struct sockaddr_in *)dst;
		m4 = (struct sockaddr_in *)mask;
		scope = 0;
		/* The scope id is fetched but not used for IPv4. */
		rt_get_inet_prefix_pmask(rt, &d4->sin_addr, &m4->sin_addr,
		    &scope);
		return;
	}
#endif
#ifdef INET6
	if (dst->sa_family == AF_INET6) {
		struct sockaddr_in6 *d6, *m6;
		uint32_t scope;

		d6 = (struct sockaddr_in6 *)dst;
		m6 = (struct sockaddr_in6 *)mask;
		scope = 0;
		rt_get_inet6_prefix_pmask(rt, &d6->sin6_addr,
		    &m6->sin6_addr, &scope);
		d6->sin6_scope_id = scope;
		return;
	}
#endif
}
928 
929 static int
update_rtm_from_info(struct rt_addrinfo * info,struct rt_msghdr ** prtm,int alloc_len)930 update_rtm_from_info(struct rt_addrinfo *info, struct rt_msghdr **prtm,
931     int alloc_len)
932 {
933 	struct rt_msghdr *rtm, *orig_rtm = NULL;
934 	struct walkarg w;
935 	int len;
936 
937 	rtm = *prtm;
938 	/* Check if we need to realloc storage */
939 	rtsock_msg_buffer(rtm->rtm_type, info, NULL, &len);
940 	if (len > alloc_len) {
941 		struct rt_msghdr *tmp_rtm;
942 
943 		tmp_rtm = malloc(len, M_TEMP, M_NOWAIT);
944 		if (tmp_rtm == NULL)
945 			return (ENOBUFS);
946 		bcopy(rtm, tmp_rtm, rtm->rtm_msglen);
947 		orig_rtm = rtm;
948 		rtm = tmp_rtm;
949 		alloc_len = len;
950 
951 		/*
952 		 * Delay freeing original rtm as info contains
953 		 * data referencing it.
954 		 */
955 	}
956 
957 	w = (struct walkarg ){
958 		.w_tmem = (caddr_t)rtm,
959 		.w_tmemsize = alloc_len,
960 	};
961 	rtsock_msg_buffer(rtm->rtm_type, info, &w, &len);
962 	rtm->rtm_addrs = info->rti_addrs;
963 
964 	if (orig_rtm != NULL)
965 		free(orig_rtm, M_TEMP);
966 	*prtm = rtm;
967 	return (0);
968 }
969 
970 
971 /*
972  * Update sockaddrs, flags, etc in @prtm based on @rc data.
973  * rtm can be reallocated.
974  *
975  * Returns 0 on success, along with pointer to (potentially reallocated)
976  *  rtm.
977  *
978  */
979 static int
update_rtm_from_rc(struct rt_addrinfo * info,struct rt_msghdr ** prtm,int alloc_len,struct rib_cmd_info * rc,struct nhop_object * nh)980 update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
981     int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh)
982 {
983 	union sockaddr_union saun;
984 	struct rt_msghdr *rtm;
985 	struct ifnet *ifp;
986 	int error;
987 
988 	rtm = *prtm;
989 	union sockaddr_union sa_dst, sa_mask;
990 	int family = info->rti_info[RTAX_DST]->sa_family;
991 	init_sockaddrs_family(family, &sa_dst.sa, &sa_mask.sa);
992 	export_rtaddrs(rc->rc_rt, &sa_dst.sa, &sa_mask.sa);
993 
994 	info->rti_info[RTAX_DST] = &sa_dst.sa;
995 	info->rti_info[RTAX_NETMASK] = rt_is_host(rc->rc_rt) ? NULL : &sa_mask.sa;
996 	info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
997 	info->rti_info[RTAX_GENMASK] = 0;
998 	ifp = nh->nh_ifp;
999 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1000 		if (ifp) {
1001 			info->rti_info[RTAX_IFP] =
1002 			    ifp->if_addr->ifa_addr;
1003 			error = rtm_get_jailed(info, ifp, nh,
1004 			    &saun, curthread->td_ucred);
1005 			if (error != 0)
1006 				return (error);
1007 			if (ifp->if_flags & IFF_POINTOPOINT)
1008 				info->rti_info[RTAX_BRD] =
1009 				    nh->nh_ifa->ifa_dstaddr;
1010 			rtm->rtm_index = ifp->if_index;
1011 		} else {
1012 			info->rti_info[RTAX_IFP] = NULL;
1013 			info->rti_info[RTAX_IFA] = NULL;
1014 		}
1015 	} else if (ifp != NULL)
1016 		rtm->rtm_index = ifp->if_index;
1017 
1018 	if ((error = update_rtm_from_info(info, prtm, alloc_len)) != 0)
1019 		return (error);
1020 
1021 	rtm = *prtm;
1022 	rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh);
1023 	if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
1024 		rtm->rtm_flags = RTF_GATEWAY |
1025 			(rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
1026 	rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx);
1027 	rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight;
1028 
1029 	return (0);
1030 }
1031 
1032 #ifdef ROUTE_MPATH
1033 static void
save_del_notification(const struct rib_cmd_info * rc,void * _cbdata)1034 save_del_notification(const struct rib_cmd_info *rc, void *_cbdata)
1035 {
1036 	struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
1037 
1038 	if (rc->rc_cmd == RTM_DELETE)
1039 		*rc_new = *rc;
1040 }
1041 
1042 static void
save_add_notification(const struct rib_cmd_info * rc,void * _cbdata)1043 save_add_notification(const struct rib_cmd_info *rc, void *_cbdata)
1044 {
1045 	struct rib_cmd_info *rc_new = (struct rib_cmd_info *)_cbdata;
1046 
1047 	if (rc->rc_cmd == RTM_ADD)
1048 		*rc_new = *rc;
1049 }
1050 #endif
1051 
1052 #if defined(INET6) || defined(INET)
1053 static struct sockaddr *
alloc_sockaddr_aligned(struct linear_buffer * lb,int len)1054 alloc_sockaddr_aligned(struct linear_buffer *lb, int len)
1055 {
1056 	len = roundup2(len, sizeof(uint64_t));
1057 	if (lb->offset + len > lb->size)
1058 		return (NULL);
1059 	struct sockaddr *sa = (struct sockaddr *)(lb->base + lb->offset);
1060 	lb->offset += len;
1061 	return (sa);
1062 }
1063 #endif
1064 
1065 static int
rts_send(struct socket * so,int flags,struct mbuf * m,struct sockaddr * nam,struct mbuf * control,struct thread * td)1066 rts_send(struct socket *so, int flags, struct mbuf *m,
1067     struct sockaddr *nam, struct mbuf *control, struct thread *td)
1068 {
1069 	struct rt_msghdr *rtm = NULL;
1070 	struct rt_addrinfo info;
1071 	struct epoch_tracker et;
1072 #ifdef INET6
1073 	struct sockaddr_storage ss;
1074 	struct sockaddr_in6 *sin6;
1075 	int i, rti_need_deembed = 0;
1076 #endif
1077 	int alloc_len = 0, len, error = 0, fibnum;
1078 	sa_family_t saf = AF_UNSPEC;
1079 	struct rib_cmd_info rc;
1080 	struct nhop_object *nh;
1081 
1082 	if ((flags & PRUS_OOB) || control != NULL) {
1083 		m_freem(m);
1084 		if (control != NULL)
1085 			m_freem(control);
1086 		return (EOPNOTSUPP);
1087 	}
1088 
1089 	fibnum = so->so_fibnum;
1090 #define senderr(e) { error = e; goto flush;}
1091 	if (m == NULL || ((m->m_len < sizeof(long)) &&
1092 		       (m = m_pullup(m, sizeof(long))) == NULL))
1093 		return (ENOBUFS);
1094 	if ((m->m_flags & M_PKTHDR) == 0)
1095 		panic("route_output");
1096 	NET_EPOCH_ENTER(et);
1097 	len = m->m_pkthdr.len;
1098 	if (len < sizeof(*rtm) ||
1099 	    len != mtod(m, struct rt_msghdr *)->rtm_msglen)
1100 		senderr(EINVAL);
1101 
1102 	/*
1103 	 * Most of current messages are in range 200-240 bytes,
1104 	 * minimize possible re-allocation on reply using larger size
1105 	 * buffer aligned on 1k boundaty.
1106 	 */
1107 	alloc_len = roundup2(len, 1024);
1108 	int total_len = alloc_len + SCRATCH_BUFFER_SIZE;
1109 	if ((rtm = malloc(total_len, M_TEMP, M_NOWAIT)) == NULL)
1110 		senderr(ENOBUFS);
1111 
1112 	m_copydata(m, 0, len, (caddr_t)rtm);
1113 	bzero(&info, sizeof(info));
1114 	nh = NULL;
1115 	struct linear_buffer lb = {
1116 		.base = (char *)rtm + alloc_len,
1117 		.size = SCRATCH_BUFFER_SIZE,
1118 	};
1119 
1120 	if (rtm->rtm_version != RTM_VERSION) {
1121 		/* Do not touch message since format is unknown */
1122 		free(rtm, M_TEMP);
1123 		rtm = NULL;
1124 		senderr(EPROTONOSUPPORT);
1125 	}
1126 
1127 	/*
1128 	 * Starting from here, it is possible
1129 	 * to alter original message and insert
1130 	 * caller PID and error value.
1131 	 */
1132 
1133 	if ((error = fill_addrinfo(rtm, len, &lb, fibnum, &info)) != 0) {
1134 		senderr(error);
1135 	}
1136 	/* fill_addringo() embeds scope into IPv6 addresses */
1137 #ifdef INET6
1138 	rti_need_deembed = 1;
1139 #endif
1140 
1141 	saf = info.rti_info[RTAX_DST]->sa_family;
1142 
1143 	/* support for new ARP code */
1144 	if (rtm->rtm_flags & RTF_LLDATA) {
1145 		error = lla_rt_output(rtm, &info);
1146 		goto flush;
1147 	}
1148 
1149 	union sockaddr_union gw_saun;
1150 	int blackhole_flags = rtm->rtm_flags & (RTF_BLACKHOLE|RTF_REJECT);
1151 	if (blackhole_flags != 0) {
1152 		if (blackhole_flags != (RTF_BLACKHOLE | RTF_REJECT))
1153 			error = fill_blackholeinfo(&info, &gw_saun);
1154 		else {
1155 			RTS_PID_LOG(LOG_DEBUG, "both BLACKHOLE and REJECT flags specifiied");
1156 			error = EINVAL;
1157 		}
1158 		if (error != 0)
1159 			senderr(error);
1160 	}
1161 
1162 	switch (rtm->rtm_type) {
1163 	case RTM_ADD:
1164 	case RTM_CHANGE:
1165 		if (rtm->rtm_type == RTM_ADD) {
1166 			if (info.rti_info[RTAX_GATEWAY] == NULL) {
1167 				RTS_PID_LOG(LOG_DEBUG, "RTM_ADD w/o gateway");
1168 				senderr(EINVAL);
1169 			}
1170 		}
1171 		error = rib_action(fibnum, rtm->rtm_type, &info, &rc);
1172 		if (error == 0) {
1173 			rtsock_notify_event(fibnum, &rc);
1174 #ifdef ROUTE_MPATH
1175 			if (NH_IS_NHGRP(rc.rc_nh_new) ||
1176 			    (rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
1177 				struct rib_cmd_info rc_simple = {};
1178 				rib_decompose_notification(&rc,
1179 				    save_add_notification, (void *)&rc_simple);
1180 				rc = rc_simple;
1181 			}
1182 #endif
1183 			/* nh MAY be empty if RTM_CHANGE request is no-op */
1184 			nh = rc.rc_nh_new;
1185 			if (nh != NULL) {
1186 				rtm->rtm_index = nh->nh_ifp->if_index;
1187 				rtm->rtm_flags = rc.rc_rt->rte_flags | nhop_get_rtflags(nh);
1188 			}
1189 		}
1190 		break;
1191 
1192 	case RTM_DELETE:
1193 		error = rib_action(fibnum, RTM_DELETE, &info, &rc);
1194 		if (error == 0) {
1195 			rtsock_notify_event(fibnum, &rc);
1196 #ifdef ROUTE_MPATH
1197 			if (NH_IS_NHGRP(rc.rc_nh_old) ||
1198 			    (rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
1199 				struct rib_cmd_info rc_simple = {};
1200 				rib_decompose_notification(&rc,
1201 				    save_del_notification, (void *)&rc_simple);
1202 				rc = rc_simple;
1203 			}
1204 #endif
1205 			nh = rc.rc_nh_old;
1206 		}
1207 		break;
1208 
1209 	case RTM_GET:
1210 		error = handle_rtm_get(&info, fibnum, rtm, &rc);
1211 		if (error != 0)
1212 			senderr(error);
1213 		nh = rc.rc_nh_new;
1214 
1215 		if (!rt_is_exportable(rc.rc_rt, curthread->td_ucred))
1216 			senderr(ESRCH);
1217 		break;
1218 
1219 	default:
1220 		senderr(EOPNOTSUPP);
1221 	}
1222 
1223 	if (error == 0 && nh != NULL) {
1224 		error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh);
1225 		/*
1226 		 * Note that some sockaddr pointers may have changed to
1227 		 * point to memory outsize @rtm. Some may be pointing
1228 		 * to the on-stack variables.
1229 		 * Given that, any pointer in @info CANNOT BE USED.
1230 		 */
1231 
1232 		/*
1233 		 * scopeid deembedding has been performed while
1234 		 * writing updated rtm in rtsock_msg_buffer().
1235 		 * With that in mind, skip deembedding procedure below.
1236 		 */
1237 #ifdef INET6
1238 		rti_need_deembed = 0;
1239 #endif
1240 	}
1241 
1242 flush:
1243 	NET_EPOCH_EXIT(et);
1244 
1245 #ifdef INET6
1246 	if (rtm != NULL) {
1247 		if (rti_need_deembed) {
1248 			/* sin6_scope_id is recovered before sending rtm. */
1249 			sin6 = (struct sockaddr_in6 *)&ss;
1250 			for (i = 0; i < RTAX_MAX; i++) {
1251 				if (info.rti_info[i] == NULL)
1252 					continue;
1253 				if (info.rti_info[i]->sa_family != AF_INET6)
1254 					continue;
1255 				bcopy(info.rti_info[i], sin6, sizeof(*sin6));
1256 				if (sa6_recoverscope(sin6) == 0)
1257 					bcopy(sin6, info.rti_info[i],
1258 						    sizeof(*sin6));
1259 			}
1260 			if (update_rtm_from_info(&info, &rtm, alloc_len) != 0) {
1261 				if (error != 0)
1262 					error = ENOBUFS;
1263 			}
1264 		}
1265 	}
1266 #endif
1267 	send_rtm_reply(so, rtm, m, saf, fibnum, error);
1268 
1269 	return (error);
1270 }
1271 
1272 /*
1273  * Sends the prepared reply message in @rtm to all rtsock clients.
1274  * Frees @m and @rtm.
1275  *
1276  */
1277 static void
send_rtm_reply(struct socket * so,struct rt_msghdr * rtm,struct mbuf * m,sa_family_t saf,u_int fibnum,int rtm_errno)1278 send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m,
1279     sa_family_t saf, u_int fibnum, int rtm_errno)
1280 {
1281 	struct rcb *rcb = NULL;
1282 
1283 	/*
1284 	 * Check to see if we don't want our own messages.
1285 	 */
1286 	if ((so->so_options & SO_USELOOPBACK) == 0) {
1287 		if (V_route_cb.any_count <= 1) {
1288 			if (rtm != NULL)
1289 				free(rtm, M_TEMP);
1290 			m_freem(m);
1291 			return;
1292 		}
1293 		/* There is another listener, so construct message */
1294 		rcb = so->so_pcb;
1295 	}
1296 
1297 	if (rtm != NULL) {
1298 		if (rtm_errno!= 0)
1299 			rtm->rtm_errno = rtm_errno;
1300 		else
1301 			rtm->rtm_flags |= RTF_DONE;
1302 
1303 		m_copyback(m, 0, rtm->rtm_msglen, (caddr_t)rtm);
1304 		if (m->m_pkthdr.len < rtm->rtm_msglen) {
1305 			m_freem(m);
1306 			m = NULL;
1307 		} else if (m->m_pkthdr.len > rtm->rtm_msglen)
1308 			m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
1309 
1310 		free(rtm, M_TEMP);
1311 	}
1312 	if (m != NULL) {
1313 		M_SETFIB(m, fibnum);
1314 		m->m_flags |= RTS_FILTER_FIB;
1315 		if (rcb) {
1316 			/*
1317 			 * XXX insure we don't get a copy by
1318 			 * invalidating our protocol
1319 			 */
1320 			sa_family_t family = rcb->rcb_family;
1321 			rcb->rcb_family = AF_UNSPEC;
1322 			rt_dispatch(m, saf);
1323 			rcb->rcb_family = family;
1324 		} else
1325 			rt_dispatch(m, saf);
1326 	}
1327 }
1328 
1329 static void
rt_getmetrics(const struct rtentry * rt,const struct nhop_object * nh,struct rt_metrics * out)1330 rt_getmetrics(const struct rtentry *rt, const struct nhop_object *nh,
1331     struct rt_metrics *out)
1332 {
1333 
1334 	bzero(out, sizeof(*out));
1335 	out->rmx_mtu = nh->nh_mtu;
1336 	out->rmx_weight = rt->rt_weight;
1337 	out->rmx_nhidx = nhop_get_idx(nh);
1338 	/* Kernel -> userland timebase conversion. */
1339 	out->rmx_expire = nhop_get_expire(nh) ?
1340 	    nhop_get_expire(nh) - time_uptime + time_second : 0;
1341 }
1342 
1343 /*
1344  * Extract the addresses of the passed sockaddrs.
1345  * Do a little sanity checking so as to avoid bad memory references.
1346  * This data is derived straight from userland.
1347  */
1348 static int
rt_xaddrs(caddr_t cp,caddr_t cplim,struct rt_addrinfo * rtinfo)1349 rt_xaddrs(caddr_t cp, caddr_t cplim, struct rt_addrinfo *rtinfo)
1350 {
1351 	struct sockaddr *sa;
1352 	int i;
1353 
1354 	for (i = 0; i < RTAX_MAX && cp < cplim; i++) {
1355 		if ((rtinfo->rti_addrs & (1 << i)) == 0)
1356 			continue;
1357 		sa = (struct sockaddr *)cp;
1358 		/*
1359 		 * It won't fit.
1360 		 */
1361 		if (cp + sa->sa_len > cplim) {
1362 			RTS_PID_LOG(LOG_DEBUG, "sa_len too big for sa type %d", i);
1363 			return (EINVAL);
1364 		}
1365 		/*
1366 		 * there are no more.. quit now
1367 		 * If there are more bits, they are in error.
1368 		 * I've seen this. route(1) can evidently generate these.
1369 		 * This causes kernel to core dump.
1370 		 * for compatibility, If we see this, point to a safe address.
1371 		 */
1372 		if (sa->sa_len == 0) {
1373 			rtinfo->rti_info[i] = &sa_zero;
1374 			return (0); /* should be EINVAL but for compat */
1375 		}
1376 		/* accept it */
1377 #ifdef INET6
1378 		if (sa->sa_family == AF_INET6)
1379 			sa6_embedscope((struct sockaddr_in6 *)sa,
1380 			    V_ip6_use_defzone);
1381 #endif
1382 		rtinfo->rti_info[i] = sa;
1383 		cp += SA_SIZE(sa);
1384 	}
1385 	return (0);
1386 }
1387 
1388 #ifdef INET
1389 static inline void
fill_sockaddr_inet(struct sockaddr_in * sin,struct in_addr addr)1390 fill_sockaddr_inet(struct sockaddr_in *sin, struct in_addr addr)
1391 {
1392 
1393 	const struct sockaddr_in nsin = {
1394 		.sin_family = AF_INET,
1395 		.sin_len = sizeof(struct sockaddr_in),
1396 		.sin_addr = addr,
1397 	};
1398 	*sin = nsin;
1399 }
1400 #endif
1401 
1402 #ifdef INET6
1403 static inline void
fill_sockaddr_inet6(struct sockaddr_in6 * sin6,const struct in6_addr * addr6,uint32_t scopeid)1404 fill_sockaddr_inet6(struct sockaddr_in6 *sin6, const struct in6_addr *addr6,
1405     uint32_t scopeid)
1406 {
1407 
1408 	const struct sockaddr_in6 nsin6 = {
1409 		.sin6_family = AF_INET6,
1410 		.sin6_len = sizeof(struct sockaddr_in6),
1411 		.sin6_addr = *addr6,
1412 		.sin6_scope_id = scopeid,
1413 	};
1414 	*sin6 = nsin6;
1415 }
1416 #endif
1417 
1418 #if defined(INET6) || defined(INET)
1419 /*
1420  * Checks if gateway is suitable for lltable operations.
1421  * Lltable code requires AF_LINK gateway with ifindex
1422  *  and mac address specified.
1423  * Returns 0 on success.
1424  */
1425 static int
cleanup_xaddrs_lladdr(struct rt_addrinfo * info)1426 cleanup_xaddrs_lladdr(struct rt_addrinfo *info)
1427 {
1428 	struct sockaddr_dl *sdl = (struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
1429 
1430 	if (sdl->sdl_family != AF_LINK)
1431 		return (EINVAL);
1432 
1433 	if (sdl->sdl_index == 0) {
1434 		RTS_PID_LOG(LOG_DEBUG, "AF_LINK gateway w/o ifindex");
1435 		return (EINVAL);
1436 	}
1437 
1438 	if (offsetof(struct sockaddr_dl, sdl_data) + sdl->sdl_nlen + sdl->sdl_alen > sdl->sdl_len) {
1439 		RTS_PID_LOG(LOG_DEBUG, "AF_LINK gw: sdl_nlen/sdl_alen too large");
1440 		return (EINVAL);
1441 	}
1442 
1443 	return (0);
1444 }
1445 
1446 static int
cleanup_xaddrs_gateway(struct rt_addrinfo * info,struct linear_buffer * lb)1447 cleanup_xaddrs_gateway(struct rt_addrinfo *info, struct linear_buffer *lb)
1448 {
1449 	struct sockaddr *gw = info->rti_info[RTAX_GATEWAY];
1450 	struct sockaddr *sa;
1451 
1452 	if (info->rti_flags & RTF_LLDATA)
1453 		return (cleanup_xaddrs_lladdr(info));
1454 
1455 	switch (gw->sa_family) {
1456 #ifdef INET
1457 	case AF_INET:
1458 		{
1459 			struct sockaddr_in *gw_sin = (struct sockaddr_in *)gw;
1460 
1461 			/* Ensure reads do not go beyoud SA boundary */
1462 			if (SA_SIZE(gw) < offsetof(struct sockaddr_in, sin_zero)) {
1463 				RTS_PID_LOG(LOG_DEBUG, "gateway sin_len too small: %d",
1464 				    gw->sa_len);
1465 				return (EINVAL);
1466 			}
1467 			sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_in));
1468 			if (sa == NULL)
1469 				return (ENOBUFS);
1470 			fill_sockaddr_inet((struct sockaddr_in *)sa, gw_sin->sin_addr);
1471 			info->rti_info[RTAX_GATEWAY] = sa;
1472 		}
1473 		break;
1474 #endif
1475 #ifdef INET6
1476 	case AF_INET6:
1477 		{
1478 			struct sockaddr_in6 *gw_sin6 = (struct sockaddr_in6 *)gw;
1479 			if (gw_sin6->sin6_len < sizeof(struct sockaddr_in6)) {
1480 				RTS_PID_LOG(LOG_DEBUG, "gateway sin6_len too small: %d",
1481 				    gw->sa_len);
1482 				return (EINVAL);
1483 			}
1484 			fill_sockaddr_inet6(gw_sin6, &gw_sin6->sin6_addr, 0);
1485 			break;
1486 		}
1487 #endif
1488 	case AF_LINK:
1489 		{
1490 			struct sockaddr_dl *gw_sdl;
1491 
1492 			size_t sdl_min_len = offsetof(struct sockaddr_dl, sdl_data);
1493 			gw_sdl = (struct sockaddr_dl *)gw;
1494 			if (gw_sdl->sdl_len < sdl_min_len) {
1495 				RTS_PID_LOG(LOG_DEBUG, "gateway sdl_len too small: %d",
1496 				    gw_sdl->sdl_len);
1497 				return (EINVAL);
1498 			}
1499 			sa = alloc_sockaddr_aligned(lb, sizeof(struct sockaddr_dl_short));
1500 			if (sa == NULL)
1501 				return (ENOBUFS);
1502 
1503 			const struct sockaddr_dl_short sdl = {
1504 				.sdl_family = AF_LINK,
1505 				.sdl_len = sizeof(struct sockaddr_dl_short),
1506 				.sdl_index = gw_sdl->sdl_index,
1507 			};
1508 			*((struct sockaddr_dl_short *)sa) = sdl;
1509 			info->rti_info[RTAX_GATEWAY] = sa;
1510 			break;
1511 		}
1512 	}
1513 
1514 	return (0);
1515 }
1516 #endif
1517 
1518 static void
remove_netmask(struct rt_addrinfo * info)1519 remove_netmask(struct rt_addrinfo *info)
1520 {
1521 	info->rti_info[RTAX_NETMASK] = NULL;
1522 	info->rti_flags |= RTF_HOST;
1523 	info->rti_addrs &= ~RTA_NETMASK;
1524 }
1525 
1526 #ifdef INET
1527 static int
cleanup_xaddrs_inet(struct rt_addrinfo * info,struct linear_buffer * lb)1528 cleanup_xaddrs_inet(struct rt_addrinfo *info, struct linear_buffer *lb)
1529 {
1530 	struct sockaddr_in *dst_sa, *mask_sa;
1531 	const int sa_len = sizeof(struct sockaddr_in);
1532 	struct in_addr dst, mask;
1533 
1534 	/* Check & fixup dst/netmask combination first */
1535 	dst_sa = (struct sockaddr_in *)info->rti_info[RTAX_DST];
1536 	mask_sa = (struct sockaddr_in *)info->rti_info[RTAX_NETMASK];
1537 
1538 	/* Ensure reads do not go beyound the buffer size */
1539 	if (SA_SIZE(dst_sa) < offsetof(struct sockaddr_in, sin_zero)) {
1540 		RTS_PID_LOG(LOG_DEBUG, "prefix dst sin_len too small: %d",
1541 		    dst_sa->sin_len);
1542 		return (EINVAL);
1543 	}
1544 
1545 	if ((mask_sa != NULL) && mask_sa->sin_len < sizeof(struct sockaddr_in)) {
1546 		/*
1547 		 * Some older routing software encode mask length into the
1548 		 * sin_len, thus resulting in "truncated" sockaddr.
1549 		 */
1550 		int len = mask_sa->sin_len - offsetof(struct sockaddr_in, sin_addr);
1551 		if (len >= 0) {
1552 			mask.s_addr = 0;
1553 			if (len > sizeof(struct in_addr))
1554 				len = sizeof(struct in_addr);
1555 			memcpy(&mask, &mask_sa->sin_addr, len);
1556 		} else {
1557 			RTS_PID_LOG(LOG_DEBUG, "prefix mask sin_len too small: %d",
1558 			    mask_sa->sin_len);
1559 			return (EINVAL);
1560 		}
1561 	} else
1562 		mask.s_addr = mask_sa ? mask_sa->sin_addr.s_addr : INADDR_BROADCAST;
1563 
1564 	dst.s_addr = htonl(ntohl(dst_sa->sin_addr.s_addr) & ntohl(mask.s_addr));
1565 
1566 	/* Construct new "clean" dst/mask sockaddresses */
1567 	if ((dst_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1568 		return (ENOBUFS);
1569 	fill_sockaddr_inet(dst_sa, dst);
1570 	info->rti_info[RTAX_DST] = (struct sockaddr *)dst_sa;
1571 
1572 	if (mask.s_addr != INADDR_BROADCAST) {
1573 		if ((mask_sa = (struct sockaddr_in *)alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1574 			return (ENOBUFS);
1575 		fill_sockaddr_inet(mask_sa, mask);
1576 		info->rti_info[RTAX_NETMASK] = (struct sockaddr *)mask_sa;
1577 		info->rti_flags &= ~RTF_HOST;
1578 	} else
1579 		remove_netmask(info);
1580 
1581 	/* Check gateway */
1582 	if (info->rti_info[RTAX_GATEWAY] != NULL)
1583 		return (cleanup_xaddrs_gateway(info, lb));
1584 
1585 	return (0);
1586 }
1587 #endif
1588 
1589 #ifdef INET6
1590 static int
cleanup_xaddrs_inet6(struct rt_addrinfo * info,struct linear_buffer * lb)1591 cleanup_xaddrs_inet6(struct rt_addrinfo *info, struct linear_buffer *lb)
1592 {
1593 	struct sockaddr *sa;
1594 	struct sockaddr_in6 *dst_sa, *mask_sa;
1595 	struct in6_addr mask, *dst;
1596 	const int sa_len = sizeof(struct sockaddr_in6);
1597 
1598 	/* Check & fixup dst/netmask combination first */
1599 	dst_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_DST];
1600 	mask_sa = (struct sockaddr_in6 *)info->rti_info[RTAX_NETMASK];
1601 
1602 	if (dst_sa->sin6_len < sizeof(struct sockaddr_in6)) {
1603 		RTS_PID_LOG(LOG_DEBUG, "prefix dst sin6_len too small: %d",
1604 		    dst_sa->sin6_len);
1605 		return (EINVAL);
1606 	}
1607 
1608 	if (mask_sa && mask_sa->sin6_len < sizeof(struct sockaddr_in6)) {
1609 		/*
1610 		 * Some older routing software encode mask length into the
1611 		 * sin6_len, thus resulting in "truncated" sockaddr.
1612 		 */
1613 		int len = mask_sa->sin6_len - offsetof(struct sockaddr_in6, sin6_addr);
1614 		if (len >= 0) {
1615 			bzero(&mask, sizeof(mask));
1616 			if (len > sizeof(struct in6_addr))
1617 				len = sizeof(struct in6_addr);
1618 			memcpy(&mask, &mask_sa->sin6_addr, len);
1619 		} else {
1620 			RTS_PID_LOG(LOG_DEBUG, "rtsock: prefix mask sin6_len too small: %d",
1621 			    mask_sa->sin6_len);
1622 			return (EINVAL);
1623 		}
1624 	} else
1625 		mask = mask_sa ? mask_sa->sin6_addr : in6mask128;
1626 
1627 	dst = &dst_sa->sin6_addr;
1628 	IN6_MASK_ADDR(dst, &mask);
1629 
1630 	if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1631 		return (ENOBUFS);
1632 	fill_sockaddr_inet6((struct sockaddr_in6 *)sa, dst, 0);
1633 	info->rti_info[RTAX_DST] = sa;
1634 
1635 	if (!IN6_ARE_ADDR_EQUAL(&mask, &in6mask128)) {
1636 		if ((sa = alloc_sockaddr_aligned(lb, sa_len)) == NULL)
1637 			return (ENOBUFS);
1638 		fill_sockaddr_inet6((struct sockaddr_in6 *)sa, &mask, 0);
1639 		info->rti_info[RTAX_NETMASK] = sa;
1640 		info->rti_flags &= ~RTF_HOST;
1641 	} else
1642 		remove_netmask(info);
1643 
1644 	/* Check gateway */
1645 	if (info->rti_info[RTAX_GATEWAY] != NULL)
1646 		return (cleanup_xaddrs_gateway(info, lb));
1647 
1648 	return (0);
1649 }
1650 #endif
1651 
1652 static int
cleanup_xaddrs(struct rt_addrinfo * info,struct linear_buffer * lb)1653 cleanup_xaddrs(struct rt_addrinfo *info, struct linear_buffer *lb)
1654 {
1655 	int error = EAFNOSUPPORT;
1656 
1657 	if (info->rti_info[RTAX_DST] == NULL) {
1658 		RTS_PID_LOG(LOG_DEBUG, "prefix dst is not set");
1659 		return (EINVAL);
1660 	}
1661 
1662 	if (info->rti_flags & RTF_LLDATA) {
1663 		/*
1664 		 * arp(8)/ndp(8) sends RTA_NETMASK for the associated
1665 		 * prefix along with the actual address in RTA_DST.
1666 		 * Remove netmask to avoid unnecessary address masking.
1667 		 */
1668 		remove_netmask(info);
1669 	}
1670 
1671 	switch (info->rti_info[RTAX_DST]->sa_family) {
1672 #ifdef INET
1673 	case AF_INET:
1674 		error = cleanup_xaddrs_inet(info, lb);
1675 		break;
1676 #endif
1677 #ifdef INET6
1678 	case AF_INET6:
1679 		error = cleanup_xaddrs_inet6(info, lb);
1680 		break;
1681 #endif
1682 	}
1683 
1684 	return (error);
1685 }
1686 
1687 /*
1688  * Fill in @dmask with valid netmask leaving original @smask
1689  * intact. Mostly used with radix netmasks.
1690  */
1691 struct sockaddr *
rtsock_fix_netmask(const struct sockaddr * dst,const struct sockaddr * smask,struct sockaddr_storage * dmask)1692 rtsock_fix_netmask(const struct sockaddr *dst, const struct sockaddr *smask,
1693     struct sockaddr_storage *dmask)
1694 {
1695 	if (dst == NULL || smask == NULL)
1696 		return (NULL);
1697 
1698 	memset(dmask, 0, dst->sa_len);
1699 	memcpy(dmask, smask, smask->sa_len);
1700 	dmask->ss_len = dst->sa_len;
1701 	dmask->ss_family = dst->sa_family;
1702 
1703 	return ((struct sockaddr *)dmask);
1704 }
1705 
1706 /*
1707  * Writes information related to @rtinfo object to newly-allocated mbuf.
1708  * Assumes MCLBYTES is enough to construct any message.
1709  * Used for OS notifications of vaious events (if/ifa announces,etc)
1710  *
1711  * Returns allocated mbuf or NULL on failure.
1712  */
1713 static struct mbuf *
rtsock_msg_mbuf(int type,struct rt_addrinfo * rtinfo)1714 rtsock_msg_mbuf(int type, struct rt_addrinfo *rtinfo)
1715 {
1716 	struct sockaddr_storage ss;
1717 	struct rt_msghdr *rtm;
1718 	struct mbuf *m;
1719 	int i;
1720 	struct sockaddr *sa;
1721 #ifdef INET6
1722 	struct sockaddr_in6 *sin6;
1723 #endif
1724 	int len, dlen;
1725 
1726 	switch (type) {
1727 	case RTM_DELADDR:
1728 	case RTM_NEWADDR:
1729 		len = sizeof(struct ifa_msghdr);
1730 		break;
1731 
1732 	case RTM_DELMADDR:
1733 	case RTM_NEWMADDR:
1734 		len = sizeof(struct ifma_msghdr);
1735 		break;
1736 
1737 	case RTM_IFINFO:
1738 		len = sizeof(struct if_msghdr);
1739 		break;
1740 
1741 	case RTM_IFANNOUNCE:
1742 	case RTM_IEEE80211:
1743 		len = sizeof(struct if_announcemsghdr);
1744 		break;
1745 
1746 	default:
1747 		len = sizeof(struct rt_msghdr);
1748 	}
1749 
1750 	/* XXXGL: can we use MJUMPAGESIZE cluster here? */
1751 	KASSERT(len <= MCLBYTES, ("%s: message too big", __func__));
1752 	if (len > MHLEN)
1753 		m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
1754 	else
1755 		m = m_gethdr(M_NOWAIT, MT_DATA);
1756 	if (m == NULL)
1757 		return (m);
1758 
1759 	m->m_pkthdr.len = m->m_len = len;
1760 	rtm = mtod(m, struct rt_msghdr *);
1761 	bzero((caddr_t)rtm, len);
1762 	for (i = 0; i < RTAX_MAX; i++) {
1763 		if ((sa = rtinfo->rti_info[i]) == NULL)
1764 			continue;
1765 		rtinfo->rti_addrs |= (1 << i);
1766 
1767 		dlen = SA_SIZE(sa);
1768 		KASSERT(dlen <= sizeof(ss),
1769 		    ("%s: sockaddr size overflow", __func__));
1770 		bzero(&ss, sizeof(ss));
1771 		bcopy(sa, &ss, sa->sa_len);
1772 		sa = (struct sockaddr *)&ss;
1773 #ifdef INET6
1774 		if (sa->sa_family == AF_INET6) {
1775 			sin6 = (struct sockaddr_in6 *)sa;
1776 			(void)sa6_recoverscope(sin6);
1777 		}
1778 #endif
1779 		m_copyback(m, len, dlen, (caddr_t)sa);
1780 		len += dlen;
1781 	}
1782 	if (m->m_pkthdr.len != len) {
1783 		m_freem(m);
1784 		return (NULL);
1785 	}
1786 	rtm->rtm_msglen = len;
1787 	rtm->rtm_version = RTM_VERSION;
1788 	rtm->rtm_type = type;
1789 	return (m);
1790 }
1791 
1792 /*
1793  * Writes information related to @rtinfo object to preallocated buffer.
1794  * Stores needed size in @plen. If @w is NULL, calculates size without
1795  * writing.
1796  * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
1797  *
1798  * Returns 0 on success.
1799  *
1800  */
1801 static int
rtsock_msg_buffer(int type,struct rt_addrinfo * rtinfo,struct walkarg * w,int * plen)1802 rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
1803 {
1804 	struct sockaddr_storage ss;
1805 	int len, buflen = 0, dlen, i;
1806 	caddr_t cp = NULL;
1807 	struct rt_msghdr *rtm = NULL;
1808 #ifdef INET6
1809 	struct sockaddr_in6 *sin6;
1810 #endif
1811 #ifdef COMPAT_FREEBSD32
1812 	bool compat32;
1813 
1814 	compat32 = w != NULL && w->w_req != NULL &&
1815 	    (w->w_req->flags & SCTL_MASK32);
1816 #endif
1817 
1818 	switch (type) {
1819 	case RTM_DELADDR:
1820 	case RTM_NEWADDR:
1821 		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
1822 #ifdef COMPAT_FREEBSD32
1823 			if (compat32)
1824 				len = sizeof(struct ifa_msghdrl32);
1825 			else
1826 #endif
1827 				len = sizeof(struct ifa_msghdrl);
1828 		} else
1829 			len = sizeof(struct ifa_msghdr);
1830 		break;
1831 
1832 	case RTM_IFINFO:
1833 		if (w != NULL && w->w_op == NET_RT_IFLISTL) {
1834 #ifdef COMPAT_FREEBSD32
1835 			if (compat32)
1836 				len = sizeof(struct if_msghdrl32);
1837 			else
1838 #endif
1839 				len = sizeof(struct if_msghdrl);
1840 		} else {
1841 #ifdef COMPAT_FREEBSD32
1842 			if (compat32)
1843 				len = sizeof(struct if_msghdr32);
1844 			else
1845 #endif
1846 				len = sizeof(struct if_msghdr);
1847 		}
1848 		break;
1849 
1850 	case RTM_NEWMADDR:
1851 		len = sizeof(struct ifma_msghdr);
1852 		break;
1853 
1854 	default:
1855 		len = sizeof(struct rt_msghdr);
1856 	}
1857 
1858 	if (w != NULL) {
1859 		rtm = (struct rt_msghdr *)w->w_tmem;
1860 		buflen = w->w_tmemsize - len;
1861 		cp = (caddr_t)w->w_tmem + len;
1862 	}
1863 
1864 	rtinfo->rti_addrs = 0;
1865 	for (i = 0; i < RTAX_MAX; i++) {
1866 		struct sockaddr *sa;
1867 
1868 		if ((sa = rtinfo->rti_info[i]) == NULL)
1869 			continue;
1870 		rtinfo->rti_addrs |= (1 << i);
1871 #ifdef COMPAT_FREEBSD32
1872 		if (compat32)
1873 			dlen = SA_SIZE32(sa);
1874 		else
1875 #endif
1876 			dlen = SA_SIZE(sa);
1877 		if (cp != NULL && buflen >= dlen) {
1878 			KASSERT(dlen <= sizeof(ss),
1879 			    ("%s: sockaddr size overflow", __func__));
1880 			bzero(&ss, sizeof(ss));
1881 			bcopy(sa, &ss, sa->sa_len);
1882 			sa = (struct sockaddr *)&ss;
1883 #ifdef INET6
1884 			if (sa->sa_family == AF_INET6) {
1885 				sin6 = (struct sockaddr_in6 *)sa;
1886 				(void)sa6_recoverscope(sin6);
1887 			}
1888 #endif
1889 			bcopy((caddr_t)sa, cp, (unsigned)dlen);
1890 			cp += dlen;
1891 			buflen -= dlen;
1892 		} else if (cp != NULL) {
1893 			/*
1894 			 * Buffer too small. Count needed size
1895 			 * and return with error.
1896 			 */
1897 			cp = NULL;
1898 		}
1899 
1900 		len += dlen;
1901 	}
1902 
1903 	if (cp != NULL) {
1904 		dlen = ALIGN(len) - len;
1905 		if (buflen < dlen)
1906 			cp = NULL;
1907 		else {
1908 			bzero(cp, dlen);
1909 			cp += dlen;
1910 			buflen -= dlen;
1911 		}
1912 	}
1913 	len = ALIGN(len);
1914 
1915 	if (cp != NULL) {
1916 		/* fill header iff buffer is large enough */
1917 		rtm->rtm_version = RTM_VERSION;
1918 		rtm->rtm_type = type;
1919 		rtm->rtm_msglen = len;
1920 	}
1921 
1922 	*plen = len;
1923 
1924 	if (w != NULL && cp == NULL)
1925 		return (ENOBUFS);
1926 
1927 	return (0);
1928 }
1929 
1930 /*
1931  * This routine is called to generate a message from the routing
1932  * socket indicating that a redirect has occurred, a routing lookup
1933  * has failed, or that a protocol has detected timeouts to a particular
1934  * destination.
1935  */
1936 void
rt_missmsg_fib(int type,struct rt_addrinfo * rtinfo,int flags,int error,int fibnum)1937 rt_missmsg_fib(int type, struct rt_addrinfo *rtinfo, int flags, int error,
1938     int fibnum)
1939 {
1940 	struct rt_msghdr *rtm;
1941 	struct mbuf *m;
1942 	struct sockaddr *sa = rtinfo->rti_info[RTAX_DST];
1943 
1944 	if (V_route_cb.any_count == 0)
1945 		return;
1946 	m = rtsock_msg_mbuf(type, rtinfo);
1947 	if (m == NULL)
1948 		return;
1949 
1950 	if (fibnum != RT_ALL_FIBS) {
1951 		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
1952 		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
1953 		M_SETFIB(m, fibnum);
1954 		m->m_flags |= RTS_FILTER_FIB;
1955 	}
1956 
1957 	rtm = mtod(m, struct rt_msghdr *);
1958 	rtm->rtm_flags = RTF_DONE | flags;
1959 	rtm->rtm_errno = error;
1960 	rtm->rtm_addrs = rtinfo->rti_addrs;
1961 	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
1962 }
1963 
1964 void
rt_missmsg(int type,struct rt_addrinfo * rtinfo,int flags,int error)1965 rt_missmsg(int type, struct rt_addrinfo *rtinfo, int flags, int error)
1966 {
1967 
1968 	rt_missmsg_fib(type, rtinfo, flags, error, RT_ALL_FIBS);
1969 }
1970 
1971 /*
1972  * This routine is called to generate a message from the routing
1973  * socket indicating that the status of a network interface has changed.
1974  */
1975 static void
rtsock_ifmsg(struct ifnet * ifp,int if_flags_mask __unused)1976 rtsock_ifmsg(struct ifnet *ifp, int if_flags_mask __unused)
1977 {
1978 	struct if_msghdr *ifm;
1979 	struct mbuf *m;
1980 	struct rt_addrinfo info;
1981 
1982 	if (V_route_cb.any_count == 0)
1983 		return;
1984 	bzero((caddr_t)&info, sizeof(info));
1985 	m = rtsock_msg_mbuf(RTM_IFINFO, &info);
1986 	if (m == NULL)
1987 		return;
1988 	ifm = mtod(m, struct if_msghdr *);
1989 	ifm->ifm_index = ifp->if_index;
1990 	ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
1991 	if_data_copy(ifp, &ifm->ifm_data);
1992 	ifm->ifm_addrs = 0;
1993 	rt_dispatch(m, AF_UNSPEC);
1994 }
1995 
1996 /*
1997  * Announce interface address arrival/withdraw.
1998  * Please do not call directly, use rt_addrmsg().
1999  * Assume input data to be valid.
2000  * Returns 0 on success.
2001  */
2002 int
rtsock_addrmsg(int cmd,struct ifaddr * ifa,int fibnum)2003 rtsock_addrmsg(int cmd, struct ifaddr *ifa, int fibnum)
2004 {
2005 	struct rt_addrinfo info;
2006 	struct sockaddr *sa;
2007 	int ncmd;
2008 	struct mbuf *m;
2009 	struct ifa_msghdr *ifam;
2010 	struct ifnet *ifp = ifa->ifa_ifp;
2011 	struct sockaddr_storage ss;
2012 
2013 	if (V_route_cb.any_count == 0)
2014 		return (0);
2015 
2016 	ncmd = cmd == RTM_ADD ? RTM_NEWADDR : RTM_DELADDR;
2017 
2018 	bzero((caddr_t)&info, sizeof(info));
2019 	info.rti_info[RTAX_IFA] = sa = ifa->ifa_addr;
2020 	info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
2021 	info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
2022 	    info.rti_info[RTAX_IFA], ifa->ifa_netmask, &ss);
2023 	info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2024 	if ((m = rtsock_msg_mbuf(ncmd, &info)) == NULL)
2025 		return (ENOBUFS);
2026 	ifam = mtod(m, struct ifa_msghdr *);
2027 	ifam->ifam_index = ifp->if_index;
2028 	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2029 	ifam->ifam_flags = ifa->ifa_flags;
2030 	ifam->ifam_addrs = info.rti_addrs;
2031 
2032 	if (fibnum != RT_ALL_FIBS) {
2033 		M_SETFIB(m, fibnum);
2034 		m->m_flags |= RTS_FILTER_FIB;
2035 	}
2036 
2037 	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
2038 
2039 	return (0);
2040 }
2041 
2042 /*
2043  * Announce route addition/removal to rtsock based on @rt data.
2044  * Callers are advives to use rt_routemsg() instead of using this
2045  *  function directly.
2046  * Assume @rt data is consistent.
2047  *
2048  * Returns 0 on success.
2049  */
2050 int
rtsock_routemsg(int cmd,struct rtentry * rt,struct nhop_object * nh,int fibnum)2051 rtsock_routemsg(int cmd, struct rtentry *rt, struct nhop_object *nh,
2052     int fibnum)
2053 {
2054 	union sockaddr_union dst, mask;
2055 	struct rt_addrinfo info;
2056 
2057 	if (V_route_cb.any_count == 0)
2058 		return (0);
2059 
2060 	int family = rt_get_family(rt);
2061 	init_sockaddrs_family(family, &dst.sa, &mask.sa);
2062 	export_rtaddrs(rt, &dst.sa, &mask.sa);
2063 
2064 	bzero((caddr_t)&info, sizeof(info));
2065 	info.rti_info[RTAX_DST] = &dst.sa;
2066 	info.rti_info[RTAX_NETMASK] = &mask.sa;
2067 	info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
2068 	info.rti_flags = rt->rte_flags | nhop_get_rtflags(nh);
2069 	info.rti_ifp = nh->nh_ifp;
2070 
2071 	return (rtsock_routemsg_info(cmd, &info, fibnum));
2072 }
2073 
2074 int
rtsock_routemsg_info(int cmd,struct rt_addrinfo * info,int fibnum)2075 rtsock_routemsg_info(int cmd, struct rt_addrinfo *info, int fibnum)
2076 {
2077 	struct rt_msghdr *rtm;
2078 	struct sockaddr *sa;
2079 	struct mbuf *m;
2080 
2081 	if (V_route_cb.any_count == 0)
2082 		return (0);
2083 
2084 	if (info->rti_flags & RTF_HOST)
2085 		info->rti_info[RTAX_NETMASK] = NULL;
2086 
2087 	m = rtsock_msg_mbuf(cmd, info);
2088 	if (m == NULL)
2089 		return (ENOBUFS);
2090 
2091 	if (fibnum != RT_ALL_FIBS) {
2092 		KASSERT(fibnum >= 0 && fibnum < rt_numfibs, ("%s: fibnum out "
2093 		    "of range 0 <= %d < %d", __func__, fibnum, rt_numfibs));
2094 		M_SETFIB(m, fibnum);
2095 		m->m_flags |= RTS_FILTER_FIB;
2096 	}
2097 
2098 	rtm = mtod(m, struct rt_msghdr *);
2099 	rtm->rtm_addrs = info->rti_addrs;
2100 	if (info->rti_ifp != NULL)
2101 		rtm->rtm_index = info->rti_ifp->if_index;
2102 	/* Add RTF_DONE to indicate command 'completion' required by API */
2103 	info->rti_flags |= RTF_DONE;
2104 	/* Reported routes has to be up */
2105 	if (cmd == RTM_ADD || cmd == RTM_CHANGE)
2106 		info->rti_flags |= RTF_UP;
2107 	rtm->rtm_flags = info->rti_flags;
2108 
2109 	sa = info->rti_info[RTAX_DST];
2110 	rt_dispatch(m, sa ? sa->sa_family : AF_UNSPEC);
2111 
2112 	return (0);
2113 }
2114 
2115 /*
2116  * This is the analogue to the rt_newaddrmsg which performs the same
2117  * function but for multicast group memberhips.  This is easier since
2118  * there is no route state to worry about.
2119  */
2120 void
rt_newmaddrmsg(int cmd,struct ifmultiaddr * ifma)2121 rt_newmaddrmsg(int cmd, struct ifmultiaddr *ifma)
2122 {
2123 	struct rt_addrinfo info;
2124 	struct mbuf *m = NULL;
2125 	struct ifnet *ifp = ifma->ifma_ifp;
2126 	struct ifma_msghdr *ifmam;
2127 
2128 	if (V_route_cb.any_count == 0)
2129 		return;
2130 
2131 	bzero((caddr_t)&info, sizeof(info));
2132 	info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2133 	if (ifp && ifp->if_addr)
2134 		info.rti_info[RTAX_IFP] = ifp->if_addr->ifa_addr;
2135 	else
2136 		info.rti_info[RTAX_IFP] = NULL;
2137 	/*
2138 	 * If a link-layer address is present, present it as a ``gateway''
2139 	 * (similarly to how ARP entries, e.g., are presented).
2140 	 */
2141 	info.rti_info[RTAX_GATEWAY] = ifma->ifma_lladdr;
2142 	m = rtsock_msg_mbuf(cmd, &info);
2143 	if (m == NULL)
2144 		return;
2145 	ifmam = mtod(m, struct ifma_msghdr *);
2146 	KASSERT(ifp != NULL, ("%s: link-layer multicast address w/o ifp\n",
2147 	    __func__));
2148 	ifmam->ifmam_index = ifp->if_index;
2149 	ifmam->ifmam_addrs = info.rti_addrs;
2150 	rt_dispatch(m, ifma->ifma_addr ? ifma->ifma_addr->sa_family : AF_UNSPEC);
2151 }
2152 
2153 static struct mbuf *
rt_makeifannouncemsg(struct ifnet * ifp,int type,int what,struct rt_addrinfo * info,const char * ifname)2154 rt_makeifannouncemsg(struct ifnet *ifp, int type, int what,
2155     struct rt_addrinfo *info, const char *ifname)
2156 {
2157 	struct if_announcemsghdr *ifan;
2158 	struct mbuf *m;
2159 
2160 	if (V_route_cb.any_count == 0)
2161 		return NULL;
2162 	bzero((caddr_t)info, sizeof(*info));
2163 	m = rtsock_msg_mbuf(type, info);
2164 	if (m != NULL) {
2165 		ifan = mtod(m, struct if_announcemsghdr *);
2166 		ifan->ifan_index = ifp->if_index;
2167 		strlcpy(ifan->ifan_name,
2168 		    ifname != NULL ? ifname : ifp->if_xname,
2169 		    sizeof(ifan->ifan_name));
2170 		ifan->ifan_what = what;
2171 	}
2172 	return m;
2173 }
2174 
2175 /*
2176  * This is called to generate routing socket messages indicating
2177  * IEEE80211 wireless events.
2178  * XXX we piggyback on the RTM_IFANNOUNCE msg format in a clumsy way.
2179  */
2180 void
rt_ieee80211msg(struct ifnet * ifp,int what,void * data,size_t data_len)2181 rt_ieee80211msg(struct ifnet *ifp, int what, void *data, size_t data_len)
2182 {
2183 	struct mbuf *m;
2184 	struct rt_addrinfo info;
2185 
2186 	m = rt_makeifannouncemsg(ifp, RTM_IEEE80211, what, &info, NULL);
2187 	if (m != NULL) {
2188 		/*
2189 		 * Append the ieee80211 data.  Try to stick it in the
2190 		 * mbuf containing the ifannounce msg; otherwise allocate
2191 		 * a new mbuf and append.
2192 		 *
2193 		 * NB: we assume m is a single mbuf.
2194 		 */
2195 		if (data_len > M_TRAILINGSPACE(m)) {
2196 			struct mbuf *n = m_get(M_NOWAIT, MT_DATA);
2197 			if (n == NULL) {
2198 				m_freem(m);
2199 				return;
2200 			}
2201 			bcopy(data, mtod(n, void *), data_len);
2202 			n->m_len = data_len;
2203 			m->m_next = n;
2204 		} else if (data_len > 0) {
2205 			bcopy(data, mtod(m, u_int8_t *) + m->m_len, data_len);
2206 			m->m_len += data_len;
2207 		}
2208 		if (m->m_flags & M_PKTHDR)
2209 			m->m_pkthdr.len += data_len;
2210 		mtod(m, struct if_announcemsghdr *)->ifan_msglen += data_len;
2211 		rt_dispatch(m, AF_UNSPEC);
2212 	}
2213 }
2214 
2215 /*
2216  * This is called to generate routing socket messages indicating
2217  * network interface arrival and departure.
2218  */
2219 static void
rt_ifannouncemsg(struct ifnet * ifp,int what,const char * ifname)2220 rt_ifannouncemsg(struct ifnet *ifp, int what, const char *ifname)
2221 {
2222 	struct mbuf *m;
2223 	struct rt_addrinfo info;
2224 
2225 	m = rt_makeifannouncemsg(ifp, RTM_IFANNOUNCE, what, &info, ifname);
2226 	if (m != NULL)
2227 		rt_dispatch(m, AF_UNSPEC);
2228 }
2229 
2230 static void
rt_dispatch(struct mbuf * m,sa_family_t saf)2231 rt_dispatch(struct mbuf *m, sa_family_t saf)
2232 {
2233 
2234 	M_ASSERTPKTHDR(m);
2235 
2236 	m->m_rtsock_family = saf;
2237 	if (V_loif)
2238 		m->m_pkthdr.rcvif = V_loif;
2239 	else {
2240 		m_freem(m);
2241 		return;
2242 	}
2243 	netisr_queue(NETISR_ROUTE, m);	/* mbuf is free'd on failure. */
2244 }
2245 
2246 /*
2247  * This is used in dumping the kernel table via sysctl().
2248  */
2249 static int
sysctl_dumpentry(struct rtentry * rt,void * vw)2250 sysctl_dumpentry(struct rtentry *rt, void *vw)
2251 {
2252 	struct walkarg *w = vw;
2253 	struct nhop_object *nh;
2254 
2255 	NET_EPOCH_ASSERT();
2256 
2257 	if (!rt_is_exportable(rt, w->w_req->td->td_ucred))
2258 		return (0);
2259 
2260 	export_rtaddrs(rt, w->dst, w->mask);
2261 	nh = rt_get_raw_nhop(rt);
2262 #ifdef ROUTE_MPATH
2263 	if (NH_IS_NHGRP(nh)) {
2264 		const struct weightened_nhop *wn;
2265 		uint32_t num_nhops;
2266 		int error;
2267 		wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
2268 		for (int i = 0; i < num_nhops; i++) {
2269 			error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w);
2270 			if (error != 0)
2271 				return (error);
2272 		}
2273 	} else
2274 #endif
2275 		sysctl_dumpnhop(rt, nh, rt->rt_weight, w);
2276 
2277 	return (0);
2278 }
2279 
2280 
2281 static int
sysctl_dumpnhop(struct rtentry * rt,struct nhop_object * nh,uint32_t weight,struct walkarg * w)2282 sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight,
2283     struct walkarg *w)
2284 {
2285 	struct rt_addrinfo info;
2286 	int error = 0, size;
2287 	uint32_t rtflags;
2288 
2289 	rtflags = nhop_get_rtflags(nh);
2290 
2291 	if (w->w_op == NET_RT_FLAGS && !(rtflags & w->w_arg))
2292 		return (0);
2293 
2294 	bzero((caddr_t)&info, sizeof(info));
2295 	info.rti_info[RTAX_DST] = w->dst;
2296 	info.rti_info[RTAX_GATEWAY] = &nh->gw_sa;
2297 	info.rti_info[RTAX_NETMASK] = (rtflags & RTF_HOST) ? NULL : w->mask;
2298 	info.rti_info[RTAX_GENMASK] = 0;
2299 	if (nh->nh_ifp && !(nh->nh_ifp->if_flags & IFF_DYING)) {
2300 		info.rti_info[RTAX_IFP] = nh->nh_ifp->if_addr->ifa_addr;
2301 		info.rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
2302 		if (nh->nh_ifp->if_flags & IFF_POINTOPOINT)
2303 			info.rti_info[RTAX_BRD] = nh->nh_ifa->ifa_dstaddr;
2304 	}
2305 	if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
2306 		return (error);
2307 	if (w->w_req && w->w_tmem) {
2308 		struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
2309 
2310 		bzero(&rtm->rtm_index,
2311 		    sizeof(*rtm) - offsetof(struct rt_msghdr, rtm_index));
2312 
2313 		/*
2314 		 * rte flags may consist of RTF_HOST (duplicated in nhop rtflags)
2315 		 * and RTF_UP (if entry is linked, which is always true here).
2316 		 * Given that, use nhop rtflags & add RTF_UP.
2317 		 */
2318 		rtm->rtm_flags = rtflags | RTF_UP;
2319 		if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
2320 			rtm->rtm_flags = RTF_GATEWAY |
2321 				(rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
2322 		rt_getmetrics(rt, nh, &rtm->rtm_rmx);
2323 		rtm->rtm_rmx.rmx_weight = weight;
2324 		rtm->rtm_index = nh->nh_ifp->if_index;
2325 		rtm->rtm_addrs = info.rti_addrs;
2326 		error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
2327 		return (error);
2328 	}
2329 	return (error);
2330 }
2331 
2332 static int
sysctl_iflist_ifml(struct ifnet * ifp,const struct if_data * src_ifd,struct rt_addrinfo * info,struct walkarg * w,int len)2333 sysctl_iflist_ifml(struct ifnet *ifp, const struct if_data *src_ifd,
2334     struct rt_addrinfo *info, struct walkarg *w, int len)
2335 {
2336 	struct if_msghdrl *ifm;
2337 	struct if_data *ifd;
2338 
2339 	ifm = (struct if_msghdrl *)w->w_tmem;
2340 
2341 #ifdef COMPAT_FREEBSD32
2342 	if (w->w_req->flags & SCTL_MASK32) {
2343 		struct if_msghdrl32 *ifm32;
2344 
2345 		ifm32 = (struct if_msghdrl32 *)ifm;
2346 		ifm32->ifm_addrs = info->rti_addrs;
2347 		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2348 		ifm32->ifm_index = ifp->if_index;
2349 		ifm32->_ifm_spare1 = 0;
2350 		ifm32->ifm_len = sizeof(*ifm32);
2351 		ifm32->ifm_data_off = offsetof(struct if_msghdrl32, ifm_data);
2352 		ifm32->_ifm_spare2 = 0;
2353 		ifd = &ifm32->ifm_data;
2354 	} else
2355 #endif
2356 	{
2357 		ifm->ifm_addrs = info->rti_addrs;
2358 		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2359 		ifm->ifm_index = ifp->if_index;
2360 		ifm->_ifm_spare1 = 0;
2361 		ifm->ifm_len = sizeof(*ifm);
2362 		ifm->ifm_data_off = offsetof(struct if_msghdrl, ifm_data);
2363 		ifm->_ifm_spare2 = 0;
2364 		ifd = &ifm->ifm_data;
2365 	}
2366 
2367 	memcpy(ifd, src_ifd, sizeof(*ifd));
2368 
2369 	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
2370 }
2371 
2372 static int
sysctl_iflist_ifm(struct ifnet * ifp,const struct if_data * src_ifd,struct rt_addrinfo * info,struct walkarg * w,int len)2373 sysctl_iflist_ifm(struct ifnet *ifp, const struct if_data *src_ifd,
2374     struct rt_addrinfo *info, struct walkarg *w, int len)
2375 {
2376 	struct if_msghdr *ifm;
2377 	struct if_data *ifd;
2378 
2379 	ifm = (struct if_msghdr *)w->w_tmem;
2380 
2381 #ifdef COMPAT_FREEBSD32
2382 	if (w->w_req->flags & SCTL_MASK32) {
2383 		struct if_msghdr32 *ifm32;
2384 
2385 		ifm32 = (struct if_msghdr32 *)ifm;
2386 		ifm32->ifm_addrs = info->rti_addrs;
2387 		ifm32->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2388 		ifm32->ifm_index = ifp->if_index;
2389 		ifm32->_ifm_spare1 = 0;
2390 		ifd = &ifm32->ifm_data;
2391 	} else
2392 #endif
2393 	{
2394 		ifm->ifm_addrs = info->rti_addrs;
2395 		ifm->ifm_flags = ifp->if_flags | ifp->if_drv_flags;
2396 		ifm->ifm_index = ifp->if_index;
2397 		ifm->_ifm_spare1 = 0;
2398 		ifd = &ifm->ifm_data;
2399 	}
2400 
2401 	memcpy(ifd, src_ifd, sizeof(*ifd));
2402 
2403 	return (SYSCTL_OUT(w->w_req, (caddr_t)ifm, len));
2404 }
2405 
2406 static int
sysctl_iflist_ifaml(struct ifaddr * ifa,struct rt_addrinfo * info,struct walkarg * w,int len)2407 sysctl_iflist_ifaml(struct ifaddr *ifa, struct rt_addrinfo *info,
2408     struct walkarg *w, int len)
2409 {
2410 	struct ifa_msghdrl *ifam;
2411 	struct if_data *ifd;
2412 
2413 	ifam = (struct ifa_msghdrl *)w->w_tmem;
2414 
2415 #ifdef COMPAT_FREEBSD32
2416 	if (w->w_req->flags & SCTL_MASK32) {
2417 		struct ifa_msghdrl32 *ifam32;
2418 
2419 		ifam32 = (struct ifa_msghdrl32 *)ifam;
2420 		ifam32->ifam_addrs = info->rti_addrs;
2421 		ifam32->ifam_flags = ifa->ifa_flags;
2422 		ifam32->ifam_index = ifa->ifa_ifp->if_index;
2423 		ifam32->_ifam_spare1 = 0;
2424 		ifam32->ifam_len = sizeof(*ifam32);
2425 		ifam32->ifam_data_off =
2426 		    offsetof(struct ifa_msghdrl32, ifam_data);
2427 		ifam32->ifam_metric = ifa->ifa_ifp->if_metric;
2428 		ifd = &ifam32->ifam_data;
2429 	} else
2430 #endif
2431 	{
2432 		ifam->ifam_addrs = info->rti_addrs;
2433 		ifam->ifam_flags = ifa->ifa_flags;
2434 		ifam->ifam_index = ifa->ifa_ifp->if_index;
2435 		ifam->_ifam_spare1 = 0;
2436 		ifam->ifam_len = sizeof(*ifam);
2437 		ifam->ifam_data_off = offsetof(struct ifa_msghdrl, ifam_data);
2438 		ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2439 		ifd = &ifam->ifam_data;
2440 	}
2441 
2442 	bzero(ifd, sizeof(*ifd));
2443 	ifd->ifi_datalen = sizeof(struct if_data);
2444 	ifd->ifi_ipackets = counter_u64_fetch(ifa->ifa_ipackets);
2445 	ifd->ifi_opackets = counter_u64_fetch(ifa->ifa_opackets);
2446 	ifd->ifi_ibytes = counter_u64_fetch(ifa->ifa_ibytes);
2447 	ifd->ifi_obytes = counter_u64_fetch(ifa->ifa_obytes);
2448 
2449 	/* Fixup if_data carp(4) vhid. */
2450 	if (carp_get_vhid_p != NULL)
2451 		ifd->ifi_vhid = (*carp_get_vhid_p)(ifa);
2452 
2453 	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
2454 }
2455 
2456 static int
sysctl_iflist_ifam(struct ifaddr * ifa,struct rt_addrinfo * info,struct walkarg * w,int len)2457 sysctl_iflist_ifam(struct ifaddr *ifa, struct rt_addrinfo *info,
2458     struct walkarg *w, int len)
2459 {
2460 	struct ifa_msghdr *ifam;
2461 
2462 	ifam = (struct ifa_msghdr *)w->w_tmem;
2463 	ifam->ifam_addrs = info->rti_addrs;
2464 	ifam->ifam_flags = ifa->ifa_flags;
2465 	ifam->ifam_index = ifa->ifa_ifp->if_index;
2466 	ifam->_ifam_spare1 = 0;
2467 	ifam->ifam_metric = ifa->ifa_ifp->if_metric;
2468 
2469 	return (SYSCTL_OUT(w->w_req, w->w_tmem, len));
2470 }
2471 
2472 static int
sysctl_iflist(int af,struct walkarg * w)2473 sysctl_iflist(int af, struct walkarg *w)
2474 {
2475 	struct ifnet *ifp;
2476 	struct ifaddr *ifa;
2477 	struct if_data ifd;
2478 	struct rt_addrinfo info;
2479 	int len, error = 0;
2480 	struct sockaddr_storage ss;
2481 
2482 	bzero((caddr_t)&info, sizeof(info));
2483 	bzero(&ifd, sizeof(ifd));
2484 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2485 		if (w->w_arg && w->w_arg != ifp->if_index)
2486 			continue;
2487 		if_data_copy(ifp, &ifd);
2488 		ifa = ifp->if_addr;
2489 		info.rti_info[RTAX_IFP] = ifa->ifa_addr;
2490 		error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
2491 		if (error != 0)
2492 			goto done;
2493 		info.rti_info[RTAX_IFP] = NULL;
2494 		if (w->w_req && w->w_tmem) {
2495 			if (w->w_op == NET_RT_IFLISTL)
2496 				error = sysctl_iflist_ifml(ifp, &ifd, &info, w,
2497 				    len);
2498 			else
2499 				error = sysctl_iflist_ifm(ifp, &ifd, &info, w,
2500 				    len);
2501 			if (error)
2502 				goto done;
2503 		}
2504 		while ((ifa = CK_STAILQ_NEXT(ifa, ifa_link)) != NULL) {
2505 			if (af && af != ifa->ifa_addr->sa_family)
2506 				continue;
2507 			if (prison_if(w->w_req->td->td_ucred,
2508 			    ifa->ifa_addr) != 0)
2509 				continue;
2510 			info.rti_info[RTAX_IFA] = ifa->ifa_addr;
2511 			info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(
2512 			    ifa->ifa_addr, ifa->ifa_netmask, &ss);
2513 			info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
2514 			error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
2515 			if (error != 0)
2516 				goto done;
2517 			if (w->w_req && w->w_tmem) {
2518 				if (w->w_op == NET_RT_IFLISTL)
2519 					error = sysctl_iflist_ifaml(ifa, &info,
2520 					    w, len);
2521 				else
2522 					error = sysctl_iflist_ifam(ifa, &info,
2523 					    w, len);
2524 				if (error)
2525 					goto done;
2526 			}
2527 		}
2528 		info.rti_info[RTAX_IFA] = NULL;
2529 		info.rti_info[RTAX_NETMASK] = NULL;
2530 		info.rti_info[RTAX_BRD] = NULL;
2531 	}
2532 done:
2533 	return (error);
2534 }
2535 
2536 static int
sysctl_ifmalist(int af,struct walkarg * w)2537 sysctl_ifmalist(int af, struct walkarg *w)
2538 {
2539 	struct rt_addrinfo info;
2540 	struct ifaddr *ifa;
2541 	struct ifmultiaddr *ifma;
2542 	struct ifnet *ifp;
2543 	int error, len;
2544 
2545 	NET_EPOCH_ASSERT();
2546 
2547 	error = 0;
2548 	bzero((caddr_t)&info, sizeof(info));
2549 
2550 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
2551 		if (w->w_arg && w->w_arg != ifp->if_index)
2552 			continue;
2553 		ifa = ifp->if_addr;
2554 		info.rti_info[RTAX_IFP] = ifa ? ifa->ifa_addr : NULL;
2555 		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2556 			if (af && af != ifma->ifma_addr->sa_family)
2557 				continue;
2558 			if (prison_if(w->w_req->td->td_ucred,
2559 			    ifma->ifma_addr) != 0)
2560 				continue;
2561 			info.rti_info[RTAX_IFA] = ifma->ifma_addr;
2562 			info.rti_info[RTAX_GATEWAY] =
2563 			    (ifma->ifma_addr->sa_family != AF_LINK) ?
2564 			    ifma->ifma_lladdr : NULL;
2565 			error = rtsock_msg_buffer(RTM_NEWMADDR, &info, w, &len);
2566 			if (error != 0)
2567 				break;
2568 			if (w->w_req && w->w_tmem) {
2569 				struct ifma_msghdr *ifmam;
2570 
2571 				ifmam = (struct ifma_msghdr *)w->w_tmem;
2572 				ifmam->ifmam_index = ifma->ifma_ifp->if_index;
2573 				ifmam->ifmam_flags = 0;
2574 				ifmam->ifmam_addrs = info.rti_addrs;
2575 				ifmam->_ifmam_spare1 = 0;
2576 				error = SYSCTL_OUT(w->w_req, w->w_tmem, len);
2577 				if (error != 0)
2578 					break;
2579 			}
2580 		}
2581 		if (error != 0)
2582 			break;
2583 	}
2584 	return (error);
2585 }
2586 
2587 static void
rtable_sysctl_dump(uint32_t fibnum,int family,struct walkarg * w)2588 rtable_sysctl_dump(uint32_t fibnum, int family, struct walkarg *w)
2589 {
2590 	union sockaddr_union sa_dst, sa_mask;
2591 
2592 	w->family = family;
2593 	w->dst = (struct sockaddr *)&sa_dst;
2594 	w->mask = (struct sockaddr *)&sa_mask;
2595 
2596 	init_sockaddrs_family(family, w->dst, w->mask);
2597 
2598 	rib_walk(fibnum, family, false, sysctl_dumpentry, w);
2599 }
2600 
2601 static int
sysctl_rtsock(SYSCTL_HANDLER_ARGS)2602 sysctl_rtsock(SYSCTL_HANDLER_ARGS)
2603 {
2604 	struct epoch_tracker et;
2605 	int	*name = (int *)arg1;
2606 	u_int	namelen = arg2;
2607 	struct rib_head *rnh = NULL; /* silence compiler. */
2608 	int	i, lim, error = EINVAL;
2609 	int	fib = 0;
2610 	u_char	af;
2611 	struct	walkarg w;
2612 
2613 	if (namelen < 3)
2614 		return (EINVAL);
2615 
2616 	name++;
2617 	namelen--;
2618 	if (req->newptr)
2619 		return (EPERM);
2620 	if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP || name[1] == NET_RT_NHGRP) {
2621 		if (namelen == 3)
2622 			fib = req->td->td_proc->p_fibnum;
2623 		else if (namelen == 4)
2624 			fib = (name[3] == RT_ALL_FIBS) ?
2625 			    req->td->td_proc->p_fibnum : name[3];
2626 		else
2627 			return ((namelen < 3) ? EISDIR : ENOTDIR);
2628 		if (fib < 0 || fib >= rt_numfibs)
2629 			return (EINVAL);
2630 	} else if (namelen != 3)
2631 		return ((namelen < 3) ? EISDIR : ENOTDIR);
2632 	af = name[0];
2633 	if (af > AF_MAX)
2634 		return (EINVAL);
2635 	bzero(&w, sizeof(w));
2636 	w.w_op = name[1];
2637 	w.w_arg = name[2];
2638 	w.w_req = req;
2639 
2640 	error = sysctl_wire_old_buffer(req, 0);
2641 	if (error)
2642 		return (error);
2643 
2644 	/*
2645 	 * Allocate reply buffer in advance.
2646 	 * All rtsock messages has maximum length of u_short.
2647 	 */
2648 	w.w_tmemsize = 65536;
2649 	w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
2650 
2651 	NET_EPOCH_ENTER(et);
2652 	switch (w.w_op) {
2653 	case NET_RT_DUMP:
2654 	case NET_RT_FLAGS:
2655 		if (af == 0) {			/* dump all tables */
2656 			i = 1;
2657 			lim = AF_MAX;
2658 		} else				/* dump only one table */
2659 			i = lim = af;
2660 
2661 		/*
2662 		 * take care of llinfo entries, the caller must
2663 		 * specify an AF
2664 		 */
2665 		if (w.w_op == NET_RT_FLAGS &&
2666 		    (w.w_arg == 0 || w.w_arg & RTF_LLINFO)) {
2667 			if (af != 0)
2668 				error = lltable_sysctl_dumparp(af, w.w_req);
2669 			else
2670 				error = EINVAL;
2671 			break;
2672 		}
2673 		/*
2674 		 * take care of routing entries
2675 		 */
2676 		for (error = 0; error == 0 && i <= lim; i++) {
2677 			rnh = rt_tables_get_rnh(fib, i);
2678 			if (rnh != NULL) {
2679 				rtable_sysctl_dump(fib, i, &w);
2680 			} else if (af != 0)
2681 				error = EAFNOSUPPORT;
2682 		}
2683 		break;
2684 	case NET_RT_NHOP:
2685 	case NET_RT_NHGRP:
2686 		/* Allow dumping one specific af/fib at a time */
2687 		if (namelen < 4) {
2688 			error = EINVAL;
2689 			break;
2690 		}
2691 		fib = name[3];
2692 		if (fib < 0 || fib > rt_numfibs) {
2693 			error = EINVAL;
2694 			break;
2695 		}
2696 		rnh = rt_tables_get_rnh(fib, af);
2697 		if (rnh == NULL) {
2698 			error = EAFNOSUPPORT;
2699 			break;
2700 		}
2701 		if (w.w_op == NET_RT_NHOP)
2702 			error = nhops_dump_sysctl(rnh, w.w_req);
2703 		else
2704 #ifdef ROUTE_MPATH
2705 			error = nhgrp_dump_sysctl(rnh, w.w_req);
2706 #else
2707 			error = ENOTSUP;
2708 #endif
2709 		break;
2710 	case NET_RT_IFLIST:
2711 	case NET_RT_IFLISTL:
2712 		error = sysctl_iflist(af, &w);
2713 		break;
2714 
2715 	case NET_RT_IFMALIST:
2716 		error = sysctl_ifmalist(af, &w);
2717 		break;
2718 	}
2719 	NET_EPOCH_EXIT(et);
2720 
2721 	free(w.w_tmem, M_TEMP);
2722 	return (error);
2723 }
2724 
2725 static SYSCTL_NODE(_net, PF_ROUTE, routetable, CTLFLAG_RD | CTLFLAG_MPSAFE,
2726     sysctl_rtsock, "Return route tables and interface/address lists");
2727 
2728 /*
2729  * Definitions of protocols supported in the ROUTE domain.
2730  */
2731 
2732 static struct domain routedomain;		/* or at least forward */
2733 
2734 static struct protosw routesw = {
2735 	.pr_type =		SOCK_RAW,
2736 	.pr_flags =		PR_ATOMIC|PR_ADDR,
2737 	.pr_abort =		rts_close,
2738 	.pr_attach =		rts_attach,
2739 	.pr_ctloutput =		rts_ctloutput,
2740 	.pr_detach =		rts_detach,
2741 	.pr_send =		rts_send,
2742 	.pr_shutdown =		rts_shutdown,
2743 	.pr_disconnect =	rts_disconnect,
2744 	.pr_close =		rts_close,
2745 };
2746 
2747 static struct domain routedomain = {
2748 	.dom_family =		PF_ROUTE,
2749 	.dom_name =		"route",
2750 	.dom_nprotosw =		1,
2751 	.dom_protosw =		{ &routesw },
2752 };
2753 
2754 DOMAIN_SET(route);
2755