xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_rts.c (revision cb6207858a9fcc2feaee22e626912fba281ac969)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * This file contains routines that process routing socket requests.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/stream.h>
49 #include <sys/stropts.h>
50 #include <sys/ddi.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 #include <sys/systm.h>
57 #include <sys/param.h>
58 #include <sys/socket.h>
59 #include <sys/strsun.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <net/if_dl.h>
64 #include <netinet/ip6.h>
65 
66 #include <inet/common.h>
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_ftable.h>
72 #include <inet/ip_rts.h>
73 
74 #include <inet/ipclassifier.h>
75 
76 #include <sys/tsol/tndb.h>
77 #include <sys/tsol/tnet.h>
78 
/*
 * Total size of a routing message: the data portion reported by
 * rts_data_msg_size() for the given address bits, address family and
 * security attribute count, plus the header portion reported by
 * rts_header_msg_size() for the given message type.
 */
79 #define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
80 	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))
81 
/*
 * Forward declarations for the static (file-local) helper routines of this
 * file.
 */
82 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
83 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
84     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
85     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
86 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
87     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
88     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
89     ushort_t *src_indexp, sa_family_t *afp, tsol_rtsecattr_t *rtsecattr,
90     int *error);
91 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
92 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
93 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
94     sa_family_t af);
95 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
96 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
97 
98 /*
99  * Send the ack to all the routing queues.  In case of the originating queue,
100  * send it only if the loopback is set.
101  *
102  * Messages are sent upstream only on routing sockets that did not specify an
103  * address family when they were created or when the address family matches the
104  * one specified by the caller.
105  *
106  */
107 void
108 rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst)
109 {
110 	mblk_t	*mp1;
111 	int	checkqfull;
112 	conn_t 	*connp, *next_connp;
113 
114 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
115 	connp = ipst->ips_rts_clients->connf_head;
116 
117 	while (connp != NULL) {
118 		/*
119 		 * If there was a family specified when this routing socket was
120 		 * created and it doesn't match the family of the message to
121 		 * copy, then continue.
122 		 */
123 		if ((connp->conn_proto != AF_UNSPEC) &&
124 		    (connp->conn_proto != af)) {
125 			connp = connp->conn_next;
126 			continue;
127 		}
128 		/*
129 		 * For the originating queue, we only copy the message upstream
130 		 * if loopback is set.  For others reading on the routing
131 		 * socket, we check if there is room upstream for a copy of the
132 		 * message.
133 		 */
134 		if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) {
135 			if (connp->conn_loopback == 0) {
136 				connp = connp->conn_next;
137 				continue;
138 			}
139 			/*
140 			 * Just because it is the same queue doesn't mean it
141 			 * will promptly read its acks. Have to avoid using
142 			 * all of kernel memory.
143 			 */
144 			checkqfull = B_TRUE;
145 		} else {
146 			checkqfull = B_TRUE;
147 		}
148 		CONN_INC_REF(connp);
149 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
150 		if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) {
151 			mp1 = dupmsg(mp);
152 			if (mp1 == NULL)
153 				mp1 = copymsg(mp);
154 			if (mp1 != NULL)
155 				putnext(CONNP_TO_RQ(connp), mp1);
156 		}
157 
158 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
159 		/* Follow the next pointer before releasing the conn. */
160 		next_connp = connp->conn_next;
161 		CONN_DEC_REF(connp);
162 		connp = next_connp;
163 	}
164 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
165 	freemsg(mp);
166 }
167 
168 /*
169  * Takes an ire and sends an ack to all the routing sockets. This
170  * routine is used
171  * - when a route is created/deleted through the ioctl interface.
172  * - when ire_expire deletes a stale redirect
173  */
174 void
175 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
176 {
177 	mblk_t		*mp;
178 	rt_msghdr_t	*rtm;
179 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
180 	sa_family_t	af;
181 	in6_addr_t	gw_addr_v6;
182 
183 	if (ire == NULL)
184 		return;
185 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
186 	    ire->ire_ipversion == IPV6_VERSION);
187 
188 	if (ire->ire_flags & RTF_SETSRC)
189 		rtm_addrs |= RTA_SRC;
190 
191 	switch (ire->ire_ipversion) {
192 	case IPV4_VERSION:
193 		af = AF_INET;
194 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
195 		if (mp == NULL)
196 			return;
197 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
198 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
199 		    0, NULL);
200 		break;
201 	case IPV6_VERSION:
202 		af = AF_INET6;
203 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
204 		if (mp == NULL)
205 			return;
206 		mutex_enter(&ire->ire_lock);
207 		gw_addr_v6 = ire->ire_gateway_addr_v6;
208 		mutex_exit(&ire->ire_lock);
209 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
210 		    &ire->ire_mask_v6, &gw_addr_v6,
211 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
212 		    NULL, mp, 0, NULL);
213 		break;
214 	}
215 	rtm = (rt_msghdr_t *)mp->b_rptr;
216 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
217 	rtm->rtm_addrs = rtm_addrs;
218 	rtm->rtm_flags = ire->ire_flags;
219 	if (error != 0)
220 		rtm->rtm_errno = error;
221 	else
222 		rtm->rtm_flags |= RTF_DONE;
223 	rts_queue_input(mp, NULL, af, ipst);
224 }
225 
/*
 * Restart entry point for ip_rts_request(): re-issues the request carried in
 * `mp' on queue `q', taking the credential from the message with DB_CRED().
 * The ipsq and the trailing argument are not used, and the value returned by
 * ip_rts_request() is deliberately discarded.
 */
226 /* ARGSUSED */
227 static void
228 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
229 {
230 	(void) ip_rts_request(q, mp, DB_CRED(mp));
231 }
232 
233 /*
234  * Processes requests received on a routing socket. It extracts all the
235  * arguments and calls the appropriate function to process the request.
236  *
237  * RTA_SRC bit flag requests are sent by mipagent and 'route -setsrc'.
238  * RTA_SRCIFP bit flag requests are sent by mipagent only.
239  *
240  * In general, this function does not consume the message supplied but rather
241  * sends the message upstream with an appropriate UNIX errno.
242  *
243  * We may need to restart this operation if the ipif cannot be looked up
244  * due to an exclusive operation that is currently in progress. The restart
245  * entry point is ip_rts_request_retry. While the request is enqueued in the
246  * ipsq the ioctl could be aborted and the conn closed. To ensure that we don't
247  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
248  * released at the completion of the rts ioctl at the end of this function
249  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
250  * conn close occurs in conn_ioctl_cleanup.
251  */
252 int
253 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
254 {
255 	rt_msghdr_t	*rtm = NULL;
256 	in6_addr_t	dst_addr_v6;
257 	in6_addr_t	src_addr_v6;
258 	in6_addr_t	gw_addr_v6;
259 	in6_addr_t	net_mask_v6;
260 	in6_addr_t	author_v6;
261 	in6_addr_t	if_addr_v6;
262 	mblk_t		*mp1, *ioc_mp = mp;
263 	ire_t		*ire = NULL;
264 	ire_t		*sire = NULL;
265 	int		error = 0;
266 	int		match_flags = MATCH_IRE_DSTONLY;
267 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
268 	int		found_addrs;
269 	sa_family_t	af;
270 	ipaddr_t	dst_addr;
271 	ipaddr_t	gw_addr;
272 	ipaddr_t	src_addr;
273 	ipaddr_t	net_mask;
274 	ushort_t	index;
275 	ushort_t	src_index;
276 	ipif_t		*ipif = NULL;
277 	ipif_t		*src_ipif = NULL;
278 	ipif_t		*tmp_ipif = NULL;
279 	IOCP		iocp = (IOCP)mp->b_rptr;
280 	conn_t		*connp;
281 	boolean_t	gcgrp_xtraref = B_FALSE;
282 	tsol_gcgrp_addr_t ga;
283 	tsol_rtsecattr_t rtsecattr;
284 	struct rtsa_s	*rtsap = NULL;
285 	tsol_gcgrp_t	*gcgrp = NULL;
286 	tsol_gc_t	*gc = NULL;
287 	ts_label_t	*tsl = NULL;
288 	zoneid_t	zoneid;
289 	ip_stack_t	*ipst;
290 
291 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
292 
293 	ASSERT(CONN_Q(q));
294 	connp = Q_TO_CONN(q);
295 	zoneid = connp->conn_zoneid;
296 	ipst = connp->conn_netstack->netstack_ip;
297 
298 	ASSERT(mp->b_cont != NULL);
299 	/* ioc_mp holds mp */
300 	mp = mp->b_cont;
301 
302 	/*
303 	 * The Routing Socket data starts on
304 	 * next block. If there is no next block
305 	 * this is an indication from routing module
306 	 * that it is a routing socket stream queue.
307 	 */
308 	if (mp->b_cont != NULL) {
309 		mp1 = dupmsg(mp->b_cont);
310 		if (mp1 == NULL) {
311 			error  = ENOBUFS;
312 			goto done;
313 		}
314 		mp = mp1;
315 	} else {
316 		/*
317 		 * This is a message from RTS module
318 		 * indicating that this is a Routing Socket
319 		 * Stream. Insert this conn_t in routing
320 		 * socket client list.
321 		 */
322 
323 		connp->conn_loopback = 1;
324 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
325 
326 		goto done;
327 	}
328 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
329 		freemsg(mp);
330 		error =  EINVAL;
331 		goto done;
332 	}
333 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
334 		freemsg(mp);
335 		error = EINVAL;
336 		goto done;
337 	}
338 
339 	/*
340 	 * Check the routing message for basic consistency including the
341 	 * version number and that the number of octets written is the same
342 	 * as specified by the rtm_msglen field.
343 	 *
344 	 * At this point, an error can be delivered back via rtm_errno.
345 	 */
346 	rtm = (rt_msghdr_t *)mp->b_rptr;
347 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
348 		error = EINVAL;
349 		goto done;
350 	}
351 	if (rtm->rtm_version != RTM_VERSION) {
352 		error = EPROTONOSUPPORT;
353 		goto done;
354 	}
355 
356 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
357 	if (rtm->rtm_type != RTM_GET &&
358 	    rtm->rtm_type != RTM_RESOLVE &&
359 	    (ioc_cr == NULL ||
360 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
361 		error = EPERM;
362 		goto done;
363 	}
364 
365 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
366 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &src_index, &af,
367 	    &rtsecattr, &error);
368 
369 	if (error != 0)
370 		goto done;
371 
372 	if ((found_addrs & RTA_DST) == 0) {
373 		error = EINVAL;
374 		goto done;
375 	}
376 
377 	/*
378 	 * Based on the address family of the destination address, determine
379 	 * the destination, gateway and netmask and return the appropriate error
380 	 * if an unknown address family was specified (following the errno
381 	 * values that 4.4BSD-Lite2 returns.)
382 	 */
383 	switch (af) {
384 	case AF_INET:
385 		/*
386 		 * RTA_SRCIFP is supported for interface route only.
387 		 * Thus a gateway route with srcifindex is rejected,
388 		 * except if it's a request to add reverse tunnel
389 		 * route.
390 		 */
391 		if ((rtm->rtm_flags & RTF_GATEWAY) &&
392 		    (found_addrs & RTA_SRCIFP) &&
393 		    !(found_addrs & RTA_SRC)) {
394 			error = EINVAL;
395 			goto done;
396 		}
397 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
398 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
399 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
400 		if (((found_addrs & RTA_NETMASK) == 0) ||
401 		    (rtm->rtm_flags & RTF_HOST))
402 			net_mask = IP_HOST_MASK;
403 		else
404 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
405 		break;
406 	case AF_INET6:
407 		/*
408 		 * RTA_SRCIFP is not a valid flag for IPv6 routes.
409 		 */
410 		if (found_addrs & RTA_SRCIFP) {
411 			error = EINVAL;
412 			goto done;
413 		}
414 		if (((found_addrs & RTA_NETMASK) == 0) ||
415 		    (rtm->rtm_flags & RTF_HOST))
416 			net_mask_v6 = ipv6_all_ones;
417 		break;
418 	default:
419 		/*
420 		 * These errno values are meant to be compatible with
421 		 * 4.4BSD-Lite2 for the given message types.
422 		 */
423 		switch (rtm->rtm_type) {
424 		case RTM_ADD:
425 		case RTM_DELETE:
426 			error = ESRCH;
427 			goto done;
428 		case RTM_GET:
429 		case RTM_CHANGE:
430 			error = EAFNOSUPPORT;
431 			goto done;
432 		default:
433 			error = EOPNOTSUPP;
434 			goto done;
435 		}
436 	}
437 
438 	/*
439 	 * At this point, the address family must be something known.
440 	 */
441 	ASSERT(af == AF_INET || af == AF_INET6);
442 
443 	if (index != 0) {
444 		ill_t   *ill;
445 
446 		/*
447 		 * IPC must be refheld somewhere in ip_wput_nondata or
448 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
449 		 * If ILL_CHANGING the request is queued in the ipsq.
450 		 */
451 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
452 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
453 		    ipst);
454 		if (ill == NULL) {
455 			if (error != EINPROGRESS)
456 				error = EINVAL;
457 			goto done;
458 		}
459 
460 		ipif = ipif_get_next_ipif(NULL, ill);
461 		ill_refrele(ill);
462 		/*
463 		 * If this is replacement ipif, prevent a route from
464 		 * being added.
465 		 */
466 		if (ipif != NULL && ipif->ipif_replace_zero) {
467 			error = ENETDOWN;
468 			goto done;
469 		}
470 		match_flags |= MATCH_IRE_ILL;
471 	}
472 
473 	/* RTA_SRCIFP is unsupported on AF_INET6. */
474 	if (af == AF_INET && src_index != 0) {
475 		ill_t   *ill;
476 
477 		/* If ILL_CHANGING the request is queued in the ipsq. */
478 		ill = ill_lookup_on_ifindex(src_index, B_FALSE,
479 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
480 		    ipst);
481 		if (ill == NULL) {
482 			if (error != EINPROGRESS)
483 				error = EINVAL;
484 			goto done;
485 		}
486 
487 		src_ipif = ipif_get_next_ipif(NULL, ill);
488 		ill_refrele(ill);
489 	}
490 	/*
491 	 * If a netmask was supplied in the message, then subsequent route
492 	 * lookups will attempt to match on the netmask as well.
493 	 */
494 	if ((found_addrs & RTA_NETMASK) != 0)
495 		match_flags |= MATCH_IRE_MASK;
496 
497 	/*
498 	 * We only process any passed-in route security attributes for
499 	 * either RTM_ADD or RTM_CHANGE message; We overload them
500 	 * to do an RTM_GET as a different label; ignore otherwise.
501 	 */
502 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
503 	    rtm->rtm_type == RTM_GET) {
504 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
505 		if (rtsecattr.rtsa_cnt > 0)
506 			rtsap = &rtsecattr.rtsa_attr[0];
507 	}
508 
509 	switch (rtm->rtm_type) {
510 	case RTM_ADD:
511 		/* if we are adding a route, gateway is a must */
512 		if ((found_addrs & RTA_GATEWAY) == 0) {
513 			error = EINVAL;
514 			goto done;
515 		}
516 
517 		/* Multirouting does not support net routes. */
518 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
519 		    RTF_MULTIRT) {
520 			error = EADDRNOTAVAIL;
521 			goto done;
522 		}
523 
524 		/*
525 		 * Multirouting and user-specified source addresses
526 		 * do not support interface based routing.
527 		 * Assigning a source address to an interface based
528 		 * route is achievable by plumbing a new ipif and
529 		 * setting up the interface route via this ipif,
530 		 * though.
531 		 */
532 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
533 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
534 				error = EADDRNOTAVAIL;
535 				goto done;
536 			}
537 		}
538 
539 		switch (af) {
540 		case AF_INET:
541 			if (src_addr != INADDR_ANY) {
542 				/*
543 				 * If there is a source address, but
544 				 * no RTF_SETSRC modifier, setup a MobileIP
545 				 * reverse tunnel.
546 				 */
547 				if ((rtm->rtm_flags & RTF_SETSRC) == 0) {
548 					error = ip_mrtun_rt_add(src_addr,
549 					    rtm->rtm_flags, ipif,
550 					    src_ipif, &ire, CONNP_TO_WQ(connp),
551 					    ioc_mp, ip_rts_request_retry, ipst);
552 					break;
553 				}
554 				/*
555 				 * The RTF_SETSRC flag is present, check that
556 				 * the supplied src address is not the loopback
557 				 * address. This would produce martian packets.
558 				 */
559 				if (src_addr == htonl(INADDR_LOOPBACK)) {
560 					error = EINVAL;
561 					goto done;
562 				}
563 				/*
564 				 * Also check that the supplied address is a
565 				 * valid, local one.
566 				 */
567 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
568 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
569 				    ip_rts_request_retry, &error, ipst);
570 				if (tmp_ipif == NULL) {
571 					if (error != EINPROGRESS)
572 						error = EADDRNOTAVAIL;
573 					goto done;
574 				}
575 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
576 				    (tmp_ipif->ipif_flags &
577 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
578 					error = EINVAL;
579 					goto done;
580 				}
581 			} else {
582 				/*
583 				 * The RTF_SETSRC modifier must be associated
584 				 * to a non-null source address.
585 				 */
586 				if (rtm->rtm_flags & RTF_SETSRC) {
587 					error = EINVAL;
588 					goto done;
589 				}
590 			}
591 
592 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
593 			    rtm->rtm_flags, ipif, src_ipif, &ire, B_FALSE,
594 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
595 			    rtsap, ipst);
596 			if (ipif != NULL)
597 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
598 			break;
599 		case AF_INET6:
600 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
601 				/*
602 				 * If there is a source address, but
603 				 * no RTF_SETSRC modifier, reject, as
604 				 * MobileIP IPv6 reverse tunnels are
605 				 * not supported.
606 				 */
607 				if ((rtm->rtm_flags & RTF_SETSRC) == 0) {
608 					error = EINVAL;
609 					goto done;
610 				}
611 				/*
612 				 * The RTF_SETSRC flag is present, check that
613 				 * the supplied src address is not the loopback
614 				 * address. This would produce martian packets.
615 				 */
616 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
617 					error = EINVAL;
618 					goto done;
619 				}
620 				/*
621 				 * Also check that the supplied address is a
622 				 * valid, local one.
623 				 */
624 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
625 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
626 				    ip_rts_request_retry, &error, ipst);
627 				if (tmp_ipif == NULL) {
628 					if (error != EINPROGRESS)
629 						error = EADDRNOTAVAIL;
630 					goto done;
631 				}
632 
633 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
634 				    (tmp_ipif->ipif_flags &
635 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
636 					error = EINVAL;
637 					goto done;
638 				}
639 
640 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
641 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
642 				    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
643 				    ip_rts_request_retry, rtsap, ipst);
644 				break;
645 			}
646 			/*
647 			 * The RTF_SETSRC modifier must be associated
648 			 * to a non-null source address.
649 			 */
650 			if (rtm->rtm_flags & RTF_SETSRC) {
651 				error = EINVAL;
652 				goto done;
653 			}
654 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
655 			    &gw_addr_v6, NULL, rtm->rtm_flags,
656 			    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
657 			    ip_rts_request_retry, rtsap, ipst);
658 			if (ipif != NULL)
659 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
660 			break;
661 		}
662 		if (error != 0)
663 			goto done;
664 		ASSERT(ire != NULL);
665 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
666 		break;
667 	case RTM_DELETE:
668 		/* if we are deleting a route, gateway is a must */
669 		if ((found_addrs & RTA_GATEWAY) == 0) {
670 			error = EINVAL;
671 			goto done;
672 		}
673 		/*
674 		 * The RTF_SETSRC modifier does not make sense
675 		 * when deleting a route.
676 		 */
677 		if (rtm->rtm_flags & RTF_SETSRC) {
678 			error = EINVAL;
679 			goto done;
680 		}
681 
682 		switch (af) {
683 		case AF_INET:
684 			/*
685 			 * If there is a source address, delete
686 			 * a MobileIP reverse tunnel.
687 			 */
688 			if (src_addr != INADDR_ANY) {
689 				error = ip_mrtun_rt_delete(src_addr,
690 				    src_ipif);
691 				break;
692 			}
693 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
694 			    found_addrs, rtm->rtm_flags, ipif, src_ipif,
695 			    B_FALSE, CONNP_TO_WQ(connp), ioc_mp,
696 			    ip_rts_request_retry, ipst);
697 			break;
698 		case AF_INET6:
699 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
700 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
701 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
702 			    ipst);
703 			break;
704 		}
705 		break;
706 	case RTM_GET:
707 	case RTM_CHANGE:
708 		/*
709 		 * In the case of RTM_GET, the forwarding table should be
710 		 * searched recursively with default being matched if the
711 		 * specific route doesn't exist.  Also, if a gateway was
712 		 * specified then the gateway address must also be matched.
713 		 *
714 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
715 		 * is the new gateway address so matching on the gateway address
716 		 * is not done.  This can lead to ambiguity when looking up the
717 		 * route to change as usually only the destination (and netmask,
718 		 * if supplied) is used for the lookup.  However if a RTA_IFP
719 		 * sockaddr is also supplied, it can disambiguate which route to
720 		 * change provided the ambigous routes are tied to distinct
721 		 * ill's (or interface indices).  If the routes are not tied to
722 		 * any particular interfaces (for example, with traditional
723 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
724 		 * it won't match any such routes.
725 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
726 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
727 		 */
728 		if (((found_addrs & RTA_SRC) != 0) &&
729 		    ((rtm->rtm_type == RTM_GET) ||
730 		    !(rtm->rtm_flags & RTF_SETSRC))) {
731 			error = EOPNOTSUPP;
732 			goto done;
733 		}
734 
735 		if (rtm->rtm_type == RTM_GET) {
736 			match_flags |=
737 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
738 			    MATCH_IRE_SECATTR);
739 			match_flags_local |= MATCH_IRE_SECATTR;
740 			if ((found_addrs & RTA_GATEWAY) != 0)
741 				match_flags |= MATCH_IRE_GW;
742 			if (ioc_cr)
743 				tsl = crgetlabel(ioc_cr);
744 			if (rtsap != NULL) {
745 				if (rtsa_validate(rtsap) != 0) {
746 					error = EINVAL;
747 					goto done;
748 				}
749 				if (tsl != NULL &&
750 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
751 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
752 				    !bldominates(&tsl->tsl_label,
753 				    &rtsap->rtsa_slrange.lower_bound))) {
754 					error = EPERM;
755 					goto done;
756 				}
757 				tsl = labelalloc(
758 				    &rtsap->rtsa_slrange.lower_bound,
759 				    rtsap->rtsa_doi, KM_NOSLEEP);
760 			}
761 		}
762 		if (rtm->rtm_type == RTM_CHANGE) {
763 			if ((found_addrs & RTA_GATEWAY) &&
764 			    (rtm->rtm_flags & RTF_SETSRC)) {
765 				/*
766 				 * Do not want to change the gateway,
767 				 * but rather the source address.
768 				 */
769 				match_flags |= MATCH_IRE_GW;
770 			}
771 		}
772 
773 		/*
774 		 * If the netmask is all ones (either as supplied or as derived
775 		 * above), then first check for an IRE_LOOPBACK or
776 		 * IRE_LOCAL entry.
777 		 *
778 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
779 		 * entry, then look in the forwarding table.
780 		 */
781 		switch (af) {
782 		case AF_INET:
783 			if (net_mask == IP_HOST_MASK) {
784 				ire = ire_ctable_lookup(dst_addr, gw_addr,
785 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
786 				    tsl, match_flags_local, ipst);
787 				/*
788 				 * If we found an IRE_LOCAL, make sure
789 				 * it is one that would be used by this
790 				 * zone to send packets.
791 				 */
792 				if (ire != NULL &&
793 				    ire->ire_type == IRE_LOCAL &&
794 				    ipst->ips_ip_restrict_interzone_loopback &&
795 				    !ire_local_ok_across_zones(ire,
796 				    zoneid, &dst_addr, tsl, ipst)) {
797 					ire_refrele(ire);
798 					ire = NULL;
799 				}
800 			}
801 			if (ire == NULL) {
802 				ire = ire_ftable_lookup(dst_addr, net_mask,
803 				    gw_addr, 0, ipif, &sire, zoneid, 0,
804 				    tsl, match_flags, ipst);
805 			}
806 			break;
807 		case AF_INET6:
808 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
809 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
810 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
811 				    zoneid, tsl, match_flags_local, ipst);
812 				/*
813 				 * If we found an IRE_LOCAL, make sure
814 				 * it is one that would be used by this
815 				 * zone to send packets.
816 				 */
817 				if (ire != NULL &&
818 				    ire->ire_type == IRE_LOCAL &&
819 				    ipst->ips_ip_restrict_interzone_loopback &&
820 				    !ire_local_ok_across_zones(ire,
821 				    zoneid, (void *)&dst_addr_v6, tsl, ipst)) {
822 					ire_refrele(ire);
823 					ire = NULL;
824 				}
825 			}
826 			if (ire == NULL) {
827 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
828 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
829 				    zoneid, 0, tsl, match_flags, ipst);
830 			}
831 			break;
832 		}
833 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
834 			label_rele(tsl);
835 
836 		if (ire == NULL) {
837 			error = ESRCH;
838 			goto done;
839 		}
840 		/* we know the IRE before we come here */
841 		switch (rtm->rtm_type) {
842 		case RTM_GET:
843 			mp1 = rts_rtmget(mp, ire, sire, af);
844 			if (mp1 == NULL) {
845 				error = ENOBUFS;
846 				goto done;
847 			}
848 			freemsg(mp);
849 			mp = mp1;
850 			rtm = (rt_msghdr_t *)mp->b_rptr;
851 			break;
852 		case RTM_CHANGE:
853 			/*
854 			 * Do not allow to the multirouting state of a route
855 			 * to be changed. This aims to prevent undesirable
856 			 * stages where both multirt and non-multirt routes
857 			 * for the same destination are declared.
858 			 */
859 			if ((ire->ire_flags & RTF_MULTIRT) !=
860 			    (rtm->rtm_flags & RTF_MULTIRT)) {
861 				error = EINVAL;
862 				goto done;
863 			}
864 			/*
865 			 * Note that we do not need to do
866 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
867 			 * in metrics or gateway will not affect existing
868 			 * routes since it does not create a more specific
869 			 * route.
870 			 */
871 			switch (af) {
872 			case AF_INET:
873 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
874 				if ((found_addrs & RTA_GATEWAY) != 0 &&
875 				    (ire->ire_gateway_addr != gw_addr)) {
876 					ire->ire_gateway_addr = gw_addr;
877 				}
878 
879 				if (rtsap != NULL) {
880 					ga.ga_af = AF_INET;
881 					IN6_IPADDR_TO_V4MAPPED(
882 					    ire->ire_gateway_addr, &ga.ga_addr);
883 
884 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
885 					if (gcgrp == NULL) {
886 						error = ENOMEM;
887 						goto done;
888 					}
889 				}
890 
891 				if ((found_addrs & RTA_SRC) != 0 &&
892 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
893 				    (ire->ire_src_addr != src_addr)) {
894 
895 					if (src_addr != INADDR_ANY) {
896 						/*
897 						 * The RTF_SETSRC flag is
898 						 * present, check that the
899 						 * supplied src address is not
900 						 * the loopback address. This
901 						 * would produce martian
902 						 * packets.
903 						 */
904 						if (src_addr ==
905 						    htonl(INADDR_LOOPBACK)) {
906 							error = EINVAL;
907 							goto done;
908 						}
909 						/*
910 						 * Also check that the the
911 						 * supplied addr is a valid
912 						 * local address.
913 						 */
914 						tmp_ipif = ipif_lookup_addr(
915 						    src_addr, NULL, ALL_ZONES,
916 						    CONNP_TO_WQ(connp), ioc_mp,
917 						    ip_rts_request_retry,
918 						    &error, ipst);
919 						if (tmp_ipif == NULL) {
920 							error = (error ==
921 							    EINPROGRESS) ?
922 							    error :
923 							    EADDRNOTAVAIL;
924 							goto done;
925 						}
926 
927 						if (!(tmp_ipif->ipif_flags &
928 						    IPIF_UP) ||
929 						    (tmp_ipif->ipif_flags &
930 						    (IPIF_NOLOCAL |
931 						    IPIF_ANYCAST))) {
932 							error = EINVAL;
933 							goto done;
934 						}
935 						ire->ire_flags |= RTF_SETSRC;
936 					} else {
937 						ire->ire_flags &= ~RTF_SETSRC;
938 					}
939 					ire->ire_src_addr = src_addr;
940 				}
941 				break;
942 			case AF_INET6:
943 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
944 				mutex_enter(&ire->ire_lock);
945 				if ((found_addrs & RTA_GATEWAY) != 0 &&
946 				    !IN6_ARE_ADDR_EQUAL(
947 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
948 					ire->ire_gateway_addr_v6 = gw_addr_v6;
949 				}
950 
951 				if (rtsap != NULL) {
952 					ga.ga_af = AF_INET6;
953 					ga.ga_addr = ire->ire_gateway_addr_v6;
954 
955 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
956 					if (gcgrp == NULL) {
957 						error = ENOMEM;
958 						goto done;
959 					}
960 				}
961 
962 				if ((found_addrs & RTA_SRC) != 0 &&
963 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
964 				    !IN6_ARE_ADDR_EQUAL(
965 					&ire->ire_src_addr_v6, &src_addr_v6)) {
966 
967 					if (!IN6_IS_ADDR_UNSPECIFIED(
968 					    &src_addr_v6)) {
969 						/*
970 						 * The RTF_SETSRC flag is
971 						 * present, check that the
972 						 * supplied src address is not
973 						 * the loopback address. This
974 						 * would produce martian
975 						 * packets.
976 						 */
977 						if (IN6_IS_ADDR_LOOPBACK(
978 						    &src_addr_v6)) {
979 							mutex_exit(
980 							    &ire->ire_lock);
981 							error = EINVAL;
982 							goto done;
983 						}
984 						/*
985 						 * Also check that the the
986 						 * supplied addr is a valid
987 						 * local address.
988 						 */
989 						tmp_ipif = ipif_lookup_addr_v6(
990 						    &src_addr_v6, NULL,
991 						    ALL_ZONES,
992 						    CONNP_TO_WQ(connp), ioc_mp,
993 						    ip_rts_request_retry,
994 						    &error, ipst);
995 						if (tmp_ipif == NULL) {
996 							mutex_exit(
997 							    &ire->ire_lock);
998 							error = (error ==
999 							    EINPROGRESS) ?
1000 							    error :
1001 							    EADDRNOTAVAIL;
1002 							goto done;
1003 						}
1004 						if (!(tmp_ipif->ipif_flags &
1005 						    IPIF_UP) ||
1006 						    (tmp_ipif->ipif_flags &
1007 						    (IPIF_NOLOCAL |
1008 						    IPIF_ANYCAST))) {
1009 							mutex_exit(
1010 							    &ire->ire_lock);
1011 							error = EINVAL;
1012 							goto done;
1013 						}
1014 						ire->ire_flags |= RTF_SETSRC;
1015 					} else {
1016 						ire->ire_flags &= ~RTF_SETSRC;
1017 					}
1018 					ire->ire_src_addr_v6 = src_addr_v6;
1019 				}
1020 				mutex_exit(&ire->ire_lock);
1021 				break;
1022 			}
1023 
1024 			if (rtsap != NULL) {
1025 				in_addr_t ga_addr4;
1026 
1027 				ASSERT(gcgrp != NULL);
1028 
1029 				/*
1030 				 * Create and add the security attribute to
1031 				 * prefix IRE; it will add a reference to the
1032 				 * group upon allocating a new entry.  If it
1033 				 * finds an already-existing entry for the
1034 				 * security attribute, it simply returns it
1035 				 * and no new group reference is made.
1036 				 */
1037 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
1038 				if (gc == NULL ||
1039 				    (error = tsol_ire_init_gwattr(ire,
1040 				    ire->ire_ipversion, gc, NULL)) != 0) {
1041 					if (gc != NULL) {
1042 						GC_REFRELE(gc);
1043 					} else {
1044 						/* gc_create failed */
1045 						error = ENOMEM;
1046 					}
1047 					goto done;
1048 				}
1049 
1050 				/*
1051 				 * Now delete any existing gateway IRE caches
1052 				 * as well as all caches using the gateway,
1053 				 * and allow them to be created on demand
1054 				 * through ip_newroute{_v6}.
1055 				 */
1056 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
1057 				if (af == AF_INET) {
1058 					ire_clookup_delete_cache_gw(
1059 					    ga_addr4, ALL_ZONES, ipst);
1060 				} else {
1061 					ire_clookup_delete_cache_gw_v6(
1062 					    &ga.ga_addr, ALL_ZONES, ipst);
1063 				}
1064 			}
1065 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
1066 			break;
1067 		}
1068 		break;
1069 	default:
1070 		error = EOPNOTSUPP;
1071 		break;
1072 	}
1073 done:
1074 	if (ire != NULL)
1075 		ire_refrele(ire);
1076 	if (sire != NULL)
1077 		ire_refrele(sire);
1078 	if (ipif != NULL)
1079 		ipif_refrele(ipif);
1080 	if (src_ipif != NULL)
1081 		ipif_refrele(src_ipif);
1082 	if (tmp_ipif != NULL)
1083 		ipif_refrele(tmp_ipif);
1084 
1085 	if (gcgrp_xtraref)
1086 		GCGRP_REFRELE(gcgrp);
1087 
1088 	if (error == EINPROGRESS) {
1089 		if (rtm != NULL)
1090 			freemsg(mp);
1091 		return (error);
1092 	}
1093 	if (rtm != NULL) {
1094 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1095 		if (error != 0) {
1096 			rtm->rtm_errno = error;
1097 			/* Send error ACK */
1098 			ip1dbg(("ip_rts_request: error %d\n", error));
1099 		} else {
1100 			rtm->rtm_flags |= RTF_DONE;
1101 			/* OK ACK already set up by caller except this */
1102 			ip2dbg(("ip_rts_request: OK ACK\n"));
1103 		}
1104 		rts_queue_input(mp, q, af, ipst);
1105 	}
1106 	iocp->ioc_error = error;
1107 	ioc_mp->b_datap->db_type = M_IOCACK;
1108 	if (iocp->ioc_error != 0)
1109 		iocp->ioc_count = 0;
1110 	qreply(q, ioc_mp);
1111 	/* conn was refheld in ip_wput_ioctl. */
1112 	CONN_OPER_PENDING_DONE(connp);
1113 
1114 	return (error);
1115 }
1116 
1117 /*
1118  * Build a reply to the RTM_GET request contained in the given message block
1119  * using the retrieved IRE of the destination address, the parent IRE (if it
1120  * exists) and the address family.
1121  *
1122  * Returns a pointer to a message block containing the reply if successful,
1123  * otherwise NULL is returned.
1124  */
1125 static mblk_t *
1126 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1127 {
1128 	rt_msghdr_t	*rtm;
1129 	rt_msghdr_t	*new_rtm;
1130 	mblk_t		*new_mp;
1131 	int		rtm_addrs;
1132 	int		rtm_flags;
1133 	in6_addr_t	gw_addr_v6;
1134 	tsol_ire_gw_secattr_t *attrp = NULL;
1135 	tsol_gc_t	*gc = NULL;
1136 	tsol_gcgrp_t	*gcgrp = NULL;
1137 	int		sacnt = 0;
1138 
1139 	ASSERT(ire->ire_ipif != NULL);
1140 	rtm = (rt_msghdr_t *)mp->b_rptr;
1141 
1142 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1143 		attrp = sire->ire_gw_secattr;
1144 	else if (ire->ire_gw_secattr != NULL)
1145 		attrp = ire->ire_gw_secattr;
1146 
1147 	if (attrp != NULL) {
1148 		mutex_enter(&attrp->igsa_lock);
1149 		if ((gc = attrp->igsa_gc) != NULL) {
1150 			gcgrp = gc->gc_grp;
1151 			ASSERT(gcgrp != NULL);
1152 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1153 			sacnt = 1;
1154 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1155 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1156 			gc = gcgrp->gcgrp_head;
1157 			sacnt = gcgrp->gcgrp_count;
1158 		}
1159 		mutex_exit(&attrp->igsa_lock);
1160 
1161 		/* do nothing if there's no gc to report */
1162 		if (gc == NULL) {
1163 			ASSERT(sacnt == 0);
1164 			if (gcgrp != NULL) {
1165 				/* we might as well drop the lock now */
1166 				rw_exit(&gcgrp->gcgrp_rwlock);
1167 				gcgrp = NULL;
1168 			}
1169 			attrp = NULL;
1170 		}
1171 
1172 		ASSERT(gc == NULL || (gcgrp != NULL &&
1173 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1174 	}
1175 	ASSERT(sacnt == 0 || gc != NULL);
1176 
1177 	/*
1178 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1179 	 *
1180 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1181 	 * RTA_IFP and RTA_IFA if either is defined, and also
1182 	 * returns RTA_BRD if the appropriate interface is
1183 	 * point-to-point.
1184 	 */
1185 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1186 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1187 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1188 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1189 			rtm_addrs |= RTA_BRD;
1190 	}
1191 
1192 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1193 	if (new_mp == NULL) {
1194 		if (gcgrp != NULL)
1195 			rw_exit(&gcgrp->gcgrp_rwlock);
1196 		return (NULL);
1197 	}
1198 
1199 	/*
1200 	 * We set the destination address, gateway address,
1201 	 * netmask and flags in the RTM_GET response depending
1202 	 * on whether we found a parent IRE or not.
1203 	 * In particular, if we did find a parent IRE during the
1204 	 * recursive search, use that IRE's gateway address.
1205 	 * Otherwise, we use the IRE's source address for the
1206 	 * gateway address.
1207 	 */
1208 	ASSERT(af == AF_INET || af == AF_INET6);
1209 	switch (af) {
1210 	case AF_INET:
1211 		if (sire == NULL) {
1212 			rtm_flags = ire->ire_flags;
1213 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1214 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1215 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1216 			    new_mp, sacnt, gc);
1217 		} else {
1218 			if (sire->ire_flags & RTF_SETSRC)
1219 				rtm_addrs |= RTA_SRC;
1220 
1221 			rtm_flags = sire->ire_flags;
1222 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1223 			    sire->ire_mask, sire->ire_gateway_addr,
1224 			    (sire->ire_flags & RTF_SETSRC) ?
1225 				sire->ire_src_addr : ire->ire_src_addr,
1226 			    ire->ire_ipif->ipif_pp_dst_addr,
1227 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1228 		}
1229 		break;
1230 	case AF_INET6:
1231 		if (sire == NULL) {
1232 			rtm_flags = ire->ire_flags;
1233 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1234 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1235 			    &ire->ire_src_addr_v6,
1236 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1237 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1238 			    sacnt, gc);
1239 		} else {
1240 			if (sire->ire_flags & RTF_SETSRC)
1241 				rtm_addrs |= RTA_SRC;
1242 
1243 			rtm_flags = sire->ire_flags;
1244 			mutex_enter(&sire->ire_lock);
1245 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1246 			mutex_exit(&sire->ire_lock);
1247 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1248 			    &sire->ire_mask_v6, &gw_addr_v6,
1249 			    (sire->ire_flags & RTF_SETSRC) ?
1250 				&sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1251 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1252 			    ire->ire_ipif, new_mp, sacnt, gc);
1253 		}
1254 		break;
1255 	}
1256 
1257 	if (gcgrp != NULL)
1258 		rw_exit(&gcgrp->gcgrp_rwlock);
1259 
1260 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1261 
1262 	/*
1263 	 * The rtm_msglen, rtm_version and rtm_type fields in
1264 	 * RTM_GET response are filled in by rts_fill_msg.
1265 	 *
1266 	 * rtm_addrs and rtm_flags are filled in based on what
1267 	 * was requested and the state of the IREs looked up
1268 	 * above.
1269 	 *
1270 	 * rtm_inits and rtm_rmx are filled in with metrics
1271 	 * based on whether a parent IRE was found or not.
1272 	 *
1273 	 * TODO: rtm_index and rtm_use should probably be
1274 	 * filled in with something resonable here and not just
1275 	 * copied from the request.
1276 	 */
1277 	new_rtm->rtm_index = rtm->rtm_index;
1278 	new_rtm->rtm_pid = rtm->rtm_pid;
1279 	new_rtm->rtm_seq = rtm->rtm_seq;
1280 	new_rtm->rtm_use = rtm->rtm_use;
1281 	new_rtm->rtm_addrs = rtm_addrs;
1282 	new_rtm->rtm_flags = rtm_flags;
1283 	if (sire == NULL)
1284 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1285 	else
1286 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1287 
1288 	return (new_mp);
1289 }
1290 
1291 /*
1292  * Fill the given if_data_t with interface statistics.
1293  */
1294 static void
1295 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1296 {
1297 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1298 	if_data->ifi_addrlen = 0;		/* media address length */
1299 	if_data->ifi_hdrlen = 0;		/* media header length */
1300 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1301 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1302 	if_data->ifi_baudrate = 0;		/* linespeed */
1303 
1304 	if_data->ifi_ipackets = 0;		/* packets received on if */
1305 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1306 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1307 	if_data->ifi_oerrors = 0;		/* output errors on if */
1308 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1309 	if_data->ifi_ibytes = 0;		/* total number received */
1310 	if_data->ifi_obytes = 0;		/* total number sent */
1311 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1312 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1313 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1314 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1315 						/* protocol. */
1316 }
1317 
1318 /*
1319  * Set the metrics on a forwarding table route.
1320  */
1321 static void
1322 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1323 {
1324 	clock_t		rtt;
1325 	clock_t		rtt_sd;
1326 	ipif_t		*ipif;
1327 	ifrt_t		*ifrt;
1328 	mblk_t		*mp;
1329 	in6_addr_t	gw_addr_v6;
1330 
1331 	/*
1332 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1333 	 * common case of no metrics.
1334 	 */
1335 	if (which == 0)
1336 		return;
1337 	ire->ire_uinfo.iulp_set = B_TRUE;
1338 
1339 	/*
1340 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1341 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1342 	 * microseconds.
1343 	 */
1344 	if (which & RTV_RTT)
1345 		rtt = metrics->rmx_rtt / 1000;
1346 	if (which & RTV_RTTVAR)
1347 		rtt_sd = metrics->rmx_rttvar / 1000;
1348 
1349 	/*
1350 	 * Update the metrics in the IRE itself.
1351 	 */
1352 	mutex_enter(&ire->ire_lock);
1353 	if (which & RTV_MTU)
1354 		ire->ire_max_frag = metrics->rmx_mtu;
1355 	if (which & RTV_RTT)
1356 		ire->ire_uinfo.iulp_rtt = rtt;
1357 	if (which & RTV_SSTHRESH)
1358 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1359 	if (which & RTV_RTTVAR)
1360 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1361 	if (which & RTV_SPIPE)
1362 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1363 	if (which & RTV_RPIPE)
1364 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1365 	mutex_exit(&ire->ire_lock);
1366 
1367 	/*
1368 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1369 	 * reflect the metric change there.
1370 	 */
1371 	ipif = ire->ire_ipif;
1372 	if (ipif == NULL)
1373 		return;
1374 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1375 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1376 	if (ipif->ipif_isv6) {
1377 		mutex_enter(&ire->ire_lock);
1378 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1379 		mutex_exit(&ire->ire_lock);
1380 	}
1381 	mutex_enter(&ipif->ipif_saved_ire_lock);
1382 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1383 		/*
1384 		 * On a given ipif, the triple of address, gateway and mask is
1385 		 * unique for each saved IRE (in the case of ordinary interface
1386 		 * routes, the gateway address is all-zeroes).
1387 		 */
1388 		ifrt = (ifrt_t *)mp->b_rptr;
1389 		if (ipif->ipif_isv6) {
1390 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1391 			    &ire->ire_addr_v6) ||
1392 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1393 			    &gw_addr_v6) ||
1394 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1395 			    &ire->ire_mask_v6))
1396 				continue;
1397 		} else {
1398 			if (ifrt->ifrt_addr != ire->ire_addr ||
1399 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1400 			    ifrt->ifrt_mask != ire->ire_mask)
1401 				continue;
1402 		}
1403 		if (which & RTV_MTU)
1404 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1405 		if (which & RTV_RTT)
1406 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1407 		if (which & RTV_SSTHRESH) {
1408 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1409 			    metrics->rmx_ssthresh;
1410 		}
1411 		if (which & RTV_RTTVAR)
1412 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1413 		if (which & RTV_SPIPE)
1414 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1415 		if (which & RTV_RPIPE)
1416 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1417 		break;
1418 	}
1419 	mutex_exit(&ipif->ipif_saved_ire_lock);
1420 }
1421 
1422 /*
1423  * Get the metrics from a forwarding table route.
1424  */
1425 static int
1426 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1427 {
1428 	int	metrics_set = 0;
1429 
1430 	bzero(metrics, sizeof (rt_metrics_t));
1431 	/*
1432 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1433 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1434 	 * microseconds.
1435 	 */
1436 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1437 	metrics_set |= RTV_RTT;
1438 	metrics->rmx_mtu = ire->ire_max_frag;
1439 	metrics_set |= RTV_MTU;
1440 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1441 	metrics_set |= RTV_SSTHRESH;
1442 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1443 	metrics_set |= RTV_RTTVAR;
1444 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1445 	metrics_set |= RTV_SPIPE;
1446 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1447 	metrics_set |= RTV_RPIPE;
1448 	return (metrics_set);
1449 }
1450 
1451 /*
1452  * Takes a pointer to a routing message and extracts necessary info by looking
1453  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1454  * passed (all of which must be valid).
1455  *
1456  * The bitmask of sockaddrs actually found in the message is returned, or zero
1457  * is returned in the case of an error.
1458  */
1459 static int
1460 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1461     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1462     in6_addr_t *in_src_addrp, ushort_t *indexp, ushort_t *src_indexp,
1463     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error)
1464 {
1465 	struct sockaddr *sa;
1466 	int	i;
1467 	int	addr_bits;
1468 	int	length;
1469 	int	found_addrs = 0;
1470 	caddr_t	cp;
1471 	size_t	size;
1472 	struct sockaddr_dl *sdl;
1473 
1474 	*dst_addrp = ipv6_all_zeros;
1475 	*gw_addrp = ipv6_all_zeros;
1476 	*net_maskp = ipv6_all_zeros;
1477 	*authorp = ipv6_all_zeros;
1478 	*if_addrp = ipv6_all_zeros;
1479 	*in_src_addrp = ipv6_all_zeros;
1480 	*indexp = 0;
1481 	*src_indexp = 0;
1482 	*afp = AF_UNSPEC;
1483 	rtsecattr->rtsa_cnt = 0;
1484 	*error = 0;
1485 
1486 	/*
1487 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1488 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1489 	 */
1490 	cp = (caddr_t)&rtm[1];
1491 	length = rtm->rtm_msglen;
1492 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1493 		/*
1494 		 * The address family we are working with starts out as
1495 		 * AF_UNSPEC, but is set to the one specified with the
1496 		 * destination address.
1497 		 *
1498 		 * If the "working" address family that has been set to
1499 		 * something other than AF_UNSPEC, then the address family of
1500 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1501 		 * compatibility with older programs) or must be the same as our
1502 		 * "working" one.
1503 		 *
1504 		 * This code assumes that RTA_DST (1) comes first in the loop.
1505 		 */
1506 		sa = (struct sockaddr *)cp;
1507 		addr_bits = (rtm->rtm_addrs & (1 << i));
1508 		if (addr_bits == 0)
1509 			continue;
1510 		switch (addr_bits) {
1511 		case RTA_DST:
1512 			size = rts_copyfromsockaddr(sa, dst_addrp);
1513 			*afp = sa->sa_family;
1514 			break;
1515 		case RTA_GATEWAY:
1516 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1517 				return (0);
1518 			size = rts_copyfromsockaddr(sa, gw_addrp);
1519 			break;
1520 		case RTA_NETMASK:
1521 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1522 				return (0);
1523 			size = rts_copyfromsockaddr(sa, net_maskp);
1524 			break;
1525 		case RTA_IFP:
1526 			if (sa->sa_family != AF_LINK &&
1527 			    sa->sa_family != AF_UNSPEC)
1528 				return (0);
1529 			sdl = (struct sockaddr_dl *)cp;
1530 			*indexp = sdl->sdl_index;
1531 			size = sizeof (struct sockaddr_dl);
1532 			break;
1533 		case RTA_SRC:
1534 			/* Source address of the incoming packet */
1535 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1536 			*afp = sa->sa_family;
1537 			break;
1538 		case RTA_SRCIFP:
1539 			/* Return incoming interface index pointer */
1540 			if (sa->sa_family != AF_LINK &&
1541 			    sa->sa_family != AF_UNSPEC)
1542 				return (0);
1543 			sdl = (struct sockaddr_dl *)cp;
1544 			*src_indexp = sdl->sdl_index;
1545 			size = sizeof (struct sockaddr_dl);
1546 			break;
1547 		case RTA_IFA:
1548 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1549 				return (0);
1550 			size = rts_copyfromsockaddr(sa, if_addrp);
1551 			break;
1552 		case RTA_AUTHOR:
1553 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1554 				return (0);
1555 			size = rts_copyfromsockaddr(sa, authorp);
1556 			break;
1557 		default:
1558 			return (0);
1559 		}
1560 		if (size == 0)
1561 			return (0);
1562 		cp += size;
1563 		found_addrs |= addr_bits;
1564 	}
1565 
1566 	/*
1567 	 * Parse the routing message and look for any security-
1568 	 * related attributes for the route.  For each valid
1569 	 * attribute, allocate/obtain the corresponding kernel
1570 	 * route security attributes.
1571 	 */
1572 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1573 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1574 
1575 	return (found_addrs);
1576 }
1577 
1578 /*
1579  * Fills the message with the given info.
1580  */
1581 static void
1582 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1583     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1584     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1585 {
1586 	rt_msghdr_t	*rtm;
1587 	sin_t		*sin;
1588 	size_t		data_size, header_size;
1589 	uchar_t		*cp;
1590 	int		i;
1591 
1592 	ASSERT(mp != NULL);
1593 	ASSERT(sacnt == 0 || gc != NULL);
1594 	/*
1595 	 * First find the type of the message
1596 	 * and its length.
1597 	 */
1598 	header_size = rts_header_msg_size(type);
1599 	/*
1600 	 * Now find the size of the data
1601 	 * that follows the message header.
1602 	 */
1603 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1604 
1605 	rtm = (rt_msghdr_t *)mp->b_rptr;
1606 	mp->b_wptr = &mp->b_rptr[header_size];
1607 	cp = mp->b_wptr;
1608 	bzero(cp, data_size);
1609 	for (i = 0; i < RTA_NUMBITS; i++) {
1610 		sin = (sin_t *)cp;
1611 		switch (rtm_addrs & (1 << i)) {
1612 		case RTA_DST:
1613 			sin->sin_addr.s_addr = dst;
1614 			sin->sin_family = AF_INET;
1615 			cp += sizeof (sin_t);
1616 			break;
1617 		case RTA_GATEWAY:
1618 			sin->sin_addr.s_addr = gateway;
1619 			sin->sin_family = AF_INET;
1620 			cp += sizeof (sin_t);
1621 			break;
1622 		case RTA_NETMASK:
1623 			sin->sin_addr.s_addr = mask;
1624 			sin->sin_family = AF_INET;
1625 			cp += sizeof (sin_t);
1626 			break;
1627 		case RTA_IFP:
1628 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1629 			break;
1630 		case RTA_SRCIFP:
1631 			/*
1632 			 * RTA_SRCIFP is not yet supported
1633 			 * for RTM_GET and RTM_CHANGE
1634 			 */
1635 			break;
1636 		case RTA_IFA:
1637 		case RTA_SRC:
1638 			sin->sin_addr.s_addr = src_addr;
1639 			sin->sin_family = AF_INET;
1640 			cp += sizeof (sin_t);
1641 			break;
1642 		case RTA_AUTHOR:
1643 			sin->sin_addr.s_addr = author;
1644 			sin->sin_family = AF_INET;
1645 			cp += sizeof (sin_t);
1646 			break;
1647 		case RTA_BRD:
1648 			/*
1649 			 * RTA_BRD is used typically to specify a point-to-point
1650 			 * destination address.
1651 			 */
1652 			sin->sin_addr.s_addr = brd_addr;
1653 			sin->sin_family = AF_INET;
1654 			cp += sizeof (sin_t);
1655 			break;
1656 		}
1657 	}
1658 
1659 	if (gc != NULL) {
1660 		rtm_ext_t *rtm_ext;
1661 		struct rtsa_s *rp_dst;
1662 		tsol_rtsecattr_t *rsap;
1663 		int i;
1664 
1665 		ASSERT(gc->gc_grp != NULL);
1666 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1667 		ASSERT(sacnt > 0);
1668 
1669 		rtm_ext = (rtm_ext_t *)cp;
1670 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1671 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1672 
1673 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1674 		rsap->rtsa_cnt = sacnt;
1675 		rp_dst = rsap->rtsa_attr;
1676 
1677 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1678 			ASSERT(gc->gc_db != NULL);
1679 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1680 		}
1681 		cp = (uchar_t *)rp_dst;
1682 	}
1683 
1684 	mp->b_wptr = cp;
1685 	mp->b_cont = NULL;
1686 	/*
1687 	 * set the fields that are common to
1688 	 * to different messages.
1689 	 */
1690 	rtm->rtm_msglen = (short)(header_size + data_size);
1691 	rtm->rtm_version = RTM_VERSION;
1692 	rtm->rtm_type = (uchar_t)type;
1693 }
1694 
1695 /*
1696  * Allocates and initializes a routing socket message.
1697  */
1698 mblk_t *
1699 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1700 {
1701 	size_t	length;
1702 	mblk_t	*mp;
1703 
1704 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1705 	mp = allocb(length, BPRI_MED);
1706 	if (mp == NULL)
1707 		return (mp);
1708 	bzero(mp->b_rptr, length);
1709 	return (mp);
1710 }
1711 
1712 /*
1713  * Returns the size of the routing
1714  * socket message header size.
1715  */
1716 size_t
1717 rts_header_msg_size(int type)
1718 {
1719 	switch (type) {
1720 	case RTM_DELADDR:
1721 	case RTM_NEWADDR:
1722 		return (sizeof (ifa_msghdr_t));
1723 	case RTM_IFINFO:
1724 		return (sizeof (if_msghdr_t));
1725 	default:
1726 		return (sizeof (rt_msghdr_t));
1727 	}
1728 }
1729 
1730 /*
1731  * Returns the size of the message needed with the given rtm_addrs and family.
1732  *
1733  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1734  * of the same family (currently either AF_INET or AF_INET6).
1735  */
1736 size_t
1737 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1738 {
1739 	int	i;
1740 	size_t	length = 0;
1741 
1742 	for (i = 0; i < RTA_NUMBITS; i++) {
1743 		switch (rtm_addrs & (1 << i)) {
1744 		case RTA_IFP:
1745 			length += sizeof (struct sockaddr_dl);
1746 			break;
1747 		case RTA_DST:
1748 		case RTA_GATEWAY:
1749 		case RTA_NETMASK:
1750 		case RTA_SRC:
1751 		case RTA_SRCIFP:
1752 		case RTA_IFA:
1753 		case RTA_AUTHOR:
1754 		case RTA_BRD:
1755 			ASSERT(af == AF_INET || af == AF_INET6);
1756 			switch (af) {
1757 			case AF_INET:
1758 				length += sizeof (sin_t);
1759 				break;
1760 			case AF_INET6:
1761 				length += sizeof (sin6_t);
1762 				break;
1763 			}
1764 			break;
1765 		}
1766 	}
1767 	if (sacnt > 0)
1768 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1769 
1770 	return (length);
1771 }
1772 
1773 /*
1774  * This routine is called to generate a message to the routing
1775  * socket indicating that a redirect has occured, a routing lookup
1776  * has failed, or that a protocol has detected timeouts to a particular
1777  * destination. This routine is called for message types RTM_LOSING,
1778  * RTM_REDIRECT, and RTM_MISS.
1779  */
1780 void
1781 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1782     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1783     ip_stack_t *ipst)
1784 {
1785 	rt_msghdr_t	*rtm;
1786 	mblk_t		*mp;
1787 
1788 	if (rtm_addrs == 0)
1789 		return;
1790 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1791 	if (mp == NULL)
1792 		return;
1793 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1794 	    author, NULL, mp, 0, NULL);
1795 	rtm = (rt_msghdr_t *)mp->b_rptr;
1796 	rtm->rtm_flags = flags;
1797 	rtm->rtm_errno = error;
1798 	rtm->rtm_flags |= RTF_DONE;
1799 	rtm->rtm_addrs = rtm_addrs;
1800 	rts_queue_input(mp, NULL, AF_INET, ipst);
1801 }
1802 
1803 /*
1804  * This routine is called to generate a message to the routing
1805  * socket indicating that the status of a network interface has changed.
1806  * Message type generated RTM_IFINFO.
1807  */
1808 void
1809 ip_rts_ifmsg(const ipif_t *ipif)
1810 {
1811 	if_msghdr_t	*ifm;
1812 	mblk_t		*mp;
1813 	sa_family_t	af;
1814 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1815 
1816 	/*
1817 	 * This message should be generated only
1818 	 * when the physical device is changing
1819 	 * state.
1820 	 */
1821 	if (ipif->ipif_id != 0)
1822 		return;
1823 	if (ipif->ipif_isv6) {
1824 		af = AF_INET6;
1825 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1826 		if (mp == NULL)
1827 			return;
1828 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1829 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1830 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1831 	} else {
1832 		af = AF_INET;
1833 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1834 		if (mp == NULL)
1835 			return;
1836 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1837 		    0, NULL);
1838 	}
1839 	ifm = (if_msghdr_t *)mp->b_rptr;
1840 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1841 	ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1842 	    ipif->ipif_ill->ill_phyint->phyint_flags;
1843 	rts_getifdata(&ifm->ifm_data, ipif);
1844 	ifm->ifm_addrs = RTA_IFP;
1845 	rts_queue_input(mp, NULL, af, ipst);
1846 }
1847 
1848 /*
1849  * This is called to generate messages to the routing socket
1850  * indicating a network interface has had addresses associated with it.
1851  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1852  */
1853 void
1854 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif)
1855 {
1856 	int		pass;
1857 	int		ncmd;
1858 	int		rtm_addrs;
1859 	mblk_t		*mp;
1860 	ifa_msghdr_t	*ifam;
1861 	rt_msghdr_t	*rtm;
1862 	sa_family_t	af;
1863 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1864 
1865 	if (ipif->ipif_isv6)
1866 		af = AF_INET6;
1867 	else
1868 		af = AF_INET;
1869 	/*
1870 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1871 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1872 	 */
1873 	for (pass = 1; pass < 3; pass++) {
1874 		if ((cmd == RTM_ADD && pass == 1) ||
1875 		    (cmd == RTM_DELETE && pass == 2)) {
1876 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1877 
1878 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1879 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1880 			if (mp == NULL)
1881 				continue;
1882 			switch (af) {
1883 			case AF_INET:
1884 				rts_fill_msg(ncmd, rtm_addrs, 0,
1885 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1886 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
1887 				    0, NULL);
1888 				break;
1889 			case AF_INET6:
1890 				rts_fill_msg_v6(ncmd, rtm_addrs,
1891 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1892 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1893 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1894 				    ipif, mp, 0, NULL);
1895 				break;
1896 			}
1897 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1898 			ifam->ifam_index =
1899 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1900 			ifam->ifam_metric = ipif->ipif_metric;
1901 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1902 			ifam->ifam_addrs = rtm_addrs;
1903 			rts_queue_input(mp, NULL, af, ipst);
1904 		}
1905 		if ((cmd == RTM_ADD && pass == 2) ||
1906 		    (cmd == RTM_DELETE && pass == 1)) {
1907 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1908 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1909 			if (mp == NULL)
1910 				continue;
1911 			switch (af) {
1912 			case AF_INET:
1913 				rts_fill_msg(cmd, rtm_addrs,
1914 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1915 				    0, 0, 0, NULL, mp, 0, NULL);
1916 				break;
1917 			case AF_INET6:
1918 				rts_fill_msg_v6(cmd, rtm_addrs,
1919 				    &ipif->ipif_v6lcl_addr,
1920 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1921 				    &ipv6_all_zeros, &ipv6_all_zeros,
1922 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1923 				break;
1924 			}
1925 			rtm = (rt_msghdr_t *)mp->b_rptr;
1926 			rtm->rtm_index =
1927 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1928 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1929 			rtm->rtm_errno = error;
1930 			if (error == 0)
1931 				rtm->rtm_flags |= RTF_DONE;
1932 			rtm->rtm_addrs = rtm_addrs;
1933 			rts_queue_input(mp, NULL, af, ipst);
1934 		}
1935 	}
1936 }
1937 
1938 /*
1939  * Based on the address family specified in a sockaddr, copy the address field
1940  * into an in6_addr_t.
1941  *
1942  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1943  * compatibility with programs that leave the family cleared in the sockaddr.
1944  * Callers of rts_copyfromsockaddr should check the family themselves if they
1945  * wish to verify its value.
1946  *
1947  * In the case of AF_INET6, a check is made to ensure that address is not an
1948  * IPv4-mapped address.
1949  */
1950 size_t
1951 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1952 {
1953 	switch (sa->sa_family) {
1954 	case AF_INET:
1955 	case AF_UNSPEC:
1956 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1957 		return (sizeof (sin_t));
1958 	case AF_INET6:
1959 		*addrp = ((sin6_t *)sa)->sin6_addr;
1960 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1961 			return (0);
1962 		return (sizeof (sin6_t));
1963 	default:
1964 		return (0);
1965 	}
1966 }
1967