xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_rts.c (revision 8dea286086b540419ab7594c626f1153fe6e99be)
1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * This file contains routines that process routing socket requests.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/stream.h>
49 #include <sys/stropts.h>
50 #include <sys/ddi.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 #include <sys/systm.h>
57 #include <sys/param.h>
58 #include <sys/socket.h>
59 #include <sys/strsun.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <net/if_dl.h>
64 #include <netinet/ip6.h>
65 
66 #include <inet/common.h>
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_ftable.h>
72 #include <inet/ip_rts.h>
73 
74 #include <inet/ipclassifier.h>
75 
76 #include <sys/tsol/tndb.h>
77 #include <sys/tsol/tnet.h>
78 
/*
 * Size of a complete routing socket message: the value returned by
 * rts_header_msg_size() for this message type plus the value returned by
 * rts_data_msg_size() for the requested address set, address family and
 * security attribute count.
 */
#define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
	(rts_header_msg_size(type) + rts_data_msg_size(rtm_addrs, af, sacnt))
81 
82 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
83 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
84     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
85     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
86 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
87     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
88     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
89     ushort_t *src_indexp, sa_family_t *afp, tsol_rtsecattr_t *rtsecattr,
90     int *error);
91 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
92 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
93 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
94     sa_family_t af);
95 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
96 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
97 
98 /*
99  * Send the ack to all the routing queues.  In case of the originating queue,
100  * send it only if the loopback is set.
101  *
102  * Messages are sent upstream only on routing sockets that did not specify an
103  * address family when they were created or when the address family matches the
104  * one specified by the caller.
105  *
106  */
107 void
108 rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af)
109 {
110 	mblk_t	*mp1;
111 	int	checkqfull;
112 	conn_t 	*connp, *next_connp;
113 
114 	mutex_enter(&rts_clients.connf_lock);
115 	connp = rts_clients.connf_head;
116 
117 	while (connp != NULL) {
118 		/*
119 		 * If there was a family specified when this routing socket was
120 		 * created and it doesn't match the family of the message to
121 		 * copy, then continue.
122 		 */
123 		if ((connp->conn_proto != AF_UNSPEC) &&
124 		    (connp->conn_proto != af)) {
125 			connp = connp->conn_next;
126 			continue;
127 		}
128 		/*
129 		 * For the originating queue, we only copy the message upstream
130 		 * if loopback is set.  For others reading on the routing
131 		 * socket, we check if there is room upstream for a copy of the
132 		 * message.
133 		 */
134 		if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) {
135 			if (connp->conn_loopback == 0) {
136 				connp = connp->conn_next;
137 				continue;
138 			}
139 			checkqfull = B_FALSE;
140 		} else {
141 			checkqfull = B_TRUE;
142 		}
143 		CONN_INC_REF(connp);
144 		mutex_exit(&rts_clients.connf_lock);
145 		if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) {
146 			mp1 = dupmsg(mp);
147 			if (mp1 == NULL)
148 				mp1 = copymsg(mp);
149 			if (mp1 != NULL)
150 				putnext(CONNP_TO_RQ(connp), mp1);
151 		}
152 
153 		mutex_enter(&rts_clients.connf_lock);
154 		/* Follow the next pointer before releasing the conn. */
155 		next_connp = connp->conn_next;
156 		CONN_DEC_REF(connp);
157 		connp = next_connp;
158 	}
159 	mutex_exit(&rts_clients.connf_lock);
160 	freemsg(mp);
161 }
162 
163 /*
164  * Takes an ire and sends an ack to all the routing sockets. This
165  * routine is used
166  * - when a route is created/deleted through the ioctl interface.
167  * - when ire_expire deletes a stale redirect
168  */
169 void
170 ip_rts_rtmsg(int type, ire_t *ire, int error)
171 {
172 	mblk_t		*mp;
173 	rt_msghdr_t	*rtm;
174 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
175 	sa_family_t	af;
176 	in6_addr_t	gw_addr_v6;
177 
178 	if (ire == NULL)
179 		return;
180 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
181 	    ire->ire_ipversion == IPV6_VERSION);
182 
183 	if (ire->ire_flags & RTF_SETSRC)
184 		rtm_addrs |= RTA_SRC;
185 
186 	switch (ire->ire_ipversion) {
187 	case IPV4_VERSION:
188 		af = AF_INET;
189 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
190 		if (mp == NULL)
191 			return;
192 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
193 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
194 		    0, NULL);
195 		break;
196 	case IPV6_VERSION:
197 		af = AF_INET6;
198 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
199 		if (mp == NULL)
200 			return;
201 		mutex_enter(&ire->ire_lock);
202 		gw_addr_v6 = ire->ire_gateway_addr_v6;
203 		mutex_exit(&ire->ire_lock);
204 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
205 		    &ire->ire_mask_v6, &gw_addr_v6,
206 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
207 		    NULL, mp, 0, NULL);
208 		break;
209 	}
210 	rtm = (rt_msghdr_t *)mp->b_rptr;
211 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
212 	rtm->rtm_addrs = rtm_addrs;
213 	rtm->rtm_flags = ire->ire_flags;
214 	if (error != 0)
215 		rtm->rtm_errno = error;
216 	else
217 		rtm->rtm_flags |= RTF_DONE;
218 	rts_queue_input(mp, NULL, af);
219 }
220 
221 /* ARGSUSED */
222 static void
223 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
224 {
225 	(void) ip_rts_request(q, mp, DB_CRED(mp));
226 }
227 
228 /*
229  * Processes requests received on a routing socket. It extracts all the
230  * arguments and calls the appropriate function to process the request.
231  *
232  * RTA_SRC bit flag requests are sent by mipagent and 'route -setsrc'.
233  * RTA_SRCIFP bit flag requests are sent by mipagent only.
234  *
235  * In general, this function does not consume the message supplied but rather
236  * sends the message upstream with an appropriate UNIX errno.
237  *
238  * We may need to restart this operation if the ipif cannot be looked up
239  * due to an exclusive operation that is currently in progress. The restart
240  * entry point is ip_rts_request_retry. While the request is enqueued in the
241  * ipsq the ioctl could be aborted and the conn close. To ensure that we don't
242  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
243  * released at the completion of the rts ioctl at the end of this function
244  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
245  * conn close occurs in conn_ioctl_cleanup.
246  */
247 int
248 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
249 {
250 	rt_msghdr_t	*rtm = NULL;
251 	in6_addr_t	dst_addr_v6;
252 	in6_addr_t	src_addr_v6;
253 	in6_addr_t	gw_addr_v6;
254 	in6_addr_t	net_mask_v6;
255 	in6_addr_t	author_v6;
256 	in6_addr_t	if_addr_v6;
257 	mblk_t		*mp1, *ioc_mp = mp;
258 	ire_t		*ire = NULL;
259 	ire_t		*sire = NULL;
260 	int		error = 0;
261 	int		match_flags = MATCH_IRE_DSTONLY;
262 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
263 	int		found_addrs;
264 	sa_family_t	af;
265 	ipaddr_t	dst_addr;
266 	ipaddr_t	gw_addr;
267 	ipaddr_t	src_addr;
268 	ipaddr_t	net_mask;
269 	ushort_t	index;
270 	ushort_t	src_index;
271 	ipif_t		*ipif = NULL;
272 	ipif_t		*src_ipif = NULL;
273 	ipif_t		*tmp_ipif = NULL;
274 	IOCP		iocp = (IOCP)mp->b_rptr;
275 	conn_t		*connp;
276 	boolean_t	gcgrp_xtraref = B_FALSE;
277 	tsol_gcgrp_addr_t ga;
278 	tsol_rtsecattr_t rtsecattr;
279 	struct rtsa_s	*rtsap = NULL;
280 	tsol_gcgrp_t	*gcgrp = NULL;
281 	tsol_gc_t	*gc = NULL;
282 	ts_label_t	*tsl = NULL;
283 	zoneid_t	zoneid;
284 
285 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
286 
287 	ASSERT(CONN_Q(q));
288 	connp = Q_TO_CONN(q);
289 	zoneid = connp->conn_zoneid;
290 
291 	ASSERT(mp->b_cont != NULL);
292 	/* ioc_mp holds mp */
293 	mp = mp->b_cont;
294 
295 	/*
296 	 * The Routing Socket data starts on
297 	 * next block. If there is no next block
298 	 * this is an indication from routing module
299 	 * that it is a routing socket stream queue.
300 	 */
301 	if (mp->b_cont != NULL) {
302 		mp1 = dupmsg(mp->b_cont);
303 		if (mp1 == NULL) {
304 			error  = ENOBUFS;
305 			goto done;
306 		}
307 		mp = mp1;
308 	} else {
309 		/*
310 		 * This is a message from RTS module
311 		 * indicating that this is a Routing Socket
312 		 * Stream. Insert this conn_t in routing
313 		 * socket client list.
314 		 */
315 
316 		connp->conn_loopback = 1;
317 		ipcl_hash_insert_wildcard(&rts_clients, connp);
318 
319 		goto done;
320 	}
321 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
322 		freemsg(mp);
323 		error =  EINVAL;
324 		goto done;
325 	}
326 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
327 		freemsg(mp);
328 		error = EINVAL;
329 		goto done;
330 	}
331 
332 	/*
333 	 * Check the routing message for basic consistency including the
334 	 * version number and that the number of octets written is the same
335 	 * as specified by the rtm_msglen field.
336 	 *
337 	 * At this point, an error can be delivered back via rtm_errno.
338 	 */
339 	rtm = (rt_msghdr_t *)mp->b_rptr;
340 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
341 		error = EINVAL;
342 		goto done;
343 	}
344 	if (rtm->rtm_version != RTM_VERSION) {
345 		error = EPROTONOSUPPORT;
346 		goto done;
347 	}
348 
349 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
350 	if (rtm->rtm_type != RTM_GET &&
351 	    rtm->rtm_type != RTM_RESOLVE &&
352 	    (ioc_cr == NULL ||
353 	    secpolicy_net_config(ioc_cr, B_FALSE) != 0)) {
354 		error = EPERM;
355 		goto done;
356 	}
357 
358 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
359 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &src_index, &af,
360 	    &rtsecattr, &error);
361 
362 	if (error != 0)
363 		goto done;
364 
365 	if ((found_addrs & RTA_DST) == 0) {
366 		error = EINVAL;
367 		goto done;
368 	}
369 
370 	/*
371 	 * Based on the address family of the destination address, determine
372 	 * the destination, gateway and netmask and return the appropriate error
373 	 * if an unknown address family was specified (following the errno
374 	 * values that 4.4BSD-Lite2 returns.)
375 	 */
376 	switch (af) {
377 	case AF_INET:
378 		/*
379 		 * RTA_SRCIFP is supported for interface route only.
380 		 * Thus a gateway route with srcifindex is rejected,
381 		 * except if it's a request to add reverse tunnel
382 		 * route.
383 		 */
384 		if ((rtm->rtm_flags & RTF_GATEWAY) &&
385 		    (found_addrs & RTA_SRCIFP) &&
386 		    !(found_addrs & RTA_SRC)) {
387 			error = EINVAL;
388 			goto done;
389 		}
390 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
391 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
392 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
393 		if (((found_addrs & RTA_NETMASK) == 0) ||
394 		    (rtm->rtm_flags & RTF_HOST))
395 			net_mask = IP_HOST_MASK;
396 		else
397 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
398 		break;
399 	case AF_INET6:
400 		/*
401 		 * RTA_SRCIFP is not a valid flag for IPv6 routes.
402 		 */
403 		if (found_addrs & RTA_SRCIFP) {
404 			error = EINVAL;
405 			goto done;
406 		}
407 		if (((found_addrs & RTA_NETMASK) == 0) ||
408 		    (rtm->rtm_flags & RTF_HOST))
409 			net_mask_v6 = ipv6_all_ones;
410 		break;
411 	default:
412 		/*
413 		 * These errno values are meant to be compatible with
414 		 * 4.4BSD-Lite2 for the given message types.
415 		 */
416 		switch (rtm->rtm_type) {
417 		case RTM_ADD:
418 		case RTM_DELETE:
419 			error = ESRCH;
420 			goto done;
421 		case RTM_GET:
422 		case RTM_CHANGE:
423 			error = EAFNOSUPPORT;
424 			goto done;
425 		default:
426 			error = EOPNOTSUPP;
427 			goto done;
428 		}
429 	}
430 
431 	/*
432 	 * At this point, the address family must be something known.
433 	 */
434 	ASSERT(af == AF_INET || af == AF_INET6);
435 
436 	if (index != 0) {
437 		ill_t   *ill;
438 
439 		/*
440 		 * IPC must be refheld somewhere in ip_wput_nondata or
441 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
442 		 * If ILL_CHANGING the request is queued in the ipsq.
443 		 */
444 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
445 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error);
446 		if (ill == NULL) {
447 			if (error != EINPROGRESS)
448 				error = EINVAL;
449 			goto done;
450 		}
451 
452 		ipif = ipif_get_next_ipif(NULL, ill);
453 		ill_refrele(ill);
454 		/*
455 		 * If this is replacement ipif, prevent a route from
456 		 * being added.
457 		 */
458 		if (ipif != NULL && ipif->ipif_replace_zero) {
459 			error = ENETDOWN;
460 			goto done;
461 		}
462 		match_flags |= MATCH_IRE_ILL;
463 	}
464 
465 	/* RTA_SRCIFP is unsupported on AF_INET6. */
466 	if (af == AF_INET && src_index != 0) {
467 		ill_t   *ill;
468 
469 		/* If ILL_CHANGING the request is queued in the ipsq. */
470 		ill = ill_lookup_on_ifindex(src_index, B_FALSE,
471 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error);
472 		if (ill == NULL) {
473 			if (error != EINPROGRESS)
474 				error = EINVAL;
475 			goto done;
476 		}
477 
478 		src_ipif = ipif_get_next_ipif(NULL, ill);
479 		ill_refrele(ill);
480 	}
481 	/*
482 	 * If a netmask was supplied in the message, then subsequent route
483 	 * lookups will attempt to match on the netmask as well.
484 	 */
485 	if ((found_addrs & RTA_NETMASK) != 0)
486 		match_flags |= MATCH_IRE_MASK;
487 
488 	/*
489 	 * We only process any passed-in route security attributes for
490 	 * either RTM_ADD or RTM_CHANGE message; We overload them
491 	 * to do an RTM_GET as a different label; ignore otherwise.
492 	 */
493 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
494 	    rtm->rtm_type == RTM_GET) {
495 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
496 		if (rtsecattr.rtsa_cnt > 0)
497 			rtsap = &rtsecattr.rtsa_attr[0];
498 	}
499 
500 	switch (rtm->rtm_type) {
501 	case RTM_ADD:
502 		/* if we are adding a route, gateway is a must */
503 		if ((found_addrs & RTA_GATEWAY) == 0) {
504 			error = EINVAL;
505 			goto done;
506 		}
507 
508 		/* Multirouting does not support net routes. */
509 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
510 		    RTF_MULTIRT) {
511 			error = EADDRNOTAVAIL;
512 			goto done;
513 		}
514 
515 		/*
516 		 * Multirouting and user-specified source addresses
517 		 * do not support interface based routing.
518 		 * Assigning a source address to an interface based
519 		 * route is achievable by plumbing a new ipif and
520 		 * setting up the interface route via this ipif,
521 		 * though.
522 		 */
523 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
524 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
525 				error = EADDRNOTAVAIL;
526 				goto done;
527 			}
528 		}
529 
530 		switch (af) {
531 		case AF_INET:
532 			if (src_addr != INADDR_ANY) {
533 				/*
534 				 * If there is a source address, but
535 				 * no RTF_SETSRC modifier, setup a MobileIP
536 				 * reverse tunnel.
537 				 */
538 				if ((rtm->rtm_flags & RTF_SETSRC) == 0) {
539 					error = ip_mrtun_rt_add(src_addr,
540 					    rtm->rtm_flags, ipif,
541 					    src_ipif, &ire, CONNP_TO_WQ(connp),
542 					    ioc_mp, ip_rts_request_retry);
543 					break;
544 				}
545 				/*
546 				 * The RTF_SETSRC flag is present, check that
547 				 * the supplied src address is not the loopback
548 				 * address. This would produce martian packets.
549 				 */
550 				if (src_addr == htonl(INADDR_LOOPBACK)) {
551 					error = EINVAL;
552 					goto done;
553 				}
554 				/*
555 				 * Also check that the supplied address is a
556 				 * valid, local one.
557 				 */
558 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
559 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
560 				    ip_rts_request_retry, &error);
561 				if (tmp_ipif == NULL) {
562 					if (error != EINPROGRESS)
563 						error = EADDRNOTAVAIL;
564 					goto done;
565 				}
566 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
567 				    (tmp_ipif->ipif_flags &
568 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
569 					error = EINVAL;
570 					goto done;
571 				}
572 			} else {
573 				/*
574 				 * The RTF_SETSRC modifier must be associated
575 				 * to a non-null source address.
576 				 */
577 				if (rtm->rtm_flags & RTF_SETSRC) {
578 					error = EINVAL;
579 					goto done;
580 				}
581 			}
582 
583 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
584 			    rtm->rtm_flags, ipif, src_ipif, &ire, B_FALSE,
585 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
586 			    rtsap);
587 			if (ipif != NULL)
588 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
589 			break;
590 		case AF_INET6:
591 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
592 				/*
593 				 * If there is a source address, but
594 				 * no RTF_SETSRC modifier, reject, as
595 				 * MobileIP IPv6 reverse tunnels are
596 				 * not supported.
597 				 */
598 				if ((rtm->rtm_flags & RTF_SETSRC) == 0) {
599 					error = EINVAL;
600 					goto done;
601 				}
602 				/*
603 				 * The RTF_SETSRC flag is present, check that
604 				 * the supplied src address is not the loopback
605 				 * address. This would produce martian packets.
606 				 */
607 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
608 					error = EINVAL;
609 					goto done;
610 				}
611 				/*
612 				 * Also check that the supplied address is a
613 				 * valid, local one.
614 				 */
615 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
616 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
617 				    ip_rts_request_retry, &error);
618 				if (tmp_ipif == NULL) {
619 					if (error != EINPROGRESS)
620 						error = EADDRNOTAVAIL;
621 					goto done;
622 				}
623 
624 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
625 				    (tmp_ipif->ipif_flags &
626 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
627 					error = EINVAL;
628 					goto done;
629 				}
630 
631 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
632 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
633 				    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
634 				    ip_rts_request_retry, rtsap);
635 				break;
636 			}
637 			/*
638 			 * The RTF_SETSRC modifier must be associated
639 			 * to a non-null source address.
640 			 */
641 			if (rtm->rtm_flags & RTF_SETSRC) {
642 				error = EINVAL;
643 				goto done;
644 			}
645 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
646 			    &gw_addr_v6, NULL, rtm->rtm_flags,
647 			    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
648 			    ip_rts_request_retry, rtsap);
649 			if (ipif != NULL)
650 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
651 			break;
652 		}
653 		if (error != 0)
654 			goto done;
655 		ASSERT(ire != NULL);
656 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
657 		break;
658 	case RTM_DELETE:
659 		/* if we are deleting a route, gateway is a must */
660 		if ((found_addrs & RTA_GATEWAY) == 0) {
661 			error = EINVAL;
662 			goto done;
663 		}
664 		/*
665 		 * The RTF_SETSRC modifier does not make sense
666 		 * when deleting a route.
667 		 */
668 		if (rtm->rtm_flags & RTF_SETSRC) {
669 			error = EINVAL;
670 			goto done;
671 		}
672 
673 		switch (af) {
674 		case AF_INET:
675 			/*
676 			 * If there is a source address, delete
677 			 * a MobileIP reverse tunnel.
678 			 */
679 			if (src_addr != INADDR_ANY) {
680 				error = ip_mrtun_rt_delete(src_addr,
681 				    src_ipif);
682 				break;
683 			}
684 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
685 			    found_addrs, rtm->rtm_flags, ipif, src_ipif,
686 			    B_FALSE, CONNP_TO_WQ(connp), ioc_mp,
687 			    ip_rts_request_retry);
688 			break;
689 		case AF_INET6:
690 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
691 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
692 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry);
693 			break;
694 		}
695 		break;
696 	case RTM_GET:
697 	case RTM_CHANGE:
698 		/*
699 		 * In the case of RTM_GET, the forwarding table should be
700 		 * searched recursively with default being matched if the
701 		 * specific route doesn't exist.  Also, if a gateway was
702 		 * specified then the gateway address must also be matched.
703 		 *
704 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
705 		 * is the new gateway address so matching on the gateway address
706 		 * is not done.  This can lead to ambiguity when looking up the
707 		 * route to change as usually only the destination (and netmask,
708 		 * if supplied) is used for the lookup.  However if a RTA_IFP
709 		 * sockaddr is also supplied, it can disambiguate which route to
710 		 * change provided the ambiguous routes are tied to distinct
711 		 * ill's (or interface indices).  If the routes are not tied to
712 		 * any particular interfaces (for example, with traditional
713 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
714 		 * it won't match any such routes.
715 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
716 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
717 		 */
718 		if (((found_addrs & RTA_SRC) != 0) &&
719 		    ((rtm->rtm_type == RTM_GET) ||
720 		    !(rtm->rtm_flags & RTF_SETSRC))) {
721 			error = EOPNOTSUPP;
722 			goto done;
723 		}
724 
725 		if (rtm->rtm_type == RTM_GET) {
726 			match_flags |=
727 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
728 			    MATCH_IRE_SECATTR);
729 			match_flags_local |= MATCH_IRE_SECATTR;
730 			if ((found_addrs & RTA_GATEWAY) != 0)
731 				match_flags |= MATCH_IRE_GW;
732 			if (ioc_cr)
733 				tsl = crgetlabel(ioc_cr);
734 			if (rtsap != NULL) {
735 				if (rtsa_validate(rtsap) != 0) {
736 					error = EINVAL;
737 					goto done;
738 				}
739 				if (tsl != NULL &&
740 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
741 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
742 				    !bldominates(&tsl->tsl_label,
743 				    &rtsap->rtsa_slrange.lower_bound))) {
744 					error = EPERM;
745 					goto done;
746 				}
747 				tsl = labelalloc(
748 				    &rtsap->rtsa_slrange.lower_bound,
749 				    rtsap->rtsa_doi, KM_NOSLEEP);
750 			}
751 		}
752 		if (rtm->rtm_type == RTM_CHANGE) {
753 			if ((found_addrs & RTA_GATEWAY) &&
754 			    (rtm->rtm_flags & RTF_SETSRC)) {
755 				/*
756 				 * Do not want to change the gateway,
757 				 * but rather the source address.
758 				 */
759 				match_flags |= MATCH_IRE_GW;
760 			}
761 		}
762 
763 		/*
764 		 * If the netmask is all ones (either as supplied or as derived
765 		 * above), then first check for an IRE_LOOPBACK or
766 		 * IRE_LOCAL entry.
767 		 *
768 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
769 		 * entry, then look in the forwarding table.
770 		 */
771 		switch (af) {
772 		case AF_INET:
773 			if (net_mask == IP_HOST_MASK) {
774 				ire = ire_ctable_lookup(dst_addr, gw_addr,
775 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
776 				    tsl, match_flags_local);
777 				/*
778 				 * If we found an IRE_LOCAL, make sure
779 				 * it is one that would be used by this
780 				 * zone to send packets.
781 				 */
782 				if (ire != NULL &&
783 				    ire->ire_type == IRE_LOCAL &&
784 				    ip_restrict_interzone_loopback &&
785 				    !ire_local_ok_across_zones(ire,
786 				    zoneid, &dst_addr, tsl)) {
787 					ire_refrele(ire);
788 					ire = NULL;
789 				}
790 			}
791 			if (ire == NULL) {
792 				ire = ire_ftable_lookup(dst_addr, net_mask,
793 				    gw_addr, 0, ipif, &sire, zoneid, 0,
794 				    tsl, match_flags);
795 			}
796 			break;
797 		case AF_INET6:
798 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
799 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
800 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
801 				    zoneid, tsl, match_flags_local);
802 				/*
803 				 * If we found an IRE_LOCAL, make sure
804 				 * it is one that would be used by this
805 				 * zone to send packets.
806 				 */
807 				if (ire != NULL &&
808 				    ire->ire_type == IRE_LOCAL &&
809 				    ip_restrict_interzone_loopback &&
810 				    !ire_local_ok_across_zones(ire,
811 				    zoneid, (void *)&dst_addr_v6, tsl)) {
812 					ire_refrele(ire);
813 					ire = NULL;
814 				}
815 			}
816 			if (ire == NULL) {
817 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
818 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
819 				    zoneid, 0, tsl, match_flags);
820 			}
821 			break;
822 		}
823 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
824 			label_rele(tsl);
825 
826 		if (ire == NULL) {
827 			error = ESRCH;
828 			goto done;
829 		}
830 		/* we know the IRE before we come here */
831 		switch (rtm->rtm_type) {
832 		case RTM_GET:
833 			mp1 = rts_rtmget(mp, ire, sire, af);
834 			if (mp1 == NULL) {
835 				error = ENOBUFS;
836 				goto done;
837 			}
838 			freemsg(mp);
839 			mp = mp1;
840 			rtm = (rt_msghdr_t *)mp->b_rptr;
841 			break;
842 		case RTM_CHANGE:
843 			/*
844 			 * Do not allow the multirouting state of a route
845 			 * to be changed. This aims to prevent undesirable
846 			 * stages where both multirt and non-multirt routes
847 			 * for the same destination are declared.
848 			 */
849 			if ((ire->ire_flags & RTF_MULTIRT) !=
850 			    (rtm->rtm_flags & RTF_MULTIRT)) {
851 				error = EINVAL;
852 				goto done;
853 			}
854 			/*
855 			 * Note that we do not need to do
856 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
857 			 * in metrics or gateway will not affect existing
858 			 * routes since it does not create a more specific
859 			 * route.
860 			 */
861 			switch (af) {
862 			case AF_INET:
863 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
864 				if ((found_addrs & RTA_GATEWAY) != 0 &&
865 				    (ire->ire_gateway_addr != gw_addr)) {
866 					ire->ire_gateway_addr = gw_addr;
867 				}
868 
869 				if (rtsap != NULL) {
870 					ga.ga_af = AF_INET;
871 					IN6_IPADDR_TO_V4MAPPED(
872 					    ire->ire_gateway_addr, &ga.ga_addr);
873 
874 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
875 					if (gcgrp == NULL) {
876 						error = ENOMEM;
877 						goto done;
878 					}
879 				}
880 
881 				if ((found_addrs & RTA_SRC) != 0 &&
882 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
883 				    (ire->ire_src_addr != src_addr)) {
884 
885 					if (src_addr != INADDR_ANY) {
886 						/*
887 						 * The RTF_SETSRC flag is
888 						 * present, check that the
889 						 * supplied src address is not
890 						 * the loopback address. This
891 						 * would produce martian
892 						 * packets.
893 						 */
894 						if (src_addr ==
895 						    htonl(INADDR_LOOPBACK)) {
896 							error = EINVAL;
897 							goto done;
898 						}
899 						/*
900 						 * Also check that the
901 						 * supplied addr is a valid
902 						 * local address.
903 						 */
904 						tmp_ipif = ipif_lookup_addr(
905 						    src_addr, NULL, ALL_ZONES,
906 						    CONNP_TO_WQ(connp), ioc_mp,
907 						    ip_rts_request_retry,
908 						    &error);
909 						if (tmp_ipif == NULL) {
910 							error = (error ==
911 							    EINPROGRESS) ?
912 							    error :
913 							    EADDRNOTAVAIL;
914 							goto done;
915 						}
916 
917 						if (!(tmp_ipif->ipif_flags &
918 						    IPIF_UP) ||
919 						    (tmp_ipif->ipif_flags &
920 						    (IPIF_NOLOCAL |
921 						    IPIF_ANYCAST))) {
922 							error = EINVAL;
923 							goto done;
924 						}
925 						ire->ire_flags |= RTF_SETSRC;
926 					} else {
927 						ire->ire_flags &= ~RTF_SETSRC;
928 					}
929 					ire->ire_src_addr = src_addr;
930 				}
931 				break;
932 			case AF_INET6:
933 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
934 				mutex_enter(&ire->ire_lock);
935 				if ((found_addrs & RTA_GATEWAY) != 0 &&
936 				    !IN6_ARE_ADDR_EQUAL(
937 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
938 					ire->ire_gateway_addr_v6 = gw_addr_v6;
939 				}
940 
941 				if (rtsap != NULL) {
942 					ga.ga_af = AF_INET6;
943 					ga.ga_addr = ire->ire_gateway_addr_v6;
944 
945 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
946 					if (gcgrp == NULL) {
947 						error = ENOMEM;
948 						goto done;
949 					}
950 				}
951 
952 				if ((found_addrs & RTA_SRC) != 0 &&
953 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
954 				    !IN6_ARE_ADDR_EQUAL(
955 					&ire->ire_src_addr_v6, &src_addr_v6)) {
956 
957 					if (!IN6_IS_ADDR_UNSPECIFIED(
958 					    &src_addr_v6)) {
959 						/*
960 						 * The RTF_SETSRC flag is
961 						 * present, check that the
962 						 * supplied src address is not
963 						 * the loopback address. This
964 						 * would produce martian
965 						 * packets.
966 						 */
967 						if (IN6_IS_ADDR_LOOPBACK(
968 						    &src_addr_v6)) {
969 							mutex_exit(
970 							    &ire->ire_lock);
971 							error = EINVAL;
972 							goto done;
973 						}
974 						/*
975 						 * Also check that the
976 						 * supplied addr is a valid
977 						 * local address.
978 						 */
979 						tmp_ipif = ipif_lookup_addr_v6(
980 						    &src_addr_v6, NULL,
981 						    ALL_ZONES,
982 						    CONNP_TO_WQ(connp), ioc_mp,
983 						    ip_rts_request_retry,
984 						    &error);
985 						if (tmp_ipif == NULL) {
986 							mutex_exit(
987 							    &ire->ire_lock);
988 							error = (error ==
989 							    EINPROGRESS) ?
990 							    error :
991 							    EADDRNOTAVAIL;
992 							goto done;
993 						}
994 						if (!(tmp_ipif->ipif_flags &
995 						    IPIF_UP) ||
996 						    (tmp_ipif->ipif_flags &
997 						    (IPIF_NOLOCAL |
998 						    IPIF_ANYCAST))) {
999 							mutex_exit(
1000 							    &ire->ire_lock);
1001 							error = EINVAL;
1002 							goto done;
1003 						}
1004 						ire->ire_flags |= RTF_SETSRC;
1005 					} else {
1006 						ire->ire_flags &= ~RTF_SETSRC;
1007 					}
1008 					ire->ire_src_addr_v6 = src_addr_v6;
1009 				}
1010 				mutex_exit(&ire->ire_lock);
1011 				break;
1012 			}
1013 
1014 			if (rtsap != NULL) {
1015 				in_addr_t ga_addr4;
1016 
1017 				ASSERT(gcgrp != NULL);
1018 
1019 				/*
1020 				 * Create and add the security attribute to
1021 				 * prefix IRE; it will add a reference to the
1022 				 * group upon allocating a new entry.  If it
1023 				 * finds an already-existing entry for the
1024 				 * security attribute, it simply returns it
1025 				 * and no new group reference is made.
1026 				 */
1027 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
1028 				if (gc == NULL ||
1029 				    (error = tsol_ire_init_gwattr(ire,
1030 				    ire->ire_ipversion, gc, NULL)) != 0) {
1031 					if (gc != NULL) {
1032 						GC_REFRELE(gc);
1033 					} else {
1034 						/* gc_create failed */
1035 						error = ENOMEM;
1036 					}
1037 					goto done;
1038 				}
1039 
1040 				/*
1041 				 * Now delete any existing gateway IRE caches
1042 				 * as well as all caches using the gateway,
1043 				 * and allow them to be created on demand
1044 				 * through ip_newroute{_v6}.
1045 				 */
1046 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
1047 				if (af == AF_INET) {
1048 					ire_clookup_delete_cache_gw(
1049 					    ga_addr4, ALL_ZONES);
1050 				} else {
1051 					ire_clookup_delete_cache_gw_v6(
1052 					    &ga.ga_addr, ALL_ZONES);
1053 				}
1054 			}
1055 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
1056 			break;
1057 		}
1058 		break;
1059 	default:
1060 		error = EOPNOTSUPP;
1061 		break;
1062 	}
1063 done:
1064 	if (ire != NULL)
1065 		ire_refrele(ire);
1066 	if (sire != NULL)
1067 		ire_refrele(sire);
1068 	if (ipif != NULL)
1069 		ipif_refrele(ipif);
1070 	if (src_ipif != NULL)
1071 		ipif_refrele(src_ipif);
1072 	if (tmp_ipif != NULL)
1073 		ipif_refrele(tmp_ipif);
1074 
1075 	if (gcgrp_xtraref)
1076 		GCGRP_REFRELE(gcgrp);
1077 
1078 	if (error == EINPROGRESS) {
1079 		if (rtm != NULL)
1080 			freemsg(mp);
1081 		return (error);
1082 	}
1083 	if (rtm != NULL) {
1084 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1085 		if (error != 0) {
1086 			rtm->rtm_errno = error;
1087 			/* Send error ACK */
1088 			ip1dbg(("ip_rts_request: error %d\n", error));
1089 		} else {
1090 			rtm->rtm_flags |= RTF_DONE;
1091 			/* OK ACK already set up by caller except this */
1092 			ip2dbg(("ip_rts_request: OK ACK\n"));
1093 		}
1094 		rts_queue_input(mp, q, af);
1095 	}
1096 	iocp->ioc_error = error;
1097 	ioc_mp->b_datap->db_type = M_IOCACK;
1098 	if (iocp->ioc_error != 0)
1099 		iocp->ioc_count = 0;
1100 	qreply(q, ioc_mp);
1101 	/* conn was refheld in ip_wput_ioctl. */
1102 	CONN_OPER_PENDING_DONE(connp);
1103 
1104 	return (error);
1105 }
1106 
1107 /*
1108  * Build a reply to the RTM_GET request contained in the given message block
1109  * using the retrieved IRE of the destination address, the parent IRE (if it
1110  * exists) and the address family.
1111  *
1112  * Returns a pointer to a message block containing the reply if successful,
1113  * otherwise NULL is returned.
1114  */
1115 static mblk_t *
1116 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1117 {
1118 	rt_msghdr_t	*rtm;
1119 	rt_msghdr_t	*new_rtm;
1120 	mblk_t		*new_mp;
1121 	int		rtm_addrs;
1122 	int		rtm_flags;
1123 	in6_addr_t	gw_addr_v6;
1124 	tsol_ire_gw_secattr_t *attrp = NULL;
1125 	tsol_gc_t	*gc = NULL;
1126 	tsol_gcgrp_t	*gcgrp = NULL;
1127 	int		sacnt = 0;
1128 
1129 	ASSERT(ire->ire_ipif != NULL);
1130 	rtm = (rt_msghdr_t *)mp->b_rptr;
1131 
1132 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1133 		attrp = sire->ire_gw_secattr;
1134 	else if (ire->ire_gw_secattr != NULL)
1135 		attrp = ire->ire_gw_secattr;
1136 
1137 	if (attrp != NULL) {
1138 		mutex_enter(&attrp->igsa_lock);
1139 		if ((gc = attrp->igsa_gc) != NULL) {
1140 			gcgrp = gc->gc_grp;
1141 			ASSERT(gcgrp != NULL);
1142 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1143 			sacnt = 1;
1144 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1145 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1146 			gc = gcgrp->gcgrp_head;
1147 			sacnt = gcgrp->gcgrp_count;
1148 		}
1149 		mutex_exit(&attrp->igsa_lock);
1150 
1151 		/* do nothing if there's no gc to report */
1152 		if (gc == NULL) {
1153 			ASSERT(sacnt == 0);
1154 			if (gcgrp != NULL) {
1155 				/* we might as well drop the lock now */
1156 				rw_exit(&gcgrp->gcgrp_rwlock);
1157 				gcgrp = NULL;
1158 			}
1159 			attrp = NULL;
1160 		}
1161 
1162 		ASSERT(gc == NULL || (gcgrp != NULL &&
1163 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1164 	}
1165 	ASSERT(sacnt == 0 || gc != NULL);
1166 
1167 	/*
1168 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1169 	 *
1170 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1171 	 * RTA_IFP and RTA_IFA if either is defined, and also
1172 	 * returns RTA_BRD if the appropriate interface is
1173 	 * point-to-point.
1174 	 */
1175 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1176 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1177 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1178 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1179 			rtm_addrs |= RTA_BRD;
1180 	}
1181 
1182 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1183 	if (new_mp == NULL) {
1184 		if (gcgrp != NULL)
1185 			rw_exit(&gcgrp->gcgrp_rwlock);
1186 		return (NULL);
1187 	}
1188 
1189 	/*
1190 	 * We set the destination address, gateway address,
1191 	 * netmask and flags in the RTM_GET response depending
1192 	 * on whether we found a parent IRE or not.
1193 	 * In particular, if we did find a parent IRE during the
1194 	 * recursive search, use that IRE's gateway address.
1195 	 * Otherwise, we use the IRE's source address for the
1196 	 * gateway address.
1197 	 */
1198 	ASSERT(af == AF_INET || af == AF_INET6);
1199 	switch (af) {
1200 	case AF_INET:
1201 		if (sire == NULL) {
1202 			rtm_flags = ire->ire_flags;
1203 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1204 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1205 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1206 			    new_mp, sacnt, gc);
1207 		} else {
1208 			if (sire->ire_flags & RTF_SETSRC)
1209 				rtm_addrs |= RTA_SRC;
1210 
1211 			rtm_flags = sire->ire_flags;
1212 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1213 			    sire->ire_mask, sire->ire_gateway_addr,
1214 			    (sire->ire_flags & RTF_SETSRC) ?
1215 				sire->ire_src_addr : ire->ire_src_addr,
1216 			    ire->ire_ipif->ipif_pp_dst_addr,
1217 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1218 		}
1219 		break;
1220 	case AF_INET6:
1221 		if (sire == NULL) {
1222 			rtm_flags = ire->ire_flags;
1223 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1224 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1225 			    &ire->ire_src_addr_v6,
1226 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1227 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1228 			    sacnt, gc);
1229 		} else {
1230 			if (sire->ire_flags & RTF_SETSRC)
1231 				rtm_addrs |= RTA_SRC;
1232 
1233 			rtm_flags = sire->ire_flags;
1234 			mutex_enter(&sire->ire_lock);
1235 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1236 			mutex_exit(&sire->ire_lock);
1237 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1238 			    &sire->ire_mask_v6, &gw_addr_v6,
1239 			    (sire->ire_flags & RTF_SETSRC) ?
1240 				&sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1241 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1242 			    ire->ire_ipif, new_mp, sacnt, gc);
1243 		}
1244 		break;
1245 	}
1246 
1247 	if (gcgrp != NULL)
1248 		rw_exit(&gcgrp->gcgrp_rwlock);
1249 
1250 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1251 
1252 	/*
1253 	 * The rtm_msglen, rtm_version and rtm_type fields in
1254 	 * RTM_GET response are filled in by rts_fill_msg.
1255 	 *
1256 	 * rtm_addrs and rtm_flags are filled in based on what
1257 	 * was requested and the state of the IREs looked up
1258 	 * above.
1259 	 *
1260 	 * rtm_inits and rtm_rmx are filled in with metrics
1261 	 * based on whether a parent IRE was found or not.
1262 	 *
1263 	 * TODO: rtm_index and rtm_use should probably be
1264 	 * filled in with something resonable here and not just
1265 	 * copied from the request.
1266 	 */
1267 	new_rtm->rtm_index = rtm->rtm_index;
1268 	new_rtm->rtm_pid = rtm->rtm_pid;
1269 	new_rtm->rtm_seq = rtm->rtm_seq;
1270 	new_rtm->rtm_use = rtm->rtm_use;
1271 	new_rtm->rtm_addrs = rtm_addrs;
1272 	new_rtm->rtm_flags = rtm_flags;
1273 	if (sire == NULL)
1274 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1275 	else
1276 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1277 
1278 	return (new_mp);
1279 }
1280 
1281 /*
1282  * Fill the given if_data_t with interface statistics.
1283  */
1284 static void
1285 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1286 {
1287 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1288 	if_data->ifi_addrlen = 0;		/* media address length */
1289 	if_data->ifi_hdrlen = 0;		/* media header length */
1290 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1291 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1292 	if_data->ifi_baudrate = 0;		/* linespeed */
1293 
1294 	if_data->ifi_ipackets = 0;		/* packets received on if */
1295 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1296 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1297 	if_data->ifi_oerrors = 0;		/* output errors on if */
1298 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1299 	if_data->ifi_ibytes = 0;		/* total number received */
1300 	if_data->ifi_obytes = 0;		/* total number sent */
1301 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1302 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1303 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1304 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1305 						/* protocol. */
1306 }
1307 
1308 /*
1309  * Set the metrics on a forwarding table route.
1310  */
1311 static void
1312 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1313 {
1314 	clock_t		rtt;
1315 	clock_t		rtt_sd;
1316 	ipif_t		*ipif;
1317 	ifrt_t		*ifrt;
1318 	mblk_t		*mp;
1319 	in6_addr_t	gw_addr_v6;
1320 
1321 	/*
1322 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1323 	 * common case of no metrics.
1324 	 */
1325 	if (which == 0)
1326 		return;
1327 	ire->ire_uinfo.iulp_set = B_TRUE;
1328 
1329 	/*
1330 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1331 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1332 	 * microseconds.
1333 	 */
1334 	if (which & RTV_RTT)
1335 		rtt = metrics->rmx_rtt / 1000;
1336 	if (which & RTV_RTTVAR)
1337 		rtt_sd = metrics->rmx_rttvar / 1000;
1338 
1339 	/*
1340 	 * Update the metrics in the IRE itself.
1341 	 */
1342 	mutex_enter(&ire->ire_lock);
1343 	if (which & RTV_MTU)
1344 		ire->ire_max_frag = metrics->rmx_mtu;
1345 	if (which & RTV_RTT)
1346 		ire->ire_uinfo.iulp_rtt = rtt;
1347 	if (which & RTV_SSTHRESH)
1348 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1349 	if (which & RTV_RTTVAR)
1350 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1351 	if (which & RTV_SPIPE)
1352 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1353 	if (which & RTV_RPIPE)
1354 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1355 	mutex_exit(&ire->ire_lock);
1356 
1357 	/*
1358 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1359 	 * reflect the metric change there.
1360 	 */
1361 	ipif = ire->ire_ipif;
1362 	if (ipif == NULL)
1363 		return;
1364 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1365 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1366 	if (ipif->ipif_isv6) {
1367 		mutex_enter(&ire->ire_lock);
1368 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1369 		mutex_exit(&ire->ire_lock);
1370 	}
1371 	mutex_enter(&ipif->ipif_saved_ire_lock);
1372 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1373 		/*
1374 		 * On a given ipif, the triple of address, gateway and mask is
1375 		 * unique for each saved IRE (in the case of ordinary interface
1376 		 * routes, the gateway address is all-zeroes).
1377 		 */
1378 		ifrt = (ifrt_t *)mp->b_rptr;
1379 		if (ipif->ipif_isv6) {
1380 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1381 			    &ire->ire_addr_v6) ||
1382 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1383 			    &gw_addr_v6) ||
1384 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1385 			    &ire->ire_mask_v6))
1386 				continue;
1387 		} else {
1388 			if (ifrt->ifrt_addr != ire->ire_addr ||
1389 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1390 			    ifrt->ifrt_mask != ire->ire_mask)
1391 				continue;
1392 		}
1393 		if (which & RTV_MTU)
1394 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1395 		if (which & RTV_RTT)
1396 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1397 		if (which & RTV_SSTHRESH) {
1398 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1399 			    metrics->rmx_ssthresh;
1400 		}
1401 		if (which & RTV_RTTVAR)
1402 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1403 		if (which & RTV_SPIPE)
1404 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1405 		if (which & RTV_RPIPE)
1406 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1407 		break;
1408 	}
1409 	mutex_exit(&ipif->ipif_saved_ire_lock);
1410 }
1411 
1412 /*
1413  * Get the metrics from a forwarding table route.
1414  */
1415 static int
1416 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1417 {
1418 	int	metrics_set = 0;
1419 
1420 	bzero(metrics, sizeof (rt_metrics_t));
1421 	/*
1422 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1423 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1424 	 * microseconds.
1425 	 */
1426 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1427 	metrics_set |= RTV_RTT;
1428 	metrics->rmx_mtu = ire->ire_max_frag;
1429 	metrics_set |= RTV_MTU;
1430 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1431 	metrics_set |= RTV_SSTHRESH;
1432 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1433 	metrics_set |= RTV_RTTVAR;
1434 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1435 	metrics_set |= RTV_SPIPE;
1436 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1437 	metrics_set |= RTV_RPIPE;
1438 	return (metrics_set);
1439 }
1440 
1441 /*
1442  * Takes a pointer to a routing message and extracts necessary info by looking
1443  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1444  * passed (all of which must be valid).
1445  *
1446  * The bitmask of sockaddrs actually found in the message is returned, or zero
1447  * is returned in the case of an error.
1448  */
1449 static int
1450 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1451     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1452     in6_addr_t *in_src_addrp, ushort_t *indexp, ushort_t *src_indexp,
1453     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error)
1454 {
1455 	struct sockaddr *sa;
1456 	int	i;
1457 	int	addr_bits;
1458 	int	length;
1459 	int	found_addrs = 0;
1460 	caddr_t	cp;
1461 	size_t	size;
1462 	struct sockaddr_dl *sdl;
1463 
1464 	*dst_addrp = ipv6_all_zeros;
1465 	*gw_addrp = ipv6_all_zeros;
1466 	*net_maskp = ipv6_all_zeros;
1467 	*authorp = ipv6_all_zeros;
1468 	*if_addrp = ipv6_all_zeros;
1469 	*in_src_addrp = ipv6_all_zeros;
1470 	*indexp = 0;
1471 	*src_indexp = 0;
1472 	*afp = AF_UNSPEC;
1473 	rtsecattr->rtsa_cnt = 0;
1474 	*error = 0;
1475 
1476 	/*
1477 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1478 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1479 	 */
1480 	cp = (caddr_t)&rtm[1];
1481 	length = rtm->rtm_msglen;
1482 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1483 		/*
1484 		 * The address family we are working with starts out as
1485 		 * AF_UNSPEC, but is set to the one specified with the
1486 		 * destination address.
1487 		 *
1488 		 * If the "working" address family that has been set to
1489 		 * something other than AF_UNSPEC, then the address family of
1490 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1491 		 * compatibility with older programs) or must be the same as our
1492 		 * "working" one.
1493 		 *
1494 		 * This code assumes that RTA_DST (1) comes first in the loop.
1495 		 */
1496 		sa = (struct sockaddr *)cp;
1497 		addr_bits = (rtm->rtm_addrs & (1 << i));
1498 		if (addr_bits == 0)
1499 			continue;
1500 		switch (addr_bits) {
1501 		case RTA_DST:
1502 			size = rts_copyfromsockaddr(sa, dst_addrp);
1503 			*afp = sa->sa_family;
1504 			break;
1505 		case RTA_GATEWAY:
1506 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1507 				return (0);
1508 			size = rts_copyfromsockaddr(sa, gw_addrp);
1509 			break;
1510 		case RTA_NETMASK:
1511 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1512 				return (0);
1513 			size = rts_copyfromsockaddr(sa, net_maskp);
1514 			break;
1515 		case RTA_IFP:
1516 			if (sa->sa_family != AF_LINK &&
1517 			    sa->sa_family != AF_UNSPEC)
1518 				return (0);
1519 			sdl = (struct sockaddr_dl *)cp;
1520 			*indexp = sdl->sdl_index;
1521 			size = sizeof (struct sockaddr_dl);
1522 			break;
1523 		case RTA_SRC:
1524 			/* Source address of the incoming packet */
1525 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1526 			*afp = sa->sa_family;
1527 			break;
1528 		case RTA_SRCIFP:
1529 			/* Return incoming interface index pointer */
1530 			if (sa->sa_family != AF_LINK &&
1531 			    sa->sa_family != AF_UNSPEC)
1532 				return (0);
1533 			sdl = (struct sockaddr_dl *)cp;
1534 			*src_indexp = sdl->sdl_index;
1535 			size = sizeof (struct sockaddr_dl);
1536 			break;
1537 		case RTA_IFA:
1538 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1539 				return (0);
1540 			size = rts_copyfromsockaddr(sa, if_addrp);
1541 			break;
1542 		case RTA_AUTHOR:
1543 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1544 				return (0);
1545 			size = rts_copyfromsockaddr(sa, authorp);
1546 			break;
1547 		default:
1548 			return (0);
1549 		}
1550 		if (size == 0)
1551 			return (0);
1552 		cp += size;
1553 		found_addrs |= addr_bits;
1554 	}
1555 
1556 	/*
1557 	 * Parse the routing message and look for any security-
1558 	 * related attributes for the route.  For each valid
1559 	 * attribute, allocate/obtain the corresponding kernel
1560 	 * route security attributes.
1561 	 */
1562 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1563 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1564 
1565 	return (found_addrs);
1566 }
1567 
1568 /*
1569  * Fills the message with the given info.
1570  */
1571 static void
1572 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1573     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1574     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1575 {
1576 	rt_msghdr_t	*rtm;
1577 	sin_t		*sin;
1578 	size_t		data_size, header_size;
1579 	uchar_t		*cp;
1580 	int		i;
1581 
1582 	ASSERT(mp != NULL);
1583 	ASSERT(sacnt == 0 || gc != NULL);
1584 	/*
1585 	 * First find the type of the message
1586 	 * and its length.
1587 	 */
1588 	header_size = rts_header_msg_size(type);
1589 	/*
1590 	 * Now find the size of the data
1591 	 * that follows the message header.
1592 	 */
1593 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1594 
1595 	rtm = (rt_msghdr_t *)mp->b_rptr;
1596 	mp->b_wptr = &mp->b_rptr[header_size];
1597 	cp = mp->b_wptr;
1598 	bzero(cp, data_size);
1599 	for (i = 0; i < RTA_NUMBITS; i++) {
1600 		sin = (sin_t *)cp;
1601 		switch (rtm_addrs & (1 << i)) {
1602 		case RTA_DST:
1603 			sin->sin_addr.s_addr = dst;
1604 			sin->sin_family = AF_INET;
1605 			cp += sizeof (sin_t);
1606 			break;
1607 		case RTA_GATEWAY:
1608 			sin->sin_addr.s_addr = gateway;
1609 			sin->sin_family = AF_INET;
1610 			cp += sizeof (sin_t);
1611 			break;
1612 		case RTA_NETMASK:
1613 			sin->sin_addr.s_addr = mask;
1614 			sin->sin_family = AF_INET;
1615 			cp += sizeof (sin_t);
1616 			break;
1617 		case RTA_IFP:
1618 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1619 			break;
1620 		case RTA_SRCIFP:
1621 			/*
1622 			 * RTA_SRCIFP is not yet supported
1623 			 * for RTM_GET and RTM_CHANGE
1624 			 */
1625 			break;
1626 		case RTA_IFA:
1627 		case RTA_SRC:
1628 			sin->sin_addr.s_addr = src_addr;
1629 			sin->sin_family = AF_INET;
1630 			cp += sizeof (sin_t);
1631 			break;
1632 		case RTA_AUTHOR:
1633 			sin->sin_addr.s_addr = author;
1634 			sin->sin_family = AF_INET;
1635 			cp += sizeof (sin_t);
1636 			break;
1637 		case RTA_BRD:
1638 			/*
1639 			 * RTA_BRD is used typically to specify a point-to-point
1640 			 * destination address.
1641 			 */
1642 			sin->sin_addr.s_addr = brd_addr;
1643 			sin->sin_family = AF_INET;
1644 			cp += sizeof (sin_t);
1645 			break;
1646 		}
1647 	}
1648 
1649 	if (gc != NULL) {
1650 		rtm_ext_t *rtm_ext;
1651 		struct rtsa_s *rp_dst;
1652 		tsol_rtsecattr_t *rsap;
1653 		int i;
1654 
1655 		ASSERT(gc->gc_grp != NULL);
1656 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1657 		ASSERT(sacnt > 0);
1658 
1659 		rtm_ext = (rtm_ext_t *)cp;
1660 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1661 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1662 
1663 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1664 		rsap->rtsa_cnt = sacnt;
1665 		rp_dst = rsap->rtsa_attr;
1666 
1667 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1668 			ASSERT(gc->gc_db != NULL);
1669 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1670 		}
1671 		cp = (uchar_t *)rp_dst;
1672 	}
1673 
1674 	mp->b_wptr = cp;
1675 	mp->b_cont = NULL;
1676 	/*
1677 	 * set the fields that are common to
1678 	 * to different messages.
1679 	 */
1680 	rtm->rtm_msglen = (short)(header_size + data_size);
1681 	rtm->rtm_version = RTM_VERSION;
1682 	rtm->rtm_type = (uchar_t)type;
1683 }
1684 
1685 /*
1686  * Allocates and initializes a routing socket message.
1687  */
1688 mblk_t *
1689 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1690 {
1691 	size_t	length;
1692 	mblk_t	*mp;
1693 
1694 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1695 	mp = allocb(length, BPRI_MED);
1696 	if (mp == NULL)
1697 		return (mp);
1698 	bzero(mp->b_rptr, length);
1699 	return (mp);
1700 }
1701 
1702 /*
1703  * Returns the size of the routing
1704  * socket message header size.
1705  */
1706 size_t
1707 rts_header_msg_size(int type)
1708 {
1709 	switch (type) {
1710 	case RTM_DELADDR:
1711 	case RTM_NEWADDR:
1712 		return (sizeof (ifa_msghdr_t));
1713 	case RTM_IFINFO:
1714 		return (sizeof (if_msghdr_t));
1715 	default:
1716 		return (sizeof (rt_msghdr_t));
1717 	}
1718 }
1719 
1720 /*
1721  * Returns the size of the message needed with the given rtm_addrs and family.
1722  *
1723  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1724  * of the same family (currently either AF_INET or AF_INET6).
1725  */
1726 size_t
1727 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1728 {
1729 	int	i;
1730 	size_t	length = 0;
1731 
1732 	for (i = 0; i < RTA_NUMBITS; i++) {
1733 		switch (rtm_addrs & (1 << i)) {
1734 		case RTA_IFP:
1735 			length += sizeof (struct sockaddr_dl);
1736 			break;
1737 		case RTA_DST:
1738 		case RTA_GATEWAY:
1739 		case RTA_NETMASK:
1740 		case RTA_SRC:
1741 		case RTA_SRCIFP:
1742 		case RTA_IFA:
1743 		case RTA_AUTHOR:
1744 		case RTA_BRD:
1745 			ASSERT(af == AF_INET || af == AF_INET6);
1746 			switch (af) {
1747 			case AF_INET:
1748 				length += sizeof (sin_t);
1749 				break;
1750 			case AF_INET6:
1751 				length += sizeof (sin6_t);
1752 				break;
1753 			}
1754 			break;
1755 		}
1756 	}
1757 	if (sacnt > 0)
1758 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1759 
1760 	return (length);
1761 }
1762 
1763 /*
1764  * This routine is called to generate a message to the routing
1765  * socket indicating that a redirect has occured, a routing lookup
1766  * has failed, or that a protocol has detected timeouts to a particular
1767  * destination. This routine is called for message types RTM_LOSING,
1768  * RTM_REDIRECT, and RTM_MISS.
1769  */
1770 void
1771 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1772     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs)
1773 {
1774 	rt_msghdr_t	*rtm;
1775 	mblk_t		*mp;
1776 
1777 	if (rtm_addrs == 0)
1778 		return;
1779 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1780 	if (mp == NULL)
1781 		return;
1782 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1783 	    author, NULL, mp, 0, NULL);
1784 	rtm = (rt_msghdr_t *)mp->b_rptr;
1785 	rtm->rtm_flags = flags;
1786 	rtm->rtm_errno = error;
1787 	rtm->rtm_flags |= RTF_DONE;
1788 	rtm->rtm_addrs = rtm_addrs;
1789 	rts_queue_input(mp, NULL, AF_INET);
1790 }
1791 
1792 /*
1793  * This routine is called to generate a message to the routing
1794  * socket indicating that the status of a network interface has changed.
1795  * Message type generated RTM_IFINFO.
1796  */
1797 void
1798 ip_rts_ifmsg(const ipif_t *ipif)
1799 {
1800 	if_msghdr_t	*ifm;
1801 	mblk_t		*mp;
1802 	sa_family_t	af;
1803 
1804 	/*
1805 	 * This message should be generated only
1806 	 * when the physical device is changing
1807 	 * state.
1808 	 */
1809 	if (ipif->ipif_id != 0)
1810 		return;
1811 	if (ipif->ipif_isv6) {
1812 		af = AF_INET6;
1813 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1814 		if (mp == NULL)
1815 			return;
1816 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1817 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1818 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1819 	} else {
1820 		af = AF_INET;
1821 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1822 		if (mp == NULL)
1823 			return;
1824 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1825 		    0, NULL);
1826 	}
1827 	ifm = (if_msghdr_t *)mp->b_rptr;
1828 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1829 	ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1830 	    ipif->ipif_ill->ill_phyint->phyint_flags;
1831 	rts_getifdata(&ifm->ifm_data, ipif);
1832 	ifm->ifm_addrs = RTA_IFP;
1833 	rts_queue_input(mp, NULL, af);
1834 }
1835 
1836 /*
1837  * This is called to generate messages to the routing socket
1838  * indicating a network interface has had addresses associated with it.
1839  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1840  */
1841 void
1842 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif)
1843 {
1844 	int		pass;
1845 	int		ncmd;
1846 	int		rtm_addrs;
1847 	mblk_t		*mp;
1848 	ifa_msghdr_t	*ifam;
1849 	rt_msghdr_t	*rtm;
1850 	sa_family_t	af;
1851 
1852 	if (ipif->ipif_isv6)
1853 		af = AF_INET6;
1854 	else
1855 		af = AF_INET;
1856 	/*
1857 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1858 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1859 	 */
1860 	for (pass = 1; pass < 3; pass++) {
1861 		if ((cmd == RTM_ADD && pass == 1) ||
1862 		    (cmd == RTM_DELETE && pass == 2)) {
1863 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1864 
1865 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1866 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1867 			if (mp == NULL)
1868 				continue;
1869 			switch (af) {
1870 			case AF_INET:
1871 				rts_fill_msg(ncmd, rtm_addrs, 0,
1872 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1873 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
1874 				    0, NULL);
1875 				break;
1876 			case AF_INET6:
1877 				rts_fill_msg_v6(ncmd, rtm_addrs,
1878 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1879 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1880 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1881 				    ipif, mp, 0, NULL);
1882 				break;
1883 			}
1884 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1885 			ifam->ifam_index =
1886 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1887 			ifam->ifam_metric = ipif->ipif_metric;
1888 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1889 			ifam->ifam_addrs = rtm_addrs;
1890 			rts_queue_input(mp, NULL, af);
1891 		}
1892 		if ((cmd == RTM_ADD && pass == 2) ||
1893 		    (cmd == RTM_DELETE && pass == 1)) {
1894 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1895 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1896 			if (mp == NULL)
1897 				continue;
1898 			switch (af) {
1899 			case AF_INET:
1900 				rts_fill_msg(cmd, rtm_addrs,
1901 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1902 				    0, 0, 0, NULL, mp, 0, NULL);
1903 				break;
1904 			case AF_INET6:
1905 				rts_fill_msg_v6(cmd, rtm_addrs,
1906 				    &ipif->ipif_v6lcl_addr,
1907 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1908 				    &ipv6_all_zeros, &ipv6_all_zeros,
1909 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1910 				break;
1911 			}
1912 			rtm = (rt_msghdr_t *)mp->b_rptr;
1913 			rtm->rtm_index =
1914 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1915 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1916 			rtm->rtm_errno = error;
1917 			if (error == 0)
1918 				rtm->rtm_flags |= RTF_DONE;
1919 			rtm->rtm_addrs = rtm_addrs;
1920 			rts_queue_input(mp, NULL, af);
1921 		}
1922 	}
1923 }
1924 
1925 /*
1926  * Based on the address family specified in a sockaddr, copy the address field
1927  * into an in6_addr_t.
1928  *
1929  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1930  * compatibility with programs that leave the family cleared in the sockaddr.
1931  * Callers of rts_copyfromsockaddr should check the family themselves if they
1932  * wish to verify its value.
1933  *
1934  * In the case of AF_INET6, a check is made to ensure that address is not an
1935  * IPv4-mapped address.
1936  */
1937 size_t
1938 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1939 {
1940 	switch (sa->sa_family) {
1941 	case AF_INET:
1942 	case AF_UNSPEC:
1943 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1944 		return (sizeof (sin_t));
1945 	case AF_INET6:
1946 		*addrp = ((sin6_t *)sa)->sin6_addr;
1947 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1948 			return (0);
1949 		return (sizeof (sin6_t));
1950 	default:
1951 		return (0);
1952 	}
1953 }
1954