xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_rts.c (revision 8af2c5b9bdbf69a55f079d7ad9483d38fae9f023)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * This file contains routines that process routing socket requests.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/stream.h>
49 #include <sys/stropts.h>
50 #include <sys/ddi.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 #include <sys/systm.h>
57 #include <sys/param.h>
58 #include <sys/socket.h>
59 #include <sys/strsun.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <net/if_dl.h>
64 #include <netinet/ip6.h>
65 
66 #include <inet/common.h>
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_ftable.h>
72 #include <inet/ip_rts.h>
73 
74 #include <inet/ipclassifier.h>
75 
76 #include <sys/tsol/tndb.h>
77 #include <sys/tsol/tnet.h>
78 
/*
 * Size of a routing message: the value returned by rts_data_msg_size() for
 * the given address bits, address family and security attribute count, plus
 * the value returned by rts_header_msg_size() for the message type.
 */
#define	RTS_MSG_SIZE(msg_type, addr_bits, family, nsecattr)		\
	(rts_data_msg_size((addr_bits), (family), (nsecattr)) +		\
	rts_header_msg_size(msg_type))
81 
82 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
83 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
84     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
85     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
86 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
87     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
88     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
89     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
90 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
91 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
92 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
93     sa_family_t af);
94 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
95 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
96 
97 /*
98  * Send the ack to all the routing queues.  In case of the originating queue,
99  * send it only if the loopback is set.
100  *
101  * Messages are sent upstream only on routing sockets that did not specify an
102  * address family when they were created or when the address family matches the
103  * one specified by the caller.
104  *
105  */
106 void
107 rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst)
108 {
109 	mblk_t	*mp1;
110 	int	checkqfull;
111 	conn_t 	*connp, *next_connp;
112 
113 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
114 	connp = ipst->ips_rts_clients->connf_head;
115 
116 	while (connp != NULL) {
117 		/*
118 		 * If there was a family specified when this routing socket was
119 		 * created and it doesn't match the family of the message to
120 		 * copy, then continue.
121 		 */
122 		if ((connp->conn_proto != AF_UNSPEC) &&
123 		    (connp->conn_proto != af)) {
124 			connp = connp->conn_next;
125 			continue;
126 		}
127 		/*
128 		 * For the originating queue, we only copy the message upstream
129 		 * if loopback is set.  For others reading on the routing
130 		 * socket, we check if there is room upstream for a copy of the
131 		 * message.
132 		 */
133 		if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) {
134 			if (connp->conn_loopback == 0) {
135 				connp = connp->conn_next;
136 				continue;
137 			}
138 			/*
139 			 * Just because it is the same queue doesn't mean it
140 			 * will promptly read its acks. Have to avoid using
141 			 * all of kernel memory.
142 			 */
143 			checkqfull = B_TRUE;
144 		} else {
145 			checkqfull = B_TRUE;
146 		}
147 		CONN_INC_REF(connp);
148 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
149 		if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) {
150 			mp1 = dupmsg(mp);
151 			if (mp1 == NULL)
152 				mp1 = copymsg(mp);
153 			if (mp1 != NULL)
154 				putnext(CONNP_TO_RQ(connp), mp1);
155 		}
156 
157 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
158 		/* Follow the next pointer before releasing the conn. */
159 		next_connp = connp->conn_next;
160 		CONN_DEC_REF(connp);
161 		connp = next_connp;
162 	}
163 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
164 	freemsg(mp);
165 }
166 
167 /*
168  * Takes an ire and sends an ack to all the routing sockets. This
169  * routine is used
170  * - when a route is created/deleted through the ioctl interface.
171  * - when ire_expire deletes a stale redirect
172  */
173 void
174 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
175 {
176 	mblk_t		*mp;
177 	rt_msghdr_t	*rtm;
178 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
179 	sa_family_t	af;
180 	in6_addr_t	gw_addr_v6;
181 
182 	if (ire == NULL)
183 		return;
184 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
185 	    ire->ire_ipversion == IPV6_VERSION);
186 
187 	if (ire->ire_flags & RTF_SETSRC)
188 		rtm_addrs |= RTA_SRC;
189 
190 	switch (ire->ire_ipversion) {
191 	case IPV4_VERSION:
192 		af = AF_INET;
193 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
194 		if (mp == NULL)
195 			return;
196 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
197 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
198 		    0, NULL);
199 		break;
200 	case IPV6_VERSION:
201 		af = AF_INET6;
202 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
203 		if (mp == NULL)
204 			return;
205 		mutex_enter(&ire->ire_lock);
206 		gw_addr_v6 = ire->ire_gateway_addr_v6;
207 		mutex_exit(&ire->ire_lock);
208 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
209 		    &ire->ire_mask_v6, &gw_addr_v6,
210 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
211 		    NULL, mp, 0, NULL);
212 		break;
213 	}
214 	rtm = (rt_msghdr_t *)mp->b_rptr;
215 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
216 	rtm->rtm_addrs = rtm_addrs;
217 	rtm->rtm_flags = ire->ire_flags;
218 	if (error != 0)
219 		rtm->rtm_errno = error;
220 	else
221 		rtm->rtm_flags |= RTF_DONE;
222 	rts_queue_input(mp, NULL, af, ipst);
223 }
224 
225 /* ARGSUSED */
226 static void
227 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
228 {
229 	(void) ip_rts_request(q, mp, DB_CRED(mp));
230 }
231 
232 /*
233  * Processes requests received on a routing socket. It extracts all the
234  * arguments and calls the appropriate function to process the request.
235  *
236  * RTA_SRC bit flag requests are sent by 'route -setsrc'.
237  *
238  * In general, this function does not consume the message supplied but rather
239  * sends the message upstream with an appropriate UNIX errno.
240  *
241  * We may need to restart this operation if the ipif cannot be looked up
242  * due to an exclusive operation that is currently in progress. The restart
243  * entry point is ip_rts_request_retry. While the request is enqueud in the
244  * ipsq the ioctl could be aborted and the conn close. To ensure that we don't
245  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
246  * released at the completion of the rts ioctl at the end of this function
247  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
248  * conn close occurs in conn_ioctl_cleanup.
249  */
250 int
251 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
252 {
253 	rt_msghdr_t	*rtm = NULL;
254 	in6_addr_t	dst_addr_v6;
255 	in6_addr_t	src_addr_v6;
256 	in6_addr_t	gw_addr_v6;
257 	in6_addr_t	net_mask_v6;
258 	in6_addr_t	author_v6;
259 	in6_addr_t	if_addr_v6;
260 	mblk_t		*mp1, *ioc_mp = mp;
261 	ire_t		*ire = NULL;
262 	ire_t		*sire = NULL;
263 	int		error = 0;
264 	int		match_flags = MATCH_IRE_DSTONLY;
265 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
266 	int		found_addrs;
267 	sa_family_t	af;
268 	ipaddr_t	dst_addr;
269 	ipaddr_t	gw_addr;
270 	ipaddr_t	src_addr;
271 	ipaddr_t	net_mask;
272 	ushort_t	index;
273 	ipif_t		*ipif = NULL;
274 	ipif_t		*tmp_ipif = NULL;
275 	IOCP		iocp = (IOCP)mp->b_rptr;
276 	conn_t		*connp;
277 	boolean_t	gcgrp_xtraref = B_FALSE;
278 	tsol_gcgrp_addr_t ga;
279 	tsol_rtsecattr_t rtsecattr;
280 	struct rtsa_s	*rtsap = NULL;
281 	tsol_gcgrp_t	*gcgrp = NULL;
282 	tsol_gc_t	*gc = NULL;
283 	ts_label_t	*tsl = NULL;
284 	zoneid_t	zoneid;
285 	ip_stack_t	*ipst;
286 
287 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
288 
289 	ASSERT(CONN_Q(q));
290 	connp = Q_TO_CONN(q);
291 	zoneid = connp->conn_zoneid;
292 	ipst = connp->conn_netstack->netstack_ip;
293 
294 	ASSERT(mp->b_cont != NULL);
295 	/* ioc_mp holds mp */
296 	mp = mp->b_cont;
297 
298 	/*
299 	 * The Routing Socket data starts on
300 	 * next block. If there is no next block
301 	 * this is an indication from routing module
302 	 * that it is a routing socket stream queue.
303 	 */
304 	if (mp->b_cont != NULL) {
305 		mp1 = dupmsg(mp->b_cont);
306 		if (mp1 == NULL) {
307 			error  = ENOBUFS;
308 			goto done;
309 		}
310 		mp = mp1;
311 	} else {
312 		/*
313 		 * This is a message from RTS module
314 		 * indicating that this is a Routing Socket
315 		 * Stream. Insert this conn_t in routing
316 		 * socket client list.
317 		 */
318 
319 		connp->conn_loopback = 1;
320 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
321 
322 		goto done;
323 	}
324 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
325 		freemsg(mp);
326 		error =  EINVAL;
327 		goto done;
328 	}
329 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
330 		freemsg(mp);
331 		error = EINVAL;
332 		goto done;
333 	}
334 
335 	/*
336 	 * Check the routing message for basic consistency including the
337 	 * version number and that the number of octets written is the same
338 	 * as specified by the rtm_msglen field.
339 	 *
340 	 * At this point, an error can be delivered back via rtm_errno.
341 	 */
342 	rtm = (rt_msghdr_t *)mp->b_rptr;
343 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
344 		error = EINVAL;
345 		goto done;
346 	}
347 	if (rtm->rtm_version != RTM_VERSION) {
348 		error = EPROTONOSUPPORT;
349 		goto done;
350 	}
351 
352 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
353 	if (rtm->rtm_type != RTM_GET &&
354 	    rtm->rtm_type != RTM_RESOLVE &&
355 	    (ioc_cr == NULL ||
356 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
357 		error = EPERM;
358 		goto done;
359 	}
360 
361 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
362 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
363 	    &error);
364 
365 	if (error != 0)
366 		goto done;
367 
368 	if ((found_addrs & RTA_DST) == 0) {
369 		error = EINVAL;
370 		goto done;
371 	}
372 
373 	/*
374 	 * Based on the address family of the destination address, determine
375 	 * the destination, gateway and netmask and return the appropriate error
376 	 * if an unknown address family was specified (following the errno
377 	 * values that 4.4BSD-Lite2 returns.)
378 	 */
379 	switch (af) {
380 	case AF_INET:
381 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
382 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
383 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
384 		if (((found_addrs & RTA_NETMASK) == 0) ||
385 		    (rtm->rtm_flags & RTF_HOST))
386 			net_mask = IP_HOST_MASK;
387 		else
388 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
389 		break;
390 	case AF_INET6:
391 		if (((found_addrs & RTA_NETMASK) == 0) ||
392 		    (rtm->rtm_flags & RTF_HOST))
393 			net_mask_v6 = ipv6_all_ones;
394 		break;
395 	default:
396 		/*
397 		 * These errno values are meant to be compatible with
398 		 * 4.4BSD-Lite2 for the given message types.
399 		 */
400 		switch (rtm->rtm_type) {
401 		case RTM_ADD:
402 		case RTM_DELETE:
403 			error = ESRCH;
404 			goto done;
405 		case RTM_GET:
406 		case RTM_CHANGE:
407 			error = EAFNOSUPPORT;
408 			goto done;
409 		default:
410 			error = EOPNOTSUPP;
411 			goto done;
412 		}
413 	}
414 
415 	/*
416 	 * At this point, the address family must be something known.
417 	 */
418 	ASSERT(af == AF_INET || af == AF_INET6);
419 
420 	if (index != 0) {
421 		ill_t   *ill;
422 
423 		/*
424 		 * IPC must be refheld somewhere in ip_wput_nondata or
425 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
426 		 * If ILL_CHANGING the request is queued in the ipsq.
427 		 */
428 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
429 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
430 		    ipst);
431 		if (ill == NULL) {
432 			if (error != EINPROGRESS)
433 				error = EINVAL;
434 			goto done;
435 		}
436 
437 		ipif = ipif_get_next_ipif(NULL, ill);
438 		ill_refrele(ill);
439 		/*
440 		 * If this is replacement ipif, prevent a route from
441 		 * being added.
442 		 */
443 		if (ipif != NULL && ipif->ipif_replace_zero) {
444 			error = ENETDOWN;
445 			goto done;
446 		}
447 		match_flags |= MATCH_IRE_ILL;
448 	}
449 
450 	/*
451 	 * If a netmask was supplied in the message, then subsequent route
452 	 * lookups will attempt to match on the netmask as well.
453 	 */
454 	if ((found_addrs & RTA_NETMASK) != 0)
455 		match_flags |= MATCH_IRE_MASK;
456 
457 	/*
458 	 * We only process any passed-in route security attributes for
459 	 * either RTM_ADD or RTM_CHANGE message; We overload them
460 	 * to do an RTM_GET as a different label; ignore otherwise.
461 	 */
462 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
463 	    rtm->rtm_type == RTM_GET) {
464 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
465 		if (rtsecattr.rtsa_cnt > 0)
466 			rtsap = &rtsecattr.rtsa_attr[0];
467 	}
468 
469 	switch (rtm->rtm_type) {
470 	case RTM_ADD:
471 		/* if we are adding a route, gateway is a must */
472 		if ((found_addrs & RTA_GATEWAY) == 0) {
473 			error = EINVAL;
474 			goto done;
475 		}
476 
477 		/* Multirouting does not support net routes. */
478 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
479 		    RTF_MULTIRT) {
480 			error = EADDRNOTAVAIL;
481 			goto done;
482 		}
483 
484 		/*
485 		 * Multirouting and user-specified source addresses
486 		 * do not support interface based routing.
487 		 * Assigning a source address to an interface based
488 		 * route is achievable by plumbing a new ipif and
489 		 * setting up the interface route via this ipif,
490 		 * though.
491 		 */
492 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
493 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
494 				error = EADDRNOTAVAIL;
495 				goto done;
496 			}
497 		}
498 
499 		switch (af) {
500 		case AF_INET:
501 			if (src_addr != INADDR_ANY) {
502 				/*
503 				 * The RTF_SETSRC flag is present, check that
504 				 * the supplied src address is not the loopback
505 				 * address. This would produce martian packets.
506 				 */
507 				if (src_addr == htonl(INADDR_LOOPBACK)) {
508 					error = EINVAL;
509 					goto done;
510 				}
511 				/*
512 				 * Also check that the supplied address is a
513 				 * valid, local one.
514 				 */
515 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
516 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
517 				    ip_rts_request_retry, &error, ipst);
518 				if (tmp_ipif == NULL) {
519 					if (error != EINPROGRESS)
520 						error = EADDRNOTAVAIL;
521 					goto done;
522 				}
523 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
524 				    (tmp_ipif->ipif_flags &
525 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
526 					error = EINVAL;
527 					goto done;
528 				}
529 			} else {
530 				/*
531 				 * The RTF_SETSRC modifier must be associated
532 				 * to a non-null source address.
533 				 */
534 				if (rtm->rtm_flags & RTF_SETSRC) {
535 					error = EINVAL;
536 					goto done;
537 				}
538 			}
539 
540 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
541 			    rtm->rtm_flags, ipif, &ire, B_FALSE,
542 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
543 			    rtsap, ipst);
544 			if (ipif != NULL)
545 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
546 			break;
547 		case AF_INET6:
548 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
549 				/*
550 				 * The RTF_SETSRC flag is present, check that
551 				 * the supplied src address is not the loopback
552 				 * address. This would produce martian packets.
553 				 */
554 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
555 					error = EINVAL;
556 					goto done;
557 				}
558 				/*
559 				 * Also check that the supplied address is a
560 				 * valid, local one.
561 				 */
562 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
563 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
564 				    ip_rts_request_retry, &error, ipst);
565 				if (tmp_ipif == NULL) {
566 					if (error != EINPROGRESS)
567 						error = EADDRNOTAVAIL;
568 					goto done;
569 				}
570 
571 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
572 				    (tmp_ipif->ipif_flags &
573 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
574 					error = EINVAL;
575 					goto done;
576 				}
577 
578 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
579 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
580 				    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
581 				    ip_rts_request_retry, rtsap, ipst);
582 				break;
583 			}
584 			/*
585 			 * The RTF_SETSRC modifier must be associated
586 			 * to a non-null source address.
587 			 */
588 			if (rtm->rtm_flags & RTF_SETSRC) {
589 				error = EINVAL;
590 				goto done;
591 			}
592 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
593 			    &gw_addr_v6, NULL, rtm->rtm_flags,
594 			    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
595 			    ip_rts_request_retry, rtsap, ipst);
596 			if (ipif != NULL)
597 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
598 			break;
599 		}
600 		if (error != 0)
601 			goto done;
602 		ASSERT(ire != NULL);
603 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
604 		break;
605 	case RTM_DELETE:
606 		/* if we are deleting a route, gateway is a must */
607 		if ((found_addrs & RTA_GATEWAY) == 0) {
608 			error = EINVAL;
609 			goto done;
610 		}
611 		/*
612 		 * The RTF_SETSRC modifier does not make sense
613 		 * when deleting a route.
614 		 */
615 		if (rtm->rtm_flags & RTF_SETSRC) {
616 			error = EINVAL;
617 			goto done;
618 		}
619 
620 		switch (af) {
621 		case AF_INET:
622 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
623 			    found_addrs, rtm->rtm_flags, ipif, B_FALSE,
624 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
625 			    ipst);
626 			break;
627 		case AF_INET6:
628 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
629 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
630 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
631 			    ipst);
632 			break;
633 		}
634 		break;
635 	case RTM_GET:
636 	case RTM_CHANGE:
637 		/*
638 		 * In the case of RTM_GET, the forwarding table should be
639 		 * searched recursively with default being matched if the
640 		 * specific route doesn't exist.  Also, if a gateway was
641 		 * specified then the gateway address must also be matched.
642 		 *
643 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
644 		 * is the new gateway address so matching on the gateway address
645 		 * is not done.  This can lead to ambiguity when looking up the
646 		 * route to change as usually only the destination (and netmask,
647 		 * if supplied) is used for the lookup.  However if a RTA_IFP
648 		 * sockaddr is also supplied, it can disambiguate which route to
649 		 * change provided the ambigous routes are tied to distinct
650 		 * ill's (or interface indices).  If the routes are not tied to
651 		 * any particular interfaces (for example, with traditional
652 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
653 		 * it won't match any such routes.
654 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
655 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
656 		 */
657 		if (((found_addrs & RTA_SRC) != 0) &&
658 		    ((rtm->rtm_type == RTM_GET) ||
659 		    !(rtm->rtm_flags & RTF_SETSRC))) {
660 			error = EOPNOTSUPP;
661 			goto done;
662 		}
663 
664 		if (rtm->rtm_type == RTM_GET) {
665 			match_flags |=
666 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
667 			    MATCH_IRE_SECATTR);
668 			match_flags_local |= MATCH_IRE_SECATTR;
669 			if ((found_addrs & RTA_GATEWAY) != 0)
670 				match_flags |= MATCH_IRE_GW;
671 			if (ioc_cr)
672 				tsl = crgetlabel(ioc_cr);
673 			if (rtsap != NULL) {
674 				if (rtsa_validate(rtsap) != 0) {
675 					error = EINVAL;
676 					goto done;
677 				}
678 				if (tsl != NULL &&
679 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
680 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
681 				    !bldominates(&tsl->tsl_label,
682 				    &rtsap->rtsa_slrange.lower_bound))) {
683 					error = EPERM;
684 					goto done;
685 				}
686 				tsl = labelalloc(
687 				    &rtsap->rtsa_slrange.lower_bound,
688 				    rtsap->rtsa_doi, KM_NOSLEEP);
689 			}
690 		}
691 		if (rtm->rtm_type == RTM_CHANGE) {
692 			if ((found_addrs & RTA_GATEWAY) &&
693 			    (rtm->rtm_flags & RTF_SETSRC)) {
694 				/*
695 				 * Do not want to change the gateway,
696 				 * but rather the source address.
697 				 */
698 				match_flags |= MATCH_IRE_GW;
699 			}
700 		}
701 
702 		/*
703 		 * If the netmask is all ones (either as supplied or as derived
704 		 * above), then first check for an IRE_LOOPBACK or
705 		 * IRE_LOCAL entry.
706 		 *
707 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
708 		 * entry, then look in the forwarding table.
709 		 */
710 		switch (af) {
711 		case AF_INET:
712 			if (net_mask == IP_HOST_MASK) {
713 				ire = ire_ctable_lookup(dst_addr, gw_addr,
714 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
715 				    tsl, match_flags_local, ipst);
716 				/*
717 				 * If we found an IRE_LOCAL, make sure
718 				 * it is one that would be used by this
719 				 * zone to send packets.
720 				 */
721 				if (ire != NULL &&
722 				    ire->ire_type == IRE_LOCAL &&
723 				    ipst->ips_ip_restrict_interzone_loopback &&
724 				    !ire_local_ok_across_zones(ire,
725 				    zoneid, &dst_addr, tsl, ipst)) {
726 					ire_refrele(ire);
727 					ire = NULL;
728 				}
729 			}
730 			if (ire == NULL) {
731 				ire = ire_ftable_lookup(dst_addr, net_mask,
732 				    gw_addr, 0, ipif, &sire, zoneid, 0,
733 				    tsl, match_flags, ipst);
734 			}
735 			break;
736 		case AF_INET6:
737 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
738 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
739 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
740 				    zoneid, tsl, match_flags_local, ipst);
741 				/*
742 				 * If we found an IRE_LOCAL, make sure
743 				 * it is one that would be used by this
744 				 * zone to send packets.
745 				 */
746 				if (ire != NULL &&
747 				    ire->ire_type == IRE_LOCAL &&
748 				    ipst->ips_ip_restrict_interzone_loopback &&
749 				    !ire_local_ok_across_zones(ire,
750 				    zoneid, (void *)&dst_addr_v6, tsl, ipst)) {
751 					ire_refrele(ire);
752 					ire = NULL;
753 				}
754 			}
755 			if (ire == NULL) {
756 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
757 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
758 				    zoneid, 0, tsl, match_flags, ipst);
759 			}
760 			break;
761 		}
762 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
763 			label_rele(tsl);
764 
765 		if (ire == NULL) {
766 			error = ESRCH;
767 			goto done;
768 		}
769 		/* we know the IRE before we come here */
770 		switch (rtm->rtm_type) {
771 		case RTM_GET:
772 			mp1 = rts_rtmget(mp, ire, sire, af);
773 			if (mp1 == NULL) {
774 				error = ENOBUFS;
775 				goto done;
776 			}
777 			freemsg(mp);
778 			mp = mp1;
779 			rtm = (rt_msghdr_t *)mp->b_rptr;
780 			break;
781 		case RTM_CHANGE:
782 			/*
783 			 * Do not allow to the multirouting state of a route
784 			 * to be changed. This aims to prevent undesirable
785 			 * stages where both multirt and non-multirt routes
786 			 * for the same destination are declared.
787 			 */
788 			if ((ire->ire_flags & RTF_MULTIRT) !=
789 			    (rtm->rtm_flags & RTF_MULTIRT)) {
790 				error = EINVAL;
791 				goto done;
792 			}
793 			/*
794 			 * Note that we do not need to do
795 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
796 			 * in metrics or gateway will not affect existing
797 			 * routes since it does not create a more specific
798 			 * route.
799 			 */
800 			switch (af) {
801 			case AF_INET:
802 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
803 				if ((found_addrs & RTA_GATEWAY) != 0 &&
804 				    (ire->ire_gateway_addr != gw_addr)) {
805 					ire->ire_gateway_addr = gw_addr;
806 				}
807 
808 				if (rtsap != NULL) {
809 					ga.ga_af = AF_INET;
810 					IN6_IPADDR_TO_V4MAPPED(
811 					    ire->ire_gateway_addr, &ga.ga_addr);
812 
813 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
814 					if (gcgrp == NULL) {
815 						error = ENOMEM;
816 						goto done;
817 					}
818 				}
819 
820 				if ((found_addrs & RTA_SRC) != 0 &&
821 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
822 				    (ire->ire_src_addr != src_addr)) {
823 
824 					if (src_addr != INADDR_ANY) {
825 						/*
826 						 * The RTF_SETSRC flag is
827 						 * present, check that the
828 						 * supplied src address is not
829 						 * the loopback address. This
830 						 * would produce martian
831 						 * packets.
832 						 */
833 						if (src_addr ==
834 						    htonl(INADDR_LOOPBACK)) {
835 							error = EINVAL;
836 							goto done;
837 						}
838 						/*
839 						 * Also check that the the
840 						 * supplied addr is a valid
841 						 * local address.
842 						 */
843 						tmp_ipif = ipif_lookup_addr(
844 						    src_addr, NULL, ALL_ZONES,
845 						    CONNP_TO_WQ(connp), ioc_mp,
846 						    ip_rts_request_retry,
847 						    &error, ipst);
848 						if (tmp_ipif == NULL) {
849 							error = (error ==
850 							    EINPROGRESS) ?
851 							    error :
852 							    EADDRNOTAVAIL;
853 							goto done;
854 						}
855 
856 						if (!(tmp_ipif->ipif_flags &
857 						    IPIF_UP) ||
858 						    (tmp_ipif->ipif_flags &
859 						    (IPIF_NOLOCAL |
860 						    IPIF_ANYCAST))) {
861 							error = EINVAL;
862 							goto done;
863 						}
864 						ire->ire_flags |= RTF_SETSRC;
865 					} else {
866 						ire->ire_flags &= ~RTF_SETSRC;
867 					}
868 					ire->ire_src_addr = src_addr;
869 				}
870 				break;
871 			case AF_INET6:
872 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
873 				mutex_enter(&ire->ire_lock);
874 				if ((found_addrs & RTA_GATEWAY) != 0 &&
875 				    !IN6_ARE_ADDR_EQUAL(
876 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
877 					ire->ire_gateway_addr_v6 = gw_addr_v6;
878 				}
879 
880 				if (rtsap != NULL) {
881 					ga.ga_af = AF_INET6;
882 					ga.ga_addr = ire->ire_gateway_addr_v6;
883 
884 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
885 					if (gcgrp == NULL) {
886 						error = ENOMEM;
887 						goto done;
888 					}
889 				}
890 
891 				if ((found_addrs & RTA_SRC) != 0 &&
892 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
893 				    !IN6_ARE_ADDR_EQUAL(
894 					&ire->ire_src_addr_v6, &src_addr_v6)) {
895 
896 					if (!IN6_IS_ADDR_UNSPECIFIED(
897 					    &src_addr_v6)) {
898 						/*
899 						 * The RTF_SETSRC flag is
900 						 * present, check that the
901 						 * supplied src address is not
902 						 * the loopback address. This
903 						 * would produce martian
904 						 * packets.
905 						 */
906 						if (IN6_IS_ADDR_LOOPBACK(
907 						    &src_addr_v6)) {
908 							mutex_exit(
909 							    &ire->ire_lock);
910 							error = EINVAL;
911 							goto done;
912 						}
913 						/*
914 						 * Also check that the the
915 						 * supplied addr is a valid
916 						 * local address.
917 						 */
918 						tmp_ipif = ipif_lookup_addr_v6(
919 						    &src_addr_v6, NULL,
920 						    ALL_ZONES,
921 						    CONNP_TO_WQ(connp), ioc_mp,
922 						    ip_rts_request_retry,
923 						    &error, ipst);
924 						if (tmp_ipif == NULL) {
925 							mutex_exit(
926 							    &ire->ire_lock);
927 							error = (error ==
928 							    EINPROGRESS) ?
929 							    error :
930 							    EADDRNOTAVAIL;
931 							goto done;
932 						}
933 						if (!(tmp_ipif->ipif_flags &
934 						    IPIF_UP) ||
935 						    (tmp_ipif->ipif_flags &
936 						    (IPIF_NOLOCAL |
937 						    IPIF_ANYCAST))) {
938 							mutex_exit(
939 							    &ire->ire_lock);
940 							error = EINVAL;
941 							goto done;
942 						}
943 						ire->ire_flags |= RTF_SETSRC;
944 					} else {
945 						ire->ire_flags &= ~RTF_SETSRC;
946 					}
947 					ire->ire_src_addr_v6 = src_addr_v6;
948 				}
949 				mutex_exit(&ire->ire_lock);
950 				break;
951 			}
952 
953 			if (rtsap != NULL) {
954 				in_addr_t ga_addr4;
955 
956 				ASSERT(gcgrp != NULL);
957 
958 				/*
959 				 * Create and add the security attribute to
960 				 * prefix IRE; it will add a reference to the
961 				 * group upon allocating a new entry.  If it
962 				 * finds an already-existing entry for the
963 				 * security attribute, it simply returns it
964 				 * and no new group reference is made.
965 				 */
966 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
967 				if (gc == NULL ||
968 				    (error = tsol_ire_init_gwattr(ire,
969 				    ire->ire_ipversion, gc, NULL)) != 0) {
970 					if (gc != NULL) {
971 						GC_REFRELE(gc);
972 					} else {
973 						/* gc_create failed */
974 						error = ENOMEM;
975 					}
976 					goto done;
977 				}
978 
979 				/*
980 				 * Now delete any existing gateway IRE caches
981 				 * as well as all caches using the gateway,
982 				 * and allow them to be created on demand
983 				 * through ip_newroute{_v6}.
984 				 */
985 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
986 				if (af == AF_INET) {
987 					ire_clookup_delete_cache_gw(
988 					    ga_addr4, ALL_ZONES, ipst);
989 				} else {
990 					ire_clookup_delete_cache_gw_v6(
991 					    &ga.ga_addr, ALL_ZONES, ipst);
992 				}
993 			}
994 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
995 			break;
996 		}
997 		break;
998 	default:
999 		error = EOPNOTSUPP;
1000 		break;
1001 	}
1002 done:
1003 	if (ire != NULL)
1004 		ire_refrele(ire);
1005 	if (sire != NULL)
1006 		ire_refrele(sire);
1007 	if (ipif != NULL)
1008 		ipif_refrele(ipif);
1009 	if (tmp_ipif != NULL)
1010 		ipif_refrele(tmp_ipif);
1011 
1012 	if (gcgrp_xtraref)
1013 		GCGRP_REFRELE(gcgrp);
1014 
1015 	if (error == EINPROGRESS) {
1016 		if (rtm != NULL)
1017 			freemsg(mp);
1018 		return (error);
1019 	}
1020 	if (rtm != NULL) {
1021 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1022 		if (error != 0) {
1023 			rtm->rtm_errno = error;
1024 			/* Send error ACK */
1025 			ip1dbg(("ip_rts_request: error %d\n", error));
1026 		} else {
1027 			rtm->rtm_flags |= RTF_DONE;
1028 			/* OK ACK already set up by caller except this */
1029 			ip2dbg(("ip_rts_request: OK ACK\n"));
1030 		}
1031 		rts_queue_input(mp, q, af, ipst);
1032 	}
1033 	iocp->ioc_error = error;
1034 	ioc_mp->b_datap->db_type = M_IOCACK;
1035 	if (iocp->ioc_error != 0)
1036 		iocp->ioc_count = 0;
1037 	qreply(q, ioc_mp);
1038 	/* conn was refheld in ip_wput_ioctl. */
1039 	CONN_OPER_PENDING_DONE(connp);
1040 
1041 	return (error);
1042 }
1043 
1044 /*
1045  * Build a reply to the RTM_GET request contained in the given message block
1046  * using the retrieved IRE of the destination address, the parent IRE (if it
1047  * exists) and the address family.
1048  *
1049  * Returns a pointer to a message block containing the reply if successful,
1050  * otherwise NULL is returned.
1051  */
1052 static mblk_t *
1053 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1054 {
1055 	rt_msghdr_t	*rtm;
1056 	rt_msghdr_t	*new_rtm;
1057 	mblk_t		*new_mp;
1058 	int		rtm_addrs;
1059 	int		rtm_flags;
1060 	in6_addr_t	gw_addr_v6;
1061 	tsol_ire_gw_secattr_t *attrp = NULL;
1062 	tsol_gc_t	*gc = NULL;
1063 	tsol_gcgrp_t	*gcgrp = NULL;
1064 	int		sacnt = 0;
1065 
1066 	ASSERT(ire->ire_ipif != NULL);
1067 	rtm = (rt_msghdr_t *)mp->b_rptr;
1068 
1069 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1070 		attrp = sire->ire_gw_secattr;
1071 	else if (ire->ire_gw_secattr != NULL)
1072 		attrp = ire->ire_gw_secattr;
1073 
1074 	if (attrp != NULL) {
1075 		mutex_enter(&attrp->igsa_lock);
1076 		if ((gc = attrp->igsa_gc) != NULL) {
1077 			gcgrp = gc->gc_grp;
1078 			ASSERT(gcgrp != NULL);
1079 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1080 			sacnt = 1;
1081 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1082 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1083 			gc = gcgrp->gcgrp_head;
1084 			sacnt = gcgrp->gcgrp_count;
1085 		}
1086 		mutex_exit(&attrp->igsa_lock);
1087 
1088 		/* do nothing if there's no gc to report */
1089 		if (gc == NULL) {
1090 			ASSERT(sacnt == 0);
1091 			if (gcgrp != NULL) {
1092 				/* we might as well drop the lock now */
1093 				rw_exit(&gcgrp->gcgrp_rwlock);
1094 				gcgrp = NULL;
1095 			}
1096 			attrp = NULL;
1097 		}
1098 
1099 		ASSERT(gc == NULL || (gcgrp != NULL &&
1100 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1101 	}
1102 	ASSERT(sacnt == 0 || gc != NULL);
1103 
1104 	/*
1105 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1106 	 *
1107 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1108 	 * RTA_IFP and RTA_IFA if either is defined, and also
1109 	 * returns RTA_BRD if the appropriate interface is
1110 	 * point-to-point.
1111 	 */
1112 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1113 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1114 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1115 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1116 			rtm_addrs |= RTA_BRD;
1117 	}
1118 
1119 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1120 	if (new_mp == NULL) {
1121 		if (gcgrp != NULL)
1122 			rw_exit(&gcgrp->gcgrp_rwlock);
1123 		return (NULL);
1124 	}
1125 
1126 	/*
1127 	 * We set the destination address, gateway address,
1128 	 * netmask and flags in the RTM_GET response depending
1129 	 * on whether we found a parent IRE or not.
1130 	 * In particular, if we did find a parent IRE during the
1131 	 * recursive search, use that IRE's gateway address.
1132 	 * Otherwise, we use the IRE's source address for the
1133 	 * gateway address.
1134 	 */
1135 	ASSERT(af == AF_INET || af == AF_INET6);
1136 	switch (af) {
1137 	case AF_INET:
1138 		if (sire == NULL) {
1139 			rtm_flags = ire->ire_flags;
1140 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1141 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1142 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1143 			    new_mp, sacnt, gc);
1144 		} else {
1145 			if (sire->ire_flags & RTF_SETSRC)
1146 				rtm_addrs |= RTA_SRC;
1147 
1148 			rtm_flags = sire->ire_flags;
1149 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1150 			    sire->ire_mask, sire->ire_gateway_addr,
1151 			    (sire->ire_flags & RTF_SETSRC) ?
1152 				sire->ire_src_addr : ire->ire_src_addr,
1153 			    ire->ire_ipif->ipif_pp_dst_addr,
1154 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1155 		}
1156 		break;
1157 	case AF_INET6:
1158 		if (sire == NULL) {
1159 			rtm_flags = ire->ire_flags;
1160 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1161 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1162 			    &ire->ire_src_addr_v6,
1163 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1164 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1165 			    sacnt, gc);
1166 		} else {
1167 			if (sire->ire_flags & RTF_SETSRC)
1168 				rtm_addrs |= RTA_SRC;
1169 
1170 			rtm_flags = sire->ire_flags;
1171 			mutex_enter(&sire->ire_lock);
1172 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1173 			mutex_exit(&sire->ire_lock);
1174 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1175 			    &sire->ire_mask_v6, &gw_addr_v6,
1176 			    (sire->ire_flags & RTF_SETSRC) ?
1177 				&sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1178 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1179 			    ire->ire_ipif, new_mp, sacnt, gc);
1180 		}
1181 		break;
1182 	}
1183 
1184 	if (gcgrp != NULL)
1185 		rw_exit(&gcgrp->gcgrp_rwlock);
1186 
1187 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1188 
1189 	/*
1190 	 * The rtm_msglen, rtm_version and rtm_type fields in
1191 	 * RTM_GET response are filled in by rts_fill_msg.
1192 	 *
1193 	 * rtm_addrs and rtm_flags are filled in based on what
1194 	 * was requested and the state of the IREs looked up
1195 	 * above.
1196 	 *
1197 	 * rtm_inits and rtm_rmx are filled in with metrics
1198 	 * based on whether a parent IRE was found or not.
1199 	 *
1200 	 * TODO: rtm_index and rtm_use should probably be
1201 	 * filled in with something resonable here and not just
1202 	 * copied from the request.
1203 	 */
1204 	new_rtm->rtm_index = rtm->rtm_index;
1205 	new_rtm->rtm_pid = rtm->rtm_pid;
1206 	new_rtm->rtm_seq = rtm->rtm_seq;
1207 	new_rtm->rtm_use = rtm->rtm_use;
1208 	new_rtm->rtm_addrs = rtm_addrs;
1209 	new_rtm->rtm_flags = rtm_flags;
1210 	if (sire == NULL)
1211 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1212 	else
1213 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1214 
1215 	return (new_mp);
1216 }
1217 
1218 /*
1219  * Fill the given if_data_t with interface statistics.
1220  */
1221 static void
1222 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1223 {
1224 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1225 	if_data->ifi_addrlen = 0;		/* media address length */
1226 	if_data->ifi_hdrlen = 0;		/* media header length */
1227 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1228 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1229 	if_data->ifi_baudrate = 0;		/* linespeed */
1230 
1231 	if_data->ifi_ipackets = 0;		/* packets received on if */
1232 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1233 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1234 	if_data->ifi_oerrors = 0;		/* output errors on if */
1235 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1236 	if_data->ifi_ibytes = 0;		/* total number received */
1237 	if_data->ifi_obytes = 0;		/* total number sent */
1238 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1239 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1240 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1241 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1242 						/* protocol. */
1243 }
1244 
1245 /*
1246  * Set the metrics on a forwarding table route.
1247  */
1248 static void
1249 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1250 {
1251 	clock_t		rtt;
1252 	clock_t		rtt_sd;
1253 	ipif_t		*ipif;
1254 	ifrt_t		*ifrt;
1255 	mblk_t		*mp;
1256 	in6_addr_t	gw_addr_v6;
1257 
1258 	/*
1259 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1260 	 * common case of no metrics.
1261 	 */
1262 	if (which == 0)
1263 		return;
1264 	ire->ire_uinfo.iulp_set = B_TRUE;
1265 
1266 	/*
1267 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1268 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1269 	 * microseconds.
1270 	 */
1271 	if (which & RTV_RTT)
1272 		rtt = metrics->rmx_rtt / 1000;
1273 	if (which & RTV_RTTVAR)
1274 		rtt_sd = metrics->rmx_rttvar / 1000;
1275 
1276 	/*
1277 	 * Update the metrics in the IRE itself.
1278 	 */
1279 	mutex_enter(&ire->ire_lock);
1280 	if (which & RTV_MTU)
1281 		ire->ire_max_frag = metrics->rmx_mtu;
1282 	if (which & RTV_RTT)
1283 		ire->ire_uinfo.iulp_rtt = rtt;
1284 	if (which & RTV_SSTHRESH)
1285 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1286 	if (which & RTV_RTTVAR)
1287 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1288 	if (which & RTV_SPIPE)
1289 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1290 	if (which & RTV_RPIPE)
1291 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1292 	mutex_exit(&ire->ire_lock);
1293 
1294 	/*
1295 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1296 	 * reflect the metric change there.
1297 	 */
1298 	ipif = ire->ire_ipif;
1299 	if (ipif == NULL)
1300 		return;
1301 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1302 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1303 	if (ipif->ipif_isv6) {
1304 		mutex_enter(&ire->ire_lock);
1305 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1306 		mutex_exit(&ire->ire_lock);
1307 	}
1308 	mutex_enter(&ipif->ipif_saved_ire_lock);
1309 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1310 		/*
1311 		 * On a given ipif, the triple of address, gateway and mask is
1312 		 * unique for each saved IRE (in the case of ordinary interface
1313 		 * routes, the gateway address is all-zeroes).
1314 		 */
1315 		ifrt = (ifrt_t *)mp->b_rptr;
1316 		if (ipif->ipif_isv6) {
1317 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1318 			    &ire->ire_addr_v6) ||
1319 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1320 			    &gw_addr_v6) ||
1321 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1322 			    &ire->ire_mask_v6))
1323 				continue;
1324 		} else {
1325 			if (ifrt->ifrt_addr != ire->ire_addr ||
1326 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1327 			    ifrt->ifrt_mask != ire->ire_mask)
1328 				continue;
1329 		}
1330 		if (which & RTV_MTU)
1331 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1332 		if (which & RTV_RTT)
1333 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1334 		if (which & RTV_SSTHRESH) {
1335 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1336 			    metrics->rmx_ssthresh;
1337 		}
1338 		if (which & RTV_RTTVAR)
1339 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1340 		if (which & RTV_SPIPE)
1341 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1342 		if (which & RTV_RPIPE)
1343 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1344 		break;
1345 	}
1346 	mutex_exit(&ipif->ipif_saved_ire_lock);
1347 }
1348 
1349 /*
1350  * Get the metrics from a forwarding table route.
1351  */
1352 static int
1353 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1354 {
1355 	int	metrics_set = 0;
1356 
1357 	bzero(metrics, sizeof (rt_metrics_t));
1358 	/*
1359 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1360 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1361 	 * microseconds.
1362 	 */
1363 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1364 	metrics_set |= RTV_RTT;
1365 	metrics->rmx_mtu = ire->ire_max_frag;
1366 	metrics_set |= RTV_MTU;
1367 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1368 	metrics_set |= RTV_SSTHRESH;
1369 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1370 	metrics_set |= RTV_RTTVAR;
1371 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1372 	metrics_set |= RTV_SPIPE;
1373 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1374 	metrics_set |= RTV_RPIPE;
1375 	return (metrics_set);
1376 }
1377 
1378 /*
1379  * Takes a pointer to a routing message and extracts necessary info by looking
1380  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1381  * passed (all of which must be valid).
1382  *
1383  * The bitmask of sockaddrs actually found in the message is returned, or zero
1384  * is returned in the case of an error.
1385  */
1386 static int
1387 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1388     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1389     in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1390     tsol_rtsecattr_t *rtsecattr, int *error)
1391 {
1392 	struct sockaddr *sa;
1393 	int	i;
1394 	int	addr_bits;
1395 	int	length;
1396 	int	found_addrs = 0;
1397 	caddr_t	cp;
1398 	size_t	size;
1399 	struct sockaddr_dl *sdl;
1400 
1401 	*dst_addrp = ipv6_all_zeros;
1402 	*gw_addrp = ipv6_all_zeros;
1403 	*net_maskp = ipv6_all_zeros;
1404 	*authorp = ipv6_all_zeros;
1405 	*if_addrp = ipv6_all_zeros;
1406 	*in_src_addrp = ipv6_all_zeros;
1407 	*indexp = 0;
1408 	*afp = AF_UNSPEC;
1409 	rtsecattr->rtsa_cnt = 0;
1410 	*error = 0;
1411 
1412 	/*
1413 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1414 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1415 	 */
1416 	cp = (caddr_t)&rtm[1];
1417 	length = rtm->rtm_msglen;
1418 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1419 		/*
1420 		 * The address family we are working with starts out as
1421 		 * AF_UNSPEC, but is set to the one specified with the
1422 		 * destination address.
1423 		 *
1424 		 * If the "working" address family that has been set to
1425 		 * something other than AF_UNSPEC, then the address family of
1426 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1427 		 * compatibility with older programs) or must be the same as our
1428 		 * "working" one.
1429 		 *
1430 		 * This code assumes that RTA_DST (1) comes first in the loop.
1431 		 */
1432 		sa = (struct sockaddr *)cp;
1433 		addr_bits = (rtm->rtm_addrs & (1 << i));
1434 		if (addr_bits == 0)
1435 			continue;
1436 		switch (addr_bits) {
1437 		case RTA_DST:
1438 			size = rts_copyfromsockaddr(sa, dst_addrp);
1439 			*afp = sa->sa_family;
1440 			break;
1441 		case RTA_GATEWAY:
1442 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1443 				return (0);
1444 			size = rts_copyfromsockaddr(sa, gw_addrp);
1445 			break;
1446 		case RTA_NETMASK:
1447 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1448 				return (0);
1449 			size = rts_copyfromsockaddr(sa, net_maskp);
1450 			break;
1451 		case RTA_IFP:
1452 			if (sa->sa_family != AF_LINK &&
1453 			    sa->sa_family != AF_UNSPEC)
1454 				return (0);
1455 			sdl = (struct sockaddr_dl *)cp;
1456 			*indexp = sdl->sdl_index;
1457 			size = sizeof (struct sockaddr_dl);
1458 			break;
1459 		case RTA_SRC:
1460 			/* Source address of the incoming packet */
1461 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1462 			*afp = sa->sa_family;
1463 			break;
1464 		case RTA_IFA:
1465 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1466 				return (0);
1467 			size = rts_copyfromsockaddr(sa, if_addrp);
1468 			break;
1469 		case RTA_AUTHOR:
1470 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1471 				return (0);
1472 			size = rts_copyfromsockaddr(sa, authorp);
1473 			break;
1474 		default:
1475 			return (0);
1476 		}
1477 		if (size == 0)
1478 			return (0);
1479 		cp += size;
1480 		found_addrs |= addr_bits;
1481 	}
1482 
1483 	/*
1484 	 * Parse the routing message and look for any security-
1485 	 * related attributes for the route.  For each valid
1486 	 * attribute, allocate/obtain the corresponding kernel
1487 	 * route security attributes.
1488 	 */
1489 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1490 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1491 
1492 	return (found_addrs);
1493 }
1494 
1495 /*
1496  * Fills the message with the given info.
1497  */
1498 static void
1499 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1500     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1501     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1502 {
1503 	rt_msghdr_t	*rtm;
1504 	sin_t		*sin;
1505 	size_t		data_size, header_size;
1506 	uchar_t		*cp;
1507 	int		i;
1508 
1509 	ASSERT(mp != NULL);
1510 	ASSERT(sacnt == 0 || gc != NULL);
1511 	/*
1512 	 * First find the type of the message
1513 	 * and its length.
1514 	 */
1515 	header_size = rts_header_msg_size(type);
1516 	/*
1517 	 * Now find the size of the data
1518 	 * that follows the message header.
1519 	 */
1520 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1521 
1522 	rtm = (rt_msghdr_t *)mp->b_rptr;
1523 	mp->b_wptr = &mp->b_rptr[header_size];
1524 	cp = mp->b_wptr;
1525 	bzero(cp, data_size);
1526 	for (i = 0; i < RTA_NUMBITS; i++) {
1527 		sin = (sin_t *)cp;
1528 		switch (rtm_addrs & (1 << i)) {
1529 		case RTA_DST:
1530 			sin->sin_addr.s_addr = dst;
1531 			sin->sin_family = AF_INET;
1532 			cp += sizeof (sin_t);
1533 			break;
1534 		case RTA_GATEWAY:
1535 			sin->sin_addr.s_addr = gateway;
1536 			sin->sin_family = AF_INET;
1537 			cp += sizeof (sin_t);
1538 			break;
1539 		case RTA_NETMASK:
1540 			sin->sin_addr.s_addr = mask;
1541 			sin->sin_family = AF_INET;
1542 			cp += sizeof (sin_t);
1543 			break;
1544 		case RTA_IFP:
1545 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1546 			break;
1547 		case RTA_IFA:
1548 		case RTA_SRC:
1549 			sin->sin_addr.s_addr = src_addr;
1550 			sin->sin_family = AF_INET;
1551 			cp += sizeof (sin_t);
1552 			break;
1553 		case RTA_AUTHOR:
1554 			sin->sin_addr.s_addr = author;
1555 			sin->sin_family = AF_INET;
1556 			cp += sizeof (sin_t);
1557 			break;
1558 		case RTA_BRD:
1559 			/*
1560 			 * RTA_BRD is used typically to specify a point-to-point
1561 			 * destination address.
1562 			 */
1563 			sin->sin_addr.s_addr = brd_addr;
1564 			sin->sin_family = AF_INET;
1565 			cp += sizeof (sin_t);
1566 			break;
1567 		}
1568 	}
1569 
1570 	if (gc != NULL) {
1571 		rtm_ext_t *rtm_ext;
1572 		struct rtsa_s *rp_dst;
1573 		tsol_rtsecattr_t *rsap;
1574 		int i;
1575 
1576 		ASSERT(gc->gc_grp != NULL);
1577 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1578 		ASSERT(sacnt > 0);
1579 
1580 		rtm_ext = (rtm_ext_t *)cp;
1581 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1582 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1583 
1584 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1585 		rsap->rtsa_cnt = sacnt;
1586 		rp_dst = rsap->rtsa_attr;
1587 
1588 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1589 			ASSERT(gc->gc_db != NULL);
1590 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1591 		}
1592 		cp = (uchar_t *)rp_dst;
1593 	}
1594 
1595 	mp->b_wptr = cp;
1596 	mp->b_cont = NULL;
1597 	/*
1598 	 * set the fields that are common to
1599 	 * to different messages.
1600 	 */
1601 	rtm->rtm_msglen = (short)(header_size + data_size);
1602 	rtm->rtm_version = RTM_VERSION;
1603 	rtm->rtm_type = (uchar_t)type;
1604 }
1605 
1606 /*
1607  * Allocates and initializes a routing socket message.
1608  */
1609 mblk_t *
1610 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1611 {
1612 	size_t	length;
1613 	mblk_t	*mp;
1614 
1615 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1616 	mp = allocb(length, BPRI_MED);
1617 	if (mp == NULL)
1618 		return (mp);
1619 	bzero(mp->b_rptr, length);
1620 	return (mp);
1621 }
1622 
1623 /*
1624  * Returns the size of the routing
1625  * socket message header size.
1626  */
1627 size_t
1628 rts_header_msg_size(int type)
1629 {
1630 	switch (type) {
1631 	case RTM_DELADDR:
1632 	case RTM_NEWADDR:
1633 		return (sizeof (ifa_msghdr_t));
1634 	case RTM_IFINFO:
1635 		return (sizeof (if_msghdr_t));
1636 	default:
1637 		return (sizeof (rt_msghdr_t));
1638 	}
1639 }
1640 
1641 /*
1642  * Returns the size of the message needed with the given rtm_addrs and family.
1643  *
1644  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1645  * of the same family (currently either AF_INET or AF_INET6).
1646  */
1647 size_t
1648 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1649 {
1650 	int	i;
1651 	size_t	length = 0;
1652 
1653 	for (i = 0; i < RTA_NUMBITS; i++) {
1654 		switch (rtm_addrs & (1 << i)) {
1655 		case RTA_IFP:
1656 			length += sizeof (struct sockaddr_dl);
1657 			break;
1658 		case RTA_DST:
1659 		case RTA_GATEWAY:
1660 		case RTA_NETMASK:
1661 		case RTA_SRC:
1662 		case RTA_IFA:
1663 		case RTA_AUTHOR:
1664 		case RTA_BRD:
1665 			ASSERT(af == AF_INET || af == AF_INET6);
1666 			switch (af) {
1667 			case AF_INET:
1668 				length += sizeof (sin_t);
1669 				break;
1670 			case AF_INET6:
1671 				length += sizeof (sin6_t);
1672 				break;
1673 			}
1674 			break;
1675 		}
1676 	}
1677 	if (sacnt > 0)
1678 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1679 
1680 	return (length);
1681 }
1682 
1683 /*
1684  * This routine is called to generate a message to the routing
1685  * socket indicating that a redirect has occured, a routing lookup
1686  * has failed, or that a protocol has detected timeouts to a particular
1687  * destination. This routine is called for message types RTM_LOSING,
1688  * RTM_REDIRECT, and RTM_MISS.
1689  */
1690 void
1691 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1692     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1693     ip_stack_t *ipst)
1694 {
1695 	rt_msghdr_t	*rtm;
1696 	mblk_t		*mp;
1697 
1698 	if (rtm_addrs == 0)
1699 		return;
1700 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1701 	if (mp == NULL)
1702 		return;
1703 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1704 	    author, NULL, mp, 0, NULL);
1705 	rtm = (rt_msghdr_t *)mp->b_rptr;
1706 	rtm->rtm_flags = flags;
1707 	rtm->rtm_errno = error;
1708 	rtm->rtm_flags |= RTF_DONE;
1709 	rtm->rtm_addrs = rtm_addrs;
1710 	rts_queue_input(mp, NULL, AF_INET, ipst);
1711 }
1712 
1713 /*
1714  * This routine is called to generate a message to the routing
1715  * socket indicating that the status of a network interface has changed.
1716  * Message type generated RTM_IFINFO.
1717  */
1718 void
1719 ip_rts_ifmsg(const ipif_t *ipif)
1720 {
1721 	if_msghdr_t	*ifm;
1722 	mblk_t		*mp;
1723 	sa_family_t	af;
1724 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1725 
1726 	/*
1727 	 * This message should be generated only
1728 	 * when the physical device is changing
1729 	 * state.
1730 	 */
1731 	if (ipif->ipif_id != 0)
1732 		return;
1733 	if (ipif->ipif_isv6) {
1734 		af = AF_INET6;
1735 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1736 		if (mp == NULL)
1737 			return;
1738 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1739 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1740 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1741 	} else {
1742 		af = AF_INET;
1743 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1744 		if (mp == NULL)
1745 			return;
1746 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1747 		    0, NULL);
1748 	}
1749 	ifm = (if_msghdr_t *)mp->b_rptr;
1750 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1751 	ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1752 	    ipif->ipif_ill->ill_phyint->phyint_flags;
1753 	rts_getifdata(&ifm->ifm_data, ipif);
1754 	ifm->ifm_addrs = RTA_IFP;
1755 	rts_queue_input(mp, NULL, af, ipst);
1756 }
1757 
1758 /*
1759  * This is called to generate messages to the routing socket
1760  * indicating a network interface has had addresses associated with it.
1761  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1762  */
1763 void
1764 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif)
1765 {
1766 	int		pass;
1767 	int		ncmd;
1768 	int		rtm_addrs;
1769 	mblk_t		*mp;
1770 	ifa_msghdr_t	*ifam;
1771 	rt_msghdr_t	*rtm;
1772 	sa_family_t	af;
1773 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1774 
1775 	if (ipif->ipif_isv6)
1776 		af = AF_INET6;
1777 	else
1778 		af = AF_INET;
1779 	/*
1780 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1781 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1782 	 */
1783 	for (pass = 1; pass < 3; pass++) {
1784 		if ((cmd == RTM_ADD && pass == 1) ||
1785 		    (cmd == RTM_DELETE && pass == 2)) {
1786 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1787 
1788 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1789 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1790 			if (mp == NULL)
1791 				continue;
1792 			switch (af) {
1793 			case AF_INET:
1794 				rts_fill_msg(ncmd, rtm_addrs, 0,
1795 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1796 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
1797 				    0, NULL);
1798 				break;
1799 			case AF_INET6:
1800 				rts_fill_msg_v6(ncmd, rtm_addrs,
1801 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1802 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1803 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1804 				    ipif, mp, 0, NULL);
1805 				break;
1806 			}
1807 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1808 			ifam->ifam_index =
1809 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1810 			ifam->ifam_metric = ipif->ipif_metric;
1811 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1812 			ifam->ifam_addrs = rtm_addrs;
1813 			rts_queue_input(mp, NULL, af, ipst);
1814 		}
1815 		if ((cmd == RTM_ADD && pass == 2) ||
1816 		    (cmd == RTM_DELETE && pass == 1)) {
1817 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1818 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1819 			if (mp == NULL)
1820 				continue;
1821 			switch (af) {
1822 			case AF_INET:
1823 				rts_fill_msg(cmd, rtm_addrs,
1824 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1825 				    0, 0, 0, NULL, mp, 0, NULL);
1826 				break;
1827 			case AF_INET6:
1828 				rts_fill_msg_v6(cmd, rtm_addrs,
1829 				    &ipif->ipif_v6lcl_addr,
1830 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1831 				    &ipv6_all_zeros, &ipv6_all_zeros,
1832 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1833 				break;
1834 			}
1835 			rtm = (rt_msghdr_t *)mp->b_rptr;
1836 			rtm->rtm_index =
1837 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1838 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1839 			rtm->rtm_errno = error;
1840 			if (error == 0)
1841 				rtm->rtm_flags |= RTF_DONE;
1842 			rtm->rtm_addrs = rtm_addrs;
1843 			rts_queue_input(mp, NULL, af, ipst);
1844 		}
1845 	}
1846 }
1847 
1848 /*
1849  * Based on the address family specified in a sockaddr, copy the address field
1850  * into an in6_addr_t.
1851  *
1852  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1853  * compatibility with programs that leave the family cleared in the sockaddr.
1854  * Callers of rts_copyfromsockaddr should check the family themselves if they
1855  * wish to verify its value.
1856  *
1857  * In the case of AF_INET6, a check is made to ensure that address is not an
1858  * IPv4-mapped address.
1859  */
1860 size_t
1861 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1862 {
1863 	switch (sa->sa_family) {
1864 	case AF_INET:
1865 	case AF_UNSPEC:
1866 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1867 		return (sizeof (sin_t));
1868 	case AF_INET6:
1869 		*addrp = ((sin6_t *)sa)->sin6_addr;
1870 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1871 			return (0);
1872 		return (sizeof (sin6_t));
1873 	default:
1874 		return (0);
1875 	}
1876 }
1877