xref: /titanic_52/usr/src/uts/common/inet/ip/ip_rts.c (revision 9b4e3ac25d882519cad3fc11f0c53b07f4e60536)
1 /*
2  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
/*
 * This file contains routines that process routing socket requests.
 */
44 
45 #include <sys/types.h>
46 #include <sys/stream.h>
47 #include <sys/stropts.h>
48 #include <sys/ddi.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/policy.h>
52 #include <sys/zone.h>
53 
54 #include <sys/systm.h>
55 #include <sys/param.h>
56 #include <sys/socket.h>
57 #include <sys/strsun.h>
58 #include <net/if.h>
59 #include <net/route.h>
60 #include <netinet/in.h>
61 #include <net/if_dl.h>
62 #include <netinet/ip6.h>
63 
64 #include <inet/common.h>
65 #include <inet/ip.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_if.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_ftable.h>
70 #include <inet/ip_rts.h>
71 
72 #include <inet/ipclassifier.h>
73 
74 #include <sys/tsol/tndb.h>
75 #include <sys/tsol/tnet.h>
76 
77 #define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
78 	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))
79 
80 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
81 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
82     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
83     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
84 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
85     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
86     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
87     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
88 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
89 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
90 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
91     sa_family_t af);
92 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
93 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
94 
95 /*
96  * Send the ack to all the routing queues.  In case of the originating queue,
97  * send it only if the loopback is set.
98  *
99  * Messages are sent upstream only on routing sockets that did not specify an
100  * address family when they were created or when the address family matches the
101  * one specified by the caller.
102  *
103  */
104 void
105 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, ip_stack_t *ipst)
106 {
107 	mblk_t	*mp1;
108 	conn_t 	*connp, *next_connp;
109 
110 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
111 	connp = ipst->ips_rts_clients->connf_head;
112 
113 	while (connp != NULL) {
114 		/*
115 		 * If there was a family specified when this routing socket was
116 		 * created and it doesn't match the family of the message to
117 		 * copy, then continue.
118 		 */
119 		if ((connp->conn_proto != AF_UNSPEC) &&
120 		    (connp->conn_proto != af)) {
121 			connp = connp->conn_next;
122 			continue;
123 		}
124 		/*
125 		 * For the originating queue, we only copy the message upstream
126 		 * if loopback is set.  For others reading on the routing
127 		 * socket, we check if there is room upstream for a copy of the
128 		 * message.
129 		 */
130 		if ((o_connp == connp) && connp->conn_loopback == 0) {
131 				connp = connp->conn_next;
132 				continue;
133 		}
134 		CONN_INC_REF(connp);
135 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
136 		/* Pass to rts_input */
137 		if ((IPCL_IS_NONSTR(connp) && !PROTO_FLOW_CNTRLD(connp))||
138 		    (!IPCL_IS_NONSTR(connp) &&
139 		    canputnext(CONNP_TO_RQ(connp)))) {
140 			mp1 = dupmsg(mp);
141 			if (mp1 == NULL)
142 				mp1 = copymsg(mp);
143 			if (mp1 != NULL)
144 				(connp->conn_recv)(connp, mp1, NULL);
145 		}
146 
147 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
148 		/* Follow the next pointer before releasing the conn. */
149 		next_connp = connp->conn_next;
150 		CONN_DEC_REF(connp);
151 		connp = next_connp;
152 	}
153 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
154 	freemsg(mp);
155 }
156 
157 /*
158  * Takes an ire and sends an ack to all the routing sockets. This
159  * routine is used
160  * - when a route is created/deleted through the ioctl interface.
161  * - when ire_expire deletes a stale redirect
162  */
163 void
164 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
165 {
166 	mblk_t		*mp;
167 	rt_msghdr_t	*rtm;
168 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
169 	sa_family_t	af;
170 	in6_addr_t	gw_addr_v6;
171 
172 	if (ire == NULL)
173 		return;
174 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
175 	    ire->ire_ipversion == IPV6_VERSION);
176 
177 	if (ire->ire_flags & RTF_SETSRC)
178 		rtm_addrs |= RTA_SRC;
179 
180 	switch (ire->ire_ipversion) {
181 	case IPV4_VERSION:
182 		af = AF_INET;
183 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
184 		if (mp == NULL)
185 			return;
186 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
187 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
188 		    0, NULL);
189 		break;
190 	case IPV6_VERSION:
191 		af = AF_INET6;
192 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
193 		if (mp == NULL)
194 			return;
195 		mutex_enter(&ire->ire_lock);
196 		gw_addr_v6 = ire->ire_gateway_addr_v6;
197 		mutex_exit(&ire->ire_lock);
198 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
199 		    &ire->ire_mask_v6, &gw_addr_v6,
200 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
201 		    NULL, mp, 0, NULL);
202 		break;
203 	}
204 	rtm = (rt_msghdr_t *)mp->b_rptr;
205 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
206 	rtm->rtm_addrs = rtm_addrs;
207 	rtm->rtm_flags = ire->ire_flags;
208 	if (error != 0)
209 		rtm->rtm_errno = error;
210 	else
211 		rtm->rtm_flags |= RTF_DONE;
212 	rts_queue_input(mp, NULL, af, ipst);
213 }
214 
215 /* ARGSUSED */
216 static void
217 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
218 {
219 	(void) ip_rts_request(q, mp, DB_CRED(mp));
220 }
221 
222 /*
223  * This is a call from the RTS module
224  * indicating that this is a Routing Socket
225  * Stream. Insert this conn_t in routing
226  * socket client list.
227  */
228 void
229 ip_rts_register(conn_t *connp)
230 {
231 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
232 
233 	connp->conn_loopback = 1;
234 	ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
235 }
236 
237 /*
238  * This is a call from the RTS module indicating that it is closing.
239  */
240 void
241 ip_rts_unregister(conn_t *connp)
242 {
243 	ipcl_hash_remove(connp);
244 }
245 
246 /*
247  * Processes requests received on a routing socket. It extracts all the
248  * arguments and calls the appropriate function to process the request.
249  *
250  * RTA_SRC bit flag requests are sent by 'route -setsrc'.
251  *
252  * In general, this function does not consume the message supplied but rather
253  * sends the message upstream with an appropriate UNIX errno.
254  *
255  * We may need to restart this operation if the ipif cannot be looked up
256  * due to an exclusive operation that is currently in progress. The restart
257  * entry point is ip_rts_request_retry. While the request is enqueud in the
258  * ipsq the ioctl could be aborted and the conn close. To ensure that we don't
259  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
260  * released at the completion of the rts ioctl at the end of this function
261  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
262  * conn close occurs in conn_ioctl_cleanup.
263  */
264 int
265 ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
266 {
267 	rt_msghdr_t	*rtm = NULL;
268 	in6_addr_t	dst_addr_v6;
269 	in6_addr_t	src_addr_v6;
270 	in6_addr_t	gw_addr_v6;
271 	in6_addr_t	net_mask_v6;
272 	in6_addr_t	author_v6;
273 	in6_addr_t	if_addr_v6;
274 	mblk_t		*mp1, *ioc_mp = mp;
275 	ire_t		*ire = NULL;
276 	ire_t		*sire = NULL;
277 	int		error = 0;
278 	int		match_flags = MATCH_IRE_DSTONLY;
279 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
280 	int		found_addrs;
281 	sa_family_t	af;
282 	ipaddr_t	dst_addr;
283 	ipaddr_t	gw_addr;
284 	ipaddr_t	src_addr;
285 	ipaddr_t	net_mask;
286 	ushort_t	index;
287 	ipif_t		*ipif = NULL;
288 	ipif_t		*tmp_ipif = NULL;
289 	IOCP		iocp = (IOCP)mp->b_rptr;
290 	boolean_t	gcgrp_xtraref = B_FALSE;
291 	tsol_gcgrp_addr_t ga;
292 	tsol_rtsecattr_t rtsecattr;
293 	struct rtsa_s	*rtsap = NULL;
294 	tsol_gcgrp_t	*gcgrp = NULL;
295 	tsol_gc_t	*gc = NULL;
296 	ts_label_t	*tsl = NULL;
297 	zoneid_t	zoneid;
298 	ip_stack_t	*ipst;
299 
300 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
301 
302 	zoneid = connp->conn_zoneid;
303 	ipst = connp->conn_netstack->netstack_ip;
304 
305 	ASSERT(mp->b_cont != NULL);
306 	/* ioc_mp holds mp */
307 	mp = mp->b_cont;
308 
309 	/*
310 	 * The Routing Socket data starts on
311 	 * next block. If there is no next block
312 	 * this is an indication from routing module
313 	 * that it is a routing socket stream queue.
314 	 * We need to support that for compatibility with SDP since
315 	 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
316 	 */
317 	if (mp->b_cont == NULL) {
318 		/*
319 		 * This is a message from SDP
320 		 * indicating that this is a Routing Socket
321 		 * Stream. Insert this conn_t in routing
322 		 * socket client list.
323 		 */
324 		connp->conn_loopback = 1;
325 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
326 		goto done;
327 	}
328 	mp1 = dupmsg(mp->b_cont);
329 	if (mp1 == NULL) {
330 		error  = ENOBUFS;
331 		goto done;
332 	}
333 	mp = mp1;
334 
335 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
336 		freemsg(mp);
337 		error =  EINVAL;
338 		goto done;
339 	}
340 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
341 		freemsg(mp);
342 		error = EINVAL;
343 		goto done;
344 	}
345 
346 	/*
347 	 * Check the routing message for basic consistency including the
348 	 * version number and that the number of octets written is the same
349 	 * as specified by the rtm_msglen field.
350 	 *
351 	 * At this point, an error can be delivered back via rtm_errno.
352 	 */
353 	rtm = (rt_msghdr_t *)mp->b_rptr;
354 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
355 		error = EINVAL;
356 		goto done;
357 	}
358 	if (rtm->rtm_version != RTM_VERSION) {
359 		error = EPROTONOSUPPORT;
360 		goto done;
361 	}
362 
363 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
364 	if (rtm->rtm_type != RTM_GET &&
365 	    rtm->rtm_type != RTM_RESOLVE &&
366 	    (ioc_cr == NULL ||
367 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
368 		error = EPERM;
369 		goto done;
370 	}
371 
372 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
373 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
374 	    &error);
375 
376 	if (error != 0)
377 		goto done;
378 
379 	if ((found_addrs & RTA_DST) == 0) {
380 		error = EINVAL;
381 		goto done;
382 	}
383 
384 	/*
385 	 * Based on the address family of the destination address, determine
386 	 * the destination, gateway and netmask and return the appropriate error
387 	 * if an unknown address family was specified (following the errno
388 	 * values that 4.4BSD-Lite2 returns.)
389 	 */
390 	switch (af) {
391 	case AF_INET:
392 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
393 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
394 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
395 		if (((found_addrs & RTA_NETMASK) == 0) ||
396 		    (rtm->rtm_flags & RTF_HOST))
397 			net_mask = IP_HOST_MASK;
398 		else
399 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
400 		break;
401 	case AF_INET6:
402 		if (((found_addrs & RTA_NETMASK) == 0) ||
403 		    (rtm->rtm_flags & RTF_HOST))
404 			net_mask_v6 = ipv6_all_ones;
405 		break;
406 	default:
407 		/*
408 		 * These errno values are meant to be compatible with
409 		 * 4.4BSD-Lite2 for the given message types.
410 		 */
411 		switch (rtm->rtm_type) {
412 		case RTM_ADD:
413 		case RTM_DELETE:
414 			error = ESRCH;
415 			goto done;
416 		case RTM_GET:
417 		case RTM_CHANGE:
418 			error = EAFNOSUPPORT;
419 			goto done;
420 		default:
421 			error = EOPNOTSUPP;
422 			goto done;
423 		}
424 	}
425 
426 	/*
427 	 * At this point, the address family must be something known.
428 	 */
429 	ASSERT(af == AF_INET || af == AF_INET6);
430 
431 	if (index != 0) {
432 		ill_t   *ill;
433 
434 		/*
435 		 * IPC must be refheld somewhere in ip_wput_nondata or
436 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
437 		 * If ILL_CHANGING the request is queued in the ipsq.
438 		 */
439 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
440 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
441 		    ipst);
442 		if (ill == NULL) {
443 			if (error != EINPROGRESS)
444 				error = EINVAL;
445 			goto done;
446 		}
447 
448 		ipif = ipif_get_next_ipif(NULL, ill);
449 		ill_refrele(ill);
450 		/*
451 		 * If this is replacement ipif, prevent a route from
452 		 * being added.
453 		 */
454 		if (ipif != NULL && ipif->ipif_replace_zero) {
455 			error = ENETDOWN;
456 			goto done;
457 		}
458 		match_flags |= MATCH_IRE_ILL;
459 	}
460 
461 	/*
462 	 * If a netmask was supplied in the message, then subsequent route
463 	 * lookups will attempt to match on the netmask as well.
464 	 */
465 	if ((found_addrs & RTA_NETMASK) != 0)
466 		match_flags |= MATCH_IRE_MASK;
467 
468 	/*
469 	 * We only process any passed-in route security attributes for
470 	 * either RTM_ADD or RTM_CHANGE message; We overload them
471 	 * to do an RTM_GET as a different label; ignore otherwise.
472 	 */
473 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
474 	    rtm->rtm_type == RTM_GET) {
475 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
476 		if (rtsecattr.rtsa_cnt > 0)
477 			rtsap = &rtsecattr.rtsa_attr[0];
478 	}
479 
480 	switch (rtm->rtm_type) {
481 	case RTM_ADD:
482 		/* if we are adding a route, gateway is a must */
483 		if ((found_addrs & RTA_GATEWAY) == 0) {
484 			error = EINVAL;
485 			goto done;
486 		}
487 
488 		/* Multirouting does not support net routes. */
489 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
490 		    RTF_MULTIRT) {
491 			error = EADDRNOTAVAIL;
492 			goto done;
493 		}
494 
495 		/*
496 		 * Multirouting and user-specified source addresses
497 		 * do not support interface based routing.
498 		 * Assigning a source address to an interface based
499 		 * route is achievable by plumbing a new ipif and
500 		 * setting up the interface route via this ipif,
501 		 * though.
502 		 */
503 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
504 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
505 				error = EADDRNOTAVAIL;
506 				goto done;
507 			}
508 		}
509 
510 		switch (af) {
511 		case AF_INET:
512 			if (src_addr != INADDR_ANY) {
513 				/*
514 				 * The RTF_SETSRC flag is present, check that
515 				 * the supplied src address is not the loopback
516 				 * address. This would produce martian packets.
517 				 */
518 				if (src_addr == htonl(INADDR_LOOPBACK)) {
519 					error = EINVAL;
520 					goto done;
521 				}
522 				/*
523 				 * Also check that the supplied address is a
524 				 * valid, local one.
525 				 */
526 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
527 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
528 				    ip_rts_request_retry, &error, ipst);
529 				if (tmp_ipif == NULL) {
530 					if (error != EINPROGRESS)
531 						error = EADDRNOTAVAIL;
532 					goto done;
533 				}
534 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
535 				    (tmp_ipif->ipif_flags &
536 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
537 					error = EINVAL;
538 					goto done;
539 				}
540 			} else {
541 				/*
542 				 * The RTF_SETSRC modifier must be associated
543 				 * to a non-null source address.
544 				 */
545 				if (rtm->rtm_flags & RTF_SETSRC) {
546 					error = EINVAL;
547 					goto done;
548 				}
549 			}
550 
551 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
552 			    rtm->rtm_flags, ipif, &ire, B_FALSE,
553 			    WR(q), ioc_mp, ip_rts_request_retry,
554 			    rtsap, ipst);
555 			if (ipif != NULL)
556 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
557 			break;
558 		case AF_INET6:
559 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
560 				/*
561 				 * The RTF_SETSRC flag is present, check that
562 				 * the supplied src address is not the loopback
563 				 * address. This would produce martian packets.
564 				 */
565 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
566 					error = EINVAL;
567 					goto done;
568 				}
569 				/*
570 				 * Also check that the supplied address is a
571 				 * valid, local one.
572 				 */
573 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
574 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
575 				    ip_rts_request_retry, &error, ipst);
576 				if (tmp_ipif == NULL) {
577 					if (error != EINPROGRESS)
578 						error = EADDRNOTAVAIL;
579 					goto done;
580 				}
581 
582 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
583 				    (tmp_ipif->ipif_flags &
584 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
585 					error = EINVAL;
586 					goto done;
587 				}
588 
589 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
590 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
591 				    ipif, &ire, WR(q), ioc_mp,
592 				    ip_rts_request_retry, rtsap, ipst);
593 				break;
594 			}
595 			/*
596 			 * The RTF_SETSRC modifier must be associated
597 			 * to a non-null source address.
598 			 */
599 			if (rtm->rtm_flags & RTF_SETSRC) {
600 				error = EINVAL;
601 				goto done;
602 			}
603 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
604 			    &gw_addr_v6, NULL, rtm->rtm_flags,
605 			    ipif, &ire, WR(q), ioc_mp,
606 			    ip_rts_request_retry, rtsap, ipst);
607 			if (ipif != NULL)
608 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
609 			break;
610 		}
611 		if (error != 0)
612 			goto done;
613 		ASSERT(ire != NULL);
614 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
615 		break;
616 	case RTM_DELETE:
617 		/* if we are deleting a route, gateway is a must */
618 		if ((found_addrs & RTA_GATEWAY) == 0) {
619 			error = EINVAL;
620 			goto done;
621 		}
622 		/*
623 		 * The RTF_SETSRC modifier does not make sense
624 		 * when deleting a route.
625 		 */
626 		if (rtm->rtm_flags & RTF_SETSRC) {
627 			error = EINVAL;
628 			goto done;
629 		}
630 
631 		switch (af) {
632 		case AF_INET:
633 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
634 			    found_addrs, rtm->rtm_flags, ipif, B_FALSE,
635 			    WR(q), ioc_mp, ip_rts_request_retry, ipst);
636 			break;
637 		case AF_INET6:
638 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
639 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
640 			    WR(q), ioc_mp, ip_rts_request_retry, ipst);
641 			break;
642 		}
643 		break;
644 	case RTM_GET:
645 	case RTM_CHANGE:
646 		/*
647 		 * In the case of RTM_GET, the forwarding table should be
648 		 * searched recursively with default being matched if the
649 		 * specific route doesn't exist.  Also, if a gateway was
650 		 * specified then the gateway address must also be matched.
651 		 *
652 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
653 		 * is the new gateway address so matching on the gateway address
654 		 * is not done.  This can lead to ambiguity when looking up the
655 		 * route to change as usually only the destination (and netmask,
656 		 * if supplied) is used for the lookup.  However if a RTA_IFP
657 		 * sockaddr is also supplied, it can disambiguate which route to
658 		 * change provided the ambigous routes are tied to distinct
659 		 * ill's (or interface indices).  If the routes are not tied to
660 		 * any particular interfaces (for example, with traditional
661 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
662 		 * it won't match any such routes.
663 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
664 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
665 		 */
666 		if (((found_addrs & RTA_SRC) != 0) &&
667 		    ((rtm->rtm_type == RTM_GET) ||
668 		    !(rtm->rtm_flags & RTF_SETSRC))) {
669 			error = EOPNOTSUPP;
670 			goto done;
671 		}
672 
673 		if (rtm->rtm_type == RTM_GET) {
674 			match_flags |=
675 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
676 			    MATCH_IRE_SECATTR);
677 			match_flags_local |= MATCH_IRE_SECATTR;
678 			if ((found_addrs & RTA_GATEWAY) != 0)
679 				match_flags |= MATCH_IRE_GW;
680 			if (ioc_cr)
681 				tsl = crgetlabel(ioc_cr);
682 			if (rtsap != NULL) {
683 				if (rtsa_validate(rtsap) != 0) {
684 					error = EINVAL;
685 					goto done;
686 				}
687 				if (tsl != NULL &&
688 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
689 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
690 				    !bldominates(&tsl->tsl_label,
691 				    &rtsap->rtsa_slrange.lower_bound))) {
692 					error = EPERM;
693 					goto done;
694 				}
695 				tsl = labelalloc(
696 				    &rtsap->rtsa_slrange.lower_bound,
697 				    rtsap->rtsa_doi, KM_NOSLEEP);
698 			}
699 		}
700 		if (rtm->rtm_type == RTM_CHANGE) {
701 			if ((found_addrs & RTA_GATEWAY) &&
702 			    (rtm->rtm_flags & RTF_SETSRC)) {
703 				/*
704 				 * Do not want to change the gateway,
705 				 * but rather the source address.
706 				 */
707 				match_flags |= MATCH_IRE_GW;
708 			}
709 		}
710 
711 		/*
712 		 * If the netmask is all ones (either as supplied or as derived
713 		 * above), then first check for an IRE_LOOPBACK or
714 		 * IRE_LOCAL entry.
715 		 *
716 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
717 		 * entry, then look in the forwarding table.
718 		 */
719 		switch (af) {
720 		case AF_INET:
721 			if (net_mask == IP_HOST_MASK) {
722 				ire = ire_ctable_lookup(dst_addr, gw_addr,
723 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
724 				    tsl, match_flags_local, ipst);
725 				/*
726 				 * If we found an IRE_LOCAL, make sure
727 				 * it is one that would be used by this
728 				 * zone to send packets.
729 				 */
730 				if (ire != NULL &&
731 				    ire->ire_type == IRE_LOCAL &&
732 				    ipst->ips_ip_restrict_interzone_loopback &&
733 				    !ire_local_ok_across_zones(ire,
734 				    zoneid, &dst_addr, tsl, ipst)) {
735 					ire_refrele(ire);
736 					ire = NULL;
737 				}
738 			}
739 			if (ire == NULL) {
740 				ire = ire_ftable_lookup(dst_addr, net_mask,
741 				    gw_addr, 0, ipif, &sire, zoneid, 0,
742 				    tsl, match_flags, ipst);
743 			}
744 			break;
745 		case AF_INET6:
746 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
747 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
748 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
749 				    zoneid, tsl, match_flags_local, ipst);
750 				/*
751 				 * If we found an IRE_LOCAL, make sure
752 				 * it is one that would be used by this
753 				 * zone to send packets.
754 				 */
755 				if (ire != NULL &&
756 				    ire->ire_type == IRE_LOCAL &&
757 				    ipst->ips_ip_restrict_interzone_loopback &&
758 				    !ire_local_ok_across_zones(ire,
759 				    zoneid, (void *)&dst_addr_v6, tsl, ipst)) {
760 					ire_refrele(ire);
761 					ire = NULL;
762 				}
763 			}
764 			if (ire == NULL) {
765 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
766 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
767 				    zoneid, 0, tsl, match_flags, ipst);
768 			}
769 			break;
770 		}
771 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
772 			label_rele(tsl);
773 
774 		if (ire == NULL) {
775 			error = ESRCH;
776 			goto done;
777 		}
778 		/* we know the IRE before we come here */
779 		switch (rtm->rtm_type) {
780 		case RTM_GET:
781 			mp1 = rts_rtmget(mp, ire, sire, af);
782 			if (mp1 == NULL) {
783 				error = ENOBUFS;
784 				goto done;
785 			}
786 			freemsg(mp);
787 			mp = mp1;
788 			rtm = (rt_msghdr_t *)mp->b_rptr;
789 			break;
790 		case RTM_CHANGE:
791 			/*
792 			 * Do not allow to the multirouting state of a route
793 			 * to be changed. This aims to prevent undesirable
794 			 * stages where both multirt and non-multirt routes
795 			 * for the same destination are declared.
796 			 */
797 			if ((ire->ire_flags & RTF_MULTIRT) !=
798 			    (rtm->rtm_flags & RTF_MULTIRT)) {
799 				error = EINVAL;
800 				goto done;
801 			}
802 			/*
803 			 * Note that we do not need to do
804 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
805 			 * in metrics or gateway will not affect existing
806 			 * routes since it does not create a more specific
807 			 * route.
808 			 */
809 			switch (af) {
810 			case AF_INET:
811 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
812 				if ((found_addrs & RTA_GATEWAY) != 0 &&
813 				    (ire->ire_gateway_addr != gw_addr)) {
814 					ire->ire_gateway_addr = gw_addr;
815 				}
816 
817 				if (rtsap != NULL) {
818 					ga.ga_af = AF_INET;
819 					IN6_IPADDR_TO_V4MAPPED(
820 					    ire->ire_gateway_addr, &ga.ga_addr);
821 
822 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
823 					if (gcgrp == NULL) {
824 						error = ENOMEM;
825 						goto done;
826 					}
827 				}
828 
829 				if ((found_addrs & RTA_SRC) != 0 &&
830 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
831 				    (ire->ire_src_addr != src_addr)) {
832 
833 					if (src_addr != INADDR_ANY) {
834 						/*
835 						 * The RTF_SETSRC flag is
836 						 * present, check that the
837 						 * supplied src address is not
838 						 * the loopback address. This
839 						 * would produce martian
840 						 * packets.
841 						 */
842 						if (src_addr ==
843 						    htonl(INADDR_LOOPBACK)) {
844 							error = EINVAL;
845 							goto done;
846 						}
847 						/*
848 						 * Also check that the the
849 						 * supplied addr is a valid
850 						 * local address.
851 						 */
852 						tmp_ipif = ipif_lookup_addr(
853 						    src_addr, NULL, ALL_ZONES,
854 						    WR(q), ioc_mp,
855 						    ip_rts_request_retry,
856 						    &error, ipst);
857 						if (tmp_ipif == NULL) {
858 							error = (error ==
859 							    EINPROGRESS) ?
860 							    error :
861 							    EADDRNOTAVAIL;
862 							goto done;
863 						}
864 
865 						if (!(tmp_ipif->ipif_flags &
866 						    IPIF_UP) ||
867 						    (tmp_ipif->ipif_flags &
868 						    (IPIF_NOLOCAL |
869 						    IPIF_ANYCAST))) {
870 							error = EINVAL;
871 							goto done;
872 						}
873 						ire->ire_flags |= RTF_SETSRC;
874 					} else {
875 						ire->ire_flags &= ~RTF_SETSRC;
876 					}
877 					ire->ire_src_addr = src_addr;
878 				}
879 				break;
880 			case AF_INET6:
881 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
882 				mutex_enter(&ire->ire_lock);
883 				if ((found_addrs & RTA_GATEWAY) != 0 &&
884 				    !IN6_ARE_ADDR_EQUAL(
885 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
886 					ire->ire_gateway_addr_v6 = gw_addr_v6;
887 				}
888 
889 				if (rtsap != NULL) {
890 					ga.ga_af = AF_INET6;
891 					ga.ga_addr = ire->ire_gateway_addr_v6;
892 
893 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
894 					if (gcgrp == NULL) {
895 						error = ENOMEM;
896 						goto done;
897 					}
898 				}
899 
900 				if ((found_addrs & RTA_SRC) != 0 &&
901 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
902 				    !IN6_ARE_ADDR_EQUAL(
903 				    &ire->ire_src_addr_v6, &src_addr_v6)) {
904 
905 					if (!IN6_IS_ADDR_UNSPECIFIED(
906 					    &src_addr_v6)) {
907 						/*
908 						 * The RTF_SETSRC flag is
909 						 * present, check that the
910 						 * supplied src address is not
911 						 * the loopback address. This
912 						 * would produce martian
913 						 * packets.
914 						 */
915 						if (IN6_IS_ADDR_LOOPBACK(
916 						    &src_addr_v6)) {
917 							mutex_exit(
918 							    &ire->ire_lock);
919 							error = EINVAL;
920 							goto done;
921 						}
922 						/*
923 						 * Also check that the the
924 						 * supplied addr is a valid
925 						 * local address.
926 						 */
927 						tmp_ipif = ipif_lookup_addr_v6(
928 						    &src_addr_v6, NULL,
929 						    ALL_ZONES,
930 						    CONNP_TO_WQ(connp), ioc_mp,
931 						    ip_rts_request_retry,
932 						    &error, ipst);
933 						if (tmp_ipif == NULL) {
934 							mutex_exit(
935 							    &ire->ire_lock);
936 							error = (error ==
937 							    EINPROGRESS) ?
938 							    error :
939 							    EADDRNOTAVAIL;
940 							goto done;
941 						}
942 						if (!(tmp_ipif->ipif_flags &
943 						    IPIF_UP) ||
944 						    (tmp_ipif->ipif_flags &
945 						    (IPIF_NOLOCAL |
946 						    IPIF_ANYCAST))) {
947 							mutex_exit(
948 							    &ire->ire_lock);
949 							error = EINVAL;
950 							goto done;
951 						}
952 						ire->ire_flags |= RTF_SETSRC;
953 					} else {
954 						ire->ire_flags &= ~RTF_SETSRC;
955 					}
956 					ire->ire_src_addr_v6 = src_addr_v6;
957 				}
958 				mutex_exit(&ire->ire_lock);
959 				break;
960 			}
961 
962 			if (rtsap != NULL) {
963 				in_addr_t ga_addr4;
964 
965 				ASSERT(gcgrp != NULL);
966 
967 				/*
968 				 * Create and add the security attribute to
969 				 * prefix IRE; it will add a reference to the
970 				 * group upon allocating a new entry.  If it
971 				 * finds an already-existing entry for the
972 				 * security attribute, it simply returns it
973 				 * and no new group reference is made.
974 				 */
975 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
976 				if (gc == NULL ||
977 				    (error = tsol_ire_init_gwattr(ire,
978 				    ire->ire_ipversion, gc, NULL)) != 0) {
979 					if (gc != NULL) {
980 						GC_REFRELE(gc);
981 					} else {
982 						/* gc_create failed */
983 						error = ENOMEM;
984 					}
985 					goto done;
986 				}
987 
988 				/*
989 				 * Now delete any existing gateway IRE caches
990 				 * as well as all caches using the gateway,
991 				 * and allow them to be created on demand
992 				 * through ip_newroute{_v6}.
993 				 */
994 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
995 				if (af == AF_INET) {
996 					ire_clookup_delete_cache_gw(
997 					    ga_addr4, ALL_ZONES, ipst);
998 				} else {
999 					ire_clookup_delete_cache_gw_v6(
1000 					    &ga.ga_addr, ALL_ZONES, ipst);
1001 				}
1002 			}
1003 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
1004 			break;
1005 		}
1006 		break;
1007 	default:
1008 		error = EOPNOTSUPP;
1009 		break;
1010 	}
1011 done:
1012 	if (ire != NULL)
1013 		ire_refrele(ire);
1014 	if (sire != NULL)
1015 		ire_refrele(sire);
1016 	if (ipif != NULL)
1017 		ipif_refrele(ipif);
1018 	if (tmp_ipif != NULL)
1019 		ipif_refrele(tmp_ipif);
1020 
1021 	if (gcgrp_xtraref)
1022 		GCGRP_REFRELE(gcgrp);
1023 
1024 	if (error == EINPROGRESS) {
1025 		if (rtm != NULL)
1026 			freemsg(mp);
1027 		return (error);
1028 	}
1029 	if (rtm != NULL) {
1030 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1031 		if (error != 0) {
1032 			rtm->rtm_errno = error;
1033 			/* Send error ACK */
1034 			ip1dbg(("ip_rts_request: error %d\n", error));
1035 		} else {
1036 			rtm->rtm_flags |= RTF_DONE;
1037 			/* OK ACK already set up by caller except this */
1038 			ip2dbg(("ip_rts_request: OK ACK\n"));
1039 		}
1040 		rts_queue_input(mp, connp, af, ipst);
1041 	}
1042 
1043 	iocp->ioc_error = error;
1044 	ioc_mp->b_datap->db_type = M_IOCACK;
1045 	if (iocp->ioc_error != 0)
1046 		iocp->ioc_count = 0;
1047 	(connp->conn_recv)(connp, ioc_mp, NULL);
1048 
1049 	/* conn was refheld in ip_wput_ioctl. */
1050 	CONN_OPER_PENDING_DONE(connp);
1051 
1052 	return (error);
1053 }
1054 
1055 int
1056 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
1057 {
1058 	return (ip_rts_request_common(q, mp, Q_TO_CONN(q), ioc_cr));
1059 }
1060 
1061 /*
1062  * Build a reply to the RTM_GET request contained in the given message block
1063  * using the retrieved IRE of the destination address, the parent IRE (if it
1064  * exists) and the address family.
1065  *
1066  * Returns a pointer to a message block containing the reply if successful,
1067  * otherwise NULL is returned.
1068  */
1069 static mblk_t *
1070 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1071 {
1072 	rt_msghdr_t	*rtm;
1073 	rt_msghdr_t	*new_rtm;
1074 	mblk_t		*new_mp;
1075 	int		rtm_addrs;
1076 	int		rtm_flags;
1077 	in6_addr_t	gw_addr_v6;
1078 	tsol_ire_gw_secattr_t *attrp = NULL;
1079 	tsol_gc_t	*gc = NULL;
1080 	tsol_gcgrp_t	*gcgrp = NULL;
1081 	int		sacnt = 0;
1082 
1083 	ASSERT(ire->ire_ipif != NULL);
1084 	rtm = (rt_msghdr_t *)mp->b_rptr;
1085 
1086 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1087 		attrp = sire->ire_gw_secattr;
1088 	else if (ire->ire_gw_secattr != NULL)
1089 		attrp = ire->ire_gw_secattr;
1090 
1091 	if (attrp != NULL) {
1092 		mutex_enter(&attrp->igsa_lock);
1093 		if ((gc = attrp->igsa_gc) != NULL) {
1094 			gcgrp = gc->gc_grp;
1095 			ASSERT(gcgrp != NULL);
1096 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1097 			sacnt = 1;
1098 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1099 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1100 			gc = gcgrp->gcgrp_head;
1101 			sacnt = gcgrp->gcgrp_count;
1102 		}
1103 		mutex_exit(&attrp->igsa_lock);
1104 
1105 		/* do nothing if there's no gc to report */
1106 		if (gc == NULL) {
1107 			ASSERT(sacnt == 0);
1108 			if (gcgrp != NULL) {
1109 				/* we might as well drop the lock now */
1110 				rw_exit(&gcgrp->gcgrp_rwlock);
1111 				gcgrp = NULL;
1112 			}
1113 			attrp = NULL;
1114 		}
1115 
1116 		ASSERT(gc == NULL || (gcgrp != NULL &&
1117 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1118 	}
1119 	ASSERT(sacnt == 0 || gc != NULL);
1120 
1121 	/*
1122 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1123 	 *
1124 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1125 	 * RTA_IFP and RTA_IFA if either is defined, and also
1126 	 * returns RTA_BRD if the appropriate interface is
1127 	 * point-to-point.
1128 	 */
1129 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1130 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1131 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1132 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1133 			rtm_addrs |= RTA_BRD;
1134 	}
1135 
1136 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1137 	if (new_mp == NULL) {
1138 		if (gcgrp != NULL)
1139 			rw_exit(&gcgrp->gcgrp_rwlock);
1140 		return (NULL);
1141 	}
1142 
1143 	/*
1144 	 * We set the destination address, gateway address,
1145 	 * netmask and flags in the RTM_GET response depending
1146 	 * on whether we found a parent IRE or not.
1147 	 * In particular, if we did find a parent IRE during the
1148 	 * recursive search, use that IRE's gateway address.
1149 	 * Otherwise, we use the IRE's source address for the
1150 	 * gateway address.
1151 	 */
1152 	ASSERT(af == AF_INET || af == AF_INET6);
1153 	switch (af) {
1154 	case AF_INET:
1155 		if (sire == NULL) {
1156 			rtm_flags = ire->ire_flags;
1157 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1158 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1159 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1160 			    new_mp, sacnt, gc);
1161 		} else {
1162 			if (sire->ire_flags & RTF_SETSRC)
1163 				rtm_addrs |= RTA_SRC;
1164 
1165 			rtm_flags = sire->ire_flags;
1166 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1167 			    sire->ire_mask, sire->ire_gateway_addr,
1168 			    (sire->ire_flags & RTF_SETSRC) ?
1169 			    sire->ire_src_addr : ire->ire_src_addr,
1170 			    ire->ire_ipif->ipif_pp_dst_addr,
1171 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1172 		}
1173 		break;
1174 	case AF_INET6:
1175 		if (sire == NULL) {
1176 			rtm_flags = ire->ire_flags;
1177 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1178 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1179 			    &ire->ire_src_addr_v6,
1180 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1181 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1182 			    sacnt, gc);
1183 		} else {
1184 			if (sire->ire_flags & RTF_SETSRC)
1185 				rtm_addrs |= RTA_SRC;
1186 
1187 			rtm_flags = sire->ire_flags;
1188 			mutex_enter(&sire->ire_lock);
1189 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1190 			mutex_exit(&sire->ire_lock);
1191 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1192 			    &sire->ire_mask_v6, &gw_addr_v6,
1193 			    (sire->ire_flags & RTF_SETSRC) ?
1194 			    &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1195 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1196 			    ire->ire_ipif, new_mp, sacnt, gc);
1197 		}
1198 		break;
1199 	}
1200 
1201 	if (gcgrp != NULL)
1202 		rw_exit(&gcgrp->gcgrp_rwlock);
1203 
1204 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1205 
1206 	/*
1207 	 * The rtm_msglen, rtm_version and rtm_type fields in
1208 	 * RTM_GET response are filled in by rts_fill_msg.
1209 	 *
1210 	 * rtm_addrs and rtm_flags are filled in based on what
1211 	 * was requested and the state of the IREs looked up
1212 	 * above.
1213 	 *
1214 	 * rtm_inits and rtm_rmx are filled in with metrics
1215 	 * based on whether a parent IRE was found or not.
1216 	 *
1217 	 * TODO: rtm_index and rtm_use should probably be
1218 	 * filled in with something resonable here and not just
1219 	 * copied from the request.
1220 	 */
1221 	new_rtm->rtm_index = rtm->rtm_index;
1222 	new_rtm->rtm_pid = rtm->rtm_pid;
1223 	new_rtm->rtm_seq = rtm->rtm_seq;
1224 	new_rtm->rtm_use = rtm->rtm_use;
1225 	new_rtm->rtm_addrs = rtm_addrs;
1226 	new_rtm->rtm_flags = rtm_flags;
1227 	if (sire == NULL)
1228 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1229 	else
1230 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1231 
1232 	return (new_mp);
1233 }
1234 
1235 /*
1236  * Fill the given if_data_t with interface statistics.
1237  */
1238 static void
1239 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1240 {
1241 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1242 	if_data->ifi_addrlen = 0;		/* media address length */
1243 	if_data->ifi_hdrlen = 0;		/* media header length */
1244 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1245 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1246 	if_data->ifi_baudrate = 0;		/* linespeed */
1247 
1248 	if_data->ifi_ipackets = 0;		/* packets received on if */
1249 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1250 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1251 	if_data->ifi_oerrors = 0;		/* output errors on if */
1252 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1253 	if_data->ifi_ibytes = 0;		/* total number received */
1254 	if_data->ifi_obytes = 0;		/* total number sent */
1255 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1256 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1257 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1258 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1259 						/* protocol. */
1260 }
1261 
1262 /*
1263  * Set the metrics on a forwarding table route.
1264  */
1265 static void
1266 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1267 {
1268 	clock_t		rtt;
1269 	clock_t		rtt_sd;
1270 	ipif_t		*ipif;
1271 	ifrt_t		*ifrt;
1272 	mblk_t		*mp;
1273 	in6_addr_t	gw_addr_v6;
1274 
1275 	/*
1276 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1277 	 * common case of no metrics.
1278 	 */
1279 	if (which == 0)
1280 		return;
1281 	ire->ire_uinfo.iulp_set = B_TRUE;
1282 
1283 	/*
1284 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1285 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1286 	 * microseconds.
1287 	 */
1288 	if (which & RTV_RTT)
1289 		rtt = metrics->rmx_rtt / 1000;
1290 	if (which & RTV_RTTVAR)
1291 		rtt_sd = metrics->rmx_rttvar / 1000;
1292 
1293 	/*
1294 	 * Update the metrics in the IRE itself.
1295 	 */
1296 	mutex_enter(&ire->ire_lock);
1297 	if (which & RTV_MTU)
1298 		ire->ire_max_frag = metrics->rmx_mtu;
1299 	if (which & RTV_RTT)
1300 		ire->ire_uinfo.iulp_rtt = rtt;
1301 	if (which & RTV_SSTHRESH)
1302 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1303 	if (which & RTV_RTTVAR)
1304 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1305 	if (which & RTV_SPIPE)
1306 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1307 	if (which & RTV_RPIPE)
1308 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1309 	mutex_exit(&ire->ire_lock);
1310 
1311 	/*
1312 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1313 	 * reflect the metric change there.
1314 	 */
1315 	ipif = ire->ire_ipif;
1316 	if (ipif == NULL)
1317 		return;
1318 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1319 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1320 	if (ipif->ipif_isv6) {
1321 		mutex_enter(&ire->ire_lock);
1322 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1323 		mutex_exit(&ire->ire_lock);
1324 	}
1325 	mutex_enter(&ipif->ipif_saved_ire_lock);
1326 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1327 		/*
1328 		 * On a given ipif, the triple of address, gateway and mask is
1329 		 * unique for each saved IRE (in the case of ordinary interface
1330 		 * routes, the gateway address is all-zeroes).
1331 		 */
1332 		ifrt = (ifrt_t *)mp->b_rptr;
1333 		if (ipif->ipif_isv6) {
1334 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1335 			    &ire->ire_addr_v6) ||
1336 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1337 			    &gw_addr_v6) ||
1338 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1339 			    &ire->ire_mask_v6))
1340 				continue;
1341 		} else {
1342 			if (ifrt->ifrt_addr != ire->ire_addr ||
1343 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1344 			    ifrt->ifrt_mask != ire->ire_mask)
1345 				continue;
1346 		}
1347 		if (which & RTV_MTU)
1348 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1349 		if (which & RTV_RTT)
1350 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1351 		if (which & RTV_SSTHRESH) {
1352 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1353 			    metrics->rmx_ssthresh;
1354 		}
1355 		if (which & RTV_RTTVAR)
1356 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1357 		if (which & RTV_SPIPE)
1358 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1359 		if (which & RTV_RPIPE)
1360 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1361 		break;
1362 	}
1363 	mutex_exit(&ipif->ipif_saved_ire_lock);
1364 }
1365 
1366 /*
1367  * Get the metrics from a forwarding table route.
1368  */
1369 static int
1370 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1371 {
1372 	int	metrics_set = 0;
1373 
1374 	bzero(metrics, sizeof (rt_metrics_t));
1375 	/*
1376 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1377 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1378 	 * microseconds.
1379 	 */
1380 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1381 	metrics_set |= RTV_RTT;
1382 	metrics->rmx_mtu = ire->ire_max_frag;
1383 	metrics_set |= RTV_MTU;
1384 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1385 	metrics_set |= RTV_SSTHRESH;
1386 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1387 	metrics_set |= RTV_RTTVAR;
1388 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1389 	metrics_set |= RTV_SPIPE;
1390 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1391 	metrics_set |= RTV_RPIPE;
1392 	return (metrics_set);
1393 }
1394 
1395 /*
1396  * Takes a pointer to a routing message and extracts necessary info by looking
1397  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1398  * passed (all of which must be valid).
1399  *
1400  * The bitmask of sockaddrs actually found in the message is returned, or zero
1401  * is returned in the case of an error.
1402  */
1403 static int
1404 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1405     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1406     in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1407     tsol_rtsecattr_t *rtsecattr, int *error)
1408 {
1409 	struct sockaddr *sa;
1410 	int	i;
1411 	int	addr_bits;
1412 	int	length;
1413 	int	found_addrs = 0;
1414 	caddr_t	cp;
1415 	size_t	size;
1416 	struct sockaddr_dl *sdl;
1417 
1418 	*dst_addrp = ipv6_all_zeros;
1419 	*gw_addrp = ipv6_all_zeros;
1420 	*net_maskp = ipv6_all_zeros;
1421 	*authorp = ipv6_all_zeros;
1422 	*if_addrp = ipv6_all_zeros;
1423 	*in_src_addrp = ipv6_all_zeros;
1424 	*indexp = 0;
1425 	*afp = AF_UNSPEC;
1426 	rtsecattr->rtsa_cnt = 0;
1427 	*error = 0;
1428 
1429 	/*
1430 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1431 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1432 	 */
1433 	cp = (caddr_t)&rtm[1];
1434 	length = rtm->rtm_msglen;
1435 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1436 		/*
1437 		 * The address family we are working with starts out as
1438 		 * AF_UNSPEC, but is set to the one specified with the
1439 		 * destination address.
1440 		 *
1441 		 * If the "working" address family that has been set to
1442 		 * something other than AF_UNSPEC, then the address family of
1443 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1444 		 * compatibility with older programs) or must be the same as our
1445 		 * "working" one.
1446 		 *
1447 		 * This code assumes that RTA_DST (1) comes first in the loop.
1448 		 */
1449 		sa = (struct sockaddr *)cp;
1450 		addr_bits = (rtm->rtm_addrs & (1 << i));
1451 		if (addr_bits == 0)
1452 			continue;
1453 		switch (addr_bits) {
1454 		case RTA_DST:
1455 			size = rts_copyfromsockaddr(sa, dst_addrp);
1456 			*afp = sa->sa_family;
1457 			break;
1458 		case RTA_GATEWAY:
1459 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1460 				return (0);
1461 			size = rts_copyfromsockaddr(sa, gw_addrp);
1462 			break;
1463 		case RTA_NETMASK:
1464 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1465 				return (0);
1466 			size = rts_copyfromsockaddr(sa, net_maskp);
1467 			break;
1468 		case RTA_IFP:
1469 			if (sa->sa_family != AF_LINK &&
1470 			    sa->sa_family != AF_UNSPEC)
1471 				return (0);
1472 			sdl = (struct sockaddr_dl *)cp;
1473 			*indexp = sdl->sdl_index;
1474 			size = sizeof (struct sockaddr_dl);
1475 			break;
1476 		case RTA_SRC:
1477 			/* Source address of the incoming packet */
1478 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1479 			*afp = sa->sa_family;
1480 			break;
1481 		case RTA_IFA:
1482 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1483 				return (0);
1484 			size = rts_copyfromsockaddr(sa, if_addrp);
1485 			break;
1486 		case RTA_AUTHOR:
1487 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1488 				return (0);
1489 			size = rts_copyfromsockaddr(sa, authorp);
1490 			break;
1491 		default:
1492 			return (0);
1493 		}
1494 		if (size == 0)
1495 			return (0);
1496 		cp += size;
1497 		found_addrs |= addr_bits;
1498 	}
1499 
1500 	/*
1501 	 * Parse the routing message and look for any security-
1502 	 * related attributes for the route.  For each valid
1503 	 * attribute, allocate/obtain the corresponding kernel
1504 	 * route security attributes.
1505 	 */
1506 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1507 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1508 
1509 	return (found_addrs);
1510 }
1511 
1512 /*
1513  * Fills the message with the given info.
1514  */
1515 static void
1516 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1517     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1518     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1519 {
1520 	rt_msghdr_t	*rtm;
1521 	sin_t		*sin;
1522 	size_t		data_size, header_size;
1523 	uchar_t		*cp;
1524 	int		i;
1525 
1526 	ASSERT(mp != NULL);
1527 	ASSERT(sacnt == 0 || gc != NULL);
1528 	/*
1529 	 * First find the type of the message
1530 	 * and its length.
1531 	 */
1532 	header_size = rts_header_msg_size(type);
1533 	/*
1534 	 * Now find the size of the data
1535 	 * that follows the message header.
1536 	 */
1537 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1538 
1539 	rtm = (rt_msghdr_t *)mp->b_rptr;
1540 	mp->b_wptr = &mp->b_rptr[header_size];
1541 	cp = mp->b_wptr;
1542 	bzero(cp, data_size);
1543 	for (i = 0; i < RTA_NUMBITS; i++) {
1544 		sin = (sin_t *)cp;
1545 		switch (rtm_addrs & (1 << i)) {
1546 		case RTA_DST:
1547 			sin->sin_addr.s_addr = dst;
1548 			sin->sin_family = AF_INET;
1549 			cp += sizeof (sin_t);
1550 			break;
1551 		case RTA_GATEWAY:
1552 			sin->sin_addr.s_addr = gateway;
1553 			sin->sin_family = AF_INET;
1554 			cp += sizeof (sin_t);
1555 			break;
1556 		case RTA_NETMASK:
1557 			sin->sin_addr.s_addr = mask;
1558 			sin->sin_family = AF_INET;
1559 			cp += sizeof (sin_t);
1560 			break;
1561 		case RTA_IFP:
1562 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1563 			break;
1564 		case RTA_IFA:
1565 		case RTA_SRC:
1566 			sin->sin_addr.s_addr = src_addr;
1567 			sin->sin_family = AF_INET;
1568 			cp += sizeof (sin_t);
1569 			break;
1570 		case RTA_AUTHOR:
1571 			sin->sin_addr.s_addr = author;
1572 			sin->sin_family = AF_INET;
1573 			cp += sizeof (sin_t);
1574 			break;
1575 		case RTA_BRD:
1576 			/*
1577 			 * RTA_BRD is used typically to specify a point-to-point
1578 			 * destination address.
1579 			 */
1580 			sin->sin_addr.s_addr = brd_addr;
1581 			sin->sin_family = AF_INET;
1582 			cp += sizeof (sin_t);
1583 			break;
1584 		}
1585 	}
1586 
1587 	if (gc != NULL) {
1588 		rtm_ext_t *rtm_ext;
1589 		struct rtsa_s *rp_dst;
1590 		tsol_rtsecattr_t *rsap;
1591 		int i;
1592 
1593 		ASSERT(gc->gc_grp != NULL);
1594 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1595 		ASSERT(sacnt > 0);
1596 
1597 		rtm_ext = (rtm_ext_t *)cp;
1598 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1599 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1600 
1601 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1602 		rsap->rtsa_cnt = sacnt;
1603 		rp_dst = rsap->rtsa_attr;
1604 
1605 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1606 			ASSERT(gc->gc_db != NULL);
1607 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1608 		}
1609 		cp = (uchar_t *)rp_dst;
1610 	}
1611 
1612 	mp->b_wptr = cp;
1613 	mp->b_cont = NULL;
1614 	/*
1615 	 * set the fields that are common to
1616 	 * to different messages.
1617 	 */
1618 	rtm->rtm_msglen = (short)(header_size + data_size);
1619 	rtm->rtm_version = RTM_VERSION;
1620 	rtm->rtm_type = (uchar_t)type;
1621 }
1622 
1623 /*
1624  * Allocates and initializes a routing socket message.
1625  */
1626 mblk_t *
1627 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1628 {
1629 	size_t	length;
1630 	mblk_t	*mp;
1631 
1632 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1633 	mp = allocb(length, BPRI_MED);
1634 	if (mp == NULL)
1635 		return (mp);
1636 	bzero(mp->b_rptr, length);
1637 	return (mp);
1638 }
1639 
1640 /*
1641  * Returns the size of the routing
1642  * socket message header size.
1643  */
1644 size_t
1645 rts_header_msg_size(int type)
1646 {
1647 	switch (type) {
1648 	case RTM_DELADDR:
1649 	case RTM_NEWADDR:
1650 		return (sizeof (ifa_msghdr_t));
1651 	case RTM_IFINFO:
1652 		return (sizeof (if_msghdr_t));
1653 	default:
1654 		return (sizeof (rt_msghdr_t));
1655 	}
1656 }
1657 
1658 /*
1659  * Returns the size of the message needed with the given rtm_addrs and family.
1660  *
1661  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1662  * of the same family (currently either AF_INET or AF_INET6).
1663  */
1664 size_t
1665 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1666 {
1667 	int	i;
1668 	size_t	length = 0;
1669 
1670 	for (i = 0; i < RTA_NUMBITS; i++) {
1671 		switch (rtm_addrs & (1 << i)) {
1672 		case RTA_IFP:
1673 			length += sizeof (struct sockaddr_dl);
1674 			break;
1675 		case RTA_DST:
1676 		case RTA_GATEWAY:
1677 		case RTA_NETMASK:
1678 		case RTA_SRC:
1679 		case RTA_IFA:
1680 		case RTA_AUTHOR:
1681 		case RTA_BRD:
1682 			ASSERT(af == AF_INET || af == AF_INET6);
1683 			switch (af) {
1684 			case AF_INET:
1685 				length += sizeof (sin_t);
1686 				break;
1687 			case AF_INET6:
1688 				length += sizeof (sin6_t);
1689 				break;
1690 			}
1691 			break;
1692 		}
1693 	}
1694 	if (sacnt > 0)
1695 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1696 
1697 	return (length);
1698 }
1699 
1700 /*
1701  * This routine is called to generate a message to the routing
1702  * socket indicating that a redirect has occured, a routing lookup
1703  * has failed, or that a protocol has detected timeouts to a particular
1704  * destination. This routine is called for message types RTM_LOSING,
1705  * RTM_REDIRECT, and RTM_MISS.
1706  */
1707 void
1708 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1709     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1710     ip_stack_t *ipst)
1711 {
1712 	rt_msghdr_t	*rtm;
1713 	mblk_t		*mp;
1714 
1715 	if (rtm_addrs == 0)
1716 		return;
1717 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1718 	if (mp == NULL)
1719 		return;
1720 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1721 	    author, NULL, mp, 0, NULL);
1722 	rtm = (rt_msghdr_t *)mp->b_rptr;
1723 	rtm->rtm_flags = flags;
1724 	rtm->rtm_errno = error;
1725 	rtm->rtm_flags |= RTF_DONE;
1726 	rtm->rtm_addrs = rtm_addrs;
1727 	rts_queue_input(mp, NULL, AF_INET, ipst);
1728 }
1729 
1730 /*
1731  * This routine is called to generate a message to the routing
1732  * socket indicating that the status of a network interface has changed.
1733  * Message type generated RTM_IFINFO.
1734  */
1735 void
1736 ip_rts_ifmsg(const ipif_t *ipif)
1737 {
1738 	if_msghdr_t	*ifm;
1739 	mblk_t		*mp;
1740 	sa_family_t	af;
1741 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1742 
1743 	/*
1744 	 * This message should be generated only
1745 	 * when the physical device is changing
1746 	 * state.
1747 	 */
1748 	if (ipif->ipif_id != 0)
1749 		return;
1750 	if (ipif->ipif_isv6) {
1751 		af = AF_INET6;
1752 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1753 		if (mp == NULL)
1754 			return;
1755 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1756 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1757 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1758 	} else {
1759 		af = AF_INET;
1760 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1761 		if (mp == NULL)
1762 			return;
1763 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1764 		    0, NULL);
1765 	}
1766 	ifm = (if_msghdr_t *)mp->b_rptr;
1767 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1768 	ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1769 	    ipif->ipif_ill->ill_phyint->phyint_flags;
1770 	rts_getifdata(&ifm->ifm_data, ipif);
1771 	ifm->ifm_addrs = RTA_IFP;
1772 	rts_queue_input(mp, NULL, af, ipst);
1773 }
1774 
1775 /*
1776  * This is called to generate messages to the routing socket
1777  * indicating a network interface has had addresses associated with it.
1778  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1779  */
1780 void
1781 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif)
1782 {
1783 	int		pass;
1784 	int		ncmd;
1785 	int		rtm_addrs;
1786 	mblk_t		*mp;
1787 	ifa_msghdr_t	*ifam;
1788 	rt_msghdr_t	*rtm;
1789 	sa_family_t	af;
1790 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1791 
1792 	if (ipif->ipif_isv6)
1793 		af = AF_INET6;
1794 	else
1795 		af = AF_INET;
1796 	/*
1797 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1798 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1799 	 */
1800 	for (pass = 1; pass < 3; pass++) {
1801 		if ((cmd == RTM_ADD && pass == 1) ||
1802 		    (cmd == RTM_DELETE && pass == 2)) {
1803 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1804 
1805 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1806 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1807 			if (mp == NULL)
1808 				continue;
1809 			switch (af) {
1810 			case AF_INET:
1811 				rts_fill_msg(ncmd, rtm_addrs, 0,
1812 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1813 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
1814 				    0, NULL);
1815 				break;
1816 			case AF_INET6:
1817 				rts_fill_msg_v6(ncmd, rtm_addrs,
1818 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1819 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1820 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1821 				    ipif, mp, 0, NULL);
1822 				break;
1823 			}
1824 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1825 			ifam->ifam_index =
1826 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1827 			ifam->ifam_metric = ipif->ipif_metric;
1828 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1829 			ifam->ifam_addrs = rtm_addrs;
1830 			rts_queue_input(mp, NULL, af, ipst);
1831 		}
1832 		if ((cmd == RTM_ADD && pass == 2) ||
1833 		    (cmd == RTM_DELETE && pass == 1)) {
1834 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1835 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1836 			if (mp == NULL)
1837 				continue;
1838 			switch (af) {
1839 			case AF_INET:
1840 				rts_fill_msg(cmd, rtm_addrs,
1841 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1842 				    0, 0, 0, NULL, mp, 0, NULL);
1843 				break;
1844 			case AF_INET6:
1845 				rts_fill_msg_v6(cmd, rtm_addrs,
1846 				    &ipif->ipif_v6lcl_addr,
1847 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1848 				    &ipv6_all_zeros, &ipv6_all_zeros,
1849 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1850 				break;
1851 			}
1852 			rtm = (rt_msghdr_t *)mp->b_rptr;
1853 			rtm->rtm_index =
1854 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1855 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1856 			rtm->rtm_errno = error;
1857 			if (error == 0)
1858 				rtm->rtm_flags |= RTF_DONE;
1859 			rtm->rtm_addrs = rtm_addrs;
1860 			rts_queue_input(mp, NULL, af, ipst);
1861 		}
1862 	}
1863 }
1864 
1865 /*
1866  * Based on the address family specified in a sockaddr, copy the address field
1867  * into an in6_addr_t.
1868  *
1869  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1870  * compatibility with programs that leave the family cleared in the sockaddr.
1871  * Callers of rts_copyfromsockaddr should check the family themselves if they
1872  * wish to verify its value.
1873  *
1874  * In the case of AF_INET6, a check is made to ensure that address is not an
1875  * IPv4-mapped address.
1876  */
1877 size_t
1878 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1879 {
1880 	switch (sa->sa_family) {
1881 	case AF_INET:
1882 	case AF_UNSPEC:
1883 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1884 		return (sizeof (sin_t));
1885 	case AF_INET6:
1886 		*addrp = ((sin6_t *)sa)->sin6_addr;
1887 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1888 			return (0);
1889 		return (sizeof (sin6_t));
1890 	default:
1891 		return (0);
1892 	}
1893 }
1894