xref: /titanic_50/usr/src/uts/common/inet/ip/ip_rts.c (revision 11c2b4c0e543fe2e1e5910cde1f4422cc3218160)
1 /*
2  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * This file contains routines that process routing socket requests.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/stream.h>
49 #include <sys/stropts.h>
50 #include <sys/ddi.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 #include <sys/systm.h>
57 #include <sys/param.h>
58 #include <sys/socket.h>
59 #include <sys/strsun.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <net/if_dl.h>
64 #include <netinet/ip6.h>
65 
66 #include <inet/common.h>
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_ftable.h>
72 #include <inet/ip_rts.h>
73 
74 #include <inet/ipclassifier.h>
75 
76 #include <sys/tsol/tndb.h>
77 #include <sys/tsol/tnet.h>
78 
/*
 * Size of a routing socket message: the sum of rts_header_msg_size() for
 * the message type and rts_data_msg_size() for the address set rtm_addrs,
 * address family af and sacnt.
 */
#define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
	(rts_header_msg_size(type) + rts_data_msg_size(rtm_addrs, af, sacnt))
81 
82 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
83 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
84     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
85     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
86 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
87     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
88     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
89     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
90 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
91 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
92 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
93     sa_family_t af);
94 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
95 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
96 
97 /*
98  * Send the ack to all the routing queues.  In case of the originating queue,
99  * send it only if the loopback is set.
100  *
101  * Messages are sent upstream only on routing sockets that did not specify an
102  * address family when they were created or when the address family matches the
103  * one specified by the caller.
104  *
105  */
106 void
107 rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af, ip_stack_t *ipst)
108 {
109 	mblk_t	*mp1;
110 	int	checkqfull;
111 	conn_t 	*connp, *next_connp;
112 
113 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
114 	connp = ipst->ips_rts_clients->connf_head;
115 
116 	while (connp != NULL) {
117 		/*
118 		 * If there was a family specified when this routing socket was
119 		 * created and it doesn't match the family of the message to
120 		 * copy, then continue.
121 		 */
122 		if ((connp->conn_proto != AF_UNSPEC) &&
123 		    (connp->conn_proto != af)) {
124 			connp = connp->conn_next;
125 			continue;
126 		}
127 		/*
128 		 * For the originating queue, we only copy the message upstream
129 		 * if loopback is set.  For others reading on the routing
130 		 * socket, we check if there is room upstream for a copy of the
131 		 * message.
132 		 */
133 		if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) {
134 			if (connp->conn_loopback == 0) {
135 				connp = connp->conn_next;
136 				continue;
137 			}
138 			/*
139 			 * Just because it is the same queue doesn't mean it
140 			 * will promptly read its acks. Have to avoid using
141 			 * all of kernel memory.
142 			 */
143 			checkqfull = B_TRUE;
144 		} else {
145 			checkqfull = B_TRUE;
146 		}
147 		CONN_INC_REF(connp);
148 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
149 		/* Pass to rts_input */
150 		if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) {
151 			mp1 = dupmsg(mp);
152 			if (mp1 == NULL)
153 				mp1 = copymsg(mp);
154 			if (mp1 != NULL)
155 				(connp->conn_recv)(connp, mp1, NULL);
156 		}
157 
158 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
159 		/* Follow the next pointer before releasing the conn. */
160 		next_connp = connp->conn_next;
161 		CONN_DEC_REF(connp);
162 		connp = next_connp;
163 	}
164 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
165 	freemsg(mp);
166 }
167 
168 /*
169  * Takes an ire and sends an ack to all the routing sockets. This
170  * routine is used
171  * - when a route is created/deleted through the ioctl interface.
172  * - when ire_expire deletes a stale redirect
173  */
174 void
175 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
176 {
177 	mblk_t		*mp;
178 	rt_msghdr_t	*rtm;
179 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
180 	sa_family_t	af;
181 	in6_addr_t	gw_addr_v6;
182 
183 	if (ire == NULL)
184 		return;
185 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
186 	    ire->ire_ipversion == IPV6_VERSION);
187 
188 	if (ire->ire_flags & RTF_SETSRC)
189 		rtm_addrs |= RTA_SRC;
190 
191 	switch (ire->ire_ipversion) {
192 	case IPV4_VERSION:
193 		af = AF_INET;
194 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
195 		if (mp == NULL)
196 			return;
197 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
198 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
199 		    0, NULL);
200 		break;
201 	case IPV6_VERSION:
202 		af = AF_INET6;
203 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
204 		if (mp == NULL)
205 			return;
206 		mutex_enter(&ire->ire_lock);
207 		gw_addr_v6 = ire->ire_gateway_addr_v6;
208 		mutex_exit(&ire->ire_lock);
209 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
210 		    &ire->ire_mask_v6, &gw_addr_v6,
211 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
212 		    NULL, mp, 0, NULL);
213 		break;
214 	}
215 	rtm = (rt_msghdr_t *)mp->b_rptr;
216 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
217 	rtm->rtm_addrs = rtm_addrs;
218 	rtm->rtm_flags = ire->ire_flags;
219 	if (error != 0)
220 		rtm->rtm_errno = error;
221 	else
222 		rtm->rtm_flags |= RTF_DONE;
223 	rts_queue_input(mp, NULL, af, ipst);
224 }
225 
226 /* ARGSUSED */
227 static void
228 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
229 {
230 	(void) ip_rts_request(q, mp, DB_CRED(mp));
231 }
232 
233 /*
234  * This is a call from the RTS module
235  * indicating that this is a Routing Socket
236  * Stream. Insert this conn_t in routing
237  * socket client list.
238  */
239 void
240 ip_rts_register(conn_t *connp)
241 {
242 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
243 
244 	connp->conn_loopback = 1;
245 	ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
246 }
247 
248 /*
249  * This is a call from the RTS module indicating that it is closing.
250  */
251 void
252 ip_rts_unregister(conn_t *connp)
253 {
254 	ipcl_hash_remove(connp);
255 }
256 
257 /*
258  * Processes requests received on a routing socket. It extracts all the
259  * arguments and calls the appropriate function to process the request.
260  *
261  * RTA_SRC bit flag requests are sent by 'route -setsrc'.
262  *
263  * In general, this function does not consume the message supplied but rather
264  * sends the message upstream with an appropriate UNIX errno.
265  *
266  * We may need to restart this operation if the ipif cannot be looked up
267  * due to an exclusive operation that is currently in progress. The restart
268  * entry point is ip_rts_request_retry. While the request is enqueued in the
269  * ipsq the ioctl could be aborted and the conn close. To ensure that we don't
270  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
271  * released at the completion of the rts ioctl at the end of this function
272  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
273  * conn close occurs in conn_ioctl_cleanup.
274  */
275 int
276 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
277 {
278 	rt_msghdr_t	*rtm = NULL;
279 	in6_addr_t	dst_addr_v6;
280 	in6_addr_t	src_addr_v6;
281 	in6_addr_t	gw_addr_v6;
282 	in6_addr_t	net_mask_v6;
283 	in6_addr_t	author_v6;
284 	in6_addr_t	if_addr_v6;
285 	mblk_t		*mp1, *ioc_mp = mp;
286 	ire_t		*ire = NULL;
287 	ire_t		*sire = NULL;
288 	int		error = 0;
289 	int		match_flags = MATCH_IRE_DSTONLY;
290 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
291 	int		found_addrs;
292 	sa_family_t	af;
293 	ipaddr_t	dst_addr;
294 	ipaddr_t	gw_addr;
295 	ipaddr_t	src_addr;
296 	ipaddr_t	net_mask;
297 	ushort_t	index;
298 	ipif_t		*ipif = NULL;
299 	ipif_t		*tmp_ipif = NULL;
300 	IOCP		iocp = (IOCP)mp->b_rptr;
301 	conn_t		*connp;
302 	boolean_t	gcgrp_xtraref = B_FALSE;
303 	tsol_gcgrp_addr_t ga;
304 	tsol_rtsecattr_t rtsecattr;
305 	struct rtsa_s	*rtsap = NULL;
306 	tsol_gcgrp_t	*gcgrp = NULL;
307 	tsol_gc_t	*gc = NULL;
308 	ts_label_t	*tsl = NULL;
309 	zoneid_t	zoneid;
310 	ip_stack_t	*ipst;
311 
312 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
313 
314 	ASSERT(CONN_Q(q));
315 	connp = Q_TO_CONN(q);
316 	zoneid = connp->conn_zoneid;
317 	ipst = connp->conn_netstack->netstack_ip;
318 
319 	ASSERT(mp->b_cont != NULL);
320 	/* ioc_mp holds mp */
321 	mp = mp->b_cont;
322 
323 	/*
324 	 * The Routing Socket data starts on
325 	 * next block. If there is no next block
326 	 * this is an indication from routing module
327 	 * that it is a routing socket stream queue.
328 	 * We need to support that for compatibility with SDP since
329 	 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
330 	 */
331 	if (mp->b_cont == NULL) {
332 		/*
333 		 * This is a message from SDP
334 		 * indicating that this is a Routing Socket
335 		 * Stream. Insert this conn_t in routing
336 		 * socket client list.
337 		 */
338 		connp->conn_loopback = 1;
339 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
340 		goto done;
341 	}
342 	mp1 = dupmsg(mp->b_cont);
343 	if (mp1 == NULL) {
344 		error  = ENOBUFS;
345 		goto done;
346 	}
347 	mp = mp1;
348 
349 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
350 		freemsg(mp);
351 		error =  EINVAL;
352 		goto done;
353 	}
354 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
355 		freemsg(mp);
356 		error = EINVAL;
357 		goto done;
358 	}
359 
360 	/*
361 	 * Check the routing message for basic consistency including the
362 	 * version number and that the number of octets written is the same
363 	 * as specified by the rtm_msglen field.
364 	 *
365 	 * At this point, an error can be delivered back via rtm_errno.
366 	 */
367 	rtm = (rt_msghdr_t *)mp->b_rptr;
368 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
369 		error = EINVAL;
370 		goto done;
371 	}
372 	if (rtm->rtm_version != RTM_VERSION) {
373 		error = EPROTONOSUPPORT;
374 		goto done;
375 	}
376 
377 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
378 	if (rtm->rtm_type != RTM_GET &&
379 	    rtm->rtm_type != RTM_RESOLVE &&
380 	    (ioc_cr == NULL ||
381 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
382 		error = EPERM;
383 		goto done;
384 	}
385 
386 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
387 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
388 	    &error);
389 
390 	if (error != 0)
391 		goto done;
392 
393 	if ((found_addrs & RTA_DST) == 0) {
394 		error = EINVAL;
395 		goto done;
396 	}
397 
398 	/*
399 	 * Based on the address family of the destination address, determine
400 	 * the destination, gateway and netmask and return the appropriate error
401 	 * if an unknown address family was specified (following the errno
402 	 * values that 4.4BSD-Lite2 returns.)
403 	 */
404 	switch (af) {
405 	case AF_INET:
406 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
407 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
408 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
409 		if (((found_addrs & RTA_NETMASK) == 0) ||
410 		    (rtm->rtm_flags & RTF_HOST))
411 			net_mask = IP_HOST_MASK;
412 		else
413 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
414 		break;
415 	case AF_INET6:
416 		if (((found_addrs & RTA_NETMASK) == 0) ||
417 		    (rtm->rtm_flags & RTF_HOST))
418 			net_mask_v6 = ipv6_all_ones;
419 		break;
420 	default:
421 		/*
422 		 * These errno values are meant to be compatible with
423 		 * 4.4BSD-Lite2 for the given message types.
424 		 */
425 		switch (rtm->rtm_type) {
426 		case RTM_ADD:
427 		case RTM_DELETE:
428 			error = ESRCH;
429 			goto done;
430 		case RTM_GET:
431 		case RTM_CHANGE:
432 			error = EAFNOSUPPORT;
433 			goto done;
434 		default:
435 			error = EOPNOTSUPP;
436 			goto done;
437 		}
438 	}
439 
440 	/*
441 	 * At this point, the address family must be something known.
442 	 */
443 	ASSERT(af == AF_INET || af == AF_INET6);
444 
445 	if (index != 0) {
446 		ill_t   *ill;
447 
448 		/*
449 		 * IPC must be refheld somewhere in ip_wput_nondata or
450 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
451 		 * If ILL_CHANGING the request is queued in the ipsq.
452 		 */
453 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
454 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
455 		    ipst);
456 		if (ill == NULL) {
457 			if (error != EINPROGRESS)
458 				error = EINVAL;
459 			goto done;
460 		}
461 
462 		ipif = ipif_get_next_ipif(NULL, ill);
463 		ill_refrele(ill);
464 		/*
465 		 * If this is replacement ipif, prevent a route from
466 		 * being added.
467 		 */
468 		if (ipif != NULL && ipif->ipif_replace_zero) {
469 			error = ENETDOWN;
470 			goto done;
471 		}
472 		match_flags |= MATCH_IRE_ILL;
473 	}
474 
475 	/*
476 	 * If a netmask was supplied in the message, then subsequent route
477 	 * lookups will attempt to match on the netmask as well.
478 	 */
479 	if ((found_addrs & RTA_NETMASK) != 0)
480 		match_flags |= MATCH_IRE_MASK;
481 
482 	/*
483 	 * We only process any passed-in route security attributes for
484 	 * either RTM_ADD or RTM_CHANGE message; We overload them
485 	 * to do an RTM_GET as a different label; ignore otherwise.
486 	 */
487 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
488 	    rtm->rtm_type == RTM_GET) {
489 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
490 		if (rtsecattr.rtsa_cnt > 0)
491 			rtsap = &rtsecattr.rtsa_attr[0];
492 	}
493 
494 	switch (rtm->rtm_type) {
495 	case RTM_ADD:
496 		/* if we are adding a route, gateway is a must */
497 		if ((found_addrs & RTA_GATEWAY) == 0) {
498 			error = EINVAL;
499 			goto done;
500 		}
501 
502 		/* Multirouting does not support net routes. */
503 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
504 		    RTF_MULTIRT) {
505 			error = EADDRNOTAVAIL;
506 			goto done;
507 		}
508 
509 		/*
510 		 * Multirouting and user-specified source addresses
511 		 * do not support interface based routing.
512 		 * Assigning a source address to an interface based
513 		 * route is achievable by plumbing a new ipif and
514 		 * setting up the interface route via this ipif,
515 		 * though.
516 		 */
517 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
518 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
519 				error = EADDRNOTAVAIL;
520 				goto done;
521 			}
522 		}
523 
524 		switch (af) {
525 		case AF_INET:
526 			if (src_addr != INADDR_ANY) {
527 				/*
528 				 * The RTF_SETSRC flag is present, check that
529 				 * the supplied src address is not the loopback
530 				 * address. This would produce martian packets.
531 				 */
532 				if (src_addr == htonl(INADDR_LOOPBACK)) {
533 					error = EINVAL;
534 					goto done;
535 				}
536 				/*
537 				 * Also check that the supplied address is a
538 				 * valid, local one.
539 				 */
540 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
541 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
542 				    ip_rts_request_retry, &error, ipst);
543 				if (tmp_ipif == NULL) {
544 					if (error != EINPROGRESS)
545 						error = EADDRNOTAVAIL;
546 					goto done;
547 				}
548 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
549 				    (tmp_ipif->ipif_flags &
550 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
551 					error = EINVAL;
552 					goto done;
553 				}
554 			} else {
555 				/*
556 				 * The RTF_SETSRC modifier must be associated
557 				 * to a non-null source address.
558 				 */
559 				if (rtm->rtm_flags & RTF_SETSRC) {
560 					error = EINVAL;
561 					goto done;
562 				}
563 			}
564 
565 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
566 			    rtm->rtm_flags, ipif, &ire, B_FALSE,
567 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
568 			    rtsap, ipst);
569 			if (ipif != NULL)
570 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
571 			break;
572 		case AF_INET6:
573 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
574 				/*
575 				 * The RTF_SETSRC flag is present, check that
576 				 * the supplied src address is not the loopback
577 				 * address. This would produce martian packets.
578 				 */
579 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
580 					error = EINVAL;
581 					goto done;
582 				}
583 				/*
584 				 * Also check that the supplied address is a
585 				 * valid, local one.
586 				 */
587 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
588 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
589 				    ip_rts_request_retry, &error, ipst);
590 				if (tmp_ipif == NULL) {
591 					if (error != EINPROGRESS)
592 						error = EADDRNOTAVAIL;
593 					goto done;
594 				}
595 
596 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
597 				    (tmp_ipif->ipif_flags &
598 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
599 					error = EINVAL;
600 					goto done;
601 				}
602 
603 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
604 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
605 				    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
606 				    ip_rts_request_retry, rtsap, ipst);
607 				break;
608 			}
609 			/*
610 			 * The RTF_SETSRC modifier must be associated
611 			 * to a non-null source address.
612 			 */
613 			if (rtm->rtm_flags & RTF_SETSRC) {
614 				error = EINVAL;
615 				goto done;
616 			}
617 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
618 			    &gw_addr_v6, NULL, rtm->rtm_flags,
619 			    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
620 			    ip_rts_request_retry, rtsap, ipst);
621 			if (ipif != NULL)
622 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
623 			break;
624 		}
625 		if (error != 0)
626 			goto done;
627 		ASSERT(ire != NULL);
628 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
629 		break;
630 	case RTM_DELETE:
631 		/* if we are deleting a route, gateway is a must */
632 		if ((found_addrs & RTA_GATEWAY) == 0) {
633 			error = EINVAL;
634 			goto done;
635 		}
636 		/*
637 		 * The RTF_SETSRC modifier does not make sense
638 		 * when deleting a route.
639 		 */
640 		if (rtm->rtm_flags & RTF_SETSRC) {
641 			error = EINVAL;
642 			goto done;
643 		}
644 
645 		switch (af) {
646 		case AF_INET:
647 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
648 			    found_addrs, rtm->rtm_flags, ipif, B_FALSE,
649 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
650 			    ipst);
651 			break;
652 		case AF_INET6:
653 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
654 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
655 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
656 			    ipst);
657 			break;
658 		}
659 		break;
660 	case RTM_GET:
661 	case RTM_CHANGE:
662 		/*
663 		 * In the case of RTM_GET, the forwarding table should be
664 		 * searched recursively with default being matched if the
665 		 * specific route doesn't exist.  Also, if a gateway was
666 		 * specified then the gateway address must also be matched.
667 		 *
668 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
669 		 * is the new gateway address so matching on the gateway address
670 		 * is not done.  This can lead to ambiguity when looking up the
671 		 * route to change as usually only the destination (and netmask,
672 		 * if supplied) is used for the lookup.  However if a RTA_IFP
673 		 * sockaddr is also supplied, it can disambiguate which route to
674 		 * change provided the ambiguous routes are tied to distinct
675 		 * ill's (or interface indices).  If the routes are not tied to
676 		 * any particular interfaces (for example, with traditional
677 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
678 		 * it won't match any such routes.
679 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
680 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
681 		 */
682 		if (((found_addrs & RTA_SRC) != 0) &&
683 		    ((rtm->rtm_type == RTM_GET) ||
684 		    !(rtm->rtm_flags & RTF_SETSRC))) {
685 			error = EOPNOTSUPP;
686 			goto done;
687 		}
688 
689 		if (rtm->rtm_type == RTM_GET) {
690 			match_flags |=
691 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
692 			    MATCH_IRE_SECATTR);
693 			match_flags_local |= MATCH_IRE_SECATTR;
694 			if ((found_addrs & RTA_GATEWAY) != 0)
695 				match_flags |= MATCH_IRE_GW;
696 			if (ioc_cr)
697 				tsl = crgetlabel(ioc_cr);
698 			if (rtsap != NULL) {
699 				if (rtsa_validate(rtsap) != 0) {
700 					error = EINVAL;
701 					goto done;
702 				}
703 				if (tsl != NULL &&
704 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
705 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
706 				    !bldominates(&tsl->tsl_label,
707 				    &rtsap->rtsa_slrange.lower_bound))) {
708 					error = EPERM;
709 					goto done;
710 				}
711 				tsl = labelalloc(
712 				    &rtsap->rtsa_slrange.lower_bound,
713 				    rtsap->rtsa_doi, KM_NOSLEEP);
714 			}
715 		}
716 		if (rtm->rtm_type == RTM_CHANGE) {
717 			if ((found_addrs & RTA_GATEWAY) &&
718 			    (rtm->rtm_flags & RTF_SETSRC)) {
719 				/*
720 				 * Do not want to change the gateway,
721 				 * but rather the source address.
722 				 */
723 				match_flags |= MATCH_IRE_GW;
724 			}
725 		}
726 
727 		/*
728 		 * If the netmask is all ones (either as supplied or as derived
729 		 * above), then first check for an IRE_LOOPBACK or
730 		 * IRE_LOCAL entry.
731 		 *
732 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
733 		 * entry, then look in the forwarding table.
734 		 */
735 		switch (af) {
736 		case AF_INET:
737 			if (net_mask == IP_HOST_MASK) {
738 				ire = ire_ctable_lookup(dst_addr, gw_addr,
739 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
740 				    tsl, match_flags_local, ipst);
741 				/*
742 				 * If we found an IRE_LOCAL, make sure
743 				 * it is one that would be used by this
744 				 * zone to send packets.
745 				 */
746 				if (ire != NULL &&
747 				    ire->ire_type == IRE_LOCAL &&
748 				    ipst->ips_ip_restrict_interzone_loopback &&
749 				    !ire_local_ok_across_zones(ire,
750 				    zoneid, &dst_addr, tsl, ipst)) {
751 					ire_refrele(ire);
752 					ire = NULL;
753 				}
754 			}
755 			if (ire == NULL) {
756 				ire = ire_ftable_lookup(dst_addr, net_mask,
757 				    gw_addr, 0, ipif, &sire, zoneid, 0,
758 				    tsl, match_flags, ipst);
759 			}
760 			break;
761 		case AF_INET6:
762 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
763 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
764 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
765 				    zoneid, tsl, match_flags_local, ipst);
766 				/*
767 				 * If we found an IRE_LOCAL, make sure
768 				 * it is one that would be used by this
769 				 * zone to send packets.
770 				 */
771 				if (ire != NULL &&
772 				    ire->ire_type == IRE_LOCAL &&
773 				    ipst->ips_ip_restrict_interzone_loopback &&
774 				    !ire_local_ok_across_zones(ire,
775 				    zoneid, (void *)&dst_addr_v6, tsl, ipst)) {
776 					ire_refrele(ire);
777 					ire = NULL;
778 				}
779 			}
780 			if (ire == NULL) {
781 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
782 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
783 				    zoneid, 0, tsl, match_flags, ipst);
784 			}
785 			break;
786 		}
787 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
788 			label_rele(tsl);
789 
790 		if (ire == NULL) {
791 			error = ESRCH;
792 			goto done;
793 		}
794 		/* we know the IRE before we come here */
795 		switch (rtm->rtm_type) {
796 		case RTM_GET:
797 			mp1 = rts_rtmget(mp, ire, sire, af);
798 			if (mp1 == NULL) {
799 				error = ENOBUFS;
800 				goto done;
801 			}
802 			freemsg(mp);
803 			mp = mp1;
804 			rtm = (rt_msghdr_t *)mp->b_rptr;
805 			break;
806 		case RTM_CHANGE:
807 			/*
808 			 * Do not allow the multirouting state of a route
809 			 * to be changed. This aims to prevent undesirable
810 			 * stages where both multirt and non-multirt routes
811 			 * for the same destination are declared.
812 			 */
813 			if ((ire->ire_flags & RTF_MULTIRT) !=
814 			    (rtm->rtm_flags & RTF_MULTIRT)) {
815 				error = EINVAL;
816 				goto done;
817 			}
818 			/*
819 			 * Note that we do not need to do
820 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
821 			 * in metrics or gateway will not affect existing
822 			 * routes since it does not create a more specific
823 			 * route.
824 			 */
825 			switch (af) {
826 			case AF_INET:
827 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
828 				if ((found_addrs & RTA_GATEWAY) != 0 &&
829 				    (ire->ire_gateway_addr != gw_addr)) {
830 					ire->ire_gateway_addr = gw_addr;
831 				}
832 
833 				if (rtsap != NULL) {
834 					ga.ga_af = AF_INET;
835 					IN6_IPADDR_TO_V4MAPPED(
836 					    ire->ire_gateway_addr, &ga.ga_addr);
837 
838 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
839 					if (gcgrp == NULL) {
840 						error = ENOMEM;
841 						goto done;
842 					}
843 				}
844 
845 				if ((found_addrs & RTA_SRC) != 0 &&
846 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
847 				    (ire->ire_src_addr != src_addr)) {
848 
849 					if (src_addr != INADDR_ANY) {
850 						/*
851 						 * The RTF_SETSRC flag is
852 						 * present, check that the
853 						 * supplied src address is not
854 						 * the loopback address. This
855 						 * would produce martian
856 						 * packets.
857 						 */
858 						if (src_addr ==
859 						    htonl(INADDR_LOOPBACK)) {
860 							error = EINVAL;
861 							goto done;
862 						}
863 						/*
864 						 * Also check that the
865 						 * supplied addr is a valid
866 						 * local address.
867 						 */
868 						tmp_ipif = ipif_lookup_addr(
869 						    src_addr, NULL, ALL_ZONES,
870 						    CONNP_TO_WQ(connp), ioc_mp,
871 						    ip_rts_request_retry,
872 						    &error, ipst);
873 						if (tmp_ipif == NULL) {
874 							error = (error ==
875 							    EINPROGRESS) ?
876 							    error :
877 							    EADDRNOTAVAIL;
878 							goto done;
879 						}
880 
881 						if (!(tmp_ipif->ipif_flags &
882 						    IPIF_UP) ||
883 						    (tmp_ipif->ipif_flags &
884 						    (IPIF_NOLOCAL |
885 						    IPIF_ANYCAST))) {
886 							error = EINVAL;
887 							goto done;
888 						}
889 						ire->ire_flags |= RTF_SETSRC;
890 					} else {
891 						ire->ire_flags &= ~RTF_SETSRC;
892 					}
893 					ire->ire_src_addr = src_addr;
894 				}
895 				break;
896 			case AF_INET6:
897 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
898 				mutex_enter(&ire->ire_lock);
899 				if ((found_addrs & RTA_GATEWAY) != 0 &&
900 				    !IN6_ARE_ADDR_EQUAL(
901 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
902 					ire->ire_gateway_addr_v6 = gw_addr_v6;
903 				}
904 
905 				if (rtsap != NULL) {
906 					ga.ga_af = AF_INET6;
907 					ga.ga_addr = ire->ire_gateway_addr_v6;
908 
909 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
910 					if (gcgrp == NULL) {
911 						error = ENOMEM;
912 						goto done;
913 					}
914 				}
915 
916 				if ((found_addrs & RTA_SRC) != 0 &&
917 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
918 				    !IN6_ARE_ADDR_EQUAL(
919 				    &ire->ire_src_addr_v6, &src_addr_v6)) {
920 
921 					if (!IN6_IS_ADDR_UNSPECIFIED(
922 					    &src_addr_v6)) {
923 						/*
924 						 * The RTF_SETSRC flag is
925 						 * present, check that the
926 						 * supplied src address is not
927 						 * the loopback address. This
928 						 * would produce martian
929 						 * packets.
930 						 */
931 						if (IN6_IS_ADDR_LOOPBACK(
932 						    &src_addr_v6)) {
933 							mutex_exit(
934 							    &ire->ire_lock);
935 							error = EINVAL;
936 							goto done;
937 						}
938 						/*
939 						 * Also check that the
940 						 * supplied addr is a valid
941 						 * local address.
942 						 */
943 						tmp_ipif = ipif_lookup_addr_v6(
944 						    &src_addr_v6, NULL,
945 						    ALL_ZONES,
946 						    CONNP_TO_WQ(connp), ioc_mp,
947 						    ip_rts_request_retry,
948 						    &error, ipst);
949 						if (tmp_ipif == NULL) {
950 							mutex_exit(
951 							    &ire->ire_lock);
952 							error = (error ==
953 							    EINPROGRESS) ?
954 							    error :
955 							    EADDRNOTAVAIL;
956 							goto done;
957 						}
958 						if (!(tmp_ipif->ipif_flags &
959 						    IPIF_UP) ||
960 						    (tmp_ipif->ipif_flags &
961 						    (IPIF_NOLOCAL |
962 						    IPIF_ANYCAST))) {
963 							mutex_exit(
964 							    &ire->ire_lock);
965 							error = EINVAL;
966 							goto done;
967 						}
968 						ire->ire_flags |= RTF_SETSRC;
969 					} else {
970 						ire->ire_flags &= ~RTF_SETSRC;
971 					}
972 					ire->ire_src_addr_v6 = src_addr_v6;
973 				}
974 				mutex_exit(&ire->ire_lock);
975 				break;
976 			}
977 
978 			if (rtsap != NULL) {
979 				in_addr_t ga_addr4;
980 
981 				ASSERT(gcgrp != NULL);
982 
983 				/*
984 				 * Create and add the security attribute to
985 				 * prefix IRE; it will add a reference to the
986 				 * group upon allocating a new entry.  If it
987 				 * finds an already-existing entry for the
988 				 * security attribute, it simply returns it
989 				 * and no new group reference is made.
990 				 */
991 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
992 				if (gc == NULL ||
993 				    (error = tsol_ire_init_gwattr(ire,
994 				    ire->ire_ipversion, gc, NULL)) != 0) {
995 					if (gc != NULL) {
996 						GC_REFRELE(gc);
997 					} else {
998 						/* gc_create failed */
999 						error = ENOMEM;
1000 					}
1001 					goto done;
1002 				}
1003 
1004 				/*
1005 				 * Now delete any existing gateway IRE caches
1006 				 * as well as all caches using the gateway,
1007 				 * and allow them to be created on demand
1008 				 * through ip_newroute{_v6}.
1009 				 */
1010 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
1011 				if (af == AF_INET) {
1012 					ire_clookup_delete_cache_gw(
1013 					    ga_addr4, ALL_ZONES, ipst);
1014 				} else {
1015 					ire_clookup_delete_cache_gw_v6(
1016 					    &ga.ga_addr, ALL_ZONES, ipst);
1017 				}
1018 			}
1019 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
1020 			break;
1021 		}
1022 		break;
1023 	default:
1024 		error = EOPNOTSUPP;
1025 		break;
1026 	}
1027 done:
1028 	if (ire != NULL)
1029 		ire_refrele(ire);
1030 	if (sire != NULL)
1031 		ire_refrele(sire);
1032 	if (ipif != NULL)
1033 		ipif_refrele(ipif);
1034 	if (tmp_ipif != NULL)
1035 		ipif_refrele(tmp_ipif);
1036 
1037 	if (gcgrp_xtraref)
1038 		GCGRP_REFRELE(gcgrp);
1039 
1040 	if (error == EINPROGRESS) {
1041 		if (rtm != NULL)
1042 			freemsg(mp);
1043 		return (error);
1044 	}
1045 	if (rtm != NULL) {
1046 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1047 		if (error != 0) {
1048 			rtm->rtm_errno = error;
1049 			/* Send error ACK */
1050 			ip1dbg(("ip_rts_request: error %d\n", error));
1051 		} else {
1052 			rtm->rtm_flags |= RTF_DONE;
1053 			/* OK ACK already set up by caller except this */
1054 			ip2dbg(("ip_rts_request: OK ACK\n"));
1055 		}
1056 		rts_queue_input(mp, q, af, ipst);
1057 	}
1058 	iocp->ioc_error = error;
1059 	ioc_mp->b_datap->db_type = M_IOCACK;
1060 	if (iocp->ioc_error != 0)
1061 		iocp->ioc_count = 0;
1062 	(connp->conn_recv)(connp, ioc_mp, NULL);
1063 	/* conn was refheld in ip_wput_ioctl. */
1064 	CONN_OPER_PENDING_DONE(connp);
1065 
1066 	return (error);
1067 }
1068 
1069 /*
1070  * Build a reply to the RTM_GET request contained in the given message block
1071  * using the retrieved IRE of the destination address, the parent IRE (if it
1072  * exists) and the address family.
1073  *
1074  * Returns a pointer to a message block containing the reply if successful,
1075  * otherwise NULL is returned.
1076  */
1077 static mblk_t *
1078 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1079 {
1080 	rt_msghdr_t	*rtm;
1081 	rt_msghdr_t	*new_rtm;
1082 	mblk_t		*new_mp;
1083 	int		rtm_addrs;
1084 	int		rtm_flags;
1085 	in6_addr_t	gw_addr_v6;
1086 	tsol_ire_gw_secattr_t *attrp = NULL;
1087 	tsol_gc_t	*gc = NULL;
1088 	tsol_gcgrp_t	*gcgrp = NULL;
1089 	int		sacnt = 0;
1090 
1091 	ASSERT(ire->ire_ipif != NULL);
1092 	rtm = (rt_msghdr_t *)mp->b_rptr;
1093 
1094 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1095 		attrp = sire->ire_gw_secattr;
1096 	else if (ire->ire_gw_secattr != NULL)
1097 		attrp = ire->ire_gw_secattr;
1098 
1099 	if (attrp != NULL) {
1100 		mutex_enter(&attrp->igsa_lock);
1101 		if ((gc = attrp->igsa_gc) != NULL) {
1102 			gcgrp = gc->gc_grp;
1103 			ASSERT(gcgrp != NULL);
1104 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1105 			sacnt = 1;
1106 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1107 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1108 			gc = gcgrp->gcgrp_head;
1109 			sacnt = gcgrp->gcgrp_count;
1110 		}
1111 		mutex_exit(&attrp->igsa_lock);
1112 
1113 		/* do nothing if there's no gc to report */
1114 		if (gc == NULL) {
1115 			ASSERT(sacnt == 0);
1116 			if (gcgrp != NULL) {
1117 				/* we might as well drop the lock now */
1118 				rw_exit(&gcgrp->gcgrp_rwlock);
1119 				gcgrp = NULL;
1120 			}
1121 			attrp = NULL;
1122 		}
1123 
1124 		ASSERT(gc == NULL || (gcgrp != NULL &&
1125 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1126 	}
1127 	ASSERT(sacnt == 0 || gc != NULL);
1128 
1129 	/*
1130 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1131 	 *
1132 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1133 	 * RTA_IFP and RTA_IFA if either is defined, and also
1134 	 * returns RTA_BRD if the appropriate interface is
1135 	 * point-to-point.
1136 	 */
1137 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1138 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1139 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1140 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1141 			rtm_addrs |= RTA_BRD;
1142 	}
1143 
1144 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1145 	if (new_mp == NULL) {
1146 		if (gcgrp != NULL)
1147 			rw_exit(&gcgrp->gcgrp_rwlock);
1148 		return (NULL);
1149 	}
1150 
1151 	/*
1152 	 * We set the destination address, gateway address,
1153 	 * netmask and flags in the RTM_GET response depending
1154 	 * on whether we found a parent IRE or not.
1155 	 * In particular, if we did find a parent IRE during the
1156 	 * recursive search, use that IRE's gateway address.
1157 	 * Otherwise, we use the IRE's source address for the
1158 	 * gateway address.
1159 	 */
1160 	ASSERT(af == AF_INET || af == AF_INET6);
1161 	switch (af) {
1162 	case AF_INET:
1163 		if (sire == NULL) {
1164 			rtm_flags = ire->ire_flags;
1165 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1166 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1167 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1168 			    new_mp, sacnt, gc);
1169 		} else {
1170 			if (sire->ire_flags & RTF_SETSRC)
1171 				rtm_addrs |= RTA_SRC;
1172 
1173 			rtm_flags = sire->ire_flags;
1174 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1175 			    sire->ire_mask, sire->ire_gateway_addr,
1176 			    (sire->ire_flags & RTF_SETSRC) ?
1177 			    sire->ire_src_addr : ire->ire_src_addr,
1178 			    ire->ire_ipif->ipif_pp_dst_addr,
1179 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1180 		}
1181 		break;
1182 	case AF_INET6:
1183 		if (sire == NULL) {
1184 			rtm_flags = ire->ire_flags;
1185 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1186 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1187 			    &ire->ire_src_addr_v6,
1188 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1189 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1190 			    sacnt, gc);
1191 		} else {
1192 			if (sire->ire_flags & RTF_SETSRC)
1193 				rtm_addrs |= RTA_SRC;
1194 
1195 			rtm_flags = sire->ire_flags;
1196 			mutex_enter(&sire->ire_lock);
1197 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1198 			mutex_exit(&sire->ire_lock);
1199 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1200 			    &sire->ire_mask_v6, &gw_addr_v6,
1201 			    (sire->ire_flags & RTF_SETSRC) ?
1202 			    &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1203 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1204 			    ire->ire_ipif, new_mp, sacnt, gc);
1205 		}
1206 		break;
1207 	}
1208 
1209 	if (gcgrp != NULL)
1210 		rw_exit(&gcgrp->gcgrp_rwlock);
1211 
1212 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1213 
1214 	/*
1215 	 * The rtm_msglen, rtm_version and rtm_type fields in
1216 	 * RTM_GET response are filled in by rts_fill_msg.
1217 	 *
1218 	 * rtm_addrs and rtm_flags are filled in based on what
1219 	 * was requested and the state of the IREs looked up
1220 	 * above.
1221 	 *
1222 	 * rtm_inits and rtm_rmx are filled in with metrics
1223 	 * based on whether a parent IRE was found or not.
1224 	 *
1225 	 * TODO: rtm_index and rtm_use should probably be
1226 	 * filled in with something resonable here and not just
1227 	 * copied from the request.
1228 	 */
1229 	new_rtm->rtm_index = rtm->rtm_index;
1230 	new_rtm->rtm_pid = rtm->rtm_pid;
1231 	new_rtm->rtm_seq = rtm->rtm_seq;
1232 	new_rtm->rtm_use = rtm->rtm_use;
1233 	new_rtm->rtm_addrs = rtm_addrs;
1234 	new_rtm->rtm_flags = rtm_flags;
1235 	if (sire == NULL)
1236 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1237 	else
1238 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1239 
1240 	return (new_mp);
1241 }
1242 
1243 /*
1244  * Fill the given if_data_t with interface statistics.
1245  */
1246 static void
1247 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1248 {
1249 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1250 	if_data->ifi_addrlen = 0;		/* media address length */
1251 	if_data->ifi_hdrlen = 0;		/* media header length */
1252 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1253 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1254 	if_data->ifi_baudrate = 0;		/* linespeed */
1255 
1256 	if_data->ifi_ipackets = 0;		/* packets received on if */
1257 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1258 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1259 	if_data->ifi_oerrors = 0;		/* output errors on if */
1260 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1261 	if_data->ifi_ibytes = 0;		/* total number received */
1262 	if_data->ifi_obytes = 0;		/* total number sent */
1263 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1264 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1265 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1266 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1267 						/* protocol. */
1268 }
1269 
1270 /*
1271  * Set the metrics on a forwarding table route.
1272  */
1273 static void
1274 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1275 {
1276 	clock_t		rtt;
1277 	clock_t		rtt_sd;
1278 	ipif_t		*ipif;
1279 	ifrt_t		*ifrt;
1280 	mblk_t		*mp;
1281 	in6_addr_t	gw_addr_v6;
1282 
1283 	/*
1284 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1285 	 * common case of no metrics.
1286 	 */
1287 	if (which == 0)
1288 		return;
1289 	ire->ire_uinfo.iulp_set = B_TRUE;
1290 
1291 	/*
1292 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1293 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1294 	 * microseconds.
1295 	 */
1296 	if (which & RTV_RTT)
1297 		rtt = metrics->rmx_rtt / 1000;
1298 	if (which & RTV_RTTVAR)
1299 		rtt_sd = metrics->rmx_rttvar / 1000;
1300 
1301 	/*
1302 	 * Update the metrics in the IRE itself.
1303 	 */
1304 	mutex_enter(&ire->ire_lock);
1305 	if (which & RTV_MTU)
1306 		ire->ire_max_frag = metrics->rmx_mtu;
1307 	if (which & RTV_RTT)
1308 		ire->ire_uinfo.iulp_rtt = rtt;
1309 	if (which & RTV_SSTHRESH)
1310 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1311 	if (which & RTV_RTTVAR)
1312 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1313 	if (which & RTV_SPIPE)
1314 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1315 	if (which & RTV_RPIPE)
1316 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1317 	mutex_exit(&ire->ire_lock);
1318 
1319 	/*
1320 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1321 	 * reflect the metric change there.
1322 	 */
1323 	ipif = ire->ire_ipif;
1324 	if (ipif == NULL)
1325 		return;
1326 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1327 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1328 	if (ipif->ipif_isv6) {
1329 		mutex_enter(&ire->ire_lock);
1330 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1331 		mutex_exit(&ire->ire_lock);
1332 	}
1333 	mutex_enter(&ipif->ipif_saved_ire_lock);
1334 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1335 		/*
1336 		 * On a given ipif, the triple of address, gateway and mask is
1337 		 * unique for each saved IRE (in the case of ordinary interface
1338 		 * routes, the gateway address is all-zeroes).
1339 		 */
1340 		ifrt = (ifrt_t *)mp->b_rptr;
1341 		if (ipif->ipif_isv6) {
1342 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1343 			    &ire->ire_addr_v6) ||
1344 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1345 			    &gw_addr_v6) ||
1346 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1347 			    &ire->ire_mask_v6))
1348 				continue;
1349 		} else {
1350 			if (ifrt->ifrt_addr != ire->ire_addr ||
1351 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1352 			    ifrt->ifrt_mask != ire->ire_mask)
1353 				continue;
1354 		}
1355 		if (which & RTV_MTU)
1356 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1357 		if (which & RTV_RTT)
1358 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1359 		if (which & RTV_SSTHRESH) {
1360 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1361 			    metrics->rmx_ssthresh;
1362 		}
1363 		if (which & RTV_RTTVAR)
1364 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1365 		if (which & RTV_SPIPE)
1366 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1367 		if (which & RTV_RPIPE)
1368 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1369 		break;
1370 	}
1371 	mutex_exit(&ipif->ipif_saved_ire_lock);
1372 }
1373 
1374 /*
1375  * Get the metrics from a forwarding table route.
1376  */
1377 static int
1378 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1379 {
1380 	int	metrics_set = 0;
1381 
1382 	bzero(metrics, sizeof (rt_metrics_t));
1383 	/*
1384 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1385 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1386 	 * microseconds.
1387 	 */
1388 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1389 	metrics_set |= RTV_RTT;
1390 	metrics->rmx_mtu = ire->ire_max_frag;
1391 	metrics_set |= RTV_MTU;
1392 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1393 	metrics_set |= RTV_SSTHRESH;
1394 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1395 	metrics_set |= RTV_RTTVAR;
1396 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1397 	metrics_set |= RTV_SPIPE;
1398 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1399 	metrics_set |= RTV_RPIPE;
1400 	return (metrics_set);
1401 }
1402 
1403 /*
1404  * Takes a pointer to a routing message and extracts necessary info by looking
1405  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1406  * passed (all of which must be valid).
1407  *
1408  * The bitmask of sockaddrs actually found in the message is returned, or zero
1409  * is returned in the case of an error.
1410  */
1411 static int
1412 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1413     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1414     in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1415     tsol_rtsecattr_t *rtsecattr, int *error)
1416 {
1417 	struct sockaddr *sa;
1418 	int	i;
1419 	int	addr_bits;
1420 	int	length;
1421 	int	found_addrs = 0;
1422 	caddr_t	cp;
1423 	size_t	size;
1424 	struct sockaddr_dl *sdl;
1425 
1426 	*dst_addrp = ipv6_all_zeros;
1427 	*gw_addrp = ipv6_all_zeros;
1428 	*net_maskp = ipv6_all_zeros;
1429 	*authorp = ipv6_all_zeros;
1430 	*if_addrp = ipv6_all_zeros;
1431 	*in_src_addrp = ipv6_all_zeros;
1432 	*indexp = 0;
1433 	*afp = AF_UNSPEC;
1434 	rtsecattr->rtsa_cnt = 0;
1435 	*error = 0;
1436 
1437 	/*
1438 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1439 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1440 	 */
1441 	cp = (caddr_t)&rtm[1];
1442 	length = rtm->rtm_msglen;
1443 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1444 		/*
1445 		 * The address family we are working with starts out as
1446 		 * AF_UNSPEC, but is set to the one specified with the
1447 		 * destination address.
1448 		 *
1449 		 * If the "working" address family that has been set to
1450 		 * something other than AF_UNSPEC, then the address family of
1451 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1452 		 * compatibility with older programs) or must be the same as our
1453 		 * "working" one.
1454 		 *
1455 		 * This code assumes that RTA_DST (1) comes first in the loop.
1456 		 */
1457 		sa = (struct sockaddr *)cp;
1458 		addr_bits = (rtm->rtm_addrs & (1 << i));
1459 		if (addr_bits == 0)
1460 			continue;
1461 		switch (addr_bits) {
1462 		case RTA_DST:
1463 			size = rts_copyfromsockaddr(sa, dst_addrp);
1464 			*afp = sa->sa_family;
1465 			break;
1466 		case RTA_GATEWAY:
1467 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1468 				return (0);
1469 			size = rts_copyfromsockaddr(sa, gw_addrp);
1470 			break;
1471 		case RTA_NETMASK:
1472 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1473 				return (0);
1474 			size = rts_copyfromsockaddr(sa, net_maskp);
1475 			break;
1476 		case RTA_IFP:
1477 			if (sa->sa_family != AF_LINK &&
1478 			    sa->sa_family != AF_UNSPEC)
1479 				return (0);
1480 			sdl = (struct sockaddr_dl *)cp;
1481 			*indexp = sdl->sdl_index;
1482 			size = sizeof (struct sockaddr_dl);
1483 			break;
1484 		case RTA_SRC:
1485 			/* Source address of the incoming packet */
1486 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1487 			*afp = sa->sa_family;
1488 			break;
1489 		case RTA_IFA:
1490 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1491 				return (0);
1492 			size = rts_copyfromsockaddr(sa, if_addrp);
1493 			break;
1494 		case RTA_AUTHOR:
1495 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1496 				return (0);
1497 			size = rts_copyfromsockaddr(sa, authorp);
1498 			break;
1499 		default:
1500 			return (0);
1501 		}
1502 		if (size == 0)
1503 			return (0);
1504 		cp += size;
1505 		found_addrs |= addr_bits;
1506 	}
1507 
1508 	/*
1509 	 * Parse the routing message and look for any security-
1510 	 * related attributes for the route.  For each valid
1511 	 * attribute, allocate/obtain the corresponding kernel
1512 	 * route security attributes.
1513 	 */
1514 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1515 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1516 
1517 	return (found_addrs);
1518 }
1519 
1520 /*
1521  * Fills the message with the given info.
1522  */
1523 static void
1524 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1525     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1526     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1527 {
1528 	rt_msghdr_t	*rtm;
1529 	sin_t		*sin;
1530 	size_t		data_size, header_size;
1531 	uchar_t		*cp;
1532 	int		i;
1533 
1534 	ASSERT(mp != NULL);
1535 	ASSERT(sacnt == 0 || gc != NULL);
1536 	/*
1537 	 * First find the type of the message
1538 	 * and its length.
1539 	 */
1540 	header_size = rts_header_msg_size(type);
1541 	/*
1542 	 * Now find the size of the data
1543 	 * that follows the message header.
1544 	 */
1545 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1546 
1547 	rtm = (rt_msghdr_t *)mp->b_rptr;
1548 	mp->b_wptr = &mp->b_rptr[header_size];
1549 	cp = mp->b_wptr;
1550 	bzero(cp, data_size);
1551 	for (i = 0; i < RTA_NUMBITS; i++) {
1552 		sin = (sin_t *)cp;
1553 		switch (rtm_addrs & (1 << i)) {
1554 		case RTA_DST:
1555 			sin->sin_addr.s_addr = dst;
1556 			sin->sin_family = AF_INET;
1557 			cp += sizeof (sin_t);
1558 			break;
1559 		case RTA_GATEWAY:
1560 			sin->sin_addr.s_addr = gateway;
1561 			sin->sin_family = AF_INET;
1562 			cp += sizeof (sin_t);
1563 			break;
1564 		case RTA_NETMASK:
1565 			sin->sin_addr.s_addr = mask;
1566 			sin->sin_family = AF_INET;
1567 			cp += sizeof (sin_t);
1568 			break;
1569 		case RTA_IFP:
1570 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1571 			break;
1572 		case RTA_IFA:
1573 		case RTA_SRC:
1574 			sin->sin_addr.s_addr = src_addr;
1575 			sin->sin_family = AF_INET;
1576 			cp += sizeof (sin_t);
1577 			break;
1578 		case RTA_AUTHOR:
1579 			sin->sin_addr.s_addr = author;
1580 			sin->sin_family = AF_INET;
1581 			cp += sizeof (sin_t);
1582 			break;
1583 		case RTA_BRD:
1584 			/*
1585 			 * RTA_BRD is used typically to specify a point-to-point
1586 			 * destination address.
1587 			 */
1588 			sin->sin_addr.s_addr = brd_addr;
1589 			sin->sin_family = AF_INET;
1590 			cp += sizeof (sin_t);
1591 			break;
1592 		}
1593 	}
1594 
1595 	if (gc != NULL) {
1596 		rtm_ext_t *rtm_ext;
1597 		struct rtsa_s *rp_dst;
1598 		tsol_rtsecattr_t *rsap;
1599 		int i;
1600 
1601 		ASSERT(gc->gc_grp != NULL);
1602 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1603 		ASSERT(sacnt > 0);
1604 
1605 		rtm_ext = (rtm_ext_t *)cp;
1606 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1607 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1608 
1609 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1610 		rsap->rtsa_cnt = sacnt;
1611 		rp_dst = rsap->rtsa_attr;
1612 
1613 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1614 			ASSERT(gc->gc_db != NULL);
1615 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1616 		}
1617 		cp = (uchar_t *)rp_dst;
1618 	}
1619 
1620 	mp->b_wptr = cp;
1621 	mp->b_cont = NULL;
1622 	/*
1623 	 * set the fields that are common to
1624 	 * to different messages.
1625 	 */
1626 	rtm->rtm_msglen = (short)(header_size + data_size);
1627 	rtm->rtm_version = RTM_VERSION;
1628 	rtm->rtm_type = (uchar_t)type;
1629 }
1630 
1631 /*
1632  * Allocates and initializes a routing socket message.
1633  */
1634 mblk_t *
1635 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1636 {
1637 	size_t	length;
1638 	mblk_t	*mp;
1639 
1640 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1641 	mp = allocb(length, BPRI_MED);
1642 	if (mp == NULL)
1643 		return (mp);
1644 	bzero(mp->b_rptr, length);
1645 	return (mp);
1646 }
1647 
1648 /*
1649  * Returns the size of the routing
1650  * socket message header size.
1651  */
1652 size_t
1653 rts_header_msg_size(int type)
1654 {
1655 	switch (type) {
1656 	case RTM_DELADDR:
1657 	case RTM_NEWADDR:
1658 		return (sizeof (ifa_msghdr_t));
1659 	case RTM_IFINFO:
1660 		return (sizeof (if_msghdr_t));
1661 	default:
1662 		return (sizeof (rt_msghdr_t));
1663 	}
1664 }
1665 
1666 /*
1667  * Returns the size of the message needed with the given rtm_addrs and family.
1668  *
1669  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1670  * of the same family (currently either AF_INET or AF_INET6).
1671  */
1672 size_t
1673 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1674 {
1675 	int	i;
1676 	size_t	length = 0;
1677 
1678 	for (i = 0; i < RTA_NUMBITS; i++) {
1679 		switch (rtm_addrs & (1 << i)) {
1680 		case RTA_IFP:
1681 			length += sizeof (struct sockaddr_dl);
1682 			break;
1683 		case RTA_DST:
1684 		case RTA_GATEWAY:
1685 		case RTA_NETMASK:
1686 		case RTA_SRC:
1687 		case RTA_IFA:
1688 		case RTA_AUTHOR:
1689 		case RTA_BRD:
1690 			ASSERT(af == AF_INET || af == AF_INET6);
1691 			switch (af) {
1692 			case AF_INET:
1693 				length += sizeof (sin_t);
1694 				break;
1695 			case AF_INET6:
1696 				length += sizeof (sin6_t);
1697 				break;
1698 			}
1699 			break;
1700 		}
1701 	}
1702 	if (sacnt > 0)
1703 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1704 
1705 	return (length);
1706 }
1707 
1708 /*
1709  * This routine is called to generate a message to the routing
1710  * socket indicating that a redirect has occured, a routing lookup
1711  * has failed, or that a protocol has detected timeouts to a particular
1712  * destination. This routine is called for message types RTM_LOSING,
1713  * RTM_REDIRECT, and RTM_MISS.
1714  */
1715 void
1716 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1717     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1718     ip_stack_t *ipst)
1719 {
1720 	rt_msghdr_t	*rtm;
1721 	mblk_t		*mp;
1722 
1723 	if (rtm_addrs == 0)
1724 		return;
1725 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1726 	if (mp == NULL)
1727 		return;
1728 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1729 	    author, NULL, mp, 0, NULL);
1730 	rtm = (rt_msghdr_t *)mp->b_rptr;
1731 	rtm->rtm_flags = flags;
1732 	rtm->rtm_errno = error;
1733 	rtm->rtm_flags |= RTF_DONE;
1734 	rtm->rtm_addrs = rtm_addrs;
1735 	rts_queue_input(mp, NULL, AF_INET, ipst);
1736 }
1737 
1738 /*
1739  * This routine is called to generate a message to the routing
1740  * socket indicating that the status of a network interface has changed.
1741  * Message type generated RTM_IFINFO.
1742  */
1743 void
1744 ip_rts_ifmsg(const ipif_t *ipif)
1745 {
1746 	if_msghdr_t	*ifm;
1747 	mblk_t		*mp;
1748 	sa_family_t	af;
1749 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1750 
1751 	/*
1752 	 * This message should be generated only
1753 	 * when the physical device is changing
1754 	 * state.
1755 	 */
1756 	if (ipif->ipif_id != 0)
1757 		return;
1758 	if (ipif->ipif_isv6) {
1759 		af = AF_INET6;
1760 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1761 		if (mp == NULL)
1762 			return;
1763 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1764 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1765 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1766 	} else {
1767 		af = AF_INET;
1768 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1769 		if (mp == NULL)
1770 			return;
1771 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1772 		    0, NULL);
1773 	}
1774 	ifm = (if_msghdr_t *)mp->b_rptr;
1775 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1776 	ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1777 	    ipif->ipif_ill->ill_phyint->phyint_flags;
1778 	rts_getifdata(&ifm->ifm_data, ipif);
1779 	ifm->ifm_addrs = RTA_IFP;
1780 	rts_queue_input(mp, NULL, af, ipst);
1781 }
1782 
1783 /*
1784  * This is called to generate messages to the routing socket
1785  * indicating a network interface has had addresses associated with it.
1786  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1787  */
1788 void
1789 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif)
1790 {
1791 	int		pass;
1792 	int		ncmd;
1793 	int		rtm_addrs;
1794 	mblk_t		*mp;
1795 	ifa_msghdr_t	*ifam;
1796 	rt_msghdr_t	*rtm;
1797 	sa_family_t	af;
1798 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1799 
1800 	if (ipif->ipif_isv6)
1801 		af = AF_INET6;
1802 	else
1803 		af = AF_INET;
1804 	/*
1805 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1806 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1807 	 */
1808 	for (pass = 1; pass < 3; pass++) {
1809 		if ((cmd == RTM_ADD && pass == 1) ||
1810 		    (cmd == RTM_DELETE && pass == 2)) {
1811 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1812 
1813 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1814 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1815 			if (mp == NULL)
1816 				continue;
1817 			switch (af) {
1818 			case AF_INET:
1819 				rts_fill_msg(ncmd, rtm_addrs, 0,
1820 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1821 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
1822 				    0, NULL);
1823 				break;
1824 			case AF_INET6:
1825 				rts_fill_msg_v6(ncmd, rtm_addrs,
1826 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1827 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1828 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1829 				    ipif, mp, 0, NULL);
1830 				break;
1831 			}
1832 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1833 			ifam->ifam_index =
1834 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1835 			ifam->ifam_metric = ipif->ipif_metric;
1836 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1837 			ifam->ifam_addrs = rtm_addrs;
1838 			rts_queue_input(mp, NULL, af, ipst);
1839 		}
1840 		if ((cmd == RTM_ADD && pass == 2) ||
1841 		    (cmd == RTM_DELETE && pass == 1)) {
1842 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1843 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1844 			if (mp == NULL)
1845 				continue;
1846 			switch (af) {
1847 			case AF_INET:
1848 				rts_fill_msg(cmd, rtm_addrs,
1849 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1850 				    0, 0, 0, NULL, mp, 0, NULL);
1851 				break;
1852 			case AF_INET6:
1853 				rts_fill_msg_v6(cmd, rtm_addrs,
1854 				    &ipif->ipif_v6lcl_addr,
1855 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1856 				    &ipv6_all_zeros, &ipv6_all_zeros,
1857 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1858 				break;
1859 			}
1860 			rtm = (rt_msghdr_t *)mp->b_rptr;
1861 			rtm->rtm_index =
1862 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1863 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1864 			rtm->rtm_errno = error;
1865 			if (error == 0)
1866 				rtm->rtm_flags |= RTF_DONE;
1867 			rtm->rtm_addrs = rtm_addrs;
1868 			rts_queue_input(mp, NULL, af, ipst);
1869 		}
1870 	}
1871 }
1872 
1873 /*
1874  * Based on the address family specified in a sockaddr, copy the address field
1875  * into an in6_addr_t.
1876  *
1877  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1878  * compatibility with programs that leave the family cleared in the sockaddr.
1879  * Callers of rts_copyfromsockaddr should check the family themselves if they
1880  * wish to verify its value.
1881  *
1882  * In the case of AF_INET6, a check is made to ensure that address is not an
1883  * IPv4-mapped address.
1884  */
1885 size_t
1886 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1887 {
1888 	switch (sa->sa_family) {
1889 	case AF_INET:
1890 	case AF_UNSPEC:
1891 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1892 		return (sizeof (sin_t));
1893 	case AF_INET6:
1894 		*addrp = ((sin6_t *)sa)->sin6_addr;
1895 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1896 			return (0);
1897 		return (sizeof (sin6_t));
1898 	default:
1899 		return (0);
1900 	}
1901 }
1902