xref: /titanic_51/usr/src/uts/common/inet/ip/ip_rts.c (revision 450b6d2173691fa21e232ce57199dd9925b4da6e)
1 /*
2  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
41 /*
42  * This file contains routines that process routing socket requests.
43  */
44 
45 #include <sys/types.h>
46 #include <sys/stream.h>
47 #include <sys/stropts.h>
48 #include <sys/ddi.h>
49 #include <sys/cmn_err.h>
50 #include <sys/debug.h>
51 #include <sys/policy.h>
52 #include <sys/zone.h>
53 
54 #include <sys/systm.h>
55 #include <sys/param.h>
56 #include <sys/socket.h>
57 #include <sys/strsun.h>
58 #include <net/if.h>
59 #include <net/route.h>
60 #include <netinet/in.h>
61 #include <net/if_dl.h>
62 #include <netinet/ip6.h>
63 
64 #include <inet/common.h>
65 #include <inet/ip.h>
66 #include <inet/ip6.h>
67 #include <inet/ip_if.h>
68 #include <inet/ip_ire.h>
69 #include <inet/ip_ftable.h>
70 #include <inet/ip_rts.h>
71 
72 #include <inet/ipclassifier.h>
73 
74 #include <sys/tsol/tndb.h>
75 #include <sys/tsol/tnet.h>
76 
/*
 * Total size of a routing message of the given `type': the sum of what
 * rts_header_msg_size() reports for the message type and what
 * rts_data_msg_size() reports for the requested addresses (`rtm_addrs'),
 * address family (`af') and security attribute count (`sacnt').
 */
#define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
	(rts_header_msg_size(type) + rts_data_msg_size(rtm_addrs, af, sacnt))
79 
80 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
81 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
82     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
83     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
84 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
85     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
86     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
87     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error);
88 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
89 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
90 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
91     sa_family_t af);
92 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
93 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
94 
95 /*
96  * Send `mp' to all eligible routing queues.  A queue is ineligible if:
97  *
98  *  1. SO_USELOOPBACK is off and it is not the originating queue.
99  *  2. RTAW_UNDER_IPMP is on and RTSQ_UNDER_IPMP is clear in `flags'.
100  *  3. RTAW_UNDER_IPMP is off and RTSQ_NORMAL is clear in `flags'.
101  *  4. It is not the same address family as `af', and `af' isn't AF_UNSPEC.
102  */
103 void
104 rts_queue_input(mblk_t *mp, conn_t *o_connp, sa_family_t af, uint_t flags,
105     ip_stack_t *ipst)
106 {
107 	mblk_t	*mp1;
108 	conn_t 	*connp, *next_connp;
109 
110 	/*
111 	 * Since we don't have an ill_t here, RTSQ_DEFAULT must already be
112 	 * resolved to one or more of RTSQ_NORMAL|RTSQ_UNDER_IPMP by now.
113 	 */
114 	ASSERT(!(flags & RTSQ_DEFAULT));
115 
116 	mutex_enter(&ipst->ips_rts_clients->connf_lock);
117 	connp = ipst->ips_rts_clients->connf_head;
118 
119 	for (; connp != NULL; connp = next_connp) {
120 		next_connp = connp->conn_next;
121 
122 		/*
123 		 * If there was a family specified when this routing socket was
124 		 * created and it doesn't match the family of the message to
125 		 * copy, then continue.
126 		 */
127 		if ((connp->conn_proto != AF_UNSPEC) &&
128 		    (connp->conn_proto != af))
129 			continue;
130 
131 		/*
132 		 * Queue the message only if the conn_t and flags match.
133 		 */
134 		if (connp->conn_rtaware & RTAW_UNDER_IPMP) {
135 			if (!(flags & RTSQ_UNDER_IPMP))
136 				continue;
137 		} else {
138 			if (!(flags & RTSQ_NORMAL))
139 				continue;
140 		}
141 
142 		/*
143 		 * For the originating queue, we only copy the message upstream
144 		 * if loopback is set.  For others reading on the routing
145 		 * socket, we check if there is room upstream for a copy of the
146 		 * message.
147 		 */
148 		if ((o_connp == connp) && connp->conn_loopback == 0) {
149 			connp = connp->conn_next;
150 			continue;
151 		}
152 		CONN_INC_REF(connp);
153 		mutex_exit(&ipst->ips_rts_clients->connf_lock);
154 		/* Pass to rts_input */
155 		if ((IPCL_IS_NONSTR(connp) && !PROTO_FLOW_CNTRLD(connp))||
156 		    (!IPCL_IS_NONSTR(connp) &&
157 		    canputnext(CONNP_TO_RQ(connp)))) {
158 			mp1 = dupmsg(mp);
159 			if (mp1 == NULL)
160 				mp1 = copymsg(mp);
161 			if (mp1 != NULL)
162 				(connp->conn_recv)(connp, mp1, NULL);
163 		}
164 
165 		mutex_enter(&ipst->ips_rts_clients->connf_lock);
166 		/* reload next_connp since conn_next may have changed */
167 		next_connp = connp->conn_next;
168 		CONN_DEC_REF(connp);
169 	}
170 	mutex_exit(&ipst->ips_rts_clients->connf_lock);
171 	freemsg(mp);
172 }
173 
174 /*
175  * Takes an ire and sends an ack to all the routing sockets. This
176  * routine is used
177  * - when a route is created/deleted through the ioctl interface.
178  * - when ire_expire deletes a stale redirect
179  */
180 void
181 ip_rts_rtmsg(int type, ire_t *ire, int error, ip_stack_t *ipst)
182 {
183 	mblk_t		*mp;
184 	rt_msghdr_t	*rtm;
185 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
186 	sa_family_t	af;
187 	in6_addr_t	gw_addr_v6;
188 
189 	if (ire == NULL)
190 		return;
191 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
192 	    ire->ire_ipversion == IPV6_VERSION);
193 
194 	if (ire->ire_flags & RTF_SETSRC)
195 		rtm_addrs |= RTA_SRC;
196 
197 	switch (ire->ire_ipversion) {
198 	case IPV4_VERSION:
199 		af = AF_INET;
200 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
201 		if (mp == NULL)
202 			return;
203 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
204 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
205 		    0, NULL);
206 		break;
207 	case IPV6_VERSION:
208 		af = AF_INET6;
209 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
210 		if (mp == NULL)
211 			return;
212 		mutex_enter(&ire->ire_lock);
213 		gw_addr_v6 = ire->ire_gateway_addr_v6;
214 		mutex_exit(&ire->ire_lock);
215 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
216 		    &ire->ire_mask_v6, &gw_addr_v6,
217 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
218 		    NULL, mp, 0, NULL);
219 		break;
220 	}
221 	rtm = (rt_msghdr_t *)mp->b_rptr;
222 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
223 	rtm->rtm_addrs = rtm_addrs;
224 	rtm->rtm_flags = ire->ire_flags;
225 	if (error != 0)
226 		rtm->rtm_errno = error;
227 	else
228 		rtm->rtm_flags |= RTF_DONE;
229 	rts_queue_input(mp, NULL, af, RTSQ_ALL, ipst);
230 }
231 
232 /* ARGSUSED */
233 static void
234 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
235 {
236 	(void) ip_rts_request(q, mp, DB_CRED(mp));
237 }
238 
239 /*
240  * This is a call from the RTS module
241  * indicating that this is a Routing Socket
242  * Stream. Insert this conn_t in routing
243  * socket client list.
244  */
245 void
246 ip_rts_register(conn_t *connp)
247 {
248 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
249 
250 	connp->conn_loopback = 1;
251 	ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
252 }
253 
254 /*
255  * This is a call from the RTS module indicating that it is closing.
256  */
257 void
258 ip_rts_unregister(conn_t *connp)
259 {
260 	ipcl_hash_remove(connp);
261 }
262 
263 /*
264  * Processes requests received on a routing socket. It extracts all the
265  * arguments and calls the appropriate function to process the request.
266  *
267  * RTA_SRC bit flag requests are sent by 'route -setsrc'.
268  *
269  * In general, this function does not consume the message supplied but rather
270  * sends the message upstream with an appropriate UNIX errno.
271  *
272  * We may need to restart this operation if the ipif cannot be looked up
273  * due to an exclusive operation that is currently in progress. The restart
274  * entry point is ip_rts_request_retry. While the request is enqueud in the
275  * ipsq the ioctl could be aborted and the conn close. To ensure that we don't
276  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
277  * released at the completion of the rts ioctl at the end of this function
278  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
279  * conn close occurs in conn_ioctl_cleanup.
280  */
281 int
282 ip_rts_request_common(queue_t *q, mblk_t *mp, conn_t *connp, cred_t *ioc_cr)
283 {
284 	rt_msghdr_t	*rtm = NULL;
285 	in6_addr_t	dst_addr_v6;
286 	in6_addr_t	src_addr_v6;
287 	in6_addr_t	gw_addr_v6;
288 	in6_addr_t	net_mask_v6;
289 	in6_addr_t	author_v6;
290 	in6_addr_t	if_addr_v6;
291 	mblk_t		*mp1, *ioc_mp = mp;
292 	ire_t		*ire = NULL;
293 	ire_t		*sire = NULL;
294 	int		error = 0;
295 	int		match_flags = MATCH_IRE_DSTONLY;
296 	int		match_flags_local = MATCH_IRE_TYPE | MATCH_IRE_GW;
297 	int		found_addrs;
298 	sa_family_t	af;
299 	ipaddr_t	dst_addr;
300 	ipaddr_t	gw_addr;
301 	ipaddr_t	src_addr;
302 	ipaddr_t	net_mask;
303 	ushort_t	index;
304 	ipif_t		*ipif = NULL;
305 	ipif_t		*tmp_ipif = NULL;
306 	IOCP		iocp = (IOCP)mp->b_rptr;
307 	boolean_t	gcgrp_xtraref = B_FALSE;
308 	tsol_gcgrp_addr_t ga;
309 	tsol_rtsecattr_t rtsecattr;
310 	struct rtsa_s	*rtsap = NULL;
311 	tsol_gcgrp_t	*gcgrp = NULL;
312 	tsol_gc_t	*gc = NULL;
313 	ts_label_t	*tsl = NULL;
314 	zoneid_t	zoneid;
315 	ip_stack_t	*ipst;
316 
317 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
318 
319 	zoneid = connp->conn_zoneid;
320 	ipst = connp->conn_netstack->netstack_ip;
321 
322 	ASSERT(mp->b_cont != NULL);
323 	/* ioc_mp holds mp */
324 	mp = mp->b_cont;
325 
326 	/*
327 	 * The Routing Socket data starts on
328 	 * next block. If there is no next block
329 	 * this is an indication from routing module
330 	 * that it is a routing socket stream queue.
331 	 * We need to support that for compatibility with SDP since
332 	 * it has a contract private interface to use IP_IOC_RTS_REQUEST.
333 	 */
334 	if (mp->b_cont == NULL) {
335 		/*
336 		 * This is a message from SDP
337 		 * indicating that this is a Routing Socket
338 		 * Stream. Insert this conn_t in routing
339 		 * socket client list.
340 		 */
341 		connp->conn_loopback = 1;
342 		ipcl_hash_insert_wildcard(ipst->ips_rts_clients, connp);
343 		goto done;
344 	}
345 	mp1 = dupmsg(mp->b_cont);
346 	if (mp1 == NULL) {
347 		error  = ENOBUFS;
348 		goto done;
349 	}
350 	mp = mp1;
351 
352 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
353 		freemsg(mp);
354 		error =  EINVAL;
355 		goto done;
356 	}
357 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
358 		freemsg(mp);
359 		error = EINVAL;
360 		goto done;
361 	}
362 
363 	/*
364 	 * Check the routing message for basic consistency including the
365 	 * version number and that the number of octets written is the same
366 	 * as specified by the rtm_msglen field.
367 	 *
368 	 * At this point, an error can be delivered back via rtm_errno.
369 	 */
370 	rtm = (rt_msghdr_t *)mp->b_rptr;
371 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
372 		error = EINVAL;
373 		goto done;
374 	}
375 	if (rtm->rtm_version != RTM_VERSION) {
376 		error = EPROTONOSUPPORT;
377 		goto done;
378 	}
379 
380 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
381 	if (rtm->rtm_type != RTM_GET &&
382 	    rtm->rtm_type != RTM_RESOLVE &&
383 	    (ioc_cr == NULL ||
384 	    secpolicy_ip_config(ioc_cr, B_FALSE) != 0)) {
385 		error = EPERM;
386 		goto done;
387 	}
388 
389 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
390 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &af, &rtsecattr,
391 	    &error);
392 
393 	if (error != 0)
394 		goto done;
395 
396 	if ((found_addrs & RTA_DST) == 0) {
397 		error = EINVAL;
398 		goto done;
399 	}
400 
401 	/*
402 	 * Based on the address family of the destination address, determine
403 	 * the destination, gateway and netmask and return the appropriate error
404 	 * if an unknown address family was specified (following the errno
405 	 * values that 4.4BSD-Lite2 returns.)
406 	 */
407 	switch (af) {
408 	case AF_INET:
409 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
410 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
411 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
412 		if (((found_addrs & RTA_NETMASK) == 0) ||
413 		    (rtm->rtm_flags & RTF_HOST))
414 			net_mask = IP_HOST_MASK;
415 		else
416 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
417 		break;
418 	case AF_INET6:
419 		if (((found_addrs & RTA_NETMASK) == 0) ||
420 		    (rtm->rtm_flags & RTF_HOST))
421 			net_mask_v6 = ipv6_all_ones;
422 		break;
423 	default:
424 		/*
425 		 * These errno values are meant to be compatible with
426 		 * 4.4BSD-Lite2 for the given message types.
427 		 */
428 		switch (rtm->rtm_type) {
429 		case RTM_ADD:
430 		case RTM_DELETE:
431 			error = ESRCH;
432 			goto done;
433 		case RTM_GET:
434 		case RTM_CHANGE:
435 			error = EAFNOSUPPORT;
436 			goto done;
437 		default:
438 			error = EOPNOTSUPP;
439 			goto done;
440 		}
441 	}
442 
443 	/*
444 	 * At this point, the address family must be something known.
445 	 */
446 	ASSERT(af == AF_INET || af == AF_INET6);
447 
448 	if (index != 0) {
449 		ill_t   *ill;
450 lookup:
451 		/*
452 		 * IPC must be refheld somewhere in ip_wput_nondata or
453 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
454 		 * If ILL_CHANGING the request is queued in the ipsq.
455 		 */
456 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
457 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error,
458 		    ipst);
459 		if (ill == NULL) {
460 			if (error != EINPROGRESS)
461 				error = EINVAL;
462 			goto done;
463 		}
464 
465 		/*
466 		 * Since all interfaces in an IPMP group must be equivalent,
467 		 * we prevent changes to a specific underlying interface's
468 		 * routing configuration.  However, for backward compatibility,
469 		 * we interpret a request to add a route on an underlying
470 		 * interface as a request to add a route on its IPMP interface.
471 		 */
472 		if (IS_UNDER_IPMP(ill)) {
473 			switch (rtm->rtm_type) {
474 			case RTM_CHANGE:
475 			case RTM_DELETE:
476 				ill_refrele(ill);
477 				error = EINVAL;
478 				goto done;
479 			case RTM_ADD:
480 				index = ipmp_ill_get_ipmp_ifindex(ill);
481 				ill_refrele(ill);
482 				if (index == 0) {
483 					error = EINVAL;
484 					goto done;
485 				}
486 				goto lookup;
487 			}
488 		}
489 
490 		ipif = ipif_get_next_ipif(NULL, ill);
491 		ill_refrele(ill);
492 		match_flags |= MATCH_IRE_ILL;
493 	}
494 
495 	/*
496 	 * If a netmask was supplied in the message, then subsequent route
497 	 * lookups will attempt to match on the netmask as well.
498 	 */
499 	if ((found_addrs & RTA_NETMASK) != 0)
500 		match_flags |= MATCH_IRE_MASK;
501 
502 	/*
503 	 * We only process any passed-in route security attributes for
504 	 * either RTM_ADD or RTM_CHANGE message; We overload them
505 	 * to do an RTM_GET as a different label; ignore otherwise.
506 	 */
507 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE ||
508 	    rtm->rtm_type == RTM_GET) {
509 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
510 		if (rtsecattr.rtsa_cnt > 0)
511 			rtsap = &rtsecattr.rtsa_attr[0];
512 	}
513 
514 	switch (rtm->rtm_type) {
515 	case RTM_ADD:
516 		/* if we are adding a route, gateway is a must */
517 		if ((found_addrs & RTA_GATEWAY) == 0) {
518 			error = EINVAL;
519 			goto done;
520 		}
521 
522 		/* Multirouting does not support net routes. */
523 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
524 		    RTF_MULTIRT) {
525 			error = EADDRNOTAVAIL;
526 			goto done;
527 		}
528 
529 		/*
530 		 * Multirouting and user-specified source addresses
531 		 * do not support interface based routing.
532 		 * Assigning a source address to an interface based
533 		 * route is achievable by plumbing a new ipif and
534 		 * setting up the interface route via this ipif,
535 		 * though.
536 		 */
537 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
538 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
539 				error = EADDRNOTAVAIL;
540 				goto done;
541 			}
542 		}
543 
544 		switch (af) {
545 		case AF_INET:
546 			if (src_addr != INADDR_ANY) {
547 				/*
548 				 * The RTF_SETSRC flag is present, check that
549 				 * the supplied src address is not the loopback
550 				 * address. This would produce martian packets.
551 				 */
552 				if (src_addr == htonl(INADDR_LOOPBACK)) {
553 					error = EINVAL;
554 					goto done;
555 				}
556 				/*
557 				 * Also check that the supplied address is a
558 				 * valid, local one.
559 				 */
560 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
561 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
562 				    ip_rts_request_retry, &error, ipst);
563 				if (tmp_ipif == NULL) {
564 					if (error != EINPROGRESS)
565 						error = EADDRNOTAVAIL;
566 					goto done;
567 				}
568 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
569 				    (tmp_ipif->ipif_flags &
570 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
571 					error = EINVAL;
572 					goto done;
573 				}
574 			} else {
575 				/*
576 				 * The RTF_SETSRC modifier must be associated
577 				 * to a non-null source address.
578 				 */
579 				if (rtm->rtm_flags & RTF_SETSRC) {
580 					error = EINVAL;
581 					goto done;
582 				}
583 			}
584 
585 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
586 			    rtm->rtm_flags, ipif, &ire, B_FALSE,
587 			    WR(q), ioc_mp, ip_rts_request_retry,
588 			    rtsap, ipst);
589 			if (ipif != NULL)
590 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
591 			break;
592 		case AF_INET6:
593 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
594 				/*
595 				 * The RTF_SETSRC flag is present, check that
596 				 * the supplied src address is not the loopback
597 				 * address. This would produce martian packets.
598 				 */
599 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
600 					error = EINVAL;
601 					goto done;
602 				}
603 				/*
604 				 * Also check that the supplied address is a
605 				 * valid, local one.
606 				 */
607 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
608 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
609 				    ip_rts_request_retry, &error, ipst);
610 				if (tmp_ipif == NULL) {
611 					if (error != EINPROGRESS)
612 						error = EADDRNOTAVAIL;
613 					goto done;
614 				}
615 
616 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
617 				    (tmp_ipif->ipif_flags &
618 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
619 					error = EINVAL;
620 					goto done;
621 				}
622 
623 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
624 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
625 				    ipif, &ire, WR(q), ioc_mp,
626 				    ip_rts_request_retry, rtsap, ipst);
627 				break;
628 			}
629 			/*
630 			 * The RTF_SETSRC modifier must be associated
631 			 * to a non-null source address.
632 			 */
633 			if (rtm->rtm_flags & RTF_SETSRC) {
634 				error = EINVAL;
635 				goto done;
636 			}
637 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
638 			    &gw_addr_v6, NULL, rtm->rtm_flags,
639 			    ipif, &ire, WR(q), ioc_mp,
640 			    ip_rts_request_retry, rtsap, ipst);
641 			if (ipif != NULL)
642 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
643 			break;
644 		}
645 		if (error != 0)
646 			goto done;
647 		ASSERT(ire != NULL);
648 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
649 		break;
650 	case RTM_DELETE:
651 		/* if we are deleting a route, gateway is a must */
652 		if ((found_addrs & RTA_GATEWAY) == 0) {
653 			error = EINVAL;
654 			goto done;
655 		}
656 		/*
657 		 * The RTF_SETSRC modifier does not make sense
658 		 * when deleting a route.
659 		 */
660 		if (rtm->rtm_flags & RTF_SETSRC) {
661 			error = EINVAL;
662 			goto done;
663 		}
664 
665 		switch (af) {
666 		case AF_INET:
667 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
668 			    found_addrs, rtm->rtm_flags, ipif, B_FALSE,
669 			    WR(q), ioc_mp, ip_rts_request_retry, ipst);
670 			break;
671 		case AF_INET6:
672 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
673 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
674 			    WR(q), ioc_mp, ip_rts_request_retry, ipst);
675 			break;
676 		}
677 		break;
678 	case RTM_GET:
679 	case RTM_CHANGE:
680 		/*
681 		 * In the case of RTM_GET, the forwarding table should be
682 		 * searched recursively with default being matched if the
683 		 * specific route doesn't exist.  Also, if a gateway was
684 		 * specified then the gateway address must also be matched.
685 		 *
686 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
687 		 * is the new gateway address so matching on the gateway address
688 		 * is not done.  This can lead to ambiguity when looking up the
689 		 * route to change as usually only the destination (and netmask,
690 		 * if supplied) is used for the lookup.  However if a RTA_IFP
691 		 * sockaddr is also supplied, it can disambiguate which route to
692 		 * change provided the ambiguous routes are tied to distinct
693 		 * ill's (or interface indices).  If the routes are not tied to
694 		 * any particular interfaces (for example, with traditional
695 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
696 		 * it won't match any such routes.
697 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
698 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
699 		 */
700 		if (((found_addrs & RTA_SRC) != 0) &&
701 		    ((rtm->rtm_type == RTM_GET) ||
702 		    !(rtm->rtm_flags & RTF_SETSRC))) {
703 			error = EOPNOTSUPP;
704 			goto done;
705 		}
706 
707 		if (rtm->rtm_type == RTM_GET) {
708 			match_flags |=
709 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE |
710 			    MATCH_IRE_SECATTR);
711 			match_flags_local |= MATCH_IRE_SECATTR;
712 			if ((found_addrs & RTA_GATEWAY) != 0)
713 				match_flags |= MATCH_IRE_GW;
714 			if (ioc_cr)
715 				tsl = crgetlabel(ioc_cr);
716 			if (rtsap != NULL) {
717 				if (rtsa_validate(rtsap) != 0) {
718 					error = EINVAL;
719 					goto done;
720 				}
721 				if (tsl != NULL &&
722 				    crgetzoneid(ioc_cr) != GLOBAL_ZONEID &&
723 				    (tsl->tsl_doi != rtsap->rtsa_doi ||
724 				    !bldominates(&tsl->tsl_label,
725 				    &rtsap->rtsa_slrange.lower_bound))) {
726 					error = EPERM;
727 					goto done;
728 				}
729 				tsl = labelalloc(
730 				    &rtsap->rtsa_slrange.lower_bound,
731 				    rtsap->rtsa_doi, KM_NOSLEEP);
732 			}
733 		}
734 		if (rtm->rtm_type == RTM_CHANGE) {
735 			if ((found_addrs & RTA_GATEWAY) &&
736 			    (rtm->rtm_flags & RTF_SETSRC)) {
737 				/*
738 				 * Do not want to change the gateway,
739 				 * but rather the source address.
740 				 */
741 				match_flags |= MATCH_IRE_GW;
742 			}
743 		}
744 
745 		/*
746 		 * If the netmask is all ones (either as supplied or as derived
747 		 * above), then first check for an IRE_LOOPBACK or
748 		 * IRE_LOCAL entry.
749 		 *
750 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
751 		 * entry, then look in the forwarding table.
752 		 */
753 		switch (af) {
754 		case AF_INET:
755 			if (net_mask == IP_HOST_MASK) {
756 				ire = ire_ctable_lookup(dst_addr, gw_addr,
757 				    IRE_LOCAL | IRE_LOOPBACK, NULL, zoneid,
758 				    tsl, match_flags_local, ipst);
759 				/*
760 				 * If we found an IRE_LOCAL, make sure
761 				 * it is one that would be used by this
762 				 * zone to send packets.
763 				 */
764 				if (ire != NULL &&
765 				    ire->ire_type == IRE_LOCAL &&
766 				    ipst->ips_ip_restrict_interzone_loopback &&
767 				    !ire_local_ok_across_zones(ire,
768 				    zoneid, &dst_addr, tsl, ipst)) {
769 					ire_refrele(ire);
770 					ire = NULL;
771 				}
772 			}
773 			if (ire == NULL) {
774 				ire = ire_ftable_lookup(dst_addr, net_mask,
775 				    gw_addr, 0, ipif, &sire, zoneid, 0,
776 				    tsl, match_flags, ipst);
777 			}
778 			break;
779 		case AF_INET6:
780 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
781 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
782 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
783 				    zoneid, tsl, match_flags_local, ipst);
784 				/*
785 				 * If we found an IRE_LOCAL, make sure
786 				 * it is one that would be used by this
787 				 * zone to send packets.
788 				 */
789 				if (ire != NULL &&
790 				    ire->ire_type == IRE_LOCAL &&
791 				    ipst->ips_ip_restrict_interzone_loopback &&
792 				    !ire_local_ok_across_zones(ire,
793 				    zoneid, (void *)&dst_addr_v6, tsl, ipst)) {
794 					ire_refrele(ire);
795 					ire = NULL;
796 				}
797 			}
798 			if (ire == NULL) {
799 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
800 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
801 				    zoneid, 0, tsl, match_flags, ipst);
802 			}
803 			break;
804 		}
805 		if (tsl != NULL && tsl != crgetlabel(ioc_cr))
806 			label_rele(tsl);
807 
808 		if (ire == NULL) {
809 			error = ESRCH;
810 			goto done;
811 		}
812 		/* we know the IRE before we come here */
813 		switch (rtm->rtm_type) {
814 		case RTM_GET:
815 			mp1 = rts_rtmget(mp, ire, sire, af);
816 			if (mp1 == NULL) {
817 				error = ENOBUFS;
818 				goto done;
819 			}
820 			freemsg(mp);
821 			mp = mp1;
822 			rtm = (rt_msghdr_t *)mp->b_rptr;
823 			break;
824 		case RTM_CHANGE:
825 			/*
826 			 * Do not allow the multirouting state of a route
827 			 * to be changed. This aims to prevent undesirable
828 			 * stages where both multirt and non-multirt routes
829 			 * for the same destination are declared.
830 			 */
831 			if ((ire->ire_flags & RTF_MULTIRT) !=
832 			    (rtm->rtm_flags & RTF_MULTIRT)) {
833 				error = EINVAL;
834 				goto done;
835 			}
836 			/*
837 			 * Note that we do not need to do
838 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
839 			 * in metrics or gateway will not affect existing
840 			 * routes since it does not create a more specific
841 			 * route.
842 			 */
843 			switch (af) {
844 			case AF_INET:
845 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
846 				if ((found_addrs & RTA_GATEWAY) != 0 &&
847 				    (ire->ire_gateway_addr != gw_addr)) {
848 					ire->ire_gateway_addr = gw_addr;
849 				}
850 
851 				if (rtsap != NULL) {
852 					ga.ga_af = AF_INET;
853 					IN6_IPADDR_TO_V4MAPPED(
854 					    ire->ire_gateway_addr, &ga.ga_addr);
855 
856 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
857 					if (gcgrp == NULL) {
858 						error = ENOMEM;
859 						goto done;
860 					}
861 				}
862 
863 				if ((found_addrs & RTA_SRC) != 0 &&
864 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
865 				    (ire->ire_src_addr != src_addr)) {
866 
867 					if (src_addr != INADDR_ANY) {
868 						/*
869 						 * The RTF_SETSRC flag is
870 						 * present, check that the
871 						 * supplied src address is not
872 						 * the loopback address. This
873 						 * would produce martian
874 						 * packets.
875 						 */
876 						if (src_addr ==
877 						    htonl(INADDR_LOOPBACK)) {
878 							error = EINVAL;
879 							goto done;
880 						}
881 						/*
882 						 * Also check that the
883 						 * supplied addr is a valid
884 						 * local address.
885 						 */
886 						tmp_ipif = ipif_lookup_addr(
887 						    src_addr, NULL, ALL_ZONES,
888 						    WR(q), ioc_mp,
889 						    ip_rts_request_retry,
890 						    &error, ipst);
891 						if (tmp_ipif == NULL) {
892 							error = (error ==
893 							    EINPROGRESS) ?
894 							    error :
895 							    EADDRNOTAVAIL;
896 							goto done;
897 						}
898 
899 						if (!(tmp_ipif->ipif_flags &
900 						    IPIF_UP) ||
901 						    (tmp_ipif->ipif_flags &
902 						    (IPIF_NOLOCAL |
903 						    IPIF_ANYCAST))) {
904 							error = EINVAL;
905 							goto done;
906 						}
907 						ire->ire_flags |= RTF_SETSRC;
908 					} else {
909 						ire->ire_flags &= ~RTF_SETSRC;
910 					}
911 					ire->ire_src_addr = src_addr;
912 				}
913 				break;
914 			case AF_INET6:
915 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
916 				mutex_enter(&ire->ire_lock);
917 				if ((found_addrs & RTA_GATEWAY) != 0 &&
918 				    !IN6_ARE_ADDR_EQUAL(
919 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
920 					ire->ire_gateway_addr_v6 = gw_addr_v6;
921 				}
922 
923 				if (rtsap != NULL) {
924 					ga.ga_af = AF_INET6;
925 					ga.ga_addr = ire->ire_gateway_addr_v6;
926 
927 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
928 					if (gcgrp == NULL) {
929 						error = ENOMEM;
930 						goto done;
931 					}
932 				}
933 
934 				if ((found_addrs & RTA_SRC) != 0 &&
935 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
936 				    !IN6_ARE_ADDR_EQUAL(
937 				    &ire->ire_src_addr_v6, &src_addr_v6)) {
938 
939 					if (!IN6_IS_ADDR_UNSPECIFIED(
940 					    &src_addr_v6)) {
941 						/*
942 						 * The RTF_SETSRC flag is
943 						 * present, check that the
944 						 * supplied src address is not
945 						 * the loopback address. This
946 						 * would produce martian
947 						 * packets.
948 						 */
949 						if (IN6_IS_ADDR_LOOPBACK(
950 						    &src_addr_v6)) {
951 							mutex_exit(
952 							    &ire->ire_lock);
953 							error = EINVAL;
954 							goto done;
955 						}
956 						/*
957 						 * Also check that the
958 						 * supplied addr is a valid
959 						 * local address.
960 						 */
961 						tmp_ipif = ipif_lookup_addr_v6(
962 						    &src_addr_v6, NULL,
963 						    ALL_ZONES,
964 						    CONNP_TO_WQ(connp), ioc_mp,
965 						    ip_rts_request_retry,
966 						    &error, ipst);
967 						if (tmp_ipif == NULL) {
968 							mutex_exit(
969 							    &ire->ire_lock);
970 							error = (error ==
971 							    EINPROGRESS) ?
972 							    error :
973 							    EADDRNOTAVAIL;
974 							goto done;
975 						}
976 						if (!(tmp_ipif->ipif_flags &
977 						    IPIF_UP) ||
978 						    (tmp_ipif->ipif_flags &
979 						    (IPIF_NOLOCAL |
980 						    IPIF_ANYCAST))) {
981 							mutex_exit(
982 							    &ire->ire_lock);
983 							error = EINVAL;
984 							goto done;
985 						}
986 						ire->ire_flags |= RTF_SETSRC;
987 					} else {
988 						ire->ire_flags &= ~RTF_SETSRC;
989 					}
990 					ire->ire_src_addr_v6 = src_addr_v6;
991 				}
992 				mutex_exit(&ire->ire_lock);
993 				break;
994 			}
995 
996 			if (rtsap != NULL) {
997 				in_addr_t ga_addr4;
998 
999 				ASSERT(gcgrp != NULL);
1000 
1001 				/*
1002 				 * Create and add the security attribute to
1003 				 * prefix IRE; it will add a reference to the
1004 				 * group upon allocating a new entry.  If it
1005 				 * finds an already-existing entry for the
1006 				 * security attribute, it simply returns it
1007 				 * and no new group reference is made.
1008 				 */
1009 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
1010 				if (gc == NULL ||
1011 				    (error = tsol_ire_init_gwattr(ire,
1012 				    ire->ire_ipversion, gc, NULL)) != 0) {
1013 					if (gc != NULL) {
1014 						GC_REFRELE(gc);
1015 					} else {
1016 						/* gc_create failed */
1017 						error = ENOMEM;
1018 					}
1019 					goto done;
1020 				}
1021 
1022 				/*
1023 				 * Now delete any existing gateway IRE caches
1024 				 * as well as all caches using the gateway,
1025 				 * and allow them to be created on demand
1026 				 * through ip_newroute{_v6}.
1027 				 */
1028 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
1029 				if (af == AF_INET) {
1030 					ire_clookup_delete_cache_gw(
1031 					    ga_addr4, ALL_ZONES, ipst);
1032 				} else {
1033 					ire_clookup_delete_cache_gw_v6(
1034 					    &ga.ga_addr, ALL_ZONES, ipst);
1035 				}
1036 			}
1037 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
1038 			break;
1039 		}
1040 		break;
1041 	default:
1042 		error = EOPNOTSUPP;
1043 		break;
1044 	}
1045 done:
1046 	if (ire != NULL)
1047 		ire_refrele(ire);
1048 	if (sire != NULL)
1049 		ire_refrele(sire);
1050 	if (ipif != NULL)
1051 		ipif_refrele(ipif);
1052 	if (tmp_ipif != NULL)
1053 		ipif_refrele(tmp_ipif);
1054 
1055 	if (gcgrp_xtraref)
1056 		GCGRP_REFRELE(gcgrp);
1057 
1058 	if (error == EINPROGRESS) {
1059 		if (rtm != NULL)
1060 			freemsg(mp);
1061 		return (error);
1062 	}
1063 	if (rtm != NULL) {
1064 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1065 		if (error != 0) {
1066 			rtm->rtm_errno = error;
1067 			/* Send error ACK */
1068 			ip1dbg(("ip_rts_request: error %d\n", error));
1069 		} else {
1070 			rtm->rtm_flags |= RTF_DONE;
1071 			/* OK ACK already set up by caller except this */
1072 			ip2dbg(("ip_rts_request: OK ACK\n"));
1073 		}
1074 		rts_queue_input(mp, connp, af, RTSQ_ALL, ipst);
1075 	}
1076 
1077 	iocp->ioc_error = error;
1078 	ioc_mp->b_datap->db_type = M_IOCACK;
1079 	if (iocp->ioc_error != 0)
1080 		iocp->ioc_count = 0;
1081 	(connp->conn_recv)(connp, ioc_mp, NULL);
1082 
1083 	/* conn was refheld in ip_wput_ioctl. */
1084 	CONN_OPER_PENDING_DONE(connp);
1085 
1086 	return (error);
1087 }
1088 
1089 int
1090 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
1091 {
1092 	return (ip_rts_request_common(q, mp, Q_TO_CONN(q), ioc_cr));
1093 }
1094 
1095 /*
1096  * Build a reply to the RTM_GET request contained in the given message block
1097  * using the retrieved IRE of the destination address, the parent IRE (if it
1098  * exists) and the address family.
1099  *
1100  * Returns a pointer to a message block containing the reply if successful,
1101  * otherwise NULL is returned.
1102  */
1103 static mblk_t *
1104 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1105 {
1106 	rt_msghdr_t	*rtm;
1107 	rt_msghdr_t	*new_rtm;
1108 	mblk_t		*new_mp;
1109 	int		rtm_addrs;
1110 	int		rtm_flags;
1111 	in6_addr_t	gw_addr_v6;
1112 	tsol_ire_gw_secattr_t *attrp = NULL;
1113 	tsol_gc_t	*gc = NULL;
1114 	tsol_gcgrp_t	*gcgrp = NULL;
1115 	int		sacnt = 0;
1116 
1117 	ASSERT(ire->ire_ipif != NULL);
1118 	rtm = (rt_msghdr_t *)mp->b_rptr;
1119 
1120 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1121 		attrp = sire->ire_gw_secattr;
1122 	else if (ire->ire_gw_secattr != NULL)
1123 		attrp = ire->ire_gw_secattr;
1124 
1125 	if (attrp != NULL) {
1126 		mutex_enter(&attrp->igsa_lock);
1127 		if ((gc = attrp->igsa_gc) != NULL) {
1128 			gcgrp = gc->gc_grp;
1129 			ASSERT(gcgrp != NULL);
1130 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1131 			sacnt = 1;
1132 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1133 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1134 			gc = gcgrp->gcgrp_head;
1135 			sacnt = gcgrp->gcgrp_count;
1136 		}
1137 		mutex_exit(&attrp->igsa_lock);
1138 
1139 		/* do nothing if there's no gc to report */
1140 		if (gc == NULL) {
1141 			ASSERT(sacnt == 0);
1142 			if (gcgrp != NULL) {
1143 				/* we might as well drop the lock now */
1144 				rw_exit(&gcgrp->gcgrp_rwlock);
1145 				gcgrp = NULL;
1146 			}
1147 			attrp = NULL;
1148 		}
1149 
1150 		ASSERT(gc == NULL || (gcgrp != NULL &&
1151 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1152 	}
1153 	ASSERT(sacnt == 0 || gc != NULL);
1154 
1155 	/*
1156 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1157 	 *
1158 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1159 	 * RTA_IFP and RTA_IFA if either is defined, and also
1160 	 * returns RTA_BRD if the appropriate interface is
1161 	 * point-to-point.
1162 	 */
1163 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1164 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1165 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1166 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1167 			rtm_addrs |= RTA_BRD;
1168 	}
1169 
1170 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1171 	if (new_mp == NULL) {
1172 		if (gcgrp != NULL)
1173 			rw_exit(&gcgrp->gcgrp_rwlock);
1174 		return (NULL);
1175 	}
1176 
1177 	/*
1178 	 * We set the destination address, gateway address,
1179 	 * netmask and flags in the RTM_GET response depending
1180 	 * on whether we found a parent IRE or not.
1181 	 * In particular, if we did find a parent IRE during the
1182 	 * recursive search, use that IRE's gateway address.
1183 	 * Otherwise, we use the IRE's source address for the
1184 	 * gateway address.
1185 	 */
1186 	ASSERT(af == AF_INET || af == AF_INET6);
1187 	switch (af) {
1188 	case AF_INET:
1189 		if (sire == NULL) {
1190 			rtm_flags = ire->ire_flags;
1191 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1192 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1193 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1194 			    new_mp, sacnt, gc);
1195 		} else {
1196 			if (sire->ire_flags & RTF_SETSRC)
1197 				rtm_addrs |= RTA_SRC;
1198 
1199 			rtm_flags = sire->ire_flags;
1200 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1201 			    sire->ire_mask, sire->ire_gateway_addr,
1202 			    (sire->ire_flags & RTF_SETSRC) ?
1203 			    sire->ire_src_addr : ire->ire_src_addr,
1204 			    ire->ire_ipif->ipif_pp_dst_addr,
1205 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1206 		}
1207 		break;
1208 	case AF_INET6:
1209 		if (sire == NULL) {
1210 			rtm_flags = ire->ire_flags;
1211 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1212 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1213 			    &ire->ire_src_addr_v6,
1214 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1215 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1216 			    sacnt, gc);
1217 		} else {
1218 			if (sire->ire_flags & RTF_SETSRC)
1219 				rtm_addrs |= RTA_SRC;
1220 
1221 			rtm_flags = sire->ire_flags;
1222 			mutex_enter(&sire->ire_lock);
1223 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1224 			mutex_exit(&sire->ire_lock);
1225 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1226 			    &sire->ire_mask_v6, &gw_addr_v6,
1227 			    (sire->ire_flags & RTF_SETSRC) ?
1228 			    &sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1229 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1230 			    ire->ire_ipif, new_mp, sacnt, gc);
1231 		}
1232 		break;
1233 	}
1234 
1235 	if (gcgrp != NULL)
1236 		rw_exit(&gcgrp->gcgrp_rwlock);
1237 
1238 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1239 
1240 	/*
1241 	 * The rtm_msglen, rtm_version and rtm_type fields in
1242 	 * RTM_GET response are filled in by rts_fill_msg.
1243 	 *
1244 	 * rtm_addrs and rtm_flags are filled in based on what
1245 	 * was requested and the state of the IREs looked up
1246 	 * above.
1247 	 *
1248 	 * rtm_inits and rtm_rmx are filled in with metrics
1249 	 * based on whether a parent IRE was found or not.
1250 	 *
1251 	 * TODO: rtm_index and rtm_use should probably be
1252 	 * filled in with something resonable here and not just
1253 	 * copied from the request.
1254 	 */
1255 	new_rtm->rtm_index = rtm->rtm_index;
1256 	new_rtm->rtm_pid = rtm->rtm_pid;
1257 	new_rtm->rtm_seq = rtm->rtm_seq;
1258 	new_rtm->rtm_use = rtm->rtm_use;
1259 	new_rtm->rtm_addrs = rtm_addrs;
1260 	new_rtm->rtm_flags = rtm_flags;
1261 	if (sire == NULL)
1262 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1263 	else
1264 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1265 
1266 	return (new_mp);
1267 }
1268 
1269 /*
1270  * Fill the given if_data_t with interface statistics.
1271  */
1272 static void
1273 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1274 {
1275 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1276 	if_data->ifi_addrlen = 0;		/* media address length */
1277 	if_data->ifi_hdrlen = 0;		/* media header length */
1278 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1279 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1280 	if_data->ifi_baudrate = 0;		/* linespeed */
1281 
1282 	if_data->ifi_ipackets = 0;		/* packets received on if */
1283 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1284 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1285 	if_data->ifi_oerrors = 0;		/* output errors on if */
1286 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1287 	if_data->ifi_ibytes = 0;		/* total number received */
1288 	if_data->ifi_obytes = 0;		/* total number sent */
1289 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1290 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1291 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1292 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1293 						/* protocol. */
1294 }
1295 
1296 /*
1297  * Set the metrics on a forwarding table route.
1298  */
1299 static void
1300 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1301 {
1302 	clock_t		rtt;
1303 	clock_t		rtt_sd;
1304 	ipif_t		*ipif;
1305 	ifrt_t		*ifrt;
1306 	mblk_t		*mp;
1307 	in6_addr_t	gw_addr_v6;
1308 
1309 	/*
1310 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1311 	 * common case of no metrics.
1312 	 */
1313 	if (which == 0)
1314 		return;
1315 	ire->ire_uinfo.iulp_set = B_TRUE;
1316 
1317 	/*
1318 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1319 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1320 	 * microseconds.
1321 	 */
1322 	if (which & RTV_RTT)
1323 		rtt = metrics->rmx_rtt / 1000;
1324 	if (which & RTV_RTTVAR)
1325 		rtt_sd = metrics->rmx_rttvar / 1000;
1326 
1327 	/*
1328 	 * Update the metrics in the IRE itself.
1329 	 */
1330 	mutex_enter(&ire->ire_lock);
1331 	if (which & RTV_MTU)
1332 		ire->ire_max_frag = metrics->rmx_mtu;
1333 	if (which & RTV_RTT)
1334 		ire->ire_uinfo.iulp_rtt = rtt;
1335 	if (which & RTV_SSTHRESH)
1336 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1337 	if (which & RTV_RTTVAR)
1338 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1339 	if (which & RTV_SPIPE)
1340 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1341 	if (which & RTV_RPIPE)
1342 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1343 	mutex_exit(&ire->ire_lock);
1344 
1345 	/*
1346 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1347 	 * reflect the metric change there.
1348 	 */
1349 	ipif = ire->ire_ipif;
1350 	if (ipif == NULL)
1351 		return;
1352 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1353 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1354 	if (ipif->ipif_isv6) {
1355 		mutex_enter(&ire->ire_lock);
1356 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1357 		mutex_exit(&ire->ire_lock);
1358 	}
1359 	mutex_enter(&ipif->ipif_saved_ire_lock);
1360 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1361 		/*
1362 		 * On a given ipif, the triple of address, gateway and mask is
1363 		 * unique for each saved IRE (in the case of ordinary interface
1364 		 * routes, the gateway address is all-zeroes).
1365 		 */
1366 		ifrt = (ifrt_t *)mp->b_rptr;
1367 		if (ipif->ipif_isv6) {
1368 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1369 			    &ire->ire_addr_v6) ||
1370 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1371 			    &gw_addr_v6) ||
1372 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1373 			    &ire->ire_mask_v6))
1374 				continue;
1375 		} else {
1376 			if (ifrt->ifrt_addr != ire->ire_addr ||
1377 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1378 			    ifrt->ifrt_mask != ire->ire_mask)
1379 				continue;
1380 		}
1381 		if (which & RTV_MTU)
1382 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1383 		if (which & RTV_RTT)
1384 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1385 		if (which & RTV_SSTHRESH) {
1386 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1387 			    metrics->rmx_ssthresh;
1388 		}
1389 		if (which & RTV_RTTVAR)
1390 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1391 		if (which & RTV_SPIPE)
1392 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1393 		if (which & RTV_RPIPE)
1394 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1395 		break;
1396 	}
1397 	mutex_exit(&ipif->ipif_saved_ire_lock);
1398 }
1399 
1400 /*
1401  * Get the metrics from a forwarding table route.
1402  */
1403 static int
1404 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1405 {
1406 	int	metrics_set = 0;
1407 
1408 	bzero(metrics, sizeof (rt_metrics_t));
1409 	/*
1410 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1411 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1412 	 * microseconds.
1413 	 */
1414 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1415 	metrics_set |= RTV_RTT;
1416 	metrics->rmx_mtu = ire->ire_max_frag;
1417 	metrics_set |= RTV_MTU;
1418 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1419 	metrics_set |= RTV_SSTHRESH;
1420 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1421 	metrics_set |= RTV_RTTVAR;
1422 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1423 	metrics_set |= RTV_SPIPE;
1424 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1425 	metrics_set |= RTV_RPIPE;
1426 	return (metrics_set);
1427 }
1428 
1429 /*
1430  * Takes a pointer to a routing message and extracts necessary info by looking
1431  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1432  * passed (all of which must be valid).
1433  *
1434  * The bitmask of sockaddrs actually found in the message is returned, or zero
1435  * is returned in the case of an error.
1436  */
1437 static int
1438 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1439     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1440     in6_addr_t *in_src_addrp, ushort_t *indexp, sa_family_t *afp,
1441     tsol_rtsecattr_t *rtsecattr, int *error)
1442 {
1443 	struct sockaddr *sa;
1444 	int	i;
1445 	int	addr_bits;
1446 	int	length;
1447 	int	found_addrs = 0;
1448 	caddr_t	cp;
1449 	size_t	size;
1450 	struct sockaddr_dl *sdl;
1451 
1452 	*dst_addrp = ipv6_all_zeros;
1453 	*gw_addrp = ipv6_all_zeros;
1454 	*net_maskp = ipv6_all_zeros;
1455 	*authorp = ipv6_all_zeros;
1456 	*if_addrp = ipv6_all_zeros;
1457 	*in_src_addrp = ipv6_all_zeros;
1458 	*indexp = 0;
1459 	*afp = AF_UNSPEC;
1460 	rtsecattr->rtsa_cnt = 0;
1461 	*error = 0;
1462 
1463 	/*
1464 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1465 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1466 	 */
1467 	cp = (caddr_t)&rtm[1];
1468 	length = rtm->rtm_msglen;
1469 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1470 		/*
1471 		 * The address family we are working with starts out as
1472 		 * AF_UNSPEC, but is set to the one specified with the
1473 		 * destination address.
1474 		 *
1475 		 * If the "working" address family that has been set to
1476 		 * something other than AF_UNSPEC, then the address family of
1477 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1478 		 * compatibility with older programs) or must be the same as our
1479 		 * "working" one.
1480 		 *
1481 		 * This code assumes that RTA_DST (1) comes first in the loop.
1482 		 */
1483 		sa = (struct sockaddr *)cp;
1484 		addr_bits = (rtm->rtm_addrs & (1 << i));
1485 		if (addr_bits == 0)
1486 			continue;
1487 		switch (addr_bits) {
1488 		case RTA_DST:
1489 			size = rts_copyfromsockaddr(sa, dst_addrp);
1490 			*afp = sa->sa_family;
1491 			break;
1492 		case RTA_GATEWAY:
1493 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1494 				return (0);
1495 			size = rts_copyfromsockaddr(sa, gw_addrp);
1496 			break;
1497 		case RTA_NETMASK:
1498 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1499 				return (0);
1500 			size = rts_copyfromsockaddr(sa, net_maskp);
1501 			break;
1502 		case RTA_IFP:
1503 			if (sa->sa_family != AF_LINK &&
1504 			    sa->sa_family != AF_UNSPEC)
1505 				return (0);
1506 			sdl = (struct sockaddr_dl *)cp;
1507 			*indexp = sdl->sdl_index;
1508 			size = sizeof (struct sockaddr_dl);
1509 			break;
1510 		case RTA_SRC:
1511 			/* Source address of the incoming packet */
1512 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1513 			*afp = sa->sa_family;
1514 			break;
1515 		case RTA_IFA:
1516 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1517 				return (0);
1518 			size = rts_copyfromsockaddr(sa, if_addrp);
1519 			break;
1520 		case RTA_AUTHOR:
1521 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1522 				return (0);
1523 			size = rts_copyfromsockaddr(sa, authorp);
1524 			break;
1525 		default:
1526 			return (0);
1527 		}
1528 		if (size == 0)
1529 			return (0);
1530 		cp += size;
1531 		found_addrs |= addr_bits;
1532 	}
1533 
1534 	/*
1535 	 * Parse the routing message and look for any security-
1536 	 * related attributes for the route.  For each valid
1537 	 * attribute, allocate/obtain the corresponding kernel
1538 	 * route security attributes.
1539 	 */
1540 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1541 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1542 
1543 	return (found_addrs);
1544 }
1545 
1546 /*
1547  * Fills the message with the given info.
1548  */
1549 static void
1550 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1551     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1552     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1553 {
1554 	rt_msghdr_t	*rtm;
1555 	sin_t		*sin;
1556 	size_t		data_size, header_size;
1557 	uchar_t		*cp;
1558 	int		i;
1559 
1560 	ASSERT(mp != NULL);
1561 	ASSERT(sacnt == 0 || gc != NULL);
1562 	/*
1563 	 * First find the type of the message
1564 	 * and its length.
1565 	 */
1566 	header_size = rts_header_msg_size(type);
1567 	/*
1568 	 * Now find the size of the data
1569 	 * that follows the message header.
1570 	 */
1571 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1572 
1573 	rtm = (rt_msghdr_t *)mp->b_rptr;
1574 	mp->b_wptr = &mp->b_rptr[header_size];
1575 	cp = mp->b_wptr;
1576 	bzero(cp, data_size);
1577 	for (i = 0; i < RTA_NUMBITS; i++) {
1578 		sin = (sin_t *)cp;
1579 		switch (rtm_addrs & (1 << i)) {
1580 		case RTA_DST:
1581 			sin->sin_addr.s_addr = dst;
1582 			sin->sin_family = AF_INET;
1583 			cp += sizeof (sin_t);
1584 			break;
1585 		case RTA_GATEWAY:
1586 			sin->sin_addr.s_addr = gateway;
1587 			sin->sin_family = AF_INET;
1588 			cp += sizeof (sin_t);
1589 			break;
1590 		case RTA_NETMASK:
1591 			sin->sin_addr.s_addr = mask;
1592 			sin->sin_family = AF_INET;
1593 			cp += sizeof (sin_t);
1594 			break;
1595 		case RTA_IFP:
1596 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1597 			break;
1598 		case RTA_IFA:
1599 		case RTA_SRC:
1600 			sin->sin_addr.s_addr = src_addr;
1601 			sin->sin_family = AF_INET;
1602 			cp += sizeof (sin_t);
1603 			break;
1604 		case RTA_AUTHOR:
1605 			sin->sin_addr.s_addr = author;
1606 			sin->sin_family = AF_INET;
1607 			cp += sizeof (sin_t);
1608 			break;
1609 		case RTA_BRD:
1610 			/*
1611 			 * RTA_BRD is used typically to specify a point-to-point
1612 			 * destination address.
1613 			 */
1614 			sin->sin_addr.s_addr = brd_addr;
1615 			sin->sin_family = AF_INET;
1616 			cp += sizeof (sin_t);
1617 			break;
1618 		}
1619 	}
1620 
1621 	if (gc != NULL) {
1622 		rtm_ext_t *rtm_ext;
1623 		struct rtsa_s *rp_dst;
1624 		tsol_rtsecattr_t *rsap;
1625 		int i;
1626 
1627 		ASSERT(gc->gc_grp != NULL);
1628 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1629 		ASSERT(sacnt > 0);
1630 
1631 		rtm_ext = (rtm_ext_t *)cp;
1632 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1633 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1634 
1635 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1636 		rsap->rtsa_cnt = sacnt;
1637 		rp_dst = rsap->rtsa_attr;
1638 
1639 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1640 			ASSERT(gc->gc_db != NULL);
1641 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1642 		}
1643 		cp = (uchar_t *)rp_dst;
1644 	}
1645 
1646 	mp->b_wptr = cp;
1647 	mp->b_cont = NULL;
1648 	/*
1649 	 * set the fields that are common to
1650 	 * to different messages.
1651 	 */
1652 	rtm->rtm_msglen = (short)(header_size + data_size);
1653 	rtm->rtm_version = RTM_VERSION;
1654 	rtm->rtm_type = (uchar_t)type;
1655 }
1656 
1657 /*
1658  * Allocates and initializes a routing socket message.
1659  */
1660 mblk_t *
1661 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1662 {
1663 	size_t	length;
1664 	mblk_t	*mp;
1665 
1666 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1667 	mp = allocb(length, BPRI_MED);
1668 	if (mp == NULL)
1669 		return (mp);
1670 	bzero(mp->b_rptr, length);
1671 	return (mp);
1672 }
1673 
1674 /*
1675  * Returns the size of the routing
1676  * socket message header size.
1677  */
1678 size_t
1679 rts_header_msg_size(int type)
1680 {
1681 	switch (type) {
1682 	case RTM_DELADDR:
1683 	case RTM_NEWADDR:
1684 		return (sizeof (ifa_msghdr_t));
1685 	case RTM_IFINFO:
1686 		return (sizeof (if_msghdr_t));
1687 	default:
1688 		return (sizeof (rt_msghdr_t));
1689 	}
1690 }
1691 
1692 /*
1693  * Returns the size of the message needed with the given rtm_addrs and family.
1694  *
1695  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1696  * of the same family (currently either AF_INET or AF_INET6).
1697  */
1698 size_t
1699 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1700 {
1701 	int	i;
1702 	size_t	length = 0;
1703 
1704 	for (i = 0; i < RTA_NUMBITS; i++) {
1705 		switch (rtm_addrs & (1 << i)) {
1706 		case RTA_IFP:
1707 			length += sizeof (struct sockaddr_dl);
1708 			break;
1709 		case RTA_DST:
1710 		case RTA_GATEWAY:
1711 		case RTA_NETMASK:
1712 		case RTA_SRC:
1713 		case RTA_IFA:
1714 		case RTA_AUTHOR:
1715 		case RTA_BRD:
1716 			ASSERT(af == AF_INET || af == AF_INET6);
1717 			switch (af) {
1718 			case AF_INET:
1719 				length += sizeof (sin_t);
1720 				break;
1721 			case AF_INET6:
1722 				length += sizeof (sin6_t);
1723 				break;
1724 			}
1725 			break;
1726 		}
1727 	}
1728 	if (sacnt > 0)
1729 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1730 
1731 	return (length);
1732 }
1733 
1734 /*
1735  * This routine is called to generate a message to the routing
1736  * socket indicating that a redirect has occured, a routing lookup
1737  * has failed, or that a protocol has detected timeouts to a particular
1738  * destination. This routine is called for message types RTM_LOSING,
1739  * RTM_REDIRECT, and RTM_MISS.
1740  */
1741 void
1742 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1743     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs,
1744     ip_stack_t *ipst)
1745 {
1746 	rt_msghdr_t	*rtm;
1747 	mblk_t		*mp;
1748 
1749 	if (rtm_addrs == 0)
1750 		return;
1751 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1752 	if (mp == NULL)
1753 		return;
1754 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1755 	    author, NULL, mp, 0, NULL);
1756 	rtm = (rt_msghdr_t *)mp->b_rptr;
1757 	rtm->rtm_flags = flags;
1758 	rtm->rtm_errno = error;
1759 	rtm->rtm_flags |= RTF_DONE;
1760 	rtm->rtm_addrs = rtm_addrs;
1761 	rts_queue_input(mp, NULL, AF_INET, RTSQ_ALL, ipst);
1762 }
1763 
1764 /*
1765  * This routine is called to generate a message to the routing
1766  * socket indicating that the status of a network interface has changed.
1767  * Message type generated RTM_IFINFO.
1768  */
1769 void
1770 ip_rts_ifmsg(const ipif_t *ipif, uint_t flags)
1771 {
1772 	ip_rts_xifmsg(ipif, 0, 0, flags);
1773 }
1774 
1775 void
1776 ip_rts_xifmsg(const ipif_t *ipif, uint64_t set, uint64_t clear, uint_t flags)
1777 {
1778 	if_msghdr_t	*ifm;
1779 	mblk_t		*mp;
1780 	sa_family_t	af;
1781 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1782 
1783 	/*
1784 	 * This message should be generated only when the physical interface
1785 	 * is changing state.
1786 	 */
1787 	if (ipif->ipif_id != 0)
1788 		return;
1789 
1790 	if (ipif->ipif_isv6) {
1791 		af = AF_INET6;
1792 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1793 		if (mp == NULL)
1794 			return;
1795 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1796 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1797 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1798 	} else {
1799 		af = AF_INET;
1800 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1801 		if (mp == NULL)
1802 			return;
1803 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1804 		    0, NULL);
1805 	}
1806 	ifm = (if_msghdr_t *)mp->b_rptr;
1807 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1808 	ifm->ifm_flags = (ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1809 	    ipif->ipif_ill->ill_phyint->phyint_flags | set) & ~clear;
1810 	rts_getifdata(&ifm->ifm_data, ipif);
1811 	ifm->ifm_addrs = RTA_IFP;
1812 
1813 	if (flags & RTSQ_DEFAULT) {
1814 		flags = RTSQ_ALL;
1815 		/*
1816 		 * If this message is for an underlying interface, prevent
1817 		 * "normal" (IPMP-unaware) routing sockets from seeing it.
1818 		 */
1819 		if (IS_UNDER_IPMP(ipif->ipif_ill))
1820 			flags &= ~RTSQ_NORMAL;
1821 	}
1822 
1823 	rts_queue_input(mp, NULL, af, flags, ipst);
1824 }
1825 
1826 /*
1827  * This is called to generate messages to the routing socket
1828  * indicating a network interface has had addresses associated with it.
1829  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1830  */
1831 void
1832 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif, uint_t flags)
1833 {
1834 	int		pass;
1835 	int		ncmd;
1836 	int		rtm_addrs;
1837 	mblk_t		*mp;
1838 	ifa_msghdr_t	*ifam;
1839 	rt_msghdr_t	*rtm;
1840 	sa_family_t	af;
1841 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1842 
1843 	if (ipif->ipif_isv6)
1844 		af = AF_INET6;
1845 	else
1846 		af = AF_INET;
1847 
1848 	if (flags & RTSQ_DEFAULT) {
1849 		flags = RTSQ_ALL;
1850 		/*
1851 		 * If this message is for an underlying interface, prevent
1852 		 * "normal" (IPMP-unaware) routing sockets from seeing it.
1853 		 */
1854 		if (IS_UNDER_IPMP(ipif->ipif_ill))
1855 			flags &= ~RTSQ_NORMAL;
1856 	}
1857 
1858 	/*
1859 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1860 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1861 	 */
1862 	for (pass = 1; pass < 3; pass++) {
1863 		if ((cmd == RTM_ADD && pass == 1) ||
1864 		    (cmd == RTM_DELETE && pass == 2)) {
1865 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1866 
1867 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD | RTA_IFP);
1868 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1869 			if (mp == NULL)
1870 				continue;
1871 			switch (af) {
1872 			case AF_INET:
1873 				rts_fill_msg(ncmd, rtm_addrs, 0,
1874 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1875 				    ipif->ipif_pp_dst_addr, 0, ipif, mp,
1876 				    0, NULL);
1877 				break;
1878 			case AF_INET6:
1879 				rts_fill_msg_v6(ncmd, rtm_addrs,
1880 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1881 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1882 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1883 				    ipif, mp, 0, NULL);
1884 				break;
1885 			}
1886 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1887 			ifam->ifam_index =
1888 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1889 			ifam->ifam_metric = ipif->ipif_metric;
1890 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1891 			ifam->ifam_addrs = rtm_addrs;
1892 			rts_queue_input(mp, NULL, af, flags, ipst);
1893 		}
1894 		if ((cmd == RTM_ADD && pass == 2) ||
1895 		    (cmd == RTM_DELETE && pass == 1)) {
1896 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1897 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1898 			if (mp == NULL)
1899 				continue;
1900 			switch (af) {
1901 			case AF_INET:
1902 				rts_fill_msg(cmd, rtm_addrs,
1903 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1904 				    0, 0, 0, NULL, mp, 0, NULL);
1905 				break;
1906 			case AF_INET6:
1907 				rts_fill_msg_v6(cmd, rtm_addrs,
1908 				    &ipif->ipif_v6lcl_addr,
1909 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1910 				    &ipv6_all_zeros, &ipv6_all_zeros,
1911 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1912 				break;
1913 			}
1914 			rtm = (rt_msghdr_t *)mp->b_rptr;
1915 			rtm->rtm_index =
1916 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1917 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1918 			rtm->rtm_errno = error;
1919 			if (error == 0)
1920 				rtm->rtm_flags |= RTF_DONE;
1921 			rtm->rtm_addrs = rtm_addrs;
1922 			rts_queue_input(mp, NULL, af, flags, ipst);
1923 		}
1924 	}
1925 }
1926 
1927 /*
1928  * Based on the address family specified in a sockaddr, copy the address field
1929  * into an in6_addr_t.
1930  *
1931  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1932  * compatibility with programs that leave the family cleared in the sockaddr.
1933  * Callers of rts_copyfromsockaddr should check the family themselves if they
1934  * wish to verify its value.
1935  *
1936  * In the case of AF_INET6, a check is made to ensure that address is not an
1937  * IPv4-mapped address.
1938  */
1939 size_t
1940 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1941 {
1942 	switch (sa->sa_family) {
1943 	case AF_INET:
1944 	case AF_UNSPEC:
1945 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1946 		return (sizeof (sin_t));
1947 	case AF_INET6:
1948 		*addrp = ((sin6_t *)sa)->sin6_addr;
1949 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1950 			return (0);
1951 		return (sizeof (sin6_t));
1952 	default:
1953 		return (0);
1954 	}
1955 }
1956