xref: /titanic_44/usr/src/uts/common/inet/ip/ip_rts.c (revision b369f4b871a39ef94e220443957975f445f52eb6)
1 /*
2  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*
7  * Copyright (c) 1988, 1991, 1993
8  *	The Regents of the University of California.  All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)rtsock.c	8.6 (Berkeley) 2/11/95
39  */
40 
41 #pragma ident	"%Z%%M%	%I%	%E% SMI"
42 
43 /*
44  * This file contains routines that process routing socket requests.
45  */
46 
47 #include <sys/types.h>
48 #include <sys/stream.h>
49 #include <sys/stropts.h>
50 #include <sys/ddi.h>
51 #include <sys/cmn_err.h>
52 #include <sys/debug.h>
53 #include <sys/policy.h>
54 #include <sys/zone.h>
55 
56 #include <sys/systm.h>
57 #include <sys/param.h>
58 #include <sys/socket.h>
59 #include <sys/strsun.h>
60 #include <net/if.h>
61 #include <net/route.h>
62 #include <netinet/in.h>
63 #include <net/if_dl.h>
64 #include <netinet/ip6.h>
65 
66 #include <inet/common.h>
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
69 #include <inet/ip_if.h>
70 #include <inet/ip_ire.h>
71 #include <inet/ip_rts.h>
72 
73 #include <inet/ipclassifier.h>
74 
75 #include <sys/tsol/tndb.h>
76 #include <sys/tsol/tnet.h>
77 
78 #define	RTS_MSG_SIZE(type, rtm_addrs, af, sacnt) \
79 	(rts_data_msg_size(rtm_addrs, af, sacnt) + rts_header_msg_size(type))
80 
81 static size_t	rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp);
82 static void	rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst,
83     ipaddr_t mask, ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr,
84     ipaddr_t author, const ipif_t *ipif, mblk_t *mp, uint_t, const tsol_gc_t *);
85 static int	rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp,
86     in6_addr_t *gw_addrp, in6_addr_t *net_maskp, in6_addr_t *authorp,
87     in6_addr_t *if_addrp, in6_addr_t *src_addrp, ushort_t *indexp,
88     ushort_t *src_indexp, sa_family_t *afp, tsol_rtsecattr_t *rtsecattr,
89     int *error);
90 static void	rts_getifdata(if_data_t *if_data, const ipif_t *ipif);
91 static int	rts_getmetrics(ire_t *ire, rt_metrics_t *metrics);
92 static mblk_t	*rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire,
93     sa_family_t af);
94 static void	rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics);
95 static void	ip_rts_request_retry(ipsq_t *, queue_t *q, mblk_t *mp, void *);
96 
97 /*
98  * Send the ack to all the routing queues.  In case of the originating queue,
99  * send it only if the loopback is set.
100  *
101  * Messages are sent upstream only on routing sockets that did not specify an
102  * address family when they were created or when the address family matches the
103  * one specified by the caller.
104  *
105  */
106 void
107 rts_queue_input(mblk_t *mp, queue_t *q, sa_family_t af)
108 {
109 	mblk_t	*mp1;
110 	int	checkqfull;
111 	conn_t 	*connp, *next_connp;
112 
113 	mutex_enter(&rts_clients.connf_lock);
114 	connp = rts_clients.connf_head;
115 
116 	while (connp != NULL) {
117 		/*
118 		 * If there was a family specified when this routing socket was
119 		 * created and it doesn't match the family of the message to
120 		 * copy, then continue.
121 		 */
122 		if ((connp->conn_proto != AF_UNSPEC) &&
123 		    (connp->conn_proto != af)) {
124 			connp = connp->conn_next;
125 			continue;
126 		}
127 		/*
128 		 * For the originating queue, we only copy the message upstream
129 		 * if loopback is set.  For others reading on the routing
130 		 * socket, we check if there is room upstream for a copy of the
131 		 * message.
132 		 */
133 		if ((q != NULL) && (CONNP_TO_RQ(connp) == RD(q))) {
134 			if (connp->conn_loopback == 0) {
135 				connp = connp->conn_next;
136 				continue;
137 			}
138 			checkqfull = B_FALSE;
139 		} else {
140 			checkqfull = B_TRUE;
141 		}
142 		CONN_INC_REF(connp);
143 		mutex_exit(&rts_clients.connf_lock);
144 		if (!checkqfull || canputnext(CONNP_TO_RQ(connp))) {
145 			mp1 = dupmsg(mp);
146 			if (mp1 == NULL)
147 				mp1 = copymsg(mp);
148 			if (mp1 != NULL)
149 				putnext(CONNP_TO_RQ(connp), mp1);
150 		}
151 
152 		mutex_enter(&rts_clients.connf_lock);
153 		/* Follow the next pointer before releasing the conn. */
154 		next_connp = connp->conn_next;
155 		CONN_DEC_REF(connp);
156 		connp = next_connp;
157 	}
158 	mutex_exit(&rts_clients.connf_lock);
159 	freemsg(mp);
160 }
161 
162 /*
163  * Takes an ire and sends an ack to all the routing sockets. This
164  * routine is used
165  * - when a route is created/deleted through the ioctl interface.
166  * - when ire_expire deletes a stale redirect
167  */
168 void
169 ip_rts_rtmsg(int type, ire_t *ire, int error)
170 {
171 	mblk_t		*mp;
172 	rt_msghdr_t	*rtm;
173 	int		rtm_addrs = (RTA_DST | RTA_NETMASK | RTA_GATEWAY);
174 	sa_family_t	af;
175 	in6_addr_t	gw_addr_v6;
176 
177 	if (ire == NULL)
178 		return;
179 	ASSERT(ire->ire_ipversion == IPV4_VERSION ||
180 	    ire->ire_ipversion == IPV6_VERSION);
181 
182 	if (ire->ire_flags & RTF_SETSRC)
183 		rtm_addrs |= RTA_SRC;
184 
185 	switch (ire->ire_ipversion) {
186 	case IPV4_VERSION:
187 		af = AF_INET;
188 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
189 		if (mp == NULL)
190 			return;
191 		rts_fill_msg(type, rtm_addrs, ire->ire_addr, ire->ire_mask,
192 		    ire->ire_gateway_addr, ire->ire_src_addr, 0, 0, NULL, mp,
193 		    0, NULL);
194 		break;
195 	case IPV6_VERSION:
196 		af = AF_INET6;
197 		mp = rts_alloc_msg(type, rtm_addrs, af, 0);
198 		if (mp == NULL)
199 			return;
200 		mutex_enter(&ire->ire_lock);
201 		gw_addr_v6 = ire->ire_gateway_addr_v6;
202 		mutex_exit(&ire->ire_lock);
203 		rts_fill_msg_v6(type, rtm_addrs, &ire->ire_addr_v6,
204 		    &ire->ire_mask_v6, &gw_addr_v6,
205 		    &ire->ire_src_addr_v6, &ipv6_all_zeros, &ipv6_all_zeros,
206 		    NULL, mp, 0, NULL);
207 		break;
208 	}
209 	rtm = (rt_msghdr_t *)mp->b_rptr;
210 	mp->b_wptr = (uchar_t *)&mp->b_rptr[rtm->rtm_msglen];
211 	rtm->rtm_addrs = rtm_addrs;
212 	rtm->rtm_flags = ire->ire_flags;
213 	if (error != 0)
214 		rtm->rtm_errno = error;
215 	else
216 		rtm->rtm_flags |= RTF_DONE;
217 	rts_queue_input(mp, NULL, af);
218 }
219 
220 /* ARGSUSED */
221 static void
222 ip_rts_request_retry(ipsq_t *dummy_sq, queue_t *q, mblk_t *mp, void *dummy)
223 {
224 	(void) ip_rts_request(q, mp, DB_CRED(mp));
225 }
226 
227 /*
228  * Processes requests received on a routing socket. It extracts all the
229  * arguments and calls the appropriate function to process the request.
230  *
231  * RTA_SRC bit flag requests are sent by mipagent and 'route -setsrc'.
232  * RTA_SRCIFP bit flag requests are sent by mipagent only.
233  *
234  * In general, this function does not consume the message supplied but rather
235  * sends the message upstream with an appropriate UNIX errno.
236  *
237  * We may need to restart this operation if the ipif cannot be looked up
238  * due to an exclusive operation that is currently in progress. The restart
239  * entry point is ip_rts_request_retry. While the request is enqueud in the
240  * ipsq the ioctl could be aborted and the conn close. To ensure that we don't
241  * have stale conn pointers, ip_wput_ioctl does a conn refhold. This is
242  * released at the completion of the rts ioctl at the end of this function
243  * by calling CONN_OPER_PENDING_DONE or when the ioctl is aborted and
244  * conn close occurs in conn_ioctl_cleanup.
245  */
246 int
247 ip_rts_request(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
248 {
249 	rt_msghdr_t	*rtm = NULL;
250 	in6_addr_t	dst_addr_v6;
251 	in6_addr_t	src_addr_v6;
252 	in6_addr_t	gw_addr_v6;
253 	in6_addr_t	net_mask_v6;
254 	in6_addr_t	author_v6;
255 	in6_addr_t	if_addr_v6;
256 	mblk_t		*mp1, *ioc_mp = mp;
257 	ire_t		*ire = NULL;
258 	ire_t		*sire = NULL;
259 	int		error = 0;
260 	int		match_flags = MATCH_IRE_DSTONLY;
261 	int		found_addrs;
262 	sa_family_t	af;
263 	ipaddr_t	dst_addr;
264 	ipaddr_t	gw_addr;
265 	ipaddr_t	src_addr;
266 	ipaddr_t	net_mask;
267 	ushort_t	index;
268 	ushort_t	src_index;
269 	ipif_t		*ipif = NULL;
270 	ipif_t		*src_ipif = NULL;
271 	ipif_t		*tmp_ipif = NULL;
272 	IOCP		iocp = (IOCP)mp->b_rptr;
273 	conn_t		*connp;
274 	boolean_t	gcgrp_xtraref = B_FALSE;
275 	tsol_gcgrp_addr_t ga;
276 	tsol_rtsecattr_t rtsecattr;
277 	struct rtsa_s	*rtsap = NULL;
278 	tsol_gcgrp_t	*gcgrp = NULL;
279 	tsol_gc_t	*gc = NULL;
280 
281 	ip1dbg(("ip_rts_request: mp is %x\n", DB_TYPE(mp)));
282 
283 	ASSERT(CONN_Q(q));
284 	connp = Q_TO_CONN(q);
285 
286 	ASSERT(mp->b_cont != NULL);
287 	/* ioc_mp holds mp */
288 	mp = mp->b_cont;
289 
290 	/*
291 	 * The Routing Socket data starts on
292 	 * next block. If there is no next block
293 	 * this is an indication from routing module
294 	 * that it is a routing socket stream queue.
295 	 */
296 	if (mp->b_cont != NULL) {
297 		mp1 = dupmsg(mp->b_cont);
298 		if (mp1 == NULL) {
299 			freemsg(mp);
300 			error  = ENOBUFS;
301 			goto done;
302 		}
303 		mp = mp1;
304 	} else {
305 		/*
306 		 * This is a message from RTS module
307 		 * indicating that this is a Routing Socket
308 		 * Stream. Insert this conn_t in routing
309 		 * socket client list.
310 		 */
311 
312 		connp->conn_loopback = 1;
313 		ipcl_hash_insert_wildcard(&rts_clients, connp);
314 
315 		goto done;
316 	}
317 	if (mp->b_cont != NULL && !pullupmsg(mp, -1)) {
318 		freemsg(mp);
319 		error =  EINVAL;
320 		goto done;
321 	}
322 	if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
323 		freemsg(mp);
324 		error = EINVAL;
325 		goto done;
326 	}
327 
328 	/*
329 	 * Check the routing message for basic consistency including the
330 	 * version number and that the number of octets written is the same
331 	 * as specified by the rtm_msglen field.
332 	 *
333 	 * At this point, an error can be delivered back via rtm_errno.
334 	 */
335 	rtm = (rt_msghdr_t *)mp->b_rptr;
336 	if ((mp->b_wptr - mp->b_rptr) != rtm->rtm_msglen) {
337 		error = EINVAL;
338 		goto done;
339 	}
340 	if (rtm->rtm_version != RTM_VERSION) {
341 		error = EPROTONOSUPPORT;
342 		goto done;
343 	}
344 
345 	/* Only allow RTM_GET or RTM_RESOLVE for unprivileged process */
346 	if (rtm->rtm_type != RTM_GET &&
347 	    rtm->rtm_type != RTM_RESOLVE &&
348 	    (ioc_cr == NULL ||
349 	    secpolicy_net_config(ioc_cr, B_FALSE) != 0)) {
350 		error = EPERM;
351 		goto done;
352 	}
353 
354 	found_addrs = rts_getaddrs(rtm, &dst_addr_v6, &gw_addr_v6, &net_mask_v6,
355 	    &author_v6, &if_addr_v6, &src_addr_v6, &index, &src_index, &af,
356 	    &rtsecattr, &error);
357 
358 	if (error != 0)
359 		goto done;
360 
361 	if ((found_addrs & RTA_DST) == 0) {
362 		error = EINVAL;
363 		goto done;
364 	}
365 
366 	/*
367 	 * Based on the address family of the destination address, determine
368 	 * the destination, gateway and netmask and return the appropriate error
369 	 * if an unknown address family was specified (following the errno
370 	 * values that 4.4BSD-Lite2 returns.)
371 	 */
372 	switch (af) {
373 	case AF_INET:
374 		/*
375 		 * RTA_SRCIFP is supported for interface route only.
376 		 * Thus a gateway route with srcifindex is rejected,
377 		 * except if it's a request to add reverse tunnel
378 		 * route.
379 		 */
380 		if ((rtm->rtm_flags & RTF_GATEWAY) &&
381 		    (found_addrs & RTA_SRCIFP) &&
382 		    !(found_addrs & RTA_SRC)) {
383 			error = EINVAL;
384 			goto done;
385 		}
386 		IN6_V4MAPPED_TO_IPADDR(&dst_addr_v6, dst_addr);
387 		IN6_V4MAPPED_TO_IPADDR(&src_addr_v6, src_addr);
388 		IN6_V4MAPPED_TO_IPADDR(&gw_addr_v6, gw_addr);
389 		if (((found_addrs & RTA_NETMASK) == 0) ||
390 		    (rtm->rtm_flags & RTF_HOST))
391 			net_mask = IP_HOST_MASK;
392 		else
393 			IN6_V4MAPPED_TO_IPADDR(&net_mask_v6, net_mask);
394 		break;
395 	case AF_INET6:
396 		/*
397 		 * RTA_SRCIFP is not a valid flag for IPv6 routes.
398 		 */
399 		if (found_addrs & RTA_SRCIFP) {
400 			error = EINVAL;
401 			goto done;
402 		}
403 		if (((found_addrs & RTA_NETMASK) == 0) ||
404 		    (rtm->rtm_flags & RTF_HOST))
405 			net_mask_v6 = ipv6_all_ones;
406 		break;
407 	default:
408 		/*
409 		 * These errno values are meant to be compatible with
410 		 * 4.4BSD-Lite2 for the given message types.
411 		 */
412 		switch (rtm->rtm_type) {
413 		case RTM_ADD:
414 		case RTM_DELETE:
415 			error = ESRCH;
416 			goto done;
417 		case RTM_GET:
418 		case RTM_CHANGE:
419 			error = EAFNOSUPPORT;
420 			goto done;
421 		default:
422 			error = EOPNOTSUPP;
423 			goto done;
424 		}
425 	}
426 
427 	/*
428 	 * At this point, the address family must be something known.
429 	 */
430 	ASSERT(af == AF_INET || af == AF_INET6);
431 
432 	if (index != 0) {
433 		ill_t   *ill;
434 
435 		/*
436 		 * IPC must be refheld somewhere in ip_wput_nondata or
437 		 * ip_wput_ioctl etc... and cleaned up if ioctl is killed.
438 		 * If ILL_CHANGING the request is queued in the ipsq.
439 		 */
440 		ill = ill_lookup_on_ifindex(index, af == AF_INET6,
441 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error);
442 		if (ill == NULL) {
443 			if (error != EINPROGRESS)
444 				error = EINVAL;
445 			goto done;
446 		}
447 
448 		ipif = ipif_get_next_ipif(NULL, ill);
449 		ill_refrele(ill);
450 		/*
451 		 * If this is replacement ipif, prevent a route from
452 		 * being added.
453 		 */
454 		if (ipif != NULL && ipif->ipif_replace_zero) {
455 			error = ENETDOWN;
456 			goto done;
457 		}
458 		match_flags |= MATCH_IRE_ILL;
459 	}
460 
461 	/* RTA_SRCIFP is unsupported on AF_INET6. */
462 	if (af == AF_INET && src_index != 0) {
463 		ill_t   *ill;
464 
465 		/* If ILL_CHANGING the request is queued in the ipsq. */
466 		ill = ill_lookup_on_ifindex(src_index, B_FALSE,
467 		    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry, &error);
468 		if (ill == NULL) {
469 			if (error != EINPROGRESS)
470 				error = EINVAL;
471 			goto done;
472 		}
473 
474 		src_ipif = ipif_get_next_ipif(NULL, ill);
475 		ill_refrele(ill);
476 	}
477 	/*
478 	 * If a netmask was supplied in the message, then subsequent route
479 	 * lookups will attempt to match on the netmask as well.
480 	 */
481 	if ((found_addrs & RTA_NETMASK) != 0)
482 		match_flags |= MATCH_IRE_MASK;
483 
484 	/*
485 	 * We only process any passed-in route security attributes for
486 	 * either RTM_ADD or RTM_CHANGE message; ignore otherwise.
487 	 */
488 	if (rtm->rtm_type == RTM_ADD || rtm->rtm_type == RTM_CHANGE) {
489 		ASSERT(rtsecattr.rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
490 		if (rtsecattr.rtsa_cnt > 0)
491 			rtsap = &rtsecattr.rtsa_attr[0];
492 	}
493 
494 	switch (rtm->rtm_type) {
495 	case RTM_ADD:
496 		/* if we are adding a route, gateway is a must */
497 		if ((found_addrs & RTA_GATEWAY) == 0) {
498 			error = EINVAL;
499 			goto done;
500 		}
501 
502 		/* Multirouting does not support net routes. */
503 		if ((rtm->rtm_flags & (RTF_MULTIRT | RTF_HOST)) ==
504 		    RTF_MULTIRT) {
505 			error = EADDRNOTAVAIL;
506 			goto done;
507 		}
508 
509 		/*
510 		 * Multirouting and user-specified source addresses
511 		 * do not support interface based routing.
512 		 * Assigning a source address to an interface based
513 		 * route is achievable by plumbing a new ipif and
514 		 * setting up the interface route via this ipif,
515 		 * though.
516 		 */
517 		if (rtm->rtm_flags & (RTF_MULTIRT | RTF_SETSRC)) {
518 			if ((rtm->rtm_flags & RTF_GATEWAY) == 0) {
519 				error = EADDRNOTAVAIL;
520 				goto done;
521 			}
522 		}
523 
524 		switch (af) {
525 		case AF_INET:
526 			if (src_addr != INADDR_ANY) {
527 				/*
528 				 * If there is a source address, but
529 				 * no RTF_SETSRC modifier, setup a MobileIP
530 				 * reverse tunnel.
531 				 */
532 				if ((rtm->rtm_flags & RTF_SETSRC) == 0) {
533 					error = ip_mrtun_rt_add(src_addr,
534 					    rtm->rtm_flags, ipif,
535 					    src_ipif, &ire, CONNP_TO_WQ(connp),
536 					    ioc_mp, ip_rts_request_retry);
537 					break;
538 				}
539 				/*
540 				 * The RTF_SETSRC flag is present, check that
541 				 * the supplied src address is not the loopback
542 				 * address. This would produce martian packets.
543 				 */
544 				if (src_addr == htonl(INADDR_LOOPBACK)) {
545 					error = EINVAL;
546 					goto done;
547 				}
548 				/*
549 				 * Also check that the supplied address is a
550 				 * valid, local one.
551 				 */
552 				tmp_ipif = ipif_lookup_addr(src_addr, NULL,
553 				    ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
554 				    ip_rts_request_retry, &error);
555 				if (tmp_ipif == NULL) {
556 					if (error != EINPROGRESS)
557 						error = EADDRNOTAVAIL;
558 					goto done;
559 				}
560 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
561 				    (tmp_ipif->ipif_flags &
562 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
563 					error = EINVAL;
564 					goto done;
565 				}
566 			} else {
567 				/*
568 				 * The RTF_SETSRC modifier must be associated
569 				 * to a non-null source address.
570 				 */
571 				if (rtm->rtm_flags & RTF_SETSRC) {
572 					error = EINVAL;
573 					goto done;
574 				}
575 			}
576 
577 			error = ip_rt_add(dst_addr, net_mask, gw_addr, src_addr,
578 			    rtm->rtm_flags, ipif, src_ipif, &ire, B_FALSE,
579 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry,
580 			    rtsap);
581 			if (ipif != NULL)
582 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
583 			break;
584 		case AF_INET6:
585 			if (!IN6_IS_ADDR_UNSPECIFIED(&src_addr_v6)) {
586 				/*
587 				 * If there is a source address, but
588 				 * no RTF_SETSRC modifier, reject, as
589 				 * MobileIP IPv6 reverse tunnels are
590 				 * not supported.
591 				 */
592 				if ((rtm->rtm_flags & RTF_SETSRC) == 0) {
593 					error = EINVAL;
594 					goto done;
595 				}
596 				/*
597 				 * The RTF_SETSRC flag is present, check that
598 				 * the supplied src address is not the loopback
599 				 * address. This would produce martian packets.
600 				 */
601 				if (IN6_IS_ADDR_LOOPBACK(&src_addr_v6)) {
602 					error = EINVAL;
603 					goto done;
604 				}
605 				/*
606 				 * Also check that the supplied address is a
607 				 * valid, local one.
608 				 */
609 				tmp_ipif = ipif_lookup_addr_v6(&src_addr_v6,
610 				    NULL, ALL_ZONES, CONNP_TO_WQ(connp), ioc_mp,
611 				    ip_rts_request_retry, &error);
612 				if (tmp_ipif == NULL) {
613 					if (error != EINPROGRESS)
614 						error = EADDRNOTAVAIL;
615 					goto done;
616 				}
617 
618 				if (!(tmp_ipif->ipif_flags & IPIF_UP) ||
619 				    (tmp_ipif->ipif_flags &
620 				    (IPIF_NOLOCAL | IPIF_ANYCAST))) {
621 					error = EINVAL;
622 					goto done;
623 				}
624 
625 				error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
626 				    &gw_addr_v6, &src_addr_v6, rtm->rtm_flags,
627 				    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
628 				    ip_rts_request_retry, rtsap);
629 				break;
630 			}
631 			/*
632 			 * The RTF_SETSRC modifier must be associated
633 			 * to a non-null source address.
634 			 */
635 			if (rtm->rtm_flags & RTF_SETSRC) {
636 				error = EINVAL;
637 				goto done;
638 			}
639 			error = ip_rt_add_v6(&dst_addr_v6, &net_mask_v6,
640 			    &gw_addr_v6, NULL, rtm->rtm_flags,
641 			    ipif, &ire, CONNP_TO_WQ(connp), ioc_mp,
642 			    ip_rts_request_retry, rtsap);
643 			if (ipif != NULL)
644 				ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
645 			break;
646 		}
647 		if (error != 0)
648 			goto done;
649 		ASSERT(ire != NULL);
650 		rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
651 		break;
652 	case RTM_DELETE:
653 		/* if we are deleting a route, gateway is a must */
654 		if ((found_addrs & RTA_GATEWAY) == 0) {
655 			error = EINVAL;
656 			goto done;
657 		}
658 		/*
659 		 * The RTF_SETSRC modifier does not make sense
660 		 * when deleting a route.
661 		 */
662 		if (rtm->rtm_flags & RTF_SETSRC) {
663 			error = EINVAL;
664 			goto done;
665 		}
666 
667 		switch (af) {
668 		case AF_INET:
669 			/*
670 			 * If there is a source address, delete
671 			 * a MobileIP reverse tunnel.
672 			 */
673 			if (src_addr != INADDR_ANY) {
674 				error = ip_mrtun_rt_delete(src_addr,
675 				    src_ipif);
676 				break;
677 			}
678 			error = ip_rt_delete(dst_addr, net_mask, gw_addr,
679 			    found_addrs, rtm->rtm_flags, ipif, src_ipif,
680 			    B_FALSE, CONNP_TO_WQ(connp), ioc_mp,
681 			    ip_rts_request_retry);
682 			break;
683 		case AF_INET6:
684 			error = ip_rt_delete_v6(&dst_addr_v6, &net_mask_v6,
685 			    &gw_addr_v6, found_addrs, rtm->rtm_flags, ipif,
686 			    CONNP_TO_WQ(connp), ioc_mp, ip_rts_request_retry);
687 			break;
688 		}
689 		break;
690 	case RTM_GET:
691 	case RTM_CHANGE:
692 		/*
693 		 * In the case of RTM_GET, the forwarding table should be
694 		 * searched recursively with default being matched if the
695 		 * specific route doesn't exist.  Also, if a gateway was
696 		 * specified then the gateway address must also be matched.
697 		 *
698 		 * In the case of RTM_CHANGE, the gateway address (if supplied)
699 		 * is the new gateway address so matching on the gateway address
700 		 * is not done.  This can lead to ambiguity when looking up the
701 		 * route to change as usually only the destination (and netmask,
702 		 * if supplied) is used for the lookup.  However if a RTA_IFP
703 		 * sockaddr is also supplied, it can disambiguate which route to
704 		 * change provided the ambigous routes are tied to distinct
705 		 * ill's (or interface indices).  If the routes are not tied to
706 		 * any particular interfaces (for example, with traditional
707 		 * gateway routes), then a RTA_IFP sockaddr will be of no use as
708 		 * it won't match any such routes.
709 		 * RTA_SRC is not supported for RTM_GET and RTM_CHANGE,
710 		 * except when RTM_CHANGE is combined to RTF_SETSRC.
711 		 */
712 		if (((found_addrs & RTA_SRC) != 0) &&
713 		    ((rtm->rtm_type == RTM_GET) ||
714 		    !(rtm->rtm_flags & RTF_SETSRC))) {
715 			error = EOPNOTSUPP;
716 			goto done;
717 		}
718 
719 		if (rtm->rtm_type == RTM_GET) {
720 			match_flags |=
721 			    (MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE);
722 			if ((found_addrs & RTA_GATEWAY) != 0)
723 				match_flags |= MATCH_IRE_GW;
724 		}
725 		if (rtm->rtm_type == RTM_CHANGE) {
726 			if ((found_addrs & RTA_GATEWAY) &&
727 			    (rtm->rtm_flags & RTF_SETSRC)) {
728 				/*
729 				 * Do not want to change the gateway,
730 				 * but rather the source address.
731 				 */
732 				match_flags |= MATCH_IRE_GW;
733 			}
734 		}
735 
736 		/*
737 		 * If the netmask is all ones (either as supplied or as derived
738 		 * above), then first check for an IRE_LOOPBACK or
739 		 * IRE_LOCAL entry.
740 		 *
741 		 * If we didn't check for or find an IRE_LOOPBACK or IRE_LOCAL
742 		 * entry, then look in the forwarding table.
743 		 */
744 		switch (af) {
745 		case AF_INET:
746 			if (net_mask == IP_HOST_MASK) {
747 				ire = ire_ctable_lookup(dst_addr, gw_addr,
748 				    IRE_LOCAL | IRE_LOOPBACK, NULL, ALL_ZONES,
749 				    NULL, MATCH_IRE_TYPE | MATCH_IRE_GW);
750 			}
751 			if (ire == NULL) {
752 				ire = ire_ftable_lookup(dst_addr, net_mask,
753 				    gw_addr, 0, ipif, &sire, ALL_ZONES, 0,
754 				    NULL, match_flags);
755 			}
756 			break;
757 		case AF_INET6:
758 			if (IN6_ARE_ADDR_EQUAL(&net_mask_v6, &ipv6_all_ones)) {
759 				ire = ire_ctable_lookup_v6(&dst_addr_v6,
760 				    &gw_addr_v6, IRE_LOCAL | IRE_LOOPBACK, NULL,
761 				    ALL_ZONES, NULL,
762 				    MATCH_IRE_TYPE | MATCH_IRE_GW);
763 			}
764 			if (ire == NULL) {
765 				ire = ire_ftable_lookup_v6(&dst_addr_v6,
766 				    &net_mask_v6, &gw_addr_v6, 0, ipif, &sire,
767 				    ALL_ZONES, 0, NULL, match_flags);
768 			}
769 			break;
770 		}
771 
772 		if (ire == NULL) {
773 			error = ESRCH;
774 			goto done;
775 		}
776 		/* we know the IRE before we come here */
777 		switch (rtm->rtm_type) {
778 		case RTM_GET:
779 			mp1 = rts_rtmget(mp, ire, sire, af);
780 			if (mp1 == NULL) {
781 				error = ENOBUFS;
782 				goto done;
783 			}
784 			freemsg(mp);
785 			mp = mp1;
786 			rtm = (rt_msghdr_t *)mp->b_rptr;
787 			break;
788 		case RTM_CHANGE:
789 			/*
790 			 * Do not allow to the multirouting state of a route
791 			 * to be changed. This aims to prevent undesirable
792 			 * stages where both multirt and non-multirt routes
793 			 * for the same destination are declared.
794 			 */
795 			if ((ire->ire_flags & RTF_MULTIRT) !=
796 			    (rtm->rtm_flags & RTF_MULTIRT)) {
797 				error = EINVAL;
798 				goto done;
799 			}
800 			/*
801 			 * Note that we do not need to do
802 			 * ire_flush_cache_*(IRE_FLUSH_ADD) as a change
803 			 * in metrics or gateway will not affect existing
804 			 * routes since it does not create a more specific
805 			 * route.
806 			 */
807 			switch (af) {
808 			case AF_INET:
809 				ire_flush_cache_v4(ire, IRE_FLUSH_DELETE);
810 				if ((found_addrs & RTA_GATEWAY) != 0 &&
811 				    (ire->ire_gateway_addr != gw_addr)) {
812 					ire->ire_gateway_addr = gw_addr;
813 				}
814 
815 				if (rtsap != NULL) {
816 					ga.ga_af = AF_INET;
817 					IN6_IPADDR_TO_V4MAPPED(
818 					    ire->ire_gateway_addr, &ga.ga_addr);
819 
820 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
821 					if (gcgrp == NULL) {
822 						error = ENOMEM;
823 						goto done;
824 					}
825 				}
826 
827 				if ((found_addrs & RTA_SRC) != 0 &&
828 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
829 				    (ire->ire_src_addr != src_addr)) {
830 
831 					if (src_addr != INADDR_ANY) {
832 						/*
833 						 * The RTF_SETSRC flag is
834 						 * present, check that the
835 						 * supplied src address is not
836 						 * the loopback address. This
837 						 * would produce martian
838 						 * packets.
839 						 */
840 						if (src_addr ==
841 						    htonl(INADDR_LOOPBACK)) {
842 							error = EINVAL;
843 							goto done;
844 						}
845 						/*
846 						 * Also check that the the
847 						 * supplied addr is a valid
848 						 * local address.
849 						 */
850 						tmp_ipif = ipif_lookup_addr(
851 						    src_addr, NULL, ALL_ZONES,
852 						    CONNP_TO_WQ(connp), ioc_mp,
853 						    ip_rts_request_retry,
854 						    &error);
855 						if (tmp_ipif == NULL) {
856 							error = (error ==
857 							    EINPROGRESS) ?
858 							    error :
859 							    EADDRNOTAVAIL;
860 							goto done;
861 						}
862 
863 						if (!(tmp_ipif->ipif_flags &
864 						    IPIF_UP) ||
865 						    (tmp_ipif->ipif_flags &
866 						    (IPIF_NOLOCAL |
867 						    IPIF_ANYCAST))) {
868 							error = EINVAL;
869 							goto done;
870 						}
871 						ire->ire_flags |= RTF_SETSRC;
872 					} else {
873 						ire->ire_flags &= ~RTF_SETSRC;
874 					}
875 					ire->ire_src_addr = src_addr;
876 				}
877 				break;
878 			case AF_INET6:
879 				ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
880 				mutex_enter(&ire->ire_lock);
881 				if ((found_addrs & RTA_GATEWAY) != 0 &&
882 				    !IN6_ARE_ADDR_EQUAL(
883 				    &ire->ire_gateway_addr_v6, &gw_addr_v6)) {
884 					ire->ire_gateway_addr_v6 = gw_addr_v6;
885 				}
886 
887 				if (rtsap != NULL) {
888 					ga.ga_af = AF_INET6;
889 					ga.ga_addr = ire->ire_gateway_addr_v6;
890 
891 					gcgrp = gcgrp_lookup(&ga, B_TRUE);
892 					if (gcgrp == NULL) {
893 						error = ENOMEM;
894 						goto done;
895 					}
896 				}
897 
898 				if ((found_addrs & RTA_SRC) != 0 &&
899 				    (rtm->rtm_flags & RTF_SETSRC) != 0 &&
900 				    !IN6_ARE_ADDR_EQUAL(
901 					&ire->ire_src_addr_v6, &src_addr_v6)) {
902 
903 					if (!IN6_IS_ADDR_UNSPECIFIED(
904 					    &src_addr_v6)) {
905 						/*
906 						 * The RTF_SETSRC flag is
907 						 * present, check that the
908 						 * supplied src address is not
909 						 * the loopback address. This
910 						 * would produce martian
911 						 * packets.
912 						 */
913 						if (IN6_IS_ADDR_LOOPBACK(
914 						    &src_addr_v6)) {
915 							mutex_exit(
916 							    &ire->ire_lock);
917 							error = EINVAL;
918 							goto done;
919 						}
920 						/*
921 						 * Also check that the the
922 						 * supplied addr is a valid
923 						 * local address.
924 						 */
925 						tmp_ipif = ipif_lookup_addr_v6(
926 						    &src_addr_v6, NULL,
927 						    ALL_ZONES,
928 						    CONNP_TO_WQ(connp), ioc_mp,
929 						    ip_rts_request_retry,
930 						    &error);
931 						if (tmp_ipif == NULL) {
932 							mutex_exit(
933 							    &ire->ire_lock);
934 							error = (error ==
935 							    EINPROGRESS) ?
936 							    error :
937 							    EADDRNOTAVAIL;
938 							goto done;
939 						}
940 						if (!(tmp_ipif->ipif_flags &
941 						    IPIF_UP) ||
942 						    (tmp_ipif->ipif_flags &
943 						    (IPIF_NOLOCAL |
944 						    IPIF_ANYCAST))) {
945 							mutex_exit(
946 							    &ire->ire_lock);
947 							error = EINVAL;
948 							goto done;
949 						}
950 						ire->ire_flags |= RTF_SETSRC;
951 					} else {
952 						ire->ire_flags &= ~RTF_SETSRC;
953 					}
954 					ire->ire_src_addr_v6 = src_addr_v6;
955 				}
956 				mutex_exit(&ire->ire_lock);
957 				break;
958 			}
959 
960 			if (rtsap != NULL) {
961 				in_addr_t ga_addr4;
962 
963 				ASSERT(gcgrp != NULL);
964 
965 				/*
966 				 * Create and add the security attribute to
967 				 * prefix IRE; it will add a reference to the
968 				 * group upon allocating a new entry.  If it
969 				 * finds an already-existing entry for the
970 				 * security attribute, it simply returns it
971 				 * and no new group reference is made.
972 				 */
973 				gc = gc_create(rtsap, gcgrp, &gcgrp_xtraref);
974 				if (gc == NULL ||
975 				    (error = tsol_ire_init_gwattr(ire,
976 				    ire->ire_ipversion, gc, NULL)) != 0) {
977 					if (gc != NULL) {
978 						GC_REFRELE(gc);
979 					} else {
980 						/* gc_create failed */
981 						error = ENOMEM;
982 					}
983 					goto done;
984 				}
985 
986 				/*
987 				 * Now delete any existing gateway IRE caches
988 				 * as well as all caches using the gateway,
989 				 * and allow them to be created on demand
990 				 * through ip_newroute{_v6}.
991 				 */
992 				IN6_V4MAPPED_TO_IPADDR(&ga.ga_addr, ga_addr4);
993 				if (af == AF_INET) {
994 					ire_clookup_delete_cache_gw(
995 					    ga_addr4, ALL_ZONES);
996 				} else {
997 					ire_clookup_delete_cache_gw_v6(
998 					    &ga.ga_addr, ALL_ZONES);
999 				}
1000 			}
1001 			rts_setmetrics(ire, rtm->rtm_inits, &rtm->rtm_rmx);
1002 			break;
1003 		}
1004 		break;
1005 	default:
1006 		error = EOPNOTSUPP;
1007 		break;
1008 	}
1009 done:
1010 	if (ire != NULL)
1011 		ire_refrele(ire);
1012 	if (sire != NULL)
1013 		ire_refrele(sire);
1014 	if (ipif != NULL)
1015 		ipif_refrele(ipif);
1016 	if (src_ipif != NULL)
1017 		ipif_refrele(src_ipif);
1018 	if (tmp_ipif != NULL)
1019 		ipif_refrele(tmp_ipif);
1020 
1021 	if (gcgrp_xtraref)
1022 		GCGRP_REFRELE(gcgrp);
1023 
1024 	if (error == EINPROGRESS)
1025 		return (error);
1026 	if (rtm != NULL) {
1027 		ASSERT(mp->b_wptr <= mp->b_datap->db_lim);
1028 		if (error != 0) {
1029 			rtm->rtm_errno = error;
1030 			/* Send error ACK */
1031 			ip1dbg(("ip_rts_request: error %d\n", error));
1032 		} else {
1033 			rtm->rtm_flags |= RTF_DONE;
1034 			/* OK ACK already set up by caller except this */
1035 			ip2dbg(("ip_rts_request: OK ACK\n"));
1036 		}
1037 		rts_queue_input(mp, q, af);
1038 	}
1039 	iocp->ioc_error = error;
1040 	ioc_mp->b_datap->db_type = M_IOCACK;
1041 	if (iocp->ioc_error != 0)
1042 		iocp->ioc_count = 0;
1043 	qreply(q, ioc_mp);
1044 	/* conn was refheld in ip_wput_ioctl. */
1045 	CONN_OPER_PENDING_DONE(connp);
1046 
1047 	return (error);
1048 }
1049 
1050 /*
1051  * Build a reply to the RTM_GET request contained in the given message block
1052  * using the retrieved IRE of the destination address, the parent IRE (if it
1053  * exists) and the address family.
1054  *
1055  * Returns a pointer to a message block containing the reply if successful,
1056  * otherwise NULL is returned.
1057  */
1058 static mblk_t *
1059 rts_rtmget(mblk_t *mp, ire_t *ire, ire_t *sire, sa_family_t af)
1060 {
1061 	rt_msghdr_t	*rtm;
1062 	rt_msghdr_t	*new_rtm;
1063 	mblk_t		*new_mp;
1064 	int		rtm_addrs;
1065 	int		rtm_flags;
1066 	in6_addr_t	gw_addr_v6;
1067 	tsol_ire_gw_secattr_t *attrp = NULL;
1068 	tsol_gc_t	*gc = NULL;
1069 	tsol_gcgrp_t	*gcgrp = NULL;
1070 	int		sacnt = 0;
1071 
1072 	ASSERT(ire->ire_ipif != NULL);
1073 	rtm = (rt_msghdr_t *)mp->b_rptr;
1074 
1075 	if (sire != NULL && sire->ire_gw_secattr != NULL)
1076 		attrp = sire->ire_gw_secattr;
1077 	else if (ire->ire_gw_secattr != NULL)
1078 		attrp = ire->ire_gw_secattr;
1079 
1080 	if (attrp != NULL) {
1081 		mutex_enter(&attrp->igsa_lock);
1082 		if ((gc = attrp->igsa_gc) != NULL) {
1083 			gcgrp = gc->gc_grp;
1084 			ASSERT(gcgrp != NULL);
1085 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1086 			sacnt = 1;
1087 		} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {
1088 			rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
1089 			gc = gcgrp->gcgrp_head;
1090 			sacnt = gcgrp->gcgrp_count;
1091 		}
1092 		mutex_exit(&attrp->igsa_lock);
1093 
1094 		/* do nothing if there's no gc to report */
1095 		if (gc == NULL) {
1096 			ASSERT(sacnt == 0);
1097 			if (gcgrp != NULL) {
1098 				/* we might as well drop the lock now */
1099 				rw_exit(&gcgrp->gcgrp_rwlock);
1100 				gcgrp = NULL;
1101 			}
1102 			attrp = NULL;
1103 		}
1104 
1105 		ASSERT(gc == NULL || (gcgrp != NULL &&
1106 		    RW_LOCK_HELD(&gcgrp->gcgrp_rwlock)));
1107 	}
1108 	ASSERT(sacnt == 0 || gc != NULL);
1109 
1110 	/*
1111 	 * Always return RTA_DST, RTA_GATEWAY and RTA_NETMASK.
1112 	 *
1113 	 * The 4.4BSD-Lite2 code (net/rtsock.c) returns both
1114 	 * RTA_IFP and RTA_IFA if either is defined, and also
1115 	 * returns RTA_BRD if the appropriate interface is
1116 	 * point-to-point.
1117 	 */
1118 	rtm_addrs = (RTA_DST | RTA_GATEWAY | RTA_NETMASK);
1119 	if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
1120 		rtm_addrs |= (RTA_IFP | RTA_IFA);
1121 		if (ire->ire_ipif->ipif_flags & IPIF_POINTOPOINT)
1122 			rtm_addrs |= RTA_BRD;
1123 	}
1124 
1125 	new_mp = rts_alloc_msg(RTM_GET, rtm_addrs, af, sacnt);
1126 	if (new_mp == NULL) {
1127 		if (gcgrp != NULL)
1128 			rw_exit(&gcgrp->gcgrp_rwlock);
1129 		return (NULL);
1130 	}
1131 
1132 	/*
1133 	 * We set the destination address, gateway address,
1134 	 * netmask and flags in the RTM_GET response depending
1135 	 * on whether we found a parent IRE or not.
1136 	 * In particular, if we did find a parent IRE during the
1137 	 * recursive search, use that IRE's gateway address.
1138 	 * Otherwise, we use the IRE's source address for the
1139 	 * gateway address.
1140 	 */
1141 	ASSERT(af == AF_INET || af == AF_INET6);
1142 	switch (af) {
1143 	case AF_INET:
1144 		if (sire == NULL) {
1145 			rtm_flags = ire->ire_flags;
1146 			rts_fill_msg(RTM_GET, rtm_addrs, ire->ire_addr,
1147 			    ire->ire_mask, ire->ire_src_addr, ire->ire_src_addr,
1148 			    ire->ire_ipif->ipif_pp_dst_addr, 0, ire->ire_ipif,
1149 			    new_mp, sacnt, gc);
1150 		} else {
1151 			if (sire->ire_flags & RTF_SETSRC)
1152 				rtm_addrs |= RTA_SRC;
1153 
1154 			rtm_flags = sire->ire_flags;
1155 			rts_fill_msg(RTM_GET, rtm_addrs, sire->ire_addr,
1156 			    sire->ire_mask, sire->ire_gateway_addr,
1157 			    (sire->ire_flags & RTF_SETSRC) ?
1158 				sire->ire_src_addr : ire->ire_src_addr,
1159 			    ire->ire_ipif->ipif_pp_dst_addr,
1160 			    0, ire->ire_ipif, new_mp, sacnt, gc);
1161 		}
1162 		break;
1163 	case AF_INET6:
1164 		if (sire == NULL) {
1165 			rtm_flags = ire->ire_flags;
1166 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &ire->ire_addr_v6,
1167 			    &ire->ire_mask_v6, &ire->ire_src_addr_v6,
1168 			    &ire->ire_src_addr_v6,
1169 			    &ire->ire_ipif->ipif_v6pp_dst_addr,
1170 			    &ipv6_all_zeros, ire->ire_ipif, new_mp,
1171 			    sacnt, gc);
1172 		} else {
1173 			if (sire->ire_flags & RTF_SETSRC)
1174 				rtm_addrs |= RTA_SRC;
1175 
1176 			rtm_flags = sire->ire_flags;
1177 			mutex_enter(&sire->ire_lock);
1178 			gw_addr_v6 = sire->ire_gateway_addr_v6;
1179 			mutex_exit(&sire->ire_lock);
1180 			rts_fill_msg_v6(RTM_GET, rtm_addrs, &sire->ire_addr_v6,
1181 			    &sire->ire_mask_v6, &gw_addr_v6,
1182 			    (sire->ire_flags & RTF_SETSRC) ?
1183 				&sire->ire_src_addr_v6 : &ire->ire_src_addr_v6,
1184 			    &ire->ire_ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1185 			    ire->ire_ipif, new_mp, sacnt, gc);
1186 		}
1187 		break;
1188 	}
1189 
1190 	if (gcgrp != NULL)
1191 		rw_exit(&gcgrp->gcgrp_rwlock);
1192 
1193 	new_rtm = (rt_msghdr_t *)new_mp->b_rptr;
1194 
1195 	/*
1196 	 * The rtm_msglen, rtm_version and rtm_type fields in
1197 	 * RTM_GET response are filled in by rts_fill_msg.
1198 	 *
1199 	 * rtm_addrs and rtm_flags are filled in based on what
1200 	 * was requested and the state of the IREs looked up
1201 	 * above.
1202 	 *
1203 	 * rtm_inits and rtm_rmx are filled in with metrics
1204 	 * based on whether a parent IRE was found or not.
1205 	 *
1206 	 * TODO: rtm_index and rtm_use should probably be
1207 	 * filled in with something resonable here and not just
1208 	 * copied from the request.
1209 	 */
1210 	new_rtm->rtm_index = rtm->rtm_index;
1211 	new_rtm->rtm_pid = rtm->rtm_pid;
1212 	new_rtm->rtm_seq = rtm->rtm_seq;
1213 	new_rtm->rtm_use = rtm->rtm_use;
1214 	new_rtm->rtm_addrs = rtm_addrs;
1215 	new_rtm->rtm_flags = rtm_flags;
1216 	if (sire == NULL)
1217 		new_rtm->rtm_inits = rts_getmetrics(ire, &new_rtm->rtm_rmx);
1218 	else
1219 		new_rtm->rtm_inits = rts_getmetrics(sire, &new_rtm->rtm_rmx);
1220 
1221 	return (new_mp);
1222 }
1223 
1224 /*
1225  * Fill the given if_data_t with interface statistics.
1226  */
1227 static void
1228 rts_getifdata(if_data_t *if_data, const ipif_t *ipif)
1229 {
1230 	if_data->ifi_type = ipif->ipif_type;	/* ethernet, tokenring, etc */
1231 	if_data->ifi_addrlen = 0;		/* media address length */
1232 	if_data->ifi_hdrlen = 0;		/* media header length */
1233 	if_data->ifi_mtu = ipif->ipif_mtu;	/* maximum transmission unit */
1234 	if_data->ifi_metric = ipif->ipif_metric; /* metric (external only) */
1235 	if_data->ifi_baudrate = 0;		/* linespeed */
1236 
1237 	if_data->ifi_ipackets = 0;		/* packets received on if */
1238 	if_data->ifi_ierrors = 0;		/* input errors on interface */
1239 	if_data->ifi_opackets = 0;		/* packets sent on interface */
1240 	if_data->ifi_oerrors = 0;		/* output errors on if */
1241 	if_data->ifi_collisions = 0;		/* collisions on csma if */
1242 	if_data->ifi_ibytes = 0;		/* total number received */
1243 	if_data->ifi_obytes = 0;		/* total number sent */
1244 	if_data->ifi_imcasts = 0;		/* multicast packets received */
1245 	if_data->ifi_omcasts = 0;		/* multicast packets sent */
1246 	if_data->ifi_iqdrops = 0;		/* dropped on input */
1247 	if_data->ifi_noproto = 0;		/* destined for unsupported */
1248 						/* protocol. */
1249 }
1250 
1251 /*
1252  * Set the metrics on a forwarding table route.
1253  */
1254 static void
1255 rts_setmetrics(ire_t *ire, uint_t which, rt_metrics_t *metrics)
1256 {
1257 	clock_t		rtt;
1258 	clock_t		rtt_sd;
1259 	ipif_t		*ipif;
1260 	ifrt_t		*ifrt;
1261 	mblk_t		*mp;
1262 	in6_addr_t	gw_addr_v6;
1263 
1264 	/*
1265 	 * Bypass obtaining the lock and searching ipif_saved_ire_mp in the
1266 	 * common case of no metrics.
1267 	 */
1268 	if (which == 0)
1269 		return;
1270 	ire->ire_uinfo.iulp_set = B_TRUE;
1271 
1272 	/*
1273 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1274 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1275 	 * microseconds.
1276 	 */
1277 	if (which & RTV_RTT)
1278 		rtt = metrics->rmx_rtt / 1000;
1279 	if (which & RTV_RTTVAR)
1280 		rtt_sd = metrics->rmx_rttvar / 1000;
1281 
1282 	/*
1283 	 * Update the metrics in the IRE itself.
1284 	 */
1285 	mutex_enter(&ire->ire_lock);
1286 	if (which & RTV_MTU)
1287 		ire->ire_max_frag = metrics->rmx_mtu;
1288 	if (which & RTV_RTT)
1289 		ire->ire_uinfo.iulp_rtt = rtt;
1290 	if (which & RTV_SSTHRESH)
1291 		ire->ire_uinfo.iulp_ssthresh = metrics->rmx_ssthresh;
1292 	if (which & RTV_RTTVAR)
1293 		ire->ire_uinfo.iulp_rtt_sd = rtt_sd;
1294 	if (which & RTV_SPIPE)
1295 		ire->ire_uinfo.iulp_spipe = metrics->rmx_sendpipe;
1296 	if (which & RTV_RPIPE)
1297 		ire->ire_uinfo.iulp_rpipe = metrics->rmx_recvpipe;
1298 	mutex_exit(&ire->ire_lock);
1299 
1300 	/*
1301 	 * Search through the ifrt_t chain hanging off the IPIF in order to
1302 	 * reflect the metric change there.
1303 	 */
1304 	ipif = ire->ire_ipif;
1305 	if (ipif == NULL)
1306 		return;
1307 	ASSERT((ipif->ipif_isv6 && ire->ire_ipversion == IPV6_VERSION) ||
1308 	    ((!ipif->ipif_isv6 && ire->ire_ipversion == IPV4_VERSION)));
1309 	if (ipif->ipif_isv6) {
1310 		mutex_enter(&ire->ire_lock);
1311 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1312 		mutex_exit(&ire->ire_lock);
1313 	}
1314 	mutex_enter(&ipif->ipif_saved_ire_lock);
1315 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1316 		/*
1317 		 * On a given ipif, the triple of address, gateway and mask is
1318 		 * unique for each saved IRE (in the case of ordinary interface
1319 		 * routes, the gateway address is all-zeroes).
1320 		 */
1321 		ifrt = (ifrt_t *)mp->b_rptr;
1322 		if (ipif->ipif_isv6) {
1323 			if (!IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1324 			    &ire->ire_addr_v6) ||
1325 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1326 			    &gw_addr_v6) ||
1327 			    !IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1328 			    &ire->ire_mask_v6))
1329 				continue;
1330 		} else {
1331 			if (ifrt->ifrt_addr != ire->ire_addr ||
1332 			    ifrt->ifrt_gateway_addr != ire->ire_gateway_addr ||
1333 			    ifrt->ifrt_mask != ire->ire_mask)
1334 				continue;
1335 		}
1336 		if (which & RTV_MTU)
1337 			ifrt->ifrt_max_frag = metrics->rmx_mtu;
1338 		if (which & RTV_RTT)
1339 			ifrt->ifrt_iulp_info.iulp_rtt = rtt;
1340 		if (which & RTV_SSTHRESH) {
1341 			ifrt->ifrt_iulp_info.iulp_ssthresh =
1342 			    metrics->rmx_ssthresh;
1343 		}
1344 		if (which & RTV_RTTVAR)
1345 			ifrt->ifrt_iulp_info.iulp_rtt_sd = metrics->rmx_rttvar;
1346 		if (which & RTV_SPIPE)
1347 			ifrt->ifrt_iulp_info.iulp_spipe = metrics->rmx_sendpipe;
1348 		if (which & RTV_RPIPE)
1349 			ifrt->ifrt_iulp_info.iulp_rpipe = metrics->rmx_recvpipe;
1350 		break;
1351 	}
1352 	mutex_exit(&ipif->ipif_saved_ire_lock);
1353 }
1354 
1355 /*
1356  * Get the metrics from a forwarding table route.
1357  */
1358 static int
1359 rts_getmetrics(ire_t *ire, rt_metrics_t *metrics)
1360 {
1361 	int	metrics_set = 0;
1362 
1363 	bzero(metrics, sizeof (rt_metrics_t));
1364 	/*
1365 	 * iulp_rtt and iulp_rtt_sd are in milliseconds, but 4.4BSD-Lite2's
1366 	 * <net/route.h> says: rmx_rtt and rmx_rttvar are stored as
1367 	 * microseconds.
1368 	 */
1369 	metrics->rmx_rtt = ire->ire_uinfo.iulp_rtt * 1000;
1370 	metrics_set |= RTV_RTT;
1371 	metrics->rmx_mtu = ire->ire_max_frag;
1372 	metrics_set |= RTV_MTU;
1373 	metrics->rmx_ssthresh = ire->ire_uinfo.iulp_ssthresh;
1374 	metrics_set |= RTV_SSTHRESH;
1375 	metrics->rmx_rttvar = ire->ire_uinfo.iulp_rtt_sd * 1000;
1376 	metrics_set |= RTV_RTTVAR;
1377 	metrics->rmx_sendpipe = ire->ire_uinfo.iulp_spipe;
1378 	metrics_set |= RTV_SPIPE;
1379 	metrics->rmx_recvpipe = ire->ire_uinfo.iulp_rpipe;
1380 	metrics_set |= RTV_RPIPE;
1381 	return (metrics_set);
1382 }
1383 
1384 /*
1385  * Takes a pointer to a routing message and extracts necessary info by looking
1386  * at the rtm->rtm_addrs bits and store the requested sockaddrs in the pointers
1387  * passed (all of which must be valid).
1388  *
1389  * The bitmask of sockaddrs actually found in the message is returned, or zero
1390  * is returned in the case of an error.
1391  */
1392 static int
1393 rts_getaddrs(rt_msghdr_t *rtm, in6_addr_t *dst_addrp, in6_addr_t *gw_addrp,
1394     in6_addr_t *net_maskp, in6_addr_t *authorp, in6_addr_t *if_addrp,
1395     in6_addr_t *in_src_addrp, ushort_t *indexp, ushort_t *src_indexp,
1396     sa_family_t *afp, tsol_rtsecattr_t *rtsecattr, int *error)
1397 {
1398 	struct sockaddr *sa;
1399 	int	i;
1400 	int	addr_bits;
1401 	int	length;
1402 	int	found_addrs = 0;
1403 	caddr_t	cp;
1404 	size_t	size;
1405 	struct sockaddr_dl *sdl;
1406 
1407 	*dst_addrp = ipv6_all_zeros;
1408 	*gw_addrp = ipv6_all_zeros;
1409 	*net_maskp = ipv6_all_zeros;
1410 	*authorp = ipv6_all_zeros;
1411 	*if_addrp = ipv6_all_zeros;
1412 	*in_src_addrp = ipv6_all_zeros;
1413 	*indexp = 0;
1414 	*src_indexp = 0;
1415 	*afp = AF_UNSPEC;
1416 	rtsecattr->rtsa_cnt = 0;
1417 	*error = 0;
1418 
1419 	/*
1420 	 * At present we handle only RTA_DST, RTA_GATEWAY, RTA_NETMASK, RTA_IFP,
1421 	 * RTA_IFA and RTA_AUTHOR.  The rest will be added as we need them.
1422 	 */
1423 	cp = (caddr_t)&rtm[1];
1424 	length = rtm->rtm_msglen;
1425 	for (i = 0; (i < RTA_NUMBITS) && ((cp - (caddr_t)rtm) < length); i++) {
1426 		/*
1427 		 * The address family we are working with starts out as
1428 		 * AF_UNSPEC, but is set to the one specified with the
1429 		 * destination address.
1430 		 *
1431 		 * If the "working" address family that has been set to
1432 		 * something other than AF_UNSPEC, then the address family of
1433 		 * subsequent sockaddrs must either be AF_UNSPEC (for
1434 		 * compatibility with older programs) or must be the same as our
1435 		 * "working" one.
1436 		 *
1437 		 * This code assumes that RTA_DST (1) comes first in the loop.
1438 		 */
1439 		sa = (struct sockaddr *)cp;
1440 		addr_bits = (rtm->rtm_addrs & (1 << i));
1441 		if (addr_bits == 0)
1442 			continue;
1443 		switch (addr_bits) {
1444 		case RTA_DST:
1445 			size = rts_copyfromsockaddr(sa, dst_addrp);
1446 			*afp = sa->sa_family;
1447 			break;
1448 		case RTA_GATEWAY:
1449 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1450 				return (0);
1451 			size = rts_copyfromsockaddr(sa, gw_addrp);
1452 			break;
1453 		case RTA_NETMASK:
1454 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1455 				return (0);
1456 			size = rts_copyfromsockaddr(sa, net_maskp);
1457 			break;
1458 		case RTA_IFP:
1459 			if (sa->sa_family != AF_LINK &&
1460 			    sa->sa_family != AF_UNSPEC)
1461 				return (0);
1462 			sdl = (struct sockaddr_dl *)cp;
1463 			*indexp = sdl->sdl_index;
1464 			size = sizeof (struct sockaddr_dl);
1465 			break;
1466 		case RTA_SRC:
1467 			/* Source address of the incoming packet */
1468 			size = rts_copyfromsockaddr(sa, in_src_addrp);
1469 			*afp = sa->sa_family;
1470 			break;
1471 		case RTA_SRCIFP:
1472 			/* Return incoming interface index pointer */
1473 			if (sa->sa_family != AF_LINK &&
1474 			    sa->sa_family != AF_UNSPEC)
1475 				return (0);
1476 			sdl = (struct sockaddr_dl *)cp;
1477 			*src_indexp = sdl->sdl_index;
1478 			size = sizeof (struct sockaddr_dl);
1479 			break;
1480 		case RTA_IFA:
1481 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1482 				return (0);
1483 			size = rts_copyfromsockaddr(sa, if_addrp);
1484 			break;
1485 		case RTA_AUTHOR:
1486 			if (sa->sa_family != *afp && sa->sa_family != AF_UNSPEC)
1487 				return (0);
1488 			size = rts_copyfromsockaddr(sa, authorp);
1489 			break;
1490 		default:
1491 			return (0);
1492 		}
1493 		if (size == 0)
1494 			return (0);
1495 		cp += size;
1496 		found_addrs |= addr_bits;
1497 	}
1498 
1499 	/*
1500 	 * Parse the routing message and look for any security-
1501 	 * related attributes for the route.  For each valid
1502 	 * attribute, allocate/obtain the corresponding kernel
1503 	 * route security attributes.
1504 	 */
1505 	*error = tsol_rtsa_init(rtm, rtsecattr, cp);
1506 	ASSERT(rtsecattr->rtsa_cnt <= TSOL_RTSA_REQUEST_MAX);
1507 
1508 	return (found_addrs);
1509 }
1510 
1511 /*
1512  * Fills the message with the given info.
1513  */
1514 static void
1515 rts_fill_msg(int type, int rtm_addrs, ipaddr_t dst, ipaddr_t mask,
1516     ipaddr_t gateway, ipaddr_t src_addr, ipaddr_t brd_addr, ipaddr_t author,
1517     const ipif_t *ipif, mblk_t *mp, uint_t sacnt, const tsol_gc_t *gc)
1518 {
1519 	rt_msghdr_t	*rtm;
1520 	sin_t		*sin;
1521 	size_t		data_size, header_size;
1522 	uchar_t		*cp;
1523 	int		i;
1524 
1525 	ASSERT(mp != NULL);
1526 	ASSERT(sacnt == 0 || gc != NULL);
1527 	/*
1528 	 * First find the type of the message
1529 	 * and its length.
1530 	 */
1531 	header_size = rts_header_msg_size(type);
1532 	/*
1533 	 * Now find the size of the data
1534 	 * that follows the message header.
1535 	 */
1536 	data_size = rts_data_msg_size(rtm_addrs, AF_INET, sacnt);
1537 
1538 	rtm = (rt_msghdr_t *)mp->b_rptr;
1539 	mp->b_wptr = &mp->b_rptr[header_size];
1540 	cp = mp->b_wptr;
1541 	bzero(cp, data_size);
1542 	for (i = 0; i < RTA_NUMBITS; i++) {
1543 		sin = (sin_t *)cp;
1544 		switch (rtm_addrs & (1 << i)) {
1545 		case RTA_DST:
1546 			sin->sin_addr.s_addr = dst;
1547 			sin->sin_family = AF_INET;
1548 			cp += sizeof (sin_t);
1549 			break;
1550 		case RTA_GATEWAY:
1551 			sin->sin_addr.s_addr = gateway;
1552 			sin->sin_family = AF_INET;
1553 			cp += sizeof (sin_t);
1554 			break;
1555 		case RTA_NETMASK:
1556 			sin->sin_addr.s_addr = mask;
1557 			sin->sin_family = AF_INET;
1558 			cp += sizeof (sin_t);
1559 			break;
1560 		case RTA_IFP:
1561 			cp += ill_dls_info((struct sockaddr_dl *)cp, ipif);
1562 			break;
1563 		case RTA_SRCIFP:
1564 			/*
1565 			 * RTA_SRCIFP is not yet supported
1566 			 * for RTM_GET and RTM_CHANGE
1567 			 */
1568 			break;
1569 		case RTA_IFA:
1570 		case RTA_SRC:
1571 			sin->sin_addr.s_addr = src_addr;
1572 			sin->sin_family = AF_INET;
1573 			cp += sizeof (sin_t);
1574 			break;
1575 		case RTA_AUTHOR:
1576 			sin->sin_addr.s_addr = author;
1577 			sin->sin_family = AF_INET;
1578 			cp += sizeof (sin_t);
1579 			break;
1580 		case RTA_BRD:
1581 			/*
1582 			 * RTA_BRD is used typically to specify a point-to-point
1583 			 * destination address.
1584 			 */
1585 			sin->sin_addr.s_addr = brd_addr;
1586 			sin->sin_family = AF_INET;
1587 			cp += sizeof (sin_t);
1588 			break;
1589 		}
1590 	}
1591 
1592 	if (gc != NULL) {
1593 		rtm_ext_t *rtm_ext;
1594 		struct rtsa_s *rp_dst;
1595 		tsol_rtsecattr_t *rsap;
1596 		int i;
1597 
1598 		ASSERT(gc->gc_grp != NULL);
1599 		ASSERT(RW_LOCK_HELD(&gc->gc_grp->gcgrp_rwlock));
1600 		ASSERT(sacnt > 0);
1601 
1602 		rtm_ext = (rtm_ext_t *)cp;
1603 		rtm_ext->rtmex_type = RTMEX_GATEWAY_SECATTR;
1604 		rtm_ext->rtmex_len = TSOL_RTSECATTR_SIZE(sacnt);
1605 
1606 		rsap = (tsol_rtsecattr_t *)(rtm_ext + 1);
1607 		rsap->rtsa_cnt = sacnt;
1608 		rp_dst = rsap->rtsa_attr;
1609 
1610 		for (i = 0; i < sacnt; i++, gc = gc->gc_next, rp_dst++) {
1611 			ASSERT(gc->gc_db != NULL);
1612 			bcopy(&gc->gc_db->gcdb_attr, rp_dst, sizeof (*rp_dst));
1613 		}
1614 		cp = (uchar_t *)rp_dst;
1615 	}
1616 
1617 	mp->b_wptr = cp;
1618 	mp->b_cont = NULL;
1619 	/*
1620 	 * set the fields that are common to
1621 	 * to different messages.
1622 	 */
1623 	rtm->rtm_msglen = (short)(header_size + data_size);
1624 	rtm->rtm_version = RTM_VERSION;
1625 	rtm->rtm_type = (uchar_t)type;
1626 }
1627 
1628 /*
1629  * Allocates and initializes a routing socket message.
1630  */
1631 mblk_t *
1632 rts_alloc_msg(int type, int rtm_addrs, sa_family_t af, uint_t sacnt)
1633 {
1634 	size_t	length;
1635 	mblk_t	*mp;
1636 
1637 	length = RTS_MSG_SIZE(type, rtm_addrs, af, sacnt);
1638 	mp = allocb(length, BPRI_MED);
1639 	if (mp == NULL)
1640 		return (mp);
1641 	bzero(mp->b_rptr, length);
1642 	return (mp);
1643 }
1644 
1645 /*
1646  * Returns the size of the routing
1647  * socket message header size.
1648  */
1649 size_t
1650 rts_header_msg_size(int type)
1651 {
1652 	switch (type) {
1653 	case RTM_DELADDR:
1654 	case RTM_NEWADDR:
1655 		return (sizeof (ifa_msghdr_t));
1656 	case RTM_IFINFO:
1657 		return (sizeof (if_msghdr_t));
1658 	default:
1659 		return (sizeof (rt_msghdr_t));
1660 	}
1661 }
1662 
1663 /*
1664  * Returns the size of the message needed with the given rtm_addrs and family.
1665  *
1666  * It is assumed that all of the sockaddrs (with the exception of RTA_IFP) are
1667  * of the same family (currently either AF_INET or AF_INET6).
1668  */
1669 size_t
1670 rts_data_msg_size(int rtm_addrs, sa_family_t af, uint_t sacnt)
1671 {
1672 	int	i;
1673 	size_t	length = 0;
1674 
1675 	for (i = 0; i < RTA_NUMBITS; i++) {
1676 		switch (rtm_addrs & (1 << i)) {
1677 		case RTA_IFP:
1678 			length += sizeof (struct sockaddr_dl);
1679 			break;
1680 		case RTA_DST:
1681 		case RTA_GATEWAY:
1682 		case RTA_NETMASK:
1683 		case RTA_SRC:
1684 		case RTA_SRCIFP:
1685 		case RTA_IFA:
1686 		case RTA_AUTHOR:
1687 		case RTA_BRD:
1688 			ASSERT(af == AF_INET || af == AF_INET6);
1689 			switch (af) {
1690 			case AF_INET:
1691 				length += sizeof (sin_t);
1692 				break;
1693 			case AF_INET6:
1694 				length += sizeof (sin6_t);
1695 				break;
1696 			}
1697 			break;
1698 		}
1699 	}
1700 	if (sacnt > 0)
1701 		length += sizeof (rtm_ext_t) + TSOL_RTSECATTR_SIZE(sacnt);
1702 
1703 	return (length);
1704 }
1705 
1706 /*
1707  * This routine is called to generate a message to the routing
1708  * socket indicating that a redirect has occured, a routing lookup
1709  * has failed, or that a protocol has detected timeouts to a particular
1710  * destination. This routine is called for message types RTM_LOSING,
1711  * RTM_REDIRECT, and RTM_MISS.
1712  */
1713 void
1714 ip_rts_change(int type, ipaddr_t dst_addr, ipaddr_t gw_addr, ipaddr_t net_mask,
1715     ipaddr_t source, ipaddr_t author, int flags, int error, int rtm_addrs)
1716 {
1717 	rt_msghdr_t	*rtm;
1718 	mblk_t		*mp;
1719 
1720 	if (rtm_addrs == 0)
1721 		return;
1722 	mp = rts_alloc_msg(type, rtm_addrs, AF_INET, 0);
1723 	if (mp == NULL)
1724 		return;
1725 	rts_fill_msg(type, rtm_addrs, dst_addr, net_mask, gw_addr, source, 0,
1726 	    author, NULL, mp, 0, NULL);
1727 	rtm = (rt_msghdr_t *)mp->b_rptr;
1728 	rtm->rtm_flags = flags;
1729 	rtm->rtm_errno = error;
1730 	rtm->rtm_flags |= RTF_DONE;
1731 	rtm->rtm_addrs = rtm_addrs;
1732 	rts_queue_input(mp, NULL, AF_INET);
1733 }
1734 
1735 /*
1736  * This routine is called to generate a message to the routing
1737  * socket indicating that the status of a network interface has changed.
1738  * Message type generated RTM_IFINFO.
1739  */
1740 void
1741 ip_rts_ifmsg(const ipif_t *ipif)
1742 {
1743 	if_msghdr_t	*ifm;
1744 	mblk_t		*mp;
1745 	sa_family_t	af;
1746 
1747 	/*
1748 	 * This message should be generated only
1749 	 * when the physical device is changing
1750 	 * state.
1751 	 */
1752 	if (ipif->ipif_id != 0)
1753 		return;
1754 	if (ipif->ipif_isv6) {
1755 		af = AF_INET6;
1756 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1757 		if (mp == NULL)
1758 			return;
1759 		rts_fill_msg_v6(RTM_IFINFO, RTA_IFP, &ipv6_all_zeros,
1760 		    &ipv6_all_zeros, &ipv6_all_zeros, &ipv6_all_zeros,
1761 		    &ipv6_all_zeros, &ipv6_all_zeros, ipif, mp, 0, NULL);
1762 	} else {
1763 		af = AF_INET;
1764 		mp = rts_alloc_msg(RTM_IFINFO, RTA_IFP, af, 0);
1765 		if (mp == NULL)
1766 			return;
1767 		rts_fill_msg(RTM_IFINFO, RTA_IFP, 0, 0, 0, 0, 0, 0, ipif, mp,
1768 		    0, NULL);
1769 	}
1770 	ifm = (if_msghdr_t *)mp->b_rptr;
1771 	ifm->ifm_index = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1772 	ifm->ifm_flags = ipif->ipif_flags | ipif->ipif_ill->ill_flags |
1773 	    ipif->ipif_ill->ill_phyint->phyint_flags;
1774 	rts_getifdata(&ifm->ifm_data, ipif);
1775 	ifm->ifm_addrs = RTA_IFP;
1776 	rts_queue_input(mp, NULL, af);
1777 }
1778 
1779 /*
1780  * This is called to generate messages to the routing socket
1781  * indicating a network interface has had addresses associated with it.
1782  * The structure of the code is based on the 4.4BSD-Lite2 <net/rtsock.c>.
1783  */
1784 void
1785 ip_rts_newaddrmsg(int cmd, int error, const ipif_t *ipif)
1786 {
1787 	int		pass;
1788 	int		ncmd;
1789 	int		rtm_addrs;
1790 	mblk_t		*mp;
1791 	ifa_msghdr_t	*ifam;
1792 	rt_msghdr_t	*rtm;
1793 	sa_family_t	af;
1794 
1795 	if (ipif->ipif_isv6)
1796 		af = AF_INET6;
1797 	else
1798 		af = AF_INET;
1799 	/*
1800 	 * If the request is DELETE, send RTM_DELETE and RTM_DELADDR.
1801 	 * if the request is ADD, send RTM_NEWADDR and RTM_ADD.
1802 	 */
1803 	for (pass = 1; pass < 3; pass++) {
1804 		if ((cmd == RTM_ADD && pass == 1) ||
1805 		    (cmd == RTM_DELETE && pass == 2)) {
1806 			ncmd = ((cmd == RTM_ADD) ? RTM_NEWADDR : RTM_DELADDR);
1807 
1808 			rtm_addrs = (RTA_IFA | RTA_NETMASK | RTA_BRD);
1809 			mp = rts_alloc_msg(ncmd, rtm_addrs, af, 0);
1810 			if (mp == NULL)
1811 				continue;
1812 			switch (af) {
1813 			case AF_INET:
1814 				rts_fill_msg(ncmd, rtm_addrs, 0,
1815 				    ipif->ipif_net_mask, 0, ipif->ipif_lcl_addr,
1816 				    ipif->ipif_pp_dst_addr, 0, NULL, mp,
1817 				    0, NULL);
1818 				break;
1819 			case AF_INET6:
1820 				rts_fill_msg_v6(ncmd, rtm_addrs,
1821 				    &ipv6_all_zeros, &ipif->ipif_v6net_mask,
1822 				    &ipv6_all_zeros, &ipif->ipif_v6lcl_addr,
1823 				    &ipif->ipif_v6pp_dst_addr, &ipv6_all_zeros,
1824 				    NULL, mp, 0, NULL);
1825 				break;
1826 			}
1827 			ifam = (ifa_msghdr_t *)mp->b_rptr;
1828 			ifam->ifam_index =
1829 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1830 			ifam->ifam_metric = ipif->ipif_metric;
1831 			ifam->ifam_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1832 			ifam->ifam_addrs = rtm_addrs;
1833 			rts_queue_input(mp, NULL, af);
1834 		}
1835 		if ((cmd == RTM_ADD && pass == 2) ||
1836 		    (cmd == RTM_DELETE && pass == 1)) {
1837 			rtm_addrs = (RTA_DST | RTA_NETMASK);
1838 			mp = rts_alloc_msg(cmd, rtm_addrs, af, 0);
1839 			if (mp == NULL)
1840 				continue;
1841 			switch (af) {
1842 			case AF_INET:
1843 				rts_fill_msg(cmd, rtm_addrs,
1844 				    ipif->ipif_lcl_addr, ipif->ipif_net_mask, 0,
1845 				    0, 0, 0, NULL, mp, 0, NULL);
1846 				break;
1847 			case AF_INET6:
1848 				rts_fill_msg_v6(cmd, rtm_addrs,
1849 				    &ipif->ipif_v6lcl_addr,
1850 				    &ipif->ipif_v6net_mask, &ipv6_all_zeros,
1851 				    &ipv6_all_zeros, &ipv6_all_zeros,
1852 				    &ipv6_all_zeros, NULL, mp, 0, NULL);
1853 				break;
1854 			}
1855 			rtm = (rt_msghdr_t *)mp->b_rptr;
1856 			rtm->rtm_index =
1857 			    ipif->ipif_ill->ill_phyint->phyint_ifindex;
1858 			rtm->rtm_flags = ((cmd == RTM_ADD) ? RTF_UP : 0);
1859 			rtm->rtm_errno = error;
1860 			if (error == 0)
1861 				rtm->rtm_flags |= RTF_DONE;
1862 			rtm->rtm_addrs = rtm_addrs;
1863 			rts_queue_input(mp, NULL, af);
1864 		}
1865 	}
1866 }
1867 
1868 /*
1869  * Based on the address family specified in a sockaddr, copy the address field
1870  * into an in6_addr_t.
1871  *
1872  * In the case of AF_UNSPEC, we assume the family is actually AF_INET for
1873  * compatibility with programs that leave the family cleared in the sockaddr.
1874  * Callers of rts_copyfromsockaddr should check the family themselves if they
1875  * wish to verify its value.
1876  *
1877  * In the case of AF_INET6, a check is made to ensure that address is not an
1878  * IPv4-mapped address.
1879  */
1880 size_t
1881 rts_copyfromsockaddr(struct sockaddr *sa, in6_addr_t *addrp)
1882 {
1883 	switch (sa->sa_family) {
1884 	case AF_INET:
1885 	case AF_UNSPEC:
1886 		IN6_IPADDR_TO_V4MAPPED(((sin_t *)sa)->sin_addr.s_addr, addrp);
1887 		return (sizeof (sin_t));
1888 	case AF_INET6:
1889 		*addrp = ((sin6_t *)sa)->sin6_addr;
1890 		if (IN6_IS_ADDR_V4MAPPED(addrp))
1891 			return (0);
1892 		return (sizeof (sin6_t));
1893 	default:
1894 		return (0);
1895 	}
1896 }
1897