xref: /titanic_52/usr/src/uts/common/inet/ip/ip6_if.c (revision 2ae51e795e518fd8980f736920a1a38dd17b3ad6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 1990 Mentat Inc.
27  */
28 
29 /*
30  * This file contains the interface control functions for IPv6.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/sysmacros.h>
35 #include <sys/stream.h>
36 #include <sys/dlpi.h>
37 #include <sys/stropts.h>
38 #include <sys/ddi.h>
39 #include <sys/cmn_err.h>
40 #include <sys/kstat.h>
41 #include <sys/debug.h>
42 #include <sys/zone.h>
43 #include <sys/policy.h>
44 
45 #include <sys/systm.h>
46 #include <sys/param.h>
47 #include <sys/socket.h>
48 #include <sys/isa_defs.h>
49 #include <net/if.h>
50 #include <net/if_dl.h>
51 #include <net/route.h>
52 #include <netinet/in.h>
53 #include <netinet/igmp_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet/icmp6.h>
56 #include <netinet/in.h>
57 
58 #include <inet/common.h>
59 #include <inet/nd.h>
60 #include <inet/mib2.h>
61 #include <inet/ip.h>
62 #include <inet/ip6.h>
63 #include <inet/ip_multi.h>
64 #include <inet/ip_ire.h>
65 #include <inet/ip_rts.h>
66 #include <inet/ip_ndp.h>
67 #include <inet/ip_if.h>
68 #include <inet/ip6_asp.h>
69 #include <inet/tun.h>
70 #include <inet/ipclassifier.h>
71 #include <inet/sctp_ip.h>
72 
73 #include <sys/tsol/tndb.h>
74 #include <sys/tsol/tnet.h>
75 
76 static in6_addr_t	ipv6_ll_template =
77 			{(uint32_t)V6_LINKLOCAL, 0x0, 0x0, 0x0};
78 
79 static ipif_t *
80 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst,
81     queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst);
82 
83 /*
84  * These two functions, ipif_lookup_group_v6() and ill_lookup_group_v6(),
85  * are called when an application does not specify an interface to be
86  * used for multicast traffic.  It calls ire_lookup_multi_v6() to look
87  * for an interface route for the specified multicast group.  Doing
88  * this allows the administrator to add prefix routes for multicast to
89  * indicate which interface to be used for multicast traffic in the above
90  * scenario.  The route could be for all multicast (ff00::/8), for a single
91  * multicast group (a /128 route) or anything in between.  If there is no
92  * such multicast route, we just find any multicast capable interface and
93  * return it.
94  */
95 ipif_t *
96 ipif_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst)
97 {
98 	ire_t	*ire;
99 	ipif_t	*ipif;
100 
101 	ire = ire_lookup_multi_v6(group, zoneid, ipst);
102 	if (ire != NULL) {
103 		ipif = ire->ire_ipif;
104 		ipif_refhold(ipif);
105 		ire_refrele(ire);
106 		return (ipif);
107 	}
108 
109 	return (ipif_lookup_multicast(ipst, zoneid, B_TRUE));
110 }
111 
112 ill_t *
113 ill_lookup_group_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst)
114 {
115 	ire_t	*ire;
116 	ill_t	*ill;
117 	ipif_t	*ipif;
118 
119 	ire = ire_lookup_multi_v6(group, zoneid, ipst);
120 	if (ire != NULL) {
121 		ill = ire->ire_ipif->ipif_ill;
122 		ill_refhold(ill);
123 		ire_refrele(ire);
124 		return (ill);
125 	}
126 
127 	ipif = ipif_lookup_multicast(ipst, zoneid, B_TRUE);
128 	if (ipif == NULL)
129 		return (NULL);
130 
131 	ill = ipif->ipif_ill;
132 	ill_refhold(ill);
133 	ipif_refrele(ipif);
134 	return (ill);
135 }
136 
137 /*
138  * Look for an ipif with the specified interface address and destination.
139  * The destination address is used only for matching point-to-point interfaces.
140  */
141 static ipif_t *
142 ipif_lookup_interface_v6(const in6_addr_t *if_addr, const in6_addr_t *dst,
143     queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst)
144 {
145 	ipif_t	*ipif;
146 	ill_t	*ill;
147 	ipsq_t	*ipsq;
148 	ill_walk_context_t ctx;
149 
150 	if (error != NULL)
151 		*error = 0;
152 
153 	/*
154 	 * First match all the point-to-point interfaces
155 	 * before looking at non-point-to-point interfaces.
156 	 * This is done to avoid returning non-point-to-point
157 	 * ipif instead of unnumbered point-to-point ipif.
158 	 */
159 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
160 	ill = ILL_START_WALK_V6(&ctx, ipst);
161 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
162 		GRAB_CONN_LOCK(q);
163 		mutex_enter(&ill->ill_lock);
164 		for (ipif = ill->ill_ipif; ipif != NULL;
165 		    ipif = ipif->ipif_next) {
166 			/* Allow the ipif to be down */
167 			if ((ipif->ipif_flags & IPIF_POINTOPOINT) &&
168 			    (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
169 			    if_addr)) &&
170 			    (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
171 			    dst))) {
172 				if (IPIF_CAN_LOOKUP(ipif)) {
173 					ipif_refhold_locked(ipif);
174 					mutex_exit(&ill->ill_lock);
175 					RELEASE_CONN_LOCK(q);
176 					rw_exit(&ipst->ips_ill_g_lock);
177 					return (ipif);
178 				} else if (IPIF_CAN_WAIT(ipif, q)) {
179 					ipsq = ill->ill_phyint->phyint_ipsq;
180 					mutex_enter(&ipsq->ipsq_lock);
181 					mutex_exit(&ill->ill_lock);
182 					rw_exit(&ipst->ips_ill_g_lock);
183 					ipsq_enq(ipsq, q, mp, func, NEW_OP,
184 					    ill);
185 					mutex_exit(&ipsq->ipsq_lock);
186 					RELEASE_CONN_LOCK(q);
187 					if (error != NULL)
188 						*error = EINPROGRESS;
189 					return (NULL);
190 				}
191 			}
192 		}
193 		mutex_exit(&ill->ill_lock);
194 		RELEASE_CONN_LOCK(q);
195 	}
196 	rw_exit(&ipst->ips_ill_g_lock);
197 	/* lookup the ipif based on interface address */
198 	ipif = ipif_lookup_addr_v6(if_addr, NULL, ALL_ZONES, q, mp, func,
199 	    error, ipst);
200 	ASSERT(ipif == NULL || ipif->ipif_isv6);
201 	return (ipif);
202 }
203 
204 /*
205  * Look for an ipif with the specified address. For point-point links
206  * we look for matches on either the destination address and the local
207  * address, but we ignore the check on the local address if IPIF_UNNUMBERED
208  * is set.
209  * Matches on a specific ill if match_ill is set.
210  */
211 /* ARGSUSED */
212 ipif_t *
213 ipif_lookup_addr_v6(const in6_addr_t *addr, ill_t *match_ill, zoneid_t zoneid,
214     queue_t *q, mblk_t *mp, ipsq_func_t func, int *error, ip_stack_t *ipst)
215 {
216 	ipif_t	*ipif;
217 	ill_t	*ill;
218 	boolean_t  ptp = B_FALSE;
219 	ipsq_t	*ipsq;
220 	ill_walk_context_t ctx;
221 
222 	if (error != NULL)
223 		*error = 0;
224 
225 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
226 	/*
227 	 * Repeat twice, first based on local addresses and
228 	 * next time for pointopoint.
229 	 */
230 repeat:
231 	ill = ILL_START_WALK_V6(&ctx, ipst);
232 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
233 		if (match_ill != NULL && ill != match_ill) {
234 			continue;
235 		}
236 		GRAB_CONN_LOCK(q);
237 		mutex_enter(&ill->ill_lock);
238 		for (ipif = ill->ill_ipif; ipif != NULL;
239 		    ipif = ipif->ipif_next) {
240 			if (zoneid != ALL_ZONES &&
241 			    ipif->ipif_zoneid != zoneid &&
242 			    ipif->ipif_zoneid != ALL_ZONES)
243 				continue;
244 			/* Allow the ipif to be down */
245 			if ((!ptp && (IN6_ARE_ADDR_EQUAL(
246 			    &ipif->ipif_v6lcl_addr, addr) &&
247 			    (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) ||
248 			    (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) &&
249 			    IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
250 			    addr))) {
251 				if (IPIF_CAN_LOOKUP(ipif)) {
252 					ipif_refhold_locked(ipif);
253 					mutex_exit(&ill->ill_lock);
254 					RELEASE_CONN_LOCK(q);
255 					rw_exit(&ipst->ips_ill_g_lock);
256 					return (ipif);
257 				} else if (IPIF_CAN_WAIT(ipif, q)) {
258 					ipsq = ill->ill_phyint->phyint_ipsq;
259 					mutex_enter(&ipsq->ipsq_lock);
260 					mutex_exit(&ill->ill_lock);
261 					rw_exit(&ipst->ips_ill_g_lock);
262 					ipsq_enq(ipsq, q, mp, func, NEW_OP,
263 					    ill);
264 					mutex_exit(&ipsq->ipsq_lock);
265 					RELEASE_CONN_LOCK(q);
266 					if (error != NULL)
267 						*error = EINPROGRESS;
268 					return (NULL);
269 				}
270 			}
271 		}
272 		mutex_exit(&ill->ill_lock);
273 		RELEASE_CONN_LOCK(q);
274 	}
275 
276 	/* If we already did the ptp case, then we are done */
277 	if (ptp) {
278 		rw_exit(&ipst->ips_ill_g_lock);
279 		if (error != NULL)
280 			*error = ENXIO;
281 		return (NULL);
282 	}
283 	ptp = B_TRUE;
284 	goto repeat;
285 }
286 
287 boolean_t
288 ip_addr_exists_v6(const in6_addr_t *addr, zoneid_t zoneid,
289     ip_stack_t *ipst)
290 {
291 	ipif_t	*ipif;
292 	ill_t	*ill;
293 	ill_walk_context_t ctx;
294 
295 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
296 
297 	ill = ILL_START_WALK_V6(&ctx, ipst);
298 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
299 		mutex_enter(&ill->ill_lock);
300 		for (ipif = ill->ill_ipif; ipif != NULL;
301 		    ipif = ipif->ipif_next) {
302 			if (zoneid != ALL_ZONES &&
303 			    ipif->ipif_zoneid != zoneid &&
304 			    ipif->ipif_zoneid != ALL_ZONES)
305 				continue;
306 			/* Allow the ipif to be down */
307 			if (((IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
308 			    addr) &&
309 			    (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) ||
310 			    ((ipif->ipif_flags & IPIF_POINTOPOINT) &&
311 			    IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
312 			    addr))) {
313 				mutex_exit(&ill->ill_lock);
314 				rw_exit(&ipst->ips_ill_g_lock);
315 				return (B_TRUE);
316 			}
317 		}
318 		mutex_exit(&ill->ill_lock);
319 	}
320 
321 	rw_exit(&ipst->ips_ill_g_lock);
322 	return (B_FALSE);
323 }
324 
325 /*
326  * Look for an ipif with the specified address. For point-point links
327  * we look for matches on either the destination address and the local
328  * address, but we ignore the check on the local address if IPIF_UNNUMBERED
329  * is set.
330  * Matches on a specific ill if match_ill is set.
331  * Return the zoneid for the ipif. ALL_ZONES if none found.
332  */
333 zoneid_t
334 ipif_lookup_addr_zoneid_v6(const in6_addr_t *addr, ill_t *match_ill,
335     ip_stack_t *ipst)
336 {
337 	ipif_t	*ipif;
338 	ill_t	*ill;
339 	boolean_t  ptp = B_FALSE;
340 	ill_walk_context_t ctx;
341 	zoneid_t	zoneid;
342 
343 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
344 	/*
345 	 * Repeat twice, first based on local addresses and
346 	 * next time for pointopoint.
347 	 */
348 repeat:
349 	ill = ILL_START_WALK_V6(&ctx, ipst);
350 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
351 		if (match_ill != NULL && ill != match_ill) {
352 			continue;
353 		}
354 		mutex_enter(&ill->ill_lock);
355 		for (ipif = ill->ill_ipif; ipif != NULL;
356 		    ipif = ipif->ipif_next) {
357 			/* Allow the ipif to be down */
358 			if ((!ptp && (IN6_ARE_ADDR_EQUAL(
359 			    &ipif->ipif_v6lcl_addr, addr) &&
360 			    (ipif->ipif_flags & IPIF_UNNUMBERED) == 0)) ||
361 			    (ptp && (ipif->ipif_flags & IPIF_POINTOPOINT) &&
362 			    IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6pp_dst_addr,
363 			    addr)) &&
364 			    !(ipif->ipif_state_flags & IPIF_CONDEMNED)) {
365 				zoneid = ipif->ipif_zoneid;
366 				mutex_exit(&ill->ill_lock);
367 				rw_exit(&ipst->ips_ill_g_lock);
368 				/*
369 				 * If ipif_zoneid was ALL_ZONES then we have
370 				 * a trusted extensions shared IP address.
371 				 * In that case GLOBAL_ZONEID works to send.
372 				 */
373 				if (zoneid == ALL_ZONES)
374 					zoneid = GLOBAL_ZONEID;
375 				return (zoneid);
376 			}
377 		}
378 		mutex_exit(&ill->ill_lock);
379 	}
380 
381 	/* If we already did the ptp case, then we are done */
382 	if (ptp) {
383 		rw_exit(&ipst->ips_ill_g_lock);
384 		return (ALL_ZONES);
385 	}
386 	ptp = B_TRUE;
387 	goto repeat;
388 }
389 
390 /*
391  * Perform various checks to verify that an address would make sense as a local
392  * interface address.  This is currently only called when an attempt is made
393  * to set a local address.
394  *
395  * Does not allow a v4-mapped address, an address that equals the subnet
396  * anycast address, ... a multicast address, ...
397  */
398 boolean_t
399 ip_local_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask)
400 {
401 	in6_addr_t subnet;
402 
403 	if (IN6_IS_ADDR_UNSPECIFIED(addr))
404 		return (B_TRUE);	/* Allow all zeros */
405 
406 	/*
407 	 * Don't allow all zeroes or host part, but allow
408 	 * all ones netmask.
409 	 */
410 	V6_MASK_COPY(*addr, *subnet_mask, subnet);
411 	if (IN6_IS_ADDR_V4MAPPED(addr) ||
412 	    (IN6_ARE_ADDR_EQUAL(addr, &subnet) &&
413 	    !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) ||
414 	    (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))) ||
415 	    IN6_IS_ADDR_MULTICAST(addr))
416 		return (B_FALSE);
417 
418 	return (B_TRUE);
419 }
420 
421 /*
422  * Perform various checks to verify that an address would make sense as a
423  * remote/subnet interface address.
424  */
425 boolean_t
426 ip_remote_addr_ok_v6(const in6_addr_t *addr, const in6_addr_t *subnet_mask)
427 {
428 	in6_addr_t subnet;
429 
430 	if (IN6_IS_ADDR_UNSPECIFIED(addr))
431 		return (B_TRUE);	/* Allow all zeros */
432 
433 	V6_MASK_COPY(*addr, *subnet_mask, subnet);
434 	if (IN6_IS_ADDR_V4MAPPED(addr) ||
435 	    (IN6_ARE_ADDR_EQUAL(addr, &subnet) &&
436 	    !IN6_ARE_ADDR_EQUAL(subnet_mask, &ipv6_all_ones)) ||
437 	    IN6_IS_ADDR_MULTICAST(addr) ||
438 	    (IN6_IS_ADDR_V4COMPAT(addr) && CLASSD(V4_PART_OF_V6((*addr)))))
439 		return (B_FALSE);
440 
441 	return (B_TRUE);
442 }
443 
444 /*
445  * ip_rt_add_v6 is called to add an IPv6 route to the forwarding table.
446  * ipif_arg is passed in to associate it with the correct interface
447  * (for link-local destinations and gateways).
448  */
449 /* ARGSUSED1 */
450 int
451 ip_rt_add_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask,
452     const in6_addr_t *gw_addr, const in6_addr_t *src_addr, int flags,
453     ipif_t *ipif_arg, ire_t **ire_arg, queue_t *q, mblk_t *mp, ipsq_func_t func,
454     struct rtsa_s *sp, ip_stack_t *ipst)
455 {
456 	ire_t	*ire;
457 	ire_t	*gw_ire = NULL;
458 	ipif_t	*ipif;
459 	boolean_t ipif_refheld = B_FALSE;
460 	uint_t	type;
461 	int	match_flags = MATCH_IRE_TYPE;
462 	int	error;
463 	tsol_gc_t *gc = NULL;
464 	tsol_gcgrp_t *gcgrp = NULL;
465 	boolean_t gcgrp_xtraref = B_FALSE;
466 
467 	if (ire_arg != NULL)
468 		*ire_arg = NULL;
469 
470 	/*
471 	 * Prevent routes with a zero gateway from being created (since
472 	 * interfaces can currently be plumbed and brought up with no assigned
473 	 * address).
474 	 */
475 	if (IN6_IS_ADDR_UNSPECIFIED(gw_addr))
476 		return (ENETUNREACH);
477 
478 	/*
479 	 * If this is the case of RTF_HOST being set, then we set the netmask
480 	 * to all ones (regardless if one was supplied).
481 	 */
482 	if (flags & RTF_HOST)
483 		mask = &ipv6_all_ones;
484 
485 	/*
486 	 * Get the ipif, if any, corresponding to the gw_addr
487 	 */
488 	ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func,
489 	    &error, ipst);
490 	if (ipif != NULL)
491 		ipif_refheld = B_TRUE;
492 	else if (error == EINPROGRESS) {
493 		ip1dbg(("ip_rt_add_v6: null and EINPROGRESS"));
494 		return (error);
495 	}
496 
497 	/*
498 	 * GateD will attempt to create routes with a loopback interface
499 	 * address as the gateway and with RTF_GATEWAY set.  We allow
500 	 * these routes to be added, but create them as interface routes
501 	 * since the gateway is an interface address.
502 	 */
503 	if ((ipif != NULL) && (ipif->ipif_ire_type == IRE_LOOPBACK)) {
504 		flags &= ~RTF_GATEWAY;
505 		if (IN6_ARE_ADDR_EQUAL(gw_addr, &ipv6_loopback) &&
506 		    IN6_ARE_ADDR_EQUAL(dst_addr, &ipv6_loopback) &&
507 		    IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones)) {
508 			ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK,
509 			    ipif, ALL_ZONES, NULL, match_flags, ipst);
510 			if (ire != NULL) {
511 				ire_refrele(ire);
512 				if (ipif_refheld)
513 					ipif_refrele(ipif);
514 				return (EEXIST);
515 			}
516 			ip1dbg(("ipif_up_done: 0x%p creating IRE 0x%x"
517 			    "for 0x%x\n", (void *)ipif,
518 			    ipif->ipif_ire_type,
519 			    ntohl(ipif->ipif_lcl_addr)));
520 			ire = ire_create_v6(
521 			    dst_addr,
522 			    mask,
523 			    &ipif->ipif_v6src_addr,
524 			    NULL,
525 			    &ipif->ipif_mtu,
526 			    NULL,
527 			    NULL,
528 			    NULL,
529 			    ipif->ipif_net_type,
530 			    ipif,
531 			    NULL,
532 			    0,
533 			    0,
534 			    flags,
535 			    &ire_uinfo_null,
536 			    NULL,
537 			    NULL,
538 			    ipst);
539 			if (ire == NULL) {
540 				if (ipif_refheld)
541 					ipif_refrele(ipif);
542 				return (ENOMEM);
543 			}
544 			error = ire_add(&ire, q, mp, func, B_FALSE);
545 			if (error == 0)
546 				goto save_ire;
547 			/*
548 			 * In the result of failure, ire_add() will have already
549 			 * deleted the ire in question, so there is no need to
550 			 * do that here.
551 			 */
552 			if (ipif_refheld)
553 				ipif_refrele(ipif);
554 			return (error);
555 		}
556 	}
557 
558 	/*
559 	 * Traditionally, interface routes are ones where RTF_GATEWAY isn't set
560 	 * and the gateway address provided is one of the system's interface
561 	 * addresses.  By using the routing socket interface and supplying an
562 	 * RTA_IFP sockaddr with an interface index, an alternate method of
563 	 * specifying an interface route to be created is available which uses
564 	 * the interface index that specifies the outgoing interface rather than
565 	 * the address of an outgoing interface (which may not be able to
566 	 * uniquely identify an interface).  When coupled with the RTF_GATEWAY
567 	 * flag, routes can be specified which not only specify the next-hop to
568 	 * be used when routing to a certain prefix, but also which outgoing
569 	 * interface should be used.
570 	 *
571 	 * Previously, interfaces would have unique addresses assigned to them
572 	 * and so the address assigned to a particular interface could be used
573 	 * to identify a particular interface.  One exception to this was the
574 	 * case of an unnumbered interface (where IPIF_UNNUMBERED was set).
575 	 *
576 	 * With the advent of IPv6 and its link-local addresses, this
577 	 * restriction was relaxed and interfaces could share addresses between
578 	 * themselves.  In fact, typically all of the link-local interfaces on
579 	 * an IPv6 node or router will have the same link-local address.  In
580 	 * order to differentiate between these interfaces, the use of an
581 	 * interface index is necessary and this index can be carried inside a
582 	 * RTA_IFP sockaddr (which is actually a sockaddr_dl).  One restriction
583 	 * of using the interface index, however, is that all of the ipif's that
584 	 * are part of an ill have the same index and so the RTA_IFP sockaddr
585 	 * cannot be used to differentiate between ipif's (or logical
586 	 * interfaces) that belong to the same ill (physical interface).
587 	 *
588 	 * For example, in the following case involving IPv4 interfaces and
589 	 * logical interfaces
590 	 *
591 	 *	192.0.2.32	255.255.255.224	192.0.2.33	U	if0
592 	 *	192.0.2.32	255.255.255.224	192.0.2.34	U	if0:1
593 	 *	192.0.2.32	255.255.255.224	192.0.2.35	U	if0:2
594 	 *
595 	 * the ipif's corresponding to each of these interface routes can be
596 	 * uniquely identified by the "gateway" (actually interface address).
597 	 *
598 	 * In this case involving multiple IPv6 default routes to a particular
599 	 * link-local gateway, the use of RTA_IFP is necessary to specify which
600 	 * default route is of interest:
601 	 *
602 	 *	default		fe80::123:4567:89ab:cdef	U	if0
603 	 *	default		fe80::123:4567:89ab:cdef	U	if1
604 	 */
605 
606 	/* RTF_GATEWAY not set */
607 	if (!(flags & RTF_GATEWAY)) {
608 		queue_t	*stq;
609 
610 		if (sp != NULL) {
611 			ip2dbg(("ip_rt_add_v6: gateway security attributes "
612 			    "cannot be set with interface route\n"));
613 			if (ipif_refheld)
614 				ipif_refrele(ipif);
615 			return (EINVAL);
616 		}
617 
618 		/*
619 		 * As the interface index specified with the RTA_IFP sockaddr is
620 		 * the same for all ipif's off of an ill, the matching logic
621 		 * below uses MATCH_IRE_ILL if such an index was specified.
622 		 * This means that routes sharing the same prefix when added
623 		 * using a RTA_IFP sockaddr must have distinct interface
624 		 * indices (namely, they must be on distinct ill's).
625 		 *
626 		 * On the other hand, since the gateway address will usually be
627 		 * different for each ipif on the system, the matching logic
628 		 * uses MATCH_IRE_IPIF in the case of a traditional interface
629 		 * route.  This means that interface routes for the same prefix
630 		 * can be created if they belong to distinct ipif's and if a
631 		 * RTA_IFP sockaddr is not present.
632 		 */
633 		if (ipif_arg != NULL) {
634 			if (ipif_refheld) {
635 				ipif_refrele(ipif);
636 				ipif_refheld = B_FALSE;
637 			}
638 			ipif = ipif_arg;
639 			match_flags |= MATCH_IRE_ILL;
640 		} else {
641 			/*
642 			 * Check the ipif corresponding to the gw_addr
643 			 */
644 			if (ipif == NULL)
645 				return (ENETUNREACH);
646 			match_flags |= MATCH_IRE_IPIF;
647 		}
648 
649 		ASSERT(ipif != NULL);
650 		/*
651 		 * We check for an existing entry at this point.
652 		 */
653 		match_flags |= MATCH_IRE_MASK;
654 		ire = ire_ftable_lookup_v6(dst_addr, mask, 0, IRE_INTERFACE,
655 		    ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
656 		if (ire != NULL) {
657 			ire_refrele(ire);
658 			if (ipif_refheld)
659 				ipif_refrele(ipif);
660 			return (EEXIST);
661 		}
662 
663 		stq = (ipif->ipif_net_type == IRE_IF_RESOLVER)
664 		    ? ipif->ipif_rq : ipif->ipif_wq;
665 
666 		/*
667 		 * Create a copy of the IRE_LOOPBACK, IRE_IF_NORESOLVER or
668 		 * IRE_IF_RESOLVER with the modified address and netmask.
669 		 */
670 		ire = ire_create_v6(
671 		    dst_addr,
672 		    mask,
673 		    &ipif->ipif_v6src_addr,
674 		    NULL,
675 		    &ipif->ipif_mtu,
676 		    NULL,
677 		    NULL,
678 		    stq,
679 		    ipif->ipif_net_type,
680 		    ipif,
681 		    NULL,
682 		    0,
683 		    0,
684 		    flags,
685 		    &ire_uinfo_null,
686 		    NULL,
687 		    NULL,
688 		    ipst);
689 		if (ire == NULL) {
690 			if (ipif_refheld)
691 				ipif_refrele(ipif);
692 			return (ENOMEM);
693 		}
694 
695 		/*
696 		 * Some software (for example, GateD and Sun Cluster) attempts
697 		 * to create (what amount to) IRE_PREFIX routes with the
698 		 * loopback address as the gateway.  This is primarily done to
699 		 * set up prefixes with the RTF_REJECT flag set (for example,
700 		 * when generating aggregate routes). We also OR in the
701 		 * RTF_BLACKHOLE flag as these interface routes, by
702 		 * definition, can only be that.
703 		 *
704 		 * If the IRE type (as defined by ipif->ipif_net_type) is
705 		 * IRE_LOOPBACK, then we map the request into a
706 		 * IRE_IF_NORESOLVER.
707 		 *
708 		 * Needless to say, the real IRE_LOOPBACK is NOT created by this
709 		 * routine, but rather using ire_create_v6() directly.
710 		 */
711 		if (ipif->ipif_net_type == IRE_LOOPBACK) {
712 			ire->ire_type = IRE_IF_NORESOLVER;
713 			ire->ire_flags |= RTF_BLACKHOLE;
714 		}
715 		error = ire_add(&ire, q, mp, func, B_FALSE);
716 		if (error == 0)
717 			goto save_ire;
718 		/*
719 		 * In the result of failure, ire_add() will have already
720 		 * deleted the ire in question, so there is no need to
721 		 * do that here.
722 		 */
723 		if (ipif_refheld)
724 			ipif_refrele(ipif);
725 		return (error);
726 	}
727 	if (ipif_refheld) {
728 		ipif_refrele(ipif);
729 		ipif_refheld = B_FALSE;
730 	}
731 
732 	/*
733 	 * Get an interface IRE for the specified gateway.
734 	 * If we don't have an IRE_IF_NORESOLVER or IRE_IF_RESOLVER for the
735 	 * gateway, it is currently unreachable and we fail the request
736 	 * accordingly.
737 	 */
738 	ipif = ipif_arg;
739 	if (ipif_arg != NULL)
740 		match_flags |= MATCH_IRE_ILL;
741 	gw_ire = ire_ftable_lookup_v6(gw_addr, 0, 0, IRE_INTERFACE, ipif_arg,
742 	    NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
743 	if (gw_ire == NULL)
744 		return (ENETUNREACH);
745 
746 	/*
747 	 * We create one of three types of IREs as a result of this request
748 	 * based on the netmask.  A netmask of all ones (which is automatically
749 	 * assumed when RTF_HOST is set) results in an IRE_HOST being created.
750 	 * An all zeroes netmask implies a default route so an IRE_DEFAULT is
751 	 * created.  Otherwise, an IRE_PREFIX route is created for the
752 	 * destination prefix.
753 	 */
754 	if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones))
755 		type = IRE_HOST;
756 	else if (IN6_IS_ADDR_UNSPECIFIED(mask))
757 		type = IRE_DEFAULT;
758 	else
759 		type = IRE_PREFIX;
760 
761 	/* check for a duplicate entry */
762 	ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type, ipif_arg,
763 	    NULL, ALL_ZONES, 0, NULL,
764 	    match_flags | MATCH_IRE_MASK | MATCH_IRE_GW, ipst);
765 	if (ire != NULL) {
766 		ire_refrele(gw_ire);
767 		ire_refrele(ire);
768 		return (EEXIST);
769 	}
770 
771 	/* Security attribute exists */
772 	if (sp != NULL) {
773 		tsol_gcgrp_addr_t ga;
774 
775 		/* find or create the gateway credentials group */
776 		ga.ga_af = AF_INET6;
777 		ga.ga_addr = *gw_addr;
778 
779 		/* we hold reference to it upon success */
780 		gcgrp = gcgrp_lookup(&ga, B_TRUE);
781 		if (gcgrp == NULL) {
782 			ire_refrele(gw_ire);
783 			return (ENOMEM);
784 		}
785 
786 		/*
787 		 * Create and add the security attribute to the group; a
788 		 * reference to the group is made upon allocating a new
789 		 * entry successfully.  If it finds an already-existing
790 		 * entry for the security attribute in the group, it simply
791 		 * returns it and no new reference is made to the group.
792 		 */
793 		gc = gc_create(sp, gcgrp, &gcgrp_xtraref);
794 		if (gc == NULL) {
795 			/* release reference held by gcgrp_lookup */
796 			GCGRP_REFRELE(gcgrp);
797 			ire_refrele(gw_ire);
798 			return (ENOMEM);
799 		}
800 	}
801 
802 	/* Create the IRE. */
803 	ire = ire_create_v6(
804 	    dst_addr,				/* dest address */
805 	    mask,				/* mask */
806 	    /* src address assigned by the caller? */
807 	    (((flags & RTF_SETSRC) && !IN6_IS_ADDR_UNSPECIFIED(src_addr)) ?
808 	    src_addr : NULL),
809 	    gw_addr,				/* gateway address */
810 	    &gw_ire->ire_max_frag,
811 	    NULL,				/* no src nce */
812 	    NULL,				/* no recv-from queue */
813 	    NULL,				/* no send-to queue */
814 	    (ushort_t)type,			/* IRE type */
815 	    ipif_arg,
816 	    NULL,
817 	    0,
818 	    0,
819 	    flags,
820 	    &gw_ire->ire_uinfo,			/* Inherit ULP info from gw */
821 	    gc,					/* security attribute */
822 	    NULL,
823 	    ipst);
824 
825 	/*
826 	 * The ire holds a reference to the 'gc' and the 'gc' holds a
827 	 * reference to the 'gcgrp'. We can now release the extra reference
828 	 * the 'gcgrp' acquired in the gcgrp_lookup, if it was not used.
829 	 */
830 	if (gcgrp_xtraref)
831 		GCGRP_REFRELE(gcgrp);
832 	if (ire == NULL) {
833 		if (gc != NULL)
834 			GC_REFRELE(gc);
835 		ire_refrele(gw_ire);
836 		return (ENOMEM);
837 	}
838 
839 	/*
840 	 * POLICY: should we allow an RTF_HOST with address INADDR_ANY?
841 	 * SUN/OS socket stuff does but do we really want to allow ::0 ?
842 	 */
843 
844 	/* Add the new IRE. */
845 	error = ire_add(&ire, q, mp, func, B_FALSE);
846 	/*
847 	 * In the result of failure, ire_add() will have already
848 	 * deleted the ire in question, so there is no need to
849 	 * do that here.
850 	 */
851 	if (error != 0) {
852 		ire_refrele(gw_ire);
853 		return (error);
854 	}
855 
856 	if (flags & RTF_MULTIRT) {
857 		/*
858 		 * Invoke the CGTP (multirouting) filtering module
859 		 * to add the dst address in the filtering database.
860 		 * Replicated inbound packets coming from that address
861 		 * will be filtered to discard the duplicates.
862 		 * It is not necessary to call the CGTP filter hook
863 		 * when the dst address is a multicast, because an
864 		 * IP source address cannot be a multicast.
865 		 */
866 		if (ipst->ips_ip_cgtp_filter_ops != NULL &&
867 		    !IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))) {
868 			int res;
869 
870 			res = ipst->ips_ip_cgtp_filter_ops->cfo_add_dest_v6(
871 			    ipst->ips_netstack->netstack_stackid,
872 			    &ire->ire_addr_v6,
873 			    &ire->ire_gateway_addr_v6,
874 			    &ire->ire_src_addr_v6,
875 			    &gw_ire->ire_src_addr_v6);
876 			if (res != 0) {
877 				ire_refrele(gw_ire);
878 				ire_delete(ire);
879 				return (res);
880 			}
881 		}
882 	}
883 
884 	/*
885 	 * Now that the prefix IRE entry has been created, delete any
886 	 * existing gateway IRE cache entries as well as any IRE caches
887 	 * using the gateway, and force them to be created through
888 	 * ip_newroute_v6.
889 	 */
890 	if (gc != NULL) {
891 		ASSERT(gcgrp != NULL);
892 		ire_clookup_delete_cache_gw_v6(gw_addr, ALL_ZONES, ipst);
893 	}
894 
895 save_ire:
896 	if (gw_ire != NULL) {
897 		ire_refrele(gw_ire);
898 	}
899 	if (ipif != NULL) {
900 		mblk_t	*save_mp;
901 
902 		/*
903 		 * Save enough information so that we can recreate the IRE if
904 		 * the interface goes down and then up.  The metrics associated
905 		 * with the route will be saved as well when rts_setmetrics() is
906 		 * called after the IRE has been created.  In the case where
907 		 * memory cannot be allocated, none of this information will be
908 		 * saved.
909 		 */
910 		save_mp = allocb(sizeof (ifrt_t), BPRI_MED);
911 		if (save_mp != NULL) {
912 			ifrt_t	*ifrt;
913 
914 			save_mp->b_wptr += sizeof (ifrt_t);
915 			ifrt = (ifrt_t *)save_mp->b_rptr;
916 			bzero(ifrt, sizeof (ifrt_t));
917 			ifrt->ifrt_type = ire->ire_type;
918 			ifrt->ifrt_v6addr = ire->ire_addr_v6;
919 			mutex_enter(&ire->ire_lock);
920 			ifrt->ifrt_v6gateway_addr = ire->ire_gateway_addr_v6;
921 			ifrt->ifrt_v6src_addr = ire->ire_src_addr_v6;
922 			mutex_exit(&ire->ire_lock);
923 			ifrt->ifrt_v6mask = ire->ire_mask_v6;
924 			ifrt->ifrt_flags = ire->ire_flags;
925 			ifrt->ifrt_max_frag = ire->ire_max_frag;
926 			mutex_enter(&ipif->ipif_saved_ire_lock);
927 			save_mp->b_cont = ipif->ipif_saved_ire_mp;
928 			ipif->ipif_saved_ire_mp = save_mp;
929 			ipif->ipif_saved_ire_cnt++;
930 			mutex_exit(&ipif->ipif_saved_ire_lock);
931 		}
932 	}
933 	if (ire_arg != NULL) {
934 		/*
935 		 * Store the ire that was successfully added into where ire_arg
936 		 * points to so that callers don't have to look it up
937 		 * themselves (but they are responsible for ire_refrele()ing
938 		 * the ire when they are finished with it).
939 		 */
940 		*ire_arg = ire;
941 	} else {
942 		ire_refrele(ire);		/* Held in ire_add */
943 	}
944 	if (ipif_refheld)
945 		ipif_refrele(ipif);
946 	return (0);
947 }
948 
949 /*
950  * ip_rt_delete_v6 is called to delete an IPv6 route.
951  * ipif_arg is passed in to associate it with the correct interface
952  * (for link-local destinations and gateways).
953  */
954 /* ARGSUSED4 */
955 int
956 ip_rt_delete_v6(const in6_addr_t *dst_addr, const in6_addr_t *mask,
957     const in6_addr_t *gw_addr, uint_t rtm_addrs, int flags, ipif_t *ipif_arg,
958     queue_t *q, mblk_t *mp, ipsq_func_t func, ip_stack_t *ipst)
959 {
960 	ire_t	*ire = NULL;
961 	ipif_t	*ipif;
962 	uint_t	type;
963 	uint_t	match_flags = MATCH_IRE_TYPE;
964 	int	err = 0;
965 	boolean_t	ipif_refheld = B_FALSE;
966 
967 	/*
968 	 * If this is the case of RTF_HOST being set, then we set the netmask
969 	 * to all ones.  Otherwise, we use the netmask if one was supplied.
970 	 */
971 	if (flags & RTF_HOST) {
972 		mask = &ipv6_all_ones;
973 		match_flags |= MATCH_IRE_MASK;
974 	} else if (rtm_addrs & RTA_NETMASK) {
975 		match_flags |= MATCH_IRE_MASK;
976 	}
977 
978 	/*
979 	 * Note that RTF_GATEWAY is never set on a delete, therefore
980 	 * we check if the gateway address is one of our interfaces first,
981 	 * and fall back on RTF_GATEWAY routes.
982 	 *
983 	 * This makes it possible to delete an original
984 	 * IRE_IF_NORESOLVER/IRE_IF_RESOLVER - consistent with SunOS 4.1.
985 	 *
986 	 * As the interface index specified with the RTA_IFP sockaddr is the
987 	 * same for all ipif's off of an ill, the matching logic below uses
988 	 * MATCH_IRE_ILL if such an index was specified.  This means a route
989 	 * sharing the same prefix and interface index as the the route
990 	 * intended to be deleted might be deleted instead if a RTA_IFP sockaddr
991 	 * is specified in the request.
992 	 *
993 	 * On the other hand, since the gateway address will usually be
994 	 * different for each ipif on the system, the matching logic
995 	 * uses MATCH_IRE_IPIF in the case of a traditional interface
996 	 * route.  This means that interface routes for the same prefix can be
997 	 * uniquely identified if they belong to distinct ipif's and if a
998 	 * RTA_IFP sockaddr is not present.
999 	 *
1000 	 * For more detail on specifying routes by gateway address and by
1001 	 * interface index, see the comments in ip_rt_add_v6().
1002 	 */
1003 	ipif = ipif_lookup_interface_v6(gw_addr, dst_addr, q, mp, func, &err,
1004 	    ipst);
1005 	if (ipif != NULL) {
1006 		ipif_refheld = B_TRUE;
1007 		if (ipif_arg != NULL) {
1008 			ipif_refrele(ipif);
1009 			ipif_refheld = B_FALSE;
1010 			ipif = ipif_arg;
1011 			match_flags |= MATCH_IRE_ILL;
1012 		} else {
1013 			match_flags |= MATCH_IRE_IPIF;
1014 		}
1015 
1016 		if (ipif->ipif_ire_type == IRE_LOOPBACK)
1017 			ire = ire_ctable_lookup_v6(dst_addr, 0, IRE_LOOPBACK,
1018 			    ipif, ALL_ZONES, NULL, match_flags, ipst);
1019 		if (ire == NULL)
1020 			ire = ire_ftable_lookup_v6(dst_addr, mask, 0,
1021 			    IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0, NULL,
1022 			    match_flags, ipst);
1023 	} else if (err == EINPROGRESS) {
1024 		return (err);
1025 	} else {
1026 		err = 0;
1027 	}
1028 	if (ire == NULL) {
1029 		/*
1030 		 * At this point, the gateway address is not one of our own
1031 		 * addresses or a matching interface route was not found.  We
1032 		 * set the IRE type to lookup based on whether
1033 		 * this is a host route, a default route or just a prefix.
1034 		 *
1035 		 * If an ipif_arg was passed in, then the lookup is based on an
1036 		 * interface index so MATCH_IRE_ILL is added to match_flags.
1037 		 * In any case, MATCH_IRE_IPIF is cleared and MATCH_IRE_GW is
1038 		 * set as the route being looked up is not a traditional
1039 		 * interface route.
1040 		 */
1041 		match_flags &= ~MATCH_IRE_IPIF;
1042 		match_flags |= MATCH_IRE_GW;
1043 		if (ipif_arg != NULL)
1044 			match_flags |= MATCH_IRE_ILL;
1045 		if (IN6_ARE_ADDR_EQUAL(mask, &ipv6_all_ones))
1046 			type = IRE_HOST;
1047 		else if (IN6_IS_ADDR_UNSPECIFIED(mask))
1048 			type = IRE_DEFAULT;
1049 		else
1050 			type = IRE_PREFIX;
1051 		ire = ire_ftable_lookup_v6(dst_addr, mask, gw_addr, type,
1052 		    ipif_arg, NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
1053 	}
1054 
1055 	if (ipif_refheld) {
1056 		ipif_refrele(ipif);
1057 		ipif_refheld = B_FALSE;
1058 	}
1059 	if (ire == NULL)
1060 		return (ESRCH);
1061 
1062 	if (ire->ire_flags & RTF_MULTIRT) {
1063 		/*
1064 		 * Invoke the CGTP (multirouting) filtering module
1065 		 * to remove the dst address from the filtering database.
1066 		 * Packets coming from that address will no longer be
1067 		 * filtered to remove duplicates.
1068 		 */
1069 		if (ipst->ips_ip_cgtp_filter_ops != NULL) {
1070 			err = ipst->ips_ip_cgtp_filter_ops->cfo_del_dest_v6(
1071 			    ipst->ips_netstack->netstack_stackid,
1072 			    &ire->ire_addr_v6, &ire->ire_gateway_addr_v6);
1073 		}
1074 	}
1075 
1076 	ipif = ire->ire_ipif;
1077 	if (ipif != NULL) {
1078 		mblk_t		**mpp;
1079 		mblk_t		*mp;
1080 		ifrt_t		*ifrt;
1081 		in6_addr_t	gw_addr_v6;
1082 
1083 		/* Remove from ipif_saved_ire_mp list if it is there */
1084 		mutex_enter(&ire->ire_lock);
1085 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1086 		mutex_exit(&ire->ire_lock);
1087 		mutex_enter(&ipif->ipif_saved_ire_lock);
1088 		for (mpp = &ipif->ipif_saved_ire_mp; *mpp != NULL;
1089 		    mpp = &(*mpp)->b_cont) {
1090 			/*
1091 			 * On a given ipif, the triple of address, gateway and
1092 			 * mask is unique for each saved IRE (in the case of
1093 			 * ordinary interface routes, the gateway address is
1094 			 * all-zeroes).
1095 			 */
1096 			mp = *mpp;
1097 			ifrt = (ifrt_t *)mp->b_rptr;
1098 			if (IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6addr,
1099 			    &ire->ire_addr_v6) &&
1100 			    IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6gateway_addr,
1101 			    &gw_addr_v6) &&
1102 			    IN6_ARE_ADDR_EQUAL(&ifrt->ifrt_v6mask,
1103 			    &ire->ire_mask_v6)) {
1104 				*mpp = mp->b_cont;
1105 				ipif->ipif_saved_ire_cnt--;
1106 				freeb(mp);
1107 				break;
1108 			}
1109 		}
1110 		mutex_exit(&ipif->ipif_saved_ire_lock);
1111 	}
1112 	ire_delete(ire);
1113 	ire_refrele(ire);
1114 	return (err);
1115 }
1116 
1117 /*
1118  * Derive a token from the link layer address.
1119  */
1120 boolean_t
1121 ill_setdefaulttoken(ill_t *ill)
1122 {
1123 	int 		i;
1124 	in6_addr_t	v6addr, v6mask;
1125 
1126 	if (!MEDIA_V6INTFID(ill->ill_media, ill->ill_phys_addr_length,
1127 	    ill->ill_phys_addr, &v6addr))
1128 		return (B_FALSE);
1129 
1130 	(void) ip_plen_to_mask_v6(IPV6_TOKEN_LEN, &v6mask);
1131 
1132 	for (i = 0; i < 4; i++)
1133 		v6mask.s6_addr32[i] = v6mask.s6_addr32[i] ^
1134 		    (uint32_t)0xffffffff;
1135 
1136 	V6_MASK_COPY(v6addr, v6mask, ill->ill_token);
1137 	ill->ill_token_length = IPV6_TOKEN_LEN;
1138 	return (B_TRUE);
1139 }
1140 
1141 /*
1142  * Create a link-local address from a token.
1143  */
1144 static void
1145 ipif_get_linklocal(in6_addr_t *dest, const in6_addr_t *token)
1146 {
1147 	int i;
1148 
1149 	for (i = 0; i < 4; i++) {
1150 		dest->s6_addr32[i] =
1151 		    token->s6_addr32[i] | ipv6_ll_template.s6_addr32[i];
1152 	}
1153 }
1154 
1155 /*
1156  * Set a nice default address for either automatic tunnels tsrc/96 or
1157  * 6to4 tunnels 2002:<tsrc>::1/64
1158  */
1159 static void
1160 ipif_set_tun_auto_addr(ipif_t *ipif, struct iftun_req *ta)
1161 {
1162 	sin6_t	sin6;
1163 	sin_t	*sin;
1164 	ill_t 	*ill = ipif->ipif_ill;
1165 	tun_t *tp = (tun_t *)ill->ill_wq->q_next->q_ptr;
1166 
1167 	if (ta->ifta_saddr.ss_family != AF_INET ||
1168 	    (ipif->ipif_flags & IPIF_UP) || !ipif->ipif_isv6 ||
1169 	    (ta->ifta_flags & IFTUN_SRC) == 0)
1170 		return;
1171 
1172 	/*
1173 	 * Check the tunnel type by examining q_next->q_ptr
1174 	 */
1175 	if (tp->tun_flags & TUN_AUTOMATIC) {
1176 		/* this is an automatic tunnel */
1177 		(void) ip_plen_to_mask_v6(IPV6_ABITS - IP_ABITS,
1178 		    &ipif->ipif_v6net_mask);
1179 		bzero(&sin6, sizeof (sin6_t));
1180 		sin = (sin_t *)&ta->ifta_saddr;
1181 		V4_PART_OF_V6(sin6.sin6_addr) = sin->sin_addr.s_addr;
1182 		sin6.sin6_family = AF_INET6;
1183 		(void) ip_sioctl_addr(ipif, (sin_t *)&sin6,
1184 		    NULL, NULL, NULL, NULL);
1185 	} else if (tp->tun_flags & TUN_6TO4) {
1186 		/* this is a 6to4 tunnel */
1187 		(void) ip_plen_to_mask_v6(IPV6_PREFIX_LEN,
1188 		    &ipif->ipif_v6net_mask);
1189 		sin = (sin_t *)&ta->ifta_saddr;
1190 		/* create a 6to4 address from the IPv4 tsrc */
1191 		IN6_V4ADDR_TO_6TO4(&sin->sin_addr, &sin6.sin6_addr);
1192 		sin6.sin6_family = AF_INET6;
1193 		(void) ip_sioctl_addr(ipif, (sin_t *)&sin6,
1194 		    NULL, NULL, NULL, NULL);
1195 	} else {
1196 		ip1dbg(("ipif_set_tun_auto_addr: Unknown tunnel type"));
1197 		return;
1198 	}
1199 }
1200 
1201 /*
1202  * Set link local for ipif_id 0 of a configured tunnel based on the
1203  * tsrc or tdst parameter
1204  * For tunnels over IPv4 use the IPv4 address prepended with 32 zeros as
1205  * the token.
1206  * For tunnels over IPv6 use the low-order 64 bits of the "inner" IPv6 address
1207  * as the token for the "outer" link.
1208  */
1209 void
1210 ipif_set_tun_llink(ill_t *ill, struct iftun_req *ta)
1211 {
1212 	ipif_t		*ipif;
1213 	sin_t		*sin;
1214 	in6_addr_t	*s6addr;
1215 
1216 	ASSERT(IAM_WRITER_ILL(ill));
1217 
1218 	/* The first ipif must be id zero. */
1219 	ipif = ill->ill_ipif;
1220 	ASSERT(ipif->ipif_id == 0);
1221 
1222 	/* no link local for automatic tunnels */
1223 	if (!(ipif->ipif_flags & IPIF_POINTOPOINT)) {
1224 		ipif_set_tun_auto_addr(ipif, ta);
1225 		return;
1226 	}
1227 
1228 	if ((ta->ifta_flags & IFTUN_DST) &&
1229 	    IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6pp_dst_addr)) {
1230 		sin6_t	sin6;
1231 
1232 		ASSERT(!(ipif->ipif_flags & IPIF_UP));
1233 		bzero(&sin6, sizeof (sin6_t));
1234 		if ((ta->ifta_saddr.ss_family == AF_INET)) {
1235 			sin = (sin_t *)&ta->ifta_daddr;
1236 			V4_PART_OF_V6(sin6.sin6_addr) =
1237 			    sin->sin_addr.s_addr;
1238 		} else {
1239 			s6addr =
1240 			    &((sin6_t *)&ta->ifta_daddr)->sin6_addr;
1241 			sin6.sin6_addr.s6_addr32[3] = s6addr->s6_addr32[3];
1242 			sin6.sin6_addr.s6_addr32[2] = s6addr->s6_addr32[2];
1243 		}
1244 		ipif_get_linklocal(&ipif->ipif_v6pp_dst_addr,
1245 		    &sin6.sin6_addr);
1246 		ipif->ipif_v6subnet = ipif->ipif_v6pp_dst_addr;
1247 	}
1248 	if ((ta->ifta_flags & IFTUN_SRC)) {
1249 		ASSERT(!(ipif->ipif_flags & IPIF_UP));
1250 
1251 		/* Set the token if it isn't already set */
1252 		if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token)) {
1253 			if ((ta->ifta_saddr.ss_family == AF_INET)) {
1254 				sin = (sin_t *)&ta->ifta_saddr;
1255 				V4_PART_OF_V6(ill->ill_token) =
1256 				    sin->sin_addr.s_addr;
1257 			} else {
1258 				s6addr =
1259 				    &((sin6_t *)&ta->ifta_saddr)->sin6_addr;
1260 				ill->ill_token.s6_addr32[3] =
1261 				    s6addr->s6_addr32[3];
1262 				ill->ill_token.s6_addr32[2] =
1263 				    s6addr->s6_addr32[2];
1264 			}
1265 			ill->ill_token_length = IPV6_TOKEN_LEN;
1266 		}
1267 		/*
1268 		 * Attempt to set the link local address if it isn't set.
1269 		 */
1270 		if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr))
1271 			(void) ipif_setlinklocal(ipif);
1272 	}
1273 }
1274 
1275 /*
1276  * Is it not possible to set the link local address?
1277  * The address can be set if the token is set, and the token
1278  * isn't too long.
1279  * Return B_TRUE if the address can't be set, or B_FALSE if it can.
1280  */
1281 boolean_t
1282 ipif_cant_setlinklocal(ipif_t *ipif)
1283 {
1284 	ill_t *ill = ipif->ipif_ill;
1285 
1286 	if (IN6_IS_ADDR_UNSPECIFIED(&ill->ill_token) ||
1287 	    ill->ill_token_length > IPV6_ABITS - IPV6_LL_PREFIXLEN)
1288 		return (B_TRUE);
1289 
1290 	return (B_FALSE);
1291 }
1292 
1293 /*
1294  * Generate a link-local address from the token.
1295  * Return zero if the address was set, or non-zero if it couldn't be set.
1296  */
1297 int
1298 ipif_setlinklocal(ipif_t *ipif)
1299 {
1300 	ill_t		*ill = ipif->ipif_ill;
1301 	in6_addr_t	ov6addr;
1302 
1303 	ASSERT(IAM_WRITER_ILL(ill));
1304 
1305 	if (ipif_cant_setlinklocal(ipif))
1306 		return (-1);
1307 
1308 	ov6addr = ipif->ipif_v6lcl_addr;
1309 	ipif_get_linklocal(&ipif->ipif_v6lcl_addr, &ill->ill_token);
1310 	sctp_update_ipif_addr(ipif, ov6addr);
1311 	(void) ip_plen_to_mask_v6(IPV6_LL_PREFIXLEN, &ipif->ipif_v6net_mask);
1312 	V6_MASK_COPY(ipif->ipif_v6lcl_addr, ipif->ipif_v6net_mask,
1313 	    ipif->ipif_v6subnet);
1314 
1315 	if (ipif->ipif_flags & IPIF_NOLOCAL) {
1316 		ipif->ipif_v6src_addr = ipv6_all_zeros;
1317 	} else {
1318 		ipif->ipif_v6src_addr = ipif->ipif_v6lcl_addr;
1319 	}
1320 	return (0);
1321 }
1322 
1323 /*
1324  * This function sets up the multicast mappings in NDP.
1325  * Unlike ARP, there are no mapping_mps here. We delete the
1326  * mapping nces and add a new one.
1327  *
1328  * Returns non-zero on error and 0 on success.
1329  */
1330 int
1331 ipif_ndp_setup_multicast(ipif_t *ipif, nce_t **ret_nce)
1332 {
1333 	ill_t		*ill = ipif->ipif_ill;
1334 	in6_addr_t	v6_mcast_addr = {(uint32_t)V6_MCAST, 0, 0, 0};
1335 	in6_addr_t	v6_mcast_mask = {(uint32_t)V6_MCAST, 0, 0, 0};
1336 	in6_addr_t	v6_extract_mask;
1337 	uchar_t		*phys_addr, *bphys_addr, *alloc_phys;
1338 	nce_t		*mnce = NULL;
1339 	int		err = 0;
1340 	phyint_t	*phyi = ill->ill_phyint;
1341 	uint32_t	hw_extract_start;
1342 	dl_unitdata_req_t *dlur;
1343 	ip_stack_t	*ipst = ill->ill_ipst;
1344 
1345 	if (ret_nce != NULL)
1346 		*ret_nce = NULL;
1347 	/*
1348 	 * Delete the mapping nce. Normally these should not exist
1349 	 * as a previous ipif_down -> ipif_ndp_down should have deleted
1350 	 * all the nces. But they can exist if ip_rput_dlpi_writer
1351 	 * calls this when PHYI_MULTI_BCAST is set.
1352 	 */
1353 	mnce = ndp_lookup_v6(ill, &v6_mcast_addr, B_FALSE);
1354 	if (mnce != NULL) {
1355 		ndp_delete(mnce);
1356 		NCE_REFRELE(mnce);
1357 		mnce = NULL;
1358 	}
1359 
1360 	/*
1361 	 * Get media specific v6 mapping information. Note that
1362 	 * nd_lla_len can be 0 for tunnels.
1363 	 */
1364 	alloc_phys = kmem_alloc(ill->ill_nd_lla_len, KM_NOSLEEP);
1365 	if ((alloc_phys == NULL) && (ill->ill_nd_lla_len != 0))
1366 		return (ENOMEM);
1367 	/*
1368 	 * Determine the broadcast address.
1369 	 */
1370 	dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr;
1371 	if (ill->ill_sap_length < 0)
1372 		bphys_addr = (uchar_t *)dlur + dlur->dl_dest_addr_offset;
1373 	else
1374 		bphys_addr = (uchar_t *)dlur +
1375 		    dlur->dl_dest_addr_offset + ill->ill_sap_length;
1376 
1377 	/*
1378 	 * Check PHYI_MULTI_BCAST and possible length of physical
1379 	 * address to determine if we use the mapping or the
1380 	 * broadcast address.
1381 	 */
1382 	if ((phyi->phyint_flags & PHYI_MULTI_BCAST) ||
1383 	    (!MEDIA_V6MINFO(ill->ill_media, ill->ill_nd_lla_len,
1384 	    bphys_addr, alloc_phys, &hw_extract_start,
1385 	    &v6_extract_mask))) {
1386 		if (ill->ill_phys_addr_length > IP_MAX_HW_LEN) {
1387 			kmem_free(alloc_phys, ill->ill_nd_lla_len);
1388 			return (E2BIG);
1389 		}
1390 		/* Use the link-layer broadcast address for MULTI_BCAST */
1391 		phys_addr = bphys_addr;
1392 		bzero(&v6_extract_mask, sizeof (v6_extract_mask));
1393 		hw_extract_start = ill->ill_nd_lla_len;
1394 	} else {
1395 		phys_addr = alloc_phys;
1396 	}
1397 	if ((ipif->ipif_flags & IPIF_BROADCAST) ||
1398 	    (ill->ill_flags & ILLF_MULTICAST) ||
1399 	    (phyi->phyint_flags & PHYI_MULTI_BCAST)) {
1400 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
1401 		err = ndp_add_v6(ill,
1402 		    phys_addr,
1403 		    &v6_mcast_addr,	/* v6 address */
1404 		    &v6_mcast_mask,	/* v6 mask */
1405 		    &v6_extract_mask,
1406 		    hw_extract_start,
1407 		    NCE_F_MAPPING | NCE_F_PERMANENT | NCE_F_NONUD,
1408 		    ND_REACHABLE,
1409 		    &mnce);
1410 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
1411 		if (err == 0) {
1412 			if (ret_nce != NULL) {
1413 				*ret_nce = mnce;
1414 			} else {
1415 				NCE_REFRELE(mnce);
1416 			}
1417 		}
1418 	}
1419 	kmem_free(alloc_phys, ill->ill_nd_lla_len);
1420 	return (err);
1421 }
1422 
1423 /*
1424  * Get the resolver set up for a new ipif.  (Always called as writer.)
1425  */
1426 int
1427 ipif_ndp_up(ipif_t *ipif)
1428 {
1429 	ill_t		*ill = ipif->ipif_ill;
1430 	int		err = 0;
1431 	nce_t		*nce = NULL;
1432 	nce_t		*mnce = NULL;
1433 
1434 	ip1dbg(("ipif_ndp_up(%s:%u)\n", ill->ill_name, ipif->ipif_id));
1435 
1436 	/*
1437 	 * ND not supported on XRESOLV interfaces. If ND support (multicast)
1438 	 * added later, take out this check.
1439 	 */
1440 	if ((ill->ill_flags & ILLF_XRESOLV) ||
1441 	    IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) ||
1442 	    (!(ill->ill_net_type & IRE_INTERFACE))) {
1443 		ipif->ipif_addr_ready = 1;
1444 		return (0);
1445 	}
1446 
1447 	/*
1448 	 * Need to setup multicast mapping only when the first
1449 	 * interface is coming UP.
1450 	 */
1451 	if (ill->ill_ipif_up_count == 0 &&
1452 	    (ill->ill_flags & ILLF_MULTICAST)) {
1453 		/*
1454 		 * We set the multicast before setting up the mapping for
1455 		 * local address because ipif_ndp_setup_multicast does
1456 		 * ndp_walk to delete nces which will delete the mapping
1457 		 * for local address also if we added the mapping for
1458 		 * local address first.
1459 		 */
1460 		err = ipif_ndp_setup_multicast(ipif, &mnce);
1461 		if (err != 0)
1462 			return (err);
1463 	}
1464 
1465 	if ((ipif->ipif_flags & (IPIF_UNNUMBERED|IPIF_NOLOCAL)) == 0) {
1466 		uint16_t	flags;
1467 		uchar_t	*hw_addr = NULL;
1468 
1469 		/* Permanent entries don't need NUD */
1470 		flags = NCE_F_PERMANENT | NCE_F_NONUD;
1471 		if (ill->ill_flags & ILLF_ROUTER)
1472 			flags |= NCE_F_ISROUTER;
1473 
1474 		if (ipif->ipif_flags & IPIF_ANYCAST)
1475 			flags |= NCE_F_ANYCAST;
1476 
1477 		if (ill->ill_net_type == IRE_IF_RESOLVER) {
1478 			hw_addr = ill->ill_nd_lla;
1479 
1480 			if (ill->ill_move_in_progress) {
1481 				/*
1482 				 * Addresses are failing over to this ill.
1483 				 * Don't wait for NUD to see this change.
1484 				 * Publish our new link-layer address.
1485 				 */
1486 				flags |= NCE_F_UNSOL_ADV;
1487 			}
1488 		}
1489 		err = ndp_lookup_then_add_v6(ill,
1490 		    hw_addr,
1491 		    &ipif->ipif_v6lcl_addr,
1492 		    &ipv6_all_ones,
1493 		    &ipv6_all_zeros,
1494 		    0,
1495 		    flags,
1496 		    ND_PROBE,	/* Causes Duplicate Address Detection to run */
1497 		    &nce);
1498 		switch (err) {
1499 		case 0:
1500 			ip1dbg(("ipif_ndp_up: NCE created for %s\n",
1501 			    ill->ill_name));
1502 			ipif->ipif_addr_ready = 1;
1503 			break;
1504 		case EINPROGRESS:
1505 			ip1dbg(("ipif_ndp_up: running DAD now for %s\n",
1506 			    ill->ill_name));
1507 			break;
1508 		case EEXIST:
1509 			NCE_REFRELE(nce);
1510 			ip1dbg(("ipif_ndp_up: NCE already exists for %s\n",
1511 			    ill->ill_name));
1512 			if (mnce != NULL) {
1513 				ndp_delete(mnce);
1514 				NCE_REFRELE(mnce);
1515 			}
1516 			return (err);
1517 		default:
1518 			ip1dbg(("ipif_ndp_up: NCE creation failed %s\n",
1519 			    ill->ill_name));
1520 			if (mnce != NULL) {
1521 				ndp_delete(mnce);
1522 				NCE_REFRELE(mnce);
1523 			}
1524 			return (err);
1525 		}
1526 	} else {
1527 		/* No local NCE for this entry */
1528 		ipif->ipif_addr_ready = 1;
1529 	}
1530 	if (nce != NULL)
1531 		NCE_REFRELE(nce);
1532 	if (mnce != NULL)
1533 		NCE_REFRELE(mnce);
1534 	return (0);
1535 }
1536 
1537 /* Remove all cache entries for this logical interface */
1538 void
1539 ipif_ndp_down(ipif_t *ipif)
1540 {
1541 	nce_t	*nce;
1542 
1543 	if (ipif->ipif_isv6) {
1544 		nce = ndp_lookup_v6(ipif->ipif_ill, &ipif->ipif_v6lcl_addr,
1545 		    B_FALSE);
1546 		if (nce != NULL) {
1547 			ndp_delete(nce);
1548 			NCE_REFRELE(nce);
1549 		}
1550 	}
1551 	/*
1552 	 * Remove mapping and all other nces dependent on this ill
1553 	 * when the last ipif is going away.
1554 	 */
1555 	if (ipif->ipif_ill->ill_ipif_up_count == 0) {
1556 		ndp_walk(ipif->ipif_ill, (pfi_t)ndp_delete_per_ill,
1557 		    (uchar_t *)ipif->ipif_ill, ipif->ipif_ill->ill_ipst);
1558 	}
1559 }
1560 
1561 /*
1562  * Used when an interface comes up to recreate any extra routes on this
1563  * interface.
1564  */
1565 static ire_t **
1566 ipif_recover_ire_v6(ipif_t *ipif)
1567 {
1568 	mblk_t	*mp;
1569 	ire_t   **ipif_saved_irep;
1570 	ire_t   **irep;
1571 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
1572 
1573 	ip1dbg(("ipif_recover_ire_v6(%s:%u)", ipif->ipif_ill->ill_name,
1574 	    ipif->ipif_id));
1575 
1576 	ASSERT(ipif->ipif_isv6);
1577 
1578 	mutex_enter(&ipif->ipif_saved_ire_lock);
1579 	ipif_saved_irep = (ire_t **)kmem_zalloc(sizeof (ire_t *) *
1580 	    ipif->ipif_saved_ire_cnt, KM_NOSLEEP);
1581 	if (ipif_saved_irep == NULL) {
1582 		mutex_exit(&ipif->ipif_saved_ire_lock);
1583 		return (NULL);
1584 	}
1585 
1586 	irep = ipif_saved_irep;
1587 
1588 	for (mp = ipif->ipif_saved_ire_mp; mp != NULL; mp = mp->b_cont) {
1589 		ire_t		*ire;
1590 		queue_t		*rfq;
1591 		queue_t		*stq;
1592 		ifrt_t		*ifrt;
1593 		in6_addr_t	*src_addr;
1594 		in6_addr_t	*gateway_addr;
1595 		char		buf[INET6_ADDRSTRLEN];
1596 		ushort_t	type;
1597 
1598 		/*
1599 		 * When the ire was initially created and then added in
1600 		 * ip_rt_add_v6(), it was created either using
1601 		 * ipif->ipif_net_type in the case of a traditional interface
1602 		 * route, or as one of the IRE_OFFSUBNET types (with the
1603 		 * exception of IRE_HOST type redirect ire which is created by
1604 		 * icmp_redirect_v6() and which we don't need to save or
1605 		 * recover).  In the case where ipif->ipif_net_type was
1606 		 * IRE_LOOPBACK, ip_rt_add_v6() will update the ire_type to
1607 		 * IRE_IF_NORESOLVER before calling ire_add_v6() to satisfy
1608 		 * software like GateD and Sun Cluster which creates routes
1609 		 * using the the loopback interface's address as a gateway.
1610 		 *
1611 		 * As ifrt->ifrt_type reflects the already updated ire_type,
1612 		 * ire_create_v6() will be called in the same way here as in
1613 		 * ip_rt_add_v6(), namely using ipif->ipif_net_type when the
1614 		 * route looks like a traditional interface route (where
1615 		 * ifrt->ifrt_type & IRE_INTERFACE is true) and otherwise
1616 		 * using the saved ifrt->ifrt_type.  This means that in
1617 		 * the case where ipif->ipif_net_type is IRE_LOOPBACK,
1618 		 * the ire created by ire_create_v6() will be an IRE_LOOPBACK,
1619 		 * it will then be turned into an IRE_IF_NORESOLVER and then
1620 		 * added by ire_add_v6().
1621 		 */
1622 		ifrt = (ifrt_t *)mp->b_rptr;
1623 		if (ifrt->ifrt_type & IRE_INTERFACE) {
1624 			rfq = NULL;
1625 			stq = (ipif->ipif_net_type == IRE_IF_RESOLVER)
1626 			    ? ipif->ipif_rq : ipif->ipif_wq;
1627 			src_addr = (ifrt->ifrt_flags & RTF_SETSRC)
1628 			    ? &ifrt->ifrt_v6src_addr
1629 			    : &ipif->ipif_v6src_addr;
1630 			gateway_addr = NULL;
1631 			type = ipif->ipif_net_type;
1632 		} else {
1633 			rfq = NULL;
1634 			stq = NULL;
1635 			src_addr = (ifrt->ifrt_flags & RTF_SETSRC)
1636 			    ? &ifrt->ifrt_v6src_addr : NULL;
1637 			gateway_addr = &ifrt->ifrt_v6gateway_addr;
1638 			type = ifrt->ifrt_type;
1639 		}
1640 
1641 		/*
1642 		 * Create a copy of the IRE with the saved address and netmask.
1643 		 */
1644 		ip1dbg(("ipif_recover_ire_v6: creating IRE %s (%d) for %s/%d\n",
1645 		    ip_nv_lookup(ire_nv_tbl, ifrt->ifrt_type), ifrt->ifrt_type,
1646 		    inet_ntop(AF_INET6, &ifrt->ifrt_v6addr, buf, sizeof (buf)),
1647 		    ip_mask_to_plen_v6(&ifrt->ifrt_v6mask)));
1648 		ire = ire_create_v6(
1649 		    &ifrt->ifrt_v6addr,
1650 		    &ifrt->ifrt_v6mask,
1651 		    src_addr,
1652 		    gateway_addr,
1653 		    &ifrt->ifrt_max_frag,
1654 		    NULL,
1655 		    rfq,
1656 		    stq,
1657 		    type,
1658 		    ipif,
1659 		    NULL,
1660 		    0,
1661 		    0,
1662 		    ifrt->ifrt_flags,
1663 		    &ifrt->ifrt_iulp_info,
1664 		    NULL,
1665 		    NULL,
1666 		    ipst);
1667 		if (ire == NULL) {
1668 			mutex_exit(&ipif->ipif_saved_ire_lock);
1669 			kmem_free(ipif_saved_irep,
1670 			    ipif->ipif_saved_ire_cnt * sizeof (ire_t *));
1671 			return (NULL);
1672 		}
1673 
1674 		/*
1675 		 * Some software (for example, GateD and Sun Cluster) attempts
1676 		 * to create (what amount to) IRE_PREFIX routes with the
1677 		 * loopback address as the gateway.  This is primarily done to
1678 		 * set up prefixes with the RTF_REJECT flag set (for example,
1679 		 * when generating aggregate routes.)
1680 		 *
1681 		 * If the IRE type (as defined by ipif->ipif_net_type) is
1682 		 * IRE_LOOPBACK, then we map the request into a
1683 		 * IRE_IF_NORESOLVER.
1684 		 */
1685 		if (ipif->ipif_net_type == IRE_LOOPBACK)
1686 			ire->ire_type = IRE_IF_NORESOLVER;
1687 		/*
1688 		 * ire held by ire_add, will be refreled' in ipif_up_done
1689 		 * towards the end
1690 		 */
1691 		(void) ire_add(&ire, NULL, NULL, NULL, B_FALSE);
1692 		*irep = ire;
1693 		irep++;
1694 		ip1dbg(("ipif_recover_ire_v6: added ire %p\n", (void *)ire));
1695 	}
1696 	mutex_exit(&ipif->ipif_saved_ire_lock);
1697 	return (ipif_saved_irep);
1698 }
1699 
1700 /*
1701  * Return the scope of the given IPv6 address.  If the address is an
1702  * IPv4 mapped IPv6 address, return the scope of the corresponding
1703  * IPv4 address.
1704  */
1705 in6addr_scope_t
1706 ip_addr_scope_v6(const in6_addr_t *addr)
1707 {
1708 	static in6_addr_t ipv6loopback = IN6ADDR_LOOPBACK_INIT;
1709 
1710 	if (IN6_IS_ADDR_V4MAPPED(addr)) {
1711 		in_addr_t v4addr_h = ntohl(V4_PART_OF_V6((*addr)));
1712 		if ((v4addr_h >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
1713 		    (v4addr_h & IN_AUTOCONF_MASK) == IN_AUTOCONF_NET)
1714 			return (IP6_SCOPE_LINKLOCAL);
1715 		if ((v4addr_h & IN_PRIVATE8_MASK) == IN_PRIVATE8_NET ||
1716 		    (v4addr_h & IN_PRIVATE12_MASK) == IN_PRIVATE12_NET ||
1717 		    (v4addr_h & IN_PRIVATE16_MASK) == IN_PRIVATE16_NET)
1718 			return (IP6_SCOPE_SITELOCAL);
1719 		return (IP6_SCOPE_GLOBAL);
1720 	}
1721 
1722 	if (IN6_IS_ADDR_MULTICAST(addr))
1723 		return (IN6_ADDR_MC_SCOPE(addr));
1724 
1725 	/* link-local and loopback addresses are of link-local scope */
1726 	if (IN6_IS_ADDR_LINKLOCAL(addr) ||
1727 	    IN6_ARE_ADDR_EQUAL(addr, &ipv6loopback))
1728 		return (IP6_SCOPE_LINKLOCAL);
1729 	if (IN6_IS_ADDR_SITELOCAL(addr))
1730 		return (IP6_SCOPE_SITELOCAL);
1731 	return (IP6_SCOPE_GLOBAL);
1732 }
1733 
1734 
1735 /*
1736  * Returns the length of the common prefix of a1 and a2, as per
1737  * CommonPrefixLen() defined in RFC 3484.
1738  */
1739 static int
1740 ip_common_prefix_v6(const in6_addr_t *a1, const in6_addr_t *a2)
1741 {
1742 	int i;
1743 	uint32_t a1val, a2val, mask;
1744 
1745 	for (i = 0; i < 4; i++) {
1746 		if ((a1val = a1->s6_addr32[i]) != (a2val = a2->s6_addr32[i])) {
1747 			a1val ^= a2val;
1748 			i *= 32;
1749 			mask = 0x80000000u;
1750 			while (!(a1val & mask)) {
1751 				mask >>= 1;
1752 				i++;
1753 			}
1754 			return (i);
1755 		}
1756 	}
1757 	return (IPV6_ABITS);
1758 }
1759 
/*
 * True when the ipif may be used as an IPv6 source address: it is up,
 * is neither IPIF_NOLOCAL nor IPIF_ANYCAST, and its address is ready.
 * Note that the argument is evaluated more than once.
 */
#define	IPIF_VALID_IPV6_SOURCE(ipif)					\
	((((ipif)->ipif_flags & IPIF_UP) != 0) &&			\
	(((ipif)->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST)) == 0) &&	\
	(ipif)->ipif_addr_ready)
1764 
/*
 * One candidate considered during IPv6 source address selection.
 *
 * Each property below is paired with a "_set" flag.  The rule functions in
 * this file compute a property on demand and set the matching flag, so a
 * best candidate that has already been evaluated by a rule is not
 * evaluated again.  The macros that follow are shorthands for fields
 * reached through cand_ipif.
 * NOTE(review): the rules that fill in cand_istmp, cand_common_pref,
 * cand_pref_eq and cand_pref_len are not part of this excerpt.
 */
1765 /* source address candidate */
1766 typedef struct candidate {
1767 	ipif_t		*cand_ipif;
1768 	/* The properties of this candidate */
1769 	boolean_t	cand_isdst;
1770 	boolean_t	cand_isdst_set;
1771 	in6addr_scope_t	cand_scope;
1772 	boolean_t	cand_scope_set;
1773 	boolean_t	cand_isdeprecated;
1774 	boolean_t	cand_isdeprecated_set;
1775 	boolean_t	cand_ispreferred;
1776 	boolean_t	cand_ispreferred_set;
1777 	boolean_t	cand_matchedinterface;
1778 	boolean_t	cand_matchedinterface_set;
1779 	boolean_t	cand_matchedlabel;
1780 	boolean_t	cand_matchedlabel_set;
1781 	boolean_t	cand_istmp;
1782 	boolean_t	cand_istmp_set;
1783 	int		cand_common_pref;
1784 	boolean_t	cand_common_pref_set;
1785 	boolean_t	cand_pref_eq;
1786 	boolean_t	cand_pref_eq_set;
1787 	int		cand_pref_len;
1788 	boolean_t	cand_pref_len_set;
1789 } cand_t;
1790 #define	cand_srcaddr	cand_ipif->ipif_v6lcl_addr
1791 #define	cand_mask	cand_ipif->ipif_v6net_mask
1792 #define	cand_flags	cand_ipif->ipif_flags
1793 #define	cand_ill	cand_ipif->ipif_ill
1794 #define	cand_zoneid	cand_ipif->ipif_zoneid
1795 
/*
 * Destination-side inputs shared by all source address selection rules:
 *   dst_addr		destination address (compared against candidates
 *			by rule_isdst)
 *   dst_ill		ill that candidates are matched against by
 *			rule_interface
 *   dst_restrict_ill	when non-zero rule_interface always reports a tie,
 *			as all candidates are then known to be on the same
 *			link
 *   dst_scope		scope of the destination, used by rule_scope
 * NOTE(review): dst_prefer_src_tmp and dst_label are consumed by rules
 * outside this excerpt.
 */
1796 /* information about the destination for source address selection */
1797 typedef struct dstinfo {
1798 	const in6_addr_t	*dst_addr;
1799 	ill_t			*dst_ill;
1800 	uint_t			dst_restrict_ill;
1801 	boolean_t		dst_prefer_src_tmp;
1802 	in6addr_scope_t		dst_scope;
1803 	char			*dst_label;
1804 } dstinfo_t;
1805 
1806 /*
1807  * The following functions are rules used to select a source address in
1808  * ipif_select_source_v6().  Each rule compares a current candidate (cc)
1809  * against the best candidate (bc).  Each rule has three possible outcomes;
1810  * the candidate is preferred over the best candidate (CAND_PREFER), the
1811  * candidate is not preferred over the best candidate (CAND_AVOID), or the
1812  * candidate is of equal value as the best candidate (CAND_TIE).
1813  *
1814  * These rules are part of a greater "Default Address Selection for IPv6"
1815  * scheme, which is standards based work coming out of the IETF ipv6 working
1816  * group.  The IETF document defines both IPv6 source address selection and
1817  * destination address ordering.  The rules defined here implement the IPv6
1818  * source address selection.  Destination address ordering is done by
1819  * libnsl, and uses a similar set of rules to implement the sorting.
1820  *
1821  * Most of the rules are defined by the RFC and are not typically altered.  The
1822  * last rule, number 8, has language that allows for local preferences.  In the
1823  * scheme below, this means that new Solaris rules should normally go between
1824  * rule_ifprefix and rule_prefix.
1825  */
/*
 * rule_res_t is the verdict of one rule for the current candidate relative
 * to the best candidate so far.  rulef_t is the signature shared by all
 * rule functions: (best candidate, current candidate, destination info,
 * IP stack instance).
 */
1826 typedef enum {CAND_AVOID, CAND_TIE, CAND_PREFER} rule_res_t;
1827 typedef	rule_res_t (*rulef_t)(cand_t *, cand_t *, const dstinfo_t *,
1828     ip_stack_t *);
1829 
1830 /* Prefer an address if it is equal to the destination address. */
1831 /* ARGSUSED3 */
1832 static rule_res_t
1833 rule_isdst(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1834 {
1835 	if (!bc->cand_isdst_set) {
1836 		bc->cand_isdst =
1837 		    IN6_ARE_ADDR_EQUAL(&bc->cand_srcaddr, dstinfo->dst_addr);
1838 		bc->cand_isdst_set = B_TRUE;
1839 	}
1840 
1841 	cc->cand_isdst =
1842 	    IN6_ARE_ADDR_EQUAL(&cc->cand_srcaddr, dstinfo->dst_addr);
1843 	cc->cand_isdst_set = B_TRUE;
1844 
1845 	if (cc->cand_isdst == bc->cand_isdst)
1846 		return (CAND_TIE);
1847 	else if (cc->cand_isdst)
1848 		return (CAND_PREFER);
1849 	else
1850 		return (CAND_AVOID);
1851 }
1852 
1853 /*
1854  * Prefer addresses that are of closest scope to the destination.  Always
1855  * prefer addresses that are of greater scope than the destination over
1856  * those that are of lesser scope than the destination.
1857  */
1858 /* ARGSUSED3 */
1859 static rule_res_t
1860 rule_scope(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1861 {
1862 	if (!bc->cand_scope_set) {
1863 		bc->cand_scope = ip_addr_scope_v6(&bc->cand_srcaddr);
1864 		bc->cand_scope_set = B_TRUE;
1865 	}
1866 
1867 	cc->cand_scope = ip_addr_scope_v6(&cc->cand_srcaddr);
1868 	cc->cand_scope_set = B_TRUE;
1869 
1870 	if (cc->cand_scope < bc->cand_scope) {
1871 		if (cc->cand_scope < dstinfo->dst_scope)
1872 			return (CAND_AVOID);
1873 		else
1874 			return (CAND_PREFER);
1875 	} else if (bc->cand_scope < cc->cand_scope) {
1876 		if (bc->cand_scope < dstinfo->dst_scope)
1877 			return (CAND_PREFER);
1878 		else
1879 			return (CAND_AVOID);
1880 	} else {
1881 		return (CAND_TIE);
1882 	}
1883 }
1884 
1885 /*
1886  * Prefer non-deprecated source addresses.
1887  */
1888 /* ARGSUSED2 */
1889 static rule_res_t
1890 rule_deprecated(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1891     ip_stack_t *ipst)
1892 {
1893 	if (!bc->cand_isdeprecated_set) {
1894 		bc->cand_isdeprecated =
1895 		    ((bc->cand_flags & IPIF_DEPRECATED) != 0);
1896 		bc->cand_isdeprecated_set = B_TRUE;
1897 	}
1898 
1899 	cc->cand_isdeprecated = ((cc->cand_flags & IPIF_DEPRECATED) != 0);
1900 	cc->cand_isdeprecated_set = B_TRUE;
1901 
1902 	if (bc->cand_isdeprecated == cc->cand_isdeprecated)
1903 		return (CAND_TIE);
1904 	else if (cc->cand_isdeprecated)
1905 		return (CAND_AVOID);
1906 	else
1907 		return (CAND_PREFER);
1908 }
1909 
1910 /*
1911  * Prefer source addresses that have the IPIF_PREFERRED flag set.  This
1912  * rule must be before rule_interface because the flag could be set on any
1913  * interface, not just the interface being used for outgoing packets (for
1914  * example, the IFF_PREFERRED could be set on an address assigned to the
1915  * loopback interface).
1916  */
1917 /* ARGSUSED2 */
1918 static rule_res_t
1919 rule_preferred(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1920     ip_stack_t *ipst)
1921 {
1922 	if (!bc->cand_ispreferred_set) {
1923 		bc->cand_ispreferred = ((bc->cand_flags & IPIF_PREFERRED) != 0);
1924 		bc->cand_ispreferred_set = B_TRUE;
1925 	}
1926 
1927 	cc->cand_ispreferred = ((cc->cand_flags & IPIF_PREFERRED) != 0);
1928 	cc->cand_ispreferred_set = B_TRUE;
1929 
1930 	if (bc->cand_ispreferred == cc->cand_ispreferred)
1931 		return (CAND_TIE);
1932 	else if (cc->cand_ispreferred)
1933 		return (CAND_PREFER);
1934 	else
1935 		return (CAND_AVOID);
1936 }
1937 
1938 /*
1939  * Prefer source addresses that are assigned to the outgoing interface, or
1940  * to an interface that is in the same IPMP group as the outgoing
1941  * interface.
1942  */
1943 /* ARGSUSED3 */
1944 static rule_res_t
1945 rule_interface(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
1946     ip_stack_t *ipst)
1947 {
1948 	ill_t *dstill = dstinfo->dst_ill;
1949 
1950 	/*
1951 	 * If dstinfo->dst_restrict_ill is set, this rule is unnecessary
1952 	 * since we know all candidates will be on the same link.
1953 	 */
1954 	if (dstinfo->dst_restrict_ill)
1955 		return (CAND_TIE);
1956 
1957 	if (!bc->cand_matchedinterface_set) {
1958 		bc->cand_matchedinterface = (bc->cand_ill == dstill ||
1959 		    (dstill->ill_group != NULL &&
1960 		    dstill->ill_group == bc->cand_ill->ill_group));
1961 		bc->cand_matchedinterface_set = B_TRUE;
1962 	}
1963 
1964 	cc->cand_matchedinterface = (cc->cand_ill == dstill ||
1965 	    (dstill->ill_group != NULL &&
1966 	    dstill->ill_group == cc->cand_ill->ill_group));
1967 	cc->cand_matchedinterface_set = B_TRUE;
1968 
1969 	if (bc->cand_matchedinterface == cc->cand_matchedinterface)
1970 		return (CAND_TIE);
1971 	else if (cc->cand_matchedinterface)
1972 		return (CAND_PREFER);
1973 	else
1974 		return (CAND_AVOID);
1975 }
1976 
1977 /*
1978  * Prefer source addresses whose label matches the destination's label.
1979  */
1980 static rule_res_t
1981 rule_label(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
1982 {
1983 	char *label;
1984 
1985 	if (!bc->cand_matchedlabel_set) {
1986 		label = ip6_asp_lookup(&bc->cand_srcaddr, NULL, ipst);
1987 		bc->cand_matchedlabel =
1988 		    ip6_asp_labelcmp(label, dstinfo->dst_label);
1989 		bc->cand_matchedlabel_set = B_TRUE;
1990 	}
1991 
1992 	label = ip6_asp_lookup(&cc->cand_srcaddr, NULL, ipst);
1993 	cc->cand_matchedlabel = ip6_asp_labelcmp(label, dstinfo->dst_label);
1994 	cc->cand_matchedlabel_set = B_TRUE;
1995 
1996 	if (bc->cand_matchedlabel == cc->cand_matchedlabel)
1997 		return (CAND_TIE);
1998 	else if (cc->cand_matchedlabel)
1999 		return (CAND_PREFER);
2000 	else
2001 		return (CAND_AVOID);
2002 }
2003 
2004 /*
2005  * Prefer public addresses over temporary ones.  An application can reverse
2006  * the logic of this rule and prefer temporary addresses by using the
2007  * IPV6_SRC_PREFERENCES socket option.
2008  */
2009 /* ARGSUSED3 */
2010 static rule_res_t
2011 rule_temporary(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
2012     ip_stack_t *ipst)
2013 {
2014 	if (!bc->cand_istmp_set) {
2015 		bc->cand_istmp = ((bc->cand_flags & IPIF_TEMPORARY) != 0);
2016 		bc->cand_istmp_set = B_TRUE;
2017 	}
2018 
2019 	cc->cand_istmp = ((cc->cand_flags & IPIF_TEMPORARY) != 0);
2020 	cc->cand_istmp_set = B_TRUE;
2021 
2022 	if (bc->cand_istmp == cc->cand_istmp)
2023 		return (CAND_TIE);
2024 
2025 	if (dstinfo->dst_prefer_src_tmp && cc->cand_istmp)
2026 		return (CAND_PREFER);
2027 	else if (!dstinfo->dst_prefer_src_tmp && !cc->cand_istmp)
2028 		return (CAND_PREFER);
2029 	else
2030 		return (CAND_AVOID);
2031 }
2032 
2033 /*
2034  * Prefer source addresses with longer matching prefix with the destination
2035  * under the interface mask.  This gets us on the same subnet before applying
2036  * any Solaris-specific rules.
2037  */
2038 /* ARGSUSED3 */
2039 static rule_res_t
2040 rule_ifprefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
2041     ip_stack_t *ipst)
2042 {
2043 	if (!bc->cand_pref_eq_set) {
2044 		bc->cand_pref_eq = V6_MASK_EQ_2(bc->cand_srcaddr,
2045 		    bc->cand_mask, *dstinfo->dst_addr);
2046 		bc->cand_pref_eq_set = B_TRUE;
2047 	}
2048 
2049 	cc->cand_pref_eq = V6_MASK_EQ_2(cc->cand_srcaddr, cc->cand_mask,
2050 	    *dstinfo->dst_addr);
2051 	cc->cand_pref_eq_set = B_TRUE;
2052 
2053 	if (bc->cand_pref_eq) {
2054 		if (cc->cand_pref_eq) {
2055 			if (!bc->cand_pref_len_set) {
2056 				bc->cand_pref_len =
2057 				    ip_mask_to_plen_v6(&bc->cand_mask);
2058 				bc->cand_pref_len_set = B_TRUE;
2059 			}
2060 			cc->cand_pref_len = ip_mask_to_plen_v6(&cc->cand_mask);
2061 			cc->cand_pref_len_set = B_TRUE;
2062 			if (bc->cand_pref_len == cc->cand_pref_len)
2063 				return (CAND_TIE);
2064 			else if (bc->cand_pref_len > cc->cand_pref_len)
2065 				return (CAND_AVOID);
2066 			else
2067 				return (CAND_PREFER);
2068 		} else {
2069 			return (CAND_AVOID);
2070 		}
2071 	} else {
2072 		if (cc->cand_pref_eq)
2073 			return (CAND_PREFER);
2074 		else
2075 			return (CAND_TIE);
2076 	}
2077 }
2078 
2079 /*
2080  * Prefer to use zone-specific addresses when possible instead of all-zones
2081  * addresses.
2082  */
2083 /* ARGSUSED2 */
2084 static rule_res_t
2085 rule_zone_specific(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
2086     ip_stack_t *ipst)
2087 {
2088 	if ((bc->cand_zoneid == ALL_ZONES) ==
2089 	    (cc->cand_zoneid == ALL_ZONES))
2090 		return (CAND_TIE);
2091 	else if (cc->cand_zoneid == ALL_ZONES)
2092 		return (CAND_AVOID);
2093 	else
2094 		return (CAND_PREFER);
2095 }
2096 
2097 /*
2098  * Prefer to use DHCPv6 (first) and static addresses (second) when possible
2099  * instead of statelessly autoconfigured addresses.
2100  *
2101  * This is done after trying all other preferences (and before the final tie
2102  * breaker) so that, if all else is equal, we select addresses configured by
2103  * DHCPv6 over other addresses.  We presume that DHCPv6 addresses, unlike
2104  * stateless autoconfigured addresses, are deliberately configured by an
2105  * administrator, and thus are correctly set up in DNS and network packet
2106  * filters.
2107  */
2108 /* ARGSUSED2 */
2109 static rule_res_t
2110 rule_addr_type(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
2111     ip_stack_t *ipst)
2112 {
2113 #define	ATYPE(x)	\
2114 	((x) & IPIF_DHCPRUNNING) ? 1 : ((x) & IPIF_ADDRCONF) ? 3 : 2
2115 	int bcval = ATYPE(bc->cand_flags);
2116 	int ccval = ATYPE(cc->cand_flags);
2117 #undef ATYPE
2118 
2119 	if (bcval == ccval)
2120 		return (CAND_TIE);
2121 	else if (ccval < bcval)
2122 		return (CAND_PREFER);
2123 	else
2124 		return (CAND_AVOID);
2125 }
2126 
2127 /*
2128  * Prefer source addresses with longer matching prefix with the destination.
2129  * We do the longest matching prefix calculation by doing an xor of both
2130  * addresses with the destination, and pick the address with the longest string
2131  * of leading zeros, as per CommonPrefixLen() defined in RFC 3484.
2132  */
2133 /* ARGSUSED3 */
2134 static rule_res_t
2135 rule_prefix(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo, ip_stack_t *ipst)
2136 {
2137 	if (!bc->cand_common_pref_set) {
2138 		bc->cand_common_pref = ip_common_prefix_v6(&bc->cand_srcaddr,
2139 		    dstinfo->dst_addr);
2140 		bc->cand_common_pref_set = B_TRUE;
2141 	}
2142 
2143 	cc->cand_common_pref = ip_common_prefix_v6(&cc->cand_srcaddr,
2144 	    dstinfo->dst_addr);
2145 	cc->cand_common_pref_set = B_TRUE;
2146 
2147 	if (bc->cand_common_pref == cc->cand_common_pref)
2148 		return (CAND_TIE);
2149 	else if (bc->cand_common_pref > cc->cand_common_pref)
2150 		return (CAND_AVOID);
2151 	else
2152 		return (CAND_PREFER);
2153 }
2154 
2155 /*
2156  * Last rule: we must pick something, so just prefer the current best
2157  * candidate.
2158  */
2159 /* ARGSUSED */
2160 static rule_res_t
2161 rule_must_be_last(cand_t *bc, cand_t *cc, const dstinfo_t *dstinfo,
2162     ip_stack_t *ipst)
2163 {
2164 	return (CAND_AVOID);
2165 }
2166 
2167 /*
2168  * Determine the best source address given a destination address and a
2169  * destination ill.  If no suitable source address is found, it returns
2170  * NULL. If there is a usable address pointed to by the usesrc
2171  * (i.e ill_usesrc_ifindex != 0) then return that first since it is more
2172  * fine grained (i.e per interface)
2173  *
2174  * This implementation is based on the "Default Address Selection for IPv6"
2175  * specification produced by the IETF IPv6 working group.  It has been
2176  * implemented so that the list of addresses is only traversed once (the
2177  * specification's algorithm could traverse the list of addresses once for
2178  * every rule).
2179  *
2180  * The restrict_ill argument restricts the algorithm to chose a source
2181  * address that is assigned to the destination ill or an ill in the same
2182  * IPMP group as the destination ill.  This is used when the destination
2183  * address is a link-local or multicast address, and when
2184  * ipv6_strict_dst_multihoming is turned on.
2185  *
2186  * src_prefs is the caller's set of source address preferences.  If source
2187  * address selection is being called to determine the source address of a
2188  * connected socket (from ip_bind_connected_v6()), then the preferences are
2189  * taken from conn_src_preferences.  These preferences can be set on a
2190  * per-socket basis using the IPV6_SRC_PREFERENCES socket option.  The only
2191  * preference currently implemented is for rfc3041 temporary addresses.
2192  */
2193 ipif_t *
2194 ipif_select_source_v6(ill_t *dstill, const in6_addr_t *dst,
2195     uint_t restrict_ill, uint32_t src_prefs, zoneid_t zoneid)
2196 {
2197 	dstinfo_t	dstinfo;
2198 	char		dstr[INET6_ADDRSTRLEN];
2199 	char		sstr[INET6_ADDRSTRLEN];
2200 	ipif_t		*ipif;
2201 	ill_t		*ill, *usesrc_ill = NULL;
2202 	ill_walk_context_t	ctx;
2203 	cand_t		best_c;	/* The best candidate */
2204 	cand_t		curr_c;	/* The current candidate */
2205 	uint_t		index;
2206 	boolean_t	first_candidate = B_TRUE;
2207 	rule_res_t	rule_result;
2208 	tsol_tpc_t	*src_rhtp, *dst_rhtp;
2209 	ip_stack_t	*ipst = dstill->ill_ipst;
2210 
2211 	/*
2212 	 * The list of ordering rules.  They are applied in the order they
2213 	 * appear in the list.
2214 	 *
2215 	 * Solaris doesn't currently support Mobile IPv6, so there's no
2216 	 * rule_mipv6 corresponding to rule 4 in the specification.
2217 	 */
2218 	rulef_t	rules[] = {
2219 		rule_isdst,
2220 		rule_scope,
2221 		rule_deprecated,
2222 		rule_preferred,
2223 		rule_interface,
2224 		rule_label,
2225 		rule_temporary,
2226 		rule_ifprefix,			/* local rules after this */
2227 		rule_zone_specific,
2228 		rule_addr_type,
2229 		rule_prefix,			/* local rules before this */
2230 		rule_must_be_last,		/* must always be last */
2231 		NULL
2232 	};
2233 
2234 	ASSERT(dstill->ill_isv6);
2235 	ASSERT(!IN6_IS_ADDR_V4MAPPED(dst));
2236 
2237 	/*
2238 	 * Check if there is a usable src address pointed to by the
2239 	 * usesrc ifindex. This has higher precedence since it is
2240 	 * finer grained (i.e per interface) v/s being system wide.
2241 	 */
2242 	if (dstill->ill_usesrc_ifindex != 0) {
2243 		if ((usesrc_ill =
2244 		    ill_lookup_on_ifindex(dstill->ill_usesrc_ifindex, B_TRUE,
2245 		    NULL, NULL, NULL, NULL, ipst)) != NULL) {
2246 			dstinfo.dst_ill = usesrc_ill;
2247 		} else {
2248 			return (NULL);
2249 		}
2250 	} else {
2251 		dstinfo.dst_ill = dstill;
2252 	}
2253 
2254 	/*
2255 	 * If we're dealing with an unlabeled destination on a labeled system,
2256 	 * make sure that we ignore source addresses that are incompatible with
2257 	 * the destination's default label.  That destination's default label
2258 	 * must dominate the minimum label on the source address.
2259 	 *
2260 	 * (Note that this has to do with Trusted Solaris.  It's not related to
2261 	 * the labels described by ip6_asp_lookup.)
2262 	 */
2263 	dst_rhtp = NULL;
2264 	if (is_system_labeled()) {
2265 		dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE);
2266 		if (dst_rhtp == NULL)
2267 			return (NULL);
2268 		if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
2269 			TPC_RELE(dst_rhtp);
2270 			dst_rhtp = NULL;
2271 		}
2272 	}
2273 
2274 	dstinfo.dst_addr = dst;
2275 	dstinfo.dst_scope = ip_addr_scope_v6(dst);
2276 	dstinfo.dst_label = ip6_asp_lookup(dst, NULL, ipst);
2277 	dstinfo.dst_prefer_src_tmp = ((src_prefs & IPV6_PREFER_SRC_TMP) != 0);
2278 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
2279 	/*
2280 	 * Section three of the I-D states that for multicast and
2281 	 * link-local destinations, the candidate set must be restricted to
2282 	 * an interface that is on the same link as the outgoing interface.
2283 	 * Also, when ipv6_strict_dst_multihoming is turned on, always
2284 	 * restrict the source address to the destination link as doing
2285 	 * otherwise will almost certainly cause problems.
2286 	 */
2287 	if (IN6_IS_ADDR_LINKLOCAL(dst) || IN6_IS_ADDR_MULTICAST(dst) ||
2288 	    ipst->ips_ipv6_strict_dst_multihoming || usesrc_ill != NULL) {
2289 		if (restrict_ill == RESTRICT_TO_NONE)
2290 			dstinfo.dst_restrict_ill = RESTRICT_TO_GROUP;
2291 		else
2292 			dstinfo.dst_restrict_ill = restrict_ill;
2293 	} else {
2294 		dstinfo.dst_restrict_ill = restrict_ill;
2295 	}
2296 
2297 	bzero(&best_c, sizeof (cand_t));
2298 
2299 	/*
2300 	 * Take a pass through the list of IPv6 interfaces to chose the
2301 	 * best possible source address.  If restrict_ill is true, we only
2302 	 * iterate through the ill's that are in the same IPMP group as the
2303 	 * destination's outgoing ill.  If restrict_ill is false, we walk
2304 	 * the entire list of IPv6 ill's.
2305 	 */
2306 	if (dstinfo.dst_restrict_ill != RESTRICT_TO_NONE) {
2307 		if (dstinfo.dst_ill->ill_group != NULL &&
2308 		    dstinfo.dst_restrict_ill == RESTRICT_TO_GROUP) {
2309 			ill = dstinfo.dst_ill->ill_group->illgrp_ill;
2310 		} else {
2311 			ill = dstinfo.dst_ill;
2312 		}
2313 	} else {
2314 		ill = ILL_START_WALK_V6(&ctx, ipst);
2315 	}
2316 
2317 	while (ill != NULL) {
2318 		ASSERT(ill->ill_isv6);
2319 
2320 		/*
2321 		 * Avoid FAILED/OFFLINE ills.
2322 		 * Global and site local addresses will failover and
2323 		 * will be available on the new ill.
2324 		 * But link local addresses don't move.
2325 		 */
2326 		if (dstinfo.dst_restrict_ill != RESTRICT_TO_ILL &&
2327 		    ill->ill_phyint->phyint_flags &
2328 		    (PHYI_OFFLINE | PHYI_FAILED))
2329 			goto next_ill;
2330 
2331 		for (ipif = ill->ill_ipif; ipif != NULL;
2332 		    ipif = ipif->ipif_next) {
2333 
2334 			if (!IPIF_VALID_IPV6_SOURCE(ipif))
2335 				continue;
2336 
2337 			if (zoneid != ALL_ZONES &&
2338 			    ipif->ipif_zoneid != zoneid &&
2339 			    ipif->ipif_zoneid != ALL_ZONES)
2340 				continue;
2341 
2342 			/*
2343 			 * Check compatibility of local address for
2344 			 * destination's default label if we're on a labeled
2345 			 * system.  Incompatible addresses can't be used at
2346 			 * all and must be skipped over.
2347 			 */
2348 			if (dst_rhtp != NULL) {
2349 				boolean_t incompat;
2350 
2351 				src_rhtp = find_tpc(&ipif->ipif_v6lcl_addr,
2352 				    IPV6_VERSION, B_FALSE);
2353 				if (src_rhtp == NULL)
2354 					continue;
2355 				incompat =
2356 				    src_rhtp->tpc_tp.host_type != SUN_CIPSO ||
2357 				    src_rhtp->tpc_tp.tp_doi !=
2358 				    dst_rhtp->tpc_tp.tp_doi ||
2359 				    (!_blinrange(&dst_rhtp->tpc_tp.tp_def_label,
2360 				    &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
2361 				    !blinlset(&dst_rhtp->tpc_tp.tp_def_label,
2362 				    src_rhtp->tpc_tp.tp_sl_set_cipso));
2363 				TPC_RELE(src_rhtp);
2364 				if (incompat)
2365 					continue;
2366 			}
2367 
2368 			if (first_candidate) {
2369 				/*
2370 				 * This is first valid address in the list.
2371 				 * It is automatically the best candidate
2372 				 * so far.
2373 				 */
2374 				best_c.cand_ipif = ipif;
2375 				first_candidate = B_FALSE;
2376 				continue;
2377 			}
2378 
2379 			bzero(&curr_c, sizeof (cand_t));
2380 			curr_c.cand_ipif = ipif;
2381 
2382 			/*
2383 			 * Compare this current candidate (curr_c) with the
2384 			 * best candidate (best_c) by applying the
2385 			 * comparison rules in order until one breaks the
2386 			 * tie.
2387 			 */
2388 			for (index = 0; rules[index] != NULL; index++) {
2389 				/* Apply a comparison rule. */
2390 				rule_result =
2391 				    (rules[index])(&best_c, &curr_c, &dstinfo,
2392 				    ipst);
2393 				if (rule_result == CAND_AVOID) {
2394 					/*
2395 					 * The best candidate is still the
2396 					 * best candidate.  Forget about
2397 					 * this current candidate and go on
2398 					 * to the next one.
2399 					 */
2400 					break;
2401 				} else if (rule_result == CAND_PREFER) {
2402 					/*
2403 					 * This candidate is prefered.  It
2404 					 * becomes the best candidate so
2405 					 * far.  Go on to the next address.
2406 					 */
2407 					best_c = curr_c;
2408 					break;
2409 				}
2410 				/* We have a tie, apply the next rule. */
2411 			}
2412 
2413 			/*
2414 			 * The last rule must be a tie breaker rule and
2415 			 * must never produce a tie.  At this point, the
2416 			 * candidate should have either been rejected, or
2417 			 * have been prefered as the best candidate so far.
2418 			 */
2419 			ASSERT(rule_result != CAND_TIE);
2420 		}
2421 
2422 		/*
2423 		 * We may be walking the linked-list of ill's in an
2424 		 * IPMP group or traversing the IPv6 ill avl tree. If it is a
2425 		 * usesrc ILL then it can't be part of IPMP group and we
2426 		 * will exit the while loop.
2427 		 */
2428 next_ill:
2429 		if (dstinfo.dst_restrict_ill == RESTRICT_TO_ILL)
2430 			ill = NULL;
2431 		else if (dstinfo.dst_restrict_ill == RESTRICT_TO_GROUP)
2432 			ill = ill->ill_group_next;
2433 		else
2434 			ill = ill_next(&ctx, ill);
2435 	}
2436 
2437 	ipif = best_c.cand_ipif;
2438 	ip1dbg(("ipif_select_source_v6(%s, %s) -> %s\n",
2439 	    dstinfo.dst_ill->ill_name,
2440 	    inet_ntop(AF_INET6, dstinfo.dst_addr, dstr, sizeof (dstr)),
2441 	    (ipif == NULL ? "NULL" :
2442 	    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, sstr, sizeof (sstr)))));
2443 
2444 	if (usesrc_ill != NULL)
2445 		ill_refrele(usesrc_ill);
2446 
2447 	if (dst_rhtp != NULL)
2448 		TPC_RELE(dst_rhtp);
2449 
2450 	if (ipif == NULL) {
2451 		rw_exit(&ipst->ips_ill_g_lock);
2452 		return (NULL);
2453 	}
2454 
2455 	mutex_enter(&ipif->ipif_ill->ill_lock);
2456 	if (IPIF_CAN_LOOKUP(ipif)) {
2457 		ipif_refhold_locked(ipif);
2458 		mutex_exit(&ipif->ipif_ill->ill_lock);
2459 		rw_exit(&ipst->ips_ill_g_lock);
2460 		return (ipif);
2461 	}
2462 	mutex_exit(&ipif->ipif_ill->ill_lock);
2463 	rw_exit(&ipst->ips_ill_g_lock);
2464 	ip1dbg(("ipif_select_source_v6 cannot lookup ipif %p"
2465 	    " returning null \n", (void *)ipif));
2466 
2467 	return (NULL);
2468 }
2469 
2470 /*
2471  * If old_ipif is not NULL, see if ipif was derived from old
2472  * ipif and if so, recreate the interface route by re-doing
2473  * source address selection. This happens when ipif_down ->
2474  * ipif_update_other_ipifs calls us.
2475  *
2476  * If old_ipif is NULL, just redo the source address selection
2477  * if needed. This happens when illgrp_insert or ipif_up_done_v6
2478  * calls us.
2479  */
2480 void
2481 ipif_recreate_interface_routes_v6(ipif_t *old_ipif, ipif_t *ipif)
2482 {
2483 	ire_t *ire;
2484 	ire_t *ipif_ire;
2485 	queue_t *stq;
2486 	ill_t *ill;
2487 	ipif_t *nipif = NULL;
2488 	boolean_t nipif_refheld = B_FALSE;
2489 	boolean_t ip6_asp_table_held = B_FALSE;
2490 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
2491 
2492 	ill = ipif->ipif_ill;
2493 
2494 	if (!(ipif->ipif_flags &
2495 	    (IPIF_NOLOCAL|IPIF_ANYCAST|IPIF_DEPRECATED))) {
2496 		/*
2497 		 * Can't possibly have borrowed the source
2498 		 * from old_ipif.
2499 		 */
2500 		return;
2501 	}
2502 
2503 	/*
2504 	 * Is there any work to be done? No work if the address
2505 	 * is INADDR_ANY, loopback or NOLOCAL or ANYCAST (
2506 	 * ipif_select_source_v6() does not borrow addresses from
2507 	 * NOLOCAL and ANYCAST interfaces).
2508 	 */
2509 	if ((old_ipif != NULL) &&
2510 	    ((IN6_IS_ADDR_UNSPECIFIED(&old_ipif->ipif_v6lcl_addr)) ||
2511 	    (old_ipif->ipif_ill->ill_wq == NULL) ||
2512 	    (old_ipif->ipif_flags &
2513 	    (IPIF_NOLOCAL|IPIF_ANYCAST)))) {
2514 		return;
2515 	}
2516 
2517 	/*
2518 	 * Perform the same checks as when creating the
2519 	 * IRE_INTERFACE in ipif_up_done_v6.
2520 	 */
2521 	if (!(ipif->ipif_flags & IPIF_UP))
2522 		return;
2523 
2524 	if ((ipif->ipif_flags & IPIF_NOXMIT))
2525 		return;
2526 
2527 	if (IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) &&
2528 	    IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))
2529 		return;
2530 
2531 	/*
2532 	 * We know that ipif uses some other source for its
2533 	 * IRE_INTERFACE. Is it using the source of this
2534 	 * old_ipif?
2535 	 */
2536 	ipif_ire = ipif_to_ire_v6(ipif);
2537 	if (ipif_ire == NULL)
2538 		return;
2539 
2540 	if (old_ipif != NULL &&
2541 	    !IN6_ARE_ADDR_EQUAL(&old_ipif->ipif_v6lcl_addr,
2542 	    &ipif_ire->ire_src_addr_v6)) {
2543 		ire_refrele(ipif_ire);
2544 		return;
2545 	}
2546 
2547 	if (ip_debug > 2) {
2548 		/* ip1dbg */
2549 		pr_addr_dbg("ipif_recreate_interface_routes_v6: deleting IRE"
2550 		    " for src %s\n", AF_INET6, &ipif_ire->ire_src_addr_v6);
2551 	}
2552 
2553 	stq = ipif_ire->ire_stq;
2554 
2555 	/*
2556 	 * Can't use our source address. Select a different source address
2557 	 * for the IRE_INTERFACE.  We restrict interface route source
2558 	 * address selection to ipif's assigned to the same link as the
2559 	 * interface.
2560 	 */
2561 	if (ip6_asp_can_lookup(ipst)) {
2562 		ip6_asp_table_held = B_TRUE;
2563 		nipif = ipif_select_source_v6(ill, &ipif->ipif_v6subnet,
2564 		    RESTRICT_TO_GROUP, IPV6_PREFER_SRC_DEFAULT,
2565 		    ipif->ipif_zoneid);
2566 	}
2567 	if (nipif == NULL) {
2568 		/* Last resort - all ipif's have IPIF_NOLOCAL */
2569 		nipif = ipif;
2570 	} else {
2571 		nipif_refheld = B_TRUE;
2572 	}
2573 
2574 	ire = ire_create_v6(
2575 	    &ipif->ipif_v6subnet,	/* dest pref */
2576 	    &ipif->ipif_v6net_mask,	/* mask */
2577 	    &nipif->ipif_v6src_addr,	/* src addr */
2578 	    NULL,			/* no gateway */
2579 	    &ipif->ipif_mtu,		/* max frag */
2580 	    NULL,			/* no src nce */
2581 	    NULL,			/* no recv from queue */
2582 	    stq,			/* send-to queue */
2583 	    ill->ill_net_type,		/* IF_[NO]RESOLVER */
2584 	    ipif,
2585 	    NULL,
2586 	    0,
2587 	    0,
2588 	    0,
2589 	    &ire_uinfo_null,
2590 	    NULL,
2591 	    NULL,
2592 	    ipst);
2593 
2594 	if (ire != NULL) {
2595 		ire_t *ret_ire;
2596 		int   error;
2597 
2598 		/*
2599 		 * We don't need ipif_ire anymore. We need to delete
2600 		 * before we add so that ire_add does not detect
2601 		 * duplicates.
2602 		 */
2603 		ire_delete(ipif_ire);
2604 		ret_ire = ire;
2605 		error = ire_add(&ret_ire, NULL, NULL, NULL, B_FALSE);
2606 		ASSERT(error == 0);
2607 		ASSERT(ret_ire == ire);
2608 		if (ret_ire != NULL) {
2609 			/* Held in ire_add */
2610 			ire_refrele(ret_ire);
2611 		}
2612 	}
2613 	/*
2614 	 * Either we are falling through from above or could not
2615 	 * allocate a replacement.
2616 	 */
2617 	ire_refrele(ipif_ire);
2618 	if (ip6_asp_table_held)
2619 		ip6_asp_table_refrele(ipst);
2620 	if (nipif_refheld)
2621 		ipif_refrele(nipif);
2622 }
2623 
2624 /*
2625  * This old_ipif is going away.
2626  *
2627  * Determine if any other ipif's are using our address as
2628  * ipif_v6lcl_addr (due to those being IPIF_NOLOCAL, IPIF_ANYCAST, or
2629  * IPIF_DEPRECATED).
2630  * Find the IRE_INTERFACE for such ipif's and recreate them
2631  * to use an different source address following the rules in
2632  * ipif_up_done_v6.
2633  *
2634  * This function takes an illgrp as an argument so that illgrp_delete
2635  * can call this to update source address even after deleting the
2636  * old_ipif->ipif_ill from the ill group.
2637  */
2638 void
2639 ipif_update_other_ipifs_v6(ipif_t *old_ipif, ill_group_t *illgrp)
2640 {
2641 	ipif_t	*ipif;
2642 	ill_t	*ill;
2643 	char	buf[INET6_ADDRSTRLEN];
2644 
2645 	ASSERT(IAM_WRITER_IPIF(old_ipif));
2646 
2647 	ill = old_ipif->ipif_ill;
2648 
2649 	ip1dbg(("ipif_update_other_ipifs_v6(%s, %s)\n",
2650 	    ill->ill_name,
2651 	    inet_ntop(AF_INET6, &old_ipif->ipif_v6lcl_addr,
2652 	    buf, sizeof (buf))));
2653 
2654 	/*
2655 	 * If this part of a group, look at all ills as ipif_select_source
2656 	 * borrows a source address across all the ills in the group.
2657 	 */
2658 	if (illgrp != NULL)
2659 		ill = illgrp->illgrp_ill;
2660 
2661 	/* Don't need a lock since this is a writer */
2662 	for (; ill != NULL; ill = ill->ill_group_next) {
2663 		for (ipif = ill->ill_ipif; ipif != NULL;
2664 		    ipif = ipif->ipif_next) {
2665 
2666 			if (ipif == old_ipif)
2667 				continue;
2668 
2669 			ipif_recreate_interface_routes_v6(old_ipif, ipif);
2670 		}
2671 	}
2672 }
2673 
2674 /*
2675  * Perform an attach and bind to get phys addr plus info_req for
2676  * the physical device.
2677  * q and mp represents an ioctl which will be queued waiting for
2678  * completion of the DLPI message exchange.
2679  * MUST be called on an ill queue. Can not set conn_pending_ill for that
2680  * reason thus the DL_PHYS_ADDR_ACK code does not assume ill_pending_q.
2681  *
2682  * Returns EINPROGRESS when mp has been consumed by queueing it on
2683  * ill_pending_mp and the ioctl will complete in ip_rput.
2684  */
2685 int
2686 ill_dl_phys(ill_t *ill, ipif_t *ipif, mblk_t *mp, queue_t *q)
2687 {
2688 	mblk_t	*v6token_mp = NULL;
2689 	mblk_t	*v6lla_mp = NULL;
2690 	mblk_t	*phys_mp = NULL;
2691 	mblk_t	*info_mp = NULL;
2692 	mblk_t	*attach_mp = NULL;
2693 	mblk_t	*bind_mp = NULL;
2694 	mblk_t	*unbind_mp = NULL;
2695 	mblk_t	*notify_mp = NULL;
2696 
2697 	ip1dbg(("ill_dl_phys(%s:%u)\n", ill->ill_name, ipif->ipif_id));
2698 	ASSERT(ill->ill_dlpi_style_set);
2699 	ASSERT(WR(q)->q_next != NULL);
2700 
2701 	if (ill->ill_isv6) {
2702 		v6token_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2703 		    sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2704 		if (v6token_mp == NULL)
2705 			goto bad;
2706 		((dl_phys_addr_req_t *)v6token_mp->b_rptr)->dl_addr_type =
2707 		    DL_IPV6_TOKEN;
2708 
2709 		v6lla_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2710 		    sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2711 		if (v6lla_mp == NULL)
2712 			goto bad;
2713 		((dl_phys_addr_req_t *)v6lla_mp->b_rptr)->dl_addr_type =
2714 		    DL_IPV6_LINK_LAYER_ADDR;
2715 	}
2716 
2717 	/*
2718 	 * Allocate a DL_NOTIFY_REQ and set the notifications we want.
2719 	 */
2720 	notify_mp = ip_dlpi_alloc(sizeof (dl_notify_req_t) + sizeof (long),
2721 	    DL_NOTIFY_REQ);
2722 	if (notify_mp == NULL)
2723 		goto bad;
2724 	((dl_notify_req_t *)notify_mp->b_rptr)->dl_notifications =
2725 	    (DL_NOTE_PHYS_ADDR | DL_NOTE_SDU_SIZE | DL_NOTE_FASTPATH_FLUSH |
2726 	    DL_NOTE_LINK_UP | DL_NOTE_LINK_DOWN | DL_NOTE_CAPAB_RENEG);
2727 
2728 	phys_mp = ip_dlpi_alloc(sizeof (dl_phys_addr_req_t) +
2729 	    sizeof (t_scalar_t), DL_PHYS_ADDR_REQ);
2730 	if (phys_mp == NULL)
2731 		goto bad;
2732 	((dl_phys_addr_req_t *)phys_mp->b_rptr)->dl_addr_type =
2733 	    DL_CURR_PHYS_ADDR;
2734 
2735 	info_mp = ip_dlpi_alloc(
2736 	    sizeof (dl_info_req_t) + sizeof (dl_info_ack_t),
2737 	    DL_INFO_REQ);
2738 	if (info_mp == NULL)
2739 		goto bad;
2740 
2741 	bind_mp = ip_dlpi_alloc(sizeof (dl_bind_req_t) + sizeof (long),
2742 	    DL_BIND_REQ);
2743 	if (bind_mp == NULL)
2744 		goto bad;
2745 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ill->ill_sap;
2746 	((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS;
2747 
2748 	unbind_mp = ip_dlpi_alloc(sizeof (dl_unbind_req_t), DL_UNBIND_REQ);
2749 	if (unbind_mp == NULL)
2750 		goto bad;
2751 
2752 	/* If we need to attach, pre-alloc and initialize the mblk */
2753 	if (ill->ill_needs_attach) {
2754 		attach_mp = ip_dlpi_alloc(sizeof (dl_attach_req_t),
2755 		    DL_ATTACH_REQ);
2756 		if (attach_mp == NULL)
2757 			goto bad;
2758 		((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = ill->ill_ppa;
2759 	}
2760 
2761 	/*
2762 	 * Here we are going to delay the ioctl ack until after
2763 	 * ACKs from DL_PHYS_ADDR_REQ. So need to save the
2764 	 * original ioctl message before sending the requests
2765 	 */
2766 	mutex_enter(&ill->ill_lock);
2767 	/* ipsq_pending_mp_add won't fail since we pass in a NULL connp */
2768 	(void) ipsq_pending_mp_add(NULL, ipif, ill->ill_wq, mp, 0);
2769 	/*
2770 	 * Set ill_phys_addr_pend to zero. It will be set to the addr_type of
2771 	 * the DL_PHYS_ADDR_REQ in ill_dlpi_send() and ill_dlpi_done(). It will
2772 	 * be used to track which DL_PHYS_ADDR_REQ is being ACK'd/NAK'd.
2773 	 */
2774 	ill->ill_phys_addr_pend = 0;
2775 	mutex_exit(&ill->ill_lock);
2776 
2777 	if (attach_mp != NULL) {
2778 		ip1dbg(("ill_dl_phys: attach\n"));
2779 		ill_dlpi_send(ill, attach_mp);
2780 	}
2781 	ill_dlpi_send(ill, bind_mp);
2782 	ill_dlpi_send(ill, info_mp);
2783 	if (ill->ill_isv6) {
2784 		ill_dlpi_send(ill, v6token_mp);
2785 		ill_dlpi_send(ill, v6lla_mp);
2786 	}
2787 	ill_dlpi_send(ill, phys_mp);
2788 	ill_dlpi_send(ill, notify_mp);
2789 	ill_dlpi_send(ill, unbind_mp);
2790 
2791 	/*
2792 	 * This operation will complete in ip_rput_dlpi_writer with either
2793 	 * a DL_PHYS_ADDR_ACK or DL_ERROR_ACK.
2794 	 */
2795 	return (EINPROGRESS);
2796 bad:
2797 	freemsg(v6token_mp);
2798 	freemsg(v6lla_mp);
2799 	freemsg(phys_mp);
2800 	freemsg(info_mp);
2801 	freemsg(attach_mp);
2802 	freemsg(bind_mp);
2803 	freemsg(unbind_mp);
2804 	freemsg(notify_mp);
2805 	return (ENOMEM);
2806 }
2807 
/*
 * MTU value obtained by adding IPV6_HDR_LEN and a further 20 bytes to
 * IP_LOOPBACK_MTU.
 * NOTE(review): the extra 20 bytes presumably leave room for a minimal
 * TCP header -- confirm against the users of this variable.
 */
2808 uint_t ip_loopback_mtu_v6plus = IP_LOOPBACK_MTU + IPV6_HDR_LEN + 20;
2809 
2810 /*
2811  * DLPI is up.
2812  * Create all the IREs associated with an interface and bring up multicast.
2813  * Set the interface flag and finish other initialization
2814  * that potentially had to be deferred until after DL_BIND_ACK.
2815  */
2816 int
2817 ipif_up_done_v6(ipif_t *ipif)
2818 {
2819 	ire_t	*ire_array[20];
2820 	ire_t	**irep = ire_array;
2821 	ire_t	**irep1;
2822 	ill_t	*ill = ipif->ipif_ill;
2823 	queue_t	*stq;
2824 	in6_addr_t	v6addr;
2825 	in6_addr_t	route_mask;
2826 	ipif_t	 *src_ipif = NULL;
2827 	ipif_t   *tmp_ipif;
2828 	boolean_t	flush_ire_cache = B_TRUE;
2829 	int	err;
2830 	char	buf[INET6_ADDRSTRLEN];
2831 	phyint_t *phyi;
2832 	ire_t	**ipif_saved_irep = NULL;
2833 	int ipif_saved_ire_cnt;
2834 	int cnt;
2835 	boolean_t src_ipif_held = B_FALSE;
2836 	boolean_t ire_added = B_FALSE;
2837 	boolean_t loopback = B_FALSE;
2838 	boolean_t ip6_asp_table_held = B_FALSE;
2839 	ip_stack_t	*ipst = ill->ill_ipst;
2840 
2841 	ip1dbg(("ipif_up_done_v6(%s:%u)\n",
2842 	    ipif->ipif_ill->ill_name, ipif->ipif_id));
2843 
2844 	/* Check if this is a loopback interface */
2845 	if (ipif->ipif_ill->ill_wq == NULL)
2846 		loopback = B_TRUE;
2847 
2848 	ASSERT(ipif->ipif_isv6);
2849 	ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
2850 
2851 	/*
2852 	 * If all other interfaces for this ill are down or DEPRECATED,
2853 	 * or otherwise unsuitable for source address selection, remove
2854 	 * any IRE_CACHE entries for this ill to make sure source
2855 	 * address selection gets to take this new ipif into account.
2856 	 * No need to hold ill_lock while traversing the ipif list since
2857 	 * we are writer
2858 	 */
2859 	for (tmp_ipif = ill->ill_ipif; tmp_ipif;
2860 	    tmp_ipif = tmp_ipif->ipif_next) {
2861 		if (((tmp_ipif->ipif_flags &
2862 		    (IPIF_NOXMIT|IPIF_ANYCAST|IPIF_NOLOCAL|IPIF_DEPRECATED)) ||
2863 		    !(tmp_ipif->ipif_flags & IPIF_UP)) ||
2864 		    (tmp_ipif == ipif))
2865 			continue;
2866 		/* first useable pre-existing interface */
2867 		flush_ire_cache = B_FALSE;
2868 		break;
2869 	}
2870 	if (flush_ire_cache)
2871 		ire_walk_ill_v6(MATCH_IRE_ILL_GROUP | MATCH_IRE_TYPE,
2872 		    IRE_CACHE, ill_ipif_cache_delete, (char *)ill, ill);
2873 
2874 	/*
2875 	 * Figure out which way the send-to queue should go.  Only
2876 	 * IRE_IF_RESOLVER or IRE_IF_NORESOLVER should show up here.
2877 	 */
2878 	switch (ill->ill_net_type) {
2879 	case IRE_IF_RESOLVER:
2880 		stq = ill->ill_rq;
2881 		break;
2882 	case IRE_IF_NORESOLVER:
2883 	case IRE_LOOPBACK:
2884 		stq = ill->ill_wq;
2885 		break;
2886 	default:
2887 		return (EINVAL);
2888 	}
2889 
2890 	if (IS_LOOPBACK(ill)) {
2891 		/*
2892 		 * lo0:1 and subsequent ipifs were marked IRE_LOCAL in
2893 		 * ipif_lookup_on_name(), but in the case of zones we can have
2894 		 * several loopback addresses on lo0. So all the interfaces with
2895 		 * loopback addresses need to be marked IRE_LOOPBACK.
2896 		 */
2897 		if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, &ipv6_loopback))
2898 			ipif->ipif_ire_type = IRE_LOOPBACK;
2899 		else
2900 			ipif->ipif_ire_type = IRE_LOCAL;
2901 	}
2902 
2903 	if (ipif->ipif_flags & (IPIF_NOLOCAL|IPIF_ANYCAST|IPIF_DEPRECATED)) {
2904 		/*
2905 		 * Can't use our source address. Select a different
2906 		 * source address for the IRE_INTERFACE and IRE_LOCAL
2907 		 */
2908 		if (ip6_asp_can_lookup(ipst)) {
2909 			ip6_asp_table_held = B_TRUE;
2910 			src_ipif = ipif_select_source_v6(ipif->ipif_ill,
2911 			    &ipif->ipif_v6subnet, RESTRICT_TO_NONE,
2912 			    IPV6_PREFER_SRC_DEFAULT, ipif->ipif_zoneid);
2913 		}
2914 		if (src_ipif == NULL)
2915 			src_ipif = ipif;	/* Last resort */
2916 		else
2917 			src_ipif_held = B_TRUE;
2918 	} else {
2919 		src_ipif = ipif;
2920 	}
2921 
2922 	if (!IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6lcl_addr) &&
2923 	    !(ipif->ipif_flags & IPIF_NOLOCAL)) {
2924 
2925 		/*
2926 		 * If we're on a labeled system then make sure that zone-
2927 		 * private addresses have proper remote host database entries.
2928 		 */
2929 		if (is_system_labeled() &&
2930 		    ipif->ipif_ire_type != IRE_LOOPBACK) {
2931 			if (ip6opt_ls == 0) {
2932 				cmn_err(CE_WARN, "IPv6 not enabled "
2933 				    "via /etc/system");
2934 				return (EINVAL);
2935 			}
2936 			if (!tsol_check_interface_address(ipif))
2937 				return (EINVAL);
2938 		}
2939 
2940 		/* Register the source address for __sin6_src_id */
2941 		err = ip_srcid_insert(&ipif->ipif_v6lcl_addr,
2942 		    ipif->ipif_zoneid, ipst);
2943 		if (err != 0) {
2944 			ip0dbg(("ipif_up_done_v6: srcid_insert %d\n", err));
2945 			if (src_ipif_held)
2946 				ipif_refrele(src_ipif);
2947 			if (ip6_asp_table_held)
2948 				ip6_asp_table_refrele(ipst);
2949 			return (err);
2950 		}
2951 		/*
2952 		 * If the interface address is set, create the LOCAL
2953 		 * or LOOPBACK IRE.
2954 		 */
2955 		ip1dbg(("ipif_up_done_v6: creating IRE %d for %s\n",
2956 		    ipif->ipif_ire_type,
2957 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
2958 		    buf, sizeof (buf))));
2959 
2960 		*irep++ = ire_create_v6(
2961 		    &ipif->ipif_v6lcl_addr,		/* dest address */
2962 		    &ipv6_all_ones,			/* mask */
2963 		    &src_ipif->ipif_v6src_addr,		/* source address */
2964 		    NULL,				/* no gateway */
2965 		    &ip_loopback_mtu_v6plus,		/* max frag size */
2966 		    NULL,
2967 		    ipif->ipif_rq,			/* recv-from queue */
2968 		    NULL,				/* no send-to queue */
2969 		    ipif->ipif_ire_type,		/* LOCAL or LOOPBACK */
2970 		    ipif,				/* interface */
2971 		    NULL,
2972 		    0,
2973 		    0,
2974 		    (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0,
2975 		    &ire_uinfo_null,
2976 		    NULL,
2977 		    NULL,
2978 		    ipst);
2979 	}
2980 
2981 	/*
2982 	 * Set up the IRE_IF_RESOLVER or IRE_IF_NORESOLVER, as appropriate.
2983 	 * Note that atun interfaces have an all-zero ipif_v6subnet.
2984 	 * Thus we allow a zero subnet as long as the mask is non-zero.
2985 	 */
2986 	if (stq != NULL && !(ipif->ipif_flags & IPIF_NOXMIT) &&
2987 	    !(IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6subnet) &&
2988 	    IN6_IS_ADDR_UNSPECIFIED(&ipif->ipif_v6net_mask))) {
2989 		/* ipif_v6subnet is ipif_v6pp_dst_addr for pt-pt */
2990 		v6addr = ipif->ipif_v6subnet;
2991 
2992 		if (ipif->ipif_flags & IPIF_POINTOPOINT) {
2993 			route_mask = ipv6_all_ones;
2994 		} else {
2995 			route_mask = ipif->ipif_v6net_mask;
2996 		}
2997 
2998 		ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s\n",
2999 		    ill->ill_net_type,
3000 		    inet_ntop(AF_INET6, &v6addr, buf, sizeof (buf))));
3001 
3002 		*irep++ = ire_create_v6(
3003 		    &v6addr,			/* dest pref */
3004 		    &route_mask,		/* mask */
3005 		    &src_ipif->ipif_v6src_addr,	/* src addr */
3006 		    NULL,			/* no gateway */
3007 		    &ipif->ipif_mtu,		/* max frag */
3008 		    NULL,			/* no src nce */
3009 		    NULL,			/* no recv from queue */
3010 		    stq,			/* send-to queue */
3011 		    ill->ill_net_type,		/* IF_[NO]RESOLVER */
3012 		    ipif,
3013 		    NULL,
3014 		    0,
3015 		    0,
3016 		    (ipif->ipif_flags & IPIF_PRIVATE) ? RTF_PRIVATE : 0,
3017 		    &ire_uinfo_null,
3018 		    NULL,
3019 		    NULL,
3020 		    ipst);
3021 	}
3022 
3023 	/*
3024 	 * Setup 2002::/16 route, if this interface is a 6to4 tunnel
3025 	 */
3026 	if (IN6_IS_ADDR_6TO4(&ipif->ipif_v6lcl_addr) &&
3027 	    (ill->ill_is_6to4tun)) {
3028 		/*
3029 		 * Destination address is 2002::/16
3030 		 */
3031 #ifdef	_BIG_ENDIAN
3032 		const in6_addr_t prefix_addr = { 0x20020000U, 0, 0, 0 };
3033 		const in6_addr_t prefix_mask = { 0xffff0000U, 0, 0, 0 };
3034 #else
3035 		const in6_addr_t prefix_addr = { 0x00000220U, 0, 0, 0 };
3036 		const in6_addr_t prefix_mask = { 0x0000ffffU, 0, 0, 0 };
3037 #endif /* _BIG_ENDIAN */
3038 		char	buf2[INET6_ADDRSTRLEN];
3039 		ire_t *isdup;
3040 		in6_addr_t *first_addr = &ill->ill_ipif->ipif_v6lcl_addr;
3041 
3042 		/*
3043 		 * check to see if this route has already been added for
3044 		 * this tunnel interface.
3045 		 */
3046 		isdup = ire_ftable_lookup_v6(first_addr, &prefix_mask, 0,
3047 		    IRE_IF_NORESOLVER, ill->ill_ipif, NULL, ALL_ZONES, 0, NULL,
3048 		    (MATCH_IRE_SRC | MATCH_IRE_MASK), ipst);
3049 
3050 		if (isdup == NULL) {
3051 			ip1dbg(("ipif_up_done_v6: creating if IRE %d for %s",
3052 			    IRE_IF_NORESOLVER, inet_ntop(AF_INET6, &v6addr,
3053 			    buf2, sizeof (buf2))));
3054 
3055 			*irep++ = ire_create_v6(
3056 			    &prefix_addr,		/* 2002:: */
3057 			    &prefix_mask,		/* ffff:: */
3058 			    &ipif->ipif_v6lcl_addr, 	/* src addr */
3059 			    NULL, 			/* gateway */
3060 			    &ipif->ipif_mtu, 		/* max_frag */
3061 			    NULL, 			/* no src nce */
3062 			    NULL, 			/* no rfq */
3063 			    ill->ill_wq, 		/* stq */
3064 			    IRE_IF_NORESOLVER,		/* type */
3065 			    ipif,			/* interface */
3066 			    NULL,			/* v6cmask */
3067 			    0,
3068 			    0,
3069 			    RTF_UP,
3070 			    &ire_uinfo_null,
3071 			    NULL,
3072 			    NULL,
3073 			    ipst);
3074 		} else {
3075 			ire_refrele(isdup);
3076 		}
3077 	}
3078 
3079 	/* If an earlier ire_create failed, get out now */
3080 	for (irep1 = irep; irep1 > ire_array; ) {
3081 		irep1--;
3082 		if (*irep1 == NULL) {
3083 			ip1dbg(("ipif_up_done_v6: NULL ire found in"
3084 			    " ire_array\n"));
3085 			err = ENOMEM;
3086 			goto bad;
3087 		}
3088 	}
3089 
3090 	ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
3091 
3092 	/*
3093 	 * Need to atomically check for ip_addr_availablity_check
3094 	 * now under ill_g_lock, and if it fails got bad, and remove
3095 	 * from group also
3096 	 */
3097 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
3098 	mutex_enter(&ipst->ips_ip_addr_avail_lock);
3099 	ill->ill_ipif_up_count++;
3100 	ipif->ipif_flags |= IPIF_UP;
3101 	err = ip_addr_availability_check(ipif);
3102 	mutex_exit(&ipst->ips_ip_addr_avail_lock);
3103 	rw_exit(&ipst->ips_ill_g_lock);
3104 
3105 	if (err != 0) {
3106 		/*
3107 		 * Our address may already be up on the same ill. In this case,
3108 		 * the external resolver entry for our ipif replaced the one for
3109 		 * the other ipif. So we don't want to delete it (otherwise the
3110 		 * other ipif would be unable to send packets).
3111 		 * ip_addr_availability_check() identifies this case for us and
3112 		 * returns EADDRINUSE; we need to turn it into EADDRNOTAVAIL
3113 		 * which is the expected error code.
3114 		 */
3115 		if (err == EADDRINUSE) {
3116 			if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV) {
3117 				freemsg(ipif->ipif_arp_del_mp);
3118 				ipif->ipif_arp_del_mp = NULL;
3119 			}
3120 			err = EADDRNOTAVAIL;
3121 		}
3122 		ill->ill_ipif_up_count--;
3123 		ipif->ipif_flags &= ~IPIF_UP;
3124 		goto bad;
3125 	}
3126 
3127 	/*
3128 	 * Add in all newly created IREs. We want to add before
3129 	 * we call ifgrp_insert which wants to know whether
3130 	 * IRE_IF_RESOLVER exists or not.
3131 	 *
3132 	 * NOTE : We refrele the ire though we may branch to "bad"
3133 	 *	  later on where we do ire_delete. This is okay
3134 	 *	  because nobody can delete it as we are running
3135 	 *	  exclusively.
3136 	 */
3137 	for (irep1 = irep; irep1 > ire_array; ) {
3138 		irep1--;
3139 		/* Shouldn't be adding any bcast ire's */
3140 		ASSERT((*irep1)->ire_type != IRE_BROADCAST);
3141 		ASSERT(!MUTEX_HELD(&ipif->ipif_ill->ill_lock));
3142 		/*
3143 		 * refheld by ire_add. refele towards the end of the func
3144 		 */
3145 		(void) ire_add(irep1, NULL, NULL, NULL, B_FALSE);
3146 	}
3147 	if (ip6_asp_table_held) {
3148 		ip6_asp_table_refrele(ipst);
3149 		ip6_asp_table_held = B_FALSE;
3150 	}
3151 	ire_added = B_TRUE;
3152 
3153 	/*
3154 	 * Form groups if possible.
3155 	 *
3156 	 * If we are supposed to be in a ill_group with a name, insert it
3157 	 * now as we know that at least one ipif is UP. Otherwise form
3158 	 * nameless groups.
3159 	 *
3160 	 * If ip_enable_group_ifs is set and ipif address is not ::0, insert
3161 	 * this ipif into the appropriate interface group, or create a
3162 	 * new one. If this is already in a nameless group, we try to form
3163 	 * a bigger group looking at other ills potentially sharing this
3164 	 * ipif's prefix.
3165 	 */
3166 	phyi = ill->ill_phyint;
3167 	if (phyi->phyint_groupname_len != 0) {
3168 		ASSERT(phyi->phyint_groupname != NULL);
3169 		if (ill->ill_ipif_up_count == 1) {
3170 			ASSERT(ill->ill_group == NULL);
3171 			err = illgrp_insert(&ipst->ips_illgrp_head_v6, ill,
3172 			    phyi->phyint_groupname, NULL, B_TRUE);
3173 			if (err != 0) {
3174 				ip1dbg(("ipif_up_done_v6: illgrp allocation "
3175 				    "failed, error %d\n", err));
3176 				goto bad;
3177 			}
3178 		}
3179 		ASSERT(ill->ill_group != NULL);
3180 	}
3181 
3182 	/* Recover any additional IRE_IF_[NO]RESOLVER entries for this ipif */
3183 	ipif_saved_ire_cnt = ipif->ipif_saved_ire_cnt;
3184 	ipif_saved_irep = ipif_recover_ire_v6(ipif);
3185 
3186 	if (ill->ill_need_recover_multicast) {
3187 		/*
3188 		 * Need to recover all multicast memberships in the driver.
3189 		 * This had to be deferred until we had attached.
3190 		 */
3191 		ill_recover_multicast(ill);
3192 	}
3193 	/* Join the allhosts multicast address and the solicited node MC */
3194 	ipif_multicast_up(ipif);
3195 
3196 	if (!loopback) {
3197 		/*
3198 		 * See whether anybody else would benefit from the
3199 		 * new ipif that we added. We call this always rather
3200 		 * than while adding a non-IPIF_NOLOCAL/DEPRECATED/ANYCAST
3201 		 * ipif for the benefit of illgrp_insert (done above)
3202 		 * which does not do source address selection as it does
3203 		 * not want to re-create interface routes that we are
3204 		 * having reference to it here.
3205 		 */
3206 		ill_update_source_selection(ill);
3207 	}
3208 
3209 	for (irep1 = irep; irep1 > ire_array; ) {
3210 		irep1--;
3211 		if (*irep1 != NULL) {
3212 			/* was held in ire_add */
3213 			ire_refrele(*irep1);
3214 		}
3215 	}
3216 
3217 	cnt = ipif_saved_ire_cnt;
3218 	for (irep1 = ipif_saved_irep; cnt > 0; irep1++, cnt--) {
3219 		if (*irep1 != NULL) {
3220 			/* was held in ire_add */
3221 			ire_refrele(*irep1);
3222 		}
3223 	}
3224 
3225 	if (ipif->ipif_addr_ready)
3226 		ipif_up_notify(ipif);
3227 
3228 	if (ipif_saved_irep != NULL) {
3229 		kmem_free(ipif_saved_irep,
3230 		    ipif_saved_ire_cnt * sizeof (ire_t *));
3231 	}
3232 
3233 	if (src_ipif_held)
3234 		ipif_refrele(src_ipif);
3235 
3236 	return (0);
3237 
3238 bad:
3239 	if (ip6_asp_table_held)
3240 		ip6_asp_table_refrele(ipst);
3241 	/*
3242 	 * We don't have to bother removing from ill groups because
3243 	 *
3244 	 * 1) For groups with names, we insert only when the first ipif
3245 	 *    comes up. In that case if it fails, it will not be in any
3246 	 *    group. So, we need not try to remove for that case.
3247 	 *
3248 	 * 2) For groups without names, either we tried to insert ipif_ill
3249 	 *    in a group as singleton or found some other group to become
3250 	 *    a bigger group. For the former, if it fails we don't have
3251 	 *    anything to do as ipif_ill is not in the group and for the
3252 	 *    latter, there are no failures in illgrp_insert/illgrp_delete
3253 	 *    (ENOMEM can't occur for this. Check ifgrp_insert).
3254 	 */
3255 
3256 	while (irep > ire_array) {
3257 		irep--;
3258 		if (*irep != NULL) {
3259 			ire_delete(*irep);
3260 			if (ire_added)
3261 				ire_refrele(*irep);
3262 		}
3263 
3264 	}
3265 	(void) ip_srcid_remove(&ipif->ipif_v6lcl_addr, ipif->ipif_zoneid, ipst);
3266 
3267 	if (ipif_saved_irep != NULL) {
3268 		kmem_free(ipif_saved_irep,
3269 		    ipif_saved_ire_cnt * sizeof (ire_t *));
3270 	}
3271 	if (src_ipif_held)
3272 		ipif_refrele(src_ipif);
3273 
3274 	ipif_ndp_down(ipif);
3275 	if (ipif->ipif_ill->ill_flags & ILLF_XRESOLV)
3276 		ipif_arp_down(ipif);
3277 
3278 	return (err);
3279 }
3280 
3281 /*
3282  * Delete an ND entry and the corresponding IRE_CACHE entry if it exists.
3283  */
3284 /* ARGSUSED */
3285 int
3286 ip_siocdelndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
3287     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
3288 {
3289 	in6_addr_t	addr;
3290 	sin6_t		*sin6;
3291 	nce_t		*nce;
3292 	struct lifreq	*lifr;
3293 	lif_nd_req_t	*lnr;
3294 	mblk_t	*mp1;
3295 
3296 	mp1 = mp->b_cont->b_cont;
3297 	lifr = (struct lifreq *)mp1->b_rptr;
3298 	lnr = &lifr->lifr_nd;
3299 	/* Only allow for logical unit zero i.e. not on "le0:17" */
3300 	if (ipif->ipif_id != 0)
3301 		return (EINVAL);
3302 
3303 	if (!ipif->ipif_isv6)
3304 		return (EINVAL);
3305 
3306 	if (lnr->lnr_addr.ss_family != AF_INET6)
3307 		return (EAFNOSUPPORT);
3308 
3309 	sin6 = (sin6_t *)&lnr->lnr_addr;
3310 	addr = sin6->sin6_addr;
3311 	nce = ndp_lookup_v6(ipif->ipif_ill, &addr, B_FALSE);
3312 	if (nce == NULL)
3313 		return (ESRCH);
3314 	ndp_delete(nce);
3315 	NCE_REFRELE(nce);
3316 	return (0);
3317 }
3318 
3319 /*
3320  * Return nbr cache info.
3321  */
3322 /* ARGSUSED */
3323 int
3324 ip_siocqueryndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
3325     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
3326 {
3327 	ill_t		*ill = ipif->ipif_ill;
3328 	struct lifreq	*lifr;
3329 	lif_nd_req_t	*lnr;
3330 
3331 	lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr;
3332 	lnr = &lifr->lifr_nd;
3333 	/* Only allow for logical unit zero i.e. not on "le0:17" */
3334 	if (ipif->ipif_id != 0)
3335 		return (EINVAL);
3336 
3337 	if (!ipif->ipif_isv6)
3338 		return (EINVAL);
3339 
3340 	if (lnr->lnr_addr.ss_family != AF_INET6)
3341 		return (EAFNOSUPPORT);
3342 
3343 	if (ill->ill_phys_addr_length > sizeof (lnr->lnr_hdw_addr))
3344 		return (EINVAL);
3345 
3346 	return (ndp_query(ill, lnr));
3347 }
3348 
3349 /*
3350  * Perform an update of the nd entry for the specified address.
3351  */
3352 /* ARGSUSED */
3353 int
3354 ip_siocsetndp_v6(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
3355     ip_ioctl_cmd_t *ipip, void *dummy_ifreq)
3356 {
3357 	ill_t		*ill = ipif->ipif_ill;
3358 	struct	lifreq	*lifr;
3359 	lif_nd_req_t	*lnr;
3360 
3361 	ASSERT(!(q->q_flag & QREADR) && q->q_next == NULL);
3362 
3363 	lifr = (struct lifreq *)mp->b_cont->b_cont->b_rptr;
3364 	lnr = &lifr->lifr_nd;
3365 	/* Only allow for logical unit zero i.e. not on "le0:17" */
3366 	if (ipif->ipif_id != 0)
3367 		return (EINVAL);
3368 
3369 	if (!ipif->ipif_isv6)
3370 		return (EINVAL);
3371 
3372 	if (lnr->lnr_addr.ss_family != AF_INET6)
3373 		return (EAFNOSUPPORT);
3374 
3375 	return (ndp_sioc_update(ill, lnr));
3376 }
3377