xref: /titanic_52/usr/src/uts/common/inet/ip/ip6_ire.c (revision 53a7b6b6763f5865522a76e5e887390a8f4777d7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 1990 Mentat Inc.
27  */
28 
29 /*
30  * This file contains routines that manipulate Internet Routing Entries (IREs).
31  */
32 #include <sys/types.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/ddi.h>
36 #include <sys/cmn_err.h>
37 
38 #include <sys/systm.h>
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <net/if.h>
42 #include <net/route.h>
43 #include <netinet/in.h>
44 #include <net/if_dl.h>
45 #include <netinet/ip6.h>
46 #include <netinet/icmp6.h>
47 
48 #include <inet/common.h>
49 #include <inet/mi.h>
50 #include <inet/ip.h>
51 #include <inet/ip6.h>
52 #include <inet/ip_ndp.h>
53 #include <inet/ip_if.h>
54 #include <inet/ip_ire.h>
55 #include <inet/ipclassifier.h>
56 #include <inet/nd.h>
57 #include <sys/kmem.h>
58 #include <sys/zone.h>
59 
60 #include <sys/tsol/label.h>
61 #include <sys/tsol/tnet.h>
62 
/*
 * Template used to reset a freshly allocated ire_t: it has static storage
 * and no initializer, and the ire_create*_v6() routines below assign it to
 * the new IRE ("start clean") before any field is filled in.
 */
63 static	ire_t	ire_null;
64 
/* Forward declarations of the file-local helpers. */
65 static ire_t	*ire_ihandle_lookup_onlink_v6(ire_t *cire);
66 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr,
67     const in6_addr_t *mask, const in6_addr_t *gateway, int type,
68     const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
69     const ts_label_t *tsl, int match_flags);
70 static	ire_t	*ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
71     const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *,
72     ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t,
73     const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);
74 static	ire_t	*ip6_ctable_lookup_impl(ire_ctable_args_t *);
75 
76 
77 /*
78  * Initialize the ire that is specific to IPv6 part and call
79  * ire_init_common to finish it.
80  */
81 static ire_t *
82 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask,
83     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
84     uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type,
85     ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle,
86     uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc,
87     tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
88 {
89 
90 	/*
91 	 * Reject IRE security attribute creation/initialization
92 	 * if system is not running in Trusted mode.
93 	 */
94 	if ((gc != NULL || gcgrp != NULL) && !is_system_labeled())
95 		return (NULL);
96 
97 
98 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced);
99 	ire->ire_addr_v6 = *v6addr;
100 
101 	if (v6src_addr != NULL)
102 		ire->ire_src_addr_v6 = *v6src_addr;
103 	if (v6mask != NULL) {
104 		ire->ire_mask_v6 = *v6mask;
105 		ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6);
106 	}
107 	if (v6gateway != NULL)
108 		ire->ire_gateway_addr_v6 = *v6gateway;
109 
110 	if (type == IRE_CACHE && v6cmask != NULL)
111 		ire->ire_cmask_v6 = *v6cmask;
112 
113 	/*
114 	 * Multirouted packets need to have a fragment header added so that
115 	 * the receiver is able to discard duplicates according to their
116 	 * fragment identifier.
117 	 */
118 	if (type == IRE_CACHE && (flags & RTF_MULTIRT)) {
119 		ire->ire_frag_flag = IPH_FRAG_HDR;
120 	}
121 
122 	/* ire_init_common will free the mblks upon encountering any failure */
123 	if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif,
124 	    phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst))
125 		return (NULL);
126 
127 	return (ire);
128 }
129 
130 /*
131  * Similar to ire_create_v6 except that it is called only when
132  * we want to allocate ire as an mblk e.g. we have a external
133  * resolver. Do we need this in IPv6 ?
134  *
135  * IPv6 initializes the ire_nce in ire_add_v6, which expects to
136  * find the ire_nce to be null when it is called. So, although
137  * we have a src_nce parameter (in the interest of matching up with
138  * the argument list of the v4 version), we ignore the src_nce
139  * argument here.
140  */
141 /* ARGSUSED */
142 ire_t *
143 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
144     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
145     nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type,
146     ipif_t *ipif, const in6_addr_t *v6cmask,
147     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
148     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
149 {
150 	ire_t	*ire;
151 	ire_t	*ret_ire;
152 	mblk_t	*mp;
153 
154 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
155 
156 	/* Allocate the new IRE. */
157 	mp = allocb(sizeof (ire_t), BPRI_MED);
158 	if (mp == NULL) {
159 		ip1dbg(("ire_create_mp_v6: alloc failed\n"));
160 		return (NULL);
161 	}
162 
163 	ire = (ire_t *)mp->b_rptr;
164 	mp->b_wptr = (uchar_t *)&ire[1];
165 
166 	/* Start clean. */
167 	*ire = ire_null;
168 	ire->ire_mp = mp;
169 	mp->b_datap->db_type = IRE_DB_TYPE;
170 
171 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
172 	    NULL, rfq, stq, type, ipif, v6cmask, phandle,
173 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
174 
175 	if (ret_ire == NULL) {
176 		freeb(ire->ire_mp);
177 		return (NULL);
178 	}
179 	return (ire);
180 }
181 
182 /*
183  * ire_create_v6 is called to allocate and initialize a new IRE.
184  *
185  * NOTE : This is called as writer sometimes though not required
186  * by this function.
187  *
188  * See comments above ire_create_mp_v6() for the rationale behind the
189  * unused src_nce argument.
190  */
191 /* ARGSUSED */
192 ire_t *
193 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
194     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
195     uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq,
196     ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask,
197     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
198     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
199 {
200 	ire_t	*ire;
201 	ire_t	*ret_ire;
202 
203 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
204 
205 	ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
206 	if (ire == NULL) {
207 		ip1dbg(("ire_create_v6: alloc failed\n"));
208 		return (NULL);
209 	}
210 	*ire = ire_null;
211 
212 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
213 	    max_fragp, rfq, stq, type, ipif, v6cmask, phandle,
214 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
215 
216 	if (ret_ire == NULL) {
217 		kmem_cache_free(ire_cache, ire);
218 		return (NULL);
219 	}
220 	ASSERT(ret_ire == ire);
221 	return (ire);
222 }
223 
224 /*
225  * Find an IRE_INTERFACE for the multicast group.
226  * Allows different routes for multicast addresses
227  * in the unicast routing table (akin to FF::0/8 but could be more specific)
228  * which point at different interfaces. This is used when IPV6_MULTICAST_IF
229  * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
230  * specify the interface to join on.
231  *
232  * Supports link-local addresses by following the ipif/ill when recursing.
233  */
234 ire_t *
235 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst)
236 {
237 	ire_t	*ire;
238 	ipif_t	*ipif = NULL;
239 	int	match_flags = MATCH_IRE_TYPE;
240 	in6_addr_t gw_addr_v6;
241 
242 	ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL,
243 	    zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst);
244 
245 	/* We search a resolvable ire in case of multirouting. */
246 	if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) {
247 		ire_t *cire = NULL;
248 		/*
249 		 * If the route is not resolvable, the looked up ire
250 		 * may be changed here. In that case, ire_multirt_lookup()
251 		 * IRE_REFRELE the original ire and change it.
252 		 */
253 		(void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW,
254 		    NULL, ipst);
255 		if (cire != NULL)
256 			ire_refrele(cire);
257 	}
258 	if (ire == NULL)
259 		return (NULL);
260 	/*
261 	 * Make sure we follow ire_ipif.
262 	 *
263 	 * We need to determine the interface route through
264 	 * which the gateway will be reached. We don't really
265 	 * care which interface is picked if the interface is
266 	 * part of a group.
267 	 */
268 	if (ire->ire_ipif != NULL) {
269 		ipif = ire->ire_ipif;
270 		match_flags |= MATCH_IRE_ILL_GROUP;
271 	}
272 
273 	switch (ire->ire_type) {
274 	case IRE_DEFAULT:
275 	case IRE_PREFIX:
276 	case IRE_HOST:
277 		mutex_enter(&ire->ire_lock);
278 		gw_addr_v6 = ire->ire_gateway_addr_v6;
279 		mutex_exit(&ire->ire_lock);
280 		ire_refrele(ire);
281 		ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0,
282 		    IRE_INTERFACE, ipif, NULL, zoneid, 0,
283 		    NULL, match_flags, ipst);
284 		return (ire);
285 	case IRE_IF_NORESOLVER:
286 	case IRE_IF_RESOLVER:
287 		return (ire);
288 	default:
289 		ire_refrele(ire);
290 		return (NULL);
291 	}
292 }
293 
294 /*
295  * Return any local address.  We use this to target ourselves
296  * when the src address was specified as 'default'.
297  * Preference for IRE_LOCAL entries.
298  */
299 ire_t *
300 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst)
301 {
302 	ire_t	*ire;
303 	irb_t	*irb;
304 	ire_t	*maybe = NULL;
305 	int i;
306 
307 	for (i = 0; i < ipst->ips_ip6_cache_table_size;  i++) {
308 		irb = &ipst->ips_ip_cache_table_v6[i];
309 		if (irb->irb_ire == NULL)
310 			continue;
311 		rw_enter(&irb->irb_lock, RW_READER);
312 		for (ire = irb->irb_ire; ire; ire = ire->ire_next) {
313 			if ((ire->ire_marks & IRE_MARK_CONDEMNED) ||
314 			    ire->ire_zoneid != zoneid &&
315 			    ire->ire_zoneid != ALL_ZONES)
316 				continue;
317 			switch (ire->ire_type) {
318 			case IRE_LOOPBACK:
319 				if (maybe == NULL) {
320 					IRE_REFHOLD(ire);
321 					maybe = ire;
322 				}
323 				break;
324 			case IRE_LOCAL:
325 				if (maybe != NULL) {
326 					ire_refrele(maybe);
327 				}
328 				IRE_REFHOLD(ire);
329 				rw_exit(&irb->irb_lock);
330 				return (ire);
331 			}
332 		}
333 		rw_exit(&irb->irb_lock);
334 	}
335 	return (maybe);
336 }
337 
338 /*
339  * This function takes a mask and returns number of bits set in the
340  * mask (the represented prefix length).  Assumes a contiguous mask.
341  */
342 int
343 ip_mask_to_plen_v6(const in6_addr_t *v6mask)
344 {
345 	int		bits;
346 	int		plen = IPV6_ABITS;
347 	int		i;
348 
349 	for (i = 3; i >= 0; i--) {
350 		if (v6mask->s6_addr32[i] == 0) {
351 			plen -= 32;
352 			continue;
353 		}
354 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
355 		if (bits == 0)
356 			break;
357 		plen -= bits;
358 	}
359 
360 	return (plen);
361 }
362 
363 /*
364  * Convert a prefix length to the mask for that prefix.
365  * Returns the argument bitmask.
366  */
367 in6_addr_t *
368 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
369 {
370 	uint32_t *ptr;
371 
372 	if (plen < 0 || plen > IPV6_ABITS)
373 		return (NULL);
374 	*bitmask = ipv6_all_zeros;
375 
376 	ptr = (uint32_t *)bitmask;
377 	while (plen > 32) {
378 		*ptr++ = 0xffffffffU;
379 		plen -= 32;
380 	}
381 	*ptr = htonl(0xffffffffU << (32 - plen));
382 	return (bitmask);
383 }
384 
385 /*
386  * Add a fully initialized IRE to an appropriate
387  * table based on ire_type.
388  *
389  * The forward table contains IRE_PREFIX/IRE_HOST and
390  * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
391  *
392  * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
393  * and IRE_CACHE.
 * (IRE_BROADCAST is not accepted by this v6 routine: the type switch
 * below rejects it and an ASSERT before the insertion checks for it.)
394  *
395  * NOTE : This function is called as writer though not required
396  * by this function.
 *
 * Arguments:
 *	ire_p	in: the IRE to add.  out: the held IRE that is now in the
 *		table -- either the one passed in or an already present
 *		duplicate -- or NULL on failure.
 *	q, mp, func
 *		passed unchanged to ire_atomic_start(), which may queue
 *		the request (see the EINPROGRESS comment below).
 *
 * Returns 0 on success.  On failure the IRE passed in is disposed of with
 * ire_delete(), *ire_p is set to NULL and an errno value is returned:
 * EINVAL for an unrecognized ire_type, a missing or condemned parent
 * interface route (xresolv case) or a missing/unusable nce; ENOMEM when a
 * forwarding table bucket array cannot be allocated; otherwise whatever
 * ire_atomic_start() returned.
397  */
398 int
399 ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
400 {
401 	ire_t	*ire1;
402 	int	mask_table_index;
403 	irb_t	*irb_ptr;
404 	ire_t	**irep;
405 	int	flags;
406 	ire_t	*pire = NULL;
407 	ill_t	*stq_ill;
408 	boolean_t	ndp_g_lock_held = B_FALSE;
409 	ire_t	*ire = *ire_p;
410 	int	error;
411 	ip_stack_t	*ipst = ire->ire_ipst;
412 
413 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
414 	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
415 	ASSERT(ire->ire_nce == NULL);
416 
	/*
	 * Normalize the IRE according to its type: host, cache, local and
	 * loopback entries always get a full-length mask, and gateway
	 * routes without RTF_SETSRC get their source address cleared.
	 */
417 	/* Find the appropriate list head. */
418 	switch (ire->ire_type) {
419 	case IRE_HOST:
420 		ire->ire_mask_v6 = ipv6_all_ones;
421 		ire->ire_masklen = IPV6_ABITS;
422 		if ((ire->ire_flags & RTF_SETSRC) == 0)
423 			ire->ire_src_addr_v6 = ipv6_all_zeros;
424 		break;
425 	case IRE_CACHE:
426 	case IRE_LOCAL:
427 	case IRE_LOOPBACK:
428 		ire->ire_mask_v6 = ipv6_all_ones;
429 		ire->ire_masklen = IPV6_ABITS;
430 		break;
431 	case IRE_PREFIX:
432 		if ((ire->ire_flags & RTF_SETSRC) == 0)
433 			ire->ire_src_addr_v6 = ipv6_all_zeros;
434 		break;
435 	case IRE_DEFAULT:
436 		if ((ire->ire_flags & RTF_SETSRC) == 0)
437 			ire->ire_src_addr_v6 = ipv6_all_zeros;
438 		break;
439 	case IRE_IF_RESOLVER:
440 	case IRE_IF_NORESOLVER:
441 		break;
442 	default:
443 		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
444 		    (void *)ire, ire->ire_type);
445 		ire_delete(ire);
446 		*ire_p = NULL;
447 		return (EINVAL);
448 	}
449 
450 	/* Make sure the address is properly masked. */
451 	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);
452 
453 	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
454 		/* IRE goes into Forward Table */
455 		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		/*
		 * The bucket array for a given prefix length is created on
		 * first use.  It is allocated and initialized outside
		 * ips_ire_ft_init_lock; the lock is only taken to publish
		 * it, and the array is torn down again if another thread
		 * published one first.
		 */
456 		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
457 		    NULL) {
458 			irb_t *ptr;
459 			int i;
460 
461 			ptr = (irb_t *)mi_zalloc((
462 			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
463 			if (ptr == NULL) {
464 				ire_delete(ire);
465 				*ire_p = NULL;
466 				return (ENOMEM);
467 			}
468 			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
469 				rw_init(&ptr[i].irb_lock, NULL,
470 				    RW_DEFAULT, NULL);
471 			}
472 			mutex_enter(&ipst->ips_ire_ft_init_lock);
473 			if (ipst->ips_ip_forwarding_table_v6[
474 			    mask_table_index] == NULL) {
475 				ipst->ips_ip_forwarding_table_v6[
476 				    mask_table_index] = ptr;
477 				mutex_exit(&ipst->ips_ire_ft_init_lock);
478 			} else {
479 				/*
480 				 * Some other thread won the race in
481 				 * initializing the forwarding table at the
482 				 * same index.
483 				 */
484 				mutex_exit(&ipst->ips_ire_ft_init_lock);
485 				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
486 				    i++) {
487 					rw_destroy(&ptr[i].irb_lock);
488 				}
489 				mi_free(ptr);
490 			}
491 		}
492 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
493 		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
494 		    ipst->ips_ip6_ftable_hash_size)]);
495 	} else {
		/* IRE goes into the cache table, hashed on address only. */
496 		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
497 		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
498 	}
499 	/*
500 	 * For xresolv interfaces (v6 interfaces with an external
501 	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
502 	 * are unable to prevent the deletion of the interface route
503 	 * while adding an IRE_CACHE for an on-link destination
504 	 * in the IRE_IF_RESOLVER case, since the ire has to go to
505 	 * the external resolver and return. We can't do a REFHOLD on the
506 	 * associated interface ire for fear of the message being freed
507 	 * if the external resolver can't resolve the address.
508 	 * Here we look up the interface ire in the forwarding table
509 	 * and make sure that the interface route has not been deleted.
510 	 */
511 	if (ire->ire_type == IRE_CACHE &&
512 	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
513 	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
514 	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {
515 
516 		pire = ire_ihandle_lookup_onlink_v6(ire);
517 		if (pire == NULL) {
518 			ire_delete(ire);
519 			*ire_p = NULL;
520 			return (EINVAL);
521 		}
522 		/* Prevent pire from getting deleted */
523 		IRB_REFHOLD(pire->ire_bucket);
524 		/* Has it been removed already? */
525 		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
526 			IRB_REFRELE(pire->ire_bucket);
527 			ire_refrele(pire);
528 			ire_delete(ire);
529 			*ire_p = NULL;
530 			return (EINVAL);
531 		}
532 	}
533 
534 	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
535 	/*
536 	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
537 	 * for duplicates because :
538 	 *
539 	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
540 	 *    pointing at different ills. A real duplicate is
541 	 *    a match on both ire_ipif and ire_stq.
542 	 *
543 	 * 2) We could have multiple packets trying to create
544 	 *    an IRE_CACHE for the same ill.
545 	 *
546 	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants
547 	 * to go out on a particular ill. Rather than looking at the
548 	 * packet, we depend on the above for MATCH_IRE_ILL here.
549 	 *
550 	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
551 	 * multiple IRE_CACHES for an ill for the same destination
552 	 * with various scoped addresses i.e represented by ipifs.
553 	 *
554 	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
555 	 */
556 	if (ire->ire_ipif != NULL)
557 		flags |= MATCH_IRE_IPIF;
558 	/*
559 	 * If we are creating hidden ires, make sure we search on
560 	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
561 	 * searching for duplicates below. Otherwise we could
562 	 * potentially find an IRE on some other interface
563 	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
564 	 * shouldn't do this as this will lead to an infinite loop as
565 	 * eventually we need an hidden ire for this packet to go
566 	 * out. MATCH_IRE_ILL is already marked above.
567 	 */
568 	if (ire->ire_marks & IRE_MARK_HIDDEN) {
569 		ASSERT(ire->ire_type == IRE_CACHE);
570 		flags |= MATCH_IRE_MARK_HIDDEN;
571 	}
572 
573 	/*
574 	 * Start the atomic add of the ire. Grab the ill locks,
575 	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
576 	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
577 	 */
578 	if (ire->ire_type == IRE_CACHE) {
579 		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
580 		ndp_g_lock_held = B_TRUE;
581 	}
582 
583 	/*
584 	 * If ipif or ill is changing ire_atomic_start() may queue the
585 	 * request and return EINPROGRESS.
586 	 */
587 
588 	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
589 	if (error != 0) {
590 		if (ndp_g_lock_held)
591 			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
592 		/*
593 		 * We don't know whether it is a valid ipif or not.
594 		 * So, set it to NULL. This assumes that the ire has not added
595 		 * a reference to the ipif.
596 		 */
597 		ire->ire_ipif = NULL;
598 		ire_delete(ire);
599 		if (pire != NULL) {
600 			IRB_REFRELE(pire->ire_bucket);
601 			ire_refrele(pire);
602 		}
603 		*ire_p = NULL;
604 		return (error);
605 	}
606 	/*
607 	 * To avoid creating ires having stale values for the ire_max_frag
608 	 * we get the latest value atomically here. For more details
609 	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
610 	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only looked up above for IRE_CACHE entries
	 * on xresolv ills and is NULL otherwise, yet the non-multicast
	 * branch below dereferences it unconditionally.  Presumably only
	 * IREs that took the xresolv path arrive here with a NULL
	 * ire_max_fragp -- confirm against the callers.
611 	 */
612 	if (ire->ire_max_fragp == NULL) {
613 		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
614 			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
615 		else
616 			ire->ire_max_frag = pire->ire_max_frag;
617 	} else {
618 		uint_t  max_frag;
619 
620 		max_frag = *ire->ire_max_fragp;
621 		ire->ire_max_fragp = NULL;
622 		ire->ire_max_frag = max_frag;
623 	}
624 
625 	/*
626 	 * Atomically check for duplicate and insert in the table.
627 	 */
628 	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
629 		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
630 			continue;
631 
632 		if (ire->ire_type == IRE_CACHE) {
633 			/*
634 			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
635 			 * As ire_ipif and ire_stq could point to two
636 			 * different ills, we can't pass just ire_ipif to
637 			 * ire_match_args and get a match on both ills.
638 			 * This is just needed for duplicate checks here and
639 			 * so we don't add an extra argument to
640 			 * ire_match_args for this. Do it locally.
641 			 *
642 			 * NOTE : Currently there is no part of the code
643 			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
644 			 * match for IRE_CACHEs. Thus we don't want to
645 			 * extend the arguments to ire_match_args_v6.
646 			 */
647 			if (ire1->ire_stq != ire->ire_stq)
648 				continue;
649 			/*
650 			 * Multiroute IRE_CACHEs for a given destination can
651 			 * have the same ire_ipif, typically if their source
652 			 * address is forced using RTF_SETSRC, and the same
653 			 * send-to queue. We differentiate them using the parent
654 			 * handle.
655 			 */
656 			if ((ire1->ire_flags & RTF_MULTIRT) &&
657 			    (ire->ire_flags & RTF_MULTIRT) &&
658 			    (ire1->ire_phandle != ire->ire_phandle))
659 				continue;
660 		}
661 		if (ire1->ire_zoneid != ire->ire_zoneid)
662 			continue;
663 		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
664 		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
665 		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
666 		    flags)) {
667 			/*
668 			 * Return the old ire after doing a REFHOLD.
669 			 * As most of the callers continue to use the IRE
670 			 * after adding, we return a held ire. This will
671 			 * avoid a lookup in the caller again. If the callers
672 			 * don't want to use it, they need to do a REFRELE.
673 			 */
674 			ip1dbg(("found dup ire existing %p new %p",
675 			    (void *)ire1, (void *)ire));
676 			IRE_REFHOLD(ire1);
677 			if (ndp_g_lock_held)
678 				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
679 			ire_atomic_end(irb_ptr, ire);
680 			ire_delete(ire);
681 			if (pire != NULL) {
682 				/*
683 				 * Assert that it is
684 				 * not yet removed from the list.
685 				 */
686 				ASSERT(pire->ire_ptpn != NULL);
687 				IRB_REFRELE(pire->ire_bucket);
688 				ire_refrele(pire);
689 			}
690 			*ire_p = ire1;
691 			return (0);
692 		}
693 	}
694 	if (ire->ire_type == IRE_CACHE) {
695 		in6_addr_t gw_addr_v6;
696 		ill_t	*ill = ire_to_ill(ire);
697 		char	buf[INET6_ADDRSTRLEN];
698 		nce_t	*nce;
699 
700 		/*
701 		 * All IRE_CACHE types must have a nce.  If this is
702 		 * not the case the entry will not be added. We need
703 		 * to make sure that if somebody deletes the nce
704 		 * after we looked up, they will find this ire and
705 		 * delete the ire. To delete this ire one needs the
706 		 * bucket lock which we are still holding here. So,
707 		 * even if the nce gets deleted after we looked up,
708 		 * this ire  will get deleted.
709 		 *
710 		 * NOTE : Don't need the ire_lock for accessing
711 		 * ire_gateway_addr_v6 as it is appearing first
712 		 * time on the list and rts_setgwr_v6 could not
713 		 * be changing this.
714 		 */
715 		gw_addr_v6 = ire->ire_gateway_addr_v6;
716 		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
717 			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
718 		} else {
719 			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
720 		}
721 		if (nce == NULL)
722 			goto failed;
723 
724 		/* Pair of refhold, refrele just to get the tracing right */
725 		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
726 		/*
727 		 * Atomically make sure that new IREs don't point
728 		 * to an NCE that is logically deleted (CONDEMNED).
729 		 * ndp_delete() first marks the NCE CONDEMNED.
730 		 * This ensures that the nce_refcnt won't increase
731 		 * due to new nce_lookups or due to addition of new IREs
732 		 * pointing to this NCE. Then ndp_delete() cleans up
733 		 * existing references. If we don't do it atomically here,
734 		 * ndp_delete() -> nce_ire_delete() will not be able to
735 		 * clean up the IRE list completely, and the nce_refcnt
736 		 * won't go down to zero.
737 		 */
738 		mutex_enter(&nce->nce_lock);
739 		if (ill->ill_flags & ILLF_XRESOLV) {
740 			/*
741 			 * If we used an external resolver, we may not
742 			 * have gone through neighbor discovery to get here.
743 			 * Must update the nce_state before the next check.
744 			 */
745 			if (nce->nce_state == ND_INCOMPLETE)
746 				nce->nce_state = ND_REACHABLE;
747 		}
748 		if (nce->nce_state == ND_INCOMPLETE ||
749 		    (nce->nce_flags & NCE_F_CONDEMNED) ||
750 		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * Common failure exit.  Also entered by the goto
			 * above with nce == NULL, in which case nce_lock
			 * was never taken and there is no nce to release.
			 */
751 failed:
752 			if (ndp_g_lock_held)
753 				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
754 			if (nce != NULL)
755 				mutex_exit(&nce->nce_lock);
756 			ire_atomic_end(irb_ptr, ire);
757 			ip1dbg(("ire_add_v6: No nce for dst %s \n",
758 			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
759 			    buf, sizeof (buf))));
760 			ire_delete(ire);
761 			if (pire != NULL) {
762 				/*
763 				 * Assert that it is
764 				 * not yet removed from the list.
765 				 */
766 				ASSERT(pire->ire_ptpn != NULL);
767 				IRB_REFRELE(pire->ire_bucket);
768 				ire_refrele(pire);
769 			}
770 			if (nce != NULL)
771 				NCE_REFRELE_NOTR(nce);
772 			*ire_p = NULL;
773 			return (EINVAL);
774 		} else {
775 			ire->ire_nce = nce;
776 		}
777 		mutex_exit(&nce->nce_lock);
778 	}
779 	/*
780 	 * Find the first entry that matches ire_addr - provides
781 	 * tail insertion. *irep will be null if no match.
782 	 */
783 	irep = (ire_t **)irb_ptr;
784 	while ((ire1 = *irep) != NULL &&
785 	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
786 		irep = &ire1->ire_next;
787 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
788 
789 	if (*irep != NULL) {
790 		/*
791 		 * Find the last ire which matches ire_addr_v6.
792 		 * Needed to do tail insertion among entries with the same
793 		 * ire_addr_v6.
794 		 */
795 		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
796 		    &ire1->ire_addr_v6)) {
797 			irep = &ire1->ire_next;
798 			ire1 = *irep;
799 			if (ire1 == NULL)
800 				break;
801 		}
802 	}
803 
804 	if (ire->ire_type == IRE_DEFAULT) {
805 		/*
806 		 * We keep a count of default gateways which is used when
807 		 * assigning them as routes.
808 		 */
809 		ipst->ips_ipv6_ire_default_count++;
810 		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
811 	}
812 	/* Insert at *irep */
813 	ire1 = *irep;
814 	if (ire1 != NULL)
815 		ire1->ire_ptpn = &ire->ire_next;
816 	ire->ire_next = ire1;
817 	/* Link the new one in. */
818 	ire->ire_ptpn = irep;
819 	/*
820 	 * ire_walk routines de-reference ire_next without holding
821 	 * a lock. Before we point to the new ire, we want to make
822 	 * sure the store that sets the ire_next of the new ire
823 	 * reaches global visibility, so that ire_walk routines
824 	 * don't see a truncated list of ires i.e if the ire_next
825 	 * of the new ire gets set after we do "*irep = ire" due
826 	 * to re-ordering, the ire_walk thread will see a NULL
827 	 * once it accesses the ire_next of the new ire.
828 	 * membar_producer() makes sure that the following store
829 	 * happens *after* all of the above stores.
830 	 */
831 	membar_producer();
832 	*irep = ire;
833 	ire->ire_bucket = irb_ptr;
834 	/*
835 	 * We return a bumped up IRE above. Keep it symmetrical
836 	 * so that the callers will always have to release. This
837 	 * helps the callers of this function because they continue
838 	 * to use the IRE after adding and hence they don't have to
839 	 * lookup again after we return the IRE.
840 	 *
841 	 * NOTE : We don't have to use atomics as this is appearing
842 	 * in the list for the first time and no one else can bump
843 	 * up the reference count on this yet.
844 	 */
845 	IRE_REFHOLD_LOCKED(ire);
846 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
847 	irb_ptr->irb_ire_cnt++;
848 	if (ire->ire_marks & IRE_MARK_TEMPORARY)
849 		irb_ptr->irb_tmp_ire_cnt++;
850 
	/* Account for the new IRE on its ipif and on the ill behind ire_stq. */
851 	if (ire->ire_ipif != NULL) {
852 		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif,
853 		    (char *), "ire", (void *), ire);
854 		ire->ire_ipif->ipif_ire_cnt++;
855 		if (ire->ire_stq != NULL) {
856 			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
857 			DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill,
858 			    (char *), "ire", (void *), ire);
859 			stq_ill->ill_ire_cnt++;
860 		}
861 	} else {
862 		ASSERT(ire->ire_stq == NULL);
863 	}
864 
865 	if (ndp_g_lock_held)
866 		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
867 	ire_atomic_end(irb_ptr, ire);
868 
869 	if (pire != NULL) {
870 		/* Assert that it is not removed from the list yet */
871 		ASSERT(pire->ire_ptpn != NULL);
872 		IRB_REFRELE(pire->ire_bucket);
873 		ire_refrele(pire);
874 	}
875 
876 	if (ire->ire_type != IRE_CACHE) {
877 		/*
878 		 * For ire's with a host mask see if there is an entry
879 		 * in the cache. If there is one flush the whole cache as
880 		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
881 		 * If no entry is found then there is no need to flush the
882 		 * cache.
883 		 */
884 
885 		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
886 			ire_t *lire;
887 			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
888 			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
889 			    ipst);
890 			if (lire != NULL) {
891 				ire_refrele(lire);
892 				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
893 			}
894 		} else {
895 			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
896 		}
897 	}
898 
899 	*ire_p = ire;
900 	return (0);
901 }
902 
903 /*
904  * Search for all HOST REDIRECT routes that are
905  * pointing at the specified gateway and
906  * delete them. This routine is called only
907  * when a default gateway is going away.
908  */
909 static void
910 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
911 {
912 	irb_t *irb_ptr;
913 	irb_t *irb;
914 	ire_t *ire;
915 	in6_addr_t gw_addr_v6;
916 	int i;
917 
918 	/* get the hash table for HOST routes */
919 	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
920 	if (irb_ptr == NULL)
921 		return;
922 	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
923 		irb = &irb_ptr[i];
924 		IRB_REFHOLD(irb);
925 		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
926 			if (!(ire->ire_flags & RTF_DYNAMIC))
927 				continue;
928 			mutex_enter(&ire->ire_lock);
929 			gw_addr_v6 = ire->ire_gateway_addr_v6;
930 			mutex_exit(&ire->ire_lock);
931 			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
932 				ire_delete(ire);
933 		}
934 		IRB_REFRELE(irb);
935 	}
936 }
937 
938 /*
939  * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
940  * of ip_ire_clookup_and_delete. The difference being this function does not
941  * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
942  * different than IPv4 in that, regardless of the presence of a cache entry
943  * for this address, an ire_walk_v6 is done. Another difference is that unlike
944  * in the case of IPv4 this does not take an ipif_t argument, since it is only
945  * called by ip_arp_news and the match is always only on the address.
946  */
947 void
948 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst)
949 {
950 	irb_t		*irb;
951 	ire_t		*cire;
952 	boolean_t	found = B_FALSE;
953 
954 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
955 	    ipst->ips_ip6_cache_table_size)];
956 	IRB_REFHOLD(irb);
957 	for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) {
958 		if (cire->ire_marks & IRE_MARK_CONDEMNED)
959 			continue;
960 		if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) {
961 
962 			/* This signifies start of a match */
963 			if (!found)
964 				found = B_TRUE;
965 			if (cire->ire_type == IRE_CACHE) {
966 				if (cire->ire_nce != NULL)
967 					ndp_delete(cire->ire_nce);
968 				ire_delete_v6(cire);
969 			}
970 		/* End of the match */
971 		} else if (found)
972 			break;
973 	}
974 	IRB_REFRELE(irb);
975 }
976 
977 /*
978  * Delete the specified IRE.
979  * All calls should use ire_delete().
980  * Sometimes called as writer though not required by this function.
981  *
982  * NOTE : This function is called only if the ire was added
983  * in the list.
984  */
985 void
986 ire_delete_v6(ire_t *ire)
987 {
988 	in6_addr_t gw_addr_v6;
989 	ip_stack_t	*ipst = ire->ire_ipst;
990 
991 	ASSERT(ire->ire_refcnt >= 1);
992 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
993 
994 	if (ire->ire_type != IRE_CACHE)
995 		ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
996 	if (ire->ire_type == IRE_DEFAULT) {
997 		/*
998 		 * when a default gateway is going away
999 		 * delete all the host redirects pointing at that
1000 		 * gateway.
1001 		 */
1002 		mutex_enter(&ire->ire_lock);
1003 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1004 		mutex_exit(&ire->ire_lock);
1005 		ire_delete_host_redirects_v6(&gw_addr_v6, ipst);
1006 	}
1007 }
1008 
1009 /*
1010  * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect
1011  * entries.
1012  */
1013 /*ARGSUSED1*/
1014 void
1015 ire_delete_cache_v6(ire_t *ire, char *arg)
1016 {
1017 	char    addrstr1[INET6_ADDRSTRLEN];
1018 	char    addrstr2[INET6_ADDRSTRLEN];
1019 
1020 	if ((ire->ire_type & IRE_CACHE) ||
1021 	    (ire->ire_flags & RTF_DYNAMIC)) {
1022 		ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n",
1023 		    inet_ntop(AF_INET6, &ire->ire_addr_v6,
1024 		    addrstr1, sizeof (addrstr1)),
1025 		    ire->ire_type,
1026 		    inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6,
1027 		    addrstr2, sizeof (addrstr2))));
1028 		ire_delete(ire);
1029 	}
1030 
1031 }
1032 
1033 /*
1034  * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries
1035  * that have a given gateway address.
1036  */
1037 void
1038 ire_delete_cache_gw_v6(ire_t *ire, char *addr)
1039 {
1040 	in6_addr_t	*gw_addr = (in6_addr_t *)addr;
1041 	char		buf1[INET6_ADDRSTRLEN];
1042 	char		buf2[INET6_ADDRSTRLEN];
1043 	in6_addr_t	ire_gw_addr_v6;
1044 
1045 	if (!(ire->ire_type & IRE_CACHE) &&
1046 	    !(ire->ire_flags & RTF_DYNAMIC))
1047 		return;
1048 
1049 	mutex_enter(&ire->ire_lock);
1050 	ire_gw_addr_v6 = ire->ire_gateway_addr_v6;
1051 	mutex_exit(&ire->ire_lock);
1052 
1053 	if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) {
1054 		ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n",
1055 		    inet_ntop(AF_INET6, &ire->ire_src_addr_v6,
1056 		    buf1, sizeof (buf1)),
1057 		    ire->ire_type,
1058 		    inet_ntop(AF_INET6, &ire_gw_addr_v6,
1059 		    buf2, sizeof (buf2))));
1060 		ire_delete(ire);
1061 	}
1062 }
1063 
1064 /*
1065  * Remove all IRE_CACHE entries that match
1066  * the ire specified.  (Sometimes called
1067  * as writer though not required by this function.)
1068  *
1069  * The flag argument indicates if the
1070  * flush request is due to addition
1071  * of new route (IRE_FLUSH_ADD) or deletion of old
1072  * route (IRE_FLUSH_DELETE).
1073  *
1074  * This routine takes only the IREs from the forwarding
1075  * table and flushes the corresponding entries from
1076  * the cache table.
1077  *
1078  * When flushing due to the deletion of an old route, it
1079  * just checks the cache handles (ire_phandle and ire_ihandle) and
1080  * deletes the ones that match.
1081  *
1082  * When flushing due to the creation of a new route, it checks
1083  * if a cache entry's address matches the one in the IRE and
1084  * that the cache entry's parent has a less specific mask than the
1085  * one in IRE. The destination of such a cache entry could be the
1086  * gateway for other cache entries, so we need to flush those as
1087  * well by looking for gateway addresses matching the IRE's address.
1088  */
1089 void
1090 ire_flush_cache_v6(ire_t *ire, int flag)
1091 {
1092 	int i;
1093 	ire_t *cire;
1094 	irb_t *irb;
1095 	ip_stack_t	*ipst = ire->ire_ipst;
1096 
1097 	if (ire->ire_type & IRE_CACHE)
1098 		return;
1099 
1100 	/*
1101 	 * If a default is just created, there is no point
1102 	 * in going through the cache, as there will not be any
1103 	 * cached ires.
1104 	 */
1105 	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
1106 		return;
1107 	if (flag == IRE_FLUSH_ADD) {
1108 		/*
1109 		 * This selective flush is
1110 		 * due to the addition of
1111 		 * new IRE.
1112 		 */
1113 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
1114 			irb = &ipst->ips_ip_cache_table_v6[i];
1115 			if ((cire = irb->irb_ire) == NULL)
1116 				continue;
1117 			IRB_REFHOLD(irb);
1118 			for (cire = irb->irb_ire; cire != NULL;
1119 			    cire = cire->ire_next) {
1120 				if (cire->ire_type != IRE_CACHE)
1121 					continue;
1122 				/*
1123 				 * If 'cire' belongs to the same subnet
1124 				 * as the new ire being added, and 'cire'
1125 				 * is derived from a prefix that is less
1126 				 * specific than the new ire being added,
1127 				 * we need to flush 'cire'; for instance,
1128 				 * when a new interface comes up.
1129 				 */
1130 				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
1131 				    ire->ire_mask_v6, ire->ire_addr_v6) &&
1132 				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
1133 				    ire->ire_masklen))) {
1134 					ire_delete(cire);
1135 					continue;
1136 				}
1137 				/*
1138 				 * This is the case when the ire_gateway_addr
1139 				 * of 'cire' belongs to the same subnet as
1140 				 * the new ire being added.
1141 				 * Flushing such ires is sometimes required to
1142 				 * avoid misrouting: say we have a machine with
1143 				 * two interfaces (I1 and I2), a default router
1144 				 * R on the I1 subnet, and a host route to an
1145 				 * off-link destination D with a gateway G on
1146 				 * the I2 subnet.
1147 				 * Under normal operation, we will have an
1148 				 * on-link cache entry for G and an off-link
1149 				 * cache entry for D with G as ire_gateway_addr,
1150 				 * traffic to D will reach its destination
1151 				 * through gateway G.
1152 				 * If the administrator does 'ifconfig I2 down',
1153 				 * the cache entries for D and G will be
1154 				 * flushed. However, G will now be resolved as
1155 				 * an off-link destination using R (the default
1156 				 * router) as gateway. Then D will also be
1157 				 * resolved as an off-link destination using G
1158 				 * as gateway - this behavior is due to
1159 				 * compatibility reasons, see comment in
1160 				 * ire_ihandle_lookup_offlink(). Traffic to D
1161 				 * will go to the router R and probably won't
1162 				 * reach the destination.
1163 				 * The administrator then does 'ifconfig I2 up'.
1164 				 * Since G is on the I2 subnet, this routine
1165 				 * will flush its cache entry. It must also
1166 				 * flush the cache entry for D, otherwise
1167 				 * traffic will stay misrouted until the IRE
1168 				 * times out.
1169 				 */
1170 				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
1171 				    ire->ire_mask_v6, ire->ire_addr_v6)) {
1172 					ire_delete(cire);
1173 					continue;
1174 				}
1175 			}
1176 			IRB_REFRELE(irb);
1177 		}
1178 	} else {
1179 		/*
1180 		 * delete the cache entries based on
1181 		 * handle in the IRE as this IRE is
1182 		 * being deleted/changed.
1183 		 */
1184 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
1185 			irb = &ipst->ips_ip_cache_table_v6[i];
1186 			if ((cire = irb->irb_ire) == NULL)
1187 				continue;
1188 			IRB_REFHOLD(irb);
1189 			for (cire = irb->irb_ire; cire != NULL;
1190 			    cire = cire->ire_next) {
1191 				if (cire->ire_type != IRE_CACHE)
1192 					continue;
1193 				if ((cire->ire_phandle == 0 ||
1194 				    cire->ire_phandle != ire->ire_phandle) &&
1195 				    (cire->ire_ihandle == 0 ||
1196 				    cire->ire_ihandle != ire->ire_ihandle))
1197 					continue;
1198 				ire_delete(cire);
1199 			}
1200 			IRB_REFRELE(irb);
1201 		}
1202 	}
1203 }
1204 
1205 /*
1206  * Matches the arguments passed with the values in the ire.
1207  *
1208  * Note: for match types that match using "ipif" passed in, ipif
1209  * must be checked for non-NULL before calling this routine.
1210  */
1211 static boolean_t
1212 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
1213     const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid,
1214     uint32_t ihandle, const ts_label_t *tsl, int match_flags)
1215 {
1216 	in6_addr_t masked_addr;
1217 	in6_addr_t gw_addr_v6;
1218 	ill_t *ire_ill = NULL, *dst_ill;
1219 	ill_t *ipif_ill = NULL;
1220 	ill_group_t *ire_ill_group = NULL;
1221 	ill_group_t *ipif_ill_group = NULL;
1222 	ipif_t	*src_ipif;
1223 
1224 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1225 	ASSERT(addr != NULL);
1226 	ASSERT(mask != NULL);
1227 	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
1228 	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) ||
1229 	    (ipif != NULL && ipif->ipif_isv6));
1230 
1231 	/*
1232 	 * HIDDEN cache entries have to be looked up specifically with
1233 	 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
1234 	 * when the interface is FAILED or INACTIVE. In that case,
1235 	 * any IRE_CACHES that exists should be marked with
1236 	 * IRE_MARK_HIDDEN. So, we don't really need to match below
1237 	 * for IRE_MARK_HIDDEN. But we do so for consistency.
1238 	 */
1239 	if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
1240 	    (ire->ire_marks & IRE_MARK_HIDDEN))
1241 		return (B_FALSE);
1242 
1243 	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
1244 	    ire->ire_zoneid != ALL_ZONES) {
1245 		/*
1246 		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
1247 		 * valid and does not match that of ire_zoneid, a failure to
1248 		 * match is reported at this point. Otherwise, since some IREs
1249 		 * that are available in the global zone can be used in local
1250 		 * zones, additional checks need to be performed:
1251 		 *
1252 		 *	IRE_CACHE and IRE_LOOPBACK entries should
1253 		 *	never be matched in this situation.
1254 		 *
1255 		 *	IRE entries that have an interface associated with them
1256 		 *	should in general not match unless they are an IRE_LOCAL
1257 		 *	or in the case when MATCH_IRE_DEFAULT has been set in
1258 		 *	the caller.  In the case of the former, checking of the
1259 		 *	other fields supplied should take place.
1260 		 *
1261 		 *	In the case where MATCH_IRE_DEFAULT has been set,
1262 		 *	all of the ipif's associated with the IRE's ill are
1263 		 *	checked to see if there is a matching zoneid.  If any
1264 		 *	one ipif has a matching zoneid, this IRE is a
1265 		 *	potential candidate so checking of the other fields
1266 		 *	takes place.
1267 		 *
1268 		 *	In the case where the IRE_INTERFACE has a usable source
1269 		 *	address (indicated by ill_usesrc_ifindex) in the
1270 		 *	correct zone then it's permitted to return this IRE
1271 		 */
1272 		if (match_flags & MATCH_IRE_ZONEONLY)
1273 			return (B_FALSE);
1274 		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
1275 			return (B_FALSE);
1276 		/*
1277 		 * Note, IRE_INTERFACE can have the stq as NULL. For
1278 		 * example, if the default multicast route is tied to
1279 		 * the loopback address.
1280 		 */
1281 		if ((ire->ire_type & IRE_INTERFACE) &&
1282 		    (ire->ire_stq != NULL)) {
1283 			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
1284 			/*
1285 			 * If there is a usable source address in the
1286 			 * zone, then it's ok to return an
1287 			 * IRE_INTERFACE
1288 			 */
1289 			if ((dst_ill->ill_usesrc_ifindex != 0) &&
1290 			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
1291 			    RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid))
1292 			    != NULL) {
1293 				ip3dbg(("ire_match_args: src_ipif %p"
1294 				    " dst_ill %p", (void *)src_ipif,
1295 				    (void *)dst_ill));
1296 				ipif_refrele(src_ipif);
1297 			} else {
1298 				ip3dbg(("ire_match_args: src_ipif NULL"
1299 				    " dst_ill %p\n", (void *)dst_ill));
1300 				return (B_FALSE);
1301 			}
1302 		}
1303 		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
1304 		    !(ire->ire_type & IRE_INTERFACE)) {
1305 			ipif_t	*tipif;
1306 
1307 			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
1308 				return (B_FALSE);
1309 			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
1310 			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
1311 			    tipif != NULL; tipif = tipif->ipif_next) {
1312 				if (IPIF_CAN_LOOKUP(tipif) &&
1313 				    (tipif->ipif_flags & IPIF_UP) &&
1314 				    (tipif->ipif_zoneid == zoneid ||
1315 				    tipif->ipif_zoneid == ALL_ZONES))
1316 					break;
1317 			}
1318 			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
1319 			if (tipif == NULL)
1320 				return (B_FALSE);
1321 		}
1322 	}
1323 
1324 	if (match_flags & MATCH_IRE_GW) {
1325 		mutex_enter(&ire->ire_lock);
1326 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1327 		mutex_exit(&ire->ire_lock);
1328 	}
1329 	/*
1330 	 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
1331 	 * somebody wants to send out on a particular interface which
1332 	 * is given by ire_stq and hence use ire_stq to derive the ill
1333 	 * value. ire_ipif for IRE_CACHES is just the
1334 	 * means of getting a source address i.e ire_src_addr_v6 =
1335 	 * ire->ire_ipif->ipif_src_addr_v6.
1336 	 */
1337 	if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) {
1338 		ire_ill = ire_to_ill(ire);
1339 		if (ire_ill != NULL)
1340 			ire_ill_group = ire_ill->ill_group;
1341 		ipif_ill = ipif->ipif_ill;
1342 		ipif_ill_group = ipif_ill->ill_group;
1343 	}
1344 
1345 	/* No ire_addr_v6 bits set past the mask */
1346 	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
1347 	    ire->ire_addr_v6));
1348 	V6_MASK_COPY(*addr, *mask, masked_addr);
1349 
1350 	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
1351 	    ((!(match_flags & MATCH_IRE_GW)) ||
1352 	    IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
1353 	    ((!(match_flags & MATCH_IRE_TYPE)) ||
1354 	    (ire->ire_type & type)) &&
1355 	    ((!(match_flags & MATCH_IRE_SRC)) ||
1356 	    IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
1357 	    &ipif->ipif_v6src_addr)) &&
1358 	    ((!(match_flags & MATCH_IRE_IPIF)) ||
1359 	    (ire->ire_ipif == ipif)) &&
1360 	    ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
1361 	    (ire->ire_type != IRE_CACHE ||
1362 	    ire->ire_marks & IRE_MARK_HIDDEN)) &&
1363 	    ((!(match_flags & MATCH_IRE_ILL)) ||
1364 	    (ire_ill == ipif_ill)) &&
1365 	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
1366 	    (ire->ire_ihandle == ihandle)) &&
1367 	    ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
1368 	    (ire_ill == ipif_ill) ||
1369 	    (ire_ill_group != NULL &&
1370 	    ire_ill_group == ipif_ill_group)) &&
1371 	    ((!(match_flags & MATCH_IRE_SECATTR)) ||
1372 	    (!is_system_labeled()) ||
1373 	    (tsol_ire_match_gwattr(ire, tsl) == 0))) {
1374 		/* We found the matched IRE */
1375 		return (B_TRUE);
1376 	}
1377 	return (B_FALSE);
1378 }
1379 
1380 /*
1381  * Lookup for a route in all the tables
1382  */
1383 ire_t *
1384 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1385     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
1386     zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst)
1387 {
1388 	ire_t *ire = NULL;
1389 
1390 	/*
1391 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1392 	 * MATCH_IRE_ILL is set.
1393 	 */
1394 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1395 	    (ipif == NULL))
1396 		return (NULL);
1397 
1398 	/*
1399 	 * might be asking for a cache lookup,
1400 	 * This is not best way to lookup cache,
1401 	 * user should call ire_cache_lookup directly.
1402 	 *
1403 	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
1404 	 * in the forwarding table, if the applicable type flags were set.
1405 	 */
1406 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
1407 		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
1408 		    tsl, flags, ipst);
1409 		if (ire != NULL)
1410 			return (ire);
1411 	}
1412 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
1413 		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
1414 		    pire, zoneid, 0, tsl, flags, ipst);
1415 	}
1416 	return (ire);
1417 }
1418 
1419 /*
1420  * Lookup a route in forwarding table.
1421  * specific lookup is indicated by passing the
1422  * required parameters and indicating the
1423  * match required in flag field.
1424  *
1425  * Looking for default route can be done in three ways
1426  * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
1427  *    along with other matches.
1428  * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
1429  *    field along with other matches.
1430  * 3) if the destination and mask are passed as zeros.
1431  *
1432  * A request to return a default route if no route
1433  * is found, can be specified by setting MATCH_IRE_DEFAULT
1434  * in flags.
1435  *
1436  * It does not support recursion more than one level. It
1437  * will do recursive lookup only when the lookup maps to
1438  * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
1439  *
1440  * If the routing table is setup to allow more than one level
1441  * of recursion, the cleaning up cache table will not work resulting
1442  * in invalid routing.
1443  *
1444  * Supports link-local addresses by following the ipif/ill when recursing.
1445  *
1446  * NOTE : When this function returns NULL, pire has already been released.
1447  *	  pire is valid only when this function successfully returns an
1448  *	  ire.
1449  */
1450 ire_t *
1451 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1452     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
1453     zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags,
1454     ip_stack_t *ipst)
1455 {
1456 	irb_t *irb_ptr;
1457 	ire_t	*rire;
1458 	ire_t *ire = NULL;
1459 	ire_t	*saved_ire;
1460 	nce_t	*nce;
1461 	int i;
1462 	in6_addr_t gw_addr_v6;
1463 
1464 	ASSERT(addr != NULL);
1465 	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
1466 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
1467 	ASSERT(ipif == NULL || ipif->ipif_isv6);
1468 
1469 	/*
1470 	 * When we return NULL from this function, we should make
1471 	 * sure that *pire is NULL so that the callers will not
1472 	 * wrongly REFRELE the pire.
1473 	 */
1474 	if (pire != NULL)
1475 		*pire = NULL;
1476 	/*
1477 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1478 	 * MATCH_IRE_ILL is set.
1479 	 */
1480 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1481 	    (ipif == NULL))
1482 		return (NULL);
1483 
1484 	/*
1485 	 * If the mask is known, the lookup
1486 	 * is simple, if the mask is not known
1487 	 * we need to search.
1488 	 */
1489 	if (flags & MATCH_IRE_MASK) {
1490 		uint_t masklen;
1491 
1492 		masklen = ip_mask_to_plen_v6(mask);
1493 		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL)
1494 			return (NULL);
1495 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
1496 		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
1497 		    ipst->ips_ip6_ftable_hash_size)]);
1498 		rw_enter(&irb_ptr->irb_lock, RW_READER);
1499 		for (ire = irb_ptr->irb_ire; ire != NULL;
1500 		    ire = ire->ire_next) {
1501 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
1502 				continue;
1503 			if (ire_match_args_v6(ire, addr, mask, gateway, type,
1504 			    ipif, zoneid, ihandle, tsl, flags))
1505 				goto found_ire;
1506 		}
1507 		rw_exit(&irb_ptr->irb_lock);
1508 	} else {
1509 		/*
1510 		 * In this case we don't know the mask, we need to
1511 		 * search the table assuming different mask sizes.
1512 		 * we start with 128 bit mask, we don't allow default here.
1513 		 */
1514 		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
1515 			in6_addr_t tmpmask;
1516 
1517 			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
1518 				continue;
1519 			(void) ip_plen_to_mask_v6(i, &tmpmask);
1520 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
1521 			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
1522 			    ipst->ips_ip6_ftable_hash_size)];
1523 			rw_enter(&irb_ptr->irb_lock, RW_READER);
1524 			for (ire = irb_ptr->irb_ire; ire != NULL;
1525 			    ire = ire->ire_next) {
1526 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1527 					continue;
1528 				if (ire_match_args_v6(ire, addr,
1529 				    &ire->ire_mask_v6, gateway, type, ipif,
1530 				    zoneid, ihandle, tsl, flags))
1531 					goto found_ire;
1532 			}
1533 			rw_exit(&irb_ptr->irb_lock);
1534 		}
1535 	}
1536 
1537 	/*
1538 	 * We come here if no route has yet been found.
1539 	 *
1540 	 * Handle the case where default route is
1541 	 * requested by specifying type as one of the possible
1542 	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
1543 	 *
1544 	 * If MATCH_IRE_MASK is specified, then the appropriate default route
1545 	 * would have been found above if it exists so it isn't looked up here.
1546 	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
1547 	 * searched for later.
1548 	 */
1549 	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
1550 	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
1551 		if (ipst->ips_ip_forwarding_table_v6[0] != NULL) {
1552 			/* addr & mask is zero for defaults */
1553 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][
1554 			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
1555 			    ipst->ips_ip6_ftable_hash_size)];
1556 			rw_enter(&irb_ptr->irb_lock, RW_READER);
1557 			for (ire = irb_ptr->irb_ire; ire != NULL;
1558 			    ire = ire->ire_next) {
1559 
1560 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1561 					continue;
1562 
1563 				if (ire_match_args_v6(ire, addr,
1564 				    &ipv6_all_zeros, gateway, type, ipif,
1565 				    zoneid, ihandle, tsl, flags))
1566 					goto found_ire;
1567 			}
1568 			rw_exit(&irb_ptr->irb_lock);
1569 		}
1570 	}
1571 	/*
1572 	 * We come here only if no route is found.
1573 	 * see if the default route can be used which is allowed
1574 	 * only if the default matching criteria is specified.
1575 	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
1576 	 * entries. However, the ip_forwarding_table_v6[0] also contains
1577 	 * interface routes thus the count can be zero.
1578 	 */
1579 	saved_ire = NULL;
1580 	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
1581 	    MATCH_IRE_DEFAULT) {
1582 		ire_t	*ire_origin;
1583 		uint_t	g_index;
1584 		uint_t	index;
1585 
1586 		if (ipst->ips_ip_forwarding_table_v6[0] == NULL)
1587 			return (NULL);
1588 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0];
1589 
1590 		/*
1591 		 * Keep a tab on the bucket while looking the IRE_DEFAULT
1592 		 * entries. We need to keep track of a particular IRE
1593 		 * (ire_origin) so this ensures that it will not be unlinked
1594 		 * from the hash list during the recursive lookup below.
1595 		 */
1596 		IRB_REFHOLD(irb_ptr);
1597 		ire = irb_ptr->irb_ire;
1598 		if (ire == NULL) {
1599 			IRB_REFRELE(irb_ptr);
1600 			return (NULL);
1601 		}
1602 
1603 		/*
1604 		 * Get the index first, since it can be changed by other
1605 		 * threads. Then get to the right default route skipping
1606 		 * default interface routes if any. As we hold a reference on
1607 		 * the IRE bucket, ipv6_ire_default_count can only increase so
1608 		 * we can't reach the end of the hash list unexpectedly.
1609 		 */
1610 		if (ipst->ips_ipv6_ire_default_count != 0) {
1611 			g_index = ipst->ips_ipv6_ire_default_index++;
1612 			index = g_index % ipst->ips_ipv6_ire_default_count;
1613 			while (index != 0) {
1614 				if (!(ire->ire_type & IRE_INTERFACE))
1615 					index--;
1616 				ire = ire->ire_next;
1617 			}
1618 			ASSERT(ire != NULL);
1619 		} else {
1620 			/*
1621 			 * No default route, so we only have default interface
1622 			 * routes: don't enter the first loop.
1623 			 */
1624 			ire = NULL;
1625 		}
1626 
1627 		/*
1628 		 * Round-robin the default routers list looking for a neighbor
1629 		 * that matches the passed in parameters and is reachable.  If
1630 		 * none found, just return a route from the default router list
1631 		 * if it exists. If we can't find a default route (IRE_DEFAULT),
1632 		 * look for interface default routes.
1633 		 * We start with the ire we found above and we walk the hash
1634 		 * list until we're back where we started, see
1635 		 * ire_get_next_default_ire(). It doesn't matter if default
1636 		 * routes are added or deleted by other threads - we know this
1637 		 * ire will stay in the list because we hold a reference on the
1638 		 * ire bucket.
1639 		 * NB: if we only have interface default routes, ire is NULL so
1640 		 * we don't even enter this loop (see above).
1641 		 */
1642 		ire_origin = ire;
1643 		for (; ire != NULL;
1644 		    ire = ire_get_next_default_ire(ire, ire_origin)) {
1645 
1646 			if (ire_match_args_v6(ire, addr,
1647 			    &ipv6_all_zeros, gateway, type, ipif,
1648 			    zoneid, ihandle, tsl, flags)) {
1649 				int match_flags;
1650 
1651 				/*
1652 				 * We have something to work with.
1653 				 * If we can find a resolved/reachable
1654 				 * entry, we will use this. Otherwise
1655 				 * we'll try to find an entry that has
1656 				 * a resolved cache entry. We will fallback
1657 				 * on this if we don't find anything else.
1658 				 */
1659 				if (saved_ire == NULL)
1660 					saved_ire = ire;
1661 				mutex_enter(&ire->ire_lock);
1662 				gw_addr_v6 = ire->ire_gateway_addr_v6;
1663 				mutex_exit(&ire->ire_lock);
1664 				match_flags = MATCH_IRE_ILL_GROUP |
1665 				    MATCH_IRE_SECATTR;
1666 				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
1667 				    0, ire->ire_ipif, zoneid, tsl, match_flags,
1668 				    ipst);
1669 				if (rire != NULL) {
1670 					nce = rire->ire_nce;
1671 					if (nce != NULL &&
1672 					    NCE_ISREACHABLE(nce) &&
1673 					    nce->nce_flags & NCE_F_ISROUTER) {
1674 						ire_refrele(rire);
1675 						IRE_REFHOLD(ire);
1676 						IRB_REFRELE(irb_ptr);
1677 						goto found_ire_held;
1678 					} else if (nce != NULL &&
1679 					    !(nce->nce_flags &
1680 					    NCE_F_ISROUTER)) {
1681 						/*
1682 						 * Make sure we don't use
1683 						 * this ire
1684 						 */
1685 						if (saved_ire == ire)
1686 							saved_ire = NULL;
1687 					}
1688 					ire_refrele(rire);
1689 				} else if (ipst->
1690 				    ips_ipv6_ire_default_count > 1 &&
1691 				    zoneid != GLOBAL_ZONEID) {
1692 					/*
1693 					 * When we're in a local zone, we're
1694 					 * only interested in default routers
1695 					 * that are reachable through ipifs
1696 					 * within our zone.
1697 					 * The potentially expensive call to
1698 					 * ire_route_lookup_v6() is avoided when
1699 					 * we have only one default route.
1700 					 */
1701 					int ire_match_flags = MATCH_IRE_TYPE |
1702 					    MATCH_IRE_SECATTR;
1703 
1704 					if (ire->ire_ipif != NULL) {
1705 						ire_match_flags |=
1706 						    MATCH_IRE_ILL_GROUP;
1707 					}
1708 					rire = ire_route_lookup_v6(&gw_addr_v6,
1709 					    NULL, NULL, IRE_INTERFACE,
1710 					    ire->ire_ipif, NULL,
1711 					    zoneid, tsl, ire_match_flags, ipst);
1712 					if (rire != NULL) {
1713 						ire_refrele(rire);
1714 						saved_ire = ire;
1715 					} else if (saved_ire == ire) {
1716 						/*
1717 						 * Make sure we don't use
1718 						 * this ire
1719 						 */
1720 						saved_ire = NULL;
1721 					}
1722 				}
1723 			}
1724 		}
1725 		if (saved_ire != NULL) {
1726 			ire = saved_ire;
1727 			IRE_REFHOLD(ire);
1728 			IRB_REFRELE(irb_ptr);
1729 			goto found_ire_held;
1730 		} else {
1731 			/*
1732 			 * Look for a interface default route matching the
1733 			 * args passed in. No round robin here. Just pick
1734 			 * the right one.
1735 			 */
1736 			for (ire = irb_ptr->irb_ire; ire != NULL;
1737 			    ire = ire->ire_next) {
1738 
1739 				if (!(ire->ire_type & IRE_INTERFACE))
1740 					continue;
1741 
1742 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1743 					continue;
1744 
1745 				if (ire_match_args_v6(ire, addr,
1746 				    &ipv6_all_zeros, gateway, type, ipif,
1747 				    zoneid, ihandle, tsl, flags)) {
1748 					IRE_REFHOLD(ire);
1749 					IRB_REFRELE(irb_ptr);
1750 					goto found_ire_held;
1751 				}
1752 			}
1753 			IRB_REFRELE(irb_ptr);
1754 		}
1755 	}
1756 	ASSERT(ire == NULL);
1757 	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
1758 	return (NULL);
1759 found_ire:
1760 	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
1761 	IRE_REFHOLD(ire);
1762 	rw_exit(&irb_ptr->irb_lock);
1763 
1764 found_ire_held:
1765 	if ((flags & MATCH_IRE_RJ_BHOLE) &&
1766 	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
1767 		return (ire);
1768 	}
1769 	/*
1770 	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
1771 	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
1772 	 * IRE_INTERFACE type was found, return that.  If it was some other
1773 	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
1774 	 * is necessary to fill in the  parent IRE pointed to by pire, and
1775 	 * then lookup the gateway address of  the parent.  For backwards
1776 	 * compatiblity, if this lookup returns an
1777 	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
1778 	 * of lookup is done.
1779 	 */
1780 	if (flags & MATCH_IRE_RECURSIVE) {
1781 		const ipif_t *gw_ipif;
1782 		int match_flags = MATCH_IRE_DSTONLY;
1783 
1784 		if (ire->ire_type & IRE_INTERFACE)
1785 			return (ire);
1786 		if (pire != NULL)
1787 			*pire = ire;
1788 		/*
1789 		 * If we can't find an IRE_INTERFACE or the caller has not
1790 		 * asked for pire, we need to REFRELE the saved_ire.
1791 		 */
1792 		saved_ire = ire;
1793 
1794 		/*
1795 		 * Currently MATCH_IRE_ILL is never used with
1796 		 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
1797 		 * sending out packets as MATCH_IRE_ILL is used only
1798 		 * for communicating with on-link hosts. We can't assert
1799 		 * that here as RTM_GET calls this function with
1800 		 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
1801 		 * We have already used the MATCH_IRE_ILL in determining
1802 		 * the right prefix route at this point. To match the
1803 		 * behavior of how we locate routes while sending out
1804 		 * packets, we don't want to use MATCH_IRE_ILL below
1805 		 * while locating the interface route.
1806 		 */
1807 		if (ire->ire_ipif != NULL)
1808 			match_flags |= MATCH_IRE_ILL_GROUP;
1809 
1810 		mutex_enter(&ire->ire_lock);
1811 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1812 		mutex_exit(&ire->ire_lock);
1813 
1814 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
1815 		    ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst);
1816 		if (ire == NULL) {
1817 			/*
1818 			 * In this case we have to deal with the
1819 			 * MATCH_IRE_PARENT flag, which means the
1820 			 * parent has to be returned if ire is NULL.
1821 			 * The aim of this is to have (at least) a starting
1822 			 * ire when we want to look at all of the ires in a
1823 			 * bucket aimed at a single destination (as is the
1824 			 * case in ip_newroute_v6 for the RTF_MULTIRT
1825 			 * flagged routes).
1826 			 */
1827 			if (flags & MATCH_IRE_PARENT) {
1828 				if (pire != NULL) {
1829 					/*
1830 					 * Need an extra REFHOLD, if the
1831 					 * parent ire is returned via both
1832 					 * ire and pire.
1833 					 */
1834 					IRE_REFHOLD(saved_ire);
1835 				}
1836 				ire = saved_ire;
1837 			} else {
1838 				ire_refrele(saved_ire);
1839 				if (pire != NULL)
1840 					*pire = NULL;
1841 			}
1842 			return (ire);
1843 		}
1844 		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
1845 			/*
1846 			 * If the caller did not ask for pire, release
1847 			 * it now.
1848 			 */
1849 			if (pire == NULL) {
1850 				ire_refrele(saved_ire);
1851 			}
1852 			return (ire);
1853 		}
1854 		match_flags |= MATCH_IRE_TYPE;
1855 		mutex_enter(&ire->ire_lock);
1856 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1857 		mutex_exit(&ire->ire_lock);
1858 		gw_ipif = ire->ire_ipif;
1859 		ire_refrele(ire);
1860 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
1861 		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
1862 		    NULL, match_flags, ipst);
1863 		if (ire == NULL) {
1864 			/*
1865 			 * In this case we have to deal with the
1866 			 * MATCH_IRE_PARENT flag, which means the
1867 			 * parent has to be returned if ire is NULL.
1868 			 * The aim of this is to have (at least) a starting
1869 			 * ire when we want to look at all of the ires in a
1870 			 * bucket aimed at a single destination (as is the
1871 			 * case in ip_newroute_v6 for the RTF_MULTIRT
1872 			 * flagged routes).
1873 			 */
1874 			if (flags & MATCH_IRE_PARENT) {
1875 				if (pire != NULL) {
1876 					/*
1877 					 * Need an extra REFHOLD, if the
1878 					 * parent ire is returned via both
1879 					 * ire and pire.
1880 					 */
1881 					IRE_REFHOLD(saved_ire);
1882 				}
1883 				ire = saved_ire;
1884 			} else {
1885 				ire_refrele(saved_ire);
1886 				if (pire != NULL)
1887 					*pire = NULL;
1888 			}
1889 			return (ire);
1890 		} else if (pire == NULL) {
1891 			/*
1892 			 * If the caller did not ask for pire, release
1893 			 * it now.
1894 			 */
1895 			ire_refrele(saved_ire);
1896 		}
1897 		return (ire);
1898 	}
1899 
1900 	ASSERT(pire == NULL || *pire == NULL);
1901 	return (ire);
1902 }
1903 
1904 /*
1905  * Delete the IRE cache for the gateway and all IRE caches whose
1906  * ire_gateway_addr_v6 points to this gateway, and allow them to
1907  * be created on demand by ip_newroute_v6.
1908  */
1909 void
1910 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid,
1911 	ip_stack_t *ipst)
1912 {
1913 	irb_t *irb;
1914 	ire_t *ire;
1915 
1916 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
1917 	    ipst->ips_ip6_cache_table_size)];
1918 	IRB_REFHOLD(irb);
1919 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
1920 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
1921 			continue;
1922 
1923 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
1924 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0,
1925 		    IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) {
1926 			ire_delete(ire);
1927 		}
1928 	}
1929 	IRB_REFRELE(irb);
1930 
1931 	ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst);
1932 }
1933 
1934 /*
1935  * Looks up cache table for a route.
1936  * specific lookup can be indicated by
1937  * passing the MATCH_* flags and the
1938  * necessary parameters.
1939  */
1940 ire_t *
1941 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway,
1942     int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl,
1943     int flags, ip_stack_t *ipst)
1944 {
1945 	ire_ctable_args_t	margs;
1946 
1947 	margs.ict_addr = (void *)addr;
1948 	margs.ict_gateway = (void *)gateway;
1949 	margs.ict_type = type;
1950 	margs.ict_ipif = ipif;
1951 	margs.ict_zoneid = zoneid;
1952 	margs.ict_tsl = tsl;
1953 	margs.ict_flags = flags;
1954 	margs.ict_ipst = ipst;
1955 	margs.ict_wq = NULL;
1956 
1957 	return (ip6_ctable_lookup_impl(&margs));
1958 }
1959 
1960 /*
1961  * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers
1962  * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get
1963  * to the hidden ones.
1964  *
1965  * In general the zoneid has to match (where ALL_ZONES match all of them).
1966  * But for IRE_LOCAL we also need to handle the case where L2 should
1967  * conceptually loop back the packet. This is necessary since neither
1968  * Ethernet drivers nor Ethernet hardware loops back packets sent to their
1969  * own MAC address. This loopback is needed when the normal
1970  * routes (ignoring IREs with different zoneids) would send out the packet on
1971  * the same ill (or ill group) as the ill with which this IRE_LOCAL is
1972  * associated.
1973  *
1974  * Earlier versions of this code always matched an IRE_LOCAL independently of
1975  * the zoneid. We preserve that earlier behavior when
1976  * ip_restrict_interzone_loopback is turned off.
1977  */
1978 ire_t *
1979 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid,
1980     const ts_label_t *tsl, ip_stack_t *ipst)
1981 {
1982 	irb_t *irb_ptr;
1983 	ire_t *ire;
1984 
1985 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
1986 	    ipst->ips_ip6_cache_table_size)];
1987 	rw_enter(&irb_ptr->irb_lock, RW_READER);
1988 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
1989 		if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
1990 			continue;
1991 		if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) {
1992 			/*
1993 			 * Finally, check if the security policy has any
1994 			 * restriction on using this route for the specified
1995 			 * message.
1996 			 */
1997 			if (tsl != NULL &&
1998 			    ire->ire_gw_secattr != NULL &&
1999 			    tsol_ire_match_gwattr(ire, tsl) != 0) {
2000 				continue;
2001 			}
2002 
2003 			if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid ||
2004 			    ire->ire_zoneid == ALL_ZONES) {
2005 				IRE_REFHOLD(ire);
2006 				rw_exit(&irb_ptr->irb_lock);
2007 				return (ire);
2008 			}
2009 
2010 			if (ire->ire_type == IRE_LOCAL) {
2011 				if (ipst->ips_ip_restrict_interzone_loopback &&
2012 				    !ire_local_ok_across_zones(ire, zoneid,
2013 				    (void *)addr, tsl, ipst))
2014 					continue;
2015 
2016 				IRE_REFHOLD(ire);
2017 				rw_exit(&irb_ptr->irb_lock);
2018 				return (ire);
2019 			}
2020 		}
2021 	}
2022 	rw_exit(&irb_ptr->irb_lock);
2023 	return (NULL);
2024 }
2025 
2026 /*
2027  * Locate the interface ire that is tied to the cache ire 'cire' via
2028  * cire->ire_ihandle.
2029  *
2030  * We are trying to create the cache ire for an onlink destn. or
2031  * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER
2032  * case for xresolv interfaces, after the ire has come back from
2033  * an external resolver.
2034  */
2035 static ire_t *
2036 ire_ihandle_lookup_onlink_v6(ire_t *cire)
2037 {
2038 	ire_t	*ire;
2039 	int	match_flags;
2040 	int	i;
2041 	int	j;
2042 	irb_t	*irb_ptr;
2043 	ip_stack_t	*ipst = cire->ire_ipst;
2044 
2045 	ASSERT(cire != NULL);
2046 
2047 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2048 	/*
2049 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2050 	 * (When ip_newroute_v6() created 'cire' for an on-link destn.
2051 	 * it set its cmask from the interface ire's mask)
2052 	 */
2053 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6,
2054 	    NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle,
2055 	    NULL, match_flags, ipst);
2056 	if (ire != NULL)
2057 		return (ire);
2058 	/*
2059 	 * If we didn't find an interface ire above, we can't declare failure.
2060 	 * For backwards compatibility, we need to support prefix routes
2061 	 * pointing to next hop gateways that are not on-link.
2062 	 *
2063 	 * In the resolver/noresolver case, ip_newroute_v6() thinks
2064 	 * it is creating the cache ire for an onlink destination in 'cire'.
2065 	 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6()
2066 	 * cheated it, by doing ire_route_lookup_v6() twice and returning an
2067 	 * interface ire.
2068 	 *
2069 	 * Eg. default	-	gw1			(line 1)
2070 	 *	gw1	-	gw2			(line 2)
2071 	 *	gw2	-	hme0			(line 3)
2072 	 *
2073 	 * In the above example, ip_newroute_v6() tried to create the cache ire
2074 	 * 'cire' for gw1, based on the interface route in line 3. The
2075 	 * ire_ftable_lookup_v6() above fails, because there is
2076 	 * no interface route to reach gw1. (it is gw2). We fall thru below.
2077 	 *
2078 	 * Do a brute force search based on the ihandle in a subset of the
2079 	 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise
2080 	 * things become very complex, since we don't have 'pire' in this
2081 	 * case. (Also note that this method is not possible in the offlink
2082 	 * case because we don't know the mask)
2083 	 */
2084 	i = ip_mask_to_plen_v6(&cire->ire_cmask_v6);
2085 	if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
2086 		return (NULL);
2087 	for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) {
2088 		irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j];
2089 		rw_enter(&irb_ptr->irb_lock, RW_READER);
2090 		for (ire = irb_ptr->irb_ire; ire != NULL;
2091 		    ire = ire->ire_next) {
2092 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
2093 				continue;
2094 			if ((ire->ire_type & IRE_INTERFACE) &&
2095 			    (ire->ire_ihandle == cire->ire_ihandle)) {
2096 				IRE_REFHOLD(ire);
2097 				rw_exit(&irb_ptr->irb_lock);
2098 				return (ire);
2099 			}
2100 		}
2101 		rw_exit(&irb_ptr->irb_lock);
2102 	}
2103 	return (NULL);
2104 }
2105 
2106 
2107 /*
2108  * Locate the interface ire that is tied to the cache ire 'cire' via
2109  * cire->ire_ihandle.
2110  *
2111  * We are trying to create the cache ire for an offlink destn based
2112  * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire
2113  * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in
2114  * the IRE_CACHE case.
2115  */
2116 ire_t *
2117 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire)
2118 {
2119 	ire_t	*ire;
2120 	int	match_flags;
2121 	in6_addr_t	gw_addr;
2122 	ipif_t		*gw_ipif;
2123 	ip_stack_t	*ipst = cire->ire_ipst;
2124 
2125 	ASSERT(cire != NULL && pire != NULL);
2126 
2127 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2128 	/*
2129 	 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only
2130 	 * for on-link hosts. We should never be here for onlink.
2131 	 * Thus, use MATCH_IRE_ILL_GROUP.
2132 	 */
2133 	if (pire->ire_ipif != NULL)
2134 		match_flags |= MATCH_IRE_ILL_GROUP;
2135 	/*
2136 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2137 	 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set
2138 	 * its cmask from the interface ire's mask)
2139 	 */
2140 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0,
2141 	    IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2142 	    NULL, match_flags, ipst);
2143 	if (ire != NULL)
2144 		return (ire);
2145 	/*
2146 	 * If we didn't find an interface ire above, we can't declare failure.
2147 	 * For backwards compatibility, we need to support prefix routes
2148 	 * pointing to next hop gateways that are not on-link.
2149 	 *
2150 	 * Assume we are trying to ping some offlink destn, and we have the
2151 	 * routing table below.
2152 	 *
2153 	 * Eg.	default	- gw1		<--- pire	(line 1)
2154 	 *	gw1	- gw2				(line 2)
2155 	 *	gw2	- hme0				(line 3)
2156 	 *
2157 	 * If we already have a cache ire for gw1 in 'cire', the
2158 	 * ire_ftable_lookup_v6 above would have failed, since there is no
2159 	 * interface ire to reach gw1. We will fallthru below.
2160 	 *
2161 	 * Here we duplicate the steps that ire_ftable_lookup_v6() did in
2162 	 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case.
2163 	 * The differences are the following
2164 	 * i.   We want the interface ire only, so we call
2165 	 *	ire_ftable_lookup_v6() instead of ire_route_lookup_v6()
2166 	 * ii.  We look for only prefix routes in the 1st call below.
2167 	 * ii.  We want to match on the ihandle in the 2nd call below.
2168 	 */
2169 	match_flags =  MATCH_IRE_TYPE;
2170 	if (pire->ire_ipif != NULL)
2171 		match_flags |= MATCH_IRE_ILL_GROUP;
2172 
2173 	mutex_enter(&pire->ire_lock);
2174 	gw_addr = pire->ire_gateway_addr_v6;
2175 	mutex_exit(&pire->ire_lock);
2176 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET,
2177 	    pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
2178 	if (ire == NULL)
2179 		return (NULL);
2180 	/*
2181 	 * At this point 'ire' corresponds to the entry shown in line 2.
2182 	 * gw_addr is 'gw2' in the example above.
2183 	 */
2184 	mutex_enter(&ire->ire_lock);
2185 	gw_addr = ire->ire_gateway_addr_v6;
2186 	mutex_exit(&ire->ire_lock);
2187 	gw_ipif = ire->ire_ipif;
2188 	ire_refrele(ire);
2189 
2190 	match_flags |= MATCH_IRE_IHANDLE;
2191 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE,
2192 	    gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2193 	    NULL, match_flags, ipst);
2194 	return (ire);
2195 }
2196 
2197 /*
2198  * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER
2199  * ire associated with the specified ipif.
2200  *
2201  * This might occasionally be called when IPIF_UP is not set since
2202  * the IPV6_MULTICAST_IF as well as creating interface routes
2203  * allows specifying a down ipif (ipif_lookup* match ipifs that are down).
2204  *
2205  * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on
2206  * the ipif this routine might return NULL.
2207  * (Sometimes called as writer though not required by this function.)
2208  */
2209 ire_t *
2210 ipif_to_ire_v6(const ipif_t *ipif)
2211 {
2212 	ire_t	*ire;
2213 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
2214 
2215 	ASSERT(ipif->ipif_isv6);
2216 	if (ipif->ipif_ire_type == IRE_LOOPBACK) {
2217 		ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL,
2218 		    IRE_LOOPBACK, ipif, ALL_ZONES, NULL,
2219 		    (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst);
2220 	} else if (ipif->ipif_flags & IPIF_POINTOPOINT) {
2221 		/* In this case we need to lookup destination address. */
2222 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr,
2223 		    &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES,
2224 		    0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
2225 		    MATCH_IRE_MASK), ipst);
2226 	} else {
2227 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet,
2228 		    &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL,
2229 		    ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
2230 		    MATCH_IRE_MASK), ipst);
2231 	}
2232 	return (ire);
2233 }
2234 
2235 /*
2236  * Return B_TRUE if a multirt route is resolvable
2237  * (or if no route is resolved yet), B_FALSE otherwise.
2238  * This only works in the global zone.
2239  */
2240 boolean_t
2241 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl,
2242     ip_stack_t *ipst)
2243 {
2244 	ire_t	*first_fire;
2245 	ire_t	*first_cire;
2246 	ire_t	*fire;
2247 	ire_t	*cire;
2248 	irb_t	*firb;
2249 	irb_t	*cirb;
2250 	int	unres_cnt = 0;
2251 	boolean_t resolvable = B_FALSE;
2252 
2253 	/* Retrieve the first IRE_HOST that matches the destination */
2254 	first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST,
2255 	    NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE |
2256 	    MATCH_IRE_SECATTR, ipst);
2257 
2258 	/* No route at all */
2259 	if (first_fire == NULL) {
2260 		return (B_TRUE);
2261 	}
2262 
2263 	firb = first_fire->ire_bucket;
2264 	ASSERT(firb);
2265 
2266 	/* Retrieve the first IRE_CACHE ire for that destination. */
2267 	first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst);
2268 
2269 	/* No resolved route. */
2270 	if (first_cire == NULL) {
2271 		ire_refrele(first_fire);
2272 		return (B_TRUE);
2273 	}
2274 
2275 	/* At least one route is resolved. */
2276 
2277 	cirb = first_cire->ire_bucket;
2278 	ASSERT(cirb);
2279 
2280 	/* Count the number of routes to that dest that are declared. */
2281 	IRB_REFHOLD(firb);
2282 	for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2283 		if (!(fire->ire_flags & RTF_MULTIRT))
2284 			continue;
2285 		if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp))
2286 			continue;
2287 		unres_cnt++;
2288 	}
2289 	IRB_REFRELE(firb);
2290 
2291 
2292 	/* Then subtract the number of routes to that dst that are resolved */
2293 	IRB_REFHOLD(cirb);
2294 	for (cire = first_cire; cire != NULL; cire = cire->ire_next) {
2295 		if (!(cire->ire_flags & RTF_MULTIRT))
2296 			continue;
2297 		if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp))
2298 			continue;
2299 		if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
2300 			continue;
2301 		unres_cnt--;
2302 	}
2303 	IRB_REFRELE(cirb);
2304 
2305 	/* At least one route is unresolved; search for a resolvable route. */
2306 	if (unres_cnt > 0)
2307 		resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire,
2308 		    MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst);
2309 
2310 	if (first_fire)
2311 		ire_refrele(first_fire);
2312 
2313 	if (first_cire)
2314 		ire_refrele(first_cire);
2315 
2316 	return (resolvable);
2317 }
2318 
2319 
2320 /*
2321  * Return B_TRUE and update *ire_arg and *fire_arg
2322  * if at least one resolvable route is found.
2323  * Return B_FALSE otherwise (all routes are resolved or
2324  * the remaining unresolved routes are all unresolvable).
2325  * This only works in the global zone.
2326  */
2327 boolean_t
2328 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags,
2329     const ts_label_t *tsl, ip_stack_t *ipst)
2330 {
2331 	clock_t	delta;
2332 	ire_t	*best_fire = NULL;
2333 	ire_t	*best_cire = NULL;
2334 	ire_t	*first_fire;
2335 	ire_t	*first_cire;
2336 	ire_t	*fire;
2337 	ire_t	*cire;
2338 	irb_t	*firb = NULL;
2339 	irb_t	*cirb = NULL;
2340 	ire_t	*gw_ire;
2341 	boolean_t	already_resolved;
2342 	boolean_t	res;
2343 	in6_addr_t	v6dst;
2344 	in6_addr_t	v6gw;
2345 
2346 	ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, "
2347 	    "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags));
2348 
2349 	ASSERT(ire_arg);
2350 	ASSERT(fire_arg);
2351 
2352 	/* Not an IRE_HOST ire; give up. */
2353 	if ((*fire_arg == NULL) ||
2354 	    ((*fire_arg)->ire_type != IRE_HOST)) {
2355 		return (B_FALSE);
2356 	}
2357 
2358 	/* This is the first IRE_HOST ire for that destination. */
2359 	first_fire = *fire_arg;
2360 	firb = first_fire->ire_bucket;
2361 	ASSERT(firb);
2362 
2363 	mutex_enter(&first_fire->ire_lock);
2364 	v6dst = first_fire->ire_addr_v6;
2365 	mutex_exit(&first_fire->ire_lock);
2366 
2367 	ip2dbg(("ire_multirt_lookup_v6: dst %08x\n",
2368 	    ntohl(V4_PART_OF_V6(v6dst))));
2369 
2370 	/*
2371 	 * Retrieve the first IRE_CACHE ire for that destination;
2372 	 * if we don't find one, no route for that dest is
2373 	 * resolved yet.
2374 	 */
2375 	first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst);
2376 	if (first_cire) {
2377 		cirb = first_cire->ire_bucket;
2378 	}
2379 
2380 	ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire));
2381 
2382 	/*
2383 	 * Search for a resolvable route, giving the top priority
2384 	 * to routes that can be resolved without any call to the resolver.
2385 	 */
2386 	IRB_REFHOLD(firb);
2387 
2388 	if (!IN6_IS_ADDR_MULTICAST(&v6dst)) {
2389 		/*
2390 		 * For all multiroute IRE_HOST ires for that destination,
2391 		 * check if the route via the IRE_HOST's gateway is
2392 		 * resolved yet.
2393 		 */
2394 		for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2395 
2396 			if (!(fire->ire_flags & RTF_MULTIRT))
2397 				continue;
2398 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2399 				continue;
2400 
2401 			if (fire->ire_gw_secattr != NULL &&
2402 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
2403 				continue;
2404 			}
2405 
2406 			mutex_enter(&fire->ire_lock);
2407 			v6gw = fire->ire_gateway_addr_v6;
2408 			mutex_exit(&fire->ire_lock);
2409 
2410 			ip2dbg(("ire_multirt_lookup_v6: fire %p, "
2411 			    "ire_addr %08x, ire_gateway_addr %08x\n",
2412 			    (void *)fire,
2413 			    ntohl(V4_PART_OF_V6(fire->ire_addr_v6)),
2414 			    ntohl(V4_PART_OF_V6(v6gw))));
2415 
2416 			already_resolved = B_FALSE;
2417 
2418 			if (first_cire) {
2419 				ASSERT(cirb);
2420 
2421 				IRB_REFHOLD(cirb);
2422 				/*
2423 				 * For all IRE_CACHE ires for that
2424 				 * destination.
2425 				 */
2426 				for (cire = first_cire;
2427 				    cire != NULL;
2428 				    cire = cire->ire_next) {
2429 
2430 					if (!(cire->ire_flags & RTF_MULTIRT))
2431 						continue;
2432 					if (!IN6_ARE_ADDR_EQUAL(
2433 					    &cire->ire_addr_v6, &v6dst))
2434 						continue;
2435 					if (cire->ire_marks &
2436 					    (IRE_MARK_CONDEMNED|
2437 					    IRE_MARK_HIDDEN))
2438 						continue;
2439 
2440 					if (cire->ire_gw_secattr != NULL &&
2441 					    tsol_ire_match_gwattr(cire,
2442 					    tsl) != 0) {
2443 						continue;
2444 					}
2445 
2446 					/*
2447 					 * Check if the IRE_CACHE's gateway
2448 					 * matches the IRE_HOST's gateway.
2449 					 */
2450 					if (IN6_ARE_ADDR_EQUAL(
2451 					    &cire->ire_gateway_addr_v6,
2452 					    &v6gw)) {
2453 						already_resolved = B_TRUE;
2454 						break;
2455 					}
2456 				}
2457 				IRB_REFRELE(cirb);
2458 			}
2459 
2460 			/*
2461 			 * This route is already resolved;
2462 			 * proceed with next one.
2463 			 */
2464 			if (already_resolved) {
2465 				ip2dbg(("ire_multirt_lookup_v6: found cire %p, "
2466 				    "already resolved\n", (void *)cire));
2467 				continue;
2468 			}
2469 
2470 			/*
2471 			 * The route is unresolved; is it actually
2472 			 * resolvable, i.e. is there a cache or a resolver
2473 			 * for the gateway?
2474 			 */
2475 			gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL,
2476 			    ALL_ZONES, tsl, MATCH_IRE_RECURSIVE |
2477 			    MATCH_IRE_SECATTR, ipst);
2478 
2479 			ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n",
2480 			    (void *)gw_ire));
2481 
2482 			/*
2483 			 * This route can be resolved without any call to the
2484 			 * resolver; if the MULTIRT_CACHEGW flag is set,
2485 			 * give the top priority to this ire and exit the
2486 			 * loop.
2487 			 * This occurs when an resolver reply is processed
2488 			 * through ip_wput_nondata()
2489 			 */
2490 			if ((flags & MULTIRT_CACHEGW) &&
2491 			    (gw_ire != NULL) &&
2492 			    (gw_ire->ire_type & IRE_CACHETABLE)) {
2493 				/*
2494 				 * Release the resolver associated to the
2495 				 * previous candidate best ire, if any.
2496 				 */
2497 				if (best_cire) {
2498 					ire_refrele(best_cire);
2499 					ASSERT(best_fire);
2500 				}
2501 
2502 				best_fire = fire;
2503 				best_cire = gw_ire;
2504 
2505 				ip2dbg(("ire_multirt_lookup_v6: found top prio "
2506 				    "best_fire %p, best_cire %p\n",
2507 				    (void *)best_fire, (void *)best_cire));
2508 				break;
2509 			}
2510 
2511 			/*
2512 			 * Compute the time elapsed since our preceding
2513 			 * attempt to  resolve that route.
2514 			 * If the MULTIRT_USESTAMP flag is set, we take that
2515 			 * route into account only if this time interval
2516 			 * exceeds ip_multirt_resolution_interval;
2517 			 * this prevents us from attempting to resolve a
2518 			 * broken route upon each sending of a packet.
2519 			 */
2520 			delta = lbolt - fire->ire_last_used_time;
2521 			delta = TICK_TO_MSEC(delta);
2522 
2523 			res = (boolean_t)
2524 			    ((delta > ipst->
2525 			    ips_ip_multirt_resolution_interval) ||
2526 			    (!(flags & MULTIRT_USESTAMP)));
2527 
2528 			ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, "
2529 			    "res %d\n",
2530 			    (void *)fire, delta, res));
2531 
2532 			if (res) {
2533 				/*
2534 				 * A resolver exists for the gateway: save
2535 				 * the current IRE_HOST ire as a candidate
2536 				 * best ire. If we later discover that a
2537 				 * top priority ire exists (i.e. no need to
2538 				 * call the resolver), then this new ire
2539 				 * will be preferred to the current one.
2540 				 */
2541 				if (gw_ire != NULL) {
2542 					if (best_fire == NULL) {
2543 						ASSERT(best_cire == NULL);
2544 
2545 						best_fire = fire;
2546 						best_cire = gw_ire;
2547 
2548 						ip2dbg(("ire_multirt_lookup_v6:"
2549 						    "found candidate "
2550 						    "best_fire %p, "
2551 						    "best_cire %p\n",
2552 						    (void *)best_fire,
2553 						    (void *)best_cire));
2554 
2555 						/*
2556 						 * If MULTIRT_CACHEGW is not
2557 						 * set, we ignore the top
2558 						 * priority ires that can
2559 						 * be resolved without any
2560 						 * call to the resolver;
2561 						 * In that case, there is
2562 						 * actually no need
2563 						 * to continue the loop.
2564 						 */
2565 						if (!(flags &
2566 						    MULTIRT_CACHEGW)) {
2567 							break;
2568 						}
2569 						continue;
2570 					}
2571 				} else {
2572 					/*
2573 					 * No resolver for the gateway: the
2574 					 * route is not resolvable.
2575 					 * If the MULTIRT_SETSTAMP flag is
2576 					 * set, we stamp the IRE_HOST ire,
2577 					 * so we will not select it again
2578 					 * during this resolution interval.
2579 					 */
2580 					if (flags & MULTIRT_SETSTAMP)
2581 						fire->ire_last_used_time =
2582 						    lbolt;
2583 				}
2584 			}
2585 
2586 			if (gw_ire != NULL)
2587 				ire_refrele(gw_ire);
2588 		}
2589 	} else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */
2590 
2591 		for (fire = first_fire;
2592 		    fire != NULL;
2593 		    fire = fire->ire_next) {
2594 
2595 			if (!(fire->ire_flags & RTF_MULTIRT))
2596 				continue;
2597 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2598 				continue;
2599 
2600 			if (fire->ire_gw_secattr != NULL &&
2601 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
2602 				continue;
2603 			}
2604 
2605 			already_resolved = B_FALSE;
2606 
2607 			mutex_enter(&fire->ire_lock);
2608 			v6gw = fire->ire_gateway_addr_v6;
2609 			mutex_exit(&fire->ire_lock);
2610 
2611 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2612 			    IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl,
2613 			    MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE |
2614 			    MATCH_IRE_SECATTR, ipst);
2615 
2616 			/* No resolver for the gateway; we skip this ire. */
2617 			if (gw_ire == NULL) {
2618 				continue;
2619 			}
2620 
2621 			if (first_cire) {
2622 
2623 				IRB_REFHOLD(cirb);
2624 				/*
2625 				 * For all IRE_CACHE ires for that
2626 				 * destination.
2627 				 */
2628 				for (cire = first_cire;
2629 				    cire != NULL;
2630 				    cire = cire->ire_next) {
2631 
2632 					if (!(cire->ire_flags & RTF_MULTIRT))
2633 						continue;
2634 					if (!IN6_ARE_ADDR_EQUAL(
2635 					    &cire->ire_addr_v6, &v6dst))
2636 						continue;
2637 					if (cire->ire_marks &
2638 					    (IRE_MARK_CONDEMNED|
2639 					    IRE_MARK_HIDDEN))
2640 						continue;
2641 
2642 					if (cire->ire_gw_secattr != NULL &&
2643 					    tsol_ire_match_gwattr(cire,
2644 					    tsl) != 0) {
2645 						continue;
2646 					}
2647 
2648 					/*
2649 					 * Cache entries are linked to the
2650 					 * parent routes using the parent handle
2651 					 * (ire_phandle). If no cache entry has
2652 					 * the same handle as fire, fire is
2653 					 * still unresolved.
2654 					 */
2655 					ASSERT(cire->ire_phandle != 0);
2656 					if (cire->ire_phandle ==
2657 					    fire->ire_phandle) {
2658 						already_resolved = B_TRUE;
2659 						break;
2660 					}
2661 				}
2662 				IRB_REFRELE(cirb);
2663 			}
2664 
2665 			/*
2666 			 * This route is already resolved; proceed with
2667 			 * next one.
2668 			 */
2669 			if (already_resolved) {
2670 				ire_refrele(gw_ire);
2671 				continue;
2672 			}
2673 
2674 			/*
2675 			 * Compute the time elapsed since our preceding
2676 			 * attempt to resolve that route.
2677 			 * If the MULTIRT_USESTAMP flag is set, we take
2678 			 * that route into account only if this time
2679 			 * interval exceeds ip_multirt_resolution_interval;
2680 			 * this prevents us from attempting to resolve a
2681 			 * broken route upon each sending of a packet.
2682 			 */
2683 			delta = lbolt - fire->ire_last_used_time;
2684 			delta = TICK_TO_MSEC(delta);
2685 
2686 			res = (boolean_t)
2687 			    ((delta > ipst->
2688 			    ips_ip_multirt_resolution_interval) ||
2689 			    (!(flags & MULTIRT_USESTAMP)));
2690 
2691 			ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, "
2692 			    "flags %04x, res %d\n",
2693 			    (void *)fire, delta, flags, res));
2694 
2695 			if (res) {
2696 				if (best_cire) {
2697 					/*
2698 					 * Release the resolver associated
2699 					 * to the preceding candidate best
2700 					 * ire, if any.
2701 					 */
2702 					ire_refrele(best_cire);
2703 					ASSERT(best_fire);
2704 				}
2705 				best_fire = fire;
2706 				best_cire = gw_ire;
2707 				continue;
2708 			}
2709 
2710 			ire_refrele(gw_ire);
2711 		}
2712 	}
2713 
2714 	if (best_fire) {
2715 		IRE_REFHOLD(best_fire);
2716 	}
2717 	IRB_REFRELE(firb);
2718 
2719 	/* Release the first IRE_CACHE we initially looked up, if any. */
2720 	if (first_cire)
2721 		ire_refrele(first_cire);
2722 
2723 	/* Found a resolvable route. */
2724 	if (best_fire) {
2725 		ASSERT(best_cire);
2726 
2727 		if (*fire_arg)
2728 			ire_refrele(*fire_arg);
2729 		if (*ire_arg)
2730 			ire_refrele(*ire_arg);
2731 
2732 		/*
2733 		 * Update the passed arguments with the
2734 		 * resolvable multirt route we found
2735 		 */
2736 		*fire_arg = best_fire;
2737 		*ire_arg = best_cire;
2738 
2739 		ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, "
2740 		    "*fire_arg %p, *ire_arg %p\n",
2741 		    (void *)best_fire, (void *)best_cire));
2742 
2743 		return (B_TRUE);
2744 	}
2745 
2746 	ASSERT(best_cire == NULL);
2747 
2748 	ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, "
2749 	    "*ire_arg %p\n",
2750 	    (void *)*fire_arg, (void *)*ire_arg));
2751 
2752 	/* No resolvable route. */
2753 	return (B_FALSE);
2754 }
2755 
2756 
2757 /*
2758  * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp'
2759  * that goes through 'ipif'. As a fallback, a route that goes through
2760  * ipif->ipif_ill can be returned.
2761  */
2762 ire_t *
2763 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp)
2764 {
2765 	ire_t	*ire;
2766 	ire_t	*save_ire = NULL;
2767 	ire_t   *gw_ire;
2768 	irb_t   *irb;
2769 	in6_addr_t v6gw;
2770 	int	match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL;
2771 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
2772 
2773 	ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0,
2774 	    NULL, MATCH_IRE_DEFAULT, ipst);
2775 
2776 	if (ire == NULL)
2777 		return (NULL);
2778 
2779 	irb = ire->ire_bucket;
2780 	ASSERT(irb);
2781 
2782 	IRB_REFHOLD(irb);
2783 	ire_refrele(ire);
2784 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
2785 		if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) ||
2786 		    (ipif->ipif_zoneid != ire->ire_zoneid &&
2787 		    ire->ire_zoneid != ALL_ZONES)) {
2788 			continue;
2789 		}
2790 
2791 		switch (ire->ire_type) {
2792 		case IRE_DEFAULT:
2793 		case IRE_PREFIX:
2794 		case IRE_HOST:
2795 			mutex_enter(&ire->ire_lock);
2796 			v6gw = ire->ire_gateway_addr_v6;
2797 			mutex_exit(&ire->ire_lock);
2798 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2799 			    IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0,
2800 			    NULL, match_flags, ipst);
2801 
2802 			if (gw_ire != NULL) {
2803 				if (save_ire != NULL) {
2804 					ire_refrele(save_ire);
2805 				}
2806 				IRE_REFHOLD(ire);
2807 				if (gw_ire->ire_ipif == ipif) {
2808 					ire_refrele(gw_ire);
2809 
2810 					IRB_REFRELE(irb);
2811 					return (ire);
2812 				}
2813 				ire_refrele(gw_ire);
2814 				save_ire = ire;
2815 			}
2816 			break;
2817 		case IRE_IF_NORESOLVER:
2818 		case IRE_IF_RESOLVER:
2819 			if (ire->ire_ipif == ipif) {
2820 				if (save_ire != NULL) {
2821 					ire_refrele(save_ire);
2822 				}
2823 				IRE_REFHOLD(ire);
2824 
2825 				IRB_REFRELE(irb);
2826 				return (ire);
2827 			}
2828 			break;
2829 		}
2830 	}
2831 	IRB_REFRELE(irb);
2832 
2833 	return (save_ire);
2834 }
2835 
2836 /*
2837  * This is the implementation of the IPv6 IRE cache lookup procedure.
2838  * Separating the interface from the implementation allows additional
2839  * flexibility when specifying search criteria.
2840  */
2841 static ire_t *
2842 ip6_ctable_lookup_impl(ire_ctable_args_t *margs)
2843 {
2844 	irb_t			*irb_ptr;
2845 	ire_t			*ire;
2846 	ip_stack_t		*ipst = margs->ict_ipst;
2847 
2848 	if ((margs->ict_flags &
2849 	    (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
2850 	    (margs->ict_ipif == NULL)) {
2851 		return (NULL);
2852 	}
2853 
2854 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
2855 	    *((in6_addr_t *)(margs->ict_addr)),
2856 	    ipst->ips_ip6_cache_table_size)];
2857 	rw_enter(&irb_ptr->irb_lock, RW_READER);
2858 	for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) {
2859 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
2860 			continue;
2861 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
2862 		if (ire_match_args_v6(ire, (in6_addr_t *)margs->ict_addr,
2863 		    &ire->ire_mask_v6, (in6_addr_t *)margs->ict_gateway,
2864 		    margs->ict_type, margs->ict_ipif, margs->ict_zoneid, 0,
2865 		    margs->ict_tsl, margs->ict_flags)) {
2866 			IRE_REFHOLD(ire);
2867 			rw_exit(&irb_ptr->irb_lock);
2868 			return (ire);
2869 		}
2870 	}
2871 
2872 	rw_exit(&irb_ptr->irb_lock);
2873 	return (NULL);
2874 }
2875