xref: /titanic_50/usr/src/uts/common/inet/ip/ip6_ire.c (revision 9bbf5ba14ae201f78f3d6b47a9fac96d68649275)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 1990 Mentat Inc.
27  */
28 
29 /*
30  * This file contains routines that manipulate Internet Routing Entries (IREs).
31  */
32 #include <sys/types.h>
33 #include <sys/stream.h>
34 #include <sys/stropts.h>
35 #include <sys/ddi.h>
36 #include <sys/cmn_err.h>
37 
38 #include <sys/systm.h>
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <net/if.h>
42 #include <net/route.h>
43 #include <netinet/in.h>
44 #include <net/if_dl.h>
45 #include <netinet/ip6.h>
46 #include <netinet/icmp6.h>
47 
48 #include <inet/common.h>
49 #include <inet/mi.h>
50 #include <inet/ip.h>
51 #include <inet/ip6.h>
52 #include <inet/ip_ndp.h>
53 #include <inet/ip_if.h>
54 #include <inet/ip_ire.h>
55 #include <inet/ipclassifier.h>
56 #include <inet/nd.h>
57 #include <sys/kmem.h>
58 #include <sys/zone.h>
59 
60 #include <sys/tsol/label.h>
61 #include <sys/tsol/tnet.h>
62 
63 static	ire_t	ire_null;
64 
65 static ire_t	*ire_ihandle_lookup_onlink_v6(ire_t *cire);
66 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr,
67     const in6_addr_t *mask, const in6_addr_t *gateway, int type,
68     const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
69     const ts_label_t *tsl, int match_flags);
70 static	ire_t	*ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
71     const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *,
72     ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t,
73     const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);
74 static	ire_t	*ip6_ctable_lookup_impl(ire_ctable_args_t *);
75 
76 /*
77  * Initialize the ire that is specific to IPv6 part and call
78  * ire_init_common to finish it.
79  */
80 static ire_t *
81 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask,
82     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
83     uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type,
84     ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle,
85     uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc,
86     tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
87 {
88 
89 	/*
90 	 * Reject IRE security attribute creation/initialization
91 	 * if system is not running in Trusted mode.
92 	 */
93 	if ((gc != NULL || gcgrp != NULL) && !is_system_labeled())
94 		return (NULL);
95 
96 
97 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced);
98 	ire->ire_addr_v6 = *v6addr;
99 
100 	if (v6src_addr != NULL)
101 		ire->ire_src_addr_v6 = *v6src_addr;
102 	if (v6mask != NULL) {
103 		ire->ire_mask_v6 = *v6mask;
104 		ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6);
105 	}
106 	if (v6gateway != NULL)
107 		ire->ire_gateway_addr_v6 = *v6gateway;
108 
109 	if (type == IRE_CACHE && v6cmask != NULL)
110 		ire->ire_cmask_v6 = *v6cmask;
111 
112 	/*
113 	 * Multirouted packets need to have a fragment header added so that
114 	 * the receiver is able to discard duplicates according to their
115 	 * fragment identifier.
116 	 */
117 	if (type == IRE_CACHE && (flags & RTF_MULTIRT)) {
118 		ire->ire_frag_flag = IPH_FRAG_HDR;
119 	}
120 
121 	/* ire_init_common will free the mblks upon encountering any failure */
122 	if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif,
123 	    phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst))
124 		return (NULL);
125 
126 	return (ire);
127 }
128 
129 /*
130  * Similar to ire_create_v6 except that it is called only when
131  * we want to allocate ire as an mblk e.g. we have a external
132  * resolver. Do we need this in IPv6 ?
133  *
134  * IPv6 initializes the ire_nce in ire_add_v6, which expects to
135  * find the ire_nce to be null when it is called. So, although
136  * we have a src_nce parameter (in the interest of matching up with
137  * the argument list of the v4 version), we ignore the src_nce
138  * argument here.
139  */
140 /* ARGSUSED */
141 ire_t *
142 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
143     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
144     nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type,
145     ipif_t *ipif, const in6_addr_t *v6cmask,
146     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
147     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
148 {
149 	ire_t	*ire;
150 	ire_t	*ret_ire;
151 	mblk_t	*mp;
152 
153 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
154 
155 	/* Allocate the new IRE. */
156 	mp = allocb(sizeof (ire_t), BPRI_MED);
157 	if (mp == NULL) {
158 		ip1dbg(("ire_create_mp_v6: alloc failed\n"));
159 		return (NULL);
160 	}
161 
162 	ire = (ire_t *)mp->b_rptr;
163 	mp->b_wptr = (uchar_t *)&ire[1];
164 
165 	/* Start clean. */
166 	*ire = ire_null;
167 	ire->ire_mp = mp;
168 	mp->b_datap->db_type = IRE_DB_TYPE;
169 
170 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
171 	    NULL, rfq, stq, type, ipif, v6cmask, phandle,
172 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
173 
174 	if (ret_ire == NULL) {
175 		freeb(ire->ire_mp);
176 		return (NULL);
177 	}
178 	return (ire);
179 }
180 
181 /*
182  * ire_create_v6 is called to allocate and initialize a new IRE.
183  *
184  * NOTE : This is called as writer sometimes though not required
185  * by this function.
186  *
187  * See comments above ire_create_mp_v6() for the rationale behind the
188  * unused src_nce argument.
189  */
190 /* ARGSUSED */
191 ire_t *
192 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
193     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
194     uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq,
195     ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask,
196     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
197     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
198 {
199 	ire_t	*ire;
200 	ire_t	*ret_ire;
201 
202 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
203 
204 	ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
205 	if (ire == NULL) {
206 		ip1dbg(("ire_create_v6: alloc failed\n"));
207 		return (NULL);
208 	}
209 	*ire = ire_null;
210 
211 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
212 	    max_fragp, rfq, stq, type, ipif, v6cmask, phandle,
213 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
214 
215 	if (ret_ire == NULL) {
216 		kmem_cache_free(ire_cache, ire);
217 		return (NULL);
218 	}
219 	ASSERT(ret_ire == ire);
220 	return (ire);
221 }
222 
223 /*
224  * Find an IRE_INTERFACE for the multicast group.
225  * Allows different routes for multicast addresses
226  * in the unicast routing table (akin to FF::0/8 but could be more specific)
227  * which point at different interfaces. This is used when IPV6_MULTICAST_IF
228  * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
229  * specify the interface to join on.
230  *
231  * Supports link-local addresses by following the ipif/ill when recursing.
232  */
233 ire_t *
234 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst)
235 {
236 	ire_t	*ire;
237 	ipif_t	*ipif = NULL;
238 	int	match_flags = MATCH_IRE_TYPE;
239 	in6_addr_t gw_addr_v6;
240 
241 	ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL,
242 	    zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst);
243 
244 	/* We search a resolvable ire in case of multirouting. */
245 	if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) {
246 		ire_t *cire = NULL;
247 		/*
248 		 * If the route is not resolvable, the looked up ire
249 		 * may be changed here. In that case, ire_multirt_lookup_v6()
250 		 * IRE_REFRELE the original ire and change it.
251 		 */
252 		(void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW,
253 		    NULL, ipst);
254 		if (cire != NULL)
255 			ire_refrele(cire);
256 	}
257 	if (ire == NULL)
258 		return (NULL);
259 	/*
260 	 * Make sure we follow ire_ipif.
261 	 *
262 	 * We need to determine the interface route through
263 	 * which the gateway will be reached.
264 	 */
265 	if (ire->ire_ipif != NULL) {
266 		ipif = ire->ire_ipif;
267 		match_flags |= MATCH_IRE_ILL;
268 	}
269 
270 	switch (ire->ire_type) {
271 	case IRE_DEFAULT:
272 	case IRE_PREFIX:
273 	case IRE_HOST:
274 		mutex_enter(&ire->ire_lock);
275 		gw_addr_v6 = ire->ire_gateway_addr_v6;
276 		mutex_exit(&ire->ire_lock);
277 		ire_refrele(ire);
278 		ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0,
279 		    IRE_INTERFACE, ipif, NULL, zoneid, 0,
280 		    NULL, match_flags, ipst);
281 		return (ire);
282 	case IRE_IF_NORESOLVER:
283 	case IRE_IF_RESOLVER:
284 		return (ire);
285 	default:
286 		ire_refrele(ire);
287 		return (NULL);
288 	}
289 }
290 
291 /*
292  * Return any local address.  We use this to target ourselves
293  * when the src address was specified as 'default'.
294  * Preference for IRE_LOCAL entries.
295  */
296 ire_t *
297 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst)
298 {
299 	ire_t	*ire;
300 	irb_t	*irb;
301 	ire_t	*maybe = NULL;
302 	int i;
303 
304 	for (i = 0; i < ipst->ips_ip6_cache_table_size;  i++) {
305 		irb = &ipst->ips_ip_cache_table_v6[i];
306 		if (irb->irb_ire == NULL)
307 			continue;
308 		rw_enter(&irb->irb_lock, RW_READER);
309 		for (ire = irb->irb_ire; ire; ire = ire->ire_next) {
310 			if ((ire->ire_marks & IRE_MARK_CONDEMNED) ||
311 			    ire->ire_zoneid != zoneid &&
312 			    ire->ire_zoneid != ALL_ZONES)
313 				continue;
314 			switch (ire->ire_type) {
315 			case IRE_LOOPBACK:
316 				if (maybe == NULL) {
317 					IRE_REFHOLD(ire);
318 					maybe = ire;
319 				}
320 				break;
321 			case IRE_LOCAL:
322 				if (maybe != NULL) {
323 					ire_refrele(maybe);
324 				}
325 				IRE_REFHOLD(ire);
326 				rw_exit(&irb->irb_lock);
327 				return (ire);
328 			}
329 		}
330 		rw_exit(&irb->irb_lock);
331 	}
332 	return (maybe);
333 }
334 
335 /*
336  * This function takes a mask and returns number of bits set in the
337  * mask (the represented prefix length).  Assumes a contiguous mask.
338  */
339 int
340 ip_mask_to_plen_v6(const in6_addr_t *v6mask)
341 {
342 	int		bits;
343 	int		plen = IPV6_ABITS;
344 	int		i;
345 
346 	for (i = 3; i >= 0; i--) {
347 		if (v6mask->s6_addr32[i] == 0) {
348 			plen -= 32;
349 			continue;
350 		}
351 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
352 		if (bits == 0)
353 			break;
354 		plen -= bits;
355 	}
356 
357 	return (plen);
358 }
359 
360 /*
361  * Convert a prefix length to the mask for that prefix.
362  * Returns the argument bitmask.
363  */
364 in6_addr_t *
365 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
366 {
367 	uint32_t *ptr;
368 
369 	if (plen < 0 || plen > IPV6_ABITS)
370 		return (NULL);
371 	*bitmask = ipv6_all_zeros;
372 
373 	ptr = (uint32_t *)bitmask;
374 	while (plen > 32) {
375 		*ptr++ = 0xffffffffU;
376 		plen -= 32;
377 	}
378 	*ptr = htonl(0xffffffffU << (32 - plen));
379 	return (bitmask);
380 }
381 
/*
 * Add a fully initialized IRE to an appropriate
 * table based on ire_type.
 *
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_LOCAL/IRE_LOOPBACK and IRE_CACHE.
 * (IRE_BROADCAST is never expected here; the function asserts that.)
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 *
 * Arguments:
 *	ire_p	in: the ire to add.  out: the ire the caller should use,
 *		or NULL on failure.
 *	q, mp, func
 *		passed through unchanged to ire_atomic_start().
 *
 * Return values:
 *	0	The ire was inserted and *ire_p still points at it, or an
 *		equivalent entry already existed, in which case the new ire
 *		is deleted and *ire_p points at the existing one.  Either
 *		way the returned ire carries a reference the caller must
 *		release.
 *	EINVAL	Unrecognized ire_type; the interface ire of an xresolv
 *		IRE_CACHE is missing or condemned; or no usable nce exists
 *		for an IRE_CACHE.
 *	ENOMEM	The forwarding table for this mask length could not be
 *		allocated.
 *	other	Whatever non-zero value ire_atomic_start() returned.
 *
 * On every failure the ire is passed to ire_delete() and *ire_p is set
 * to NULL.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t	ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;
	uint_t	marks = 0;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * IREs with source addresses hosted on interfaces that are under IPMP
	 * should be hidden so that applications don't accidentally end up
	 * sending packets with test addresses as their source addresses, or
	 * sending out interfaces that are e.g. IFF_INACTIVE.  Hide them here.
	 * (We let IREs with unspecified source addresses slip through since
	 * ire_send_v6() will delete them automatically.)
	 */
	if (ire->ire_ipif != NULL && IS_UNDER_IPMP(ire->ire_ipif->ipif_ill) &&
	    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_src_addr_v6)) {
		DTRACE_PROBE1(ipmp__mark__testhidden, ire_t *, ire);
		marks |= IRE_MARK_TESTHIDDEN;
	}

	/*
	 * Normalize the ire according to its type: host-like types get an
	 * all-ones mask, every type except IRE_LOCAL/IRE_LOOPBACK picks up
	 * the marks computed above, and gateway routes lose their source
	 * address unless RTF_SETSRC is set.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		ire->ire_marks |= marks;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		ire->ire_marks |= marks;
		break;
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
	case IRE_DEFAULT:
		ire->ire_marks |= marks;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		ire->ire_marks |= marks;
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head (hash bucket). */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			/*
			 * No table exists yet for this mask length.  Build
			 * one outside the lock and install it under
			 * ips_ire_ft_init_lock, unless another thread got
			 * there first.
			 */
			irb_t *ptr;
			int i;

			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		/* IRE goes into the cache table, hashed on address only. */
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Rather than looking at the packet, we depend on the above for
	 * MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;

	/*
	 * If we are creating a hidden IRE, make sure we search for
	 * hidden IREs when searching for duplicates below.
	 * Otherwise, we might find an IRE on some other interface
	 * that's not marked hidden.
	 */
	if (ire->ire_marks & IRE_MARK_TESTHIDDEN)
		flags |= MATCH_IRE_MARK_TESTHIDDEN;

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): pire is only assigned in the xresolv IRE_CACHE case
	 * above, yet the non-multicast branch below dereferences it
	 * unconditionally.  Presumably every other caller supplies a
	 * non-NULL ire_max_fragp -- confirm against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t  max_frag;

		/* Snapshot the value, then drop the pointer to it. */
		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		const in6_addr_t *addr_v6;
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire  will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		/* Look up the gateway if there is one, else the destination. */
		addr_v6 = &ire->ire_gateway_addr_v6;
		if (IN6_IS_ADDR_UNSPECIFIED(addr_v6))
			addr_v6 = &ire->ire_addr_v6;

		/* nce fastpath is per-ill; don't match across illgrp */
		nce = ndp_lookup_v6(ill, B_FALSE, addr_v6, B_TRUE);
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * The label sits inside this body so that the
			 * "no nce found" path above can share the cleanup.
			 * nce is NULL on that path, hence the NULL checks.
			 */
failed:
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			/* The ire takes over the reference on the nce. */
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 *
	 * NOTE(review): the cast treats the bucket pointer as the address
	 * of its first-ire pointer, which presumably relies on irb_ire
	 * being the first member of irb_t -- confirm against the irb_t
	 * definition.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for the new ire on its ipif and on the ill behind stq. */
	if (ire->ire_ipif != NULL) {
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif,
		    (char *), "ire", (void *), ire);
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill,
			    (char *), "ire", (void *), ire);
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	/* The insertion is complete; end the atomic section. */
	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ires with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}
912 
913 /*
914  * Search for all HOST REDIRECT routes that are
915  * pointing at the specified gateway and
916  * delete them. This routine is called only
917  * when a default gateway is going away.
918  */
919 static void
920 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
921 {
922 	irb_t *irb_ptr;
923 	irb_t *irb;
924 	ire_t *ire;
925 	in6_addr_t gw_addr_v6;
926 	int i;
927 
928 	/* get the hash table for HOST routes */
929 	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
930 	if (irb_ptr == NULL)
931 		return;
932 	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
933 		irb = &irb_ptr[i];
934 		IRB_REFHOLD(irb);
935 		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
936 			if (!(ire->ire_flags & RTF_DYNAMIC))
937 				continue;
938 			mutex_enter(&ire->ire_lock);
939 			gw_addr_v6 = ire->ire_gateway_addr_v6;
940 			mutex_exit(&ire->ire_lock);
941 			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
942 				ire_delete(ire);
943 		}
944 		IRB_REFRELE(irb);
945 	}
946 }
947 
948 /*
949  * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
950  * of ip_ire_clookup_and_delete. The difference being this function does not
951  * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
952  * different than IPv4 in that, regardless of the presence of a cache entry
953  * for this address, an ire_walk_v6 is done. Another difference is that unlike
954  * in the case of IPv4 this does not take an ipif_t argument, since it is only
955  * called by ip_arp_news and the match is always only on the address.
956  */
957 void
958 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst)
959 {
960 	irb_t		*irb;
961 	ire_t		*cire;
962 	boolean_t	found = B_FALSE;
963 
964 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
965 	    ipst->ips_ip6_cache_table_size)];
966 	IRB_REFHOLD(irb);
967 	for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) {
968 		if (cire->ire_marks & IRE_MARK_CONDEMNED)
969 			continue;
970 		if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) {
971 
972 			/* This signifies start of a match */
973 			if (!found)
974 				found = B_TRUE;
975 			if (cire->ire_type == IRE_CACHE) {
976 				if (cire->ire_nce != NULL)
977 					ndp_delete(cire->ire_nce);
978 				ire_delete_v6(cire);
979 			}
980 		/* End of the match */
981 		} else if (found)
982 			break;
983 	}
984 	IRB_REFRELE(irb);
985 }
986 
987 /*
988  * Delete the specified IRE.
989  * All calls should use ire_delete().
990  * Sometimes called as writer though not required by this function.
991  *
992  * NOTE : This function is called only if the ire was added
993  * in the list.
994  */
995 void
996 ire_delete_v6(ire_t *ire)
997 {
998 	in6_addr_t gw_addr_v6;
999 	ip_stack_t	*ipst = ire->ire_ipst;
1000 
1001 	ASSERT(ire->ire_refcnt >= 1);
1002 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1003 
1004 	if (ire->ire_type != IRE_CACHE)
1005 		ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
1006 	if (ire->ire_type == IRE_DEFAULT) {
1007 		/*
1008 		 * when a default gateway is going away
1009 		 * delete all the host redirects pointing at that
1010 		 * gateway.
1011 		 */
1012 		mutex_enter(&ire->ire_lock);
1013 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1014 		mutex_exit(&ire->ire_lock);
1015 		ire_delete_host_redirects_v6(&gw_addr_v6, ipst);
1016 	}
1017 }
1018 
1019 /*
1020  * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect
1021  * entries.
1022  */
1023 /*ARGSUSED1*/
1024 void
1025 ire_delete_cache_v6(ire_t *ire, char *arg)
1026 {
1027 	char    addrstr1[INET6_ADDRSTRLEN];
1028 	char    addrstr2[INET6_ADDRSTRLEN];
1029 
1030 	if ((ire->ire_type & IRE_CACHE) ||
1031 	    (ire->ire_flags & RTF_DYNAMIC)) {
1032 		ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n",
1033 		    inet_ntop(AF_INET6, &ire->ire_addr_v6,
1034 		    addrstr1, sizeof (addrstr1)),
1035 		    ire->ire_type,
1036 		    inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6,
1037 		    addrstr2, sizeof (addrstr2))));
1038 		ire_delete(ire);
1039 	}
1040 
1041 }
1042 
1043 /*
1044  * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries
1045  * that have a given gateway address.
1046  */
1047 void
1048 ire_delete_cache_gw_v6(ire_t *ire, char *addr)
1049 {
1050 	in6_addr_t	*gw_addr = (in6_addr_t *)addr;
1051 	char		buf1[INET6_ADDRSTRLEN];
1052 	char		buf2[INET6_ADDRSTRLEN];
1053 	in6_addr_t	ire_gw_addr_v6;
1054 
1055 	if (!(ire->ire_type & IRE_CACHE) &&
1056 	    !(ire->ire_flags & RTF_DYNAMIC))
1057 		return;
1058 
1059 	mutex_enter(&ire->ire_lock);
1060 	ire_gw_addr_v6 = ire->ire_gateway_addr_v6;
1061 	mutex_exit(&ire->ire_lock);
1062 
1063 	if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) {
1064 		ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n",
1065 		    inet_ntop(AF_INET6, &ire->ire_src_addr_v6,
1066 		    buf1, sizeof (buf1)),
1067 		    ire->ire_type,
1068 		    inet_ntop(AF_INET6, &ire_gw_addr_v6,
1069 		    buf2, sizeof (buf2))));
1070 		ire_delete(ire);
1071 	}
1072 }
1073 
1074 /*
1075  * Remove all IRE_CACHE entries that match
1076  * the ire specified.  (Sometimes called
1077  * as writer though not required by this function.)
1078  *
1079  * The flag argument indicates if the
1080  * flush request is due to addition
1081  * of new route (IRE_FLUSH_ADD) or deletion of old
1082  * route (IRE_FLUSH_DELETE).
1083  *
1084  * This routine takes only the IREs from the forwarding
1085  * table and flushes the corresponding entries from
1086  * the cache table.
1087  *
1088  * When flushing due to the deletion of an old route, it
1089  * just checks the cache handles (ire_phandle and ire_ihandle) and
1090  * deletes the ones that match.
1091  *
1092  * When flushing due to the creation of a new route, it checks
1093  * if a cache entry's address matches the one in the IRE and
1094  * that the cache entry's parent has a less specific mask than the
1095  * one in IRE. The destination of such a cache entry could be the
1096  * gateway for other cache entries, so we need to flush those as
1097  * well by looking for gateway addresses matching the IRE's address.
1098  */
1099 void
1100 ire_flush_cache_v6(ire_t *ire, int flag)
1101 {
1102 	int i;
1103 	ire_t *cire;
1104 	irb_t *irb;
1105 	ip_stack_t	*ipst = ire->ire_ipst;
1106 
1107 	if (ire->ire_type & IRE_CACHE)
1108 		return;
1109 
1110 	/*
1111 	 * If a default is just created, there is no point
1112 	 * in going through the cache, as there will not be any
1113 	 * cached ires.
1114 	 */
1115 	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
1116 		return;
1117 	if (flag == IRE_FLUSH_ADD) {
1118 		/*
1119 		 * This selective flush is
1120 		 * due to the addition of
1121 		 * new IRE.
1122 		 */
1123 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
1124 			irb = &ipst->ips_ip_cache_table_v6[i];
1125 			if ((cire = irb->irb_ire) == NULL)
1126 				continue;
1127 			IRB_REFHOLD(irb);
1128 			for (cire = irb->irb_ire; cire != NULL;
1129 			    cire = cire->ire_next) {
1130 				if (cire->ire_type != IRE_CACHE)
1131 					continue;
1132 				/*
1133 				 * If 'cire' belongs to the same subnet
1134 				 * as the new ire being added, and 'cire'
1135 				 * is derived from a prefix that is less
1136 				 * specific than the new ire being added,
1137 				 * we need to flush 'cire'; for instance,
1138 				 * when a new interface comes up.
1139 				 */
1140 				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
1141 				    ire->ire_mask_v6, ire->ire_addr_v6) &&
1142 				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
1143 				    ire->ire_masklen))) {
1144 					ire_delete(cire);
1145 					continue;
1146 				}
1147 				/*
1148 				 * This is the case when the ire_gateway_addr
1149 				 * of 'cire' belongs to the same subnet as
1150 				 * the new ire being added.
1151 				 * Flushing such ires is sometimes required to
1152 				 * avoid misrouting: say we have a machine with
1153 				 * two interfaces (I1 and I2), a default router
1154 				 * R on the I1 subnet, and a host route to an
1155 				 * off-link destination D with a gateway G on
1156 				 * the I2 subnet.
1157 				 * Under normal operation, we will have an
1158 				 * on-link cache entry for G and an off-link
1159 				 * cache entry for D with G as ire_gateway_addr,
1160 				 * traffic to D will reach its destination
1161 				 * through gateway G.
1162 				 * If the administrator does 'ifconfig I2 down',
1163 				 * the cache entries for D and G will be
1164 				 * flushed. However, G will now be resolved as
1165 				 * an off-link destination using R (the default
1166 				 * router) as gateway. Then D will also be
1167 				 * resolved as an off-link destination using G
1168 				 * as gateway - this behavior is due to
1169 				 * compatibility reasons, see comment in
1170 				 * ire_ihandle_lookup_offlink(). Traffic to D
1171 				 * will go to the router R and probably won't
1172 				 * reach the destination.
1173 				 * The administrator then does 'ifconfig I2 up'.
1174 				 * Since G is on the I2 subnet, this routine
1175 				 * will flush its cache entry. It must also
1176 				 * flush the cache entry for D, otherwise
1177 				 * traffic will stay misrouted until the IRE
1178 				 * times out.
1179 				 */
1180 				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
1181 				    ire->ire_mask_v6, ire->ire_addr_v6)) {
1182 					ire_delete(cire);
1183 					continue;
1184 				}
1185 			}
1186 			IRB_REFRELE(irb);
1187 		}
1188 	} else {
1189 		/*
1190 		 * delete the cache entries based on
1191 		 * handle in the IRE as this IRE is
1192 		 * being deleted/changed.
1193 		 */
1194 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
1195 			irb = &ipst->ips_ip_cache_table_v6[i];
1196 			if ((cire = irb->irb_ire) == NULL)
1197 				continue;
1198 			IRB_REFHOLD(irb);
1199 			for (cire = irb->irb_ire; cire != NULL;
1200 			    cire = cire->ire_next) {
1201 				if (cire->ire_type != IRE_CACHE)
1202 					continue;
1203 				if ((cire->ire_phandle == 0 ||
1204 				    cire->ire_phandle != ire->ire_phandle) &&
1205 				    (cire->ire_ihandle == 0 ||
1206 				    cire->ire_ihandle != ire->ire_ihandle))
1207 					continue;
1208 				ire_delete(cire);
1209 			}
1210 			IRB_REFRELE(irb);
1211 		}
1212 	}
1213 }
1214 
1215 /*
1216  * Matches the arguments passed with the values in the ire.
1217  *
1218  * Note: for match types that match using "ipif" passed in, ipif
1219  * must be checked for non-NULL before calling this routine.
1220  */
1221 static boolean_t
1222 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
1223     const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid,
1224     uint32_t ihandle, const ts_label_t *tsl, int match_flags)
1225 {
1226 	in6_addr_t masked_addr;
1227 	in6_addr_t gw_addr_v6;
1228 	ill_t *ire_ill = NULL, *dst_ill;
1229 	ill_t *ipif_ill = NULL;
1230 	ipif_t	*src_ipif;
1231 
1232 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1233 	ASSERT(addr != NULL);
1234 	ASSERT(mask != NULL);
1235 	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
1236 	ASSERT((!(match_flags & MATCH_IRE_ILL)) ||
1237 	    (ipif != NULL && ipif->ipif_isv6));
1238 
1239 	/*
1240 	 * If MATCH_IRE_MARK_TESTHIDDEN is set, then only return the IRE if it
1241 	 * is in fact hidden, to ensure the caller gets the right one.  One
1242 	 * exception: if the caller passed MATCH_IRE_IHANDLE, then they
1243 	 * already know the identity of the given IRE_INTERFACE entry and
1244 	 * there's no point trying to hide it from them.
1245 	 */
1246 	if (ire->ire_marks & IRE_MARK_TESTHIDDEN) {
1247 		if (match_flags & MATCH_IRE_IHANDLE)
1248 			match_flags |= MATCH_IRE_MARK_TESTHIDDEN;
1249 
1250 		if (!(match_flags & MATCH_IRE_MARK_TESTHIDDEN))
1251 			return (B_FALSE);
1252 	}
1253 
1254 	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
1255 	    ire->ire_zoneid != ALL_ZONES) {
1256 		/*
1257 		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
1258 		 * valid and does not match that of ire_zoneid, a failure to
1259 		 * match is reported at this point. Otherwise, since some IREs
1260 		 * that are available in the global zone can be used in local
1261 		 * zones, additional checks need to be performed:
1262 		 *
1263 		 *	IRE_CACHE and IRE_LOOPBACK entries should
1264 		 *	never be matched in this situation.
1265 		 *
1266 		 *	IRE entries that have an interface associated with them
1267 		 *	should in general not match unless they are an IRE_LOCAL
1268 		 *	or in the case when MATCH_IRE_DEFAULT has been set in
1269 		 *	the caller.  In the case of the former, checking of the
1270 		 *	other fields supplied should take place.
1271 		 *
1272 		 *	In the case where MATCH_IRE_DEFAULT has been set,
1273 		 *	all of the ipif's associated with the IRE's ill are
1274 		 *	checked to see if there is a matching zoneid.  If any
1275 		 *	one ipif has a matching zoneid, this IRE is a
1276 		 *	potential candidate so checking of the other fields
1277 		 *	takes place.
1278 		 *
1279 		 *	In the case where the IRE_INTERFACE has a usable source
1280 		 *	address (indicated by ill_usesrc_ifindex) in the
1281 		 *	correct zone then it's permitted to return this IRE
1282 		 */
1283 		if (match_flags & MATCH_IRE_ZONEONLY)
1284 			return (B_FALSE);
1285 		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
1286 			return (B_FALSE);
1287 		/*
1288 		 * Note, IRE_INTERFACE can have the stq as NULL. For
1289 		 * example, if the default multicast route is tied to
1290 		 * the loopback address.
1291 		 */
1292 		if ((ire->ire_type & IRE_INTERFACE) &&
1293 		    (ire->ire_stq != NULL)) {
1294 			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
1295 			/*
1296 			 * If there is a usable source address in the
1297 			 * zone, then it's ok to return an
1298 			 * IRE_INTERFACE
1299 			 */
1300 			if ((dst_ill->ill_usesrc_ifindex != 0) &&
1301 			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
1302 			    B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid))
1303 			    != NULL) {
1304 				ip3dbg(("ire_match_args: src_ipif %p"
1305 				    " dst_ill %p", (void *)src_ipif,
1306 				    (void *)dst_ill));
1307 				ipif_refrele(src_ipif);
1308 			} else {
1309 				ip3dbg(("ire_match_args: src_ipif NULL"
1310 				    " dst_ill %p\n", (void *)dst_ill));
1311 				return (B_FALSE);
1312 			}
1313 		}
1314 		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
1315 		    !(ire->ire_type & IRE_INTERFACE)) {
1316 			ipif_t	*tipif;
1317 
1318 			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
1319 				return (B_FALSE);
1320 			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
1321 			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
1322 			    tipif != NULL; tipif = tipif->ipif_next) {
1323 				if (IPIF_CAN_LOOKUP(tipif) &&
1324 				    (tipif->ipif_flags & IPIF_UP) &&
1325 				    (tipif->ipif_zoneid == zoneid ||
1326 				    tipif->ipif_zoneid == ALL_ZONES))
1327 					break;
1328 			}
1329 			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
1330 			if (tipif == NULL)
1331 				return (B_FALSE);
1332 		}
1333 	}
1334 
1335 	if (match_flags & MATCH_IRE_GW) {
1336 		mutex_enter(&ire->ire_lock);
1337 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1338 		mutex_exit(&ire->ire_lock);
1339 	}
1340 
1341 	/*
1342 	 * For IRE_CACHE entries, MATCH_IRE_ILL means that somebody wants to
1343 	 * send out ire_stq (ire_ipif for IRE_CACHE entries is just the means
1344 	 * of getting a source address -- i.e., ire_src_addr_v6 ==
1345 	 * ire->ire_ipif->ipif_v6src_addr).  ire_to_ill() handles this.
1346 	 *
1347 	 * NOTE: For IPMP, MATCH_IRE_ILL usually matches any ill in the group.
1348 	 * However, if MATCH_IRE_MARK_TESTHIDDEN is set (i.e., the IRE is for
1349 	 * IPMP test traffic), then the ill must match exactly.
1350 	 */
1351 	if (match_flags & MATCH_IRE_ILL) {
1352 		ire_ill = ire_to_ill(ire);
1353 		ipif_ill = ipif->ipif_ill;
1354 	}
1355 
1356 	/* No ire_addr_v6 bits set past the mask */
1357 	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
1358 	    ire->ire_addr_v6));
1359 	V6_MASK_COPY(*addr, *mask, masked_addr);
1360 
1361 	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
1362 	    ((!(match_flags & MATCH_IRE_GW)) ||
1363 	    IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
1364 	    ((!(match_flags & MATCH_IRE_TYPE)) ||
1365 	    (ire->ire_type & type)) &&
1366 	    ((!(match_flags & MATCH_IRE_SRC)) ||
1367 	    IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
1368 	    &ipif->ipif_v6src_addr)) &&
1369 	    ((!(match_flags & MATCH_IRE_IPIF)) ||
1370 	    (ire->ire_ipif == ipif)) &&
1371 	    ((!(match_flags & MATCH_IRE_MARK_TESTHIDDEN)) ||
1372 	    (ire->ire_marks & IRE_MARK_TESTHIDDEN)) &&
1373 	    ((!(match_flags & MATCH_IRE_ILL)) ||
1374 	    (ire_ill == ipif_ill ||
1375 	    (!(match_flags & MATCH_IRE_MARK_TESTHIDDEN) &&
1376 	    ire_ill != NULL && IS_IN_SAME_ILLGRP(ipif_ill, ire_ill)))) &&
1377 	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
1378 	    (ire->ire_ihandle == ihandle)) &&
1379 	    ((!(match_flags & MATCH_IRE_SECATTR)) ||
1380 	    (!is_system_labeled()) ||
1381 	    (tsol_ire_match_gwattr(ire, tsl) == 0))) {
1382 		/* We found the matched IRE */
1383 		return (B_TRUE);
1384 	}
1385 	return (B_FALSE);
1386 }
1387 
1388 /*
1389  * Lookup for a route in all the tables
1390  */
1391 ire_t *
1392 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1393     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
1394     zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst)
1395 {
1396 	ire_t *ire = NULL;
1397 
1398 	/*
1399 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1400 	 * MATCH_IRE_ILL is set.
1401 	 */
1402 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && (ipif == NULL))
1403 		return (NULL);
1404 
1405 	/*
1406 	 * might be asking for a cache lookup,
1407 	 * This is not best way to lookup cache,
1408 	 * user should call ire_cache_lookup directly.
1409 	 *
1410 	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
1411 	 * in the forwarding table, if the applicable type flags were set.
1412 	 */
1413 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
1414 		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
1415 		    tsl, flags, ipst);
1416 		if (ire != NULL)
1417 			return (ire);
1418 	}
1419 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
1420 		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
1421 		    pire, zoneid, 0, tsl, flags, ipst);
1422 	}
1423 	return (ire);
1424 }
1425 
/*
 * Lookup a route in forwarding table.
 * specific lookup is indicated by passing the
 * required parameters and indicating the
 * match required in flag field.
 *
 * Looking for default route can be done in three ways
 * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
 *    along with other matches.
 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
 *    field along with other matches.
 * 3) if the destination and mask are passed as zeros.
 *
 * A request to return a default route if no route
 * is found, can be specified by setting MATCH_IRE_DEFAULT
 * in flags.
 *
 * It does not support recursion more than one level. It
 * will do recursive lookup only when the lookup maps to
 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
 *
 * If the routing table is setup to allow more than one level
 * of recursion, the cleaning up cache table will not work resulting
 * in invalid routing.
 *
 * Supports link-local addresses by following the ipif/ill when recursing.
 *
 * The ire returned has been reference-held (IRE_REFHOLD) by this function
 * or by the nested lookup that produced it.
 *
 * NOTE : When this function returns NULL, pire has already been released.
 *	  pire is valid only when this function successfully returns an
 *	  ire.
 */
ire_t *
ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
    const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
    zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags,
    ip_stack_t *ipst)
{
	irb_t *irb_ptr;
	ire_t	*rire;
	ire_t *ire = NULL;
	ire_t	*saved_ire;
	nce_t	*nce;
	int i;
	in6_addr_t gw_addr_v6;

	ASSERT(addr != NULL);
	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
	ASSERT(ipif == NULL || ipif->ipif_isv6);

	/*
	 * When we return NULL from this function, we should make
	 * sure that *pire is NULL so that the callers will not
	 * wrongly REFRELE the pire.
	 */
	if (pire != NULL)
		*pire = NULL;
	/*
	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC or
	 * MATCH_IRE_ILL is set.
	 */
	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) && (ipif == NULL))
		return (NULL);

	/*
	 * If the mask is known, the lookup
	 * is simple, if the mask is not known
	 * we need to search.
	 */
	if (flags & MATCH_IRE_MASK) {
		uint_t masklen;

		masklen = ip_mask_to_plen_v6(mask);
		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL)
			return (NULL);
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
		    ipst->ips_ip6_ftable_hash_size)]);
		/* found_ire is entered with this reader lock still held. */
		rw_enter(&irb_ptr->irb_lock, RW_READER);
		for (ire = irb_ptr->irb_ire; ire != NULL;
		    ire = ire->ire_next) {
			if (ire->ire_marks & IRE_MARK_CONDEMNED)
				continue;
			if (ire_match_args_v6(ire, addr, mask, gateway, type,
			    ipif, zoneid, ihandle, tsl, flags))
				goto found_ire;
		}
		rw_exit(&irb_ptr->irb_lock);
	} else {
		/*
		 * In this case we don't know the mask, we need to
		 * search the table assuming different mask sizes.
		 * we start with 128 bit mask, we don't allow default here.
		 */
		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
			in6_addr_t tmpmask;

			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
				continue;
			(void) ip_plen_to_mask_v6(i, &tmpmask);
			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
			    ipst->ips_ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {
				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;
				/* Match against each candidate's own mask. */
				if (ire_match_args_v6(ire, addr,
				    &ire->ire_mask_v6, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}

	/*
	 * We come here if no route has yet been found.
	 *
	 * Handle the case where default route is
	 * requested by specifying type as one of the possible
	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
	 *
	 * If MATCH_IRE_MASK is specified, then the appropriate default route
	 * would have been found above if it exists so it isn't looked up here.
	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
	 * searched for later.
	 */
	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
		if (ipst->ips_ip_forwarding_table_v6[0] != NULL) {
			/* addr & mask is zero for defaults */
			irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][
			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
			    ipst->ips_ip6_ftable_hash_size)];
			rw_enter(&irb_ptr->irb_lock, RW_READER);
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags))
					goto found_ire;
			}
			rw_exit(&irb_ptr->irb_lock);
		}
	}
	/*
	 * We come here only if no route is found.
	 * see if the default route can be used which is allowed
	 * only if the default matching criteria is specified.
	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
	 * entries. However, the ip_forwarding_table_v6[0] also contains
	 * interface routes thus the count can be zero.
	 */
	saved_ire = NULL;
	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
	    MATCH_IRE_DEFAULT) {
		ire_t	*ire_origin;
		uint_t	g_index;
		uint_t	index;

		if (ipst->ips_ip_forwarding_table_v6[0] == NULL)
			return (NULL);
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0];

		/*
		 * Keep a tab on the bucket while looking the IRE_DEFAULT
		 * entries. We need to keep track of a particular IRE
		 * (ire_origin) so this ensures that it will not be unlinked
		 * from the hash list during the recursive lookup below.
		 */
		IRB_REFHOLD(irb_ptr);
		ire = irb_ptr->irb_ire;
		if (ire == NULL) {
			IRB_REFRELE(irb_ptr);
			return (NULL);
		}

		/*
		 * Get the index first, since it can be changed by other
		 * threads. Then get to the right default route skipping
		 * default interface routes if any. As we hold a reference on
		 * the IRE bucket, ipv6_ire_default_count can only increase so
		 * we can't reach the end of the hash list unexpectedly.
		 */
		if (ipst->ips_ipv6_ire_default_count != 0) {
			g_index = ipst->ips_ipv6_ire_default_index++;
			index = g_index % ipst->ips_ipv6_ire_default_count;
			while (index != 0) {
				if (!(ire->ire_type & IRE_INTERFACE))
					index--;
				ire = ire->ire_next;
			}
			ASSERT(ire != NULL);
		} else {
			/*
			 * No default route, so we only have default interface
			 * routes: don't enter the first loop.
			 */
			ire = NULL;
		}

		/*
		 * Round-robin the default routers list looking for a neighbor
		 * that matches the passed in parameters and is reachable.  If
		 * none found, just return a route from the default router list
		 * if it exists. If we can't find a default route (IRE_DEFAULT),
		 * look for interface default routes.
		 * We start with the ire we found above and we walk the hash
		 * list until we're back where we started, see
		 * ire_get_next_default_ire(). It doesn't matter if default
		 * routes are added or deleted by other threads - we know this
		 * ire will stay in the list because we hold a reference on the
		 * ire bucket.
		 * NB: if we only have interface default routes, ire is NULL so
		 * we don't even enter this loop (see above).
		 */
		ire_origin = ire;
		for (; ire != NULL;
		    ire = ire_get_next_default_ire(ire, ire_origin)) {

			if (ire_match_args_v6(ire, addr,
			    &ipv6_all_zeros, gateway, type, ipif,
			    zoneid, ihandle, tsl, flags)) {
				int match_flags;

				/*
				 * We have something to work with.
				 * If we can find a resolved/reachable
				 * entry, we will use this. Otherwise
				 * we'll try to find an entry that has
				 * a resolved cache entry. We will fallback
				 * on this if we don't find anything else.
				 */
				if (saved_ire == NULL)
					saved_ire = ire;
				mutex_enter(&ire->ire_lock);
				gw_addr_v6 = ire->ire_gateway_addr_v6;
				mutex_exit(&ire->ire_lock);
				match_flags = MATCH_IRE_ILL | MATCH_IRE_SECATTR;
				/* Look for a cache entry for the gateway. */
				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
				    0, ire->ire_ipif, zoneid, tsl, match_flags,
				    ipst);
				if (rire != NULL) {
					nce = rire->ire_nce;
					if (nce != NULL &&
					    NCE_ISREACHABLE(nce) &&
					    nce->nce_flags & NCE_F_ISROUTER) {
						/*
						 * Reachable router: return
						 * this default route.
						 */
						ire_refrele(rire);
						IRE_REFHOLD(ire);
						IRB_REFRELE(irb_ptr);
						goto found_ire_held;
					} else if (nce != NULL &&
					    !(nce->nce_flags &
					    NCE_F_ISROUTER)) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						if (saved_ire == ire)
							saved_ire = NULL;
					}
					ire_refrele(rire);
				} else if (ipst->
				    ips_ipv6_ire_default_count > 1 &&
				    zoneid != GLOBAL_ZONEID) {
					/*
					 * When we're in a local zone, we're
					 * only interested in default routers
					 * that are reachable through ipifs
					 * within our zone.
					 * The potentially expensive call to
					 * ire_route_lookup_v6() is avoided when
					 * we have only one default route.
					 */
					int ire_match_flags = MATCH_IRE_TYPE |
					    MATCH_IRE_SECATTR;

					if (ire->ire_ipif != NULL) {
						ire_match_flags |=
						    MATCH_IRE_ILL;
					}
					rire = ire_route_lookup_v6(&gw_addr_v6,
					    NULL, NULL, IRE_INTERFACE,
					    ire->ire_ipif, NULL,
					    zoneid, tsl, ire_match_flags, ipst);
					if (rire != NULL) {
						ire_refrele(rire);
						saved_ire = ire;
					} else if (saved_ire == ire) {
						/*
						 * Make sure we don't use
						 * this ire
						 */
						saved_ire = NULL;
					}
				}
			}
		}
		if (saved_ire != NULL) {
			/* No reachable router found; use the fallback. */
			ire = saved_ire;
			IRE_REFHOLD(ire);
			IRB_REFRELE(irb_ptr);
			goto found_ire_held;
		} else {
			/*
			 * Look for a interface default route matching the
			 * args passed in. No round robin here. Just pick
			 * the right one.
			 */
			for (ire = irb_ptr->irb_ire; ire != NULL;
			    ire = ire->ire_next) {

				if (!(ire->ire_type & IRE_INTERFACE))
					continue;

				if (ire->ire_marks & IRE_MARK_CONDEMNED)
					continue;

				if (ire_match_args_v6(ire, addr,
				    &ipv6_all_zeros, gateway, type, ipif,
				    zoneid, ihandle, tsl, flags)) {
					IRE_REFHOLD(ire);
					IRB_REFRELE(irb_ptr);
					goto found_ire_held;
				}
			}
			IRB_REFRELE(irb_ptr);
		}
	}
	ASSERT(ire == NULL);
	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
	return (NULL);
found_ire:
	/*
	 * Reached from the bucket walks above with irb_ptr->irb_lock held
	 * as reader: take a reference on the ire, then drop the lock.
	 */
	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
	IRE_REFHOLD(ire);
	rw_exit(&irb_ptr->irb_lock);

found_ire_held:
	/* Here 'ire' is reference-held and no bucket lock is held. */
	if ((flags & MATCH_IRE_RJ_BHOLE) &&
	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
		return (ire);
	}
	/*
	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
	 * IRE_INTERFACE type was found, return that.  If it was some other
	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
	 * is necessary to fill in the  parent IRE pointed to by pire, and
	 * then lookup the gateway address of  the parent.  For backwards
	 * compatibility, if this lookup returns an
	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
	 * of lookup is done.
	 */
	if (flags & MATCH_IRE_RECURSIVE) {
		const ipif_t *gw_ipif;
		int match_flags = MATCH_IRE_DSTONLY;

		if (ire->ire_type & IRE_INTERFACE)
			return (ire);
		if (pire != NULL)
			*pire = ire;
		/*
		 * If we can't find an IRE_INTERFACE or the caller has not
		 * asked for pire, we need to REFRELE the saved_ire.
		 */
		saved_ire = ire;

		if (ire->ire_ipif != NULL)
			match_flags |= MATCH_IRE_ILL;

		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);

		/* First level: look up the parent's gateway address. */
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
		    ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		}
		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			if (pire == NULL) {
				ire_refrele(saved_ire);
			}
			return (ire);
		}
		/*
		 * Second level: the gateway resolved to something other
		 * than a cache or interface entry, so look up that
		 * entry's gateway, this time restricted by type.
		 */
		match_flags |= MATCH_IRE_TYPE;
		mutex_enter(&ire->ire_lock);
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		mutex_exit(&ire->ire_lock);
		gw_ipif = ire->ire_ipif;
		ire_refrele(ire);
		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
		    NULL, match_flags, ipst);
		if (ire == NULL) {
			/*
			 * In this case we have to deal with the
			 * MATCH_IRE_PARENT flag, which means the
			 * parent has to be returned if ire is NULL.
			 * The aim of this is to have (at least) a starting
			 * ire when we want to look at all of the ires in a
			 * bucket aimed at a single destination (as is the
			 * case in ip_newroute_v6 for the RTF_MULTIRT
			 * flagged routes).
			 */
			if (flags & MATCH_IRE_PARENT) {
				if (pire != NULL) {
					/*
					 * Need an extra REFHOLD, if the
					 * parent ire is returned via both
					 * ire and pire.
					 */
					IRE_REFHOLD(saved_ire);
				}
				ire = saved_ire;
			} else {
				ire_refrele(saved_ire);
				if (pire != NULL)
					*pire = NULL;
			}
			return (ire);
		} else if (pire == NULL) {
			/*
			 * If the caller did not ask for pire, release
			 * it now.
			 */
			ire_refrele(saved_ire);
		}
		return (ire);
	}

	/* Non-recursive lookup: *pire was cleared at entry and stays NULL. */
	ASSERT(pire == NULL || *pire == NULL);
	return (ire);
}
1895 
1896 /*
1897  * Delete the IRE cache for the gateway and all IRE caches whose
1898  * ire_gateway_addr_v6 points to this gateway, and allow them to
1899  * be created on demand by ip_newroute_v6.
1900  */
1901 void
1902 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid,
1903 	ip_stack_t *ipst)
1904 {
1905 	irb_t *irb;
1906 	ire_t *ire;
1907 
1908 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
1909 	    ipst->ips_ip6_cache_table_size)];
1910 	IRB_REFHOLD(irb);
1911 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
1912 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
1913 			continue;
1914 
1915 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
1916 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0,
1917 		    IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) {
1918 			ire_delete(ire);
1919 		}
1920 	}
1921 	IRB_REFRELE(irb);
1922 
1923 	ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst);
1924 }
1925 
1926 /*
1927  * Looks up cache table for a route.
1928  * specific lookup can be indicated by
1929  * passing the MATCH_* flags and the
1930  * necessary parameters.
1931  */
1932 ire_t *
1933 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway,
1934     int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl,
1935     int flags, ip_stack_t *ipst)
1936 {
1937 	ire_ctable_args_t	margs;
1938 
1939 	margs.ict_addr = (void *)addr;
1940 	margs.ict_gateway = (void *)gateway;
1941 	margs.ict_type = type;
1942 	margs.ict_ipif = ipif;
1943 	margs.ict_zoneid = zoneid;
1944 	margs.ict_tsl = tsl;
1945 	margs.ict_flags = flags;
1946 	margs.ict_ipst = ipst;
1947 	margs.ict_wq = NULL;
1948 
1949 	return (ip6_ctable_lookup_impl(&margs));
1950 }
1951 
1952 /*
1953  * Lookup cache.
1954  *
1955  * In general the zoneid has to match (where ALL_ZONES match all of them).
1956  * But for IRE_LOCAL we also need to handle the case where L2 should
1957  * conceptually loop back the packet. This is necessary since neither
1958  * Ethernet drivers nor Ethernet hardware loops back packets sent to their
1959  * own MAC address. This loopback is needed when the normal
1960  * routes (ignoring IREs with different zoneids) would send out the packet on
1961  * the same ill as the ill with which this IRE_LOCAL is associated.
1962  *
1963  * Earlier versions of this code always matched an IRE_LOCAL independently of
1964  * the zoneid. We preserve that earlier behavior when
1965  * ip_restrict_interzone_loopback is turned off.
1966  */
1967 ire_t *
1968 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid,
1969     const ts_label_t *tsl, ip_stack_t *ipst)
1970 {
1971 	irb_t *irb_ptr;
1972 	ire_t *ire;
1973 
1974 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
1975 	    ipst->ips_ip6_cache_table_size)];
1976 	rw_enter(&irb_ptr->irb_lock, RW_READER);
1977 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
1978 		if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_TESTHIDDEN))
1979 			continue;
1980 		if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) {
1981 			/*
1982 			 * Finally, check if the security policy has any
1983 			 * restriction on using this route for the specified
1984 			 * message.
1985 			 */
1986 			if (tsl != NULL &&
1987 			    ire->ire_gw_secattr != NULL &&
1988 			    tsol_ire_match_gwattr(ire, tsl) != 0) {
1989 				continue;
1990 			}
1991 
1992 			if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid ||
1993 			    ire->ire_zoneid == ALL_ZONES) {
1994 				IRE_REFHOLD(ire);
1995 				rw_exit(&irb_ptr->irb_lock);
1996 				return (ire);
1997 			}
1998 
1999 			if (ire->ire_type == IRE_LOCAL) {
2000 				if (ipst->ips_ip_restrict_interzone_loopback &&
2001 				    !ire_local_ok_across_zones(ire, zoneid,
2002 				    (void *)addr, tsl, ipst))
2003 					continue;
2004 
2005 				IRE_REFHOLD(ire);
2006 				rw_exit(&irb_ptr->irb_lock);
2007 				return (ire);
2008 			}
2009 		}
2010 	}
2011 	rw_exit(&irb_ptr->irb_lock);
2012 	return (NULL);
2013 }
2014 
2015 /*
2016  * Locate the interface ire that is tied to the cache ire 'cire' via
2017  * cire->ire_ihandle.
2018  *
2019  * We are trying to create the cache ire for an onlink destn. or
2020  * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER
2021  * case for xresolv interfaces, after the ire has come back from
2022  * an external resolver.
2023  */
2024 static ire_t *
2025 ire_ihandle_lookup_onlink_v6(ire_t *cire)
2026 {
2027 	ire_t	*ire;
2028 	int	match_flags;
2029 	int	i;
2030 	int	j;
2031 	irb_t	*irb_ptr;
2032 	ip_stack_t	*ipst = cire->ire_ipst;
2033 
2034 	ASSERT(cire != NULL);
2035 
2036 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2037 	/*
2038 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2039 	 * (When ip_newroute_v6() created 'cire' for an on-link destn.
2040 	 * it set its cmask from the interface ire's mask)
2041 	 */
2042 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6,
2043 	    NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle,
2044 	    NULL, match_flags, ipst);
2045 	if (ire != NULL)
2046 		return (ire);
2047 	/*
2048 	 * If we didn't find an interface ire above, we can't declare failure.
2049 	 * For backwards compatibility, we need to support prefix routes
2050 	 * pointing to next hop gateways that are not on-link.
2051 	 *
2052 	 * In the resolver/noresolver case, ip_newroute_v6() thinks
2053 	 * it is creating the cache ire for an onlink destination in 'cire'.
2054 	 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6()
2055 	 * cheated it, by doing ire_route_lookup_v6() twice and returning an
2056 	 * interface ire.
2057 	 *
2058 	 * Eg. default	-	gw1			(line 1)
2059 	 *	gw1	-	gw2			(line 2)
2060 	 *	gw2	-	hme0			(line 3)
2061 	 *
2062 	 * In the above example, ip_newroute_v6() tried to create the cache ire
2063 	 * 'cire' for gw1, based on the interface route in line 3. The
2064 	 * ire_ftable_lookup_v6() above fails, because there is
2065 	 * no interface route to reach gw1. (it is gw2). We fall thru below.
2066 	 *
2067 	 * Do a brute force search based on the ihandle in a subset of the
2068 	 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise
2069 	 * things become very complex, since we don't have 'pire' in this
2070 	 * case. (Also note that this method is not possible in the offlink
2071 	 * case because we don't know the mask)
2072 	 */
2073 	i = ip_mask_to_plen_v6(&cire->ire_cmask_v6);
2074 	if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
2075 		return (NULL);
2076 	for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) {
2077 		irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j];
2078 		rw_enter(&irb_ptr->irb_lock, RW_READER);
2079 		for (ire = irb_ptr->irb_ire; ire != NULL;
2080 		    ire = ire->ire_next) {
2081 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
2082 				continue;
2083 			if ((ire->ire_type & IRE_INTERFACE) &&
2084 			    (ire->ire_ihandle == cire->ire_ihandle)) {
2085 				IRE_REFHOLD(ire);
2086 				rw_exit(&irb_ptr->irb_lock);
2087 				return (ire);
2088 			}
2089 		}
2090 		rw_exit(&irb_ptr->irb_lock);
2091 	}
2092 	return (NULL);
2093 }
2094 
2095 
2096 /*
2097  * Locate the interface ire that is tied to the cache ire 'cire' via
2098  * cire->ire_ihandle.
2099  *
2100  * We are trying to create the cache ire for an offlink destn based
2101  * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire
2102  * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in
2103  * the IRE_CACHE case.
2104  */
2105 ire_t *
2106 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire)
2107 {
2108 	ire_t	*ire;
2109 	int	match_flags;
2110 	in6_addr_t	gw_addr;
2111 	ipif_t		*gw_ipif;
2112 	ip_stack_t	*ipst = cire->ire_ipst;
2113 
2114 	ASSERT(cire != NULL && pire != NULL);
2115 
2116 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2117 	if (pire->ire_ipif != NULL)
2118 		match_flags |= MATCH_IRE_ILL;
2119 	/*
2120 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2121 	 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set
2122 	 * its cmask from the interface ire's mask)
2123 	 */
2124 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0,
2125 	    IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2126 	    NULL, match_flags, ipst);
2127 	if (ire != NULL)
2128 		return (ire);
2129 	/*
2130 	 * If we didn't find an interface ire above, we can't declare failure.
2131 	 * For backwards compatibility, we need to support prefix routes
2132 	 * pointing to next hop gateways that are not on-link.
2133 	 *
2134 	 * Assume we are trying to ping some offlink destn, and we have the
2135 	 * routing table below.
2136 	 *
2137 	 * Eg.	default	- gw1		<--- pire	(line 1)
2138 	 *	gw1	- gw2				(line 2)
2139 	 *	gw2	- hme0				(line 3)
2140 	 *
2141 	 * If we already have a cache ire for gw1 in 'cire', the
2142 	 * ire_ftable_lookup_v6 above would have failed, since there is no
2143 	 * interface ire to reach gw1. We will fallthru below.
2144 	 *
2145 	 * Here we duplicate the steps that ire_ftable_lookup_v6() did in
2146 	 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case.
2147 	 * The differences are the following
2148 	 * i.   We want the interface ire only, so we call
2149 	 *	ire_ftable_lookup_v6() instead of ire_route_lookup_v6()
2150 	 * ii.  We look for only prefix routes in the 1st call below.
2151 	 * ii.  We want to match on the ihandle in the 2nd call below.
2152 	 */
2153 	match_flags =  MATCH_IRE_TYPE;
2154 	if (pire->ire_ipif != NULL)
2155 		match_flags |= MATCH_IRE_ILL;
2156 
2157 	mutex_enter(&pire->ire_lock);
2158 	gw_addr = pire->ire_gateway_addr_v6;
2159 	mutex_exit(&pire->ire_lock);
2160 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET,
2161 	    pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
2162 	if (ire == NULL)
2163 		return (NULL);
2164 	/*
2165 	 * At this point 'ire' corresponds to the entry shown in line 2.
2166 	 * gw_addr is 'gw2' in the example above.
2167 	 */
2168 	mutex_enter(&ire->ire_lock);
2169 	gw_addr = ire->ire_gateway_addr_v6;
2170 	mutex_exit(&ire->ire_lock);
2171 	gw_ipif = ire->ire_ipif;
2172 	ire_refrele(ire);
2173 
2174 	match_flags |= MATCH_IRE_IHANDLE;
2175 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE,
2176 	    gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2177 	    NULL, match_flags, ipst);
2178 	return (ire);
2179 }
2180 
2181 /*
2182  * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER
2183  * ire associated with the specified ipif.
2184  *
2185  * This might occasionally be called when IPIF_UP is not set since
2186  * the IPV6_MULTICAST_IF as well as creating interface routes
2187  * allows specifying a down ipif (ipif_lookup* match ipifs that are down).
2188  *
2189  * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on
2190  * the ipif this routine might return NULL.
2191  * (Sometimes called as writer though not required by this function.)
2192  */
2193 ire_t *
2194 ipif_to_ire_v6(const ipif_t *ipif)
2195 {
2196 	ire_t	*ire;
2197 	ip_stack_t *ipst = ipif->ipif_ill->ill_ipst;
2198 	uint_t	match_flags = MATCH_IRE_TYPE | MATCH_IRE_IPIF;
2199 
2200 	/*
2201 	 * IRE_INTERFACE entries for ills under IPMP are IRE_MARK_TESTHIDDEN
2202 	 * so that they aren't accidentally returned.  However, if the
2203 	 * caller's ipif is on an ill under IPMP, there's no need to hide 'em.
2204 	 */
2205 	if (IS_UNDER_IPMP(ipif->ipif_ill))
2206 		match_flags |= MATCH_IRE_MARK_TESTHIDDEN;
2207 
2208 	ASSERT(ipif->ipif_isv6);
2209 	if (ipif->ipif_ire_type == IRE_LOOPBACK) {
2210 		ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL,
2211 		    IRE_LOOPBACK, ipif, ALL_ZONES, NULL, match_flags, ipst);
2212 	} else if (ipif->ipif_flags & IPIF_POINTOPOINT) {
2213 		/* In this case we need to lookup destination address. */
2214 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr,
2215 		    &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES,
2216 		    0, NULL, (match_flags | MATCH_IRE_MASK), ipst);
2217 	} else {
2218 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet,
2219 		    &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL,
2220 		    ALL_ZONES, 0, NULL, (match_flags | MATCH_IRE_MASK), ipst);
2221 	}
2222 	return (ire);
2223 }
2224 
2225 /*
2226  * Return B_TRUE if a multirt route is resolvable
2227  * (or if no route is resolved yet), B_FALSE otherwise.
2228  * This only works in the global zone.
2229  */
2230 boolean_t
2231 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl,
2232     ip_stack_t *ipst)
2233 {
2234 	ire_t	*first_fire;
2235 	ire_t	*first_cire;
2236 	ire_t	*fire;
2237 	ire_t	*cire;
2238 	irb_t	*firb;
2239 	irb_t	*cirb;
2240 	int	unres_cnt = 0;
2241 	boolean_t resolvable = B_FALSE;
2242 
2243 	/* Retrieve the first IRE_HOST that matches the destination */
2244 	first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST,
2245 	    NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE |
2246 	    MATCH_IRE_SECATTR, ipst);
2247 
2248 	/* No route at all */
2249 	if (first_fire == NULL) {
2250 		return (B_TRUE);
2251 	}
2252 
2253 	firb = first_fire->ire_bucket;
2254 	ASSERT(firb);
2255 
2256 	/* Retrieve the first IRE_CACHE ire for that destination. */
2257 	first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst);
2258 
2259 	/* No resolved route. */
2260 	if (first_cire == NULL) {
2261 		ire_refrele(first_fire);
2262 		return (B_TRUE);
2263 	}
2264 
2265 	/* At least one route is resolved. */
2266 
2267 	cirb = first_cire->ire_bucket;
2268 	ASSERT(cirb);
2269 
2270 	/* Count the number of routes to that dest that are declared. */
2271 	IRB_REFHOLD(firb);
2272 	for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2273 		if (!(fire->ire_flags & RTF_MULTIRT))
2274 			continue;
2275 		if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp))
2276 			continue;
2277 		unres_cnt++;
2278 	}
2279 	IRB_REFRELE(firb);
2280 
2281 
2282 	/* Then subtract the number of routes to that dst that are resolved */
2283 	IRB_REFHOLD(cirb);
2284 	for (cire = first_cire; cire != NULL; cire = cire->ire_next) {
2285 		if (!(cire->ire_flags & RTF_MULTIRT))
2286 			continue;
2287 		if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp))
2288 			continue;
2289 		if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_TESTHIDDEN))
2290 			continue;
2291 		unres_cnt--;
2292 	}
2293 	IRB_REFRELE(cirb);
2294 
2295 	/* At least one route is unresolved; search for a resolvable route. */
2296 	if (unres_cnt > 0)
2297 		resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire,
2298 		    MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst);
2299 
2300 	if (first_fire)
2301 		ire_refrele(first_fire);
2302 
2303 	if (first_cire)
2304 		ire_refrele(first_cire);
2305 
2306 	return (resolvable);
2307 }
2308 
2309 
2310 /*
2311  * Return B_TRUE and update *ire_arg and *fire_arg
2312  * if at least one resolvable route is found.
2313  * Return B_FALSE otherwise (all routes are resolved or
2314  * the remaining unresolved routes are all unresolvable).
2315  * This only works in the global zone.
2316  */
2317 boolean_t
2318 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags,
2319     const ts_label_t *tsl, ip_stack_t *ipst)
2320 {
2321 	clock_t	delta;
2322 	ire_t	*best_fire = NULL;
2323 	ire_t	*best_cire = NULL;
2324 	ire_t	*first_fire;
2325 	ire_t	*first_cire;
2326 	ire_t	*fire;
2327 	ire_t	*cire;
2328 	irb_t	*firb = NULL;
2329 	irb_t	*cirb = NULL;
2330 	ire_t	*gw_ire;
2331 	boolean_t	already_resolved;
2332 	boolean_t	res;
2333 	in6_addr_t	v6dst;
2334 	in6_addr_t	v6gw;
2335 
2336 	ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, "
2337 	    "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags));
2338 
2339 	ASSERT(ire_arg);
2340 	ASSERT(fire_arg);
2341 
2342 	/* Not an IRE_HOST ire; give up. */
2343 	if ((*fire_arg == NULL) ||
2344 	    ((*fire_arg)->ire_type != IRE_HOST)) {
2345 		return (B_FALSE);
2346 	}
2347 
2348 	/* This is the first IRE_HOST ire for that destination. */
2349 	first_fire = *fire_arg;
2350 	firb = first_fire->ire_bucket;
2351 	ASSERT(firb);
2352 
2353 	mutex_enter(&first_fire->ire_lock);
2354 	v6dst = first_fire->ire_addr_v6;
2355 	mutex_exit(&first_fire->ire_lock);
2356 
2357 	ip2dbg(("ire_multirt_lookup_v6: dst %08x\n",
2358 	    ntohl(V4_PART_OF_V6(v6dst))));
2359 
2360 	/*
2361 	 * Retrieve the first IRE_CACHE ire for that destination;
2362 	 * if we don't find one, no route for that dest is
2363 	 * resolved yet.
2364 	 */
2365 	first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst);
2366 	if (first_cire) {
2367 		cirb = first_cire->ire_bucket;
2368 	}
2369 
2370 	ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire));
2371 
2372 	/*
2373 	 * Search for a resolvable route, giving the top priority
2374 	 * to routes that can be resolved without any call to the resolver.
2375 	 */
2376 	IRB_REFHOLD(firb);
2377 
2378 	if (!IN6_IS_ADDR_MULTICAST(&v6dst)) {
2379 		/*
2380 		 * For all multiroute IRE_HOST ires for that destination,
2381 		 * check if the route via the IRE_HOST's gateway is
2382 		 * resolved yet.
2383 		 */
2384 		for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2385 
2386 			if (!(fire->ire_flags & RTF_MULTIRT))
2387 				continue;
2388 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2389 				continue;
2390 
2391 			if (fire->ire_gw_secattr != NULL &&
2392 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
2393 				continue;
2394 			}
2395 
2396 			mutex_enter(&fire->ire_lock);
2397 			v6gw = fire->ire_gateway_addr_v6;
2398 			mutex_exit(&fire->ire_lock);
2399 
2400 			ip2dbg(("ire_multirt_lookup_v6: fire %p, "
2401 			    "ire_addr %08x, ire_gateway_addr %08x\n",
2402 			    (void *)fire,
2403 			    ntohl(V4_PART_OF_V6(fire->ire_addr_v6)),
2404 			    ntohl(V4_PART_OF_V6(v6gw))));
2405 
2406 			already_resolved = B_FALSE;
2407 
2408 			if (first_cire) {
2409 				ASSERT(cirb);
2410 
2411 				IRB_REFHOLD(cirb);
2412 				/*
2413 				 * For all IRE_CACHE ires for that
2414 				 * destination.
2415 				 */
2416 				for (cire = first_cire;
2417 				    cire != NULL;
2418 				    cire = cire->ire_next) {
2419 
2420 					if (!(cire->ire_flags & RTF_MULTIRT))
2421 						continue;
2422 					if (!IN6_ARE_ADDR_EQUAL(
2423 					    &cire->ire_addr_v6, &v6dst))
2424 						continue;
2425 					if (cire->ire_marks &
2426 					    (IRE_MARK_CONDEMNED|
2427 					    IRE_MARK_TESTHIDDEN))
2428 						continue;
2429 
2430 					if (cire->ire_gw_secattr != NULL &&
2431 					    tsol_ire_match_gwattr(cire,
2432 					    tsl) != 0) {
2433 						continue;
2434 					}
2435 
2436 					/*
2437 					 * Check if the IRE_CACHE's gateway
2438 					 * matches the IRE_HOST's gateway.
2439 					 */
2440 					if (IN6_ARE_ADDR_EQUAL(
2441 					    &cire->ire_gateway_addr_v6,
2442 					    &v6gw)) {
2443 						already_resolved = B_TRUE;
2444 						break;
2445 					}
2446 				}
2447 				IRB_REFRELE(cirb);
2448 			}
2449 
2450 			/*
2451 			 * This route is already resolved;
2452 			 * proceed with next one.
2453 			 */
2454 			if (already_resolved) {
2455 				ip2dbg(("ire_multirt_lookup_v6: found cire %p, "
2456 				    "already resolved\n", (void *)cire));
2457 				continue;
2458 			}
2459 
2460 			/*
2461 			 * The route is unresolved; is it actually
2462 			 * resolvable, i.e. is there a cache or a resolver
2463 			 * for the gateway?
2464 			 */
2465 			gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL,
2466 			    ALL_ZONES, tsl, MATCH_IRE_RECURSIVE |
2467 			    MATCH_IRE_SECATTR, ipst);
2468 
2469 			ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n",
2470 			    (void *)gw_ire));
2471 
2472 			/*
2473 			 * This route can be resolved without any call to the
2474 			 * resolver; if the MULTIRT_CACHEGW flag is set,
2475 			 * give the top priority to this ire and exit the
2476 			 * loop.
2477 			 * This occurs when an resolver reply is processed
2478 			 * through ip_wput_nondata()
2479 			 */
2480 			if ((flags & MULTIRT_CACHEGW) &&
2481 			    (gw_ire != NULL) &&
2482 			    (gw_ire->ire_type & IRE_CACHETABLE)) {
2483 				/*
2484 				 * Release the resolver associated to the
2485 				 * previous candidate best ire, if any.
2486 				 */
2487 				if (best_cire) {
2488 					ire_refrele(best_cire);
2489 					ASSERT(best_fire);
2490 				}
2491 
2492 				best_fire = fire;
2493 				best_cire = gw_ire;
2494 
2495 				ip2dbg(("ire_multirt_lookup_v6: found top prio "
2496 				    "best_fire %p, best_cire %p\n",
2497 				    (void *)best_fire, (void *)best_cire));
2498 				break;
2499 			}
2500 
2501 			/*
2502 			 * Compute the time elapsed since our preceding
2503 			 * attempt to  resolve that route.
2504 			 * If the MULTIRT_USESTAMP flag is set, we take that
2505 			 * route into account only if this time interval
2506 			 * exceeds ip_multirt_resolution_interval;
2507 			 * this prevents us from attempting to resolve a
2508 			 * broken route upon each sending of a packet.
2509 			 */
2510 			delta = lbolt - fire->ire_last_used_time;
2511 			delta = TICK_TO_MSEC(delta);
2512 
2513 			res = (boolean_t)
2514 			    ((delta > ipst->
2515 			    ips_ip_multirt_resolution_interval) ||
2516 			    (!(flags & MULTIRT_USESTAMP)));
2517 
2518 			ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, "
2519 			    "res %d\n",
2520 			    (void *)fire, delta, res));
2521 
2522 			if (res) {
2523 				/*
2524 				 * A resolver exists for the gateway: save
2525 				 * the current IRE_HOST ire as a candidate
2526 				 * best ire. If we later discover that a
2527 				 * top priority ire exists (i.e. no need to
2528 				 * call the resolver), then this new ire
2529 				 * will be preferred to the current one.
2530 				 */
2531 				if (gw_ire != NULL) {
2532 					if (best_fire == NULL) {
2533 						ASSERT(best_cire == NULL);
2534 
2535 						best_fire = fire;
2536 						best_cire = gw_ire;
2537 
2538 						ip2dbg(("ire_multirt_lookup_v6:"
2539 						    "found candidate "
2540 						    "best_fire %p, "
2541 						    "best_cire %p\n",
2542 						    (void *)best_fire,
2543 						    (void *)best_cire));
2544 
2545 						/*
2546 						 * If MULTIRT_CACHEGW is not
2547 						 * set, we ignore the top
2548 						 * priority ires that can
2549 						 * be resolved without any
2550 						 * call to the resolver;
2551 						 * In that case, there is
2552 						 * actually no need
2553 						 * to continue the loop.
2554 						 */
2555 						if (!(flags &
2556 						    MULTIRT_CACHEGW)) {
2557 							break;
2558 						}
2559 						continue;
2560 					}
2561 				} else {
2562 					/*
2563 					 * No resolver for the gateway: the
2564 					 * route is not resolvable.
2565 					 * If the MULTIRT_SETSTAMP flag is
2566 					 * set, we stamp the IRE_HOST ire,
2567 					 * so we will not select it again
2568 					 * during this resolution interval.
2569 					 */
2570 					if (flags & MULTIRT_SETSTAMP)
2571 						fire->ire_last_used_time =
2572 						    lbolt;
2573 				}
2574 			}
2575 
2576 			if (gw_ire != NULL)
2577 				ire_refrele(gw_ire);
2578 		}
2579 	} else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */
2580 
2581 		for (fire = first_fire;
2582 		    fire != NULL;
2583 		    fire = fire->ire_next) {
2584 
2585 			if (!(fire->ire_flags & RTF_MULTIRT))
2586 				continue;
2587 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2588 				continue;
2589 
2590 			if (fire->ire_gw_secattr != NULL &&
2591 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
2592 				continue;
2593 			}
2594 
2595 			already_resolved = B_FALSE;
2596 
2597 			mutex_enter(&fire->ire_lock);
2598 			v6gw = fire->ire_gateway_addr_v6;
2599 			mutex_exit(&fire->ire_lock);
2600 
2601 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2602 			    IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl,
2603 			    MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE |
2604 			    MATCH_IRE_SECATTR, ipst);
2605 
2606 			/* No resolver for the gateway; we skip this ire. */
2607 			if (gw_ire == NULL) {
2608 				continue;
2609 			}
2610 
2611 			if (first_cire) {
2612 
2613 				IRB_REFHOLD(cirb);
2614 				/*
2615 				 * For all IRE_CACHE ires for that
2616 				 * destination.
2617 				 */
2618 				for (cire = first_cire;
2619 				    cire != NULL;
2620 				    cire = cire->ire_next) {
2621 
2622 					if (!(cire->ire_flags & RTF_MULTIRT))
2623 						continue;
2624 					if (!IN6_ARE_ADDR_EQUAL(
2625 					    &cire->ire_addr_v6, &v6dst))
2626 						continue;
2627 					if (cire->ire_marks &
2628 					    IRE_MARK_CONDEMNED)
2629 						continue;
2630 
2631 					if (cire->ire_gw_secattr != NULL &&
2632 					    tsol_ire_match_gwattr(cire,
2633 					    tsl) != 0) {
2634 						continue;
2635 					}
2636 
2637 					/*
2638 					 * Cache entries are linked to the
2639 					 * parent routes using the parent handle
2640 					 * (ire_phandle). If no cache entry has
2641 					 * the same handle as fire, fire is
2642 					 * still unresolved.
2643 					 */
2644 					ASSERT(cire->ire_phandle != 0);
2645 					if (cire->ire_phandle ==
2646 					    fire->ire_phandle) {
2647 						already_resolved = B_TRUE;
2648 						break;
2649 					}
2650 				}
2651 				IRB_REFRELE(cirb);
2652 			}
2653 
2654 			/*
2655 			 * This route is already resolved; proceed with
2656 			 * next one.
2657 			 */
2658 			if (already_resolved) {
2659 				ire_refrele(gw_ire);
2660 				continue;
2661 			}
2662 
2663 			/*
2664 			 * Compute the time elapsed since our preceding
2665 			 * attempt to resolve that route.
2666 			 * If the MULTIRT_USESTAMP flag is set, we take
2667 			 * that route into account only if this time
2668 			 * interval exceeds ip_multirt_resolution_interval;
2669 			 * this prevents us from attempting to resolve a
2670 			 * broken route upon each sending of a packet.
2671 			 */
2672 			delta = lbolt - fire->ire_last_used_time;
2673 			delta = TICK_TO_MSEC(delta);
2674 
2675 			res = (boolean_t)
2676 			    ((delta > ipst->
2677 			    ips_ip_multirt_resolution_interval) ||
2678 			    (!(flags & MULTIRT_USESTAMP)));
2679 
2680 			ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, "
2681 			    "flags %04x, res %d\n",
2682 			    (void *)fire, delta, flags, res));
2683 
2684 			if (res) {
2685 				if (best_cire) {
2686 					/*
2687 					 * Release the resolver associated
2688 					 * to the preceding candidate best
2689 					 * ire, if any.
2690 					 */
2691 					ire_refrele(best_cire);
2692 					ASSERT(best_fire);
2693 				}
2694 				best_fire = fire;
2695 				best_cire = gw_ire;
2696 				continue;
2697 			}
2698 
2699 			ire_refrele(gw_ire);
2700 		}
2701 	}
2702 
2703 	if (best_fire) {
2704 		IRE_REFHOLD(best_fire);
2705 	}
2706 	IRB_REFRELE(firb);
2707 
2708 	/* Release the first IRE_CACHE we initially looked up, if any. */
2709 	if (first_cire)
2710 		ire_refrele(first_cire);
2711 
2712 	/* Found a resolvable route. */
2713 	if (best_fire) {
2714 		ASSERT(best_cire);
2715 
2716 		if (*fire_arg)
2717 			ire_refrele(*fire_arg);
2718 		if (*ire_arg)
2719 			ire_refrele(*ire_arg);
2720 
2721 		/*
2722 		 * Update the passed arguments with the
2723 		 * resolvable multirt route we found
2724 		 */
2725 		*fire_arg = best_fire;
2726 		*ire_arg = best_cire;
2727 
2728 		ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, "
2729 		    "*fire_arg %p, *ire_arg %p\n",
2730 		    (void *)best_fire, (void *)best_cire));
2731 
2732 		return (B_TRUE);
2733 	}
2734 
2735 	ASSERT(best_cire == NULL);
2736 
2737 	ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, "
2738 	    "*ire_arg %p\n",
2739 	    (void *)*fire_arg, (void *)*ire_arg));
2740 
2741 	/* No resolvable route. */
2742 	return (B_FALSE);
2743 }
2744 
2745 
2746 /*
2747  * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp'
2748  * that goes through 'ipif'. As a fallback, a route that goes through
2749  * ipif->ipif_ill can be returned.
2750  */
2751 ire_t *
2752 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp)
2753 {
2754 	ire_t	*ire;
2755 	ire_t	*save_ire = NULL;
2756 	ire_t   *gw_ire;
2757 	irb_t   *irb;
2758 	in6_addr_t v6gw;
2759 	int	match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL;
2760 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
2761 
2762 	ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0,
2763 	    NULL, MATCH_IRE_DEFAULT, ipst);
2764 
2765 	if (ire == NULL)
2766 		return (NULL);
2767 
2768 	irb = ire->ire_bucket;
2769 	ASSERT(irb);
2770 
2771 	IRB_REFHOLD(irb);
2772 	ire_refrele(ire);
2773 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
2774 		if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) ||
2775 		    (ipif->ipif_zoneid != ire->ire_zoneid &&
2776 		    ire->ire_zoneid != ALL_ZONES)) {
2777 			continue;
2778 		}
2779 
2780 		switch (ire->ire_type) {
2781 		case IRE_DEFAULT:
2782 		case IRE_PREFIX:
2783 		case IRE_HOST:
2784 			mutex_enter(&ire->ire_lock);
2785 			v6gw = ire->ire_gateway_addr_v6;
2786 			mutex_exit(&ire->ire_lock);
2787 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2788 			    IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0,
2789 			    NULL, match_flags, ipst);
2790 
2791 			if (gw_ire != NULL) {
2792 				if (save_ire != NULL) {
2793 					ire_refrele(save_ire);
2794 				}
2795 				IRE_REFHOLD(ire);
2796 				if (gw_ire->ire_ipif == ipif) {
2797 					ire_refrele(gw_ire);
2798 
2799 					IRB_REFRELE(irb);
2800 					return (ire);
2801 				}
2802 				ire_refrele(gw_ire);
2803 				save_ire = ire;
2804 			}
2805 			break;
2806 		case IRE_IF_NORESOLVER:
2807 		case IRE_IF_RESOLVER:
2808 			if (ire->ire_ipif == ipif) {
2809 				if (save_ire != NULL) {
2810 					ire_refrele(save_ire);
2811 				}
2812 				IRE_REFHOLD(ire);
2813 
2814 				IRB_REFRELE(irb);
2815 				return (ire);
2816 			}
2817 			break;
2818 		}
2819 	}
2820 	IRB_REFRELE(irb);
2821 
2822 	return (save_ire);
2823 }
2824 
2825 /*
2826  * This is the implementation of the IPv6 IRE cache lookup procedure.
2827  * Separating the interface from the implementation allows additional
2828  * flexibility when specifying search criteria.
2829  */
2830 static ire_t *
2831 ip6_ctable_lookup_impl(ire_ctable_args_t *margs)
2832 {
2833 	irb_t			*irb_ptr;
2834 	ire_t			*ire;
2835 	ip_stack_t		*ipst = margs->ict_ipst;
2836 
2837 	if ((margs->ict_flags & (MATCH_IRE_SRC | MATCH_IRE_ILL)) &&
2838 	    (margs->ict_ipif == NULL)) {
2839 		return (NULL);
2840 	}
2841 
2842 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
2843 	    *((in6_addr_t *)(margs->ict_addr)),
2844 	    ipst->ips_ip6_cache_table_size)];
2845 	rw_enter(&irb_ptr->irb_lock, RW_READER);
2846 	for (ire = irb_ptr->irb_ire; ire != NULL; ire = ire->ire_next) {
2847 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
2848 			continue;
2849 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
2850 		if (ire_match_args_v6(ire, (in6_addr_t *)margs->ict_addr,
2851 		    &ire->ire_mask_v6, (in6_addr_t *)margs->ict_gateway,
2852 		    margs->ict_type, margs->ict_ipif, margs->ict_zoneid, 0,
2853 		    margs->ict_tsl, margs->ict_flags)) {
2854 			IRE_REFHOLD(ire);
2855 			rw_exit(&irb_ptr->irb_lock);
2856 			return (ire);
2857 		}
2858 	}
2859 
2860 	rw_exit(&irb_ptr->irb_lock);
2861 	return (NULL);
2862 }
2863