xref: /titanic_44/usr/src/uts/common/inet/ip/ip6_ire.c (revision d2afb7a9bb42dc1844db2269e127f1f63707091b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 1990 Mentat Inc.
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 /*
32  * This file contains routines that manipulate Internet Routing Entries (IREs).
33  */
34 #include <sys/types.h>
35 #include <sys/stream.h>
36 #include <sys/stropts.h>
37 #include <sys/ddi.h>
38 #include <sys/cmn_err.h>
39 
40 #include <sys/systm.h>
41 #include <sys/param.h>
42 #include <sys/socket.h>
43 #include <net/if.h>
44 #include <net/route.h>
45 #include <netinet/in.h>
46 #include <net/if_dl.h>
47 #include <netinet/ip6.h>
48 #include <netinet/icmp6.h>
49 
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/ip.h>
53 #include <inet/ip6.h>
54 #include <inet/ip_ndp.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ire.h>
57 #include <inet/ipclassifier.h>
58 #include <inet/nd.h>
59 #include <sys/kmem.h>
60 #include <sys/zone.h>
61 
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tnet.h>
64 
/*
 * Template ire used to reset a newly allocated ire before it is
 * initialized (see the "*ire = ire_null" assignments in the ire_create
 * routines below).  It has static storage duration and no initializer,
 * so every field starts out as zero.
 */
static	ire_t	ire_null;

/*
 * Forward declarations for file-local helpers.  ire_init_v6() is defined
 * just below; the other two are not defined in this part of the file.
 */
static ire_t	*ire_ihandle_lookup_onlink_v6(ire_t *cire);
static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr,
    const in6_addr_t *mask, const in6_addr_t *gateway, int type,
    const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
    const ts_label_t *tsl, int match_flags);
static	ire_t	*ire_init_v6(ire_t *, const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, uint_t *, queue_t *, queue_t *,
    ushort_t, ipif_t *, const in6_addr_t *, uint32_t, uint32_t, uint_t,
    const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);
76 
77 
78 /*
79  * Initialize the ire that is specific to IPv6 part and call
80  * ire_init_common to finish it.
81  */
82 static ire_t *
83 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask,
84     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
85     uint_t *max_fragp, queue_t *rfq, queue_t *stq, ushort_t type,
86     ipif_t *ipif, const in6_addr_t *v6cmask, uint32_t phandle,
87     uint32_t ihandle, uint_t flags, const iulp_t *ulp_info, tsol_gc_t *gc,
88     tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
89 {
90 
91 	/*
92 	 * Reject IRE security attribute creation/initialization
93 	 * if system is not running in Trusted mode.
94 	 */
95 	if ((gc != NULL || gcgrp != NULL) && !is_system_labeled())
96 		return (NULL);
97 
98 
99 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced);
100 	ire->ire_addr_v6 = *v6addr;
101 
102 	if (v6src_addr != NULL)
103 		ire->ire_src_addr_v6 = *v6src_addr;
104 	if (v6mask != NULL) {
105 		ire->ire_mask_v6 = *v6mask;
106 		ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6);
107 	}
108 	if (v6gateway != NULL)
109 		ire->ire_gateway_addr_v6 = *v6gateway;
110 
111 	if (type == IRE_CACHE && v6cmask != NULL)
112 		ire->ire_cmask_v6 = *v6cmask;
113 
114 	/*
115 	 * Multirouted packets need to have a fragment header added so that
116 	 * the receiver is able to discard duplicates according to their
117 	 * fragment identifier.
118 	 */
119 	if (type == IRE_CACHE && (flags & RTF_MULTIRT)) {
120 		ire->ire_frag_flag = IPH_FRAG_HDR;
121 	}
122 
123 	/* ire_init_common will free the mblks upon encountering any failure */
124 	if (!ire_init_common(ire, max_fragp, NULL, rfq, stq, type, ipif,
125 	    phandle, ihandle, flags, IPV6_VERSION, ulp_info, gc, gcgrp, ipst))
126 		return (NULL);
127 
128 	return (ire);
129 }
130 
131 /*
132  * Similar to ire_create_v6 except that it is called only when
133  * we want to allocate ire as an mblk e.g. we have a external
134  * resolver. Do we need this in IPv6 ?
135  *
136  * IPv6 initializes the ire_nce in ire_add_v6, which expects to
137  * find the ire_nce to be null when it is called. So, although
138  * we have a src_nce parameter (in the interest of matching up with
139  * the argument list of the v4 version), we ignore the src_nce
140  * argument here.
141  */
142 /* ARGSUSED */
143 ire_t *
144 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
145     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
146     nce_t *src_nce, queue_t *rfq, queue_t *stq, ushort_t type,
147     ipif_t *ipif, const in6_addr_t *v6cmask,
148     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
149     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
150 {
151 	ire_t	*ire;
152 	ire_t	*ret_ire;
153 	mblk_t	*mp;
154 
155 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
156 
157 	/* Allocate the new IRE. */
158 	mp = allocb(sizeof (ire_t), BPRI_MED);
159 	if (mp == NULL) {
160 		ip1dbg(("ire_create_mp_v6: alloc failed\n"));
161 		return (NULL);
162 	}
163 
164 	ire = (ire_t *)mp->b_rptr;
165 	mp->b_wptr = (uchar_t *)&ire[1];
166 
167 	/* Start clean. */
168 	*ire = ire_null;
169 	ire->ire_mp = mp;
170 	mp->b_datap->db_type = IRE_DB_TYPE;
171 
172 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
173 	    NULL, rfq, stq, type, ipif, v6cmask, phandle,
174 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
175 
176 	if (ret_ire == NULL) {
177 		freeb(ire->ire_mp);
178 		return (NULL);
179 	}
180 	return (ire);
181 }
182 
183 /*
184  * ire_create_v6 is called to allocate and initialize a new IRE.
185  *
186  * NOTE : This is called as writer sometimes though not required
187  * by this function.
188  *
189  * See comments above ire_create_mp_v6() for the rationale behind the
190  * unused src_nce argument.
191  */
192 /* ARGSUSED */
193 ire_t *
194 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
195     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
196     uint_t *max_fragp, nce_t *src_nce, queue_t *rfq, queue_t *stq,
197     ushort_t type, ipif_t *ipif, const in6_addr_t *v6cmask,
198     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info,
199     tsol_gc_t *gc, tsol_gcgrp_t *gcgrp, ip_stack_t *ipst)
200 {
201 	ire_t	*ire;
202 	ire_t	*ret_ire;
203 
204 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
205 
206 	ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
207 	if (ire == NULL) {
208 		ip1dbg(("ire_create_v6: alloc failed\n"));
209 		return (NULL);
210 	}
211 	*ire = ire_null;
212 
213 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
214 	    max_fragp, rfq, stq, type, ipif, v6cmask, phandle,
215 	    ihandle, flags, ulp_info, gc, gcgrp, ipst);
216 
217 	if (ret_ire == NULL) {
218 		kmem_cache_free(ire_cache, ire);
219 		return (NULL);
220 	}
221 	ASSERT(ret_ire == ire);
222 	return (ire);
223 }
224 
225 /*
226  * Find an IRE_INTERFACE for the multicast group.
227  * Allows different routes for multicast addresses
228  * in the unicast routing table (akin to FF::0/8 but could be more specific)
229  * which point at different interfaces. This is used when IPV6_MULTICAST_IF
230  * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
231  * specify the interface to join on.
232  *
233  * Supports link-local addresses by following the ipif/ill when recursing.
234  */
235 ire_t *
236 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid, ip_stack_t *ipst)
237 {
238 	ire_t	*ire;
239 	ipif_t	*ipif = NULL;
240 	int	match_flags = MATCH_IRE_TYPE;
241 	in6_addr_t gw_addr_v6;
242 
243 	ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL,
244 	    zoneid, 0, NULL, MATCH_IRE_DEFAULT, ipst);
245 
246 	/* We search a resolvable ire in case of multirouting. */
247 	if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) {
248 		ire_t *cire = NULL;
249 		/*
250 		 * If the route is not resolvable, the looked up ire
251 		 * may be changed here. In that case, ire_multirt_lookup()
252 		 * IRE_REFRELE the original ire and change it.
253 		 */
254 		(void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW,
255 		    NULL, ipst);
256 		if (cire != NULL)
257 			ire_refrele(cire);
258 	}
259 	if (ire == NULL)
260 		return (NULL);
261 	/*
262 	 * Make sure we follow ire_ipif.
263 	 *
264 	 * We need to determine the interface route through
265 	 * which the gateway will be reached. We don't really
266 	 * care which interface is picked if the interface is
267 	 * part of a group.
268 	 */
269 	if (ire->ire_ipif != NULL) {
270 		ipif = ire->ire_ipif;
271 		match_flags |= MATCH_IRE_ILL_GROUP;
272 	}
273 
274 	switch (ire->ire_type) {
275 	case IRE_DEFAULT:
276 	case IRE_PREFIX:
277 	case IRE_HOST:
278 		mutex_enter(&ire->ire_lock);
279 		gw_addr_v6 = ire->ire_gateway_addr_v6;
280 		mutex_exit(&ire->ire_lock);
281 		ire_refrele(ire);
282 		ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0,
283 		    IRE_INTERFACE, ipif, NULL, zoneid, 0,
284 		    NULL, match_flags, ipst);
285 		return (ire);
286 	case IRE_IF_NORESOLVER:
287 	case IRE_IF_RESOLVER:
288 		return (ire);
289 	default:
290 		ire_refrele(ire);
291 		return (NULL);
292 	}
293 }
294 
295 /*
296  * Return any local address.  We use this to target ourselves
297  * when the src address was specified as 'default'.
298  * Preference for IRE_LOCAL entries.
299  */
300 ire_t *
301 ire_lookup_local_v6(zoneid_t zoneid, ip_stack_t *ipst)
302 {
303 	ire_t	*ire;
304 	irb_t	*irb;
305 	ire_t	*maybe = NULL;
306 	int i;
307 
308 	for (i = 0; i < ipst->ips_ip6_cache_table_size;  i++) {
309 		irb = &ipst->ips_ip_cache_table_v6[i];
310 		if (irb->irb_ire == NULL)
311 			continue;
312 		rw_enter(&irb->irb_lock, RW_READER);
313 		for (ire = irb->irb_ire; ire; ire = ire->ire_next) {
314 			if ((ire->ire_marks & IRE_MARK_CONDEMNED) ||
315 			    ire->ire_zoneid != zoneid &&
316 			    ire->ire_zoneid != ALL_ZONES)
317 				continue;
318 			switch (ire->ire_type) {
319 			case IRE_LOOPBACK:
320 				if (maybe == NULL) {
321 					IRE_REFHOLD(ire);
322 					maybe = ire;
323 				}
324 				break;
325 			case IRE_LOCAL:
326 				if (maybe != NULL) {
327 					ire_refrele(maybe);
328 				}
329 				IRE_REFHOLD(ire);
330 				rw_exit(&irb->irb_lock);
331 				return (ire);
332 			}
333 		}
334 		rw_exit(&irb->irb_lock);
335 	}
336 	return (maybe);
337 }
338 
339 /*
340  * This function takes a mask and returns number of bits set in the
341  * mask (the represented prefix length).  Assumes a contiguous mask.
342  */
343 int
344 ip_mask_to_plen_v6(const in6_addr_t *v6mask)
345 {
346 	int		bits;
347 	int		plen = IPV6_ABITS;
348 	int		i;
349 
350 	for (i = 3; i >= 0; i--) {
351 		if (v6mask->s6_addr32[i] == 0) {
352 			plen -= 32;
353 			continue;
354 		}
355 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
356 		if (bits == 0)
357 			break;
358 		plen -= bits;
359 	}
360 
361 	return (plen);
362 }
363 
364 /*
365  * Convert a prefix length to the mask for that prefix.
366  * Returns the argument bitmask.
367  */
368 in6_addr_t *
369 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
370 {
371 	uint32_t *ptr;
372 
373 	if (plen < 0 || plen > IPV6_ABITS)
374 		return (NULL);
375 	*bitmask = ipv6_all_zeros;
376 
377 	ptr = (uint32_t *)bitmask;
378 	while (plen > 32) {
379 		*ptr++ = 0xffffffffU;
380 		plen -= 32;
381 	}
382 	*ptr = htonl(0xffffffffU << (32 - plen));
383 	return (bitmask);
384 }
385 
/*
 * Add a fully initialized IRE to an appropriate
 * table based on ire_type.
 *
 * The forward table contains IRE_PREFIX/IRE_HOST and
 * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
 *
 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
 * and IRE_CACHE.
 *
 * Arguments:
 *	ire_p	On entry points at the ire to add.  On return it points at
 *		the ire that is in the table (either the one passed in or a
 *		pre-existing duplicate), held on behalf of the caller, or
 *		is set to NULL if the add failed.
 *	q, mp, func
 *		Handed to ire_atomic_start(), which may queue the request
 *		and return EINPROGRESS if the ipif or ill is changing.
 *
 * Returns 0 on success (including the duplicate case).  On failure the
 * ire passed in is deleted and the return value is EINVAL, ENOMEM or
 * whatever error ire_atomic_start() reported.
 *
 * NOTE : This function is called as writer though not required
 * by this function.
 */
int
ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
{
	ire_t	*ire1;
	int	mask_table_index;
	irb_t	*irb_ptr;
	ire_t	**irep;
	int	flags;
	ire_t	*pire = NULL;
	ill_t	*stq_ill;
	boolean_t	ndp_g_lock_held = B_FALSE;
	ire_t	*ire = *ire_p;
	int	error;
	ip_stack_t	*ipst = ire->ire_ipst;

	ASSERT(ire->ire_ipversion == IPV6_VERSION);
	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
	ASSERT(ire->ire_nce == NULL);

	/*
	 * Normalize the mask and source address according to the IRE type,
	 * and reject types this function does not know about.
	 */
	switch (ire->ire_type) {
	case IRE_HOST:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_CACHE:
	case IRE_LOCAL:
	case IRE_LOOPBACK:
		ire->ire_mask_v6 = ipv6_all_ones;
		ire->ire_masklen = IPV6_ABITS;
		break;
	case IRE_PREFIX:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_DEFAULT:
		if ((ire->ire_flags & RTF_SETSRC) == 0)
			ire->ire_src_addr_v6 = ipv6_all_zeros;
		break;
	case IRE_IF_RESOLVER:
	case IRE_IF_NORESOLVER:
		break;
	default:
		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
		    (void *)ire, ire->ire_type);
		ire_delete(ire);
		*ire_p = NULL;
		return (EINVAL);
	}

	/* Make sure the address is properly masked. */
	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);

	/* Find the appropriate list head (hash bucket). */
	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
		/* IRE goes into Forward Table */
		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
		if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) ==
		    NULL) {
			irb_t *ptr;
			int i;

			/*
			 * No bucket array exists yet for this prefix length.
			 * Build one outside the lock, then publish it under
			 * ips_ire_ft_init_lock unless another thread has
			 * already installed its own.
			 */
			ptr = (irb_t *)mi_zalloc((
			    ipst->ips_ip6_ftable_hash_size * sizeof (irb_t)));
			if (ptr == NULL) {
				ire_delete(ire);
				*ire_p = NULL;
				return (ENOMEM);
			}
			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
				rw_init(&ptr[i].irb_lock, NULL,
				    RW_DEFAULT, NULL);
			}
			mutex_enter(&ipst->ips_ire_ft_init_lock);
			if (ipst->ips_ip_forwarding_table_v6[
			    mask_table_index] == NULL) {
				ipst->ips_ip_forwarding_table_v6[
				    mask_table_index] = ptr;
				mutex_exit(&ipst->ips_ire_ft_init_lock);
			} else {
				/*
				 * Some other thread won the race in
				 * initializing the forwarding table at the
				 * same index.
				 */
				mutex_exit(&ipst->ips_ire_ft_init_lock);
				for (i = 0; i < ipst->ips_ip6_ftable_hash_size;
				    i++) {
					rw_destroy(&ptr[i].irb_lock);
				}
				mi_free(ptr);
			}
		}
		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
		    ipst->ips_ip6_ftable_hash_size)]);
	} else {
		irb_ptr = &(ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(
		    ire->ire_addr_v6, ipst->ips_ip6_cache_table_size)]);
	}
	/*
	 * For xresolv interfaces (v6 interfaces with an external
	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
	 * are unable to prevent the deletion of the interface route
	 * while adding an IRE_CACHE for an on-link destination
	 * in the IRE_IF_RESOLVER case, since the ire has to go to
	 * the external resolver and return. We can't do a REFHOLD on the
	 * associated interface ire for fear of the message being freed
	 * if the external resolver can't resolve the address.
	 * Here we look up the interface ire in the forwarding table
	 * and make sure that the interface route has not been deleted.
	 */
	if (ire->ire_type == IRE_CACHE &&
	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {

		pire = ire_ihandle_lookup_onlink_v6(ire);
		if (pire == NULL) {
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
		/* Prevent pire from getting deleted */
		IRB_REFHOLD(pire->ire_bucket);
		/* Has it been removed already? */
		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
			ire_delete(ire);
			*ire_p = NULL;
			return (EINVAL);
		}
	}

	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
	/*
	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
	 * for duplicates because :
	 *
	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
	 *    pointing at different ills. A real duplicate is
	 *    a match on both ire_ipif and ire_stq.
	 *
	 * 2) We could have multiple packets trying to create
	 *    an IRE_CACHE for the same ill.
	 *
	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints want
	 * to go out on a particular ill. Rather than looking at the
	 * packet, we depend on the above for MATCH_IRE_ILL here.
	 *
	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
	 * multiple IRE_CACHES for an ill for the same destination
	 * with various scoped addresses i.e represented by ipifs.
	 *
	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
	 */
	if (ire->ire_ipif != NULL)
		flags |= MATCH_IRE_IPIF;
	/*
	 * If we are creating hidden ires, make sure we search on
	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
	 * searching for duplicates below. Otherwise we could
	 * potentially find an IRE on some other interface
	 * and it may not be an IRE marked with IRE_MARK_HIDDEN. We
	 * shouldn't do this as this will lead to an infinite loop as
	 * eventually we need a hidden ire for this packet to go
	 * out. MATCH_IRE_ILL is already marked above.
	 */
	if (ire->ire_marks & IRE_MARK_HIDDEN) {
		ASSERT(ire->ire_type == IRE_CACHE);
		flags |= MATCH_IRE_MARK_HIDDEN;
	}

	/*
	 * Start the atomic add of the ire. Grab the ill locks,
	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
	 * To avoid lock order problems, get the ndp6.ndp_g_lock now itself.
	 */
	if (ire->ire_type == IRE_CACHE) {
		mutex_enter(&ipst->ips_ndp6->ndp_g_lock);
		ndp_g_lock_held = B_TRUE;
	}

	/*
	 * If ipif or ill is changing ire_atomic_start() may queue the
	 * request and return EINPROGRESS.
	 */

	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
	if (error != 0) {
		if (ndp_g_lock_held)
			mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
		/*
		 * We don't know whether it is a valid ipif or not.
		 * So, set it to NULL. This assumes that the ire has not added
		 * a reference to the ipif.
		 */
		ire->ire_ipif = NULL;
		ire_delete(ire);
		if (pire != NULL) {
			IRB_REFRELE(pire->ire_bucket);
			ire_refrele(pire);
		}
		*ire_p = NULL;
		return (error);
	}
	/*
	 * To avoid creating ires having stale values for the ire_max_frag
	 * we get the latest value atomically here. For more details
	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
	 * in ip_rput_dlpi_writer
	 *
	 * NOTE(review): the non-multicast branch dereferences pire, which
	 * is only set above for an on-link IRE_CACHE on an XRESOLV ill, and
	 * the multicast branch dereferences ire_ipif without a NULL check.
	 * Presumably only ires created with a NULL max_fragp on such ills
	 * get here with ire_max_fragp == NULL - confirm against the callers.
	 */
	if (ire->ire_max_fragp == NULL) {
		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
		else
			ire->ire_max_frag = pire->ire_max_frag;
	} else {
		uint_t  max_frag;

		max_frag = *ire->ire_max_fragp;
		ire->ire_max_fragp = NULL;
		ire->ire_max_frag = max_frag;
	}

	/*
	 * Atomically check for duplicate and insert in the table.
	 */
	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
			continue;

		if (ire->ire_type == IRE_CACHE) {
			/*
			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
			 * As ire_ipif and ire_stq could point to two
			 * different ills, we can't pass just ire_ipif to
			 * ire_match_args and get a match on both ills.
			 * This is just needed for duplicate checks here and
			 * so we don't add an extra argument to
			 * ire_match_args for this. Do it locally.
			 *
			 * NOTE : Currently there is no part of the code
			 * that asks for both MATCH_IRE_IPIF and MATCH_IRE_ILL
			 * match for IRE_CACHEs. Thus we don't want to
			 * extend the arguments to ire_match_args_v6.
			 */
			if (ire1->ire_stq != ire->ire_stq)
				continue;
			/*
			 * Multiroute IRE_CACHEs for a given destination can
			 * have the same ire_ipif, typically if their source
			 * address is forced using RTF_SETSRC, and the same
			 * send-to queue. We differentiate them using the parent
			 * handle.
			 */
			if ((ire1->ire_flags & RTF_MULTIRT) &&
			    (ire->ire_flags & RTF_MULTIRT) &&
			    (ire1->ire_phandle != ire->ire_phandle))
				continue;
		}
		if (ire1->ire_zoneid != ire->ire_zoneid)
			continue;
		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, NULL,
		    flags)) {
			/*
			 * Return the old ire after doing a REFHOLD.
			 * As most of the callers continue to use the IRE
			 * after adding, we return a held ire. This will
			 * avoid a lookup in the caller again. If the callers
			 * don't want to use it, they need to do a REFRELE.
			 */
			ip1dbg(("found dup ire existing %p new %p",
			    (void *)ire1, (void *)ire));
			IRE_REFHOLD(ire1);
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			ire_atomic_end(irb_ptr, ire);
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			*ire_p = ire1;
			return (0);
		}
	}
	if (ire->ire_type == IRE_CACHE) {
		in6_addr_t gw_addr_v6;
		/* NOTE(review): ill is used below without a NULL check. */
		ill_t	*ill = ire_to_ill(ire);
		char	buf[INET6_ADDRSTRLEN];
		nce_t	*nce;

		/*
		 * All IRE_CACHE types must have a nce.  If this is
		 * not the case the entry will not be added. We need
		 * to make sure that if somebody deletes the nce
		 * after we looked up, they will find this ire and
		 * delete the ire. To delete this ire one needs the
		 * bucket lock which we are still holding here. So,
		 * even if the nce gets deleted after we looked up,
		 * this ire  will get deleted.
		 *
		 * NOTE : Don't need the ire_lock for accessing
		 * ire_gateway_addr_v6 as it is appearing first
		 * time on the list and rts_setgwr_v6 could not
		 * be changing this.
		 */
		gw_addr_v6 = ire->ire_gateway_addr_v6;
		/* The nce is keyed by the next hop: gateway if set, else dst */
		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
			nce = ndp_lookup_v6(ill, &ire->ire_addr_v6, B_TRUE);
		} else {
			nce = ndp_lookup_v6(ill, &gw_addr_v6, B_TRUE);
		}
		if (nce == NULL)
			goto failed;

		/* Pair of refhold, refrele just to get the tracing right */
		NCE_REFHOLD_TO_REFHOLD_NOTR(nce);
		/*
		 * Atomically make sure that new IREs don't point
		 * to an NCE that is logically deleted (CONDEMNED).
		 * ndp_delete() first marks the NCE CONDEMNED.
		 * This ensures that the nce_refcnt won't increase
		 * due to new nce_lookups or due to addition of new IREs
		 * pointing to this NCE. Then ndp_delete() cleans up
		 * existing references. If we don't do it atomically here,
		 * ndp_delete() -> nce_ire_delete() will not be able to
		 * clean up the IRE list completely, and the nce_refcnt
		 * won't go down to zero.
		 */
		mutex_enter(&nce->nce_lock);
		if (ill->ill_flags & ILLF_XRESOLV) {
			/*
			 * If we used an external resolver, we may not
			 * have gone through neighbor discovery to get here.
			 * Must update the nce_state before the next check.
			 */
			if (nce->nce_state == ND_INCOMPLETE)
				nce->nce_state = ND_REACHABLE;
		}
		if (nce->nce_state == ND_INCOMPLETE ||
		    (nce->nce_flags & NCE_F_CONDEMNED) ||
		    (nce->nce_state == ND_UNREACHABLE)) {
			/*
			 * Shared failure path; also entered via goto with
			 * nce == NULL (and nce_lock not held) when the
			 * lookup above found nothing.
			 */
failed:
			if (ndp_g_lock_held)
				mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
			if (nce != NULL)
				mutex_exit(&nce->nce_lock);
			ire_atomic_end(irb_ptr, ire);
			ip1dbg(("ire_add_v6: No nce for dst %s \n",
			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
			    buf, sizeof (buf))));
			ire_delete(ire);
			if (pire != NULL) {
				/*
				 * Assert that it is
				 * not yet removed from the list.
				 */
				ASSERT(pire->ire_ptpn != NULL);
				IRB_REFRELE(pire->ire_bucket);
				ire_refrele(pire);
			}
			if (nce != NULL)
				NCE_REFRELE_NOTR(nce);
			*ire_p = NULL;
			return (EINVAL);
		} else {
			ire->ire_nce = nce;
		}
		mutex_exit(&nce->nce_lock);
	}
	/*
	 * Find the first entry that matches ire_addr - provides
	 * tail insertion. *irep will be null if no match.
	 */
	irep = (ire_t **)irb_ptr;
	while ((ire1 = *irep) != NULL &&
	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
		irep = &ire1->ire_next;
	ASSERT(!(ire->ire_type & IRE_BROADCAST));

	if (*irep != NULL) {
		/*
		 * Find the last ire which matches ire_addr_v6.
		 * Needed to do tail insertion among entries with the same
		 * ire_addr_v6.
		 */
		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
		    &ire1->ire_addr_v6)) {
			irep = &ire1->ire_next;
			ire1 = *irep;
			if (ire1 == NULL)
				break;
		}
	}

	if (ire->ire_type == IRE_DEFAULT) {
		/*
		 * We keep a count of default gateways which is used when
		 * assigning them as routes.
		 */
		ipst->ips_ipv6_ire_default_count++;
		ASSERT(ipst->ips_ipv6_ire_default_count != 0); /* Wraparound */
	}
	/* Insert at *irep */
	ire1 = *irep;
	if (ire1 != NULL)
		ire1->ire_ptpn = &ire->ire_next;
	ire->ire_next = ire1;
	/* Link the new one in. */
	ire->ire_ptpn = irep;
	/*
	 * ire_walk routines de-reference ire_next without holding
	 * a lock. Before we point to the new ire, we want to make
	 * sure the store that sets the ire_next of the new ire
	 * reaches global visibility, so that ire_walk routines
	 * don't see a truncated list of ires i.e if the ire_next
	 * of the new ire gets set after we do "*irep = ire" due
	 * to re-ordering, the ire_walk thread will see a NULL
	 * once it accesses the ire_next of the new ire.
	 * membar_producer() makes sure that the following store
	 * happens *after* all of the above stores.
	 */
	membar_producer();
	*irep = ire;
	ire->ire_bucket = irb_ptr;
	/*
	 * We return a bumped up IRE above. Keep it symmetrical
	 * so that the callers will always have to release. This
	 * helps the callers of this function because they continue
	 * to use the IRE after adding and hence they don't have to
	 * lookup again after we return the IRE.
	 *
	 * NOTE : We don't have to use atomics as this is appearing
	 * in the list for the first time and no one else can bump
	 * up the reference count on this yet.
	 */
	IRE_REFHOLD_LOCKED(ire);
	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
	irb_ptr->irb_ire_cnt++;
	if (ire->ire_marks & IRE_MARK_TEMPORARY)
		irb_ptr->irb_tmp_ire_cnt++;

	/* Account for this ire on its ipif and on the ill it sends through */
	if (ire->ire_ipif != NULL) {
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ire->ire_ipif,
		    (char *), "ire", (void *), ire);
		ire->ire_ipif->ipif_ire_cnt++;
		if (ire->ire_stq != NULL) {
			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
			DTRACE_PROBE3(ill__incr__cnt, (ill_t *), stq_ill,
			    (char *), "ire", (void *), ire);
			stq_ill->ill_ire_cnt++;
		}
	} else {
		ASSERT(ire->ire_stq == NULL);
	}

	if (ndp_g_lock_held)
		mutex_exit(&ipst->ips_ndp6->ndp_g_lock);
	ire_atomic_end(irb_ptr, ire);

	if (pire != NULL) {
		/* Assert that it is not removed from the list yet */
		ASSERT(pire->ire_ptpn != NULL);
		IRB_REFRELE(pire->ire_bucket);
		ire_refrele(pire);
	}

	if (ire->ire_type != IRE_CACHE) {
		/*
		 * For ire's with a host mask see if there is an entry
		 * in the cache. If there is one flush the whole cache as
		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
		 * If no entry is found then there is no need to flush the
		 * cache.
		 */

		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
			ire_t *lire;
			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
			    IRE_CACHE, NULL, ALL_ZONES, NULL, MATCH_IRE_TYPE,
			    ipst);
			if (lire != NULL) {
				ire_refrele(lire);
				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
			}
		} else {
			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
		}
	}

	*ire_p = ire;
	return (0);
}
903 
904 /*
905  * Search for all HOST REDIRECT routes that are
906  * pointing at the specified gateway and
907  * delete them. This routine is called only
908  * when a default gateway is going away.
909  */
910 static void
911 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
912 {
913 	irb_t *irb_ptr;
914 	irb_t *irb;
915 	ire_t *ire;
916 	in6_addr_t gw_addr_v6;
917 	int i;
918 
919 	/* get the hash table for HOST routes */
920 	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
921 	if (irb_ptr == NULL)
922 		return;
923 	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
924 		irb = &irb_ptr[i];
925 		IRB_REFHOLD(irb);
926 		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
927 			if (!(ire->ire_flags & RTF_DYNAMIC))
928 				continue;
929 			mutex_enter(&ire->ire_lock);
930 			gw_addr_v6 = ire->ire_gateway_addr_v6;
931 			mutex_exit(&ire->ire_lock);
932 			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
933 				ire_delete(ire);
934 		}
935 		IRB_REFRELE(irb);
936 	}
937 }
938 
939 /*
940  * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
941  * of ip_ire_clookup_and_delete. The difference being this function does not
942  * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
943  * different than IPv4 in that, regardless of the presence of a cache entry
944  * for this address, an ire_walk_v6 is done. Another difference is that unlike
945  * in the case of IPv4 this does not take an ipif_t argument, since it is only
946  * called by ip_arp_news and the match is always only on the address.
947  */
948 void
949 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr, ip_stack_t *ipst)
950 {
951 	irb_t		*irb;
952 	ire_t		*cire;
953 	boolean_t	found = B_FALSE;
954 
955 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
956 	    ipst->ips_ip6_cache_table_size)];
957 	IRB_REFHOLD(irb);
958 	for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) {
959 		if (cire->ire_marks & IRE_MARK_CONDEMNED)
960 			continue;
961 		if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) {
962 
963 			/* This signifies start of a match */
964 			if (!found)
965 				found = B_TRUE;
966 			if (cire->ire_type == IRE_CACHE) {
967 				if (cire->ire_nce != NULL)
968 					ndp_delete(cire->ire_nce);
969 				ire_delete_v6(cire);
970 			}
971 		/* End of the match */
972 		} else if (found)
973 			break;
974 	}
975 	IRB_REFRELE(irb);
976 }
977 
978 /*
979  * Delete the specified IRE.
980  * All calls should use ire_delete().
981  * Sometimes called as writer though not required by this function.
982  *
983  * NOTE : This function is called only if the ire was added
984  * in the list.
985  */
986 void
987 ire_delete_v6(ire_t *ire)
988 {
989 	in6_addr_t gw_addr_v6;
990 	ip_stack_t	*ipst = ire->ire_ipst;
991 
992 	ASSERT(ire->ire_refcnt >= 1);
993 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
994 
995 	if (ire->ire_type != IRE_CACHE)
996 		ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
997 	if (ire->ire_type == IRE_DEFAULT) {
998 		/*
999 		 * when a default gateway is going away
1000 		 * delete all the host redirects pointing at that
1001 		 * gateway.
1002 		 */
1003 		mutex_enter(&ire->ire_lock);
1004 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1005 		mutex_exit(&ire->ire_lock);
1006 		ire_delete_host_redirects_v6(&gw_addr_v6, ipst);
1007 	}
1008 }
1009 
1010 /*
1011  * ire_walk routine to delete all IRE_CACHE and IRE_HOST type redirect
1012  * entries.
1013  */
1014 /*ARGSUSED1*/
1015 void
1016 ire_delete_cache_v6(ire_t *ire, char *arg)
1017 {
1018 	char    addrstr1[INET6_ADDRSTRLEN];
1019 	char    addrstr2[INET6_ADDRSTRLEN];
1020 
1021 	if ((ire->ire_type & IRE_CACHE) ||
1022 	    (ire->ire_flags & RTF_DYNAMIC)) {
1023 		ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n",
1024 		    inet_ntop(AF_INET6, &ire->ire_addr_v6,
1025 		    addrstr1, sizeof (addrstr1)),
1026 		    ire->ire_type,
1027 		    inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6,
1028 		    addrstr2, sizeof (addrstr2))));
1029 		ire_delete(ire);
1030 	}
1031 
1032 }
1033 
1034 /*
1035  * ire_walk routine to delete all IRE_CACHE/IRE_HOST type redirect entries
1036  * that have a given gateway address.
1037  */
1038 void
1039 ire_delete_cache_gw_v6(ire_t *ire, char *addr)
1040 {
1041 	in6_addr_t	*gw_addr = (in6_addr_t *)addr;
1042 	char		buf1[INET6_ADDRSTRLEN];
1043 	char		buf2[INET6_ADDRSTRLEN];
1044 	in6_addr_t	ire_gw_addr_v6;
1045 
1046 	if (!(ire->ire_type & IRE_CACHE) &&
1047 	    !(ire->ire_flags & RTF_DYNAMIC))
1048 		return;
1049 
1050 	mutex_enter(&ire->ire_lock);
1051 	ire_gw_addr_v6 = ire->ire_gateway_addr_v6;
1052 	mutex_exit(&ire->ire_lock);
1053 
1054 	if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) {
1055 		ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n",
1056 		    inet_ntop(AF_INET6, &ire->ire_src_addr_v6,
1057 		    buf1, sizeof (buf1)),
1058 		    ire->ire_type,
1059 		    inet_ntop(AF_INET6, &ire_gw_addr_v6,
1060 		    buf2, sizeof (buf2))));
1061 		ire_delete(ire);
1062 	}
1063 }
1064 
1065 /*
1066  * Remove all IRE_CACHE entries that match
1067  * the ire specified.  (Sometimes called
1068  * as writer though not required by this function.)
1069  *
1070  * The flag argument indicates if the
1071  * flush request is due to addition
1072  * of new route (IRE_FLUSH_ADD) or deletion of old
1073  * route (IRE_FLUSH_DELETE).
1074  *
1075  * This routine takes only the IREs from the forwarding
1076  * table and flushes the corresponding entries from
1077  * the cache table.
1078  *
1079  * When flushing due to the deletion of an old route, it
1080  * just checks the cache handles (ire_phandle and ire_ihandle) and
1081  * deletes the ones that match.
1082  *
1083  * When flushing due to the creation of a new route, it checks
1084  * if a cache entry's address matches the one in the IRE and
1085  * that the cache entry's parent has a less specific mask than the
1086  * one in IRE. The destination of such a cache entry could be the
1087  * gateway for other cache entries, so we need to flush those as
1088  * well by looking for gateway addresses matching the IRE's address.
1089  */
1090 void
1091 ire_flush_cache_v6(ire_t *ire, int flag)
1092 {
1093 	int i;
1094 	ire_t *cire;
1095 	irb_t *irb;
1096 	ip_stack_t	*ipst = ire->ire_ipst;
1097 
1098 	if (ire->ire_type & IRE_CACHE)
1099 		return;
1100 
1101 	/*
1102 	 * If a default is just created, there is no point
1103 	 * in going through the cache, as there will not be any
1104 	 * cached ires.
1105 	 */
1106 	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
1107 		return;
1108 	if (flag == IRE_FLUSH_ADD) {
1109 		/*
1110 		 * This selective flush is
1111 		 * due to the addition of
1112 		 * new IRE.
1113 		 */
1114 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
1115 			irb = &ipst->ips_ip_cache_table_v6[i];
1116 			if ((cire = irb->irb_ire) == NULL)
1117 				continue;
1118 			IRB_REFHOLD(irb);
1119 			for (cire = irb->irb_ire; cire != NULL;
1120 			    cire = cire->ire_next) {
1121 				if (cire->ire_type != IRE_CACHE)
1122 					continue;
1123 				/*
1124 				 * If 'cire' belongs to the same subnet
1125 				 * as the new ire being added, and 'cire'
1126 				 * is derived from a prefix that is less
1127 				 * specific than the new ire being added,
1128 				 * we need to flush 'cire'; for instance,
1129 				 * when a new interface comes up.
1130 				 */
1131 				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
1132 				    ire->ire_mask_v6, ire->ire_addr_v6) &&
1133 				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
1134 				    ire->ire_masklen))) {
1135 					ire_delete(cire);
1136 					continue;
1137 				}
1138 				/*
1139 				 * This is the case when the ire_gateway_addr
1140 				 * of 'cire' belongs to the same subnet as
1141 				 * the new ire being added.
1142 				 * Flushing such ires is sometimes required to
1143 				 * avoid misrouting: say we have a machine with
1144 				 * two interfaces (I1 and I2), a default router
1145 				 * R on the I1 subnet, and a host route to an
1146 				 * off-link destination D with a gateway G on
1147 				 * the I2 subnet.
1148 				 * Under normal operation, we will have an
1149 				 * on-link cache entry for G and an off-link
1150 				 * cache entry for D with G as ire_gateway_addr,
1151 				 * traffic to D will reach its destination
1152 				 * through gateway G.
1153 				 * If the administrator does 'ifconfig I2 down',
1154 				 * the cache entries for D and G will be
1155 				 * flushed. However, G will now be resolved as
1156 				 * an off-link destination using R (the default
1157 				 * router) as gateway. Then D will also be
1158 				 * resolved as an off-link destination using G
1159 				 * as gateway - this behavior is due to
1160 				 * compatibility reasons, see comment in
1161 				 * ire_ihandle_lookup_offlink(). Traffic to D
1162 				 * will go to the router R and probably won't
1163 				 * reach the destination.
1164 				 * The administrator then does 'ifconfig I2 up'.
1165 				 * Since G is on the I2 subnet, this routine
1166 				 * will flush its cache entry. It must also
1167 				 * flush the cache entry for D, otherwise
1168 				 * traffic will stay misrouted until the IRE
1169 				 * times out.
1170 				 */
1171 				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
1172 				    ire->ire_mask_v6, ire->ire_addr_v6)) {
1173 					ire_delete(cire);
1174 					continue;
1175 				}
1176 			}
1177 			IRB_REFRELE(irb);
1178 		}
1179 	} else {
1180 		/*
1181 		 * delete the cache entries based on
1182 		 * handle in the IRE as this IRE is
1183 		 * being deleted/changed.
1184 		 */
1185 		for (i = 0; i < ipst->ips_ip6_cache_table_size; i++) {
1186 			irb = &ipst->ips_ip_cache_table_v6[i];
1187 			if ((cire = irb->irb_ire) == NULL)
1188 				continue;
1189 			IRB_REFHOLD(irb);
1190 			for (cire = irb->irb_ire; cire != NULL;
1191 			    cire = cire->ire_next) {
1192 				if (cire->ire_type != IRE_CACHE)
1193 					continue;
1194 				if ((cire->ire_phandle == 0 ||
1195 				    cire->ire_phandle != ire->ire_phandle) &&
1196 				    (cire->ire_ihandle == 0 ||
1197 				    cire->ire_ihandle != ire->ire_ihandle))
1198 					continue;
1199 				ire_delete(cire);
1200 			}
1201 			IRB_REFRELE(irb);
1202 		}
1203 	}
1204 }
1205 
1206 /*
1207  * Matches the arguments passed with the values in the ire.
1208  *
1209  * Note: for match types that match using "ipif" passed in, ipif
1210  * must be checked for non-NULL before calling this routine.
1211  */
1212 static boolean_t
1213 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
1214     const in6_addr_t *gateway, int type, const ipif_t *ipif, zoneid_t zoneid,
1215     uint32_t ihandle, const ts_label_t *tsl, int match_flags)
1216 {
1217 	in6_addr_t masked_addr;
1218 	in6_addr_t gw_addr_v6;
1219 	ill_t *ire_ill = NULL, *dst_ill;
1220 	ill_t *ipif_ill = NULL;
1221 	ill_group_t *ire_ill_group = NULL;
1222 	ill_group_t *ipif_ill_group = NULL;
1223 	ipif_t	*src_ipif;
1224 
1225 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1226 	ASSERT(addr != NULL);
1227 	ASSERT(mask != NULL);
1228 	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
1229 	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) ||
1230 	    (ipif != NULL && ipif->ipif_isv6));
1231 
1232 	/*
1233 	 * HIDDEN cache entries have to be looked up specifically with
1234 	 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
1235 	 * when the interface is FAILED or INACTIVE. In that case,
1236 	 * any IRE_CACHES that exists should be marked with
1237 	 * IRE_MARK_HIDDEN. So, we don't really need to match below
1238 	 * for IRE_MARK_HIDDEN. But we do so for consistency.
1239 	 */
1240 	if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
1241 	    (ire->ire_marks & IRE_MARK_HIDDEN))
1242 		return (B_FALSE);
1243 
1244 	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
1245 	    ire->ire_zoneid != ALL_ZONES) {
1246 		/*
1247 		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
1248 		 * valid and does not match that of ire_zoneid, a failure to
1249 		 * match is reported at this point. Otherwise, since some IREs
1250 		 * that are available in the global zone can be used in local
1251 		 * zones, additional checks need to be performed:
1252 		 *
1253 		 *	IRE_CACHE and IRE_LOOPBACK entries should
1254 		 *	never be matched in this situation.
1255 		 *
1256 		 *	IRE entries that have an interface associated with them
1257 		 *	should in general not match unless they are an IRE_LOCAL
1258 		 *	or in the case when MATCH_IRE_DEFAULT has been set in
1259 		 *	the caller.  In the case of the former, checking of the
1260 		 *	other fields supplied should take place.
1261 		 *
1262 		 *	In the case where MATCH_IRE_DEFAULT has been set,
1263 		 *	all of the ipif's associated with the IRE's ill are
1264 		 *	checked to see if there is a matching zoneid.  If any
1265 		 *	one ipif has a matching zoneid, this IRE is a
1266 		 *	potential candidate so checking of the other fields
1267 		 *	takes place.
1268 		 *
1269 		 *	In the case where the IRE_INTERFACE has a usable source
1270 		 *	address (indicated by ill_usesrc_ifindex) in the
1271 		 *	correct zone then it's permitted to return this IRE
1272 		 */
1273 		if (match_flags & MATCH_IRE_ZONEONLY)
1274 			return (B_FALSE);
1275 		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
1276 			return (B_FALSE);
1277 		/*
1278 		 * Note, IRE_INTERFACE can have the stq as NULL. For
1279 		 * example, if the default multicast route is tied to
1280 		 * the loopback address.
1281 		 */
1282 		if ((ire->ire_type & IRE_INTERFACE) &&
1283 		    (ire->ire_stq != NULL)) {
1284 			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
1285 			/*
1286 			 * If there is a usable source address in the
1287 			 * zone, then it's ok to return an
1288 			 * IRE_INTERFACE
1289 			 */
1290 			if ((dst_ill->ill_usesrc_ifindex != 0) &&
1291 			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
1292 			    RESTRICT_TO_NONE, IPV6_PREFER_SRC_DEFAULT, zoneid))
1293 			    != NULL) {
1294 				ip3dbg(("ire_match_args: src_ipif %p"
1295 				    " dst_ill %p", (void *)src_ipif,
1296 				    (void *)dst_ill));
1297 				ipif_refrele(src_ipif);
1298 			} else {
1299 				ip3dbg(("ire_match_args: src_ipif NULL"
1300 				    " dst_ill %p\n", (void *)dst_ill));
1301 				return (B_FALSE);
1302 			}
1303 		}
1304 		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
1305 		    !(ire->ire_type & IRE_INTERFACE)) {
1306 			ipif_t	*tipif;
1307 
1308 			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
1309 				return (B_FALSE);
1310 			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
1311 			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
1312 			    tipif != NULL; tipif = tipif->ipif_next) {
1313 				if (IPIF_CAN_LOOKUP(tipif) &&
1314 				    (tipif->ipif_flags & IPIF_UP) &&
1315 				    (tipif->ipif_zoneid == zoneid ||
1316 				    tipif->ipif_zoneid == ALL_ZONES))
1317 					break;
1318 			}
1319 			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
1320 			if (tipif == NULL)
1321 				return (B_FALSE);
1322 		}
1323 	}
1324 
1325 	if (match_flags & MATCH_IRE_GW) {
1326 		mutex_enter(&ire->ire_lock);
1327 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1328 		mutex_exit(&ire->ire_lock);
1329 	}
1330 	/*
1331 	 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
1332 	 * somebody wants to send out on a particular interface which
1333 	 * is given by ire_stq and hence use ire_stq to derive the ill
1334 	 * value. ire_ipif for IRE_CACHES is just the
1335 	 * means of getting a source address i.e ire_src_addr_v6 =
1336 	 * ire->ire_ipif->ipif_src_addr_v6.
1337 	 */
1338 	if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) {
1339 		ire_ill = ire_to_ill(ire);
1340 		if (ire_ill != NULL)
1341 			ire_ill_group = ire_ill->ill_group;
1342 		ipif_ill = ipif->ipif_ill;
1343 		ipif_ill_group = ipif_ill->ill_group;
1344 	}
1345 
1346 	/* No ire_addr_v6 bits set past the mask */
1347 	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
1348 	    ire->ire_addr_v6));
1349 	V6_MASK_COPY(*addr, *mask, masked_addr);
1350 
1351 	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
1352 	    ((!(match_flags & MATCH_IRE_GW)) ||
1353 	    IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
1354 	    ((!(match_flags & MATCH_IRE_TYPE)) ||
1355 	    (ire->ire_type & type)) &&
1356 	    ((!(match_flags & MATCH_IRE_SRC)) ||
1357 	    IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
1358 	    &ipif->ipif_v6src_addr)) &&
1359 	    ((!(match_flags & MATCH_IRE_IPIF)) ||
1360 	    (ire->ire_ipif == ipif)) &&
1361 	    ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
1362 	    (ire->ire_type != IRE_CACHE ||
1363 	    ire->ire_marks & IRE_MARK_HIDDEN)) &&
1364 	    ((!(match_flags & MATCH_IRE_ILL)) ||
1365 	    (ire_ill == ipif_ill)) &&
1366 	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
1367 	    (ire->ire_ihandle == ihandle)) &&
1368 	    ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
1369 	    (ire_ill == ipif_ill) ||
1370 	    (ire_ill_group != NULL &&
1371 	    ire_ill_group == ipif_ill_group)) &&
1372 	    ((!(match_flags & MATCH_IRE_SECATTR)) ||
1373 	    (!is_system_labeled()) ||
1374 	    (tsol_ire_match_gwattr(ire, tsl) == 0))) {
1375 		/* We found the matched IRE */
1376 		return (B_TRUE);
1377 	}
1378 	return (B_FALSE);
1379 }
1380 
1381 /*
1382  * Lookup for a route in all the tables
1383  */
1384 ire_t *
1385 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1386     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
1387     zoneid_t zoneid, const ts_label_t *tsl, int flags, ip_stack_t *ipst)
1388 {
1389 	ire_t *ire = NULL;
1390 
1391 	/*
1392 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1393 	 * MATCH_IRE_ILL is set.
1394 	 */
1395 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1396 	    (ipif == NULL))
1397 		return (NULL);
1398 
1399 	/*
1400 	 * might be asking for a cache lookup,
1401 	 * This is not best way to lookup cache,
1402 	 * user should call ire_cache_lookup directly.
1403 	 *
1404 	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
1405 	 * in the forwarding table, if the applicable type flags were set.
1406 	 */
1407 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
1408 		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
1409 		    tsl, flags, ipst);
1410 		if (ire != NULL)
1411 			return (ire);
1412 	}
1413 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
1414 		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
1415 		    pire, zoneid, 0, tsl, flags, ipst);
1416 	}
1417 	return (ire);
1418 }
1419 
1420 /*
1421  * Lookup a route in forwarding table.
1422  * specific lookup is indicated by passing the
1423  * required parameters and indicating the
1424  * match required in flag field.
1425  *
1426  * Looking for default route can be done in three ways
1427  * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
1428  *    along with other matches.
1429  * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
1430  *    field along with other matches.
1431  * 3) if the destination and mask are passed as zeros.
1432  *
1433  * A request to return a default route if no route
1434  * is found, can be specified by setting MATCH_IRE_DEFAULT
1435  * in flags.
1436  *
1437  * It does not support recursion more than one level. It
1438  * will do recursive lookup only when the lookup maps to
1439  * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
1440  *
1441  * If the routing table is setup to allow more than one level
1442  * of recursion, the cleaning up cache table will not work resulting
1443  * in invalid routing.
1444  *
1445  * Supports link-local addresses by following the ipif/ill when recursing.
1446  *
1447  * NOTE : When this function returns NULL, pire has already been released.
1448  *	  pire is valid only when this function successfully returns an
1449  *	  ire.
1450  */
1451 ire_t *
1452 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1453     const in6_addr_t *gateway, int type, const ipif_t *ipif, ire_t **pire,
1454     zoneid_t zoneid, uint32_t ihandle, const ts_label_t *tsl, int flags,
1455     ip_stack_t *ipst)
1456 {
1457 	irb_t *irb_ptr;
1458 	ire_t	*rire;
1459 	ire_t *ire = NULL;
1460 	ire_t	*saved_ire;
1461 	nce_t	*nce;
1462 	int i;
1463 	in6_addr_t gw_addr_v6;
1464 
1465 	ASSERT(addr != NULL);
1466 	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
1467 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
1468 	ASSERT(ipif == NULL || ipif->ipif_isv6);
1469 
1470 	/*
1471 	 * When we return NULL from this function, we should make
1472 	 * sure that *pire is NULL so that the callers will not
1473 	 * wrongly REFRELE the pire.
1474 	 */
1475 	if (pire != NULL)
1476 		*pire = NULL;
1477 	/*
1478 	 * ire_match_args_v6() will dereference ipif if MATCH_IRE_SRC,
1479 	 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
1480 	 */
1481 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1482 	    (ipif == NULL))
1483 		return (NULL);
1484 
1485 	/*
1486 	 * If the mask is known, the lookup
1487 	 * is simple, if the mask is not known
1488 	 * we need to search.
1489 	 */
1490 	if (flags & MATCH_IRE_MASK) {
1491 		uint_t masklen;
1492 
1493 		masklen = ip_mask_to_plen_v6(mask);
1494 		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL)
1495 			return (NULL);
1496 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
1497 		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
1498 		    ipst->ips_ip6_ftable_hash_size)]);
1499 		rw_enter(&irb_ptr->irb_lock, RW_READER);
1500 		for (ire = irb_ptr->irb_ire; ire != NULL;
1501 		    ire = ire->ire_next) {
1502 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
1503 				continue;
1504 			if (ire_match_args_v6(ire, addr, mask, gateway, type,
1505 			    ipif, zoneid, ihandle, tsl, flags))
1506 				goto found_ire;
1507 		}
1508 		rw_exit(&irb_ptr->irb_lock);
1509 	} else {
1510 		/*
1511 		 * In this case we don't know the mask, we need to
1512 		 * search the table assuming different mask sizes.
1513 		 * we start with 128 bit mask, we don't allow default here.
1514 		 */
1515 		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
1516 			in6_addr_t tmpmask;
1517 
1518 			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
1519 				continue;
1520 			(void) ip_plen_to_mask_v6(i, &tmpmask);
1521 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
1522 			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
1523 			    ipst->ips_ip6_ftable_hash_size)];
1524 			rw_enter(&irb_ptr->irb_lock, RW_READER);
1525 			for (ire = irb_ptr->irb_ire; ire != NULL;
1526 			    ire = ire->ire_next) {
1527 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1528 					continue;
1529 				if (ire_match_args_v6(ire, addr,
1530 				    &ire->ire_mask_v6, gateway, type, ipif,
1531 				    zoneid, ihandle, tsl, flags))
1532 					goto found_ire;
1533 			}
1534 			rw_exit(&irb_ptr->irb_lock);
1535 		}
1536 	}
1537 
1538 	/*
1539 	 * We come here if no route has yet been found.
1540 	 *
1541 	 * Handle the case where default route is
1542 	 * requested by specifying type as one of the possible
1543 	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
1544 	 *
1545 	 * If MATCH_IRE_MASK is specified, then the appropriate default route
1546 	 * would have been found above if it exists so it isn't looked up here.
1547 	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
1548 	 * searched for later.
1549 	 */
1550 	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
1551 	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
1552 		if (ipst->ips_ip_forwarding_table_v6[0] != NULL) {
1553 			/* addr & mask is zero for defaults */
1554 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[0][
1555 			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
1556 			    ipst->ips_ip6_ftable_hash_size)];
1557 			rw_enter(&irb_ptr->irb_lock, RW_READER);
1558 			for (ire = irb_ptr->irb_ire; ire != NULL;
1559 			    ire = ire->ire_next) {
1560 
1561 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1562 					continue;
1563 
1564 				if (ire_match_args_v6(ire, addr,
1565 				    &ipv6_all_zeros, gateway, type, ipif,
1566 				    zoneid, ihandle, tsl, flags))
1567 					goto found_ire;
1568 			}
1569 			rw_exit(&irb_ptr->irb_lock);
1570 		}
1571 	}
1572 	/*
1573 	 * We come here only if no route is found.
1574 	 * see if the default route can be used which is allowed
1575 	 * only if the default matching criteria is specified.
1576 	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
1577 	 * entries. However, the ip_forwarding_table_v6[0] also contains
1578 	 * interface routes thus the count can be zero.
1579 	 */
1580 	saved_ire = NULL;
1581 	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
1582 	    MATCH_IRE_DEFAULT) {
1583 		ire_t	*ire_origin;
1584 		uint_t	g_index;
1585 		uint_t	index;
1586 
1587 		if (ipst->ips_ip_forwarding_table_v6[0] == NULL)
1588 			return (NULL);
1589 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[0])[0];
1590 
1591 		/*
1592 		 * Keep a tab on the bucket while looking the IRE_DEFAULT
1593 		 * entries. We need to keep track of a particular IRE
1594 		 * (ire_origin) so this ensures that it will not be unlinked
1595 		 * from the hash list during the recursive lookup below.
1596 		 */
1597 		IRB_REFHOLD(irb_ptr);
1598 		ire = irb_ptr->irb_ire;
1599 		if (ire == NULL) {
1600 			IRB_REFRELE(irb_ptr);
1601 			return (NULL);
1602 		}
1603 
1604 		/*
1605 		 * Get the index first, since it can be changed by other
1606 		 * threads. Then get to the right default route skipping
1607 		 * default interface routes if any. As we hold a reference on
1608 		 * the IRE bucket, ipv6_ire_default_count can only increase so
1609 		 * we can't reach the end of the hash list unexpectedly.
1610 		 */
1611 		if (ipst->ips_ipv6_ire_default_count != 0) {
1612 			g_index = ipst->ips_ipv6_ire_default_index++;
1613 			index = g_index % ipst->ips_ipv6_ire_default_count;
1614 			while (index != 0) {
1615 				if (!(ire->ire_type & IRE_INTERFACE))
1616 					index--;
1617 				ire = ire->ire_next;
1618 			}
1619 			ASSERT(ire != NULL);
1620 		} else {
1621 			/*
1622 			 * No default route, so we only have default interface
1623 			 * routes: don't enter the first loop.
1624 			 */
1625 			ire = NULL;
1626 		}
1627 
1628 		/*
1629 		 * Round-robin the default routers list looking for a neighbor
1630 		 * that matches the passed in parameters and is reachable.  If
1631 		 * none found, just return a route from the default router list
1632 		 * if it exists. If we can't find a default route (IRE_DEFAULT),
1633 		 * look for interface default routes.
1634 		 * We start with the ire we found above and we walk the hash
1635 		 * list until we're back where we started, see
1636 		 * ire_get_next_default_ire(). It doesn't matter if default
1637 		 * routes are added or deleted by other threads - we know this
1638 		 * ire will stay in the list because we hold a reference on the
1639 		 * ire bucket.
1640 		 * NB: if we only have interface default routes, ire is NULL so
1641 		 * we don't even enter this loop (see above).
1642 		 */
1643 		ire_origin = ire;
1644 		for (; ire != NULL;
1645 		    ire = ire_get_next_default_ire(ire, ire_origin)) {
1646 
1647 			if (ire_match_args_v6(ire, addr,
1648 			    &ipv6_all_zeros, gateway, type, ipif,
1649 			    zoneid, ihandle, tsl, flags)) {
1650 				int match_flags;
1651 
1652 				/*
1653 				 * We have something to work with.
1654 				 * If we can find a resolved/reachable
1655 				 * entry, we will use this. Otherwise
1656 				 * we'll try to find an entry that has
1657 				 * a resolved cache entry. We will fallback
1658 				 * on this if we don't find anything else.
1659 				 */
1660 				if (saved_ire == NULL)
1661 					saved_ire = ire;
1662 				mutex_enter(&ire->ire_lock);
1663 				gw_addr_v6 = ire->ire_gateway_addr_v6;
1664 				mutex_exit(&ire->ire_lock);
1665 				match_flags = MATCH_IRE_ILL_GROUP |
1666 				    MATCH_IRE_SECATTR;
1667 				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
1668 				    0, ire->ire_ipif, zoneid, tsl, match_flags,
1669 				    ipst);
1670 				if (rire != NULL) {
1671 					nce = rire->ire_nce;
1672 					if (nce != NULL &&
1673 					    NCE_ISREACHABLE(nce) &&
1674 					    nce->nce_flags & NCE_F_ISROUTER) {
1675 						ire_refrele(rire);
1676 						IRE_REFHOLD(ire);
1677 						IRB_REFRELE(irb_ptr);
1678 						goto found_ire_held;
1679 					} else if (nce != NULL &&
1680 					    !(nce->nce_flags &
1681 					    NCE_F_ISROUTER)) {
1682 						/*
1683 						 * Make sure we don't use
1684 						 * this ire
1685 						 */
1686 						if (saved_ire == ire)
1687 							saved_ire = NULL;
1688 					}
1689 					ire_refrele(rire);
1690 				} else if (ipst->
1691 				    ips_ipv6_ire_default_count > 1 &&
1692 				    zoneid != GLOBAL_ZONEID) {
1693 					/*
1694 					 * When we're in a local zone, we're
1695 					 * only interested in default routers
1696 					 * that are reachable through ipifs
1697 					 * within our zone.
1698 					 * The potentially expensive call to
1699 					 * ire_route_lookup_v6() is avoided when
1700 					 * we have only one default route.
1701 					 */
1702 					int ire_match_flags = MATCH_IRE_TYPE |
1703 					    MATCH_IRE_SECATTR;
1704 
1705 					if (ire->ire_ipif != NULL) {
1706 						ire_match_flags |=
1707 						    MATCH_IRE_ILL_GROUP;
1708 					}
1709 					rire = ire_route_lookup_v6(&gw_addr_v6,
1710 					    NULL, NULL, IRE_INTERFACE,
1711 					    ire->ire_ipif, NULL,
1712 					    zoneid, tsl, ire_match_flags, ipst);
1713 					if (rire != NULL) {
1714 						ire_refrele(rire);
1715 						saved_ire = ire;
1716 					} else if (saved_ire == ire) {
1717 						/*
1718 						 * Make sure we don't use
1719 						 * this ire
1720 						 */
1721 						saved_ire = NULL;
1722 					}
1723 				}
1724 			}
1725 		}
1726 		if (saved_ire != NULL) {
1727 			ire = saved_ire;
1728 			IRE_REFHOLD(ire);
1729 			IRB_REFRELE(irb_ptr);
1730 			goto found_ire_held;
1731 		} else {
1732 			/*
1733 			 * Look for a interface default route matching the
1734 			 * args passed in. No round robin here. Just pick
1735 			 * the right one.
1736 			 */
1737 			for (ire = irb_ptr->irb_ire; ire != NULL;
1738 			    ire = ire->ire_next) {
1739 
1740 				if (!(ire->ire_type & IRE_INTERFACE))
1741 					continue;
1742 
1743 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1744 					continue;
1745 
1746 				if (ire_match_args_v6(ire, addr,
1747 				    &ipv6_all_zeros, gateway, type, ipif,
1748 				    zoneid, ihandle, tsl, flags)) {
1749 					IRE_REFHOLD(ire);
1750 					IRB_REFRELE(irb_ptr);
1751 					goto found_ire_held;
1752 				}
1753 			}
1754 			IRB_REFRELE(irb_ptr);
1755 		}
1756 	}
1757 	ASSERT(ire == NULL);
1758 	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
1759 	return (NULL);
1760 found_ire:
1761 	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
1762 	IRE_REFHOLD(ire);
1763 	rw_exit(&irb_ptr->irb_lock);
1764 
1765 found_ire_held:
1766 	if ((flags & MATCH_IRE_RJ_BHOLE) &&
1767 	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
1768 		return (ire);
1769 	}
1770 	/*
1771 	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
1772 	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
1773 	 * IRE_INTERFACE type was found, return that.  If it was some other
1774 	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
1775 	 * is necessary to fill in the  parent IRE pointed to by pire, and
1776 	 * then lookup the gateway address of  the parent.  For backwards
1777 	 * compatibility, if this lookup returns an
1778 	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
1779 	 * of lookup is done.
1780 	 */
1781 	if (flags & MATCH_IRE_RECURSIVE) {
1782 		const ipif_t *gw_ipif;
1783 		int match_flags = MATCH_IRE_DSTONLY;
1784 
1785 		if (ire->ire_type & IRE_INTERFACE)
1786 			return (ire);
1787 		if (pire != NULL)
1788 			*pire = ire;
1789 		/*
1790 		 * If we can't find an IRE_INTERFACE or the caller has not
1791 		 * asked for pire, we need to REFRELE the saved_ire.
1792 		 */
1793 		saved_ire = ire;
1794 
1795 		/*
1796 		 * Currently MATCH_IRE_ILL is never used with
1797 		 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
1798 		 * sending out packets as MATCH_IRE_ILL is used only
1799 		 * for communicating with on-link hosts. We can't assert
1800 		 * that here as RTM_GET calls this function with
1801 		 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
1802 		 * We have already used the MATCH_IRE_ILL in determining
1803 		 * the right prefix route at this point. To match the
1804 		 * behavior of how we locate routes while sending out
1805 		 * packets, we don't want to use MATCH_IRE_ILL below
1806 		 * while locating the interface route.
1807 		 */
1808 		if (ire->ire_ipif != NULL)
1809 			match_flags |= MATCH_IRE_ILL_GROUP;
1810 
1811 		mutex_enter(&ire->ire_lock);
1812 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1813 		mutex_exit(&ire->ire_lock);
1814 
1815 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
1816 		    ire->ire_ipif, NULL, zoneid, tsl, match_flags, ipst);
1817 		if (ire == NULL) {
1818 			/*
1819 			 * In this case we have to deal with the
1820 			 * MATCH_IRE_PARENT flag, which means the
1821 			 * parent has to be returned if ire is NULL.
1822 			 * The aim of this is to have (at least) a starting
1823 			 * ire when we want to look at all of the ires in a
1824 			 * bucket aimed at a single destination (as is the
1825 			 * case in ip_newroute_v6 for the RTF_MULTIRT
1826 			 * flagged routes).
1827 			 */
1828 			if (flags & MATCH_IRE_PARENT) {
1829 				if (pire != NULL) {
1830 					/*
1831 					 * Need an extra REFHOLD, if the
1832 					 * parent ire is returned via both
1833 					 * ire and pire.
1834 					 */
1835 					IRE_REFHOLD(saved_ire);
1836 				}
1837 				ire = saved_ire;
1838 			} else {
1839 				ire_refrele(saved_ire);
1840 				if (pire != NULL)
1841 					*pire = NULL;
1842 			}
1843 			return (ire);
1844 		}
1845 		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
1846 			/*
1847 			 * If the caller did not ask for pire, release
1848 			 * it now.
1849 			 */
1850 			if (pire == NULL) {
1851 				ire_refrele(saved_ire);
1852 			}
1853 			return (ire);
1854 		}
1855 		match_flags |= MATCH_IRE_TYPE;
1856 		mutex_enter(&ire->ire_lock);
1857 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1858 		mutex_exit(&ire->ire_lock);
1859 		gw_ipif = ire->ire_ipif;
1860 		ire_refrele(ire);
1861 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
1862 		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
1863 		    NULL, match_flags, ipst);
1864 		if (ire == NULL) {
1865 			/*
1866 			 * In this case we have to deal with the
1867 			 * MATCH_IRE_PARENT flag, which means the
1868 			 * parent has to be returned if ire is NULL.
1869 			 * The aim of this is to have (at least) a starting
1870 			 * ire when we want to look at all of the ires in a
1871 			 * bucket aimed at a single destination (as is the
1872 			 * case in ip_newroute_v6 for the RTF_MULTIRT
1873 			 * flagged routes).
1874 			 */
1875 			if (flags & MATCH_IRE_PARENT) {
1876 				if (pire != NULL) {
1877 					/*
1878 					 * Need an extra REFHOLD, if the
1879 					 * parent ire is returned via both
1880 					 * ire and pire.
1881 					 */
1882 					IRE_REFHOLD(saved_ire);
1883 				}
1884 				ire = saved_ire;
1885 			} else {
1886 				ire_refrele(saved_ire);
1887 				if (pire != NULL)
1888 					*pire = NULL;
1889 			}
1890 			return (ire);
1891 		} else if (pire == NULL) {
1892 			/*
1893 			 * If the caller did not ask for pire, release
1894 			 * it now.
1895 			 */
1896 			ire_refrele(saved_ire);
1897 		}
1898 		return (ire);
1899 	}
1900 
1901 	ASSERT(pire == NULL || *pire == NULL);
1902 	return (ire);
1903 }
1904 
1905 /*
1906  * Delete the IRE cache for the gateway and all IRE caches whose
1907  * ire_gateway_addr_v6 points to this gateway, and allow them to
1908  * be created on demand by ip_newroute_v6.
1909  */
1910 void
1911 ire_clookup_delete_cache_gw_v6(const in6_addr_t *addr, zoneid_t zoneid,
1912 	ip_stack_t *ipst)
1913 {
1914 	irb_t *irb;
1915 	ire_t *ire;
1916 
1917 	irb = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
1918 	    ipst->ips_ip6_cache_table_size)];
1919 	IRB_REFHOLD(irb);
1920 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
1921 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
1922 			continue;
1923 
1924 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
1925 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, 0,
1926 		    IRE_CACHE, NULL, zoneid, 0, NULL, MATCH_IRE_TYPE)) {
1927 			ire_delete(ire);
1928 		}
1929 	}
1930 	IRB_REFRELE(irb);
1931 
1932 	ire_walk_v6(ire_delete_cache_gw_v6, (char *)addr, zoneid, ipst);
1933 }
1934 
1935 /*
1936  * Looks up cache table for a route.
1937  * specific lookup can be indicated by
1938  * passing the MATCH_* flags and the
1939  * necessary parameters.
1940  */
1941 ire_t *
1942 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway,
1943     int type, const ipif_t *ipif, zoneid_t zoneid, const ts_label_t *tsl,
1944     int flags, ip_stack_t *ipst)
1945 {
1946 	ire_t *ire;
1947 	irb_t *irb_ptr;
1948 	ASSERT(addr != NULL);
1949 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
1950 
1951 	/*
1952 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1953 	 * MATCH_IRE_ILL is set.
1954 	 */
1955 	if ((flags & (MATCH_IRE_SRC |  MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1956 	    (ipif == NULL))
1957 		return (NULL);
1958 
1959 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
1960 	    ipst->ips_ip6_cache_table_size)];
1961 	rw_enter(&irb_ptr->irb_lock, RW_READER);
1962 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
1963 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
1964 			continue;
1965 
1966 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
1967 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway,
1968 		    type, ipif, zoneid, 0, tsl, flags)) {
1969 			IRE_REFHOLD(ire);
1970 			rw_exit(&irb_ptr->irb_lock);
1971 			return (ire);
1972 		}
1973 	}
1974 	rw_exit(&irb_ptr->irb_lock);
1975 	return (NULL);
1976 }
1977 
1978 /*
1979  * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers
1980  * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get
1981  * to the hidden ones.
1982  *
1983  * In general the zoneid has to match (where ALL_ZONES match all of them).
1984  * But for IRE_LOCAL we also need to handle the case where L2 should
1985  * conceptually loop back the packet. This is necessary since neither
1986  * Ethernet drivers nor Ethernet hardware loops back packets sent to their
1987  * own MAC address. This loopback is needed when the normal
1988  * routes (ignoring IREs with different zoneids) would send out the packet on
1989  * the same ill (or ill group) as the ill with which this IRE_LOCAL is
1990  * associated.
1991  *
1992  * Earlier versions of this code always matched an IRE_LOCAL independently of
1993  * the zoneid. We preserve that earlier behavior when
1994  * ip_restrict_interzone_loopback is turned off.
1995  */
1996 ire_t *
1997 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid,
1998     const ts_label_t *tsl, ip_stack_t *ipst)
1999 {
2000 	irb_t *irb_ptr;
2001 	ire_t *ire;
2002 
2003 	irb_ptr = &ipst->ips_ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
2004 	    ipst->ips_ip6_cache_table_size)];
2005 	rw_enter(&irb_ptr->irb_lock, RW_READER);
2006 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
2007 		if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
2008 			continue;
2009 		if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) {
2010 			/*
2011 			 * Finally, check if the security policy has any
2012 			 * restriction on using this route for the specified
2013 			 * message.
2014 			 */
2015 			if (tsl != NULL &&
2016 			    ire->ire_gw_secattr != NULL &&
2017 			    tsol_ire_match_gwattr(ire, tsl) != 0) {
2018 				continue;
2019 			}
2020 
2021 			if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid ||
2022 			    ire->ire_zoneid == ALL_ZONES) {
2023 				IRE_REFHOLD(ire);
2024 				rw_exit(&irb_ptr->irb_lock);
2025 				return (ire);
2026 			}
2027 
2028 			if (ire->ire_type == IRE_LOCAL) {
2029 				if (ipst->ips_ip_restrict_interzone_loopback &&
2030 				    !ire_local_ok_across_zones(ire, zoneid,
2031 				    (void *)addr, tsl, ipst))
2032 					continue;
2033 
2034 				IRE_REFHOLD(ire);
2035 				rw_exit(&irb_ptr->irb_lock);
2036 				return (ire);
2037 			}
2038 		}
2039 	}
2040 	rw_exit(&irb_ptr->irb_lock);
2041 	return (NULL);
2042 }
2043 
2044 /*
2045  * Locate the interface ire that is tied to the cache ire 'cire' via
2046  * cire->ire_ihandle.
2047  *
2048  * We are trying to create the cache ire for an onlink destn. or
2049  * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER
2050  * case for xresolv interfaces, after the ire has come back from
2051  * an external resolver.
2052  */
2053 static ire_t *
2054 ire_ihandle_lookup_onlink_v6(ire_t *cire)
2055 {
2056 	ire_t	*ire;
2057 	int	match_flags;
2058 	int	i;
2059 	int	j;
2060 	irb_t	*irb_ptr;
2061 	ip_stack_t	*ipst = cire->ire_ipst;
2062 
2063 	ASSERT(cire != NULL);
2064 
2065 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2066 	/*
2067 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2068 	 * (When ip_newroute_v6() created 'cire' for an on-link destn.
2069 	 * it set its cmask from the interface ire's mask)
2070 	 */
2071 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6,
2072 	    NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle,
2073 	    NULL, match_flags, ipst);
2074 	if (ire != NULL)
2075 		return (ire);
2076 	/*
2077 	 * If we didn't find an interface ire above, we can't declare failure.
2078 	 * For backwards compatibility, we need to support prefix routes
2079 	 * pointing to next hop gateways that are not on-link.
2080 	 *
2081 	 * In the resolver/noresolver case, ip_newroute_v6() thinks
2082 	 * it is creating the cache ire for an onlink destination in 'cire'.
2083 	 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6()
2084 	 * cheated it, by doing ire_route_lookup_v6() twice and returning an
2085 	 * interface ire.
2086 	 *
2087 	 * Eg. default	-	gw1			(line 1)
2088 	 *	gw1	-	gw2			(line 2)
2089 	 *	gw2	-	hme0			(line 3)
2090 	 *
2091 	 * In the above example, ip_newroute_v6() tried to create the cache ire
2092 	 * 'cire' for gw1, based on the interface route in line 3. The
2093 	 * ire_ftable_lookup_v6() above fails, because there is
2094 	 * no interface route to reach gw1. (it is gw2). We fall thru below.
2095 	 *
2096 	 * Do a brute force search based on the ihandle in a subset of the
2097 	 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise
2098 	 * things become very complex, since we don't have 'pire' in this
2099 	 * case. (Also note that this method is not possible in the offlink
2100 	 * case because we don't know the mask)
2101 	 */
2102 	i = ip_mask_to_plen_v6(&cire->ire_cmask_v6);
2103 	if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
2104 		return (NULL);
2105 	for (j = 0; j < ipst->ips_ip6_ftable_hash_size; j++) {
2106 		irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][j];
2107 		rw_enter(&irb_ptr->irb_lock, RW_READER);
2108 		for (ire = irb_ptr->irb_ire; ire != NULL;
2109 		    ire = ire->ire_next) {
2110 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
2111 				continue;
2112 			if ((ire->ire_type & IRE_INTERFACE) &&
2113 			    (ire->ire_ihandle == cire->ire_ihandle)) {
2114 				IRE_REFHOLD(ire);
2115 				rw_exit(&irb_ptr->irb_lock);
2116 				return (ire);
2117 			}
2118 		}
2119 		rw_exit(&irb_ptr->irb_lock);
2120 	}
2121 	return (NULL);
2122 }
2123 
2124 
2125 /*
2126  * Locate the interface ire that is tied to the cache ire 'cire' via
2127  * cire->ire_ihandle.
2128  *
2129  * We are trying to create the cache ire for an offlink destn based
2130  * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire
2131  * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in
2132  * the IRE_CACHE case.
2133  */
2134 ire_t *
2135 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire)
2136 {
2137 	ire_t	*ire;
2138 	int	match_flags;
2139 	in6_addr_t	gw_addr;
2140 	ipif_t		*gw_ipif;
2141 	ip_stack_t	*ipst = cire->ire_ipst;
2142 
2143 	ASSERT(cire != NULL && pire != NULL);
2144 
2145 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2146 	/*
2147 	 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only
2148 	 * for on-link hosts. We should never be here for onlink.
2149 	 * Thus, use MATCH_IRE_ILL_GROUP.
2150 	 */
2151 	if (pire->ire_ipif != NULL)
2152 		match_flags |= MATCH_IRE_ILL_GROUP;
2153 	/*
2154 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2155 	 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set
2156 	 * its cmask from the interface ire's mask)
2157 	 */
2158 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0,
2159 	    IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2160 	    NULL, match_flags, ipst);
2161 	if (ire != NULL)
2162 		return (ire);
2163 	/*
2164 	 * If we didn't find an interface ire above, we can't declare failure.
2165 	 * For backwards compatibility, we need to support prefix routes
2166 	 * pointing to next hop gateways that are not on-link.
2167 	 *
2168 	 * Assume we are trying to ping some offlink destn, and we have the
2169 	 * routing table below.
2170 	 *
2171 	 * Eg.	default	- gw1		<--- pire	(line 1)
2172 	 *	gw1	- gw2				(line 2)
2173 	 *	gw2	- hme0				(line 3)
2174 	 *
2175 	 * If we already have a cache ire for gw1 in 'cire', the
2176 	 * ire_ftable_lookup_v6 above would have failed, since there is no
2177 	 * interface ire to reach gw1. We will fallthru below.
2178 	 *
2179 	 * Here we duplicate the steps that ire_ftable_lookup_v6() did in
2180 	 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case.
2181 	 * The differences are the following
2182 	 * i.   We want the interface ire only, so we call
2183 	 *	ire_ftable_lookup_v6() instead of ire_route_lookup_v6()
2184 	 * ii.  We look for only prefix routes in the 1st call below.
2185 	 * ii.  We want to match on the ihandle in the 2nd call below.
2186 	 */
2187 	match_flags =  MATCH_IRE_TYPE;
2188 	if (pire->ire_ipif != NULL)
2189 		match_flags |= MATCH_IRE_ILL_GROUP;
2190 
2191 	mutex_enter(&pire->ire_lock);
2192 	gw_addr = pire->ire_gateway_addr_v6;
2193 	mutex_exit(&pire->ire_lock);
2194 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET,
2195 	    pire->ire_ipif, NULL, ALL_ZONES, 0, NULL, match_flags, ipst);
2196 	if (ire == NULL)
2197 		return (NULL);
2198 	/*
2199 	 * At this point 'ire' corresponds to the entry shown in line 2.
2200 	 * gw_addr is 'gw2' in the example above.
2201 	 */
2202 	mutex_enter(&ire->ire_lock);
2203 	gw_addr = ire->ire_gateway_addr_v6;
2204 	mutex_exit(&ire->ire_lock);
2205 	gw_ipif = ire->ire_ipif;
2206 	ire_refrele(ire);
2207 
2208 	match_flags |= MATCH_IRE_IHANDLE;
2209 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE,
2210 	    gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2211 	    NULL, match_flags, ipst);
2212 	return (ire);
2213 }
2214 
2215 /*
2216  * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER
2217  * ire associated with the specified ipif.
2218  *
2219  * This might occasionally be called when IPIF_UP is not set since
2220  * the IPV6_MULTICAST_IF as well as creating interface routes
2221  * allows specifying a down ipif (ipif_lookup* match ipifs that are down).
2222  *
2223  * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on
2224  * the ipif this routine might return NULL.
2225  * (Sometimes called as writer though not required by this function.)
2226  */
2227 ire_t *
2228 ipif_to_ire_v6(const ipif_t *ipif)
2229 {
2230 	ire_t	*ire;
2231 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
2232 
2233 	ASSERT(ipif->ipif_isv6);
2234 	if (ipif->ipif_ire_type == IRE_LOOPBACK) {
2235 		ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL,
2236 		    IRE_LOOPBACK, ipif, ALL_ZONES, NULL,
2237 		    (MATCH_IRE_TYPE | MATCH_IRE_IPIF), ipst);
2238 	} else if (ipif->ipif_flags & IPIF_POINTOPOINT) {
2239 		/* In this case we need to lookup destination address. */
2240 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr,
2241 		    &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES,
2242 		    0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
2243 		    MATCH_IRE_MASK), ipst);
2244 	} else {
2245 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet,
2246 		    &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL,
2247 		    ALL_ZONES, 0, NULL, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
2248 		    MATCH_IRE_MASK), ipst);
2249 	}
2250 	return (ire);
2251 }
2252 
2253 /*
2254  * Return B_TRUE if a multirt route is resolvable
2255  * (or if no route is resolved yet), B_FALSE otherwise.
2256  * This only works in the global zone.
2257  */
2258 boolean_t
2259 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp, const ts_label_t *tsl,
2260     ip_stack_t *ipst)
2261 {
2262 	ire_t	*first_fire;
2263 	ire_t	*first_cire;
2264 	ire_t	*fire;
2265 	ire_t	*cire;
2266 	irb_t	*firb;
2267 	irb_t	*cirb;
2268 	int	unres_cnt = 0;
2269 	boolean_t resolvable = B_FALSE;
2270 
2271 	/* Retrieve the first IRE_HOST that matches the destination */
2272 	first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST,
2273 	    NULL, NULL, ALL_ZONES, 0, tsl, MATCH_IRE_MASK | MATCH_IRE_TYPE |
2274 	    MATCH_IRE_SECATTR, ipst);
2275 
2276 	/* No route at all */
2277 	if (first_fire == NULL) {
2278 		return (B_TRUE);
2279 	}
2280 
2281 	firb = first_fire->ire_bucket;
2282 	ASSERT(firb);
2283 
2284 	/* Retrieve the first IRE_CACHE ire for that destination. */
2285 	first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID, tsl, ipst);
2286 
2287 	/* No resolved route. */
2288 	if (first_cire == NULL) {
2289 		ire_refrele(first_fire);
2290 		return (B_TRUE);
2291 	}
2292 
2293 	/* At least one route is resolved. */
2294 
2295 	cirb = first_cire->ire_bucket;
2296 	ASSERT(cirb);
2297 
2298 	/* Count the number of routes to that dest that are declared. */
2299 	IRB_REFHOLD(firb);
2300 	for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2301 		if (!(fire->ire_flags & RTF_MULTIRT))
2302 			continue;
2303 		if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp))
2304 			continue;
2305 		unres_cnt++;
2306 	}
2307 	IRB_REFRELE(firb);
2308 
2309 
2310 	/* Then subtract the number of routes to that dst that are resolved */
2311 	IRB_REFHOLD(cirb);
2312 	for (cire = first_cire; cire != NULL; cire = cire->ire_next) {
2313 		if (!(cire->ire_flags & RTF_MULTIRT))
2314 			continue;
2315 		if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp))
2316 			continue;
2317 		if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
2318 			continue;
2319 		unres_cnt--;
2320 	}
2321 	IRB_REFRELE(cirb);
2322 
2323 	/* At least one route is unresolved; search for a resolvable route. */
2324 	if (unres_cnt > 0)
2325 		resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire,
2326 		    MULTIRT_USESTAMP|MULTIRT_CACHEGW, tsl, ipst);
2327 
2328 	if (first_fire)
2329 		ire_refrele(first_fire);
2330 
2331 	if (first_cire)
2332 		ire_refrele(first_cire);
2333 
2334 	return (resolvable);
2335 }
2336 
2337 
2338 /*
2339  * Return B_TRUE and update *ire_arg and *fire_arg
2340  * if at least one resolvable route is found.
2341  * Return B_FALSE otherwise (all routes are resolved or
2342  * the remaining unresolved routes are all unresolvable).
2343  * This only works in the global zone.
2344  */
2345 boolean_t
2346 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags,
2347     const ts_label_t *tsl, ip_stack_t *ipst)
2348 {
2349 	clock_t	delta;
2350 	ire_t	*best_fire = NULL;
2351 	ire_t	*best_cire = NULL;
2352 	ire_t	*first_fire;
2353 	ire_t	*first_cire;
2354 	ire_t	*fire;
2355 	ire_t	*cire;
2356 	irb_t	*firb = NULL;
2357 	irb_t	*cirb = NULL;
2358 	ire_t	*gw_ire;
2359 	boolean_t	already_resolved;
2360 	boolean_t	res;
2361 	in6_addr_t	v6dst;
2362 	in6_addr_t	v6gw;
2363 
2364 	ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, "
2365 	    "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags));
2366 
2367 	ASSERT(ire_arg);
2368 	ASSERT(fire_arg);
2369 
2370 	/* Not an IRE_HOST ire; give up. */
2371 	if ((*fire_arg == NULL) ||
2372 	    ((*fire_arg)->ire_type != IRE_HOST)) {
2373 		return (B_FALSE);
2374 	}
2375 
2376 	/* This is the first IRE_HOST ire for that destination. */
2377 	first_fire = *fire_arg;
2378 	firb = first_fire->ire_bucket;
2379 	ASSERT(firb);
2380 
2381 	mutex_enter(&first_fire->ire_lock);
2382 	v6dst = first_fire->ire_addr_v6;
2383 	mutex_exit(&first_fire->ire_lock);
2384 
2385 	ip2dbg(("ire_multirt_lookup_v6: dst %08x\n",
2386 	    ntohl(V4_PART_OF_V6(v6dst))));
2387 
2388 	/*
2389 	 * Retrieve the first IRE_CACHE ire for that destination;
2390 	 * if we don't find one, no route for that dest is
2391 	 * resolved yet.
2392 	 */
2393 	first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID, tsl, ipst);
2394 	if (first_cire) {
2395 		cirb = first_cire->ire_bucket;
2396 	}
2397 
2398 	ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire));
2399 
2400 	/*
2401 	 * Search for a resolvable route, giving the top priority
2402 	 * to routes that can be resolved without any call to the resolver.
2403 	 */
2404 	IRB_REFHOLD(firb);
2405 
2406 	if (!IN6_IS_ADDR_MULTICAST(&v6dst)) {
2407 		/*
2408 		 * For all multiroute IRE_HOST ires for that destination,
2409 		 * check if the route via the IRE_HOST's gateway is
2410 		 * resolved yet.
2411 		 */
2412 		for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2413 
2414 			if (!(fire->ire_flags & RTF_MULTIRT))
2415 				continue;
2416 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2417 				continue;
2418 
2419 			if (fire->ire_gw_secattr != NULL &&
2420 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
2421 				continue;
2422 			}
2423 
2424 			mutex_enter(&fire->ire_lock);
2425 			v6gw = fire->ire_gateway_addr_v6;
2426 			mutex_exit(&fire->ire_lock);
2427 
2428 			ip2dbg(("ire_multirt_lookup_v6: fire %p, "
2429 			    "ire_addr %08x, ire_gateway_addr %08x\n",
2430 			    (void *)fire,
2431 			    ntohl(V4_PART_OF_V6(fire->ire_addr_v6)),
2432 			    ntohl(V4_PART_OF_V6(v6gw))));
2433 
2434 			already_resolved = B_FALSE;
2435 
2436 			if (first_cire) {
2437 				ASSERT(cirb);
2438 
2439 				IRB_REFHOLD(cirb);
2440 				/*
2441 				 * For all IRE_CACHE ires for that
2442 				 * destination.
2443 				 */
2444 				for (cire = first_cire;
2445 				    cire != NULL;
2446 				    cire = cire->ire_next) {
2447 
2448 					if (!(cire->ire_flags & RTF_MULTIRT))
2449 						continue;
2450 					if (!IN6_ARE_ADDR_EQUAL(
2451 					    &cire->ire_addr_v6, &v6dst))
2452 						continue;
2453 					if (cire->ire_marks &
2454 					    (IRE_MARK_CONDEMNED|
2455 					    IRE_MARK_HIDDEN))
2456 						continue;
2457 
2458 					if (cire->ire_gw_secattr != NULL &&
2459 					    tsol_ire_match_gwattr(cire,
2460 					    tsl) != 0) {
2461 						continue;
2462 					}
2463 
2464 					/*
2465 					 * Check if the IRE_CACHE's gateway
2466 					 * matches the IRE_HOST's gateway.
2467 					 */
2468 					if (IN6_ARE_ADDR_EQUAL(
2469 					    &cire->ire_gateway_addr_v6,
2470 					    &v6gw)) {
2471 						already_resolved = B_TRUE;
2472 						break;
2473 					}
2474 				}
2475 				IRB_REFRELE(cirb);
2476 			}
2477 
2478 			/*
2479 			 * This route is already resolved;
2480 			 * proceed with next one.
2481 			 */
2482 			if (already_resolved) {
2483 				ip2dbg(("ire_multirt_lookup_v6: found cire %p, "
2484 				    "already resolved\n", (void *)cire));
2485 				continue;
2486 			}
2487 
2488 			/*
2489 			 * The route is unresolved; is it actually
2490 			 * resolvable, i.e. is there a cache or a resolver
2491 			 * for the gateway?
2492 			 */
2493 			gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL,
2494 			    ALL_ZONES, tsl, MATCH_IRE_RECURSIVE |
2495 			    MATCH_IRE_SECATTR, ipst);
2496 
2497 			ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n",
2498 			    (void *)gw_ire));
2499 
2500 			/*
2501 			 * This route can be resolved without any call to the
2502 			 * resolver; if the MULTIRT_CACHEGW flag is set,
2503 			 * give the top priority to this ire and exit the
2504 			 * loop.
2505 			 * This occurs when an resolver reply is processed
2506 			 * through ip_wput_nondata()
2507 			 */
2508 			if ((flags & MULTIRT_CACHEGW) &&
2509 			    (gw_ire != NULL) &&
2510 			    (gw_ire->ire_type & IRE_CACHETABLE)) {
2511 				/*
2512 				 * Release the resolver associated to the
2513 				 * previous candidate best ire, if any.
2514 				 */
2515 				if (best_cire) {
2516 					ire_refrele(best_cire);
2517 					ASSERT(best_fire);
2518 				}
2519 
2520 				best_fire = fire;
2521 				best_cire = gw_ire;
2522 
2523 				ip2dbg(("ire_multirt_lookup_v6: found top prio "
2524 				    "best_fire %p, best_cire %p\n",
2525 				    (void *)best_fire, (void *)best_cire));
2526 				break;
2527 			}
2528 
2529 			/*
2530 			 * Compute the time elapsed since our preceding
2531 			 * attempt to  resolve that route.
2532 			 * If the MULTIRT_USESTAMP flag is set, we take that
2533 			 * route into account only if this time interval
2534 			 * exceeds ip_multirt_resolution_interval;
2535 			 * this prevents us from attempting to resolve a
2536 			 * broken route upon each sending of a packet.
2537 			 */
2538 			delta = lbolt - fire->ire_last_used_time;
2539 			delta = TICK_TO_MSEC(delta);
2540 
2541 			res = (boolean_t)
2542 			    ((delta > ipst->
2543 			    ips_ip_multirt_resolution_interval) ||
2544 			    (!(flags & MULTIRT_USESTAMP)));
2545 
2546 			ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, "
2547 			    "res %d\n",
2548 			    (void *)fire, delta, res));
2549 
2550 			if (res) {
2551 				/*
2552 				 * A resolver exists for the gateway: save
2553 				 * the current IRE_HOST ire as a candidate
2554 				 * best ire. If we later discover that a
2555 				 * top priority ire exists (i.e. no need to
2556 				 * call the resolver), then this new ire
2557 				 * will be preferred to the current one.
2558 				 */
2559 				if (gw_ire != NULL) {
2560 					if (best_fire == NULL) {
2561 						ASSERT(best_cire == NULL);
2562 
2563 						best_fire = fire;
2564 						best_cire = gw_ire;
2565 
2566 						ip2dbg(("ire_multirt_lookup_v6:"
2567 						    "found candidate "
2568 						    "best_fire %p, "
2569 						    "best_cire %p\n",
2570 						    (void *)best_fire,
2571 						    (void *)best_cire));
2572 
2573 						/*
2574 						 * If MULTIRT_CACHEGW is not
2575 						 * set, we ignore the top
2576 						 * priority ires that can
2577 						 * be resolved without any
2578 						 * call to the resolver;
2579 						 * In that case, there is
2580 						 * actually no need
2581 						 * to continue the loop.
2582 						 */
2583 						if (!(flags &
2584 						    MULTIRT_CACHEGW)) {
2585 							break;
2586 						}
2587 						continue;
2588 					}
2589 				} else {
2590 					/*
2591 					 * No resolver for the gateway: the
2592 					 * route is not resolvable.
2593 					 * If the MULTIRT_SETSTAMP flag is
2594 					 * set, we stamp the IRE_HOST ire,
2595 					 * so we will not select it again
2596 					 * during this resolution interval.
2597 					 */
2598 					if (flags & MULTIRT_SETSTAMP)
2599 						fire->ire_last_used_time =
2600 						    lbolt;
2601 				}
2602 			}
2603 
2604 			if (gw_ire != NULL)
2605 				ire_refrele(gw_ire);
2606 		}
2607 	} else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */
2608 
2609 		for (fire = first_fire;
2610 		    fire != NULL;
2611 		    fire = fire->ire_next) {
2612 
2613 			if (!(fire->ire_flags & RTF_MULTIRT))
2614 				continue;
2615 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2616 				continue;
2617 
2618 			if (fire->ire_gw_secattr != NULL &&
2619 			    tsol_ire_match_gwattr(fire, tsl) != 0) {
2620 				continue;
2621 			}
2622 
2623 			already_resolved = B_FALSE;
2624 
2625 			mutex_enter(&fire->ire_lock);
2626 			v6gw = fire->ire_gateway_addr_v6;
2627 			mutex_exit(&fire->ire_lock);
2628 
2629 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2630 			    IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0, tsl,
2631 			    MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE |
2632 			    MATCH_IRE_SECATTR, ipst);
2633 
2634 			/* No resolver for the gateway; we skip this ire. */
2635 			if (gw_ire == NULL) {
2636 				continue;
2637 			}
2638 
2639 			if (first_cire) {
2640 
2641 				IRB_REFHOLD(cirb);
2642 				/*
2643 				 * For all IRE_CACHE ires for that
2644 				 * destination.
2645 				 */
2646 				for (cire = first_cire;
2647 				    cire != NULL;
2648 				    cire = cire->ire_next) {
2649 
2650 					if (!(cire->ire_flags & RTF_MULTIRT))
2651 						continue;
2652 					if (!IN6_ARE_ADDR_EQUAL(
2653 					    &cire->ire_addr_v6, &v6dst))
2654 						continue;
2655 					if (cire->ire_marks &
2656 					    (IRE_MARK_CONDEMNED|
2657 					    IRE_MARK_HIDDEN))
2658 						continue;
2659 
2660 					if (cire->ire_gw_secattr != NULL &&
2661 					    tsol_ire_match_gwattr(cire,
2662 					    tsl) != 0) {
2663 						continue;
2664 					}
2665 
2666 					/*
2667 					 * Cache entries are linked to the
2668 					 * parent routes using the parent handle
2669 					 * (ire_phandle). If no cache entry has
2670 					 * the same handle as fire, fire is
2671 					 * still unresolved.
2672 					 */
2673 					ASSERT(cire->ire_phandle != 0);
2674 					if (cire->ire_phandle ==
2675 					    fire->ire_phandle) {
2676 						already_resolved = B_TRUE;
2677 						break;
2678 					}
2679 				}
2680 				IRB_REFRELE(cirb);
2681 			}
2682 
2683 			/*
2684 			 * This route is already resolved; proceed with
2685 			 * next one.
2686 			 */
2687 			if (already_resolved) {
2688 				ire_refrele(gw_ire);
2689 				continue;
2690 			}
2691 
2692 			/*
2693 			 * Compute the time elapsed since our preceding
2694 			 * attempt to resolve that route.
2695 			 * If the MULTIRT_USESTAMP flag is set, we take
2696 			 * that route into account only if this time
2697 			 * interval exceeds ip_multirt_resolution_interval;
2698 			 * this prevents us from attempting to resolve a
2699 			 * broken route upon each sending of a packet.
2700 			 */
2701 			delta = lbolt - fire->ire_last_used_time;
2702 			delta = TICK_TO_MSEC(delta);
2703 
2704 			res = (boolean_t)
2705 			    ((delta > ipst->
2706 			    ips_ip_multirt_resolution_interval) ||
2707 			    (!(flags & MULTIRT_USESTAMP)));
2708 
2709 			ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, "
2710 			    "flags %04x, res %d\n",
2711 			    (void *)fire, delta, flags, res));
2712 
2713 			if (res) {
2714 				if (best_cire) {
2715 					/*
2716 					 * Release the resolver associated
2717 					 * to the preceding candidate best
2718 					 * ire, if any.
2719 					 */
2720 					ire_refrele(best_cire);
2721 					ASSERT(best_fire);
2722 				}
2723 				best_fire = fire;
2724 				best_cire = gw_ire;
2725 				continue;
2726 			}
2727 
2728 			ire_refrele(gw_ire);
2729 		}
2730 	}
2731 
2732 	if (best_fire) {
2733 		IRE_REFHOLD(best_fire);
2734 	}
2735 	IRB_REFRELE(firb);
2736 
2737 	/* Release the first IRE_CACHE we initially looked up, if any. */
2738 	if (first_cire)
2739 		ire_refrele(first_cire);
2740 
2741 	/* Found a resolvable route. */
2742 	if (best_fire) {
2743 		ASSERT(best_cire);
2744 
2745 		if (*fire_arg)
2746 			ire_refrele(*fire_arg);
2747 		if (*ire_arg)
2748 			ire_refrele(*ire_arg);
2749 
2750 		/*
2751 		 * Update the passed arguments with the
2752 		 * resolvable multirt route we found
2753 		 */
2754 		*fire_arg = best_fire;
2755 		*ire_arg = best_cire;
2756 
2757 		ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, "
2758 		    "*fire_arg %p, *ire_arg %p\n",
2759 		    (void *)best_fire, (void *)best_cire));
2760 
2761 		return (B_TRUE);
2762 	}
2763 
2764 	ASSERT(best_cire == NULL);
2765 
2766 	ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, "
2767 	    "*ire_arg %p\n",
2768 	    (void *)*fire_arg, (void *)*ire_arg));
2769 
2770 	/* No resolvable route. */
2771 	return (B_FALSE);
2772 }
2773 
2774 
2775 /*
2776  * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp'
2777  * that goes through 'ipif'. As a fallback, a route that goes through
2778  * ipif->ipif_ill can be returned.
2779  */
2780 ire_t *
2781 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp)
2782 {
2783 	ire_t	*ire;
2784 	ire_t	*save_ire = NULL;
2785 	ire_t   *gw_ire;
2786 	irb_t   *irb;
2787 	in6_addr_t v6gw;
2788 	int	match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL;
2789 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
2790 
2791 	ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0,
2792 	    NULL, MATCH_IRE_DEFAULT, ipst);
2793 
2794 	if (ire == NULL)
2795 		return (NULL);
2796 
2797 	irb = ire->ire_bucket;
2798 	ASSERT(irb);
2799 
2800 	IRB_REFHOLD(irb);
2801 	ire_refrele(ire);
2802 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
2803 		if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) ||
2804 		    (ipif->ipif_zoneid != ire->ire_zoneid &&
2805 		    ire->ire_zoneid != ALL_ZONES)) {
2806 			continue;
2807 		}
2808 
2809 		switch (ire->ire_type) {
2810 		case IRE_DEFAULT:
2811 		case IRE_PREFIX:
2812 		case IRE_HOST:
2813 			mutex_enter(&ire->ire_lock);
2814 			v6gw = ire->ire_gateway_addr_v6;
2815 			mutex_exit(&ire->ire_lock);
2816 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2817 			    IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0,
2818 			    NULL, match_flags, ipst);
2819 
2820 			if (gw_ire != NULL) {
2821 				if (save_ire != NULL) {
2822 					ire_refrele(save_ire);
2823 				}
2824 				IRE_REFHOLD(ire);
2825 				if (gw_ire->ire_ipif == ipif) {
2826 					ire_refrele(gw_ire);
2827 
2828 					IRB_REFRELE(irb);
2829 					return (ire);
2830 				}
2831 				ire_refrele(gw_ire);
2832 				save_ire = ire;
2833 			}
2834 			break;
2835 		case IRE_IF_NORESOLVER:
2836 		case IRE_IF_RESOLVER:
2837 			if (ire->ire_ipif == ipif) {
2838 				if (save_ire != NULL) {
2839 					ire_refrele(save_ire);
2840 				}
2841 				IRE_REFHOLD(ire);
2842 
2843 				IRB_REFRELE(irb);
2844 				return (ire);
2845 			}
2846 			break;
2847 		}
2848 	}
2849 	IRB_REFRELE(irb);
2850 
2851 	return (save_ire);
2852 }
2853