xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6_ire.c (revision 4de2612967d06c4fdbf524a62556a1e8118a006f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright (c) 1990 Mentat Inc.
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * This file contains routines that manipulate Internet Routing Entries (IREs).
34  */
35 #include <sys/types.h>
36 #include <sys/stream.h>
37 #include <sys/stropts.h>
38 #include <sys/strlog.h>
39 #include <sys/dlpi.h>
40 #include <sys/ddi.h>
41 #include <sys/cmn_err.h>
42 
43 #include <sys/systm.h>
44 #include <sys/param.h>
45 #include <sys/socket.h>
46 #include <net/if.h>
47 #include <net/route.h>
48 #include <netinet/in.h>
49 #include <net/if_dl.h>
50 #include <netinet/ip6.h>
51 #include <netinet/icmp6.h>
52 
53 #include <inet/common.h>
54 #include <inet/mi.h>
55 #include <inet/ip.h>
56 #include <inet/ip6.h>
57 #include <inet/arp.h>
58 #include <inet/ip_ndp.h>
59 #include <inet/ip_if.h>
60 #include <inet/ip_ire.h>
61 #include <inet/ip_rts.h>
62 #include <inet/ipclassifier.h>
63 #include <inet/nd.h>
64 #include <sys/kmem.h>
65 #include <sys/zone.h>
66 
/*
 * IPv6 forwarding table.  ire_add_v6 indexes it by the prefix length of the
 * IRE's mask; a slot stays NULL until the first IRE of that prefix length is
 * added, at which point it is pointed at an array of ip6_ftable_hash_size
 * buckets.
 */
67 irb_t *ip_forwarding_table_v6[IP6_MASK_TABLE_SIZE];
68 /* IPv6 cache table buckets.  This is dynamically allocated in ip_ire_init */
69 irb_t *ip_cache_table_v6;
/* Template copied over each newly allocated IRE so that it starts clean. */
70 static	ire_t	ire_null;
71 
72 /* Bucket counts for the two tables above.  Defined in ip_ire.c */
73 extern uint32_t ip6_cache_table_size;
74 extern uint32_t ip6_ftable_hash_size;
75 
/* Forward declarations of file-local helpers. */
76 static ire_t	*ire_ihandle_lookup_onlink_v6(ire_t *cire);
77 static	void	ire_report_ftable_v6(ire_t *ire, char *mp);
78 static	void	ire_report_ctable_v6(ire_t *ire, char *mp);
79 static boolean_t ire_match_args_v6(ire_t *ire, const in6_addr_t *addr,
80     const in6_addr_t *mask, const in6_addr_t *gateway, int type, ipif_t *ipif,
81     zoneid_t zoneid, uint32_t ihandle, int match_flags);
82 
83 /*
84  * Named Dispatch routine to produce a formatted report on all IREs.
85  * This report is accessed by using the ndd utility to "get" ND variable
86  * "ip_ire_status_v6".
87  */
88 /* ARGSUSED */
89 int
90 ip_ire_report_v6(queue_t *q, mblk_t *mp, caddr_t arg, cred_t *ioc_cr)
91 {
92 	zoneid_t zoneid;
93 
94 	(void) mi_mpprintf(mp,
95 	    "IRE      " MI_COL_HDRPAD_STR
96 	    "rfq      " MI_COL_HDRPAD_STR
97 	    "stq      " MI_COL_HDRPAD_STR
98 	    " zone mxfrg rtt   rtt_sd ssthresh ref "
99 	    "rtomax tstamp_ok wscale_ok ecn_ok pmtud_ok sack sendpipe recvpipe "
100 	    "in/out/forward type    addr         mask         "
101 	    "src             gateway");
102 	/*
103 	 *   01234567 01234567 01234567 12345 12345 12345 12345  12345678 123
104 	 *   123456 123456789 123456789 123456 12345678 1234 12345678 12345678
105 	 *   in/out/forward xxxxxxxxxx
106 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
107 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
108 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
109 	 *   xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:xxxx
110 	 */
111 
112 	/*
113 	 * Because of the ndd constraint, at most we can have 64K buffer
114 	 * to put in all IRE info.  So to be more efficient, just
115 	 * allocate a 64K buffer here, assuming we need that large buffer.
116 	 * This should be OK as only root can do ndd /dev/ip.
117 	 */
118 	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
119 		/* The following may work even if we cannot get a large buf. */
120 		(void) mi_mpprintf(mp, "<< Out of buffer >>\n");
121 		return (0);
122 	}
123 	zoneid = Q_TO_CONN(q)->conn_zoneid;
124 	if (zoneid == GLOBAL_ZONEID)
125 		zoneid = ALL_ZONES;
126 
127 	ire_walk_v6(ire_report_ftable_v6, (char *)mp->b_cont, zoneid);
128 	ire_walk_v6(ire_report_ctable_v6, (char *)mp->b_cont, zoneid);
129 	return (0);
130 }
131 
132 /*
133  * ire_walk routine invoked for ip_ire_report_v6 for each IRE.
134  */
135 static void
136 ire_report_ftable_v6(ire_t *ire, char *mp)
137 {
138 	char	buf1[INET6_ADDRSTRLEN];
139 	char	buf2[INET6_ADDRSTRLEN];
140 	char	buf3[INET6_ADDRSTRLEN];
141 	char	buf4[INET6_ADDRSTRLEN];
142 	uint_t	fo_pkt_count;
143 	uint_t	ib_pkt_count;
144 	int	ref;
145 	in6_addr_t gw_addr_v6;
146 	uint_t	print_len, buf_len;
147 
148 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
149 	if (ire->ire_type & IRE_CACHETABLE)
150 	    return;
151 	buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr;
152 	if (buf_len <= 0)
153 		return;
154 
155 	/* Number of active references of this ire */
156 	ref = ire->ire_refcnt;
157 	/* "inbound" to a non local address is a forward */
158 	ib_pkt_count = ire->ire_ib_pkt_count;
159 	fo_pkt_count = 0;
160 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
161 	if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) {
162 		fo_pkt_count = ib_pkt_count;
163 		ib_pkt_count = 0;
164 	}
165 
166 	mutex_enter(&ire->ire_lock);
167 	gw_addr_v6 = ire->ire_gateway_addr_v6;
168 	mutex_exit(&ire->ire_lock);
169 
170 	print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len,
171 	    MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d "
172 	    "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
173 	    "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n",
174 	    (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq,
175 	    (int)ire->ire_zoneid,
176 	    ire->ire_max_frag, ire->ire_uinfo.iulp_rtt,
177 	    ire->ire_uinfo.iulp_rtt_sd,
178 	    ire->ire_uinfo.iulp_ssthresh, ref,
179 	    ire->ire_uinfo.iulp_rtomax,
180 	    (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0),
181 	    (ire->ire_uinfo.iulp_wscale_ok ? 1: 0),
182 	    (ire->ire_uinfo.iulp_ecn_ok ? 1: 0),
183 	    (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0),
184 	    ire->ire_uinfo.iulp_sack,
185 	    ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe,
186 	    ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count,
187 	    ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type),
188 	    inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)),
189 	    inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)),
190 	    inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)),
191 	    inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4)));
192 	if (print_len < buf_len) {
193 		((mblk_t *)mp)->b_wptr += print_len;
194 	} else {
195 		((mblk_t *)mp)->b_wptr += buf_len;
196 	}
197 }
198 
199 /* ire_walk routine invoked for ip_ire_report_v6 for each IRE. */
200 static void
201 ire_report_ctable_v6(ire_t *ire, char *mp)
202 {
203 	char	buf1[INET6_ADDRSTRLEN];
204 	char	buf2[INET6_ADDRSTRLEN];
205 	char	buf3[INET6_ADDRSTRLEN];
206 	char	buf4[INET6_ADDRSTRLEN];
207 	uint_t	fo_pkt_count;
208 	uint_t	ib_pkt_count;
209 	int	ref;
210 	in6_addr_t gw_addr_v6;
211 	uint_t	print_len, buf_len;
212 
213 	if ((ire->ire_type & IRE_CACHETABLE) == 0)
214 		return;
215 	buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr;
216 	if (buf_len <= 0)
217 		return;
218 
219 	/* Number of active references of this ire */
220 	ref = ire->ire_refcnt;
221 	/* "inbound" to a non local address is a forward */
222 	ib_pkt_count = ire->ire_ib_pkt_count;
223 	fo_pkt_count = 0;
224 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
225 	if (ire->ire_type & IRE_LOCAL) {
226 		fo_pkt_count = ib_pkt_count;
227 		ib_pkt_count = 0;
228 	}
229 
230 	mutex_enter(&ire->ire_lock);
231 	gw_addr_v6 = ire->ire_gateway_addr_v6;
232 	mutex_exit(&ire->ire_lock);
233 
234 	print_len =  snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len,
235 	    MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d "
236 	    "%05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
237 	    "%04d %08d %08d %d/%d/%d %s\n\t%s\n\t%s\n\t%s\n\t%s\n",
238 	    (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq,
239 	    (int)ire->ire_zoneid,
240 	    ire->ire_max_frag, ire->ire_uinfo.iulp_rtt,
241 	    ire->ire_uinfo.iulp_rtt_sd, ire->ire_uinfo.iulp_ssthresh, ref,
242 	    ire->ire_uinfo.iulp_rtomax,
243 	    (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0),
244 	    (ire->ire_uinfo.iulp_wscale_ok ? 1: 0),
245 	    (ire->ire_uinfo.iulp_ecn_ok ? 1: 0),
246 	    (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0),
247 	    ire->ire_uinfo.iulp_sack,
248 	    ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe,
249 	    ib_pkt_count, ire->ire_ob_pkt_count,
250 	    fo_pkt_count, ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type),
251 	    inet_ntop(AF_INET6, &ire->ire_addr_v6, buf1, sizeof (buf1)),
252 	    inet_ntop(AF_INET6, &ire->ire_mask_v6, buf2, sizeof (buf2)),
253 	    inet_ntop(AF_INET6, &ire->ire_src_addr_v6, buf3, sizeof (buf3)),
254 	    inet_ntop(AF_INET6, &gw_addr_v6, buf4, sizeof (buf4)));
255 	if (print_len < buf_len) {
256 		((mblk_t *)mp)->b_wptr += print_len;
257 	} else {
258 		((mblk_t *)mp)->b_wptr += buf_len;
259 	}
260 }
261 
262 
263 /*
264  * Initialize the ire that is specific to IPv6 part and call
265  * ire_init_common to finish it.
266  */
267 ire_t *
268 ire_init_v6(ire_t *ire, const in6_addr_t *v6addr,
269     const in6_addr_t *v6mask, const in6_addr_t *v6src_addr,
270     const in6_addr_t *v6gateway, uint_t *max_fragp,
271     mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type,
272     mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask,
273     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info)
274 {
275 	if (fp_mp != NULL) {
276 		/*
277 		 * We can't dupb() here as multiple threads could be
278 		 * calling dupb on the same mp which is incorrect.
279 		 * First dupb() should be called only by one thread.
280 		 */
281 		fp_mp = copyb(fp_mp);
282 		if (fp_mp == NULL)
283 			return (NULL);
284 	}
285 
286 	if (dlureq_mp != NULL) {
287 		/*
288 		 * We can't dupb() here as multiple threads could be
289 		 * calling dupb on the same mp which is incorrect.
290 		 * First dupb() should be called only by one thread.
291 		 */
292 		dlureq_mp = copyb(dlureq_mp);
293 		if (dlureq_mp == NULL) {
294 			if (fp_mp != NULL)
295 				freeb(fp_mp);
296 			return (NULL);
297 		}
298 	}
299 
300 	BUMP_IRE_STATS(ire_stats_v6, ire_stats_alloced);
301 	ire->ire_addr_v6 = *v6addr;
302 
303 	if (v6src_addr != NULL)
304 		ire->ire_src_addr_v6 = *v6src_addr;
305 	if (v6mask != NULL) {
306 		ire->ire_mask_v6 = *v6mask;
307 		ire->ire_masklen = ip_mask_to_plen_v6(&ire->ire_mask_v6);
308 	}
309 	if (v6gateway != NULL)
310 		ire->ire_gateway_addr_v6 = *v6gateway;
311 
312 	if (type == IRE_CACHE && v6cmask != NULL)
313 		ire->ire_cmask_v6 = *v6cmask;
314 
315 	/*
316 	 * Multirouted packets need to have a fragment header added so that
317 	 * the receiver is able to discard duplicates according to their
318 	 * fragment identifier.
319 	 */
320 	if (type == IRE_CACHE && (flags & RTF_MULTIRT)) {
321 		ire->ire_frag_flag = IPH_FRAG_HDR;
322 	}
323 
324 	ire_init_common(ire, max_fragp, fp_mp, rfq, stq, type, dlureq_mp,
325 	    ipif, NULL, phandle, ihandle, flags, IPV6_VERSION, ulp_info);
326 
327 	return (ire);
328 }
329 
330 /*
331  * Similar to ire_create_v6 except that it is called only when
332  * we want to allocate ire as an mblk e.g. we have a external
333  * resolver. Do we need this in IPv6 ?
334  */
335 ire_t *
336 ire_create_mp_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
337     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
338     mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type,
339     mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask,
340     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info)
341 {
342 	ire_t	*ire;
343 	ire_t	*ret_ire;
344 	mblk_t	*mp;
345 
346 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
347 
348 	/* Allocate the new IRE. */
349 	mp = allocb(sizeof (ire_t), BPRI_MED);
350 	if (mp == NULL) {
351 		ip1dbg(("ire_create_mp_v6: alloc failed\n"));
352 		return (NULL);
353 	}
354 
355 	ire = (ire_t *)mp->b_rptr;
356 	mp->b_wptr = (uchar_t *)&ire[1];
357 
358 	/* Start clean. */
359 	*ire = ire_null;
360 	ire->ire_mp = mp;
361 	mp->b_datap->db_type = IRE_DB_TYPE;
362 
363 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
364 	    NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle,
365 	    ihandle, flags, ulp_info);
366 
367 	if (ret_ire == NULL) {
368 		freeb(ire->ire_mp);
369 		return (NULL);
370 	}
371 	return (ire);
372 }
373 
374 /*
375  * ire_create_v6 is called to allocate and initialize a new IRE.
376  *
377  * NOTE : This is called as writer sometimes though not required
378  * by this function.
379  */
380 ire_t *
381 ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
382     const in6_addr_t *v6src_addr, const in6_addr_t *v6gateway,
383     uint_t *max_fragp, mblk_t *fp_mp, queue_t *rfq, queue_t *stq, ushort_t type,
384     mblk_t *dlureq_mp, ipif_t *ipif, const in6_addr_t *v6cmask,
385     uint32_t phandle, uint32_t ihandle, uint_t flags, const iulp_t *ulp_info)
386 {
387 	ire_t	*ire;
388 	ire_t	*ret_ire;
389 
390 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
391 
392 	ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
393 	if (ire == NULL) {
394 		ip1dbg(("ire_create_v6: alloc failed\n"));
395 		return (NULL);
396 	}
397 	*ire = ire_null;
398 
399 	ret_ire = ire_init_v6(ire, v6addr, v6mask, v6src_addr, v6gateway,
400 	    max_fragp, fp_mp, rfq, stq, type, dlureq_mp, ipif, v6cmask, phandle,
401 	    ihandle, flags, ulp_info);
402 
403 	if (ret_ire == NULL) {
404 		kmem_cache_free(ire_cache, ire);
405 		return (NULL);
406 	}
407 	ASSERT(ret_ire == ire);
408 	return (ire);
409 }
410 
411 /*
412  * Find an IRE_INTERFACE for the multicast group.
413  * Allows different routes for multicast addresses
414  * in the unicast routing table (akin to FF::0/8 but could be more specific)
415  * which point at different interfaces. This is used when IPV6_MULTICAST_IF
416  * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
417  * specify the interface to join on.
418  *
419  * Supports link-local addresses by following the ipif/ill when recursing.
420  */
421 ire_t *
422 ire_lookup_multi_v6(const in6_addr_t *group, zoneid_t zoneid)
423 {
424 	ire_t	*ire;
425 	ipif_t	*ipif = NULL;
426 	int	match_flags = MATCH_IRE_TYPE;
427 	in6_addr_t gw_addr_v6;
428 
429 	ire = ire_ftable_lookup_v6(group, 0, 0, 0, NULL, NULL,
430 	    zoneid, 0, MATCH_IRE_DEFAULT);
431 
432 	/* We search a resolvable ire in case of multirouting. */
433 	if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) {
434 		ire_t *cire = NULL;
435 		/*
436 		 * If the route is not resolvable, the looked up ire
437 		 * may be changed here. In that case, ire_multirt_lookup()
438 		 * IRE_REFRELE the original ire and change it.
439 		 */
440 		(void) ire_multirt_lookup_v6(&cire, &ire, MULTIRT_CACHEGW);
441 		if (cire != NULL)
442 			ire_refrele(cire);
443 	}
444 	if (ire == NULL)
445 		return (NULL);
446 	/*
447 	 * Make sure we follow ire_ipif.
448 	 *
449 	 * We need to determine the interface route through
450 	 * which the gateway will be reached. We don't really
451 	 * care which interface is picked if the interface is
452 	 * part of a group.
453 	 */
454 	if (ire->ire_ipif != NULL) {
455 		ipif = ire->ire_ipif;
456 		match_flags |= MATCH_IRE_ILL_GROUP;
457 	}
458 
459 	switch (ire->ire_type) {
460 	case IRE_DEFAULT:
461 	case IRE_PREFIX:
462 	case IRE_HOST:
463 		mutex_enter(&ire->ire_lock);
464 		gw_addr_v6 = ire->ire_gateway_addr_v6;
465 		mutex_exit(&ire->ire_lock);
466 		ire_refrele(ire);
467 		ire = ire_ftable_lookup_v6(&gw_addr_v6, 0, 0,
468 		    IRE_INTERFACE, ipif, NULL, zoneid, 0,
469 		    match_flags);
470 		return (ire);
471 	case IRE_IF_NORESOLVER:
472 	case IRE_IF_RESOLVER:
473 		return (ire);
474 	default:
475 		ire_refrele(ire);
476 		return (NULL);
477 	}
478 }
479 
480 /*
481  * Return any local address.  We use this to target ourselves
482  * when the src address was specified as 'default'.
483  * Preference for IRE_LOCAL entries.
484  */
485 ire_t *
486 ire_lookup_local_v6(zoneid_t zoneid)
487 {
488 	ire_t	*ire;
489 	irb_t	*irb;
490 	ire_t	*maybe = NULL;
491 	int i;
492 
493 	for (i = 0; i < ip6_cache_table_size;  i++) {
494 		irb = &ip_cache_table_v6[i];
495 		if (irb->irb_ire == NULL)
496 			continue;
497 		rw_enter(&irb->irb_lock, RW_READER);
498 		for (ire = irb->irb_ire; ire; ire = ire->ire_next) {
499 			if ((ire->ire_marks & IRE_MARK_CONDEMNED) ||
500 			    ire->ire_zoneid != zoneid)
501 				continue;
502 			switch (ire->ire_type) {
503 			case IRE_LOOPBACK:
504 				if (maybe == NULL) {
505 					IRE_REFHOLD(ire);
506 					maybe = ire;
507 				}
508 				break;
509 			case IRE_LOCAL:
510 				if (maybe != NULL) {
511 					ire_refrele(maybe);
512 				}
513 				IRE_REFHOLD(ire);
514 				rw_exit(&irb->irb_lock);
515 				return (ire);
516 			}
517 		}
518 		rw_exit(&irb->irb_lock);
519 	}
520 	return (maybe);
521 }
522 
523 /*
524  * This function takes a mask and returns number of bits set in the
525  * mask (the represented prefix length).  Assumes a contiguous mask.
526  */
527 int
528 ip_mask_to_plen_v6(const in6_addr_t *v6mask)
529 {
530 	int		bits;
531 	int		plen = IPV6_ABITS;
532 	int		i;
533 
534 	for (i = 3; i >= 0; i--) {
535 		if (v6mask->s6_addr32[i] == 0) {
536 			plen -= 32;
537 			continue;
538 		}
539 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
540 		if (bits == 0)
541 			break;
542 		plen -= bits;
543 	}
544 
545 	return (plen);
546 }
547 
548 /*
549  * Convert a prefix length to the mask for that prefix.
550  * Returns the argument bitmask.
551  */
552 in6_addr_t *
553 ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
554 {
555 	uint32_t *ptr;
556 
557 	if (plen < 0 || plen > IPV6_ABITS)
558 		return (NULL);
559 	*bitmask = ipv6_all_zeros;
560 
561 	ptr = (uint32_t *)bitmask;
562 	while (plen > 32) {
563 		*ptr++ = 0xffffffffU;
564 		plen -= 32;
565 	}
566 	*ptr = htonl(0xffffffffU << (32 - plen));
567 	return (bitmask);
568 }
569 
570 /*
571  * Add a fully initialized IRE to an appropriate
572  * table based on ire_type.
573  *
574  * The forward table contains IRE_PREFIX/IRE_HOST/IRE_HOST_REDIRECT
575  * IRE_IF_RESOLVER/IRE_IF_NORESOLVER and IRE_DEFAULT.
576  *
577  * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
578  * and IRE_CACHE.
579  *
580  * NOTE : This function is called as writer though not required
581  * by this function.
582  */
583 int
584 ire_add_v6(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
585 {
586 	ire_t	*ire1;
587 	int	mask_table_index;
588 	irb_t	*irb_ptr;
589 	ire_t	**irep;
590 	int	flags;
591 	ire_t	*pire = NULL;
592 	ill_t	*stq_ill;
593 	boolean_t	ndp_g_lock_held = B_FALSE;
594 	ire_t	*ire = *ire_p;
595 	int	error;
596 
597 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
598 	ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */
599 	ASSERT(ire->ire_nce == NULL);
600 
601 	/* Find the appropriate list head. */
602 	switch (ire->ire_type) {
603 	case IRE_HOST:
604 		ire->ire_mask_v6 = ipv6_all_ones;
605 		ire->ire_masklen = IPV6_ABITS;
606 		if ((ire->ire_flags & RTF_SETSRC) == 0)
607 			ire->ire_src_addr_v6 = ipv6_all_zeros;
608 		break;
609 	case IRE_HOST_REDIRECT:
610 		ire->ire_mask_v6 = ipv6_all_ones;
611 		ire->ire_masklen = IPV6_ABITS;
612 		ire->ire_src_addr_v6 = ipv6_all_zeros;
613 		break;
614 	case IRE_CACHE:
615 	case IRE_LOCAL:
616 	case IRE_LOOPBACK:
617 		ire->ire_mask_v6 = ipv6_all_ones;
618 		ire->ire_masklen = IPV6_ABITS;
619 		break;
620 	case IRE_PREFIX:
621 		if ((ire->ire_flags & RTF_SETSRC) == 0)
622 			ire->ire_src_addr_v6 = ipv6_all_zeros;
623 		break;
624 	case IRE_DEFAULT:
625 		if ((ire->ire_flags & RTF_SETSRC) == 0)
626 			ire->ire_src_addr_v6 = ipv6_all_zeros;
627 		break;
628 	case IRE_IF_RESOLVER:
629 	case IRE_IF_NORESOLVER:
630 		break;
631 	default:
632 		printf("ire_add_v6: ire %p has unrecognized IRE type (%d)\n",
633 		    (void *)ire, ire->ire_type);
634 		ire_delete(ire);
635 		*ire_p = NULL;
636 		return (EINVAL);
637 	}
638 
639 	/* Make sure the address is properly masked. */
640 	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);
641 
642 	if ((ire->ire_type & IRE_CACHETABLE) == 0) {
643 		/* IRE goes into Forward Table */
644 		mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
645 		if ((ip_forwarding_table_v6[mask_table_index]) == NULL) {
646 			irb_t *ptr;
647 			int i;
648 
649 			ptr = (irb_t *)mi_zalloc((ip6_ftable_hash_size *
650 			    sizeof (irb_t)));
651 			if (ptr == NULL) {
652 				ire_delete(ire);
653 				*ire_p = NULL;
654 				return (ENOMEM);
655 			}
656 			for (i = 0; i < ip6_ftable_hash_size; i++) {
657 				rw_init(&ptr[i].irb_lock, NULL,
658 				    RW_DEFAULT, NULL);
659 			}
660 			mutex_enter(&ire_ft_init_lock);
661 			if (ip_forwarding_table_v6[mask_table_index] == NULL) {
662 				ip_forwarding_table_v6[mask_table_index] = ptr;
663 				mutex_exit(&ire_ft_init_lock);
664 			} else {
665 				/*
666 				 * Some other thread won the race in
667 				 * initializing the forwarding table at the
668 				 * same index.
669 				 */
670 				mutex_exit(&ire_ft_init_lock);
671 				for (i = 0; i < ip6_ftable_hash_size; i++) {
672 					rw_destroy(&ptr[i].irb_lock);
673 				}
674 				mi_free(ptr);
675 			}
676 		}
677 		irb_ptr = &(ip_forwarding_table_v6[mask_table_index][
678 		    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
679 		    ip6_ftable_hash_size)]);
680 	} else {
681 		irb_ptr = &(ip_cache_table_v6[IRE_ADDR_HASH_V6(
682 		    ire->ire_addr_v6, ip6_cache_table_size)]);
683 	}
684 	/*
685 	 * For xresolv interfaces (v6 interfaces with an external
686 	 * address resolver), ip_newroute_v6/ip_newroute_ipif_v6
687 	 * are unable to prevent the deletion of the interface route
688 	 * while adding an IRE_CACHE for an on-link destination
689 	 * in the IRE_IF_RESOLVER case, since the ire has to go to
690 	 * the external resolver and return. We can't do a REFHOLD on the
691 	 * associated interface ire for fear of the message being freed
692 	 * if the external resolver can't resolve the address.
693 	 * Here we look up the interface ire in the forwarding table
694 	 * and make sure that the interface route has not been deleted.
695 	 */
696 	if (ire->ire_type == IRE_CACHE &&
697 	    IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6) &&
698 	    (((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) &&
699 	    (((ill_t *)ire->ire_stq->q_ptr)->ill_flags & ILLF_XRESOLV)) {
700 
701 		pire = ire_ihandle_lookup_onlink_v6(ire);
702 		if (pire == NULL) {
703 			ire_delete(ire);
704 			*ire_p = NULL;
705 			return (EINVAL);
706 		}
707 		/* Prevent pire from getting deleted */
708 		IRB_REFHOLD(pire->ire_bucket);
709 		/* Has it been removed already? */
710 		if (pire->ire_marks & IRE_MARK_CONDEMNED) {
711 			IRB_REFRELE(pire->ire_bucket);
712 			ire_refrele(pire);
713 			ire_delete(ire);
714 			*ire_p = NULL;
715 			return (EINVAL);
716 		}
717 	}
718 
719 	flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
720 	/*
721 	 * For IRE_CACHES, MATCH_IRE_IPIF is not enough to check
722 	 * for duplicates because :
723 	 *
724 	 * 1) ire_ipif->ipif_ill and ire_stq->q_ptr could be
725 	 *    pointing at different ills. A real duplicate is
726 	 *    a match on both ire_ipif and ire_stq.
727 	 *
728 	 * 2) We could have multiple packets trying to create
729 	 *    an IRE_CACHE for the same ill.
730 	 *
731 	 * Moreover, IPIF_NOFAILOVER and IPV6_BOUND_PIF endpoints wants
732 	 * to go out on a particular ill. Rather than looking at the
733 	 * packet, we depend on the above for MATCH_IRE_ILL here.
734 	 *
735 	 * Unlike IPv4, MATCH_IRE_IPIF is needed here as we could have
736 	 * multiple IRE_CACHES for an ill for the same destination
737 	 * with various scoped addresses i.e represented by ipifs.
738 	 *
739 	 * MATCH_IRE_ILL is done implicitly below for IRE_CACHES.
740 	 */
741 	if (ire->ire_ipif != NULL)
742 		flags |= MATCH_IRE_IPIF;
743 	/*
744 	 * If we are creating hidden ires, make sure we search on
745 	 * this ill (MATCH_IRE_ILL) and a hidden ire, while we are
746 	 * searching for duplicates below. Otherwise we could
747 	 * potentially find an IRE on some other interface
748 	 * and it may not be a IRE marked with IRE_MARK_HIDDEN. We
749 	 * shouldn't do this as this will lead to an infinite loop as
750 	 * eventually we need an hidden ire for this packet to go
751 	 * out. MATCH_IRE_ILL is already marked above.
752 	 */
753 	if (ire->ire_marks & IRE_MARK_HIDDEN) {
754 		ASSERT(ire->ire_type == IRE_CACHE);
755 		flags |= MATCH_IRE_MARK_HIDDEN;
756 	}
757 
758 	/*
759 	 * Start the atomic add of the ire. Grab the ill locks,
760 	 * ill_g_usesrc_lock and the bucket lock. Check for condemned.
761 	 * To avoid lock order problems, get the ndp_g_lock now itself.
762 	 */
763 	if (ire->ire_type == IRE_CACHE) {
764 		mutex_enter(&ndp_g_lock);
765 		ndp_g_lock_held = B_TRUE;
766 	}
767 
768 	/*
769 	 * If ipif or ill is changing ire_atomic_start() may queue the
770 	 * request and return EINPROGRESS.
771 	 */
772 
773 	error = ire_atomic_start(irb_ptr, ire, q, mp, func);
774 	if (error != 0) {
775 		if (ndp_g_lock_held)
776 			mutex_exit(&ndp_g_lock);
777 		/*
778 		 * We don't know whether it is a valid ipif or not.
779 		 * So, set it to NULL. This assumes that the ire has not added
780 		 * a reference to the ipif.
781 		 */
782 		ire->ire_ipif = NULL;
783 		ire_delete(ire);
784 		if (pire != NULL) {
785 			IRB_REFRELE(pire->ire_bucket);
786 			ire_refrele(pire);
787 		}
788 		*ire_p = NULL;
789 		return (error);
790 	}
791 	/*
792 	 * To avoid creating ires having stale values for the ire_max_frag
793 	 * we get the latest value atomically here. For more details
794 	 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
795 	 * in ip_rput_dlpi_writer
796 	 */
797 	if (ire->ire_max_fragp == NULL) {
798 		if (IN6_IS_ADDR_MULTICAST(&ire->ire_addr_v6))
799 			ire->ire_max_frag = ire->ire_ipif->ipif_mtu;
800 		else
801 			ire->ire_max_frag = pire->ire_max_frag;
802 	} else {
803 		uint_t  max_frag;
804 
805 		max_frag = *ire->ire_max_fragp;
806 		ire->ire_max_fragp = NULL;
807 		ire->ire_max_frag = max_frag;
808 	}
809 
810 	/*
811 	 * Atomically check for duplicate and insert in the table.
812 	 */
813 	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
814 		if (ire1->ire_marks & IRE_MARK_CONDEMNED)
815 			continue;
816 
817 		if (ire->ire_type == IRE_CACHE) {
818 			/*
819 			 * We do MATCH_IRE_ILL implicitly here for IRE_CACHES.
820 			 * As ire_ipif and ire_stq could point to two
821 			 * different ills, we can't pass just ire_ipif to
822 			 * ire_match_args and get a match on both ills.
823 			 * This is just needed for duplicate checks here and
824 			 * so we don't add an extra argument to
825 			 * ire_match_args for this. Do it locally.
826 			 *
827 			 * NOTE : Currently there is no part of the code
828 			 * that asks for both MATH_IRE_IPIF and MATCH_IRE_ILL
829 			 * match for IRE_CACHEs. Thus we don't want to
830 			 * extend the arguments to ire_match_args_v6.
831 			 */
832 			if (ire1->ire_stq != ire->ire_stq)
833 				continue;
834 			/*
835 			 * Multiroute IRE_CACHEs for a given destination can
836 			 * have the same ire_ipif, typically if their source
837 			 * address is forced using RTF_SETSRC, and the same
838 			 * send-to queue. We differentiate them using the parent
839 			 * handle.
840 			 */
841 			if ((ire1->ire_flags & RTF_MULTIRT) &&
842 			    (ire->ire_flags & RTF_MULTIRT) &&
843 			    (ire1->ire_phandle != ire->ire_phandle))
844 				continue;
845 		}
846 		if (ire1->ire_zoneid != ire->ire_zoneid)
847 			continue;
848 		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
849 		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
850 		    ire->ire_type, ire->ire_ipif, ire->ire_zoneid, 0, flags)) {
851 			/*
852 			 * Return the old ire after doing a REFHOLD.
853 			 * As most of the callers continue to use the IRE
854 			 * after adding, we return a held ire. This will
855 			 * avoid a lookup in the caller again. If the callers
856 			 * don't want to use it, they need to do a REFRELE.
857 			 */
858 			ip1dbg(("found dup ire existing %p new %p",
859 			    (void *)ire1, (void *)ire));
860 			IRE_REFHOLD(ire1);
861 			if (ndp_g_lock_held)
862 				mutex_exit(&ndp_g_lock);
863 			ire_atomic_end(irb_ptr, ire);
864 			ire_delete(ire);
865 			if (pire != NULL) {
866 				/*
867 				 * Assert that it is
868 				 * not yet removed from the list.
869 				 */
870 				ASSERT(pire->ire_ptpn != NULL);
871 				IRB_REFRELE(pire->ire_bucket);
872 				ire_refrele(pire);
873 			}
874 			*ire_p = ire1;
875 			return (0);
876 		}
877 	}
878 	if (ire->ire_type == IRE_CACHE) {
879 		in6_addr_t gw_addr_v6;
880 		ill_t	*ill = ire_to_ill(ire);
881 		char	buf[INET6_ADDRSTRLEN];
882 		nce_t	*nce;
883 
884 		/*
885 		 * All IRE_CACHE types must have a nce.  If this is
886 		 * not the case the entry will not be added. We need
887 		 * to make sure that if somebody deletes the nce
888 		 * after we looked up, they will find this ire and
889 		 * delete the ire. To delete this ire one needs the
890 		 * bucket lock which we are still holding here. So,
891 		 * even if the nce gets deleted after we looked up,
892 		 * this ire  will get deleted.
893 		 *
894 		 * NOTE : Don't need the ire_lock for accessing
895 		 * ire_gateway_addr_v6 as it is appearing first
896 		 * time on the list and rts_setgwr_v6 could not
897 		 * be changing this.
898 		 */
899 		gw_addr_v6 = ire->ire_gateway_addr_v6;
900 		if (IN6_IS_ADDR_UNSPECIFIED(&gw_addr_v6)) {
901 			nce = ndp_lookup(ill, &ire->ire_addr_v6, B_TRUE);
902 		} else {
903 			nce = ndp_lookup(ill, &gw_addr_v6, B_TRUE);
904 		}
905 		if (nce == NULL)
906 			goto failed;
907 
908 		/* Pair of refhold, refrele just to get the tracing right */
909 		NCE_REFHOLD_NOTR(nce);
910 		NCE_REFRELE(nce);
911 		/*
912 		 * Atomically make sure that new IREs don't point
913 		 * to an NCE that is logically deleted (CONDEMNED).
914 		 * ndp_delete() first marks the NCE CONDEMNED.
915 		 * This ensures that the nce_refcnt won't increase
916 		 * due to new nce_lookups or due to addition of new IREs
917 		 * pointing to this NCE. Then ndp_delete() cleans up
918 		 * existing references. If we don't do it atomically here,
919 		 * ndp_delete() -> nce_ire_delete() will not be able to
920 		 * clean up the IRE list completely, and the nce_refcnt
921 		 * won't go down to zero.
922 		 */
923 		mutex_enter(&nce->nce_lock);
924 		if (ill->ill_flags & ILLF_XRESOLV) {
925 			/*
926 			 * If we used an external resolver, we may not
927 			 * have gone through neighbor discovery to get here.
928 			 * Must update the nce_state before the next check.
929 			 */
930 			if (nce->nce_state == ND_INCOMPLETE)
931 				nce->nce_state = ND_REACHABLE;
932 		}
933 		if (nce->nce_state == ND_INCOMPLETE ||
934 		    (nce->nce_flags & NCE_F_CONDEMNED) ||
935 		    (nce->nce_state == ND_UNREACHABLE)) {
936 failed:
937 			if (ndp_g_lock_held)
938 				mutex_exit(&ndp_g_lock);
939 			if (nce != NULL)
940 				mutex_exit(&nce->nce_lock);
941 			ire_atomic_end(irb_ptr, ire);
942 			ip1dbg(("ire_add_v6: No nce for dst %s \n",
943 			    inet_ntop(AF_INET6, &ire->ire_addr_v6,
944 			    buf, sizeof (buf))));
945 			ire_delete(ire);
946 			if (pire != NULL) {
947 				/*
948 				 * Assert that it is
949 				 * not yet removed from the list.
950 				 */
951 				ASSERT(pire->ire_ptpn != NULL);
952 				IRB_REFRELE(pire->ire_bucket);
953 				ire_refrele(pire);
954 			}
955 			if (nce != NULL)
956 				NCE_REFRELE_NOTR(nce);
957 			*ire_p = NULL;
958 			return (EINVAL);
959 		} else {
960 			ire->ire_nce = nce;
961 		}
962 		mutex_exit(&nce->nce_lock);
963 	}
964 	/*
965 	 * Find the first entry that matches ire_addr - provides
966 	 * tail insertion. *irep will be null if no match.
967 	 */
968 	irep = (ire_t **)irb_ptr;
969 	while ((ire1 = *irep) != NULL &&
970 	    !IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6))
971 		irep = &ire1->ire_next;
972 	ASSERT(!(ire->ire_type & IRE_BROADCAST));
973 
974 	if (*irep != NULL) {
975 		/*
976 		 * Find the last ire which matches ire_addr_v6.
977 		 * Needed to do tail insertion among entries with the same
978 		 * ire_addr_v6.
979 		 */
980 		while (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6,
981 		    &ire1->ire_addr_v6)) {
982 			irep = &ire1->ire_next;
983 			ire1 = *irep;
984 			if (ire1 == NULL)
985 				break;
986 		}
987 	}
988 
989 	if (ire->ire_type == IRE_DEFAULT) {
990 		/*
991 		 * We keep a count of default gateways which is used when
992 		 * assigning them as routes.
993 		 */
994 		ipv6_ire_default_count++;
995 		ASSERT(ipv6_ire_default_count != 0); /* Wraparound */
996 	}
997 	/* Insert at *irep */
998 	ire1 = *irep;
999 	if (ire1 != NULL)
1000 		ire1->ire_ptpn = &ire->ire_next;
1001 	ire->ire_next = ire1;
1002 	/* Link the new one in. */
1003 	ire->ire_ptpn = irep;
1004 	/*
1005 	 * ire_walk routines de-reference ire_next without holding
1006 	 * a lock. Before we point to the new ire, we want to make
1007 	 * sure the store that sets the ire_next of the new ire
1008 	 * reaches global visibility, so that ire_walk routines
1009 	 * don't see a truncated list of ires i.e if the ire_next
1010 	 * of the new ire gets set after we do "*irep = ire" due
1011 	 * to re-ordering, the ire_walk thread will see a NULL
1012 	 * once it accesses the ire_next of the new ire.
1013 	 * membar_producer() makes sure that the following store
1014 	 * happens *after* all of the above stores.
1015 	 */
1016 	membar_producer();
1017 	*irep = ire;
1018 	ire->ire_bucket = irb_ptr;
1019 	/*
1020 	 * We return a bumped up IRE above. Keep it symmetrical
1021 	 * so that the callers will always have to release. This
1022 	 * helps the callers of this function because they continue
1023 	 * to use the IRE after adding and hence they don't have to
1024 	 * lookup again after we return the IRE.
1025 	 *
1026 	 * NOTE : We don't have to use atomics as this is appearing
1027 	 * in the list for the first time and no one else can bump
1028 	 * up the reference count on this yet.
1029 	 */
1030 	IRE_REFHOLD_LOCKED(ire);
1031 	BUMP_IRE_STATS(ire_stats_v6, ire_stats_inserted);
1032 	irb_ptr->irb_ire_cnt++;
1033 	if (ire->ire_marks & IRE_MARK_TEMPORARY)
1034 		irb_ptr->irb_tmp_ire_cnt++;
1035 
1036 	if (ire->ire_ipif != NULL) {
1037 		ire->ire_ipif->ipif_ire_cnt++;
1038 		if (ire->ire_stq != NULL) {
1039 			stq_ill = (ill_t *)ire->ire_stq->q_ptr;
1040 			stq_ill->ill_ire_cnt++;
1041 		}
1042 	} else {
1043 		ASSERT(ire->ire_stq == NULL);
1044 	}
1045 
1046 	if (ndp_g_lock_held)
1047 		mutex_exit(&ndp_g_lock);
1048 	ire_atomic_end(irb_ptr, ire);
1049 
1050 	if (pire != NULL) {
1051 		/* Assert that it is not removed from the list yet */
1052 		ASSERT(pire->ire_ptpn != NULL);
1053 		IRB_REFRELE(pire->ire_bucket);
1054 		ire_refrele(pire);
1055 	}
1056 
1057 	if (ire->ire_type != IRE_CACHE) {
1058 		/*
1059 		 * For ire's with with host mask see if there is an entry
1060 		 * in the cache. If there is one flush the whole cache as
1061 		 * there might be multiple entries due to RTF_MULTIRT (CGTP).
1062 		 * If no entry is found than there is no need to flush the
1063 		 * cache.
1064 		 */
1065 
1066 		if (ip_mask_to_plen_v6(&ire->ire_mask_v6) == IPV6_ABITS) {
1067 			ire_t *lire;
1068 			lire = ire_ctable_lookup_v6(&ire->ire_addr_v6, NULL,
1069 			    IRE_CACHE, NULL, ALL_ZONES, MATCH_IRE_TYPE);
1070 			if (lire != NULL) {
1071 				ire_refrele(lire);
1072 				ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
1073 			}
1074 		} else {
1075 			ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
1076 		}
1077 	}
1078 
1079 	*ire_p = ire;
1080 	return (0);
1081 }
1082 
1083 /*
1084  * Search for all HOST REDIRECT routes that are
1085  * pointing at the specified gateway and
1086  * delete them. This routine is called only
1087  * when a default gateway is going away.
1088  */
1089 static void
1090 ire_delete_host_redirects_v6(const in6_addr_t *gateway)
1091 {
1092 	irb_t *irb_ptr;
1093 	irb_t *irb;
1094 	ire_t *ire;
1095 	in6_addr_t gw_addr_v6;
1096 	int i;
1097 
1098 	/* get the hash table for HOST routes */
1099 	irb_ptr = ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
1100 	if (irb_ptr == NULL)
1101 		return;
1102 	for (i = 0; (i < ip6_ftable_hash_size); i++) {
1103 		irb = &irb_ptr[i];
1104 		IRB_REFHOLD(irb);
1105 		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
1106 			if (ire->ire_type != IRE_HOST_REDIRECT)
1107 				continue;
1108 			mutex_enter(&ire->ire_lock);
1109 			gw_addr_v6 = ire->ire_gateway_addr_v6;
1110 			mutex_exit(&ire->ire_lock);
1111 			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
1112 				ire_delete(ire);
1113 		}
1114 		IRB_REFRELE(irb);
1115 	}
1116 }
1117 
1118 /*
1119  * Delete all the cache entries with this 'addr'. This is the IPv6 counterpart
1120  * of ip_ire_clookup_and_delete. The difference being this function does not
1121  * return any value. IPv6 processing of a gratuitous ARP, as it stands, is
1122  * different than IPv4 in that, regardless of the presence of a cache entry
1123  * for this address, an ire_walk_v6 is done. Another difference is that unlike
1124  * in the case of IPv4 this does not take an ipif_t argument, since it is only
1125  * called by ip_arp_news and the match is always only on the address.
1126  */
1127 void
1128 ip_ire_clookup_and_delete_v6(const in6_addr_t *addr)
1129 {
1130 	irb_t		*irb;
1131 	ire_t		*cire;
1132 	boolean_t	found = B_FALSE;
1133 
1134 	irb = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr, ip6_cache_table_size)];
1135 	IRB_REFHOLD(irb);
1136 	for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) {
1137 		if (cire->ire_marks == IRE_MARK_CONDEMNED)
1138 			continue;
1139 		if (IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, addr)) {
1140 
1141 			/* This signifies start of a match */
1142 			if (!found)
1143 				found = B_TRUE;
1144 			if (cire->ire_type == IRE_CACHE) {
1145 				if (cire->ire_nce != NULL)
1146 					ndp_delete(cire->ire_nce);
1147 				ire_delete_v6(cire);
1148 			}
1149 		/* End of the match */
1150 		} else if (found)
1151 			break;
1152 	}
1153 	IRB_REFRELE(irb);
1154 }
1155 
1156 /*
1157  * Delete the specified IRE.
1158  * All calls should use ire_delete().
1159  * Sometimes called as writer though not required by this function.
1160  *
1161  * NOTE : This function is called only if the ire was added
1162  * in the list.
1163  */
1164 void
1165 ire_delete_v6(ire_t *ire)
1166 {
1167 	in6_addr_t gw_addr_v6;
1168 
1169 	ASSERT(ire->ire_refcnt >= 1);
1170 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1171 
1172 	if (ire->ire_type != IRE_CACHE)
1173 		ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
1174 	if (ire->ire_type == IRE_DEFAULT) {
1175 		/*
1176 		 * when a default gateway is going away
1177 		 * delete all the host redirects pointing at that
1178 		 * gateway.
1179 		 */
1180 		mutex_enter(&ire->ire_lock);
1181 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1182 		mutex_exit(&ire->ire_lock);
1183 		ire_delete_host_redirects_v6(&gw_addr_v6);
1184 	}
1185 }
1186 
1187 /*
1188  * ire_walk routine to delete all IRE_CACHE and IRE_HOST_REDIRECT
1189  * entries.
1190  */
1191 /*ARGSUSED1*/
1192 void
1193 ire_delete_cache_v6(ire_t *ire, char *arg)
1194 {
1195 	char    addrstr1[INET6_ADDRSTRLEN];
1196 	char    addrstr2[INET6_ADDRSTRLEN];
1197 
1198 	if (ire->ire_type & (IRE_CACHE | IRE_HOST_REDIRECT)) {
1199 		ip1dbg(("ire_delete_cache_v6: deleted %s type %d through %s\n",
1200 		    inet_ntop(AF_INET6, &ire->ire_addr_v6,
1201 			addrstr1, sizeof (addrstr1)),
1202 		    ire->ire_type,
1203 		    inet_ntop(AF_INET6, &ire->ire_gateway_addr_v6,
1204 			addrstr2, sizeof (addrstr2))));
1205 		ire_delete(ire);
1206 	}
1207 
1208 }
1209 
1210 /*
1211  * ire_walk routine to delete all IRE_CACHE/IRE_HOST_REDIRECT entries
1212  * that have a given gateway address.
1213  */
1214 void
1215 ire_delete_cache_gw_v6(ire_t *ire, char *addr)
1216 {
1217 	in6_addr_t	*gw_addr = (in6_addr_t *)addr;
1218 	char		buf1[INET6_ADDRSTRLEN];
1219 	char		buf2[INET6_ADDRSTRLEN];
1220 	in6_addr_t	ire_gw_addr_v6;
1221 
1222 	if (!(ire->ire_type & (IRE_CACHE|IRE_HOST_REDIRECT)))
1223 		return;
1224 
1225 	mutex_enter(&ire->ire_lock);
1226 	ire_gw_addr_v6 = ire->ire_gateway_addr_v6;
1227 	mutex_exit(&ire->ire_lock);
1228 
1229 	if (IN6_ARE_ADDR_EQUAL(&ire_gw_addr_v6, gw_addr)) {
1230 		ip1dbg(("ire_delete_cache_gw_v6: deleted %s type %d to %s\n",
1231 		    inet_ntop(AF_INET6, &ire->ire_src_addr_v6,
1232 		    buf1, sizeof (buf1)),
1233 		    ire->ire_type,
1234 		    inet_ntop(AF_INET6, &ire_gw_addr_v6,
1235 		    buf2, sizeof (buf2))));
1236 		ire_delete(ire);
1237 	}
1238 }
1239 
1240 /*
1241  * Remove all IRE_CACHE entries that match
1242  * the ire specified.  (Sometimes called
1243  * as writer though not required by this function.)
1244  *
1245  * The flag argument indicates if the
1246  * flush request is due to addition
1247  * of new route (IRE_FLUSH_ADD) or deletion of old
1248  * route (IRE_FLUSH_DELETE).
1249  *
1250  * This routine takes only the IREs from the forwarding
1251  * table and flushes the corresponding entries from
1252  * the cache table.
1253  *
1254  * When flushing due to the deletion of an old route, it
1255  * just checks the cache handles (ire_phandle and ire_ihandle) and
1256  * deletes the ones that match.
1257  *
1258  * When flushing due to the creation of a new route, it checks
1259  * if a cache entry's address matches the one in the IRE and
1260  * that the cache entry's parent has a less specific mask than the
1261  * one in IRE. The destination of such a cache entry could be the
1262  * gateway for other cache entries, so we need to flush those as
1263  * well by looking for gateway addresses matching the IRE's address.
1264  */
1265 void
1266 ire_flush_cache_v6(ire_t *ire, int flag)
1267 {
1268 	int i;
1269 	ire_t *cire;
1270 	irb_t *irb;
1271 
1272 	if (ire->ire_type & IRE_CACHE)
1273 	    return;
1274 
1275 	/*
1276 	 * If a default is just created, there is no point
1277 	 * in going through the cache, as there will not be any
1278 	 * cached ires.
1279 	 */
1280 	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD)
1281 		return;
1282 	if (flag == IRE_FLUSH_ADD) {
1283 		/*
1284 		 * This selective flush is
1285 		 * due to the addition of
1286 		 * new IRE.
1287 		 */
1288 		for (i = 0; i < ip6_cache_table_size; i++) {
1289 			irb = &ip_cache_table_v6[i];
1290 			if ((cire = irb->irb_ire) == NULL)
1291 				continue;
1292 			IRB_REFHOLD(irb);
1293 			for (cire = irb->irb_ire; cire != NULL;
1294 			    cire = cire->ire_next) {
1295 				if (cire->ire_type != IRE_CACHE)
1296 					continue;
1297 				/*
1298 				 * If 'cire' belongs to the same subnet
1299 				 * as the new ire being added, and 'cire'
1300 				 * is derived from a prefix that is less
1301 				 * specific than the new ire being added,
1302 				 * we need to flush 'cire'; for instance,
1303 				 * when a new interface comes up.
1304 				 */
1305 				if ((V6_MASK_EQ_2(cire->ire_addr_v6,
1306 				    ire->ire_mask_v6, ire->ire_addr_v6) &&
1307 				    (ip_mask_to_plen_v6(&cire->ire_cmask_v6) <=
1308 				    ire->ire_masklen))) {
1309 					ire_delete(cire);
1310 					continue;
1311 				}
1312 				/*
1313 				 * This is the case when the ire_gateway_addr
1314 				 * of 'cire' belongs to the same subnet as
1315 				 * the new ire being added.
1316 				 * Flushing such ires is sometimes required to
1317 				 * avoid misrouting: say we have a machine with
1318 				 * two interfaces (I1 and I2), a default router
1319 				 * R on the I1 subnet, and a host route to an
1320 				 * off-link destination D with a gateway G on
1321 				 * the I2 subnet.
1322 				 * Under normal operation, we will have an
1323 				 * on-link cache entry for G and an off-link
1324 				 * cache entry for D with G as ire_gateway_addr,
1325 				 * traffic to D will reach its destination
1326 				 * through gateway G.
1327 				 * If the administrator does 'ifconfig I2 down',
1328 				 * the cache entries for D and G will be
1329 				 * flushed. However, G will now be resolved as
1330 				 * an off-link destination using R (the default
1331 				 * router) as gateway. Then D will also be
1332 				 * resolved as an off-link destination using G
1333 				 * as gateway - this behavior is due to
1334 				 * compatibility reasons, see comment in
1335 				 * ire_ihandle_lookup_offlink(). Traffic to D
1336 				 * will go to the router R and probably won't
1337 				 * reach the destination.
1338 				 * The administrator then does 'ifconfig I2 up'.
1339 				 * Since G is on the I2 subnet, this routine
1340 				 * will flush its cache entry. It must also
1341 				 * flush the cache entry for D, otherwise
1342 				 * traffic will stay misrouted until the IRE
1343 				 * times out.
1344 				 */
1345 				if (V6_MASK_EQ_2(cire->ire_gateway_addr_v6,
1346 				    ire->ire_mask_v6, ire->ire_addr_v6)) {
1347 					ire_delete(cire);
1348 					continue;
1349 				}
1350 			}
1351 			IRB_REFRELE(irb);
1352 		}
1353 	} else {
1354 		/*
1355 		 * delete the cache entries based on
1356 		 * handle in the IRE as this IRE is
1357 		 * being deleted/changed.
1358 		 */
1359 		for (i = 0; i < ip6_cache_table_size; i++) {
1360 			irb = &ip_cache_table_v6[i];
1361 			if ((cire = irb->irb_ire) == NULL)
1362 				continue;
1363 			IRB_REFHOLD(irb);
1364 			for (cire = irb->irb_ire; cire != NULL;
1365 			    cire = cire->ire_next) {
1366 				if (cire->ire_type != IRE_CACHE)
1367 					continue;
1368 				if ((cire->ire_phandle == 0 ||
1369 				    cire->ire_phandle != ire->ire_phandle) &&
1370 				    (cire->ire_ihandle == 0 ||
1371 				    cire->ire_ihandle != ire->ire_ihandle))
1372 					continue;
1373 				ire_delete(cire);
1374 			}
1375 			IRB_REFRELE(irb);
1376 		}
1377 	}
1378 }
1379 
1380 /*
1381  * Matches the arguments passed with the values in the ire.
1382  *
1383  * Note: for match types that match using "ipif" passed in, ipif
1384  * must be checked for non-NULL before calling this routine.
1385  */
1386 static boolean_t
1387 ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
1388     const in6_addr_t *gateway, int type, ipif_t *ipif, zoneid_t zoneid,
1389     uint32_t ihandle, int match_flags)
1390 {
1391 	in6_addr_t masked_addr;
1392 	in6_addr_t gw_addr_v6;
1393 	ill_t *ire_ill = NULL, *dst_ill;
1394 	ill_t *ipif_ill = NULL;
1395 	ill_group_t *ire_ill_group = NULL;
1396 	ill_group_t *ipif_ill_group = NULL;
1397 	ipif_t	*src_ipif;
1398 
1399 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
1400 	ASSERT(addr != NULL);
1401 	ASSERT(mask != NULL);
1402 	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
1403 	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP))) ||
1404 	    (ipif != NULL && ipif->ipif_isv6));
1405 	ASSERT(!(match_flags & MATCH_IRE_WQ));
1406 
1407 	/*
1408 	 * HIDDEN cache entries have to be looked up specifically with
1409 	 * MATCH_IRE_MARK_HIDDEN. MATCH_IRE_MARK_HIDDEN is usually set
1410 	 * when the interface is FAILED or INACTIVE. In that case,
1411 	 * any IRE_CACHES that exists should be marked with
1412 	 * IRE_MARK_HIDDEN. So, we don't really need to match below
1413 	 * for IRE_MARK_HIDDEN. But we do so for consistency.
1414 	 */
1415 	if (!(match_flags & MATCH_IRE_MARK_HIDDEN) &&
1416 	    (ire->ire_marks & IRE_MARK_HIDDEN))
1417 		return (B_FALSE);
1418 
1419 	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid) {
1420 		/*
1421 		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid is
1422 		 * valid and does not match that of ire_zoneid, a failure to
1423 		 * match is reported at this point. Otherwise, since some IREs
1424 		 * that are available in the global zone can be used in local
1425 		 * zones, additional checks need to be performed:
1426 		 *
1427 		 *	IRE_CACHE and IRE_LOOPBACK entries should
1428 		 *	never be matched in this situation.
1429 		 *
1430 		 *	IRE entries that have an interface associated with them
1431 		 *	should in general not match unless they are an IRE_LOCAL
1432 		 *	or in the case when MATCH_IRE_DEFAULT has been set in
1433 		 *	the caller.  In the case of the former, checking of the
1434 		 *	other fields supplied should take place.
1435 		 *
1436 		 *	In the case where MATCH_IRE_DEFAULT has been set,
1437 		 *	all of the ipif's associated with the IRE's ill are
1438 		 *	checked to see if there is a matching zoneid.  If any
1439 		 *	one ipif has a matching zoneid, this IRE is a
1440 		 *	potential candidate so checking of the other fields
1441 		 *	takes place.
1442 		 *
1443 		 *	In the case where the IRE_INTERFACE has a usable source
1444 		 *	address (indicated by ill_usesrc_ifindex) in the
1445 		 *	correct zone then it's permitted to return this IRE
1446 		 */
1447 		if (match_flags & MATCH_IRE_ZONEONLY)
1448 			return (B_FALSE);
1449 		if (ire->ire_type & (IRE_CACHE | IRE_LOOPBACK))
1450 			return (B_FALSE);
1451 		/*
1452 		 * Note, IRE_INTERFACE can have the stq as NULL. For
1453 		 * example, if the default multicast route is tied to
1454 		 * the loopback address.
1455 		 */
1456 		if ((ire->ire_type & IRE_INTERFACE) &&
1457 		    (ire->ire_stq != NULL)) {
1458 			dst_ill = (ill_t *)ire->ire_stq->q_ptr;
1459 			/*
1460 			 * If there is a usable source address in the
1461 			 * zone, then it's ok to return an
1462 			 * IRE_INTERFACE
1463 			 */
1464 			if ((dst_ill->ill_usesrc_ifindex != 0) &&
1465 			    (src_ipif = ipif_select_source_v6(dst_ill, addr,
1466 			    B_FALSE, IPV6_PREFER_SRC_DEFAULT, zoneid))
1467 			    != NULL) {
1468 				ip3dbg(("ire_match_args: src_ipif %p"
1469 				    " dst_ill %p", (void *)src_ipif,
1470 				    (void *)dst_ill));
1471 				ipif_refrele(src_ipif);
1472 			} else {
1473 				ip3dbg(("ire_match_args: src_ipif NULL"
1474 				    " dst_ill %p\n", (void *)dst_ill));
1475 				return (B_FALSE);
1476 			}
1477 		}
1478 		if (ire->ire_ipif != NULL && ire->ire_type != IRE_LOCAL &&
1479 		    !(ire->ire_type & IRE_INTERFACE)) {
1480 			ipif_t	*tipif;
1481 
1482 			if ((match_flags & MATCH_IRE_DEFAULT) == 0)
1483 				return (B_FALSE);
1484 			mutex_enter(&ire->ire_ipif->ipif_ill->ill_lock);
1485 			for (tipif = ire->ire_ipif->ipif_ill->ill_ipif;
1486 			    tipif != NULL; tipif = tipif->ipif_next) {
1487 				if (IPIF_CAN_LOOKUP(tipif) &&
1488 				    (tipif->ipif_flags & IPIF_UP) &&
1489 				    (tipif->ipif_zoneid == zoneid))
1490 					break;
1491 			}
1492 			mutex_exit(&ire->ire_ipif->ipif_ill->ill_lock);
1493 			if (tipif == NULL)
1494 				return (B_FALSE);
1495 		}
1496 	}
1497 
1498 	if (match_flags & MATCH_IRE_GW) {
1499 		mutex_enter(&ire->ire_lock);
1500 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1501 		mutex_exit(&ire->ire_lock);
1502 	}
1503 	/*
1504 	 * For IRE_CACHES, MATCH_IRE_ILL/ILL_GROUP really means that
1505 	 * somebody wants to send out on a particular interface which
1506 	 * is given by ire_stq and hence use ire_stq to derive the ill
1507 	 * value. ire_ipif for IRE_CACHES is just the
1508 	 * means of getting a source address i.e ire_src_addr_v6 =
1509 	 * ire->ire_ipif->ipif_src_addr_v6.
1510 	 */
1511 	if (match_flags & (MATCH_IRE_ILL|MATCH_IRE_ILL_GROUP)) {
1512 		ire_ill = ire_to_ill(ire);
1513 		if (ire_ill != NULL)
1514 			ire_ill_group = ire_ill->ill_group;
1515 		ipif_ill = ipif->ipif_ill;
1516 		ipif_ill_group = ipif_ill->ill_group;
1517 	}
1518 
1519 	/* No ire_addr_v6 bits set past the mask */
1520 	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
1521 	    ire->ire_addr_v6));
1522 	V6_MASK_COPY(*addr, *mask, masked_addr);
1523 
1524 	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
1525 	    ((!(match_flags & MATCH_IRE_GW)) ||
1526 		IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
1527 	    ((!(match_flags & MATCH_IRE_TYPE)) ||
1528 		(ire->ire_type & type)) &&
1529 	    ((!(match_flags & MATCH_IRE_SRC)) ||
1530 		IN6_ARE_ADDR_EQUAL(&ire->ire_src_addr_v6,
1531 		&ipif->ipif_v6src_addr)) &&
1532 	    ((!(match_flags & MATCH_IRE_IPIF)) ||
1533 		(ire->ire_ipif == ipif)) &&
1534 	    ((!(match_flags & MATCH_IRE_MARK_HIDDEN)) ||
1535 		(ire->ire_type != IRE_CACHE ||
1536 		ire->ire_marks & IRE_MARK_HIDDEN)) &&
1537 	    ((!(match_flags & MATCH_IRE_ILL)) ||
1538 		(ire_ill == ipif_ill)) &&
1539 	    ((!(match_flags & MATCH_IRE_IHANDLE)) ||
1540 		(ire->ire_ihandle == ihandle)) &&
1541 	    ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
1542 		(ire_ill == ipif_ill) ||
1543 		(ire_ill_group != NULL &&
1544 		ire_ill_group == ipif_ill_group))) {
1545 		/* We found the matched IRE */
1546 		return (B_TRUE);
1547 	}
1548 	return (B_FALSE);
1549 }
1550 
1551 /*
1552  * Lookup for a route in all the tables
1553  */
1554 ire_t *
1555 ire_route_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1556     const in6_addr_t *gateway, int type, ipif_t *ipif, ire_t **pire,
1557     zoneid_t zoneid, int flags)
1558 {
1559 	ire_t *ire = NULL;
1560 
1561 	/*
1562 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1563 	 * MATCH_IRE_ILL is set.
1564 	 */
1565 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1566 	    (ipif == NULL))
1567 		return (NULL);
1568 
1569 	/*
1570 	 * might be asking for a cache lookup,
1571 	 * This is not best way to lookup cache,
1572 	 * user should call ire_cache_lookup directly.
1573 	 *
1574 	 * If MATCH_IRE_TYPE was set, first lookup in the cache table and then
1575 	 * in the forwarding table, if the applicable type flags were set.
1576 	 */
1577 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_CACHETABLE) != 0) {
1578 		ire = ire_ctable_lookup_v6(addr, gateway, type, ipif, zoneid,
1579 		    flags);
1580 		if (ire != NULL)
1581 			return (ire);
1582 	}
1583 	if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
1584 		ire = ire_ftable_lookup_v6(addr, mask, gateway, type, ipif,
1585 		    pire, zoneid, 0, flags);
1586 	}
1587 	return (ire);
1588 }
1589 
1590 /*
1591  * Lookup a route in forwarding table.
1592  * specific lookup is indicated by passing the
1593  * required parameters and indicating the
1594  * match required in flag field.
1595  *
1596  * Looking for default route can be done in three ways
1597  * 1) pass mask as ipv6_all_zeros and set MATCH_IRE_MASK in flags field
1598  *    along with other matches.
1599  * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
1600  *    field along with other matches.
1601  * 3) if the destination and mask are passed as zeros.
1602  *
1603  * A request to return a default route if no route
1604  * is found, can be specified by setting MATCH_IRE_DEFAULT
1605  * in flags.
1606  *
1607  * It does not support recursion more than one level. It
1608  * will do recursive lookup only when the lookup maps to
1609  * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
1610  *
1611  * If the routing table is setup to allow more than one level
1612  * of recursion, the cleaning up cache table will not work resulting
1613  * in invalid routing.
1614  *
1615  * Supports link-local addresses by following the ipif/ill when recursing.
1616  *
1617  * NOTE : When this function returns NULL, pire has already been released.
1618  *	  pire is valid only when this function successfully returns an
1619  *	  ire.
1620  */
1621 ire_t *
1622 ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1623     const in6_addr_t *gateway, int type, ipif_t *ipif, ire_t **pire,
1624     zoneid_t zoneid, uint32_t ihandle, int flags)
1625 {
1626 	irb_t *irb_ptr;
1627 	ire_t	*rire;
1628 	ire_t *ire = NULL;
1629 	ire_t	*saved_ire;
1630 	nce_t	*nce;
1631 	int i;
1632 	in6_addr_t gw_addr_v6;
1633 
1634 	ASSERT(addr != NULL);
1635 	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
1636 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
1637 	ASSERT(ipif == NULL || ipif->ipif_isv6);
1638 	ASSERT(!(flags & MATCH_IRE_WQ));
1639 
1640 	/*
1641 	 * When we return NULL from this function, we should make
1642 	 * sure that *pire is NULL so that the callers will not
1643 	 * wrongly REFRELE the pire.
1644 	 */
1645 	if (pire != NULL)
1646 		*pire = NULL;
1647 	/*
1648 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
1649 	 * MATCH_IRE_ILL is set.
1650 	 */
1651 	if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
1652 	    (ipif == NULL))
1653 		return (NULL);
1654 
1655 	/*
1656 	 * If the mask is known, the lookup
1657 	 * is simple, if the mask is not known
1658 	 * we need to search.
1659 	 */
1660 	if (flags & MATCH_IRE_MASK) {
1661 		uint_t masklen;
1662 
1663 		masklen = ip_mask_to_plen_v6(mask);
1664 		if (ip_forwarding_table_v6[masklen] == NULL)
1665 			return (NULL);
1666 		irb_ptr = &(ip_forwarding_table_v6[masklen][
1667 		    IRE_ADDR_MASK_HASH_V6(*addr, *mask, ip6_ftable_hash_size)]);
1668 		rw_enter(&irb_ptr->irb_lock, RW_READER);
1669 		for (ire = irb_ptr->irb_ire; ire != NULL;
1670 		    ire = ire->ire_next) {
1671 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
1672 				continue;
1673 			if (ire_match_args_v6(ire, addr, mask, gateway, type,
1674 			    ipif, zoneid, ihandle, flags))
1675 				goto found_ire;
1676 		}
1677 		rw_exit(&irb_ptr->irb_lock);
1678 	} else {
1679 		/*
1680 		 * In this case we don't know the mask, we need to
1681 		 * search the table assuming different mask sizes.
1682 		 * we start with 128 bit mask, we don't allow default here.
1683 		 */
1684 		for (i = (IP6_MASK_TABLE_SIZE - 1); i > 0; i--) {
1685 			in6_addr_t tmpmask;
1686 
1687 			if ((ip_forwarding_table_v6[i]) == NULL)
1688 				continue;
1689 			(void) ip_plen_to_mask_v6(i, &tmpmask);
1690 			irb_ptr = &ip_forwarding_table_v6[i][
1691 			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
1692 			    ip6_ftable_hash_size)];
1693 			rw_enter(&irb_ptr->irb_lock, RW_READER);
1694 			for (ire = irb_ptr->irb_ire; ire != NULL;
1695 			    ire = ire->ire_next) {
1696 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1697 					continue;
1698 				if (ire_match_args_v6(ire, addr,
1699 				    &ire->ire_mask_v6, gateway, type, ipif,
1700 				    zoneid, ihandle, flags))
1701 					goto found_ire;
1702 			}
1703 			rw_exit(&irb_ptr->irb_lock);
1704 		}
1705 	}
1706 
1707 	/*
1708 	 * We come here if no route has yet been found.
1709 	 *
1710 	 * Handle the case where default route is
1711 	 * requested by specifying type as one of the possible
1712 	 * types for that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
1713 	 *
1714 	 * If MATCH_IRE_MASK is specified, then the appropriate default route
1715 	 * would have been found above if it exists so it isn't looked up here.
1716 	 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
1717 	 * searched for later.
1718 	 */
1719 	if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
1720 	    (type & (IRE_DEFAULT | IRE_INTERFACE))) {
1721 		if (ip_forwarding_table_v6[0] != NULL) {
1722 			/* addr & mask is zero for defaults */
1723 			irb_ptr = &ip_forwarding_table_v6[0][
1724 			    IRE_ADDR_HASH_V6(ipv6_all_zeros,
1725 			    ip6_ftable_hash_size)];
1726 			rw_enter(&irb_ptr->irb_lock, RW_READER);
1727 			for (ire = irb_ptr->irb_ire; ire != NULL;
1728 			    ire = ire->ire_next) {
1729 
1730 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1731 					continue;
1732 
1733 				if (ire_match_args_v6(ire, addr,
1734 				    &ipv6_all_zeros, gateway, type, ipif,
1735 				    zoneid, ihandle, flags))
1736 					goto found_ire;
1737 			}
1738 			rw_exit(&irb_ptr->irb_lock);
1739 		}
1740 	}
1741 	/*
1742 	 * We come here only if no route is found.
1743 	 * see if the default route can be used which is allowed
1744 	 * only if the default matching criteria is specified.
1745 	 * The ipv6_ire_default_count tracks the number of IRE_DEFAULT
1746 	 * entries. However, the ip_forwarding_table_v6[0] also contains
1747 	 * interface routes thus the count can be zero.
1748 	 */
1749 	saved_ire = NULL;
1750 	if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
1751 	    MATCH_IRE_DEFAULT) {
1752 		ire_t	*ire_origin;
1753 		uint_t	g_index;
1754 		uint_t	index;
1755 
1756 		if (ip_forwarding_table_v6[0] == NULL)
1757 			return (NULL);
1758 		irb_ptr = &(ip_forwarding_table_v6[0])[0];
1759 
1760 		/*
1761 		 * Keep a tab on the bucket while looking the IRE_DEFAULT
1762 		 * entries. We need to keep track of a particular IRE
1763 		 * (ire_origin) so this ensures that it will not be unlinked
1764 		 * from the hash list during the recursive lookup below.
1765 		 */
1766 		IRB_REFHOLD(irb_ptr);
1767 		ire = irb_ptr->irb_ire;
1768 		if (ire == NULL) {
1769 			IRB_REFRELE(irb_ptr);
1770 			return (NULL);
1771 		}
1772 
1773 		/*
1774 		 * Get the index first, since it can be changed by other
1775 		 * threads. Then get to the right default route skipping
1776 		 * default interface routes if any. As we hold a reference on
1777 		 * the IRE bucket, ipv6_ire_default_count can only increase so
1778 		 * we can't reach the end of the hash list unexpectedly.
1779 		 */
1780 		if (ipv6_ire_default_count != 0) {
1781 			g_index = ipv6_ire_default_index++;
1782 			index = g_index % ipv6_ire_default_count;
1783 			while (index != 0) {
1784 				if (!(ire->ire_type & IRE_INTERFACE))
1785 					index--;
1786 				ire = ire->ire_next;
1787 			}
1788 			ASSERT(ire != NULL);
1789 		} else {
1790 			/*
1791 			 * No default route, so we only have default interface
1792 			 * routes: don't enter the first loop.
1793 			 */
1794 			ire = NULL;
1795 		}
1796 
1797 		/*
1798 		 * Round-robin the default routers list looking for a neighbor
1799 		 * that matches the passed in parameters and is reachable.  If
1800 		 * none found, just return a route from the default router list
1801 		 * if it exists. If we can't find a default route (IRE_DEFAULT),
1802 		 * look for interface default routes.
1803 		 * We start with the ire we found above and we walk the hash
1804 		 * list until we're back where we started, see
1805 		 * ire_get_next_default_ire(). It doesn't matter if default
1806 		 * routes are added or deleted by other threads - we know this
1807 		 * ire will stay in the list because we hold a reference on the
1808 		 * ire bucket.
1809 		 * NB: if we only have interface default routes, ire is NULL so
1810 		 * we don't even enter this loop (see above).
1811 		 */
1812 		ire_origin = ire;
1813 		for (; ire != NULL;
1814 		    ire = ire_get_next_default_ire(ire, ire_origin)) {
1815 
1816 			if (ire_match_args_v6(ire, addr,
1817 			    &ipv6_all_zeros, gateway, type, ipif,
1818 			    zoneid, ihandle, flags)) {
1819 				int match_flags;
1820 
1821 				/*
1822 				 * We have something to work with.
1823 				 * If we can find a resolved/reachable
1824 				 * entry, we will use this. Otherwise
1825 				 * we'll try to find an entry that has
1826 				 * a resolved cache entry. We will fallback
1827 				 * on this if we don't find anything else.
1828 				 */
1829 				if (saved_ire == NULL)
1830 					saved_ire = ire;
1831 				mutex_enter(&ire->ire_lock);
1832 				gw_addr_v6 = ire->ire_gateway_addr_v6;
1833 				mutex_exit(&ire->ire_lock);
1834 				match_flags = MATCH_IRE_ILL_GROUP;
1835 				rire = ire_ctable_lookup_v6(&gw_addr_v6, NULL,
1836 				    0, ire->ire_ipif, zoneid, match_flags);
1837 				if (rire != NULL) {
1838 					nce = rire->ire_nce;
1839 					if (nce != NULL &&
1840 					    NCE_ISREACHABLE(nce) &&
1841 					    nce->nce_flags & NCE_F_ISROUTER) {
1842 						ire_refrele(rire);
1843 						IRE_REFHOLD(ire);
1844 						IRB_REFRELE(irb_ptr);
1845 						goto found_ire_held;
1846 					} else if (nce != NULL &&
1847 					    !(nce->nce_flags &
1848 					    NCE_F_ISROUTER)) {
1849 						/*
1850 						 * Make sure we don't use
1851 						 * this ire
1852 						 */
1853 						if (saved_ire == ire)
1854 							saved_ire = NULL;
1855 					}
1856 					ire_refrele(rire);
1857 				} else if (ipv6_ire_default_count > 1 &&
1858 				    zoneid != ALL_ZONES) {
1859 					/*
1860 					 * When we're in a local zone, we're
1861 					 * only interested in default routers
1862 					 * that are reachable through ipifs
1863 					 * within our zone.
1864 					 * The potentially expensive call to
1865 					 * ire_route_lookup_v6() is avoided when
1866 					 * we have only one default route.
1867 					 */
1868 					rire = ire_route_lookup_v6(&gw_addr_v6,
1869 					    NULL, NULL, 0, ire->ire_ipif, NULL,
1870 					    zoneid, match_flags);
1871 					if (rire != NULL) {
1872 						ire_refrele(rire);
1873 						saved_ire = ire;
1874 					} else if (saved_ire == ire) {
1875 						/*
1876 						 * Make sure we don't use
1877 						 * this ire
1878 						 */
1879 						saved_ire = NULL;
1880 					}
1881 				}
1882 			}
1883 		}
1884 		if (saved_ire != NULL) {
1885 			ire = saved_ire;
1886 			IRE_REFHOLD(ire);
1887 			IRB_REFRELE(irb_ptr);
1888 			goto found_ire_held;
1889 		} else {
1890 			/*
1891 			 * Look for a interface default route matching the
1892 			 * args passed in. No round robin here. Just pick
1893 			 * the right one.
1894 			 */
1895 			for (ire = irb_ptr->irb_ire; ire != NULL;
1896 			    ire = ire->ire_next) {
1897 
1898 				if (!(ire->ire_type & IRE_INTERFACE))
1899 					continue;
1900 
1901 				if (ire->ire_marks & IRE_MARK_CONDEMNED)
1902 					continue;
1903 
1904 				if (ire_match_args_v6(ire, addr,
1905 				    &ipv6_all_zeros, gateway, type, ipif,
1906 				    zoneid, ihandle, flags)) {
1907 					IRE_REFHOLD(ire);
1908 					IRB_REFRELE(irb_ptr);
1909 					goto found_ire_held;
1910 				}
1911 			}
1912 			IRB_REFRELE(irb_ptr);
1913 		}
1914 	}
1915 	ASSERT(ire == NULL);
1916 	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
1917 	return (NULL);
1918 found_ire:
1919 	ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
1920 	IRE_REFHOLD(ire);
1921 	rw_exit(&irb_ptr->irb_lock);
1922 
1923 found_ire_held:
1924 	if ((flags & MATCH_IRE_RJ_BHOLE) &&
1925 	    (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
1926 		return (ire);
1927 	}
1928 	/*
1929 	 * At this point, IRE that was found must be an IRE_FORWARDTABLE
1930 	 * or IRE_CACHETABLE type.  If this is a recursive lookup and an
1931 	 * IRE_INTERFACE type was found, return that.  If it was some other
1932 	 * IRE_FORWARDTABLE type of IRE (one of the prefix types), then it
1933 	 * is necessary to fill in the  parent IRE pointed to by pire, and
1934 	 * then lookup the gateway address of  the parent.  For backwards
1935 	 * compatiblity, if this lookup returns an
1936 	 * IRE other than a IRE_CACHETABLE or IRE_INTERFACE, then one more level
1937 	 * of lookup is done.
1938 	 */
1939 	if (flags & MATCH_IRE_RECURSIVE) {
1940 		ipif_t	*gw_ipif;
1941 		int match_flags = MATCH_IRE_DSTONLY;
1942 
1943 		if (ire->ire_type & IRE_INTERFACE)
1944 			return (ire);
1945 		if (pire != NULL)
1946 			*pire = ire;
1947 		/*
1948 		 * If we can't find an IRE_INTERFACE or the caller has not
1949 		 * asked for pire, we need to REFRELE the saved_ire.
1950 		 */
1951 		saved_ire = ire;
1952 
1953 		/*
1954 		 * Currently MATCH_IRE_ILL is never used with
1955 		 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
1956 		 * sending out packets as MATCH_IRE_ILL is used only
1957 		 * for communicating with on-link hosts. We can't assert
1958 		 * that here as RTM_GET calls this function with
1959 		 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
1960 		 * We have already used the MATCH_IRE_ILL in determining
1961 		 * the right prefix route at this point. To match the
1962 		 * behavior of how we locate routes while sending out
1963 		 * packets, we don't want to use MATCH_IRE_ILL below
1964 		 * while locating the interface route.
1965 		 */
1966 		if (ire->ire_ipif != NULL)
1967 			match_flags |= MATCH_IRE_ILL_GROUP;
1968 
1969 		mutex_enter(&ire->ire_lock);
1970 		gw_addr_v6 = ire->ire_gateway_addr_v6;
1971 		mutex_exit(&ire->ire_lock);
1972 
1973 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL, 0,
1974 		    ire->ire_ipif, NULL, zoneid, match_flags);
1975 		if (ire == NULL) {
1976 			/*
1977 			 * In this case we have to deal with the
1978 			 * MATCH_IRE_PARENT flag, which means the
1979 			 * parent has to be returned if ire is NULL.
1980 			 * The aim of this is to have (at least) a starting
1981 			 * ire when we want to look at all of the ires in a
1982 			 * bucket aimed at a single destination (as is the
1983 			 * case in ip_newroute_v6 for the RTF_MULTIRT
1984 			 * flagged routes).
1985 			 */
1986 			if (flags & MATCH_IRE_PARENT) {
1987 				if (pire != NULL) {
1988 					/*
1989 					 * Need an extra REFHOLD, if the
1990 					 * parent ire is returned via both
1991 					 * ire and pire.
1992 					 */
1993 					IRE_REFHOLD(saved_ire);
1994 				}
1995 				ire = saved_ire;
1996 			} else {
1997 				ire_refrele(saved_ire);
1998 				if (pire != NULL)
1999 					*pire = NULL;
2000 			}
2001 			return (ire);
2002 		}
2003 		if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
2004 			/*
2005 			 * If the caller did not ask for pire, release
2006 			 * it now.
2007 			 */
2008 			if (pire == NULL) {
2009 				ire_refrele(saved_ire);
2010 			}
2011 			return (ire);
2012 		}
2013 		match_flags |= MATCH_IRE_TYPE;
2014 		mutex_enter(&ire->ire_lock);
2015 		gw_addr_v6 = ire->ire_gateway_addr_v6;
2016 		mutex_exit(&ire->ire_lock);
2017 		gw_ipif = ire->ire_ipif;
2018 		ire_refrele(ire);
2019 		ire = ire_route_lookup_v6(&gw_addr_v6, NULL, NULL,
2020 		    (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
2021 		    match_flags);
2022 		if (ire == NULL) {
2023 			/*
2024 			 * In this case we have to deal with the
2025 			 * MATCH_IRE_PARENT flag, which means the
2026 			 * parent has to be returned if ire is NULL.
2027 			 * The aim of this is to have (at least) a starting
2028 			 * ire when we want to look at all of the ires in a
2029 			 * bucket aimed at a single destination (as is the
2030 			 * case in ip_newroute_v6 for the RTF_MULTIRT
2031 			 * flagged routes).
2032 			 */
2033 			if (flags & MATCH_IRE_PARENT) {
2034 				if (pire != NULL) {
2035 					/*
2036 					 * Need an extra REFHOLD, if the
2037 					 * parent ire is returned via both
2038 					 * ire and pire.
2039 					 */
2040 					IRE_REFHOLD(saved_ire);
2041 				}
2042 				ire = saved_ire;
2043 			} else {
2044 				ire_refrele(saved_ire);
2045 				if (pire != NULL)
2046 					*pire = NULL;
2047 			}
2048 			return (ire);
2049 		} else if (pire == NULL) {
2050 			/*
2051 			 * If the caller did not ask for pire, release
2052 			 * it now.
2053 			 */
2054 			ire_refrele(saved_ire);
2055 		}
2056 		return (ire);
2057 	}
2058 
2059 	ASSERT(pire == NULL || *pire == NULL);
2060 	return (ire);
2061 }
2062 
2063 /*
2064  * Looks up cache table for a route.
2065  * specific lookup can be indicated by
2066  * passing the MATCH_* flags and the
2067  * necessary parameters.
2068  */
2069 ire_t *
2070 ire_ctable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *gateway,
2071     int type, ipif_t *ipif, zoneid_t zoneid, int flags)
2072 {
2073 	ire_t *ire;
2074 	irb_t *irb_ptr;
2075 	ASSERT(addr != NULL);
2076 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
2077 
2078 	/*
2079 	 * ire_match_args_v6() will dereference ipif MATCH_IRE_SRC or
2080 	 * MATCH_IRE_ILL is set.
2081 	 */
2082 	if ((flags & (MATCH_IRE_SRC |  MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
2083 	    (ipif == NULL))
2084 		return (NULL);
2085 
2086 	irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
2087 	    ip6_cache_table_size)];
2088 	rw_enter(&irb_ptr->irb_lock, RW_READER);
2089 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
2090 		if (ire->ire_marks & IRE_MARK_CONDEMNED)
2091 			continue;
2092 
2093 		ASSERT(IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, &ipv6_all_ones));
2094 		if (ire_match_args_v6(ire, addr, &ire->ire_mask_v6, gateway,
2095 		    type, ipif, zoneid, 0, flags)) {
2096 			IRE_REFHOLD(ire);
2097 			rw_exit(&irb_ptr->irb_lock);
2098 			return (ire);
2099 		}
2100 	}
2101 	rw_exit(&irb_ptr->irb_lock);
2102 	return (NULL);
2103 }
2104 
2105 /*
2106  * Lookup cache. Don't return IRE_MARK_HIDDEN entries. Callers
2107  * should use ire_ctable_lookup with MATCH_IRE_MARK_HIDDEN to get
2108  * to the hidden ones.
2109  */
2110 ire_t *
2111 ire_cache_lookup_v6(const in6_addr_t *addr, zoneid_t zoneid)
2112 {
2113 	irb_t *irb_ptr;
2114 	ire_t *ire;
2115 
2116 	irb_ptr = &ip_cache_table_v6[IRE_ADDR_HASH_V6(*addr,
2117 	    ip6_cache_table_size)];
2118 	rw_enter(&irb_ptr->irb_lock, RW_READER);
2119 	for (ire = irb_ptr->irb_ire; ire; ire = ire->ire_next) {
2120 		if (ire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
2121 			continue;
2122 		if (IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, addr)) {
2123 			if (zoneid == ALL_ZONES || ire->ire_zoneid == zoneid ||
2124 			    ire->ire_type == IRE_LOCAL) {
2125 				IRE_REFHOLD(ire);
2126 				rw_exit(&irb_ptr->irb_lock);
2127 				return (ire);
2128 			}
2129 		}
2130 	}
2131 	rw_exit(&irb_ptr->irb_lock);
2132 	return (NULL);
2133 }
2134 
2135 /*
2136  * Locate the interface ire that is tied to the cache ire 'cire' via
2137  * cire->ire_ihandle.
2138  *
2139  * We are trying to create the cache ire for an onlink destn. or
2140  * gateway in 'cire'. We are called from ire_add_v6() in the IRE_IF_RESOLVER
2141  * case for xresolv interfaces, after the ire has come back from
2142  * an external resolver.
2143  */
2144 static ire_t *
2145 ire_ihandle_lookup_onlink_v6(ire_t *cire)
2146 {
2147 	ire_t	*ire;
2148 	int	match_flags;
2149 	int	i;
2150 	int	j;
2151 	irb_t	*irb_ptr;
2152 
2153 	ASSERT(cire != NULL);
2154 
2155 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2156 	/*
2157 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2158 	 * (When ip_newroute_v6() created 'cire' for an on-link destn.
2159 	 * it set its cmask from the interface ire's mask)
2160 	 */
2161 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6,
2162 	    NULL, IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle,
2163 	    match_flags);
2164 	if (ire != NULL)
2165 		return (ire);
2166 	/*
2167 	 * If we didn't find an interface ire above, we can't declare failure.
2168 	 * For backwards compatibility, we need to support prefix routes
2169 	 * pointing to next hop gateways that are not on-link.
2170 	 *
2171 	 * In the resolver/noresolver case, ip_newroute_v6() thinks
2172 	 * it is creating the cache ire for an onlink destination in 'cire'.
2173 	 * But 'cire' is not actually onlink, because ire_ftable_lookup_v6()
2174 	 * cheated it, by doing ire_route_lookup_v6() twice and returning an
2175 	 * interface ire.
2176 	 *
2177 	 * Eg. default	-	gw1			(line 1)
2178 	 *	gw1	-	gw2			(line 2)
2179 	 *	gw2	-	hme0			(line 3)
2180 	 *
2181 	 * In the above example, ip_newroute_v6() tried to create the cache ire
2182 	 * 'cire' for gw1, based on the interface route in line 3. The
2183 	 * ire_ftable_lookup_v6() above fails, because there is
2184 	 * no interface route to reach gw1. (it is gw2). We fall thru below.
2185 	 *
2186 	 * Do a brute force search based on the ihandle in a subset of the
2187 	 * forwarding tables, corresponding to cire->ire_cmask_v6. Otherwise
2188 	 * things become very complex, since we don't have 'pire' in this
2189 	 * case. (Also note that this method is not possible in the offlink
2190 	 * case because we don't know the mask)
2191 	 */
2192 	i = ip_mask_to_plen_v6(&cire->ire_cmask_v6);
2193 	if ((ip_forwarding_table_v6[i]) == NULL)
2194 		return (NULL);
2195 	for (j = 0; j < ip6_ftable_hash_size; j++) {
2196 		irb_ptr = &ip_forwarding_table_v6[i][j];
2197 		rw_enter(&irb_ptr->irb_lock, RW_READER);
2198 		for (ire = irb_ptr->irb_ire; ire != NULL;
2199 		    ire = ire->ire_next) {
2200 			if (ire->ire_marks & IRE_MARK_CONDEMNED)
2201 				continue;
2202 			if ((ire->ire_type & IRE_INTERFACE) &&
2203 			    (ire->ire_ihandle == cire->ire_ihandle)) {
2204 				IRE_REFHOLD(ire);
2205 				rw_exit(&irb_ptr->irb_lock);
2206 				return (ire);
2207 			}
2208 		}
2209 		rw_exit(&irb_ptr->irb_lock);
2210 	}
2211 	return (NULL);
2212 }
2213 
2214 
2215 /*
2216  * Locate the interface ire that is tied to the cache ire 'cire' via
2217  * cire->ire_ihandle.
2218  *
2219  * We are trying to create the cache ire for an offlink destn based
2220  * on the cache ire of the gateway in 'cire'. 'pire' is the prefix ire
2221  * as found by ip_newroute_v6(). We are called from ip_newroute_v6() in
2222  * the IRE_CACHE case.
2223  */
2224 ire_t *
2225 ire_ihandle_lookup_offlink_v6(ire_t *cire, ire_t *pire)
2226 {
2227 	ire_t	*ire;
2228 	int	match_flags;
2229 	in6_addr_t	gw_addr;
2230 	ipif_t		*gw_ipif;
2231 
2232 	ASSERT(cire != NULL && pire != NULL);
2233 
2234 	match_flags =  MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
2235 	/*
2236 	 * ip_newroute_v6 calls ire_ftable_lookup with MATCH_IRE_ILL only
2237 	 * for on-link hosts. We should never be here for onlink.
2238 	 * Thus, use MATCH_IRE_ILL_GROUP.
2239 	 */
2240 	if (pire->ire_ipif != NULL)
2241 		match_flags |= MATCH_IRE_ILL_GROUP;
2242 	/*
2243 	 * We know that the mask of the interface ire equals cire->ire_cmask.
2244 	 * (When ip_newroute_v6() created 'cire' for an on-link destn. it set
2245 	 * its cmask from the interface ire's mask)
2246 	 */
2247 	ire = ire_ftable_lookup_v6(&cire->ire_addr_v6, &cire->ire_cmask_v6, 0,
2248 	    IRE_INTERFACE, pire->ire_ipif, NULL, ALL_ZONES, cire->ire_ihandle,
2249 	    match_flags);
2250 	if (ire != NULL)
2251 		return (ire);
2252 	/*
2253 	 * If we didn't find an interface ire above, we can't declare failure.
2254 	 * For backwards compatibility, we need to support prefix routes
2255 	 * pointing to next hop gateways that are not on-link.
2256 	 *
2257 	 * Assume we are trying to ping some offlink destn, and we have the
2258 	 * routing table below.
2259 	 *
2260 	 * Eg.	default	- gw1		<--- pire	(line 1)
2261 	 *	gw1	- gw2				(line 2)
2262 	 *	gw2	- hme0				(line 3)
2263 	 *
2264 	 * If we already have a cache ire for gw1 in 'cire', the
2265 	 * ire_ftable_lookup_v6 above would have failed, since there is no
2266 	 * interface ire to reach gw1. We will fallthru below.
2267 	 *
2268 	 * Here we duplicate the steps that ire_ftable_lookup_v6() did in
2269 	 * getting 'cire' from 'pire', in the MATCH_IRE_RECURSIVE case.
2270 	 * The differences are the following
2271 	 * i.   We want the interface ire only, so we call
2272 	 *	ire_ftable_lookup_v6() instead of ire_route_lookup_v6()
2273 	 * ii.  We look for only prefix routes in the 1st call below.
2274 	 * ii.  We want to match on the ihandle in the 2nd call below.
2275 	 */
2276 	match_flags =  MATCH_IRE_TYPE;
2277 	if (pire->ire_ipif != NULL)
2278 		match_flags |= MATCH_IRE_ILL_GROUP;
2279 
2280 	mutex_enter(&pire->ire_lock);
2281 	gw_addr = pire->ire_gateway_addr_v6;
2282 	mutex_exit(&pire->ire_lock);
2283 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_OFFSUBNET,
2284 	    pire->ire_ipif, NULL, ALL_ZONES, 0, match_flags);
2285 	if (ire == NULL)
2286 		return (NULL);
2287 	/*
2288 	 * At this point 'ire' corresponds to the entry shown in line 2.
2289 	 * gw_addr is 'gw2' in the example above.
2290 	 */
2291 	mutex_enter(&ire->ire_lock);
2292 	gw_addr = ire->ire_gateway_addr_v6;
2293 	mutex_exit(&ire->ire_lock);
2294 	gw_ipif = ire->ire_ipif;
2295 	ire_refrele(ire);
2296 
2297 	match_flags |= MATCH_IRE_IHANDLE;
2298 	ire = ire_ftable_lookup_v6(&gw_addr, 0, 0, IRE_INTERFACE,
2299 	    gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, match_flags);
2300 	return (ire);
2301 }
2302 
2303 /*
2304  * Return the IRE_LOOPBACK, IRE_IF_RESOLVER or IRE_IF_NORESOLVER
2305  * ire associated with the specified ipif.
2306  *
2307  * This might occasionally be called when IPIF_UP is not set since
2308  * the IPV6_MULTICAST_IF as well as creating interface routes
2309  * allows specifying a down ipif (ipif_lookup* match ipifs that are down).
2310  *
2311  * Note that if IPIF_NOLOCAL, IPIF_NOXMIT, or IPIF_DEPRECATED is set on
2312  * the ipif this routine might return NULL.
2313  * (Sometimes called as writer though not required by this function.)
2314  */
2315 ire_t *
2316 ipif_to_ire_v6(ipif_t *ipif)
2317 {
2318 	ire_t	*ire;
2319 
2320 	ASSERT(ipif->ipif_isv6);
2321 	if (ipif->ipif_ire_type == IRE_LOOPBACK) {
2322 		ire = ire_ctable_lookup_v6(&ipif->ipif_v6lcl_addr, NULL,
2323 		    IRE_LOOPBACK, ipif, ALL_ZONES,
2324 		    (MATCH_IRE_TYPE | MATCH_IRE_IPIF));
2325 	} else if (ipif->ipif_flags & IPIF_POINTOPOINT) {
2326 		/* In this case we need to lookup destination address. */
2327 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6pp_dst_addr,
2328 		    &ipv6_all_ones, NULL, IRE_INTERFACE, ipif, NULL, ALL_ZONES,
2329 		    0, (MATCH_IRE_TYPE | MATCH_IRE_IPIF | MATCH_IRE_MASK));
2330 	} else {
2331 		ire = ire_ftable_lookup_v6(&ipif->ipif_v6subnet,
2332 		    &ipif->ipif_v6net_mask, NULL, IRE_INTERFACE, ipif, NULL,
2333 		    ALL_ZONES, 0, (MATCH_IRE_TYPE | MATCH_IRE_IPIF |
2334 		    MATCH_IRE_MASK));
2335 	}
2336 	return (ire);
2337 }
2338 
2339 /*
2340  * Return B_TRUE if a multirt route is resolvable
2341  * (or if no route is resolved yet), B_FALSE otherwise.
2342  * This only works in the global zone.
2343  */
2344 boolean_t
2345 ire_multirt_need_resolve_v6(const in6_addr_t *v6dstp)
2346 {
2347 	ire_t	*first_fire;
2348 	ire_t	*first_cire;
2349 	ire_t	*fire;
2350 	ire_t	*cire;
2351 	irb_t	*firb;
2352 	irb_t	*cirb;
2353 	int	unres_cnt = 0;
2354 	boolean_t resolvable = B_FALSE;
2355 
2356 	/* Retrieve the first IRE_HOST that matches the destination */
2357 	first_fire = ire_ftable_lookup_v6(v6dstp, &ipv6_all_ones, 0, IRE_HOST,
2358 	    NULL, NULL, ALL_ZONES, 0, MATCH_IRE_MASK | MATCH_IRE_TYPE);
2359 
2360 	/* No route at all */
2361 	if (first_fire == NULL) {
2362 		return (B_TRUE);
2363 	}
2364 
2365 	firb = first_fire->ire_bucket;
2366 	ASSERT(firb);
2367 
2368 	/* Retrieve the first IRE_CACHE ire for that destination. */
2369 	first_cire = ire_cache_lookup_v6(v6dstp, GLOBAL_ZONEID);
2370 
2371 	/* No resolved route. */
2372 	if (first_cire == NULL) {
2373 		ire_refrele(first_fire);
2374 		return (B_TRUE);
2375 	}
2376 
2377 	/* At least one route is resolved. */
2378 
2379 	cirb = first_cire->ire_bucket;
2380 	ASSERT(cirb);
2381 
2382 	/* Count the number of routes to that dest that are declared. */
2383 	IRB_REFHOLD(firb);
2384 	for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2385 		if (!(fire->ire_flags & RTF_MULTIRT))
2386 			continue;
2387 		if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, v6dstp))
2388 			continue;
2389 		unres_cnt++;
2390 	}
2391 	IRB_REFRELE(firb);
2392 
2393 
2394 	/* Then subtract the number of routes to that dst that are resolved */
2395 	IRB_REFHOLD(cirb);
2396 	for (cire = first_cire; cire != NULL; cire = cire->ire_next) {
2397 	    if (!(cire->ire_flags & RTF_MULTIRT))
2398 		continue;
2399 	    if (!IN6_ARE_ADDR_EQUAL(&cire->ire_addr_v6, v6dstp))
2400 		continue;
2401 	    if (cire->ire_marks & (IRE_MARK_CONDEMNED|IRE_MARK_HIDDEN))
2402 		continue;
2403 	    unres_cnt--;
2404 	}
2405 	IRB_REFRELE(cirb);
2406 
2407 	/* At least one route is unresolved; search for a resolvable route. */
2408 	if (unres_cnt > 0)
2409 		resolvable = ire_multirt_lookup_v6(&first_cire, &first_fire,
2410 		    MULTIRT_USESTAMP|MULTIRT_CACHEGW);
2411 
2412 	if (first_fire)
2413 		ire_refrele(first_fire);
2414 
2415 	if (first_cire)
2416 		ire_refrele(first_cire);
2417 
2418 	return (resolvable);
2419 }
2420 
2421 
2422 /*
2423  * Return B_TRUE and update *ire_arg and *fire_arg
2424  * if at least one resolvable route is found.
2425  * Return B_FALSE otherwise (all routes are resolved or
2426  * the remaining unresolved routes are all unresolvable).
2427  * This only works in the global zone.
2428  */
2429 boolean_t
2430 ire_multirt_lookup_v6(ire_t **ire_arg, ire_t **fire_arg, uint32_t flags)
2431 {
2432 	clock_t	delta;
2433 	ire_t	*best_fire = NULL;
2434 	ire_t	*best_cire = NULL;
2435 	ire_t	*first_fire;
2436 	ire_t	*first_cire;
2437 	ire_t	*fire;
2438 	ire_t	*cire;
2439 	irb_t	*firb = NULL;
2440 	irb_t	*cirb = NULL;
2441 	ire_t	*gw_ire;
2442 	boolean_t	already_resolved;
2443 	boolean_t	res;
2444 	in6_addr_t	v6dst;
2445 	in6_addr_t	v6gw;
2446 
2447 	ip2dbg(("ire_multirt_lookup_v6: *ire_arg %p, *fire_arg %p, "
2448 	    "flags %04x\n", (void *)*ire_arg, (void *)*fire_arg, flags));
2449 
2450 	ASSERT(ire_arg);
2451 	ASSERT(fire_arg);
2452 
2453 	/* Not an IRE_HOST ire; give up. */
2454 	if ((*fire_arg == NULL) ||
2455 	    ((*fire_arg)->ire_type != IRE_HOST)) {
2456 		return (B_FALSE);
2457 	}
2458 
2459 	/* This is the first IRE_HOST ire for that destination. */
2460 	first_fire = *fire_arg;
2461 	firb = first_fire->ire_bucket;
2462 	ASSERT(firb);
2463 
2464 	mutex_enter(&first_fire->ire_lock);
2465 	v6dst = first_fire->ire_addr_v6;
2466 	mutex_exit(&first_fire->ire_lock);
2467 
2468 	ip2dbg(("ire_multirt_lookup_v6: dst %08x\n",
2469 	    ntohl(V4_PART_OF_V6(v6dst))));
2470 
2471 	/*
2472 	 * Retrieve the first IRE_CACHE ire for that destination;
2473 	 * if we don't find one, no route for that dest is
2474 	 * resolved yet.
2475 	 */
2476 	first_cire = ire_cache_lookup_v6(&v6dst, GLOBAL_ZONEID);
2477 	if (first_cire) {
2478 		cirb = first_cire->ire_bucket;
2479 	}
2480 
2481 	ip2dbg(("ire_multirt_lookup_v6: first_cire %p\n", (void *)first_cire));
2482 
2483 	/*
2484 	 * Search for a resolvable route, giving the top priority
2485 	 * to routes that can be resolved without any call to the resolver.
2486 	 */
2487 	IRB_REFHOLD(firb);
2488 
2489 	if (!IN6_IS_ADDR_MULTICAST(&v6dst)) {
2490 		/*
2491 		 * For all multiroute IRE_HOST ires for that destination,
2492 		 * check if the route via the IRE_HOST's gateway is
2493 		 * resolved yet.
2494 		 */
2495 		for (fire = first_fire; fire != NULL; fire = fire->ire_next) {
2496 
2497 			if (!(fire->ire_flags & RTF_MULTIRT))
2498 				continue;
2499 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2500 				continue;
2501 
2502 			mutex_enter(&fire->ire_lock);
2503 			v6gw = fire->ire_gateway_addr_v6;
2504 			mutex_exit(&fire->ire_lock);
2505 
2506 			ip2dbg(("ire_multirt_lookup_v6: fire %p, "
2507 			    "ire_addr %08x, ire_gateway_addr %08x\n",
2508 			    (void *)fire,
2509 			    ntohl(V4_PART_OF_V6(fire->ire_addr_v6)),
2510 			    ntohl(V4_PART_OF_V6(v6gw))));
2511 
2512 			already_resolved = B_FALSE;
2513 
2514 			if (first_cire) {
2515 				ASSERT(cirb);
2516 
2517 				IRB_REFHOLD(cirb);
2518 				/*
2519 				 * For all IRE_CACHE ires for that
2520 				 * destination.
2521 				 */
2522 				for (cire = first_cire;
2523 				    cire != NULL;
2524 				    cire = cire->ire_next) {
2525 
2526 					if (!(cire->ire_flags & RTF_MULTIRT))
2527 						continue;
2528 					if (!IN6_ARE_ADDR_EQUAL(
2529 					    &cire->ire_addr_v6, &v6dst))
2530 						continue;
2531 					if (cire->ire_marks &
2532 					    (IRE_MARK_CONDEMNED|
2533 						IRE_MARK_HIDDEN))
2534 						continue;
2535 					/*
2536 					 * Check if the IRE_CACHE's gateway
2537 					 * matches the IRE_HOST's gateway.
2538 					 */
2539 					if (IN6_ARE_ADDR_EQUAL(
2540 					    &cire->ire_gateway_addr_v6,
2541 					    &v6gw)) {
2542 						already_resolved = B_TRUE;
2543 						break;
2544 					}
2545 				}
2546 				IRB_REFRELE(cirb);
2547 			}
2548 
2549 			/*
2550 			 * This route is already resolved;
2551 			 * proceed with next one.
2552 			 */
2553 			if (already_resolved) {
2554 				ip2dbg(("ire_multirt_lookup_v6: found cire %p, "
2555 				    "already resolved\n", (void *)cire));
2556 				continue;
2557 			}
2558 
2559 			/*
2560 			 * The route is unresolved; is it actually
2561 			 * resolvable, i.e. is there a cache or a resolver
2562 			 * for the gateway?
2563 			 */
2564 			gw_ire = ire_route_lookup_v6(&v6gw, 0, 0, 0, NULL, NULL,
2565 			    ALL_ZONES, MATCH_IRE_RECURSIVE);
2566 
2567 			ip2dbg(("ire_multirt_lookup_v6: looked up gw_ire %p\n",
2568 			    (void *)gw_ire));
2569 
2570 			/*
2571 			 * This route can be resolved without any call to the
2572 			 * resolver; if the MULTIRT_CACHEGW flag is set,
2573 			 * give the top priority to this ire and exit the
2574 			 * loop.
2575 			 * This occurs when an resolver reply is processed
2576 			 * through ip_wput_nondata()
2577 			 */
2578 			if ((flags & MULTIRT_CACHEGW) &&
2579 			    (gw_ire != NULL) &&
2580 			    (gw_ire->ire_type & IRE_CACHETABLE)) {
2581 				/*
2582 				 * Release the resolver associated to the
2583 				 * previous candidate best ire, if any.
2584 				 */
2585 				if (best_cire) {
2586 					ire_refrele(best_cire);
2587 					ASSERT(best_fire);
2588 				}
2589 
2590 				best_fire = fire;
2591 				best_cire = gw_ire;
2592 
2593 				ip2dbg(("ire_multirt_lookup_v6: found top prio "
2594 				    "best_fire %p, best_cire %p\n",
2595 				    (void *)best_fire, (void *)best_cire));
2596 				break;
2597 			}
2598 
2599 			/*
2600 			 * Compute the time elapsed since our preceding
2601 			 * attempt to  resolve that route.
2602 			 * If the MULTIRT_USESTAMP flag is set, we take that
2603 			 * route into account only if this time interval
2604 			 * exceeds ip_multirt_resolution_interval;
2605 			 * this prevents us from attempting to resolve a
2606 			 * broken route upon each sending of a packet.
2607 			 */
2608 			delta = lbolt - fire->ire_last_used_time;
2609 			delta = TICK_TO_MSEC(delta);
2610 
2611 			res = (boolean_t)
2612 			    ((delta > ip_multirt_resolution_interval) ||
2613 				(!(flags & MULTIRT_USESTAMP)));
2614 
2615 			ip2dbg(("ire_multirt_lookup_v6: fire %p, delta %lu, "
2616 			    "res %d\n",
2617 			    (void *)fire, delta, res));
2618 
2619 			if (res) {
2620 				/*
2621 				 * A resolver exists for the gateway: save
2622 				 * the current IRE_HOST ire as a candidate
2623 				 * best ire. If we later discover that a
2624 				 * top priority ire exists (i.e. no need to
2625 				 * call the resolver), then this new ire
2626 				 * will be preferred to the current one.
2627 				 */
2628 				if (gw_ire != NULL) {
2629 					if (best_fire == NULL) {
2630 						ASSERT(best_cire == NULL);
2631 
2632 						best_fire = fire;
2633 						best_cire = gw_ire;
2634 
2635 						ip2dbg(("ire_multirt_lookup_v6:"
2636 						    "found candidate "
2637 						    "best_fire %p, "
2638 						    "best_cire %p\n",
2639 						    (void *)best_fire,
2640 						    (void *)best_cire));
2641 
2642 						/*
2643 						 * If MULTIRT_CACHEGW is not
2644 						 * set, we ignore the top
2645 						 * priority ires that can
2646 						 * be resolved without any
2647 						 * call to the resolver;
2648 						 * In that case, there is
2649 						 * actually no need
2650 						 * to continue the loop.
2651 						 */
2652 						if (!(flags &
2653 						    MULTIRT_CACHEGW)) {
2654 							break;
2655 						}
2656 						continue;
2657 					}
2658 				} else {
2659 					/*
2660 					 * No resolver for the gateway: the
2661 					 * route is not resolvable.
2662 					 * If the MULTIRT_SETSTAMP flag is
2663 					 * set, we stamp the IRE_HOST ire,
2664 					 * so we will not select it again
2665 					 * during this resolution interval.
2666 					 */
2667 					if (flags & MULTIRT_SETSTAMP)
2668 						fire->ire_last_used_time =
2669 						    lbolt;
2670 				}
2671 			}
2672 
2673 			if (gw_ire != NULL)
2674 				ire_refrele(gw_ire);
2675 		}
2676 	} else { /* IN6_IS_ADDR_MULTICAST(&v6dst) */
2677 
2678 		for (fire = first_fire;
2679 		    fire != NULL;
2680 		    fire = fire->ire_next) {
2681 
2682 			if (!(fire->ire_flags & RTF_MULTIRT))
2683 				continue;
2684 			if (!IN6_ARE_ADDR_EQUAL(&fire->ire_addr_v6, &v6dst))
2685 				continue;
2686 
2687 			already_resolved = B_FALSE;
2688 
2689 			mutex_enter(&fire->ire_lock);
2690 			v6gw = fire->ire_gateway_addr_v6;
2691 			mutex_exit(&fire->ire_lock);
2692 
2693 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2694 			    IRE_INTERFACE, NULL, NULL, ALL_ZONES, 0,
2695 			    MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE);
2696 
2697 			/* No resolver for the gateway; we skip this ire. */
2698 			if (gw_ire == NULL) {
2699 				continue;
2700 			}
2701 
2702 			if (first_cire) {
2703 
2704 				IRB_REFHOLD(cirb);
2705 				/*
2706 				 * For all IRE_CACHE ires for that
2707 				 * destination.
2708 				 */
2709 				for (cire = first_cire;
2710 				    cire != NULL;
2711 				    cire = cire->ire_next) {
2712 
2713 					if (!(cire->ire_flags & RTF_MULTIRT))
2714 						continue;
2715 					if (!IN6_ARE_ADDR_EQUAL(
2716 					    &cire->ire_addr_v6, &v6dst))
2717 						continue;
2718 					if (cire->ire_marks &
2719 					    (IRE_MARK_CONDEMNED|
2720 						IRE_MARK_HIDDEN))
2721 						continue;
2722 					/*
2723 					 * Cache entries are linked to the
2724 					 * parent routes using the parent handle
2725 					 * (ire_phandle). If no cache entry has
2726 					 * the same handle as fire, fire is
2727 					 * still unresolved.
2728 					 */
2729 					ASSERT(cire->ire_phandle != 0);
2730 					if (cire->ire_phandle ==
2731 					    fire->ire_phandle) {
2732 						already_resolved = B_TRUE;
2733 						break;
2734 					}
2735 				}
2736 				IRB_REFRELE(cirb);
2737 			}
2738 
2739 			/*
2740 			 * This route is already resolved; proceed with
2741 			 * next one.
2742 			 */
2743 			if (already_resolved) {
2744 				ire_refrele(gw_ire);
2745 				continue;
2746 			}
2747 
2748 			/*
2749 			 * Compute the time elapsed since our preceding
2750 			 * attempt to resolve that route.
2751 			 * If the MULTIRT_USESTAMP flag is set, we take
2752 			 * that route into account only if this time
2753 			 * interval exceeds ip_multirt_resolution_interval;
2754 			 * this prevents us from attempting to resolve a
2755 			 * broken route upon each sending of a packet.
2756 			 */
2757 			delta = lbolt - fire->ire_last_used_time;
2758 			delta = TICK_TO_MSEC(delta);
2759 
2760 			res = (boolean_t)
2761 			    ((delta > ip_multirt_resolution_interval) ||
2762 			    (!(flags & MULTIRT_USESTAMP)));
2763 
2764 			ip3dbg(("ire_multirt_lookup_v6: fire %p, delta %lx, "
2765 			    "flags %04x, res %d\n",
2766 			    (void *)fire, delta, flags, res));
2767 
2768 			if (res) {
2769 				if (best_cire) {
2770 					/*
2771 					 * Release the resolver associated
2772 					 * to the preceding candidate best
2773 					 * ire, if any.
2774 					 */
2775 					ire_refrele(best_cire);
2776 					ASSERT(best_fire);
2777 				}
2778 				best_fire = fire;
2779 				best_cire = gw_ire;
2780 				continue;
2781 			}
2782 
2783 			ire_refrele(gw_ire);
2784 		}
2785 	}
2786 
2787 	if (best_fire) {
2788 		IRE_REFHOLD(best_fire);
2789 	}
2790 	IRB_REFRELE(firb);
2791 
2792 	/* Release the first IRE_CACHE we initially looked up, if any. */
2793 	if (first_cire)
2794 		ire_refrele(first_cire);
2795 
2796 	/* Found a resolvable route. */
2797 	if (best_fire) {
2798 		ASSERT(best_cire);
2799 
2800 		if (*fire_arg)
2801 			ire_refrele(*fire_arg);
2802 		if (*ire_arg)
2803 			ire_refrele(*ire_arg);
2804 
2805 		/*
2806 		 * Update the passed arguments with the
2807 		 * resolvable multirt route we found
2808 		 */
2809 		*fire_arg = best_fire;
2810 		*ire_arg = best_cire;
2811 
2812 		ip2dbg(("ire_multirt_lookup_v6: returning B_TRUE, "
2813 		    "*fire_arg %p, *ire_arg %p\n",
2814 		    (void *)best_fire, (void *)best_cire));
2815 
2816 		return (B_TRUE);
2817 	}
2818 
2819 	ASSERT(best_cire == NULL);
2820 
2821 	ip2dbg(("ire_multirt_lookup_v6: returning B_FALSE, *fire_arg %p, "
2822 	    "*ire_arg %p\n",
2823 	    (void *)*fire_arg, (void *)*ire_arg));
2824 
2825 	/* No resolvable route. */
2826 	return (B_FALSE);
2827 }
2828 
2829 
2830 /*
2831  * Find an IRE_OFFSUBNET IRE entry for the multicast address 'v6dstp'
2832  * that goes through 'ipif'. As a fallback, a route that goes through
2833  * ipif->ipif_ill can be returned.
2834  */
2835 ire_t *
2836 ipif_lookup_multi_ire_v6(ipif_t *ipif, const in6_addr_t *v6dstp)
2837 {
2838 	ire_t	*ire;
2839 	ire_t	*save_ire = NULL;
2840 	ire_t   *gw_ire;
2841 	irb_t   *irb;
2842 	in6_addr_t v6gw;
2843 	int	match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL;
2844 
2845 	ire = ire_ftable_lookup_v6(v6dstp, 0, 0, 0, NULL, NULL, ALL_ZONES, 0,
2846 	    MATCH_IRE_DEFAULT);
2847 
2848 	if (ire == NULL)
2849 		return (NULL);
2850 
2851 	irb = ire->ire_bucket;
2852 	ASSERT(irb);
2853 
2854 	IRB_REFHOLD(irb);
2855 	ire_refrele(ire);
2856 	for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
2857 		if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, v6dstp) ||
2858 		    ipif->ipif_zoneid != ire->ire_zoneid) {
2859 			continue;
2860 		}
2861 
2862 		switch (ire->ire_type) {
2863 		case IRE_DEFAULT:
2864 		case IRE_PREFIX:
2865 		case IRE_HOST:
2866 			mutex_enter(&ire->ire_lock);
2867 			v6gw = ire->ire_gateway_addr_v6;
2868 			mutex_exit(&ire->ire_lock);
2869 			gw_ire = ire_ftable_lookup_v6(&v6gw, 0, 0,
2870 			    IRE_INTERFACE, ipif, NULL, ALL_ZONES, 0,
2871 			    match_flags);
2872 
2873 			if (gw_ire != NULL) {
2874 				if (save_ire != NULL) {
2875 					ire_refrele(save_ire);
2876 				}
2877 				IRE_REFHOLD(ire);
2878 				if (gw_ire->ire_ipif == ipif) {
2879 					ire_refrele(gw_ire);
2880 
2881 					IRB_REFRELE(irb);
2882 					return (ire);
2883 				}
2884 				ire_refrele(gw_ire);
2885 				save_ire = ire;
2886 			}
2887 			break;
2888 		case IRE_IF_NORESOLVER:
2889 		case IRE_IF_RESOLVER:
2890 			if (ire->ire_ipif == ipif) {
2891 				if (save_ire != NULL) {
2892 					ire_refrele(save_ire);
2893 				}
2894 				IRE_REFHOLD(ire);
2895 
2896 				IRB_REFRELE(irb);
2897 				return (ire);
2898 			}
2899 			break;
2900 		}
2901 	}
2902 	IRB_REFRELE(irb);
2903 
2904 	return (save_ire);
2905 }
2906