xref: /titanic_52/usr/src/uts/common/inet/ip/ip6_ire.c (revision fff7ec1d8ce71b3d8a998ac4391a99860ce07180)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2278377681SSowmini Varadhan  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  * Copyright (c) 1990 Mentat Inc.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * This file contains routines that manipulate Internet Routing Entries (IREs).
287c478bd9Sstevel@tonic-gate  */
297c478bd9Sstevel@tonic-gate #include <sys/types.h>
307c478bd9Sstevel@tonic-gate #include <sys/stream.h>
317c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
327c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
347c478bd9Sstevel@tonic-gate 
357c478bd9Sstevel@tonic-gate #include <sys/systm.h>
367c478bd9Sstevel@tonic-gate #include <sys/param.h>
377c478bd9Sstevel@tonic-gate #include <sys/socket.h>
387c478bd9Sstevel@tonic-gate #include <net/if.h>
397c478bd9Sstevel@tonic-gate #include <net/route.h>
407c478bd9Sstevel@tonic-gate #include <netinet/in.h>
417c478bd9Sstevel@tonic-gate #include <net/if_dl.h>
427c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
437c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h>
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate #include <inet/common.h>
467c478bd9Sstevel@tonic-gate #include <inet/mi.h>
477c478bd9Sstevel@tonic-gate #include <inet/ip.h>
487c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
497c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h>
507c478bd9Sstevel@tonic-gate #include <inet/ip_if.h>
517c478bd9Sstevel@tonic-gate #include <inet/ip_ire.h>
527c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
537c478bd9Sstevel@tonic-gate #include <inet/nd.h>
546e91bba0SGirish Moodalbail #include <inet/tunables.h>
557c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
567c478bd9Sstevel@tonic-gate #include <sys/zone.h>
577c478bd9Sstevel@tonic-gate 
5845916cd2Sjpk #include <sys/tsol/label.h>
5945916cd2Sjpk #include <sys/tsol/tnet.h>
6045916cd2Sjpk 
61bd670b35SErik Nordmark #define	IS_DEFAULT_ROUTE_V6(ire)	\
62bd670b35SErik Nordmark 	(((ire)->ire_type & IRE_DEFAULT) || \
63bd670b35SErik Nordmark 	    (((ire)->ire_type & IRE_INTERFACE) && \
64bd670b35SErik Nordmark 	    (IN6_IS_ADDR_UNSPECIFIED(&(ire)->ire_addr_v6))))
65bd670b35SErik Nordmark 
667c478bd9Sstevel@tonic-gate static	ire_t	ire_null;
677c478bd9Sstevel@tonic-gate 
68bd670b35SErik Nordmark static ire_t *
69bd670b35SErik Nordmark ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask,
70bd670b35SErik Nordmark     const in6_addr_t *gateway, int type, const ill_t *ill,
71bd670b35SErik Nordmark     zoneid_t zoneid, const ts_label_t *tsl, int flags,
72bd670b35SErik Nordmark     ip_stack_t *ipst);
737c478bd9Sstevel@tonic-gate 
747c478bd9Sstevel@tonic-gate /*
757c478bd9Sstevel@tonic-gate  * Initialize the ire that is specific to IPv6 part and call
767c478bd9Sstevel@tonic-gate  * ire_init_common to finish it.
77bd670b35SErik Nordmark  * Returns zero or errno.
787c478bd9Sstevel@tonic-gate  */
79bd670b35SErik Nordmark int
8054da8755Ssowmini ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask,
81bd670b35SErik Nordmark     const in6_addr_t *v6gateway, ushort_t type, ill_t *ill,
82bd670b35SErik Nordmark     zoneid_t zoneid, uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst)
837c478bd9Sstevel@tonic-gate {
84bd670b35SErik Nordmark 	int error;
85c793af95Ssangeeta 
8645916cd2Sjpk 	/*
87bd670b35SErik Nordmark 	 * Reject IRE security attmakeribute creation/initialization
8845916cd2Sjpk 	 * if system is not running in Trusted mode.
8945916cd2Sjpk 	 */
90bd670b35SErik Nordmark 	if (gc != NULL && !is_system_labeled())
91bd670b35SErik Nordmark 		return (EINVAL);
927c478bd9Sstevel@tonic-gate 
93f4b3ec61Sdh155122 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced);
94bd670b35SErik Nordmark 	if (v6addr != NULL)
957c478bd9Sstevel@tonic-gate 		ire->ire_addr_v6 = *v6addr;
967c478bd9Sstevel@tonic-gate 	if (v6gateway != NULL)
977c478bd9Sstevel@tonic-gate 		ire->ire_gateway_addr_v6 = *v6gateway;
987c478bd9Sstevel@tonic-gate 
99bd670b35SErik Nordmark 	/* Make sure we don't have stray values in some fields */
100bd670b35SErik Nordmark 	switch (type) {
101bd670b35SErik Nordmark 	case IRE_LOOPBACK:
102bd670b35SErik Nordmark 	case IRE_HOST:
103bd670b35SErik Nordmark 	case IRE_LOCAL:
104bd670b35SErik Nordmark 	case IRE_IF_CLONE:
105bd670b35SErik Nordmark 		ire->ire_mask_v6 = ipv6_all_ones;
106bd670b35SErik Nordmark 		ire->ire_masklen = IPV6_ABITS;
107bd670b35SErik Nordmark 		break;
108bd670b35SErik Nordmark 	case IRE_PREFIX:
109bd670b35SErik Nordmark 	case IRE_DEFAULT:
110bd670b35SErik Nordmark 	case IRE_IF_RESOLVER:
111bd670b35SErik Nordmark 	case IRE_IF_NORESOLVER:
112bd670b35SErik Nordmark 		if (v6mask != NULL) {
113bd670b35SErik Nordmark 			ire->ire_mask_v6 = *v6mask;
114bd670b35SErik Nordmark 			ire->ire_masklen =
115bd670b35SErik Nordmark 			    ip_mask_to_plen_v6(&ire->ire_mask_v6);
116bd670b35SErik Nordmark 		}
117bd670b35SErik Nordmark 		break;
118bd670b35SErik Nordmark 	case IRE_MULTICAST:
119bd670b35SErik Nordmark 	case IRE_NOROUTE:
120bd670b35SErik Nordmark 		ASSERT(v6mask == NULL);
121bd670b35SErik Nordmark 		break;
122bd670b35SErik Nordmark 	default:
123bd670b35SErik Nordmark 		ASSERT(0);
124bd670b35SErik Nordmark 		return (EINVAL);
125bd670b35SErik Nordmark 	}
1267c478bd9Sstevel@tonic-gate 
127bd670b35SErik Nordmark 	error = ire_init_common(ire, type, ill, zoneid, flags, IPV6_VERSION,
128bd670b35SErik Nordmark 	    gc, ipst);
129bd670b35SErik Nordmark 	if (error != NULL)
130bd670b35SErik Nordmark 		return (error);
131bd670b35SErik Nordmark 
132bd670b35SErik Nordmark 	/* Determine which function pointers to use */
133bd670b35SErik Nordmark 	ire->ire_postfragfn = ip_xmit;		/* Common case */
134bd670b35SErik Nordmark 
135bd670b35SErik Nordmark 	switch (ire->ire_type) {
136bd670b35SErik Nordmark 	case IRE_LOCAL:
137bd670b35SErik Nordmark 		ire->ire_sendfn = ire_send_local_v6;
138bd670b35SErik Nordmark 		ire->ire_recvfn = ire_recv_local_v6;
139bd670b35SErik Nordmark 		ASSERT(ire->ire_ill != NULL);
1401cb875aeSCathy Zhou 		if (ire->ire_ill->ill_flags & ILLF_NOACCEPT)
141bd670b35SErik Nordmark 			ire->ire_recvfn = ire_recv_noaccept_v6;
142bd670b35SErik Nordmark 		break;
143bd670b35SErik Nordmark 	case IRE_LOOPBACK:
144bd670b35SErik Nordmark 		ire->ire_sendfn = ire_send_local_v6;
145bd670b35SErik Nordmark 		ire->ire_recvfn = ire_recv_loopback_v6;
146bd670b35SErik Nordmark 		break;
147bd670b35SErik Nordmark 	case IRE_MULTICAST:
148bd670b35SErik Nordmark 		ire->ire_postfragfn = ip_postfrag_loopcheck;
149bd670b35SErik Nordmark 		ire->ire_sendfn = ire_send_multicast_v6;
150bd670b35SErik Nordmark 		ire->ire_recvfn = ire_recv_multicast_v6;
151bd670b35SErik Nordmark 		break;
152bd670b35SErik Nordmark 	default:
1537c478bd9Sstevel@tonic-gate 		/*
154bd670b35SErik Nordmark 		 * For IRE_IF_ALL and IRE_OFFLINK we forward received
155bd670b35SErik Nordmark 		 * packets by default.
1567c478bd9Sstevel@tonic-gate 		 */
157bd670b35SErik Nordmark 		ire->ire_sendfn = ire_send_wire_v6;
158bd670b35SErik Nordmark 		ire->ire_recvfn = ire_recv_forward_v6;
159bd670b35SErik Nordmark 		break;
1607c478bd9Sstevel@tonic-gate 	}
161bd670b35SErik Nordmark 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
162bd670b35SErik Nordmark 		ire->ire_sendfn = ire_send_noroute_v6;
163bd670b35SErik Nordmark 		ire->ire_recvfn = ire_recv_noroute_v6;
164bd670b35SErik Nordmark 	} else if (ire->ire_flags & RTF_MULTIRT) {
165bd670b35SErik Nordmark 		ire->ire_postfragfn = ip_postfrag_multirt_v6;
166bd670b35SErik Nordmark 		ire->ire_sendfn = ire_send_multirt_v6;
167bd670b35SErik Nordmark 		ire->ire_recvfn = ire_recv_multirt_v6;
1687c478bd9Sstevel@tonic-gate 	}
169bd670b35SErik Nordmark 	ire->ire_nce_capable = ire_determine_nce_capable(ire);
170bd670b35SErik Nordmark 	return (0);
1717c478bd9Sstevel@tonic-gate }
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate /*
1747c478bd9Sstevel@tonic-gate  * ire_create_v6 is called to allocate and initialize a new IRE.
1757c478bd9Sstevel@tonic-gate  *
1767c478bd9Sstevel@tonic-gate  * NOTE : This is called as writer sometimes though not required
1777c478bd9Sstevel@tonic-gate  * by this function.
1787c478bd9Sstevel@tonic-gate  */
17954da8755Ssowmini /* ARGSUSED */
1807c478bd9Sstevel@tonic-gate ire_t *
1817c478bd9Sstevel@tonic-gate ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask,
182bd670b35SErik Nordmark     const in6_addr_t *v6gateway, ushort_t type, ill_t *ill, zoneid_t zoneid,
183bd670b35SErik Nordmark     uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst)
1847c478bd9Sstevel@tonic-gate {
1857c478bd9Sstevel@tonic-gate 	ire_t	*ire;
186bd670b35SErik Nordmark 	int	error;
1877c478bd9Sstevel@tonic-gate 
1887c478bd9Sstevel@tonic-gate 	ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr));
1897c478bd9Sstevel@tonic-gate 
1907c478bd9Sstevel@tonic-gate 	ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
1917c478bd9Sstevel@tonic-gate 	if (ire == NULL) {
192bd670b35SErik Nordmark 		DTRACE_PROBE(kmem__cache__alloc);
1937c478bd9Sstevel@tonic-gate 		return (NULL);
1947c478bd9Sstevel@tonic-gate 	}
1957c478bd9Sstevel@tonic-gate 	*ire = ire_null;
1967c478bd9Sstevel@tonic-gate 
197bd670b35SErik Nordmark 	error = ire_init_v6(ire, v6addr, v6mask, v6gateway,
198bd670b35SErik Nordmark 	    type, ill, zoneid, flags, gc, ipst);
1997c478bd9Sstevel@tonic-gate 
200bd670b35SErik Nordmark 	if (error != 0) {
201bd670b35SErik Nordmark 		DTRACE_PROBE2(ire__init__v6, ire_t *, ire, int, error);
2027c478bd9Sstevel@tonic-gate 		kmem_cache_free(ire_cache, ire);
2037c478bd9Sstevel@tonic-gate 		return (NULL);
2047c478bd9Sstevel@tonic-gate 	}
2057c478bd9Sstevel@tonic-gate 	return (ire);
2067c478bd9Sstevel@tonic-gate }
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate /*
209bd670b35SErik Nordmark  * Find the ill matching a multicast group.
2107c478bd9Sstevel@tonic-gate  * Allows different routes for multicast addresses
2117c478bd9Sstevel@tonic-gate  * in the unicast routing table (akin to FF::0/8 but could be more specific)
2127c478bd9Sstevel@tonic-gate  * which point at different interfaces. This is used when IPV6_MULTICAST_IF
2137c478bd9Sstevel@tonic-gate  * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't
2147c478bd9Sstevel@tonic-gate  * specify the interface to join on.
2157c478bd9Sstevel@tonic-gate  *
216bd670b35SErik Nordmark  * Supports link-local addresses by using ire_route_recursive which follows
217bd670b35SErik Nordmark  * the ill when recursing.
2187c478bd9Sstevel@tonic-gate  *
219bd670b35SErik Nordmark  * To handle CGTP, since we don't have a separate IRE_MULTICAST for each group
220bd670b35SErik Nordmark  * and the MULTIRT property can be different for different groups, we
221bd670b35SErik Nordmark  * extract RTF_MULTIRT from the special unicast route added for a group
222bd670b35SErik Nordmark  * with CGTP and pass that back in the multirtp argument.
223bd670b35SErik Nordmark  * This is used in ip_set_destination etc to set ixa_postfragfn for multicast.
224bd670b35SErik Nordmark  * We have a setsrcp argument for the same reason.
2257c478bd9Sstevel@tonic-gate  */
226bd670b35SErik Nordmark ill_t *
227bd670b35SErik Nordmark ire_lookup_multi_ill_v6(const in6_addr_t *group, zoneid_t zoneid,
228bd670b35SErik Nordmark     ip_stack_t *ipst, boolean_t *multirtp, in6_addr_t *setsrcp)
229bd670b35SErik Nordmark {
230bd670b35SErik Nordmark 	ire_t	*ire;
231bd670b35SErik Nordmark 	ill_t	*ill;
2327c478bd9Sstevel@tonic-gate 
233bd670b35SErik Nordmark 	ire = ire_route_recursive_v6(group, 0, NULL, zoneid, NULL,
2349e3469d3SErik Nordmark 	    MATCH_IRE_DSTONLY, IRR_NONE, 0, ipst, setsrcp, NULL, NULL);
235bd670b35SErik Nordmark 	ASSERT(ire != NULL);
236bd670b35SErik Nordmark 
237bd670b35SErik Nordmark 	if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
2387c478bd9Sstevel@tonic-gate 		ire_refrele(ire);
2397c478bd9Sstevel@tonic-gate 		return (NULL);
2407c478bd9Sstevel@tonic-gate 	}
2417c478bd9Sstevel@tonic-gate 
242bd670b35SErik Nordmark 	if (multirtp != NULL)
243bd670b35SErik Nordmark 		*multirtp = (ire->ire_flags & RTF_MULTIRT) != 0;
2447c478bd9Sstevel@tonic-gate 
245bd670b35SErik Nordmark 	ill = ire_nexthop_ill(ire);
246bd670b35SErik Nordmark 	ire_refrele(ire);
247bd670b35SErik Nordmark 	return (ill);
2487c478bd9Sstevel@tonic-gate }
2497c478bd9Sstevel@tonic-gate 
2507c478bd9Sstevel@tonic-gate /*
2517c478bd9Sstevel@tonic-gate  * This function takes a mask and returns number of bits set in the
2527c478bd9Sstevel@tonic-gate  * mask (the represented prefix length).  Assumes a contiguous mask.
2537c478bd9Sstevel@tonic-gate  */
2547c478bd9Sstevel@tonic-gate int
2557c478bd9Sstevel@tonic-gate ip_mask_to_plen_v6(const in6_addr_t *v6mask)
2567c478bd9Sstevel@tonic-gate {
2577c478bd9Sstevel@tonic-gate 	int		bits;
2587c478bd9Sstevel@tonic-gate 	int		plen = IPV6_ABITS;
2597c478bd9Sstevel@tonic-gate 	int		i;
2607c478bd9Sstevel@tonic-gate 
2617c478bd9Sstevel@tonic-gate 	for (i = 3; i >= 0; i--) {
2627c478bd9Sstevel@tonic-gate 		if (v6mask->s6_addr32[i] == 0) {
2637c478bd9Sstevel@tonic-gate 			plen -= 32;
2647c478bd9Sstevel@tonic-gate 			continue;
2657c478bd9Sstevel@tonic-gate 		}
2667c478bd9Sstevel@tonic-gate 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
2677c478bd9Sstevel@tonic-gate 		if (bits == 0)
2687c478bd9Sstevel@tonic-gate 			break;
2697c478bd9Sstevel@tonic-gate 		plen -= bits;
2707c478bd9Sstevel@tonic-gate 	}
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate 	return (plen);
2737c478bd9Sstevel@tonic-gate }
2747c478bd9Sstevel@tonic-gate 
2757c478bd9Sstevel@tonic-gate /*
2767c478bd9Sstevel@tonic-gate  * Convert a prefix length to the mask for that prefix.
2777c478bd9Sstevel@tonic-gate  * Returns the argument bitmask.
2787c478bd9Sstevel@tonic-gate  */
2797c478bd9Sstevel@tonic-gate in6_addr_t *
2807c478bd9Sstevel@tonic-gate ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
2817c478bd9Sstevel@tonic-gate {
2827c478bd9Sstevel@tonic-gate 	uint32_t *ptr;
2837c478bd9Sstevel@tonic-gate 
2847c478bd9Sstevel@tonic-gate 	if (plen < 0 || plen > IPV6_ABITS)
2857c478bd9Sstevel@tonic-gate 		return (NULL);
2867c478bd9Sstevel@tonic-gate 	*bitmask = ipv6_all_zeros;
287bd670b35SErik Nordmark 	if (plen == 0)
288bd670b35SErik Nordmark 		return (bitmask);
2897c478bd9Sstevel@tonic-gate 
2907c478bd9Sstevel@tonic-gate 	ptr = (uint32_t *)bitmask;
2917c478bd9Sstevel@tonic-gate 	while (plen > 32) {
2927c478bd9Sstevel@tonic-gate 		*ptr++ = 0xffffffffU;
2937c478bd9Sstevel@tonic-gate 		plen -= 32;
2947c478bd9Sstevel@tonic-gate 	}
2957c478bd9Sstevel@tonic-gate 	*ptr = htonl(0xffffffffU << (32 - plen));
2967c478bd9Sstevel@tonic-gate 	return (bitmask);
2977c478bd9Sstevel@tonic-gate }
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate /*
300bd670b35SErik Nordmark  * Add a fully initialized IPv6 IRE to the forwarding table.
301bd670b35SErik Nordmark  * This returns NULL on failure, or a held IRE on success.
302bd670b35SErik Nordmark  * Normally the returned IRE is the same as the argument. But a different
303bd670b35SErik Nordmark  * IRE will be returned if the added IRE is deemed identical to an existing
304bd670b35SErik Nordmark  * one. In that case ire_identical_ref will be increased.
305bd670b35SErik Nordmark  * The caller always needs to do an ire_refrele() on the returned IRE.
3067c478bd9Sstevel@tonic-gate  */
307bd670b35SErik Nordmark ire_t *
308bd670b35SErik Nordmark ire_add_v6(ire_t *ire)
3097c478bd9Sstevel@tonic-gate {
3107c478bd9Sstevel@tonic-gate 	ire_t	*ire1;
3117c478bd9Sstevel@tonic-gate 	int	mask_table_index;
3127c478bd9Sstevel@tonic-gate 	irb_t	*irb_ptr;
3137c478bd9Sstevel@tonic-gate 	ire_t	**irep;
314bd670b35SErik Nordmark 	int	match_flags;
3157c478bd9Sstevel@tonic-gate 	int	error;
316f4b3ec61Sdh155122 	ip_stack_t	*ipst = ire->ire_ipst;
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	/* Make sure the address is properly masked. */
3217c478bd9Sstevel@tonic-gate 	V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6);
3227c478bd9Sstevel@tonic-gate 
3237c478bd9Sstevel@tonic-gate 	mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6);
324bd670b35SErik Nordmark 	if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == NULL) {
3257c478bd9Sstevel@tonic-gate 		irb_t *ptr;
3267c478bd9Sstevel@tonic-gate 		int i;
3277c478bd9Sstevel@tonic-gate 
328bd670b35SErik Nordmark 		ptr = (irb_t *)mi_zalloc((ipst->ips_ip6_ftable_hash_size *
329bd670b35SErik Nordmark 		    sizeof (irb_t)));
3307c478bd9Sstevel@tonic-gate 		if (ptr == NULL) {
3317c478bd9Sstevel@tonic-gate 			ire_delete(ire);
332bd670b35SErik Nordmark 			return (NULL);
3337c478bd9Sstevel@tonic-gate 		}
334f4b3ec61Sdh155122 		for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
335bd670b35SErik Nordmark 			rw_init(&ptr[i].irb_lock, NULL, RW_DEFAULT, NULL);
336f1c454b4SSowmini Varadhan 			ptr[i].irb_ipst = ipst;
3377c478bd9Sstevel@tonic-gate 		}
338f4b3ec61Sdh155122 		mutex_enter(&ipst->ips_ire_ft_init_lock);
339bd670b35SErik Nordmark 		if (ipst->ips_ip_forwarding_table_v6[mask_table_index] ==
340bd670b35SErik Nordmark 		    NULL) {
341bd670b35SErik Nordmark 			ipst->ips_ip_forwarding_table_v6[mask_table_index] =
342bd670b35SErik Nordmark 			    ptr;
343f4b3ec61Sdh155122 			mutex_exit(&ipst->ips_ire_ft_init_lock);
3447c478bd9Sstevel@tonic-gate 		} else {
3457c478bd9Sstevel@tonic-gate 			/*
3467c478bd9Sstevel@tonic-gate 			 * Some other thread won the race in
3477c478bd9Sstevel@tonic-gate 			 * initializing the forwarding table at the
3487c478bd9Sstevel@tonic-gate 			 * same index.
3497c478bd9Sstevel@tonic-gate 			 */
350f4b3ec61Sdh155122 			mutex_exit(&ipst->ips_ire_ft_init_lock);
351bd670b35SErik Nordmark 			for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) {
3527c478bd9Sstevel@tonic-gate 				rw_destroy(&ptr[i].irb_lock);
3537c478bd9Sstevel@tonic-gate 			}
3547c478bd9Sstevel@tonic-gate 			mi_free(ptr);
3557c478bd9Sstevel@tonic-gate 		}
3567c478bd9Sstevel@tonic-gate 	}
357f4b3ec61Sdh155122 	irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][
3587c478bd9Sstevel@tonic-gate 	    IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6,
359f4b3ec61Sdh155122 	    ipst->ips_ip6_ftable_hash_size)]);
3607c478bd9Sstevel@tonic-gate 
361bd670b35SErik Nordmark 	match_flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW);
362bd670b35SErik Nordmark 	if (ire->ire_ill != NULL)
363bd670b35SErik Nordmark 		match_flags |= MATCH_IRE_ILL;
3647c478bd9Sstevel@tonic-gate 	/*
365bd670b35SErik Nordmark 	 * Start the atomic add of the ire. Grab the bucket lock and the
366bd670b35SErik Nordmark 	 * ill lock. Check for condemned.
3677c478bd9Sstevel@tonic-gate 	 */
368bd670b35SErik Nordmark 	error = ire_atomic_start(irb_ptr, ire);
369bd670b35SErik Nordmark 	if (error != 0) {
370bd670b35SErik Nordmark 		ire_delete(ire);
371bd670b35SErik Nordmark 		return (NULL);
372bd670b35SErik Nordmark 	}
373e11c3f44Smeem 
3747c478bd9Sstevel@tonic-gate 	/*
375e11c3f44Smeem 	 * If we are creating a hidden IRE, make sure we search for
376e11c3f44Smeem 	 * hidden IREs when searching for duplicates below.
377e11c3f44Smeem 	 * Otherwise, we might find an IRE on some other interface
378e11c3f44Smeem 	 * that's not marked hidden.
3797c478bd9Sstevel@tonic-gate 	 */
380bd670b35SErik Nordmark 	if (ire->ire_testhidden)
381bd670b35SErik Nordmark 		match_flags |= MATCH_IRE_TESTHIDDEN;
3827c478bd9Sstevel@tonic-gate 
3837c478bd9Sstevel@tonic-gate 	/*
3847c478bd9Sstevel@tonic-gate 	 * Atomically check for duplicate and insert in the table.
3857c478bd9Sstevel@tonic-gate 	 */
3867c478bd9Sstevel@tonic-gate 	for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) {
387bd670b35SErik Nordmark 		if (IRE_IS_CONDEMNED(ire1))
3887c478bd9Sstevel@tonic-gate 			continue;
3897c478bd9Sstevel@tonic-gate 		/*
390bd670b35SErik Nordmark 		 * Here we need an exact match on zoneid, i.e.,
391bd670b35SErik Nordmark 		 * ire_match_args doesn't fit.
3927c478bd9Sstevel@tonic-gate 		 */
3937c478bd9Sstevel@tonic-gate 		if (ire1->ire_zoneid != ire->ire_zoneid)
3947c478bd9Sstevel@tonic-gate 			continue;
395bd670b35SErik Nordmark 
396bd670b35SErik Nordmark 		if (ire1->ire_type != ire->ire_type)
397bd670b35SErik Nordmark 			continue;
398bd670b35SErik Nordmark 
399bd670b35SErik Nordmark 		/*
400bd670b35SErik Nordmark 		 * Note: We do not allow multiple routes that differ only
401bd670b35SErik Nordmark 		 * in the gateway security attributes; such routes are
402bd670b35SErik Nordmark 		 * considered duplicates.
403bd670b35SErik Nordmark 		 * To change that we explicitly have to treat them as
404bd670b35SErik Nordmark 		 * different here.
405bd670b35SErik Nordmark 		 */
4067c478bd9Sstevel@tonic-gate 		if (ire_match_args_v6(ire1, &ire->ire_addr_v6,
4077c478bd9Sstevel@tonic-gate 		    &ire->ire_mask_v6, &ire->ire_gateway_addr_v6,
408bd670b35SErik Nordmark 		    ire->ire_type, ire->ire_ill, ire->ire_zoneid, NULL,
409bd670b35SErik Nordmark 		    match_flags)) {
4107c478bd9Sstevel@tonic-gate 			/*
4117c478bd9Sstevel@tonic-gate 			 * Return the old ire after doing a REFHOLD.
4127c478bd9Sstevel@tonic-gate 			 * As most of the callers continue to use the IRE
4137c478bd9Sstevel@tonic-gate 			 * after adding, we return a held ire. This will
4147c478bd9Sstevel@tonic-gate 			 * avoid a lookup in the caller again. If the callers
4157c478bd9Sstevel@tonic-gate 			 * don't want to use it, they need to do a REFRELE.
41678377681SSowmini Varadhan 			 *
41778377681SSowmini Varadhan 			 * We only allow exactly one IRE_IF_CLONE for any dst,
41878377681SSowmini Varadhan 			 * so, if the is an IF_CLONE, return the ire without
41978377681SSowmini Varadhan 			 * an identical_ref, but with an ire_ref held.
4207c478bd9Sstevel@tonic-gate 			 */
42178377681SSowmini Varadhan 			if (ire->ire_type != IRE_IF_CLONE) {
42278377681SSowmini Varadhan 				atomic_add_32(&ire1->ire_identical_ref, 1);
42378377681SSowmini Varadhan 				DTRACE_PROBE2(ire__add__exist, ire_t *, ire1,
42478377681SSowmini Varadhan 				    ire_t *, ire);
42578377681SSowmini Varadhan 			}
4267c478bd9Sstevel@tonic-gate 			ip1dbg(("found dup ire existing %p new %p",
4277c478bd9Sstevel@tonic-gate 			    (void *)ire1, (void *)ire));
428bd670b35SErik Nordmark 			ire_refhold(ire1);
4297c478bd9Sstevel@tonic-gate 			ire_atomic_end(irb_ptr, ire);
4307c478bd9Sstevel@tonic-gate 			ire_delete(ire);
431bd670b35SErik Nordmark 			return (ire1);
4327c478bd9Sstevel@tonic-gate 		}
4337c478bd9Sstevel@tonic-gate 	}
4347c478bd9Sstevel@tonic-gate 
4357c478bd9Sstevel@tonic-gate 	/*
436bd670b35SErik Nordmark 	 * Normally we do head insertion since most things do not care about
437bd670b35SErik Nordmark 	 * the order of the IREs in the bucket.
438bd670b35SErik Nordmark 	 * However, due to shared-IP zones (and restrict_interzone_loopback)
439bd670b35SErik Nordmark 	 * we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same
440bd670b35SErik Nordmark 	 * address. For that reason we do tail insertion for IRE_IF_CLONE.
4417c478bd9Sstevel@tonic-gate 	 */
4427c478bd9Sstevel@tonic-gate 	irep = (ire_t **)irb_ptr;
443bd670b35SErik Nordmark 	if (ire->ire_type & IRE_IF_CLONE) {
444bd670b35SErik Nordmark 		while ((ire1 = *irep) != NULL)
4457c478bd9Sstevel@tonic-gate 			irep = &ire1->ire_next;
4467c478bd9Sstevel@tonic-gate 	}
4477c478bd9Sstevel@tonic-gate 	/* Insert at *irep */
4487c478bd9Sstevel@tonic-gate 	ire1 = *irep;
4497c478bd9Sstevel@tonic-gate 	if (ire1 != NULL)
4507c478bd9Sstevel@tonic-gate 		ire1->ire_ptpn = &ire->ire_next;
4517c478bd9Sstevel@tonic-gate 	ire->ire_next = ire1;
4527c478bd9Sstevel@tonic-gate 	/* Link the new one in. */
4537c478bd9Sstevel@tonic-gate 	ire->ire_ptpn = irep;
4547c478bd9Sstevel@tonic-gate 	/*
4557c478bd9Sstevel@tonic-gate 	 * ire_walk routines de-reference ire_next without holding
4567c478bd9Sstevel@tonic-gate 	 * a lock. Before we point to the new ire, we want to make
4577c478bd9Sstevel@tonic-gate 	 * sure the store that sets the ire_next of the new ire
4587c478bd9Sstevel@tonic-gate 	 * reaches global visibility, so that ire_walk routines
4597c478bd9Sstevel@tonic-gate 	 * don't see a truncated list of ires i.e if the ire_next
4607c478bd9Sstevel@tonic-gate 	 * of the new ire gets set after we do "*irep = ire" due
4617c478bd9Sstevel@tonic-gate 	 * to re-ordering, the ire_walk thread will see a NULL
4627c478bd9Sstevel@tonic-gate 	 * once it accesses the ire_next of the new ire.
4637c478bd9Sstevel@tonic-gate 	 * membar_producer() makes sure that the following store
4647c478bd9Sstevel@tonic-gate 	 * happens *after* all of the above stores.
4657c478bd9Sstevel@tonic-gate 	 */
4667c478bd9Sstevel@tonic-gate 	membar_producer();
4677c478bd9Sstevel@tonic-gate 	*irep = ire;
4687c478bd9Sstevel@tonic-gate 	ire->ire_bucket = irb_ptr;
4697c478bd9Sstevel@tonic-gate 	/*
4707c478bd9Sstevel@tonic-gate 	 * We return a bumped up IRE above. Keep it symmetrical
4717c478bd9Sstevel@tonic-gate 	 * so that the callers will always have to release. This
4727c478bd9Sstevel@tonic-gate 	 * helps the callers of this function because they continue
4737c478bd9Sstevel@tonic-gate 	 * to use the IRE after adding and hence they don't have to
4747c478bd9Sstevel@tonic-gate 	 * lookup again after we return the IRE.
4757c478bd9Sstevel@tonic-gate 	 *
4767c478bd9Sstevel@tonic-gate 	 * NOTE : We don't have to use atomics as this is appearing
4777c478bd9Sstevel@tonic-gate 	 * in the list for the first time and no one else can bump
4787c478bd9Sstevel@tonic-gate 	 * up the reference count on this yet.
4797c478bd9Sstevel@tonic-gate 	 */
480bd670b35SErik Nordmark 	ire_refhold_locked(ire);
481f4b3ec61Sdh155122 	BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted);
4827c478bd9Sstevel@tonic-gate 	irb_ptr->irb_ire_cnt++;
4837c478bd9Sstevel@tonic-gate 
484bd670b35SErik Nordmark 	if (ire->ire_ill != NULL) {
485bd670b35SErik Nordmark 		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ire->ire_ill,
486968d2fd1Ssowmini 		    (char *), "ire", (void *), ire);
487bd670b35SErik Nordmark 		ire->ire_ill->ill_ire_cnt++;
488bd670b35SErik Nordmark 		ASSERT(ire->ire_ill->ill_ire_cnt != 0);	/* Wraparound */
4897c478bd9Sstevel@tonic-gate 	}
4907c478bd9Sstevel@tonic-gate 	ire_atomic_end(irb_ptr, ire);
4917c478bd9Sstevel@tonic-gate 
492bd670b35SErik Nordmark 	/* Make any caching of the IREs be notified or updated */
4937c478bd9Sstevel@tonic-gate 	ire_flush_cache_v6(ire, IRE_FLUSH_ADD);
4947c478bd9Sstevel@tonic-gate 
495bd670b35SErik Nordmark 	return (ire);
4967c478bd9Sstevel@tonic-gate }
4977c478bd9Sstevel@tonic-gate 
4987c478bd9Sstevel@tonic-gate /*
4997c478bd9Sstevel@tonic-gate  * Search for all HOST REDIRECT routes that are
5007c478bd9Sstevel@tonic-gate  * pointing at the specified gateway and
5017c478bd9Sstevel@tonic-gate  * delete them. This routine is called only
5027c478bd9Sstevel@tonic-gate  * when a default gateway is going away.
5037c478bd9Sstevel@tonic-gate  */
5047c478bd9Sstevel@tonic-gate static void
505f4b3ec61Sdh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst)
5067c478bd9Sstevel@tonic-gate {
5077c478bd9Sstevel@tonic-gate 	irb_t *irb_ptr;
5087c478bd9Sstevel@tonic-gate 	irb_t *irb;
5097c478bd9Sstevel@tonic-gate 	ire_t *ire;
5107c478bd9Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
5117c478bd9Sstevel@tonic-gate 	int i;
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 	/* get the hash table for HOST routes */
514f4b3ec61Sdh155122 	irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)];
5157c478bd9Sstevel@tonic-gate 	if (irb_ptr == NULL)
5167c478bd9Sstevel@tonic-gate 		return;
517f4b3ec61Sdh155122 	for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) {
5187c478bd9Sstevel@tonic-gate 		irb = &irb_ptr[i];
519bd670b35SErik Nordmark 		irb_refhold(irb);
5207c478bd9Sstevel@tonic-gate 		for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
5216bdb8e66Sdd193516 			if (!(ire->ire_flags & RTF_DYNAMIC))
5227c478bd9Sstevel@tonic-gate 				continue;
5237c478bd9Sstevel@tonic-gate 			mutex_enter(&ire->ire_lock);
5247c478bd9Sstevel@tonic-gate 			gw_addr_v6 = ire->ire_gateway_addr_v6;
5257c478bd9Sstevel@tonic-gate 			mutex_exit(&ire->ire_lock);
5267c478bd9Sstevel@tonic-gate 			if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway))
5277c478bd9Sstevel@tonic-gate 				ire_delete(ire);
5287c478bd9Sstevel@tonic-gate 		}
529bd670b35SErik Nordmark 		irb_refrele(irb);
5307c478bd9Sstevel@tonic-gate 	}
5317c478bd9Sstevel@tonic-gate }
5327c478bd9Sstevel@tonic-gate 
5337c478bd9Sstevel@tonic-gate /*
5347c478bd9Sstevel@tonic-gate  * Delete the specified IRE.
5357c478bd9Sstevel@tonic-gate  * All calls should use ire_delete().
5367c478bd9Sstevel@tonic-gate  * Sometimes called as writer though not required by this function.
5377c478bd9Sstevel@tonic-gate  *
5387c478bd9Sstevel@tonic-gate  * NOTE : This function is called only if the ire was added
5397c478bd9Sstevel@tonic-gate  * in the list.
5407c478bd9Sstevel@tonic-gate  */
5417c478bd9Sstevel@tonic-gate void
5427c478bd9Sstevel@tonic-gate ire_delete_v6(ire_t *ire)
5437c478bd9Sstevel@tonic-gate {
5447c478bd9Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
545f4b3ec61Sdh155122 	ip_stack_t	*ipst = ire->ire_ipst;
5467c478bd9Sstevel@tonic-gate 
547bd670b35SErik Nordmark 	/*
548bd670b35SErik Nordmark 	 * Make sure ire_generation increases from ire_flush_cache happen
549bd670b35SErik Nordmark 	 * after any lookup/reader has read ire_generation.
550bd670b35SErik Nordmark 	 * Since the rw_enter makes us wait until any lookup/reader has
551bd670b35SErik Nordmark 	 * completed we can exit the lock immediately.
552bd670b35SErik Nordmark 	 */
553bd670b35SErik Nordmark 	rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER);
554bd670b35SErik Nordmark 	rw_exit(&ipst->ips_ip6_ire_head_lock);
555bd670b35SErik Nordmark 
5567c478bd9Sstevel@tonic-gate 	ASSERT(ire->ire_refcnt >= 1);
5577c478bd9Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate 	ire_flush_cache_v6(ire, IRE_FLUSH_DELETE);
560bd670b35SErik Nordmark 
5617c478bd9Sstevel@tonic-gate 	if (ire->ire_type == IRE_DEFAULT) {
5627c478bd9Sstevel@tonic-gate 		/*
5637c478bd9Sstevel@tonic-gate 		 * when a default gateway is going away
5647c478bd9Sstevel@tonic-gate 		 * delete all the host redirects pointing at that
5657c478bd9Sstevel@tonic-gate 		 * gateway.
5667c478bd9Sstevel@tonic-gate 		 */
5677c478bd9Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
5687c478bd9Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
5697c478bd9Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
570f4b3ec61Sdh155122 		ire_delete_host_redirects_v6(&gw_addr_v6, ipst);
5717c478bd9Sstevel@tonic-gate 	}
5727c478bd9Sstevel@tonic-gate 
5737c478bd9Sstevel@tonic-gate 	/*
574bd670b35SErik Nordmark 	 * If we are deleting an IRE_INTERFACE then we make sure we also
575bd670b35SErik Nordmark 	 * delete any IRE_IF_CLONE that has been created from it.
576bd670b35SErik Nordmark 	 * Those are always in ire_dep_children.
5777c478bd9Sstevel@tonic-gate 	 */
578bd670b35SErik Nordmark 	if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0)
579bd670b35SErik Nordmark 		ire_dep_delete_if_clone(ire);
5807c478bd9Sstevel@tonic-gate 
581bd670b35SErik Nordmark 	/* Remove from parent dependencies and child */
582bd670b35SErik Nordmark 	rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER);
583bd670b35SErik Nordmark 	if (ire->ire_dep_parent != NULL) {
584bd670b35SErik Nordmark 		ire_dep_remove(ire);
5857c478bd9Sstevel@tonic-gate 	}
586bd670b35SErik Nordmark 	while (ire->ire_dep_children != NULL)
587bd670b35SErik Nordmark 		ire_dep_remove(ire->ire_dep_children);
588bd670b35SErik Nordmark 	rw_exit(&ipst->ips_ire_dep_lock);
5897c478bd9Sstevel@tonic-gate }
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate /*
592bd670b35SErik Nordmark  * When an IRE is added or deleted this routine is called to make sure
593bd670b35SErik Nordmark  * any caching of IRE information is notified or updated.
5947c478bd9Sstevel@tonic-gate  *
595bd670b35SErik Nordmark  * The flag argument indicates if the flush request is due to addition
596bd670b35SErik Nordmark  * of new route (IRE_FLUSH_ADD), deletion of old route (IRE_FLUSH_DELETE),
597bd670b35SErik Nordmark  * or a change to ire_gateway_addr (IRE_FLUSH_GWCHANGE).
5987c478bd9Sstevel@tonic-gate  */
5997c478bd9Sstevel@tonic-gate void
6007c478bd9Sstevel@tonic-gate ire_flush_cache_v6(ire_t *ire, int flag)
6017c478bd9Sstevel@tonic-gate {
602f4b3ec61Sdh155122 	ip_stack_t *ipst = ire->ire_ipst;
6037c478bd9Sstevel@tonic-gate 
604bd670b35SErik Nordmark 	/*
605bd670b35SErik Nordmark 	 * IRE_IF_CLONE ire's don't provide any new information
606bd670b35SErik Nordmark 	 * than the parent from which they are cloned, so don't
607bd670b35SErik Nordmark 	 * perturb the generation numbers.
608bd670b35SErik Nordmark 	 */
609bd670b35SErik Nordmark 	if (ire->ire_type & IRE_IF_CLONE)
6107c478bd9Sstevel@tonic-gate 		return;
6117c478bd9Sstevel@tonic-gate 
6127c478bd9Sstevel@tonic-gate 	/*
613bd670b35SErik Nordmark 	 * Ensure that an ire_add during a lookup serializes the updates of
614bd670b35SErik Nordmark 	 * the generation numbers under ire_head_lock so that the lookup gets
615bd670b35SErik Nordmark 	 * either the old ire and old generation number, or a new ire and new
616bd670b35SErik Nordmark 	 * generation number.
6177c478bd9Sstevel@tonic-gate 	 */
618bd670b35SErik Nordmark 	rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER);
619bd670b35SErik Nordmark 
620bd670b35SErik Nordmark 	/*
621bd670b35SErik Nordmark 	 * If a route was just added, we need to notify everybody that
622bd670b35SErik Nordmark 	 * has cached an IRE_NOROUTE since there might now be a better
623bd670b35SErik Nordmark 	 * route for them.
624bd670b35SErik Nordmark 	 */
6257c478bd9Sstevel@tonic-gate 	if (flag == IRE_FLUSH_ADD) {
626bd670b35SErik Nordmark 		ire_increment_generation(ipst->ips_ire_reject_v6);
627bd670b35SErik Nordmark 		ire_increment_generation(ipst->ips_ire_blackhole_v6);
628bd670b35SErik Nordmark 	}
629bd670b35SErik Nordmark 
630bd670b35SErik Nordmark 	/* Adding a default can't otherwise provide a better route */
631bd670b35SErik Nordmark 	if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) {
632bd670b35SErik Nordmark 		rw_exit(&ipst->ips_ip6_ire_head_lock);
633bd670b35SErik Nordmark 		return;
634bd670b35SErik Nordmark 	}
635bd670b35SErik Nordmark 
636bd670b35SErik Nordmark 	switch (flag) {
637bd670b35SErik Nordmark 	case IRE_FLUSH_DELETE:
638bd670b35SErik Nordmark 	case IRE_FLUSH_GWCHANGE:
6397c478bd9Sstevel@tonic-gate 		/*
640bd670b35SErik Nordmark 		 * Update ire_generation for all ire_dep_children chains
641bd670b35SErik Nordmark 		 * starting with this IRE
6427c478bd9Sstevel@tonic-gate 		 */
643bd670b35SErik Nordmark 		ire_dep_incr_generation(ire);
644bd670b35SErik Nordmark 		break;
645bd670b35SErik Nordmark 	case IRE_FLUSH_ADD: {
646bd670b35SErik Nordmark 		in6_addr_t	addr;
647bd670b35SErik Nordmark 		in6_addr_t	mask;
648bd670b35SErik Nordmark 		ip_stack_t	*ipst = ire->ire_ipst;
649bd670b35SErik Nordmark 		uint_t		masklen;
650bd670b35SErik Nordmark 
6517c478bd9Sstevel@tonic-gate 		/*
652bd670b35SErik Nordmark 		 * Find an IRE which is a shorter match than the ire to be added
653bd670b35SErik Nordmark 		 * For any such IRE (which we repeat) we update the
654bd670b35SErik Nordmark 		 * ire_generation the same way as in the delete case.
6557c478bd9Sstevel@tonic-gate 		 */
656bd670b35SErik Nordmark 		addr = ire->ire_addr_v6;
657bd670b35SErik Nordmark 		mask = ire->ire_mask_v6;
658bd670b35SErik Nordmark 		masklen = ip_mask_to_plen_v6(&mask);
659bd670b35SErik Nordmark 
660bd670b35SErik Nordmark 		ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0, NULL,
661bd670b35SErik Nordmark 		    ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst);
662bd670b35SErik Nordmark 		while (ire != NULL) {
663bd670b35SErik Nordmark 			/* We need to handle all in the same bucket */
664bd670b35SErik Nordmark 			irb_increment_generation(ire->ire_bucket);
665bd670b35SErik Nordmark 
666bd670b35SErik Nordmark 			mask = ire->ire_mask_v6;
667bd670b35SErik Nordmark 			ASSERT(masklen > ip_mask_to_plen_v6(&mask));
668bd670b35SErik Nordmark 			masklen = ip_mask_to_plen_v6(&mask);
669bd670b35SErik Nordmark 			ire_refrele(ire);
670bd670b35SErik Nordmark 			ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0,
671bd670b35SErik Nordmark 			    NULL, ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst);
6727c478bd9Sstevel@tonic-gate 		}
6737c478bd9Sstevel@tonic-gate 		}
674bd670b35SErik Nordmark 		break;
6757c478bd9Sstevel@tonic-gate 	}
676bd670b35SErik Nordmark 	rw_exit(&ipst->ips_ip6_ire_head_lock);
6777c478bd9Sstevel@tonic-gate }
6787c478bd9Sstevel@tonic-gate 
6797c478bd9Sstevel@tonic-gate /*
6807c478bd9Sstevel@tonic-gate  * Matches the arguments passed with the values in the ire.
6817c478bd9Sstevel@tonic-gate  *
682bd670b35SErik Nordmark  * Note: for match types that match using "ill" passed in, ill
6837c478bd9Sstevel@tonic-gate  * must be checked for non-NULL before calling this routine.
6847c478bd9Sstevel@tonic-gate  */
685bd670b35SErik Nordmark boolean_t
6867c478bd9Sstevel@tonic-gate ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask,
687bd670b35SErik Nordmark     const in6_addr_t *gateway, int type, const ill_t *ill, zoneid_t zoneid,
688bd670b35SErik Nordmark     const ts_label_t *tsl, int match_flags)
6897c478bd9Sstevel@tonic-gate {
6907c478bd9Sstevel@tonic-gate 	in6_addr_t masked_addr;
6917c478bd9Sstevel@tonic-gate 	in6_addr_t gw_addr_v6;
6927c478bd9Sstevel@tonic-gate 	ill_t *ire_ill = NULL, *dst_ill;
693bd670b35SErik Nordmark 	ip_stack_t *ipst = ire->ire_ipst;
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate 	ASSERT(ire->ire_ipversion == IPV6_VERSION);
6967c478bd9Sstevel@tonic-gate 	ASSERT(addr != NULL);
6977c478bd9Sstevel@tonic-gate 	ASSERT(mask != NULL);
6987c478bd9Sstevel@tonic-gate 	ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL);
69944b099c4SSowmini Varadhan 	ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL))) ||
700bd670b35SErik Nordmark 	    (ill != NULL && ill->ill_isv6));
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 	/*
703bd670b35SErik Nordmark 	 * If MATCH_IRE_TESTHIDDEN is set, then only return the IRE if it
704bd670b35SErik Nordmark 	 * is in fact hidden, to ensure the caller gets the right one.
7057c478bd9Sstevel@tonic-gate 	 */
706bd670b35SErik Nordmark 	if (ire->ire_testhidden) {
707bd670b35SErik Nordmark 		if (!(match_flags & MATCH_IRE_TESTHIDDEN))
7087c478bd9Sstevel@tonic-gate 			return (B_FALSE);
709e11c3f44Smeem 	}
7107c478bd9Sstevel@tonic-gate 
71145916cd2Sjpk 	if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid &&
71245916cd2Sjpk 	    ire->ire_zoneid != ALL_ZONES) {
7137c478bd9Sstevel@tonic-gate 		/*
714bd670b35SErik Nordmark 		 * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid
715bd670b35SErik Nordmark 		 * does not match that of ire_zoneid, a failure to
7167c478bd9Sstevel@tonic-gate 		 * match is reported at this point. Otherwise, since some IREs
7177c478bd9Sstevel@tonic-gate 		 * that are available in the global zone can be used in local
7187c478bd9Sstevel@tonic-gate 		 * zones, additional checks need to be performed:
7197c478bd9Sstevel@tonic-gate 		 *
720bd670b35SErik Nordmark 		 * IRE_LOOPBACK
721bd670b35SErik Nordmark 		 *	entries should never be matched in this situation.
722bd670b35SErik Nordmark 		 *	Each zone has its own IRE_LOOPBACK.
7237c478bd9Sstevel@tonic-gate 		 *
724bd670b35SErik Nordmark 		 * IRE_LOCAL
725bd670b35SErik Nordmark 		 *	We allow them for any zoneid. ire_route_recursive
726bd670b35SErik Nordmark 		 *	does additional checks when
727bd670b35SErik Nordmark 		 *	ip_restrict_interzone_loopback is set.
7287c478bd9Sstevel@tonic-gate 		 *
729bd670b35SErik Nordmark 		 * If ill_usesrc_ifindex is set
730bd670b35SErik Nordmark 		 *	Then we check if the zone has a valid source address
731bd670b35SErik Nordmark 		 *	on the usesrc ill.
7327c478bd9Sstevel@tonic-gate 		 *
733bd670b35SErik Nordmark 		 * If ire_ill is set, then check that the zone has an ipif
734bd670b35SErik Nordmark 		 *	on that ill.
735bd670b35SErik Nordmark 		 *
736bd670b35SErik Nordmark 		 * Outside of this function (in ire_round_robin) we check
737bd670b35SErik Nordmark 		 * that any IRE_OFFLINK has a gateway that reachable from the
738bd670b35SErik Nordmark 		 * zone when we have multiple choices (ECMP).
7397c478bd9Sstevel@tonic-gate 		 */
7407c478bd9Sstevel@tonic-gate 		if (match_flags & MATCH_IRE_ZONEONLY)
7417c478bd9Sstevel@tonic-gate 			return (B_FALSE);
742bd670b35SErik Nordmark 		if (ire->ire_type & IRE_LOOPBACK)
7437c478bd9Sstevel@tonic-gate 			return (B_FALSE);
744bd670b35SErik Nordmark 
745bd670b35SErik Nordmark 		if (ire->ire_type & IRE_LOCAL)
746bd670b35SErik Nordmark 			goto matchit;
747bd670b35SErik Nordmark 
7487c478bd9Sstevel@tonic-gate 		/*
749bd670b35SErik Nordmark 		 * The normal case of IRE_ONLINK has a matching zoneid.
750bd670b35SErik Nordmark 		 * Here we handle the case when shared-IP zones have been
751bd670b35SErik Nordmark 		 * configured with IP addresses on vniN. In that case it
752bd670b35SErik Nordmark 		 * is ok for traffic from a zone to use IRE_ONLINK routes
753bd670b35SErik Nordmark 		 * if the ill has a usesrc pointing at vniN
754bd670b35SErik Nordmark 		 * Applies to IRE_INTERFACE.
7557c478bd9Sstevel@tonic-gate 		 */
756bd670b35SErik Nordmark 		dst_ill = ire->ire_ill;
757bd670b35SErik Nordmark 		if (ire->ire_type & IRE_ONLINK) {
758bd670b35SErik Nordmark 			uint_t	ifindex;
759bd670b35SErik Nordmark 
760bd670b35SErik Nordmark 			/*
761bd670b35SErik Nordmark 			 * Note there is no IRE_INTERFACE on vniN thus
762bd670b35SErik Nordmark 			 * can't do an IRE lookup for a matching route.
763bd670b35SErik Nordmark 			 */
764bd670b35SErik Nordmark 			ifindex = dst_ill->ill_usesrc_ifindex;
765bd670b35SErik Nordmark 			if (ifindex == 0)
766bd670b35SErik Nordmark 				return (B_FALSE);
767bd670b35SErik Nordmark 
7687c478bd9Sstevel@tonic-gate 			/*
7697c478bd9Sstevel@tonic-gate 			 * If there is a usable source address in the
770bd670b35SErik Nordmark 			 * zone, then it's ok to return this IRE_INTERFACE
7717c478bd9Sstevel@tonic-gate 			 */
772bd670b35SErik Nordmark 			if (!ipif_zone_avail(ifindex, dst_ill->ill_isv6,
773bd670b35SErik Nordmark 			    zoneid, ipst)) {
774bd670b35SErik Nordmark 				ip3dbg(("ire_match_args: no usrsrc for zone"
7757c478bd9Sstevel@tonic-gate 				    " dst_ill %p\n", (void *)dst_ill));
7767c478bd9Sstevel@tonic-gate 				return (B_FALSE);
7777c478bd9Sstevel@tonic-gate 			}
7787c478bd9Sstevel@tonic-gate 		}
779bd670b35SErik Nordmark 		/*
78044b099c4SSowmini Varadhan 		 * For example, with
781bd670b35SErik Nordmark 		 * route add 11.0.0.0 gw1 -ifp bge0
782bd670b35SErik Nordmark 		 * route add 11.0.0.0 gw2 -ifp bge1
783bd670b35SErik Nordmark 		 * this code would differentiate based on
784bd670b35SErik Nordmark 		 * where the sending zone has addresses.
785bd670b35SErik Nordmark 		 * Only if the zone has an address on bge0 can it use the first
786bd670b35SErik Nordmark 		 * route. It isn't clear if this behavior is documented
787bd670b35SErik Nordmark 		 * anywhere.
788bd670b35SErik Nordmark 		 */
789bd670b35SErik Nordmark 		if (dst_ill != NULL && (ire->ire_type & IRE_OFFLINK)) {
7907c478bd9Sstevel@tonic-gate 			ipif_t	*tipif;
7917c478bd9Sstevel@tonic-gate 
792bd670b35SErik Nordmark 			mutex_enter(&dst_ill->ill_lock);
793bd670b35SErik Nordmark 			for (tipif = dst_ill->ill_ipif;
7947c478bd9Sstevel@tonic-gate 			    tipif != NULL; tipif = tipif->ipif_next) {
795bd670b35SErik Nordmark 				if (!IPIF_IS_CONDEMNED(tipif) &&
7967c478bd9Sstevel@tonic-gate 				    (tipif->ipif_flags & IPIF_UP) &&
79745916cd2Sjpk 				    (tipif->ipif_zoneid == zoneid ||
79845916cd2Sjpk 				    tipif->ipif_zoneid == ALL_ZONES))
7997c478bd9Sstevel@tonic-gate 					break;
8007c478bd9Sstevel@tonic-gate 			}
801bd670b35SErik Nordmark 			mutex_exit(&dst_ill->ill_lock);
8027c478bd9Sstevel@tonic-gate 			if (tipif == NULL)
8037c478bd9Sstevel@tonic-gate 				return (B_FALSE);
8047c478bd9Sstevel@tonic-gate 		}
8057c478bd9Sstevel@tonic-gate 	}
8067c478bd9Sstevel@tonic-gate 
807bd670b35SErik Nordmark matchit:
80844b099c4SSowmini Varadhan 	ire_ill = ire->ire_ill;
8097c478bd9Sstevel@tonic-gate 	if (match_flags & MATCH_IRE_GW) {
8107c478bd9Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
8117c478bd9Sstevel@tonic-gate 		gw_addr_v6 = ire->ire_gateway_addr_v6;
8127c478bd9Sstevel@tonic-gate 		mutex_exit(&ire->ire_lock);
8137c478bd9Sstevel@tonic-gate 	}
814bd670b35SErik Nordmark 	if (match_flags & MATCH_IRE_ILL) {
815e11c3f44Smeem 
8167c478bd9Sstevel@tonic-gate 		/*
817bd670b35SErik Nordmark 		 * If asked to match an ill, we *must* match
818bd670b35SErik Nordmark 		 * on the ire_ill for ipmp test addresses, or
819bd670b35SErik Nordmark 		 * any of the ill in the group for data addresses.
820bd670b35SErik Nordmark 		 * If we don't, we may as well fail.
821bd670b35SErik Nordmark 		 * However, we need an exception for IRE_LOCALs to ensure
822bd670b35SErik Nordmark 		 * we loopback packets even sent to test addresses on different
823bd670b35SErik Nordmark 		 * interfaces in the group.
8247c478bd9Sstevel@tonic-gate 		 */
825bd670b35SErik Nordmark 		if ((match_flags & MATCH_IRE_TESTHIDDEN) &&
826bd670b35SErik Nordmark 		    !(ire->ire_type & IRE_LOCAL)) {
827bd670b35SErik Nordmark 			if (ire->ire_ill != ill)
828bd670b35SErik Nordmark 				return (B_FALSE);
829bd670b35SErik Nordmark 		} else  {
830bd670b35SErik Nordmark 			match_flags &= ~MATCH_IRE_TESTHIDDEN;
831bd670b35SErik Nordmark 			/*
832bd670b35SErik Nordmark 			 * We know that ill is not NULL, but ire_ill could be
833bd670b35SErik Nordmark 			 * NULL
834bd670b35SErik Nordmark 			 */
835bd670b35SErik Nordmark 			if (ire_ill == NULL || !IS_ON_SAME_LAN(ill, ire_ill))
836bd670b35SErik Nordmark 				return (B_FALSE);
8377c478bd9Sstevel@tonic-gate 		}
838bd670b35SErik Nordmark 	}
83944b099c4SSowmini Varadhan 	if (match_flags & MATCH_IRE_SRC_ILL) {
84044b099c4SSowmini Varadhan 		if (ire_ill == NULL)
84144b099c4SSowmini Varadhan 			return (B_FALSE);
84244b099c4SSowmini Varadhan 		if (!IS_ON_SAME_LAN(ill, ire_ill)) {
84344b099c4SSowmini Varadhan 			if (ire_ill->ill_usesrc_ifindex == 0 ||
84444b099c4SSowmini Varadhan 			    (ire_ill->ill_usesrc_ifindex !=
84544b099c4SSowmini Varadhan 			    ill->ill_phyint->phyint_ifindex))
84644b099c4SSowmini Varadhan 				return (B_FALSE);
84744b099c4SSowmini Varadhan 		}
84844b099c4SSowmini Varadhan 	}
84944b099c4SSowmini Varadhan 
8507c478bd9Sstevel@tonic-gate 	/* No ire_addr_v6 bits set past the mask */
8517c478bd9Sstevel@tonic-gate 	ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6,
8527c478bd9Sstevel@tonic-gate 	    ire->ire_addr_v6));
8537c478bd9Sstevel@tonic-gate 	V6_MASK_COPY(*addr, *mask, masked_addr);
8547c478bd9Sstevel@tonic-gate 	if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) &&
8557c478bd9Sstevel@tonic-gate 	    ((!(match_flags & MATCH_IRE_GW)) ||
85601685f97SSowmini Varadhan 	    ((!(match_flags & MATCH_IRE_DIRECT)) ||
85701685f97SSowmini Varadhan 	    !(ire->ire_flags & RTF_INDIRECT)) &&
8587c478bd9Sstevel@tonic-gate 	    IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) &&
859bd670b35SErik Nordmark 	    ((!(match_flags & MATCH_IRE_TYPE)) || (ire->ire_type & type)) &&
860bd670b35SErik Nordmark 	    ((!(match_flags & MATCH_IRE_TESTHIDDEN)) || ire->ire_testhidden) &&
861bd670b35SErik Nordmark 	    ((!(match_flags & MATCH_IRE_MASK)) ||
862bd670b35SErik Nordmark 	    (IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, mask))) &&
86345916cd2Sjpk 	    ((!(match_flags & MATCH_IRE_SECATTR)) ||
86445916cd2Sjpk 	    (!is_system_labeled()) ||
86545916cd2Sjpk 	    (tsol_ire_match_gwattr(ire, tsl) == 0))) {
8667c478bd9Sstevel@tonic-gate 		/* We found the matched IRE */
8677c478bd9Sstevel@tonic-gate 		return (B_TRUE);
8687c478bd9Sstevel@tonic-gate 	}
8697c478bd9Sstevel@tonic-gate 	return (B_FALSE);
8707c478bd9Sstevel@tonic-gate }
8717c478bd9Sstevel@tonic-gate 
8727c478bd9Sstevel@tonic-gate /*
873bd670b35SErik Nordmark  * Check if the zoneid (not ALL_ZONES) has an IRE_INTERFACE for the specified
874bd670b35SErik Nordmark  * gateway address. If ill is non-NULL we also match on it.
875bd670b35SErik Nordmark  * The caller must hold a read lock on RADIX_NODE_HEAD if lock_held is set.
8767c478bd9Sstevel@tonic-gate  */
877bd670b35SErik Nordmark boolean_t
878bd670b35SErik Nordmark ire_gateway_ok_zone_v6(const in6_addr_t *gateway, zoneid_t zoneid, ill_t *ill,
879bd670b35SErik Nordmark     const ts_label_t *tsl, ip_stack_t *ipst, boolean_t lock_held)
8807c478bd9Sstevel@tonic-gate {
881bd670b35SErik Nordmark 	ire_t	*ire;
882bd670b35SErik Nordmark 	uint_t	match_flags;
8837c478bd9Sstevel@tonic-gate 
884bd670b35SErik Nordmark 	if (lock_held)
885bd670b35SErik Nordmark 		ASSERT(RW_READ_HELD(&ipst->ips_ip6_ire_head_lock));
886bd670b35SErik Nordmark 	else
887bd670b35SErik Nordmark 		rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER);
8887c478bd9Sstevel@tonic-gate 
889bd670b35SErik Nordmark 	match_flags = MATCH_IRE_TYPE | MATCH_IRE_SECATTR;
890bd670b35SErik Nordmark 	if (ill != NULL)
891bd670b35SErik Nordmark 		match_flags |= MATCH_IRE_ILL;
892bd670b35SErik Nordmark 
893bd670b35SErik Nordmark 	ire = ire_ftable_lookup_impl_v6(gateway, &ipv6_all_zeros,
894bd670b35SErik Nordmark 	    &ipv6_all_zeros, IRE_INTERFACE, ill, zoneid, tsl, match_flags,
895bd670b35SErik Nordmark 	    ipst);
896bd670b35SErik Nordmark 
897bd670b35SErik Nordmark 	if (!lock_held)
898bd670b35SErik Nordmark 		rw_exit(&ipst->ips_ip6_ire_head_lock);
899bd670b35SErik Nordmark 	if (ire != NULL) {
900bd670b35SErik Nordmark 		ire_refrele(ire);
901bd670b35SErik Nordmark 		return (B_TRUE);
902bd670b35SErik Nordmark 	} else {
903bd670b35SErik Nordmark 		return (B_FALSE);
9047c478bd9Sstevel@tonic-gate 	}
9057c478bd9Sstevel@tonic-gate }
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate /*
9087c478bd9Sstevel@tonic-gate  * Lookup a route in forwarding table.
9097c478bd9Sstevel@tonic-gate  * specific lookup is indicated by passing the
9107c478bd9Sstevel@tonic-gate  * required parameters and indicating the
9117c478bd9Sstevel@tonic-gate  * match required in flag field.
9127c478bd9Sstevel@tonic-gate  *
9137c478bd9Sstevel@tonic-gate  * Supports link-local addresses by following the ipif/ill when recursing.
9147c478bd9Sstevel@tonic-gate  */
9157c478bd9Sstevel@tonic-gate ire_t *
9167c478bd9Sstevel@tonic-gate ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask,
917bd670b35SErik Nordmark     const in6_addr_t *gateway, int type, const ill_t *ill,
918bd670b35SErik Nordmark     zoneid_t zoneid, const ts_label_t *tsl, int flags,
919bd670b35SErik Nordmark     uint32_t xmit_hint, ip_stack_t *ipst, uint_t *generationp)
9207c478bd9Sstevel@tonic-gate {
9217c478bd9Sstevel@tonic-gate 	ire_t *ire = NULL;
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	ASSERT(addr != NULL);
9247c478bd9Sstevel@tonic-gate 	ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL);
9257c478bd9Sstevel@tonic-gate 	ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL);
926bd670b35SErik Nordmark 	ASSERT(ill == NULL || ill->ill_isv6);
927bd670b35SErik Nordmark 
928bd670b35SErik Nordmark 	ASSERT(!IN6_IS_ADDR_V4MAPPED(addr));
9297c478bd9Sstevel@tonic-gate 
9307c478bd9Sstevel@tonic-gate 	/*
931bd670b35SErik Nordmark 	 * ire_match_args_v6() will dereference ill if MATCH_IRE_ILL
93244b099c4SSowmini Varadhan 	 * or MATCH_IRE_SRC_ILL is set.
9337c478bd9Sstevel@tonic-gate 	 */
93444b099c4SSowmini Varadhan 	if ((flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL)) && (ill == NULL))
9357c478bd9Sstevel@tonic-gate 		return (NULL);
9367c478bd9Sstevel@tonic-gate 
937bd670b35SErik Nordmark 	rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER);
938bd670b35SErik Nordmark 	ire = ire_ftable_lookup_impl_v6(addr, mask, gateway, type, ill, zoneid,
939bd670b35SErik Nordmark 	    tsl, flags, ipst);
940bd670b35SErik Nordmark 	if (ire == NULL) {
941bd670b35SErik Nordmark 		rw_exit(&ipst->ips_ip6_ire_head_lock);
942bd670b35SErik Nordmark 		return (NULL);
943bd670b35SErik Nordmark 	}
944bd670b35SErik Nordmark 
945bd670b35SErik Nordmark 	/*
946bd670b35SErik Nordmark 	 * round-robin only if we have more than one route in the bucket.
947bd670b35SErik Nordmark 	 * ips_ip_ecmp_behavior controls when we do ECMP
948bd670b35SErik Nordmark 	 *	2:	always
949bd670b35SErik Nordmark 	 *	1:	for IRE_DEFAULT and /0 IRE_INTERFACE
950bd670b35SErik Nordmark 	 *	0:	never
951bd670b35SErik Nordmark 	 *
952bd670b35SErik Nordmark 	 * Note: if we found an IRE_IF_CLONE we won't look at the bucket with
953bd670b35SErik Nordmark 	 * other ECMP IRE_INTERFACEs since the IRE_IF_CLONE is a /128 match
954bd670b35SErik Nordmark 	 * and the IRE_INTERFACESs are likely to be shorter matches.
955bd670b35SErik Nordmark 	 */
956bd670b35SErik Nordmark 	if (ire->ire_bucket->irb_ire_cnt > 1 && !(flags & MATCH_IRE_GW)) {
957bd670b35SErik Nordmark 		if (ipst->ips_ip_ecmp_behavior == 2 ||
958bd670b35SErik Nordmark 		    (ipst->ips_ip_ecmp_behavior == 1 &&
959bd670b35SErik Nordmark 		    IS_DEFAULT_ROUTE_V6(ire))) {
960bd670b35SErik Nordmark 			ire_t	*next_ire;
961bd670b35SErik Nordmark 			ire_ftable_args_t margs;
962bd670b35SErik Nordmark 
963188e1664SErik Nordmark 			bzero(&margs, sizeof (margs));
964bd670b35SErik Nordmark 			margs.ift_addr_v6 = *addr;
965bd670b35SErik Nordmark 			if (mask != NULL)
966bd670b35SErik Nordmark 				margs.ift_mask_v6 = *mask;
967bd670b35SErik Nordmark 			if (gateway != NULL)
968bd670b35SErik Nordmark 				margs.ift_gateway_v6 = *gateway;
969bd670b35SErik Nordmark 			margs.ift_type = type;
970bd670b35SErik Nordmark 			margs.ift_ill = ill;
971bd670b35SErik Nordmark 			margs.ift_zoneid = zoneid;
972bd670b35SErik Nordmark 			margs.ift_tsl = tsl;
973bd670b35SErik Nordmark 			margs.ift_flags = flags;
974bd670b35SErik Nordmark 
975bd670b35SErik Nordmark 			next_ire = ire_round_robin(ire->ire_bucket, &margs,
976bd670b35SErik Nordmark 			    xmit_hint, ire, ipst);
977bd670b35SErik Nordmark 			if (next_ire == NULL) {
978bd670b35SErik Nordmark 				/* keep ire if next_ire is null */
979bd670b35SErik Nordmark 				goto done;
980bd670b35SErik Nordmark 			}
981bd670b35SErik Nordmark 			ire_refrele(ire);
982bd670b35SErik Nordmark 			ire = next_ire;
983bd670b35SErik Nordmark 		}
984bd670b35SErik Nordmark 	}
985bd670b35SErik Nordmark 
986bd670b35SErik Nordmark done:
987bd670b35SErik Nordmark 	/* Return generation before dropping lock */
988bd670b35SErik Nordmark 	if (generationp != NULL)
989bd670b35SErik Nordmark 		*generationp = ire->ire_generation;
990bd670b35SErik Nordmark 
991bd670b35SErik Nordmark 	rw_exit(&ipst->ips_ip6_ire_head_lock);
992bd670b35SErik Nordmark 
993bd670b35SErik Nordmark 	/*
994bd670b35SErik Nordmark 	 * For shared-IP zones we need additional checks to what was
995bd670b35SErik Nordmark 	 * done in ire_match_args to make sure IRE_LOCALs are handled.
996bd670b35SErik Nordmark 	 *
997bd670b35SErik Nordmark 	 * When ip_restrict_interzone_loopback is set, then
998bd670b35SErik Nordmark 	 * we ensure that IRE_LOCAL are only used for loopback
999bd670b35SErik Nordmark 	 * between zones when the logical "Ethernet" would
1000bd670b35SErik Nordmark 	 * have looped them back. That is, if in the absense of
1001bd670b35SErik Nordmark 	 * the IRE_LOCAL we would have sent to packet out the
1002bd670b35SErik Nordmark 	 * same ill.
1003bd670b35SErik Nordmark 	 */
1004bd670b35SErik Nordmark 	if ((ire->ire_type & IRE_LOCAL) && zoneid != ALL_ZONES &&
1005bd670b35SErik Nordmark 	    ire->ire_zoneid != zoneid && ire->ire_zoneid != ALL_ZONES &&
1006bd670b35SErik Nordmark 	    ipst->ips_ip_restrict_interzone_loopback) {
1007bd670b35SErik Nordmark 		ire = ire_alt_local(ire, zoneid, tsl, ill, generationp);
1008bd670b35SErik Nordmark 		ASSERT(ire != NULL);
1009bd670b35SErik Nordmark 	}
1010bd670b35SErik Nordmark 
1011bd670b35SErik Nordmark 	return (ire);
1012bd670b35SErik Nordmark }
1013bd670b35SErik Nordmark 
1014bd670b35SErik Nordmark /*
1015bd670b35SErik Nordmark  * Look up a single ire. The caller holds either the read or write lock.
1016bd670b35SErik Nordmark  */
1017bd670b35SErik Nordmark ire_t *
1018bd670b35SErik Nordmark ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask,
1019bd670b35SErik Nordmark     const in6_addr_t *gateway, int type, const ill_t *ill,
1020bd670b35SErik Nordmark     zoneid_t zoneid, const ts_label_t *tsl, int flags,
1021bd670b35SErik Nordmark     ip_stack_t *ipst)
1022bd670b35SErik Nordmark {
1023bd670b35SErik Nordmark 	irb_t *irb_ptr;
1024bd670b35SErik Nordmark 	ire_t *ire = NULL;
1025bd670b35SErik Nordmark 	int i;
1026bd670b35SErik Nordmark 
1027bd670b35SErik Nordmark 	ASSERT(RW_LOCK_HELD(&ipst->ips_ip6_ire_head_lock));
1028bd670b35SErik Nordmark 
10297c478bd9Sstevel@tonic-gate 	/*
10307c478bd9Sstevel@tonic-gate 	 * If the mask is known, the lookup
10317c478bd9Sstevel@tonic-gate 	 * is simple, if the mask is not known
10327c478bd9Sstevel@tonic-gate 	 * we need to search.
10337c478bd9Sstevel@tonic-gate 	 */
10347c478bd9Sstevel@tonic-gate 	if (flags & MATCH_IRE_MASK) {
10357c478bd9Sstevel@tonic-gate 		uint_t masklen;
10367c478bd9Sstevel@tonic-gate 
10377c478bd9Sstevel@tonic-gate 		masklen = ip_mask_to_plen_v6(mask);
1038bd670b35SErik Nordmark 		if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) {
10397c478bd9Sstevel@tonic-gate 			return (NULL);
1040bd670b35SErik Nordmark 		}
1041f4b3ec61Sdh155122 		irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][
1042f4b3ec61Sdh155122 		    IRE_ADDR_MASK_HASH_V6(*addr, *mask,
1043f4b3ec61Sdh155122 		    ipst->ips_ip6_ftable_hash_size)]);
10447c478bd9Sstevel@tonic-gate 		rw_enter(&irb_ptr->irb_lock, RW_READER);
10457c478bd9Sstevel@tonic-gate 		for (ire = irb_ptr->irb_ire; ire != NULL;
10467c478bd9Sstevel@tonic-gate 		    ire = ire->ire_next) {
1047bd670b35SErik Nordmark 			if (IRE_IS_CONDEMNED(ire))
10487c478bd9Sstevel@tonic-gate 				continue;
10497c478bd9Sstevel@tonic-gate 			if (ire_match_args_v6(ire, addr, mask, gateway, type,
1050bd670b35SErik Nordmark 			    ill, zoneid, tsl, flags))
10517c478bd9Sstevel@tonic-gate 				goto found_ire;
10527c478bd9Sstevel@tonic-gate 		}
10537c478bd9Sstevel@tonic-gate 		rw_exit(&irb_ptr->irb_lock);
10547c478bd9Sstevel@tonic-gate 	} else {
1055bd670b35SErik Nordmark 		uint_t masklen;
1056bd670b35SErik Nordmark 
10577c478bd9Sstevel@tonic-gate 		/*
10587c478bd9Sstevel@tonic-gate 		 * In this case we don't know the mask, we need to
10597c478bd9Sstevel@tonic-gate 		 * search the table assuming different mask sizes.
10607c478bd9Sstevel@tonic-gate 		 */
1061bd670b35SErik Nordmark 		if (flags & MATCH_IRE_SHORTERMASK) {
1062bd670b35SErik Nordmark 			masklen = ip_mask_to_plen_v6(mask);
1063bd670b35SErik Nordmark 			if (masklen == 0) {
1064bd670b35SErik Nordmark 				/* Nothing shorter than zero */
1065bd670b35SErik Nordmark 				return (NULL);
1066bd670b35SErik Nordmark 			}
1067bd670b35SErik Nordmark 			masklen--;
1068bd670b35SErik Nordmark 		} else {
1069bd670b35SErik Nordmark 			masklen = IP6_MASK_TABLE_SIZE - 1;
1070bd670b35SErik Nordmark 		}
1071bd670b35SErik Nordmark 
1072bd670b35SErik Nordmark 		for (i = masklen; i >= 0; i--) {
10737c478bd9Sstevel@tonic-gate 			in6_addr_t tmpmask;
10747c478bd9Sstevel@tonic-gate 
1075f4b3ec61Sdh155122 			if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL)
10767c478bd9Sstevel@tonic-gate 				continue;
10777c478bd9Sstevel@tonic-gate 			(void) ip_plen_to_mask_v6(i, &tmpmask);
1078f4b3ec61Sdh155122 			irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][
10797c478bd9Sstevel@tonic-gate 			    IRE_ADDR_MASK_HASH_V6(*addr, tmpmask,
1080f4b3ec61Sdh155122 			    ipst->ips_ip6_ftable_hash_size)];
10817c478bd9Sstevel@tonic-gate 			rw_enter(&irb_ptr->irb_lock, RW_READER);
10827c478bd9Sstevel@tonic-gate 			for (ire = irb_ptr->irb_ire; ire != NULL;
10837c478bd9Sstevel@tonic-gate 			    ire = ire->ire_next) {
1084bd670b35SErik Nordmark 				if (IRE_IS_CONDEMNED(ire))
10857c478bd9Sstevel@tonic-gate 					continue;
10867c478bd9Sstevel@tonic-gate 				if (ire_match_args_v6(ire, addr,
1087bd670b35SErik Nordmark 				    &ire->ire_mask_v6, gateway, type, ill,
1088bd670b35SErik Nordmark 				    zoneid, tsl, flags))
10897c478bd9Sstevel@tonic-gate 					goto found_ire;
10907c478bd9Sstevel@tonic-gate 			}
10917c478bd9Sstevel@tonic-gate 			rw_exit(&irb_ptr->irb_lock);
10927c478bd9Sstevel@tonic-gate 		}
10937c478bd9Sstevel@tonic-gate 	}
10947c478bd9Sstevel@tonic-gate 	ASSERT(ire == NULL);
10957c478bd9Sstevel@tonic-gate 	ip1dbg(("ire_ftable_lookup_v6: returning NULL ire"));
10967c478bd9Sstevel@tonic-gate 	return (NULL);
1097bd670b35SErik Nordmark 
10987c478bd9Sstevel@tonic-gate found_ire:
1099bd670b35SErik Nordmark 	ire_refhold(ire);
11007c478bd9Sstevel@tonic-gate 	rw_exit(&irb_ptr->irb_lock);
11017c478bd9Sstevel@tonic-gate 	return (ire);
11027c478bd9Sstevel@tonic-gate }
1103bd670b35SErik Nordmark 
1104bd670b35SErik Nordmark 
11057c478bd9Sstevel@tonic-gate /*
1106bd670b35SErik Nordmark  * This function is called by
1107bd670b35SErik Nordmark  * ip_input/ire_route_recursive when doing a route lookup on only the
1108bd670b35SErik Nordmark  * destination address.
1109bd670b35SErik Nordmark  *
1110bd670b35SErik Nordmark  * The optimizations of this function over ire_ftable_lookup are:
1111bd670b35SErik Nordmark  *	o removing unnecessary flag matching
1112bd670b35SErik Nordmark  *	o doing longest prefix match instead of overloading it further
1113bd670b35SErik Nordmark  *	  with the unnecessary "best_prefix_match"
1114bd670b35SErik Nordmark  *
1115bd670b35SErik Nordmark  * If no route is found we return IRE_NOROUTE.
11167c478bd9Sstevel@tonic-gate  */
1117bd670b35SErik Nordmark ire_t *
1118bd670b35SErik Nordmark ire_ftable_lookup_simple_v6(const in6_addr_t *addr, uint32_t xmit_hint,
1119bd670b35SErik Nordmark     ip_stack_t *ipst, uint_t *generationp)
1120bd670b35SErik Nordmark {
1121bd670b35SErik Nordmark 	ire_t	*ire;
11227c478bd9Sstevel@tonic-gate 
1123bd670b35SErik Nordmark 	ire = ire_ftable_lookup_v6(addr, NULL, NULL, 0, NULL, ALL_ZONES, NULL,
1124bd670b35SErik Nordmark 	    MATCH_IRE_DSTONLY, xmit_hint, ipst, generationp);
11257c478bd9Sstevel@tonic-gate 	if (ire == NULL) {
1126bd670b35SErik Nordmark 		ire = ire_reject(ipst, B_TRUE);
1127bd670b35SErik Nordmark 		if (generationp != NULL)
1128bd670b35SErik Nordmark 			*generationp = IRE_GENERATION_VERIFY;
11297c478bd9Sstevel@tonic-gate 	}
1130bd670b35SErik Nordmark 	/* ftable_lookup did round robin */
11317c478bd9Sstevel@tonic-gate 	return (ire);
11327c478bd9Sstevel@tonic-gate }
1133bd670b35SErik Nordmark 
1134bd670b35SErik Nordmark ire_t *
113544b099c4SSowmini Varadhan ip_select_route_v6(const in6_addr_t *dst, const in6_addr_t src,
113644b099c4SSowmini Varadhan     ip_xmit_attr_t *ixa, uint_t *generationp, in6_addr_t *setsrcp,
113744b099c4SSowmini Varadhan     int *errorp, boolean_t *multirtp)
1138bd670b35SErik Nordmark {
1139bd670b35SErik Nordmark 	ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1140bd670b35SErik Nordmark 
114144b099c4SSowmini Varadhan 	return (ip_select_route(dst, src, ixa, generationp, setsrcp, errorp,
1142bd670b35SErik Nordmark 	    multirtp));
1143bd670b35SErik Nordmark }
1144bd670b35SErik Nordmark 
11457c478bd9Sstevel@tonic-gate /*
1146bd670b35SErik Nordmark  * Recursively look for a route to the destination. Can also match on
1147bd670b35SErik Nordmark  * the zoneid, ill, and label. Used for the data paths. See also
1148bd670b35SErik Nordmark  * ire_route_recursive_dstonly.
1149bd670b35SErik Nordmark  *
11509e3469d3SErik Nordmark  * If IRR_ALLOCATE is not set then we will only inspect the existing IREs; never
1151bd670b35SErik Nordmark  * create an IRE_IF_CLONE. This is used on the receive side when we are not
1152bd670b35SErik Nordmark  * forwarding.
11539e3469d3SErik Nordmark  * If IRR_INCOMPLETE is set then we return the IRE even if we can't correctly
11549e3469d3SErik Nordmark  * resolve the gateway.
1155bd670b35SErik Nordmark  *
1156bd670b35SErik Nordmark  * Note that this function never returns NULL. It returns an IRE_NOROUTE
1157bd670b35SErik Nordmark  * instead.
1158bd670b35SErik Nordmark  *
1159bd670b35SErik Nordmark  * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it
1160bd670b35SErik Nordmark  * is an error.
1161bd670b35SErik Nordmark  * Allow at most one RTF_INDIRECT.
11627c478bd9Sstevel@tonic-gate  */
1163bd670b35SErik Nordmark ire_t *
1164bd670b35SErik Nordmark ire_route_recursive_impl_v6(ire_t *ire,
1165bd670b35SErik Nordmark     const in6_addr_t *nexthop, uint_t ire_type, const ill_t *ill_arg,
1166bd670b35SErik Nordmark     zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args,
11679e3469d3SErik Nordmark     uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst,
1168bd670b35SErik Nordmark     in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp)
1169bd670b35SErik Nordmark {
1170bd670b35SErik Nordmark 	int		i, j;
1171bd670b35SErik Nordmark 	in6_addr_t	v6nexthop = *nexthop;
1172bd670b35SErik Nordmark 	ire_t		*ires[MAX_IRE_RECURSION];
1173bd670b35SErik Nordmark 	uint_t		generation;
1174bd670b35SErik Nordmark 	uint_t		generations[MAX_IRE_RECURSION];
1175bd670b35SErik Nordmark 	boolean_t	need_refrele = B_FALSE;
1176bd670b35SErik Nordmark 	boolean_t	invalidate = B_FALSE;
1177bd670b35SErik Nordmark 	ill_t		*ill = NULL;
117801685f97SSowmini Varadhan 	uint_t		maskoff = (IRE_LOCAL|IRE_LOOPBACK);
1179bd670b35SErik Nordmark 
1180bd670b35SErik Nordmark 	if (setsrcp != NULL)
1181bd670b35SErik Nordmark 		ASSERT(IN6_IS_ADDR_UNSPECIFIED(setsrcp));
1182bd670b35SErik Nordmark 	if (gwattrp != NULL)
1183bd670b35SErik Nordmark 		ASSERT(*gwattrp == NULL);
1184bd670b35SErik Nordmark 
1185bd670b35SErik Nordmark 	/*
1186bd670b35SErik Nordmark 	 * We iterate up to three times to resolve a route, even though
1187bd670b35SErik Nordmark 	 * we have four slots in the array. The extra slot is for an
1188bd670b35SErik Nordmark 	 * IRE_IF_CLONE we might need to create.
1189bd670b35SErik Nordmark 	 */
1190bd670b35SErik Nordmark 	i = 0;
1191bd670b35SErik Nordmark 	while (i < MAX_IRE_RECURSION - 1) {
1192bd670b35SErik Nordmark 		/* ire_ftable_lookup handles round-robin/ECMP */
11937c478bd9Sstevel@tonic-gate 		if (ire == NULL) {
1194bd670b35SErik Nordmark 			ire = ire_ftable_lookup_v6(&v6nexthop, 0, 0, ire_type,
119544b099c4SSowmini Varadhan 			    (ill != NULL ? ill : ill_arg), zoneid, tsl,
1196bd670b35SErik Nordmark 			    match_args, xmit_hint, ipst, &generation);
11977c478bd9Sstevel@tonic-gate 		} else {
1198bd670b35SErik Nordmark 			/* Caller passed it; extra hold since we will rele */
1199bd670b35SErik Nordmark 			ire_refhold(ire);
1200bd670b35SErik Nordmark 			if (generationp != NULL)
1201bd670b35SErik Nordmark 				generation = *generationp;
1202bd670b35SErik Nordmark 			else
1203bd670b35SErik Nordmark 				generation = IRE_GENERATION_VERIFY;
12047c478bd9Sstevel@tonic-gate 		}
12057c478bd9Sstevel@tonic-gate 
120601685f97SSowmini Varadhan 		if (ire == NULL) {
120701685f97SSowmini Varadhan 			if (i > 0 && (irr_flags & IRR_INCOMPLETE)) {
12089e3469d3SErik Nordmark 				ire = ires[0];
12099e3469d3SErik Nordmark 				ire_refhold(ire);
12109e3469d3SErik Nordmark 			} else {
1211bd670b35SErik Nordmark 				ire = ire_reject(ipst, B_TRUE);
12129e3469d3SErik Nordmark 			}
1213bd670b35SErik Nordmark 			goto error;
1214bd670b35SErik Nordmark 		}
121501685f97SSowmini Varadhan 
121601685f97SSowmini Varadhan 		/* Need to return the ire with RTF_REJECT|BLACKHOLE */
121701685f97SSowmini Varadhan 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))
121801685f97SSowmini Varadhan 			goto error;
121901685f97SSowmini Varadhan 
122001685f97SSowmini Varadhan 		ASSERT(!(ire->ire_type & IRE_MULTICAST)); /* Not in ftable */
1221*fff7ec1dSSowmini Varadhan 		/*
1222*fff7ec1dSSowmini Varadhan 		 * Verify that the IRE_IF_CLONE has a consistent generation
1223*fff7ec1dSSowmini Varadhan 		 * number.
1224*fff7ec1dSSowmini Varadhan 		 */
1225*fff7ec1dSSowmini Varadhan 		if ((ire->ire_type & IRE_IF_CLONE) && !ire_clone_verify(ire)) {
1226*fff7ec1dSSowmini Varadhan 			ire_refrele(ire);
1227*fff7ec1dSSowmini Varadhan 			ire = NULL;
1228*fff7ec1dSSowmini Varadhan 			continue;
1229*fff7ec1dSSowmini Varadhan 		}
123001685f97SSowmini Varadhan 
123101685f97SSowmini Varadhan 		/*
123201685f97SSowmini Varadhan 		 * Don't allow anything unusual past the first iteration.
123301685f97SSowmini Varadhan 		 * After the first lookup, we should no longer look for
123401685f97SSowmini Varadhan 		 * (IRE_LOCAL|IRE_LOOPBACK) or RTF_INDIRECT routes.
123501685f97SSowmini Varadhan 		 *
123601685f97SSowmini Varadhan 		 * In addition, after we have found a direct IRE_OFFLINK,
123701685f97SSowmini Varadhan 		 * we should only look for interface or clone routes.
123801685f97SSowmini Varadhan 		 */
123901685f97SSowmini Varadhan 		match_args |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */
124001685f97SSowmini Varadhan 		if ((ire->ire_type & IRE_OFFLINK) &&
124101685f97SSowmini Varadhan 		    !(ire->ire_flags & RTF_INDIRECT)) {
124201685f97SSowmini Varadhan 			ire_type = IRE_IF_ALL;
124301685f97SSowmini Varadhan 		} else {
124401685f97SSowmini Varadhan 			if (!(match_args & MATCH_IRE_TYPE))
124501685f97SSowmini Varadhan 				ire_type = (IRE_OFFLINK|IRE_ONLINK);
124601685f97SSowmini Varadhan 			ire_type &= ~maskoff; /* no more LOCAL, LOOPBACK */
1247bd670b35SErik Nordmark 		}
124801685f97SSowmini Varadhan 		match_args |= MATCH_IRE_TYPE;
1249bd670b35SErik Nordmark 		/* We have a usable IRE */
1250bd670b35SErik Nordmark 		ires[i] = ire;
1251bd670b35SErik Nordmark 		generations[i] = generation;
1252bd670b35SErik Nordmark 		i++;
1253bd670b35SErik Nordmark 
1254bd670b35SErik Nordmark 		/* The first RTF_SETSRC address is passed back if setsrcp */
1255bd670b35SErik Nordmark 		if ((ire->ire_flags & RTF_SETSRC) &&
1256bd670b35SErik Nordmark 		    setsrcp != NULL && IN6_IS_ADDR_UNSPECIFIED(setsrcp)) {
1257bd670b35SErik Nordmark 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(
1258bd670b35SErik Nordmark 			    &ire->ire_setsrc_addr_v6));
1259bd670b35SErik Nordmark 			*setsrcp = ire->ire_setsrc_addr_v6;
12607c478bd9Sstevel@tonic-gate 		}
12617c478bd9Sstevel@tonic-gate 
1262bd670b35SErik Nordmark 		/* The first ire_gw_secattr is passed back if gwattrp */
1263bd670b35SErik Nordmark 		if (ire->ire_gw_secattr != NULL &&
1264bd670b35SErik Nordmark 		    gwattrp != NULL && *gwattrp == NULL)
1265bd670b35SErik Nordmark 			*gwattrp = ire->ire_gw_secattr;
1266bd670b35SErik Nordmark 
1267bd670b35SErik Nordmark 		/*
1268bd670b35SErik Nordmark 		 * Check if we have a short-cut pointer to an IRE for this
1269bd670b35SErik Nordmark 		 * destination, and that the cached dependency isn't stale.
1270bd670b35SErik Nordmark 		 * In that case we've rejoined an existing tree towards a
1271bd670b35SErik Nordmark 		 * parent, thus we don't need to continue the loop to
1272bd670b35SErik Nordmark 		 * discover the rest of the tree.
1273bd670b35SErik Nordmark 		 */
12747c478bd9Sstevel@tonic-gate 		mutex_enter(&ire->ire_lock);
1275bd670b35SErik Nordmark 		if (ire->ire_dep_parent != NULL &&
1276bd670b35SErik Nordmark 		    ire->ire_dep_parent->ire_generation ==
1277bd670b35SErik Nordmark 		    ire->ire_dep_parent_generation) {
12787c478bd9Sstevel@tonic-gate 			mutex_exit(&ire->ire_lock);
1279bd670b35SErik Nordmark 			ire = NULL;
1280bd670b35SErik Nordmark 			goto done;
1281bd670b35SErik Nordmark 		}
1282bd670b35SErik Nordmark 		mutex_exit(&ire->ire_lock);
12837c478bd9Sstevel@tonic-gate 
1284bd670b35SErik Nordmark 		/*
1285bd670b35SErik Nordmark 		 * If this type should have an ire_nce_cache (even if it
1286bd670b35SErik Nordmark 		 * doesn't yet have one) then we are done. Includes
1287bd670b35SErik Nordmark 		 * IRE_INTERFACE with a full 128 bit mask.
1288bd670b35SErik Nordmark 		 */
1289bd670b35SErik Nordmark 		if (ire->ire_nce_capable) {
1290bd670b35SErik Nordmark 			ire = NULL;
1291bd670b35SErik Nordmark 			goto done;
12927c478bd9Sstevel@tonic-gate 		}
1293bd670b35SErik Nordmark 		ASSERT(!(ire->ire_type & IRE_IF_CLONE));
1294bd670b35SErik Nordmark 		/*
1295bd670b35SErik Nordmark 		 * For an IRE_INTERFACE we create an IRE_IF_CLONE for this
1296bd670b35SErik Nordmark 		 * particular destination
1297bd670b35SErik Nordmark 		 */
1298bd670b35SErik Nordmark 		if (ire->ire_type & IRE_INTERFACE) {
1299bd670b35SErik Nordmark 			ire_t		*clone;
13007c478bd9Sstevel@tonic-gate 
1301bd670b35SErik Nordmark 			ASSERT(ire->ire_masklen != IPV6_ABITS);
13027c478bd9Sstevel@tonic-gate 
1303bd670b35SErik Nordmark 			/*
1304bd670b35SErik Nordmark 			 * In the case of ip_input and ILLF_FORWARDING not
13059e3469d3SErik Nordmark 			 * being set, and in the case of RTM_GET, there is
13069e3469d3SErik Nordmark 			 * no point in allocating an IRE_IF_CLONE. We return
13079e3469d3SErik Nordmark 			 * the IRE_INTERFACE. Note that !IRR_ALLOCATE can
13089e3469d3SErik Nordmark 			 * result in a ire_dep_parent which is IRE_IF_*
13099e3469d3SErik Nordmark 			 * without an IRE_IF_CLONE.
1310bd670b35SErik Nordmark 			 * We recover from that when we need to send packets
1311bd670b35SErik Nordmark 			 * by ensuring that the generations become
1312bd670b35SErik Nordmark 			 * IRE_GENERATION_VERIFY in this case.
1313bd670b35SErik Nordmark 			 */
13149e3469d3SErik Nordmark 			if (!(irr_flags & IRR_ALLOCATE)) {
1315bd670b35SErik Nordmark 				invalidate = B_TRUE;
1316bd670b35SErik Nordmark 				ire = NULL;
1317bd670b35SErik Nordmark 				goto done;
1318bd670b35SErik Nordmark 			}
1319bd670b35SErik Nordmark 
1320bd670b35SErik Nordmark 			clone = ire_create_if_clone(ire, &v6nexthop,
1321bd670b35SErik Nordmark 			    &generation);
1322bd670b35SErik Nordmark 			if (clone == NULL) {
1323bd670b35SErik Nordmark 				/*
1324bd670b35SErik Nordmark 				 * Temporary failure - no memory.
1325bd670b35SErik Nordmark 				 * Don't want caller to cache IRE_NOROUTE.
1326bd670b35SErik Nordmark 				 */
1327bd670b35SErik Nordmark 				invalidate = B_TRUE;
1328bd670b35SErik Nordmark 				ire = ire_blackhole(ipst, B_TRUE);
1329bd670b35SErik Nordmark 				goto error;
1330bd670b35SErik Nordmark 			}
1331bd670b35SErik Nordmark 			/*
1332bd670b35SErik Nordmark 			 * Make clone next to last entry and the
1333bd670b35SErik Nordmark 			 * IRE_INTERFACE the last in the dependency
1334bd670b35SErik Nordmark 			 * chain since the clone depends on the
1335bd670b35SErik Nordmark 			 * IRE_INTERFACE.
1336bd670b35SErik Nordmark 			 */
1337bd670b35SErik Nordmark 			ASSERT(i >= 1);
1338bd670b35SErik Nordmark 			ASSERT(i < MAX_IRE_RECURSION);
1339bd670b35SErik Nordmark 
1340bd670b35SErik Nordmark 			ires[i] = ires[i-1];
1341bd670b35SErik Nordmark 			generations[i] = generations[i-1];
1342bd670b35SErik Nordmark 			ires[i-1] = clone;
1343bd670b35SErik Nordmark 			generations[i-1] = generation;
1344bd670b35SErik Nordmark 			i++;
1345bd670b35SErik Nordmark 
1346bd670b35SErik Nordmark 			ire = NULL;
1347bd670b35SErik Nordmark 			goto done;
13487c478bd9Sstevel@tonic-gate 		}
13495b17e9bdSJon Anderson 
13505b17e9bdSJon Anderson 		/*
1351bd670b35SErik Nordmark 		 * We only match on the type and optionally ILL when
1352bd670b35SErik Nordmark 		 * recursing. The type match is used by some callers
1353bd670b35SErik Nordmark 		 * to exclude certain types (such as IRE_IF_CLONE or
1354bd670b35SErik Nordmark 		 * IRE_LOCAL|IRE_LOOPBACK).
135544b099c4SSowmini Varadhan 		 *
135644b099c4SSowmini Varadhan 		 * In the MATCH_IRE_SRC_ILL case, ill_arg may be the 'srcof'
135744b099c4SSowmini Varadhan 		 * ire->ire_ill, and we want to find the IRE_INTERFACE for
135844b099c4SSowmini Varadhan 		 * ire_ill, so we set ill to the ire_ill
13595b17e9bdSJon Anderson 		 */
136001685f97SSowmini Varadhan 		match_args &= (MATCH_IRE_TYPE | MATCH_IRE_DIRECT);
1361bd670b35SErik Nordmark 		v6nexthop = ire->ire_gateway_addr_v6;
1362bd670b35SErik Nordmark 		if (ill == NULL && ire->ire_ill != NULL) {
1363bd670b35SErik Nordmark 			ill = ire->ire_ill;
1364bd670b35SErik Nordmark 			need_refrele = B_TRUE;
1365bd670b35SErik Nordmark 			ill_refhold(ill);
1366bd670b35SErik Nordmark 			match_args |= MATCH_IRE_ILL;
13675b17e9bdSJon Anderson 		}
1368bd670b35SErik Nordmark 		ire = NULL;
1369bd670b35SErik Nordmark 	}
1370bd670b35SErik Nordmark 	ASSERT(ire == NULL);
1371bd670b35SErik Nordmark 	ire = ire_reject(ipst, B_TRUE);
1372bd670b35SErik Nordmark 
1373bd670b35SErik Nordmark error:
1374bd670b35SErik Nordmark 	ASSERT(ire != NULL);
1375bd670b35SErik Nordmark 	if (need_refrele)
1376bd670b35SErik Nordmark 		ill_refrele(ill);
1377bd670b35SErik Nordmark 
1378bd670b35SErik Nordmark 	/*
1379bd670b35SErik Nordmark 	 * In the case of MULTIRT we want to try a different IRE the next
1380bd670b35SErik Nordmark 	 * time. We let the next packet retry in that case.
1381bd670b35SErik Nordmark 	 */
1382bd670b35SErik Nordmark 	if (i > 0 && (ires[0]->ire_flags & RTF_MULTIRT))
1383bd670b35SErik Nordmark 		(void) ire_no_good(ires[0]);
1384bd670b35SErik Nordmark 
1385bd670b35SErik Nordmark cleanup:
1386bd670b35SErik Nordmark 	/* cleanup ires[i] */
1387bd670b35SErik Nordmark 	ire_dep_unbuild(ires, i);
1388bd670b35SErik Nordmark 	for (j = 0; j < i; j++)
1389bd670b35SErik Nordmark 		ire_refrele(ires[j]);
1390bd670b35SErik Nordmark 
13919e3469d3SErik Nordmark 	ASSERT((ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
13929e3469d3SErik Nordmark 	    (irr_flags & IRR_INCOMPLETE));
1393bd670b35SErik Nordmark 	/*
1394bd670b35SErik Nordmark 	 * Use IRE_GENERATION_VERIFY to ensure that ip_output will redo the
1395bd670b35SErik Nordmark 	 * ip_select_route since the reject or lack of memory might be gone.
1396bd670b35SErik Nordmark 	 */
1397bd670b35SErik Nordmark 	if (generationp != NULL)
1398bd670b35SErik Nordmark 		*generationp = IRE_GENERATION_VERIFY;
1399bd670b35SErik Nordmark 	return (ire);
1400bd670b35SErik Nordmark 
1401bd670b35SErik Nordmark done:
1402bd670b35SErik Nordmark 	ASSERT(ire == NULL);
1403bd670b35SErik Nordmark 	if (need_refrele)
1404bd670b35SErik Nordmark 		ill_refrele(ill);
1405bd670b35SErik Nordmark 
1406bd670b35SErik Nordmark 	/* Build dependencies */
1407188e1664SErik Nordmark 	if (i > 1 && !ire_dep_build(ires, generations, i)) {
1408bd670b35SErik Nordmark 		/* Something in chain was condemned; tear it apart */
1409bd670b35SErik Nordmark 		ire = ire_blackhole(ipst, B_TRUE);
1410bd670b35SErik Nordmark 		goto cleanup;
1411bd670b35SErik Nordmark 	}
1412bd670b35SErik Nordmark 
1413bd670b35SErik Nordmark 	/*
1414bd670b35SErik Nordmark 	 * Release all refholds except the one for ires[0] that we
1415bd670b35SErik Nordmark 	 * will return to the caller.
1416bd670b35SErik Nordmark 	 */
1417bd670b35SErik Nordmark 	for (j = 1; j < i; j++)
1418bd670b35SErik Nordmark 		ire_refrele(ires[j]);
1419bd670b35SErik Nordmark 
1420bd670b35SErik Nordmark 	if (invalidate) {
1421bd670b35SErik Nordmark 		/*
1422bd670b35SErik Nordmark 		 * Since we needed to allocate but couldn't we need to make
1423bd670b35SErik Nordmark 		 * sure that the dependency chain is rebuilt the next time.
1424bd670b35SErik Nordmark 		 */
1425bd670b35SErik Nordmark 		ire_dep_invalidate_generations(ires[0]);
1426bd670b35SErik Nordmark 		generation = IRE_GENERATION_VERIFY;
1427bd670b35SErik Nordmark 	} else {
1428bd670b35SErik Nordmark 		/*
1429bd670b35SErik Nordmark 		 * IREs can have been added or deleted while we did the
1430bd670b35SErik Nordmark 		 * recursive lookup and we can't catch those until we've built
1431bd670b35SErik Nordmark 		 * the dependencies. We verify the stored
1432bd670b35SErik Nordmark 		 * ire_dep_parent_generation to catch any such changes and
1433bd670b35SErik Nordmark 		 * return IRE_GENERATION_VERIFY (which will cause
1434bd670b35SErik Nordmark 		 * ip_select_route to be called again so we can redo the
1435bd670b35SErik Nordmark 		 * recursive lookup next time we send a packet.
1436bd670b35SErik Nordmark 		 */
1437188e1664SErik Nordmark 		if (ires[0]->ire_dep_parent == NULL)
1438188e1664SErik Nordmark 			generation = ires[0]->ire_generation;
1439188e1664SErik Nordmark 		else
1440bd670b35SErik Nordmark 			generation = ire_dep_validate_generations(ires[0]);
1441bd670b35SErik Nordmark 		if (generations[0] != ires[0]->ire_generation) {
1442bd670b35SErik Nordmark 			/* Something changed at the top */
1443bd670b35SErik Nordmark 			generation = IRE_GENERATION_VERIFY;
1444bd670b35SErik Nordmark 		}
1445bd670b35SErik Nordmark 	}
1446bd670b35SErik Nordmark 	if (generationp != NULL)
1447bd670b35SErik Nordmark 		*generationp = generation;
1448bd670b35SErik Nordmark 
1449bd670b35SErik Nordmark 	return (ires[0]);
1450bd670b35SErik Nordmark }
1451bd670b35SErik Nordmark 
1452bd670b35SErik Nordmark ire_t *
1453bd670b35SErik Nordmark ire_route_recursive_v6(const in6_addr_t *nexthop, uint_t ire_type,
1454bd670b35SErik Nordmark     const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args,
14559e3469d3SErik Nordmark     uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst,
1456bd670b35SErik Nordmark     in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp)
1457bd670b35SErik Nordmark {
1458bd670b35SErik Nordmark 	return (ire_route_recursive_impl_v6(NULL, nexthop, ire_type, ill,
14599e3469d3SErik Nordmark 	    zoneid, tsl, match_args, irr_flags, xmit_hint, ipst, setsrcp,
1460bd670b35SErik Nordmark 	    gwattrp, generationp));
1461bd670b35SErik Nordmark }
1462bd670b35SErik Nordmark 
1463bd670b35SErik Nordmark /*
1464bd670b35SErik Nordmark  * Recursively look for a route to the destination.
1465bd670b35SErik Nordmark  * We only handle a destination match here, yet we have the same arguments
1466bd670b35SErik Nordmark  * as the full match to allow function pointers to select between the two.
1467bd670b35SErik Nordmark  *
1468bd670b35SErik Nordmark  * Note that this function never returns NULL. It returns an IRE_NOROUTE
1469bd670b35SErik Nordmark  * instead.
1470bd670b35SErik Nordmark  *
1471bd670b35SErik Nordmark  * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it
1472bd670b35SErik Nordmark  * is an error.
1473bd670b35SErik Nordmark  * Allow at most one RTF_INDIRECT.
1474bd670b35SErik Nordmark  */
1475bd670b35SErik Nordmark ire_t *
14769e3469d3SErik Nordmark ire_route_recursive_dstonly_v6(const in6_addr_t *nexthop, uint_t irr_flags,
1477bd670b35SErik Nordmark     uint32_t xmit_hint, ip_stack_t *ipst)
1478bd670b35SErik Nordmark {
1479bd670b35SErik Nordmark 	ire_t	*ire;
1480bd670b35SErik Nordmark 	ire_t	*ire1;
1481bd670b35SErik Nordmark 	uint_t	generation;
1482bd670b35SErik Nordmark 
1483bd670b35SErik Nordmark 	/* ire_ftable_lookup handles round-robin/ECMP */
1484bd670b35SErik Nordmark 	ire = ire_ftable_lookup_simple_v6(nexthop, xmit_hint, ipst,
1485bd670b35SErik Nordmark 	    &generation);
1486bd670b35SErik Nordmark 	ASSERT(ire != NULL);
1487bd670b35SErik Nordmark 
1488bd670b35SErik Nordmark 	/*
1489*fff7ec1dSSowmini Varadhan 	 * If the IRE has a current cached parent we know that the whole
1490*fff7ec1dSSowmini Varadhan 	 * parent chain is current, hence we don't need to discover and
1491*fff7ec1dSSowmini Varadhan 	 * build any dependencies by doing a recursive lookup.
1492*fff7ec1dSSowmini Varadhan 	 */
1493*fff7ec1dSSowmini Varadhan 	mutex_enter(&ire->ire_lock);
1494*fff7ec1dSSowmini Varadhan 	if (ire->ire_dep_parent != NULL) {
1495*fff7ec1dSSowmini Varadhan 		if (ire->ire_dep_parent->ire_generation ==
1496*fff7ec1dSSowmini Varadhan 		    ire->ire_dep_parent_generation) {
1497*fff7ec1dSSowmini Varadhan 			mutex_exit(&ire->ire_lock);
1498*fff7ec1dSSowmini Varadhan 			return (ire);
1499*fff7ec1dSSowmini Varadhan 		}
1500*fff7ec1dSSowmini Varadhan 		mutex_exit(&ire->ire_lock);
1501*fff7ec1dSSowmini Varadhan 	} else {
1502*fff7ec1dSSowmini Varadhan 		mutex_exit(&ire->ire_lock);
1503*fff7ec1dSSowmini Varadhan 		/*
1504bd670b35SErik Nordmark 		 * If this type should have an ire_nce_cache (even if it
1505bd670b35SErik Nordmark 		 * doesn't yet have one) then we are done. Includes
1506bd670b35SErik Nordmark 		 * IRE_INTERFACE with a full 128 bit mask.
1507bd670b35SErik Nordmark 		 */
1508bd670b35SErik Nordmark 		if (ire->ire_nce_capable)
1509bd670b35SErik Nordmark 			return (ire);
15105b17e9bdSJon Anderson 	}
15115b17e9bdSJon Anderson 
1512bd670b35SErik Nordmark 	/*
1513bd670b35SErik Nordmark 	 * Fallback to loop in the normal code starting with the ire
1514bd670b35SErik Nordmark 	 * we found. Normally this would return the same ire.
1515bd670b35SErik Nordmark 	 */
1516bd670b35SErik Nordmark 	ire1 = ire_route_recursive_impl_v6(ire, nexthop, 0, NULL, ALL_ZONES,
15179e3469d3SErik Nordmark 	    NULL, MATCH_IRE_DSTONLY, irr_flags, xmit_hint, ipst, NULL, NULL,
1518bd670b35SErik Nordmark 	    &generation);
1519bd670b35SErik Nordmark 	ire_refrele(ire);
1520bd670b35SErik Nordmark 	return (ire1);
15215b17e9bdSJon Anderson }
1522