17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 545916cd2Sjpk * Common Development and Distribution License (the "License"). 645916cd2Sjpk * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 2278377681SSowmini Varadhan * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 237c478bd9Sstevel@tonic-gate * Copyright (c) 1990 Mentat Inc. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * This file contains routines that manipulate Internet Routing Entries (IREs). 287c478bd9Sstevel@tonic-gate */ 297c478bd9Sstevel@tonic-gate #include <sys/types.h> 307c478bd9Sstevel@tonic-gate #include <sys/stream.h> 317c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 327c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 337c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 347c478bd9Sstevel@tonic-gate 357c478bd9Sstevel@tonic-gate #include <sys/systm.h> 367c478bd9Sstevel@tonic-gate #include <sys/param.h> 377c478bd9Sstevel@tonic-gate #include <sys/socket.h> 387c478bd9Sstevel@tonic-gate #include <net/if.h> 397c478bd9Sstevel@tonic-gate #include <net/route.h> 407c478bd9Sstevel@tonic-gate #include <netinet/in.h> 417c478bd9Sstevel@tonic-gate #include <net/if_dl.h> 427c478bd9Sstevel@tonic-gate #include <netinet/ip6.h> 437c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h> 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate #include <inet/common.h> 467c478bd9Sstevel@tonic-gate #include <inet/mi.h> 477c478bd9Sstevel@tonic-gate #include <inet/ip.h> 487c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 497c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h> 507c478bd9Sstevel@tonic-gate #include <inet/ip_if.h> 517c478bd9Sstevel@tonic-gate #include <inet/ip_ire.h> 527c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h> 537c478bd9Sstevel@tonic-gate #include <inet/nd.h> 546e91bba0SGirish Moodalbail #include <inet/tunables.h> 557c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 567c478bd9Sstevel@tonic-gate #include <sys/zone.h> 577c478bd9Sstevel@tonic-gate 5845916cd2Sjpk #include <sys/tsol/label.h> 5945916cd2Sjpk #include <sys/tsol/tnet.h> 6045916cd2Sjpk 61bd670b35SErik Nordmark #define IS_DEFAULT_ROUTE_V6(ire) \ 62bd670b35SErik Nordmark (((ire)->ire_type & IRE_DEFAULT) || \ 63bd670b35SErik Nordmark (((ire)->ire_type & IRE_INTERFACE) && \ 64bd670b35SErik Nordmark (IN6_IS_ADDR_UNSPECIFIED(&(ire)->ire_addr_v6)))) 65bd670b35SErik Nordmark 667c478bd9Sstevel@tonic-gate static ire_t ire_null; 677c478bd9Sstevel@tonic-gate 68bd670b35SErik Nordmark static ire_t * 69bd670b35SErik Nordmark ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask, 70bd670b35SErik Nordmark const in6_addr_t *gateway, int type, const ill_t *ill, 71bd670b35SErik Nordmark zoneid_t zoneid, const ts_label_t *tsl, int flags, 72bd670b35SErik Nordmark ip_stack_t *ipst); 737c478bd9Sstevel@tonic-gate 747c478bd9Sstevel@tonic-gate /* 757c478bd9Sstevel@tonic-gate * Initialize the ire that is specific to IPv6 part and call 767c478bd9Sstevel@tonic-gate * ire_init_common to finish it. 77bd670b35SErik Nordmark * Returns zero or errno. 787c478bd9Sstevel@tonic-gate */ 79bd670b35SErik Nordmark int 8054da8755Ssowmini ire_init_v6(ire_t *ire, const in6_addr_t *v6addr, const in6_addr_t *v6mask, 81bd670b35SErik Nordmark const in6_addr_t *v6gateway, ushort_t type, ill_t *ill, 82bd670b35SErik Nordmark zoneid_t zoneid, uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst) 837c478bd9Sstevel@tonic-gate { 84bd670b35SErik Nordmark int error; 85c793af95Ssangeeta 8645916cd2Sjpk /* 87bd670b35SErik Nordmark * Reject IRE security attmakeribute creation/initialization 8845916cd2Sjpk * if system is not running in Trusted mode. 8945916cd2Sjpk */ 90bd670b35SErik Nordmark if (gc != NULL && !is_system_labeled()) 91bd670b35SErik Nordmark return (EINVAL); 927c478bd9Sstevel@tonic-gate 93f4b3ec61Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_alloced); 94bd670b35SErik Nordmark if (v6addr != NULL) 957c478bd9Sstevel@tonic-gate ire->ire_addr_v6 = *v6addr; 967c478bd9Sstevel@tonic-gate if (v6gateway != NULL) 977c478bd9Sstevel@tonic-gate ire->ire_gateway_addr_v6 = *v6gateway; 987c478bd9Sstevel@tonic-gate 99bd670b35SErik Nordmark /* Make sure we don't have stray values in some fields */ 100bd670b35SErik Nordmark switch (type) { 101bd670b35SErik Nordmark case IRE_LOOPBACK: 102bd670b35SErik Nordmark case IRE_HOST: 103bd670b35SErik Nordmark case IRE_LOCAL: 104bd670b35SErik Nordmark case IRE_IF_CLONE: 105bd670b35SErik Nordmark ire->ire_mask_v6 = ipv6_all_ones; 106bd670b35SErik Nordmark ire->ire_masklen = IPV6_ABITS; 107bd670b35SErik Nordmark break; 108bd670b35SErik Nordmark case IRE_PREFIX: 109bd670b35SErik Nordmark case IRE_DEFAULT: 110bd670b35SErik Nordmark case IRE_IF_RESOLVER: 111bd670b35SErik Nordmark case IRE_IF_NORESOLVER: 112bd670b35SErik Nordmark if (v6mask != NULL) { 113bd670b35SErik Nordmark ire->ire_mask_v6 = *v6mask; 114bd670b35SErik Nordmark ire->ire_masklen = 115bd670b35SErik Nordmark ip_mask_to_plen_v6(&ire->ire_mask_v6); 116bd670b35SErik Nordmark } 117bd670b35SErik Nordmark break; 118bd670b35SErik Nordmark case IRE_MULTICAST: 119bd670b35SErik Nordmark case IRE_NOROUTE: 120bd670b35SErik Nordmark ASSERT(v6mask == NULL); 121bd670b35SErik Nordmark break; 122bd670b35SErik Nordmark default: 123bd670b35SErik Nordmark ASSERT(0); 124bd670b35SErik Nordmark return (EINVAL); 125bd670b35SErik Nordmark } 1267c478bd9Sstevel@tonic-gate 127bd670b35SErik Nordmark error = ire_init_common(ire, type, ill, zoneid, flags, IPV6_VERSION, 128bd670b35SErik Nordmark gc, ipst); 129bd670b35SErik Nordmark if (error != NULL) 130bd670b35SErik Nordmark return (error); 131bd670b35SErik Nordmark 132bd670b35SErik Nordmark /* Determine which function pointers to use */ 133bd670b35SErik Nordmark ire->ire_postfragfn = ip_xmit; /* Common case */ 134bd670b35SErik Nordmark 135bd670b35SErik Nordmark switch (ire->ire_type) { 136bd670b35SErik Nordmark case IRE_LOCAL: 137bd670b35SErik Nordmark ire->ire_sendfn = ire_send_local_v6; 138bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_local_v6; 139bd670b35SErik Nordmark ASSERT(ire->ire_ill != NULL); 1401cb875aeSCathy Zhou if (ire->ire_ill->ill_flags & ILLF_NOACCEPT) 141bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_noaccept_v6; 142bd670b35SErik Nordmark break; 143bd670b35SErik Nordmark case IRE_LOOPBACK: 144bd670b35SErik Nordmark ire->ire_sendfn = ire_send_local_v6; 145bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_loopback_v6; 146bd670b35SErik Nordmark break; 147bd670b35SErik Nordmark case IRE_MULTICAST: 148bd670b35SErik Nordmark ire->ire_postfragfn = ip_postfrag_loopcheck; 149bd670b35SErik Nordmark ire->ire_sendfn = ire_send_multicast_v6; 150bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_multicast_v6; 151bd670b35SErik Nordmark break; 152bd670b35SErik Nordmark default: 1537c478bd9Sstevel@tonic-gate /* 154bd670b35SErik Nordmark * For IRE_IF_ALL and IRE_OFFLINK we forward received 155bd670b35SErik Nordmark * packets by default. 1567c478bd9Sstevel@tonic-gate */ 157bd670b35SErik Nordmark ire->ire_sendfn = ire_send_wire_v6; 158bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_forward_v6; 159bd670b35SErik Nordmark break; 1607c478bd9Sstevel@tonic-gate } 161bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 162bd670b35SErik Nordmark ire->ire_sendfn = ire_send_noroute_v6; 163bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_noroute_v6; 164bd670b35SErik Nordmark } else if (ire->ire_flags & RTF_MULTIRT) { 165bd670b35SErik Nordmark ire->ire_postfragfn = ip_postfrag_multirt_v6; 166bd670b35SErik Nordmark ire->ire_sendfn = ire_send_multirt_v6; 167bd670b35SErik Nordmark ire->ire_recvfn = ire_recv_multirt_v6; 1687c478bd9Sstevel@tonic-gate } 169bd670b35SErik Nordmark ire->ire_nce_capable = ire_determine_nce_capable(ire); 170bd670b35SErik Nordmark return (0); 1717c478bd9Sstevel@tonic-gate } 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate /* 1747c478bd9Sstevel@tonic-gate * ire_create_v6 is called to allocate and initialize a new IRE. 1757c478bd9Sstevel@tonic-gate * 1767c478bd9Sstevel@tonic-gate * NOTE : This is called as writer sometimes though not required 1777c478bd9Sstevel@tonic-gate * by this function. 1787c478bd9Sstevel@tonic-gate */ 17954da8755Ssowmini /* ARGSUSED */ 1807c478bd9Sstevel@tonic-gate ire_t * 1817c478bd9Sstevel@tonic-gate ire_create_v6(const in6_addr_t *v6addr, const in6_addr_t *v6mask, 182bd670b35SErik Nordmark const in6_addr_t *v6gateway, ushort_t type, ill_t *ill, zoneid_t zoneid, 183bd670b35SErik Nordmark uint_t flags, tsol_gc_t *gc, ip_stack_t *ipst) 1847c478bd9Sstevel@tonic-gate { 1857c478bd9Sstevel@tonic-gate ire_t *ire; 186bd670b35SErik Nordmark int error; 1877c478bd9Sstevel@tonic-gate 1887c478bd9Sstevel@tonic-gate ASSERT(!IN6_IS_ADDR_V4MAPPED(v6addr)); 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate ire = kmem_cache_alloc(ire_cache, KM_NOSLEEP); 1917c478bd9Sstevel@tonic-gate if (ire == NULL) { 192bd670b35SErik Nordmark DTRACE_PROBE(kmem__cache__alloc); 1937c478bd9Sstevel@tonic-gate return (NULL); 1947c478bd9Sstevel@tonic-gate } 1957c478bd9Sstevel@tonic-gate *ire = ire_null; 1967c478bd9Sstevel@tonic-gate 197bd670b35SErik Nordmark error = ire_init_v6(ire, v6addr, v6mask, v6gateway, 198bd670b35SErik Nordmark type, ill, zoneid, flags, gc, ipst); 1997c478bd9Sstevel@tonic-gate 200bd670b35SErik Nordmark if (error != 0) { 201bd670b35SErik Nordmark DTRACE_PROBE2(ire__init__v6, ire_t *, ire, int, error); 2027c478bd9Sstevel@tonic-gate kmem_cache_free(ire_cache, ire); 2037c478bd9Sstevel@tonic-gate return (NULL); 2047c478bd9Sstevel@tonic-gate } 2057c478bd9Sstevel@tonic-gate return (ire); 2067c478bd9Sstevel@tonic-gate } 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate /* 209bd670b35SErik Nordmark * Find the ill matching a multicast group. 2107c478bd9Sstevel@tonic-gate * Allows different routes for multicast addresses 2117c478bd9Sstevel@tonic-gate * in the unicast routing table (akin to FF::0/8 but could be more specific) 2127c478bd9Sstevel@tonic-gate * which point at different interfaces. This is used when IPV6_MULTICAST_IF 2137c478bd9Sstevel@tonic-gate * isn't specified (when sending) and when IPV6_JOIN_GROUP doesn't 2147c478bd9Sstevel@tonic-gate * specify the interface to join on. 2157c478bd9Sstevel@tonic-gate * 216bd670b35SErik Nordmark * Supports link-local addresses by using ire_route_recursive which follows 217bd670b35SErik Nordmark * the ill when recursing. 2187c478bd9Sstevel@tonic-gate * 219bd670b35SErik Nordmark * To handle CGTP, since we don't have a separate IRE_MULTICAST for each group 220bd670b35SErik Nordmark * and the MULTIRT property can be different for different groups, we 221bd670b35SErik Nordmark * extract RTF_MULTIRT from the special unicast route added for a group 222bd670b35SErik Nordmark * with CGTP and pass that back in the multirtp argument. 223bd670b35SErik Nordmark * This is used in ip_set_destination etc to set ixa_postfragfn for multicast. 224bd670b35SErik Nordmark * We have a setsrcp argument for the same reason. 2257c478bd9Sstevel@tonic-gate */ 226bd670b35SErik Nordmark ill_t * 227bd670b35SErik Nordmark ire_lookup_multi_ill_v6(const in6_addr_t *group, zoneid_t zoneid, 228bd670b35SErik Nordmark ip_stack_t *ipst, boolean_t *multirtp, in6_addr_t *setsrcp) 229bd670b35SErik Nordmark { 230bd670b35SErik Nordmark ire_t *ire; 231bd670b35SErik Nordmark ill_t *ill; 2327c478bd9Sstevel@tonic-gate 233bd670b35SErik Nordmark ire = ire_route_recursive_v6(group, 0, NULL, zoneid, NULL, 2349e3469d3SErik Nordmark MATCH_IRE_DSTONLY, IRR_NONE, 0, ipst, setsrcp, NULL, NULL); 235bd670b35SErik Nordmark ASSERT(ire != NULL); 236bd670b35SErik Nordmark 237bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 2387c478bd9Sstevel@tonic-gate ire_refrele(ire); 2397c478bd9Sstevel@tonic-gate return (NULL); 2407c478bd9Sstevel@tonic-gate } 2417c478bd9Sstevel@tonic-gate 242bd670b35SErik Nordmark if (multirtp != NULL) 243bd670b35SErik Nordmark *multirtp = (ire->ire_flags & RTF_MULTIRT) != 0; 2447c478bd9Sstevel@tonic-gate 245bd670b35SErik Nordmark ill = ire_nexthop_ill(ire); 246bd670b35SErik Nordmark ire_refrele(ire); 247bd670b35SErik Nordmark return (ill); 2487c478bd9Sstevel@tonic-gate } 2497c478bd9Sstevel@tonic-gate 2507c478bd9Sstevel@tonic-gate /* 2517c478bd9Sstevel@tonic-gate * This function takes a mask and returns number of bits set in the 2527c478bd9Sstevel@tonic-gate * mask (the represented prefix length). Assumes a contiguous mask. 2537c478bd9Sstevel@tonic-gate */ 2547c478bd9Sstevel@tonic-gate int 2557c478bd9Sstevel@tonic-gate ip_mask_to_plen_v6(const in6_addr_t *v6mask) 2567c478bd9Sstevel@tonic-gate { 2577c478bd9Sstevel@tonic-gate int bits; 2587c478bd9Sstevel@tonic-gate int plen = IPV6_ABITS; 2597c478bd9Sstevel@tonic-gate int i; 2607c478bd9Sstevel@tonic-gate 2617c478bd9Sstevel@tonic-gate for (i = 3; i >= 0; i--) { 2627c478bd9Sstevel@tonic-gate if (v6mask->s6_addr32[i] == 0) { 2637c478bd9Sstevel@tonic-gate plen -= 32; 2647c478bd9Sstevel@tonic-gate continue; 2657c478bd9Sstevel@tonic-gate } 2667c478bd9Sstevel@tonic-gate bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1; 2677c478bd9Sstevel@tonic-gate if (bits == 0) 2687c478bd9Sstevel@tonic-gate break; 2697c478bd9Sstevel@tonic-gate plen -= bits; 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate return (plen); 2737c478bd9Sstevel@tonic-gate } 2747c478bd9Sstevel@tonic-gate 2757c478bd9Sstevel@tonic-gate /* 2767c478bd9Sstevel@tonic-gate * Convert a prefix length to the mask for that prefix. 2777c478bd9Sstevel@tonic-gate * Returns the argument bitmask. 2787c478bd9Sstevel@tonic-gate */ 2797c478bd9Sstevel@tonic-gate in6_addr_t * 2807c478bd9Sstevel@tonic-gate ip_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask) 2817c478bd9Sstevel@tonic-gate { 2827c478bd9Sstevel@tonic-gate uint32_t *ptr; 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate if (plen < 0 || plen > IPV6_ABITS) 2857c478bd9Sstevel@tonic-gate return (NULL); 2867c478bd9Sstevel@tonic-gate *bitmask = ipv6_all_zeros; 287bd670b35SErik Nordmark if (plen == 0) 288bd670b35SErik Nordmark return (bitmask); 2897c478bd9Sstevel@tonic-gate 2907c478bd9Sstevel@tonic-gate ptr = (uint32_t *)bitmask; 2917c478bd9Sstevel@tonic-gate while (plen > 32) { 2927c478bd9Sstevel@tonic-gate *ptr++ = 0xffffffffU; 2937c478bd9Sstevel@tonic-gate plen -= 32; 2947c478bd9Sstevel@tonic-gate } 2957c478bd9Sstevel@tonic-gate *ptr = htonl(0xffffffffU << (32 - plen)); 2967c478bd9Sstevel@tonic-gate return (bitmask); 2977c478bd9Sstevel@tonic-gate } 2987c478bd9Sstevel@tonic-gate 2997c478bd9Sstevel@tonic-gate /* 300bd670b35SErik Nordmark * Add a fully initialized IPv6 IRE to the forwarding table. 301bd670b35SErik Nordmark * This returns NULL on failure, or a held IRE on success. 302bd670b35SErik Nordmark * Normally the returned IRE is the same as the argument. But a different 303bd670b35SErik Nordmark * IRE will be returned if the added IRE is deemed identical to an existing 304bd670b35SErik Nordmark * one. In that case ire_identical_ref will be increased. 305bd670b35SErik Nordmark * The caller always needs to do an ire_refrele() on the returned IRE. 3067c478bd9Sstevel@tonic-gate */ 307bd670b35SErik Nordmark ire_t * 308bd670b35SErik Nordmark ire_add_v6(ire_t *ire) 3097c478bd9Sstevel@tonic-gate { 3107c478bd9Sstevel@tonic-gate ire_t *ire1; 3117c478bd9Sstevel@tonic-gate int mask_table_index; 3127c478bd9Sstevel@tonic-gate irb_t *irb_ptr; 3137c478bd9Sstevel@tonic-gate ire_t **irep; 314bd670b35SErik Nordmark int match_flags; 3157c478bd9Sstevel@tonic-gate int error; 316f4b3ec61Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 3177c478bd9Sstevel@tonic-gate 3187c478bd9Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 3197c478bd9Sstevel@tonic-gate 3207c478bd9Sstevel@tonic-gate /* Make sure the address is properly masked. */ 3217c478bd9Sstevel@tonic-gate V6_MASK_COPY(ire->ire_addr_v6, ire->ire_mask_v6, ire->ire_addr_v6); 3227c478bd9Sstevel@tonic-gate 3237c478bd9Sstevel@tonic-gate mask_table_index = ip_mask_to_plen_v6(&ire->ire_mask_v6); 324bd670b35SErik Nordmark if ((ipst->ips_ip_forwarding_table_v6[mask_table_index]) == NULL) { 3257c478bd9Sstevel@tonic-gate irb_t *ptr; 3267c478bd9Sstevel@tonic-gate int i; 3277c478bd9Sstevel@tonic-gate 328bd670b35SErik Nordmark ptr = (irb_t *)mi_zalloc((ipst->ips_ip6_ftable_hash_size * 329bd670b35SErik Nordmark sizeof (irb_t))); 3307c478bd9Sstevel@tonic-gate if (ptr == NULL) { 3317c478bd9Sstevel@tonic-gate ire_delete(ire); 332bd670b35SErik Nordmark return (NULL); 3337c478bd9Sstevel@tonic-gate } 334f4b3ec61Sdh155122 for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 335bd670b35SErik Nordmark rw_init(&ptr[i].irb_lock, NULL, RW_DEFAULT, NULL); 336f1c454b4SSowmini Varadhan ptr[i].irb_ipst = ipst; 3377c478bd9Sstevel@tonic-gate } 338f4b3ec61Sdh155122 mutex_enter(&ipst->ips_ire_ft_init_lock); 339bd670b35SErik Nordmark if (ipst->ips_ip_forwarding_table_v6[mask_table_index] == 340bd670b35SErik Nordmark NULL) { 341bd670b35SErik Nordmark ipst->ips_ip_forwarding_table_v6[mask_table_index] = 342bd670b35SErik Nordmark ptr; 343f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ire_ft_init_lock); 3447c478bd9Sstevel@tonic-gate } else { 3457c478bd9Sstevel@tonic-gate /* 3467c478bd9Sstevel@tonic-gate * Some other thread won the race in 3477c478bd9Sstevel@tonic-gate * initializing the forwarding table at the 3487c478bd9Sstevel@tonic-gate * same index. 3497c478bd9Sstevel@tonic-gate */ 350f4b3ec61Sdh155122 mutex_exit(&ipst->ips_ire_ft_init_lock); 351bd670b35SErik Nordmark for (i = 0; i < ipst->ips_ip6_ftable_hash_size; i++) { 3527c478bd9Sstevel@tonic-gate rw_destroy(&ptr[i].irb_lock); 3537c478bd9Sstevel@tonic-gate } 3547c478bd9Sstevel@tonic-gate mi_free(ptr); 3557c478bd9Sstevel@tonic-gate } 3567c478bd9Sstevel@tonic-gate } 357f4b3ec61Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[mask_table_index][ 3587c478bd9Sstevel@tonic-gate IRE_ADDR_MASK_HASH_V6(ire->ire_addr_v6, ire->ire_mask_v6, 359f4b3ec61Sdh155122 ipst->ips_ip6_ftable_hash_size)]); 3607c478bd9Sstevel@tonic-gate 361bd670b35SErik Nordmark match_flags = (MATCH_IRE_MASK | MATCH_IRE_TYPE | MATCH_IRE_GW); 362bd670b35SErik Nordmark if (ire->ire_ill != NULL) 363bd670b35SErik Nordmark match_flags |= MATCH_IRE_ILL; 3647c478bd9Sstevel@tonic-gate /* 365bd670b35SErik Nordmark * Start the atomic add of the ire. Grab the bucket lock and the 366bd670b35SErik Nordmark * ill lock. Check for condemned. 3677c478bd9Sstevel@tonic-gate */ 368bd670b35SErik Nordmark error = ire_atomic_start(irb_ptr, ire); 369bd670b35SErik Nordmark if (error != 0) { 370bd670b35SErik Nordmark ire_delete(ire); 371bd670b35SErik Nordmark return (NULL); 372bd670b35SErik Nordmark } 373e11c3f44Smeem 3747c478bd9Sstevel@tonic-gate /* 375e11c3f44Smeem * If we are creating a hidden IRE, make sure we search for 376e11c3f44Smeem * hidden IREs when searching for duplicates below. 377e11c3f44Smeem * Otherwise, we might find an IRE on some other interface 378e11c3f44Smeem * that's not marked hidden. 3797c478bd9Sstevel@tonic-gate */ 380bd670b35SErik Nordmark if (ire->ire_testhidden) 381bd670b35SErik Nordmark match_flags |= MATCH_IRE_TESTHIDDEN; 3827c478bd9Sstevel@tonic-gate 3837c478bd9Sstevel@tonic-gate /* 3847c478bd9Sstevel@tonic-gate * Atomically check for duplicate and insert in the table. 3857c478bd9Sstevel@tonic-gate */ 3867c478bd9Sstevel@tonic-gate for (ire1 = irb_ptr->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 387bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire1)) 3887c478bd9Sstevel@tonic-gate continue; 3897c478bd9Sstevel@tonic-gate /* 390bd670b35SErik Nordmark * Here we need an exact match on zoneid, i.e., 391bd670b35SErik Nordmark * ire_match_args doesn't fit. 3927c478bd9Sstevel@tonic-gate */ 3937c478bd9Sstevel@tonic-gate if (ire1->ire_zoneid != ire->ire_zoneid) 3947c478bd9Sstevel@tonic-gate continue; 395bd670b35SErik Nordmark 396bd670b35SErik Nordmark if (ire1->ire_type != ire->ire_type) 397bd670b35SErik Nordmark continue; 398bd670b35SErik Nordmark 399bd670b35SErik Nordmark /* 400bd670b35SErik Nordmark * Note: We do not allow multiple routes that differ only 401bd670b35SErik Nordmark * in the gateway security attributes; such routes are 402bd670b35SErik Nordmark * considered duplicates. 403bd670b35SErik Nordmark * To change that we explicitly have to treat them as 404bd670b35SErik Nordmark * different here. 405bd670b35SErik Nordmark */ 4067c478bd9Sstevel@tonic-gate if (ire_match_args_v6(ire1, &ire->ire_addr_v6, 4077c478bd9Sstevel@tonic-gate &ire->ire_mask_v6, &ire->ire_gateway_addr_v6, 408bd670b35SErik Nordmark ire->ire_type, ire->ire_ill, ire->ire_zoneid, NULL, 409bd670b35SErik Nordmark match_flags)) { 4107c478bd9Sstevel@tonic-gate /* 4117c478bd9Sstevel@tonic-gate * Return the old ire after doing a REFHOLD. 4127c478bd9Sstevel@tonic-gate * As most of the callers continue to use the IRE 4137c478bd9Sstevel@tonic-gate * after adding, we return a held ire. This will 4147c478bd9Sstevel@tonic-gate * avoid a lookup in the caller again. If the callers 4157c478bd9Sstevel@tonic-gate * don't want to use it, they need to do a REFRELE. 41678377681SSowmini Varadhan * 41778377681SSowmini Varadhan * We only allow exactly one IRE_IF_CLONE for any dst, 41878377681SSowmini Varadhan * so, if the is an IF_CLONE, return the ire without 41978377681SSowmini Varadhan * an identical_ref, but with an ire_ref held. 4207c478bd9Sstevel@tonic-gate */ 42178377681SSowmini Varadhan if (ire->ire_type != IRE_IF_CLONE) { 42278377681SSowmini Varadhan atomic_add_32(&ire1->ire_identical_ref, 1); 42378377681SSowmini Varadhan DTRACE_PROBE2(ire__add__exist, ire_t *, ire1, 42478377681SSowmini Varadhan ire_t *, ire); 42578377681SSowmini Varadhan } 4267c478bd9Sstevel@tonic-gate ip1dbg(("found dup ire existing %p new %p", 4277c478bd9Sstevel@tonic-gate (void *)ire1, (void *)ire)); 428bd670b35SErik Nordmark ire_refhold(ire1); 4297c478bd9Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 4307c478bd9Sstevel@tonic-gate ire_delete(ire); 431bd670b35SErik Nordmark return (ire1); 4327c478bd9Sstevel@tonic-gate } 4337c478bd9Sstevel@tonic-gate } 4347c478bd9Sstevel@tonic-gate 4357c478bd9Sstevel@tonic-gate /* 436bd670b35SErik Nordmark * Normally we do head insertion since most things do not care about 437bd670b35SErik Nordmark * the order of the IREs in the bucket. 438bd670b35SErik Nordmark * However, due to shared-IP zones (and restrict_interzone_loopback) 439bd670b35SErik Nordmark * we can have an IRE_LOCAL as well as IRE_IF_CLONE for the same 440bd670b35SErik Nordmark * address. For that reason we do tail insertion for IRE_IF_CLONE. 4417c478bd9Sstevel@tonic-gate */ 4427c478bd9Sstevel@tonic-gate irep = (ire_t **)irb_ptr; 443bd670b35SErik Nordmark if (ire->ire_type & IRE_IF_CLONE) { 444bd670b35SErik Nordmark while ((ire1 = *irep) != NULL) 4457c478bd9Sstevel@tonic-gate irep = &ire1->ire_next; 4467c478bd9Sstevel@tonic-gate } 4477c478bd9Sstevel@tonic-gate /* Insert at *irep */ 4487c478bd9Sstevel@tonic-gate ire1 = *irep; 4497c478bd9Sstevel@tonic-gate if (ire1 != NULL) 4507c478bd9Sstevel@tonic-gate ire1->ire_ptpn = &ire->ire_next; 4517c478bd9Sstevel@tonic-gate ire->ire_next = ire1; 4527c478bd9Sstevel@tonic-gate /* Link the new one in. */ 4537c478bd9Sstevel@tonic-gate ire->ire_ptpn = irep; 4547c478bd9Sstevel@tonic-gate /* 4557c478bd9Sstevel@tonic-gate * ire_walk routines de-reference ire_next without holding 4567c478bd9Sstevel@tonic-gate * a lock. Before we point to the new ire, we want to make 4577c478bd9Sstevel@tonic-gate * sure the store that sets the ire_next of the new ire 4587c478bd9Sstevel@tonic-gate * reaches global visibility, so that ire_walk routines 4597c478bd9Sstevel@tonic-gate * don't see a truncated list of ires i.e if the ire_next 4607c478bd9Sstevel@tonic-gate * of the new ire gets set after we do "*irep = ire" due 4617c478bd9Sstevel@tonic-gate * to re-ordering, the ire_walk thread will see a NULL 4627c478bd9Sstevel@tonic-gate * once it accesses the ire_next of the new ire. 4637c478bd9Sstevel@tonic-gate * membar_producer() makes sure that the following store 4647c478bd9Sstevel@tonic-gate * happens *after* all of the above stores. 4657c478bd9Sstevel@tonic-gate */ 4667c478bd9Sstevel@tonic-gate membar_producer(); 4677c478bd9Sstevel@tonic-gate *irep = ire; 4687c478bd9Sstevel@tonic-gate ire->ire_bucket = irb_ptr; 4697c478bd9Sstevel@tonic-gate /* 4707c478bd9Sstevel@tonic-gate * We return a bumped up IRE above. Keep it symmetrical 4717c478bd9Sstevel@tonic-gate * so that the callers will always have to release. This 4727c478bd9Sstevel@tonic-gate * helps the callers of this function because they continue 4737c478bd9Sstevel@tonic-gate * to use the IRE after adding and hence they don't have to 4747c478bd9Sstevel@tonic-gate * lookup again after we return the IRE. 4757c478bd9Sstevel@tonic-gate * 4767c478bd9Sstevel@tonic-gate * NOTE : We don't have to use atomics as this is appearing 4777c478bd9Sstevel@tonic-gate * in the list for the first time and no one else can bump 4787c478bd9Sstevel@tonic-gate * up the reference count on this yet. 4797c478bd9Sstevel@tonic-gate */ 480bd670b35SErik Nordmark ire_refhold_locked(ire); 481f4b3ec61Sdh155122 BUMP_IRE_STATS(ipst->ips_ire_stats_v6, ire_stats_inserted); 4827c478bd9Sstevel@tonic-gate irb_ptr->irb_ire_cnt++; 4837c478bd9Sstevel@tonic-gate 484bd670b35SErik Nordmark if (ire->ire_ill != NULL) { 485bd670b35SErik Nordmark DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ire->ire_ill, 486968d2fd1Ssowmini (char *), "ire", (void *), ire); 487bd670b35SErik Nordmark ire->ire_ill->ill_ire_cnt++; 488bd670b35SErik Nordmark ASSERT(ire->ire_ill->ill_ire_cnt != 0); /* Wraparound */ 4897c478bd9Sstevel@tonic-gate } 4907c478bd9Sstevel@tonic-gate ire_atomic_end(irb_ptr, ire); 4917c478bd9Sstevel@tonic-gate 492bd670b35SErik Nordmark /* Make any caching of the IREs be notified or updated */ 4937c478bd9Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_ADD); 4947c478bd9Sstevel@tonic-gate 495bd670b35SErik Nordmark return (ire); 4967c478bd9Sstevel@tonic-gate } 4977c478bd9Sstevel@tonic-gate 4987c478bd9Sstevel@tonic-gate /* 4997c478bd9Sstevel@tonic-gate * Search for all HOST REDIRECT routes that are 5007c478bd9Sstevel@tonic-gate * pointing at the specified gateway and 5017c478bd9Sstevel@tonic-gate * delete them. This routine is called only 5027c478bd9Sstevel@tonic-gate * when a default gateway is going away. 5037c478bd9Sstevel@tonic-gate */ 5047c478bd9Sstevel@tonic-gate static void 505f4b3ec61Sdh155122 ire_delete_host_redirects_v6(const in6_addr_t *gateway, ip_stack_t *ipst) 5067c478bd9Sstevel@tonic-gate { 5077c478bd9Sstevel@tonic-gate irb_t *irb_ptr; 5087c478bd9Sstevel@tonic-gate irb_t *irb; 5097c478bd9Sstevel@tonic-gate ire_t *ire; 5107c478bd9Sstevel@tonic-gate in6_addr_t gw_addr_v6; 5117c478bd9Sstevel@tonic-gate int i; 5127c478bd9Sstevel@tonic-gate 5137c478bd9Sstevel@tonic-gate /* get the hash table for HOST routes */ 514f4b3ec61Sdh155122 irb_ptr = ipst->ips_ip_forwarding_table_v6[(IP6_MASK_TABLE_SIZE - 1)]; 5157c478bd9Sstevel@tonic-gate if (irb_ptr == NULL) 5167c478bd9Sstevel@tonic-gate return; 517f4b3ec61Sdh155122 for (i = 0; (i < ipst->ips_ip6_ftable_hash_size); i++) { 5187c478bd9Sstevel@tonic-gate irb = &irb_ptr[i]; 519bd670b35SErik Nordmark irb_refhold(irb); 5207c478bd9Sstevel@tonic-gate for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) { 5216bdb8e66Sdd193516 if (!(ire->ire_flags & RTF_DYNAMIC)) 5227c478bd9Sstevel@tonic-gate continue; 5237c478bd9Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 5247c478bd9Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 5257c478bd9Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 5267c478bd9Sstevel@tonic-gate if (IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) 5277c478bd9Sstevel@tonic-gate ire_delete(ire); 5287c478bd9Sstevel@tonic-gate } 529bd670b35SErik Nordmark irb_refrele(irb); 5307c478bd9Sstevel@tonic-gate } 5317c478bd9Sstevel@tonic-gate } 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate /* 5347c478bd9Sstevel@tonic-gate * Delete the specified IRE. 5357c478bd9Sstevel@tonic-gate * All calls should use ire_delete(). 5367c478bd9Sstevel@tonic-gate * Sometimes called as writer though not required by this function. 5377c478bd9Sstevel@tonic-gate * 5387c478bd9Sstevel@tonic-gate * NOTE : This function is called only if the ire was added 5397c478bd9Sstevel@tonic-gate * in the list. 5407c478bd9Sstevel@tonic-gate */ 5417c478bd9Sstevel@tonic-gate void 5427c478bd9Sstevel@tonic-gate ire_delete_v6(ire_t *ire) 5437c478bd9Sstevel@tonic-gate { 5447c478bd9Sstevel@tonic-gate in6_addr_t gw_addr_v6; 545f4b3ec61Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 5467c478bd9Sstevel@tonic-gate 547bd670b35SErik Nordmark /* 548bd670b35SErik Nordmark * Make sure ire_generation increases from ire_flush_cache happen 549bd670b35SErik Nordmark * after any lookup/reader has read ire_generation. 550bd670b35SErik Nordmark * Since the rw_enter makes us wait until any lookup/reader has 551bd670b35SErik Nordmark * completed we can exit the lock immediately. 552bd670b35SErik Nordmark */ 553bd670b35SErik Nordmark rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER); 554bd670b35SErik Nordmark rw_exit(&ipst->ips_ip6_ire_head_lock); 555bd670b35SErik Nordmark 5567c478bd9Sstevel@tonic-gate ASSERT(ire->ire_refcnt >= 1); 5577c478bd9Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 5587c478bd9Sstevel@tonic-gate 5597c478bd9Sstevel@tonic-gate ire_flush_cache_v6(ire, IRE_FLUSH_DELETE); 560bd670b35SErik Nordmark 5617c478bd9Sstevel@tonic-gate if (ire->ire_type == IRE_DEFAULT) { 5627c478bd9Sstevel@tonic-gate /* 5637c478bd9Sstevel@tonic-gate * when a default gateway is going away 5647c478bd9Sstevel@tonic-gate * delete all the host redirects pointing at that 5657c478bd9Sstevel@tonic-gate * gateway. 5667c478bd9Sstevel@tonic-gate */ 5677c478bd9Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 5687c478bd9Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 5697c478bd9Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 570f4b3ec61Sdh155122 ire_delete_host_redirects_v6(&gw_addr_v6, ipst); 5717c478bd9Sstevel@tonic-gate } 5727c478bd9Sstevel@tonic-gate 5737c478bd9Sstevel@tonic-gate /* 574bd670b35SErik Nordmark * If we are deleting an IRE_INTERFACE then we make sure we also 575bd670b35SErik Nordmark * delete any IRE_IF_CLONE that has been created from it. 576bd670b35SErik Nordmark * Those are always in ire_dep_children. 5777c478bd9Sstevel@tonic-gate */ 578bd670b35SErik Nordmark if ((ire->ire_type & IRE_INTERFACE) && ire->ire_dep_children != 0) 579bd670b35SErik Nordmark ire_dep_delete_if_clone(ire); 5807c478bd9Sstevel@tonic-gate 581bd670b35SErik Nordmark /* Remove from parent dependencies and child */ 582bd670b35SErik Nordmark rw_enter(&ipst->ips_ire_dep_lock, RW_WRITER); 583bd670b35SErik Nordmark if (ire->ire_dep_parent != NULL) { 584bd670b35SErik Nordmark ire_dep_remove(ire); 5857c478bd9Sstevel@tonic-gate } 586bd670b35SErik Nordmark while (ire->ire_dep_children != NULL) 587bd670b35SErik Nordmark ire_dep_remove(ire->ire_dep_children); 588bd670b35SErik Nordmark rw_exit(&ipst->ips_ire_dep_lock); 5897c478bd9Sstevel@tonic-gate } 5907c478bd9Sstevel@tonic-gate 5917c478bd9Sstevel@tonic-gate /* 592bd670b35SErik Nordmark * When an IRE is added or deleted this routine is called to make sure 593bd670b35SErik Nordmark * any caching of IRE information is notified or updated. 5947c478bd9Sstevel@tonic-gate * 595bd670b35SErik Nordmark * The flag argument indicates if the flush request is due to addition 596bd670b35SErik Nordmark * of new route (IRE_FLUSH_ADD), deletion of old route (IRE_FLUSH_DELETE), 597bd670b35SErik Nordmark * or a change to ire_gateway_addr (IRE_FLUSH_GWCHANGE). 5987c478bd9Sstevel@tonic-gate */ 5997c478bd9Sstevel@tonic-gate void 6007c478bd9Sstevel@tonic-gate ire_flush_cache_v6(ire_t *ire, int flag) 6017c478bd9Sstevel@tonic-gate { 602f4b3ec61Sdh155122 ip_stack_t *ipst = ire->ire_ipst; 6037c478bd9Sstevel@tonic-gate 604bd670b35SErik Nordmark /* 605bd670b35SErik Nordmark * IRE_IF_CLONE ire's don't provide any new information 606bd670b35SErik Nordmark * than the parent from which they are cloned, so don't 607bd670b35SErik Nordmark * perturb the generation numbers. 608bd670b35SErik Nordmark */ 609bd670b35SErik Nordmark if (ire->ire_type & IRE_IF_CLONE) 6107c478bd9Sstevel@tonic-gate return; 6117c478bd9Sstevel@tonic-gate 6127c478bd9Sstevel@tonic-gate /* 613bd670b35SErik Nordmark * Ensure that an ire_add during a lookup serializes the updates of 614bd670b35SErik Nordmark * the generation numbers under ire_head_lock so that the lookup gets 615bd670b35SErik Nordmark * either the old ire and old generation number, or a new ire and new 616bd670b35SErik Nordmark * generation number. 6177c478bd9Sstevel@tonic-gate */ 618bd670b35SErik Nordmark rw_enter(&ipst->ips_ip6_ire_head_lock, RW_WRITER); 619bd670b35SErik Nordmark 620bd670b35SErik Nordmark /* 621bd670b35SErik Nordmark * If a route was just added, we need to notify everybody that 622bd670b35SErik Nordmark * has cached an IRE_NOROUTE since there might now be a better 623bd670b35SErik Nordmark * route for them. 624bd670b35SErik Nordmark */ 6257c478bd9Sstevel@tonic-gate if (flag == IRE_FLUSH_ADD) { 626bd670b35SErik Nordmark ire_increment_generation(ipst->ips_ire_reject_v6); 627bd670b35SErik Nordmark ire_increment_generation(ipst->ips_ire_blackhole_v6); 628bd670b35SErik Nordmark } 629bd670b35SErik Nordmark 630bd670b35SErik Nordmark /* Adding a default can't otherwise provide a better route */ 631bd670b35SErik Nordmark if (ire->ire_type == IRE_DEFAULT && flag == IRE_FLUSH_ADD) { 632bd670b35SErik Nordmark rw_exit(&ipst->ips_ip6_ire_head_lock); 633bd670b35SErik Nordmark return; 634bd670b35SErik Nordmark } 635bd670b35SErik Nordmark 636bd670b35SErik Nordmark switch (flag) { 637bd670b35SErik Nordmark case IRE_FLUSH_DELETE: 638bd670b35SErik Nordmark case IRE_FLUSH_GWCHANGE: 6397c478bd9Sstevel@tonic-gate /* 640bd670b35SErik Nordmark * Update ire_generation for all ire_dep_children chains 641bd670b35SErik Nordmark * starting with this IRE 6427c478bd9Sstevel@tonic-gate */ 643bd670b35SErik Nordmark ire_dep_incr_generation(ire); 644bd670b35SErik Nordmark break; 645bd670b35SErik Nordmark case IRE_FLUSH_ADD: { 646bd670b35SErik Nordmark in6_addr_t addr; 647bd670b35SErik Nordmark in6_addr_t mask; 648bd670b35SErik Nordmark ip_stack_t *ipst = ire->ire_ipst; 649bd670b35SErik Nordmark uint_t masklen; 650bd670b35SErik Nordmark 6517c478bd9Sstevel@tonic-gate /* 652bd670b35SErik Nordmark * Find an IRE which is a shorter match than the ire to be added 653bd670b35SErik Nordmark * For any such IRE (which we repeat) we update the 654bd670b35SErik Nordmark * ire_generation the same way as in the delete case. 6557c478bd9Sstevel@tonic-gate */ 656bd670b35SErik Nordmark addr = ire->ire_addr_v6; 657bd670b35SErik Nordmark mask = ire->ire_mask_v6; 658bd670b35SErik Nordmark masklen = ip_mask_to_plen_v6(&mask); 659bd670b35SErik Nordmark 660bd670b35SErik Nordmark ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0, NULL, 661bd670b35SErik Nordmark ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst); 662bd670b35SErik Nordmark while (ire != NULL) { 663bd670b35SErik Nordmark /* We need to handle all in the same bucket */ 664bd670b35SErik Nordmark irb_increment_generation(ire->ire_bucket); 665bd670b35SErik Nordmark 666bd670b35SErik Nordmark mask = ire->ire_mask_v6; 667bd670b35SErik Nordmark ASSERT(masklen > ip_mask_to_plen_v6(&mask)); 668bd670b35SErik Nordmark masklen = ip_mask_to_plen_v6(&mask); 669bd670b35SErik Nordmark ire_refrele(ire); 670bd670b35SErik Nordmark ire = ire_ftable_lookup_impl_v6(&addr, &mask, NULL, 0, 671bd670b35SErik Nordmark NULL, ALL_ZONES, NULL, MATCH_IRE_SHORTERMASK, ipst); 6727c478bd9Sstevel@tonic-gate } 6737c478bd9Sstevel@tonic-gate } 674bd670b35SErik Nordmark break; 6757c478bd9Sstevel@tonic-gate } 676bd670b35SErik Nordmark rw_exit(&ipst->ips_ip6_ire_head_lock); 6777c478bd9Sstevel@tonic-gate } 6787c478bd9Sstevel@tonic-gate 6797c478bd9Sstevel@tonic-gate /* 6807c478bd9Sstevel@tonic-gate * Matches the arguments passed with the values in the ire. 6817c478bd9Sstevel@tonic-gate * 682bd670b35SErik Nordmark * Note: for match types that match using "ill" passed in, ill 6837c478bd9Sstevel@tonic-gate * must be checked for non-NULL before calling this routine. 6847c478bd9Sstevel@tonic-gate */ 685bd670b35SErik Nordmark boolean_t 6867c478bd9Sstevel@tonic-gate ire_match_args_v6(ire_t *ire, const in6_addr_t *addr, const in6_addr_t *mask, 687bd670b35SErik Nordmark const in6_addr_t *gateway, int type, const ill_t *ill, zoneid_t zoneid, 688bd670b35SErik Nordmark const ts_label_t *tsl, int match_flags) 6897c478bd9Sstevel@tonic-gate { 6907c478bd9Sstevel@tonic-gate in6_addr_t masked_addr; 6917c478bd9Sstevel@tonic-gate in6_addr_t gw_addr_v6; 6927c478bd9Sstevel@tonic-gate ill_t *ire_ill = NULL, *dst_ill; 693bd670b35SErik Nordmark ip_stack_t *ipst = ire->ire_ipst; 6947c478bd9Sstevel@tonic-gate 6957c478bd9Sstevel@tonic-gate ASSERT(ire->ire_ipversion == IPV6_VERSION); 6967c478bd9Sstevel@tonic-gate ASSERT(addr != NULL); 6977c478bd9Sstevel@tonic-gate ASSERT(mask != NULL); 6987c478bd9Sstevel@tonic-gate ASSERT((!(match_flags & MATCH_IRE_GW)) || gateway != NULL); 69944b099c4SSowmini Varadhan ASSERT((!(match_flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL))) || 700bd670b35SErik Nordmark (ill != NULL && ill->ill_isv6)); 7017c478bd9Sstevel@tonic-gate 7027c478bd9Sstevel@tonic-gate /* 703bd670b35SErik Nordmark * If MATCH_IRE_TESTHIDDEN is set, then only return the IRE if it 704bd670b35SErik Nordmark * is in fact hidden, to ensure the caller gets the right one. 7057c478bd9Sstevel@tonic-gate */ 706bd670b35SErik Nordmark if (ire->ire_testhidden) { 707bd670b35SErik Nordmark if (!(match_flags & MATCH_IRE_TESTHIDDEN)) 7087c478bd9Sstevel@tonic-gate return (B_FALSE); 709e11c3f44Smeem } 7107c478bd9Sstevel@tonic-gate 71145916cd2Sjpk if (zoneid != ALL_ZONES && zoneid != ire->ire_zoneid && 71245916cd2Sjpk ire->ire_zoneid != ALL_ZONES) { 7137c478bd9Sstevel@tonic-gate /* 714bd670b35SErik Nordmark * If MATCH_IRE_ZONEONLY has been set and the supplied zoneid 715bd670b35SErik Nordmark * does not match that of ire_zoneid, a failure to 7167c478bd9Sstevel@tonic-gate * match is reported at this point. Otherwise, since some IREs 7177c478bd9Sstevel@tonic-gate * that are available in the global zone can be used in local 7187c478bd9Sstevel@tonic-gate * zones, additional checks need to be performed: 7197c478bd9Sstevel@tonic-gate * 720bd670b35SErik Nordmark * IRE_LOOPBACK 721bd670b35SErik Nordmark * entries should never be matched in this situation. 722bd670b35SErik Nordmark * Each zone has its own IRE_LOOPBACK. 7237c478bd9Sstevel@tonic-gate * 724bd670b35SErik Nordmark * IRE_LOCAL 725bd670b35SErik Nordmark * We allow them for any zoneid. ire_route_recursive 726bd670b35SErik Nordmark * does additional checks when 727bd670b35SErik Nordmark * ip_restrict_interzone_loopback is set. 7287c478bd9Sstevel@tonic-gate * 729bd670b35SErik Nordmark * If ill_usesrc_ifindex is set 730bd670b35SErik Nordmark * Then we check if the zone has a valid source address 731bd670b35SErik Nordmark * on the usesrc ill. 7327c478bd9Sstevel@tonic-gate * 733bd670b35SErik Nordmark * If ire_ill is set, then check that the zone has an ipif 734bd670b35SErik Nordmark * on that ill. 735bd670b35SErik Nordmark * 736bd670b35SErik Nordmark * Outside of this function (in ire_round_robin) we check 737bd670b35SErik Nordmark * that any IRE_OFFLINK has a gateway that reachable from the 738bd670b35SErik Nordmark * zone when we have multiple choices (ECMP). 7397c478bd9Sstevel@tonic-gate */ 7407c478bd9Sstevel@tonic-gate if (match_flags & MATCH_IRE_ZONEONLY) 7417c478bd9Sstevel@tonic-gate return (B_FALSE); 742bd670b35SErik Nordmark if (ire->ire_type & IRE_LOOPBACK) 7437c478bd9Sstevel@tonic-gate return (B_FALSE); 744bd670b35SErik Nordmark 745bd670b35SErik Nordmark if (ire->ire_type & IRE_LOCAL) 746bd670b35SErik Nordmark goto matchit; 747bd670b35SErik Nordmark 7487c478bd9Sstevel@tonic-gate /* 749bd670b35SErik Nordmark * The normal case of IRE_ONLINK has a matching zoneid. 750bd670b35SErik Nordmark * Here we handle the case when shared-IP zones have been 751bd670b35SErik Nordmark * configured with IP addresses on vniN. In that case it 752bd670b35SErik Nordmark * is ok for traffic from a zone to use IRE_ONLINK routes 753bd670b35SErik Nordmark * if the ill has a usesrc pointing at vniN 754bd670b35SErik Nordmark * Applies to IRE_INTERFACE. 7557c478bd9Sstevel@tonic-gate */ 756bd670b35SErik Nordmark dst_ill = ire->ire_ill; 757bd670b35SErik Nordmark if (ire->ire_type & IRE_ONLINK) { 758bd670b35SErik Nordmark uint_t ifindex; 759bd670b35SErik Nordmark 760bd670b35SErik Nordmark /* 761bd670b35SErik Nordmark * Note there is no IRE_INTERFACE on vniN thus 762bd670b35SErik Nordmark * can't do an IRE lookup for a matching route. 763bd670b35SErik Nordmark */ 764bd670b35SErik Nordmark ifindex = dst_ill->ill_usesrc_ifindex; 765bd670b35SErik Nordmark if (ifindex == 0) 766bd670b35SErik Nordmark return (B_FALSE); 767bd670b35SErik Nordmark 7687c478bd9Sstevel@tonic-gate /* 7697c478bd9Sstevel@tonic-gate * If there is a usable source address in the 770bd670b35SErik Nordmark * zone, then it's ok to return this IRE_INTERFACE 7717c478bd9Sstevel@tonic-gate */ 772bd670b35SErik Nordmark if (!ipif_zone_avail(ifindex, dst_ill->ill_isv6, 773bd670b35SErik Nordmark zoneid, ipst)) { 774bd670b35SErik Nordmark ip3dbg(("ire_match_args: no usrsrc for zone" 7757c478bd9Sstevel@tonic-gate " dst_ill %p\n", (void *)dst_ill)); 7767c478bd9Sstevel@tonic-gate return (B_FALSE); 7777c478bd9Sstevel@tonic-gate } 7787c478bd9Sstevel@tonic-gate } 779bd670b35SErik Nordmark /* 78044b099c4SSowmini Varadhan * For example, with 781bd670b35SErik Nordmark * route add 11.0.0.0 gw1 -ifp bge0 782bd670b35SErik Nordmark * route add 11.0.0.0 gw2 -ifp bge1 783bd670b35SErik Nordmark * this code would differentiate based on 784bd670b35SErik Nordmark * where the sending zone has addresses. 785bd670b35SErik Nordmark * Only if the zone has an address on bge0 can it use the first 786bd670b35SErik Nordmark * route. It isn't clear if this behavior is documented 787bd670b35SErik Nordmark * anywhere. 788bd670b35SErik Nordmark */ 789bd670b35SErik Nordmark if (dst_ill != NULL && (ire->ire_type & IRE_OFFLINK)) { 7907c478bd9Sstevel@tonic-gate ipif_t *tipif; 7917c478bd9Sstevel@tonic-gate 792bd670b35SErik Nordmark mutex_enter(&dst_ill->ill_lock); 793bd670b35SErik Nordmark for (tipif = dst_ill->ill_ipif; 7947c478bd9Sstevel@tonic-gate tipif != NULL; tipif = tipif->ipif_next) { 795bd670b35SErik Nordmark if (!IPIF_IS_CONDEMNED(tipif) && 7967c478bd9Sstevel@tonic-gate (tipif->ipif_flags & IPIF_UP) && 79745916cd2Sjpk (tipif->ipif_zoneid == zoneid || 79845916cd2Sjpk tipif->ipif_zoneid == ALL_ZONES)) 7997c478bd9Sstevel@tonic-gate break; 8007c478bd9Sstevel@tonic-gate } 801bd670b35SErik Nordmark mutex_exit(&dst_ill->ill_lock); 8027c478bd9Sstevel@tonic-gate if (tipif == NULL) 8037c478bd9Sstevel@tonic-gate return (B_FALSE); 8047c478bd9Sstevel@tonic-gate } 8057c478bd9Sstevel@tonic-gate } 8067c478bd9Sstevel@tonic-gate 807bd670b35SErik Nordmark matchit: 80844b099c4SSowmini Varadhan ire_ill = ire->ire_ill; 8097c478bd9Sstevel@tonic-gate if (match_flags & MATCH_IRE_GW) { 8107c478bd9Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 8117c478bd9Sstevel@tonic-gate gw_addr_v6 = ire->ire_gateway_addr_v6; 8127c478bd9Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 8137c478bd9Sstevel@tonic-gate } 814bd670b35SErik Nordmark if (match_flags & MATCH_IRE_ILL) { 815e11c3f44Smeem 8167c478bd9Sstevel@tonic-gate /* 817bd670b35SErik Nordmark * If asked to match an ill, we *must* match 818bd670b35SErik Nordmark * on the ire_ill for ipmp test addresses, or 819bd670b35SErik Nordmark * any of the ill in the group for data addresses. 820bd670b35SErik Nordmark * If we don't, we may as well fail. 821bd670b35SErik Nordmark * However, we need an exception for IRE_LOCALs to ensure 822bd670b35SErik Nordmark * we loopback packets even sent to test addresses on different 823bd670b35SErik Nordmark * interfaces in the group. 8247c478bd9Sstevel@tonic-gate */ 825bd670b35SErik Nordmark if ((match_flags & MATCH_IRE_TESTHIDDEN) && 826bd670b35SErik Nordmark !(ire->ire_type & IRE_LOCAL)) { 827bd670b35SErik Nordmark if (ire->ire_ill != ill) 828bd670b35SErik Nordmark return (B_FALSE); 829bd670b35SErik Nordmark } else { 830bd670b35SErik Nordmark match_flags &= ~MATCH_IRE_TESTHIDDEN; 831bd670b35SErik Nordmark /* 832bd670b35SErik Nordmark * We know that ill is not NULL, but ire_ill could be 833bd670b35SErik Nordmark * NULL 834bd670b35SErik Nordmark */ 835bd670b35SErik Nordmark if (ire_ill == NULL || !IS_ON_SAME_LAN(ill, ire_ill)) 836bd670b35SErik Nordmark return (B_FALSE); 8377c478bd9Sstevel@tonic-gate } 838bd670b35SErik Nordmark } 83944b099c4SSowmini Varadhan if (match_flags & MATCH_IRE_SRC_ILL) { 84044b099c4SSowmini Varadhan if (ire_ill == NULL) 84144b099c4SSowmini Varadhan return (B_FALSE); 84244b099c4SSowmini Varadhan if (!IS_ON_SAME_LAN(ill, ire_ill)) { 84344b099c4SSowmini Varadhan if (ire_ill->ill_usesrc_ifindex == 0 || 84444b099c4SSowmini Varadhan (ire_ill->ill_usesrc_ifindex != 84544b099c4SSowmini Varadhan ill->ill_phyint->phyint_ifindex)) 84644b099c4SSowmini Varadhan return (B_FALSE); 84744b099c4SSowmini Varadhan } 84844b099c4SSowmini Varadhan } 84944b099c4SSowmini Varadhan 8507c478bd9Sstevel@tonic-gate /* No ire_addr_v6 bits set past the mask */ 8517c478bd9Sstevel@tonic-gate ASSERT(V6_MASK_EQ(ire->ire_addr_v6, ire->ire_mask_v6, 8527c478bd9Sstevel@tonic-gate ire->ire_addr_v6)); 8537c478bd9Sstevel@tonic-gate V6_MASK_COPY(*addr, *mask, masked_addr); 8547c478bd9Sstevel@tonic-gate if (V6_MASK_EQ(*addr, *mask, ire->ire_addr_v6) && 8557c478bd9Sstevel@tonic-gate ((!(match_flags & MATCH_IRE_GW)) || 85601685f97SSowmini Varadhan ((!(match_flags & MATCH_IRE_DIRECT)) || 85701685f97SSowmini Varadhan !(ire->ire_flags & RTF_INDIRECT)) && 8587c478bd9Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&gw_addr_v6, gateway)) && 859bd670b35SErik Nordmark ((!(match_flags & MATCH_IRE_TYPE)) || (ire->ire_type & type)) && 860bd670b35SErik Nordmark ((!(match_flags & MATCH_IRE_TESTHIDDEN)) || ire->ire_testhidden) && 861bd670b35SErik Nordmark ((!(match_flags & MATCH_IRE_MASK)) || 862bd670b35SErik Nordmark (IN6_ARE_ADDR_EQUAL(&ire->ire_mask_v6, mask))) && 86345916cd2Sjpk ((!(match_flags & MATCH_IRE_SECATTR)) || 86445916cd2Sjpk (!is_system_labeled()) || 86545916cd2Sjpk (tsol_ire_match_gwattr(ire, tsl) == 0))) { 8667c478bd9Sstevel@tonic-gate /* We found the matched IRE */ 8677c478bd9Sstevel@tonic-gate return (B_TRUE); 8687c478bd9Sstevel@tonic-gate } 8697c478bd9Sstevel@tonic-gate return (B_FALSE); 8707c478bd9Sstevel@tonic-gate } 8717c478bd9Sstevel@tonic-gate 8727c478bd9Sstevel@tonic-gate /* 873bd670b35SErik Nordmark * Check if the zoneid (not ALL_ZONES) has an IRE_INTERFACE for the specified 874bd670b35SErik Nordmark * gateway address. If ill is non-NULL we also match on it. 875bd670b35SErik Nordmark * The caller must hold a read lock on RADIX_NODE_HEAD if lock_held is set. 8767c478bd9Sstevel@tonic-gate */ 877bd670b35SErik Nordmark boolean_t 878bd670b35SErik Nordmark ire_gateway_ok_zone_v6(const in6_addr_t *gateway, zoneid_t zoneid, ill_t *ill, 879bd670b35SErik Nordmark const ts_label_t *tsl, ip_stack_t *ipst, boolean_t lock_held) 8807c478bd9Sstevel@tonic-gate { 881bd670b35SErik Nordmark ire_t *ire; 882bd670b35SErik Nordmark uint_t match_flags; 8837c478bd9Sstevel@tonic-gate 884bd670b35SErik Nordmark if (lock_held) 885bd670b35SErik Nordmark ASSERT(RW_READ_HELD(&ipst->ips_ip6_ire_head_lock)); 886bd670b35SErik Nordmark else 887bd670b35SErik Nordmark rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER); 8887c478bd9Sstevel@tonic-gate 889bd670b35SErik Nordmark match_flags = MATCH_IRE_TYPE | MATCH_IRE_SECATTR; 890bd670b35SErik Nordmark if (ill != NULL) 891bd670b35SErik Nordmark match_flags |= MATCH_IRE_ILL; 892bd670b35SErik Nordmark 893bd670b35SErik Nordmark ire = ire_ftable_lookup_impl_v6(gateway, &ipv6_all_zeros, 894bd670b35SErik Nordmark &ipv6_all_zeros, IRE_INTERFACE, ill, zoneid, tsl, match_flags, 895bd670b35SErik Nordmark ipst); 896bd670b35SErik Nordmark 897bd670b35SErik Nordmark if (!lock_held) 898bd670b35SErik Nordmark rw_exit(&ipst->ips_ip6_ire_head_lock); 899bd670b35SErik Nordmark if (ire != NULL) { 900bd670b35SErik Nordmark ire_refrele(ire); 901bd670b35SErik Nordmark return (B_TRUE); 902bd670b35SErik Nordmark } else { 903bd670b35SErik Nordmark return (B_FALSE); 9047c478bd9Sstevel@tonic-gate } 9057c478bd9Sstevel@tonic-gate } 9067c478bd9Sstevel@tonic-gate 9077c478bd9Sstevel@tonic-gate /* 9087c478bd9Sstevel@tonic-gate * Lookup a route in forwarding table. 9097c478bd9Sstevel@tonic-gate * specific lookup is indicated by passing the 9107c478bd9Sstevel@tonic-gate * required parameters and indicating the 9117c478bd9Sstevel@tonic-gate * match required in flag field. 9127c478bd9Sstevel@tonic-gate * 9137c478bd9Sstevel@tonic-gate * Supports link-local addresses by following the ipif/ill when recursing. 9147c478bd9Sstevel@tonic-gate */ 9157c478bd9Sstevel@tonic-gate ire_t * 9167c478bd9Sstevel@tonic-gate ire_ftable_lookup_v6(const in6_addr_t *addr, const in6_addr_t *mask, 917bd670b35SErik Nordmark const in6_addr_t *gateway, int type, const ill_t *ill, 918bd670b35SErik Nordmark zoneid_t zoneid, const ts_label_t *tsl, int flags, 919bd670b35SErik Nordmark uint32_t xmit_hint, ip_stack_t *ipst, uint_t *generationp) 9207c478bd9Sstevel@tonic-gate { 9217c478bd9Sstevel@tonic-gate ire_t *ire = NULL; 9227c478bd9Sstevel@tonic-gate 9237c478bd9Sstevel@tonic-gate ASSERT(addr != NULL); 9247c478bd9Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_MASK)) || mask != NULL); 9257c478bd9Sstevel@tonic-gate ASSERT((!(flags & MATCH_IRE_GW)) || gateway != NULL); 926bd670b35SErik Nordmark ASSERT(ill == NULL || ill->ill_isv6); 927bd670b35SErik Nordmark 928bd670b35SErik Nordmark ASSERT(!IN6_IS_ADDR_V4MAPPED(addr)); 9297c478bd9Sstevel@tonic-gate 9307c478bd9Sstevel@tonic-gate /* 931bd670b35SErik Nordmark * ire_match_args_v6() will dereference ill if MATCH_IRE_ILL 93244b099c4SSowmini Varadhan * or MATCH_IRE_SRC_ILL is set. 9337c478bd9Sstevel@tonic-gate */ 93444b099c4SSowmini Varadhan if ((flags & (MATCH_IRE_ILL|MATCH_IRE_SRC_ILL)) && (ill == NULL)) 9357c478bd9Sstevel@tonic-gate return (NULL); 9367c478bd9Sstevel@tonic-gate 937bd670b35SErik Nordmark rw_enter(&ipst->ips_ip6_ire_head_lock, RW_READER); 938bd670b35SErik Nordmark ire = ire_ftable_lookup_impl_v6(addr, mask, gateway, type, ill, zoneid, 939bd670b35SErik Nordmark tsl, flags, ipst); 940bd670b35SErik Nordmark if (ire == NULL) { 941bd670b35SErik Nordmark rw_exit(&ipst->ips_ip6_ire_head_lock); 942bd670b35SErik Nordmark return (NULL); 943bd670b35SErik Nordmark } 944bd670b35SErik Nordmark 945bd670b35SErik Nordmark /* 946bd670b35SErik Nordmark * round-robin only if we have more than one route in the bucket. 947bd670b35SErik Nordmark * ips_ip_ecmp_behavior controls when we do ECMP 948bd670b35SErik Nordmark * 2: always 949bd670b35SErik Nordmark * 1: for IRE_DEFAULT and /0 IRE_INTERFACE 950bd670b35SErik Nordmark * 0: never 951bd670b35SErik Nordmark * 952bd670b35SErik Nordmark * Note: if we found an IRE_IF_CLONE we won't look at the bucket with 953bd670b35SErik Nordmark * other ECMP IRE_INTERFACEs since the IRE_IF_CLONE is a /128 match 954bd670b35SErik Nordmark * and the IRE_INTERFACESs are likely to be shorter matches. 955bd670b35SErik Nordmark */ 956bd670b35SErik Nordmark if (ire->ire_bucket->irb_ire_cnt > 1 && !(flags & MATCH_IRE_GW)) { 957bd670b35SErik Nordmark if (ipst->ips_ip_ecmp_behavior == 2 || 958bd670b35SErik Nordmark (ipst->ips_ip_ecmp_behavior == 1 && 959bd670b35SErik Nordmark IS_DEFAULT_ROUTE_V6(ire))) { 960bd670b35SErik Nordmark ire_t *next_ire; 961bd670b35SErik Nordmark ire_ftable_args_t margs; 962bd670b35SErik Nordmark 963188e1664SErik Nordmark bzero(&margs, sizeof (margs)); 964bd670b35SErik Nordmark margs.ift_addr_v6 = *addr; 965bd670b35SErik Nordmark if (mask != NULL) 966bd670b35SErik Nordmark margs.ift_mask_v6 = *mask; 967bd670b35SErik Nordmark if (gateway != NULL) 968bd670b35SErik Nordmark margs.ift_gateway_v6 = *gateway; 969bd670b35SErik Nordmark margs.ift_type = type; 970bd670b35SErik Nordmark margs.ift_ill = ill; 971bd670b35SErik Nordmark margs.ift_zoneid = zoneid; 972bd670b35SErik Nordmark margs.ift_tsl = tsl; 973bd670b35SErik Nordmark margs.ift_flags = flags; 974bd670b35SErik Nordmark 975bd670b35SErik Nordmark next_ire = ire_round_robin(ire->ire_bucket, &margs, 976bd670b35SErik Nordmark xmit_hint, ire, ipst); 977bd670b35SErik Nordmark if (next_ire == NULL) { 978bd670b35SErik Nordmark /* keep ire if next_ire is null */ 979bd670b35SErik Nordmark goto done; 980bd670b35SErik Nordmark } 981bd670b35SErik Nordmark ire_refrele(ire); 982bd670b35SErik Nordmark ire = next_ire; 983bd670b35SErik Nordmark } 984bd670b35SErik Nordmark } 985bd670b35SErik Nordmark 986bd670b35SErik Nordmark done: 987bd670b35SErik Nordmark /* Return generation before dropping lock */ 988bd670b35SErik Nordmark if (generationp != NULL) 989bd670b35SErik Nordmark *generationp = ire->ire_generation; 990bd670b35SErik Nordmark 991bd670b35SErik Nordmark rw_exit(&ipst->ips_ip6_ire_head_lock); 992bd670b35SErik Nordmark 993bd670b35SErik Nordmark /* 994bd670b35SErik Nordmark * For shared-IP zones we need additional checks to what was 995bd670b35SErik Nordmark * done in ire_match_args to make sure IRE_LOCALs are handled. 996bd670b35SErik Nordmark * 997bd670b35SErik Nordmark * When ip_restrict_interzone_loopback is set, then 998bd670b35SErik Nordmark * we ensure that IRE_LOCAL are only used for loopback 999bd670b35SErik Nordmark * between zones when the logical "Ethernet" would 1000bd670b35SErik Nordmark * have looped them back. That is, if in the absense of 1001bd670b35SErik Nordmark * the IRE_LOCAL we would have sent to packet out the 1002bd670b35SErik Nordmark * same ill. 1003bd670b35SErik Nordmark */ 1004bd670b35SErik Nordmark if ((ire->ire_type & IRE_LOCAL) && zoneid != ALL_ZONES && 1005bd670b35SErik Nordmark ire->ire_zoneid != zoneid && ire->ire_zoneid != ALL_ZONES && 1006bd670b35SErik Nordmark ipst->ips_ip_restrict_interzone_loopback) { 1007bd670b35SErik Nordmark ire = ire_alt_local(ire, zoneid, tsl, ill, generationp); 1008bd670b35SErik Nordmark ASSERT(ire != NULL); 1009bd670b35SErik Nordmark } 1010bd670b35SErik Nordmark 1011bd670b35SErik Nordmark return (ire); 1012bd670b35SErik Nordmark } 1013bd670b35SErik Nordmark 1014bd670b35SErik Nordmark /* 1015bd670b35SErik Nordmark * Look up a single ire. The caller holds either the read or write lock. 1016bd670b35SErik Nordmark */ 1017bd670b35SErik Nordmark ire_t * 1018bd670b35SErik Nordmark ire_ftable_lookup_impl_v6(const in6_addr_t *addr, const in6_addr_t *mask, 1019bd670b35SErik Nordmark const in6_addr_t *gateway, int type, const ill_t *ill, 1020bd670b35SErik Nordmark zoneid_t zoneid, const ts_label_t *tsl, int flags, 1021bd670b35SErik Nordmark ip_stack_t *ipst) 1022bd670b35SErik Nordmark { 1023bd670b35SErik Nordmark irb_t *irb_ptr; 1024bd670b35SErik Nordmark ire_t *ire = NULL; 1025bd670b35SErik Nordmark int i; 1026bd670b35SErik Nordmark 1027bd670b35SErik Nordmark ASSERT(RW_LOCK_HELD(&ipst->ips_ip6_ire_head_lock)); 1028bd670b35SErik Nordmark 10297c478bd9Sstevel@tonic-gate /* 10307c478bd9Sstevel@tonic-gate * If the mask is known, the lookup 10317c478bd9Sstevel@tonic-gate * is simple, if the mask is not known 10327c478bd9Sstevel@tonic-gate * we need to search. 10337c478bd9Sstevel@tonic-gate */ 10347c478bd9Sstevel@tonic-gate if (flags & MATCH_IRE_MASK) { 10357c478bd9Sstevel@tonic-gate uint_t masklen; 10367c478bd9Sstevel@tonic-gate 10377c478bd9Sstevel@tonic-gate masklen = ip_mask_to_plen_v6(mask); 1038bd670b35SErik Nordmark if (ipst->ips_ip_forwarding_table_v6[masklen] == NULL) { 10397c478bd9Sstevel@tonic-gate return (NULL); 1040bd670b35SErik Nordmark } 1041f4b3ec61Sdh155122 irb_ptr = &(ipst->ips_ip_forwarding_table_v6[masklen][ 1042f4b3ec61Sdh155122 IRE_ADDR_MASK_HASH_V6(*addr, *mask, 1043f4b3ec61Sdh155122 ipst->ips_ip6_ftable_hash_size)]); 10447c478bd9Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 10457c478bd9Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 10467c478bd9Sstevel@tonic-gate ire = ire->ire_next) { 1047bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire)) 10487c478bd9Sstevel@tonic-gate continue; 10497c478bd9Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, mask, gateway, type, 1050bd670b35SErik Nordmark ill, zoneid, tsl, flags)) 10517c478bd9Sstevel@tonic-gate goto found_ire; 10527c478bd9Sstevel@tonic-gate } 10537c478bd9Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 10547c478bd9Sstevel@tonic-gate } else { 1055bd670b35SErik Nordmark uint_t masklen; 1056bd670b35SErik Nordmark 10577c478bd9Sstevel@tonic-gate /* 10587c478bd9Sstevel@tonic-gate * In this case we don't know the mask, we need to 10597c478bd9Sstevel@tonic-gate * search the table assuming different mask sizes. 10607c478bd9Sstevel@tonic-gate */ 1061bd670b35SErik Nordmark if (flags & MATCH_IRE_SHORTERMASK) { 1062bd670b35SErik Nordmark masklen = ip_mask_to_plen_v6(mask); 1063bd670b35SErik Nordmark if (masklen == 0) { 1064bd670b35SErik Nordmark /* Nothing shorter than zero */ 1065bd670b35SErik Nordmark return (NULL); 1066bd670b35SErik Nordmark } 1067bd670b35SErik Nordmark masklen--; 1068bd670b35SErik Nordmark } else { 1069bd670b35SErik Nordmark masklen = IP6_MASK_TABLE_SIZE - 1; 1070bd670b35SErik Nordmark } 1071bd670b35SErik Nordmark 1072bd670b35SErik Nordmark for (i = masklen; i >= 0; i--) { 10737c478bd9Sstevel@tonic-gate in6_addr_t tmpmask; 10747c478bd9Sstevel@tonic-gate 1075f4b3ec61Sdh155122 if ((ipst->ips_ip_forwarding_table_v6[i]) == NULL) 10767c478bd9Sstevel@tonic-gate continue; 10777c478bd9Sstevel@tonic-gate (void) ip_plen_to_mask_v6(i, &tmpmask); 1078f4b3ec61Sdh155122 irb_ptr = &ipst->ips_ip_forwarding_table_v6[i][ 10797c478bd9Sstevel@tonic-gate IRE_ADDR_MASK_HASH_V6(*addr, tmpmask, 1080f4b3ec61Sdh155122 ipst->ips_ip6_ftable_hash_size)]; 10817c478bd9Sstevel@tonic-gate rw_enter(&irb_ptr->irb_lock, RW_READER); 10827c478bd9Sstevel@tonic-gate for (ire = irb_ptr->irb_ire; ire != NULL; 10837c478bd9Sstevel@tonic-gate ire = ire->ire_next) { 1084bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire)) 10857c478bd9Sstevel@tonic-gate continue; 10867c478bd9Sstevel@tonic-gate if (ire_match_args_v6(ire, addr, 1087bd670b35SErik Nordmark &ire->ire_mask_v6, gateway, type, ill, 1088bd670b35SErik Nordmark zoneid, tsl, flags)) 10897c478bd9Sstevel@tonic-gate goto found_ire; 10907c478bd9Sstevel@tonic-gate } 10917c478bd9Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 10927c478bd9Sstevel@tonic-gate } 10937c478bd9Sstevel@tonic-gate } 10947c478bd9Sstevel@tonic-gate ASSERT(ire == NULL); 10957c478bd9Sstevel@tonic-gate ip1dbg(("ire_ftable_lookup_v6: returning NULL ire")); 10967c478bd9Sstevel@tonic-gate return (NULL); 1097bd670b35SErik Nordmark 10987c478bd9Sstevel@tonic-gate found_ire: 1099bd670b35SErik Nordmark ire_refhold(ire); 11007c478bd9Sstevel@tonic-gate rw_exit(&irb_ptr->irb_lock); 11017c478bd9Sstevel@tonic-gate return (ire); 11027c478bd9Sstevel@tonic-gate } 1103bd670b35SErik Nordmark 1104bd670b35SErik Nordmark 11057c478bd9Sstevel@tonic-gate /* 1106bd670b35SErik Nordmark * This function is called by 1107bd670b35SErik Nordmark * ip_input/ire_route_recursive when doing a route lookup on only the 1108bd670b35SErik Nordmark * destination address. 1109bd670b35SErik Nordmark * 1110bd670b35SErik Nordmark * The optimizations of this function over ire_ftable_lookup are: 1111bd670b35SErik Nordmark * o removing unnecessary flag matching 1112bd670b35SErik Nordmark * o doing longest prefix match instead of overloading it further 1113bd670b35SErik Nordmark * with the unnecessary "best_prefix_match" 1114bd670b35SErik Nordmark * 1115bd670b35SErik Nordmark * If no route is found we return IRE_NOROUTE. 11167c478bd9Sstevel@tonic-gate */ 1117bd670b35SErik Nordmark ire_t * 1118bd670b35SErik Nordmark ire_ftable_lookup_simple_v6(const in6_addr_t *addr, uint32_t xmit_hint, 1119bd670b35SErik Nordmark ip_stack_t *ipst, uint_t *generationp) 1120bd670b35SErik Nordmark { 1121bd670b35SErik Nordmark ire_t *ire; 11227c478bd9Sstevel@tonic-gate 1123bd670b35SErik Nordmark ire = ire_ftable_lookup_v6(addr, NULL, NULL, 0, NULL, ALL_ZONES, NULL, 1124bd670b35SErik Nordmark MATCH_IRE_DSTONLY, xmit_hint, ipst, generationp); 11257c478bd9Sstevel@tonic-gate if (ire == NULL) { 1126bd670b35SErik Nordmark ire = ire_reject(ipst, B_TRUE); 1127bd670b35SErik Nordmark if (generationp != NULL) 1128bd670b35SErik Nordmark *generationp = IRE_GENERATION_VERIFY; 11297c478bd9Sstevel@tonic-gate } 1130bd670b35SErik Nordmark /* ftable_lookup did round robin */ 11317c478bd9Sstevel@tonic-gate return (ire); 11327c478bd9Sstevel@tonic-gate } 1133bd670b35SErik Nordmark 1134bd670b35SErik Nordmark ire_t * 113544b099c4SSowmini Varadhan ip_select_route_v6(const in6_addr_t *dst, const in6_addr_t src, 113644b099c4SSowmini Varadhan ip_xmit_attr_t *ixa, uint_t *generationp, in6_addr_t *setsrcp, 113744b099c4SSowmini Varadhan int *errorp, boolean_t *multirtp) 1138bd670b35SErik Nordmark { 1139bd670b35SErik Nordmark ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4)); 1140bd670b35SErik Nordmark 114144b099c4SSowmini Varadhan return (ip_select_route(dst, src, ixa, generationp, setsrcp, errorp, 1142bd670b35SErik Nordmark multirtp)); 1143bd670b35SErik Nordmark } 1144bd670b35SErik Nordmark 11457c478bd9Sstevel@tonic-gate /* 1146bd670b35SErik Nordmark * Recursively look for a route to the destination. Can also match on 1147bd670b35SErik Nordmark * the zoneid, ill, and label. Used for the data paths. See also 1148bd670b35SErik Nordmark * ire_route_recursive_dstonly. 1149bd670b35SErik Nordmark * 11509e3469d3SErik Nordmark * If IRR_ALLOCATE is not set then we will only inspect the existing IREs; never 1151bd670b35SErik Nordmark * create an IRE_IF_CLONE. This is used on the receive side when we are not 1152bd670b35SErik Nordmark * forwarding. 11539e3469d3SErik Nordmark * If IRR_INCOMPLETE is set then we return the IRE even if we can't correctly 11549e3469d3SErik Nordmark * resolve the gateway. 1155bd670b35SErik Nordmark * 1156bd670b35SErik Nordmark * Note that this function never returns NULL. It returns an IRE_NOROUTE 1157bd670b35SErik Nordmark * instead. 1158bd670b35SErik Nordmark * 1159bd670b35SErik Nordmark * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it 1160bd670b35SErik Nordmark * is an error. 1161bd670b35SErik Nordmark * Allow at most one RTF_INDIRECT. 11627c478bd9Sstevel@tonic-gate */ 1163bd670b35SErik Nordmark ire_t * 1164bd670b35SErik Nordmark ire_route_recursive_impl_v6(ire_t *ire, 1165bd670b35SErik Nordmark const in6_addr_t *nexthop, uint_t ire_type, const ill_t *ill_arg, 1166bd670b35SErik Nordmark zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args, 11679e3469d3SErik Nordmark uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst, 1168bd670b35SErik Nordmark in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp) 1169bd670b35SErik Nordmark { 1170bd670b35SErik Nordmark int i, j; 1171bd670b35SErik Nordmark in6_addr_t v6nexthop = *nexthop; 1172bd670b35SErik Nordmark ire_t *ires[MAX_IRE_RECURSION]; 1173bd670b35SErik Nordmark uint_t generation; 1174bd670b35SErik Nordmark uint_t generations[MAX_IRE_RECURSION]; 1175bd670b35SErik Nordmark boolean_t need_refrele = B_FALSE; 1176bd670b35SErik Nordmark boolean_t invalidate = B_FALSE; 1177bd670b35SErik Nordmark ill_t *ill = NULL; 117801685f97SSowmini Varadhan uint_t maskoff = (IRE_LOCAL|IRE_LOOPBACK); 1179bd670b35SErik Nordmark 1180bd670b35SErik Nordmark if (setsrcp != NULL) 1181bd670b35SErik Nordmark ASSERT(IN6_IS_ADDR_UNSPECIFIED(setsrcp)); 1182bd670b35SErik Nordmark if (gwattrp != NULL) 1183bd670b35SErik Nordmark ASSERT(*gwattrp == NULL); 1184bd670b35SErik Nordmark 1185bd670b35SErik Nordmark /* 1186bd670b35SErik Nordmark * We iterate up to three times to resolve a route, even though 1187bd670b35SErik Nordmark * we have four slots in the array. The extra slot is for an 1188bd670b35SErik Nordmark * IRE_IF_CLONE we might need to create. 1189bd670b35SErik Nordmark */ 1190bd670b35SErik Nordmark i = 0; 1191bd670b35SErik Nordmark while (i < MAX_IRE_RECURSION - 1) { 1192bd670b35SErik Nordmark /* ire_ftable_lookup handles round-robin/ECMP */ 11937c478bd9Sstevel@tonic-gate if (ire == NULL) { 1194bd670b35SErik Nordmark ire = ire_ftable_lookup_v6(&v6nexthop, 0, 0, ire_type, 119544b099c4SSowmini Varadhan (ill != NULL ? ill : ill_arg), zoneid, tsl, 1196bd670b35SErik Nordmark match_args, xmit_hint, ipst, &generation); 11977c478bd9Sstevel@tonic-gate } else { 1198bd670b35SErik Nordmark /* Caller passed it; extra hold since we will rele */ 1199bd670b35SErik Nordmark ire_refhold(ire); 1200bd670b35SErik Nordmark if (generationp != NULL) 1201bd670b35SErik Nordmark generation = *generationp; 1202bd670b35SErik Nordmark else 1203bd670b35SErik Nordmark generation = IRE_GENERATION_VERIFY; 12047c478bd9Sstevel@tonic-gate } 12057c478bd9Sstevel@tonic-gate 120601685f97SSowmini Varadhan if (ire == NULL) { 120701685f97SSowmini Varadhan if (i > 0 && (irr_flags & IRR_INCOMPLETE)) { 12089e3469d3SErik Nordmark ire = ires[0]; 12099e3469d3SErik Nordmark ire_refhold(ire); 12109e3469d3SErik Nordmark } else { 1211bd670b35SErik Nordmark ire = ire_reject(ipst, B_TRUE); 12129e3469d3SErik Nordmark } 1213bd670b35SErik Nordmark goto error; 1214bd670b35SErik Nordmark } 121501685f97SSowmini Varadhan 121601685f97SSowmini Varadhan /* Need to return the ire with RTF_REJECT|BLACKHOLE */ 121701685f97SSowmini Varadhan if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) 121801685f97SSowmini Varadhan goto error; 121901685f97SSowmini Varadhan 122001685f97SSowmini Varadhan ASSERT(!(ire->ire_type & IRE_MULTICAST)); /* Not in ftable */ 1221*fff7ec1dSSowmini Varadhan /* 1222*fff7ec1dSSowmini Varadhan * Verify that the IRE_IF_CLONE has a consistent generation 1223*fff7ec1dSSowmini Varadhan * number. 1224*fff7ec1dSSowmini Varadhan */ 1225*fff7ec1dSSowmini Varadhan if ((ire->ire_type & IRE_IF_CLONE) && !ire_clone_verify(ire)) { 1226*fff7ec1dSSowmini Varadhan ire_refrele(ire); 1227*fff7ec1dSSowmini Varadhan ire = NULL; 1228*fff7ec1dSSowmini Varadhan continue; 1229*fff7ec1dSSowmini Varadhan } 123001685f97SSowmini Varadhan 123101685f97SSowmini Varadhan /* 123201685f97SSowmini Varadhan * Don't allow anything unusual past the first iteration. 123301685f97SSowmini Varadhan * After the first lookup, we should no longer look for 123401685f97SSowmini Varadhan * (IRE_LOCAL|IRE_LOOPBACK) or RTF_INDIRECT routes. 123501685f97SSowmini Varadhan * 123601685f97SSowmini Varadhan * In addition, after we have found a direct IRE_OFFLINK, 123701685f97SSowmini Varadhan * we should only look for interface or clone routes. 123801685f97SSowmini Varadhan */ 123901685f97SSowmini Varadhan match_args |= MATCH_IRE_DIRECT; /* no more RTF_INDIRECTs */ 124001685f97SSowmini Varadhan if ((ire->ire_type & IRE_OFFLINK) && 124101685f97SSowmini Varadhan !(ire->ire_flags & RTF_INDIRECT)) { 124201685f97SSowmini Varadhan ire_type = IRE_IF_ALL; 124301685f97SSowmini Varadhan } else { 124401685f97SSowmini Varadhan if (!(match_args & MATCH_IRE_TYPE)) 124501685f97SSowmini Varadhan ire_type = (IRE_OFFLINK|IRE_ONLINK); 124601685f97SSowmini Varadhan ire_type &= ~maskoff; /* no more LOCAL, LOOPBACK */ 1247bd670b35SErik Nordmark } 124801685f97SSowmini Varadhan match_args |= MATCH_IRE_TYPE; 1249bd670b35SErik Nordmark /* We have a usable IRE */ 1250bd670b35SErik Nordmark ires[i] = ire; 1251bd670b35SErik Nordmark generations[i] = generation; 1252bd670b35SErik Nordmark i++; 1253bd670b35SErik Nordmark 1254bd670b35SErik Nordmark /* The first RTF_SETSRC address is passed back if setsrcp */ 1255bd670b35SErik Nordmark if ((ire->ire_flags & RTF_SETSRC) && 1256bd670b35SErik Nordmark setsrcp != NULL && IN6_IS_ADDR_UNSPECIFIED(setsrcp)) { 1257bd670b35SErik Nordmark ASSERT(!IN6_IS_ADDR_UNSPECIFIED( 1258bd670b35SErik Nordmark &ire->ire_setsrc_addr_v6)); 1259bd670b35SErik Nordmark *setsrcp = ire->ire_setsrc_addr_v6; 12607c478bd9Sstevel@tonic-gate } 12617c478bd9Sstevel@tonic-gate 1262bd670b35SErik Nordmark /* The first ire_gw_secattr is passed back if gwattrp */ 1263bd670b35SErik Nordmark if (ire->ire_gw_secattr != NULL && 1264bd670b35SErik Nordmark gwattrp != NULL && *gwattrp == NULL) 1265bd670b35SErik Nordmark *gwattrp = ire->ire_gw_secattr; 1266bd670b35SErik Nordmark 1267bd670b35SErik Nordmark /* 1268bd670b35SErik Nordmark * Check if we have a short-cut pointer to an IRE for this 1269bd670b35SErik Nordmark * destination, and that the cached dependency isn't stale. 1270bd670b35SErik Nordmark * In that case we've rejoined an existing tree towards a 1271bd670b35SErik Nordmark * parent, thus we don't need to continue the loop to 1272bd670b35SErik Nordmark * discover the rest of the tree. 1273bd670b35SErik Nordmark */ 12747c478bd9Sstevel@tonic-gate mutex_enter(&ire->ire_lock); 1275bd670b35SErik Nordmark if (ire->ire_dep_parent != NULL && 1276bd670b35SErik Nordmark ire->ire_dep_parent->ire_generation == 1277bd670b35SErik Nordmark ire->ire_dep_parent_generation) { 12787c478bd9Sstevel@tonic-gate mutex_exit(&ire->ire_lock); 1279bd670b35SErik Nordmark ire = NULL; 1280bd670b35SErik Nordmark goto done; 1281bd670b35SErik Nordmark } 1282bd670b35SErik Nordmark mutex_exit(&ire->ire_lock); 12837c478bd9Sstevel@tonic-gate 1284bd670b35SErik Nordmark /* 1285bd670b35SErik Nordmark * If this type should have an ire_nce_cache (even if it 1286bd670b35SErik Nordmark * doesn't yet have one) then we are done. Includes 1287bd670b35SErik Nordmark * IRE_INTERFACE with a full 128 bit mask. 1288bd670b35SErik Nordmark */ 1289bd670b35SErik Nordmark if (ire->ire_nce_capable) { 1290bd670b35SErik Nordmark ire = NULL; 1291bd670b35SErik Nordmark goto done; 12927c478bd9Sstevel@tonic-gate } 1293bd670b35SErik Nordmark ASSERT(!(ire->ire_type & IRE_IF_CLONE)); 1294bd670b35SErik Nordmark /* 1295bd670b35SErik Nordmark * For an IRE_INTERFACE we create an IRE_IF_CLONE for this 1296bd670b35SErik Nordmark * particular destination 1297bd670b35SErik Nordmark */ 1298bd670b35SErik Nordmark if (ire->ire_type & IRE_INTERFACE) { 1299bd670b35SErik Nordmark ire_t *clone; 13007c478bd9Sstevel@tonic-gate 1301bd670b35SErik Nordmark ASSERT(ire->ire_masklen != IPV6_ABITS); 13027c478bd9Sstevel@tonic-gate 1303bd670b35SErik Nordmark /* 1304bd670b35SErik Nordmark * In the case of ip_input and ILLF_FORWARDING not 13059e3469d3SErik Nordmark * being set, and in the case of RTM_GET, there is 13069e3469d3SErik Nordmark * no point in allocating an IRE_IF_CLONE. We return 13079e3469d3SErik Nordmark * the IRE_INTERFACE. Note that !IRR_ALLOCATE can 13089e3469d3SErik Nordmark * result in a ire_dep_parent which is IRE_IF_* 13099e3469d3SErik Nordmark * without an IRE_IF_CLONE. 1310bd670b35SErik Nordmark * We recover from that when we need to send packets 1311bd670b35SErik Nordmark * by ensuring that the generations become 1312bd670b35SErik Nordmark * IRE_GENERATION_VERIFY in this case. 1313bd670b35SErik Nordmark */ 13149e3469d3SErik Nordmark if (!(irr_flags & IRR_ALLOCATE)) { 1315bd670b35SErik Nordmark invalidate = B_TRUE; 1316bd670b35SErik Nordmark ire = NULL; 1317bd670b35SErik Nordmark goto done; 1318bd670b35SErik Nordmark } 1319bd670b35SErik Nordmark 1320bd670b35SErik Nordmark clone = ire_create_if_clone(ire, &v6nexthop, 1321bd670b35SErik Nordmark &generation); 1322bd670b35SErik Nordmark if (clone == NULL) { 1323bd670b35SErik Nordmark /* 1324bd670b35SErik Nordmark * Temporary failure - no memory. 1325bd670b35SErik Nordmark * Don't want caller to cache IRE_NOROUTE. 1326bd670b35SErik Nordmark */ 1327bd670b35SErik Nordmark invalidate = B_TRUE; 1328bd670b35SErik Nordmark ire = ire_blackhole(ipst, B_TRUE); 1329bd670b35SErik Nordmark goto error; 1330bd670b35SErik Nordmark } 1331bd670b35SErik Nordmark /* 1332bd670b35SErik Nordmark * Make clone next to last entry and the 1333bd670b35SErik Nordmark * IRE_INTERFACE the last in the dependency 1334bd670b35SErik Nordmark * chain since the clone depends on the 1335bd670b35SErik Nordmark * IRE_INTERFACE. 1336bd670b35SErik Nordmark */ 1337bd670b35SErik Nordmark ASSERT(i >= 1); 1338bd670b35SErik Nordmark ASSERT(i < MAX_IRE_RECURSION); 1339bd670b35SErik Nordmark 1340bd670b35SErik Nordmark ires[i] = ires[i-1]; 1341bd670b35SErik Nordmark generations[i] = generations[i-1]; 1342bd670b35SErik Nordmark ires[i-1] = clone; 1343bd670b35SErik Nordmark generations[i-1] = generation; 1344bd670b35SErik Nordmark i++; 1345bd670b35SErik Nordmark 1346bd670b35SErik Nordmark ire = NULL; 1347bd670b35SErik Nordmark goto done; 13487c478bd9Sstevel@tonic-gate } 13495b17e9bdSJon Anderson 13505b17e9bdSJon Anderson /* 1351bd670b35SErik Nordmark * We only match on the type and optionally ILL when 1352bd670b35SErik Nordmark * recursing. The type match is used by some callers 1353bd670b35SErik Nordmark * to exclude certain types (such as IRE_IF_CLONE or 1354bd670b35SErik Nordmark * IRE_LOCAL|IRE_LOOPBACK). 135544b099c4SSowmini Varadhan * 135644b099c4SSowmini Varadhan * In the MATCH_IRE_SRC_ILL case, ill_arg may be the 'srcof' 135744b099c4SSowmini Varadhan * ire->ire_ill, and we want to find the IRE_INTERFACE for 135844b099c4SSowmini Varadhan * ire_ill, so we set ill to the ire_ill 13595b17e9bdSJon Anderson */ 136001685f97SSowmini Varadhan match_args &= (MATCH_IRE_TYPE | MATCH_IRE_DIRECT); 1361bd670b35SErik Nordmark v6nexthop = ire->ire_gateway_addr_v6; 1362bd670b35SErik Nordmark if (ill == NULL && ire->ire_ill != NULL) { 1363bd670b35SErik Nordmark ill = ire->ire_ill; 1364bd670b35SErik Nordmark need_refrele = B_TRUE; 1365bd670b35SErik Nordmark ill_refhold(ill); 1366bd670b35SErik Nordmark match_args |= MATCH_IRE_ILL; 13675b17e9bdSJon Anderson } 1368bd670b35SErik Nordmark ire = NULL; 1369bd670b35SErik Nordmark } 1370bd670b35SErik Nordmark ASSERT(ire == NULL); 1371bd670b35SErik Nordmark ire = ire_reject(ipst, B_TRUE); 1372bd670b35SErik Nordmark 1373bd670b35SErik Nordmark error: 1374bd670b35SErik Nordmark ASSERT(ire != NULL); 1375bd670b35SErik Nordmark if (need_refrele) 1376bd670b35SErik Nordmark ill_refrele(ill); 1377bd670b35SErik Nordmark 1378bd670b35SErik Nordmark /* 1379bd670b35SErik Nordmark * In the case of MULTIRT we want to try a different IRE the next 1380bd670b35SErik Nordmark * time. We let the next packet retry in that case. 1381bd670b35SErik Nordmark */ 1382bd670b35SErik Nordmark if (i > 0 && (ires[0]->ire_flags & RTF_MULTIRT)) 1383bd670b35SErik Nordmark (void) ire_no_good(ires[0]); 1384bd670b35SErik Nordmark 1385bd670b35SErik Nordmark cleanup: 1386bd670b35SErik Nordmark /* cleanup ires[i] */ 1387bd670b35SErik Nordmark ire_dep_unbuild(ires, i); 1388bd670b35SErik Nordmark for (j = 0; j < i; j++) 1389bd670b35SErik Nordmark ire_refrele(ires[j]); 1390bd670b35SErik Nordmark 13919e3469d3SErik Nordmark ASSERT((ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 13929e3469d3SErik Nordmark (irr_flags & IRR_INCOMPLETE)); 1393bd670b35SErik Nordmark /* 1394bd670b35SErik Nordmark * Use IRE_GENERATION_VERIFY to ensure that ip_output will redo the 1395bd670b35SErik Nordmark * ip_select_route since the reject or lack of memory might be gone. 1396bd670b35SErik Nordmark */ 1397bd670b35SErik Nordmark if (generationp != NULL) 1398bd670b35SErik Nordmark *generationp = IRE_GENERATION_VERIFY; 1399bd670b35SErik Nordmark return (ire); 1400bd670b35SErik Nordmark 1401bd670b35SErik Nordmark done: 1402bd670b35SErik Nordmark ASSERT(ire == NULL); 1403bd670b35SErik Nordmark if (need_refrele) 1404bd670b35SErik Nordmark ill_refrele(ill); 1405bd670b35SErik Nordmark 1406bd670b35SErik Nordmark /* Build dependencies */ 1407188e1664SErik Nordmark if (i > 1 && !ire_dep_build(ires, generations, i)) { 1408bd670b35SErik Nordmark /* Something in chain was condemned; tear it apart */ 1409bd670b35SErik Nordmark ire = ire_blackhole(ipst, B_TRUE); 1410bd670b35SErik Nordmark goto cleanup; 1411bd670b35SErik Nordmark } 1412bd670b35SErik Nordmark 1413bd670b35SErik Nordmark /* 1414bd670b35SErik Nordmark * Release all refholds except the one for ires[0] that we 1415bd670b35SErik Nordmark * will return to the caller. 1416bd670b35SErik Nordmark */ 1417bd670b35SErik Nordmark for (j = 1; j < i; j++) 1418bd670b35SErik Nordmark ire_refrele(ires[j]); 1419bd670b35SErik Nordmark 1420bd670b35SErik Nordmark if (invalidate) { 1421bd670b35SErik Nordmark /* 1422bd670b35SErik Nordmark * Since we needed to allocate but couldn't we need to make 1423bd670b35SErik Nordmark * sure that the dependency chain is rebuilt the next time. 1424bd670b35SErik Nordmark */ 1425bd670b35SErik Nordmark ire_dep_invalidate_generations(ires[0]); 1426bd670b35SErik Nordmark generation = IRE_GENERATION_VERIFY; 1427bd670b35SErik Nordmark } else { 1428bd670b35SErik Nordmark /* 1429bd670b35SErik Nordmark * IREs can have been added or deleted while we did the 1430bd670b35SErik Nordmark * recursive lookup and we can't catch those until we've built 1431bd670b35SErik Nordmark * the dependencies. We verify the stored 1432bd670b35SErik Nordmark * ire_dep_parent_generation to catch any such changes and 1433bd670b35SErik Nordmark * return IRE_GENERATION_VERIFY (which will cause 1434bd670b35SErik Nordmark * ip_select_route to be called again so we can redo the 1435bd670b35SErik Nordmark * recursive lookup next time we send a packet. 1436bd670b35SErik Nordmark */ 1437188e1664SErik Nordmark if (ires[0]->ire_dep_parent == NULL) 1438188e1664SErik Nordmark generation = ires[0]->ire_generation; 1439188e1664SErik Nordmark else 1440bd670b35SErik Nordmark generation = ire_dep_validate_generations(ires[0]); 1441bd670b35SErik Nordmark if (generations[0] != ires[0]->ire_generation) { 1442bd670b35SErik Nordmark /* Something changed at the top */ 1443bd670b35SErik Nordmark generation = IRE_GENERATION_VERIFY; 1444bd670b35SErik Nordmark } 1445bd670b35SErik Nordmark } 1446bd670b35SErik Nordmark if (generationp != NULL) 1447bd670b35SErik Nordmark *generationp = generation; 1448bd670b35SErik Nordmark 1449bd670b35SErik Nordmark return (ires[0]); 1450bd670b35SErik Nordmark } 1451bd670b35SErik Nordmark 1452bd670b35SErik Nordmark ire_t * 1453bd670b35SErik Nordmark ire_route_recursive_v6(const in6_addr_t *nexthop, uint_t ire_type, 1454bd670b35SErik Nordmark const ill_t *ill, zoneid_t zoneid, const ts_label_t *tsl, uint_t match_args, 14559e3469d3SErik Nordmark uint_t irr_flags, uint32_t xmit_hint, ip_stack_t *ipst, 1456bd670b35SErik Nordmark in6_addr_t *setsrcp, tsol_ire_gw_secattr_t **gwattrp, uint_t *generationp) 1457bd670b35SErik Nordmark { 1458bd670b35SErik Nordmark return (ire_route_recursive_impl_v6(NULL, nexthop, ire_type, ill, 14599e3469d3SErik Nordmark zoneid, tsl, match_args, irr_flags, xmit_hint, ipst, setsrcp, 1460bd670b35SErik Nordmark gwattrp, generationp)); 1461bd670b35SErik Nordmark } 1462bd670b35SErik Nordmark 1463bd670b35SErik Nordmark /* 1464bd670b35SErik Nordmark * Recursively look for a route to the destination. 1465bd670b35SErik Nordmark * We only handle a destination match here, yet we have the same arguments 1466bd670b35SErik Nordmark * as the full match to allow function pointers to select between the two. 1467bd670b35SErik Nordmark * 1468bd670b35SErik Nordmark * Note that this function never returns NULL. It returns an IRE_NOROUTE 1469bd670b35SErik Nordmark * instead. 1470bd670b35SErik Nordmark * 1471bd670b35SErik Nordmark * If we find any IRE_LOCAL|BROADCAST etc past the first iteration it 1472bd670b35SErik Nordmark * is an error. 1473bd670b35SErik Nordmark * Allow at most one RTF_INDIRECT. 1474bd670b35SErik Nordmark */ 1475bd670b35SErik Nordmark ire_t * 14769e3469d3SErik Nordmark ire_route_recursive_dstonly_v6(const in6_addr_t *nexthop, uint_t irr_flags, 1477bd670b35SErik Nordmark uint32_t xmit_hint, ip_stack_t *ipst) 1478bd670b35SErik Nordmark { 1479bd670b35SErik Nordmark ire_t *ire; 1480bd670b35SErik Nordmark ire_t *ire1; 1481bd670b35SErik Nordmark uint_t generation; 1482bd670b35SErik Nordmark 1483bd670b35SErik Nordmark /* ire_ftable_lookup handles round-robin/ECMP */ 1484bd670b35SErik Nordmark ire = ire_ftable_lookup_simple_v6(nexthop, xmit_hint, ipst, 1485bd670b35SErik Nordmark &generation); 1486bd670b35SErik Nordmark ASSERT(ire != NULL); 1487bd670b35SErik Nordmark 1488bd670b35SErik Nordmark /* 1489*fff7ec1dSSowmini Varadhan * If the IRE has a current cached parent we know that the whole 1490*fff7ec1dSSowmini Varadhan * parent chain is current, hence we don't need to discover and 1491*fff7ec1dSSowmini Varadhan * build any dependencies by doing a recursive lookup. 1492*fff7ec1dSSowmini Varadhan */ 1493*fff7ec1dSSowmini Varadhan mutex_enter(&ire->ire_lock); 1494*fff7ec1dSSowmini Varadhan if (ire->ire_dep_parent != NULL) { 1495*fff7ec1dSSowmini Varadhan if (ire->ire_dep_parent->ire_generation == 1496*fff7ec1dSSowmini Varadhan ire->ire_dep_parent_generation) { 1497*fff7ec1dSSowmini Varadhan mutex_exit(&ire->ire_lock); 1498*fff7ec1dSSowmini Varadhan return (ire); 1499*fff7ec1dSSowmini Varadhan } 1500*fff7ec1dSSowmini Varadhan mutex_exit(&ire->ire_lock); 1501*fff7ec1dSSowmini Varadhan } else { 1502*fff7ec1dSSowmini Varadhan mutex_exit(&ire->ire_lock); 1503*fff7ec1dSSowmini Varadhan /* 1504bd670b35SErik Nordmark * If this type should have an ire_nce_cache (even if it 1505bd670b35SErik Nordmark * doesn't yet have one) then we are done. Includes 1506bd670b35SErik Nordmark * IRE_INTERFACE with a full 128 bit mask. 1507bd670b35SErik Nordmark */ 1508bd670b35SErik Nordmark if (ire->ire_nce_capable) 1509bd670b35SErik Nordmark return (ire); 15105b17e9bdSJon Anderson } 15115b17e9bdSJon Anderson 1512bd670b35SErik Nordmark /* 1513bd670b35SErik Nordmark * Fallback to loop in the normal code starting with the ire 1514bd670b35SErik Nordmark * we found. Normally this would return the same ire. 1515bd670b35SErik Nordmark */ 1516bd670b35SErik Nordmark ire1 = ire_route_recursive_impl_v6(ire, nexthop, 0, NULL, ALL_ZONES, 15179e3469d3SErik Nordmark NULL, MATCH_IRE_DSTONLY, irr_flags, xmit_hint, ipst, NULL, NULL, 1518bd670b35SErik Nordmark &generation); 1519bd670b35SErik Nordmark ire_refrele(ire); 1520bd670b35SErik Nordmark return (ire1); 15215b17e9bdSJon Anderson } 1522