1bd670b35SErik Nordmark /* 2bd670b35SErik Nordmark * CDDL HEADER START 3bd670b35SErik Nordmark * 4bd670b35SErik Nordmark * The contents of this file are subject to the terms of the 5bd670b35SErik Nordmark * Common Development and Distribution License (the "License"). 6bd670b35SErik Nordmark * You may not use this file except in compliance with the License. 7bd670b35SErik Nordmark * 8bd670b35SErik Nordmark * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9bd670b35SErik Nordmark * or http://www.opensolaris.org/os/licensing. 10bd670b35SErik Nordmark * See the License for the specific language governing permissions 11bd670b35SErik Nordmark * and limitations under the License. 12bd670b35SErik Nordmark * 13bd670b35SErik Nordmark * When distributing Covered Code, include this CDDL HEADER in each 14bd670b35SErik Nordmark * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15bd670b35SErik Nordmark * If applicable, add the following below this CDDL HEADER, with the 16bd670b35SErik Nordmark * fields enclosed by brackets "[]" replaced with your own identifying 17bd670b35SErik Nordmark * information: Portions Copyright [yyyy] [name of copyright owner] 18bd670b35SErik Nordmark * 19bd670b35SErik Nordmark * CDDL HEADER END 20bd670b35SErik Nordmark */ 21bd670b35SErik Nordmark 22bd670b35SErik Nordmark /* 239e3469d3SErik Nordmark * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24bd670b35SErik Nordmark * Use is subject to license terms. 25bd670b35SErik Nordmark */ 26bd670b35SErik Nordmark /* Copyright (c) 1990 Mentat Inc. */ 27bd670b35SErik Nordmark 28bd670b35SErik Nordmark #include <sys/types.h> 29bd670b35SErik Nordmark #include <sys/stream.h> 30bd670b35SErik Nordmark #include <sys/strsubr.h> 31bd670b35SErik Nordmark #include <sys/dlpi.h> 32bd670b35SErik Nordmark #include <sys/strsun.h> 33bd670b35SErik Nordmark #include <sys/zone.h> 34bd670b35SErik Nordmark #include <sys/ddi.h> 35bd670b35SErik Nordmark #include <sys/sunddi.h> 36bd670b35SErik Nordmark #include <sys/cmn_err.h> 37bd670b35SErik Nordmark #include <sys/debug.h> 38bd670b35SErik Nordmark #include <sys/atomic.h> 39bd670b35SErik Nordmark 40bd670b35SErik Nordmark #include <sys/systm.h> 41bd670b35SErik Nordmark #include <sys/param.h> 42bd670b35SErik Nordmark #include <sys/kmem.h> 43bd670b35SErik Nordmark #include <sys/sdt.h> 44bd670b35SErik Nordmark #include <sys/socket.h> 45bd670b35SErik Nordmark #include <sys/mac.h> 46bd670b35SErik Nordmark #include <net/if.h> 47bd670b35SErik Nordmark #include <net/if_arp.h> 48bd670b35SErik Nordmark #include <net/route.h> 49bd670b35SErik Nordmark #include <sys/sockio.h> 50bd670b35SErik Nordmark #include <netinet/in.h> 51bd670b35SErik Nordmark #include <net/if_dl.h> 52bd670b35SErik Nordmark 53bd670b35SErik Nordmark #include <inet/common.h> 54bd670b35SErik Nordmark #include <inet/mi.h> 55bd670b35SErik Nordmark #include <inet/mib2.h> 56bd670b35SErik Nordmark #include <inet/nd.h> 57bd670b35SErik Nordmark #include <inet/arp.h> 58bd670b35SErik Nordmark #include <inet/snmpcom.h> 59bd670b35SErik Nordmark #include <inet/kstatcom.h> 60bd670b35SErik Nordmark 61bd670b35SErik Nordmark #include <netinet/igmp_var.h> 62bd670b35SErik Nordmark #include <netinet/ip6.h> 63bd670b35SErik Nordmark #include <netinet/icmp6.h> 64bd670b35SErik Nordmark #include <netinet/sctp.h> 65bd670b35SErik Nordmark 66bd670b35SErik Nordmark #include <inet/ip.h> 67bd670b35SErik Nordmark #include <inet/ip_impl.h> 68bd670b35SErik Nordmark #include <inet/ip6.h> 69bd670b35SErik Nordmark #include <inet/ip6_asp.h> 70bd670b35SErik Nordmark #include <inet/tcp.h> 71bd670b35SErik Nordmark #include <inet/ip_multi.h> 72bd670b35SErik Nordmark #include <inet/ip_if.h> 73bd670b35SErik Nordmark #include <inet/ip_ire.h> 74bd670b35SErik Nordmark #include <inet/ip_ftable.h> 75bd670b35SErik Nordmark #include <inet/ip_rts.h> 76bd670b35SErik Nordmark #include <inet/optcom.h> 77bd670b35SErik Nordmark #include <inet/ip_ndp.h> 78bd670b35SErik Nordmark #include <inet/ip_listutils.h> 79bd670b35SErik Nordmark #include <netinet/igmp.h> 80bd670b35SErik Nordmark #include <netinet/ip_mroute.h> 81bd670b35SErik Nordmark #include <inet/ipp_common.h> 82bd670b35SErik Nordmark 83bd670b35SErik Nordmark #include <net/pfkeyv2.h> 84bd670b35SErik Nordmark #include <inet/sadb.h> 85bd670b35SErik Nordmark #include <inet/ipsec_impl.h> 86bd670b35SErik Nordmark #include <inet/ipdrop.h> 87bd670b35SErik Nordmark #include <inet/ip_netinfo.h> 88bd670b35SErik Nordmark 89bd670b35SErik Nordmark #include <sys/pattr.h> 90bd670b35SErik Nordmark #include <inet/ipclassifier.h> 91bd670b35SErik Nordmark #include <inet/sctp_ip.h> 92bd670b35SErik Nordmark #include <inet/sctp/sctp_impl.h> 93bd670b35SErik Nordmark #include <inet/udp_impl.h> 94bd670b35SErik Nordmark #include <sys/sunddi.h> 95bd670b35SErik Nordmark 96bd670b35SErik Nordmark #include <sys/tsol/label.h> 97bd670b35SErik Nordmark #include <sys/tsol/tnet.h> 98bd670b35SErik Nordmark 99bd670b35SErik Nordmark #ifdef DEBUG 100bd670b35SErik Nordmark extern boolean_t skip_sctp_cksum; 101bd670b35SErik Nordmark #endif 102bd670b35SErik Nordmark 103bd670b35SErik Nordmark int 104bd670b35SErik Nordmark ip_output_simple_v6(mblk_t *mp, ip_xmit_attr_t *ixa) 105bd670b35SErik Nordmark { 106bd670b35SErik Nordmark ip6_t *ip6h; 107bd670b35SErik Nordmark in6_addr_t firsthop; /* In IP header */ 108bd670b35SErik Nordmark in6_addr_t dst; /* End of source route, or ip6_dst if none */ 109bd670b35SErik Nordmark ire_t *ire; 110bd670b35SErik Nordmark in6_addr_t setsrc; 111bd670b35SErik Nordmark int error; 112bd670b35SErik Nordmark ill_t *ill = NULL; 113bd670b35SErik Nordmark dce_t *dce = NULL; 114bd670b35SErik Nordmark nce_t *nce; 115bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 116bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 117bd670b35SErik Nordmark uint8_t *nexthdrp; 118bd670b35SErik Nordmark boolean_t repeat = B_FALSE; 119bd670b35SErik Nordmark boolean_t multirt = B_FALSE; 120bd670b35SErik Nordmark uint_t ifindex; 121d3d50737SRafael Vanoni int64_t now; 122bd670b35SErik Nordmark 123bd670b35SErik Nordmark ip6h = (ip6_t *)mp->b_rptr; 124bd670b35SErik Nordmark ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 125bd670b35SErik Nordmark 126bd670b35SErik Nordmark ASSERT(ixa->ixa_nce == NULL); 127bd670b35SErik Nordmark 128bd670b35SErik Nordmark ixa->ixa_pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 129bd670b35SErik Nordmark ASSERT(ixa->ixa_pktlen == msgdsize(mp)); 130bd670b35SErik Nordmark if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ixa->ixa_ip_hdr_length, 131bd670b35SErik Nordmark &nexthdrp)) { 132bd670b35SErik Nordmark /* Malformed packet */ 133bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 134bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 135bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, NULL); 136bd670b35SErik Nordmark freemsg(mp); 137bd670b35SErik Nordmark return (EINVAL); 138bd670b35SErik Nordmark } 139bd670b35SErik Nordmark ixa->ixa_protocol = *nexthdrp; 140bd670b35SErik Nordmark 141bd670b35SErik Nordmark /* 142bd670b35SErik Nordmark * Assumes that source routed packets have already been massaged by 143bd670b35SErik Nordmark * the ULP (ip_massage_options_v6) and as a result ip6_dst is the next 144bd670b35SErik Nordmark * hop in the source route. The final destination is used for IPsec 145bd670b35SErik Nordmark * policy and DCE lookup. 146bd670b35SErik Nordmark */ 147bd670b35SErik Nordmark firsthop = ip6h->ip6_dst; 148bd670b35SErik Nordmark dst = ip_get_dst_v6(ip6h, mp, NULL); 149bd670b35SErik Nordmark 150bd670b35SErik Nordmark repeat_ire: 151bd670b35SErik Nordmark error = 0; 152bd670b35SErik Nordmark setsrc = ipv6_all_zeros; 153*44b099c4SSowmini Varadhan ire = ip_select_route_v6(&firsthop, ip6h->ip6_src, ixa, NULL, &setsrc, 154*44b099c4SSowmini Varadhan &error, &multirt); 155bd670b35SErik Nordmark ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 156bd670b35SErik Nordmark if (error != 0) { 157bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 158bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 159bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, NULL); 160bd670b35SErik Nordmark freemsg(mp); 161bd670b35SErik Nordmark goto done; 162bd670b35SErik Nordmark } 163bd670b35SErik Nordmark 164bd670b35SErik Nordmark if (ire->ire_flags & (RTF_BLACKHOLE|RTF_REJECT)) { 165bd670b35SErik Nordmark /* ire_ill might be NULL hence need to skip some code */ 166bd670b35SErik Nordmark if (ixaflags & IXAF_SET_SOURCE) 167bd670b35SErik Nordmark ip6h->ip6_src = ipv6_loopback; 168bd670b35SErik Nordmark ixa->ixa_fragsize = IP_MAXPACKET; 169bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 170bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 171bd670b35SErik Nordmark /* No dce yet; use default one */ 172bd670b35SErik Nordmark error = (ire->ire_sendfn)(ire, mp, ip6h, ixa, 173bd670b35SErik Nordmark &ipst->ips_dce_default->dce_ident); 174bd670b35SErik Nordmark goto done; 175bd670b35SErik Nordmark } 176bd670b35SErik Nordmark 177bd670b35SErik Nordmark /* Note that ip6_dst is only used for IRE_MULTICAST */ 178bd670b35SErik Nordmark nce = ire_to_nce(ire, INADDR_ANY, &ip6h->ip6_dst); 179bd670b35SErik Nordmark if (nce == NULL) { 180bd670b35SErik Nordmark /* Allocation failure? */ 181bd670b35SErik Nordmark ip_drop_output("ire_to_nce", mp, ill); 182bd670b35SErik Nordmark freemsg(mp); 183bd670b35SErik Nordmark error = ENOBUFS; 184bd670b35SErik Nordmark goto done; 185bd670b35SErik Nordmark } 186bd670b35SErik Nordmark if (nce->nce_is_condemned) { 187bd670b35SErik Nordmark nce_t *nce1; 188bd670b35SErik Nordmark 189bd670b35SErik Nordmark nce1 = ire_handle_condemned_nce(nce, ire, NULL, ip6h, B_TRUE); 190bd670b35SErik Nordmark nce_refrele(nce); 191bd670b35SErik Nordmark if (nce1 == NULL) { 192bd670b35SErik Nordmark if (!repeat) { 193bd670b35SErik Nordmark /* Try finding a better IRE */ 194bd670b35SErik Nordmark repeat = B_TRUE; 195bd670b35SErik Nordmark ire_refrele(ire); 196bd670b35SErik Nordmark goto repeat_ire; 197bd670b35SErik Nordmark } 198bd670b35SErik Nordmark /* Tried twice - drop packet */ 199bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 200bd670b35SErik Nordmark ip_drop_output("No nce", mp, ill); 201bd670b35SErik Nordmark freemsg(mp); 202bd670b35SErik Nordmark error = ENOBUFS; 203bd670b35SErik Nordmark goto done; 204bd670b35SErik Nordmark } 205bd670b35SErik Nordmark nce = nce1; 206bd670b35SErik Nordmark } 207bd670b35SErik Nordmark /* 208bd670b35SErik Nordmark * For multicast with multirt we have a flag passed back from 209bd670b35SErik Nordmark * ire_lookup_multi_ill_v6 since we don't have an IRE for each 210bd670b35SErik Nordmark * possible multicast address. 211bd670b35SErik Nordmark * We also need a flag for multicast since we can't check 212bd670b35SErik Nordmark * whether RTF_MULTIRT is set in ixa_ire for multicast. 213bd670b35SErik Nordmark */ 214bd670b35SErik Nordmark if (multirt) { 215bd670b35SErik Nordmark ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 216bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 217bd670b35SErik Nordmark } else { 218bd670b35SErik Nordmark ixa->ixa_postfragfn = ire->ire_postfragfn; 219bd670b35SErik Nordmark ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 220bd670b35SErik Nordmark } 221bd670b35SErik Nordmark ASSERT(ixa->ixa_nce == NULL); 222bd670b35SErik Nordmark ixa->ixa_nce = nce; 223bd670b35SErik Nordmark 224bd670b35SErik Nordmark /* 225bd670b35SErik Nordmark * Check for a dce_t with a path mtu. 226bd670b35SErik Nordmark */ 227bd670b35SErik Nordmark ifindex = 0; 228bd670b35SErik Nordmark if (IN6_IS_ADDR_LINKSCOPE(&dst)) 229bd670b35SErik Nordmark ifindex = nce->nce_common->ncec_ill->ill_phyint->phyint_ifindex; 230bd670b35SErik Nordmark 231bd670b35SErik Nordmark dce = dce_lookup_v6(&dst, ifindex, ipst, NULL); 232bd670b35SErik Nordmark ASSERT(dce != NULL); 233bd670b35SErik Nordmark 234bd670b35SErik Nordmark if (!(ixaflags & IXAF_PMTU_DISCOVERY)) { 235bd670b35SErik Nordmark ixa->ixa_fragsize = IPV6_MIN_MTU; 236bd670b35SErik Nordmark } else if (dce->dce_flags & DCEF_PMTU) { 237bd670b35SErik Nordmark /* 238bd670b35SErik Nordmark * To avoid a periodic timer to increase the path MTU we 239bd670b35SErik Nordmark * look at dce_last_change_time each time we send a packet. 240bd670b35SErik Nordmark */ 241d3d50737SRafael Vanoni now = ddi_get_lbolt64(); 242d3d50737SRafael Vanoni if (TICK_TO_SEC(now) - dce->dce_last_change_time > 243bd670b35SErik Nordmark ipst->ips_ip_pathmtu_interval) { 244bd670b35SErik Nordmark /* 245bd670b35SErik Nordmark * Older than 20 minutes. Drop the path MTU information. 246bd670b35SErik Nordmark */ 247bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 248bd670b35SErik Nordmark dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU); 249d3d50737SRafael Vanoni dce->dce_last_change_time = TICK_TO_SEC(now); 250bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 251bd670b35SErik Nordmark dce_increment_generation(dce); 252bd670b35SErik Nordmark ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 253bd670b35SErik Nordmark } else { 254bd670b35SErik Nordmark uint_t fragsize; 255bd670b35SErik Nordmark 256bd670b35SErik Nordmark fragsize = ip_get_base_mtu(nce->nce_ill, ire); 257bd670b35SErik Nordmark if (fragsize > dce->dce_pmtu) 258bd670b35SErik Nordmark fragsize = dce->dce_pmtu; 259bd670b35SErik Nordmark ixa->ixa_fragsize = fragsize; 260bd670b35SErik Nordmark } 261bd670b35SErik Nordmark } else { 262bd670b35SErik Nordmark ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 263bd670b35SErik Nordmark } 264bd670b35SErik Nordmark 265bd670b35SErik Nordmark /* 266bd670b35SErik Nordmark * We use use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp 267bd670b35SErik Nordmark * interface for source address selection. 268bd670b35SErik Nordmark */ 269bd670b35SErik Nordmark ill = ire_nexthop_ill(ire); 270bd670b35SErik Nordmark 271bd670b35SErik Nordmark if (ixaflags & IXAF_SET_SOURCE) { 272bd670b35SErik Nordmark in6_addr_t src; 273bd670b35SErik Nordmark 274bd670b35SErik Nordmark /* 275bd670b35SErik Nordmark * We use the final destination to get 276bd670b35SErik Nordmark * correct selection for source routed packets 277bd670b35SErik Nordmark */ 278bd670b35SErik Nordmark 279bd670b35SErik Nordmark /* If unreachable we have no ill but need some source */ 280bd670b35SErik Nordmark if (ill == NULL) { 281bd670b35SErik Nordmark src = ipv6_loopback; 282bd670b35SErik Nordmark error = 0; 283bd670b35SErik Nordmark } else { 284bd670b35SErik Nordmark error = ip_select_source_v6(ill, &setsrc, &dst, 285bd670b35SErik Nordmark ixa->ixa_zoneid, ipst, B_FALSE, 286bd670b35SErik Nordmark ixa->ixa_src_preferences, &src, NULL, NULL); 287bd670b35SErik Nordmark } 288bd670b35SErik Nordmark if (error != 0) { 289bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 290bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 291bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - no source", 292bd670b35SErik Nordmark mp, ill); 293bd670b35SErik Nordmark freemsg(mp); 294bd670b35SErik Nordmark goto done; 295bd670b35SErik Nordmark } 296bd670b35SErik Nordmark ip6h->ip6_src = src; 297bd670b35SErik Nordmark } else if (ixaflags & IXAF_VERIFY_SOURCE) { 298bd670b35SErik Nordmark /* Check if the IP source is assigned to the host. */ 299bd670b35SErik Nordmark if (!ip_verify_src(mp, ixa, NULL)) { 300bd670b35SErik Nordmark /* Don't send a packet with a source that isn't ours */ 301bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 302bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 303bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - invalid source", 304bd670b35SErik Nordmark mp, ill); 305bd670b35SErik Nordmark freemsg(mp); 306bd670b35SErik Nordmark error = EADDRNOTAVAIL; 307bd670b35SErik Nordmark goto done; 308bd670b35SErik Nordmark } 309bd670b35SErik Nordmark } 310bd670b35SErik Nordmark 311bd670b35SErik Nordmark /* 312bd670b35SErik Nordmark * Check against global IPsec policy to set the AH/ESP attributes. 313bd670b35SErik Nordmark * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate. 314bd670b35SErik Nordmark */ 315bd670b35SErik Nordmark if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) { 316bd670b35SErik Nordmark ASSERT(ixa->ixa_ipsec_policy == NULL); 317bd670b35SErik Nordmark mp = ip_output_attach_policy(mp, NULL, ip6h, NULL, ixa); 318bd670b35SErik Nordmark if (mp == NULL) { 319bd670b35SErik Nordmark /* MIB and ip_drop_packet already done */ 320bd670b35SErik Nordmark return (EHOSTUNREACH); /* IPsec policy failure */ 321bd670b35SErik Nordmark } 322bd670b35SErik Nordmark } 323bd670b35SErik Nordmark 324bd670b35SErik Nordmark if (ill != NULL) { 325bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 326bd670b35SErik Nordmark } else { 327bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 328bd670b35SErik Nordmark } 329bd670b35SErik Nordmark 330bd670b35SErik Nordmark /* 331bd670b35SErik Nordmark * We update the statistics on the most specific IRE i.e., the first 332bd670b35SErik Nordmark * one we found. 333bd670b35SErik Nordmark * We don't have an IRE when we fragment, hence ire_ob_pkt_count 334bd670b35SErik Nordmark * can only count the use prior to fragmentation. However the MIB 335bd670b35SErik Nordmark * counters on the ill will be incremented in post fragmentation. 336bd670b35SErik Nordmark */ 337bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 338bd670b35SErik Nordmark 339bd670b35SErik Nordmark /* 340bd670b35SErik Nordmark * Based on ire_type and ire_flags call one of: 341bd670b35SErik Nordmark * ire_send_local_v6 - for IRE_LOCAL and IRE_LOOPBACK 342bd670b35SErik Nordmark * ire_send_multirt_v6 - if RTF_MULTIRT 343bd670b35SErik Nordmark * ire_send_noroute_v6 - if RTF_REJECT or RTF_BLACHOLE 344bd670b35SErik Nordmark * ire_send_multicast_v6 - for IRE_MULTICAST 345bd670b35SErik Nordmark * ire_send_wire_v6 - for the rest. 346bd670b35SErik Nordmark */ 347bd670b35SErik Nordmark error = (ire->ire_sendfn)(ire, mp, ip6h, ixa, &dce->dce_ident); 348bd670b35SErik Nordmark done: 349bd670b35SErik Nordmark ire_refrele(ire); 350bd670b35SErik Nordmark if (dce != NULL) 351bd670b35SErik Nordmark dce_refrele(dce); 352bd670b35SErik Nordmark if (ill != NULL) 353bd670b35SErik Nordmark ill_refrele(ill); 354bd670b35SErik Nordmark if (ixa->ixa_nce != NULL) 355bd670b35SErik Nordmark nce_refrele(ixa->ixa_nce); 356bd670b35SErik Nordmark ixa->ixa_nce = NULL; 357bd670b35SErik Nordmark return (error); 358bd670b35SErik Nordmark } 359bd670b35SErik Nordmark 360bd670b35SErik Nordmark /* 361bd670b35SErik Nordmark * ire_sendfn() functions. 362bd670b35SErik Nordmark * These functions use the following xmit_attr: 363bd670b35SErik Nordmark * - ixa_fragsize - read to determine whether or not to fragment 364bd670b35SErik Nordmark * - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec 365bd670b35SErik Nordmark * - ixa_ipsec_* are used inside IPsec 366bd670b35SErik Nordmark * - IXAF_LOOPBACK_COPY - for multicast 367bd670b35SErik Nordmark */ 368bd670b35SErik Nordmark 369bd670b35SErik Nordmark 370bd670b35SErik Nordmark /* 371bd670b35SErik Nordmark * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK 372bd670b35SErik Nordmark * 373bd670b35SErik Nordmark * The checks for restrict_interzone_loopback are done in ire_route_recursive. 374bd670b35SErik Nordmark */ 375bd670b35SErik Nordmark /* ARGSUSED4 */ 376bd670b35SErik Nordmark int 377bd670b35SErik Nordmark ire_send_local_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 378bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 379bd670b35SErik Nordmark { 380bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)iph_arg; 381bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 382bd670b35SErik Nordmark ill_t *ill = ire->ire_ill; 383bd670b35SErik Nordmark ip_recv_attr_t iras; /* NOTE: No bzero for performance */ 384bd670b35SErik Nordmark uint_t pktlen = ixa->ixa_pktlen; 385bd670b35SErik Nordmark 386bd670b35SErik Nordmark /* 387bd670b35SErik Nordmark * No fragmentation, no nce, and no application of IPsec. 388bd670b35SErik Nordmark * 389bd670b35SErik Nordmark * 390bd670b35SErik Nordmark * Note different order between IP provider and FW_HOOKS than in 391bd670b35SErik Nordmark * send_wire case. 392bd670b35SErik Nordmark */ 393bd670b35SErik Nordmark 394bd670b35SErik Nordmark /* 395bd670b35SErik Nordmark * DTrace this as ip:::send. A packet blocked by FW_HOOKS will fire the 396bd670b35SErik Nordmark * send probe, but not the receive probe. 397bd670b35SErik Nordmark */ 398bd670b35SErik Nordmark DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 399bd670b35SErik Nordmark ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 400bd670b35SErik Nordmark int, 1); 401bd670b35SErik Nordmark 402bd670b35SErik Nordmark DTRACE_PROBE4(ip6__loopback__out__start, 403bd670b35SErik Nordmark ill_t *, NULL, ill_t *, ill, 404bd670b35SErik Nordmark ip6_t *, ip6h, mblk_t *, mp); 405bd670b35SErik Nordmark 406bd670b35SErik Nordmark if (HOOKS6_INTERESTED_LOOPBACK_OUT(ipst)) { 407bd670b35SErik Nordmark int error; 408bd670b35SErik Nordmark 409bd670b35SErik Nordmark FW_HOOKS(ipst->ips_ip6_loopback_out_event, 410bd670b35SErik Nordmark ipst->ips_ipv6firewall_loopback_out, 411bd670b35SErik Nordmark NULL, ill, ip6h, mp, mp, 0, ipst, error); 412bd670b35SErik Nordmark 413bd670b35SErik Nordmark DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp); 414bd670b35SErik Nordmark if (mp == NULL) 415bd670b35SErik Nordmark return (error); 416bd670b35SErik Nordmark 417bd670b35SErik Nordmark /* 418bd670b35SErik Nordmark * Even if the destination was changed by the filter we use the 419bd670b35SErik Nordmark * forwarding decision that was made based on the address 420bd670b35SErik Nordmark * in ip_output/ip_set_destination. 421bd670b35SErik Nordmark */ 422bd670b35SErik Nordmark /* Length could be different */ 423bd670b35SErik Nordmark ip6h = (ip6_t *)mp->b_rptr; 424bd670b35SErik Nordmark pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 425bd670b35SErik Nordmark } 426bd670b35SErik Nordmark 427bd670b35SErik Nordmark /* 428bd670b35SErik Nordmark * If a callback is enabled then we need to know the 429bd670b35SErik Nordmark * source and destination zoneids for the packet. We already 430bd670b35SErik Nordmark * have those handy. 431bd670b35SErik Nordmark */ 432bd670b35SErik Nordmark if (ipst->ips_ip6_observe.he_interested) { 433bd670b35SErik Nordmark zoneid_t szone, dzone; 434bd670b35SErik Nordmark zoneid_t stackzoneid; 435bd670b35SErik Nordmark 436bd670b35SErik Nordmark stackzoneid = netstackid_to_zoneid( 437bd670b35SErik Nordmark ipst->ips_netstack->netstack_stackid); 438bd670b35SErik Nordmark 439bd670b35SErik Nordmark if (stackzoneid == GLOBAL_ZONEID) { 440bd670b35SErik Nordmark /* Shared-IP zone */ 441bd670b35SErik Nordmark dzone = ire->ire_zoneid; 442bd670b35SErik Nordmark szone = ixa->ixa_zoneid; 443bd670b35SErik Nordmark } else { 444bd670b35SErik Nordmark szone = dzone = stackzoneid; 445bd670b35SErik Nordmark } 446bd670b35SErik Nordmark ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst); 447bd670b35SErik Nordmark } 448bd670b35SErik Nordmark 449bd670b35SErik Nordmark /* Handle lo0 stats */ 450bd670b35SErik Nordmark ipst->ips_loopback_packets++; 451bd670b35SErik Nordmark 452bd670b35SErik Nordmark /* 453bd670b35SErik Nordmark * Update output mib stats. Note that we can't move into the icmp 454bd670b35SErik Nordmark * sender (icmp_output etc) since they don't know the ill and the 455bd670b35SErik Nordmark * stats are per ill. 456bd670b35SErik Nordmark */ 457bd670b35SErik Nordmark if (ixa->ixa_protocol == IPPROTO_ICMPV6) { 458bd670b35SErik Nordmark icmp6_t *icmp6; 459bd670b35SErik Nordmark 460bd670b35SErik Nordmark icmp6 = (icmp6_t *)((uchar_t *)ip6h + ixa->ixa_ip_hdr_length); 461bd670b35SErik Nordmark icmp_update_out_mib_v6(ill, icmp6); 462bd670b35SErik Nordmark } 463bd670b35SErik Nordmark 464bd670b35SErik Nordmark DTRACE_PROBE4(ip6__loopback__in__start, 465bd670b35SErik Nordmark ill_t *, ill, ill_t *, NULL, 466bd670b35SErik Nordmark ip6_t *, ip6h, mblk_t *, mp); 467bd670b35SErik Nordmark 468bd670b35SErik Nordmark if (HOOKS6_INTERESTED_LOOPBACK_IN(ipst)) { 469bd670b35SErik Nordmark int error; 470bd670b35SErik Nordmark 471bd670b35SErik Nordmark FW_HOOKS(ipst->ips_ip6_loopback_in_event, 472bd670b35SErik Nordmark ipst->ips_ipv6firewall_loopback_in, 473bd670b35SErik Nordmark ill, NULL, ip6h, mp, mp, 0, ipst, error); 474bd670b35SErik Nordmark 475bd670b35SErik Nordmark DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp); 476bd670b35SErik Nordmark if (mp == NULL) 477bd670b35SErik Nordmark return (error); 478bd670b35SErik Nordmark 479bd670b35SErik Nordmark /* 480bd670b35SErik Nordmark * Even if the destination was changed by the filter we use the 481bd670b35SErik Nordmark * forwarding decision that was made based on the address 482bd670b35SErik Nordmark * in ip_output/ip_set_destination. 483bd670b35SErik Nordmark */ 484bd670b35SErik Nordmark /* Length could be different */ 485bd670b35SErik Nordmark ip6h = (ip6_t *)mp->b_rptr; 486bd670b35SErik Nordmark pktlen = ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN; 487bd670b35SErik Nordmark } 488bd670b35SErik Nordmark 489bd670b35SErik Nordmark DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 490bd670b35SErik Nordmark ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, ip6h, 491bd670b35SErik Nordmark int, 1); 492bd670b35SErik Nordmark 493bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies */ 494bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 495bd670b35SErik Nordmark iras.ira_pktlen = pktlen; 496bd670b35SErik Nordmark 497bd670b35SErik Nordmark ire->ire_ib_pkt_count++; 498bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 499bd670b35SErik Nordmark UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, pktlen); 500bd670b35SErik Nordmark 501bd670b35SErik Nordmark /* Destined to ire_zoneid - use that for fanout */ 502bd670b35SErik Nordmark iras.ira_zoneid = ire->ire_zoneid; 503bd670b35SErik Nordmark 504bd670b35SErik Nordmark if (is_system_labeled()) { 505bd670b35SErik Nordmark iras.ira_flags |= IRAF_SYSTEM_LABELED; 506bd670b35SErik Nordmark 507bd670b35SErik Nordmark /* 508bd670b35SErik Nordmark * This updates ira_cred, ira_tsl and ira_free_flags based 509bd670b35SErik Nordmark * on the label. We don't expect this to ever fail for 510bd670b35SErik Nordmark * loopback packets, so we silently drop the packet should it 511bd670b35SErik Nordmark * fail. 512bd670b35SErik Nordmark */ 513bd670b35SErik Nordmark if (!tsol_get_pkt_label(mp, IPV6_VERSION, &iras)) { 514bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 515bd670b35SErik Nordmark ip_drop_input("tsol_get_pkt_label", mp, ill); 516bd670b35SErik Nordmark freemsg(mp); 517bd670b35SErik Nordmark return (0); 518bd670b35SErik Nordmark } 519bd670b35SErik Nordmark ASSERT(iras.ira_tsl != NULL); 520bd670b35SErik Nordmark 521bd670b35SErik Nordmark /* tsol_get_pkt_label sometimes does pullupmsg */ 522bd670b35SErik Nordmark ip6h = (ip6_t *)mp->b_rptr; 523bd670b35SErik Nordmark } 524bd670b35SErik Nordmark 525bd670b35SErik Nordmark ip_fanout_v6(mp, ip6h, &iras); 526bd670b35SErik Nordmark 527bd670b35SErik Nordmark /* We moved any IPsec refs from ixa to iras */ 528bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 529bd670b35SErik Nordmark return (0); 530bd670b35SErik Nordmark } 531bd670b35SErik Nordmark 532bd670b35SErik Nordmark static void 533bd670b35SErik Nordmark multirt_check_v6(ire_t *ire, ip6_t *ip6h, ip_xmit_attr_t *ixa) 534bd670b35SErik Nordmark { 535bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 536bd670b35SErik Nordmark 537bd670b35SErik Nordmark /* Limit the TTL on multirt packets. Do this even if IPV6_HOPLIMIT */ 538bd670b35SErik Nordmark if (ire->ire_type & IRE_MULTICAST) { 539bd670b35SErik Nordmark if (ip6h->ip6_hops > 1) { 540bd670b35SErik Nordmark ip2dbg(("ire_send_multirt_v6: forcing multicast " 541bd670b35SErik Nordmark "multirt TTL to 1 (was %d)\n", ip6h->ip6_hops)); 542bd670b35SErik Nordmark ip6h->ip6_hops = 1; 543bd670b35SErik Nordmark } 544bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_NO_TTL_CHANGE; 545bd670b35SErik Nordmark } else if ((ipst->ips_ip_multirt_ttl > 0) && 546bd670b35SErik Nordmark (ip6h->ip6_hops > ipst->ips_ip_multirt_ttl)) { 547bd670b35SErik Nordmark ip6h->ip6_hops = ipst->ips_ip_multirt_ttl; 548bd670b35SErik Nordmark /* 549bd670b35SErik Nordmark * Need to ensure we don't increase the ttl should we go through 550bd670b35SErik Nordmark * ire_send_multicast. 551bd670b35SErik Nordmark */ 552bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_NO_TTL_CHANGE; 553bd670b35SErik Nordmark } 554bd670b35SErik Nordmark 555bd670b35SErik Nordmark /* For IPv6 this also needs to insert a fragment header */ 556bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_IPV6_ADD_FRAGHDR; 557bd670b35SErik Nordmark } 558bd670b35SErik Nordmark 559bd670b35SErik Nordmark /* 560bd670b35SErik Nordmark * ire_sendfn for IRE_MULTICAST 561bd670b35SErik Nordmark * 562bd670b35SErik Nordmark * Note that we do path MTU discovery by default for IPv6 multicast. But 563bd670b35SErik Nordmark * since unconnected UDP and RAW sockets don't set IXAF_PMTU_DISCOVERY 564bd670b35SErik Nordmark * only connected sockets get this by default. 565bd670b35SErik Nordmark */ 566bd670b35SErik Nordmark int 567bd670b35SErik Nordmark ire_send_multicast_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 568bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 569bd670b35SErik Nordmark { 570bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)iph_arg; 571bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 572bd670b35SErik Nordmark ill_t *ill = ire->ire_ill; 573bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 574bd670b35SErik Nordmark 575bd670b35SErik Nordmark /* 576bd670b35SErik Nordmark * The IRE_MULTICAST is the same whether or not multirt is in use. 577bd670b35SErik Nordmark * Hence we need special-case code. 578bd670b35SErik Nordmark */ 579bd670b35SErik Nordmark if (ixaflags & IXAF_MULTIRT_MULTICAST) 580bd670b35SErik Nordmark multirt_check_v6(ire, ip6h, ixa); 581bd670b35SErik Nordmark 582bd670b35SErik Nordmark /* 583bd670b35SErik Nordmark * Check if anything in ip_input_v6 wants a copy of the transmitted 584bd670b35SErik Nordmark * packet (after IPsec and fragmentation) 585bd670b35SErik Nordmark * 586bd670b35SErik Nordmark * 1. Multicast routers always need a copy unless SO_DONTROUTE is set 587bd670b35SErik Nordmark * RSVP and the rsvp daemon is an example of a 588bd670b35SErik Nordmark * protocol and user level process that 589bd670b35SErik Nordmark * handles it's own routing. Hence, it uses the 590bd670b35SErik Nordmark * SO_DONTROUTE option to accomplish this. 591bd670b35SErik Nordmark * 2. If the sender has set IP_MULTICAST_LOOP, then we just 592bd670b35SErik Nordmark * check whether there are any receivers for the group on the ill 593bd670b35SErik Nordmark * (ignoring the zoneid). 594bd670b35SErik Nordmark * 3. If IP_MULTICAST_LOOP is not set, then we check if there are 595bd670b35SErik Nordmark * any members in other shared-IP zones. 596bd670b35SErik Nordmark * If such members exist, then we indicate that the sending zone 597bd670b35SErik Nordmark * shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP 598bd670b35SErik Nordmark * behavior. 599bd670b35SErik Nordmark * 600bd670b35SErik Nordmark * When we loopback we skip hardware checksum to make sure loopback 601bd670b35SErik Nordmark * copy is checksumed. 602bd670b35SErik Nordmark * 603bd670b35SErik Nordmark * Note that ire_ill is the upper in the case of IPMP. 604bd670b35SErik Nordmark */ 605bd670b35SErik Nordmark ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM); 606bd670b35SErik Nordmark if (ipst->ips_ip_g_mrouter && ill->ill_mrouter_cnt > 0 && 607bd670b35SErik Nordmark !(ixaflags & IXAF_DONTROUTE)) { 608bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 609bd670b35SErik Nordmark } else if (ixaflags & IXAF_MULTICAST_LOOP) { 610bd670b35SErik Nordmark /* 611bd670b35SErik Nordmark * If this zone or any other zone has members then loopback 612bd670b35SErik Nordmark * a copy. 613bd670b35SErik Nordmark */ 614bd670b35SErik Nordmark if (ill_hasmembers_v6(ill, &ip6h->ip6_dst)) 615bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 616bd670b35SErik Nordmark } else if (ipst->ips_netstack->netstack_numzones > 1) { 617bd670b35SErik Nordmark /* 618bd670b35SErik Nordmark * This zone should not have a copy. But there are some other 619bd670b35SErik Nordmark * zones which might have members. 620bd670b35SErik Nordmark */ 621bd670b35SErik Nordmark if (ill_hasmembers_otherzones_v6(ill, &ip6h->ip6_dst, 622bd670b35SErik Nordmark ixa->ixa_zoneid)) { 623bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_NO_LOOP_ZONEID_SET; 624bd670b35SErik Nordmark ixa->ixa_no_loop_zoneid = ixa->ixa_zoneid; 625bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 626bd670b35SErik Nordmark } 627bd670b35SErik Nordmark } 628bd670b35SErik Nordmark 629bd670b35SErik Nordmark /* 630bd670b35SErik Nordmark * Unless IPV6_HOPLIMIT or ire_send_multirt_v6 already set a ttl, 631bd670b35SErik Nordmark * force the ttl to the IP_MULTICAST_TTL value 632bd670b35SErik Nordmark */ 633bd670b35SErik Nordmark if (!(ixaflags & IXAF_NO_TTL_CHANGE)) { 634bd670b35SErik Nordmark ip6h->ip6_hops = ixa->ixa_multicast_ttl; 635bd670b35SErik Nordmark } 636bd670b35SErik Nordmark 637bd670b35SErik Nordmark return (ire_send_wire_v6(ire, mp, ip6h, ixa, identp)); 638bd670b35SErik Nordmark } 639bd670b35SErik Nordmark 640bd670b35SErik Nordmark /* 641bd670b35SErik Nordmark * ire_sendfn for IREs with RTF_MULTIRT 642bd670b35SErik Nordmark */ 643bd670b35SErik Nordmark int 644bd670b35SErik Nordmark ire_send_multirt_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 645bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 646bd670b35SErik Nordmark { 647bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)iph_arg; 648bd670b35SErik Nordmark 649bd670b35SErik Nordmark multirt_check_v6(ire, ip6h, ixa); 650bd670b35SErik Nordmark 651bd670b35SErik Nordmark if (ire->ire_type & IRE_MULTICAST) 652bd670b35SErik Nordmark return (ire_send_multicast_v6(ire, mp, ip6h, ixa, identp)); 653bd670b35SErik Nordmark else 654bd670b35SErik Nordmark return (ire_send_wire_v6(ire, mp, ip6h, ixa, identp)); 655bd670b35SErik Nordmark } 656bd670b35SErik Nordmark 657bd670b35SErik Nordmark /* 658bd670b35SErik Nordmark * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE 659bd670b35SErik Nordmark */ 660bd670b35SErik Nordmark /* ARGSUSED4 */ 661bd670b35SErik Nordmark int 662bd670b35SErik Nordmark ire_send_noroute_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 663bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 664bd670b35SErik Nordmark { 665bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)iph_arg; 666bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 667bd670b35SErik Nordmark ill_t *ill; 668bd670b35SErik Nordmark ip_recv_attr_t iras; 669bd670b35SErik Nordmark boolean_t dummy; 670bd670b35SErik Nordmark 671bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); 672bd670b35SErik Nordmark 673bd670b35SErik Nordmark if (ire->ire_type & IRE_NOROUTE) { 674bd670b35SErik Nordmark /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */ 675bd670b35SErik Nordmark ip_rts_change_v6(RTM_MISS, &ip6h->ip6_dst, 0, 0, 0, 0, 0, 0, 676bd670b35SErik Nordmark RTA_DST, ipst); 677bd670b35SErik Nordmark } 678bd670b35SErik Nordmark 679bd670b35SErik Nordmark if (ire->ire_flags & RTF_BLACKHOLE) { 680bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp, NULL); 681bd670b35SErik Nordmark freemsg(mp); 682bd670b35SErik Nordmark /* No error even for local senders - silent blackhole */ 683bd670b35SErik Nordmark return (0); 684bd670b35SErik Nordmark } 685bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp, NULL); 686bd670b35SErik Nordmark 687bd670b35SErik Nordmark /* 688bd670b35SErik Nordmark * We need an ill_t for the ip_recv_attr_t even though this packet 689bd670b35SErik Nordmark * was never received and icmp_unreachable doesn't currently use 690bd670b35SErik Nordmark * ira_ill. 691bd670b35SErik Nordmark */ 692bd670b35SErik Nordmark ill = ill_lookup_on_name("lo0", B_FALSE, 693bd670b35SErik Nordmark !(ixa->ixa_flags & IRAF_IS_IPV4), &dummy, ipst); 694bd670b35SErik Nordmark if (ill == NULL) { 695bd670b35SErik Nordmark freemsg(mp); 696bd670b35SErik Nordmark return (EHOSTUNREACH); 697bd670b35SErik Nordmark } 698bd670b35SErik Nordmark 699bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 700bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies */ 701bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 702bd670b35SErik Nordmark 703bd670b35SErik Nordmark icmp_unreachable_v6(mp, ICMP6_DST_UNREACH_NOROUTE, B_FALSE, &iras); 704bd670b35SErik Nordmark /* We moved any IPsec refs from ixa to iras */ 705bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 706bd670b35SErik Nordmark 707bd670b35SErik Nordmark ill_refrele(ill); 708bd670b35SErik Nordmark return (EHOSTUNREACH); 709bd670b35SErik Nordmark } 710bd670b35SErik Nordmark 711bd670b35SErik Nordmark /* 712bd670b35SErik Nordmark * Calculate a checksum ignoring any hardware capabilities 713bd670b35SErik Nordmark * 714bd670b35SErik Nordmark * Returns B_FALSE if the packet was too short for the checksum. Caller 715bd670b35SErik Nordmark * should free and do stats. 716bd670b35SErik Nordmark */ 717bd670b35SErik Nordmark static boolean_t 718bd670b35SErik Nordmark ip_output_sw_cksum_v6(mblk_t *mp, ip6_t *ip6h, ip_xmit_attr_t *ixa) 719bd670b35SErik Nordmark { 720bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 721bd670b35SErik Nordmark uint_t pktlen = ixa->ixa_pktlen; 722bd670b35SErik Nordmark uint16_t *cksump; 723bd670b35SErik Nordmark uint32_t cksum; 724bd670b35SErik Nordmark uint8_t protocol = ixa->ixa_protocol; 725bd670b35SErik Nordmark uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; 726bd670b35SErik Nordmark 727bd670b35SErik Nordmark #define iphs ((uint16_t *)ip6h) 728bd670b35SErik Nordmark 729bd670b35SErik Nordmark /* Just in case it contained garbage */ 730bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; 731bd670b35SErik Nordmark 732bd670b35SErik Nordmark /* 733bd670b35SErik Nordmark * Calculate ULP checksum 734bd670b35SErik Nordmark */ 735bd670b35SErik Nordmark if (protocol == IPPROTO_TCP) { 736bd670b35SErik Nordmark cksump = IPH_TCPH_CHECKSUMP(ip6h, ip_hdr_length); 737bd670b35SErik Nordmark cksum = IP_TCP_CSUM_COMP; 738bd670b35SErik Nordmark } else if (protocol == IPPROTO_UDP) { 739bd670b35SErik Nordmark cksump = IPH_UDPH_CHECKSUMP(ip6h, ip_hdr_length); 740bd670b35SErik Nordmark cksum = IP_UDP_CSUM_COMP; 741bd670b35SErik Nordmark } else if (protocol == IPPROTO_SCTP) { 742bd670b35SErik Nordmark sctp_hdr_t *sctph; 743bd670b35SErik Nordmark 744bd670b35SErik Nordmark ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph))); 745bd670b35SErik Nordmark sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length); 746bd670b35SErik Nordmark /* 747bd670b35SErik Nordmark * Zero out the checksum field to ensure proper 748bd670b35SErik Nordmark * checksum calculation. 749bd670b35SErik Nordmark */ 750bd670b35SErik Nordmark sctph->sh_chksum = 0; 751bd670b35SErik Nordmark #ifdef DEBUG 752bd670b35SErik Nordmark if (!skip_sctp_cksum) 753bd670b35SErik Nordmark #endif 754bd670b35SErik Nordmark sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length); 755bd670b35SErik Nordmark return (B_TRUE); 756bd670b35SErik Nordmark } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) { 757bd670b35SErik Nordmark /* 758bd670b35SErik Nordmark * icmp has placed length and routing 759bd670b35SErik Nordmark * header adjustment in the checksum field. 760bd670b35SErik Nordmark */ 761bd670b35SErik Nordmark cksump = (uint16_t *)(((uint8_t *)ip6h) + ip_hdr_length + 762bd670b35SErik Nordmark ixa->ixa_raw_cksum_offset); 763bd670b35SErik Nordmark cksum = htons(protocol); 764bd670b35SErik Nordmark } else if (protocol == IPPROTO_ICMPV6) { 765bd670b35SErik Nordmark cksump = IPH_ICMPV6_CHECKSUMP(ip6h, ip_hdr_length); 766bd670b35SErik Nordmark cksum = IP_ICMPV6_CSUM_COMP; /* Pseudo-header cksum */ 767bd670b35SErik Nordmark } else { 768bd670b35SErik Nordmark return (B_TRUE); 769bd670b35SErik Nordmark } 770bd670b35SErik Nordmark 771bd670b35SErik Nordmark /* ULP puts the checksum field is in the first mblk */ 772bd670b35SErik Nordmark ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr); 773bd670b35SErik Nordmark 774bd670b35SErik Nordmark /* 775bd670b35SErik Nordmark * We accumulate the pseudo header checksum in cksum. 776bd670b35SErik Nordmark * This is pretty hairy code, so watch close. One 777bd670b35SErik Nordmark * thing to keep in mind is that UDP and TCP have 778bd670b35SErik Nordmark * stored their respective datagram lengths in their 779bd670b35SErik Nordmark * checksum fields. This lines things up real nice. 780bd670b35SErik Nordmark */ 781bd670b35SErik Nordmark cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 782bd670b35SErik Nordmark iphs[8] + iphs[9] + iphs[10] + iphs[11] + 783bd670b35SErik Nordmark iphs[12] + iphs[13] + iphs[14] + iphs[15] + 784bd670b35SErik Nordmark iphs[16] + iphs[17] + iphs[18] + iphs[19]; 785bd670b35SErik Nordmark cksum = IP_CSUM(mp, ip_hdr_length, cksum); 786bd670b35SErik Nordmark 787bd670b35SErik Nordmark /* 788bd670b35SErik Nordmark * For UDP/IPv6 a zero UDP checksum is not allowed. 789bd670b35SErik Nordmark * Change to 0xffff 790bd670b35SErik Nordmark */ 791bd670b35SErik Nordmark if (protocol == IPPROTO_UDP && cksum == 0) 792bd670b35SErik Nordmark *cksump = ~cksum; 793bd670b35SErik Nordmark else 794bd670b35SErik Nordmark *cksump = cksum; 795bd670b35SErik Nordmark 796bd670b35SErik Nordmark IP6_STAT(ipst, ip6_out_sw_cksum); 797bd670b35SErik Nordmark IP6_STAT_UPDATE(ipst, ip6_out_sw_cksum_bytes, pktlen); 798bd670b35SErik Nordmark 799bd670b35SErik Nordmark /* No IP header checksum for IPv6 */ 800bd670b35SErik Nordmark 801bd670b35SErik Nordmark return (B_TRUE); 802bd670b35SErik Nordmark #undef iphs 803bd670b35SErik Nordmark } 804bd670b35SErik Nordmark 805bd670b35SErik Nordmark /* There are drivers that can't do partial checksum for ICMPv6 */ 806bd670b35SErik Nordmark int nxge_cksum_workaround = 1; 807bd670b35SErik Nordmark 808bd670b35SErik Nordmark /* 809bd670b35SErik Nordmark * Calculate the ULP checksum - try to use hardware. 810bd670b35SErik Nordmark * In the case of MULTIRT or multicast the 811bd670b35SErik Nordmark * IXAF_NO_HW_CKSUM is set in which case we use software. 812bd670b35SErik Nordmark * 813bd670b35SErik Nordmark * Returns B_FALSE if the packet was too short for the checksum. Caller 814bd670b35SErik Nordmark * should free and do stats. 815bd670b35SErik Nordmark */ 816bd670b35SErik Nordmark static boolean_t 817bd670b35SErik Nordmark ip_output_cksum_v6(iaflags_t ixaflags, mblk_t *mp, ip6_t *ip6h, 818bd670b35SErik Nordmark ip_xmit_attr_t *ixa, ill_t *ill) 819bd670b35SErik Nordmark { 820bd670b35SErik Nordmark uint_t pktlen = ixa->ixa_pktlen; 821bd670b35SErik Nordmark uint16_t *cksump; 822bd670b35SErik Nordmark uint16_t hck_flags; 823bd670b35SErik Nordmark uint32_t cksum; 824bd670b35SErik Nordmark uint8_t protocol = ixa->ixa_protocol; 825bd670b35SErik Nordmark uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; 826bd670b35SErik Nordmark 827bd670b35SErik Nordmark #define iphs ((uint16_t *)ip6h) 828bd670b35SErik Nordmark 829bd670b35SErik Nordmark if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 830bd670b35SErik Nordmark !dohwcksum) { 831bd670b35SErik Nordmark return (ip_output_sw_cksum_v6(mp, ip6h, ixa)); 832bd670b35SErik Nordmark } 833bd670b35SErik Nordmark 834bd670b35SErik Nordmark /* 835bd670b35SErik Nordmark * Calculate ULP checksum. Note that we don't use cksump and cksum 836bd670b35SErik Nordmark * if the ill has FULL support. 837bd670b35SErik Nordmark */ 838bd670b35SErik Nordmark if (protocol == IPPROTO_TCP) { 839bd670b35SErik Nordmark cksump = IPH_TCPH_CHECKSUMP(ip6h, ip_hdr_length); 840bd670b35SErik Nordmark cksum = IP_TCP_CSUM_COMP; /* Pseudo-header cksum */ 841bd670b35SErik Nordmark } else if (protocol == IPPROTO_UDP) { 842bd670b35SErik Nordmark cksump = IPH_UDPH_CHECKSUMP(ip6h, ip_hdr_length); 843bd670b35SErik Nordmark cksum = IP_UDP_CSUM_COMP; /* Pseudo-header cksum */ 844bd670b35SErik Nordmark } else if (protocol == IPPROTO_SCTP) { 845bd670b35SErik Nordmark sctp_hdr_t *sctph; 846bd670b35SErik Nordmark 847bd670b35SErik Nordmark ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph))); 848bd670b35SErik Nordmark sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length); 849bd670b35SErik Nordmark /* 850bd670b35SErik Nordmark * Zero out the checksum field to ensure proper 851bd670b35SErik Nordmark * checksum calculation. 852bd670b35SErik Nordmark */ 853bd670b35SErik Nordmark sctph->sh_chksum = 0; 854bd670b35SErik Nordmark #ifdef DEBUG 855bd670b35SErik Nordmark if (!skip_sctp_cksum) 856bd670b35SErik Nordmark #endif 857bd670b35SErik Nordmark sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length); 858bd670b35SErik Nordmark goto ip_hdr_cksum; 859bd670b35SErik Nordmark } else if (ixa->ixa_flags & IXAF_SET_RAW_CKSUM) { 860bd670b35SErik Nordmark /* 861bd670b35SErik Nordmark * icmp has placed length and routing 862bd670b35SErik Nordmark * header adjustment in the checksum field. 863bd670b35SErik Nordmark */ 864bd670b35SErik Nordmark cksump = (uint16_t *)(((uint8_t *)ip6h) + ip_hdr_length + 865bd670b35SErik Nordmark ixa->ixa_raw_cksum_offset); 866bd670b35SErik Nordmark cksum = htons(protocol); 867bd670b35SErik Nordmark } else if (protocol == IPPROTO_ICMPV6) { 868bd670b35SErik Nordmark cksump = IPH_ICMPV6_CHECKSUMP(ip6h, ip_hdr_length); 869bd670b35SErik Nordmark cksum = IP_ICMPV6_CSUM_COMP; /* Pseudo-header cksum */ 870bd670b35SErik Nordmark } else { 871bd670b35SErik Nordmark ip_hdr_cksum: 872bd670b35SErik Nordmark /* No IP header checksum for IPv6 */ 873bd670b35SErik Nordmark return (B_TRUE); 874bd670b35SErik Nordmark } 875bd670b35SErik Nordmark 876bd670b35SErik Nordmark /* ULP puts the checksum field is in the first mblk */ 877bd670b35SErik Nordmark ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr); 878bd670b35SErik Nordmark 879bd670b35SErik Nordmark /* 880bd670b35SErik Nordmark * Underlying interface supports hardware checksum offload for 881bd670b35SErik Nordmark * the payload; leave the payload checksum for the hardware to 882bd670b35SErik Nordmark * calculate. N.B: We only need to set up checksum info on the 883bd670b35SErik Nordmark * first mblk. 884bd670b35SErik Nordmark */ 885bd670b35SErik Nordmark hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags; 886bd670b35SErik Nordmark 887bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; 888bd670b35SErik Nordmark if (hck_flags & HCKSUM_INET_FULL_V6) { 889bd670b35SErik Nordmark /* 890bd670b35SErik Nordmark * Hardware calculates pseudo-header, header and the 891bd670b35SErik Nordmark * payload checksums, so clear the checksum field in 892bd670b35SErik Nordmark * the protocol header. 893bd670b35SErik Nordmark */ 894bd670b35SErik Nordmark *cksump = 0; 895bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM; 896bd670b35SErik Nordmark return (B_TRUE); 897bd670b35SErik Nordmark } 898bd670b35SErik Nordmark if (((hck_flags) & HCKSUM_INET_PARTIAL) && 899bd670b35SErik Nordmark (protocol != IPPROTO_ICMPV6 || !nxge_cksum_workaround)) { 900bd670b35SErik Nordmark /* 901bd670b35SErik Nordmark * Partial checksum offload has been enabled. Fill 902bd670b35SErik Nordmark * the checksum field in the protocol header with the 903bd670b35SErik Nordmark * pseudo-header checksum value. 904bd670b35SErik Nordmark * 905bd670b35SErik Nordmark * We accumulate the pseudo header checksum in cksum. 906bd670b35SErik Nordmark * This is pretty hairy code, so watch close. One 907bd670b35SErik Nordmark * thing to keep in mind is that UDP and TCP have 908bd670b35SErik Nordmark * stored their respective datagram lengths in their 909bd670b35SErik Nordmark * checksum fields. This lines things up real nice. 910bd670b35SErik Nordmark */ 911bd670b35SErik Nordmark cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + 912bd670b35SErik Nordmark iphs[8] + iphs[9] + iphs[10] + iphs[11] + 913bd670b35SErik Nordmark iphs[12] + iphs[13] + iphs[14] + iphs[15] + 914bd670b35SErik Nordmark iphs[16] + iphs[17] + iphs[18] + iphs[19]; 915bd670b35SErik Nordmark cksum += *(cksump); 916bd670b35SErik Nordmark cksum = (cksum & 0xFFFF) + (cksum >> 16); 917bd670b35SErik Nordmark *(cksump) = (cksum & 0xFFFF) + (cksum >> 16); 918bd670b35SErik Nordmark 919bd670b35SErik Nordmark /* 920bd670b35SErik Nordmark * Offsets are relative to beginning of IP header. 921bd670b35SErik Nordmark */ 922bd670b35SErik Nordmark DB_CKSUMSTART(mp) = ip_hdr_length; 923bd670b35SErik Nordmark DB_CKSUMSTUFF(mp) = (uint8_t *)cksump - (uint8_t *)ip6h; 924bd670b35SErik Nordmark DB_CKSUMEND(mp) = pktlen; 925bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM; 926bd670b35SErik Nordmark return (B_TRUE); 927bd670b35SErik Nordmark } 928bd670b35SErik Nordmark /* Hardware capabilities include neither full nor partial IPv6 */ 929bd670b35SErik Nordmark return (ip_output_sw_cksum_v6(mp, ip6h, ixa)); 930bd670b35SErik Nordmark #undef iphs 931bd670b35SErik Nordmark } 932bd670b35SErik Nordmark 933bd670b35SErik Nordmark /* 934bd670b35SErik Nordmark * ire_sendfn for offlink and onlink destinations. 935bd670b35SErik Nordmark * Also called from the multicast, and multirt send functions. 936bd670b35SErik Nordmark * 937bd670b35SErik Nordmark * Assumes that the caller has a hold on the ire. 938bd670b35SErik Nordmark * 939bd670b35SErik Nordmark * This function doesn't care if the IRE just became condemned since that 940bd670b35SErik Nordmark * can happen at any time. 941bd670b35SErik Nordmark */ 942bd670b35SErik Nordmark /* ARGSUSED */ 943bd670b35SErik Nordmark int 944bd670b35SErik Nordmark ire_send_wire_v6(ire_t *ire, mblk_t *mp, void *iph_arg, 945bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 946bd670b35SErik Nordmark { 947bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 948bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)iph_arg; 949bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 950bd670b35SErik Nordmark ill_t *ill; 951bd670b35SErik Nordmark uint32_t pktlen = ixa->ixa_pktlen; 952bd670b35SErik Nordmark 953bd670b35SErik Nordmark ASSERT(ixa->ixa_nce != NULL); 954bd670b35SErik Nordmark ill = ixa->ixa_nce->nce_ill; 955bd670b35SErik Nordmark 956bd670b35SErik Nordmark /* 957bd670b35SErik Nordmark * Update output mib stats. Note that we can't move into the icmp 958bd670b35SErik Nordmark * sender (icmp_output etc) since they don't know the ill and the 959bd670b35SErik Nordmark * stats are per ill. 960bd670b35SErik Nordmark * 961bd670b35SErik Nordmark * With IPMP we record the stats on the upper ill. 962bd670b35SErik Nordmark */ 963bd670b35SErik Nordmark if (ixa->ixa_protocol == IPPROTO_ICMPV6) { 964bd670b35SErik Nordmark icmp6_t *icmp6; 965bd670b35SErik Nordmark 966bd670b35SErik Nordmark icmp6 = (icmp6_t *)((uchar_t *)ip6h + ixa->ixa_ip_hdr_length); 967bd670b35SErik Nordmark icmp_update_out_mib_v6(ixa->ixa_nce->nce_common->ncec_ill, 968bd670b35SErik Nordmark icmp6); 969bd670b35SErik Nordmark } 970bd670b35SErik Nordmark 971bd670b35SErik Nordmark if (ixaflags & IXAF_DONTROUTE) 972bd670b35SErik Nordmark ip6h->ip6_hops = 1; 973bd670b35SErik Nordmark 974bd670b35SErik Nordmark /* 975bd670b35SErik Nordmark * This might set b_band, thus the IPsec and fragmentation 976bd670b35SErik Nordmark * code in IP ensures that b_band is updated in the first mblk. 977bd670b35SErik Nordmark */ 978bd670b35SErik Nordmark if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 979bd670b35SErik Nordmark /* ip_process translates an IS_UNDER_IPMP */ 980bd670b35SErik Nordmark mp = ip_process(IPP_LOCAL_OUT, mp, ill, ill); 981bd670b35SErik Nordmark if (mp == NULL) { 982bd670b35SErik Nordmark /* ip_drop_packet and MIB done */ 983bd670b35SErik Nordmark return (0); /* Might just be delayed */ 984bd670b35SErik Nordmark } 985bd670b35SErik Nordmark } 986bd670b35SErik Nordmark 987bd670b35SErik Nordmark /* 988bd670b35SErik Nordmark * To handle IPsec/iptun's labeling needs we need to tag packets 989bd670b35SErik Nordmark * while we still have ixa_tsl 990bd670b35SErik Nordmark */ 991bd670b35SErik Nordmark if (is_system_labeled() && ixa->ixa_tsl != NULL && 992bd670b35SErik Nordmark (ill->ill_mactype == DL_6TO4 || ill->ill_mactype == DL_IPV4 || 993bd670b35SErik Nordmark ill->ill_mactype == DL_IPV6)) { 994bd670b35SErik Nordmark cred_t *newcr; 995bd670b35SErik Nordmark 996bd670b35SErik Nordmark newcr = copycred_from_tslabel(ixa->ixa_cred, ixa->ixa_tsl, 997bd670b35SErik Nordmark KM_NOSLEEP); 998bd670b35SErik Nordmark if (newcr == NULL) { 999bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1000bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - newcr", 1001bd670b35SErik Nordmark mp, ill); 1002bd670b35SErik Nordmark freemsg(mp); 1003bd670b35SErik Nordmark return (ENOBUFS); 1004bd670b35SErik Nordmark } 1005bd670b35SErik Nordmark mblk_setcred(mp, newcr, NOPID); 1006bd670b35SErik Nordmark crfree(newcr); /* mblk_setcred did its own crhold */ 1007bd670b35SErik Nordmark } 1008bd670b35SErik Nordmark 1009bd670b35SErik Nordmark /* 1010bd670b35SErik Nordmark * IXAF_IPV6_ADD_FRAGHDR is set for CGTP so that we will add a 1011bd670b35SErik Nordmark * fragment header without fragmenting. CGTP on the receiver will 1012bd670b35SErik Nordmark * filter duplicates on the ident field. 1013bd670b35SErik Nordmark */ 1014bd670b35SErik Nordmark if (pktlen > ixa->ixa_fragsize || 1015bd670b35SErik Nordmark (ixaflags & (IXAF_IPSEC_SECURE|IXAF_IPV6_ADD_FRAGHDR))) { 1016bd670b35SErik Nordmark uint32_t ident; 1017bd670b35SErik Nordmark 1018bd670b35SErik Nordmark if (ixaflags & IXAF_IPSEC_SECURE) 1019bd670b35SErik Nordmark pktlen += ipsec_out_extra_length(ixa); 1020bd670b35SErik Nordmark 1021bd670b35SErik Nordmark if (pktlen > IP_MAXPACKET) 1022bd670b35SErik Nordmark return (EMSGSIZE); 1023bd670b35SErik Nordmark 1024bd670b35SErik Nordmark if (ixaflags & IXAF_SET_ULP_CKSUM) { 1025bd670b35SErik Nordmark /* 1026bd670b35SErik Nordmark * Compute ULP checksum using software 1027bd670b35SErik Nordmark */ 1028bd670b35SErik Nordmark if (!ip_output_sw_cksum_v6(mp, ip6h, ixa)) { 1029bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1030bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 1031bd670b35SErik Nordmark freemsg(mp); 1032bd670b35SErik Nordmark return (EINVAL); 1033bd670b35SErik Nordmark } 1034bd670b35SErik Nordmark /* Avoid checksum again below if we only add fraghdr */ 1035bd670b35SErik Nordmark ixaflags &= ~IXAF_SET_ULP_CKSUM; 1036bd670b35SErik Nordmark } 1037bd670b35SErik Nordmark 1038bd670b35SErik Nordmark /* 1039bd670b35SErik Nordmark * If we need a fragment header, pick the ident and insert 1040bd670b35SErik Nordmark * the header before IPsec to we have a place to store 1041bd670b35SErik Nordmark * the ident value. 1042bd670b35SErik Nordmark */ 1043bd670b35SErik Nordmark if ((ixaflags & IXAF_IPV6_ADD_FRAGHDR) || 1044bd670b35SErik Nordmark pktlen > ixa->ixa_fragsize) { 1045bd670b35SErik Nordmark /* 1046bd670b35SErik Nordmark * If this packet would generate a icmp_frag_needed 1047bd670b35SErik Nordmark * message, we need to handle it before we do the IPsec 1048bd670b35SErik Nordmark * processing. Otherwise, we need to strip the IPsec 1049bd670b35SErik Nordmark * headers before we send up the message to the ULPs 1050bd670b35SErik Nordmark * which becomes messy and difficult. 1051bd670b35SErik Nordmark */ 1052bd670b35SErik Nordmark if ((pktlen > ixa->ixa_fragsize) && 1053bd670b35SErik Nordmark (ixaflags & IXAF_DONTFRAG)) { 1054bd670b35SErik Nordmark /* Generate ICMP and return error */ 1055bd670b35SErik Nordmark ip_recv_attr_t iras; 1056bd670b35SErik Nordmark 1057bd670b35SErik Nordmark DTRACE_PROBE4(ip6__fragsize__fail, 1058bd670b35SErik Nordmark uint_t, pktlen, uint_t, ixa->ixa_fragsize, 1059bd670b35SErik Nordmark uint_t, ixa->ixa_pktlen, 1060bd670b35SErik Nordmark uint_t, ixa->ixa_pmtu); 1061bd670b35SErik Nordmark 1062bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 1063bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies */ 1064bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 1065bd670b35SErik Nordmark 1066bd670b35SErik Nordmark ip_drop_output("ICMP6_PKT_TOO_BIG", mp, ill); 1067bd670b35SErik Nordmark icmp_pkt2big_v6(mp, ixa->ixa_fragsize, B_TRUE, 1068bd670b35SErik Nordmark &iras); 1069bd670b35SErik Nordmark /* We moved any IPsec refs from ixa to iras */ 1070bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 1071bd670b35SErik Nordmark return (EMSGSIZE); 1072bd670b35SErik Nordmark } 1073bd670b35SErik Nordmark DTRACE_PROBE4(ip6__fragsize__ok, uint_t, pktlen, 1074bd670b35SErik Nordmark uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen, 1075bd670b35SErik Nordmark uint_t, ixa->ixa_pmtu); 1076bd670b35SErik Nordmark /* 1077bd670b35SErik Nordmark * Assign an ident value for this packet. There could 1078bd670b35SErik Nordmark * be other threads targeting the same destination, so 1079bd670b35SErik Nordmark * we have to arrange for a atomic increment. 1080bd670b35SErik Nordmark * Normally ixa_extra_ident is 0, but in the case of 1081bd670b35SErik Nordmark * LSO it will be the number of TCP segments that the 1082bd670b35SErik Nordmark * driver/hardware will extraly construct. 1083bd670b35SErik Nordmark * 1084bd670b35SErik Nordmark * Note that cl_inet_ipident has only been used for 1085bd670b35SErik Nordmark * IPv4. We don't use it here. 1086bd670b35SErik Nordmark */ 1087bd670b35SErik Nordmark ident = atomic_add_32_nv(identp, ixa->ixa_extra_ident + 1088bd670b35SErik Nordmark 1); 1089bd670b35SErik Nordmark ixa->ixa_ident = ident; /* In case we do IPsec */ 1090bd670b35SErik Nordmark } 1091bd670b35SErik Nordmark if (ixaflags & IXAF_IPSEC_SECURE) { 1092bd670b35SErik Nordmark /* 1093bd670b35SErik Nordmark * Pass in sufficient information so that 1094bd670b35SErik Nordmark * IPsec can determine whether to fragment, and 1095bd670b35SErik Nordmark * which function to call after fragmentation. 1096bd670b35SErik Nordmark */ 1097bd670b35SErik Nordmark return (ipsec_out_process(mp, ixa)); 1098bd670b35SErik Nordmark } 1099bd670b35SErik Nordmark 1100bd670b35SErik Nordmark mp = ip_fraghdr_add_v6(mp, ident, ixa); 1101bd670b35SErik Nordmark if (mp == NULL) { 1102bd670b35SErik Nordmark /* MIB and ip_drop_output already done */ 1103bd670b35SErik Nordmark return (ENOMEM); 1104bd670b35SErik Nordmark } 1105bd670b35SErik Nordmark ASSERT(pktlen == ixa->ixa_pktlen); 1106bd670b35SErik Nordmark pktlen += sizeof (ip6_frag_t); 1107bd670b35SErik Nordmark 1108bd670b35SErik Nordmark if (pktlen > ixa->ixa_fragsize) { 1109bd670b35SErik Nordmark return (ip_fragment_v6(mp, ixa->ixa_nce, ixaflags, 1110bd670b35SErik Nordmark pktlen, ixa->ixa_fragsize, 1111bd670b35SErik Nordmark ixa->ixa_xmit_hint, ixa->ixa_zoneid, 1112bd670b35SErik Nordmark ixa->ixa_no_loop_zoneid, ixa->ixa_postfragfn, 1113bd670b35SErik Nordmark &ixa->ixa_cookie)); 1114bd670b35SErik Nordmark } 1115bd670b35SErik Nordmark } 1116bd670b35SErik Nordmark if (ixaflags & IXAF_SET_ULP_CKSUM) { 1117bd670b35SErik Nordmark /* Compute ULP checksum and IP header checksum */ 1118bd670b35SErik Nordmark /* An IS_UNDER_IPMP ill is ok here */ 1119bd670b35SErik Nordmark if (!ip_output_cksum_v6(ixaflags, mp, ip6h, ixa, ill)) { 1120bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1121bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 1122bd670b35SErik Nordmark freemsg(mp); 1123bd670b35SErik Nordmark return (EINVAL); 1124bd670b35SErik Nordmark } 1125bd670b35SErik Nordmark } 1126bd670b35SErik Nordmark return ((ixa->ixa_postfragfn)(mp, ixa->ixa_nce, ixaflags, 1127bd670b35SErik Nordmark pktlen, ixa->ixa_xmit_hint, ixa->ixa_zoneid, 1128bd670b35SErik Nordmark ixa->ixa_no_loop_zoneid, &ixa->ixa_cookie)); 1129bd670b35SErik Nordmark } 1130bd670b35SErik Nordmark 1131bd670b35SErik Nordmark /* 1132bd670b35SErik Nordmark * Post fragmentation function for RTF_MULTIRT routes. 1133bd670b35SErik Nordmark * Since IRE_MULTICASTs might have RTF_MULTIRT, this function 1134bd670b35SErik Nordmark * checks IXAF_LOOPBACK_COPY. 1135bd670b35SErik Nordmark * 1136bd670b35SErik Nordmark * If no packet is sent due to failures then we return an errno, but if at 1137bd670b35SErik Nordmark * least one succeeded we return zero. 1138bd670b35SErik Nordmark */ 1139bd670b35SErik Nordmark int 1140bd670b35SErik Nordmark ip_postfrag_multirt_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 1141bd670b35SErik Nordmark uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 1142bd670b35SErik Nordmark uintptr_t *ixacookie) 1143bd670b35SErik Nordmark { 1144bd670b35SErik Nordmark irb_t *irb; 1145bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)mp->b_rptr; 1146bd670b35SErik Nordmark ire_t *ire; 1147bd670b35SErik Nordmark ire_t *ire1; 1148bd670b35SErik Nordmark mblk_t *mp1; 1149bd670b35SErik Nordmark nce_t *nce1; 1150bd670b35SErik Nordmark ill_t *ill = nce->nce_ill; 1151bd670b35SErik Nordmark ill_t *ill1; 1152bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1153bd670b35SErik Nordmark int error = 0; 1154bd670b35SErik Nordmark int num_sent = 0; 1155bd670b35SErik Nordmark int err; 1156bd670b35SErik Nordmark uint_t ire_type; 1157bd670b35SErik Nordmark in6_addr_t nexthop; 1158bd670b35SErik Nordmark 1159bd670b35SErik Nordmark ASSERT(!(ixaflags & IXAF_IS_IPV4)); 1160bd670b35SErik Nordmark 1161bd670b35SErik Nordmark /* Check for IXAF_LOOPBACK_COPY */ 1162bd670b35SErik Nordmark if (ixaflags & IXAF_LOOPBACK_COPY) { 1163bd670b35SErik Nordmark mblk_t *mp1; 1164bd670b35SErik Nordmark 1165bd670b35SErik Nordmark mp1 = copymsg(mp); 1166bd670b35SErik Nordmark if (mp1 == NULL) { 1167bd670b35SErik Nordmark /* Failed to deliver the loopback copy. */ 1168bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1169bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 1170bd670b35SErik Nordmark error = ENOBUFS; 1171bd670b35SErik Nordmark } else { 1172bd670b35SErik Nordmark ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len, 1173bd670b35SErik Nordmark nolzid); 1174bd670b35SErik Nordmark } 1175bd670b35SErik Nordmark } 1176bd670b35SErik Nordmark 1177bd670b35SErik Nordmark /* 1178bd670b35SErik Nordmark * Loop over RTF_MULTIRT for ip6_dst in the same bucket. Send 1179bd670b35SErik Nordmark * a copy to each one. 1180bd670b35SErik Nordmark * Use the nce (nexthop) and ip6_dst to find the ire. 1181bd670b35SErik Nordmark * 1182bd670b35SErik Nordmark * MULTIRT is not designed to work with shared-IP zones thus we don't 1183bd670b35SErik Nordmark * need to pass a zoneid or a label to the IRE lookup. 1184bd670b35SErik Nordmark */ 1185bd670b35SErik Nordmark if (IN6_ARE_ADDR_EQUAL(&nce->nce_addr, &ip6h->ip6_dst)) { 1186bd670b35SErik Nordmark /* Broadcast and multicast case */ 1187bd670b35SErik Nordmark ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0, 0, NULL, 1188bd670b35SErik Nordmark ALL_ZONES, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL); 1189bd670b35SErik Nordmark } else { 1190bd670b35SErik Nordmark /* Unicast case */ 1191bd670b35SErik Nordmark ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, &nce->nce_addr, 1192bd670b35SErik Nordmark 0, NULL, ALL_ZONES, NULL, MATCH_IRE_GW, 0, ipst, NULL); 1193bd670b35SErik Nordmark } 1194bd670b35SErik Nordmark 1195bd670b35SErik Nordmark if (ire == NULL || 1196bd670b35SErik Nordmark (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 1197bd670b35SErik Nordmark !(ire->ire_flags & RTF_MULTIRT)) { 1198bd670b35SErik Nordmark /* Drop */ 1199bd670b35SErik Nordmark ip_drop_output("ip_postfrag_multirt didn't find route", 1200bd670b35SErik Nordmark mp, nce->nce_ill); 1201bd670b35SErik Nordmark if (ire != NULL) 1202bd670b35SErik Nordmark ire_refrele(ire); 1203bd670b35SErik Nordmark return (ENETUNREACH); 1204bd670b35SErik Nordmark } 1205bd670b35SErik Nordmark 1206bd670b35SErik Nordmark irb = ire->ire_bucket; 1207bd670b35SErik Nordmark irb_refhold(irb); 1208bd670b35SErik Nordmark for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1209bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire1) || 1210bd670b35SErik Nordmark !(ire1->ire_flags & RTF_MULTIRT)) 1211bd670b35SErik Nordmark continue; 1212bd670b35SErik Nordmark 1213bd670b35SErik Nordmark /* Note: When IPv6 uses radix tree we don't need this check */ 1214bd670b35SErik Nordmark if (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &ire1->ire_addr_v6)) 1215bd670b35SErik Nordmark continue; 1216bd670b35SErik Nordmark 1217bd670b35SErik Nordmark /* Do the ire argument one after the loop */ 1218bd670b35SErik Nordmark if (ire1 == ire) 1219bd670b35SErik Nordmark continue; 1220bd670b35SErik Nordmark 1221bd670b35SErik Nordmark ill1 = ire_nexthop_ill(ire1); 1222bd670b35SErik Nordmark if (ill1 == NULL) { 1223bd670b35SErik Nordmark /* 1224bd670b35SErik Nordmark * This ire might not have been picked by 1225bd670b35SErik Nordmark * ire_route_recursive, in which case ire_dep might 1226bd670b35SErik Nordmark * not have been setup yet. 1227bd670b35SErik Nordmark * We kick ire_route_recursive to try to resolve 1228bd670b35SErik Nordmark * starting at ire1. 1229bd670b35SErik Nordmark */ 1230bd670b35SErik Nordmark ire_t *ire2; 1231*44b099c4SSowmini Varadhan uint_t match_flags = MATCH_IRE_DSTONLY; 1232bd670b35SErik Nordmark 1233*44b099c4SSowmini Varadhan if (ire1->ire_ill != NULL) 1234*44b099c4SSowmini Varadhan match_flags |= MATCH_IRE_ILL; 1235bd670b35SErik Nordmark ire2 = ire_route_recursive_impl_v6(ire1, 1236bd670b35SErik Nordmark &ire1->ire_addr_v6, ire1->ire_type, ire1->ire_ill, 1237*44b099c4SSowmini Varadhan ire1->ire_zoneid, NULL, match_flags, 12389e3469d3SErik Nordmark IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL); 1239bd670b35SErik Nordmark if (ire2 != NULL) 1240bd670b35SErik Nordmark ire_refrele(ire2); 1241bd670b35SErik Nordmark ill1 = ire_nexthop_ill(ire1); 1242bd670b35SErik Nordmark } 1243bd670b35SErik Nordmark if (ill1 == NULL) { 1244bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1245bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - no ill", 1246bd670b35SErik Nordmark mp, ill); 1247bd670b35SErik Nordmark error = ENETUNREACH; 1248bd670b35SErik Nordmark continue; 1249bd670b35SErik Nordmark } 1250bd670b35SErik Nordmark /* Pick the addr and type to use for ndp_nce_init */ 1251bd670b35SErik Nordmark if (nce->nce_common->ncec_flags & NCE_F_MCAST) { 1252bd670b35SErik Nordmark ire_type = IRE_MULTICAST; 1253bd670b35SErik Nordmark nexthop = ip6h->ip6_dst; 1254bd670b35SErik Nordmark } else { 1255bd670b35SErik Nordmark ire_type = ire1->ire_type; /* Doesn't matter */ 1256bd670b35SErik Nordmark nexthop = ire1->ire_gateway_addr_v6; 1257bd670b35SErik Nordmark } 1258bd670b35SErik Nordmark 1259bd670b35SErik Nordmark /* If IPMP meta or under, then we just drop */ 1260bd670b35SErik Nordmark if (ill1->ill_grp != NULL) { 1261bd670b35SErik Nordmark BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 1262bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - IPMP", 1263bd670b35SErik Nordmark mp, ill1); 1264bd670b35SErik Nordmark ill_refrele(ill1); 1265bd670b35SErik Nordmark error = ENETUNREACH; 1266bd670b35SErik Nordmark continue; 1267bd670b35SErik Nordmark } 1268bd670b35SErik Nordmark 1269bd670b35SErik Nordmark nce1 = ndp_nce_init(ill1, &nexthop, ire_type); 1270bd670b35SErik Nordmark if (nce1 == NULL) { 1271bd670b35SErik Nordmark BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 1272bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - no nce", 1273bd670b35SErik Nordmark mp, ill1); 1274bd670b35SErik Nordmark ill_refrele(ill1); 1275bd670b35SErik Nordmark error = ENOBUFS; 1276bd670b35SErik Nordmark continue; 1277bd670b35SErik Nordmark } 1278bd670b35SErik Nordmark mp1 = copymsg(mp); 1279bd670b35SErik Nordmark if (mp1 == NULL) { 1280bd670b35SErik Nordmark BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 1281bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill1); 1282bd670b35SErik Nordmark nce_refrele(nce1); 1283bd670b35SErik Nordmark ill_refrele(ill1); 1284bd670b35SErik Nordmark error = ENOBUFS; 1285bd670b35SErik Nordmark continue; 1286bd670b35SErik Nordmark } 1287bd670b35SErik Nordmark /* Preserve HW checksum for this copy */ 1288bd670b35SErik Nordmark DB_CKSUMSTART(mp1) = DB_CKSUMSTART(mp); 1289bd670b35SErik Nordmark DB_CKSUMSTUFF(mp1) = DB_CKSUMSTUFF(mp); 1290bd670b35SErik Nordmark DB_CKSUMEND(mp1) = DB_CKSUMEND(mp); 1291bd670b35SErik Nordmark DB_CKSUMFLAGS(mp1) = DB_CKSUMFLAGS(mp); 1292bd670b35SErik Nordmark DB_LSOMSS(mp1) = DB_LSOMSS(mp); 1293bd670b35SErik Nordmark 1294bd670b35SErik Nordmark ire1->ire_ob_pkt_count++; 1295bd670b35SErik Nordmark err = ip_xmit(mp1, nce1, ixaflags, pkt_len, xmit_hint, szone, 1296bd670b35SErik Nordmark 0, ixacookie); 1297bd670b35SErik Nordmark if (err == 0) 1298bd670b35SErik Nordmark num_sent++; 1299bd670b35SErik Nordmark else 1300bd670b35SErik Nordmark error = err; 1301bd670b35SErik Nordmark nce_refrele(nce1); 1302bd670b35SErik Nordmark ill_refrele(ill1); 1303bd670b35SErik Nordmark } 1304bd670b35SErik Nordmark irb_refrele(irb); 1305bd670b35SErik Nordmark ire_refrele(ire); 1306bd670b35SErik Nordmark /* Finally, the main one */ 1307bd670b35SErik Nordmark err = ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0, 1308bd670b35SErik Nordmark ixacookie); 1309bd670b35SErik Nordmark if (err == 0) 1310bd670b35SErik Nordmark num_sent++; 1311bd670b35SErik Nordmark else 1312bd670b35SErik Nordmark error = err; 1313bd670b35SErik Nordmark if (num_sent > 0) 1314bd670b35SErik Nordmark return (0); 1315bd670b35SErik Nordmark else 1316bd670b35SErik Nordmark return (error); 1317bd670b35SErik Nordmark } 1318