1bd670b35SErik Nordmark /* 2bd670b35SErik Nordmark * CDDL HEADER START 3bd670b35SErik Nordmark * 4bd670b35SErik Nordmark * The contents of this file are subject to the terms of the 5bd670b35SErik Nordmark * Common Development and Distribution License (the "License"). 6bd670b35SErik Nordmark * You may not use this file except in compliance with the License. 7bd670b35SErik Nordmark * 8bd670b35SErik Nordmark * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9bd670b35SErik Nordmark * or http://www.opensolaris.org/os/licensing. 10bd670b35SErik Nordmark * See the License for the specific language governing permissions 11bd670b35SErik Nordmark * and limitations under the License. 12bd670b35SErik Nordmark * 13bd670b35SErik Nordmark * When distributing Covered Code, include this CDDL HEADER in each 14bd670b35SErik Nordmark * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15bd670b35SErik Nordmark * If applicable, add the following below this CDDL HEADER, with the 16bd670b35SErik Nordmark * fields enclosed by brackets "[]" replaced with your own identifying 17bd670b35SErik Nordmark * information: Portions Copyright [yyyy] [name of copyright owner] 18bd670b35SErik Nordmark * 19bd670b35SErik Nordmark * CDDL HEADER END 20bd670b35SErik Nordmark */ 21bd670b35SErik Nordmark 22bd670b35SErik Nordmark /* 231eee170aSErik Nordmark * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24bd670b35SErik Nordmark */ 25bd670b35SErik Nordmark /* Copyright (c) 1990 Mentat Inc. 
*/ 26bd670b35SErik Nordmark 27bd670b35SErik Nordmark #include <sys/types.h> 28bd670b35SErik Nordmark #include <sys/stream.h> 29bd670b35SErik Nordmark #include <sys/strsubr.h> 30bd670b35SErik Nordmark #include <sys/dlpi.h> 31bd670b35SErik Nordmark #include <sys/strsun.h> 32bd670b35SErik Nordmark #include <sys/zone.h> 33bd670b35SErik Nordmark #include <sys/ddi.h> 34bd670b35SErik Nordmark #include <sys/sunddi.h> 35bd670b35SErik Nordmark #include <sys/cmn_err.h> 36bd670b35SErik Nordmark #include <sys/debug.h> 37bd670b35SErik Nordmark #include <sys/atomic.h> 38bd670b35SErik Nordmark 39bd670b35SErik Nordmark #include <sys/systm.h> 40bd670b35SErik Nordmark #include <sys/param.h> 41bd670b35SErik Nordmark #include <sys/kmem.h> 42bd670b35SErik Nordmark #include <sys/sdt.h> 43bd670b35SErik Nordmark #include <sys/socket.h> 44bd670b35SErik Nordmark #include <sys/mac.h> 45bd670b35SErik Nordmark #include <net/if.h> 46bd670b35SErik Nordmark #include <net/if_arp.h> 47bd670b35SErik Nordmark #include <net/route.h> 48bd670b35SErik Nordmark #include <sys/sockio.h> 49bd670b35SErik Nordmark #include <netinet/in.h> 50bd670b35SErik Nordmark #include <net/if_dl.h> 51bd670b35SErik Nordmark 52bd670b35SErik Nordmark #include <inet/common.h> 53bd670b35SErik Nordmark #include <inet/mi.h> 54bd670b35SErik Nordmark #include <inet/mib2.h> 55bd670b35SErik Nordmark #include <inet/nd.h> 56bd670b35SErik Nordmark #include <inet/arp.h> 57bd670b35SErik Nordmark #include <inet/snmpcom.h> 58bd670b35SErik Nordmark #include <inet/kstatcom.h> 59bd670b35SErik Nordmark 60bd670b35SErik Nordmark #include <netinet/igmp_var.h> 61bd670b35SErik Nordmark #include <netinet/ip6.h> 62bd670b35SErik Nordmark #include <netinet/icmp6.h> 63bd670b35SErik Nordmark #include <netinet/sctp.h> 64bd670b35SErik Nordmark 65bd670b35SErik Nordmark #include <inet/ip.h> 66bd670b35SErik Nordmark #include <inet/ip_impl.h> 67bd670b35SErik Nordmark #include <inet/ip6.h> 68bd670b35SErik Nordmark #include <inet/ip6_asp.h> 69bd670b35SErik Nordmark 
#include <inet/tcp.h>
#include <inet/ip_multi.h>
#include <inet/ip_if.h>
#include <inet/ip_ire.h>
#include <inet/ip_ftable.h>
#include <inet/ip_rts.h>
#include <inet/optcom.h>
#include <inet/ip_ndp.h>
#include <inet/ip_listutils.h>
#include <netinet/igmp.h>
#include <netinet/ip_mroute.h>
#include <inet/ipp_common.h>

#include <net/pfkeyv2.h>
#include <inet/sadb.h>
#include <inet/ipsec_impl.h>
#include <inet/ipdrop.h>
#include <inet/ip_netinfo.h>

#include <sys/pattr.h>
#include <inet/ipclassifier.h>
#include <inet/sctp_ip.h>
#include <inet/sctp/sctp_impl.h>
#include <inet/udp_impl.h>
#include <sys/sunddi.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>

#include <sys/clock_impl.h>	/* For LBOLT_FASTPATH{,64} */

#ifdef	DEBUG
extern boolean_t skip_sctp_cksum;
#endif

/* Forward declarations of file-local helpers. */
static int	ip_verify_nce(mblk_t *, ip_xmit_attr_t *);
static int	ip_verify_dce(mblk_t *, ip_xmit_attr_t *);
static boolean_t ip_verify_lso(ill_t *, ip_xmit_attr_t *);
static boolean_t ip_verify_zcopy(ill_t *, ip_xmit_attr_t *);
static void	ip_output_simple_broadcast(ip_xmit_attr_t *, mblk_t *);

/*
 * There are two types of output functions for IP used for different
 * purposes:
 *  - ip_output_simple() is used when sending ICMP errors, TCP resets, etc
 *    when there is no context in the form of a conn_t. However, there is an
 *    ip_xmit_attr_t that the callers use to influence interface selection
 *    (needed for ICMP echo as well as IPv6 link-locals) and IPsec.
 *
 *  - conn_ip_output() is used when sending packets with a conn_t and
 *    ip_set_destination has been called to cache information. In that case
 *    various socket options are recorded in the ip_xmit_attr_t and should
 *    be taken into account.
 */

/*
 * The caller *must* have called conn_connect() or ip_attr_connect()
 * before calling conn_ip_output(). The caller needs to redo that each time
 * the destination IP address or port changes, as well as each time there is
 * a change to any socket option that would modify how packets are routed out
 * of the box (e.g., SO_DONTROUTE, IP_NEXTHOP, IP_BOUND_IF).
 *
 * The ULP caller has to serialize the use of a single ip_xmit_attr_t.
 * We assert for that here.
133bd670b35SErik Nordmark */ 134bd670b35SErik Nordmark int 135bd670b35SErik Nordmark conn_ip_output(mblk_t *mp, ip_xmit_attr_t *ixa) 136bd670b35SErik Nordmark { 137bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 138bd670b35SErik Nordmark ire_t *ire; 139bd670b35SErik Nordmark nce_t *nce; 140bd670b35SErik Nordmark dce_t *dce; 141bd670b35SErik Nordmark ill_t *ill; 142bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 143bd670b35SErik Nordmark int error; 144bd670b35SErik Nordmark 145bd670b35SErik Nordmark /* We defer ipIfStatsHCOutRequests until an error or we have an ill */ 146bd670b35SErik Nordmark 147bd670b35SErik Nordmark ASSERT(ixa->ixa_ire != NULL); 148bd670b35SErik Nordmark /* Note there is no ixa_nce when reject and blackhole routes */ 149bd670b35SErik Nordmark ASSERT(ixa->ixa_dce != NULL); /* Could be default dce */ 150bd670b35SErik Nordmark 151bd670b35SErik Nordmark #ifdef DEBUG 152bd670b35SErik Nordmark ASSERT(ixa->ixa_curthread == NULL); 153bd670b35SErik Nordmark ixa->ixa_curthread = curthread; 154bd670b35SErik Nordmark #endif 155bd670b35SErik Nordmark 156bd670b35SErik Nordmark /* 157bd670b35SErik Nordmark * Even on labeled systems we can have a NULL ixa_tsl e.g., 158bd670b35SErik Nordmark * for IGMP/MLD traffic. 159bd670b35SErik Nordmark */ 160bd670b35SErik Nordmark 161bd670b35SErik Nordmark ire = ixa->ixa_ire; 162bd670b35SErik Nordmark 163bd670b35SErik Nordmark /* 164bd670b35SErik Nordmark * If the ULP says the (old) IRE resulted in reachability we 165bd670b35SErik Nordmark * record this before determine whether to use a new IRE. 166bd670b35SErik Nordmark * No locking for performance reasons. 167bd670b35SErik Nordmark */ 168bd670b35SErik Nordmark if (ixaflags & IXAF_REACH_CONF) 169bd670b35SErik Nordmark ire->ire_badcnt = 0; 170bd670b35SErik Nordmark 171bd670b35SErik Nordmark /* 172bd670b35SErik Nordmark * Has routing changed since we cached the results of the lookup? 
173bd670b35SErik Nordmark * 174bd670b35SErik Nordmark * This check captures all of: 175bd670b35SErik Nordmark * - the cached ire being deleted (by means of the special 176bd670b35SErik Nordmark * IRE_GENERATION_CONDEMNED) 177bd670b35SErik Nordmark * - A potentially better ire being added (ire_generation being 178bd670b35SErik Nordmark * increased) 179bd670b35SErik Nordmark * - A deletion of the nexthop ire that was used when we did the 180bd670b35SErik Nordmark * lookup. 181bd670b35SErik Nordmark * - An addition of a potentially better nexthop ire. 182bd670b35SErik Nordmark * The last two are handled by walking and increasing the generation 183bd670b35SErik Nordmark * number on all dependant IREs in ire_flush_cache(). 184bd670b35SErik Nordmark * 185bd670b35SErik Nordmark * The check also handles all cases of RTF_REJECT and RTF_BLACKHOLE 186bd670b35SErik Nordmark * since we ensure that each time we set ixa_ire to such an IRE we 187bd670b35SErik Nordmark * make sure the ixa_ire_generation does not match (by using 188bd670b35SErik Nordmark * IRE_GENERATION_VERIFY). 
189bd670b35SErik Nordmark */ 190bd670b35SErik Nordmark if (ire->ire_generation != ixa->ixa_ire_generation) { 191bd670b35SErik Nordmark error = ip_verify_ire(mp, ixa); 192bd670b35SErik Nordmark if (error != 0) { 193bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - verify ire", 194bd670b35SErik Nordmark mp, NULL); 195bd670b35SErik Nordmark goto drop; 196bd670b35SErik Nordmark } 197bd670b35SErik Nordmark ire = ixa->ixa_ire; 198bd670b35SErik Nordmark ASSERT(ire != NULL); 199bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 200bd670b35SErik Nordmark #ifdef DEBUG 201bd670b35SErik Nordmark ASSERT(ixa->ixa_curthread == curthread); 202bd670b35SErik Nordmark ixa->ixa_curthread = NULL; 203bd670b35SErik Nordmark #endif 204bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 205bd670b35SErik Nordmark /* ixa_dce might be condemned; use default one */ 206bd670b35SErik Nordmark return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa, 207bd670b35SErik Nordmark &ipst->ips_dce_default->dce_ident)); 208bd670b35SErik Nordmark } 209bd670b35SErik Nordmark /* 210bd670b35SErik Nordmark * If the ncec changed then ip_verify_ire already set 211bd670b35SErik Nordmark * ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 212bd670b35SErik Nordmark * so we can recheck the interface mtu. 213bd670b35SErik Nordmark */ 214bd670b35SErik Nordmark 215bd670b35SErik Nordmark /* 216bd670b35SErik Nordmark * Note that ire->ire_generation could already have changed. 217bd670b35SErik Nordmark * We catch that next time we send a packet. 218bd670b35SErik Nordmark */ 219bd670b35SErik Nordmark } 220bd670b35SErik Nordmark 221bd670b35SErik Nordmark /* 222bd670b35SErik Nordmark * No need to lock access to ixa_nce since the ip_xmit_attr usage 223bd670b35SErik Nordmark * is single threaded. 
224bd670b35SErik Nordmark */ 225bd670b35SErik Nordmark ASSERT(ixa->ixa_nce != NULL); 226bd670b35SErik Nordmark nce = ixa->ixa_nce; 227bd670b35SErik Nordmark if (nce->nce_is_condemned) { 228bd670b35SErik Nordmark error = ip_verify_nce(mp, ixa); 229bd670b35SErik Nordmark /* 230bd670b35SErik Nordmark * In case ZEROCOPY capability become not available, we 231bd670b35SErik Nordmark * copy the message and free the original one. We might 232bd670b35SErik Nordmark * be copying more data than needed but it doesn't hurt 233bd670b35SErik Nordmark * since such change rarely happens. 234bd670b35SErik Nordmark */ 235bd670b35SErik Nordmark switch (error) { 236bd670b35SErik Nordmark case 0: 237bd670b35SErik Nordmark break; 238bd670b35SErik Nordmark case ENOTSUP: { /* ZEROCOPY */ 239bd670b35SErik Nordmark mblk_t *nmp; 240bd670b35SErik Nordmark 241bd670b35SErik Nordmark if ((nmp = copymsg(mp)) != NULL) { 242bd670b35SErik Nordmark freemsg(mp); 243bd670b35SErik Nordmark mp = nmp; 244bd670b35SErik Nordmark 245bd670b35SErik Nordmark break; 246bd670b35SErik Nordmark } 247bd670b35SErik Nordmark /* FALLTHROUGH */ 248bd670b35SErik Nordmark } 249bd670b35SErik Nordmark default: 250bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - verify nce", 251bd670b35SErik Nordmark mp, NULL); 252bd670b35SErik Nordmark goto drop; 253bd670b35SErik Nordmark } 254bd670b35SErik Nordmark ire = ixa->ixa_ire; 255bd670b35SErik Nordmark ASSERT(ire != NULL); 256bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 257bd670b35SErik Nordmark #ifdef DEBUG 258bd670b35SErik Nordmark ASSERT(ixa->ixa_curthread == curthread); 259bd670b35SErik Nordmark ixa->ixa_curthread = NULL; 260bd670b35SErik Nordmark #endif 261bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 262bd670b35SErik Nordmark /* ixa_dce might be condemned; use default one */ 263bd670b35SErik Nordmark return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, 264bd670b35SErik Nordmark ixa, &ipst->ips_dce_default->dce_ident)); 265bd670b35SErik 
Nordmark } 266bd670b35SErik Nordmark ASSERT(ixa->ixa_nce != NULL); 267bd670b35SErik Nordmark nce = ixa->ixa_nce; 268bd670b35SErik Nordmark 269bd670b35SErik Nordmark /* 270bd670b35SErik Nordmark * Note that some other event could already have made 271bd670b35SErik Nordmark * the new nce condemned. We catch that next time we 272bd670b35SErik Nordmark * try to send a packet. 273bd670b35SErik Nordmark */ 274bd670b35SErik Nordmark } 275bd670b35SErik Nordmark /* 276bd670b35SErik Nordmark * If there is no per-destination dce_t then we have a reference to 277bd670b35SErik Nordmark * the default dce_t (which merely contains the dce_ipid). 278bd670b35SErik Nordmark * The generation check captures both the introduction of a 279bd670b35SErik Nordmark * per-destination dce_t (e.g., due to ICMP packet too big) and 280bd670b35SErik Nordmark * any change to the per-destination dce (including it becoming 281bd670b35SErik Nordmark * condemned by use of the special DCE_GENERATION_CONDEMNED). 282bd670b35SErik Nordmark */ 283bd670b35SErik Nordmark dce = ixa->ixa_dce; 284bd670b35SErik Nordmark 285bd670b35SErik Nordmark /* 286bd670b35SErik Nordmark * To avoid a periodic timer to increase the path MTU we 287bd670b35SErik Nordmark * look at dce_last_change_time each time we send a packet. 288bd670b35SErik Nordmark */ 289b36a561eSErik Nordmark if (dce->dce_flags & DCEF_PMTU) { 290b36a561eSErik Nordmark int64_t now = LBOLT_FASTPATH64; 291b36a561eSErik Nordmark 292b36a561eSErik Nordmark if ((TICK_TO_SEC(now) - dce->dce_last_change_time > 293bd670b35SErik Nordmark ipst->ips_ip_pathmtu_interval)) { 294bd670b35SErik Nordmark /* 295bd670b35SErik Nordmark * Older than 20 minutes. Drop the path MTU information. 296b36a561eSErik Nordmark * Since the path MTU changes as a result of this, 297b36a561eSErik Nordmark * twiddle ixa_dce_generation to make us go through the 298b36a561eSErik Nordmark * dce verification code in conn_ip_output. 
299bd670b35SErik Nordmark */ 300bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 301bd670b35SErik Nordmark dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU); 302d3d50737SRafael Vanoni dce->dce_last_change_time = TICK_TO_SEC(now); 303bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 304bd670b35SErik Nordmark dce_increment_generation(dce); 305bd670b35SErik Nordmark } 306b36a561eSErik Nordmark } 307bd670b35SErik Nordmark 308bd670b35SErik Nordmark if (dce->dce_generation != ixa->ixa_dce_generation) { 309bd670b35SErik Nordmark error = ip_verify_dce(mp, ixa); 310bd670b35SErik Nordmark if (error != 0) { 311bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - verify dce", 312bd670b35SErik Nordmark mp, NULL); 313bd670b35SErik Nordmark goto drop; 314bd670b35SErik Nordmark } 315bd670b35SErik Nordmark dce = ixa->ixa_dce; 316bd670b35SErik Nordmark 317bd670b35SErik Nordmark /* 318bd670b35SErik Nordmark * Note that some other event could already have made the 319bd670b35SErik Nordmark * new dce's generation number change. 320bd670b35SErik Nordmark * We catch that next time we try to send a packet. 321bd670b35SErik Nordmark */ 322bd670b35SErik Nordmark } 323bd670b35SErik Nordmark 324bd670b35SErik Nordmark ill = nce->nce_ill; 325bd670b35SErik Nordmark 326bd670b35SErik Nordmark /* 327bd670b35SErik Nordmark * An initial ixa_fragsize was set in ip_set_destination 328bd670b35SErik Nordmark * and we update it if any routing changes above. 329bd670b35SErik Nordmark * A change to ill_mtu with ifconfig will increase all dce_generation 3301eee170aSErik Nordmark * so that we will detect that with the generation check. Ditto for 3311eee170aSErik Nordmark * ill_mc_mtu. 332bd670b35SErik Nordmark */ 333bd670b35SErik Nordmark 334bd670b35SErik Nordmark /* 335bd670b35SErik Nordmark * Caller needs to make sure IXAF_VERIFY_SRC is not set if 336bd670b35SErik Nordmark * conn_unspec_src. 
337bd670b35SErik Nordmark */ 338bd670b35SErik Nordmark if ((ixaflags & IXAF_VERIFY_SOURCE) && 339bd670b35SErik Nordmark ixa->ixa_src_generation != ipst->ips_src_generation) { 340bd670b35SErik Nordmark /* Check if the IP source is still assigned to the host. */ 341bd670b35SErik Nordmark uint_t gen; 342bd670b35SErik Nordmark 343bd670b35SErik Nordmark if (!ip_verify_src(mp, ixa, &gen)) { 344bd670b35SErik Nordmark /* Don't send a packet with a source that isn't ours */ 345bd670b35SErik Nordmark error = EADDRNOTAVAIL; 346bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - invalid src", 347bd670b35SErik Nordmark mp, NULL); 348bd670b35SErik Nordmark goto drop; 349bd670b35SErik Nordmark } 350bd670b35SErik Nordmark /* The source is still valid - update the generation number */ 351bd670b35SErik Nordmark ixa->ixa_src_generation = gen; 352bd670b35SErik Nordmark } 353bd670b35SErik Nordmark 354bd670b35SErik Nordmark /* 355bd670b35SErik Nordmark * We don't have an IRE when we fragment, hence ire_ob_pkt_count 356bd670b35SErik Nordmark * can only count the use prior to fragmentation. However the MIB 357bd670b35SErik Nordmark * counters on the ill will be incremented in post fragmentation. 358bd670b35SErik Nordmark */ 359bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 360bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 361bd670b35SErik Nordmark 362bd670b35SErik Nordmark /* 363bd670b35SErik Nordmark * Based on ire_type and ire_flags call one of: 364bd670b35SErik Nordmark * ire_send_local_v* - for IRE_LOCAL and IRE_LOOPBACK 365bd670b35SErik Nordmark * ire_send_multirt_v* - if RTF_MULTIRT 366bd670b35SErik Nordmark * ire_send_noroute_v* - if RTF_REJECT or RTF_BLACHOLE 367bd670b35SErik Nordmark * ire_send_multicast_v* - for IRE_MULTICAST 368bd670b35SErik Nordmark * ire_send_broadcast_v4 - for IRE_BROADCAST 369bd670b35SErik Nordmark * ire_send_wire_v* - for the rest. 
370bd670b35SErik Nordmark */ 371bd670b35SErik Nordmark #ifdef DEBUG 372bd670b35SErik Nordmark ASSERT(ixa->ixa_curthread == curthread); 373bd670b35SErik Nordmark ixa->ixa_curthread = NULL; 374bd670b35SErik Nordmark #endif 375bd670b35SErik Nordmark return ((ire->ire_sendfn)(ire, mp, mp->b_rptr, ixa, &dce->dce_ident)); 376bd670b35SErik Nordmark 377bd670b35SErik Nordmark drop: 378bd670b35SErik Nordmark if (ixaflags & IXAF_IS_IPV4) { 379bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 380bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 381bd670b35SErik Nordmark } else { 382bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsHCOutRequests); 383bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip6_mib, ipIfStatsOutDiscards); 384bd670b35SErik Nordmark } 385bd670b35SErik Nordmark freemsg(mp); 386bd670b35SErik Nordmark #ifdef DEBUG 387bd670b35SErik Nordmark ASSERT(ixa->ixa_curthread == curthread); 388bd670b35SErik Nordmark ixa->ixa_curthread = NULL; 389bd670b35SErik Nordmark #endif 390bd670b35SErik Nordmark return (error); 391bd670b35SErik Nordmark } 392bd670b35SErik Nordmark 393bd670b35SErik Nordmark /* 394bd670b35SErik Nordmark * Handle both IPv4 and IPv6. Sets the generation number 395bd670b35SErik Nordmark * to allow the caller to know when to call us again. 396bd670b35SErik Nordmark * Returns true if the source address in the packet is a valid source. 397bd670b35SErik Nordmark * We handle callers which try to send with a zero address (since we only 398bd670b35SErik Nordmark * get here if UNSPEC_SRC is not set). 
399bd670b35SErik Nordmark */ 400bd670b35SErik Nordmark boolean_t 401bd670b35SErik Nordmark ip_verify_src(mblk_t *mp, ip_xmit_attr_t *ixa, uint_t *generationp) 402bd670b35SErik Nordmark { 403bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 404bd670b35SErik Nordmark 405bd670b35SErik Nordmark /* 406bd670b35SErik Nordmark * Need to grab the generation number before we check to 407bd670b35SErik Nordmark * avoid a race with a change to the set of local addresses. 408bd670b35SErik Nordmark * No lock needed since the thread which updates the set of local 409bd670b35SErik Nordmark * addresses use ipif/ill locks and exit those (hence a store memory 410bd670b35SErik Nordmark * barrier) before doing the atomic increase of ips_src_generation. 411bd670b35SErik Nordmark */ 412bd670b35SErik Nordmark if (generationp != NULL) 413bd670b35SErik Nordmark *generationp = ipst->ips_src_generation; 414bd670b35SErik Nordmark 415bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_IS_IPV4) { 416bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 417bd670b35SErik Nordmark 418bd670b35SErik Nordmark if (ipha->ipha_src == INADDR_ANY) 419bd670b35SErik Nordmark return (B_FALSE); 420bd670b35SErik Nordmark 421bd670b35SErik Nordmark return (ip_laddr_verify_v4(ipha->ipha_src, ixa->ixa_zoneid, 422bd670b35SErik Nordmark ipst, B_FALSE) != IPVL_BAD); 423bd670b35SErik Nordmark } else { 424bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)mp->b_rptr; 425bd670b35SErik Nordmark uint_t scopeid; 426bd670b35SErik Nordmark 427bd670b35SErik Nordmark if (IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) 428bd670b35SErik Nordmark return (B_FALSE); 429bd670b35SErik Nordmark 430bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_SCOPEID_SET) 431bd670b35SErik Nordmark scopeid = ixa->ixa_scopeid; 432bd670b35SErik Nordmark else 433bd670b35SErik Nordmark scopeid = 0; 434bd670b35SErik Nordmark 435bd670b35SErik Nordmark return (ip_laddr_verify_v6(&ip6h->ip6_src, ixa->ixa_zoneid, 436bd670b35SErik Nordmark ipst, B_FALSE, scopeid) 
!= IPVL_BAD); 437bd670b35SErik Nordmark } 438bd670b35SErik Nordmark } 439bd670b35SErik Nordmark 440bd670b35SErik Nordmark /* 441bd670b35SErik Nordmark * Handle both IPv4 and IPv6. Reverify/recalculate the IRE to use. 442bd670b35SErik Nordmark */ 443bd670b35SErik Nordmark int 444bd670b35SErik Nordmark ip_verify_ire(mblk_t *mp, ip_xmit_attr_t *ixa) 445bd670b35SErik Nordmark { 446bd670b35SErik Nordmark uint_t gen; 447bd670b35SErik Nordmark ire_t *ire; 448bd670b35SErik Nordmark nce_t *nce; 449bd670b35SErik Nordmark int error; 450bd670b35SErik Nordmark boolean_t multirt = B_FALSE; 451bd670b35SErik Nordmark 452bd670b35SErik Nordmark /* 453bd670b35SErik Nordmark * Redo ip_select_route. 454bd670b35SErik Nordmark * Need to grab generation number as part of the lookup to 455bd670b35SErik Nordmark * avoid race. 456bd670b35SErik Nordmark */ 457bd670b35SErik Nordmark error = 0; 458bd670b35SErik Nordmark ire = ip_select_route_pkt(mp, ixa, &gen, &error, &multirt); 459bd670b35SErik Nordmark ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 460bd670b35SErik Nordmark if (error != 0) { 461bd670b35SErik Nordmark ire_refrele(ire); 462bd670b35SErik Nordmark return (error); 463bd670b35SErik Nordmark } 464bd670b35SErik Nordmark 465bd670b35SErik Nordmark if (ixa->ixa_ire != NULL) 466bd670b35SErik Nordmark ire_refrele_notr(ixa->ixa_ire); 467bd670b35SErik Nordmark #ifdef DEBUG 468bd670b35SErik Nordmark ire_refhold_notr(ire); 469bd670b35SErik Nordmark ire_refrele(ire); 470bd670b35SErik Nordmark #endif 471bd670b35SErik Nordmark ixa->ixa_ire = ire; 472bd670b35SErik Nordmark ixa->ixa_ire_generation = gen; 473bd670b35SErik Nordmark if (multirt) { 474bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_IS_IPV4) 475bd670b35SErik Nordmark ixa->ixa_postfragfn = ip_postfrag_multirt_v4; 476bd670b35SErik Nordmark else 477bd670b35SErik Nordmark ixa->ixa_postfragfn = ip_postfrag_multirt_v6; 478bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 479bd670b35SErik Nordmark } else { 
480bd670b35SErik Nordmark ixa->ixa_postfragfn = ire->ire_postfragfn; 481bd670b35SErik Nordmark ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 482bd670b35SErik Nordmark } 483bd670b35SErik Nordmark 484bd670b35SErik Nordmark /* 485bd670b35SErik Nordmark * Don't look for an nce for reject or blackhole. 486bd670b35SErik Nordmark * They have ire_generation set to IRE_GENERATION_VERIFY which 487bd670b35SErik Nordmark * makes conn_ip_output avoid references to ixa_nce. 488bd670b35SErik Nordmark */ 489bd670b35SErik Nordmark if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) { 490bd670b35SErik Nordmark ASSERT(ixa->ixa_ire_generation == IRE_GENERATION_VERIFY); 491bd670b35SErik Nordmark ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 492bd670b35SErik Nordmark return (0); 493bd670b35SErik Nordmark } 494bd670b35SErik Nordmark 495bd670b35SErik Nordmark /* The NCE could now be different */ 496bd670b35SErik Nordmark nce = ire_to_nce_pkt(ire, mp); 497bd670b35SErik Nordmark if (nce == NULL) { 498bd670b35SErik Nordmark /* 499bd670b35SErik Nordmark * Allocation failure. Make sure we redo ire/nce selection 500bd670b35SErik Nordmark * next time we send. 501bd670b35SErik Nordmark */ 502bd670b35SErik Nordmark ixa->ixa_ire_generation = IRE_GENERATION_VERIFY; 503bd670b35SErik Nordmark ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 504bd670b35SErik Nordmark return (ENOBUFS); 505bd670b35SErik Nordmark } 506bd670b35SErik Nordmark if (nce == ixa->ixa_nce) { 507bd670b35SErik Nordmark /* No change */ 508bd670b35SErik Nordmark nce_refrele(nce); 509bd670b35SErik Nordmark return (0); 510bd670b35SErik Nordmark } 511bd670b35SErik Nordmark 512bd670b35SErik Nordmark /* 513bd670b35SErik Nordmark * Since the path MTU might change as a result of this 514bd670b35SErik Nordmark * route change, we twiddle ixa_dce_generation to 515bd670b35SErik Nordmark * make conn_ip_output go through the ip_verify_dce code. 
516bd670b35SErik Nordmark */ 517bd670b35SErik Nordmark ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 518bd670b35SErik Nordmark 519bd670b35SErik Nordmark if (ixa->ixa_nce != NULL) 520bd670b35SErik Nordmark nce_refrele(ixa->ixa_nce); 521bd670b35SErik Nordmark ixa->ixa_nce = nce; 522bd670b35SErik Nordmark return (0); 523bd670b35SErik Nordmark } 524bd670b35SErik Nordmark 525bd670b35SErik Nordmark /* 526bd670b35SErik Nordmark * Handle both IPv4 and IPv6. Reverify/recalculate the NCE to use. 527bd670b35SErik Nordmark */ 528bd670b35SErik Nordmark static int 529bd670b35SErik Nordmark ip_verify_nce(mblk_t *mp, ip_xmit_attr_t *ixa) 530bd670b35SErik Nordmark { 531bd670b35SErik Nordmark ire_t *ire = ixa->ixa_ire; 532bd670b35SErik Nordmark nce_t *nce; 533bd670b35SErik Nordmark int error = 0; 534bd670b35SErik Nordmark ipha_t *ipha = NULL; 535bd670b35SErik Nordmark ip6_t *ip6h = NULL; 536bd670b35SErik Nordmark 537bd670b35SErik Nordmark if (ire->ire_ipversion == IPV4_VERSION) 538bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 539bd670b35SErik Nordmark else 540bd670b35SErik Nordmark ip6h = (ip6_t *)mp->b_rptr; 541bd670b35SErik Nordmark 542bd670b35SErik Nordmark nce = ire_handle_condemned_nce(ixa->ixa_nce, ire, ipha, ip6h, B_TRUE); 543bd670b35SErik Nordmark if (nce == NULL) { 544bd670b35SErik Nordmark /* Try to find a better ire */ 545bd670b35SErik Nordmark return (ip_verify_ire(mp, ixa)); 546bd670b35SErik Nordmark } 547bd670b35SErik Nordmark 548bd670b35SErik Nordmark /* 549bd670b35SErik Nordmark * The hardware offloading capabilities, for example LSO, of the 550bd670b35SErik Nordmark * interface might have changed, so do sanity verification here. 
551bd670b35SErik Nordmark */ 552bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_VERIFY_LSO) { 553bd670b35SErik Nordmark if (!ip_verify_lso(nce->nce_ill, ixa)) { 554bd670b35SErik Nordmark ASSERT(ixa->ixa_notify != NULL); 555bd670b35SErik Nordmark ixa->ixa_notify(ixa->ixa_notify_cookie, ixa, 556bd670b35SErik Nordmark IXAN_LSO, 0); 557bd670b35SErik Nordmark error = ENOTSUP; 558bd670b35SErik Nordmark } 559bd670b35SErik Nordmark } 560bd670b35SErik Nordmark 561bd670b35SErik Nordmark /* 562bd670b35SErik Nordmark * Verify ZEROCOPY capability of underlying ill. Notify the ULP with 563bd670b35SErik Nordmark * any ZEROCOPY changes. In case ZEROCOPY capability is not available 564bd670b35SErik Nordmark * any more, return error so that conn_ip_output() can take care of 565bd670b35SErik Nordmark * the ZEROCOPY message properly. It's safe to continue send the 566bd670b35SErik Nordmark * message when ZEROCOPY newly become available. 567bd670b35SErik Nordmark */ 568bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_VERIFY_ZCOPY) { 569bd670b35SErik Nordmark if (!ip_verify_zcopy(nce->nce_ill, ixa)) { 570bd670b35SErik Nordmark ASSERT(ixa->ixa_notify != NULL); 571bd670b35SErik Nordmark ixa->ixa_notify(ixa->ixa_notify_cookie, ixa, 572bd670b35SErik Nordmark IXAN_ZCOPY, 0); 573bd670b35SErik Nordmark if ((ixa->ixa_flags & IXAF_ZCOPY_CAPAB) == 0) 574bd670b35SErik Nordmark error = ENOTSUP; 575bd670b35SErik Nordmark } 576bd670b35SErik Nordmark } 577bd670b35SErik Nordmark 578bd670b35SErik Nordmark /* 579bd670b35SErik Nordmark * Since the path MTU might change as a result of this 580bd670b35SErik Nordmark * change, we twiddle ixa_dce_generation to 581bd670b35SErik Nordmark * make conn_ip_output go through the ip_verify_dce code. 
582bd670b35SErik Nordmark */ 583bd670b35SErik Nordmark ixa->ixa_dce_generation = DCE_GENERATION_VERIFY; 584bd670b35SErik Nordmark 585bd670b35SErik Nordmark nce_refrele(ixa->ixa_nce); 586bd670b35SErik Nordmark ixa->ixa_nce = nce; 587bd670b35SErik Nordmark return (error); 588bd670b35SErik Nordmark } 589bd670b35SErik Nordmark 590bd670b35SErik Nordmark /* 591bd670b35SErik Nordmark * Handle both IPv4 and IPv6. Reverify/recalculate the DCE to use. 592bd670b35SErik Nordmark */ 593bd670b35SErik Nordmark static int 594bd670b35SErik Nordmark ip_verify_dce(mblk_t *mp, ip_xmit_attr_t *ixa) 595bd670b35SErik Nordmark { 596bd670b35SErik Nordmark dce_t *dce; 597bd670b35SErik Nordmark uint_t gen; 598bd670b35SErik Nordmark uint_t pmtu; 599bd670b35SErik Nordmark 600bd670b35SErik Nordmark dce = dce_lookup_pkt(mp, ixa, &gen); 601bd670b35SErik Nordmark ASSERT(dce != NULL); 602bd670b35SErik Nordmark 603bd670b35SErik Nordmark dce_refrele_notr(ixa->ixa_dce); 604bd670b35SErik Nordmark #ifdef DEBUG 605bd670b35SErik Nordmark dce_refhold_notr(dce); 606bd670b35SErik Nordmark dce_refrele(dce); 607bd670b35SErik Nordmark #endif 608bd670b35SErik Nordmark ixa->ixa_dce = dce; 609bd670b35SErik Nordmark ixa->ixa_dce_generation = gen; 610bd670b35SErik Nordmark 611bd670b35SErik Nordmark /* Extract the (path) mtu from the dce, ncec_ill etc */ 612bd670b35SErik Nordmark pmtu = ip_get_pmtu(ixa); 613bd670b35SErik Nordmark 614bd670b35SErik Nordmark /* 615bd670b35SErik Nordmark * Tell ULP about PMTU changes - increase or decrease - by returning 616bd670b35SErik Nordmark * an error if IXAF_VERIFY_PMTU is set. In such case, ULP should update 617bd670b35SErik Nordmark * both ixa_pmtu and ixa_fragsize appropriately. 
618bd670b35SErik Nordmark * 619bd670b35SErik Nordmark * If ULP doesn't set that flag then we need to update ixa_fragsize 620bd670b35SErik Nordmark * since routing could have changed the ill after after ixa_fragsize 621bd670b35SErik Nordmark * was set previously in the conn_ip_output path or in 622bd670b35SErik Nordmark * ip_set_destination. 623bd670b35SErik Nordmark * 624bd670b35SErik Nordmark * In case of LSO, ixa_fragsize might be greater than ixa_pmtu. 625bd670b35SErik Nordmark * 626bd670b35SErik Nordmark * In the case of a path MTU increase we send the packet after the 627bd670b35SErik Nordmark * notify to the ULP. 628bd670b35SErik Nordmark */ 629bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_VERIFY_PMTU) { 630bd670b35SErik Nordmark if (ixa->ixa_pmtu != pmtu) { 631bd670b35SErik Nordmark uint_t oldmtu = ixa->ixa_pmtu; 632bd670b35SErik Nordmark 633bd670b35SErik Nordmark DTRACE_PROBE2(verify_pmtu, uint32_t, pmtu, 634bd670b35SErik Nordmark uint32_t, ixa->ixa_pmtu); 635bd670b35SErik Nordmark ASSERT(ixa->ixa_notify != NULL); 636bd670b35SErik Nordmark ixa->ixa_notify(ixa->ixa_notify_cookie, ixa, 637bd670b35SErik Nordmark IXAN_PMTU, pmtu); 638bd670b35SErik Nordmark if (pmtu < oldmtu) 639bd670b35SErik Nordmark return (EMSGSIZE); 640bd670b35SErik Nordmark } 641bd670b35SErik Nordmark } else { 642bd670b35SErik Nordmark ixa->ixa_fragsize = pmtu; 643bd670b35SErik Nordmark } 644bd670b35SErik Nordmark return (0); 645bd670b35SErik Nordmark } 646bd670b35SErik Nordmark 647bd670b35SErik Nordmark /* 648bd670b35SErik Nordmark * Verify LSO usability. Keep the return value simple to indicate whether 649bd670b35SErik Nordmark * the LSO capability has changed. Handle both IPv4 and IPv6. 
650bd670b35SErik Nordmark */ 651bd670b35SErik Nordmark static boolean_t 652bd670b35SErik Nordmark ip_verify_lso(ill_t *ill, ip_xmit_attr_t *ixa) 653bd670b35SErik Nordmark { 654bd670b35SErik Nordmark ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab; 655bd670b35SErik Nordmark ill_lso_capab_t *new_lsoc = ill->ill_lso_capab; 656bd670b35SErik Nordmark 657bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_LSO_CAPAB) { 658bd670b35SErik Nordmark /* 659bd670b35SErik Nordmark * Not unsable any more. 660bd670b35SErik Nordmark */ 661bd670b35SErik Nordmark if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) || 662bd670b35SErik Nordmark (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) || 663bd670b35SErik Nordmark (ixa->ixa_ire->ire_flags & RTF_MULTIRT) || 664bd670b35SErik Nordmark ((ixa->ixa_flags & IXAF_IS_IPV4) ? 665bd670b35SErik Nordmark !ILL_LSO_TCP_IPV4_USABLE(ill) : 666bd670b35SErik Nordmark !ILL_LSO_TCP_IPV6_USABLE(ill))) { 667bd670b35SErik Nordmark ixa->ixa_flags &= ~IXAF_LSO_CAPAB; 668bd670b35SErik Nordmark 669bd670b35SErik Nordmark return (B_FALSE); 670bd670b35SErik Nordmark } 671bd670b35SErik Nordmark 672bd670b35SErik Nordmark /* 673bd670b35SErik Nordmark * Capability has changed, refresh the copy in ixa. 674bd670b35SErik Nordmark */ 675bd670b35SErik Nordmark if (lsoc->ill_lso_max != new_lsoc->ill_lso_max) { 676bd670b35SErik Nordmark *lsoc = *new_lsoc; 677bd670b35SErik Nordmark 678bd670b35SErik Nordmark return (B_FALSE); 679bd670b35SErik Nordmark } 680bd670b35SErik Nordmark } else { /* Was not usable */ 681bd670b35SErik Nordmark if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) && 682bd670b35SErik Nordmark !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && 683bd670b35SErik Nordmark !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) && 684bd670b35SErik Nordmark ((ixa->ixa_flags & IXAF_IS_IPV4) ? 
685bd670b35SErik Nordmark ILL_LSO_TCP_IPV4_USABLE(ill) : 686bd670b35SErik Nordmark ILL_LSO_TCP_IPV6_USABLE(ill))) { 687bd670b35SErik Nordmark *lsoc = *new_lsoc; 688bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LSO_CAPAB; 689bd670b35SErik Nordmark 690bd670b35SErik Nordmark return (B_FALSE); 691bd670b35SErik Nordmark } 692bd670b35SErik Nordmark } 693bd670b35SErik Nordmark 694bd670b35SErik Nordmark return (B_TRUE); 695bd670b35SErik Nordmark } 696bd670b35SErik Nordmark 697bd670b35SErik Nordmark /* 698bd670b35SErik Nordmark * Verify ZEROCOPY usability. Keep the return value simple to indicate whether 699bd670b35SErik Nordmark * the ZEROCOPY capability has changed. Handle both IPv4 and IPv6. 700bd670b35SErik Nordmark */ 701bd670b35SErik Nordmark static boolean_t 702bd670b35SErik Nordmark ip_verify_zcopy(ill_t *ill, ip_xmit_attr_t *ixa) 703bd670b35SErik Nordmark { 704bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_ZCOPY_CAPAB) { 705bd670b35SErik Nordmark /* 706bd670b35SErik Nordmark * Not unsable any more. 
707bd670b35SErik Nordmark */ 708bd670b35SErik Nordmark if ((ixa->ixa_flags & IXAF_IPSEC_SECURE) || 709bd670b35SErik Nordmark (ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) || 710bd670b35SErik Nordmark (ixa->ixa_ire->ire_flags & RTF_MULTIRT) || 711bd670b35SErik Nordmark !ILL_ZCOPY_USABLE(ill)) { 712bd670b35SErik Nordmark ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB; 713bd670b35SErik Nordmark 714bd670b35SErik Nordmark return (B_FALSE); 715bd670b35SErik Nordmark } 716bd670b35SErik Nordmark } else { /* Was not usable */ 717bd670b35SErik Nordmark if (!(ixa->ixa_flags & IXAF_IPSEC_SECURE) && 718bd670b35SErik Nordmark !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) && 719bd670b35SErik Nordmark !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) && 720bd670b35SErik Nordmark ILL_ZCOPY_USABLE(ill)) { 721bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_ZCOPY_CAPAB; 722bd670b35SErik Nordmark 723bd670b35SErik Nordmark return (B_FALSE); 724bd670b35SErik Nordmark } 725bd670b35SErik Nordmark } 726bd670b35SErik Nordmark 727bd670b35SErik Nordmark return (B_TRUE); 728bd670b35SErik Nordmark } 729bd670b35SErik Nordmark 730bd670b35SErik Nordmark 731bd670b35SErik Nordmark /* 732bd670b35SErik Nordmark * When there is no conn_t context, this will send a packet. 733bd670b35SErik Nordmark * The caller must *not* have called conn_connect() or ip_attr_connect() 734bd670b35SErik Nordmark * before calling ip_output_simple(). 735bd670b35SErik Nordmark * Handles IPv4 and IPv6. Returns zero or an errno such as ENETUNREACH. 736bd670b35SErik Nordmark * Honors IXAF_SET_SOURCE. 737bd670b35SErik Nordmark * 738bd670b35SErik Nordmark * We acquire the ire and after calling ire_sendfn we release 739bd670b35SErik Nordmark * the hold on the ire. Ditto for the nce and dce. 740bd670b35SErik Nordmark * 741bd670b35SErik Nordmark * This assumes that the caller has set the following in ip_xmit_attr_t: 742bd670b35SErik Nordmark * ixa_tsl, ixa_zoneid, and ixa_ipst must always be set. 
743bd670b35SErik Nordmark * If ixa_ifindex is non-zero it means send out that ill. (If it is 744bd670b35SErik Nordmark * an upper IPMP ill we load balance across the group; if a lower we send 745bd670b35SErik Nordmark * on that lower ill without load balancing.) 746bd670b35SErik Nordmark * IXAF_IS_IPV4 must be set correctly. 747bd670b35SErik Nordmark * If IXAF_IPSEC_SECURE is set then the ixa_ipsec_* fields must be set. 748bd670b35SErik Nordmark * If IXAF_NO_IPSEC is set we'd skip IPsec policy lookup. 749bd670b35SErik Nordmark * If neither of those two are set we do an IPsec policy lookup. 750bd670b35SErik Nordmark * 751bd670b35SErik Nordmark * We handle setting things like 752bd670b35SErik Nordmark * ixa_pktlen 753bd670b35SErik Nordmark * ixa_ip_hdr_length 754bd670b35SErik Nordmark * ixa->ixa_protocol 755bd670b35SErik Nordmark * 756bd670b35SErik Nordmark * The caller may set ixa_xmit_hint, which is used for ECMP selection and 757bd670b35SErik Nordmark * transmit ring selecting in GLD. 758bd670b35SErik Nordmark * 759bd670b35SErik Nordmark * The caller must do an ixa_cleanup() to release any IPsec references 760bd670b35SErik Nordmark * after we return. 
761bd670b35SErik Nordmark */ 762bd670b35SErik Nordmark int 763bd670b35SErik Nordmark ip_output_simple(mblk_t *mp, ip_xmit_attr_t *ixa) 764bd670b35SErik Nordmark { 765bd670b35SErik Nordmark ts_label_t *effective_tsl = NULL; 766bd670b35SErik Nordmark int err; 767bd670b35SErik Nordmark 768bd670b35SErik Nordmark ASSERT(ixa->ixa_ipst != NULL); 769bd670b35SErik Nordmark 770bd670b35SErik Nordmark if (is_system_labeled()) { 771bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 772bd670b35SErik Nordmark 773bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_IS_IPV4) { 774bd670b35SErik Nordmark err = tsol_check_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid, 775bd670b35SErik Nordmark &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst, 776bd670b35SErik Nordmark &effective_tsl); 777bd670b35SErik Nordmark } else { 778bd670b35SErik Nordmark err = tsol_check_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid, 779bd670b35SErik Nordmark &mp, CONN_MAC_DEFAULT, B_FALSE, ixa->ixa_ipst, 780bd670b35SErik Nordmark &effective_tsl); 781bd670b35SErik Nordmark } 782bd670b35SErik Nordmark if (err != 0) { 783bd670b35SErik Nordmark ip2dbg(("tsol_check: label check failed (%d)\n", err)); 784bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 785bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 786bd670b35SErik Nordmark ip_drop_output("tsol_check_label", mp, NULL); 787bd670b35SErik Nordmark freemsg(mp); 788bd670b35SErik Nordmark return (err); 789bd670b35SErik Nordmark } 790bd670b35SErik Nordmark if (effective_tsl != NULL) { 791bd670b35SErik Nordmark /* Update the label */ 792bd670b35SErik Nordmark ip_xmit_attr_replace_tsl(ixa, effective_tsl); 793bd670b35SErik Nordmark } 794bd670b35SErik Nordmark } 795bd670b35SErik Nordmark 796bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_IS_IPV4) 797bd670b35SErik Nordmark return (ip_output_simple_v4(mp, ixa)); 798bd670b35SErik Nordmark else 799bd670b35SErik Nordmark return (ip_output_simple_v6(mp, ixa)); 800bd670b35SErik Nordmark } 
801bd670b35SErik Nordmark 802bd670b35SErik Nordmark int 803bd670b35SErik Nordmark ip_output_simple_v4(mblk_t *mp, ip_xmit_attr_t *ixa) 804bd670b35SErik Nordmark { 805bd670b35SErik Nordmark ipha_t *ipha; 806bd670b35SErik Nordmark ipaddr_t firsthop; /* In IP header */ 807bd670b35SErik Nordmark ipaddr_t dst; /* End of source route, or ipha_dst if none */ 808bd670b35SErik Nordmark ire_t *ire; 809bd670b35SErik Nordmark ipaddr_t setsrc; /* RTF_SETSRC */ 810bd670b35SErik Nordmark int error; 811bd670b35SErik Nordmark ill_t *ill = NULL; 812bd670b35SErik Nordmark dce_t *dce = NULL; 813bd670b35SErik Nordmark nce_t *nce; 814bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 815bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 816bd670b35SErik Nordmark boolean_t repeat = B_FALSE; 817bd670b35SErik Nordmark boolean_t multirt = B_FALSE; 818d3d50737SRafael Vanoni int64_t now; 819bd670b35SErik Nordmark 820bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 821bd670b35SErik Nordmark ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 822bd670b35SErik Nordmark 823bd670b35SErik Nordmark /* 824bd670b35SErik Nordmark * Even on labeled systems we can have a NULL ixa_tsl e.g., 825bd670b35SErik Nordmark * for IGMP/MLD traffic. 
826bd670b35SErik Nordmark */ 827bd670b35SErik Nordmark 828bd670b35SErik Nordmark /* Caller already set flags */ 829bd670b35SErik Nordmark ASSERT(ixa->ixa_flags & IXAF_IS_IPV4); 830bd670b35SErik Nordmark 831bd670b35SErik Nordmark ASSERT(ixa->ixa_nce == NULL); 832bd670b35SErik Nordmark 833bd670b35SErik Nordmark ixa->ixa_pktlen = ntohs(ipha->ipha_length); 834bd670b35SErik Nordmark ASSERT(ixa->ixa_pktlen == msgdsize(mp)); 835bd670b35SErik Nordmark ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(ipha); 836bd670b35SErik Nordmark ixa->ixa_protocol = ipha->ipha_protocol; 837bd670b35SErik Nordmark 838bd670b35SErik Nordmark /* 839bd670b35SErik Nordmark * Assumes that source routed packets have already been massaged by 840bd670b35SErik Nordmark * the ULP (ip_massage_options) and as a result ipha_dst is the next 841bd670b35SErik Nordmark * hop in the source route. The final destination is used for IPsec 842bd670b35SErik Nordmark * policy and DCE lookup. 843bd670b35SErik Nordmark */ 844bd670b35SErik Nordmark firsthop = ipha->ipha_dst; 845bd670b35SErik Nordmark dst = ip_get_dst(ipha); 846bd670b35SErik Nordmark 847bd670b35SErik Nordmark repeat_ire: 848bd670b35SErik Nordmark error = 0; 849bd670b35SErik Nordmark setsrc = INADDR_ANY; 85044b099c4SSowmini Varadhan ire = ip_select_route_v4(firsthop, ipha->ipha_src, ixa, NULL, 85144b099c4SSowmini Varadhan &setsrc, &error, &multirt); 852bd670b35SErik Nordmark ASSERT(ire != NULL); /* IRE_NOROUTE if none found */ 853bd670b35SErik Nordmark if (error != 0) { 854bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 855bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 856bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - select route", mp, NULL); 857bd670b35SErik Nordmark freemsg(mp); 858bd670b35SErik Nordmark goto done; 859bd670b35SErik Nordmark } 860bd670b35SErik Nordmark 861bd670b35SErik Nordmark if (ire->ire_flags & (RTF_BLACKHOLE|RTF_REJECT)) { 862bd670b35SErik Nordmark /* ire_ill might be 
NULL hence need to skip some code */ 863bd670b35SErik Nordmark if (ixaflags & IXAF_SET_SOURCE) 864bd670b35SErik Nordmark ipha->ipha_src = htonl(INADDR_LOOPBACK); 865bd670b35SErik Nordmark ixa->ixa_fragsize = IP_MAXPACKET; 866bd670b35SErik Nordmark ill = NULL; 867bd670b35SErik Nordmark nce = NULL; 868bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 869bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 870bd670b35SErik Nordmark /* No dce yet; use default one */ 871bd670b35SErik Nordmark error = (ire->ire_sendfn)(ire, mp, ipha, ixa, 872bd670b35SErik Nordmark &ipst->ips_dce_default->dce_ident); 873bd670b35SErik Nordmark goto done; 874bd670b35SErik Nordmark } 875bd670b35SErik Nordmark 876bd670b35SErik Nordmark /* Note that ipha_dst is only used for IRE_MULTICAST */ 877bd670b35SErik Nordmark nce = ire_to_nce(ire, ipha->ipha_dst, NULL); 878bd670b35SErik Nordmark if (nce == NULL) { 879bd670b35SErik Nordmark /* Allocation failure? */ 880bd670b35SErik Nordmark ip_drop_output("ire_to_nce", mp, ill); 881bd670b35SErik Nordmark freemsg(mp); 882bd670b35SErik Nordmark error = ENOBUFS; 883bd670b35SErik Nordmark goto done; 884bd670b35SErik Nordmark } 885bd670b35SErik Nordmark if (nce->nce_is_condemned) { 886bd670b35SErik Nordmark nce_t *nce1; 887bd670b35SErik Nordmark 888bd670b35SErik Nordmark nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_TRUE); 889bd670b35SErik Nordmark nce_refrele(nce); 890bd670b35SErik Nordmark if (nce1 == NULL) { 891bd670b35SErik Nordmark if (!repeat) { 892bd670b35SErik Nordmark /* Try finding a better IRE */ 893bd670b35SErik Nordmark repeat = B_TRUE; 894bd670b35SErik Nordmark ire_refrele(ire); 895bd670b35SErik Nordmark goto repeat_ire; 896bd670b35SErik Nordmark } 897bd670b35SErik Nordmark /* Tried twice - drop packet */ 898bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 899bd670b35SErik Nordmark ip_drop_output("No nce", mp, ill); 900bd670b35SErik Nordmark freemsg(mp); 901bd670b35SErik Nordmark error = 
ENOBUFS; 902bd670b35SErik Nordmark goto done; 903bd670b35SErik Nordmark } 904bd670b35SErik Nordmark nce = nce1; 905bd670b35SErik Nordmark } 906bd670b35SErik Nordmark 907bd670b35SErik Nordmark /* 908bd670b35SErik Nordmark * For multicast with multirt we have a flag passed back from 909bd670b35SErik Nordmark * ire_lookup_multi_ill_v4 since we don't have an IRE for each 910bd670b35SErik Nordmark * possible multicast address. 911bd670b35SErik Nordmark * We also need a flag for multicast since we can't check 912bd670b35SErik Nordmark * whether RTF_MULTIRT is set in ixa_ire for multicast. 913bd670b35SErik Nordmark */ 914bd670b35SErik Nordmark if (multirt) { 915bd670b35SErik Nordmark ixa->ixa_postfragfn = ip_postfrag_multirt_v4; 916bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST; 917bd670b35SErik Nordmark } else { 918bd670b35SErik Nordmark ixa->ixa_postfragfn = ire->ire_postfragfn; 919bd670b35SErik Nordmark ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST; 920bd670b35SErik Nordmark } 921bd670b35SErik Nordmark ASSERT(ixa->ixa_nce == NULL); 922bd670b35SErik Nordmark ixa->ixa_nce = nce; 923bd670b35SErik Nordmark 924bd670b35SErik Nordmark /* 925bd670b35SErik Nordmark * Check for a dce_t with a path mtu. 926bd670b35SErik Nordmark */ 927bd670b35SErik Nordmark dce = dce_lookup_v4(dst, ipst, NULL); 928bd670b35SErik Nordmark ASSERT(dce != NULL); 929bd670b35SErik Nordmark 930bd670b35SErik Nordmark if (!(ixaflags & IXAF_PMTU_DISCOVERY)) { 931bd670b35SErik Nordmark ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 932bd670b35SErik Nordmark } else if (dce->dce_flags & DCEF_PMTU) { 933bd670b35SErik Nordmark /* 934bd670b35SErik Nordmark * To avoid a periodic timer to increase the path MTU we 935bd670b35SErik Nordmark * look at dce_last_change_time each time we send a packet. 
936bd670b35SErik Nordmark */ 937d3d50737SRafael Vanoni now = ddi_get_lbolt64(); 938d3d50737SRafael Vanoni if (TICK_TO_SEC(now) - dce->dce_last_change_time > 939bd670b35SErik Nordmark ipst->ips_ip_pathmtu_interval) { 940bd670b35SErik Nordmark /* 941bd670b35SErik Nordmark * Older than 20 minutes. Drop the path MTU information. 942bd670b35SErik Nordmark */ 943bd670b35SErik Nordmark mutex_enter(&dce->dce_lock); 944bd670b35SErik Nordmark dce->dce_flags &= ~(DCEF_PMTU|DCEF_TOO_SMALL_PMTU); 945d3d50737SRafael Vanoni dce->dce_last_change_time = TICK_TO_SEC(now); 946bd670b35SErik Nordmark mutex_exit(&dce->dce_lock); 947bd670b35SErik Nordmark dce_increment_generation(dce); 948bd670b35SErik Nordmark ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 949bd670b35SErik Nordmark } else { 950bd670b35SErik Nordmark uint_t fragsize; 951bd670b35SErik Nordmark 952bd670b35SErik Nordmark fragsize = ip_get_base_mtu(nce->nce_ill, ire); 953bd670b35SErik Nordmark if (fragsize > dce->dce_pmtu) 954bd670b35SErik Nordmark fragsize = dce->dce_pmtu; 955bd670b35SErik Nordmark ixa->ixa_fragsize = fragsize; 956bd670b35SErik Nordmark } 957bd670b35SErik Nordmark } else { 958bd670b35SErik Nordmark ixa->ixa_fragsize = ip_get_base_mtu(nce->nce_ill, ire); 959bd670b35SErik Nordmark } 960bd670b35SErik Nordmark 961bd670b35SErik Nordmark /* 962bd670b35SErik Nordmark * We use use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp 963bd670b35SErik Nordmark * interface for source address selection. 
964bd670b35SErik Nordmark */ 965bd670b35SErik Nordmark ill = ire_nexthop_ill(ire); 966bd670b35SErik Nordmark 967bd670b35SErik Nordmark if (ixaflags & IXAF_SET_SOURCE) { 968bd670b35SErik Nordmark ipaddr_t src; 969bd670b35SErik Nordmark 970bd670b35SErik Nordmark /* 971bd670b35SErik Nordmark * We use the final destination to get 972bd670b35SErik Nordmark * correct selection for source routed packets 973bd670b35SErik Nordmark */ 974bd670b35SErik Nordmark 975bd670b35SErik Nordmark /* If unreachable we have no ill but need some source */ 976bd670b35SErik Nordmark if (ill == NULL) { 977bd670b35SErik Nordmark src = htonl(INADDR_LOOPBACK); 978bd670b35SErik Nordmark error = 0; 979bd670b35SErik Nordmark } else { 980bd670b35SErik Nordmark error = ip_select_source_v4(ill, setsrc, dst, 981bd670b35SErik Nordmark ixa->ixa_multicast_ifaddr, ixa->ixa_zoneid, ipst, 982bd670b35SErik Nordmark &src, NULL, NULL); 983bd670b35SErik Nordmark } 984bd670b35SErik Nordmark if (error != 0) { 985bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 986bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 987bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - no source", 988bd670b35SErik Nordmark mp, ill); 989bd670b35SErik Nordmark freemsg(mp); 990bd670b35SErik Nordmark goto done; 991bd670b35SErik Nordmark } 992bd670b35SErik Nordmark ipha->ipha_src = src; 993bd670b35SErik Nordmark } else if (ixaflags & IXAF_VERIFY_SOURCE) { 994bd670b35SErik Nordmark /* Check if the IP source is assigned to the host. 
*/ 995bd670b35SErik Nordmark if (!ip_verify_src(mp, ixa, NULL)) { 996bd670b35SErik Nordmark /* Don't send a packet with a source that isn't ours */ 997bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 998bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 999bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - invalid source", 1000bd670b35SErik Nordmark mp, ill); 1001bd670b35SErik Nordmark freemsg(mp); 1002bd670b35SErik Nordmark error = EADDRNOTAVAIL; 1003bd670b35SErik Nordmark goto done; 1004bd670b35SErik Nordmark } 1005bd670b35SErik Nordmark } 1006bd670b35SErik Nordmark 1007bd670b35SErik Nordmark 1008bd670b35SErik Nordmark /* 1009bd670b35SErik Nordmark * Check against global IPsec policy to set the AH/ESP attributes. 1010bd670b35SErik Nordmark * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate. 1011bd670b35SErik Nordmark */ 1012bd670b35SErik Nordmark if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) { 1013bd670b35SErik Nordmark ASSERT(ixa->ixa_ipsec_policy == NULL); 1014bd670b35SErik Nordmark mp = ip_output_attach_policy(mp, ipha, NULL, NULL, ixa); 1015bd670b35SErik Nordmark if (mp == NULL) { 1016bd670b35SErik Nordmark /* MIB and ip_drop_packet already done */ 1017bd670b35SErik Nordmark return (EHOSTUNREACH); /* IPsec policy failure */ 1018bd670b35SErik Nordmark } 1019bd670b35SErik Nordmark } 1020bd670b35SErik Nordmark 1021bd670b35SErik Nordmark if (ill != NULL) { 1022bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 1023bd670b35SErik Nordmark } else { 1024bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 1025bd670b35SErik Nordmark } 1026bd670b35SErik Nordmark 1027bd670b35SErik Nordmark /* 1028bd670b35SErik Nordmark * We update the statistics on the most specific IRE i.e., the first 1029bd670b35SErik Nordmark * one we found. 
1030bd670b35SErik Nordmark * We don't have an IRE when we fragment, hence ire_ob_pkt_count 1031bd670b35SErik Nordmark * can only count the use prior to fragmentation. However the MIB 1032bd670b35SErik Nordmark * counters on the ill will be incremented in post fragmentation. 1033bd670b35SErik Nordmark */ 1034bd670b35SErik Nordmark ire->ire_ob_pkt_count++; 1035bd670b35SErik Nordmark 1036bd670b35SErik Nordmark /* 1037bd670b35SErik Nordmark * Based on ire_type and ire_flags call one of: 1038bd670b35SErik Nordmark * ire_send_local_v4 - for IRE_LOCAL and IRE_LOOPBACK 1039bd670b35SErik Nordmark * ire_send_multirt_v4 - if RTF_MULTIRT 1040bd670b35SErik Nordmark * ire_send_noroute_v4 - if RTF_REJECT or RTF_BLACHOLE 1041bd670b35SErik Nordmark * ire_send_multicast_v4 - for IRE_MULTICAST 1042bd670b35SErik Nordmark * ire_send_broadcast_v4 - for IRE_BROADCAST 1043bd670b35SErik Nordmark * ire_send_wire_v4 - for the rest. 1044bd670b35SErik Nordmark */ 1045bd670b35SErik Nordmark error = (ire->ire_sendfn)(ire, mp, ipha, ixa, &dce->dce_ident); 1046bd670b35SErik Nordmark done: 1047bd670b35SErik Nordmark ire_refrele(ire); 1048bd670b35SErik Nordmark if (dce != NULL) 1049bd670b35SErik Nordmark dce_refrele(dce); 1050bd670b35SErik Nordmark if (ill != NULL) 1051bd670b35SErik Nordmark ill_refrele(ill); 1052bd670b35SErik Nordmark if (ixa->ixa_nce != NULL) 1053bd670b35SErik Nordmark nce_refrele(ixa->ixa_nce); 1054bd670b35SErik Nordmark ixa->ixa_nce = NULL; 1055bd670b35SErik Nordmark return (error); 1056bd670b35SErik Nordmark } 1057bd670b35SErik Nordmark 1058bd670b35SErik Nordmark /* 1059bd670b35SErik Nordmark * ire_sendfn() functions. 
1060bd670b35SErik Nordmark * These functions use the following xmit_attr: 1061bd670b35SErik Nordmark * - ixa_fragsize - read to determine whether or not to fragment 1062bd670b35SErik Nordmark * - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec 1063bd670b35SErik Nordmark * - ixa_ipsec_* are used inside IPsec 1064bd670b35SErik Nordmark * - IXAF_SET_SOURCE - replace IP source in broadcast case. 1065bd670b35SErik Nordmark * - IXAF_LOOPBACK_COPY - for multicast and broadcast 1066bd670b35SErik Nordmark */ 1067bd670b35SErik Nordmark 1068bd670b35SErik Nordmark 1069bd670b35SErik Nordmark /* 1070bd670b35SErik Nordmark * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK 1071bd670b35SErik Nordmark * 1072bd670b35SErik Nordmark * The checks for restrict_interzone_loopback are done in ire_route_recursive. 1073bd670b35SErik Nordmark */ 1074bd670b35SErik Nordmark /* ARGSUSED4 */ 1075bd670b35SErik Nordmark int 1076bd670b35SErik Nordmark ire_send_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1077bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 1078bd670b35SErik Nordmark { 1079bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1080bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1081bd670b35SErik Nordmark ill_t *ill = ire->ire_ill; 1082bd670b35SErik Nordmark ip_recv_attr_t iras; /* NOTE: No bzero for performance */ 1083bd670b35SErik Nordmark uint_t pktlen = ixa->ixa_pktlen; 1084bd670b35SErik Nordmark 1085bd670b35SErik Nordmark /* 1086bd670b35SErik Nordmark * No fragmentation, no nce, no application of IPsec, 1087bd670b35SErik Nordmark * and no ipha_ident assignment. 1088bd670b35SErik Nordmark * 1089bd670b35SErik Nordmark * Note different order between IP provider and FW_HOOKS than in 1090bd670b35SErik Nordmark * send_wire case. 1091bd670b35SErik Nordmark */ 1092bd670b35SErik Nordmark 1093bd670b35SErik Nordmark /* 1094bd670b35SErik Nordmark * DTrace this as ip:::send. 
A packet blocked by FW_HOOKS will fire the 1095bd670b35SErik Nordmark * send probe, but not the receive probe. 1096bd670b35SErik Nordmark */ 1097bd670b35SErik Nordmark DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 1098bd670b35SErik Nordmark ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL, 1099bd670b35SErik Nordmark int, 1); 1100bd670b35SErik Nordmark 1101bd670b35SErik Nordmark if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst)) { 1102bd670b35SErik Nordmark int error; 1103bd670b35SErik Nordmark 1104bd670b35SErik Nordmark DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, 1105bd670b35SErik Nordmark ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 1106bd670b35SErik Nordmark FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1107bd670b35SErik Nordmark ipst->ips_ipv4firewall_loopback_out, 1108bd670b35SErik Nordmark NULL, ill, ipha, mp, mp, 0, ipst, error); 1109bd670b35SErik Nordmark DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp); 1110bd670b35SErik Nordmark if (mp == NULL) 1111bd670b35SErik Nordmark return (error); 1112bd670b35SErik Nordmark 1113bd670b35SErik Nordmark /* 1114bd670b35SErik Nordmark * Even if the destination was changed by the filter we use the 1115bd670b35SErik Nordmark * forwarding decision that was made based on the address 1116bd670b35SErik Nordmark * in ip_output/ip_set_destination. 1117bd670b35SErik Nordmark */ 1118bd670b35SErik Nordmark /* Length could be different */ 1119bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 1120bd670b35SErik Nordmark pktlen = ntohs(ipha->ipha_length); 1121bd670b35SErik Nordmark } 1122bd670b35SErik Nordmark 1123bd670b35SErik Nordmark /* 1124bd670b35SErik Nordmark * If a callback is enabled then we need to know the 1125bd670b35SErik Nordmark * source and destination zoneids for the packet. We already 1126bd670b35SErik Nordmark * have those handy. 
1127bd670b35SErik Nordmark */ 1128bd670b35SErik Nordmark if (ipst->ips_ip4_observe.he_interested) { 1129bd670b35SErik Nordmark zoneid_t szone, dzone; 1130bd670b35SErik Nordmark zoneid_t stackzoneid; 1131bd670b35SErik Nordmark 1132bd670b35SErik Nordmark stackzoneid = netstackid_to_zoneid( 1133bd670b35SErik Nordmark ipst->ips_netstack->netstack_stackid); 1134bd670b35SErik Nordmark 1135bd670b35SErik Nordmark if (stackzoneid == GLOBAL_ZONEID) { 1136bd670b35SErik Nordmark /* Shared-IP zone */ 1137bd670b35SErik Nordmark dzone = ire->ire_zoneid; 1138bd670b35SErik Nordmark szone = ixa->ixa_zoneid; 1139bd670b35SErik Nordmark } else { 1140bd670b35SErik Nordmark szone = dzone = stackzoneid; 1141bd670b35SErik Nordmark } 1142bd670b35SErik Nordmark ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, ipst); 1143bd670b35SErik Nordmark } 1144bd670b35SErik Nordmark 1145bd670b35SErik Nordmark /* Handle lo0 stats */ 1146bd670b35SErik Nordmark ipst->ips_loopback_packets++; 1147bd670b35SErik Nordmark 1148bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies */ 1149bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 1150bd670b35SErik Nordmark iras.ira_pktlen = pktlen; 1151bd670b35SErik Nordmark 1152bd670b35SErik Nordmark if (!IS_SIMPLE_IPH(ipha)) { 1153bd670b35SErik Nordmark ip_output_local_options(ipha, ipst); 1154bd670b35SErik Nordmark iras.ira_flags |= IRAF_IPV4_OPTIONS; 1155bd670b35SErik Nordmark } 1156bd670b35SErik Nordmark 1157bd670b35SErik Nordmark if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst)) { 1158bd670b35SErik Nordmark int error; 1159bd670b35SErik Nordmark 1160bd670b35SErik Nordmark DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill, 1161bd670b35SErik Nordmark ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp); 1162bd670b35SErik Nordmark FW_HOOKS(ipst->ips_ip4_loopback_in_event, 1163bd670b35SErik Nordmark ipst->ips_ipv4firewall_loopback_in, 1164bd670b35SErik Nordmark ill, NULL, ipha, mp, mp, 0, ipst, error); 1165bd670b35SErik Nordmark 1166bd670b35SErik Nordmark 
DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp); 1167bd670b35SErik Nordmark if (mp == NULL) { 1168bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 1169bd670b35SErik Nordmark return (error); 1170bd670b35SErik Nordmark } 1171bd670b35SErik Nordmark /* 1172bd670b35SErik Nordmark * Even if the destination was changed by the filter we use the 1173bd670b35SErik Nordmark * forwarding decision that was made based on the address 1174bd670b35SErik Nordmark * in ip_output/ip_set_destination. 1175bd670b35SErik Nordmark */ 1176bd670b35SErik Nordmark /* Length could be different */ 1177bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 1178bd670b35SErik Nordmark pktlen = iras.ira_pktlen = ntohs(ipha->ipha_length); 1179bd670b35SErik Nordmark } 1180bd670b35SErik Nordmark 1181bd670b35SErik Nordmark DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 1182bd670b35SErik Nordmark ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL, 1183bd670b35SErik Nordmark int, 1); 1184bd670b35SErik Nordmark 1185bd670b35SErik Nordmark ire->ire_ib_pkt_count++; 1186bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 1187bd670b35SErik Nordmark UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, pktlen); 1188bd670b35SErik Nordmark 1189bd670b35SErik Nordmark /* Destined to ire_zoneid - use that for fanout */ 1190bd670b35SErik Nordmark iras.ira_zoneid = ire->ire_zoneid; 1191bd670b35SErik Nordmark 1192bd670b35SErik Nordmark if (is_system_labeled()) { 1193bd670b35SErik Nordmark iras.ira_flags |= IRAF_SYSTEM_LABELED; 1194bd670b35SErik Nordmark 1195bd670b35SErik Nordmark /* 1196bd670b35SErik Nordmark * This updates ira_cred, ira_tsl and ira_free_flags based 1197bd670b35SErik Nordmark * on the label. We don't expect this to ever fail for 1198bd670b35SErik Nordmark * loopback packets, so we silently drop the packet should it 1199bd670b35SErik Nordmark * fail. 
1200bd670b35SErik Nordmark */ 1201bd670b35SErik Nordmark if (!tsol_get_pkt_label(mp, IPV4_VERSION, &iras)) { 1202bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1203bd670b35SErik Nordmark ip_drop_input("tsol_get_pkt_label", mp, ill); 1204bd670b35SErik Nordmark freemsg(mp); 1205bd670b35SErik Nordmark return (0); 1206bd670b35SErik Nordmark } 1207bd670b35SErik Nordmark ASSERT(iras.ira_tsl != NULL); 1208bd670b35SErik Nordmark 1209bd670b35SErik Nordmark /* tsol_get_pkt_label sometimes does pullupmsg */ 1210bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 1211bd670b35SErik Nordmark } 1212bd670b35SErik Nordmark 1213bd670b35SErik Nordmark ip_fanout_v4(mp, ipha, &iras); 1214bd670b35SErik Nordmark 1215bd670b35SErik Nordmark /* We moved any IPsec refs from ixa to iras */ 1216bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 1217bd670b35SErik Nordmark return (0); 1218bd670b35SErik Nordmark } 1219bd670b35SErik Nordmark 1220bd670b35SErik Nordmark /* 1221bd670b35SErik Nordmark * ire_sendfn for IRE_BROADCAST 1222bd670b35SErik Nordmark * If the broadcast address is present on multiple ills and ixa_ifindex 1223bd670b35SErik Nordmark * isn't set, then we generate 1224bd670b35SErik Nordmark * a separate datagram (potentially with different source address) for 1225bd670b35SErik Nordmark * those ills. In any case, only one copy is looped back to ip_input_v4. 
1226bd670b35SErik Nordmark */ 1227bd670b35SErik Nordmark int 1228bd670b35SErik Nordmark ire_send_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1229bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 1230bd670b35SErik Nordmark { 1231bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1232bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1233bd670b35SErik Nordmark irb_t *irb = ire->ire_bucket; 1234bd670b35SErik Nordmark ire_t *ire1; 1235bd670b35SErik Nordmark mblk_t *mp1; 1236bd670b35SErik Nordmark ipha_t *ipha1; 1237bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 1238bd670b35SErik Nordmark nce_t *nce1, *nce_orig; 1239bd670b35SErik Nordmark 1240bd670b35SErik Nordmark /* 1241bd670b35SErik Nordmark * Unless ire_send_multirt_v4 already set a ttl, force the 1242bd670b35SErik Nordmark * ttl to a smallish value. 1243bd670b35SErik Nordmark */ 1244bd670b35SErik Nordmark if (!(ixa->ixa_flags & IXAF_NO_TTL_CHANGE)) { 1245bd670b35SErik Nordmark /* 1246bd670b35SErik Nordmark * To avoid broadcast storms, we usually set the TTL to 1 for 1247bd670b35SErik Nordmark * broadcasts. This can 1248bd670b35SErik Nordmark * be overridden stack-wide through the ip_broadcast_ttl 1249bd670b35SErik Nordmark * ndd tunable, or on a per-connection basis through the 1250bd670b35SErik Nordmark * IP_BROADCAST_TTL socket option. 1251bd670b35SErik Nordmark * 1252bd670b35SErik Nordmark * If SO_DONTROUTE/IXAF_DONTROUTE is set, then ire_send_wire_v4 1253bd670b35SErik Nordmark * will force ttl to one after we've set this. 
1254bd670b35SErik Nordmark */ 1255bd670b35SErik Nordmark if (ixaflags & IXAF_BROADCAST_TTL_SET) 1256bd670b35SErik Nordmark ipha->ipha_ttl = ixa->ixa_broadcast_ttl; 1257bd670b35SErik Nordmark else 1258bd670b35SErik Nordmark ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl; 1259bd670b35SErik Nordmark } 1260bd670b35SErik Nordmark /* 1261bd670b35SErik Nordmark * Make sure we get a loopback copy (after IPsec and frag) 1262bd670b35SErik Nordmark * Skip hardware checksum so that loopback copy is checksumed. 1263bd670b35SErik Nordmark */ 1264bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1265bd670b35SErik Nordmark 1266bd670b35SErik Nordmark /* Do we need to potentially generate multiple copies? */ 1267bd670b35SErik Nordmark if (irb->irb_ire_cnt == 1 || ixa->ixa_ifindex != 0) 1268bd670b35SErik Nordmark return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1269bd670b35SErik Nordmark 1270bd670b35SErik Nordmark /* 1271bd670b35SErik Nordmark * Loop over all IRE_BROADCAST in the bucket (might only be one). 1272bd670b35SErik Nordmark * Note that everything in the bucket has the same destination address. 1273bd670b35SErik Nordmark */ 1274bd670b35SErik Nordmark irb_refhold(irb); 1275bd670b35SErik Nordmark for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1276bd670b35SErik Nordmark /* We do the main IRE after the end of the loop */ 1277bd670b35SErik Nordmark if (ire1 == ire) 1278bd670b35SErik Nordmark continue; 1279bd670b35SErik Nordmark 1280bd670b35SErik Nordmark /* 1281bd670b35SErik Nordmark * Only IREs for the same IP address should be in the same 1282bd670b35SErik Nordmark * bucket. 1283bd670b35SErik Nordmark * But could have IRE_HOSTs in the case of CGTP. 1284bd670b35SErik Nordmark * If we find any multirt routes we bail out of the loop 1285bd670b35SErik Nordmark * and just do the single packet at the end; ip_postfrag_multirt 1286bd670b35SErik Nordmark * will duplicate the packet. 
1287bd670b35SErik Nordmark */ 1288bd670b35SErik Nordmark ASSERT(ire1->ire_addr == ire->ire_addr); 1289bd670b35SErik Nordmark if (!(ire1->ire_type & IRE_BROADCAST)) 1290bd670b35SErik Nordmark continue; 1291bd670b35SErik Nordmark 1292bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire1)) 1293bd670b35SErik Nordmark continue; 1294bd670b35SErik Nordmark 1295bd670b35SErik Nordmark if (ixa->ixa_zoneid != ALL_ZONES && 1296bd670b35SErik Nordmark ire->ire_zoneid != ire1->ire_zoneid) 1297bd670b35SErik Nordmark continue; 1298bd670b35SErik Nordmark 1299bd670b35SErik Nordmark ASSERT(ire->ire_ill != ire1->ire_ill && ire1->ire_ill != NULL); 1300bd670b35SErik Nordmark 1301bd670b35SErik Nordmark if (ire1->ire_flags & RTF_MULTIRT) 1302bd670b35SErik Nordmark break; 1303bd670b35SErik Nordmark 1304bd670b35SErik Nordmark /* 1305bd670b35SErik Nordmark * For IPMP we only send for the ipmp_ill. arp_nce_init() will 1306bd670b35SErik Nordmark * ensure that this goes out on the cast_ill. 1307bd670b35SErik Nordmark */ 1308bd670b35SErik Nordmark if (IS_UNDER_IPMP(ire1->ire_ill)) 1309bd670b35SErik Nordmark continue; 1310bd670b35SErik Nordmark 1311bd670b35SErik Nordmark mp1 = copymsg(mp); 1312bd670b35SErik Nordmark if (mp1 == NULL) { 1313bd670b35SErik Nordmark BUMP_MIB(ire1->ire_ill->ill_ip_mib, 1314bd670b35SErik Nordmark ipIfStatsOutDiscards); 1315bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", 1316bd670b35SErik Nordmark mp, ire1->ire_ill); 1317bd670b35SErik Nordmark continue; 1318bd670b35SErik Nordmark } 1319bd670b35SErik Nordmark 1320bd670b35SErik Nordmark ipha1 = (ipha_t *)mp1->b_rptr; 1321bd670b35SErik Nordmark if (ixa->ixa_flags & IXAF_SET_SOURCE) { 1322bd670b35SErik Nordmark /* 1323bd670b35SErik Nordmark * Need to pick a different source address for each 1324bd670b35SErik Nordmark * interface. If we have a global IPsec policy and 1325bd670b35SErik Nordmark * no per-socket policy then we punt to 1326bd670b35SErik Nordmark * ip_output_simple_v4 using a separate ip_xmit_attr_t. 
1327bd670b35SErik Nordmark */ 1328bd670b35SErik Nordmark if (ixaflags & IXAF_IPSEC_GLOBAL_POLICY) { 1329bd670b35SErik Nordmark ip_output_simple_broadcast(ixa, mp1); 1330bd670b35SErik Nordmark continue; 1331bd670b35SErik Nordmark } 1332bd670b35SErik Nordmark /* Pick a new source address for each interface */ 1333bd670b35SErik Nordmark if (ip_select_source_v4(ire1->ire_ill, INADDR_ANY, 1334bd670b35SErik Nordmark ipha1->ipha_dst, INADDR_ANY, ixa->ixa_zoneid, ipst, 1335bd670b35SErik Nordmark &ipha1->ipha_src, NULL, NULL) != 0) { 1336bd670b35SErik Nordmark BUMP_MIB(ire1->ire_ill->ill_ip_mib, 1337bd670b35SErik Nordmark ipIfStatsOutDiscards); 1338bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - select " 1339bd670b35SErik Nordmark "broadcast source", mp1, ire1->ire_ill); 1340bd670b35SErik Nordmark freemsg(mp1); 1341bd670b35SErik Nordmark continue; 1342bd670b35SErik Nordmark } 1343bd670b35SErik Nordmark /* 1344bd670b35SErik Nordmark * Check against global IPsec policy to set the AH/ESP 1345bd670b35SErik Nordmark * attributes. IPsec will set IXAF_IPSEC_* and 1346bd670b35SErik Nordmark * ixa_ipsec_* as appropriate. 
1347bd670b35SErik Nordmark */ 1348bd670b35SErik Nordmark if (!(ixaflags & (IXAF_NO_IPSEC|IXAF_IPSEC_SECURE))) { 1349bd670b35SErik Nordmark ASSERT(ixa->ixa_ipsec_policy == NULL); 1350bd670b35SErik Nordmark mp1 = ip_output_attach_policy(mp1, ipha, NULL, 1351bd670b35SErik Nordmark NULL, ixa); 1352bd670b35SErik Nordmark if (mp1 == NULL) { 1353bd670b35SErik Nordmark /* 1354bd670b35SErik Nordmark * MIB and ip_drop_packet already 1355bd670b35SErik Nordmark * done 1356bd670b35SErik Nordmark */ 1357bd670b35SErik Nordmark continue; 1358bd670b35SErik Nordmark } 1359bd670b35SErik Nordmark } 1360bd670b35SErik Nordmark } 1361bd670b35SErik Nordmark /* Make sure we have an NCE on this ill */ 1362bd670b35SErik Nordmark nce1 = arp_nce_init(ire1->ire_ill, ire1->ire_addr, 1363bd670b35SErik Nordmark ire1->ire_type); 1364bd670b35SErik Nordmark if (nce1 == NULL) { 1365bd670b35SErik Nordmark BUMP_MIB(ire1->ire_ill->ill_ip_mib, 1366bd670b35SErik Nordmark ipIfStatsOutDiscards); 1367bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - broadcast nce", 1368bd670b35SErik Nordmark mp1, ire1->ire_ill); 1369bd670b35SErik Nordmark freemsg(mp1); 1370bd670b35SErik Nordmark continue; 1371bd670b35SErik Nordmark } 1372bd670b35SErik Nordmark nce_orig = ixa->ixa_nce; 1373bd670b35SErik Nordmark ixa->ixa_nce = nce1; 1374bd670b35SErik Nordmark 1375bd670b35SErik Nordmark ire_refhold(ire1); 1376bd670b35SErik Nordmark /* 1377bd670b35SErik Nordmark * Ignore any errors here. 
We just collect the errno for 1378bd670b35SErik Nordmark * the main ire below 1379bd670b35SErik Nordmark */ 1380bd670b35SErik Nordmark (void) ire_send_wire_v4(ire1, mp1, ipha1, ixa, identp); 1381bd670b35SErik Nordmark ire_refrele(ire1); 1382bd670b35SErik Nordmark 1383bd670b35SErik Nordmark ixa->ixa_nce = nce_orig; 1384bd670b35SErik Nordmark nce_refrele(nce1); 1385bd670b35SErik Nordmark 1386bd670b35SErik Nordmark ixa->ixa_flags &= ~IXAF_LOOPBACK_COPY; 1387bd670b35SErik Nordmark } 1388bd670b35SErik Nordmark irb_refrele(irb); 1389bd670b35SErik Nordmark /* Finally, the main one */ 1390bd670b35SErik Nordmark 1391bd670b35SErik Nordmark /* 1392bd670b35SErik Nordmark * For IPMP we only send broadcasts on the ipmp_ill. 1393bd670b35SErik Nordmark */ 1394bd670b35SErik Nordmark if (IS_UNDER_IPMP(ire->ire_ill)) { 1395bd670b35SErik Nordmark freemsg(mp); 1396bd670b35SErik Nordmark return (0); 1397bd670b35SErik Nordmark } 1398bd670b35SErik Nordmark 1399bd670b35SErik Nordmark return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1400bd670b35SErik Nordmark } 1401bd670b35SErik Nordmark 1402bd670b35SErik Nordmark /* 1403bd670b35SErik Nordmark * Send a packet using a different source address and different 1404bd670b35SErik Nordmark * IPsec policy. 
1405bd670b35SErik Nordmark */ 1406bd670b35SErik Nordmark static void 1407bd670b35SErik Nordmark ip_output_simple_broadcast(ip_xmit_attr_t *ixa, mblk_t *mp) 1408bd670b35SErik Nordmark { 1409bd670b35SErik Nordmark ip_xmit_attr_t ixas; 1410bd670b35SErik Nordmark 1411bd670b35SErik Nordmark bzero(&ixas, sizeof (ixas)); 1412bd670b35SErik Nordmark ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; 1413bd670b35SErik Nordmark ixas.ixa_zoneid = ixa->ixa_zoneid; 1414bd670b35SErik Nordmark ixas.ixa_ifindex = 0; 1415bd670b35SErik Nordmark ixas.ixa_ipst = ixa->ixa_ipst; 1416bd670b35SErik Nordmark ixas.ixa_cred = ixa->ixa_cred; 1417bd670b35SErik Nordmark ixas.ixa_cpid = ixa->ixa_cpid; 1418bd670b35SErik Nordmark ixas.ixa_tsl = ixa->ixa_tsl; 1419bd670b35SErik Nordmark ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1420bd670b35SErik Nordmark 1421bd670b35SErik Nordmark (void) ip_output_simple(mp, &ixas); 1422bd670b35SErik Nordmark ixa_cleanup(&ixas); 1423bd670b35SErik Nordmark } 1424bd670b35SErik Nordmark 1425bd670b35SErik Nordmark 1426bd670b35SErik Nordmark static void 1427bd670b35SErik Nordmark multirt_check_v4(ire_t *ire, ipha_t *ipha, ip_xmit_attr_t *ixa) 1428bd670b35SErik Nordmark { 1429bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1430bd670b35SErik Nordmark 1431bd670b35SErik Nordmark /* Limit the TTL on multirt packets */ 1432bd670b35SErik Nordmark if (ire->ire_type & IRE_MULTICAST) { 1433bd670b35SErik Nordmark if (ipha->ipha_ttl > 1) { 1434bd670b35SErik Nordmark ip2dbg(("ire_send_multirt_v4: forcing multicast " 1435bd670b35SErik Nordmark "multirt TTL to 1 (was %d), dst 0x%08x\n", 1436bd670b35SErik Nordmark ipha->ipha_ttl, ntohl(ire->ire_addr))); 1437bd670b35SErik Nordmark ipha->ipha_ttl = 1; 1438bd670b35SErik Nordmark } 1439bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_NO_TTL_CHANGE; 1440bd670b35SErik Nordmark } else if ((ipst->ips_ip_multirt_ttl > 0) && 1441bd670b35SErik Nordmark (ipha->ipha_ttl > ipst->ips_ip_multirt_ttl)) { 1442bd670b35SErik Nordmark ipha->ipha_ttl = 
ipst->ips_ip_multirt_ttl; 1443bd670b35SErik Nordmark /* 1444bd670b35SErik Nordmark * Need to ensure we don't increase the ttl should we go through 1445bd670b35SErik Nordmark * ire_send_broadcast or multicast. 1446bd670b35SErik Nordmark */ 1447bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_NO_TTL_CHANGE; 1448bd670b35SErik Nordmark } 1449bd670b35SErik Nordmark } 1450bd670b35SErik Nordmark 1451bd670b35SErik Nordmark /* 1452bd670b35SErik Nordmark * ire_sendfn for IRE_MULTICAST 1453bd670b35SErik Nordmark */ 1454bd670b35SErik Nordmark int 1455bd670b35SErik Nordmark ire_send_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1456bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 1457bd670b35SErik Nordmark { 1458bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1459bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1460bd670b35SErik Nordmark ill_t *ill = ire->ire_ill; 1461bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 1462bd670b35SErik Nordmark 1463bd670b35SErik Nordmark /* 1464bd670b35SErik Nordmark * The IRE_MULTICAST is the same whether or not multirt is in use. 1465bd670b35SErik Nordmark * Hence we need special-case code. 1466bd670b35SErik Nordmark */ 1467bd670b35SErik Nordmark if (ixaflags & IXAF_MULTIRT_MULTICAST) 1468bd670b35SErik Nordmark multirt_check_v4(ire, ipha, ixa); 1469bd670b35SErik Nordmark 1470bd670b35SErik Nordmark /* 1471bd670b35SErik Nordmark * Check if anything in ip_input_v4 wants a copy of the transmitted 1472bd670b35SErik Nordmark * packet (after IPsec and fragmentation) 1473bd670b35SErik Nordmark * 1474bd670b35SErik Nordmark * 1. Multicast routers always need a copy unless SO_DONTROUTE is set 1475bd670b35SErik Nordmark * RSVP and the rsvp daemon is an example of a 1476bd670b35SErik Nordmark * protocol and user level process that 1477bd670b35SErik Nordmark * handles it's own routing. Hence, it uses the 1478bd670b35SErik Nordmark * SO_DONTROUTE option to accomplish this. 1479bd670b35SErik Nordmark * 2. 
If the sender has set IP_MULTICAST_LOOP, then we just 1480bd670b35SErik Nordmark * check whether there are any receivers for the group on the ill 1481bd670b35SErik Nordmark * (ignoring the zoneid). 1482bd670b35SErik Nordmark * 3. If IP_MULTICAST_LOOP is not set, then we check if there are 1483bd670b35SErik Nordmark * any members in other shared-IP zones. 1484bd670b35SErik Nordmark * If such members exist, then we indicate that the sending zone 1485bd670b35SErik Nordmark * shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP 1486bd670b35SErik Nordmark * behavior. 1487bd670b35SErik Nordmark * 1488bd670b35SErik Nordmark * When we loopback we skip hardware checksum to make sure loopback 1489bd670b35SErik Nordmark * copy is checksumed. 1490bd670b35SErik Nordmark * 1491bd670b35SErik Nordmark * Note that ire_ill is the upper in the case of IPMP. 1492bd670b35SErik Nordmark */ 1493bd670b35SErik Nordmark ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM); 1494bd670b35SErik Nordmark if (ipst->ips_ip_g_mrouter && ill->ill_mrouter_cnt > 0 && 1495bd670b35SErik Nordmark !(ixaflags & IXAF_DONTROUTE)) { 1496bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1497bd670b35SErik Nordmark } else if (ixaflags & IXAF_MULTICAST_LOOP) { 1498bd670b35SErik Nordmark /* 1499bd670b35SErik Nordmark * If this zone or any other zone has members then loopback 1500bd670b35SErik Nordmark * a copy. 1501bd670b35SErik Nordmark */ 1502bd670b35SErik Nordmark if (ill_hasmembers_v4(ill, ipha->ipha_dst)) 1503bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1504bd670b35SErik Nordmark } else if (ipst->ips_netstack->netstack_numzones > 1) { 1505bd670b35SErik Nordmark /* 1506bd670b35SErik Nordmark * This zone should not have a copy. But there are some other 1507bd670b35SErik Nordmark * zones which might have members. 
1508bd670b35SErik Nordmark */ 1509bd670b35SErik Nordmark if (ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1510bd670b35SErik Nordmark ixa->ixa_zoneid)) { 1511bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_NO_LOOP_ZONEID_SET; 1512bd670b35SErik Nordmark ixa->ixa_no_loop_zoneid = ixa->ixa_zoneid; 1513bd670b35SErik Nordmark ixa->ixa_flags |= IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM; 1514bd670b35SErik Nordmark } 1515bd670b35SErik Nordmark } 1516bd670b35SErik Nordmark 1517bd670b35SErik Nordmark /* 1518bd670b35SErik Nordmark * Unless ire_send_multirt_v4 or icmp_output_hdrincl already set a ttl, 1519bd670b35SErik Nordmark * force the ttl to the IP_MULTICAST_TTL value 1520bd670b35SErik Nordmark */ 1521bd670b35SErik Nordmark if (!(ixaflags & IXAF_NO_TTL_CHANGE)) { 1522bd670b35SErik Nordmark ipha->ipha_ttl = ixa->ixa_multicast_ttl; 1523bd670b35SErik Nordmark } 1524bd670b35SErik Nordmark 1525bd670b35SErik Nordmark return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1526bd670b35SErik Nordmark } 1527bd670b35SErik Nordmark 1528bd670b35SErik Nordmark /* 1529bd670b35SErik Nordmark * ire_sendfn for IREs with RTF_MULTIRT 1530bd670b35SErik Nordmark */ 1531bd670b35SErik Nordmark int 1532bd670b35SErik Nordmark ire_send_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1533bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 1534bd670b35SErik Nordmark { 1535bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1536bd670b35SErik Nordmark 1537bd670b35SErik Nordmark multirt_check_v4(ire, ipha, ixa); 1538bd670b35SErik Nordmark 1539bd670b35SErik Nordmark if (ire->ire_type & IRE_MULTICAST) 1540bd670b35SErik Nordmark return (ire_send_multicast_v4(ire, mp, ipha, ixa, identp)); 1541bd670b35SErik Nordmark else if (ire->ire_type & IRE_BROADCAST) 1542bd670b35SErik Nordmark return (ire_send_broadcast_v4(ire, mp, ipha, ixa, identp)); 1543bd670b35SErik Nordmark else 1544bd670b35SErik Nordmark return (ire_send_wire_v4(ire, mp, ipha, ixa, identp)); 1545bd670b35SErik Nordmark } 
1546bd670b35SErik Nordmark 1547bd670b35SErik Nordmark /* 1548bd670b35SErik Nordmark * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE 1549bd670b35SErik Nordmark */ 1550bd670b35SErik Nordmark int 1551bd670b35SErik Nordmark ire_send_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1552bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 1553bd670b35SErik Nordmark { 1554bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1555bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1556bd670b35SErik Nordmark ill_t *ill; 1557bd670b35SErik Nordmark ip_recv_attr_t iras; 1558bd670b35SErik Nordmark boolean_t dummy; 1559bd670b35SErik Nordmark 1560bd670b35SErik Nordmark /* We assign an IP ident for nice errors */ 15611a5e258fSJosef 'Jeff' Sipek ipha->ipha_ident = atomic_inc_32_nv(identp); 1562bd670b35SErik Nordmark 1563bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutNoRoutes); 1564bd670b35SErik Nordmark 1565bd670b35SErik Nordmark if (ire->ire_type & IRE_NOROUTE) { 1566bd670b35SErik Nordmark /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */ 1567bd670b35SErik Nordmark ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, 1568bd670b35SErik Nordmark RTA_DST, ipst); 1569bd670b35SErik Nordmark } 1570bd670b35SErik Nordmark 1571bd670b35SErik Nordmark if (ire->ire_flags & RTF_BLACKHOLE) { 1572bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp, NULL); 1573bd670b35SErik Nordmark freemsg(mp); 1574bd670b35SErik Nordmark /* No error even for local senders - silent blackhole */ 1575bd670b35SErik Nordmark return (0); 1576bd670b35SErik Nordmark } 1577bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp, NULL); 1578bd670b35SErik Nordmark 1579bd670b35SErik Nordmark /* 1580bd670b35SErik Nordmark * We need an ill_t for the ip_recv_attr_t even though this packet 1581bd670b35SErik Nordmark * was never received and icmp_unreachable doesn't currently use 1582bd670b35SErik Nordmark * 
ira_ill. 1583bd670b35SErik Nordmark */ 1584bd670b35SErik Nordmark ill = ill_lookup_on_name("lo0", B_FALSE, 1585bd670b35SErik Nordmark !(ixa->ixa_flags & IRAF_IS_IPV4), &dummy, ipst); 1586bd670b35SErik Nordmark if (ill == NULL) { 1587bd670b35SErik Nordmark freemsg(mp); 1588bd670b35SErik Nordmark return (EHOSTUNREACH); 1589bd670b35SErik Nordmark } 1590bd670b35SErik Nordmark 1591bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 1592bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies */ 1593bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 1594bd670b35SErik Nordmark 1595bd670b35SErik Nordmark if (ip_source_routed(ipha, ipst)) { 1596bd670b35SErik Nordmark icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, &iras); 1597bd670b35SErik Nordmark } else { 1598bd670b35SErik Nordmark icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, &iras); 1599bd670b35SErik Nordmark } 1600bd670b35SErik Nordmark /* We moved any IPsec refs from ixa to iras */ 1601bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 1602bd670b35SErik Nordmark ill_refrele(ill); 1603bd670b35SErik Nordmark return (EHOSTUNREACH); 1604bd670b35SErik Nordmark } 1605bd670b35SErik Nordmark 1606bd670b35SErik Nordmark /* 1607bd670b35SErik Nordmark * Calculate a checksum ignoring any hardware capabilities 1608bd670b35SErik Nordmark * 1609bd670b35SErik Nordmark * Returns B_FALSE if the packet was too short for the checksum. Caller 1610bd670b35SErik Nordmark * should free and do stats. 
1611bd670b35SErik Nordmark */ 1612bd670b35SErik Nordmark static boolean_t 1613bd670b35SErik Nordmark ip_output_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_xmit_attr_t *ixa) 1614bd670b35SErik Nordmark { 1615bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1616bd670b35SErik Nordmark uint_t pktlen = ixa->ixa_pktlen; 1617bd670b35SErik Nordmark uint16_t *cksump; 1618bd670b35SErik Nordmark uint32_t cksum; 1619bd670b35SErik Nordmark uint8_t protocol = ixa->ixa_protocol; 1620bd670b35SErik Nordmark uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; 1621bd670b35SErik Nordmark ipaddr_t dst = ipha->ipha_dst; 1622bd670b35SErik Nordmark ipaddr_t src = ipha->ipha_src; 1623bd670b35SErik Nordmark 1624bd670b35SErik Nordmark /* Just in case it contained garbage */ 1625bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; 1626bd670b35SErik Nordmark 1627bd670b35SErik Nordmark /* 1628bd670b35SErik Nordmark * Calculate ULP checksum 1629bd670b35SErik Nordmark */ 1630bd670b35SErik Nordmark if (protocol == IPPROTO_TCP) { 1631bd670b35SErik Nordmark cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length); 1632bd670b35SErik Nordmark cksum = IP_TCP_CSUM_COMP; 1633bd670b35SErik Nordmark } else if (protocol == IPPROTO_UDP) { 1634bd670b35SErik Nordmark cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length); 1635bd670b35SErik Nordmark cksum = IP_UDP_CSUM_COMP; 1636bd670b35SErik Nordmark } else if (protocol == IPPROTO_SCTP) { 1637bd670b35SErik Nordmark sctp_hdr_t *sctph; 1638bd670b35SErik Nordmark 1639bd670b35SErik Nordmark ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph))); 1640bd670b35SErik Nordmark sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length); 1641bd670b35SErik Nordmark /* 1642bd670b35SErik Nordmark * Zero out the checksum field to ensure proper 1643bd670b35SErik Nordmark * checksum calculation. 
1644bd670b35SErik Nordmark */ 1645bd670b35SErik Nordmark sctph->sh_chksum = 0; 1646bd670b35SErik Nordmark #ifdef DEBUG 1647bd670b35SErik Nordmark if (!skip_sctp_cksum) 1648bd670b35SErik Nordmark #endif 1649bd670b35SErik Nordmark sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length); 1650bd670b35SErik Nordmark goto ip_hdr_cksum; 1651bd670b35SErik Nordmark } else { 1652bd670b35SErik Nordmark goto ip_hdr_cksum; 1653bd670b35SErik Nordmark } 1654bd670b35SErik Nordmark 1655bd670b35SErik Nordmark /* ULP puts the checksum field is in the first mblk */ 1656bd670b35SErik Nordmark ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr); 1657bd670b35SErik Nordmark 1658bd670b35SErik Nordmark /* 1659bd670b35SErik Nordmark * We accumulate the pseudo header checksum in cksum. 1660bd670b35SErik Nordmark * This is pretty hairy code, so watch close. One 1661bd670b35SErik Nordmark * thing to keep in mind is that UDP and TCP have 1662bd670b35SErik Nordmark * stored their respective datagram lengths in their 1663bd670b35SErik Nordmark * checksum fields. This lines things up real nice. 1664bd670b35SErik Nordmark */ 1665bd670b35SErik Nordmark cksum += (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 1666bd670b35SErik Nordmark 1667bd670b35SErik Nordmark cksum = IP_CSUM(mp, ip_hdr_length, cksum); 1668bd670b35SErik Nordmark /* 1669bd670b35SErik Nordmark * For UDP/IPv4 a zero means that the packets wasn't checksummed. 
1670bd670b35SErik Nordmark * Change to 0xffff 1671bd670b35SErik Nordmark */ 1672bd670b35SErik Nordmark if (protocol == IPPROTO_UDP && cksum == 0) 1673bd670b35SErik Nordmark *cksump = ~cksum; 1674bd670b35SErik Nordmark else 1675bd670b35SErik Nordmark *cksump = cksum; 1676bd670b35SErik Nordmark 1677bd670b35SErik Nordmark IP_STAT(ipst, ip_out_sw_cksum); 1678bd670b35SErik Nordmark IP_STAT_UPDATE(ipst, ip_out_sw_cksum_bytes, pktlen); 1679bd670b35SErik Nordmark 1680bd670b35SErik Nordmark ip_hdr_cksum: 1681bd670b35SErik Nordmark /* Calculate IPv4 header checksum */ 1682bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1683bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1684bd670b35SErik Nordmark return (B_TRUE); 1685bd670b35SErik Nordmark } 1686bd670b35SErik Nordmark 1687bd670b35SErik Nordmark /* 1688bd670b35SErik Nordmark * Calculate the ULP checksum - try to use hardware. 1689bd670b35SErik Nordmark * In the case of MULTIRT, broadcast or multicast the 1690bd670b35SErik Nordmark * IXAF_NO_HW_CKSUM is set in which case we use software. 1691bd670b35SErik Nordmark * 1692bd670b35SErik Nordmark * If the hardware supports IP header checksum offload; then clear the 1693bd670b35SErik Nordmark * contents of IP header checksum field as expected by NIC. 1694bd670b35SErik Nordmark * Do this only if we offloaded either full or partial sum. 1695bd670b35SErik Nordmark * 1696bd670b35SErik Nordmark * Returns B_FALSE if the packet was too short for the checksum. Caller 1697bd670b35SErik Nordmark * should free and do stats. 
1698bd670b35SErik Nordmark */ 1699bd670b35SErik Nordmark static boolean_t 1700bd670b35SErik Nordmark ip_output_cksum_v4(iaflags_t ixaflags, mblk_t *mp, ipha_t *ipha, 1701bd670b35SErik Nordmark ip_xmit_attr_t *ixa, ill_t *ill) 1702bd670b35SErik Nordmark { 1703bd670b35SErik Nordmark uint_t pktlen = ixa->ixa_pktlen; 1704bd670b35SErik Nordmark uint16_t *cksump; 1705bd670b35SErik Nordmark uint16_t hck_flags; 1706bd670b35SErik Nordmark uint32_t cksum; 1707bd670b35SErik Nordmark uint8_t protocol = ixa->ixa_protocol; 1708bd670b35SErik Nordmark uint16_t ip_hdr_length = ixa->ixa_ip_hdr_length; 1709bd670b35SErik Nordmark 1710bd670b35SErik Nordmark if ((ixaflags & IXAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 1711bd670b35SErik Nordmark !dohwcksum) { 1712bd670b35SErik Nordmark return (ip_output_sw_cksum_v4(mp, ipha, ixa)); 1713bd670b35SErik Nordmark } 1714bd670b35SErik Nordmark 1715bd670b35SErik Nordmark /* 1716bd670b35SErik Nordmark * Calculate ULP checksum. Note that we don't use cksump and cksum 1717bd670b35SErik Nordmark * if the ill has FULL support. 1718bd670b35SErik Nordmark */ 1719bd670b35SErik Nordmark if (protocol == IPPROTO_TCP) { 1720bd670b35SErik Nordmark cksump = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_length); 1721bd670b35SErik Nordmark cksum = IP_TCP_CSUM_COMP; /* Pseudo-header cksum */ 1722bd670b35SErik Nordmark } else if (protocol == IPPROTO_UDP) { 1723bd670b35SErik Nordmark cksump = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_length); 1724bd670b35SErik Nordmark cksum = IP_UDP_CSUM_COMP; /* Pseudo-header cksum */ 1725bd670b35SErik Nordmark } else if (protocol == IPPROTO_SCTP) { 1726bd670b35SErik Nordmark sctp_hdr_t *sctph; 1727bd670b35SErik Nordmark 1728bd670b35SErik Nordmark ASSERT(MBLKL(mp) >= (ip_hdr_length + sizeof (*sctph))); 1729bd670b35SErik Nordmark sctph = (sctp_hdr_t *)(mp->b_rptr + ip_hdr_length); 1730bd670b35SErik Nordmark /* 1731bd670b35SErik Nordmark * Zero out the checksum field to ensure proper 1732bd670b35SErik Nordmark * checksum calculation. 
1733bd670b35SErik Nordmark */ 1734bd670b35SErik Nordmark sctph->sh_chksum = 0; 1735bd670b35SErik Nordmark #ifdef DEBUG 1736bd670b35SErik Nordmark if (!skip_sctp_cksum) 1737bd670b35SErik Nordmark #endif 1738bd670b35SErik Nordmark sctph->sh_chksum = sctp_cksum(mp, ip_hdr_length); 1739bd670b35SErik Nordmark goto ip_hdr_cksum; 1740bd670b35SErik Nordmark } else { 1741bd670b35SErik Nordmark ip_hdr_cksum: 1742bd670b35SErik Nordmark /* Calculate IPv4 header checksum */ 1743bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1744bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1745bd670b35SErik Nordmark return (B_TRUE); 1746bd670b35SErik Nordmark } 1747bd670b35SErik Nordmark 1748bd670b35SErik Nordmark /* ULP puts the checksum field is in the first mblk */ 1749bd670b35SErik Nordmark ASSERT(((uchar_t *)cksump) + sizeof (uint16_t) <= mp->b_wptr); 1750bd670b35SErik Nordmark 1751bd670b35SErik Nordmark /* 1752bd670b35SErik Nordmark * Underlying interface supports hardware checksum offload for 1753bd670b35SErik Nordmark * the payload; leave the payload checksum for the hardware to 1754bd670b35SErik Nordmark * calculate. N.B: We only need to set up checksum info on the 1755bd670b35SErik Nordmark * first mblk. 1756bd670b35SErik Nordmark */ 1757bd670b35SErik Nordmark hck_flags = ill->ill_hcksum_capab->ill_hcksum_txflags; 1758bd670b35SErik Nordmark 1759bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) &= ~HCK_FLAGS; 1760bd670b35SErik Nordmark if (hck_flags & HCKSUM_INET_FULL_V4) { 1761bd670b35SErik Nordmark /* 1762bd670b35SErik Nordmark * Hardware calculates pseudo-header, header and the 1763bd670b35SErik Nordmark * payload checksums, so clear the checksum field in 1764bd670b35SErik Nordmark * the protocol header. 
1765bd670b35SErik Nordmark */ 1766bd670b35SErik Nordmark *cksump = 0; 1767bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM; 1768bd670b35SErik Nordmark 1769bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1770bd670b35SErik Nordmark if (hck_flags & HCKSUM_IPHDRCKSUM) { 1771bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; 1772bd670b35SErik Nordmark } else { 1773bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1774bd670b35SErik Nordmark } 1775bd670b35SErik Nordmark return (B_TRUE); 1776bd670b35SErik Nordmark } 1777bd670b35SErik Nordmark if ((hck_flags) & HCKSUM_INET_PARTIAL) { 1778bd670b35SErik Nordmark ipaddr_t dst = ipha->ipha_dst; 1779bd670b35SErik Nordmark ipaddr_t src = ipha->ipha_src; 1780bd670b35SErik Nordmark /* 1781bd670b35SErik Nordmark * Partial checksum offload has been enabled. Fill 1782bd670b35SErik Nordmark * the checksum field in the protocol header with the 1783bd670b35SErik Nordmark * pseudo-header checksum value. 1784bd670b35SErik Nordmark * 1785bd670b35SErik Nordmark * We accumulate the pseudo header checksum in cksum. 1786bd670b35SErik Nordmark * This is pretty hairy code, so watch close. One 1787bd670b35SErik Nordmark * thing to keep in mind is that UDP and TCP have 1788bd670b35SErik Nordmark * stored their respective datagram lengths in their 1789bd670b35SErik Nordmark * checksum fields. This lines things up real nice. 1790bd670b35SErik Nordmark */ 1791bd670b35SErik Nordmark cksum += (dst >> 16) + (dst & 0xFFFF) + 1792bd670b35SErik Nordmark (src >> 16) + (src & 0xFFFF); 1793bd670b35SErik Nordmark cksum += *(cksump); 1794bd670b35SErik Nordmark cksum = (cksum & 0xFFFF) + (cksum >> 16); 1795bd670b35SErik Nordmark *(cksump) = (cksum & 0xFFFF) + (cksum >> 16); 1796bd670b35SErik Nordmark 1797bd670b35SErik Nordmark /* 1798bd670b35SErik Nordmark * Offsets are relative to beginning of IP header. 
1799bd670b35SErik Nordmark */ 1800bd670b35SErik Nordmark DB_CKSUMSTART(mp) = ip_hdr_length; 1801bd670b35SErik Nordmark DB_CKSUMSTUFF(mp) = (uint8_t *)cksump - (uint8_t *)ipha; 1802bd670b35SErik Nordmark DB_CKSUMEND(mp) = pktlen; 1803bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) |= HCK_PARTIALCKSUM; 1804bd670b35SErik Nordmark 1805bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1806bd670b35SErik Nordmark if (hck_flags & HCKSUM_IPHDRCKSUM) { 1807bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM; 1808bd670b35SErik Nordmark } else { 1809bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1810bd670b35SErik Nordmark } 1811bd670b35SErik Nordmark return (B_TRUE); 1812bd670b35SErik Nordmark } 1813bd670b35SErik Nordmark /* Hardware capabilities include neither full nor partial IPv4 */ 1814bd670b35SErik Nordmark return (ip_output_sw_cksum_v4(mp, ipha, ixa)); 1815bd670b35SErik Nordmark } 1816bd670b35SErik Nordmark 1817bd670b35SErik Nordmark /* 1818bd670b35SErik Nordmark * ire_sendfn for offlink and onlink destinations. 1819bd670b35SErik Nordmark * Also called from the multicast, broadcast, multirt send functions. 1820bd670b35SErik Nordmark * 1821bd670b35SErik Nordmark * Assumes that the caller has a hold on the ire. 1822bd670b35SErik Nordmark * 1823bd670b35SErik Nordmark * This function doesn't care if the IRE just became condemned since that 1824bd670b35SErik Nordmark * can happen at any time. 
1825bd670b35SErik Nordmark */ 1826bd670b35SErik Nordmark /* ARGSUSED */ 1827bd670b35SErik Nordmark int 1828bd670b35SErik Nordmark ire_send_wire_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1829bd670b35SErik Nordmark ip_xmit_attr_t *ixa, uint32_t *identp) 1830bd670b35SErik Nordmark { 1831bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 1832bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1833bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 1834bd670b35SErik Nordmark ill_t *ill; 1835bd670b35SErik Nordmark 1836bd670b35SErik Nordmark ASSERT(ixa->ixa_nce != NULL); 1837bd670b35SErik Nordmark ill = ixa->ixa_nce->nce_ill; 1838bd670b35SErik Nordmark 1839*d3c3f6c7SDavid Hanisch /* 1840*d3c3f6c7SDavid Hanisch * This package comes from ipf, we have already been here once and 1841*d3c3f6c7SDavid Hanisch * all work is already done. Go to send directly. Especially 1842*d3c3f6c7SDavid Hanisch * - don't change ip header (ipha_ident, ipha_ttl), it's already set, 1843*d3c3f6c7SDavid Hanisch * and in case of no cksum offload, the cksum would become invalid 1844*d3c3f6c7SDavid Hanisch * - don't touch cksums, they are already prepared 1845*d3c3f6c7SDavid Hanisch * - don't check for fragmentation, ixa_fragsize for LSO is lost 1846*d3c3f6c7SDavid Hanisch * on the way and the check would fail in case of LSO 1847*d3c3f6c7SDavid Hanisch */ 1848*d3c3f6c7SDavid Hanisch if (ixaflags & IXAF_NO_PFHOOK) 1849*d3c3f6c7SDavid Hanisch goto sendit; 1850*d3c3f6c7SDavid Hanisch 1851bd670b35SErik Nordmark if (ixaflags & IXAF_DONTROUTE) 1852bd670b35SErik Nordmark ipha->ipha_ttl = 1; 1853bd670b35SErik Nordmark 1854bd670b35SErik Nordmark /* 1855bd670b35SErik Nordmark * Assign an ident value for this packet. There could be other 1856bd670b35SErik Nordmark * threads targeting the same destination, so we have to arrange 1857bd670b35SErik Nordmark * for a atomic increment. 
Note that we use a 32-bit atomic add 1858bd670b35SErik Nordmark * because it has better performance than its 16-bit sibling. 1859bd670b35SErik Nordmark * 1860bd670b35SErik Nordmark * Normally ixa_extra_ident is 0, but in the case of LSO it will 1861bd670b35SErik Nordmark * be the number of TCP segments that the driver/hardware will 1862bd670b35SErik Nordmark * extraly construct. 1863bd670b35SErik Nordmark * 1864bd670b35SErik Nordmark * If running in cluster mode and if the source address 1865bd670b35SErik Nordmark * belongs to a replicated service then vector through 1866bd670b35SErik Nordmark * cl_inet_ipident vector to allocate ip identifier 1867bd670b35SErik Nordmark * NOTE: This is a contract private interface with the 1868bd670b35SErik Nordmark * clustering group. 1869bd670b35SErik Nordmark */ 1870bd670b35SErik Nordmark if (cl_inet_ipident != NULL) { 1871bd670b35SErik Nordmark ipaddr_t src = ipha->ipha_src; 1872bd670b35SErik Nordmark ipaddr_t dst = ipha->ipha_dst; 1873bd670b35SErik Nordmark netstackid_t stack_id = ipst->ips_netstack->netstack_stackid; 1874bd670b35SErik Nordmark 1875bd670b35SErik Nordmark ASSERT(cl_inet_isclusterwide != NULL); 1876bd670b35SErik Nordmark if ((*cl_inet_isclusterwide)(stack_id, IPPROTO_IP, 1877bd670b35SErik Nordmark AF_INET, (uint8_t *)(uintptr_t)src, NULL)) { 1878bd670b35SErik Nordmark /* 1879bd670b35SErik Nordmark * Note: not correct with LSO since we can't allocate 1880bd670b35SErik Nordmark * ixa_extra_ident+1 consecutive values. 
1881bd670b35SErik Nordmark */ 1882bd670b35SErik Nordmark ipha->ipha_ident = (*cl_inet_ipident)(stack_id, 1883bd670b35SErik Nordmark IPPROTO_IP, AF_INET, (uint8_t *)(uintptr_t)src, 1884bd670b35SErik Nordmark (uint8_t *)(uintptr_t)dst, NULL); 1885bd670b35SErik Nordmark } else { 1886bd670b35SErik Nordmark ipha->ipha_ident = atomic_add_32_nv(identp, 1887bd670b35SErik Nordmark ixa->ixa_extra_ident + 1); 1888bd670b35SErik Nordmark } 1889bd670b35SErik Nordmark } else { 1890bd670b35SErik Nordmark ipha->ipha_ident = atomic_add_32_nv(identp, 1891bd670b35SErik Nordmark ixa->ixa_extra_ident + 1); 1892bd670b35SErik Nordmark } 1893bd670b35SErik Nordmark #ifndef _BIG_ENDIAN 1894bd670b35SErik Nordmark ipha->ipha_ident = htons(ipha->ipha_ident); 1895bd670b35SErik Nordmark #endif 1896bd670b35SErik Nordmark 1897bd670b35SErik Nordmark /* 1898bd670b35SErik Nordmark * This might set b_band, thus the IPsec and fragmentation 1899bd670b35SErik Nordmark * code in IP ensures that b_band is updated in the first mblk. 1900bd670b35SErik Nordmark */ 1901bd670b35SErik Nordmark if (IPP_ENABLED(IPP_LOCAL_OUT, ipst)) { 1902bd670b35SErik Nordmark /* ip_process translates an IS_UNDER_IPMP */ 1903bd670b35SErik Nordmark mp = ip_process(IPP_LOCAL_OUT, mp, ill, ill); 1904bd670b35SErik Nordmark if (mp == NULL) { 1905bd670b35SErik Nordmark /* ip_drop_packet and MIB done */ 1906bd670b35SErik Nordmark return (0); /* Might just be delayed */ 1907bd670b35SErik Nordmark } 1908bd670b35SErik Nordmark } 1909bd670b35SErik Nordmark 1910bd670b35SErik Nordmark /* 1911bd670b35SErik Nordmark * Verify any IPv4 options. 1912bd670b35SErik Nordmark * 1913bd670b35SErik Nordmark * The presense of IP options also forces the network stack to 1914bd670b35SErik Nordmark * calculate the checksum in software. This is because: 1915bd670b35SErik Nordmark * 1916bd670b35SErik Nordmark * Wrap around: certain partial-checksum NICs (eri, ce) limit 1917bd670b35SErik Nordmark * the size of "start offset" width to 6-bit. 
This effectively 1918bd670b35SErik Nordmark * sets the largest value of the offset to 64-bytes, starting 1919bd670b35SErik Nordmark * from the MAC header. When the cumulative MAC and IP headers 1920bd670b35SErik Nordmark * exceed such limit, the offset will wrap around. This causes 1921bd670b35SErik Nordmark * the checksum to be calculated at the wrong place. 1922bd670b35SErik Nordmark * 1923bd670b35SErik Nordmark * IPv4 source routing: none of the full-checksum capable NICs 1924bd670b35SErik Nordmark * is capable of correctly handling the IPv4 source-routing 1925bd670b35SErik Nordmark * option for purposes of calculating the pseudo-header; the 1926bd670b35SErik Nordmark * actual destination is different from the destination in the 1927bd670b35SErik Nordmark * header which is that of the next-hop. (This case may not be 1928bd670b35SErik Nordmark * true for NICs which can parse IPv6 extension headers, but 1929bd670b35SErik Nordmark * we choose to simplify the implementation by not offloading 1930bd670b35SErik Nordmark * checksum when they are present.) 
1931bd670b35SErik Nordmark */ 1932bd670b35SErik Nordmark if (!IS_SIMPLE_IPH(ipha)) { 1933bd670b35SErik Nordmark ixaflags = ixa->ixa_flags |= IXAF_NO_HW_CKSUM; 1934bd670b35SErik Nordmark /* An IS_UNDER_IPMP ill is ok here */ 1935bd670b35SErik Nordmark if (ip_output_options(mp, ipha, ixa, ill)) { 1936bd670b35SErik Nordmark /* Packet has been consumed and ICMP error sent */ 1937bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1938bd670b35SErik Nordmark return (EINVAL); 1939bd670b35SErik Nordmark } 1940bd670b35SErik Nordmark } 1941bd670b35SErik Nordmark 1942bd670b35SErik Nordmark /* 1943bd670b35SErik Nordmark * To handle IPsec/iptun's labeling needs we need to tag packets 1944bd670b35SErik Nordmark * while we still have ixa_tsl 1945bd670b35SErik Nordmark */ 1946bd670b35SErik Nordmark if (is_system_labeled() && ixa->ixa_tsl != NULL && 1947bd670b35SErik Nordmark (ill->ill_mactype == DL_6TO4 || ill->ill_mactype == DL_IPV4 || 1948bd670b35SErik Nordmark ill->ill_mactype == DL_IPV6)) { 1949bd670b35SErik Nordmark cred_t *newcr; 1950bd670b35SErik Nordmark 1951bd670b35SErik Nordmark newcr = copycred_from_tslabel(ixa->ixa_cred, ixa->ixa_tsl, 1952bd670b35SErik Nordmark KM_NOSLEEP); 1953bd670b35SErik Nordmark if (newcr == NULL) { 1954bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1955bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - newcr", 1956bd670b35SErik Nordmark mp, ill); 1957bd670b35SErik Nordmark freemsg(mp); 1958bd670b35SErik Nordmark return (ENOBUFS); 1959bd670b35SErik Nordmark } 1960bd670b35SErik Nordmark mblk_setcred(mp, newcr, NOPID); 1961bd670b35SErik Nordmark crfree(newcr); /* mblk_setcred did its own crhold */ 1962bd670b35SErik Nordmark } 1963bd670b35SErik Nordmark 1964bd670b35SErik Nordmark if (ixa->ixa_pktlen > ixa->ixa_fragsize || 1965bd670b35SErik Nordmark (ixaflags & IXAF_IPSEC_SECURE)) { 1966bd670b35SErik Nordmark uint32_t pktlen; 1967bd670b35SErik Nordmark 1968bd670b35SErik Nordmark pktlen = 
ixa->ixa_pktlen; 1969bd670b35SErik Nordmark if (ixaflags & IXAF_IPSEC_SECURE) 1970bd670b35SErik Nordmark pktlen += ipsec_out_extra_length(ixa); 1971bd670b35SErik Nordmark 1972bd670b35SErik Nordmark if (pktlen > IP_MAXPACKET) 1973bd670b35SErik Nordmark return (EMSGSIZE); 1974bd670b35SErik Nordmark 1975bd670b35SErik Nordmark if (ixaflags & IXAF_SET_ULP_CKSUM) { 1976bd670b35SErik Nordmark /* 1977bd670b35SErik Nordmark * Compute ULP checksum and IP header checksum 1978bd670b35SErik Nordmark * using software 1979bd670b35SErik Nordmark */ 1980bd670b35SErik Nordmark if (!ip_output_sw_cksum_v4(mp, ipha, ixa)) { 1981bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 1982bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 1983bd670b35SErik Nordmark freemsg(mp); 1984bd670b35SErik Nordmark return (EINVAL); 1985bd670b35SErik Nordmark } 1986bd670b35SErik Nordmark } else { 1987bd670b35SErik Nordmark /* Calculate IPv4 header checksum */ 1988bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1989bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1990bd670b35SErik Nordmark } 1991bd670b35SErik Nordmark 1992bd670b35SErik Nordmark /* 1993bd670b35SErik Nordmark * If this packet would generate a icmp_frag_needed 1994bd670b35SErik Nordmark * message, we need to handle it before we do the IPsec 1995bd670b35SErik Nordmark * processing. Otherwise, we need to strip the IPsec 1996bd670b35SErik Nordmark * headers before we send up the message to the ULPs 1997bd670b35SErik Nordmark * which becomes messy and difficult. 1998bd670b35SErik Nordmark * 1999bd670b35SErik Nordmark * We check using IXAF_DONTFRAG. The DF bit in the header 2000bd670b35SErik Nordmark * is not inspected - it will be copied to any generated 2001bd670b35SErik Nordmark * fragments. 
2002bd670b35SErik Nordmark */ 2003bd670b35SErik Nordmark if ((pktlen > ixa->ixa_fragsize) && 2004bd670b35SErik Nordmark (ixaflags & IXAF_DONTFRAG)) { 2005bd670b35SErik Nordmark /* Generate ICMP and return error */ 2006bd670b35SErik Nordmark ip_recv_attr_t iras; 2007bd670b35SErik Nordmark 2008bd670b35SErik Nordmark DTRACE_PROBE4(ip4__fragsize__fail, uint_t, pktlen, 2009bd670b35SErik Nordmark uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen, 2010bd670b35SErik Nordmark uint_t, ixa->ixa_pmtu); 2011bd670b35SErik Nordmark 2012bd670b35SErik Nordmark bzero(&iras, sizeof (iras)); 2013bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies */ 2014bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 2015bd670b35SErik Nordmark 2016bd670b35SErik Nordmark ip_drop_output("ICMP_FRAG_NEEDED", mp, ill); 2017bd670b35SErik Nordmark icmp_frag_needed(mp, ixa->ixa_fragsize, &iras); 2018bd670b35SErik Nordmark /* We moved any IPsec refs from ixa to iras */ 2019bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 2020bd670b35SErik Nordmark return (EMSGSIZE); 2021bd670b35SErik Nordmark } 2022bd670b35SErik Nordmark DTRACE_PROBE4(ip4__fragsize__ok, uint_t, pktlen, 2023bd670b35SErik Nordmark uint_t, ixa->ixa_fragsize, uint_t, ixa->ixa_pktlen, 2024bd670b35SErik Nordmark uint_t, ixa->ixa_pmtu); 2025bd670b35SErik Nordmark 2026bd670b35SErik Nordmark if (ixaflags & IXAF_IPSEC_SECURE) { 2027bd670b35SErik Nordmark /* 2028bd670b35SErik Nordmark * Pass in sufficient information so that 2029bd670b35SErik Nordmark * IPsec can determine whether to fragment, and 2030bd670b35SErik Nordmark * which function to call after fragmentation. 
2031bd670b35SErik Nordmark */ 2032bd670b35SErik Nordmark return (ipsec_out_process(mp, ixa)); 2033bd670b35SErik Nordmark } 2034bd670b35SErik Nordmark return (ip_fragment_v4(mp, ixa->ixa_nce, ixaflags, 2035bd670b35SErik Nordmark ixa->ixa_pktlen, ixa->ixa_fragsize, ixa->ixa_xmit_hint, 2036bd670b35SErik Nordmark ixa->ixa_zoneid, ixa->ixa_no_loop_zoneid, 2037bd670b35SErik Nordmark ixa->ixa_postfragfn, &ixa->ixa_cookie)); 2038bd670b35SErik Nordmark } 2039*d3c3f6c7SDavid Hanisch 2040bd670b35SErik Nordmark if (ixaflags & IXAF_SET_ULP_CKSUM) { 2041bd670b35SErik Nordmark /* Compute ULP checksum and IP header checksum */ 2042bd670b35SErik Nordmark /* An IS_UNDER_IPMP ill is ok here */ 2043bd670b35SErik Nordmark if (!ip_output_cksum_v4(ixaflags, mp, ipha, ixa, ill)) { 2044bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2045bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 2046bd670b35SErik Nordmark freemsg(mp); 2047bd670b35SErik Nordmark return (EINVAL); 2048bd670b35SErik Nordmark } 2049bd670b35SErik Nordmark } else { 2050bd670b35SErik Nordmark /* Calculate IPv4 header checksum */ 2051bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 2052bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 2053bd670b35SErik Nordmark } 2054*d3c3f6c7SDavid Hanisch 2055*d3c3f6c7SDavid Hanisch sendit: 2056bd670b35SErik Nordmark return ((ixa->ixa_postfragfn)(mp, ixa->ixa_nce, ixaflags, 2057bd670b35SErik Nordmark ixa->ixa_pktlen, ixa->ixa_xmit_hint, ixa->ixa_zoneid, 2058bd670b35SErik Nordmark ixa->ixa_no_loop_zoneid, &ixa->ixa_cookie)); 2059bd670b35SErik Nordmark } 2060bd670b35SErik Nordmark 2061bd670b35SErik Nordmark /* 2062bd670b35SErik Nordmark * Send mp into ip_input 2063bd670b35SErik Nordmark * Common for IPv4 and IPv6 2064bd670b35SErik Nordmark */ 2065bd670b35SErik Nordmark void 2066bd670b35SErik Nordmark ip_postfrag_loopback(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 2067bd670b35SErik Nordmark uint_t pkt_len, zoneid_t nolzid) 
2068bd670b35SErik Nordmark { 2069bd670b35SErik Nordmark rtc_t rtc; 2070bd670b35SErik Nordmark ill_t *ill = nce->nce_ill; 2071bd670b35SErik Nordmark ip_recv_attr_t iras; /* NOTE: No bzero for performance */ 2072bd670b35SErik Nordmark ncec_t *ncec; 2073bd670b35SErik Nordmark 2074bd670b35SErik Nordmark ncec = nce->nce_common; 2075bd670b35SErik Nordmark iras.ira_flags = IRAF_VERIFY_IP_CKSUM | IRAF_VERIFY_ULP_CKSUM | 2076bd670b35SErik Nordmark IRAF_LOOPBACK | IRAF_L2SRC_LOOPBACK; 2077bd670b35SErik Nordmark if (ncec->ncec_flags & NCE_F_BCAST) 2078bd670b35SErik Nordmark iras.ira_flags |= IRAF_L2DST_BROADCAST; 2079bd670b35SErik Nordmark else if (ncec->ncec_flags & NCE_F_MCAST) 2080bd670b35SErik Nordmark iras.ira_flags |= IRAF_L2DST_MULTICAST; 2081bd670b35SErik Nordmark 2082bd670b35SErik Nordmark iras.ira_free_flags = 0; 2083bd670b35SErik Nordmark iras.ira_cred = NULL; 2084bd670b35SErik Nordmark iras.ira_cpid = NOPID; 2085bd670b35SErik Nordmark iras.ira_tsl = NULL; 2086bd670b35SErik Nordmark iras.ira_zoneid = ALL_ZONES; 2087bd670b35SErik Nordmark iras.ira_pktlen = pkt_len; 2088bd670b35SErik Nordmark UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, iras.ira_pktlen); 2089bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 2090bd670b35SErik Nordmark 2091bd670b35SErik Nordmark if (ixaflags & IXAF_IS_IPV4) 2092bd670b35SErik Nordmark iras.ira_flags |= IRAF_IS_IPV4; 2093bd670b35SErik Nordmark 2094bd670b35SErik Nordmark iras.ira_ill = iras.ira_rill = ill; 2095bd670b35SErik Nordmark iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 2096bd670b35SErik Nordmark iras.ira_rifindex = iras.ira_ruifindex; 2097bd670b35SErik Nordmark iras.ira_mhip = NULL; 2098bd670b35SErik Nordmark 2099bd670b35SErik Nordmark iras.ira_flags |= ixaflags & IAF_MASK; 2100bd670b35SErik Nordmark iras.ira_no_loop_zoneid = nolzid; 2101bd670b35SErik Nordmark 2102bd670b35SErik Nordmark /* Broadcast and multicast doesn't care about the squeue */ 2103bd670b35SErik Nordmark iras.ira_sqp = NULL; 
2104bd670b35SErik Nordmark 2105bd670b35SErik Nordmark rtc.rtc_ire = NULL; 2106bd670b35SErik Nordmark if (ixaflags & IXAF_IS_IPV4) { 2107bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 2108bd670b35SErik Nordmark 2109bd670b35SErik Nordmark rtc.rtc_ipaddr = INADDR_ANY; 2110bd670b35SErik Nordmark 2111bd670b35SErik Nordmark (*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc); 2112bd670b35SErik Nordmark if (rtc.rtc_ire != NULL) { 2113bd670b35SErik Nordmark ASSERT(rtc.rtc_ipaddr != INADDR_ANY); 2114bd670b35SErik Nordmark ire_refrele(rtc.rtc_ire); 2115bd670b35SErik Nordmark } 2116bd670b35SErik Nordmark } else { 2117bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2118bd670b35SErik Nordmark 2119bd670b35SErik Nordmark rtc.rtc_ip6addr = ipv6_all_zeros; 2120bd670b35SErik Nordmark 2121bd670b35SErik Nordmark (*ill->ill_inputfn)(mp, ip6h, &ip6h->ip6_dst, &iras, &rtc); 2122bd670b35SErik Nordmark if (rtc.rtc_ire != NULL) { 2123bd670b35SErik Nordmark ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc.rtc_ip6addr)); 2124bd670b35SErik Nordmark ire_refrele(rtc.rtc_ire); 2125bd670b35SErik Nordmark } 2126bd670b35SErik Nordmark } 2127bd670b35SErik Nordmark /* Any references to clean up? No hold on ira */ 2128bd670b35SErik Nordmark if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 2129bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 2130bd670b35SErik Nordmark } 2131bd670b35SErik Nordmark 2132bd670b35SErik Nordmark /* 2133bd670b35SErik Nordmark * Post fragmentation function for IRE_MULTICAST and IRE_BROADCAST which 2134bd670b35SErik Nordmark * looks at the IXAF_LOOPBACK_COPY flag. 2135bd670b35SErik Nordmark * Common for IPv4 and IPv6. 2136bd670b35SErik Nordmark * 2137bd670b35SErik Nordmark * If the loopback copy fails (due to no memory) but we send the packet out 2138bd670b35SErik Nordmark * on the wire we return no failure. Only in the case we supress the wire 2139bd670b35SErik Nordmark * sending do we take the loopback failure into account. 
2140bd670b35SErik Nordmark * 2141bd670b35SErik Nordmark * Note that we do not perform DTRACE_IP7 and FW_HOOKS for the looped back copy. 2142bd670b35SErik Nordmark * Those operations are performed on this packet in ip_xmit() and it would 2143bd670b35SErik Nordmark * be odd to do it twice for the same packet. 2144bd670b35SErik Nordmark */ 2145bd670b35SErik Nordmark int 2146bd670b35SErik Nordmark ip_postfrag_loopcheck(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 2147bd670b35SErik Nordmark uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 2148bd670b35SErik Nordmark uintptr_t *ixacookie) 2149bd670b35SErik Nordmark { 2150bd670b35SErik Nordmark ill_t *ill = nce->nce_ill; 2151bd670b35SErik Nordmark int error = 0; 2152bd670b35SErik Nordmark 2153bd670b35SErik Nordmark /* 2154bd670b35SErik Nordmark * Check for IXAF_LOOPBACK_COPY - send a copy to ip as if the driver 2155bd670b35SErik Nordmark * had looped it back 2156bd670b35SErik Nordmark */ 2157bd670b35SErik Nordmark if (ixaflags & IXAF_LOOPBACK_COPY) { 2158bd670b35SErik Nordmark mblk_t *mp1; 2159bd670b35SErik Nordmark 2160bd670b35SErik Nordmark mp1 = copymsg(mp); 2161bd670b35SErik Nordmark if (mp1 == NULL) { 2162bd670b35SErik Nordmark /* Failed to deliver the loopback copy. */ 2163bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2164bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 2165bd670b35SErik Nordmark error = ENOBUFS; 2166bd670b35SErik Nordmark } else { 2167bd670b35SErik Nordmark ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len, 2168bd670b35SErik Nordmark nolzid); 2169bd670b35SErik Nordmark } 2170bd670b35SErik Nordmark } 2171bd670b35SErik Nordmark 2172bd670b35SErik Nordmark /* 2173bd670b35SErik Nordmark * If TTL = 0 then only do the loopback to this host i.e. we are 2174bd670b35SErik Nordmark * done. 
We are also done if this was the 2175bd670b35SErik Nordmark * loopback interface since it is sufficient 2176bd670b35SErik Nordmark * to loopback one copy of a multicast packet. 2177bd670b35SErik Nordmark */ 2178bd670b35SErik Nordmark if (ixaflags & IXAF_IS_IPV4) { 2179bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 2180bd670b35SErik Nordmark 2181bd670b35SErik Nordmark if (ipha->ipha_ttl == 0) { 2182bd670b35SErik Nordmark ip_drop_output("multicast ipha_ttl not sent to wire", 2183bd670b35SErik Nordmark mp, ill); 2184bd670b35SErik Nordmark freemsg(mp); 2185bd670b35SErik Nordmark return (error); 2186bd670b35SErik Nordmark } 2187bd670b35SErik Nordmark } else { 2188bd670b35SErik Nordmark ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2189bd670b35SErik Nordmark 2190bd670b35SErik Nordmark if (ip6h->ip6_hops == 0) { 2191bd670b35SErik Nordmark ip_drop_output("multicast ipha_ttl not sent to wire", 2192bd670b35SErik Nordmark mp, ill); 2193bd670b35SErik Nordmark freemsg(mp); 2194bd670b35SErik Nordmark return (error); 2195bd670b35SErik Nordmark } 2196bd670b35SErik Nordmark } 2197bd670b35SErik Nordmark if (nce->nce_ill->ill_wq == NULL) { 2198bd670b35SErik Nordmark /* Loopback interface */ 2199bd670b35SErik Nordmark ip_drop_output("multicast on lo0 not sent to wire", mp, ill); 2200bd670b35SErik Nordmark freemsg(mp); 2201bd670b35SErik Nordmark return (error); 2202bd670b35SErik Nordmark } 2203bd670b35SErik Nordmark 2204bd670b35SErik Nordmark return (ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0, 2205bd670b35SErik Nordmark ixacookie)); 2206bd670b35SErik Nordmark } 2207bd670b35SErik Nordmark 2208bd670b35SErik Nordmark /* 2209bd670b35SErik Nordmark * Post fragmentation function for RTF_MULTIRT routes. 2210bd670b35SErik Nordmark * Since IRE_BROADCASTs can have RTF_MULTIRT, this function 2211bd670b35SErik Nordmark * checks IXAF_LOOPBACK_COPY. 
2212bd670b35SErik Nordmark * 2213bd670b35SErik Nordmark * If no packet is sent due to failures then we return an errno, but if at 2214bd670b35SErik Nordmark * least one succeeded we return zero. 2215bd670b35SErik Nordmark */ 2216bd670b35SErik Nordmark int 2217bd670b35SErik Nordmark ip_postfrag_multirt_v4(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, 2218bd670b35SErik Nordmark uint_t pkt_len, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid, 2219bd670b35SErik Nordmark uintptr_t *ixacookie) 2220bd670b35SErik Nordmark { 2221bd670b35SErik Nordmark irb_t *irb; 2222bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)mp->b_rptr; 2223bd670b35SErik Nordmark ire_t *ire; 2224bd670b35SErik Nordmark ire_t *ire1; 2225bd670b35SErik Nordmark mblk_t *mp1; 2226bd670b35SErik Nordmark nce_t *nce1; 2227bd670b35SErik Nordmark ill_t *ill = nce->nce_ill; 2228bd670b35SErik Nordmark ill_t *ill1; 2229bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 2230bd670b35SErik Nordmark int error = 0; 2231bd670b35SErik Nordmark int num_sent = 0; 2232bd670b35SErik Nordmark int err; 2233bd670b35SErik Nordmark uint_t ire_type; 2234bd670b35SErik Nordmark ipaddr_t nexthop; 2235bd670b35SErik Nordmark 2236bd670b35SErik Nordmark ASSERT(ixaflags & IXAF_IS_IPV4); 2237bd670b35SErik Nordmark 2238bd670b35SErik Nordmark /* Check for IXAF_LOOPBACK_COPY */ 2239bd670b35SErik Nordmark if (ixaflags & IXAF_LOOPBACK_COPY) { 2240bd670b35SErik Nordmark mblk_t *mp1; 2241bd670b35SErik Nordmark 2242bd670b35SErik Nordmark mp1 = copymsg(mp); 2243bd670b35SErik Nordmark if (mp1 == NULL) { 2244bd670b35SErik Nordmark /* Failed to deliver the loopback copy. 
*/ 2245bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2246bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill); 2247bd670b35SErik Nordmark error = ENOBUFS; 2248bd670b35SErik Nordmark } else { 2249bd670b35SErik Nordmark ip_postfrag_loopback(mp1, nce, ixaflags, pkt_len, 2250bd670b35SErik Nordmark nolzid); 2251bd670b35SErik Nordmark } 2252bd670b35SErik Nordmark } 2253bd670b35SErik Nordmark 2254bd670b35SErik Nordmark /* 2255bd670b35SErik Nordmark * Loop over RTF_MULTIRT for ipha_dst in the same bucket. Send 2256bd670b35SErik Nordmark * a copy to each one. 2257bd670b35SErik Nordmark * Use the nce (nexthop) and ipha_dst to find the ire. 2258bd670b35SErik Nordmark * 2259bd670b35SErik Nordmark * MULTIRT is not designed to work with shared-IP zones thus we don't 2260bd670b35SErik Nordmark * need to pass a zoneid or a label to the IRE lookup. 2261bd670b35SErik Nordmark */ 2262bd670b35SErik Nordmark if (V4_PART_OF_V6(nce->nce_addr) == ipha->ipha_dst) { 2263bd670b35SErik Nordmark /* Broadcast and multicast case */ 2264bd670b35SErik Nordmark ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, 0, 0, 2265bd670b35SErik Nordmark NULL, ALL_ZONES, NULL, MATCH_IRE_DSTONLY, 0, ipst, NULL); 2266bd670b35SErik Nordmark } else { 2267bd670b35SErik Nordmark ipaddr_t v4addr = V4_PART_OF_V6(nce->nce_addr); 2268bd670b35SErik Nordmark 2269bd670b35SErik Nordmark /* Unicast case */ 2270bd670b35SErik Nordmark ire = ire_ftable_lookup_v4(ipha->ipha_dst, 0, v4addr, 0, 2271bd670b35SErik Nordmark NULL, ALL_ZONES, NULL, MATCH_IRE_GW, 0, ipst, NULL); 2272bd670b35SErik Nordmark } 2273bd670b35SErik Nordmark 2274bd670b35SErik Nordmark if (ire == NULL || 2275bd670b35SErik Nordmark (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) || 2276bd670b35SErik Nordmark !(ire->ire_flags & RTF_MULTIRT)) { 2277bd670b35SErik Nordmark /* Drop */ 2278bd670b35SErik Nordmark ip_drop_output("ip_postfrag_multirt didn't find route", 2279bd670b35SErik Nordmark mp, nce->nce_ill); 2280bd670b35SErik 
Nordmark if (ire != NULL) 2281bd670b35SErik Nordmark ire_refrele(ire); 2282bd670b35SErik Nordmark return (ENETUNREACH); 2283bd670b35SErik Nordmark } 2284bd670b35SErik Nordmark 2285bd670b35SErik Nordmark irb = ire->ire_bucket; 2286bd670b35SErik Nordmark irb_refhold(irb); 2287bd670b35SErik Nordmark for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 2288bd670b35SErik Nordmark /* 2289bd670b35SErik Nordmark * For broadcast we can have a mixture of IRE_BROADCAST and 2290bd670b35SErik Nordmark * IRE_HOST due to the manually added IRE_HOSTs that are used 2291bd670b35SErik Nordmark * to trigger the creation of the special CGTP broadcast routes. 2292bd670b35SErik Nordmark * Thus we have to skip if ire_type doesn't match the original. 2293bd670b35SErik Nordmark */ 2294bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire1) || 2295bd670b35SErik Nordmark !(ire1->ire_flags & RTF_MULTIRT) || 2296bd670b35SErik Nordmark ire1->ire_type != ire->ire_type) 2297bd670b35SErik Nordmark continue; 2298bd670b35SErik Nordmark 2299bd670b35SErik Nordmark /* Do the ire argument one after the loop */ 2300bd670b35SErik Nordmark if (ire1 == ire) 2301bd670b35SErik Nordmark continue; 2302bd670b35SErik Nordmark 2303bd670b35SErik Nordmark ill1 = ire_nexthop_ill(ire1); 2304bd670b35SErik Nordmark if (ill1 == NULL) { 2305bd670b35SErik Nordmark /* 2306bd670b35SErik Nordmark * This ire might not have been picked by 2307bd670b35SErik Nordmark * ire_route_recursive, in which case ire_dep might 2308bd670b35SErik Nordmark * not have been setup yet. 2309bd670b35SErik Nordmark * We kick ire_route_recursive to try to resolve 2310bd670b35SErik Nordmark * starting at ire1. 
2311bd670b35SErik Nordmark */ 2312bd670b35SErik Nordmark ire_t *ire2; 231344b099c4SSowmini Varadhan uint_t match_flags = MATCH_IRE_DSTONLY; 2314bd670b35SErik Nordmark 231544b099c4SSowmini Varadhan if (ire1->ire_ill != NULL) 231644b099c4SSowmini Varadhan match_flags |= MATCH_IRE_ILL; 2317bd670b35SErik Nordmark ire2 = ire_route_recursive_impl_v4(ire1, 2318bd670b35SErik Nordmark ire1->ire_addr, ire1->ire_type, ire1->ire_ill, 231944b099c4SSowmini Varadhan ire1->ire_zoneid, NULL, match_flags, 23209e3469d3SErik Nordmark IRR_ALLOCATE, 0, ipst, NULL, NULL, NULL); 2321bd670b35SErik Nordmark if (ire2 != NULL) 2322bd670b35SErik Nordmark ire_refrele(ire2); 2323bd670b35SErik Nordmark ill1 = ire_nexthop_ill(ire1); 2324bd670b35SErik Nordmark } 2325bd670b35SErik Nordmark 2326bd670b35SErik Nordmark if (ill1 == NULL) { 2327bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards); 2328bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - no ill", 2329bd670b35SErik Nordmark mp, ill); 2330bd670b35SErik Nordmark error = ENETUNREACH; 2331bd670b35SErik Nordmark continue; 2332bd670b35SErik Nordmark } 2333bd670b35SErik Nordmark 2334bd670b35SErik Nordmark /* Pick the addr and type to use for arp_nce_init */ 2335bd670b35SErik Nordmark if (nce->nce_common->ncec_flags & NCE_F_BCAST) { 2336bd670b35SErik Nordmark ire_type = IRE_BROADCAST; 2337bd670b35SErik Nordmark nexthop = ire1->ire_gateway_addr; 2338bd670b35SErik Nordmark } else if (nce->nce_common->ncec_flags & NCE_F_MCAST) { 2339bd670b35SErik Nordmark ire_type = IRE_MULTICAST; 2340bd670b35SErik Nordmark nexthop = ipha->ipha_dst; 2341bd670b35SErik Nordmark } else { 2342bd670b35SErik Nordmark ire_type = ire1->ire_type; /* Doesn't matter */ 2343bd670b35SErik Nordmark nexthop = ire1->ire_gateway_addr; 2344bd670b35SErik Nordmark } 2345bd670b35SErik Nordmark 2346bd670b35SErik Nordmark /* If IPMP meta or under, then we just drop */ 2347bd670b35SErik Nordmark if (ill1->ill_grp != NULL) { 2348bd670b35SErik Nordmark 
BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 2349bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - IPMP", 2350bd670b35SErik Nordmark mp, ill1); 2351bd670b35SErik Nordmark ill_refrele(ill1); 2352bd670b35SErik Nordmark error = ENETUNREACH; 2353bd670b35SErik Nordmark continue; 2354bd670b35SErik Nordmark } 2355bd670b35SErik Nordmark 2356bd670b35SErik Nordmark nce1 = arp_nce_init(ill1, nexthop, ire_type); 2357bd670b35SErik Nordmark if (nce1 == NULL) { 2358bd670b35SErik Nordmark BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 2359bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards - no nce", 2360bd670b35SErik Nordmark mp, ill1); 2361bd670b35SErik Nordmark ill_refrele(ill1); 2362bd670b35SErik Nordmark error = ENETUNREACH; 2363bd670b35SErik Nordmark continue; 2364bd670b35SErik Nordmark } 2365bd670b35SErik Nordmark mp1 = copymsg(mp); 2366bd670b35SErik Nordmark if (mp1 == NULL) { 2367bd670b35SErik Nordmark BUMP_MIB(ill1->ill_ip_mib, ipIfStatsOutDiscards); 2368bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutDiscards", mp, ill1); 2369bd670b35SErik Nordmark nce_refrele(nce1); 2370bd670b35SErik Nordmark ill_refrele(ill1); 2371bd670b35SErik Nordmark error = ENOBUFS; 2372bd670b35SErik Nordmark continue; 2373bd670b35SErik Nordmark } 2374bd670b35SErik Nordmark /* Preserve HW checksum for this copy */ 2375bd670b35SErik Nordmark DB_CKSUMSTART(mp1) = DB_CKSUMSTART(mp); 2376bd670b35SErik Nordmark DB_CKSUMSTUFF(mp1) = DB_CKSUMSTUFF(mp); 2377bd670b35SErik Nordmark DB_CKSUMEND(mp1) = DB_CKSUMEND(mp); 2378bd670b35SErik Nordmark DB_CKSUMFLAGS(mp1) = DB_CKSUMFLAGS(mp); 2379bd670b35SErik Nordmark DB_LSOMSS(mp1) = DB_LSOMSS(mp); 2380bd670b35SErik Nordmark 2381bd670b35SErik Nordmark ire1->ire_ob_pkt_count++; 2382bd670b35SErik Nordmark err = ip_xmit(mp1, nce1, ixaflags, pkt_len, xmit_hint, szone, 2383bd670b35SErik Nordmark 0, ixacookie); 2384bd670b35SErik Nordmark if (err == 0) 2385bd670b35SErik Nordmark num_sent++; 2386bd670b35SErik Nordmark else 2387bd670b35SErik 
Nordmark error = err; 2388bd670b35SErik Nordmark nce_refrele(nce1); 2389bd670b35SErik Nordmark ill_refrele(ill1); 2390bd670b35SErik Nordmark } 2391bd670b35SErik Nordmark irb_refrele(irb); 2392bd670b35SErik Nordmark ire_refrele(ire); 2393bd670b35SErik Nordmark /* Finally, the main one */ 2394bd670b35SErik Nordmark err = ip_xmit(mp, nce, ixaflags, pkt_len, xmit_hint, szone, 0, 2395bd670b35SErik Nordmark ixacookie); 2396bd670b35SErik Nordmark if (err == 0) 2397bd670b35SErik Nordmark num_sent++; 2398bd670b35SErik Nordmark else 2399bd670b35SErik Nordmark error = err; 2400bd670b35SErik Nordmark if (num_sent > 0) 2401bd670b35SErik Nordmark return (0); 2402bd670b35SErik Nordmark else 2403bd670b35SErik Nordmark return (error); 2404bd670b35SErik Nordmark } 2405bd670b35SErik Nordmark 2406bd670b35SErik Nordmark /* 2407bd670b35SErik Nordmark * Verify local connectivity. This check is called by ULP fusion code. 2408bd670b35SErik Nordmark * The generation number on an IRE_LOCAL or IRE_LOOPBACK only changes if 2409bd670b35SErik Nordmark * the interface is brought down and back up. So we simply fail the local 2410bd670b35SErik Nordmark * process. The caller, TCP Fusion, should unfuse the connection. 2411bd670b35SErik Nordmark */ 2412bd670b35SErik Nordmark boolean_t 2413bd670b35SErik Nordmark ip_output_verify_local(ip_xmit_attr_t *ixa) 2414bd670b35SErik Nordmark { 2415bd670b35SErik Nordmark ire_t *ire = ixa->ixa_ire; 2416bd670b35SErik Nordmark 2417bd670b35SErik Nordmark if (!(ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) 2418bd670b35SErik Nordmark return (B_FALSE); 2419bd670b35SErik Nordmark 2420bd670b35SErik Nordmark return (ixa->ixa_ire->ire_generation == ixa->ixa_ire_generation); 2421bd670b35SErik Nordmark } 2422bd670b35SErik Nordmark 2423bd670b35SErik Nordmark /* 2424bd670b35SErik Nordmark * Local process for ULP loopback, TCP Fusion. Handle both IPv4 and IPv6. 2425bd670b35SErik Nordmark * 2426bd670b35SErik Nordmark * The caller must call ip_output_verify_local() first. 
This function handles 2427bd670b35SErik Nordmark * IPobs, FW_HOOKS, and/or IPsec cases sequentially. 2428bd670b35SErik Nordmark */ 2429bd670b35SErik Nordmark mblk_t * 2430bd670b35SErik Nordmark ip_output_process_local(mblk_t *mp, ip_xmit_attr_t *ixa, boolean_t hooks_out, 2431bd670b35SErik Nordmark boolean_t hooks_in, conn_t *peer_connp) 2432bd670b35SErik Nordmark { 2433bd670b35SErik Nordmark ill_t *ill = ixa->ixa_ire->ire_ill; 2434bd670b35SErik Nordmark ipha_t *ipha = NULL; 2435bd670b35SErik Nordmark ip6_t *ip6h = NULL; 2436bd670b35SErik Nordmark ip_stack_t *ipst = ixa->ixa_ipst; 2437bd670b35SErik Nordmark iaflags_t ixaflags = ixa->ixa_flags; 2438bd670b35SErik Nordmark ip_recv_attr_t iras; 2439bd670b35SErik Nordmark int error; 2440bd670b35SErik Nordmark 2441bd670b35SErik Nordmark ASSERT(mp != NULL); 2442bd670b35SErik Nordmark 2443bd670b35SErik Nordmark if (ixaflags & IXAF_IS_IPV4) { 2444bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 2445bd670b35SErik Nordmark 2446bd670b35SErik Nordmark /* 2447bd670b35SErik Nordmark * If a callback is enabled then we need to know the 2448bd670b35SErik Nordmark * source and destination zoneids for the packet. We already 2449bd670b35SErik Nordmark * have those handy. 
2450bd670b35SErik Nordmark */ 2451bd670b35SErik Nordmark if (ipst->ips_ip4_observe.he_interested) { 2452bd670b35SErik Nordmark zoneid_t szone, dzone; 2453bd670b35SErik Nordmark zoneid_t stackzoneid; 2454bd670b35SErik Nordmark 2455bd670b35SErik Nordmark stackzoneid = netstackid_to_zoneid( 2456bd670b35SErik Nordmark ipst->ips_netstack->netstack_stackid); 2457bd670b35SErik Nordmark 2458bd670b35SErik Nordmark if (stackzoneid == GLOBAL_ZONEID) { 2459bd670b35SErik Nordmark /* Shared-IP zone */ 2460bd670b35SErik Nordmark dzone = ixa->ixa_ire->ire_zoneid; 2461bd670b35SErik Nordmark szone = ixa->ixa_zoneid; 2462bd670b35SErik Nordmark } else { 2463bd670b35SErik Nordmark szone = dzone = stackzoneid; 2464bd670b35SErik Nordmark } 2465bd670b35SErik Nordmark ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 2466bd670b35SErik Nordmark ipst); 2467bd670b35SErik Nordmark } 2468bd670b35SErik Nordmark DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2469bd670b35SErik Nordmark ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, 2470bd670b35SErik Nordmark NULL, int, 1); 2471bd670b35SErik Nordmark 2472bd670b35SErik Nordmark /* FW_HOOKS: LOOPBACK_OUT */ 2473bd670b35SErik Nordmark if (hooks_out) { 2474bd670b35SErik Nordmark DTRACE_PROBE4(ip4__loopback__out__start, ill_t *, NULL, 2475bd670b35SErik Nordmark ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 2476bd670b35SErik Nordmark FW_HOOKS(ipst->ips_ip4_loopback_out_event, 2477bd670b35SErik Nordmark ipst->ips_ipv4firewall_loopback_out, 2478bd670b35SErik Nordmark NULL, ill, ipha, mp, mp, 0, ipst, error); 2479bd670b35SErik Nordmark DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, mp); 2480bd670b35SErik Nordmark } 2481bd670b35SErik Nordmark if (mp == NULL) 2482bd670b35SErik Nordmark return (NULL); 2483bd670b35SErik Nordmark 2484bd670b35SErik Nordmark /* FW_HOOKS: LOOPBACK_IN */ 2485bd670b35SErik Nordmark if (hooks_in) { 2486bd670b35SErik Nordmark DTRACE_PROBE4(ip4__loopback__in__start, ill_t *, ill, 2487bd670b35SErik Nordmark 
ill_t *, NULL, ipha_t *, ipha, mblk_t *, mp); 2488bd670b35SErik Nordmark FW_HOOKS(ipst->ips_ip4_loopback_in_event, 2489bd670b35SErik Nordmark ipst->ips_ipv4firewall_loopback_in, 2490bd670b35SErik Nordmark ill, NULL, ipha, mp, mp, 0, ipst, error); 2491bd670b35SErik Nordmark DTRACE_PROBE1(ip4__loopback__in__end, mblk_t *, mp); 2492bd670b35SErik Nordmark } 2493bd670b35SErik Nordmark if (mp == NULL) 2494bd670b35SErik Nordmark return (NULL); 2495bd670b35SErik Nordmark 2496bd670b35SErik Nordmark DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2497bd670b35SErik Nordmark ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, 2498bd670b35SErik Nordmark NULL, int, 1); 2499bd670b35SErik Nordmark 2500bd670b35SErik Nordmark /* Inbound IPsec polocies */ 2501bd670b35SErik Nordmark if (peer_connp != NULL) { 2502bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies. */ 2503bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 2504bd670b35SErik Nordmark mp = ipsec_check_inbound_policy(mp, peer_connp, ipha, 2505bd670b35SErik Nordmark NULL, &iras); 2506bd670b35SErik Nordmark } 2507bd670b35SErik Nordmark } else { 2508bd670b35SErik Nordmark ip6h = (ip6_t *)mp->b_rptr; 2509bd670b35SErik Nordmark 2510bd670b35SErik Nordmark /* 2511bd670b35SErik Nordmark * If a callback is enabled then we need to know the 2512bd670b35SErik Nordmark * source and destination zoneids for the packet. We already 2513bd670b35SErik Nordmark * have those handy. 
2514bd670b35SErik Nordmark */ 2515bd670b35SErik Nordmark if (ipst->ips_ip6_observe.he_interested) { 2516bd670b35SErik Nordmark zoneid_t szone, dzone; 2517bd670b35SErik Nordmark zoneid_t stackzoneid; 2518bd670b35SErik Nordmark 2519bd670b35SErik Nordmark stackzoneid = netstackid_to_zoneid( 2520bd670b35SErik Nordmark ipst->ips_netstack->netstack_stackid); 2521bd670b35SErik Nordmark 2522bd670b35SErik Nordmark if (stackzoneid == GLOBAL_ZONEID) { 2523bd670b35SErik Nordmark /* Shared-IP zone */ 2524bd670b35SErik Nordmark dzone = ixa->ixa_ire->ire_zoneid; 2525bd670b35SErik Nordmark szone = ixa->ixa_zoneid; 2526bd670b35SErik Nordmark } else { 2527bd670b35SErik Nordmark szone = dzone = stackzoneid; 2528bd670b35SErik Nordmark } 2529bd670b35SErik Nordmark ipobs_hook(mp, IPOBS_HOOK_LOCAL, szone, dzone, ill, 2530bd670b35SErik Nordmark ipst); 2531bd670b35SErik Nordmark } 2532bd670b35SErik Nordmark DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2533bd670b35SErik Nordmark ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, 2534bd670b35SErik Nordmark ip6h, int, 1); 2535bd670b35SErik Nordmark 2536bd670b35SErik Nordmark /* FW_HOOKS: LOOPBACK_OUT */ 2537bd670b35SErik Nordmark if (hooks_out) { 2538bd670b35SErik Nordmark DTRACE_PROBE4(ip6__loopback__out__start, ill_t *, NULL, 2539bd670b35SErik Nordmark ill_t *, ill, ip6_t *, ip6h, mblk_t *, mp); 2540bd670b35SErik Nordmark FW_HOOKS6(ipst->ips_ip6_loopback_out_event, 2541bd670b35SErik Nordmark ipst->ips_ipv6firewall_loopback_out, 2542bd670b35SErik Nordmark NULL, ill, ip6h, mp, mp, 0, ipst, error); 2543bd670b35SErik Nordmark DTRACE_PROBE1(ip6__loopback__out__end, mblk_t *, mp); 2544bd670b35SErik Nordmark } 2545bd670b35SErik Nordmark if (mp == NULL) 2546bd670b35SErik Nordmark return (NULL); 2547bd670b35SErik Nordmark 2548bd670b35SErik Nordmark /* FW_HOOKS: LOOPBACK_IN */ 2549bd670b35SErik Nordmark if (hooks_in) { 2550bd670b35SErik Nordmark DTRACE_PROBE4(ip6__loopback__in__start, ill_t *, ill, 2551bd670b35SErik Nordmark 
ill_t *, NULL, ip6_t *, ip6h, mblk_t *, mp); 2552bd670b35SErik Nordmark FW_HOOKS6(ipst->ips_ip6_loopback_in_event, 2553bd670b35SErik Nordmark ipst->ips_ipv6firewall_loopback_in, 2554bd670b35SErik Nordmark ill, NULL, ip6h, mp, mp, 0, ipst, error); 2555bd670b35SErik Nordmark DTRACE_PROBE1(ip6__loopback__in__end, mblk_t *, mp); 2556bd670b35SErik Nordmark } 2557bd670b35SErik Nordmark if (mp == NULL) 2558bd670b35SErik Nordmark return (NULL); 2559bd670b35SErik Nordmark 2560bd670b35SErik Nordmark DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *, 2561bd670b35SErik Nordmark ip6h, __dtrace_ipsr_ill_t *, ill, ipha_t *, NULL, ip6_t *, 2562bd670b35SErik Nordmark ip6h, int, 1); 2563bd670b35SErik Nordmark 2564bd670b35SErik Nordmark /* Inbound IPsec polocies */ 2565bd670b35SErik Nordmark if (peer_connp != NULL) { 2566bd670b35SErik Nordmark /* Map ixa to ira including IPsec policies. */ 2567bd670b35SErik Nordmark ipsec_out_to_in(ixa, ill, &iras); 2568bd670b35SErik Nordmark mp = ipsec_check_inbound_policy(mp, peer_connp, NULL, 2569bd670b35SErik Nordmark ip6h, &iras); 2570bd670b35SErik Nordmark } 2571bd670b35SErik Nordmark } 2572bd670b35SErik Nordmark 2573bd670b35SErik Nordmark if (mp == NULL) { 2574bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2575bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", NULL, ill); 2576bd670b35SErik Nordmark } 2577bd670b35SErik Nordmark 2578bd670b35SErik Nordmark return (mp); 2579bd670b35SErik Nordmark } 2580