1bd670b35SErik Nordmark /* 2bd670b35SErik Nordmark * CDDL HEADER START 3bd670b35SErik Nordmark * 4bd670b35SErik Nordmark * The contents of this file are subject to the terms of the 5bd670b35SErik Nordmark * Common Development and Distribution License (the "License"). 6bd670b35SErik Nordmark * You may not use this file except in compliance with the License. 7bd670b35SErik Nordmark * 8bd670b35SErik Nordmark * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9bd670b35SErik Nordmark * or http://www.opensolaris.org/os/licensing. 10bd670b35SErik Nordmark * See the License for the specific language governing permissions 11bd670b35SErik Nordmark * and limitations under the License. 12bd670b35SErik Nordmark * 13bd670b35SErik Nordmark * When distributing Covered Code, include this CDDL HEADER in each 14bd670b35SErik Nordmark * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15bd670b35SErik Nordmark * If applicable, add the following below this CDDL HEADER, with the 16bd670b35SErik Nordmark * fields enclosed by brackets "[]" replaced with your own identifying 17bd670b35SErik Nordmark * information: Portions Copyright [yyyy] [name of copyright owner] 18bd670b35SErik Nordmark * 19bd670b35SErik Nordmark * CDDL HEADER END 20bd670b35SErik Nordmark */ 21bd670b35SErik Nordmark 22bd670b35SErik Nordmark /* 231eee170aSErik Nordmark * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24*af10b639SDan McDonald * 25*af10b639SDan McDonald * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 26bd670b35SErik Nordmark */ 27bd670b35SErik Nordmark /* Copyright (c) 1990 Mentat Inc. 
*/ 28bd670b35SErik Nordmark 29bd670b35SErik Nordmark #include <sys/types.h> 30bd670b35SErik Nordmark #include <sys/stream.h> 31bd670b35SErik Nordmark #include <sys/dlpi.h> 32bd670b35SErik Nordmark #include <sys/stropts.h> 33bd670b35SErik Nordmark #include <sys/sysmacros.h> 34bd670b35SErik Nordmark #include <sys/strsubr.h> 35bd670b35SErik Nordmark #include <sys/strlog.h> 36bd670b35SErik Nordmark #include <sys/strsun.h> 37bd670b35SErik Nordmark #include <sys/zone.h> 38bd670b35SErik Nordmark #define _SUN_TPI_VERSION 2 39bd670b35SErik Nordmark #include <sys/tihdr.h> 40bd670b35SErik Nordmark #include <sys/xti_inet.h> 41bd670b35SErik Nordmark #include <sys/ddi.h> 42bd670b35SErik Nordmark #include <sys/sunddi.h> 43bd670b35SErik Nordmark #include <sys/cmn_err.h> 44bd670b35SErik Nordmark #include <sys/debug.h> 45bd670b35SErik Nordmark #include <sys/kobj.h> 46bd670b35SErik Nordmark #include <sys/modctl.h> 47bd670b35SErik Nordmark #include <sys/atomic.h> 48bd670b35SErik Nordmark #include <sys/policy.h> 49bd670b35SErik Nordmark #include <sys/priv.h> 50bd670b35SErik Nordmark 51bd670b35SErik Nordmark #include <sys/systm.h> 52bd670b35SErik Nordmark #include <sys/param.h> 53bd670b35SErik Nordmark #include <sys/kmem.h> 54bd670b35SErik Nordmark #include <sys/sdt.h> 55bd670b35SErik Nordmark #include <sys/socket.h> 56bd670b35SErik Nordmark #include <sys/vtrace.h> 57bd670b35SErik Nordmark #include <sys/isa_defs.h> 58bd670b35SErik Nordmark #include <sys/mac.h> 59bd670b35SErik Nordmark #include <net/if.h> 60bd670b35SErik Nordmark #include <net/if_arp.h> 61bd670b35SErik Nordmark #include <net/route.h> 62bd670b35SErik Nordmark #include <sys/sockio.h> 63bd670b35SErik Nordmark #include <netinet/in.h> 64bd670b35SErik Nordmark #include <net/if_dl.h> 65bd670b35SErik Nordmark 66bd670b35SErik Nordmark #include <inet/common.h> 67bd670b35SErik Nordmark #include <inet/mi.h> 68bd670b35SErik Nordmark #include <inet/mib2.h> 69bd670b35SErik Nordmark #include <inet/nd.h> 70bd670b35SErik Nordmark #include 
<inet/arp.h> 71bd670b35SErik Nordmark #include <inet/snmpcom.h> 72bd670b35SErik Nordmark #include <inet/kstatcom.h> 73bd670b35SErik Nordmark 74bd670b35SErik Nordmark #include <netinet/igmp_var.h> 75bd670b35SErik Nordmark #include <netinet/ip6.h> 76bd670b35SErik Nordmark #include <netinet/icmp6.h> 77bd670b35SErik Nordmark #include <netinet/sctp.h> 78bd670b35SErik Nordmark 79bd670b35SErik Nordmark #include <inet/ip.h> 80bd670b35SErik Nordmark #include <inet/ip_impl.h> 81bd670b35SErik Nordmark #include <inet/ip6.h> 82bd670b35SErik Nordmark #include <inet/ip6_asp.h> 83bd670b35SErik Nordmark #include <inet/optcom.h> 84bd670b35SErik Nordmark #include <inet/tcp.h> 85bd670b35SErik Nordmark #include <inet/tcp_impl.h> 86bd670b35SErik Nordmark #include <inet/ip_multi.h> 87bd670b35SErik Nordmark #include <inet/ip_if.h> 88bd670b35SErik Nordmark #include <inet/ip_ire.h> 89bd670b35SErik Nordmark #include <inet/ip_ftable.h> 90bd670b35SErik Nordmark #include <inet/ip_rts.h> 91bd670b35SErik Nordmark #include <inet/ip_ndp.h> 92bd670b35SErik Nordmark #include <inet/ip_listutils.h> 93bd670b35SErik Nordmark #include <netinet/igmp.h> 94bd670b35SErik Nordmark #include <netinet/ip_mroute.h> 95bd670b35SErik Nordmark #include <inet/ipp_common.h> 96bd670b35SErik Nordmark 97bd670b35SErik Nordmark #include <net/pfkeyv2.h> 98bd670b35SErik Nordmark #include <inet/sadb.h> 99bd670b35SErik Nordmark #include <inet/ipsec_impl.h> 100bd670b35SErik Nordmark #include <inet/ipdrop.h> 101bd670b35SErik Nordmark #include <inet/ip_netinfo.h> 102bd670b35SErik Nordmark #include <inet/ilb_ip.h> 103bd670b35SErik Nordmark #include <sys/squeue_impl.h> 104bd670b35SErik Nordmark #include <sys/squeue.h> 105bd670b35SErik Nordmark 106bd670b35SErik Nordmark #include <sys/ethernet.h> 107bd670b35SErik Nordmark #include <net/if_types.h> 108bd670b35SErik Nordmark #include <sys/cpuvar.h> 109bd670b35SErik Nordmark 110bd670b35SErik Nordmark #include <ipp/ipp.h> 111bd670b35SErik Nordmark #include <ipp/ipp_impl.h> 112bd670b35SErik 
Nordmark #include <ipp/ipgpc/ipgpc.h> 113bd670b35SErik Nordmark 114bd670b35SErik Nordmark #include <sys/pattr.h> 115bd670b35SErik Nordmark #include <inet/ipclassifier.h> 116bd670b35SErik Nordmark #include <inet/sctp_ip.h> 117bd670b35SErik Nordmark #include <inet/sctp/sctp_impl.h> 118bd670b35SErik Nordmark #include <inet/udp_impl.h> 119bd670b35SErik Nordmark #include <sys/sunddi.h> 120bd670b35SErik Nordmark 121bd670b35SErik Nordmark #include <sys/tsol/label.h> 122bd670b35SErik Nordmark #include <sys/tsol/tnet.h> 123bd670b35SErik Nordmark 124b36a561eSErik Nordmark #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */ 125bd670b35SErik Nordmark 126bd670b35SErik Nordmark #ifdef DEBUG 127bd670b35SErik Nordmark extern boolean_t skip_sctp_cksum; 128bd670b35SErik Nordmark #endif 129bd670b35SErik Nordmark 130bd670b35SErik Nordmark static void ip_input_local_v4(ire_t *, mblk_t *, ipha_t *, 131bd670b35SErik Nordmark ip_recv_attr_t *); 132bd670b35SErik Nordmark 133bd670b35SErik Nordmark static void ip_input_broadcast_v4(ire_t *, mblk_t *, ipha_t *, 134bd670b35SErik Nordmark ip_recv_attr_t *); 135bd670b35SErik Nordmark static void ip_input_multicast_v4(ire_t *, mblk_t *, ipha_t *, 136bd670b35SErik Nordmark ip_recv_attr_t *); 137bd670b35SErik Nordmark 138bd670b35SErik Nordmark #pragma inline(ip_input_common_v4, ip_input_local_v4, ip_forward_xmit_v4) 139bd670b35SErik Nordmark 140bd670b35SErik Nordmark /* 141bd670b35SErik Nordmark * Direct read side procedure capable of dealing with chains. GLDv3 based 142bd670b35SErik Nordmark * drivers call this function directly with mblk chains while STREAMS 143bd670b35SErik Nordmark * read side procedure ip_rput() calls this for single packet with ip_ring 144bd670b35SErik Nordmark * set to NULL to process one packet at a time. 145bd670b35SErik Nordmark * 146bd670b35SErik Nordmark * The ill will always be valid if this function is called directly from 147bd670b35SErik Nordmark * the driver. 
148bd670b35SErik Nordmark * 149bd670b35SErik Nordmark * If ip_input() is called from GLDv3: 150bd670b35SErik Nordmark * 151bd670b35SErik Nordmark * - This must be a non-VLAN IP stream. 152bd670b35SErik Nordmark * - 'mp' is either an untagged or a special priority-tagged packet. 153bd670b35SErik Nordmark * - Any VLAN tag that was in the MAC header has been stripped. 154bd670b35SErik Nordmark * 155bd670b35SErik Nordmark * If the IP header in packet is not 32-bit aligned, every message in the 156bd670b35SErik Nordmark * chain will be aligned before further operations. This is required on SPARC 157bd670b35SErik Nordmark * platform. 158bd670b35SErik Nordmark */ 159bd670b35SErik Nordmark void 160bd670b35SErik Nordmark ip_input(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 161bd670b35SErik Nordmark struct mac_header_info_s *mhip) 162bd670b35SErik Nordmark { 163bd670b35SErik Nordmark (void) ip_input_common_v4(ill, ip_ring, mp_chain, mhip, NULL, NULL, 164bd670b35SErik Nordmark NULL); 165bd670b35SErik Nordmark } 166bd670b35SErik Nordmark 167bd670b35SErik Nordmark /* 168bd670b35SErik Nordmark * ip_accept_tcp() - This function is called by the squeue when it retrieves 169bd670b35SErik Nordmark * a chain of packets in the poll mode. The packets have gone through the 170bd670b35SErik Nordmark * data link processing but not IP processing. For performance and latency 171bd670b35SErik Nordmark * reasons, the squeue wants to process the chain in line instead of feeding 172bd670b35SErik Nordmark * it back via ip_input path. 173bd670b35SErik Nordmark * 174bd670b35SErik Nordmark * We set up the ip_recv_attr_t with IRAF_TARGET_SQP to that ip_fanout_v4 175bd670b35SErik Nordmark * will pass back any TCP packets matching the target sqp to 176bd670b35SErik Nordmark * ip_input_common_v4 using ira_target_sqp_mp. Other packets are handled by 177bd670b35SErik Nordmark * ip_input_v4 and ip_fanout_v4 as normal. 
178bd670b35SErik Nordmark * The TCP packets that match the target squeue are returned to the caller 179bd670b35SErik Nordmark * as a b_next chain after each packet has been prepend with an mblk 180bd670b35SErik Nordmark * from ip_recv_attr_to_mblk. 181bd670b35SErik Nordmark */ 182bd670b35SErik Nordmark mblk_t * 183bd670b35SErik Nordmark ip_accept_tcp(ill_t *ill, ill_rx_ring_t *ip_ring, squeue_t *target_sqp, 184bd670b35SErik Nordmark mblk_t *mp_chain, mblk_t **last, uint_t *cnt) 185bd670b35SErik Nordmark { 186bd670b35SErik Nordmark return (ip_input_common_v4(ill, ip_ring, mp_chain, NULL, target_sqp, 187bd670b35SErik Nordmark last, cnt)); 188bd670b35SErik Nordmark } 189bd670b35SErik Nordmark 190bd670b35SErik Nordmark /* 191bd670b35SErik Nordmark * Used by ip_input and ip_accept_tcp 192bd670b35SErik Nordmark * The last three arguments are only used by ip_accept_tcp, and mhip is 193bd670b35SErik Nordmark * only used by ip_input. 194bd670b35SErik Nordmark */ 195bd670b35SErik Nordmark mblk_t * 196bd670b35SErik Nordmark ip_input_common_v4(ill_t *ill, ill_rx_ring_t *ip_ring, mblk_t *mp_chain, 197bd670b35SErik Nordmark struct mac_header_info_s *mhip, squeue_t *target_sqp, 198bd670b35SErik Nordmark mblk_t **last, uint_t *cnt) 199bd670b35SErik Nordmark { 200bd670b35SErik Nordmark mblk_t *mp; 201bd670b35SErik Nordmark ipha_t *ipha; 202bd670b35SErik Nordmark ip_recv_attr_t iras; /* Receive attributes */ 203bd670b35SErik Nordmark rtc_t rtc; 204bd670b35SErik Nordmark iaflags_t chain_flags = 0; /* Fixed for chain */ 205bd670b35SErik Nordmark mblk_t *ahead = NULL; /* Accepted head */ 206bd670b35SErik Nordmark mblk_t *atail = NULL; /* Accepted tail */ 207bd670b35SErik Nordmark uint_t acnt = 0; /* Accepted count */ 208bd670b35SErik Nordmark 209bd670b35SErik Nordmark ASSERT(mp_chain != NULL); 210bd670b35SErik Nordmark ASSERT(ill != NULL); 211bd670b35SErik Nordmark 212bd670b35SErik Nordmark /* These ones do not change as we loop over packets */ 213bd670b35SErik Nordmark iras.ira_ill = 
iras.ira_rill = ill; 214bd670b35SErik Nordmark iras.ira_ruifindex = ill->ill_phyint->phyint_ifindex; 215bd670b35SErik Nordmark iras.ira_rifindex = iras.ira_ruifindex; 216bd670b35SErik Nordmark iras.ira_sqp = NULL; 217bd670b35SErik Nordmark iras.ira_ring = ip_ring; 218bd670b35SErik Nordmark /* For ECMP and outbound transmit ring selection */ 219bd670b35SErik Nordmark iras.ira_xmit_hint = ILL_RING_TO_XMIT_HINT(ip_ring); 220bd670b35SErik Nordmark 221bd670b35SErik Nordmark iras.ira_target_sqp = target_sqp; 222bd670b35SErik Nordmark iras.ira_target_sqp_mp = NULL; 223bd670b35SErik Nordmark if (target_sqp != NULL) 224bd670b35SErik Nordmark chain_flags |= IRAF_TARGET_SQP; 225bd670b35SErik Nordmark 226bd670b35SErik Nordmark /* 227bd670b35SErik Nordmark * We try to have a mhip pointer when possible, but 228bd670b35SErik Nordmark * it might be NULL in some cases. In those cases we 229bd670b35SErik Nordmark * have to assume unicast. 230bd670b35SErik Nordmark */ 231bd670b35SErik Nordmark iras.ira_mhip = mhip; 232bd670b35SErik Nordmark iras.ira_flags = 0; 233bd670b35SErik Nordmark if (mhip != NULL) { 234bd670b35SErik Nordmark switch (mhip->mhi_dsttype) { 235bd670b35SErik Nordmark case MAC_ADDRTYPE_MULTICAST : 236bd670b35SErik Nordmark chain_flags |= IRAF_L2DST_MULTICAST; 237bd670b35SErik Nordmark break; 238bd670b35SErik Nordmark case MAC_ADDRTYPE_BROADCAST : 239bd670b35SErik Nordmark chain_flags |= IRAF_L2DST_BROADCAST; 240bd670b35SErik Nordmark break; 241bd670b35SErik Nordmark } 242bd670b35SErik Nordmark } 243bd670b35SErik Nordmark 244bd670b35SErik Nordmark /* 245bd670b35SErik Nordmark * Initialize the one-element route cache. 246bd670b35SErik Nordmark * 247bd670b35SErik Nordmark * We do ire caching from one iteration to 248bd670b35SErik Nordmark * another. 
In the event the packet chain contains 249bd670b35SErik Nordmark * all packets from the same dst, this caching saves 250bd670b35SErik Nordmark * an ire_route_recursive for each of the succeeding 251bd670b35SErik Nordmark * packets in a packet chain. 252bd670b35SErik Nordmark */ 253bd670b35SErik Nordmark rtc.rtc_ire = NULL; 254bd670b35SErik Nordmark rtc.rtc_ipaddr = INADDR_ANY; 255bd670b35SErik Nordmark 256bd670b35SErik Nordmark /* Loop over b_next */ 257bd670b35SErik Nordmark for (mp = mp_chain; mp != NULL; mp = mp_chain) { 258bd670b35SErik Nordmark mp_chain = mp->b_next; 259bd670b35SErik Nordmark mp->b_next = NULL; 260bd670b35SErik Nordmark 261bd670b35SErik Nordmark ASSERT(DB_TYPE(mp) == M_DATA); 262bd670b35SErik Nordmark 263bd670b35SErik Nordmark 264bd670b35SErik Nordmark /* 265bd670b35SErik Nordmark * if db_ref > 1 then copymsg and free original. Packet 266bd670b35SErik Nordmark * may be changed and we do not want the other entity 267bd670b35SErik Nordmark * who has a reference to this message to trip over the 268bd670b35SErik Nordmark * changes. This is a blind change because trying to 269bd670b35SErik Nordmark * catch all places that might change the packet is too 270bd670b35SErik Nordmark * difficult. 271bd670b35SErik Nordmark * 272bd670b35SErik Nordmark * This corresponds to the fast path case, where we have 273bd670b35SErik Nordmark * a chain of M_DATA mblks. We check the db_ref count 274bd670b35SErik Nordmark * of only the 1st data block in the mblk chain. There 275bd670b35SErik Nordmark * doesn't seem to be a reason why a device driver would 276bd670b35SErik Nordmark * send up data with varying db_ref counts in the mblk 277bd670b35SErik Nordmark * chain. In any case the Fast path is a private 278bd670b35SErik Nordmark * interface, and our drivers don't do such a thing. 
279bd670b35SErik Nordmark * Given the above assumption, there is no need to walk 280bd670b35SErik Nordmark * down the entire mblk chain (which could have a 281bd670b35SErik Nordmark * potential performance problem) 282bd670b35SErik Nordmark * 283bd670b35SErik Nordmark * The "(DB_REF(mp) > 1)" check was moved from ip_rput() 284bd670b35SErik Nordmark * to here because of exclusive ip stacks and vnics. 285bd670b35SErik Nordmark * Packets transmitted from exclusive stack over vnic 286bd670b35SErik Nordmark * can have db_ref > 1 and when it gets looped back to 287bd670b35SErik Nordmark * another vnic in a different zone, you have ip_input() 288bd670b35SErik Nordmark * getting dblks with db_ref > 1. So if someone 289bd670b35SErik Nordmark * complains of TCP performance under this scenario, 290bd670b35SErik Nordmark * take a serious look here on the impact of copymsg(). 291bd670b35SErik Nordmark */ 292bd670b35SErik Nordmark if (DB_REF(mp) > 1) { 293bd670b35SErik Nordmark if ((mp = ip_fix_dbref(mp, &iras)) == NULL) { 294bd670b35SErik Nordmark /* mhip might point into 1st packet in chain */ 295bd670b35SErik Nordmark iras.ira_mhip = NULL; 296bd670b35SErik Nordmark continue; 297bd670b35SErik Nordmark } 298bd670b35SErik Nordmark } 299bd670b35SErik Nordmark 300bd670b35SErik Nordmark /* 301bd670b35SErik Nordmark * IP header ptr not aligned? 
302bd670b35SErik Nordmark * OR IP header not complete in first mblk 303bd670b35SErik Nordmark */ 304bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 305bd670b35SErik Nordmark if (!OK_32PTR(ipha) || MBLKL(mp) < IP_SIMPLE_HDR_LENGTH) { 306bd670b35SErik Nordmark mp = ip_check_and_align_header(mp, IP_SIMPLE_HDR_LENGTH, 307bd670b35SErik Nordmark &iras); 308bd670b35SErik Nordmark if (mp == NULL) { 309bd670b35SErik Nordmark /* mhip might point into 1st packet in chain */ 310bd670b35SErik Nordmark iras.ira_mhip = NULL; 311bd670b35SErik Nordmark continue; 312bd670b35SErik Nordmark } 313bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 314bd670b35SErik Nordmark } 315bd670b35SErik Nordmark 316bd670b35SErik Nordmark /* Protect against a mix of Ethertypes and IP versions */ 317bd670b35SErik Nordmark if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 318bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors); 319bd670b35SErik Nordmark ip_drop_input("ipIfStatsInHdrErrors", mp, ill); 320bd670b35SErik Nordmark freemsg(mp); 321bd670b35SErik Nordmark /* mhip might point into 1st packet in the chain. */ 322bd670b35SErik Nordmark iras.ira_mhip = NULL; 323bd670b35SErik Nordmark continue; 324bd670b35SErik Nordmark } 325bd670b35SErik Nordmark 326bd670b35SErik Nordmark /* 327bd670b35SErik Nordmark * Check for Martian addrs; we have to explicitly 328bd670b35SErik Nordmark * test for for zero dst since this is also used as 329bd670b35SErik Nordmark * an indication that the rtc is not used. 330bd670b35SErik Nordmark */ 331bd670b35SErik Nordmark if (ipha->ipha_dst == INADDR_ANY) { 332bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors); 333bd670b35SErik Nordmark ip_drop_input("ipIfStatsInAddrErrors", mp, ill); 334bd670b35SErik Nordmark freemsg(mp); 335bd670b35SErik Nordmark /* mhip might point into 1st packet in the chain. 
*/ 336bd670b35SErik Nordmark iras.ira_mhip = NULL; 337bd670b35SErik Nordmark continue; 338bd670b35SErik Nordmark } 339bd670b35SErik Nordmark 340bd670b35SErik Nordmark /* 341bd670b35SErik Nordmark * Keep L2SRC from a previous packet in chain since mhip 342bd670b35SErik Nordmark * might point into an earlier packet in the chain. 343bd670b35SErik Nordmark * Keep IRAF_VERIFIED_SRC to avoid redoing broadcast 344bd670b35SErik Nordmark * source check in forwarding path. 345bd670b35SErik Nordmark */ 346bd670b35SErik Nordmark chain_flags |= (iras.ira_flags & 347bd670b35SErik Nordmark (IRAF_L2SRC_SET|IRAF_VERIFIED_SRC)); 348bd670b35SErik Nordmark 349bd670b35SErik Nordmark iras.ira_flags = IRAF_IS_IPV4 | IRAF_VERIFY_IP_CKSUM | 350bd670b35SErik Nordmark IRAF_VERIFY_ULP_CKSUM | chain_flags; 351bd670b35SErik Nordmark iras.ira_free_flags = 0; 352bd670b35SErik Nordmark iras.ira_cred = NULL; 353bd670b35SErik Nordmark iras.ira_cpid = NOPID; 354bd670b35SErik Nordmark iras.ira_tsl = NULL; 355bd670b35SErik Nordmark iras.ira_zoneid = ALL_ZONES; /* Default for forwarding */ 356bd670b35SErik Nordmark 357bd670b35SErik Nordmark /* 358bd670b35SErik Nordmark * We must count all incoming packets, even if they end 359bd670b35SErik Nordmark * up being dropped later on. Defer counting bytes until 360bd670b35SErik Nordmark * we have the whole IP header in first mblk. 361bd670b35SErik Nordmark */ 362bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInReceives); 363bd670b35SErik Nordmark 364bd670b35SErik Nordmark iras.ira_pktlen = ntohs(ipha->ipha_length); 365bd670b35SErik Nordmark UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInOctets, 366bd670b35SErik Nordmark iras.ira_pktlen); 367bd670b35SErik Nordmark 368bd670b35SErik Nordmark /* 369bd670b35SErik Nordmark * Call one of: 370bd670b35SErik Nordmark * ill_input_full_v4 371bd670b35SErik Nordmark * ill_input_short_v4 372bd670b35SErik Nordmark * The former is used in unusual cases. See ill_set_inputfn(). 
373bd670b35SErik Nordmark */ 374bd670b35SErik Nordmark (*ill->ill_inputfn)(mp, ipha, &ipha->ipha_dst, &iras, &rtc); 375bd670b35SErik Nordmark 376bd670b35SErik Nordmark /* Any references to clean up? No hold on ira_ill */ 377bd670b35SErik Nordmark if (iras.ira_flags & (IRAF_IPSEC_SECURE|IRAF_SYSTEM_LABELED)) 378bd670b35SErik Nordmark ira_cleanup(&iras, B_FALSE); 379bd670b35SErik Nordmark 380bd670b35SErik Nordmark if (iras.ira_target_sqp_mp != NULL) { 381bd670b35SErik Nordmark /* Better be called from ip_accept_tcp */ 382bd670b35SErik Nordmark ASSERT(target_sqp != NULL); 383bd670b35SErik Nordmark 384bd670b35SErik Nordmark /* Found one packet to accept */ 385bd670b35SErik Nordmark mp = iras.ira_target_sqp_mp; 386bd670b35SErik Nordmark iras.ira_target_sqp_mp = NULL; 387bd670b35SErik Nordmark ASSERT(ip_recv_attr_is_mblk(mp)); 388bd670b35SErik Nordmark 389bd670b35SErik Nordmark if (atail != NULL) 390bd670b35SErik Nordmark atail->b_next = mp; 391bd670b35SErik Nordmark else 392bd670b35SErik Nordmark ahead = mp; 393bd670b35SErik Nordmark atail = mp; 394bd670b35SErik Nordmark acnt++; 395bd670b35SErik Nordmark mp = NULL; 396bd670b35SErik Nordmark } 397bd670b35SErik Nordmark /* mhip might point into 1st packet in the chain. */ 398bd670b35SErik Nordmark iras.ira_mhip = NULL; 399bd670b35SErik Nordmark } 400bd670b35SErik Nordmark /* Any remaining references to the route cache? 
*/ 401bd670b35SErik Nordmark if (rtc.rtc_ire != NULL) { 402bd670b35SErik Nordmark ASSERT(rtc.rtc_ipaddr != INADDR_ANY); 403bd670b35SErik Nordmark ire_refrele(rtc.rtc_ire); 404bd670b35SErik Nordmark } 405bd670b35SErik Nordmark 406bd670b35SErik Nordmark if (ahead != NULL) { 407bd670b35SErik Nordmark /* Better be called from ip_accept_tcp */ 408bd670b35SErik Nordmark ASSERT(target_sqp != NULL); 409bd670b35SErik Nordmark *last = atail; 410bd670b35SErik Nordmark *cnt = acnt; 411bd670b35SErik Nordmark return (ahead); 412bd670b35SErik Nordmark } 413bd670b35SErik Nordmark 414bd670b35SErik Nordmark return (NULL); 415bd670b35SErik Nordmark } 416bd670b35SErik Nordmark 417bd670b35SErik Nordmark /* 418bd670b35SErik Nordmark * This input function is used when 419bd670b35SErik Nordmark * - is_system_labeled() 420bd670b35SErik Nordmark * - CGTP filtering 421bd670b35SErik Nordmark * - DHCP unicast before we have an IP address configured 422bd670b35SErik Nordmark * - there is an listener for IPPROTO_RSVP 423bd670b35SErik Nordmark */ 424bd670b35SErik Nordmark void 425bd670b35SErik Nordmark ill_input_full_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg, 426bd670b35SErik Nordmark ip_recv_attr_t *ira, rtc_t *rtc) 427bd670b35SErik Nordmark { 428bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 429bd670b35SErik Nordmark ipaddr_t nexthop = *(ipaddr_t *)nexthop_arg; 430bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 431bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 432bd670b35SErik Nordmark int cgtp_flt_pkt; 433bd670b35SErik Nordmark 434bd670b35SErik Nordmark ASSERT(ira->ira_tsl == NULL); 435bd670b35SErik Nordmark 436bd670b35SErik Nordmark /* 437bd670b35SErik Nordmark * Attach any necessary label information to 438bd670b35SErik Nordmark * this packet 439bd670b35SErik Nordmark */ 440bd670b35SErik Nordmark if (is_system_labeled()) { 441bd670b35SErik Nordmark ira->ira_flags |= IRAF_SYSTEM_LABELED; 442bd670b35SErik Nordmark 443bd670b35SErik Nordmark /* 444bd670b35SErik 
Nordmark * This updates ira_cred, ira_tsl and ira_free_flags based 445bd670b35SErik Nordmark * on the label. 446bd670b35SErik Nordmark */ 447bd670b35SErik Nordmark if (!tsol_get_pkt_label(mp, IPV4_VERSION, ira)) { 448bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 449bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 450bd670b35SErik Nordmark freemsg(mp); 451bd670b35SErik Nordmark return; 452bd670b35SErik Nordmark } 453bd670b35SErik Nordmark /* Note that ira_tsl can be NULL here. */ 454bd670b35SErik Nordmark 455bd670b35SErik Nordmark /* tsol_get_pkt_label sometimes does pullupmsg */ 456bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 457bd670b35SErik Nordmark } 458bd670b35SErik Nordmark 459bd670b35SErik Nordmark /* 460bd670b35SErik Nordmark * Invoke the CGTP (multirouting) filtering module to process 461bd670b35SErik Nordmark * the incoming packet. Packets identified as duplicates 462bd670b35SErik Nordmark * must be discarded. Filtering is active only if the 463bd670b35SErik Nordmark * the ip_cgtp_filter ndd variable is non-zero. 464bd670b35SErik Nordmark */ 465bd670b35SErik Nordmark cgtp_flt_pkt = CGTP_IP_PKT_NOT_CGTP; 466bd670b35SErik Nordmark if (ipst->ips_ip_cgtp_filter && 467bd670b35SErik Nordmark ipst->ips_ip_cgtp_filter_ops != NULL) { 468bd670b35SErik Nordmark netstackid_t stackid; 469bd670b35SErik Nordmark 470bd670b35SErik Nordmark stackid = ipst->ips_netstack->netstack_stackid; 471bd670b35SErik Nordmark /* 472bd670b35SErik Nordmark * CGTP and IPMP are mutually exclusive so 473bd670b35SErik Nordmark * phyint_ifindex is fine here. 
474bd670b35SErik Nordmark */ 475bd670b35SErik Nordmark cgtp_flt_pkt = 476bd670b35SErik Nordmark ipst->ips_ip_cgtp_filter_ops->cfo_filter(stackid, 477bd670b35SErik Nordmark ill->ill_phyint->phyint_ifindex, mp); 478bd670b35SErik Nordmark if (cgtp_flt_pkt == CGTP_IP_PKT_DUPLICATE) { 479bd670b35SErik Nordmark ip_drop_input("CGTP_IP_PKT_DUPLICATE", mp, ill); 480bd670b35SErik Nordmark freemsg(mp); 481bd670b35SErik Nordmark return; 482bd670b35SErik Nordmark } 483bd670b35SErik Nordmark } 484bd670b35SErik Nordmark 485bd670b35SErik Nordmark /* 486bd670b35SErik Nordmark * Brutal hack for DHCPv4 unicast: RFC2131 allows a DHCP 487bd670b35SErik Nordmark * server to unicast DHCP packets to a DHCP client using the 488bd670b35SErik Nordmark * IP address it is offering to the client. This can be 489bd670b35SErik Nordmark * disabled through the "broadcast bit", but not all DHCP 490bd670b35SErik Nordmark * servers honor that bit. Therefore, to interoperate with as 491bd670b35SErik Nordmark * many DHCP servers as possible, the DHCP client allows the 492bd670b35SErik Nordmark * server to unicast, but we treat those packets as broadcast 493bd670b35SErik Nordmark * here. Note that we don't rewrite the packet itself since 494bd670b35SErik Nordmark * (a) that would mess up the checksums and (b) the DHCP 495bd670b35SErik Nordmark * client conn is bound to INADDR_ANY so ip_fanout_udp() will 496bd670b35SErik Nordmark * hand it the packet regardless. 
497bd670b35SErik Nordmark */ 498bd670b35SErik Nordmark if (ill->ill_dhcpinit != 0 && 499bd670b35SErik Nordmark ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION && 500bd670b35SErik Nordmark ipha->ipha_protocol == IPPROTO_UDP) { 501bd670b35SErik Nordmark udpha_t *udpha; 502bd670b35SErik Nordmark 503bd670b35SErik Nordmark ipha = ip_pullup(mp, sizeof (ipha_t) + sizeof (udpha_t), ira); 504bd670b35SErik Nordmark if (ipha == NULL) { 505bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 506bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards - dhcp", mp, ill); 507bd670b35SErik Nordmark freemsg(mp); 508bd670b35SErik Nordmark return; 509bd670b35SErik Nordmark } 510bd670b35SErik Nordmark /* Reload since pullupmsg() can change b_rptr. */ 511bd670b35SErik Nordmark udpha = (udpha_t *)&ipha[1]; 512bd670b35SErik Nordmark 513bd670b35SErik Nordmark if (ntohs(udpha->uha_dst_port) == IPPORT_BOOTPC) { 514bd670b35SErik Nordmark DTRACE_PROBE2(ip4__dhcpinit__pkt, ill_t *, ill, 515bd670b35SErik Nordmark mblk_t *, mp); 516bd670b35SErik Nordmark /* 517bd670b35SErik Nordmark * This assumes that we deliver to all conns for 518bd670b35SErik Nordmark * multicast and broadcast packets. 519bd670b35SErik Nordmark */ 520bd670b35SErik Nordmark nexthop = INADDR_BROADCAST; 521bd670b35SErik Nordmark ira->ira_flags |= IRAF_DHCP_UNICAST; 522bd670b35SErik Nordmark } 523bd670b35SErik Nordmark } 524bd670b35SErik Nordmark 525bd670b35SErik Nordmark /* 526bd670b35SErik Nordmark * If rsvpd is running, let RSVP daemon handle its processing 527bd670b35SErik Nordmark * and forwarding of RSVP multicast/unicast packets. 528bd670b35SErik Nordmark * If rsvpd is not running but mrouted is running, RSVP 529bd670b35SErik Nordmark * multicast packets are forwarded as multicast traffic 530bd670b35SErik Nordmark * and RSVP unicast packets are forwarded by unicast router. 
531bd670b35SErik Nordmark * If neither rsvpd nor mrouted is running, RSVP multicast 532bd670b35SErik Nordmark * packets are not forwarded, but the unicast packets are 533bd670b35SErik Nordmark * forwarded like unicast traffic. 534bd670b35SErik Nordmark */ 535bd670b35SErik Nordmark if (ipha->ipha_protocol == IPPROTO_RSVP && 536bd670b35SErik Nordmark ipst->ips_ipcl_proto_fanout_v4[IPPROTO_RSVP].connf_head != NULL) { 537bd670b35SErik Nordmark /* RSVP packet and rsvpd running. Treat as ours */ 538bd670b35SErik Nordmark ip2dbg(("ip_input: RSVP for us: 0x%x\n", ntohl(nexthop))); 539bd670b35SErik Nordmark /* 540bd670b35SErik Nordmark * We use a multicast address to get the packet to 541bd670b35SErik Nordmark * ire_recv_multicast_v4. There will not be a membership 542bd670b35SErik Nordmark * check since we set IRAF_RSVP 543bd670b35SErik Nordmark */ 544bd670b35SErik Nordmark nexthop = htonl(INADDR_UNSPEC_GROUP); 545bd670b35SErik Nordmark ira->ira_flags |= IRAF_RSVP; 546bd670b35SErik Nordmark } 547bd670b35SErik Nordmark 548bd670b35SErik Nordmark ill_input_short_v4(mp, ipha, &nexthop, ira, rtc); 549bd670b35SErik Nordmark } 550bd670b35SErik Nordmark 551bd670b35SErik Nordmark /* 552bd670b35SErik Nordmark * This is the tail-end of the full receive side packet handling. 553bd670b35SErik Nordmark * It can be used directly when the configuration is simple. 
*/
void
ill_input_short_v4(mblk_t *mp, void *iph_arg, void *nexthop_arg,
    ip_recv_attr_t *ira, rtc_t *rtc)
{
	ire_t		*ire;
	uint_t		opt_len;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	uint_t		pkt_len;
	ssize_t		len;
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ipaddr_t	nexthop = *(ipaddr_t *)nexthop_arg;
	ilb_stack_t	*ilbs = ipst->ips_netstack->netstack_ilb;
	uint_t		irr_flags;
/* rptr expands to the current value of ipha, so it follows reassignments */
#define	rptr	((uchar_t *)ipha)

	ASSERT(DB_TYPE(mp) == M_DATA);

	/*
	 * The following test for loopback is faster than
	 * IP_LOOPBACK_ADDR(), because it avoids any bitwise
	 * operations.
	 * Note that these addresses are always in network byte order
	 */
	if (((*(uchar_t *)&ipha->ipha_dst) == IN_LOOPBACKNET) ||
	    ((*(uchar_t *)&ipha->ipha_src) == IN_LOOPBACKNET)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
		ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
		freemsg(mp);
		return;
	}

	len = mp->b_wptr - rptr;
	pkt_len = ira->ira_pktlen;

	/* multiple mblk or too short */
	len -= pkt_len;
	if (len != 0) {
		mp = ip_check_length(mp, rptr, len, pkt_len,
		    IP_SIMPLE_HDR_LENGTH, ira);
		if (mp == NULL)
			return;
		ipha = (ipha_t *)mp->b_rptr;
	}

	DTRACE_IP7(receive, mblk_t *, mp, conn_t *, NULL, void_ip_t *,
	    ipha, __dtrace_ipsr_ill_t *, ill, ipha_t *, ipha, ip6_t *, NULL,
	    int, 0);

	/*
	 * The event for packets being received from a 'physical'
	 * interface is placed after validation of the source and/or
	 * destination address as being local so that packets can be
	 * redirected to loopback addresses using ipnat.
	 */
	DTRACE_PROBE4(ip4__physical__in__start,
	    ill_t *, ill, ill_t *, NULL,
	    ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_PHYSICAL_IN(ipst)) {
		int	ll_multicast = 0;
		int	error;
		ipaddr_t orig_dst = ipha->ipha_dst;

		if (ira->ira_flags & IRAF_L2DST_MULTICAST)
			ll_multicast = HPE_MULTICAST;
		else if (ira->ira_flags & IRAF_L2DST_BROADCAST)
			ll_multicast = HPE_BROADCAST;

		FW_HOOKS(ipst->ips_ip4_physical_in_event,
		    ipst->ips_ipv4firewall_physical_in,
		    ill, NULL, ipha, mp, mp, ll_multicast, ipst, error);

		DTRACE_PROBE1(ip4__physical__in__end, mblk_t *, mp);

		/* A NULL mp after the hooks means there is nothing left to do */
		if (mp == NULL)
			return;
		/* The length could have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		pkt_len = ira->ira_pktlen;

		/*
		 * In case the destination changed we override any previous
		 * change to nexthop.
		 */
		if (orig_dst != ipha->ipha_dst)
			nexthop = ipha->ipha_dst;
		if (nexthop == INADDR_ANY) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
			ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
			freemsg(mp);
			return;
		}
	}

	if (ipst->ips_ip4_observe.he_interested) {
		zoneid_t dzone;

		/*
		 * On the inbound path the src zone will be unknown as
		 * this packet has come from the wire.
		 */
		dzone = ip_get_zoneid_v4(nexthop, mp, ira, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_INBOUND, ALL_ZONES, dzone, ill, ipst);
	}

	/*
	 * If there is a good HW IP header checksum we clear the need
	 * to look at the IP header checksum.
	 */
	if ((DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM) &&
	    ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		/* Header checksum was ok. Clear the flag */
		DB_CKSUMFLAGS(mp) &= ~HCK_IPV4_HDRCKSUM;
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;
	}

	/*
	 * Here we check to see if the machine is set up as an
	 * L3 load balancer and if the incoming packet is for a VIP
	 *
	 * Check the following:
	 * - there is at least a rule
	 * - protocol of the packet is supported
	 */
	if (ilb_has_rules(ilbs) && ILB_SUPP_L4(ipha->ipha_protocol)) {
		ipaddr_t	lb_dst;
		int		lb_ret;

		/* For convenience, we pull up the mblk. */
		if (mp->b_cont != NULL) {
			if (pullupmsg(mp, -1) == 0) {
				BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
				ip_drop_input("ipIfStatsInDiscards - pullupmsg",
				    mp, ill);
				freemsg(mp);
				return;
			}
			ipha = (ipha_t *)mp->b_rptr;
		}

		/*
		 * We just drop all fragments going to any VIP, at
		 * least for now....
		 */
		if (ntohs(ipha->ipha_fragment_offset_and_flags) &
		    (IPH_MF | IPH_OFFSET)) {
			/* Fragments not addressed to a VIP skip ILB entirely */
			if (!ilb_rule_match_vip_v4(ilbs, nexthop, NULL)) {
				goto after_ilb;
			}

			ILB_KSTAT_UPDATE(ilbs, ip_frag_in, 1);
			ILB_KSTAT_UPDATE(ilbs, ip_frag_dropped, 1);
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB fragment", mp, ill);
			freemsg(mp);
			return;
		}
		lb_ret = ilb_check_v4(ilbs, ill, mp, ipha, ipha->ipha_protocol,
		    (uint8_t *)ipha + IPH_HDR_LENGTH(ipha), &lb_dst);

		if (lb_ret == ILB_DROPPED) {
			/* Is this the right counter to increase? */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ILB_DROPPED", mp, ill);
			freemsg(mp);
			return;
		}
		if (lb_ret == ILB_BALANCED) {
			/* Set the dst to that of the chosen server */
			nexthop = lb_dst;
			DB_CKSUMFLAGS(mp) = 0;
		}
	}

after_ilb:
	/* opt_len is the number of 32-bit words beyond the simple header */
	opt_len = ipha->ipha_version_and_hdr_length - IP_SIMPLE_HDR_VERSION;
	ira->ira_ip_hdr_length = IP_SIMPLE_HDR_LENGTH;
	if (opt_len != 0) {
		int error = 0;

		ira->ira_ip_hdr_length += (opt_len << 2);
		ira->ira_flags |= IRAF_IPV4_OPTIONS;

		/* IP Options present!  Validate the length. */
		mp = ip_check_optlen(mp, ipha, opt_len, pkt_len, ira);
		if (mp == NULL)
			return;

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;

		/* Verify IP header checksum before parsing the options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Go off to ip_input_options which returns the next hop
		 * destination address, which may have been affected
		 * by source routing.
		 */
		IP_STAT(ipst, ip_opt);

		nexthop = ip_input_options(ipha, nexthop, mp, ira, &error);
		if (error != 0) {
			/*
			 * An ICMP error has been sent and the packet has
			 * been dropped.
			 */
			return;
		}
	}

	/* The route lookups below pass IRR_ALLOCATE only on a router ill */
	if (ill->ill_flags & ILLF_ROUTER)
		irr_flags = IRR_ALLOCATE;
	else
		irr_flags = IRR_NONE;

	/* Can not use route cache with TX since the labels can differ */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Match destination and label */
			ire = ire_route_recursive_v4(nexthop, 0, NULL,
			    ALL_ZONES, ira->ira_tsl, MATCH_IRE_SECATTR,
			    irr_flags, ira->ira_xmit_hint, ipst, NULL, NULL,
			    NULL);
		}
		/* Update the route cache so we do the ire_refrele */
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	} else if (nexthop == rtc->rtc_ipaddr && rtc->rtc_ire != NULL) {
		/* Use the route cache */
		ire = rtc->rtc_ire;
	} else {
		/* Update the route cache */
		if (CLASSD(nexthop)) {
			ire = ire_multicast(ill);
		} else {
			/* Just match the destination */
			ire = ire_route_recursive_dstonly_v4(nexthop, irr_flags,
			    ira->ira_xmit_hint, ipst);
		}
		ASSERT(ire != NULL);
		if (rtc->rtc_ire != NULL)
			ire_refrele(rtc->rtc_ire);
		rtc->rtc_ire = ire;
		rtc->rtc_ipaddr = nexthop;
	}

	ire->ire_ib_pkt_count++;

	/*
	 * Based on ire_type and ire_flags call one of:
	 *	ire_recv_local_v4 - for IRE_LOCAL
	 *	ire_recv_loopback_v4 - for IRE_LOOPBACK
	 *	ire_recv_multirt_v4 - if RTF_MULTIRT
	 *	ire_recv_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
	 *	ire_recv_multicast_v4 - for IRE_MULTICAST
	 *	ire_recv_broadcast_v4 - for IRE_BROADCAST
	 *	ire_recv_noaccept_v4 - for ire_noaccept ones
	 *	ire_recv_forward_v4 - for the rest.
	 */
	(*ire->ire_recvfn)(ire, mp, ipha, ira);
}
#undef rptr

/*
 * ire_recvfn for IREs that need forwarding
 *
 * The packet is dropped (and counted) when it arrived as a link-layer
 * multicast/broadcast, when the receiving ill is not ILLF_ROUTER and the
 * packet is not source routed, when no nce can be obtained for the ire,
 * when the outgoing ill does not permit forwarding, or when the source
 * address fails the ips_src_check test. Otherwise it is handed to
 * ip_forward_xmit_v4() with the smaller of the outgoing ill's MTU and the
 * ire's iulp_mtu metric.
 *
 * Once the nce reference has been taken, every return path below releases
 * it with nce_refrele().
 */
void
ire_recv_forward_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira)
{
	ipha_t		*ipha = (ipha_t *)iph_arg;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	ill_t		*dst_ill;
	nce_t		*nce;
	ipaddr_t	src = ipha->ipha_src;
	uint32_t	added_tx_len;
	uint32_t	mtu, iremtu;

	if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("l2 multicast not forwarded", mp, ill);
		freemsg(mp);
		return;
	}

	if (!(ill->ill_flags & ILLF_ROUTER) && !ip_source_routed(ipha, ipst)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * Either ire_nce_capable or ire_dep_parent would be set for the IRE
	 * when it is found by ire_route_recursive, but that some other thread
	 * could have changed the routes with the effect of clearing
	 * ire_dep_parent. In that case we'd end up dropping the packet, or
	 * finding a new nce below.
	 * Get, allocate, or update the nce.
	 * We get a refhold on ire_nce_cache as a result of this to avoid races
	 * where ire_nce_cache is deleted.
	 *
	 * This ensures that we don't forward if the interface is down since
	 * ipif_down removes all the nces.
	 */
	mutex_enter(&ire->ire_lock);
	nce = ire->ire_nce_cache;
	if (nce == NULL) {
		/* Not yet set up - try to set one up */
		mutex_exit(&ire->ire_lock);
		(void) ire_revalidate_nce(ire);
		mutex_enter(&ire->ire_lock);
		nce = ire->ire_nce_cache;
		if (nce == NULL) {
			mutex_exit(&ire->ire_lock);
			/* The ire_dep_parent chain went bad, or no memory */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No ire_dep_parent", mp, ill);
			freemsg(mp);
			return;
		}
	}
	nce_refhold(nce);
	mutex_exit(&ire->ire_lock);

	if (nce->nce_is_condemned) {
		nce_t *nce1;

		/*
		 * NOTE(review): nce1 is later released with nce_refrele(), so
		 * ire_handle_condemned_nce presumably returns it held - confirm.
		 */
		nce1 = ire_handle_condemned_nce(nce, ire, ipha, NULL, B_FALSE);
		nce_refrele(nce);
		if (nce1 == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("No nce", mp, ill);
			freemsg(mp);
			return;
		}
		nce = nce1;
	}
	dst_ill = nce->nce_ill;

	/*
	 * Unless we are forwarding, drop the packet.
	 * We have to let source routed packets through if they go out
	 * the same interface i.e., they are 'ping -l' packets.
	 */
	if (!(dst_ill->ill_flags & ILLF_ROUTER) &&
	    !(ip_source_routed(ipha, ipst) && dst_ill == ill)) {
		if (ip_source_routed(ipha, ipst)) {
			ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", mp, ill);
			icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira);
			nce_refrele(nce);
			return;
		}
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
		ip_drop_input("ipIfStatsForwProhibits", mp, ill);
		freemsg(mp);
		nce_refrele(nce);
		return;
	}

	if (ire->ire_zoneid != GLOBAL_ZONEID && ire->ire_zoneid != ALL_ZONES) {
		ipaddr_t	dst = ipha->ipha_dst;

		/* Undo the count on this ire; the replacement is counted below */
		ire->ire_ib_pkt_count--;
		/*
		 * Should only use IREs that are visible from the
		 * global zone for forwarding.
		 * Take a source route into account the same way as ip_input
		 * did.
		 */
		if (ira->ira_flags & IRAF_IPV4_OPTIONS) {
			int		error = 0;

			dst = ip_input_options(ipha, dst, mp, ira, &error);
			ASSERT(error == 0);	/* ip_input checked */
		}
		ire = ire_route_recursive_v4(dst, 0, NULL, GLOBAL_ZONEID,
		    ira->ira_tsl, MATCH_IRE_SECATTR,
		    (ill->ill_flags & ILLF_ROUTER) ? IRR_ALLOCATE : IRR_NONE,
		    ira->ira_xmit_hint, ipst, NULL, NULL, NULL);
		ire->ire_ib_pkt_count++;
		(*ire->ire_recvfn)(ire, mp, ipha, ira);
		ire_refrele(ire);
		nce_refrele(nce);
		return;
	}

	/*
	 * ipIfStatsHCInForwDatagrams should only be incremented if there
	 * will be an attempt to forward the packet, which is why we
	 * increment after the above condition has been checked.
	 */
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams);

	/* Initiate Read side IPPF processing */
	if (IPP_ENABLED(IPP_FWD_IN, ipst)) {
		/* ip_process translates an IS_UNDER_IPMP */
		mp = ip_process(IPP_FWD_IN, mp, ill, ill);
		if (mp == NULL) {
			/* ip_drop_packet and MIB done */
			ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred "
			    "during IPPF processing\n"));
			nce_refrele(nce);
			return;
		}
	}

	DTRACE_PROBE4(ip4__forwarding__start,
	    ill_t *, ill, ill_t *, dst_ill, ipha_t *, ipha, mblk_t *, mp);

	if (HOOKS4_INTERESTED_FORWARDING(ipst)) {
		int	error;

		FW_HOOKS(ipst->ips_ip4_forwarding_event,
		    ipst->ips_ipv4firewall_forwarding,
		    ill, dst_ill, ipha, mp, mp, 0, ipst, error);

		DTRACE_PROBE1(ip4__forwarding__end, mblk_t *, mp);

		if (mp == NULL) {
			nce_refrele(nce);
			return;
		}
		/*
		 * Even if the destination was changed by the filter we use the
		 * forwarding decision that was made based on the address
		 * in ip_input.
		 */

		/* Might have changed */
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
	}

	/* Packet is being forwarded. Turning off hwcksum flag. */
	DB_CKSUMFLAGS(mp) = 0;

	/*
	 * Martian Address Filtering [RFC 1812, Section 5.3.7]
	 * The loopback address check for both src and dst has already
	 * been checked in ip_input
	 * In the future one can envision adding RPF checks using number 3.
	 * If we already checked the same source address we can skip this.
	 *
	 * Note that src was read from the header at function entry, before
	 * the forwarding hooks ran.
	 */
	if (!(ira->ira_flags & IRAF_VERIFIED_SRC) ||
	    src != ira->ira_verified_src) {
		switch (ipst->ips_src_check) {
		case 0:
			break;
		case 2:
			if (ip_type_v4(src, ipst) == IRE_BROADCAST) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			/* FALLTHRU */

		case 1:
			if (CLASSD(src)) {
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsForwProhibits);
				BUMP_MIB(ill->ill_ip_mib,
				    ipIfStatsInAddrErrors);
				ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
				freemsg(mp);
				nce_refrele(nce);
				return;
			}
			break;
		}
		/* Remember for next packet */
		ira->ira_flags |= IRAF_VERIFIED_SRC;
		ira->ira_verified_src = src;
	}

	/*
	 * Check if packet is going out the same link on which it arrived.
	 * Means we might need to send a redirect.
	 */
	if (IS_ON_SAME_LAN(dst_ill, ill) && ipst->ips_ip_g_send_redirects) {
		ip_send_potential_redirect_v4(mp, ipha, ire, ira);
	}

	added_tx_len = 0;
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		mblk_t		*mp1;
		uint32_t	old_pkt_len = ira->ira_pktlen;

		/* Verify IP header checksum before adding/removing options */
		if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) &&
		    ip_csum_hdr(ipha)) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs);
			ip_drop_input("ipIfStatsInCksumErrs", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		ira->ira_flags &= ~IRAF_VERIFY_IP_CKSUM;

		/*
		 * Check if it can be forwarded and add/remove
		 * CIPSO options as needed.
		 */
		if ((mp1 = tsol_ip_forward(ire, mp, ira)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
			ip_drop_input("tsol_ip_forward", mp, ill);
			freemsg(mp);
			nce_refrele(nce);
			return;
		}
		/*
		 * Size may have changed. Remember amount added in case
		 * IP needs to send an ICMP too big.
		 */
		mp = mp1;
		ipha = (ipha_t *)mp->b_rptr;
		ira->ira_pktlen = ntohs(ipha->ipha_length);
		ira->ira_ip_hdr_length = IPH_HDR_LENGTH(ipha);
		if (ira->ira_pktlen > old_pkt_len)
			added_tx_len = ira->ira_pktlen - old_pkt_len;

		/* Options can have been added or removed */
		if (ira->ira_ip_hdr_length != IP_SIMPLE_HDR_LENGTH)
			ira->ira_flags |= IRAF_IPV4_OPTIONS;
		else
			ira->ira_flags &= ~IRAF_IPV4_OPTIONS;
	}

	/* Use the smaller of the outgoing ill's MTU and a non-zero ire MTU */
	mtu = dst_ill->ill_mtu;
	if ((iremtu = ire->ire_metrics.iulp_mtu) != 0 && iremtu < mtu)
		mtu = iremtu;
	ip_forward_xmit_v4(nce, ill, mp, ipha, ira, mtu, added_tx_len);
	nce_refrele(nce);
}

/*
 * Used for sending out unicast and multicast packets that are
 * forwarded.
1113bd670b35SErik Nordmark */ 1114bd670b35SErik Nordmark void 1115bd670b35SErik Nordmark ip_forward_xmit_v4(nce_t *nce, ill_t *ill, mblk_t *mp, ipha_t *ipha, 1116bd670b35SErik Nordmark ip_recv_attr_t *ira, uint32_t mtu, uint32_t added_tx_len) 1117bd670b35SErik Nordmark { 1118bd670b35SErik Nordmark ill_t *dst_ill = nce->nce_ill; 1119bd670b35SErik Nordmark uint32_t pkt_len; 1120bd670b35SErik Nordmark uint32_t sum; 1121bd670b35SErik Nordmark iaflags_t iraflags = ira->ira_flags; 1122bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1123bd670b35SErik Nordmark iaflags_t ixaflags; 1124bd670b35SErik Nordmark 1125bd670b35SErik Nordmark if (ipha->ipha_ttl <= 1) { 1126bd670b35SErik Nordmark /* Perhaps the checksum was bad */ 1127bd670b35SErik Nordmark if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1128bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1129bd670b35SErik Nordmark ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1130bd670b35SErik Nordmark freemsg(mp); 1131bd670b35SErik Nordmark return; 1132bd670b35SErik Nordmark } 1133bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1134bd670b35SErik Nordmark ip_drop_input("ICMP_TTL_EXCEEDED", mp, ill); 1135bd670b35SErik Nordmark icmp_time_exceeded(mp, ICMP_TTL_EXCEEDED, ira); 1136bd670b35SErik Nordmark return; 1137bd670b35SErik Nordmark } 1138bd670b35SErik Nordmark ipha->ipha_ttl--; 1139bd670b35SErik Nordmark /* Adjust the checksum to reflect the ttl decrement. 
*/ 1140bd670b35SErik Nordmark sum = (int)ipha->ipha_hdr_checksum + IP_HDR_CSUM_TTL_ADJUST; 1141bd670b35SErik Nordmark ipha->ipha_hdr_checksum = (uint16_t)(sum + (sum >> 16)); 1142bd670b35SErik Nordmark 1143bd670b35SErik Nordmark /* Check if there are options to update */ 1144bd670b35SErik Nordmark if (iraflags & IRAF_IPV4_OPTIONS) { 1145bd670b35SErik Nordmark ASSERT(ipha->ipha_version_and_hdr_length != 1146bd670b35SErik Nordmark IP_SIMPLE_HDR_VERSION); 1147bd670b35SErik Nordmark ASSERT(!(iraflags & IRAF_VERIFY_IP_CKSUM)); 1148bd670b35SErik Nordmark 1149bd670b35SErik Nordmark if (!ip_forward_options(mp, ipha, dst_ill, ira)) { 1150bd670b35SErik Nordmark /* ipIfStatsForwProhibits and ip_drop_input done */ 1151bd670b35SErik Nordmark return; 1152bd670b35SErik Nordmark } 1153bd670b35SErik Nordmark 1154bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1155bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1156bd670b35SErik Nordmark } 1157bd670b35SErik Nordmark 1158bd670b35SErik Nordmark /* Initiate Write side IPPF processing before any fragmentation */ 1159bd670b35SErik Nordmark if (IPP_ENABLED(IPP_FWD_OUT, ipst)) { 1160bd670b35SErik Nordmark /* ip_process translates an IS_UNDER_IPMP */ 1161bd670b35SErik Nordmark mp = ip_process(IPP_FWD_OUT, mp, dst_ill, dst_ill); 1162bd670b35SErik Nordmark if (mp == NULL) { 1163bd670b35SErik Nordmark /* ip_drop_packet and MIB done */ 1164bd670b35SErik Nordmark ip2dbg(("ire_recv_forward_v4: pkt dropped/deferred" \ 1165bd670b35SErik Nordmark " during IPPF processing\n")); 1166bd670b35SErik Nordmark return; 1167bd670b35SErik Nordmark } 1168bd670b35SErik Nordmark } 1169bd670b35SErik Nordmark 1170bd670b35SErik Nordmark pkt_len = ira->ira_pktlen; 1171bd670b35SErik Nordmark 1172bd670b35SErik Nordmark BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsHCOutForwDatagrams); 1173bd670b35SErik Nordmark 1174bd670b35SErik Nordmark ixaflags = IXAF_IS_IPV4 | IXAF_NO_DEV_FLOW_CTL; 1175bd670b35SErik Nordmark 1176bd670b35SErik Nordmark if (pkt_len > 
mtu) { 1177bd670b35SErik Nordmark /* 1178bd670b35SErik Nordmark * It needs fragging on its way out. If we haven't 1179bd670b35SErik Nordmark * verified the header checksum yet we do it now since 1180bd670b35SErik Nordmark * are going to put a surely good checksum in the 1181bd670b35SErik Nordmark * outgoing header, we have to make sure that it 1182bd670b35SErik Nordmark * was good coming in. 1183bd670b35SErik Nordmark */ 1184bd670b35SErik Nordmark if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1185bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1186bd670b35SErik Nordmark ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1187bd670b35SErik Nordmark freemsg(mp); 1188bd670b35SErik Nordmark return; 1189bd670b35SErik Nordmark } 1190bd670b35SErik Nordmark if (ipha->ipha_fragment_offset_and_flags & IPH_DF_HTONS) { 1191bd670b35SErik Nordmark BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutFragFails); 1192bd670b35SErik Nordmark ip_drop_output("ipIfStatsOutFragFails", mp, dst_ill); 1193bd670b35SErik Nordmark if (iraflags & IRAF_SYSTEM_LABELED) { 1194bd670b35SErik Nordmark /* 1195bd670b35SErik Nordmark * Remove any CIPSO option added by 1196bd670b35SErik Nordmark * tsol_ip_forward, and make sure we report 1197bd670b35SErik Nordmark * a path MTU so that there 1198bd670b35SErik Nordmark * is room to add such a CIPSO option for future 1199bd670b35SErik Nordmark * packets. 
1200bd670b35SErik Nordmark */ 1201bd670b35SErik Nordmark mtu = tsol_pmtu_adjust(mp, mtu, added_tx_len, 1202bd670b35SErik Nordmark AF_INET); 1203bd670b35SErik Nordmark } 1204bd670b35SErik Nordmark 1205bd670b35SErik Nordmark icmp_frag_needed(mp, mtu, ira); 1206bd670b35SErik Nordmark return; 1207bd670b35SErik Nordmark } 1208bd670b35SErik Nordmark 1209bd670b35SErik Nordmark (void) ip_fragment_v4(mp, nce, ixaflags, pkt_len, mtu, 1210bd670b35SErik Nordmark ira->ira_xmit_hint, GLOBAL_ZONEID, 0, ip_xmit, NULL); 1211bd670b35SErik Nordmark return; 1212bd670b35SErik Nordmark } 1213bd670b35SErik Nordmark 1214bd670b35SErik Nordmark ASSERT(pkt_len == ntohs(((ipha_t *)mp->b_rptr)->ipha_length)); 1215bd670b35SErik Nordmark if (iraflags & IRAF_LOOPBACK_COPY) { 1216bd670b35SErik Nordmark /* 1217bd670b35SErik Nordmark * IXAF_NO_LOOP_ZONEID is not set hence 7th arg 1218bd670b35SErik Nordmark * is don't care 1219bd670b35SErik Nordmark */ 1220bd670b35SErik Nordmark (void) ip_postfrag_loopcheck(mp, nce, 1221bd670b35SErik Nordmark ixaflags | IXAF_LOOPBACK_COPY, 1222bd670b35SErik Nordmark pkt_len, ira->ira_xmit_hint, GLOBAL_ZONEID, 0, NULL); 1223bd670b35SErik Nordmark } else { 1224bd670b35SErik Nordmark (void) ip_xmit(mp, nce, ixaflags, pkt_len, ira->ira_xmit_hint, 1225bd670b35SErik Nordmark GLOBAL_ZONEID, 0, NULL); 1226bd670b35SErik Nordmark } 1227bd670b35SErik Nordmark } 1228bd670b35SErik Nordmark 1229bd670b35SErik Nordmark /* 1230bd670b35SErik Nordmark * ire_recvfn for RTF_REJECT and RTF_BLACKHOLE routes, including IRE_NOROUTE, 1231bd670b35SErik Nordmark * which is what ire_route_recursive returns when there is no matching ire. 1232bd670b35SErik Nordmark * Send ICMP unreachable unless blackhole. 
1233bd670b35SErik Nordmark */ 1234bd670b35SErik Nordmark void 1235bd670b35SErik Nordmark ire_recv_noroute_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1236bd670b35SErik Nordmark { 1237bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1238bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1239bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1240bd670b35SErik Nordmark 1241bd670b35SErik Nordmark /* Would we have forwarded this packet if we had a route? */ 1242bd670b35SErik Nordmark if (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) { 1243bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1244bd670b35SErik Nordmark ip_drop_input("l2 multicast not forwarded", mp, ill); 1245bd670b35SErik Nordmark freemsg(mp); 1246bd670b35SErik Nordmark return; 1247bd670b35SErik Nordmark } 1248bd670b35SErik Nordmark 1249bd670b35SErik Nordmark if (!(ill->ill_flags & ILLF_ROUTER)) { 1250bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1251bd670b35SErik Nordmark ip_drop_input("ipIfStatsForwProhibits", mp, ill); 1252bd670b35SErik Nordmark freemsg(mp); 1253bd670b35SErik Nordmark return; 1254bd670b35SErik Nordmark } 1255bd670b35SErik Nordmark /* 1256bd670b35SErik Nordmark * If we had a route this could have been forwarded. Count as such. 1257bd670b35SErik Nordmark * 1258bd670b35SErik Nordmark * ipIfStatsHCInForwDatagrams should only be increment if there 1259bd670b35SErik Nordmark * will be an attempt to forward the packet, which is why we 1260bd670b35SErik Nordmark * increment after the above condition has been checked. 
1261bd670b35SErik Nordmark */ 1262bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInForwDatagrams); 1263bd670b35SErik Nordmark 1264bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1265bd670b35SErik Nordmark 1266bd670b35SErik Nordmark ip_rts_change(RTM_MISS, ipha->ipha_dst, 0, 0, 0, 0, 0, 0, RTA_DST, 1267bd670b35SErik Nordmark ipst); 1268bd670b35SErik Nordmark 1269bd670b35SErik Nordmark if (ire->ire_flags & RTF_BLACKHOLE) { 1270bd670b35SErik Nordmark ip_drop_input("ipIfStatsInNoRoutes RTF_BLACKHOLE", mp, ill); 1271bd670b35SErik Nordmark freemsg(mp); 1272bd670b35SErik Nordmark } else { 1273bd670b35SErik Nordmark ip_drop_input("ipIfStatsInNoRoutes RTF_REJECT", mp, ill); 1274bd670b35SErik Nordmark 1275bd670b35SErik Nordmark if (ip_source_routed(ipha, ipst)) { 1276bd670b35SErik Nordmark icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, ira); 1277bd670b35SErik Nordmark } else { 1278bd670b35SErik Nordmark icmp_unreachable(mp, ICMP_HOST_UNREACHABLE, ira); 1279bd670b35SErik Nordmark } 1280bd670b35SErik Nordmark } 1281bd670b35SErik Nordmark } 1282bd670b35SErik Nordmark 1283bd670b35SErik Nordmark /* 1284bd670b35SErik Nordmark * ire_recvfn for IRE_LOCALs marked with ire_noaccept. Such IREs are used for 1285bd670b35SErik Nordmark * VRRP when in noaccept mode. 1286bd670b35SErik Nordmark * We silently drop the packet. ARP handles packets even if noaccept is set. 
1287bd670b35SErik Nordmark */ 1288bd670b35SErik Nordmark /* ARGSUSED */ 1289bd670b35SErik Nordmark void 1290bd670b35SErik Nordmark ire_recv_noaccept_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1291bd670b35SErik Nordmark ip_recv_attr_t *ira) 1292bd670b35SErik Nordmark { 1293bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1294bd670b35SErik Nordmark 1295bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1296bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards - noaccept", mp, ill); 1297bd670b35SErik Nordmark freemsg(mp); 1298bd670b35SErik Nordmark } 1299bd670b35SErik Nordmark 1300bd670b35SErik Nordmark /* 1301bd670b35SErik Nordmark * ire_recvfn for IRE_BROADCAST. 1302bd670b35SErik Nordmark */ 1303bd670b35SErik Nordmark void 1304bd670b35SErik Nordmark ire_recv_broadcast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1305bd670b35SErik Nordmark ip_recv_attr_t *ira) 1306bd670b35SErik Nordmark { 1307bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1308bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1309bd670b35SErik Nordmark ill_t *dst_ill = ire->ire_ill; 1310bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1311bd670b35SErik Nordmark ire_t *alt_ire; 1312bd670b35SErik Nordmark nce_t *nce; 1313bd670b35SErik Nordmark ipaddr_t ipha_dst; 1314bd670b35SErik Nordmark 1315bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInBcastPkts); 1316bd670b35SErik Nordmark 1317bd670b35SErik Nordmark /* Tag for higher-level protocols */ 1318bd670b35SErik Nordmark ira->ira_flags |= IRAF_BROADCAST; 1319bd670b35SErik Nordmark 1320bd670b35SErik Nordmark /* 1321bd670b35SErik Nordmark * Whether local or directed broadcast forwarding: don't allow 1322bd670b35SErik Nordmark * for TCP. 
1323bd670b35SErik Nordmark */ 1324bd670b35SErik Nordmark if (ipha->ipha_protocol == IPPROTO_TCP) { 1325bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1326bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 1327bd670b35SErik Nordmark freemsg(mp); 1328bd670b35SErik Nordmark return; 1329bd670b35SErik Nordmark } 1330bd670b35SErik Nordmark 1331bd670b35SErik Nordmark /* 1332bd670b35SErik Nordmark * So that we don't end up with dups, only one ill an IPMP group is 1333bd670b35SErik Nordmark * nominated to receive broadcast traffic. 1334bd670b35SErik Nordmark * If we have no cast_ill we are liberal and accept everything. 1335bd670b35SErik Nordmark */ 1336bd670b35SErik Nordmark if (IS_UNDER_IPMP(ill)) { 1337bd670b35SErik Nordmark /* For an under ill_grp can change under lock */ 1338bd670b35SErik Nordmark rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1339bd670b35SErik Nordmark if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1340bd670b35SErik Nordmark ill->ill_grp->ig_cast_ill != NULL) { 1341bd670b35SErik Nordmark rw_exit(&ipst->ips_ill_g_lock); 1342bd670b35SErik Nordmark /* No MIB since this is normal operation */ 1343bd670b35SErik Nordmark ip_drop_input("not nom_cast", mp, ill); 1344bd670b35SErik Nordmark freemsg(mp); 1345bd670b35SErik Nordmark return; 1346bd670b35SErik Nordmark } 1347bd670b35SErik Nordmark rw_exit(&ipst->ips_ill_g_lock); 1348bd670b35SErik Nordmark 1349bd670b35SErik Nordmark ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1350bd670b35SErik Nordmark } 1351bd670b35SErik Nordmark 1352bd670b35SErik Nordmark /* 1353bd670b35SErik Nordmark * After reassembly and IPsec we will need to duplicate the 1354bd670b35SErik Nordmark * broadcast packet for all matching zones on the ill. 1355bd670b35SErik Nordmark */ 1356bd670b35SErik Nordmark ira->ira_zoneid = ALL_ZONES; 1357bd670b35SErik Nordmark 1358bd670b35SErik Nordmark /* 1359bd670b35SErik Nordmark * Check for directed broadcast i.e. 
ire->ire_ill is different than 1360bd670b35SErik Nordmark * the incoming ill. 1361bd670b35SErik Nordmark * The same broadcast address can be assigned to multiple interfaces 1362bd670b35SErik Nordmark * so have to check explicitly for that case by looking up the alt_ire 1363bd670b35SErik Nordmark */ 1364bd670b35SErik Nordmark if (dst_ill == ill && !(ire->ire_flags & RTF_MULTIRT)) { 1365bd670b35SErik Nordmark /* Reassemble on the ill on which the packet arrived */ 1366bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1367bd670b35SErik Nordmark /* Restore */ 1368bd670b35SErik Nordmark ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1369bd670b35SErik Nordmark return; 1370bd670b35SErik Nordmark } 1371bd670b35SErik Nordmark 1372bd670b35SErik Nordmark /* Is there an IRE_BROADCAST on the incoming ill? */ 1373bd670b35SErik Nordmark ipha_dst = ((ira->ira_flags & IRAF_DHCP_UNICAST) ? INADDR_BROADCAST : 1374bd670b35SErik Nordmark ipha->ipha_dst); 1375bd670b35SErik Nordmark alt_ire = ire_ftable_lookup_v4(ipha_dst, 0, 0, IRE_BROADCAST, ill, 1376bd670b35SErik Nordmark ALL_ZONES, ira->ira_tsl, 1377bd670b35SErik Nordmark MATCH_IRE_TYPE|MATCH_IRE_ILL|MATCH_IRE_SECATTR, 0, ipst, NULL); 1378bd670b35SErik Nordmark if (alt_ire != NULL) { 1379bd670b35SErik Nordmark /* Not a directed broadcast */ 1380bd670b35SErik Nordmark /* 1381bd670b35SErik Nordmark * In the special case of multirouted broadcast 1382bd670b35SErik Nordmark * packets, we unconditionally need to "gateway" 1383bd670b35SErik Nordmark * them to the appropriate interface here so that reassembly 1384bd670b35SErik Nordmark * works. We know that the IRE_BROADCAST on cgtp0 doesn't 1385bd670b35SErik Nordmark * have RTF_MULTIRT set so we look for such an IRE in the 1386bd670b35SErik Nordmark * bucket. 
1387bd670b35SErik Nordmark */ 1388bd670b35SErik Nordmark if (alt_ire->ire_flags & RTF_MULTIRT) { 1389bd670b35SErik Nordmark irb_t *irb; 1390bd670b35SErik Nordmark ire_t *ire1; 1391bd670b35SErik Nordmark 1392bd670b35SErik Nordmark irb = ire->ire_bucket; 1393bd670b35SErik Nordmark irb_refhold(irb); 1394bd670b35SErik Nordmark for (ire1 = irb->irb_ire; ire1 != NULL; 1395bd670b35SErik Nordmark ire1 = ire1->ire_next) { 1396bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire1)) 1397bd670b35SErik Nordmark continue; 1398bd670b35SErik Nordmark if (!(ire1->ire_type & IRE_BROADCAST) || 1399bd670b35SErik Nordmark (ire1->ire_flags & RTF_MULTIRT)) 1400bd670b35SErik Nordmark continue; 1401bd670b35SErik Nordmark ill = ire1->ire_ill; 1402bd670b35SErik Nordmark ill_refhold(ill); 1403bd670b35SErik Nordmark break; 1404bd670b35SErik Nordmark } 1405bd670b35SErik Nordmark irb_refrele(irb); 1406bd670b35SErik Nordmark if (ire1 != NULL) { 1407bd670b35SErik Nordmark ill_t *orig_ill = ira->ira_ill; 1408bd670b35SErik Nordmark 1409bd670b35SErik Nordmark ire_refrele(alt_ire); 1410bd670b35SErik Nordmark /* Reassemble on the new ill */ 1411bd670b35SErik Nordmark ira->ira_ill = ill; 1412bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1413bd670b35SErik Nordmark ill_refrele(ill); 1414bd670b35SErik Nordmark /* Restore */ 1415bd670b35SErik Nordmark ira->ira_ill = orig_ill; 1416bd670b35SErik Nordmark ira->ira_ruifindex = 1417bd670b35SErik Nordmark orig_ill->ill_phyint->phyint_ifindex; 1418bd670b35SErik Nordmark return; 1419bd670b35SErik Nordmark } 1420bd670b35SErik Nordmark } 1421bd670b35SErik Nordmark ire_refrele(alt_ire); 1422bd670b35SErik Nordmark /* Reassemble on the ill on which the packet arrived */ 1423bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1424bd670b35SErik Nordmark goto done; 1425bd670b35SErik Nordmark } 1426bd670b35SErik Nordmark 1427bd670b35SErik Nordmark /* 1428bd670b35SErik Nordmark * This is a directed broadcast 1429bd670b35SErik Nordmark * 1430bd670b35SErik 
Nordmark * If directed broadcast is allowed, then forward the packet out 1431bd670b35SErik Nordmark * the destination interface with IXAF_LOOPBACK_COPY set. That will 1432bd670b35SErik Nordmark * result in ip_input() receiving a copy of the packet on the 1433bd670b35SErik Nordmark * appropriate ill. (We could optimize this to avoid the extra trip 1434bd670b35SErik Nordmark * via ip_input(), but since directed broadcasts are normally disabled 1435bd670b35SErik Nordmark * it doesn't make sense to optimize it.) 1436bd670b35SErik Nordmark */ 1437bd670b35SErik Nordmark if (!ipst->ips_ip_g_forward_directed_bcast || 1438bd670b35SErik Nordmark (ira->ira_flags & (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST))) { 1439bd670b35SErik Nordmark ip_drop_input("directed broadcast not allowed", mp, ill); 1440bd670b35SErik Nordmark freemsg(mp); 1441bd670b35SErik Nordmark goto done; 1442bd670b35SErik Nordmark } 1443bd670b35SErik Nordmark if ((ira->ira_flags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1444bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1445bd670b35SErik Nordmark ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1446bd670b35SErik Nordmark freemsg(mp); 1447bd670b35SErik Nordmark goto done; 1448bd670b35SErik Nordmark } 1449bd670b35SErik Nordmark 1450bd670b35SErik Nordmark /* 1451bd670b35SErik Nordmark * Clear the indication that this may have hardware 1452bd670b35SErik Nordmark * checksum as we are not using it for forwarding. 1453bd670b35SErik Nordmark */ 1454bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) = 0; 1455bd670b35SErik Nordmark 1456bd670b35SErik Nordmark /* 1457bd670b35SErik Nordmark * Adjust ttl to 2 (1+1 - the forward engine will decrement it by one. 
1458bd670b35SErik Nordmark */ 1459bd670b35SErik Nordmark ipha->ipha_ttl = ipst->ips_ip_broadcast_ttl + 1; 1460bd670b35SErik Nordmark ipha->ipha_hdr_checksum = 0; 1461bd670b35SErik Nordmark ipha->ipha_hdr_checksum = ip_csum_hdr(ipha); 1462bd670b35SErik Nordmark 1463bd670b35SErik Nordmark /* 1464bd670b35SErik Nordmark * We use ip_forward_xmit to do any fragmentation. 1465bd670b35SErik Nordmark * and loopback copy on the outbound interface. 1466bd670b35SErik Nordmark * 1467bd670b35SErik Nordmark * Make it so that IXAF_LOOPBACK_COPY to be set on transmit side. 1468bd670b35SErik Nordmark */ 1469bd670b35SErik Nordmark ira->ira_flags |= IRAF_LOOPBACK_COPY; 1470bd670b35SErik Nordmark 1471bd670b35SErik Nordmark nce = arp_nce_init(dst_ill, ipha->ipha_dst, IRE_BROADCAST); 1472bd670b35SErik Nordmark if (nce == NULL) { 1473bd670b35SErik Nordmark BUMP_MIB(dst_ill->ill_ip_mib, ipIfStatsOutDiscards); 1474bd670b35SErik Nordmark ip_drop_output("No nce", mp, dst_ill); 1475bd670b35SErik Nordmark freemsg(mp); 1476bd670b35SErik Nordmark goto done; 1477bd670b35SErik Nordmark } 1478bd670b35SErik Nordmark 14791eee170aSErik Nordmark ip_forward_xmit_v4(nce, ill, mp, ipha, ira, dst_ill->ill_mc_mtu, 0); 1480bd670b35SErik Nordmark nce_refrele(nce); 1481bd670b35SErik Nordmark done: 1482bd670b35SErik Nordmark /* Restore */ 1483bd670b35SErik Nordmark ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1484bd670b35SErik Nordmark } 1485bd670b35SErik Nordmark 1486bd670b35SErik Nordmark /* 1487bd670b35SErik Nordmark * ire_recvfn for IRE_MULTICAST. 
1488bd670b35SErik Nordmark */ 1489bd670b35SErik Nordmark void 1490bd670b35SErik Nordmark ire_recv_multicast_v4(ire_t *ire, mblk_t *mp, void *iph_arg, 1491bd670b35SErik Nordmark ip_recv_attr_t *ira) 1492bd670b35SErik Nordmark { 1493bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1494bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1495bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1496bd670b35SErik Nordmark 1497bd670b35SErik Nordmark ASSERT(ire->ire_ill == ira->ira_ill); 1498bd670b35SErik Nordmark 1499bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastPkts); 1500bd670b35SErik Nordmark UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCInMcastOctets, ira->ira_pktlen); 1501bd670b35SErik Nordmark 1502bd670b35SErik Nordmark /* RSVP hook */ 1503bd670b35SErik Nordmark if (ira->ira_flags & IRAF_RSVP) 1504bd670b35SErik Nordmark goto forus; 1505bd670b35SErik Nordmark 1506bd670b35SErik Nordmark /* Tag for higher-level protocols */ 1507bd670b35SErik Nordmark ira->ira_flags |= IRAF_MULTICAST; 1508bd670b35SErik Nordmark 1509bd670b35SErik Nordmark /* 1510bd670b35SErik Nordmark * So that we don't end up with dups, only one ill an IPMP group is 1511bd670b35SErik Nordmark * nominated to receive multicast traffic. 1512bd670b35SErik Nordmark * If we have no cast_ill we are liberal and accept everything. 
1513bd670b35SErik Nordmark */ 1514bd670b35SErik Nordmark if (IS_UNDER_IPMP(ill)) { 1515bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1516bd670b35SErik Nordmark 1517bd670b35SErik Nordmark /* For an under ill_grp can change under lock */ 1518bd670b35SErik Nordmark rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1519bd670b35SErik Nordmark if (!ill->ill_nom_cast && ill->ill_grp != NULL && 1520bd670b35SErik Nordmark ill->ill_grp->ig_cast_ill != NULL) { 1521bd670b35SErik Nordmark rw_exit(&ipst->ips_ill_g_lock); 1522bd670b35SErik Nordmark ip_drop_input("not on cast ill", mp, ill); 1523bd670b35SErik Nordmark freemsg(mp); 1524bd670b35SErik Nordmark return; 1525bd670b35SErik Nordmark } 1526bd670b35SErik Nordmark rw_exit(&ipst->ips_ill_g_lock); 1527bd670b35SErik Nordmark /* 1528bd670b35SErik Nordmark * We switch to the upper ill so that mrouter and hasmembers 1529bd670b35SErik Nordmark * can operate on upper here and in ip_input_multicast. 1530bd670b35SErik Nordmark */ 1531bd670b35SErik Nordmark ill = ipmp_ill_hold_ipmp_ill(ill); 1532bd670b35SErik Nordmark if (ill != NULL) { 1533bd670b35SErik Nordmark ASSERT(ill != ira->ira_ill); 1534bd670b35SErik Nordmark ASSERT(ire->ire_ill == ira->ira_ill); 1535bd670b35SErik Nordmark ira->ira_ill = ill; 1536bd670b35SErik Nordmark ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1537bd670b35SErik Nordmark } else { 1538bd670b35SErik Nordmark ill = ira->ira_ill; 1539bd670b35SErik Nordmark } 1540bd670b35SErik Nordmark } 1541bd670b35SErik Nordmark 1542bd670b35SErik Nordmark /* 1543bd670b35SErik Nordmark * Check if we are a multicast router - send ip_mforward a copy of 1544bd670b35SErik Nordmark * the packet. 1545bd670b35SErik Nordmark * Due to mroute_decap tunnels we consider forwarding packets even if 1546bd670b35SErik Nordmark * mrouted has not joined the allmulti group on this interface. 
1547bd670b35SErik Nordmark */ 1548bd670b35SErik Nordmark if (ipst->ips_ip_g_mrouter) { 1549bd670b35SErik Nordmark int retval; 1550bd670b35SErik Nordmark 1551bd670b35SErik Nordmark /* 1552bd670b35SErik Nordmark * Clear the indication that this may have hardware 1553bd670b35SErik Nordmark * checksum as we are not using it for forwarding. 1554bd670b35SErik Nordmark */ 1555bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) = 0; 1556bd670b35SErik Nordmark 1557bd670b35SErik Nordmark /* 1558bd670b35SErik Nordmark * ip_mforward helps us make these distinctions: If received 1559bd670b35SErik Nordmark * on tunnel and not IGMP, then drop. 1560bd670b35SErik Nordmark * If IGMP packet, then don't check membership 1561bd670b35SErik Nordmark * If received on a phyint and IGMP or PIM, then 1562bd670b35SErik Nordmark * don't check membership 1563bd670b35SErik Nordmark */ 1564bd670b35SErik Nordmark retval = ip_mforward(mp, ira); 1565bd670b35SErik Nordmark /* ip_mforward updates mib variables if needed */ 1566bd670b35SErik Nordmark 1567bd670b35SErik Nordmark switch (retval) { 1568bd670b35SErik Nordmark case 0: 1569bd670b35SErik Nordmark /* 1570bd670b35SErik Nordmark * pkt is okay and arrived on phyint. 1571bd670b35SErik Nordmark * 1572bd670b35SErik Nordmark * If we are running as a multicast router 1573bd670b35SErik Nordmark * we need to see all IGMP and/or PIM packets. 
1574bd670b35SErik Nordmark */ 1575bd670b35SErik Nordmark if ((ipha->ipha_protocol == IPPROTO_IGMP) || 1576bd670b35SErik Nordmark (ipha->ipha_protocol == IPPROTO_PIM)) { 1577bd670b35SErik Nordmark goto forus; 1578bd670b35SErik Nordmark } 1579bd670b35SErik Nordmark break; 1580bd670b35SErik Nordmark case -1: 1581bd670b35SErik Nordmark /* pkt is mal-formed, toss it */ 1582bd670b35SErik Nordmark freemsg(mp); 1583bd670b35SErik Nordmark goto done; 1584bd670b35SErik Nordmark case 1: 1585bd670b35SErik Nordmark /* 1586bd670b35SErik Nordmark * pkt is okay and arrived on a tunnel 1587bd670b35SErik Nordmark * 1588bd670b35SErik Nordmark * If we are running a multicast router 1589bd670b35SErik Nordmark * we need to see all igmp packets. 1590bd670b35SErik Nordmark */ 1591bd670b35SErik Nordmark if (ipha->ipha_protocol == IPPROTO_IGMP) { 1592bd670b35SErik Nordmark goto forus; 1593bd670b35SErik Nordmark } 1594bd670b35SErik Nordmark ip_drop_input("Multicast on tunnel ignored", mp, ill); 1595bd670b35SErik Nordmark freemsg(mp); 1596bd670b35SErik Nordmark goto done; 1597bd670b35SErik Nordmark } 1598bd670b35SErik Nordmark } 1599bd670b35SErik Nordmark 1600bd670b35SErik Nordmark /* 1601bd670b35SErik Nordmark * Check if we have members on this ill. This is not necessary for 1602bd670b35SErik Nordmark * correctness because even if the NIC/GLD had a leaky filter, we 1603bd670b35SErik Nordmark * filter before passing to each conn_t. 1604bd670b35SErik Nordmark */ 1605bd670b35SErik Nordmark if (!ill_hasmembers_v4(ill, ipha->ipha_dst)) { 1606bd670b35SErik Nordmark /* 1607bd670b35SErik Nordmark * Nobody interested 1608bd670b35SErik Nordmark * 1609bd670b35SErik Nordmark * This might just be caused by the fact that 1610bd670b35SErik Nordmark * multiple IP Multicast addresses map to the same 1611bd670b35SErik Nordmark * link layer multicast - no need to increment counter! 
1612bd670b35SErik Nordmark */ 1613bd670b35SErik Nordmark ip_drop_input("Multicast with no members", mp, ill); 1614bd670b35SErik Nordmark freemsg(mp); 1615bd670b35SErik Nordmark goto done; 1616bd670b35SErik Nordmark } 1617bd670b35SErik Nordmark forus: 1618bd670b35SErik Nordmark ip2dbg(("ire_recv_multicast_v4: multicast for us: 0x%x\n", 1619bd670b35SErik Nordmark ntohl(ipha->ipha_dst))); 1620bd670b35SErik Nordmark 1621bd670b35SErik Nordmark /* 1622bd670b35SErik Nordmark * After reassembly and IPsec we will need to duplicate the 1623bd670b35SErik Nordmark * multicast packet for all matching zones on the ill. 1624bd670b35SErik Nordmark */ 1625bd670b35SErik Nordmark ira->ira_zoneid = ALL_ZONES; 1626bd670b35SErik Nordmark 1627bd670b35SErik Nordmark /* Reassemble on the ill on which the packet arrived */ 1628bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1629bd670b35SErik Nordmark done: 1630bd670b35SErik Nordmark if (ill != ire->ire_ill) { 1631bd670b35SErik Nordmark ill_refrele(ill); 1632bd670b35SErik Nordmark ira->ira_ill = ire->ire_ill; 1633bd670b35SErik Nordmark ira->ira_ruifindex = ira->ira_ill->ill_phyint->phyint_ifindex; 1634bd670b35SErik Nordmark } 1635bd670b35SErik Nordmark } 1636bd670b35SErik Nordmark 1637bd670b35SErik Nordmark /* 1638bd670b35SErik Nordmark * ire_recvfn for IRE_OFFLINK with RTF_MULTIRT. 1639bd670b35SErik Nordmark * Drop packets since we don't forward out multirt routes. 
1640bd670b35SErik Nordmark */ 1641bd670b35SErik Nordmark /* ARGSUSED */ 1642bd670b35SErik Nordmark void 1643bd670b35SErik Nordmark ire_recv_multirt_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1644bd670b35SErik Nordmark { 1645bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1646bd670b35SErik Nordmark 1647bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInNoRoutes); 1648bd670b35SErik Nordmark ip_drop_input("Not forwarding out MULTIRT", mp, ill); 1649bd670b35SErik Nordmark freemsg(mp); 1650bd670b35SErik Nordmark } 1651bd670b35SErik Nordmark 1652bd670b35SErik Nordmark /* 1653bd670b35SErik Nordmark * ire_recvfn for IRE_LOOPBACK. This is only used when a FW_HOOK 1654bd670b35SErik Nordmark * has rewritten the packet to have a loopback destination address (We 1655bd670b35SErik Nordmark * filter out packet with a loopback destination from arriving over the wire). 1656bd670b35SErik Nordmark * We don't know what zone to use, thus we always use the GLOBAL_ZONEID. 1657bd670b35SErik Nordmark */ 1658bd670b35SErik Nordmark void 1659bd670b35SErik Nordmark ire_recv_loopback_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1660bd670b35SErik Nordmark { 1661bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1662bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1663bd670b35SErik Nordmark ill_t *ire_ill = ire->ire_ill; 1664bd670b35SErik Nordmark 1665bd670b35SErik Nordmark ira->ira_zoneid = GLOBAL_ZONEID; 1666bd670b35SErik Nordmark 1667bd670b35SErik Nordmark /* Switch to the lo0 ill for further processing */ 1668bd670b35SErik Nordmark if (ire_ill != ill) { 1669bd670b35SErik Nordmark /* 1670bd670b35SErik Nordmark * Update ira_ill to be the ILL on which the IP address 1671bd670b35SErik Nordmark * is hosted. 
1672bd670b35SErik Nordmark * No need to hold the ill since we have a hold on the ire 1673bd670b35SErik Nordmark */ 1674bd670b35SErik Nordmark ASSERT(ira->ira_ill == ira->ira_rill); 1675bd670b35SErik Nordmark ira->ira_ill = ire_ill; 1676bd670b35SErik Nordmark 1677bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1678bd670b35SErik Nordmark 1679bd670b35SErik Nordmark /* Restore */ 1680bd670b35SErik Nordmark ASSERT(ira->ira_ill == ire_ill); 1681bd670b35SErik Nordmark ira->ira_ill = ill; 1682bd670b35SErik Nordmark return; 1683bd670b35SErik Nordmark 1684bd670b35SErik Nordmark } 1685bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1686bd670b35SErik Nordmark } 1687bd670b35SErik Nordmark 1688bd670b35SErik Nordmark /* 1689bd670b35SErik Nordmark * ire_recvfn for IRE_LOCAL. 1690bd670b35SErik Nordmark */ 1691bd670b35SErik Nordmark void 1692bd670b35SErik Nordmark ire_recv_local_v4(ire_t *ire, mblk_t *mp, void *iph_arg, ip_recv_attr_t *ira) 1693bd670b35SErik Nordmark { 1694bd670b35SErik Nordmark ipha_t *ipha = (ipha_t *)iph_arg; 1695bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1696bd670b35SErik Nordmark ill_t *ire_ill = ire->ire_ill; 1697bd670b35SErik Nordmark 1698bd670b35SErik Nordmark /* Make a note for DAD that this address is in use */ 1699b36a561eSErik Nordmark ire->ire_last_used_time = LBOLT_FASTPATH; 1700bd670b35SErik Nordmark 1701bd670b35SErik Nordmark /* Only target the IRE_LOCAL with the right zoneid. */ 1702bd670b35SErik Nordmark ira->ira_zoneid = ire->ire_zoneid; 1703bd670b35SErik Nordmark 1704bd670b35SErik Nordmark /* 1705bd670b35SErik Nordmark * If the packet arrived on the wrong ill, we check that 1706bd670b35SErik Nordmark * this is ok. 1707bd670b35SErik Nordmark * If it is, then we ensure that we do the reassembly on 1708bd670b35SErik Nordmark * the ill on which the address is hosted. 
We keep ira_rill as 1709bd670b35SErik Nordmark * the one on which the packet arrived, so that IP_PKTINFO and 1710bd670b35SErik Nordmark * friends can report this. 1711bd670b35SErik Nordmark */ 1712bd670b35SErik Nordmark if (ire_ill != ill) { 1713bd670b35SErik Nordmark ire_t *new_ire; 1714bd670b35SErik Nordmark 1715bd670b35SErik Nordmark new_ire = ip_check_multihome(&ipha->ipha_dst, ire, ill); 1716bd670b35SErik Nordmark if (new_ire == NULL) { 1717bd670b35SErik Nordmark /* Drop packet */ 1718bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits); 1719bd670b35SErik Nordmark ip_drop_input("ipIfStatsInForwProhibits", mp, ill); 1720bd670b35SErik Nordmark freemsg(mp); 1721bd670b35SErik Nordmark return; 1722bd670b35SErik Nordmark } 1723bd670b35SErik Nordmark /* 1724bd670b35SErik Nordmark * Update ira_ill to be the ILL on which the IP address 1725bd670b35SErik Nordmark * is hosted. No need to hold the ill since we have a 1726bd670b35SErik Nordmark * hold on the ire. Note that we do the switch even if 1727bd670b35SErik Nordmark * new_ire == ire (for IPMP, ire would be the one corresponding 1728bd670b35SErik Nordmark * to the IPMP ill). 
1729bd670b35SErik Nordmark */ 1730bd670b35SErik Nordmark ASSERT(ira->ira_ill == ira->ira_rill); 1731bd670b35SErik Nordmark ira->ira_ill = new_ire->ire_ill; 1732bd670b35SErik Nordmark 1733bd670b35SErik Nordmark /* ira_ruifindex tracks the upper for ira_rill */ 1734bd670b35SErik Nordmark if (IS_UNDER_IPMP(ill)) 1735bd670b35SErik Nordmark ira->ira_ruifindex = ill_get_upper_ifindex(ill); 1736bd670b35SErik Nordmark 1737bd670b35SErik Nordmark ip_input_local_v4(new_ire, mp, ipha, ira); 1738bd670b35SErik Nordmark 1739bd670b35SErik Nordmark /* Restore */ 1740bd670b35SErik Nordmark ASSERT(ira->ira_ill == new_ire->ire_ill); 1741bd670b35SErik Nordmark ira->ira_ill = ill; 1742bd670b35SErik Nordmark ira->ira_ruifindex = ill->ill_phyint->phyint_ifindex; 1743bd670b35SErik Nordmark 1744bd670b35SErik Nordmark if (new_ire != ire) 1745bd670b35SErik Nordmark ire_refrele(new_ire); 1746bd670b35SErik Nordmark return; 1747bd670b35SErik Nordmark } 1748bd670b35SErik Nordmark 1749bd670b35SErik Nordmark ip_input_local_v4(ire, mp, ipha, ira); 1750bd670b35SErik Nordmark } 1751bd670b35SErik Nordmark 1752bd670b35SErik Nordmark /* 1753bd670b35SErik Nordmark * Common function for packets arriving for the host. Handles 1754bd670b35SErik Nordmark * checksum verification, reassembly checks, etc. 1755bd670b35SErik Nordmark */ 1756bd670b35SErik Nordmark static void 1757bd670b35SErik Nordmark ip_input_local_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1758bd670b35SErik Nordmark { 1759bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1760bd670b35SErik Nordmark iaflags_t iraflags = ira->ira_flags; 1761bd670b35SErik Nordmark 1762bd670b35SErik Nordmark /* 1763bd670b35SErik Nordmark * Verify IP header checksum. If the packet was AH or ESP then 1764bd670b35SErik Nordmark * this flag has already been cleared. Likewise if the packet 1765bd670b35SErik Nordmark * had a hardware checksum. 
1766bd670b35SErik Nordmark */ 1767bd670b35SErik Nordmark if ((iraflags & IRAF_VERIFY_IP_CKSUM) && ip_csum_hdr(ipha)) { 1768bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInCksumErrs); 1769bd670b35SErik Nordmark ip_drop_input("ipIfStatsInCksumErrs", mp, ill); 1770bd670b35SErik Nordmark freemsg(mp); 1771bd670b35SErik Nordmark return; 1772bd670b35SErik Nordmark } 1773bd670b35SErik Nordmark 1774bd670b35SErik Nordmark if (iraflags & IRAF_IPV4_OPTIONS) { 1775bd670b35SErik Nordmark if (!ip_input_local_options(mp, ipha, ira)) { 1776bd670b35SErik Nordmark /* Error has been sent and mp consumed */ 1777bd670b35SErik Nordmark return; 1778bd670b35SErik Nordmark } 1779f30c160eSRoamer /* 1780f30c160eSRoamer * Some old hardware does partial checksum by including the 1781f30c160eSRoamer * whole IP header, so the partial checksum value might have 1782f30c160eSRoamer * become invalid if any option in the packet have been 1783f30c160eSRoamer * updated. Always clear partial checksum flag here. 1784f30c160eSRoamer */ 1785f30c160eSRoamer DB_CKSUMFLAGS(mp) &= ~HCK_PARTIALCKSUM; 1786bd670b35SErik Nordmark } 1787bd670b35SErik Nordmark 1788bd670b35SErik Nordmark /* 1789bd670b35SErik Nordmark * Is packet part of fragmented IP packet? 
1790bd670b35SErik Nordmark * We compare against defined values in network byte order 1791bd670b35SErik Nordmark */ 1792bd670b35SErik Nordmark if (ipha->ipha_fragment_offset_and_flags & 1793bd670b35SErik Nordmark (IPH_MF_HTONS | IPH_OFFSET_HTONS)) { 1794bd670b35SErik Nordmark /* 1795bd670b35SErik Nordmark * Make sure we have ira_l2src before we loose the original 1796bd670b35SErik Nordmark * mblk 1797bd670b35SErik Nordmark */ 1798bd670b35SErik Nordmark if (!(ira->ira_flags & IRAF_L2SRC_SET)) 1799bd670b35SErik Nordmark ip_setl2src(mp, ira, ira->ira_rill); 1800bd670b35SErik Nordmark 1801bd670b35SErik Nordmark mp = ip_input_fragment(mp, ipha, ira); 1802bd670b35SErik Nordmark if (mp == NULL) 1803bd670b35SErik Nordmark return; 1804bd670b35SErik Nordmark /* Completed reassembly */ 1805bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 1806bd670b35SErik Nordmark } 1807bd670b35SErik Nordmark 1808bd670b35SErik Nordmark /* 1809bd670b35SErik Nordmark * For broadcast and multicast we need some extra work before 1810bd670b35SErik Nordmark * we call ip_fanout_v4(), since in the case of shared-IP zones 1811bd670b35SErik Nordmark * we need to pretend that a packet arrived for each zoneid. 1812bd670b35SErik Nordmark */ 1813bd670b35SErik Nordmark if (iraflags & IRAF_MULTIBROADCAST) { 1814bd670b35SErik Nordmark if (iraflags & IRAF_BROADCAST) 1815bd670b35SErik Nordmark ip_input_broadcast_v4(ire, mp, ipha, ira); 1816bd670b35SErik Nordmark else 1817bd670b35SErik Nordmark ip_input_multicast_v4(ire, mp, ipha, ira); 1818bd670b35SErik Nordmark return; 1819bd670b35SErik Nordmark } 1820bd670b35SErik Nordmark ip_fanout_v4(mp, ipha, ira); 1821bd670b35SErik Nordmark } 1822bd670b35SErik Nordmark 1823bd670b35SErik Nordmark 1824bd670b35SErik Nordmark /* 1825bd670b35SErik Nordmark * Handle multiple zones which match the same broadcast address 1826bd670b35SErik Nordmark * and ill by delivering a packet to each of them. 
1827bd670b35SErik Nordmark * Walk the bucket and look for different ire_zoneid but otherwise 1828bd670b35SErik Nordmark * the same IRE (same ill/addr/mask/type). 1829bd670b35SErik Nordmark * Note that ire_add() tracks IREs that are identical in all 1830bd670b35SErik Nordmark * fields (addr/mask/type/gw/ill/zoneid) within a single IRE by 1831bd670b35SErik Nordmark * increasing ire_identical_cnt. Thus we don't need to be concerned 1832bd670b35SErik Nordmark * about those. 1833bd670b35SErik Nordmark */ 1834bd670b35SErik Nordmark static void 1835bd670b35SErik Nordmark ip_input_broadcast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1836bd670b35SErik Nordmark { 1837bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1838bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1839bd670b35SErik Nordmark netstack_t *ns = ipst->ips_netstack; 1840bd670b35SErik Nordmark irb_t *irb; 1841bd670b35SErik Nordmark ire_t *ire1; 1842bd670b35SErik Nordmark mblk_t *mp1; 1843bd670b35SErik Nordmark ipha_t *ipha1; 184444b099c4SSowmini Varadhan uint_t ira_pktlen = ira->ira_pktlen; 184544b099c4SSowmini Varadhan uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1846bd670b35SErik Nordmark 1847bd670b35SErik Nordmark irb = ire->ire_bucket; 1848bd670b35SErik Nordmark 1849bd670b35SErik Nordmark /* 1850bd670b35SErik Nordmark * If we don't have more than one shared-IP zone, or if 1851bd670b35SErik Nordmark * there can't be more than one IRE_BROADCAST for this 1852bd670b35SErik Nordmark * IP address, then just set the zoneid and proceed. 
1853bd670b35SErik Nordmark */ 1854bd670b35SErik Nordmark if (ns->netstack_numzones == 1 || irb->irb_ire_cnt == 1) { 1855bd670b35SErik Nordmark ira->ira_zoneid = ire->ire_zoneid; 1856bd670b35SErik Nordmark 1857bd670b35SErik Nordmark ip_fanout_v4(mp, ipha, ira); 1858bd670b35SErik Nordmark return; 1859bd670b35SErik Nordmark } 1860bd670b35SErik Nordmark irb_refhold(irb); 1861bd670b35SErik Nordmark for (ire1 = irb->irb_ire; ire1 != NULL; ire1 = ire1->ire_next) { 1862bd670b35SErik Nordmark /* We do the main IRE after the end of the loop */ 1863bd670b35SErik Nordmark if (ire1 == ire) 1864bd670b35SErik Nordmark continue; 1865bd670b35SErik Nordmark 1866bd670b35SErik Nordmark /* 1867bd670b35SErik Nordmark * Only IREs for the same IP address should be in the same 1868bd670b35SErik Nordmark * bucket. 1869bd670b35SErik Nordmark * But could have IRE_HOSTs in the case of CGTP. 1870bd670b35SErik Nordmark */ 1871bd670b35SErik Nordmark ASSERT(ire1->ire_addr == ire->ire_addr); 1872bd670b35SErik Nordmark if (!(ire1->ire_type & IRE_BROADCAST)) 1873bd670b35SErik Nordmark continue; 1874bd670b35SErik Nordmark 1875bd670b35SErik Nordmark if (IRE_IS_CONDEMNED(ire1)) 1876bd670b35SErik Nordmark continue; 1877bd670b35SErik Nordmark 1878bd670b35SErik Nordmark mp1 = copymsg(mp); 1879bd670b35SErik Nordmark if (mp1 == NULL) { 1880bd670b35SErik Nordmark /* Failed to deliver to one zone */ 1881bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1882bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 1883bd670b35SErik Nordmark continue; 1884bd670b35SErik Nordmark } 1885bd670b35SErik Nordmark ira->ira_zoneid = ire1->ire_zoneid; 1886bd670b35SErik Nordmark ipha1 = (ipha_t *)mp1->b_rptr; 1887bd670b35SErik Nordmark ip_fanout_v4(mp1, ipha1, ira); 188844b099c4SSowmini Varadhan /* 188944b099c4SSowmini Varadhan * IPsec might have modified ira_pktlen and ira_ip_hdr_length 189044b099c4SSowmini Varadhan * so we restore them for a potential next iteration 189144b099c4SSowmini 
Varadhan */ 189244b099c4SSowmini Varadhan ira->ira_pktlen = ira_pktlen; 189344b099c4SSowmini Varadhan ira->ira_ip_hdr_length = ira_ip_hdr_length; 1894bd670b35SErik Nordmark } 1895bd670b35SErik Nordmark irb_refrele(irb); 1896bd670b35SErik Nordmark /* Do the main ire */ 1897bd670b35SErik Nordmark ira->ira_zoneid = ire->ire_zoneid; 1898bd670b35SErik Nordmark ip_fanout_v4(mp, ipha, ira); 1899bd670b35SErik Nordmark } 1900bd670b35SErik Nordmark 1901bd670b35SErik Nordmark /* 1902bd670b35SErik Nordmark * Handle multiple zones which want to receive the same multicast packets 1903bd670b35SErik Nordmark * on this ill by delivering a packet to each of them. 1904bd670b35SErik Nordmark * 1905bd670b35SErik Nordmark * Note that for packets delivered to transports we could instead do this 1906bd670b35SErik Nordmark * as part of the fanout code, but since we need to handle icmp_inbound 1907bd670b35SErik Nordmark * it is simpler to have multicast work the same as broadcast. 1908bd670b35SErik Nordmark * 1909bd670b35SErik Nordmark * The ip_fanout matching for multicast matches based on ilm independent of 1910bd670b35SErik Nordmark * zoneid since the zoneid restriction is applied when joining a multicast 1911bd670b35SErik Nordmark * group. 
1912bd670b35SErik Nordmark */ 1913bd670b35SErik Nordmark /* ARGSUSED */ 1914bd670b35SErik Nordmark static void 1915bd670b35SErik Nordmark ip_input_multicast_v4(ire_t *ire, mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 1916bd670b35SErik Nordmark { 1917bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 1918bd670b35SErik Nordmark iaflags_t iraflags = ira->ira_flags; 1919bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 1920bd670b35SErik Nordmark netstack_t *ns = ipst->ips_netstack; 1921bd670b35SErik Nordmark zoneid_t zoneid; 1922bd670b35SErik Nordmark mblk_t *mp1; 1923bd670b35SErik Nordmark ipha_t *ipha1; 192444b099c4SSowmini Varadhan uint_t ira_pktlen = ira->ira_pktlen; 192544b099c4SSowmini Varadhan uint16_t ira_ip_hdr_length = ira->ira_ip_hdr_length; 1926bd670b35SErik Nordmark 1927bd670b35SErik Nordmark /* ire_recv_multicast has switched to the upper ill for IPMP */ 1928bd670b35SErik Nordmark ASSERT(!IS_UNDER_IPMP(ill)); 1929bd670b35SErik Nordmark 1930bd670b35SErik Nordmark /* 1931bd670b35SErik Nordmark * If we don't have more than one shared-IP zone, or if 1932bd670b35SErik Nordmark * there are no members in anything but the global zone, 1933bd670b35SErik Nordmark * then just set the zoneid and proceed. 
1934bd670b35SErik Nordmark */ 1935bd670b35SErik Nordmark if (ns->netstack_numzones == 1 || 1936bd670b35SErik Nordmark !ill_hasmembers_otherzones_v4(ill, ipha->ipha_dst, 1937bd670b35SErik Nordmark GLOBAL_ZONEID)) { 1938bd670b35SErik Nordmark ira->ira_zoneid = GLOBAL_ZONEID; 1939bd670b35SErik Nordmark 1940bd670b35SErik Nordmark /* If sender didn't want this zone to receive it, drop */ 1941bd670b35SErik Nordmark if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1942bd670b35SErik Nordmark ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1943bd670b35SErik Nordmark ip_drop_input("Multicast but wrong zoneid", mp, ill); 1944bd670b35SErik Nordmark freemsg(mp); 1945bd670b35SErik Nordmark return; 1946bd670b35SErik Nordmark } 1947bd670b35SErik Nordmark ip_fanout_v4(mp, ipha, ira); 1948bd670b35SErik Nordmark return; 1949bd670b35SErik Nordmark } 1950bd670b35SErik Nordmark 1951bd670b35SErik Nordmark /* 1952bd670b35SErik Nordmark * Here we loop over all zoneids that have members in the group 1953bd670b35SErik Nordmark * and deliver a packet to ip_fanout for each zoneid. 1954bd670b35SErik Nordmark * 1955bd670b35SErik Nordmark * First find any members in the lowest numeric zoneid by looking for 1956bd670b35SErik Nordmark * first zoneid larger than -1 (ALL_ZONES). 1957bd670b35SErik Nordmark * We terminate the loop when we receive -1 (ALL_ZONES). 1958bd670b35SErik Nordmark */ 1959bd670b35SErik Nordmark zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, ALL_ZONES); 1960bd670b35SErik Nordmark for (; zoneid != ALL_ZONES; 1961bd670b35SErik Nordmark zoneid = ill_hasmembers_nextzone_v4(ill, ipha->ipha_dst, zoneid)) { 1962bd670b35SErik Nordmark /* 1963bd670b35SErik Nordmark * Avoid an extra copymsg/freemsg by skipping global zone here 1964bd670b35SErik Nordmark * and doing that at the end. 
1965bd670b35SErik Nordmark */ 1966bd670b35SErik Nordmark if (zoneid == GLOBAL_ZONEID) 1967bd670b35SErik Nordmark continue; 1968bd670b35SErik Nordmark 1969bd670b35SErik Nordmark ira->ira_zoneid = zoneid; 1970bd670b35SErik Nordmark 1971bd670b35SErik Nordmark /* If sender didn't want this zone to receive it, skip */ 1972bd670b35SErik Nordmark if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1973bd670b35SErik Nordmark ira->ira_no_loop_zoneid == ira->ira_zoneid) 1974bd670b35SErik Nordmark continue; 1975bd670b35SErik Nordmark 1976bd670b35SErik Nordmark mp1 = copymsg(mp); 1977bd670b35SErik Nordmark if (mp1 == NULL) { 1978bd670b35SErik Nordmark /* Failed to deliver to one zone */ 1979bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 1980bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 1981bd670b35SErik Nordmark continue; 1982bd670b35SErik Nordmark } 1983bd670b35SErik Nordmark ipha1 = (ipha_t *)mp1->b_rptr; 1984bd670b35SErik Nordmark ip_fanout_v4(mp1, ipha1, ira); 198544b099c4SSowmini Varadhan /* 198644b099c4SSowmini Varadhan * IPsec might have modified ira_pktlen and ira_ip_hdr_length 198744b099c4SSowmini Varadhan * so we restore them for a potential next iteration 198844b099c4SSowmini Varadhan */ 198944b099c4SSowmini Varadhan ira->ira_pktlen = ira_pktlen; 199044b099c4SSowmini Varadhan ira->ira_ip_hdr_length = ira_ip_hdr_length; 1991bd670b35SErik Nordmark } 1992bd670b35SErik Nordmark 1993bd670b35SErik Nordmark /* Do the main ire */ 1994bd670b35SErik Nordmark ira->ira_zoneid = GLOBAL_ZONEID; 1995bd670b35SErik Nordmark /* If sender didn't want this zone to receive it, drop */ 1996bd670b35SErik Nordmark if ((iraflags & IRAF_NO_LOOP_ZONEID_SET) && 1997bd670b35SErik Nordmark ira->ira_no_loop_zoneid == ira->ira_zoneid) { 1998bd670b35SErik Nordmark ip_drop_input("Multicast but wrong zoneid", mp, ill); 1999bd670b35SErik Nordmark freemsg(mp); 2000bd670b35SErik Nordmark } else { 2001bd670b35SErik Nordmark ip_fanout_v4(mp, ipha, ira); 
2002bd670b35SErik Nordmark } 2003bd670b35SErik Nordmark } 2004bd670b35SErik Nordmark 2005bd670b35SErik Nordmark 2006bd670b35SErik Nordmark /* 2007bd670b35SErik Nordmark * Determine the zoneid and IRAF_TX_* flags if trusted extensions 2008bd670b35SErik Nordmark * is in use. Updates ira_zoneid and ira_flags as a result. 2009bd670b35SErik Nordmark */ 2010bd670b35SErik Nordmark static void 2011bd670b35SErik Nordmark ip_fanout_tx_v4(mblk_t *mp, ipha_t *ipha, uint8_t protocol, 2012bd670b35SErik Nordmark uint_t ip_hdr_length, ip_recv_attr_t *ira) 2013bd670b35SErik Nordmark { 2014bd670b35SErik Nordmark uint16_t *up; 2015bd670b35SErik Nordmark uint16_t lport; 2016bd670b35SErik Nordmark zoneid_t zoneid; 2017bd670b35SErik Nordmark 2018bd670b35SErik Nordmark ASSERT(ira->ira_flags & IRAF_SYSTEM_LABELED); 2019bd670b35SErik Nordmark 2020bd670b35SErik Nordmark /* 2021bd670b35SErik Nordmark * If the packet is unlabeled we might allow read-down 2022bd670b35SErik Nordmark * for MAC_EXEMPT. Below we clear this if it is a multi-level 2023bd670b35SErik Nordmark * port (MLP). 2024bd670b35SErik Nordmark * Note that ira_tsl can be NULL here. 
2025bd670b35SErik Nordmark */ 2026bd670b35SErik Nordmark if (ira->ira_tsl != NULL && ira->ira_tsl->tsl_flags & TSLF_UNLABELED) 2027bd670b35SErik Nordmark ira->ira_flags |= IRAF_TX_MAC_EXEMPTABLE; 2028bd670b35SErik Nordmark 2029bd670b35SErik Nordmark if (ira->ira_zoneid != ALL_ZONES) 2030bd670b35SErik Nordmark return; 2031bd670b35SErik Nordmark 2032bd670b35SErik Nordmark ira->ira_flags |= IRAF_TX_SHARED_ADDR; 2033bd670b35SErik Nordmark 2034bd670b35SErik Nordmark up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2035bd670b35SErik Nordmark switch (protocol) { 2036bd670b35SErik Nordmark case IPPROTO_TCP: 2037bd670b35SErik Nordmark case IPPROTO_SCTP: 2038bd670b35SErik Nordmark case IPPROTO_UDP: 2039bd670b35SErik Nordmark /* Caller ensures this */ 2040bd670b35SErik Nordmark ASSERT(((uchar_t *)ipha) + ip_hdr_length +4 <= mp->b_wptr); 2041bd670b35SErik Nordmark 2042bd670b35SErik Nordmark /* 2043bd670b35SErik Nordmark * Only these transports support MLP. 2044bd670b35SErik Nordmark * We know their destination port numbers is in 2045bd670b35SErik Nordmark * the same place in the header. 2046bd670b35SErik Nordmark */ 2047bd670b35SErik Nordmark lport = up[1]; 2048bd670b35SErik Nordmark 2049bd670b35SErik Nordmark /* 2050bd670b35SErik Nordmark * No need to handle exclusive-stack zones 2051bd670b35SErik Nordmark * since ALL_ZONES only applies to the shared IP instance. 2052bd670b35SErik Nordmark */ 2053bd670b35SErik Nordmark zoneid = tsol_mlp_findzone(protocol, lport); 2054bd670b35SErik Nordmark /* 2055bd670b35SErik Nordmark * If no shared MLP is found, tsol_mlp_findzone returns 2056bd670b35SErik Nordmark * ALL_ZONES. In that case, we assume it's SLP, and 2057bd670b35SErik Nordmark * search for the zone based on the packet label. 2058bd670b35SErik Nordmark * 2059bd670b35SErik Nordmark * If there is such a zone, we prefer to find a 2060bd670b35SErik Nordmark * connection in it. 
Otherwise, we look for a 2061bd670b35SErik Nordmark * MAC-exempt connection in any zone whose label 2062bd670b35SErik Nordmark * dominates the default label on the packet. 2063bd670b35SErik Nordmark */ 2064bd670b35SErik Nordmark if (zoneid == ALL_ZONES) 2065bd670b35SErik Nordmark zoneid = tsol_attr_to_zoneid(ira); 2066bd670b35SErik Nordmark else 2067bd670b35SErik Nordmark ira->ira_flags &= ~IRAF_TX_MAC_EXEMPTABLE; 2068bd670b35SErik Nordmark break; 2069bd670b35SErik Nordmark default: 2070bd670b35SErik Nordmark /* Handle shared address for other protocols */ 2071bd670b35SErik Nordmark zoneid = tsol_attr_to_zoneid(ira); 2072bd670b35SErik Nordmark break; 2073bd670b35SErik Nordmark } 2074bd670b35SErik Nordmark ira->ira_zoneid = zoneid; 2075bd670b35SErik Nordmark } 2076bd670b35SErik Nordmark 2077bd670b35SErik Nordmark /* 2078bd670b35SErik Nordmark * Increment checksum failure statistics 2079bd670b35SErik Nordmark */ 2080bd670b35SErik Nordmark static void 2081bd670b35SErik Nordmark ip_input_cksum_err_v4(uint8_t protocol, uint16_t hck_flags, ill_t *ill) 2082bd670b35SErik Nordmark { 2083bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 2084bd670b35SErik Nordmark 2085bd670b35SErik Nordmark switch (protocol) { 2086bd670b35SErik Nordmark case IPPROTO_TCP: 2087bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, tcpIfStatsInErrs); 2088bd670b35SErik Nordmark 2089bd670b35SErik Nordmark if (hck_flags & HCK_FULLCKSUM) 2090bd670b35SErik Nordmark IP_STAT(ipst, ip_tcp_in_full_hw_cksum_err); 2091bd670b35SErik Nordmark else if (hck_flags & HCK_PARTIALCKSUM) 2092bd670b35SErik Nordmark IP_STAT(ipst, ip_tcp_in_part_hw_cksum_err); 2093bd670b35SErik Nordmark else 2094bd670b35SErik Nordmark IP_STAT(ipst, ip_tcp_in_sw_cksum_err); 2095bd670b35SErik Nordmark break; 2096bd670b35SErik Nordmark case IPPROTO_UDP: 2097bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, udpIfStatsInCksumErrs); 2098bd670b35SErik Nordmark if (hck_flags & HCK_FULLCKSUM) 2099bd670b35SErik Nordmark IP_STAT(ipst, 
ip_udp_in_full_hw_cksum_err); 2100bd670b35SErik Nordmark else if (hck_flags & HCK_PARTIALCKSUM) 2101bd670b35SErik Nordmark IP_STAT(ipst, ip_udp_in_part_hw_cksum_err); 2102bd670b35SErik Nordmark else 2103bd670b35SErik Nordmark IP_STAT(ipst, ip_udp_in_sw_cksum_err); 2104bd670b35SErik Nordmark break; 2105bd670b35SErik Nordmark case IPPROTO_ICMP: 2106bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2107bd670b35SErik Nordmark break; 2108bd670b35SErik Nordmark default: 2109bd670b35SErik Nordmark ASSERT(0); 2110bd670b35SErik Nordmark break; 2111bd670b35SErik Nordmark } 2112bd670b35SErik Nordmark } 2113bd670b35SErik Nordmark 2114bd670b35SErik Nordmark /* Calculate the IPv4 pseudo-header checksum */ 2115bd670b35SErik Nordmark uint32_t 2116bd670b35SErik Nordmark ip_input_cksum_pseudo_v4(ipha_t *ipha, ip_recv_attr_t *ira) 2117bd670b35SErik Nordmark { 2118bd670b35SErik Nordmark uint_t ulp_len; 2119bd670b35SErik Nordmark uint32_t cksum; 2120bd670b35SErik Nordmark uint8_t protocol = ira->ira_protocol; 2121bd670b35SErik Nordmark uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2122bd670b35SErik Nordmark 2123bd670b35SErik Nordmark #define iphs ((uint16_t *)ipha) 2124bd670b35SErik Nordmark 2125bd670b35SErik Nordmark switch (protocol) { 2126bd670b35SErik Nordmark case IPPROTO_TCP: 2127bd670b35SErik Nordmark ulp_len = ira->ira_pktlen - ip_hdr_length; 2128bd670b35SErik Nordmark 2129bd670b35SErik Nordmark /* Protocol and length */ 2130bd670b35SErik Nordmark cksum = htons(ulp_len) + IP_TCP_CSUM_COMP; 2131bd670b35SErik Nordmark /* IP addresses */ 2132bd670b35SErik Nordmark cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2133bd670b35SErik Nordmark break; 2134bd670b35SErik Nordmark 2135bd670b35SErik Nordmark case IPPROTO_UDP: { 2136bd670b35SErik Nordmark udpha_t *udpha; 2137bd670b35SErik Nordmark 2138bd670b35SErik Nordmark udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2139bd670b35SErik Nordmark 2140bd670b35SErik Nordmark /* Protocol and length */ 
2141bd670b35SErik Nordmark cksum = udpha->uha_length + IP_UDP_CSUM_COMP; 2142bd670b35SErik Nordmark /* IP addresses */ 2143bd670b35SErik Nordmark cksum += iphs[6] + iphs[7] + iphs[8] + iphs[9]; 2144bd670b35SErik Nordmark break; 2145bd670b35SErik Nordmark } 2146bd670b35SErik Nordmark 2147bd670b35SErik Nordmark default: 2148bd670b35SErik Nordmark cksum = 0; 2149bd670b35SErik Nordmark break; 2150bd670b35SErik Nordmark } 2151bd670b35SErik Nordmark #undef iphs 2152bd670b35SErik Nordmark return (cksum); 2153bd670b35SErik Nordmark } 2154bd670b35SErik Nordmark 2155bd670b35SErik Nordmark 2156bd670b35SErik Nordmark /* 2157bd670b35SErik Nordmark * Software verification of the ULP checksums. 2158bd670b35SErik Nordmark * Returns B_TRUE if ok. 2159bd670b35SErik Nordmark * Increments statistics of failed. 2160bd670b35SErik Nordmark */ 2161bd670b35SErik Nordmark static boolean_t 2162bd670b35SErik Nordmark ip_input_sw_cksum_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2163bd670b35SErik Nordmark { 2164bd670b35SErik Nordmark ip_stack_t *ipst = ira->ira_ill->ill_ipst; 2165bd670b35SErik Nordmark uint32_t cksum; 2166bd670b35SErik Nordmark uint8_t protocol = ira->ira_protocol; 2167bd670b35SErik Nordmark uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2168bd670b35SErik Nordmark 2169bd670b35SErik Nordmark IP_STAT(ipst, ip_in_sw_cksum); 2170bd670b35SErik Nordmark 2171bd670b35SErik Nordmark ASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP); 2172bd670b35SErik Nordmark 2173bd670b35SErik Nordmark cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2174bd670b35SErik Nordmark cksum = IP_CSUM(mp, ip_hdr_length, cksum); 2175bd670b35SErik Nordmark if (cksum == 0) 2176bd670b35SErik Nordmark return (B_TRUE); 2177bd670b35SErik Nordmark 2178bd670b35SErik Nordmark ip_input_cksum_err_v4(protocol, 0, ira->ira_ill); 2179bd670b35SErik Nordmark return (B_FALSE); 2180bd670b35SErik Nordmark } 2181bd670b35SErik Nordmark 2182bd670b35SErik Nordmark /* 2183bd670b35SErik Nordmark * Verify the ULP checksums. 
2184bd670b35SErik Nordmark * Returns B_TRUE if ok, or if the ULP doesn't have a well-defined checksum 2185bd670b35SErik Nordmark * algorithm. 2186bd670b35SErik Nordmark * Increments statistics if failed. 2187bd670b35SErik Nordmark */ 2188bd670b35SErik Nordmark static boolean_t 2189bd670b35SErik Nordmark ip_input_cksum_v4(iaflags_t iraflags, mblk_t *mp, ipha_t *ipha, 2190bd670b35SErik Nordmark ip_recv_attr_t *ira) 2191bd670b35SErik Nordmark { 2192bd670b35SErik Nordmark ill_t *ill = ira->ira_rill; 2193bd670b35SErik Nordmark uint16_t hck_flags; 2194bd670b35SErik Nordmark uint32_t cksum; 2195bd670b35SErik Nordmark mblk_t *mp1; 2196bd670b35SErik Nordmark int32_t len; 2197bd670b35SErik Nordmark uint8_t protocol = ira->ira_protocol; 2198bd670b35SErik Nordmark uint16_t ip_hdr_length = ira->ira_ip_hdr_length; 2199bd670b35SErik Nordmark 2200bd670b35SErik Nordmark 2201bd670b35SErik Nordmark switch (protocol) { 2202bd670b35SErik Nordmark case IPPROTO_TCP: 2203bd670b35SErik Nordmark break; 2204bd670b35SErik Nordmark 2205bd670b35SErik Nordmark case IPPROTO_UDP: { 2206bd670b35SErik Nordmark udpha_t *udpha; 2207bd670b35SErik Nordmark 2208bd670b35SErik Nordmark udpha = (udpha_t *)((uchar_t *)ipha + ip_hdr_length); 2209bd670b35SErik Nordmark if (udpha->uha_checksum == 0) { 2210bd670b35SErik Nordmark /* Packet doesn't have a UDP checksum */ 2211bd670b35SErik Nordmark return (B_TRUE); 2212bd670b35SErik Nordmark } 2213bd670b35SErik Nordmark break; 2214bd670b35SErik Nordmark } 2215bd670b35SErik Nordmark case IPPROTO_SCTP: { 2216bd670b35SErik Nordmark sctp_hdr_t *sctph; 2217bd670b35SErik Nordmark uint32_t pktsum; 2218bd670b35SErik Nordmark 2219bd670b35SErik Nordmark sctph = (sctp_hdr_t *)((uchar_t *)ipha + ip_hdr_length); 2220bd670b35SErik Nordmark #ifdef DEBUG 2221bd670b35SErik Nordmark if (skip_sctp_cksum) 2222bd670b35SErik Nordmark return (B_TRUE); 2223bd670b35SErik Nordmark #endif 2224bd670b35SErik Nordmark pktsum = sctph->sh_chksum; 2225bd670b35SErik Nordmark sctph->sh_chksum = 0; 
2226bd670b35SErik Nordmark cksum = sctp_cksum(mp, ip_hdr_length); 2227bd670b35SErik Nordmark sctph->sh_chksum = pktsum; 2228bd670b35SErik Nordmark if (cksum == pktsum) 2229bd670b35SErik Nordmark return (B_TRUE); 2230bd670b35SErik Nordmark 2231bd670b35SErik Nordmark /* 2232bd670b35SErik Nordmark * Defer until later whether a bad checksum is ok 2233bd670b35SErik Nordmark * in order to allow RAW sockets to use Adler checksum 2234bd670b35SErik Nordmark * with SCTP. 2235bd670b35SErik Nordmark */ 2236bd670b35SErik Nordmark ira->ira_flags |= IRAF_SCTP_CSUM_ERR; 2237bd670b35SErik Nordmark return (B_TRUE); 2238bd670b35SErik Nordmark } 2239bd670b35SErik Nordmark 2240bd670b35SErik Nordmark default: 2241bd670b35SErik Nordmark /* No ULP checksum to verify. */ 2242bd670b35SErik Nordmark return (B_TRUE); 2243bd670b35SErik Nordmark } 2244bd670b35SErik Nordmark /* 2245bd670b35SErik Nordmark * Revert to software checksum calculation if the interface 2246bd670b35SErik Nordmark * isn't capable of checksum offload. 2247bd670b35SErik Nordmark * We clear DB_CKSUMFLAGS when going through IPsec in ip_fanout. 2248bd670b35SErik Nordmark * Note: IRAF_NO_HW_CKSUM is not currently used. 2249bd670b35SErik Nordmark */ 2250bd670b35SErik Nordmark ASSERT(!IS_IPMP(ill)); 2251bd670b35SErik Nordmark if ((iraflags & IRAF_NO_HW_CKSUM) || !ILL_HCKSUM_CAPABLE(ill) || 2252bd670b35SErik Nordmark !dohwcksum) { 2253bd670b35SErik Nordmark return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2254bd670b35SErik Nordmark } 2255bd670b35SErik Nordmark 2256bd670b35SErik Nordmark /* 2257bd670b35SErik Nordmark * We apply this for all ULP protocols. Does the HW know to 2258bd670b35SErik Nordmark * not set the flags for SCTP and other protocols. 
2259bd670b35SErik Nordmark */ 2260bd670b35SErik Nordmark 2261bd670b35SErik Nordmark hck_flags = DB_CKSUMFLAGS(mp); 2262bd670b35SErik Nordmark 22630dc2366fSVenugopal Iyer if (hck_flags & HCK_FULLCKSUM_OK) { 22640dc2366fSVenugopal Iyer /* 22650dc2366fSVenugopal Iyer * Hardware has already verified the checksum. 22660dc2366fSVenugopal Iyer */ 22670dc2366fSVenugopal Iyer return (B_TRUE); 22680dc2366fSVenugopal Iyer } 22690dc2366fSVenugopal Iyer 2270bd670b35SErik Nordmark if (hck_flags & HCK_FULLCKSUM) { 2271bd670b35SErik Nordmark /* 2272bd670b35SErik Nordmark * Full checksum has been computed by the hardware 2273bd670b35SErik Nordmark * and has been attached. If the driver wants us to 2274bd670b35SErik Nordmark * verify the correctness of the attached value, in 2275bd670b35SErik Nordmark * order to protect against faulty hardware, compare 2276bd670b35SErik Nordmark * it against -0 (0xFFFF) to see if it's valid. 2277bd670b35SErik Nordmark */ 2278bd670b35SErik Nordmark cksum = DB_CKSUM16(mp); 2279bd670b35SErik Nordmark if (cksum == 0xFFFF) 2280bd670b35SErik Nordmark return (B_TRUE); 2281bd670b35SErik Nordmark ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2282bd670b35SErik Nordmark return (B_FALSE); 2283bd670b35SErik Nordmark } 2284bd670b35SErik Nordmark 2285bd670b35SErik Nordmark mp1 = mp->b_cont; 2286bd670b35SErik Nordmark if ((hck_flags & HCK_PARTIALCKSUM) && 2287bd670b35SErik Nordmark (mp1 == NULL || mp1->b_cont == NULL) && 2288bd670b35SErik Nordmark ip_hdr_length >= DB_CKSUMSTART(mp) && 2289bd670b35SErik Nordmark ((len = ip_hdr_length - DB_CKSUMSTART(mp)) & 1) == 0) { 2290bd670b35SErik Nordmark uint32_t adj; 2291bd670b35SErik Nordmark uchar_t *cksum_start; 2292bd670b35SErik Nordmark 2293bd670b35SErik Nordmark cksum = ip_input_cksum_pseudo_v4(ipha, ira); 2294bd670b35SErik Nordmark 2295bd670b35SErik Nordmark cksum_start = ((uchar_t *)ipha + DB_CKSUMSTART(mp)); 2296bd670b35SErik Nordmark 2297bd670b35SErik Nordmark /* 2298bd670b35SErik Nordmark * Partial 
checksum has been calculated by hardware 2299bd670b35SErik Nordmark * and attached to the packet; in addition, any 2300bd670b35SErik Nordmark * prepended extraneous data is even byte aligned, 2301bd670b35SErik Nordmark * and there are at most two mblks associated with 2302bd670b35SErik Nordmark * the packet. If any such data exists, we adjust 2303bd670b35SErik Nordmark * the checksum; also take care any postpended data. 2304bd670b35SErik Nordmark */ 2305bd670b35SErik Nordmark IP_ADJCKSUM_PARTIAL(cksum_start, mp, mp1, len, adj); 2306bd670b35SErik Nordmark /* 2307bd670b35SErik Nordmark * One's complement subtract extraneous checksum 2308bd670b35SErik Nordmark */ 2309bd670b35SErik Nordmark cksum += DB_CKSUM16(mp); 2310bd670b35SErik Nordmark if (adj >= cksum) 2311bd670b35SErik Nordmark cksum = ~(adj - cksum) & 0xFFFF; 2312bd670b35SErik Nordmark else 2313bd670b35SErik Nordmark cksum -= adj; 2314bd670b35SErik Nordmark cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2315bd670b35SErik Nordmark cksum = (cksum & 0xFFFF) + ((int)cksum >> 16); 2316bd670b35SErik Nordmark if (!(~cksum & 0xFFFF)) 2317bd670b35SErik Nordmark return (B_TRUE); 2318bd670b35SErik Nordmark 2319bd670b35SErik Nordmark ip_input_cksum_err_v4(protocol, hck_flags, ira->ira_ill); 2320bd670b35SErik Nordmark return (B_FALSE); 2321bd670b35SErik Nordmark } 2322bd670b35SErik Nordmark return (ip_input_sw_cksum_v4(mp, ipha, ira)); 2323bd670b35SErik Nordmark } 2324bd670b35SErik Nordmark 2325bd670b35SErik Nordmark 2326bd670b35SErik Nordmark /* 2327bd670b35SErik Nordmark * Handle fanout of received packets. 2328bd670b35SErik Nordmark * Unicast packets that are looped back (from ire_send_local_v4) and packets 2329bd670b35SErik Nordmark * from the wire are differentiated by checking IRAF_VERIFY_ULP_CKSUM. 2330bd670b35SErik Nordmark * 2331bd670b35SErik Nordmark * IPQoS Notes 2332bd670b35SErik Nordmark * Before sending it to the client, invoke IPPF processing. 
Policy processing 2333bd670b35SErik Nordmark * takes place only if the callout_position, IPP_LOCAL_IN, is enabled. 2334bd670b35SErik Nordmark */ 2335bd670b35SErik Nordmark void 2336bd670b35SErik Nordmark ip_fanout_v4(mblk_t *mp, ipha_t *ipha, ip_recv_attr_t *ira) 2337bd670b35SErik Nordmark { 2338bd670b35SErik Nordmark ill_t *ill = ira->ira_ill; 2339bd670b35SErik Nordmark iaflags_t iraflags = ira->ira_flags; 2340bd670b35SErik Nordmark ip_stack_t *ipst = ill->ill_ipst; 2341bd670b35SErik Nordmark uint8_t protocol = ipha->ipha_protocol; 2342bd670b35SErik Nordmark conn_t *connp; 2343bd670b35SErik Nordmark #define rptr ((uchar_t *)ipha) 2344bd670b35SErik Nordmark uint_t ip_hdr_length; 2345bd670b35SErik Nordmark uint_t min_ulp_header_length; 2346bd670b35SErik Nordmark int offset; 2347bd670b35SErik Nordmark ssize_t len; 2348bd670b35SErik Nordmark netstack_t *ns = ipst->ips_netstack; 2349bd670b35SErik Nordmark ipsec_stack_t *ipss = ns->netstack_ipsec; 2350bd670b35SErik Nordmark ill_t *rill = ira->ira_rill; 2351bd670b35SErik Nordmark 2352bd670b35SErik Nordmark ASSERT(ira->ira_pktlen == ntohs(ipha->ipha_length)); 2353bd670b35SErik Nordmark 2354bd670b35SErik Nordmark ip_hdr_length = ira->ira_ip_hdr_length; 2355bd670b35SErik Nordmark ira->ira_protocol = protocol; 2356bd670b35SErik Nordmark 2357bd670b35SErik Nordmark /* 2358bd670b35SErik Nordmark * Time for IPP once we've done reassembly and IPsec. 2359bd670b35SErik Nordmark * We skip this for loopback packets since we don't do IPQoS 2360bd670b35SErik Nordmark * on loopback. 2361bd670b35SErik Nordmark */ 2362bd670b35SErik Nordmark if (IPP_ENABLED(IPP_LOCAL_IN, ipst) && 2363bd670b35SErik Nordmark !(iraflags & IRAF_LOOPBACK) && 2364bd670b35SErik Nordmark (protocol != IPPROTO_ESP || protocol != IPPROTO_AH)) { 2365bd670b35SErik Nordmark /* 2366bd670b35SErik Nordmark * Use the interface on which the packet arrived - not where 2367bd670b35SErik Nordmark * the IP address is hosted. 
2368bd670b35SErik Nordmark */ 2369bd670b35SErik Nordmark /* ip_process translates an IS_UNDER_IPMP */ 2370bd670b35SErik Nordmark mp = ip_process(IPP_LOCAL_IN, mp, rill, ill); 2371bd670b35SErik Nordmark if (mp == NULL) { 2372bd670b35SErik Nordmark /* ip_drop_packet and MIB done */ 2373bd670b35SErik Nordmark return; 2374bd670b35SErik Nordmark } 2375bd670b35SErik Nordmark } 2376bd670b35SErik Nordmark 2377bd670b35SErik Nordmark /* Determine the minimum required size of the upper-layer header */ 2378bd670b35SErik Nordmark /* Need to do this for at least the set of ULPs that TX handles. */ 2379bd670b35SErik Nordmark switch (protocol) { 2380bd670b35SErik Nordmark case IPPROTO_TCP: 2381bd670b35SErik Nordmark min_ulp_header_length = TCP_MIN_HEADER_LENGTH; 2382bd670b35SErik Nordmark break; 2383bd670b35SErik Nordmark case IPPROTO_SCTP: 2384bd670b35SErik Nordmark min_ulp_header_length = SCTP_COMMON_HDR_LENGTH; 2385bd670b35SErik Nordmark break; 2386bd670b35SErik Nordmark case IPPROTO_UDP: 2387bd670b35SErik Nordmark min_ulp_header_length = UDPH_SIZE; 2388bd670b35SErik Nordmark break; 2389bd670b35SErik Nordmark case IPPROTO_ICMP: 2390bd670b35SErik Nordmark min_ulp_header_length = ICMPH_SIZE; 2391bd670b35SErik Nordmark break; 2392bd670b35SErik Nordmark default: 2393bd670b35SErik Nordmark min_ulp_header_length = 0; 2394bd670b35SErik Nordmark break; 2395bd670b35SErik Nordmark } 2396bd670b35SErik Nordmark /* Make sure we have the min ULP header length */ 2397bd670b35SErik Nordmark len = mp->b_wptr - rptr; 2398bd670b35SErik Nordmark if (len < ip_hdr_length + min_ulp_header_length) { 2399bd670b35SErik Nordmark if (ira->ira_pktlen < ip_hdr_length + min_ulp_header_length) { 2400bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts); 2401bd670b35SErik Nordmark ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill); 2402bd670b35SErik Nordmark freemsg(mp); 2403bd670b35SErik Nordmark return; 2404bd670b35SErik Nordmark } 2405bd670b35SErik Nordmark IP_STAT(ipst, 
ip_recv_pullup); 2406bd670b35SErik Nordmark ipha = ip_pullup(mp, ip_hdr_length + min_ulp_header_length, 2407bd670b35SErik Nordmark ira); 2408bd670b35SErik Nordmark if (ipha == NULL) 2409bd670b35SErik Nordmark goto discard; 2410bd670b35SErik Nordmark len = mp->b_wptr - rptr; 2411bd670b35SErik Nordmark } 2412bd670b35SErik Nordmark 2413bd670b35SErik Nordmark /* 2414bd670b35SErik Nordmark * If trusted extensions then determine the zoneid and TX specific 2415bd670b35SErik Nordmark * ira_flags. 2416bd670b35SErik Nordmark */ 2417bd670b35SErik Nordmark if (iraflags & IRAF_SYSTEM_LABELED) { 2418bd670b35SErik Nordmark /* This can update ira->ira_flags and ira->ira_zoneid */ 2419bd670b35SErik Nordmark ip_fanout_tx_v4(mp, ipha, protocol, ip_hdr_length, ira); 2420bd670b35SErik Nordmark iraflags = ira->ira_flags; 2421bd670b35SErik Nordmark } 2422bd670b35SErik Nordmark 2423bd670b35SErik Nordmark 2424bd670b35SErik Nordmark /* Verify ULP checksum. Handles TCP, UDP, and SCTP */ 2425bd670b35SErik Nordmark if (iraflags & IRAF_VERIFY_ULP_CKSUM) { 2426bd670b35SErik Nordmark if (!ip_input_cksum_v4(iraflags, mp, ipha, ira)) { 2427bd670b35SErik Nordmark /* Bad checksum. Stats are already incremented */ 2428bd670b35SErik Nordmark ip_drop_input("Bad ULP checksum", mp, ill); 2429bd670b35SErik Nordmark freemsg(mp); 2430bd670b35SErik Nordmark return; 2431bd670b35SErik Nordmark } 2432bd670b35SErik Nordmark /* IRAF_SCTP_CSUM_ERR could have been set */ 2433bd670b35SErik Nordmark iraflags = ira->ira_flags; 2434bd670b35SErik Nordmark } 2435bd670b35SErik Nordmark switch (protocol) { 2436bd670b35SErik Nordmark case IPPROTO_TCP: 2437bd670b35SErik Nordmark /* For TCP, discard broadcast and multicast packets. 
*/ 2438bd670b35SErik Nordmark if (iraflags & IRAF_MULTIBROADCAST) 2439bd670b35SErik Nordmark goto discard; 2440bd670b35SErik Nordmark 2441bd670b35SErik Nordmark /* First mblk contains IP+TCP headers per above check */ 2442bd670b35SErik Nordmark ASSERT(len >= ip_hdr_length + TCP_MIN_HEADER_LENGTH); 2443bd670b35SErik Nordmark 2444bd670b35SErik Nordmark /* TCP options present? */ 2445bd670b35SErik Nordmark offset = ((uchar_t *)ipha)[ip_hdr_length + 12] >> 4; 2446bd670b35SErik Nordmark if (offset != 5) { 2447bd670b35SErik Nordmark if (offset < 5) 2448bd670b35SErik Nordmark goto discard; 2449bd670b35SErik Nordmark 2450bd670b35SErik Nordmark /* 2451bd670b35SErik Nordmark * There must be TCP options. 2452bd670b35SErik Nordmark * Make sure we can grab them. 2453bd670b35SErik Nordmark */ 2454bd670b35SErik Nordmark offset <<= 2; 2455bd670b35SErik Nordmark offset += ip_hdr_length; 2456bd670b35SErik Nordmark if (len < offset) { 2457bd670b35SErik Nordmark if (ira->ira_pktlen < offset) { 2458bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, 2459bd670b35SErik Nordmark ipIfStatsInTruncatedPkts); 2460bd670b35SErik Nordmark ip_drop_input( 2461bd670b35SErik Nordmark "ipIfStatsInTruncatedPkts", 2462bd670b35SErik Nordmark mp, ill); 2463bd670b35SErik Nordmark freemsg(mp); 2464bd670b35SErik Nordmark return; 2465bd670b35SErik Nordmark } 2466bd670b35SErik Nordmark IP_STAT(ipst, ip_recv_pullup); 2467bd670b35SErik Nordmark ipha = ip_pullup(mp, offset, ira); 2468bd670b35SErik Nordmark if (ipha == NULL) 2469bd670b35SErik Nordmark goto discard; 2470bd670b35SErik Nordmark len = mp->b_wptr - rptr; 2471bd670b35SErik Nordmark } 2472bd670b35SErik Nordmark } 2473bd670b35SErik Nordmark 2474bd670b35SErik Nordmark /* 2475bd670b35SErik Nordmark * Pass up a squeue hint to tcp. 2476bd670b35SErik Nordmark * If ira_sqp is already set (this is loopback) we leave it 2477bd670b35SErik Nordmark * alone. 
2478bd670b35SErik Nordmark */ 2479bd670b35SErik Nordmark if (ira->ira_sqp == NULL) { 2480bd670b35SErik Nordmark ira->ira_sqp = ip_squeue_get(ira->ira_ring); 2481bd670b35SErik Nordmark } 2482bd670b35SErik Nordmark 2483bd670b35SErik Nordmark /* Look for AF_INET or AF_INET6 that matches */ 2484bd670b35SErik Nordmark connp = ipcl_classify_v4(mp, IPPROTO_TCP, ip_hdr_length, 2485bd670b35SErik Nordmark ira, ipst); 2486bd670b35SErik Nordmark if (connp == NULL) { 2487bd670b35SErik Nordmark /* Send the TH_RST */ 2488bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2489bd670b35SErik Nordmark tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2490bd670b35SErik Nordmark return; 2491bd670b35SErik Nordmark } 2492bd670b35SErik Nordmark if (connp->conn_incoming_ifindex != 0 && 2493bd670b35SErik Nordmark connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2494bd670b35SErik Nordmark CONN_DEC_REF(connp); 2495bd670b35SErik Nordmark 2496bd670b35SErik Nordmark /* Send the TH_RST */ 2497bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2498bd670b35SErik Nordmark tcp_xmit_listeners_reset(mp, ira, ipst, NULL); 2499bd670b35SErik Nordmark return; 2500bd670b35SErik Nordmark } 2501bd670b35SErik Nordmark if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2502bd670b35SErik Nordmark (iraflags & IRAF_IPSEC_SECURE)) { 2503bd670b35SErik Nordmark mp = ipsec_check_inbound_policy(mp, connp, 2504bd670b35SErik Nordmark ipha, NULL, ira); 2505bd670b35SErik Nordmark if (mp == NULL) { 2506bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2507bd670b35SErik Nordmark /* Note that mp is NULL */ 2508bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2509bd670b35SErik Nordmark CONN_DEC_REF(connp); 2510bd670b35SErik Nordmark return; 2511bd670b35SErik Nordmark } 2512bd670b35SErik Nordmark } 2513bd670b35SErik Nordmark /* Found a client; up it goes */ 2514bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 
2515bd670b35SErik Nordmark ira->ira_ill = ira->ira_rill = NULL; 2516bd670b35SErik Nordmark if (!IPCL_IS_TCP(connp)) { 2517bd670b35SErik Nordmark /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */ 2518bd670b35SErik Nordmark (connp->conn_recv)(connp, mp, NULL, ira); 2519bd670b35SErik Nordmark CONN_DEC_REF(connp); 2520bd670b35SErik Nordmark ira->ira_ill = ill; 2521bd670b35SErik Nordmark ira->ira_rill = rill; 2522bd670b35SErik Nordmark return; 2523bd670b35SErik Nordmark } 2524bd670b35SErik Nordmark 2525bd670b35SErik Nordmark /* 2526bd670b35SErik Nordmark * We do different processing whether called from 2527bd670b35SErik Nordmark * ip_accept_tcp and we match the target, don't match 2528bd670b35SErik Nordmark * the target, and when we are called by ip_input. 2529bd670b35SErik Nordmark */ 2530bd670b35SErik Nordmark if (iraflags & IRAF_TARGET_SQP) { 2531bd670b35SErik Nordmark if (ira->ira_target_sqp == connp->conn_sqp) { 2532bd670b35SErik Nordmark mblk_t *attrmp; 2533bd670b35SErik Nordmark 2534bd670b35SErik Nordmark attrmp = ip_recv_attr_to_mblk(ira); 2535bd670b35SErik Nordmark if (attrmp == NULL) { 2536bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, 2537bd670b35SErik Nordmark ipIfStatsInDiscards); 2538bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", 2539bd670b35SErik Nordmark mp, ill); 2540bd670b35SErik Nordmark freemsg(mp); 2541bd670b35SErik Nordmark CONN_DEC_REF(connp); 2542bd670b35SErik Nordmark } else { 2543bd670b35SErik Nordmark SET_SQUEUE(attrmp, connp->conn_recv, 2544bd670b35SErik Nordmark connp); 2545bd670b35SErik Nordmark attrmp->b_cont = mp; 2546bd670b35SErik Nordmark ASSERT(ira->ira_target_sqp_mp == NULL); 2547bd670b35SErik Nordmark ira->ira_target_sqp_mp = attrmp; 2548bd670b35SErik Nordmark /* 2549bd670b35SErik Nordmark * Conn ref release when drained from 2550bd670b35SErik Nordmark * the squeue. 
2551bd670b35SErik Nordmark */ 2552bd670b35SErik Nordmark } 2553bd670b35SErik Nordmark } else { 2554bd670b35SErik Nordmark SQUEUE_ENTER_ONE(connp->conn_sqp, mp, 2555bd670b35SErik Nordmark connp->conn_recv, connp, ira, SQ_FILL, 2556bd670b35SErik Nordmark SQTAG_IP_TCP_INPUT); 2557bd670b35SErik Nordmark } 2558bd670b35SErik Nordmark } else { 2559bd670b35SErik Nordmark SQUEUE_ENTER_ONE(connp->conn_sqp, mp, connp->conn_recv, 2560bd670b35SErik Nordmark connp, ira, ip_squeue_flag, SQTAG_IP_TCP_INPUT); 2561bd670b35SErik Nordmark } 2562bd670b35SErik Nordmark ira->ira_ill = ill; 2563bd670b35SErik Nordmark ira->ira_rill = rill; 2564bd670b35SErik Nordmark return; 2565bd670b35SErik Nordmark 2566bd670b35SErik Nordmark case IPPROTO_SCTP: { 2567bd670b35SErik Nordmark sctp_hdr_t *sctph; 2568bd670b35SErik Nordmark in6_addr_t map_src, map_dst; 2569bd670b35SErik Nordmark uint32_t ports; /* Source and destination ports */ 2570bd670b35SErik Nordmark sctp_stack_t *sctps = ipst->ips_netstack->netstack_sctp; 2571bd670b35SErik Nordmark 2572bd670b35SErik Nordmark /* For SCTP, discard broadcast and multicast packets. */ 2573bd670b35SErik Nordmark if (iraflags & IRAF_MULTIBROADCAST) 2574bd670b35SErik Nordmark goto discard; 2575bd670b35SErik Nordmark 2576bd670b35SErik Nordmark /* 2577bd670b35SErik Nordmark * Since there is no SCTP h/w cksum support yet, just 2578bd670b35SErik Nordmark * clear the flag. 
2579bd670b35SErik Nordmark */ 2580bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) = 0; 2581bd670b35SErik Nordmark 2582bd670b35SErik Nordmark /* Length ensured above */ 2583bd670b35SErik Nordmark ASSERT(MBLKL(mp) >= ip_hdr_length + SCTP_COMMON_HDR_LENGTH); 2584bd670b35SErik Nordmark sctph = (sctp_hdr_t *)(rptr + ip_hdr_length); 2585bd670b35SErik Nordmark 2586bd670b35SErik Nordmark /* get the ports */ 2587bd670b35SErik Nordmark ports = *(uint32_t *)&sctph->sh_sport; 2588bd670b35SErik Nordmark 2589bd670b35SErik Nordmark IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst); 2590bd670b35SErik Nordmark IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src); 2591bd670b35SErik Nordmark if (iraflags & IRAF_SCTP_CSUM_ERR) { 2592bd670b35SErik Nordmark /* 2593bd670b35SErik Nordmark * No potential sctp checksum errors go to the Sun 2594bd670b35SErik Nordmark * sctp stack however they might be Adler-32 summed 2595bd670b35SErik Nordmark * packets a userland stack bound to a raw IP socket 2596bd670b35SErik Nordmark * could reasonably use. Note though that Adler-32 is 2597bd670b35SErik Nordmark * a long deprecated algorithm and customer sctp 2598bd670b35SErik Nordmark * networks should eventually migrate to CRC-32 at 2599bd670b35SErik Nordmark * which time this facility should be removed. 
2600bd670b35SErik Nordmark */ 2601bd670b35SErik Nordmark ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2602bd670b35SErik Nordmark return; 2603bd670b35SErik Nordmark } 2604a5407c02SAnil udupa connp = sctp_fanout(&map_src, &map_dst, ports, ira, mp, 2605a5407c02SAnil udupa sctps, sctph); 2606bd670b35SErik Nordmark if (connp == NULL) { 2607bd670b35SErik Nordmark /* Check for raw socket or OOTB handling */ 2608bd670b35SErik Nordmark ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2609bd670b35SErik Nordmark return; 2610bd670b35SErik Nordmark } 2611bd670b35SErik Nordmark if (connp->conn_incoming_ifindex != 0 && 2612bd670b35SErik Nordmark connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2613bd670b35SErik Nordmark CONN_DEC_REF(connp); 2614bd670b35SErik Nordmark /* Check for raw socket or OOTB handling */ 2615bd670b35SErik Nordmark ip_fanout_sctp_raw(mp, ipha, NULL, ports, ira); 2616bd670b35SErik Nordmark return; 2617bd670b35SErik Nordmark } 2618bd670b35SErik Nordmark 2619bd670b35SErik Nordmark /* Found a client; up it goes */ 2620bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2621bd670b35SErik Nordmark sctp_input(connp, ipha, NULL, mp, ira); 2622bd670b35SErik Nordmark /* sctp_input does a rele of the sctp_t */ 2623bd670b35SErik Nordmark return; 2624bd670b35SErik Nordmark } 2625bd670b35SErik Nordmark 2626bd670b35SErik Nordmark case IPPROTO_UDP: 2627bd670b35SErik Nordmark /* First mblk contains IP+UDP headers as checked above */ 2628bd670b35SErik Nordmark ASSERT(MBLKL(mp) >= ip_hdr_length + UDPH_SIZE); 2629bd670b35SErik Nordmark 2630bd670b35SErik Nordmark if (iraflags & IRAF_MULTIBROADCAST) { 2631bd670b35SErik Nordmark uint16_t *up; /* Pointer to ports in ULP header */ 2632bd670b35SErik Nordmark 2633bd670b35SErik Nordmark up = (uint16_t *)((uchar_t *)ipha + ip_hdr_length); 2634bd670b35SErik Nordmark ip_fanout_udp_multi_v4(mp, ipha, up[1], up[0], ira); 2635bd670b35SErik Nordmark return; 2636bd670b35SErik Nordmark } 2637bd670b35SErik Nordmark 
2638bd670b35SErik Nordmark /* Look for AF_INET or AF_INET6 that matches */ 2639bd670b35SErik Nordmark connp = ipcl_classify_v4(mp, IPPROTO_UDP, ip_hdr_length, 2640bd670b35SErik Nordmark ira, ipst); 2641bd670b35SErik Nordmark if (connp == NULL) { 2642bd670b35SErik Nordmark no_udp_match: 2643bd670b35SErik Nordmark if (ipst->ips_ipcl_proto_fanout_v4[IPPROTO_UDP]. 2644bd670b35SErik Nordmark connf_head != NULL) { 2645bd670b35SErik Nordmark ASSERT(ira->ira_protocol == IPPROTO_UDP); 2646bd670b35SErik Nordmark ip_fanout_proto_v4(mp, ipha, ira); 2647bd670b35SErik Nordmark } else { 2648bd670b35SErik Nordmark ip_fanout_send_icmp_v4(mp, 2649bd670b35SErik Nordmark ICMP_DEST_UNREACHABLE, 2650bd670b35SErik Nordmark ICMP_PORT_UNREACHABLE, ira); 2651bd670b35SErik Nordmark } 2652bd670b35SErik Nordmark return; 2653bd670b35SErik Nordmark 2654bd670b35SErik Nordmark } 2655bd670b35SErik Nordmark if (connp->conn_incoming_ifindex != 0 && 2656bd670b35SErik Nordmark connp->conn_incoming_ifindex != ira->ira_ruifindex) { 2657bd670b35SErik Nordmark CONN_DEC_REF(connp); 2658bd670b35SErik Nordmark goto no_udp_match; 2659bd670b35SErik Nordmark } 2660bd670b35SErik Nordmark if (IPCL_IS_NONSTR(connp) ? 
connp->conn_flow_cntrld : 2661bd670b35SErik Nordmark !canputnext(connp->conn_rq)) { 2662bd670b35SErik Nordmark CONN_DEC_REF(connp); 2663bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, udpIfStatsInOverflows); 2664bd670b35SErik Nordmark ip_drop_input("udpIfStatsInOverflows", mp, ill); 2665bd670b35SErik Nordmark freemsg(mp); 2666bd670b35SErik Nordmark return; 2667bd670b35SErik Nordmark } 2668bd670b35SErik Nordmark if (CONN_INBOUND_POLICY_PRESENT(connp, ipss) || 2669bd670b35SErik Nordmark (iraflags & IRAF_IPSEC_SECURE)) { 2670bd670b35SErik Nordmark mp = ipsec_check_inbound_policy(mp, connp, 2671bd670b35SErik Nordmark ipha, NULL, ira); 2672bd670b35SErik Nordmark if (mp == NULL) { 2673bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2674bd670b35SErik Nordmark /* Note that mp is NULL */ 2675bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2676bd670b35SErik Nordmark CONN_DEC_REF(connp); 2677bd670b35SErik Nordmark return; 2678bd670b35SErik Nordmark } 2679bd670b35SErik Nordmark } 2680bd670b35SErik Nordmark /* 2681bd670b35SErik Nordmark * Remove 0-spi if it's 0, or move everything behind 2682bd670b35SErik Nordmark * the UDP header over it and forward to ESP via 2683bd670b35SErik Nordmark * ip_fanout_v4(). 2684bd670b35SErik Nordmark */ 2685bd670b35SErik Nordmark if (connp->conn_udp->udp_nat_t_endpoint) { 2686bd670b35SErik Nordmark if (iraflags & IRAF_IPSEC_SECURE) { 2687bd670b35SErik Nordmark ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2688bd670b35SErik Nordmark DROPPER(ipss, ipds_esp_nat_t_ipsec), 2689bd670b35SErik Nordmark &ipss->ipsec_dropper); 2690bd670b35SErik Nordmark CONN_DEC_REF(connp); 2691bd670b35SErik Nordmark return; 2692bd670b35SErik Nordmark } 2693bd670b35SErik Nordmark 2694bd670b35SErik Nordmark mp = zero_spi_check(mp, ira); 2695bd670b35SErik Nordmark if (mp == NULL) { 2696bd670b35SErik Nordmark /* 2697bd670b35SErik Nordmark * Packet was consumed - probably sent to 2698bd670b35SErik Nordmark * ip_fanout_v4. 
2699bd670b35SErik Nordmark */ 2700bd670b35SErik Nordmark CONN_DEC_REF(connp); 2701bd670b35SErik Nordmark return; 2702bd670b35SErik Nordmark } 2703bd670b35SErik Nordmark /* Else continue like a normal UDP packet. */ 2704bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 2705bd670b35SErik Nordmark protocol = ipha->ipha_protocol; 2706bd670b35SErik Nordmark ira->ira_protocol = protocol; 2707bd670b35SErik Nordmark } 2708bd670b35SErik Nordmark /* Found a client; up it goes */ 2709bd670b35SErik Nordmark IP_STAT(ipst, ip_udp_fannorm); 2710bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2711bd670b35SErik Nordmark ira->ira_ill = ira->ira_rill = NULL; 2712bd670b35SErik Nordmark (connp->conn_recv)(connp, mp, NULL, ira); 2713bd670b35SErik Nordmark CONN_DEC_REF(connp); 2714bd670b35SErik Nordmark ira->ira_ill = ill; 2715bd670b35SErik Nordmark ira->ira_rill = rill; 2716bd670b35SErik Nordmark return; 2717bd670b35SErik Nordmark default: 2718bd670b35SErik Nordmark break; 2719bd670b35SErik Nordmark } 2720bd670b35SErik Nordmark 2721bd670b35SErik Nordmark /* 2722bd670b35SErik Nordmark * Clear hardware checksumming flag as it is currently only 2723bd670b35SErik Nordmark * used by TCP and UDP. 2724bd670b35SErik Nordmark */ 2725bd670b35SErik Nordmark DB_CKSUMFLAGS(mp) = 0; 2726bd670b35SErik Nordmark 2727bd670b35SErik Nordmark switch (protocol) { 2728bd670b35SErik Nordmark case IPPROTO_ICMP: 2729bd670b35SErik Nordmark /* 2730bd670b35SErik Nordmark * We need to accomodate icmp messages coming in clear 2731bd670b35SErik Nordmark * until we get everything secure from the wire. If 2732bd670b35SErik Nordmark * icmp_accept_clear_messages is zero we check with 2733bd670b35SErik Nordmark * the global policy and act accordingly. If it is 2734bd670b35SErik Nordmark * non-zero, we accept the message without any checks. 2735bd670b35SErik Nordmark * But *this does not mean* that this will be delivered 2736bd670b35SErik Nordmark * to RAW socket clients. 
By accepting we might send 2737bd670b35SErik Nordmark * replies back, change our MTU value etc., 2738bd670b35SErik Nordmark * but delivery to the ULP/clients depends on their 2739bd670b35SErik Nordmark * policy dispositions. 2740bd670b35SErik Nordmark */ 2741bd670b35SErik Nordmark if (ipst->ips_icmp_accept_clear_messages == 0) { 2742bd670b35SErik Nordmark mp = ipsec_check_global_policy(mp, NULL, 2743bd670b35SErik Nordmark ipha, NULL, ira, ns); 2744bd670b35SErik Nordmark if (mp == NULL) 2745bd670b35SErik Nordmark return; 2746bd670b35SErik Nordmark } 2747bd670b35SErik Nordmark 2748bd670b35SErik Nordmark /* 2749bd670b35SErik Nordmark * On a labeled system, we have to check whether the zone 2750bd670b35SErik Nordmark * itself is permitted to receive raw traffic. 2751bd670b35SErik Nordmark */ 2752bd670b35SErik Nordmark if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2753bd670b35SErik Nordmark if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 2754bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_icmp_mib, icmpInErrors); 2755bd670b35SErik Nordmark ip_drop_input("tsol_can_accept_raw", mp, ill); 2756bd670b35SErik Nordmark freemsg(mp); 2757bd670b35SErik Nordmark return; 2758bd670b35SErik Nordmark } 2759bd670b35SErik Nordmark } 2760bd670b35SErik Nordmark 2761bd670b35SErik Nordmark /* 2762bd670b35SErik Nordmark * ICMP header checksum, including checksum field, 2763bd670b35SErik Nordmark * should be zero. 
2764bd670b35SErik Nordmark */ 2765bd670b35SErik Nordmark if (IP_CSUM(mp, ip_hdr_length, 0)) { 2766bd670b35SErik Nordmark BUMP_MIB(&ipst->ips_icmp_mib, icmpInCksumErrs); 2767bd670b35SErik Nordmark ip_drop_input("icmpInCksumErrs", mp, ill); 2768bd670b35SErik Nordmark freemsg(mp); 2769bd670b35SErik Nordmark return; 2770bd670b35SErik Nordmark } 2771bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2772bd670b35SErik Nordmark mp = icmp_inbound_v4(mp, ira); 2773bd670b35SErik Nordmark if (mp == NULL) { 2774bd670b35SErik Nordmark /* No need to pass to RAW sockets */ 2775bd670b35SErik Nordmark return; 2776bd670b35SErik Nordmark } 2777bd670b35SErik Nordmark break; 2778bd670b35SErik Nordmark 2779bd670b35SErik Nordmark case IPPROTO_IGMP: 2780bd670b35SErik Nordmark /* 2781bd670b35SErik Nordmark * If we are not willing to accept IGMP packets in clear, 2782bd670b35SErik Nordmark * then check with global policy. 2783bd670b35SErik Nordmark */ 2784bd670b35SErik Nordmark if (ipst->ips_igmp_accept_clear_messages == 0) { 2785bd670b35SErik Nordmark mp = ipsec_check_global_policy(mp, NULL, 2786bd670b35SErik Nordmark ipha, NULL, ira, ns); 2787bd670b35SErik Nordmark if (mp == NULL) 2788bd670b35SErik Nordmark return; 2789bd670b35SErik Nordmark } 2790bd670b35SErik Nordmark if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2791bd670b35SErik Nordmark !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2792bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2793bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2794bd670b35SErik Nordmark freemsg(mp); 2795bd670b35SErik Nordmark return; 2796bd670b35SErik Nordmark } 2797bd670b35SErik Nordmark /* 2798bd670b35SErik Nordmark * Validate checksum 2799bd670b35SErik Nordmark */ 2800bd670b35SErik Nordmark if (IP_CSUM(mp, ip_hdr_length, 0)) { 2801bd670b35SErik Nordmark ++ipst->ips_igmpstat.igps_rcv_badsum; 2802bd670b35SErik Nordmark ip_drop_input("igps_rcv_badsum", mp, ill); 2803bd670b35SErik Nordmark 
freemsg(mp); 2804bd670b35SErik Nordmark return; 2805bd670b35SErik Nordmark } 2806bd670b35SErik Nordmark 2807bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2808bd670b35SErik Nordmark mp = igmp_input(mp, ira); 2809bd670b35SErik Nordmark if (mp == NULL) { 2810bd670b35SErik Nordmark /* Bad packet - discarded by igmp_input */ 2811bd670b35SErik Nordmark return; 2812bd670b35SErik Nordmark } 2813bd670b35SErik Nordmark break; 2814bd670b35SErik Nordmark case IPPROTO_PIM: 2815bd670b35SErik Nordmark /* 2816bd670b35SErik Nordmark * If we are not willing to accept PIM packets in clear, 2817bd670b35SErik Nordmark * then check with global policy. 2818bd670b35SErik Nordmark */ 2819bd670b35SErik Nordmark if (ipst->ips_pim_accept_clear_messages == 0) { 2820bd670b35SErik Nordmark mp = ipsec_check_global_policy(mp, NULL, 2821bd670b35SErik Nordmark ipha, NULL, ira, ns); 2822bd670b35SErik Nordmark if (mp == NULL) 2823bd670b35SErik Nordmark return; 2824bd670b35SErik Nordmark } 2825bd670b35SErik Nordmark if ((ira->ira_flags & IRAF_SYSTEM_LABELED) && 2826bd670b35SErik Nordmark !tsol_can_accept_raw(mp, ira, B_TRUE)) { 2827bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2828bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2829bd670b35SErik Nordmark freemsg(mp); 2830bd670b35SErik Nordmark return; 2831bd670b35SErik Nordmark } 2832bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2833bd670b35SErik Nordmark 2834bd670b35SErik Nordmark /* Checksum is verified in pim_input */ 2835bd670b35SErik Nordmark mp = pim_input(mp, ira); 2836bd670b35SErik Nordmark if (mp == NULL) { 2837bd670b35SErik Nordmark /* Bad packet - discarded by pim_input */ 2838bd670b35SErik Nordmark return; 2839bd670b35SErik Nordmark } 2840bd670b35SErik Nordmark break; 2841bd670b35SErik Nordmark case IPPROTO_AH: 2842bd670b35SErik Nordmark case IPPROTO_ESP: { 2843bd670b35SErik Nordmark /* 2844bd670b35SErik Nordmark * Fast path for AH/ESP. 
2845bd670b35SErik Nordmark */ 2846bd670b35SErik Nordmark netstack_t *ns = ipst->ips_netstack; 2847bd670b35SErik Nordmark ipsec_stack_t *ipss = ns->netstack_ipsec; 2848bd670b35SErik Nordmark 2849bd670b35SErik Nordmark IP_STAT(ipst, ipsec_proto_ahesp); 2850bd670b35SErik Nordmark 2851bd670b35SErik Nordmark if (!ipsec_loaded(ipss)) { 2852bd670b35SErik Nordmark ip_proto_not_sup(mp, ira); 2853bd670b35SErik Nordmark return; 2854bd670b35SErik Nordmark } 2855bd670b35SErik Nordmark 2856bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 2857bd670b35SErik Nordmark /* select inbound SA and have IPsec process the pkt */ 2858bd670b35SErik Nordmark if (protocol == IPPROTO_ESP) { 2859bd670b35SErik Nordmark esph_t *esph; 2860bd670b35SErik Nordmark boolean_t esp_in_udp_sa; 2861bd670b35SErik Nordmark boolean_t esp_in_udp_packet; 2862bd670b35SErik Nordmark 2863bd670b35SErik Nordmark mp = ipsec_inbound_esp_sa(mp, ira, &esph); 2864bd670b35SErik Nordmark if (mp == NULL) 2865bd670b35SErik Nordmark return; 2866bd670b35SErik Nordmark 2867bd670b35SErik Nordmark ASSERT(esph != NULL); 2868bd670b35SErik Nordmark ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2869bd670b35SErik Nordmark ASSERT(ira->ira_ipsec_esp_sa != NULL); 2870bd670b35SErik Nordmark ASSERT(ira->ira_ipsec_esp_sa->ipsa_input_func != NULL); 2871bd670b35SErik Nordmark 2872bd670b35SErik Nordmark esp_in_udp_sa = ((ira->ira_ipsec_esp_sa->ipsa_flags & 2873bd670b35SErik Nordmark IPSA_F_NATT) != 0); 2874bd670b35SErik Nordmark esp_in_udp_packet = 2875bd670b35SErik Nordmark (ira->ira_flags & IRAF_ESP_UDP_PORTS) != 0; 2876bd670b35SErik Nordmark 2877bd670b35SErik Nordmark /* 2878bd670b35SErik Nordmark * The following is a fancy, but quick, way of saying: 2879bd670b35SErik Nordmark * ESP-in-UDP SA and Raw ESP packet --> drop 2880bd670b35SErik Nordmark * OR 2881bd670b35SErik Nordmark * ESP SA and ESP-in-UDP packet --> drop 2882bd670b35SErik Nordmark */ 2883bd670b35SErik Nordmark if (esp_in_udp_sa != esp_in_udp_packet) { 
2884bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2885bd670b35SErik Nordmark ip_drop_packet(mp, B_TRUE, ira->ira_ill, 2886bd670b35SErik Nordmark DROPPER(ipss, ipds_esp_no_sa), 2887bd670b35SErik Nordmark &ipss->ipsec_dropper); 2888bd670b35SErik Nordmark return; 2889bd670b35SErik Nordmark } 2890bd670b35SErik Nordmark mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, 2891bd670b35SErik Nordmark ira); 2892bd670b35SErik Nordmark } else { 2893bd670b35SErik Nordmark ah_t *ah; 2894bd670b35SErik Nordmark 2895bd670b35SErik Nordmark mp = ipsec_inbound_ah_sa(mp, ira, &ah); 2896bd670b35SErik Nordmark if (mp == NULL) 2897bd670b35SErik Nordmark return; 2898bd670b35SErik Nordmark 2899bd670b35SErik Nordmark ASSERT(ah != NULL); 2900bd670b35SErik Nordmark ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 2901bd670b35SErik Nordmark ASSERT(ira->ira_ipsec_ah_sa != NULL); 2902bd670b35SErik Nordmark ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL); 2903bd670b35SErik Nordmark mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, 2904bd670b35SErik Nordmark ira); 2905bd670b35SErik Nordmark } 2906bd670b35SErik Nordmark 2907bd670b35SErik Nordmark if (mp == NULL) { 2908bd670b35SErik Nordmark /* 2909bd670b35SErik Nordmark * Either it failed or is pending. In the former case 2910bd670b35SErik Nordmark * ipIfStatsInDiscards was increased. 2911bd670b35SErik Nordmark */ 2912bd670b35SErik Nordmark return; 2913bd670b35SErik Nordmark } 2914bd670b35SErik Nordmark /* we're done with IPsec processing, send it up */ 2915bd670b35SErik Nordmark ip_input_post_ipsec(mp, ira); 2916bd670b35SErik Nordmark return; 2917bd670b35SErik Nordmark } 2918bd670b35SErik Nordmark case IPPROTO_ENCAP: { 2919bd670b35SErik Nordmark ipha_t *inner_ipha; 2920bd670b35SErik Nordmark 2921bd670b35SErik Nordmark /* 2922bd670b35SErik Nordmark * Handle self-encapsulated packets (IP-in-IP where 2923bd670b35SErik Nordmark * the inner addresses == the outer addresses). 
2924bd670b35SErik Nordmark */ 2925bd670b35SErik Nordmark if ((uchar_t *)ipha + ip_hdr_length + sizeof (ipha_t) > 2926bd670b35SErik Nordmark mp->b_wptr) { 2927bd670b35SErik Nordmark if (ira->ira_pktlen < 2928bd670b35SErik Nordmark ip_hdr_length + sizeof (ipha_t)) { 2929bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, 2930bd670b35SErik Nordmark ipIfStatsInTruncatedPkts); 2931bd670b35SErik Nordmark ip_drop_input("ipIfStatsInTruncatedPkts", 2932bd670b35SErik Nordmark mp, ill); 2933bd670b35SErik Nordmark freemsg(mp); 2934bd670b35SErik Nordmark return; 2935bd670b35SErik Nordmark } 2936bd670b35SErik Nordmark ipha = ip_pullup(mp, (uchar_t *)ipha + ip_hdr_length + 2937bd670b35SErik Nordmark sizeof (ipha_t) - mp->b_rptr, ira); 2938bd670b35SErik Nordmark if (ipha == NULL) { 2939bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2940bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2941bd670b35SErik Nordmark freemsg(mp); 2942bd670b35SErik Nordmark return; 2943bd670b35SErik Nordmark } 2944bd670b35SErik Nordmark } 2945bd670b35SErik Nordmark inner_ipha = (ipha_t *)((uchar_t *)ipha + ip_hdr_length); 2946bd670b35SErik Nordmark /* 2947bd670b35SErik Nordmark * Check the sanity of the inner IP header. 
2948bd670b35SErik Nordmark */ 2949bd670b35SErik Nordmark if ((IPH_HDR_VERSION(inner_ipha) != IPV4_VERSION)) { 2950bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2951bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2952bd670b35SErik Nordmark freemsg(mp); 2953bd670b35SErik Nordmark return; 2954bd670b35SErik Nordmark } 2955bd670b35SErik Nordmark if (IPH_HDR_LENGTH(inner_ipha) < sizeof (ipha_t)) { 2956bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2957bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2958bd670b35SErik Nordmark freemsg(mp); 2959bd670b35SErik Nordmark return; 2960bd670b35SErik Nordmark } 2961bd670b35SErik Nordmark if (inner_ipha->ipha_src != ipha->ipha_src || 2962bd670b35SErik Nordmark inner_ipha->ipha_dst != ipha->ipha_dst) { 2963bd670b35SErik Nordmark /* We fallthru to iptun fanout below */ 2964bd670b35SErik Nordmark goto iptun; 2965bd670b35SErik Nordmark } 2966bd670b35SErik Nordmark 2967bd670b35SErik Nordmark /* 2968bd670b35SErik Nordmark * Self-encapsulated tunnel packet. Remove 2969bd670b35SErik Nordmark * the outer IP header and fanout again. 2970bd670b35SErik Nordmark * We also need to make sure that the inner 2971bd670b35SErik Nordmark * header is pulled up until options. 
2972bd670b35SErik Nordmark */ 2973bd670b35SErik Nordmark mp->b_rptr = (uchar_t *)inner_ipha; 2974bd670b35SErik Nordmark ipha = inner_ipha; 2975bd670b35SErik Nordmark ip_hdr_length = IPH_HDR_LENGTH(ipha); 2976bd670b35SErik Nordmark if ((uchar_t *)ipha + ip_hdr_length > mp->b_wptr) { 2977bd670b35SErik Nordmark if (ira->ira_pktlen < 2978bd670b35SErik Nordmark (uchar_t *)ipha + ip_hdr_length - mp->b_rptr) { 2979bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, 2980bd670b35SErik Nordmark ipIfStatsInTruncatedPkts); 2981bd670b35SErik Nordmark ip_drop_input("ipIfStatsInTruncatedPkts", 2982bd670b35SErik Nordmark mp, ill); 2983bd670b35SErik Nordmark freemsg(mp); 2984bd670b35SErik Nordmark return; 2985bd670b35SErik Nordmark } 2986bd670b35SErik Nordmark ipha = ip_pullup(mp, 2987bd670b35SErik Nordmark (uchar_t *)ipha + ip_hdr_length - mp->b_rptr, ira); 2988bd670b35SErik Nordmark if (ipha == NULL) { 2989bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 2990bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 2991bd670b35SErik Nordmark freemsg(mp); 2992bd670b35SErik Nordmark return; 2993bd670b35SErik Nordmark } 2994bd670b35SErik Nordmark } 2995bd670b35SErik Nordmark if (ip_hdr_length > sizeof (ipha_t)) { 2996bd670b35SErik Nordmark /* We got options on the inner packet. */ 2997bd670b35SErik Nordmark ipaddr_t dst = ipha->ipha_dst; 2998bd670b35SErik Nordmark int error = 0; 2999bd670b35SErik Nordmark 3000bd670b35SErik Nordmark dst = ip_input_options(ipha, dst, mp, ira, &error); 3001bd670b35SErik Nordmark if (error != 0) { 3002bd670b35SErik Nordmark /* 3003bd670b35SErik Nordmark * An ICMP error has been sent and the packet 3004bd670b35SErik Nordmark * has been dropped. 
3005bd670b35SErik Nordmark */ 3006bd670b35SErik Nordmark return; 3007bd670b35SErik Nordmark } 3008bd670b35SErik Nordmark if (dst != ipha->ipha_dst) { 3009bd670b35SErik Nordmark /* 3010bd670b35SErik Nordmark * Someone put a source-route in 3011bd670b35SErik Nordmark * the inside header of a self- 3012bd670b35SErik Nordmark * encapsulated packet. Drop it 3013bd670b35SErik Nordmark * with extreme prejudice and let 3014bd670b35SErik Nordmark * the sender know. 3015bd670b35SErik Nordmark */ 3016bd670b35SErik Nordmark ip_drop_input("ICMP_SOURCE_ROUTE_FAILED", 3017bd670b35SErik Nordmark mp, ill); 3018bd670b35SErik Nordmark icmp_unreachable(mp, ICMP_SOURCE_ROUTE_FAILED, 3019bd670b35SErik Nordmark ira); 3020bd670b35SErik Nordmark return; 3021bd670b35SErik Nordmark } 3022bd670b35SErik Nordmark } 3023bd670b35SErik Nordmark if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) { 3024bd670b35SErik Nordmark /* 3025bd670b35SErik Nordmark * This means that somebody is sending 3026bd670b35SErik Nordmark * Self-encapsualted packets without AH/ESP. 3027bd670b35SErik Nordmark * 3028bd670b35SErik Nordmark * Send this packet to find a tunnel endpoint. 3029bd670b35SErik Nordmark * if I can't find one, an ICMP 3030bd670b35SErik Nordmark * PROTOCOL_UNREACHABLE will get sent. 3031bd670b35SErik Nordmark */ 3032bd670b35SErik Nordmark protocol = ipha->ipha_protocol; 3033bd670b35SErik Nordmark ira->ira_protocol = protocol; 3034bd670b35SErik Nordmark goto iptun; 3035bd670b35SErik Nordmark } 3036bd670b35SErik Nordmark 3037bd670b35SErik Nordmark /* Update based on removed IP header */ 3038bd670b35SErik Nordmark ira->ira_ip_hdr_length = ip_hdr_length; 3039bd670b35SErik Nordmark ira->ira_pktlen = ntohs(ipha->ipha_length); 3040bd670b35SErik Nordmark 3041bd670b35SErik Nordmark if (ira->ira_flags & IRAF_IPSEC_DECAPS) { 3042bd670b35SErik Nordmark /* 3043bd670b35SErik Nordmark * This packet is self-encapsulated multiple 3044bd670b35SErik Nordmark * times. We don't want to recurse infinitely. 
3045bd670b35SErik Nordmark * To keep it simple, drop the packet. 3046bd670b35SErik Nordmark */ 3047bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3048bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 3049bd670b35SErik Nordmark freemsg(mp); 3050bd670b35SErik Nordmark return; 3051bd670b35SErik Nordmark } 3052bd670b35SErik Nordmark ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE); 3053bd670b35SErik Nordmark ira->ira_flags |= IRAF_IPSEC_DECAPS; 3054bd670b35SErik Nordmark 3055bd670b35SErik Nordmark ip_input_post_ipsec(mp, ira); 3056bd670b35SErik Nordmark return; 3057bd670b35SErik Nordmark } 3058bd670b35SErik Nordmark 3059bd670b35SErik Nordmark iptun: /* IPPROTO_ENCAPS that is not self-encapsulated */ 3060bd670b35SErik Nordmark case IPPROTO_IPV6: 3061bd670b35SErik Nordmark /* iptun will verify trusted label */ 3062bd670b35SErik Nordmark connp = ipcl_classify_v4(mp, protocol, ip_hdr_length, 3063bd670b35SErik Nordmark ira, ipst); 3064bd670b35SErik Nordmark if (connp != NULL) { 3065bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCInDelivers); 3066bd670b35SErik Nordmark ira->ira_ill = ira->ira_rill = NULL; 3067bd670b35SErik Nordmark (connp->conn_recv)(connp, mp, NULL, ira); 3068bd670b35SErik Nordmark CONN_DEC_REF(connp); 3069bd670b35SErik Nordmark ira->ira_ill = ill; 3070bd670b35SErik Nordmark ira->ira_rill = rill; 3071bd670b35SErik Nordmark return; 3072bd670b35SErik Nordmark } 3073bd670b35SErik Nordmark /* FALLTHRU */ 3074bd670b35SErik Nordmark default: 3075bd670b35SErik Nordmark /* 3076bd670b35SErik Nordmark * On a labeled system, we have to check whether the zone 3077bd670b35SErik Nordmark * itself is permitted to receive raw traffic. 
3078bd670b35SErik Nordmark */ 3079bd670b35SErik Nordmark if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 3080bd670b35SErik Nordmark if (!tsol_can_accept_raw(mp, ira, B_FALSE)) { 3081bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3082bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 3083bd670b35SErik Nordmark freemsg(mp); 3084bd670b35SErik Nordmark return; 3085bd670b35SErik Nordmark } 3086bd670b35SErik Nordmark } 3087bd670b35SErik Nordmark break; 3088bd670b35SErik Nordmark } 3089bd670b35SErik Nordmark 3090bd670b35SErik Nordmark /* 3091bd670b35SErik Nordmark * The above input functions may have returned the pulled up message. 3092bd670b35SErik Nordmark * So ipha need to be reinitialized. 3093bd670b35SErik Nordmark */ 3094bd670b35SErik Nordmark ipha = (ipha_t *)mp->b_rptr; 3095bd670b35SErik Nordmark ira->ira_protocol = protocol = ipha->ipha_protocol; 3096bd670b35SErik Nordmark if (ipst->ips_ipcl_proto_fanout_v4[protocol].connf_head == NULL) { 3097bd670b35SErik Nordmark /* 3098bd670b35SErik Nordmark * No user-level listener for these packets packets. 3099bd670b35SErik Nordmark * Check for IPPROTO_ENCAP... 3100bd670b35SErik Nordmark */ 3101bd670b35SErik Nordmark if (protocol == IPPROTO_ENCAP && ipst->ips_ip_g_mrouter) { 3102bd670b35SErik Nordmark /* 3103bd670b35SErik Nordmark * Check policy here, 3104bd670b35SErik Nordmark * THEN ship off to ip_mroute_decap(). 3105bd670b35SErik Nordmark * 3106bd670b35SErik Nordmark * BTW, If I match a configured IP-in-IP 3107bd670b35SErik Nordmark * tunnel above, this path will not be reached, and 3108bd670b35SErik Nordmark * ip_mroute_decap will never be called. 3109bd670b35SErik Nordmark */ 3110bd670b35SErik Nordmark mp = ipsec_check_global_policy(mp, connp, 3111bd670b35SErik Nordmark ipha, NULL, ira, ns); 3112bd670b35SErik Nordmark if (mp != NULL) { 3113bd670b35SErik Nordmark ip_mroute_decap(mp, ira); 3114bd670b35SErik Nordmark } /* Else we already freed everything! 
*/ 3115bd670b35SErik Nordmark } else { 3116bd670b35SErik Nordmark ip_proto_not_sup(mp, ira); 3117bd670b35SErik Nordmark } 3118bd670b35SErik Nordmark return; 3119bd670b35SErik Nordmark } 3120bd670b35SErik Nordmark 3121bd670b35SErik Nordmark /* 3122bd670b35SErik Nordmark * Handle fanout to raw sockets. There 3123bd670b35SErik Nordmark * can be more than one stream bound to a particular 3124bd670b35SErik Nordmark * protocol. When this is the case, each one gets a copy 3125bd670b35SErik Nordmark * of any incoming packets. 3126bd670b35SErik Nordmark */ 3127bd670b35SErik Nordmark ASSERT(ira->ira_protocol == ipha->ipha_protocol); 3128bd670b35SErik Nordmark ip_fanout_proto_v4(mp, ipha, ira); 3129bd670b35SErik Nordmark return; 3130bd670b35SErik Nordmark 3131bd670b35SErik Nordmark discard: 3132bd670b35SErik Nordmark BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards); 3133bd670b35SErik Nordmark ip_drop_input("ipIfStatsInDiscards", mp, ill); 3134bd670b35SErik Nordmark freemsg(mp); 3135bd670b35SErik Nordmark #undef rptr 3136bd670b35SErik Nordmark } 3137