1da14cebeSEric Cheng /* 2da14cebeSEric Cheng * CDDL HEADER START 3da14cebeSEric Cheng * 4da14cebeSEric Cheng * The contents of this file are subject to the terms of the 5da14cebeSEric Cheng * Common Development and Distribution License (the "License"). 6da14cebeSEric Cheng * You may not use this file except in compliance with the License. 7da14cebeSEric Cheng * 8da14cebeSEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9da14cebeSEric Cheng * or http://www.opensolaris.org/os/licensing. 10da14cebeSEric Cheng * See the License for the specific language governing permissions 11da14cebeSEric Cheng * and limitations under the License. 12da14cebeSEric Cheng * 13da14cebeSEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 14da14cebeSEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15da14cebeSEric Cheng * If applicable, add the following below this CDDL HEADER, with the 16da14cebeSEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 17da14cebeSEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 18da14cebeSEric Cheng * 19da14cebeSEric Cheng * CDDL HEADER END 20da14cebeSEric Cheng */ 21da14cebeSEric Cheng /* 22*5cd376e8SJimmy Vetayases * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 23da14cebeSEric Cheng */ 24da14cebeSEric Cheng 25da14cebeSEric Cheng /* 26da14cebeSEric Cheng * MAC Services Module - misc utilities 27da14cebeSEric Cheng */ 28da14cebeSEric Cheng 29da14cebeSEric Cheng #include <sys/types.h> 30da14cebeSEric Cheng #include <sys/mac.h> 31da14cebeSEric Cheng #include <sys/mac_impl.h> 32da14cebeSEric Cheng #include <sys/mac_client_priv.h> 33da14cebeSEric Cheng #include <sys/mac_client_impl.h> 34da14cebeSEric Cheng #include <sys/mac_soft_ring.h> 35da14cebeSEric Cheng #include <sys/strsubr.h> 36da14cebeSEric Cheng #include <sys/strsun.h> 37da14cebeSEric Cheng #include <sys/vlan.h> 38da14cebeSEric Cheng #include <sys/pattr.h> 39da14cebeSEric Cheng #include <sys/pci_tools.h> 40da14cebeSEric Cheng #include <inet/ip.h> 41da14cebeSEric Cheng #include <inet/ip_impl.h> 42da14cebeSEric Cheng #include <inet/ip6.h> 43da14cebeSEric Cheng #include <sys/vtrace.h> 44da14cebeSEric Cheng #include <sys/dlpi.h> 45da14cebeSEric Cheng #include <sys/sunndi.h> 46ae6aa22aSVenugopal Iyer #include <inet/ipsec_impl.h> 47ae6aa22aSVenugopal Iyer #include <inet/sadb.h> 48ae6aa22aSVenugopal Iyer #include <inet/ipsecesp.h> 49ae6aa22aSVenugopal Iyer #include <inet/ipsecah.h> 50da14cebeSEric Cheng 51da14cebeSEric Cheng /* 52da14cebeSEric Cheng * Copy an mblk, preserving its hardware checksum flags. 53da14cebeSEric Cheng */ 54da14cebeSEric Cheng static mblk_t * 55da14cebeSEric Cheng mac_copymsg_cksum(mblk_t *mp) 56da14cebeSEric Cheng { 57da14cebeSEric Cheng mblk_t *mp1; 58da14cebeSEric Cheng uint32_t start, stuff, end, value, flags; 59da14cebeSEric Cheng 60da14cebeSEric Cheng mp1 = copymsg(mp); 61da14cebeSEric Cheng if (mp1 == NULL) 62da14cebeSEric Cheng return (NULL); 63da14cebeSEric Cheng 64da14cebeSEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 65da14cebeSEric Cheng (void) hcksum_assoc(mp1, NULL, NULL, start, stuff, end, value, 66da14cebeSEric Cheng flags, KM_NOSLEEP); 67da14cebeSEric Cheng 68da14cebeSEric Cheng return (mp1); 69da14cebeSEric Cheng } 70da14cebeSEric Cheng 71da14cebeSEric Cheng /* 72da14cebeSEric Cheng * Copy an mblk chain, presenting the hardware checksum flags of the 73da14cebeSEric Cheng * individual mblks. 74da14cebeSEric Cheng */ 75da14cebeSEric Cheng mblk_t * 76da14cebeSEric Cheng mac_copymsgchain_cksum(mblk_t *mp) 77da14cebeSEric Cheng { 78da14cebeSEric Cheng mblk_t *nmp = NULL; 79da14cebeSEric Cheng mblk_t **nmpp = &nmp; 80da14cebeSEric Cheng 81da14cebeSEric Cheng for (; mp != NULL; mp = mp->b_next) { 82da14cebeSEric Cheng if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) { 83da14cebeSEric Cheng freemsgchain(nmp); 84da14cebeSEric Cheng return (NULL); 85da14cebeSEric Cheng } 86da14cebeSEric Cheng 87da14cebeSEric Cheng nmpp = &((*nmpp)->b_next); 88da14cebeSEric Cheng } 89da14cebeSEric Cheng 90da14cebeSEric Cheng return (nmp); 91da14cebeSEric Cheng } 92da14cebeSEric Cheng 93da14cebeSEric Cheng /* 94da14cebeSEric Cheng * Process the specified mblk chain for proper handling of hardware 95da14cebeSEric Cheng * checksum offload. This routine is invoked for loopback traffic 96da14cebeSEric Cheng * between MAC clients. 97da14cebeSEric Cheng * The function handles a NULL mblk chain passed as argument. 98da14cebeSEric Cheng */ 99da14cebeSEric Cheng mblk_t * 100da14cebeSEric Cheng mac_fix_cksum(mblk_t *mp_chain) 101da14cebeSEric Cheng { 102da14cebeSEric Cheng mblk_t *mp, *prev = NULL, *new_chain = mp_chain, *mp1; 103da14cebeSEric Cheng uint32_t flags, start, stuff, end, value; 104da14cebeSEric Cheng 105da14cebeSEric Cheng for (mp = mp_chain; mp != NULL; prev = mp, mp = mp->b_next) { 106da14cebeSEric Cheng uint16_t len; 107da14cebeSEric Cheng uint32_t offset; 108da14cebeSEric Cheng struct ether_header *ehp; 109da14cebeSEric Cheng uint16_t sap; 110da14cebeSEric Cheng 111da14cebeSEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, 112da14cebeSEric Cheng &flags); 113da14cebeSEric Cheng if (flags == 0) 114da14cebeSEric Cheng continue; 115da14cebeSEric Cheng 116da14cebeSEric Cheng /* 117da14cebeSEric Cheng * Since the processing of checksum offload for loopback 118da14cebeSEric Cheng * traffic requires modification of the packet contents, 119da14cebeSEric Cheng * ensure sure that we are always modifying our own copy. 120da14cebeSEric Cheng */ 121da14cebeSEric Cheng if (DB_REF(mp) > 1) { 122da14cebeSEric Cheng mp1 = copymsg(mp); 123da14cebeSEric Cheng if (mp1 == NULL) 124da14cebeSEric Cheng continue; 125da14cebeSEric Cheng mp1->b_next = mp->b_next; 126da14cebeSEric Cheng mp->b_next = NULL; 127da14cebeSEric Cheng freemsg(mp); 128da14cebeSEric Cheng if (prev != NULL) 129da14cebeSEric Cheng prev->b_next = mp1; 130da14cebeSEric Cheng else 131da14cebeSEric Cheng new_chain = mp1; 132da14cebeSEric Cheng mp = mp1; 133da14cebeSEric Cheng } 134da14cebeSEric Cheng 135da14cebeSEric Cheng /* 136da14cebeSEric Cheng * Ethernet, and optionally VLAN header. 137da14cebeSEric Cheng */ 138da14cebeSEric Cheng /* LINTED: improper alignment cast */ 139da14cebeSEric Cheng ehp = (struct ether_header *)mp->b_rptr; 140da14cebeSEric Cheng if (ntohs(ehp->ether_type) == VLAN_TPID) { 141da14cebeSEric Cheng struct ether_vlan_header *evhp; 142da14cebeSEric Cheng 143da14cebeSEric Cheng ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 144da14cebeSEric Cheng /* LINTED: improper alignment cast */ 145da14cebeSEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 146da14cebeSEric Cheng sap = ntohs(evhp->ether_type); 147da14cebeSEric Cheng offset = sizeof (struct ether_vlan_header); 148da14cebeSEric Cheng } else { 149da14cebeSEric Cheng sap = ntohs(ehp->ether_type); 150da14cebeSEric Cheng offset = sizeof (struct ether_header); 151da14cebeSEric Cheng } 152da14cebeSEric Cheng 153da14cebeSEric Cheng if (MBLKL(mp) <= offset) { 154da14cebeSEric Cheng offset -= MBLKL(mp); 155da14cebeSEric Cheng if (mp->b_cont == NULL) { 156da14cebeSEric Cheng /* corrupted packet, skip it */ 157da14cebeSEric Cheng if (prev != NULL) 158da14cebeSEric Cheng prev->b_next = mp->b_next; 159da14cebeSEric Cheng else 160da14cebeSEric Cheng new_chain = mp->b_next; 161da14cebeSEric Cheng mp1 = mp->b_next; 162da14cebeSEric Cheng mp->b_next = NULL; 163da14cebeSEric Cheng freemsg(mp); 164da14cebeSEric Cheng mp = mp1; 165da14cebeSEric Cheng continue; 166da14cebeSEric Cheng } 167da14cebeSEric Cheng mp = mp->b_cont; 168da14cebeSEric Cheng } 169da14cebeSEric Cheng 170da14cebeSEric Cheng if (flags & (HCK_FULLCKSUM | HCK_IPV4_HDRCKSUM)) { 171da14cebeSEric Cheng ipha_t *ipha = NULL; 172da14cebeSEric Cheng 173da14cebeSEric Cheng /* 174da14cebeSEric Cheng * In order to compute the full and header 175da14cebeSEric Cheng * checksums, we need to find and parse 176da14cebeSEric Cheng * the IP and/or ULP headers. 177da14cebeSEric Cheng */ 178da14cebeSEric Cheng 179da14cebeSEric Cheng sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 180da14cebeSEric Cheng 181da14cebeSEric Cheng /* 182da14cebeSEric Cheng * IP header. 183da14cebeSEric Cheng */ 184da14cebeSEric Cheng if (sap != ETHERTYPE_IP) 185da14cebeSEric Cheng continue; 186da14cebeSEric Cheng 187da14cebeSEric Cheng ASSERT(MBLKL(mp) >= offset + sizeof (ipha_t)); 188da14cebeSEric Cheng /* LINTED: improper alignment cast */ 189da14cebeSEric Cheng ipha = (ipha_t *)(mp->b_rptr + offset); 190da14cebeSEric Cheng 191da14cebeSEric Cheng if (flags & HCK_FULLCKSUM) { 192da14cebeSEric Cheng ipaddr_t src, dst; 193da14cebeSEric Cheng uint32_t cksum; 194da14cebeSEric Cheng uint16_t *up; 195da14cebeSEric Cheng uint8_t proto; 196da14cebeSEric Cheng 197da14cebeSEric Cheng /* 198da14cebeSEric Cheng * Pointer to checksum field in ULP header. 199da14cebeSEric Cheng */ 200da14cebeSEric Cheng proto = ipha->ipha_protocol; 201da14cebeSEric Cheng ASSERT(ipha->ipha_version_and_hdr_length == 202da14cebeSEric Cheng IP_SIMPLE_HDR_VERSION); 203fd0939efSDavid Edmondson 204fd0939efSDavid Edmondson switch (proto) { 205fd0939efSDavid Edmondson case IPPROTO_TCP: 206da14cebeSEric Cheng /* LINTED: improper alignment cast */ 207da14cebeSEric Cheng up = IPH_TCPH_CHECKSUMP(ipha, 208da14cebeSEric Cheng IP_SIMPLE_HDR_LENGTH); 209fd0939efSDavid Edmondson break; 210fd0939efSDavid Edmondson 211fd0939efSDavid Edmondson case IPPROTO_UDP: 212da14cebeSEric Cheng /* LINTED: improper alignment cast */ 213da14cebeSEric Cheng up = IPH_UDPH_CHECKSUMP(ipha, 214da14cebeSEric Cheng IP_SIMPLE_HDR_LENGTH); 215fd0939efSDavid Edmondson break; 216fd0939efSDavid Edmondson 217fd0939efSDavid Edmondson default: 218fd0939efSDavid Edmondson cmn_err(CE_WARN, "mac_fix_cksum: " 219fd0939efSDavid Edmondson "unexpected protocol: %d", proto); 220fd0939efSDavid Edmondson continue; 221da14cebeSEric Cheng } 222da14cebeSEric Cheng 223da14cebeSEric Cheng /* 224da14cebeSEric Cheng * Pseudo-header checksum. 225da14cebeSEric Cheng */ 226da14cebeSEric Cheng src = ipha->ipha_src; 227da14cebeSEric Cheng dst = ipha->ipha_dst; 228da14cebeSEric Cheng len = ntohs(ipha->ipha_length) - 229da14cebeSEric Cheng IP_SIMPLE_HDR_LENGTH; 230da14cebeSEric Cheng 231da14cebeSEric Cheng cksum = (dst >> 16) + (dst & 0xFFFF) + 232da14cebeSEric Cheng (src >> 16) + (src & 0xFFFF); 233da14cebeSEric Cheng cksum += htons(len); 234da14cebeSEric Cheng 235da14cebeSEric Cheng /* 236da14cebeSEric Cheng * The checksum value stored in the packet needs 237da14cebeSEric Cheng * to be correct. Compute it here. 238da14cebeSEric Cheng */ 239da14cebeSEric Cheng *up = 0; 240da14cebeSEric Cheng cksum += (((proto) == IPPROTO_UDP) ? 241da14cebeSEric Cheng IP_UDP_CSUM_COMP : IP_TCP_CSUM_COMP); 242da14cebeSEric Cheng cksum = IP_CSUM(mp, IP_SIMPLE_HDR_LENGTH + 243da14cebeSEric Cheng offset, cksum); 244da14cebeSEric Cheng *(up) = (uint16_t)(cksum ? cksum : ~cksum); 245da14cebeSEric Cheng 2460dc2366fSVenugopal Iyer /* 2470dc2366fSVenugopal Iyer * Flag the packet so that it appears 2480dc2366fSVenugopal Iyer * that the checksum has already been 2490dc2366fSVenugopal Iyer * verified by the hardware. 2500dc2366fSVenugopal Iyer */ 2510dc2366fSVenugopal Iyer flags &= ~HCK_FULLCKSUM; 252da14cebeSEric Cheng flags |= HCK_FULLCKSUM_OK; 2530dc2366fSVenugopal Iyer value = 0; 254da14cebeSEric Cheng } 255da14cebeSEric Cheng 256da14cebeSEric Cheng if (flags & HCK_IPV4_HDRCKSUM) { 257da14cebeSEric Cheng ASSERT(ipha != NULL); 258da14cebeSEric Cheng ipha->ipha_hdr_checksum = 259da14cebeSEric Cheng (uint16_t)ip_csum_hdr(ipha); 2600dc2366fSVenugopal Iyer flags &= ~HCK_IPV4_HDRCKSUM; 2610dc2366fSVenugopal Iyer flags |= HCK_IPV4_HDRCKSUM_OK; 2620dc2366fSVenugopal Iyer 263da14cebeSEric Cheng } 264da14cebeSEric Cheng } 265da14cebeSEric Cheng 266da14cebeSEric Cheng if (flags & HCK_PARTIALCKSUM) { 267da14cebeSEric Cheng uint16_t *up, partial, cksum; 268da14cebeSEric Cheng uchar_t *ipp; /* ptr to beginning of IP header */ 269da14cebeSEric Cheng 270da14cebeSEric Cheng if (mp->b_cont != NULL) { 271da14cebeSEric Cheng mblk_t *mp1; 272da14cebeSEric Cheng 273da14cebeSEric Cheng mp1 = msgpullup(mp, offset + end); 274da14cebeSEric Cheng if (mp1 == NULL) 275da14cebeSEric Cheng continue; 276da14cebeSEric Cheng mp1->b_next = mp->b_next; 277da14cebeSEric Cheng mp->b_next = NULL; 278da14cebeSEric Cheng freemsg(mp); 279da14cebeSEric Cheng if (prev != NULL) 280da14cebeSEric Cheng prev->b_next = mp1; 281da14cebeSEric Cheng else 282da14cebeSEric Cheng new_chain = mp1; 283da14cebeSEric Cheng mp = mp1; 284da14cebeSEric Cheng } 285da14cebeSEric Cheng 286da14cebeSEric Cheng ipp = mp->b_rptr + offset; 287da14cebeSEric Cheng /* LINTED: cast may result in improper alignment */ 288da14cebeSEric Cheng up = (uint16_t *)((uchar_t *)ipp + stuff); 289da14cebeSEric Cheng partial = *up; 290da14cebeSEric Cheng *up = 0; 291da14cebeSEric Cheng 292da14cebeSEric Cheng cksum = IP_BCSUM_PARTIAL(mp->b_rptr + offset + start, 293da14cebeSEric Cheng end - start, partial); 294da14cebeSEric Cheng cksum = ~cksum; 295da14cebeSEric Cheng *up = cksum ? cksum : ~cksum; 296da14cebeSEric Cheng 297da14cebeSEric Cheng /* 298da14cebeSEric Cheng * Since we already computed the whole checksum, 299da14cebeSEric Cheng * indicate to the stack that it has already 300da14cebeSEric Cheng * been verified by the hardware. 301da14cebeSEric Cheng */ 302da14cebeSEric Cheng flags &= ~HCK_PARTIALCKSUM; 3030dc2366fSVenugopal Iyer flags |= HCK_FULLCKSUM_OK; 3040dc2366fSVenugopal Iyer value = 0; 305da14cebeSEric Cheng } 306da14cebeSEric Cheng 307da14cebeSEric Cheng (void) hcksum_assoc(mp, NULL, NULL, start, stuff, end, 308da14cebeSEric Cheng value, flags, KM_NOSLEEP); 309da14cebeSEric Cheng } 310da14cebeSEric Cheng 311da14cebeSEric Cheng return (new_chain); 312da14cebeSEric Cheng } 313da14cebeSEric Cheng 314da14cebeSEric Cheng /* 315da14cebeSEric Cheng * Add VLAN tag to the specified mblk. 316da14cebeSEric Cheng */ 317da14cebeSEric Cheng mblk_t * 318da14cebeSEric Cheng mac_add_vlan_tag(mblk_t *mp, uint_t pri, uint16_t vid) 319da14cebeSEric Cheng { 320da14cebeSEric Cheng mblk_t *hmp; 321da14cebeSEric Cheng struct ether_vlan_header *evhp; 322da14cebeSEric Cheng struct ether_header *ehp; 323da14cebeSEric Cheng uint32_t start, stuff, end, value, flags; 324da14cebeSEric Cheng 325da14cebeSEric Cheng ASSERT(pri != 0 || vid != 0); 326da14cebeSEric Cheng 327da14cebeSEric Cheng /* 328da14cebeSEric Cheng * Allocate an mblk for the new tagged ethernet header, 329da14cebeSEric Cheng * and copy the MAC addresses and ethertype from the 330da14cebeSEric Cheng * original header. 331da14cebeSEric Cheng */ 332da14cebeSEric Cheng 333da14cebeSEric Cheng hmp = allocb(sizeof (struct ether_vlan_header), BPRI_MED); 334da14cebeSEric Cheng if (hmp == NULL) { 335da14cebeSEric Cheng freemsg(mp); 336da14cebeSEric Cheng return (NULL); 337da14cebeSEric Cheng } 338da14cebeSEric Cheng 339da14cebeSEric Cheng evhp = (struct ether_vlan_header *)hmp->b_rptr; 340da14cebeSEric Cheng ehp = (struct ether_header *)mp->b_rptr; 341da14cebeSEric Cheng 342da14cebeSEric Cheng bcopy(ehp, evhp, (ETHERADDRL * 2)); 343da14cebeSEric Cheng evhp->ether_type = ehp->ether_type; 344da14cebeSEric Cheng evhp->ether_tpid = htons(ETHERTYPE_VLAN); 345da14cebeSEric Cheng 346da14cebeSEric Cheng hmp->b_wptr += sizeof (struct ether_vlan_header); 347da14cebeSEric Cheng mp->b_rptr += sizeof (struct ether_header); 348da14cebeSEric Cheng 349da14cebeSEric Cheng /* 350da14cebeSEric Cheng * Free the original message if it's now empty. Link the 351da14cebeSEric Cheng * rest of messages to the header message. 352da14cebeSEric Cheng */ 353da14cebeSEric Cheng hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 354da14cebeSEric Cheng (void) hcksum_assoc(hmp, NULL, NULL, start, stuff, end, value, flags, 355da14cebeSEric Cheng KM_NOSLEEP); 356da14cebeSEric Cheng if (MBLKL(mp) == 0) { 357da14cebeSEric Cheng hmp->b_cont = mp->b_cont; 358da14cebeSEric Cheng freeb(mp); 359da14cebeSEric Cheng } else { 360da14cebeSEric Cheng hmp->b_cont = mp; 361da14cebeSEric Cheng } 362da14cebeSEric Cheng ASSERT(MBLKL(hmp) >= sizeof (struct ether_vlan_header)); 363da14cebeSEric Cheng 364da14cebeSEric Cheng /* 365da14cebeSEric Cheng * Initialize the new TCI (Tag Control Information). 366da14cebeSEric Cheng */ 367da14cebeSEric Cheng evhp->ether_tci = htons(VLAN_TCI(pri, 0, vid)); 368da14cebeSEric Cheng 369da14cebeSEric Cheng return (hmp); 370da14cebeSEric Cheng } 371da14cebeSEric Cheng 372da14cebeSEric Cheng /* 373da14cebeSEric Cheng * Adds a VLAN tag with the specified VID and priority to each mblk of 374da14cebeSEric Cheng * the specified chain. 375da14cebeSEric Cheng */ 376da14cebeSEric Cheng mblk_t * 377da14cebeSEric Cheng mac_add_vlan_tag_chain(mblk_t *mp_chain, uint_t pri, uint16_t vid) 378da14cebeSEric Cheng { 379da14cebeSEric Cheng mblk_t *next_mp, **prev, *mp; 380da14cebeSEric Cheng 381da14cebeSEric Cheng mp = mp_chain; 382da14cebeSEric Cheng prev = &mp_chain; 383da14cebeSEric Cheng 384da14cebeSEric Cheng while (mp != NULL) { 385da14cebeSEric Cheng next_mp = mp->b_next; 386da14cebeSEric Cheng mp->b_next = NULL; 387da14cebeSEric Cheng if ((mp = mac_add_vlan_tag(mp, pri, vid)) == NULL) { 388da14cebeSEric Cheng freemsgchain(next_mp); 389da14cebeSEric Cheng break; 390da14cebeSEric Cheng } 391da14cebeSEric Cheng *prev = mp; 392da14cebeSEric Cheng prev = &mp->b_next; 393da14cebeSEric Cheng mp = mp->b_next = next_mp; 394da14cebeSEric Cheng } 395da14cebeSEric Cheng 396da14cebeSEric Cheng return (mp_chain); 397da14cebeSEric Cheng } 398da14cebeSEric Cheng 399da14cebeSEric Cheng /* 400da14cebeSEric Cheng * Strip VLAN tag 401da14cebeSEric Cheng */ 402da14cebeSEric Cheng mblk_t * 403da14cebeSEric Cheng mac_strip_vlan_tag(mblk_t *mp) 404da14cebeSEric Cheng { 405da14cebeSEric Cheng mblk_t *newmp; 406da14cebeSEric Cheng struct ether_vlan_header *evhp; 407da14cebeSEric Cheng 408da14cebeSEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 409da14cebeSEric Cheng if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN) { 410da14cebeSEric Cheng ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 411da14cebeSEric Cheng 412da14cebeSEric Cheng if (DB_REF(mp) > 1) { 413da14cebeSEric Cheng newmp = copymsg(mp); 414da14cebeSEric Cheng if (newmp == NULL) 415da14cebeSEric Cheng return (NULL); 416da14cebeSEric Cheng freemsg(mp); 417da14cebeSEric Cheng mp = newmp; 418da14cebeSEric Cheng } 419da14cebeSEric Cheng 420da14cebeSEric Cheng evhp = (struct ether_vlan_header *)mp->b_rptr; 421da14cebeSEric Cheng 422da14cebeSEric Cheng ovbcopy(mp->b_rptr, mp->b_rptr + VLAN_TAGSZ, 2 * ETHERADDRL); 423da14cebeSEric Cheng mp->b_rptr += VLAN_TAGSZ; 424da14cebeSEric Cheng } 425da14cebeSEric Cheng return (mp); 426da14cebeSEric Cheng } 427da14cebeSEric Cheng 428da14cebeSEric Cheng /* 429da14cebeSEric Cheng * Strip VLAN tag from each mblk of the chain. 430da14cebeSEric Cheng */ 431da14cebeSEric Cheng mblk_t * 432da14cebeSEric Cheng mac_strip_vlan_tag_chain(mblk_t *mp_chain) 433da14cebeSEric Cheng { 434da14cebeSEric Cheng mblk_t *mp, *next_mp, **prev; 435da14cebeSEric Cheng 436da14cebeSEric Cheng mp = mp_chain; 437da14cebeSEric Cheng prev = &mp_chain; 438da14cebeSEric Cheng 439da14cebeSEric Cheng while (mp != NULL) { 440da14cebeSEric Cheng next_mp = mp->b_next; 441da14cebeSEric Cheng mp->b_next = NULL; 442da14cebeSEric Cheng if ((mp = mac_strip_vlan_tag(mp)) == NULL) { 443da14cebeSEric Cheng freemsgchain(next_mp); 444da14cebeSEric Cheng break; 445da14cebeSEric Cheng } 446da14cebeSEric Cheng *prev = mp; 447da14cebeSEric Cheng prev = &mp->b_next; 448da14cebeSEric Cheng mp = mp->b_next = next_mp; 449da14cebeSEric Cheng } 450da14cebeSEric Cheng 451da14cebeSEric Cheng return (mp_chain); 452da14cebeSEric Cheng } 453da14cebeSEric Cheng 454da14cebeSEric Cheng /* 455da14cebeSEric Cheng * Default callback function. Used when the datapath is not yet initialized. 456da14cebeSEric Cheng */ 457da14cebeSEric Cheng /* ARGSUSED */ 458da14cebeSEric Cheng void 459da14cebeSEric Cheng mac_pkt_drop(void *arg, mac_resource_handle_t resource, mblk_t *mp, 460da14cebeSEric Cheng boolean_t loopback) 461da14cebeSEric Cheng { 462da14cebeSEric Cheng mblk_t *mp1 = mp; 463da14cebeSEric Cheng 464da14cebeSEric Cheng while (mp1 != NULL) { 465da14cebeSEric Cheng mp1->b_prev = NULL; 466da14cebeSEric Cheng mp1->b_queue = NULL; 467da14cebeSEric Cheng mp1 = mp1->b_next; 468da14cebeSEric Cheng } 469da14cebeSEric Cheng freemsgchain(mp); 470da14cebeSEric Cheng } 471da14cebeSEric Cheng 472da14cebeSEric Cheng /* 473da14cebeSEric Cheng * Determines the IPv6 header length accounting for all the optional IPv6 474da14cebeSEric Cheng * headers (hop-by-hop, destination, routing and fragment). The header length 475da14cebeSEric Cheng * and next header value (a transport header) is captured. 476da14cebeSEric Cheng * 477da14cebeSEric Cheng * Returns B_FALSE if all the IP headers are not in the same mblk otherwise 478da14cebeSEric Cheng * returns B_TRUE. 479da14cebeSEric Cheng */ 480da14cebeSEric Cheng boolean_t 4810dc2366fSVenugopal Iyer mac_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr, uint16_t *hdr_length, 4820dc2366fSVenugopal Iyer uint8_t *next_hdr, ip6_frag_t **fragp) 483da14cebeSEric Cheng { 484da14cebeSEric Cheng uint16_t length; 485da14cebeSEric Cheng uint_t ehdrlen; 486da14cebeSEric Cheng uint8_t *whereptr; 487da14cebeSEric Cheng uint8_t *nexthdrp; 488da14cebeSEric Cheng ip6_dest_t *desthdr; 489da14cebeSEric Cheng ip6_rthdr_t *rthdr; 490da14cebeSEric Cheng ip6_frag_t *fraghdr; 491da14cebeSEric Cheng 492da14cebeSEric Cheng if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr) 493da14cebeSEric Cheng return (B_FALSE); 494bd670b35SErik Nordmark ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION); 495da14cebeSEric Cheng length = IPV6_HDR_LEN; 496da14cebeSEric Cheng whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */ 497da14cebeSEric Cheng 4980dc2366fSVenugopal Iyer if (fragp != NULL) 4990dc2366fSVenugopal Iyer *fragp = NULL; 5009820c710SBaban Kenkre 501da14cebeSEric Cheng nexthdrp = &ip6h->ip6_nxt; 502da14cebeSEric Cheng while (whereptr < endptr) { 503da14cebeSEric Cheng /* Is there enough left for len + nexthdr? */ 504da14cebeSEric Cheng if (whereptr + MIN_EHDR_LEN > endptr) 505da14cebeSEric Cheng break; 506da14cebeSEric Cheng 507da14cebeSEric Cheng switch (*nexthdrp) { 508da14cebeSEric Cheng case IPPROTO_HOPOPTS: 509da14cebeSEric Cheng case IPPROTO_DSTOPTS: 510da14cebeSEric Cheng /* Assumes the headers are identical for hbh and dst */ 511da14cebeSEric Cheng desthdr = (ip6_dest_t *)whereptr; 512da14cebeSEric Cheng ehdrlen = 8 * (desthdr->ip6d_len + 1); 513da14cebeSEric Cheng if ((uchar_t *)desthdr + ehdrlen > endptr) 514da14cebeSEric Cheng return (B_FALSE); 515da14cebeSEric Cheng nexthdrp = &desthdr->ip6d_nxt; 516da14cebeSEric Cheng break; 517da14cebeSEric Cheng case IPPROTO_ROUTING: 518da14cebeSEric Cheng rthdr = (ip6_rthdr_t *)whereptr; 519da14cebeSEric Cheng ehdrlen = 8 * (rthdr->ip6r_len + 1); 520da14cebeSEric Cheng if ((uchar_t *)rthdr + ehdrlen > endptr) 521da14cebeSEric Cheng return (B_FALSE); 522da14cebeSEric Cheng nexthdrp = &rthdr->ip6r_nxt; 523da14cebeSEric Cheng break; 524da14cebeSEric Cheng case IPPROTO_FRAGMENT: 525da14cebeSEric Cheng fraghdr = (ip6_frag_t *)whereptr; 526da14cebeSEric Cheng ehdrlen = sizeof (ip6_frag_t); 527da14cebeSEric Cheng if ((uchar_t *)&fraghdr[1] > endptr) 528da14cebeSEric Cheng return (B_FALSE); 529da14cebeSEric Cheng nexthdrp = &fraghdr->ip6f_nxt; 5300dc2366fSVenugopal Iyer if (fragp != NULL) 5310dc2366fSVenugopal Iyer *fragp = fraghdr; 532da14cebeSEric Cheng break; 533da14cebeSEric Cheng case IPPROTO_NONE: 534da14cebeSEric Cheng /* No next header means we're finished */ 535da14cebeSEric Cheng default: 536da14cebeSEric Cheng *hdr_length = length; 537da14cebeSEric Cheng *next_hdr = *nexthdrp; 538da14cebeSEric Cheng return (B_TRUE); 539da14cebeSEric Cheng } 540da14cebeSEric Cheng length += ehdrlen; 541da14cebeSEric Cheng whereptr += ehdrlen; 542da14cebeSEric Cheng *hdr_length = length; 543da14cebeSEric Cheng *next_hdr = *nexthdrp; 544da14cebeSEric Cheng } 545da14cebeSEric Cheng switch (*nexthdrp) { 546da14cebeSEric Cheng case IPPROTO_HOPOPTS: 547da14cebeSEric Cheng case IPPROTO_DSTOPTS: 548da14cebeSEric Cheng case IPPROTO_ROUTING: 549da14cebeSEric Cheng case IPPROTO_FRAGMENT: 550da14cebeSEric Cheng /* 551da14cebeSEric Cheng * If any know extension headers are still to be processed, 552da14cebeSEric Cheng * the packet's malformed (or at least all the IP header(s) are 553da14cebeSEric Cheng * not in the same mblk - and that should never happen. 554da14cebeSEric Cheng */ 555da14cebeSEric Cheng return (B_FALSE); 556da14cebeSEric Cheng 557da14cebeSEric Cheng default: 558da14cebeSEric Cheng /* 559da14cebeSEric Cheng * If we get here, we know that all of the IP headers were in 560da14cebeSEric Cheng * the same mblk, even if the ULP header is in the next mblk. 561da14cebeSEric Cheng */ 562da14cebeSEric Cheng *hdr_length = length; 563da14cebeSEric Cheng *next_hdr = *nexthdrp; 564da14cebeSEric Cheng return (B_TRUE); 565da14cebeSEric Cheng } 566da14cebeSEric Cheng } 567da14cebeSEric Cheng 5680dc2366fSVenugopal Iyer /* 5690dc2366fSVenugopal Iyer * The following set of routines are there to take care of interrupt 5700dc2366fSVenugopal Iyer * re-targeting for legacy (fixed) interrupts. Some older versions 5710dc2366fSVenugopal Iyer * of the popular NICs like e1000g do not support MSI-X interrupts 5720dc2366fSVenugopal Iyer * and they reserve fixed interrupts for RX/TX rings. To re-target 5730dc2366fSVenugopal Iyer * these interrupts, PCITOOL ioctls need to be used. 5740dc2366fSVenugopal Iyer */ 575da14cebeSEric Cheng typedef struct mac_dladm_intr { 576da14cebeSEric Cheng int ino; 577da14cebeSEric Cheng int cpu_id; 578da14cebeSEric Cheng char driver_path[MAXPATHLEN]; 579da14cebeSEric Cheng char nexus_path[MAXPATHLEN]; 580da14cebeSEric Cheng } mac_dladm_intr_t; 581da14cebeSEric Cheng 582da14cebeSEric Cheng /* Bind the interrupt to cpu_num */ 583da14cebeSEric Cheng static int 5847ff178cdSJimmy Vetayases mac_set_intr(ldi_handle_t lh, processorid_t cpu_num, int oldcpuid, int ino) 585da14cebeSEric Cheng { 586da14cebeSEric Cheng pcitool_intr_set_t iset; 587da14cebeSEric Cheng int err; 588da14cebeSEric Cheng 5897ff178cdSJimmy Vetayases iset.old_cpu = oldcpuid; 590da14cebeSEric Cheng iset.ino = ino; 591da14cebeSEric Cheng iset.cpu_id = cpu_num; 592da14cebeSEric Cheng iset.user_version = PCITOOL_VERSION; 593da14cebeSEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_SET_INTR, (intptr_t)&iset, FKIOCTL, 594da14cebeSEric Cheng kcred, NULL); 595da14cebeSEric Cheng 596da14cebeSEric Cheng return (err); 597da14cebeSEric Cheng } 598da14cebeSEric Cheng 599da14cebeSEric Cheng /* 600da14cebeSEric Cheng * Search interrupt information. iget is filled in with the info to search 601da14cebeSEric Cheng */ 602da14cebeSEric Cheng static boolean_t 603da14cebeSEric Cheng mac_search_intrinfo(pcitool_intr_get_t *iget_p, mac_dladm_intr_t *dln) 604da14cebeSEric Cheng { 605da14cebeSEric Cheng int i; 606da14cebeSEric Cheng char driver_path[2 * MAXPATHLEN]; 607da14cebeSEric Cheng 608da14cebeSEric Cheng for (i = 0; i < iget_p->num_devs; i++) { 609da14cebeSEric Cheng (void) strlcpy(driver_path, iget_p->dev[i].path, MAXPATHLEN); 610da14cebeSEric Cheng (void) snprintf(&driver_path[strlen(driver_path)], MAXPATHLEN, 611da14cebeSEric Cheng ":%s%d", iget_p->dev[i].driver_name, 612da14cebeSEric Cheng iget_p->dev[i].dev_inst); 613da14cebeSEric Cheng /* Match the device path for the device path */ 614da14cebeSEric Cheng if (strcmp(driver_path, dln->driver_path) == 0) { 615da14cebeSEric Cheng dln->ino = iget_p->ino; 616da14cebeSEric Cheng dln->cpu_id = iget_p->cpu_id; 617da14cebeSEric Cheng return (B_TRUE); 618da14cebeSEric Cheng } 619da14cebeSEric Cheng } 620da14cebeSEric Cheng return (B_FALSE); 621da14cebeSEric Cheng } 622da14cebeSEric Cheng 623da14cebeSEric Cheng /* 624da14cebeSEric Cheng * Get information about ino, i.e. if this is the interrupt for our 625da14cebeSEric Cheng * device and where it is bound etc. 626da14cebeSEric Cheng */ 627da14cebeSEric Cheng static boolean_t 6287ff178cdSJimmy Vetayases mac_get_single_intr(ldi_handle_t lh, int oldcpuid, int ino, 6297ff178cdSJimmy Vetayases mac_dladm_intr_t *dln) 630da14cebeSEric Cheng { 631da14cebeSEric Cheng pcitool_intr_get_t *iget_p; 632da14cebeSEric Cheng int ipsz; 633da14cebeSEric Cheng int nipsz; 634da14cebeSEric Cheng int err; 635da14cebeSEric Cheng uint8_t inum; 636da14cebeSEric Cheng 637da14cebeSEric Cheng /* 638da14cebeSEric Cheng * Check if SLEEP is OK, i.e if could come here in response to 639da14cebeSEric Cheng * changing the fanout due to some callback from the driver, say 640da14cebeSEric Cheng * link speed changes. 641da14cebeSEric Cheng */ 642da14cebeSEric Cheng ipsz = PCITOOL_IGET_SIZE(0); 643da14cebeSEric Cheng iget_p = kmem_zalloc(ipsz, KM_SLEEP); 644da14cebeSEric Cheng 645da14cebeSEric Cheng iget_p->num_devs_ret = 0; 646da14cebeSEric Cheng iget_p->user_version = PCITOOL_VERSION; 6477ff178cdSJimmy Vetayases iget_p->cpu_id = oldcpuid; 648da14cebeSEric Cheng iget_p->ino = ino; 649da14cebeSEric Cheng 650da14cebeSEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 651da14cebeSEric Cheng FKIOCTL, kcred, NULL); 652da14cebeSEric Cheng if (err != 0) { 653da14cebeSEric Cheng kmem_free(iget_p, ipsz); 654da14cebeSEric Cheng return (B_FALSE); 655da14cebeSEric Cheng } 656da14cebeSEric Cheng if (iget_p->num_devs == 0) { 657da14cebeSEric Cheng kmem_free(iget_p, ipsz); 658da14cebeSEric Cheng return (B_FALSE); 659da14cebeSEric Cheng } 660da14cebeSEric Cheng inum = iget_p->num_devs; 661da14cebeSEric Cheng if (iget_p->num_devs_ret < iget_p->num_devs) { 662da14cebeSEric Cheng /* Reallocate */ 663da14cebeSEric Cheng nipsz = PCITOOL_IGET_SIZE(iget_p->num_devs); 664da14cebeSEric Cheng 665da14cebeSEric Cheng kmem_free(iget_p, ipsz); 666da14cebeSEric Cheng ipsz = nipsz; 667da14cebeSEric Cheng iget_p = kmem_zalloc(ipsz, KM_SLEEP); 668da14cebeSEric Cheng 669da14cebeSEric Cheng iget_p->num_devs_ret = inum; 6707ff178cdSJimmy Vetayases iget_p->cpu_id = oldcpuid; 671da14cebeSEric Cheng iget_p->ino = ino; 672da14cebeSEric Cheng iget_p->user_version = PCITOOL_VERSION; 673da14cebeSEric Cheng err = ldi_ioctl(lh, PCITOOL_DEVICE_GET_INTR, (intptr_t)iget_p, 674da14cebeSEric Cheng FKIOCTL, kcred, NULL); 675da14cebeSEric Cheng if (err != 0) { 676da14cebeSEric Cheng kmem_free(iget_p, ipsz); 677da14cebeSEric Cheng return (B_FALSE); 678da14cebeSEric Cheng } 679da14cebeSEric Cheng /* defensive */ 680da14cebeSEric Cheng if (iget_p->num_devs != iget_p->num_devs_ret) { 681da14cebeSEric Cheng kmem_free(iget_p, ipsz); 682da14cebeSEric Cheng return (B_FALSE); 683da14cebeSEric Cheng } 684da14cebeSEric Cheng } 685da14cebeSEric Cheng 686da14cebeSEric Cheng if (mac_search_intrinfo(iget_p, dln)) { 687da14cebeSEric Cheng kmem_free(iget_p, ipsz); 688da14cebeSEric Cheng return (B_TRUE); 689da14cebeSEric Cheng } 690da14cebeSEric Cheng kmem_free(iget_p, ipsz); 691da14cebeSEric Cheng return (B_FALSE); 692da14cebeSEric Cheng } 693da14cebeSEric Cheng 694da14cebeSEric Cheng /* 695da14cebeSEric Cheng * Get the interrupts and check each one to see if it is for our device. 696da14cebeSEric Cheng */ 697da14cebeSEric Cheng static int 698da14cebeSEric Cheng mac_validate_intr(ldi_handle_t lh, mac_dladm_intr_t *dln, processorid_t cpuid) 699da14cebeSEric Cheng { 700da14cebeSEric Cheng pcitool_intr_info_t intr_info; 701da14cebeSEric Cheng int err; 702da14cebeSEric Cheng int ino; 7037ff178cdSJimmy Vetayases int oldcpuid; 704da14cebeSEric Cheng 705da14cebeSEric Cheng err = ldi_ioctl(lh, PCITOOL_SYSTEM_INTR_INFO, (intptr_t)&intr_info, 706da14cebeSEric Cheng FKIOCTL, kcred, NULL); 707da14cebeSEric Cheng if (err != 0) 708da14cebeSEric Cheng return (-1); 709da14cebeSEric Cheng 7107ff178cdSJimmy Vetayases for (oldcpuid = 0; oldcpuid < intr_info.num_cpu; oldcpuid++) { 711da14cebeSEric Cheng for (ino = 0; ino < intr_info.num_intr; ino++) { 7127ff178cdSJimmy Vetayases if (mac_get_single_intr(lh, oldcpuid, ino, dln)) { 713da14cebeSEric Cheng if (dln->cpu_id == cpuid) 714da14cebeSEric Cheng return (0); 715da14cebeSEric Cheng return (1); 716da14cebeSEric Cheng } 717da14cebeSEric Cheng } 7187ff178cdSJimmy Vetayases } 719da14cebeSEric Cheng return (-1); 720da14cebeSEric Cheng } 721da14cebeSEric Cheng 722da14cebeSEric Cheng /* 723da14cebeSEric Cheng * Obtain the nexus parent node info. for mdip. 724da14cebeSEric Cheng */ 725da14cebeSEric Cheng static dev_info_t * 726da14cebeSEric Cheng mac_get_nexus_node(dev_info_t *mdip, mac_dladm_intr_t *dln) 727da14cebeSEric Cheng { 728da14cebeSEric Cheng struct dev_info *tdip = (struct dev_info *)mdip; 729da14cebeSEric Cheng struct ddi_minor_data *minordata; 730da14cebeSEric Cheng int circ; 731da14cebeSEric Cheng dev_info_t *pdip; 732da14cebeSEric Cheng char pathname[MAXPATHLEN]; 733da14cebeSEric Cheng 734da14cebeSEric Cheng while (tdip != NULL) { 735c36aa31cSEric Cheng /* 736c36aa31cSEric Cheng * The netboot code could call this function while walking the 737c36aa31cSEric Cheng * device tree so we need to use ndi_devi_tryenter() here to 738c36aa31cSEric Cheng * avoid deadlock. 739c36aa31cSEric Cheng */ 740c36aa31cSEric Cheng if (ndi_devi_tryenter((dev_info_t *)tdip, &circ) == 0) 741c36aa31cSEric Cheng break; 742c36aa31cSEric Cheng 743da14cebeSEric Cheng for (minordata = tdip->devi_minor; minordata != NULL; 744da14cebeSEric Cheng minordata = minordata->next) { 745da14cebeSEric Cheng if (strncmp(minordata->ddm_node_type, DDI_NT_INTRCTL, 746da14cebeSEric Cheng strlen(DDI_NT_INTRCTL)) == 0) { 747da14cebeSEric Cheng pdip = minordata->dip; 748da14cebeSEric Cheng (void) ddi_pathname(pdip, pathname); 749da14cebeSEric Cheng (void) snprintf(dln->nexus_path, MAXPATHLEN, 750da14cebeSEric Cheng "/devices%s:intr", pathname); 751da14cebeSEric Cheng (void) ddi_pathname_minor(minordata, pathname); 752da14cebeSEric Cheng ndi_devi_exit((dev_info_t *)tdip, circ); 753da14cebeSEric Cheng return (pdip); 754da14cebeSEric Cheng } 755da14cebeSEric Cheng } 756da14cebeSEric Cheng ndi_devi_exit((dev_info_t *)tdip, circ); 757da14cebeSEric Cheng tdip = tdip->devi_parent; 758da14cebeSEric Cheng } 759da14cebeSEric Cheng return (NULL); 760da14cebeSEric Cheng } 761da14cebeSEric Cheng 762da14cebeSEric Cheng /* 763da14cebeSEric Cheng * For a primary MAC client, if the user has set a list or CPUs or 764da14cebeSEric Cheng * we have obtained it implicitly, we try to retarget the interrupt 765da14cebeSEric Cheng * for that device on one of the CPUs in the list. 766da14cebeSEric Cheng * We assign the interrupt to the same CPU as the poll thread. 767da14cebeSEric Cheng */ 768da14cebeSEric Cheng static boolean_t 769da14cebeSEric Cheng mac_check_interrupt_binding(dev_info_t *mdip, int32_t cpuid) 770da14cebeSEric Cheng { 771da14cebeSEric Cheng ldi_handle_t lh = NULL; 772da14cebeSEric Cheng ldi_ident_t li = NULL; 773da14cebeSEric Cheng int err; 774da14cebeSEric Cheng int ret; 775da14cebeSEric Cheng mac_dladm_intr_t dln; 776da14cebeSEric Cheng dev_info_t *dip; 777da14cebeSEric Cheng struct ddi_minor_data *minordata; 778da14cebeSEric Cheng 779da14cebeSEric Cheng dln.nexus_path[0] = '\0'; 780da14cebeSEric Cheng dln.driver_path[0] = '\0'; 781da14cebeSEric Cheng 782da14cebeSEric Cheng minordata = ((struct dev_info *)mdip)->devi_minor; 783da14cebeSEric Cheng while (minordata != NULL) { 784da14cebeSEric Cheng if (minordata->type == DDM_MINOR) 785da14cebeSEric Cheng break; 786da14cebeSEric Cheng minordata = minordata->next; 787da14cebeSEric Cheng } 788da14cebeSEric Cheng if (minordata == NULL) 789da14cebeSEric Cheng return (B_FALSE); 790da14cebeSEric Cheng 791da14cebeSEric Cheng (void) ddi_pathname_minor(minordata, dln.driver_path); 792da14cebeSEric Cheng 793da14cebeSEric Cheng dip = mac_get_nexus_node(mdip, &dln); 794da14cebeSEric Cheng /* defensive */ 795da14cebeSEric Cheng if (dip == NULL) 796da14cebeSEric Cheng return (B_FALSE); 797da14cebeSEric Cheng 798da14cebeSEric Cheng err = ldi_ident_from_major(ddi_driver_major(dip), &li); 799da14cebeSEric Cheng if (err != 0) 800da14cebeSEric Cheng return (B_FALSE); 801da14cebeSEric Cheng 802da14cebeSEric Cheng err = ldi_open_by_name(dln.nexus_path, FREAD|FWRITE, kcred, &lh, li); 803da14cebeSEric Cheng if (err != 0) 804da14cebeSEric Cheng return (B_FALSE); 805da14cebeSEric Cheng 806da14cebeSEric Cheng ret = mac_validate_intr(lh, &dln, cpuid); 807da14cebeSEric Cheng if (ret < 0) { 808da14cebeSEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 809da14cebeSEric Cheng return (B_FALSE); 810da14cebeSEric Cheng } 811da14cebeSEric Cheng /* cmn_note? */ 812da14cebeSEric Cheng if (ret != 0) 8137ff178cdSJimmy Vetayases if ((err = (mac_set_intr(lh, cpuid, dln.cpu_id, dln.ino))) 8147ff178cdSJimmy Vetayases != 0) { 815da14cebeSEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 816da14cebeSEric Cheng return (B_FALSE); 817da14cebeSEric Cheng } 818da14cebeSEric Cheng (void) ldi_close(lh, FREAD|FWRITE, kcred); 819da14cebeSEric Cheng return (B_TRUE); 820da14cebeSEric Cheng } 821da14cebeSEric Cheng 822da14cebeSEric Cheng void 823da14cebeSEric Cheng mac_client_set_intr_cpu(void *arg, mac_client_handle_t mch, int32_t cpuid) 824da14cebeSEric Cheng { 825da14cebeSEric Cheng dev_info_t *mdip = (dev_info_t *)arg; 826da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 827da14cebeSEric Cheng mac_resource_props_t *mrp; 828da14cebeSEric Cheng mac_perim_handle_t mph; 8290dc2366fSVenugopal Iyer flow_entry_t *flent = mcip->mci_flent; 8300dc2366fSVenugopal Iyer mac_soft_ring_set_t *rx_srs; 8310dc2366fSVenugopal Iyer mac_cpus_t *srs_cpu; 832da14cebeSEric Cheng 8330dc2366fSVenugopal Iyer if (!mac_check_interrupt_binding(mdip, cpuid)) 8340dc2366fSVenugopal Iyer cpuid = -1; 835da14cebeSEric Cheng mac_perim_enter_by_mh((mac_handle_t)mcip->mci_mip, &mph); 836da14cebeSEric Cheng mrp = MCIP_RESOURCE_PROPS(mcip); 8370dc2366fSVenugopal Iyer mrp->mrp_rx_intr_cpu = cpuid; 8380dc2366fSVenugopal Iyer if (flent != NULL && flent->fe_rx_srs_cnt == 2) { 8390dc2366fSVenugopal Iyer rx_srs = flent->fe_rx_srs[1]; 8400dc2366fSVenugopal Iyer srs_cpu = &rx_srs->srs_cpu; 8410dc2366fSVenugopal Iyer srs_cpu->mc_rx_intr_cpu = cpuid; 8420dc2366fSVenugopal Iyer } 843da14cebeSEric Cheng mac_perim_exit(mph); 844da14cebeSEric Cheng } 845da14cebeSEric Cheng 846da14cebeSEric Cheng int32_t 847da14cebeSEric Cheng mac_client_intr_cpu(mac_client_handle_t mch) 848da14cebeSEric Cheng { 849da14cebeSEric Cheng mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 850da14cebeSEric Cheng mac_cpus_t *srs_cpu; 851da14cebeSEric Cheng mac_soft_ring_set_t *rx_srs; 852da14cebeSEric Cheng flow_entry_t *flent = mcip->mci_flent; 853da14cebeSEric Cheng mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 8540dc2366fSVenugopal Iyer mac_ring_t *ring; 8550dc2366fSVenugopal Iyer mac_intr_t *mintr; 856da14cebeSEric Cheng 857da14cebeSEric Cheng /* 858da14cebeSEric Cheng * Check if we need to retarget the interrupt. We do this only 859da14cebeSEric Cheng * for the primary MAC client. We do this if we have the only 860da14cebeSEric Cheng * exclusive ring in the group. 861da14cebeSEric Cheng */ 862da14cebeSEric Cheng if (mac_is_primary_client(mcip) && flent->fe_rx_srs_cnt == 2) { 863da14cebeSEric Cheng rx_srs = flent->fe_rx_srs[1]; 864da14cebeSEric Cheng srs_cpu = &rx_srs->srs_cpu; 8650dc2366fSVenugopal Iyer ring = rx_srs->srs_ring; 8660dc2366fSVenugopal Iyer mintr = &ring->mr_info.mri_intr; 8670dc2366fSVenugopal Iyer /* 8680dc2366fSVenugopal Iyer * If ddi_handle is present or the poll CPU is 8690dc2366fSVenugopal Iyer * already bound to the interrupt CPU, return -1. 8700dc2366fSVenugopal Iyer */ 8710dc2366fSVenugopal Iyer if (mintr->mi_ddi_handle != NULL || 8720dc2366fSVenugopal Iyer ((mrp->mrp_ncpus != 0) && 8730dc2366fSVenugopal Iyer (mrp->mrp_rx_intr_cpu == srs_cpu->mc_rx_pollid))) { 874da14cebeSEric Cheng return (-1); 8750dc2366fSVenugopal Iyer } 8760dc2366fSVenugopal Iyer return (srs_cpu->mc_rx_pollid); 877da14cebeSEric Cheng } 878da14cebeSEric Cheng return (-1); 879da14cebeSEric Cheng } 880da14cebeSEric Cheng 881da14cebeSEric Cheng void * 882da14cebeSEric Cheng mac_get_devinfo(mac_handle_t mh) 883da14cebeSEric Cheng { 884da14cebeSEric Cheng mac_impl_t *mip = (mac_impl_t *)mh; 885da14cebeSEric Cheng 886da14cebeSEric Cheng return ((void *)mip->mi_dip); 887da14cebeSEric Cheng } 888ae6aa22aSVenugopal Iyer 8899820c710SBaban Kenkre #define PKT_HASH_2BYTES(x) ((x)[0] ^ (x)[1]) 890ae6aa22aSVenugopal Iyer #define PKT_HASH_4BYTES(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3]) 891ae6aa22aSVenugopal Iyer #define PKT_HASH_MAC(x) ((x)[0] ^ (x)[1] ^ (x)[2] ^ (x)[3] ^ (x)[4] ^ (x)[5]) 892ae6aa22aSVenugopal Iyer 893ae6aa22aSVenugopal Iyer uint64_t 894ae6aa22aSVenugopal Iyer mac_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy, boolean_t is_outbound) 895ae6aa22aSVenugopal Iyer { 896ae6aa22aSVenugopal Iyer struct ether_header *ehp; 897ae6aa22aSVenugopal Iyer uint64_t hash = 0; 898ae6aa22aSVenugopal Iyer uint16_t sap; 899ae6aa22aSVenugopal Iyer uint_t skip_len; 900ae6aa22aSVenugopal Iyer uint8_t proto; 9019820c710SBaban Kenkre boolean_t ip_fragmented; 902ae6aa22aSVenugopal Iyer 903ae6aa22aSVenugopal Iyer /* 904ae6aa22aSVenugopal Iyer * We may want to have one of these per MAC type plugin in the 905ae6aa22aSVenugopal Iyer * future. For now supports only ethernet. 906ae6aa22aSVenugopal Iyer */ 907ae6aa22aSVenugopal Iyer if (media != DL_ETHER) 908ae6aa22aSVenugopal Iyer return (0L); 909ae6aa22aSVenugopal Iyer 910ae6aa22aSVenugopal Iyer /* for now we support only outbound packets */ 911ae6aa22aSVenugopal Iyer ASSERT(is_outbound); 912ae6aa22aSVenugopal Iyer ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))); 913ae6aa22aSVenugopal Iyer ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 914ae6aa22aSVenugopal Iyer 915ae6aa22aSVenugopal Iyer /* compute L2 hash */ 916ae6aa22aSVenugopal Iyer 917ae6aa22aSVenugopal Iyer ehp = (struct ether_header *)mp->b_rptr; 918ae6aa22aSVenugopal Iyer 919ae6aa22aSVenugopal Iyer if ((policy & MAC_PKT_HASH_L2) != 0) { 920ae6aa22aSVenugopal Iyer uchar_t *mac_src = ehp->ether_shost.ether_addr_octet; 921ae6aa22aSVenugopal Iyer uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet; 922ae6aa22aSVenugopal Iyer hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst); 923ae6aa22aSVenugopal Iyer policy &= ~MAC_PKT_HASH_L2; 924ae6aa22aSVenugopal Iyer } 925ae6aa22aSVenugopal Iyer 926ae6aa22aSVenugopal Iyer if (policy == 0) 927ae6aa22aSVenugopal Iyer goto done; 928ae6aa22aSVenugopal Iyer 929ae6aa22aSVenugopal Iyer /* skip ethernet header */ 930ae6aa22aSVenugopal Iyer 931ae6aa22aSVenugopal Iyer sap = ntohs(ehp->ether_type); 932ae6aa22aSVenugopal Iyer if (sap == ETHERTYPE_VLAN) { 933ae6aa22aSVenugopal Iyer struct ether_vlan_header *evhp; 934ae6aa22aSVenugopal Iyer mblk_t *newmp = NULL; 935ae6aa22aSVenugopal Iyer 936ae6aa22aSVenugopal Iyer skip_len = sizeof (struct ether_vlan_header); 937ae6aa22aSVenugopal Iyer if (MBLKL(mp) < skip_len) { 938ae6aa22aSVenugopal Iyer /* the vlan tag is the payload, pull up first */ 939ae6aa22aSVenugopal Iyer newmp = msgpullup(mp, -1); 940ae6aa22aSVenugopal Iyer if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) { 941ae6aa22aSVenugopal Iyer goto done; 942ae6aa22aSVenugopal Iyer } 943ae6aa22aSVenugopal Iyer evhp = (struct ether_vlan_header *)newmp->b_rptr; 944ae6aa22aSVenugopal Iyer } else { 945ae6aa22aSVenugopal Iyer evhp = (struct ether_vlan_header *)mp->b_rptr; 946ae6aa22aSVenugopal Iyer } 947ae6aa22aSVenugopal Iyer 948ae6aa22aSVenugopal Iyer sap = ntohs(evhp->ether_type); 949ae6aa22aSVenugopal Iyer freemsg(newmp); 950ae6aa22aSVenugopal Iyer } else { 951ae6aa22aSVenugopal Iyer skip_len = sizeof (struct ether_header); 952ae6aa22aSVenugopal Iyer } 953ae6aa22aSVenugopal Iyer 954ae6aa22aSVenugopal Iyer /* if ethernet header is in its own mblk, skip it */ 955ae6aa22aSVenugopal Iyer if (MBLKL(mp) <= skip_len) { 956ae6aa22aSVenugopal Iyer skip_len -= MBLKL(mp); 957ae6aa22aSVenugopal Iyer mp = mp->b_cont; 958ae6aa22aSVenugopal Iyer if (mp == NULL) 959ae6aa22aSVenugopal Iyer goto done; 960ae6aa22aSVenugopal Iyer } 961ae6aa22aSVenugopal Iyer 962ae6aa22aSVenugopal Iyer sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap; 963ae6aa22aSVenugopal Iyer 964ae6aa22aSVenugopal Iyer /* compute IP src/dst addresses hash and skip IPv{4,6} header */ 965ae6aa22aSVenugopal Iyer 966ae6aa22aSVenugopal Iyer switch (sap) { 967ae6aa22aSVenugopal Iyer case ETHERTYPE_IP: { 968ae6aa22aSVenugopal Iyer ipha_t *iphp; 969ae6aa22aSVenugopal Iyer 970ae6aa22aSVenugopal Iyer /* 971ae6aa22aSVenugopal Iyer * If the header is not aligned or the header doesn't fit 972ae6aa22aSVenugopal Iyer * in the mblk, bail now. Note that this may cause packets 973ae6aa22aSVenugopal Iyer * reordering. 974ae6aa22aSVenugopal Iyer */ 975ae6aa22aSVenugopal Iyer iphp = (ipha_t *)(mp->b_rptr + skip_len); 976ae6aa22aSVenugopal Iyer if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) || 977ae6aa22aSVenugopal Iyer !OK_32PTR((char *)iphp)) 978ae6aa22aSVenugopal Iyer goto done; 979ae6aa22aSVenugopal Iyer 980ae6aa22aSVenugopal Iyer proto = iphp->ipha_protocol; 981ae6aa22aSVenugopal Iyer skip_len += IPH_HDR_LENGTH(iphp); 982ae6aa22aSVenugopal Iyer 9839820c710SBaban Kenkre /* Check if the packet is fragmented. */ 9849820c710SBaban Kenkre ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) & 9859820c710SBaban Kenkre IPH_OFFSET; 9869820c710SBaban Kenkre 9879820c710SBaban Kenkre /* 9889820c710SBaban Kenkre * For fragmented packets, use addresses in addition to 9899820c710SBaban Kenkre * the frag_id to generate the hash inorder to get 9909820c710SBaban Kenkre * better distribution. 9919820c710SBaban Kenkre */ 9929820c710SBaban Kenkre if (ip_fragmented || (policy & MAC_PKT_HASH_L3) != 0) { 993ae6aa22aSVenugopal Iyer uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src); 994ae6aa22aSVenugopal Iyer uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst); 995ae6aa22aSVenugopal Iyer 996ae6aa22aSVenugopal Iyer hash ^= (PKT_HASH_4BYTES(ip_src) ^ 997ae6aa22aSVenugopal Iyer PKT_HASH_4BYTES(ip_dst)); 998ae6aa22aSVenugopal Iyer policy &= ~MAC_PKT_HASH_L3; 999ae6aa22aSVenugopal Iyer } 10009820c710SBaban Kenkre 10019820c710SBaban Kenkre if (ip_fragmented) { 10029820c710SBaban Kenkre uint8_t *identp = (uint8_t *)&iphp->ipha_ident; 10039820c710SBaban Kenkre hash ^= PKT_HASH_2BYTES(identp); 10049820c710SBaban Kenkre goto done; 10059820c710SBaban Kenkre } 1006ae6aa22aSVenugopal Iyer break; 1007ae6aa22aSVenugopal Iyer } 1008ae6aa22aSVenugopal Iyer case ETHERTYPE_IPV6: { 1009ae6aa22aSVenugopal Iyer ip6_t *ip6hp; 10100dc2366fSVenugopal Iyer ip6_frag_t *frag = NULL; 1011ae6aa22aSVenugopal Iyer uint16_t hdr_length; 1012ae6aa22aSVenugopal Iyer 1013ae6aa22aSVenugopal Iyer /* 1014ae6aa22aSVenugopal Iyer * If the header is not aligned or the header doesn't fit 1015ae6aa22aSVenugopal Iyer * in the mblk, bail now. Note that this may cause packets 1016ae6aa22aSVenugopal Iyer * reordering. 1017ae6aa22aSVenugopal Iyer */ 1018ae6aa22aSVenugopal Iyer 1019ae6aa22aSVenugopal Iyer ip6hp = (ip6_t *)(mp->b_rptr + skip_len); 1020ae6aa22aSVenugopal Iyer if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) || 1021ae6aa22aSVenugopal Iyer !OK_32PTR((char *)ip6hp)) 1022ae6aa22aSVenugopal Iyer goto done; 1023ae6aa22aSVenugopal Iyer 10240dc2366fSVenugopal Iyer if (!mac_ip_hdr_length_v6(ip6hp, mp->b_wptr, &hdr_length, 10250dc2366fSVenugopal Iyer &proto, &frag)) 1026ae6aa22aSVenugopal Iyer goto done; 1027ae6aa22aSVenugopal Iyer skip_len += hdr_length; 1028ae6aa22aSVenugopal Iyer 10299820c710SBaban Kenkre /* 10309820c710SBaban Kenkre * For fragmented packets, use addresses in addition to 10319820c710SBaban Kenkre * the frag_id to generate the hash inorder to get 10329820c710SBaban Kenkre * better distribution. 10339820c710SBaban Kenkre */ 10340dc2366fSVenugopal Iyer if (frag != NULL || (policy & MAC_PKT_HASH_L3) != 0) { 1035ae6aa22aSVenugopal Iyer uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]); 1036ae6aa22aSVenugopal Iyer uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]); 1037ae6aa22aSVenugopal Iyer 1038ae6aa22aSVenugopal Iyer hash ^= (PKT_HASH_4BYTES(ip_src) ^ 1039ae6aa22aSVenugopal Iyer PKT_HASH_4BYTES(ip_dst)); 1040ae6aa22aSVenugopal Iyer policy &= ~MAC_PKT_HASH_L3; 1041ae6aa22aSVenugopal Iyer } 10429820c710SBaban Kenkre 10430dc2366fSVenugopal Iyer if (frag != NULL) { 10440dc2366fSVenugopal Iyer uint8_t *identp = (uint8_t *)&frag->ip6f_ident; 10459820c710SBaban Kenkre hash ^= PKT_HASH_4BYTES(identp); 10469820c710SBaban Kenkre goto done; 10479820c710SBaban Kenkre } 1048ae6aa22aSVenugopal Iyer break; 1049ae6aa22aSVenugopal Iyer } 1050ae6aa22aSVenugopal Iyer default: 1051ae6aa22aSVenugopal Iyer goto done; 1052ae6aa22aSVenugopal Iyer } 1053ae6aa22aSVenugopal Iyer 1054ae6aa22aSVenugopal Iyer if (policy == 0) 1055ae6aa22aSVenugopal Iyer goto done; 1056ae6aa22aSVenugopal Iyer 1057ae6aa22aSVenugopal Iyer /* if ip header is in its own mblk, skip it */ 1058ae6aa22aSVenugopal Iyer if (MBLKL(mp) <= skip_len) { 1059ae6aa22aSVenugopal Iyer skip_len -= MBLKL(mp); 1060ae6aa22aSVenugopal Iyer mp = mp->b_cont; 1061ae6aa22aSVenugopal Iyer if (mp == NULL) 1062ae6aa22aSVenugopal Iyer goto done; 1063ae6aa22aSVenugopal Iyer } 1064ae6aa22aSVenugopal Iyer 1065ae6aa22aSVenugopal Iyer /* parse ULP header */ 1066ae6aa22aSVenugopal Iyer again: 1067ae6aa22aSVenugopal Iyer switch (proto) { 1068ae6aa22aSVenugopal Iyer case IPPROTO_TCP: 1069ae6aa22aSVenugopal Iyer case IPPROTO_UDP: 1070ae6aa22aSVenugopal Iyer case IPPROTO_ESP: 1071ae6aa22aSVenugopal Iyer case IPPROTO_SCTP: 1072ae6aa22aSVenugopal Iyer /* 1073ae6aa22aSVenugopal Iyer * These Internet Protocols are intentionally designed 1074ae6aa22aSVenugopal Iyer * for hashing from the git-go. Port numbers are in the first 1075ae6aa22aSVenugopal Iyer * word for transports, SPI is first for ESP. 1076ae6aa22aSVenugopal Iyer */ 1077ae6aa22aSVenugopal Iyer if (mp->b_rptr + skip_len + 4 > mp->b_wptr) 1078ae6aa22aSVenugopal Iyer goto done; 1079ae6aa22aSVenugopal Iyer hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len)); 1080ae6aa22aSVenugopal Iyer break; 1081ae6aa22aSVenugopal Iyer 1082ae6aa22aSVenugopal Iyer case IPPROTO_AH: { 1083ae6aa22aSVenugopal Iyer ah_t *ah = (ah_t *)(mp->b_rptr + skip_len); 1084ae6aa22aSVenugopal Iyer uint_t ah_length = AH_TOTAL_LEN(ah); 1085ae6aa22aSVenugopal Iyer 1086ae6aa22aSVenugopal Iyer if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr) 1087ae6aa22aSVenugopal Iyer goto done; 1088ae6aa22aSVenugopal Iyer 1089ae6aa22aSVenugopal Iyer proto = ah->ah_nexthdr; 1090ae6aa22aSVenugopal Iyer skip_len += ah_length; 1091ae6aa22aSVenugopal Iyer 1092ae6aa22aSVenugopal Iyer /* if AH header is in its own mblk, skip it */ 1093ae6aa22aSVenugopal Iyer if (MBLKL(mp) <= skip_len) { 1094ae6aa22aSVenugopal Iyer skip_len -= MBLKL(mp); 1095ae6aa22aSVenugopal Iyer mp = mp->b_cont; 1096ae6aa22aSVenugopal Iyer if (mp == NULL) 1097ae6aa22aSVenugopal Iyer goto done; 1098ae6aa22aSVenugopal Iyer } 1099ae6aa22aSVenugopal Iyer 1100ae6aa22aSVenugopal Iyer goto again; 1101ae6aa22aSVenugopal Iyer } 1102ae6aa22aSVenugopal Iyer } 1103ae6aa22aSVenugopal Iyer 1104ae6aa22aSVenugopal Iyer done: 1105ae6aa22aSVenugopal Iyer return (hash); 1106ae6aa22aSVenugopal Iyer } 1107