/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2015 OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

#include "i40e_sw.h"

/*
 * ---------------------------------------------------------
 * Buffer and Memory Management, Receiving, and Transmitting
 * ---------------------------------------------------------
 *
 * Each physical function (PF), which is what we think of as an instance of the
 * device driver, has a series of associated transmit and receive queue pairs.
 * Effectively, these are what we think of in MAC as rings. Each of these has
 * its own ring of descriptors which is used as part of doing DMA activity.
 *
 * The transmit ring of descriptors is made up of 16-byte entries which are
 * used to send packets, program filters, etc. The receive ring of descriptors
 * is made up of entries that are either 16 or 32 bytes each. At the moment, we
 * opt to use the larger descriptor format so that we're in a better position
 * if we ever want to leverage that information later on.
 *
 * However, these rings are just for descriptors; they don't talk or deal with
 * how we actually store the memory that we need for DMA or the associated
 * information that we need for keeping track of message blocks. To correspond
 * to the hardware descriptor ring, which is how we communicate with hardware,
 * we introduce a control block which keeps track of our required metadata like
 * DMA mappings.
 *
 * There are two main considerations that dictate how much memory and buffers
 * we end up allocating. Those are:
 *
 *   o The size of the ring (controlled through the driver.conf file)
 *
 *   o The maximum size frame we can receive.
 *
 * The size of the rings currently defaults to 1024 descriptors and is stored
 * in i40e_t`i40e_rx_ring_size and i40e_t`i40e_tx_ring_size.
 *
 * While the size of the rings is controlled by the driver.conf, the maximum
 * size frame is informed primarily through the use of dladm and the setting of
 * the MTU property on the device. From the MTU, we then go and do some
 * machinations. The first thing we do is add in space for the Ethernet header,
 * potentially a VLAN header, and the FCS check. This value is what's stored as
 * i40e_t`i40e_frame_max and is derived any time i40e_t`i40e_sdu changes.
 *
 * This size is then rounded up to the nearest 1k chunk, which represents the
 * actual amount of memory that we'll allocate for a single frame.
 *
 * Note that for RX, we do something that might be unexpected. We always add
 * an extra two bytes to the frame size that we allocate. We then offset the
 * DMA address that we receive a packet into by two bytes. This ensures that
 * the IP header will always be 4-byte aligned because the MAC header is either
 * 14 or 18 bytes in length, depending on the use of 802.1Q tagging, which
 * makes IP's and MAC's lives easier.
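 *
 * Put concretely, the sizing works out roughly like the sketch below. This is
 * a paraphrase of the scheme described above, not a quote of the driver's
 * code: ETHERFCSL and struct ether_vlan_header come from <sys/ethernet.h>,
 * P2ROUNDUP() is the usual sys/sysmacros.h rounding macro, and
 * I40E_BUF_IPHDR_ALIGNMENT is the two-byte shift applied later in this file.
 *
 *	frame_max = sdu + sizeof (struct ether_vlan_header) + ETHERFCSL;
 *	rx_buf_size = P2ROUNDUP(frame_max + I40E_BUF_IPHDR_ALIGNMENT, 1024);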
 *
 * Both the RX and TX descriptor rings (which are what we use to communicate
 * with hardware) are allocated as a single region of DMA memory which is the
 * size of the descriptor (32 bytes and 16 bytes, respectively) times the total
 * number of descriptors for an RX and TX ring.
 *
 * While the RX and TX descriptors are allocated using DMA-based memory, the
 * control blocks for each of them are allocated using normal kernel memory.
 * They aren't special from a DMA perspective. We'll go over the design of both
 * receiving and transmitting separately, as they have slightly different
 * control blocks and different ways that we manage the relationship between
 * control blocks and descriptors.
 *
 * ---------------------------------
 * RX Descriptors and Control Blocks
 * ---------------------------------
 *
 * For every descriptor in the ring that the driver has, we need some
 * associated memory, which means that we need to have the receive specific
 * control block. We have a couple of different, but related, goals:
 *
 *   o Once we've completed the mc_start GLDv3 endpoint (i40e_m_start), we do
 *     not want to do any additional memory allocations or DMA allocations if
 *     we don't have to.
 *
 *   o We'd like to try and do as much zero-copy as possible, while taking into
 *     account the cost of mapping in DMA resources.
 *
 *   o We'd like to have every receive descriptor available.
 *
 * Now, these rules are a bit in tension with one another. The act of mapping
 * in DMA memory is an exercise in trying to find the break-even point between
 * page table updates and bcopy. We currently start by using the same metrics
 * that ixgbe used; however, it should be known that this value has effectively
 * been cargo-culted across to yet another driver, sorry.
 *
 * If we receive a packet which is larger than our copy threshold, we'll create
 * a message block out of the DMA memory via desballoc(9F) and send that up to
 * MAC that way. This will cause us to be notified when the message block is
 * then freed because it has been consumed, dropped, or otherwise. Otherwise,
 * if it's less than the threshold, we'll try to use allocb and bcopy it into
 * the block, thus allowing us to immediately reuse the DMA resource. Note, on
 * debug builds, we allow someone to whack the variable i40e_debug_rx_mode to
 * override the behavior and always do a bcopy or a DMA bind.
 *
 * To try and ensure that the device always has blocks that it can receive data
 * into, we maintain two lists of control blocks, a working list and a free
 * list. Each list is sized equal to the number of descriptors in the RX ring.
 * During the GLDv3 mc_start routine, we allocate a number of RX control blocks
 * equal to twice the number of descriptors in the ring and we assign them
 * equally to the free list and to the working list. Each control block also
 * has DMA memory allocated and associated with it, which is used to receive
 * the actual packet data. All of a received frame's data will end up in a
 * single DMA buffer.
 *
 * During operation, we always maintain the invariant that each RX descriptor
 * has an associated RX control block which lives in the working list. If we
 * feel that we should loan up DMA memory to MAC in the form of a message
 * block, we can only do so if we can maintain this invariant. To do that, we
 * swap in one of the buffers from the free list. If none are available, then
 * we resort to using allocb(9F) and bcopy(9F) on the packet instead,
 * regardless of the size.
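 *
 * That decision reduces to something like the following sketch. The real
 * logic lives in the RX path later in this file; "copy_threshold" here is
 * shorthand for the tunable described above, not necessarily its real name.
 *
 *	if (frame_len > copy_threshold && rxd->rxd_rcb_free > 0) {
 *		swap a free rcb into this descriptor's work list slot;
 *		mp = the loaned rcb's rcb_mp (desballoc'd over rcb_dma);
 *	} else {
 *		mp = allocb(frame_len + I40E_BUF_IPHDR_ALIGNMENT, 0);
 *		bcopy the frame out of rcb_dma into mp;
 *	}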
 *
 * Loaned message blocks come back to us when freemsg(9F) or freeb(9F) is
 * called on the block, at which point we restore the RX control block to the
 * free list and are able to reuse the DMA memory again. While the scheme may
 * seem odd, it importantly keeps us out of trying to do any DMA allocations in
 * the normal path of operation, even though we may still have to allocate
 * message blocks and copy.
 *
 * The following state machine describes the lifetime of an RX control block.
 * In the diagram we abbreviate the RX ring descriptor entry as rxd and the RX
 * control block entry as rcb.
 *
 *             |                                  |
 *             * ... 1/2 of all initial rcb's ... *
 *             |                                  |
 *             v                                  v
 *    +------------------+               +------------------+
 *    | rcb on free list |---*----------->| rcb on work list |
 *    +------------------+   .           +------------------+
 *             ^              . moved to          |
 *             |                replace rcb       * . . Frame received,
 *             |                loaned to         |     entry on free list
 *             |                MAC + co.         |     available. rcb's
 *             |                                  |     memory made into mblk_t
 *             * . freemsg(9F)                    |     and sent up to MAC.
 *             |   called on                      |
 *             |   loaned rcb                     |
 *             |   and it is                      v
 *             |   recycled.            +-------------------+
 *             +--------------------<---| rcb loaned to MAC |
 *                                      +-------------------+
 *
 * Finally, note that every RX control block has a reference count on it. One
 * reference is added as long as the driver has had the GLDv3 mc_start endpoint
 * called. If the GLDv3 mc_stop entry point is called, IP has been unplumbed
 * and no other DLPI consumers remain, then we'll decrement the reference count
 * by one. Whenever we loan up the RX control block and associated buffer to
 * MAC, then we bump the reference count again. Even though the device is
 * stopped, there may still be loaned frames in upper levels that we'll want to
 * account for. Our callback from freemsg(9F)/freeb(9F) will take care of
 * making sure that it is cleaned up.
 *
 * --------------------
 * Managing the RX Ring
 * --------------------
 *
 * The receive ring descriptors are arranged in a circular buffer with a head
 * and tail pointer.
 * These conventional head and tail pointers are used to partition the ring
 * into two portions: a portion that we, the operating system, manage and a
 * portion that is managed by hardware. When hardware owns a descriptor in the
 * ring, it means that it is waiting for data to be filled in. However, when
 * the driver owns a descriptor, that means that the descriptor has been
 * consumed and we need to go take a look at it.
 *
 * The initial head is configured to be zero by writing it as such in the
 * receive queue context in the FPM (function private memory from the host).
 * The initial tail is written to be the last descriptor. This is written to
 * via the PCIe register I40E_QRX_TAIL(). Technically, hardware owns everything
 * between the HEAD and TAIL, inclusive. Note that while we initially program
 * the HEAD, the only values we ever consult ourselves are the TAIL register
 * and our own state tracking. Effectively, we cache the HEAD register and then
 * update it ourselves based on our work.
 *
 * When we iterate over the RX descriptors and thus the received frames, we are
 * either in an interrupt context or we've been asked by MAC to poll on the
 * ring. If we've been asked to poll on the ring, we have a maximum number of
 * bytes of mblk_t's to return. If processing an RX descriptor would cause us
 * to exceed that count, then we do not process it. When in interrupt context,
 * we don't have a strict byte count. However, to ensure liveness, we limit the
 * amount of data based on a configuration value
 * (i40e_t`i40e_rx_limit_per_intr). The number that we've started with for this
 * is based on similar numbers that are used for ixgbe. After some additional
 * time in the field, we'll have a sense as to whether or not it should be
 * changed.
 *
 * When processing, we start at our own HEAD pointer
 * (i40e_rx_data_t`rxd_desc_next), which indicates the descriptor to start
 * processing. Every RX descriptor has what's described as the DD bit. This bit
 * (the LSB of the second 8-byte word) indicates whether or not the descriptor
 * is done. When we give descriptors to the hardware, this value is always
 * zero. When the hardware has finished a descriptor, it will always be one.
 *
 * The first thing that we check is whether the DD bit indicates that the
 * current HEAD is ready. If it isn't, then we're done. That's the primary
 * invariant of processing a frame.
 * If it's done, then there are a few other things that we want to look at. In
 * the same status word as the DD bit, there are two other important bits:
 *
 *   o End of Packet (EOP)
 *   o Error bits
 *
 * The end of packet bit indicates that we have reached the last descriptor.
 * Now, you might ask when would there be more than one descriptor. The reason
 * for that might be due to large receive offload (LRO) or header splitting
 * functionality, which presently isn't supported in the driver. The error bits
 * in the frame are only valid when EOP is set.
 *
 * If error bits are set on the frame, then we still consume it; however, we
 * will not generate an mblk_t to send up to MAC. If there are no error bits
 * set, then we'll consume the descriptor either using bcopy or DMA binding.
 * See the earlier section 'RX Descriptors and Control Blocks' for more
 * information on how that selection is made.
 *
 * Regardless of whether we construct an mblk_t or encounter an error, we end
 * up resetting the descriptor. This re-arms the descriptor for hardware and in
 * the process, we may end up assigning it a new receive control block. After
 * we do this, we always update our HEAD pointer, no matter what.
 *
 * Finally, once we've consumed as much as we will in a given window, we go and
 * update the TAIL register to indicate all the frames we've consumed. We only
 * do a single bulk write for the ring.
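 *
 * Pulled together, one pass over the ring looks roughly like the sketch
 * below. This is a paraphrase of the description above, not a quote of the
 * RX path later in this file, and the DD/EOP/error tests are written out in
 * words rather than with the real status-word macros:
 *
 *	cur = rxd->rxd_desc_next;
 *	while (within the poll byte limit or i40e_rx_limit_per_intr) {
 *		if (the DD bit of rxd->rxd_desc_ring[cur] is clear)
 *			break;
 *		if (no error bits are set)
 *			build an mblk_t via bcopy or DMA bind;
 *		re-arm the descriptor (possibly with a new rcb);
 *		cur = i40e_next_desc(cur, 1, rxd->rxd_ring_size);
 *	}
 *	rxd->rxd_desc_next = cur;
 *	update I40E_QRX_TAIL() to cover the descriptors just re-armed;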
 *
 * ---------------------------------
 * TX Descriptors and Control Blocks
 * ---------------------------------
 *
 * While the transmit path is similar in spirit to the receive path, it works
 * differently due to the fact that all data is originated by the operating
 * system and not by the device.
 *
 * Like RX, there is both a descriptor ring that we use to communicate with the
 * hardware and which points to the memory used to transmit a frame. Similarly,
 * there is a corresponding transmit control block; however, the correspondence
 * between descriptors and control blocks is more complex and not necessarily
 * 1-to-1.
 *
 * The driver is asked to process a single frame at a time. That message block
 * may be made up of multiple fragments linked together by the mblk_t`b_cont
 * member. The device has a hard limit of up to 8 buffers being allowed for use
 * for a single non-LSO packet or LSO segment. The number of TX ring entries
 * (and thus TX control blocks) used depends on the fragment sizes and DMA
 * layout, as explained below.
 *
 * We alter our DMA strategy based on a threshold tied to the fragment size.
 * This threshold is configurable via the tx_dma_threshold property. If the
 * fragment is above the threshold, we DMA bind it -- consuming one TCB and
 * potentially several data descriptors. The exact number of descriptors (equal
 * to the number of DMA cookies) depends on page size, MTU size, b_rptr offset
 * into page, b_wptr offset into page, and the physical layout of the dblk's
 * memory (contiguous or not). Essentially, we are at the mercy of the DMA
 * engine and the dblk's memory allocation. Knowing the exact number of
 * descriptors up front is a task best not taken on by the driver itself.
 * Instead, we attempt to DMA bind the fragment and verify the descriptor
 * layout meets hardware constraints. If the proposed DMA bind does not satisfy
 * the hardware constraints, then we discard it and instead copy the entire
 * fragment into the pre-allocated TCB buffer (or buffers if the fragment is
 * larger than the TCB buffer).
 *
 * If the fragment is below or at the threshold, we copy it to the
 * pre-allocated buffer of a TCB. We compress consecutive copy fragments into a
 * single TCB to conserve resources. We are guaranteed that the TCB buffer is
 * made up of only one DMA cookie, and therefore it consumes only one
 * descriptor on the controller.
 *
 * Furthermore, if the frame requires HW offloads such as LSO, tunneling or
 * filtering, then the TX data descriptors must be preceded by a single TX
 * context descriptor. Because there is no DMA transfer associated with the
 * context descriptor, we allocate a control block with a special type which
 * indicates to the TX ring recycle code that there are no associated DMA
 * resources to unbind when the control block is freed.
 *
 * If we don't have enough space in the ring or TX control blocks available,
 * then we'll return the unprocessed message block to MAC. This will induce
 * flow control and once we recycle enough entries, we'll once again enable
 * sending on the ring.
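 *
 * In outline, mapping one frame onto TCBs and descriptors works roughly as
 * sketched below. This is a summary of the rules above, not a quote of the
 * transmit path later in this file, and "dma_threshold" stands in for the
 * tunable described above rather than naming the actual field:
 *
 *	if (the frame needs LSO or similar offloads)
 *		use one TCB to emit a single TX context descriptor;
 *	for (mp = frame; mp != NULL; mp = mp->b_cont) {
 *		if (MBLKL(mp) > dma_threshold and the resulting bind
 *		    stays within the 8-descriptor limit)
 *			DMA bind: one TCB, one data descriptor per cookie;
 *		else
 *			copy: bcopy into a pre-allocated TCB buffer,
 *			coalescing consecutive copied fragments;
 *	}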
 *
 * We size the working list to be equal to the number of descriptors in the
 * ring. We size the free list to be equal to 1.5 times the number of
 * descriptors in the ring. We'll allocate a number of TX control block entries
 * equal to the number of entries in the free list. By default, all entries are
 * placed in the free list. As we come along and try to send something, we'll
 * allocate entries from the free list and add them to the working list, where
 * they'll stay until the hardware indicates that all of the data has been
 * written back to us. The reason that we start with 1.5x is to help facilitate
 * having more than one TX buffer associated with the DMA activity.
 *
 * --------------------
 * Managing the TX Ring
 * --------------------
 *
 * The transmit descriptor ring is driven by us. We maintain our own notion of
 * a HEAD and TAIL register and we update the hardware with updates to the TAIL
 * register. When the hardware is done writing out data, it updates us by
 * writing back to a specific address, not by updating the individual
 * descriptors. That address is a 4-byte region after the main transmit
 * descriptor ring. This is why the descriptor ring has an extra descriptor's
 * worth allocated to it.
 *
 * We maintain our notion of the HEAD in the i40e_trqpair_t`itrq_desc_head and
 * the TAIL in the i40e_trqpair_t`itrq_desc_tail. When we write out frames,
 * we'll update the tail there and in the I40E_QTX_TAIL() register. At various
 * points in time, through both interrupts and our own internal checks, we'll
 * sync the write-back head portion of the DMA space. Based on the index it
 * reports back, we'll free everything between our current HEAD and the
 * indicated index and update HEAD to the new index.
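 *
 * As a simplified sketch of that recycle pass (the recycle routine later in
 * this file is the authority; "wbhead" below is simply the index read out of
 * the write-back area):
 *
 *	head = itrq->itrq_desc_head;
 *	while (head != wbhead) {
 *		if ((tcb = itrq->itrq_tcb_work_list[head]) != NULL) {
 *			itrq->itrq_tcb_work_list[head] = NULL;
 *			unbind or reset the tcb, return it to the free list;
 *		}
 *		head = i40e_next_desc(head, 1, itrq->itrq_tx_ring_size);
 *	}
 *	itrq->itrq_desc_head = head;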
 *
 * When a frame comes in, we try to use a number of transmit control blocks and
 * we'll transition them from the free list to the work list. They'll get moved
 * to the entries on the work list that correspond to the transmit descriptors
 * they're associated with. Once hardware indicates that the corresponding
 * descriptor has been consumed, we'll return the control block to the free
 * list.
 *
 * The transmit control block free list is managed by keeping track of the
 * number of entries in it, i40e_trqpair_t`itrq_tcb_free. We use it as a way to
 * index into the free list and add things to it. In effect, we always push and
 * pop from the tail and protect it with a single lock,
 * i40e_trqpair_t`itrq_tcb_lock. This scheme is somewhat simplistic and may not
 * stand up to further performance testing; however, it does allow us to get
 * off the ground with the device driver.
 *
 * The following image describes where a given transmit control block lives in
 * its lifetime:
 *
 *             |
 *             * ... Initial placement for all tcb's
 *             |
 *             v
 *    +------------------+                        +------------------+
 *    | tcb on free list |---*------------------->| tcb on work list |
 *    +------------------+   .                    +------------------+
 *             ^             . N tcbs allocated[1]          |
 *             |               to send frame                v
 *             |               or fragment on               |
 *             |               wire, mblk from              |
 *             |               MAC associated.              |
 *             |                                            |
 *             +------*--------------------------------<----+
 *                    .
 *                    . Hardware indicates
 *                      entry transmitted.
 *                      tcbs recycled, mblk
 *                      from MAC freed.
 *
 * [1] We allocate N tcbs to transmit a single frame, where N can be 1 context
 *     descriptor plus 1 data descriptor in the non-DMA-bind case. In the DMA
 *     bind case, N can be 1 context descriptor plus 1 data descriptor per
 *     b_cont in the mblk. In this case, the mblk is associated with the first
 *     data descriptor and freed as part of freeing that data descriptor.
 *
 * ------------
 * Blocking MAC
 * ------------
 *
 * When performing transmit, we can run out of descriptors and ring entries.
 * When such a case happens, we return the mblk_t to MAC to indicate that we've
 * been blocked. At that point in time, MAC becomes blocked and will not
 * transmit anything out that specific ring until we notify MAC. To indicate
 * that we're in such a situation, we set the i40e_trqpair_t`itrq_tx_blocked
 * member to B_TRUE.
 *
 * When we recycle TX descriptors, we'll end up signaling MAC by calling
 * mac_tx_ring_update() if we were blocked, letting it know that it's safe to
 * start sending frames out to us again.
 */

/*
 * We set our DMA alignment requests based on the smallest supported page size
 * of the corresponding platform.
 */
#if defined(__sparc)
#define	I40E_DMA_ALIGNMENT 0x2000ull
#elif defined(__x86)
#define	I40E_DMA_ALIGNMENT 0x1000ull
#else
#error "unknown architecture for i40e"
#endif

/*
 * This structure is used to maintain information and flags related to
 * transmitting a frame. These fields are ultimately used to construct the
 * TX data descriptor(s) and, if necessary, the TX context descriptor.
 */
typedef struct i40e_tx_context {
	enum i40e_tx_desc_cmd_bits	itc_data_cmdflags;
	uint32_t			itc_data_offsets;
	enum i40e_tx_ctx_desc_cmd_bits	itc_ctx_cmdflags;
	uint32_t			itc_ctx_tsolen;
	uint32_t			itc_ctx_mss;
} i40e_tx_context_t;

/*
 * Toggles on debug builds which can be used to override our RX behaviour based
 * on thresholds.
 */
#ifdef DEBUG
typedef enum {
	I40E_DEBUG_RX_DEFAULT	= 0,
	I40E_DEBUG_RX_BCOPY	= 1,
	I40E_DEBUG_RX_DMABIND	= 2
} i40e_debug_rx_t;

i40e_debug_rx_t i40e_debug_rx_mode = I40E_DEBUG_RX_DEFAULT;
#endif	/* DEBUG */

/*
 * Notes on the following DMA attributes. The first attribute,
 * i40e_static_dma_attr, is designed to be used for both the descriptor rings
 * and the static buffers that we associate with control blocks. For this
 * reason, we force an SGL length of one. While technically the driver supports
 * a larger SGL (5 on RX and 8 on TX), we opt to only use one to simplify our
 * management here. In addition, when the Intel common code wants to allocate
 * memory via the i40e_allocate_virt_mem osdep function, we have it leverage
 * the static dma attr.
 *
 * The latter two sets of attributes are what we use when we're binding a
 * bunch of mblk_t fragments to go out the door. Note that the main difference
 * here is that we're allowed a larger SGL length. For non-LSO TX, we
 * restrict the SGL length to match the number of TX buffers available to the
 * PF (8). For the LSO case we can go much larger, with the caveat that each
 * MSS-sized chunk (segment) must not span more than 8 data descriptors and
 * hence must not span more than 8 cookies.
 *
 * Note, we default to setting ourselves to be DMA capable here. However,
 * because we could have multiple instances which have different FMA error
 * checking capabilities, or end up on different buses, we make these static
 * and const and copy them into the i40e_t for the given device with the actual
 * values that reflect the actual capabilities.
 */
static const ddi_dma_attr_t i40e_g_static_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	0x0000000000000000ull,		/* low address */
	0xFFFFFFFFFFFFFFFFull,		/* high address */
	0x00000000FFFFFFFFull,		/* dma counter max */
	I40E_DMA_ALIGNMENT,		/* alignment */
	0x00000FFF,			/* burst sizes */
	0x00000001,			/* minimum transfer size */
	0x00000000FFFFFFFFull,		/* maximum transfer size */
	0xFFFFFFFFFFFFFFFFull,		/* maximum segment size */
	1,				/* scatter/gather list length */
	0x00000001,			/* granularity */
	DDI_DMA_FLAGERR			/* DMA flags */
};

static const ddi_dma_attr_t i40e_g_txbind_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	0x0000000000000000ull,		/* low address */
	0xFFFFFFFFFFFFFFFFull,		/* high address */
	I40E_MAX_TX_BUFSZ - 1,		/* dma counter max */
	I40E_DMA_ALIGNMENT,		/* alignment */
	0x00000FFF,			/* burst sizes */
	0x00000001,			/* minimum transfer size */
	0x00000000FFFFFFFFull,		/* maximum transfer size */
	0xFFFFFFFFFFFFFFFFull,		/* maximum segment size */
	I40E_TX_MAX_COOKIE,		/* scatter/gather list length */
	0x00000001,			/* granularity */
	DDI_DMA_FLAGERR			/* DMA flags */
};

static const ddi_dma_attr_t i40e_g_txbind_lso_dma_attr = {
	DMA_ATTR_V0,			/* version number */
	0x0000000000000000ull,		/* low address */
	0xFFFFFFFFFFFFFFFFull,		/* high address */
	I40E_MAX_TX_BUFSZ - 1,		/* dma counter max */
	I40E_DMA_ALIGNMENT,		/* alignment */
	0x00000FFF,			/* burst sizes */
	0x00000001,			/* minimum transfer size */
	0x00000000FFFFFFFFull,		/* maximum transfer size */
	0xFFFFFFFFFFFFFFFFull,		/* maximum segment size */
	I40E_TX_LSO_MAX_COOKIE,		/* scatter/gather list length */
	0x00000001,			/* granularity */
	DDI_DMA_FLAGERR			/* DMA flags */
};

/*
 * Next, we have the attributes for these structures.
 * The descriptor rings are all strictly little endian, while the data buffers
 * are just arrays of bytes representing frames. Because of this, we
 * purposefully simplify the driver's programming life by programming the
 * descriptor ring as little endian, while for the buffer data we keep it as
 * unstructured.
 *
 * Note that, to keep the Intel common code operating in a reasonable way, when
 * we allocate DMA memory for it, we do not use byte swapping and thus use the
 * standard i40e_buf_acc_attr.
 */
static const ddi_device_acc_attr_t i40e_g_desc_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_STRUCTURE_LE_ACC,
	DDI_STRICTORDER_ACC
};

static const ddi_device_acc_attr_t i40e_g_buf_acc_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,
	DDI_STRICTORDER_ACC
};

/*
 * The next two functions are designed to be type-safe versions of macros that
 * are used to increment and decrement a descriptor index in the loop. Note,
 * these are marked inline to try and keep the data path hot and they were
 * effectively inlined in their previous life as macros.
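 *
 * For example, advancing the RX ring's software HEAD by one descriptor is
 * expected to look like the following (a representative use, not a quote of
 * the code elsewhere in this file):
 *
 *	rxd->rxd_desc_next = i40e_next_desc(rxd->rxd_desc_next, 1,
 *	    rxd->rxd_ring_size);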
 */
static inline int
i40e_next_desc(int base, int count, int size)
{
	int out;

	ASSERT(base >= 0);
	ASSERT(count > 0);
	ASSERT(size > 0);

	if (base + count < size) {
		out = base + count;
	} else {
		out = base + count - size;
	}

	ASSERT(out >= 0 && out < size);
	return (out);
}

static inline int
i40e_prev_desc(int base, int count, int size)
{
	int out;

	ASSERT(base >= 0);
	ASSERT(count > 0);
	ASSERT(size > 0);

	if (base >= count) {
		out = base - count;
	} else {
		out = base - count + size;
	}

	ASSERT(out >= 0 && out < size);
	return (out);
}

/*
 * Free DMA memory that is represented by an i40e_dma_buffer_t.
 */
static void
i40e_free_dma_buffer(i40e_dma_buffer_t *dmap)
{
	if (dmap->dmab_dma_address != 0) {
		VERIFY(dmap->dmab_dma_handle != NULL);
		(void) ddi_dma_unbind_handle(dmap->dmab_dma_handle);
		dmap->dmab_dma_address = 0;
		dmap->dmab_size = 0;
	}

	if (dmap->dmab_acc_handle != NULL) {
		ddi_dma_mem_free(&dmap->dmab_acc_handle);
		dmap->dmab_acc_handle = NULL;
		dmap->dmab_address = NULL;
	}

	if (dmap->dmab_dma_handle != NULL) {
		ddi_dma_free_handle(&dmap->dmab_dma_handle);
		dmap->dmab_dma_handle = NULL;
	}

	/*
	 * These should only be set if we have valid handles allocated and
	 * therefore should always be NULLed out due to the above code. This
	 * is here to catch us acting sloppy.
	 */
	ASSERT(dmap->dmab_dma_address == 0);
	ASSERT(dmap->dmab_address == NULL);
	ASSERT(dmap->dmab_size == 0);
	dmap->dmab_len = 0;
}

/*
 * Allocate size bytes of DMA memory based on the passed in attributes. This
 * fills in the information in dmap and is designed for all of our single
 * cookie allocations.
 */
static boolean_t
i40e_alloc_dma_buffer(i40e_t *i40e, i40e_dma_buffer_t *dmap,
    ddi_dma_attr_t *attrsp, ddi_device_acc_attr_t *accp, boolean_t stream,
    boolean_t zero, size_t size)
{
	int ret;
	uint_t flags;
	size_t len;
	ddi_dma_cookie_t cookie;
	uint_t ncookies;

	if (stream == B_TRUE)
		flags = DDI_DMA_STREAMING;
	else
		flags = DDI_DMA_CONSISTENT;

	/*
	 * Step one: Allocate the DMA handle
	 */
	ret = ddi_dma_alloc_handle(i40e->i40e_dip, attrsp, DDI_DMA_DONTWAIT,
	    NULL, &dmap->dmab_dma_handle);
	if (ret != DDI_SUCCESS) {
		i40e_error(i40e, "failed to allocate dma handle for I/O "
		    "buffers: %d", ret);
		dmap->dmab_dma_handle = NULL;
		return (B_FALSE);
	}

	/*
	 * Step two: Allocate the DMA memory
	 */
	ret = ddi_dma_mem_alloc(dmap->dmab_dma_handle, size, accp, flags,
	    DDI_DMA_DONTWAIT, NULL, &dmap->dmab_address, &len,
	    &dmap->dmab_acc_handle);
	if (ret != DDI_SUCCESS) {
		i40e_error(i40e, "failed to allocate %ld bytes of DMA for I/O "
		    "buffers", size);
		dmap->dmab_address = NULL;
		dmap->dmab_acc_handle = NULL;
		i40e_free_dma_buffer(dmap);
		return (B_FALSE);
	}

	/*
	 * Step three: Optionally zero
	 */
	if (zero == B_TRUE)
		bzero(dmap->dmab_address, len);

	/*
	 * Step four: Bind the memory
	 */
	ret = ddi_dma_addr_bind_handle(dmap->dmab_dma_handle, NULL,
	    dmap->dmab_address, len, DDI_DMA_RDWR | flags, DDI_DMA_DONTWAIT,
	    NULL, &cookie, &ncookies);
	if (ret != DDI_DMA_MAPPED) {
		i40e_error(i40e, "failed to allocate %ld bytes of DMA for I/O "
		    "buffers: %d", size, ret);
		i40e_free_dma_buffer(dmap);
		return (B_FALSE);
	}

	VERIFY(ncookies == 1);
	dmap->dmab_dma_address = cookie.dmac_laddress;
	dmap->dmab_size = len;
	dmap->dmab_len = 0;
	return (B_TRUE);
}

/*
 * This function is called once the last pending rcb has been freed by the
 * upper levels of the system.
 */
static void
i40e_free_rx_data(i40e_rx_data_t *rxd)
{
	VERIFY(rxd->rxd_rcb_pending == 0);

	if (rxd->rxd_rcb_area != NULL) {
		kmem_free(rxd->rxd_rcb_area,
		    sizeof (i40e_rx_control_block_t) *
		    (rxd->rxd_free_list_size + rxd->rxd_ring_size));
		rxd->rxd_rcb_area = NULL;
	}

	if (rxd->rxd_free_list != NULL) {
		kmem_free(rxd->rxd_free_list,
		    sizeof (i40e_rx_control_block_t *) *
		    rxd->rxd_free_list_size);
		rxd->rxd_free_list = NULL;
	}

	if (rxd->rxd_work_list != NULL) {
		kmem_free(rxd->rxd_work_list,
		    sizeof (i40e_rx_control_block_t *) *
		    rxd->rxd_ring_size);
		rxd->rxd_work_list = NULL;
	}

	kmem_free(rxd, sizeof (i40e_rx_data_t));
}

static boolean_t
i40e_alloc_rx_data(i40e_t *i40e, i40e_trqpair_t *itrq)
{
	i40e_rx_data_t *rxd;

	rxd = kmem_zalloc(sizeof (i40e_rx_data_t), KM_NOSLEEP);
	if (rxd == NULL)
		return (B_FALSE);
	itrq->itrq_rxdata = rxd;
	rxd->rxd_i40e = i40e;

	rxd->rxd_ring_size = i40e->i40e_rx_ring_size;
	rxd->rxd_free_list_size = i40e->i40e_rx_ring_size;

	rxd->rxd_rcb_free = rxd->rxd_free_list_size;

	rxd->rxd_work_list = kmem_zalloc(sizeof (i40e_rx_control_block_t *) *
	    rxd->rxd_ring_size, KM_NOSLEEP);
	if (rxd->rxd_work_list == NULL) {
		i40e_error(i40e, "failed to allocate RX work list for a ring "
		    "of %d entries for ring %d", rxd->rxd_ring_size,
		    itrq->itrq_index);
		goto cleanup;
	}

	rxd->rxd_free_list = kmem_zalloc(sizeof (i40e_rx_control_block_t *) *
	    rxd->rxd_free_list_size, KM_NOSLEEP);
	if (rxd->rxd_free_list == NULL) {
		i40e_error(i40e, "failed to allocate a %d entry RX free list "
		    "for ring %d", rxd->rxd_free_list_size, itrq->itrq_index);
		goto cleanup;
	}

	rxd->rxd_rcb_area = kmem_zalloc(sizeof (i40e_rx_control_block_t) *
	    (rxd->rxd_free_list_size + rxd->rxd_ring_size), KM_NOSLEEP);
	if (rxd->rxd_rcb_area == NULL) {
		i40e_error(i40e, "failed to allocate a %d entry rcb area for "
		    "ring %d", rxd->rxd_ring_size + rxd->rxd_free_list_size,
		    itrq->itrq_index);
		goto cleanup;
	}

	return (B_TRUE);

cleanup:
	i40e_free_rx_data(rxd);
	itrq->itrq_rxdata = NULL;
	return (B_FALSE);
}

/*
 * Free all of the memory that we've allocated for DMA. Note that we may have
 * buffers that we've loaned up to the OS which are still outstanding. We'll
 * always free up the descriptor ring, because we no longer need that. For each
 * rcb, we'll iterate over it and if we take the reference count to zero, then
 * we'll free the message block and DMA related resources. However, if we don't
 * take the last reference, then we'll keep track of the fact that there is
 * pending data and clean it up when we get there.
 */
static void
i40e_free_rx_dma(i40e_rx_data_t *rxd, boolean_t failed_init)
{
	uint32_t i, count, ref;

	i40e_rx_control_block_t *rcb;
	i40e_t *i40e = rxd->rxd_i40e;

	i40e_free_dma_buffer(&rxd->rxd_desc_area);
	rxd->rxd_desc_ring = NULL;
	rxd->rxd_desc_next = 0;

	mutex_enter(&i40e->i40e_rx_pending_lock);

	rcb = rxd->rxd_rcb_area;
	count = rxd->rxd_ring_size + rxd->rxd_free_list_size;

	for (i = 0; i < count; i++, rcb++) {
		VERIFY(rcb != NULL);

		/*
		 * If we're cleaning up from a failed creation attempt, then an
		 * entry may never have been assembled which would mean that
		 * its reference count is zero. If we find that, we leave it
		 * be, because nothing else should be modifying it at this
		 * point. We're not at the point that any more references can
		 * be added, just removed.
		 */
		if (failed_init == B_TRUE && rcb->rcb_ref == 0)
			continue;

		ref = atomic_dec_32_nv(&rcb->rcb_ref);
		if (ref == 0) {
			freemsg(rcb->rcb_mp);
			rcb->rcb_mp = NULL;
			i40e_free_dma_buffer(&rcb->rcb_dma);
		} else {
			atomic_inc_32(&rxd->rxd_rcb_pending);
			atomic_inc_32(&i40e->i40e_rx_pending);
		}
	}
	mutex_exit(&i40e->i40e_rx_pending_lock);
}

/*
 * Initialize the DMA memory for the descriptor ring and for each frame in the
 * control block list.
 */
static boolean_t
i40e_alloc_rx_dma(i40e_rx_data_t *rxd)
{
	int i, count;
	size_t dmasz;
	i40e_rx_control_block_t *rcb;
	i40e_t *i40e = rxd->rxd_i40e;

	/*
	 * First allocate the RX descriptor ring.
	 */
	dmasz = sizeof (i40e_rx_desc_t) * rxd->rxd_ring_size;
	VERIFY(dmasz > 0);
	if (i40e_alloc_dma_buffer(i40e, &rxd->rxd_desc_area,
	    &i40e->i40e_static_dma_attr, &i40e->i40e_desc_acc_attr, B_FALSE,
	    B_TRUE, dmasz) == B_FALSE) {
		i40e_error(i40e, "failed to allocate DMA resources "
		    "for RX descriptor ring");
		return (B_FALSE);
	}
	rxd->rxd_desc_ring =
	    (i40e_rx_desc_t *)(uintptr_t)rxd->rxd_desc_area.dmab_address;
	rxd->rxd_desc_next = 0;

	count = rxd->rxd_ring_size + rxd->rxd_free_list_size;
	rcb = rxd->rxd_rcb_area;

	dmasz = i40e->i40e_rx_buf_size;
	VERIFY(dmasz > 0);
	for (i = 0; i < count; i++, rcb++) {
		i40e_dma_buffer_t *dmap;
		VERIFY(rcb != NULL);

		if (i < rxd->rxd_ring_size) {
			rxd->rxd_work_list[i] = rcb;
		} else {
			rxd->rxd_free_list[i - rxd->rxd_ring_size] = rcb;
		}

		dmap = &rcb->rcb_dma;
		if (i40e_alloc_dma_buffer(i40e, dmap,
		    &i40e->i40e_static_dma_attr, &i40e->i40e_buf_acc_attr,
		    B_TRUE, B_FALSE, dmasz) == B_FALSE) {
			i40e_error(i40e, "failed to allocate RX dma buffer");
			return (B_FALSE);
		}

		/*
		 * Initialize the control block and offset the DMA address. See
		 * the note in the big theory statement that explains how this
		 * helps IP deal with alignment. Note, we don't worry about
		 * whether or not we successfully get an mblk_t from desballoc;
		 * it's a common case that we have to handle later on in the
		 * system.
864da5577f0SRobert Mustacchi */ 865da5577f0SRobert Mustacchi dmap->dmab_size -= I40E_BUF_IPHDR_ALIGNMENT; 866da5577f0SRobert Mustacchi dmap->dmab_address += I40E_BUF_IPHDR_ALIGNMENT; 867da5577f0SRobert Mustacchi dmap->dmab_dma_address += I40E_BUF_IPHDR_ALIGNMENT; 868da5577f0SRobert Mustacchi 869da5577f0SRobert Mustacchi rcb->rcb_ref = 1; 870da5577f0SRobert Mustacchi rcb->rcb_rxd = rxd; 871da5577f0SRobert Mustacchi rcb->rcb_free_rtn.free_func = i40e_rx_recycle; 872da5577f0SRobert Mustacchi rcb->rcb_free_rtn.free_arg = (caddr_t)rcb; 873da5577f0SRobert Mustacchi rcb->rcb_mp = desballoc((unsigned char *)dmap->dmab_address, 874da5577f0SRobert Mustacchi dmap->dmab_size, 0, &rcb->rcb_free_rtn); 875da5577f0SRobert Mustacchi } 876da5577f0SRobert Mustacchi 877da5577f0SRobert Mustacchi return (B_TRUE); 878da5577f0SRobert Mustacchi } 879da5577f0SRobert Mustacchi 880da5577f0SRobert Mustacchi static void 881da5577f0SRobert Mustacchi i40e_free_tx_dma(i40e_trqpair_t *itrq) 882da5577f0SRobert Mustacchi { 883da5577f0SRobert Mustacchi size_t fsz; 884da5577f0SRobert Mustacchi 885da5577f0SRobert Mustacchi if (itrq->itrq_tcb_area != NULL) { 886da5577f0SRobert Mustacchi uint32_t i; 887da5577f0SRobert Mustacchi i40e_tx_control_block_t *tcb = itrq->itrq_tcb_area; 888da5577f0SRobert Mustacchi 889da5577f0SRobert Mustacchi for (i = 0; i < itrq->itrq_tx_free_list_size; i++, tcb++) { 890da5577f0SRobert Mustacchi i40e_free_dma_buffer(&tcb->tcb_dma); 891da5577f0SRobert Mustacchi if (tcb->tcb_dma_handle != NULL) { 892da5577f0SRobert Mustacchi ddi_dma_free_handle(&tcb->tcb_dma_handle); 893da5577f0SRobert Mustacchi tcb->tcb_dma_handle = NULL; 894da5577f0SRobert Mustacchi } 895*8d5069bcSRyan Zezeski if (tcb->tcb_lso_dma_handle != NULL) { 896*8d5069bcSRyan Zezeski ddi_dma_free_handle(&tcb->tcb_lso_dma_handle); 897*8d5069bcSRyan Zezeski tcb->tcb_lso_dma_handle = NULL; 898*8d5069bcSRyan Zezeski } 899da5577f0SRobert Mustacchi } 900da5577f0SRobert Mustacchi 901da5577f0SRobert Mustacchi fsz = sizeof (i40e_tx_control_block_t) * 902da5577f0SRobert Mustacchi itrq->itrq_tx_free_list_size; 903da5577f0SRobert Mustacchi kmem_free(itrq->itrq_tcb_area, fsz); 904da5577f0SRobert Mustacchi itrq->itrq_tcb_area = NULL; 905da5577f0SRobert Mustacchi } 906da5577f0SRobert Mustacchi 907da5577f0SRobert Mustacchi if (itrq->itrq_tcb_free_list != NULL) { 908da5577f0SRobert Mustacchi fsz = sizeof (i40e_tx_control_block_t *) * 909da5577f0SRobert Mustacchi itrq->itrq_tx_free_list_size; 910da5577f0SRobert Mustacchi kmem_free(itrq->itrq_tcb_free_list, fsz); 911da5577f0SRobert Mustacchi itrq->itrq_tcb_free_list = NULL; 912da5577f0SRobert Mustacchi } 913da5577f0SRobert Mustacchi 914da5577f0SRobert Mustacchi if (itrq->itrq_tcb_work_list != NULL) { 915da5577f0SRobert Mustacchi fsz = sizeof (i40e_tx_control_block_t *) * 916da5577f0SRobert Mustacchi itrq->itrq_tx_ring_size; 917da5577f0SRobert Mustacchi kmem_free(itrq->itrq_tcb_work_list, fsz); 918da5577f0SRobert Mustacchi itrq->itrq_tcb_work_list = NULL; 919da5577f0SRobert Mustacchi } 920da5577f0SRobert Mustacchi 921da5577f0SRobert Mustacchi i40e_free_dma_buffer(&itrq->itrq_desc_area); 922da5577f0SRobert Mustacchi itrq->itrq_desc_ring = NULL; 923da5577f0SRobert Mustacchi 924da5577f0SRobert Mustacchi } 925da5577f0SRobert Mustacchi 926da5577f0SRobert Mustacchi static boolean_t 927da5577f0SRobert Mustacchi i40e_alloc_tx_dma(i40e_trqpair_t *itrq) 928da5577f0SRobert Mustacchi { 929da5577f0SRobert Mustacchi int i, ret; 930da5577f0SRobert Mustacchi size_t dmasz; 931da5577f0SRobert Mustacchi i40e_tx_control_block_t *tcb; 
932da5577f0SRobert Mustacchi 	i40e_t *i40e = itrq->itrq_i40e;
933da5577f0SRobert Mustacchi 
934da5577f0SRobert Mustacchi 	itrq->itrq_tx_ring_size = i40e->i40e_tx_ring_size;
935da5577f0SRobert Mustacchi 	itrq->itrq_tx_free_list_size = i40e->i40e_tx_ring_size +
936da5577f0SRobert Mustacchi 	    (i40e->i40e_tx_ring_size >> 1);
937da5577f0SRobert Mustacchi 
938da5577f0SRobert Mustacchi 	/*
939*8d5069bcSRyan Zezeski 	 * Allocate an additional TX descriptor for the writeback head.
940da5577f0SRobert Mustacchi 	 */
941da5577f0SRobert Mustacchi 	dmasz = sizeof (i40e_tx_desc_t) * itrq->itrq_tx_ring_size;
942da5577f0SRobert Mustacchi 	dmasz += sizeof (i40e_tx_desc_t);
943da5577f0SRobert Mustacchi 
944da5577f0SRobert Mustacchi 	VERIFY(dmasz > 0);
945da5577f0SRobert Mustacchi 	if (i40e_alloc_dma_buffer(i40e, &itrq->itrq_desc_area,
946da5577f0SRobert Mustacchi 	    &i40e->i40e_static_dma_attr, &i40e->i40e_desc_acc_attr,
947da5577f0SRobert Mustacchi 	    B_FALSE, B_TRUE, dmasz) == B_FALSE) {
948*8d5069bcSRyan Zezeski 		i40e_error(i40e, "failed to allocate DMA resources for TX "
949da5577f0SRobert Mustacchi 		    "descriptor ring");
950da5577f0SRobert Mustacchi 		return (B_FALSE);
951da5577f0SRobert Mustacchi 	}
952da5577f0SRobert Mustacchi 	itrq->itrq_desc_ring =
953da5577f0SRobert Mustacchi 	    (i40e_tx_desc_t *)(uintptr_t)itrq->itrq_desc_area.dmab_address;
954da5577f0SRobert Mustacchi 	itrq->itrq_desc_wbhead = (uint32_t *)(itrq->itrq_desc_ring +
955da5577f0SRobert Mustacchi 	    itrq->itrq_tx_ring_size);
956da5577f0SRobert Mustacchi 	itrq->itrq_desc_head = 0;
957da5577f0SRobert Mustacchi 	itrq->itrq_desc_tail = 0;
958da5577f0SRobert Mustacchi 	itrq->itrq_desc_free = itrq->itrq_tx_ring_size;
959da5577f0SRobert Mustacchi 
960da5577f0SRobert Mustacchi 	itrq->itrq_tcb_work_list = kmem_zalloc(itrq->itrq_tx_ring_size *
961da5577f0SRobert Mustacchi 	    sizeof (i40e_tx_control_block_t *), KM_NOSLEEP);
962da5577f0SRobert Mustacchi 	if (itrq->itrq_tcb_work_list == NULL) {
963*8d5069bcSRyan Zezeski 		i40e_error(i40e, "failed to allocate a %d entry TX work list "
964da5577f0SRobert Mustacchi 		    "for ring %d", itrq->itrq_tx_ring_size, itrq->itrq_index);
965da5577f0SRobert Mustacchi 		goto cleanup;
966da5577f0SRobert Mustacchi 	}
967da5577f0SRobert Mustacchi 
968da5577f0SRobert Mustacchi 	itrq->itrq_tcb_free_list = kmem_zalloc(itrq->itrq_tx_free_list_size *
969da5577f0SRobert Mustacchi 	    sizeof (i40e_tx_control_block_t *), KM_NOSLEEP);
970da5577f0SRobert Mustacchi 	if (itrq->itrq_tcb_free_list == NULL) {
971*8d5069bcSRyan Zezeski 		i40e_error(i40e, "failed to allocate a %d entry TX free list "
972da5577f0SRobert Mustacchi 		    "for ring %d", itrq->itrq_tx_free_list_size,
973da5577f0SRobert Mustacchi 		    itrq->itrq_index);
974da5577f0SRobert Mustacchi 		goto cleanup;
975da5577f0SRobert Mustacchi 	}
976da5577f0SRobert Mustacchi 
977da5577f0SRobert Mustacchi 	/*
978*8d5069bcSRyan Zezeski 	 * We allocate enough TX control blocks to cover the free list.
979da5577f0SRobert Mustacchi 	 */
980da5577f0SRobert Mustacchi 	itrq->itrq_tcb_area = kmem_zalloc(sizeof (i40e_tx_control_block_t) *
981da5577f0SRobert Mustacchi 	    itrq->itrq_tx_free_list_size, KM_NOSLEEP);
982da5577f0SRobert Mustacchi 	if (itrq->itrq_tcb_area == NULL) {
983da5577f0SRobert Mustacchi 		i40e_error(i40e, "failed to allocate a %d entry tcb area for "
984da5577f0SRobert Mustacchi 		    "ring %d", itrq->itrq_tx_free_list_size, itrq->itrq_index);
985da5577f0SRobert Mustacchi 		goto cleanup;
986da5577f0SRobert Mustacchi 	}
987da5577f0SRobert Mustacchi 
988da5577f0SRobert Mustacchi 	/*
989da5577f0SRobert Mustacchi 	 * For each tcb, allocate DMA memory.
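	 *
	 * As a worked example of the sizing above (assuming the default
	 * 1024-entry TX ring): the free list holds 1024 + (1024 >> 1) == 1536
	 * control blocks, and the descriptor area spans
	 * (1024 + 1) * sizeof (i40e_tx_desc_t) bytes, the extra entry being
	 * the writeback head. Each tcb in the loop below gets a pre-allocated
	 * copy buffer of i40e_tx_buf_size bytes plus two unbound DMA handles,
	 * one for ordinary binds and one for LSO binds.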
990da5577f0SRobert Mustacchi */ 991da5577f0SRobert Mustacchi dmasz = i40e->i40e_tx_buf_size; 992da5577f0SRobert Mustacchi VERIFY(dmasz > 0); 993da5577f0SRobert Mustacchi tcb = itrq->itrq_tcb_area; 994da5577f0SRobert Mustacchi for (i = 0; i < itrq->itrq_tx_free_list_size; i++, tcb++) { 995da5577f0SRobert Mustacchi VERIFY(tcb != NULL); 996da5577f0SRobert Mustacchi 997da5577f0SRobert Mustacchi /* 998da5577f0SRobert Mustacchi * Allocate both a DMA buffer which we'll use for when we copy 999da5577f0SRobert Mustacchi * packets for transmission and allocate a DMA handle which 1000da5577f0SRobert Mustacchi * we'll use when we bind data. 1001da5577f0SRobert Mustacchi */ 1002da5577f0SRobert Mustacchi ret = ddi_dma_alloc_handle(i40e->i40e_dip, 1003da5577f0SRobert Mustacchi &i40e->i40e_txbind_dma_attr, DDI_DMA_DONTWAIT, NULL, 1004da5577f0SRobert Mustacchi &tcb->tcb_dma_handle); 1005da5577f0SRobert Mustacchi if (ret != DDI_SUCCESS) { 1006*8d5069bcSRyan Zezeski i40e_error(i40e, "failed to allocate DMA handle for TX " 1007da5577f0SRobert Mustacchi "data binding on ring %d: %d", itrq->itrq_index, 1008da5577f0SRobert Mustacchi ret); 1009da5577f0SRobert Mustacchi tcb->tcb_dma_handle = NULL; 1010da5577f0SRobert Mustacchi goto cleanup; 1011da5577f0SRobert Mustacchi } 1012da5577f0SRobert Mustacchi 1013*8d5069bcSRyan Zezeski ret = ddi_dma_alloc_handle(i40e->i40e_dip, 1014*8d5069bcSRyan Zezeski &i40e->i40e_txbind_lso_dma_attr, DDI_DMA_DONTWAIT, NULL, 1015*8d5069bcSRyan Zezeski &tcb->tcb_lso_dma_handle); 1016*8d5069bcSRyan Zezeski if (ret != DDI_SUCCESS) { 1017*8d5069bcSRyan Zezeski i40e_error(i40e, "failed to allocate DMA handle for TX " 1018*8d5069bcSRyan Zezeski "LSO data binding on ring %d: %d", itrq->itrq_index, 1019*8d5069bcSRyan Zezeski ret); 1020*8d5069bcSRyan Zezeski tcb->tcb_lso_dma_handle = NULL; 1021*8d5069bcSRyan Zezeski goto cleanup; 1022*8d5069bcSRyan Zezeski } 1023*8d5069bcSRyan Zezeski 1024da5577f0SRobert Mustacchi if (i40e_alloc_dma_buffer(i40e, &tcb->tcb_dma, 1025da5577f0SRobert Mustacchi &i40e->i40e_static_dma_attr, &i40e->i40e_buf_acc_attr, 1026da5577f0SRobert Mustacchi B_TRUE, B_FALSE, dmasz) == B_FALSE) { 1027da5577f0SRobert Mustacchi i40e_error(i40e, "failed to allocate %ld bytes of " 1028*8d5069bcSRyan Zezeski "DMA for TX data binding on ring %d", dmasz, 1029da5577f0SRobert Mustacchi itrq->itrq_index); 1030da5577f0SRobert Mustacchi goto cleanup; 1031da5577f0SRobert Mustacchi } 1032da5577f0SRobert Mustacchi 1033da5577f0SRobert Mustacchi itrq->itrq_tcb_free_list[i] = tcb; 1034da5577f0SRobert Mustacchi } 1035da5577f0SRobert Mustacchi 1036da5577f0SRobert Mustacchi itrq->itrq_tcb_free = itrq->itrq_tx_free_list_size; 1037da5577f0SRobert Mustacchi 1038da5577f0SRobert Mustacchi return (B_TRUE); 1039da5577f0SRobert Mustacchi 1040da5577f0SRobert Mustacchi cleanup: 1041da5577f0SRobert Mustacchi i40e_free_tx_dma(itrq); 1042da5577f0SRobert Mustacchi return (B_FALSE); 1043da5577f0SRobert Mustacchi } 1044da5577f0SRobert Mustacchi 1045da5577f0SRobert Mustacchi /* 1046da5577f0SRobert Mustacchi * Free all memory associated with all of the rings on this i40e instance. Note, 1047da5577f0SRobert Mustacchi * this is done as part of the GLDv3 stop routine. 
1048da5577f0SRobert Mustacchi */ 1049da5577f0SRobert Mustacchi void 1050da5577f0SRobert Mustacchi i40e_free_ring_mem(i40e_t *i40e, boolean_t failed_init) 1051da5577f0SRobert Mustacchi { 1052da5577f0SRobert Mustacchi int i; 1053da5577f0SRobert Mustacchi 1054da5577f0SRobert Mustacchi for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 1055da5577f0SRobert Mustacchi i40e_rx_data_t *rxd = i40e->i40e_trqpairs[i].itrq_rxdata; 1056da5577f0SRobert Mustacchi 1057da5577f0SRobert Mustacchi /* 1058*8d5069bcSRyan Zezeski * In some cases i40e_alloc_rx_data() may have failed 1059*8d5069bcSRyan Zezeski * and in that case there is no rxd to free. 1060*8d5069bcSRyan Zezeski */ 1061*8d5069bcSRyan Zezeski if (rxd == NULL) 1062*8d5069bcSRyan Zezeski continue; 1063*8d5069bcSRyan Zezeski 1064*8d5069bcSRyan Zezeski /* 1065*8d5069bcSRyan Zezeski * Clean up our RX data. We have to free DMA resources first and 1066da5577f0SRobert Mustacchi * then if we have no more pending RCB's, then we'll go ahead 1067da5577f0SRobert Mustacchi * and clean things up. Note, we can't set the stopped flag on 1068*8d5069bcSRyan Zezeski * the RX data until after we've done the first pass of the 1069da5577f0SRobert Mustacchi * pending resources. Otherwise we might race with 1070da5577f0SRobert Mustacchi * i40e_rx_recycle on determining who should free the 1071da5577f0SRobert Mustacchi * i40e_rx_data_t above. 1072da5577f0SRobert Mustacchi */ 1073da5577f0SRobert Mustacchi i40e_free_rx_dma(rxd, failed_init); 1074da5577f0SRobert Mustacchi 1075da5577f0SRobert Mustacchi mutex_enter(&i40e->i40e_rx_pending_lock); 1076da5577f0SRobert Mustacchi rxd->rxd_shutdown = B_TRUE; 1077da5577f0SRobert Mustacchi if (rxd->rxd_rcb_pending == 0) { 1078da5577f0SRobert Mustacchi i40e_free_rx_data(rxd); 1079da5577f0SRobert Mustacchi i40e->i40e_trqpairs[i].itrq_rxdata = NULL; 1080da5577f0SRobert Mustacchi } 1081da5577f0SRobert Mustacchi mutex_exit(&i40e->i40e_rx_pending_lock); 1082da5577f0SRobert Mustacchi 1083da5577f0SRobert Mustacchi i40e_free_tx_dma(&i40e->i40e_trqpairs[i]); 1084da5577f0SRobert Mustacchi } 1085da5577f0SRobert Mustacchi } 1086da5577f0SRobert Mustacchi 1087da5577f0SRobert Mustacchi /* 1088da5577f0SRobert Mustacchi * Allocate all of the resources associated with all of the rings on this i40e 1089da5577f0SRobert Mustacchi * instance. Note this is done as part of the GLDv3 start routine and thus we 1090da5577f0SRobert Mustacchi * should not use blocking allocations. This takes care of both DMA and non-DMA 1091da5577f0SRobert Mustacchi * related resources. 
1092da5577f0SRobert Mustacchi */ 1093da5577f0SRobert Mustacchi boolean_t 1094da5577f0SRobert Mustacchi i40e_alloc_ring_mem(i40e_t *i40e) 1095da5577f0SRobert Mustacchi { 1096da5577f0SRobert Mustacchi int i; 1097da5577f0SRobert Mustacchi 1098da5577f0SRobert Mustacchi for (i = 0; i < i40e->i40e_num_trqpairs; i++) { 1099da5577f0SRobert Mustacchi if (i40e_alloc_rx_data(i40e, &i40e->i40e_trqpairs[i]) == 1100da5577f0SRobert Mustacchi B_FALSE) 1101da5577f0SRobert Mustacchi goto unwind; 1102da5577f0SRobert Mustacchi 1103da5577f0SRobert Mustacchi if (i40e_alloc_rx_dma(i40e->i40e_trqpairs[i].itrq_rxdata) == 1104da5577f0SRobert Mustacchi B_FALSE) 1105da5577f0SRobert Mustacchi goto unwind; 1106da5577f0SRobert Mustacchi 1107da5577f0SRobert Mustacchi if (i40e_alloc_tx_dma(&i40e->i40e_trqpairs[i]) == B_FALSE) 1108da5577f0SRobert Mustacchi goto unwind; 1109da5577f0SRobert Mustacchi } 1110da5577f0SRobert Mustacchi 1111da5577f0SRobert Mustacchi return (B_TRUE); 1112da5577f0SRobert Mustacchi 1113da5577f0SRobert Mustacchi unwind: 1114da5577f0SRobert Mustacchi i40e_free_ring_mem(i40e, B_TRUE); 1115da5577f0SRobert Mustacchi return (B_FALSE); 1116da5577f0SRobert Mustacchi } 1117da5577f0SRobert Mustacchi 1118da5577f0SRobert Mustacchi 1119da5577f0SRobert Mustacchi /* 1120da5577f0SRobert Mustacchi * Because every instance of i40e may have different support for FMA 1121da5577f0SRobert Mustacchi * capabilities, we copy the DMA attributes into the i40e_t and set them that 1122da5577f0SRobert Mustacchi * way and use them for determining attributes. 1123da5577f0SRobert Mustacchi */ 1124da5577f0SRobert Mustacchi void 1125da5577f0SRobert Mustacchi i40e_init_dma_attrs(i40e_t *i40e, boolean_t fma) 1126da5577f0SRobert Mustacchi { 1127da5577f0SRobert Mustacchi bcopy(&i40e_g_static_dma_attr, &i40e->i40e_static_dma_attr, 1128da5577f0SRobert Mustacchi sizeof (ddi_dma_attr_t)); 1129da5577f0SRobert Mustacchi bcopy(&i40e_g_txbind_dma_attr, &i40e->i40e_txbind_dma_attr, 1130da5577f0SRobert Mustacchi sizeof (ddi_dma_attr_t)); 1131*8d5069bcSRyan Zezeski bcopy(&i40e_g_txbind_lso_dma_attr, &i40e->i40e_txbind_lso_dma_attr, 1132*8d5069bcSRyan Zezeski sizeof (ddi_dma_attr_t)); 1133da5577f0SRobert Mustacchi bcopy(&i40e_g_desc_acc_attr, &i40e->i40e_desc_acc_attr, 1134da5577f0SRobert Mustacchi sizeof (ddi_device_acc_attr_t)); 1135da5577f0SRobert Mustacchi bcopy(&i40e_g_buf_acc_attr, &i40e->i40e_buf_acc_attr, 1136da5577f0SRobert Mustacchi sizeof (ddi_device_acc_attr_t)); 1137da5577f0SRobert Mustacchi 1138da5577f0SRobert Mustacchi if (fma == B_TRUE) { 1139da5577f0SRobert Mustacchi i40e->i40e_static_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR; 1140da5577f0SRobert Mustacchi i40e->i40e_txbind_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR; 1141*8d5069bcSRyan Zezeski i40e->i40e_txbind_lso_dma_attr.dma_attr_flags |= 1142*8d5069bcSRyan Zezeski DDI_DMA_FLAGERR; 1143da5577f0SRobert Mustacchi } else { 1144da5577f0SRobert Mustacchi i40e->i40e_static_dma_attr.dma_attr_flags &= ~DDI_DMA_FLAGERR; 1145da5577f0SRobert Mustacchi i40e->i40e_txbind_dma_attr.dma_attr_flags &= ~DDI_DMA_FLAGERR; 1146*8d5069bcSRyan Zezeski i40e->i40e_txbind_lso_dma_attr.dma_attr_flags &= 1147*8d5069bcSRyan Zezeski ~DDI_DMA_FLAGERR; 1148da5577f0SRobert Mustacchi } 1149da5577f0SRobert Mustacchi } 1150da5577f0SRobert Mustacchi 1151da5577f0SRobert Mustacchi static void 1152da5577f0SRobert Mustacchi i40e_rcb_free(i40e_rx_data_t *rxd, i40e_rx_control_block_t *rcb) 1153da5577f0SRobert Mustacchi { 1154da5577f0SRobert Mustacchi mutex_enter(&rxd->rxd_free_lock); 1155da5577f0SRobert Mustacchi 
ASSERT(rxd->rxd_rcb_free < rxd->rxd_free_list_size); 1156da5577f0SRobert Mustacchi ASSERT(rxd->rxd_free_list[rxd->rxd_rcb_free] == NULL); 1157da5577f0SRobert Mustacchi rxd->rxd_free_list[rxd->rxd_rcb_free] = rcb; 1158da5577f0SRobert Mustacchi rxd->rxd_rcb_free++; 1159da5577f0SRobert Mustacchi mutex_exit(&rxd->rxd_free_lock); 1160da5577f0SRobert Mustacchi } 1161da5577f0SRobert Mustacchi 1162da5577f0SRobert Mustacchi static i40e_rx_control_block_t * 1163da5577f0SRobert Mustacchi i40e_rcb_alloc(i40e_rx_data_t *rxd) 1164da5577f0SRobert Mustacchi { 1165da5577f0SRobert Mustacchi i40e_rx_control_block_t *rcb; 1166da5577f0SRobert Mustacchi 1167da5577f0SRobert Mustacchi mutex_enter(&rxd->rxd_free_lock); 1168da5577f0SRobert Mustacchi if (rxd->rxd_rcb_free == 0) { 1169da5577f0SRobert Mustacchi mutex_exit(&rxd->rxd_free_lock); 1170da5577f0SRobert Mustacchi return (NULL); 1171da5577f0SRobert Mustacchi } 1172da5577f0SRobert Mustacchi rxd->rxd_rcb_free--; 1173da5577f0SRobert Mustacchi rcb = rxd->rxd_free_list[rxd->rxd_rcb_free]; 1174da5577f0SRobert Mustacchi VERIFY(rcb != NULL); 1175da5577f0SRobert Mustacchi rxd->rxd_free_list[rxd->rxd_rcb_free] = NULL; 1176da5577f0SRobert Mustacchi mutex_exit(&rxd->rxd_free_lock); 1177da5577f0SRobert Mustacchi 1178da5577f0SRobert Mustacchi return (rcb); 1179da5577f0SRobert Mustacchi } 1180da5577f0SRobert Mustacchi 1181da5577f0SRobert Mustacchi /* 1182da5577f0SRobert Mustacchi * This is the callback that we get from the OS when freemsg(9F) has been called 1183da5577f0SRobert Mustacchi * on a loaned descriptor. In addition, if we take the last reference count 1184*8d5069bcSRyan Zezeski * here, then we have to tear down all of the RX data. 1185da5577f0SRobert Mustacchi */ 1186da5577f0SRobert Mustacchi void 1187da5577f0SRobert Mustacchi i40e_rx_recycle(caddr_t arg) 1188da5577f0SRobert Mustacchi { 1189da5577f0SRobert Mustacchi uint32_t ref; 1190da5577f0SRobert Mustacchi i40e_rx_control_block_t *rcb; 1191da5577f0SRobert Mustacchi i40e_rx_data_t *rxd; 1192da5577f0SRobert Mustacchi i40e_t *i40e; 1193da5577f0SRobert Mustacchi 1194da5577f0SRobert Mustacchi /* LINTED: E_BAD_PTR_CAST_ALIGN */ 1195da5577f0SRobert Mustacchi rcb = (i40e_rx_control_block_t *)arg; 1196da5577f0SRobert Mustacchi rxd = rcb->rcb_rxd; 1197da5577f0SRobert Mustacchi i40e = rxd->rxd_i40e; 1198da5577f0SRobert Mustacchi 1199da5577f0SRobert Mustacchi /* 1200da5577f0SRobert Mustacchi * It's possible for this to be called with a reference count of zero. 1201da5577f0SRobert Mustacchi * That will happen when we're doing the freemsg after taking the last 1202da5577f0SRobert Mustacchi * reference because we're tearing down everything and this rcb is not 1203da5577f0SRobert Mustacchi * outstanding. 1204da5577f0SRobert Mustacchi */ 1205da5577f0SRobert Mustacchi if (rcb->rcb_ref == 0) 1206da5577f0SRobert Mustacchi return; 1207da5577f0SRobert Mustacchi 1208da5577f0SRobert Mustacchi /* 1209da5577f0SRobert Mustacchi * Don't worry about failure of desballoc here. It'll only become fatal 1210da5577f0SRobert Mustacchi * if we're trying to use it and we can't in i40e_rx_bind(). 
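	 *
	 * To sketch the reference protocol this code relies on (as derived
	 * from i40e_rx_bind() and i40e_free_rx_dma()): every rcb starts with
	 * rcb_ref of one, held by the driver; i40e_rx_bind() adds a reference
	 * while the mblk_t is loaned up the stack; this callback and the
	 * teardown path each drop one, and whichever drop reaches zero frees
	 * the DMA buffer and, once rxd_shutdown is set, the i40e_rx_data_t.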
1211da5577f0SRobert Mustacchi */ 1212da5577f0SRobert Mustacchi rcb->rcb_mp = desballoc((unsigned char *)rcb->rcb_dma.dmab_address, 1213da5577f0SRobert Mustacchi rcb->rcb_dma.dmab_size, 0, &rcb->rcb_free_rtn); 1214da5577f0SRobert Mustacchi i40e_rcb_free(rxd, rcb); 1215da5577f0SRobert Mustacchi 1216da5577f0SRobert Mustacchi /* 1217da5577f0SRobert Mustacchi * It's possible that the rcb was being used while we are shutting down 1218da5577f0SRobert Mustacchi * the device. In that case, we'll take the final reference from the 1219da5577f0SRobert Mustacchi * device here. 1220da5577f0SRobert Mustacchi */ 1221da5577f0SRobert Mustacchi ref = atomic_dec_32_nv(&rcb->rcb_ref); 1222da5577f0SRobert Mustacchi if (ref == 0) { 1223da5577f0SRobert Mustacchi freemsg(rcb->rcb_mp); 1224da5577f0SRobert Mustacchi rcb->rcb_mp = NULL; 1225da5577f0SRobert Mustacchi i40e_free_dma_buffer(&rcb->rcb_dma); 1226da5577f0SRobert Mustacchi 1227da5577f0SRobert Mustacchi mutex_enter(&i40e->i40e_rx_pending_lock); 1228da5577f0SRobert Mustacchi atomic_dec_32(&rxd->rxd_rcb_pending); 1229da5577f0SRobert Mustacchi atomic_dec_32(&i40e->i40e_rx_pending); 1230da5577f0SRobert Mustacchi 1231da5577f0SRobert Mustacchi /* 1232da5577f0SRobert Mustacchi * If this was the last block and it's been indicated that we've 1233da5577f0SRobert Mustacchi * passed the shutdown point, we should clean up. 1234da5577f0SRobert Mustacchi */ 1235da5577f0SRobert Mustacchi if (rxd->rxd_shutdown == B_TRUE && rxd->rxd_rcb_pending == 0) { 1236da5577f0SRobert Mustacchi i40e_free_rx_data(rxd); 1237da5577f0SRobert Mustacchi cv_broadcast(&i40e->i40e_rx_pending_cv); 1238da5577f0SRobert Mustacchi } 1239da5577f0SRobert Mustacchi 1240da5577f0SRobert Mustacchi mutex_exit(&i40e->i40e_rx_pending_lock); 1241da5577f0SRobert Mustacchi } 1242da5577f0SRobert Mustacchi } 1243da5577f0SRobert Mustacchi 1244da5577f0SRobert Mustacchi static mblk_t * 1245da5577f0SRobert Mustacchi i40e_rx_bind(i40e_trqpair_t *itrq, i40e_rx_data_t *rxd, uint32_t index, 1246da5577f0SRobert Mustacchi uint32_t plen) 1247da5577f0SRobert Mustacchi { 1248da5577f0SRobert Mustacchi mblk_t *mp; 1249da5577f0SRobert Mustacchi i40e_t *i40e = rxd->rxd_i40e; 1250da5577f0SRobert Mustacchi i40e_rx_control_block_t *rcb, *rep_rcb; 1251da5577f0SRobert Mustacchi 1252da5577f0SRobert Mustacchi ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); 1253da5577f0SRobert Mustacchi 1254da5577f0SRobert Mustacchi if ((rep_rcb = i40e_rcb_alloc(rxd)) == NULL) { 1255da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_rx_bind_norcb.value.ui64++; 1256da5577f0SRobert Mustacchi return (NULL); 1257da5577f0SRobert Mustacchi } 1258da5577f0SRobert Mustacchi 1259da5577f0SRobert Mustacchi rcb = rxd->rxd_work_list[index]; 1260da5577f0SRobert Mustacchi 1261da5577f0SRobert Mustacchi /* 1262da5577f0SRobert Mustacchi * Check to make sure we have a mblk_t. If we don't, this is our last 1263da5577f0SRobert Mustacchi * chance to try and get one. 
1264da5577f0SRobert Mustacchi */ 1265da5577f0SRobert Mustacchi if (rcb->rcb_mp == NULL) { 1266da5577f0SRobert Mustacchi rcb->rcb_mp = 1267da5577f0SRobert Mustacchi desballoc((unsigned char *)rcb->rcb_dma.dmab_address, 1268da5577f0SRobert Mustacchi rcb->rcb_dma.dmab_size, 0, &rcb->rcb_free_rtn); 1269da5577f0SRobert Mustacchi if (rcb->rcb_mp == NULL) { 1270da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_rx_bind_nomp.value.ui64++; 1271da5577f0SRobert Mustacchi i40e_rcb_free(rxd, rcb); 1272da5577f0SRobert Mustacchi return (NULL); 1273da5577f0SRobert Mustacchi } 1274da5577f0SRobert Mustacchi } 1275da5577f0SRobert Mustacchi 1276da5577f0SRobert Mustacchi I40E_DMA_SYNC(&rcb->rcb_dma, DDI_DMA_SYNC_FORKERNEL); 1277da5577f0SRobert Mustacchi 1278da5577f0SRobert Mustacchi if (i40e_check_dma_handle(rcb->rcb_dma.dmab_dma_handle) != DDI_FM_OK) { 1279da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 1280da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 1281da5577f0SRobert Mustacchi i40e_rcb_free(rxd, rcb); 1282da5577f0SRobert Mustacchi return (NULL); 1283da5577f0SRobert Mustacchi } 1284da5577f0SRobert Mustacchi 1285da5577f0SRobert Mustacchi /* 1286da5577f0SRobert Mustacchi * Note, we've already accounted for the I40E_BUF_IPHDR_ALIGNMENT. 1287da5577f0SRobert Mustacchi */ 1288da5577f0SRobert Mustacchi mp = rcb->rcb_mp; 1289da5577f0SRobert Mustacchi atomic_inc_32(&rcb->rcb_ref); 1290da5577f0SRobert Mustacchi mp->b_wptr = mp->b_rptr + plen; 1291da5577f0SRobert Mustacchi mp->b_next = mp->b_cont = NULL; 1292da5577f0SRobert Mustacchi 1293da5577f0SRobert Mustacchi rxd->rxd_work_list[index] = rep_rcb; 1294da5577f0SRobert Mustacchi return (mp); 1295da5577f0SRobert Mustacchi } 1296da5577f0SRobert Mustacchi 1297da5577f0SRobert Mustacchi /* 1298da5577f0SRobert Mustacchi * We're going to allocate a new message block for this frame and attempt to 1299da5577f0SRobert Mustacchi * receive it. See the big theory statement for more information on when we copy 1300da5577f0SRobert Mustacchi * versus bind. 
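 *
 * As a sketch of how the choice is made in i40e_ring_rx() (not additional
 * logic of its own): a frame is loaned out through i40e_rx_bind() only when
 * plen >= i40e_t`i40e_rx_dma_min; anything smaller, or any frame for which
 * binding fails, is copied here instead.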
1301da5577f0SRobert Mustacchi */ 1302da5577f0SRobert Mustacchi static mblk_t * 1303da5577f0SRobert Mustacchi i40e_rx_copy(i40e_trqpair_t *itrq, i40e_rx_data_t *rxd, uint32_t index, 1304da5577f0SRobert Mustacchi uint32_t plen) 1305da5577f0SRobert Mustacchi { 1306da5577f0SRobert Mustacchi i40e_t *i40e = rxd->rxd_i40e; 1307da5577f0SRobert Mustacchi i40e_rx_control_block_t *rcb; 1308da5577f0SRobert Mustacchi mblk_t *mp; 1309da5577f0SRobert Mustacchi 1310da5577f0SRobert Mustacchi ASSERT(index < rxd->rxd_ring_size); 1311da5577f0SRobert Mustacchi rcb = rxd->rxd_work_list[index]; 1312da5577f0SRobert Mustacchi 1313da5577f0SRobert Mustacchi I40E_DMA_SYNC(&rcb->rcb_dma, DDI_DMA_SYNC_FORKERNEL); 1314da5577f0SRobert Mustacchi 1315da5577f0SRobert Mustacchi if (i40e_check_dma_handle(rcb->rcb_dma.dmab_dma_handle) != DDI_FM_OK) { 1316da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 1317da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 1318da5577f0SRobert Mustacchi return (NULL); 1319da5577f0SRobert Mustacchi } 1320da5577f0SRobert Mustacchi 1321da5577f0SRobert Mustacchi mp = allocb(plen + I40E_BUF_IPHDR_ALIGNMENT, 0); 1322da5577f0SRobert Mustacchi if (mp == NULL) { 1323da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_rx_copy_nomem.value.ui64++; 1324da5577f0SRobert Mustacchi return (NULL); 1325da5577f0SRobert Mustacchi } 1326da5577f0SRobert Mustacchi 1327da5577f0SRobert Mustacchi mp->b_rptr += I40E_BUF_IPHDR_ALIGNMENT; 1328da5577f0SRobert Mustacchi bcopy(rcb->rcb_dma.dmab_address, mp->b_rptr, plen); 1329da5577f0SRobert Mustacchi mp->b_wptr = mp->b_rptr + plen; 1330da5577f0SRobert Mustacchi 1331da5577f0SRobert Mustacchi return (mp); 1332da5577f0SRobert Mustacchi } 1333da5577f0SRobert Mustacchi 1334da5577f0SRobert Mustacchi /* 1335da5577f0SRobert Mustacchi * Determine if the device has enabled any checksum flags for us. The level of 1336da5577f0SRobert Mustacchi * checksum computed will depend on the type packet that we have, which is 1337da5577f0SRobert Mustacchi * contained in ptype. For example, the checksum logic it does will vary 1338da5577f0SRobert Mustacchi * depending on whether or not the packet is considered tunneled, whether it 1339da5577f0SRobert Mustacchi * recognizes the L4 type, etc. Section 8.3.4.3 summarizes which checksums are 1340da5577f0SRobert Mustacchi * valid. 1341da5577f0SRobert Mustacchi * 1342da5577f0SRobert Mustacchi * While there are additional checksums that we could recognize here, we'll need 1343da5577f0SRobert Mustacchi * to get some additional GLDv3 enhancements to be able to properly describe 1344da5577f0SRobert Mustacchi * them. 1345da5577f0SRobert Mustacchi */ 1346da5577f0SRobert Mustacchi static void 1347da5577f0SRobert Mustacchi i40e_rx_hcksum(i40e_trqpair_t *itrq, mblk_t *mp, uint64_t status, uint32_t err, 1348da5577f0SRobert Mustacchi uint32_t ptype) 1349da5577f0SRobert Mustacchi { 1350da5577f0SRobert Mustacchi uint32_t cksum; 1351da5577f0SRobert Mustacchi struct i40e_rx_ptype_decoded pinfo; 1352da5577f0SRobert Mustacchi 1353da5577f0SRobert Mustacchi ASSERT(ptype <= 255); 1354da5577f0SRobert Mustacchi pinfo = decode_rx_desc_ptype(ptype); 1355da5577f0SRobert Mustacchi 1356da5577f0SRobert Mustacchi cksum = 0; 1357da5577f0SRobert Mustacchi 1358da5577f0SRobert Mustacchi /* 1359da5577f0SRobert Mustacchi * If the ptype isn't something that we know in the driver, then we 1360da5577f0SRobert Mustacchi * shouldn't even consider moving forward. 
1361da5577f0SRobert Mustacchi */ 1362da5577f0SRobert Mustacchi if (pinfo.known == 0) { 1363da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_unknown.value.ui64++; 1364da5577f0SRobert Mustacchi return; 1365da5577f0SRobert Mustacchi } 1366da5577f0SRobert Mustacchi 1367da5577f0SRobert Mustacchi /* 1368da5577f0SRobert Mustacchi * If hardware didn't set the L3L4P bit on the frame, then there is no 1369da5577f0SRobert Mustacchi * checksum offload to consider. 1370da5577f0SRobert Mustacchi */ 1371da5577f0SRobert Mustacchi if ((status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) == 0) { 1372da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_nol3l4p.value.ui64++; 1373da5577f0SRobert Mustacchi return; 1374da5577f0SRobert Mustacchi } 1375da5577f0SRobert Mustacchi 1376da5577f0SRobert Mustacchi /* 1377da5577f0SRobert Mustacchi * The device tells us that IPv6 checksums where a Destination Options 1378da5577f0SRobert Mustacchi * Header or a Routing header shouldn't be trusted. Discard all 1379da5577f0SRobert Mustacchi * checksums in this case. 1380da5577f0SRobert Mustacchi */ 1381da5577f0SRobert Mustacchi if (pinfo.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1382da5577f0SRobert Mustacchi pinfo.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6 && 1383da5577f0SRobert Mustacchi (status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))) { 1384da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_v6skip.value.ui64++; 1385da5577f0SRobert Mustacchi return; 1386da5577f0SRobert Mustacchi } 1387da5577f0SRobert Mustacchi 1388da5577f0SRobert Mustacchi /* 1389da5577f0SRobert Mustacchi * The hardware denotes three kinds of possible errors. Two are reserved 1390da5577f0SRobert Mustacchi * for inner and outer IP checksum errors (IPE and EIPE) and the latter 1391da5577f0SRobert Mustacchi * is for L4 checksum errors (L4E). If there is only one IP header, then 1392da5577f0SRobert Mustacchi * the only thing that we care about is IPE. Note that since we don't 1393da5577f0SRobert Mustacchi * support inner checksums, we will ignore IPE being set on tunneled 1394da5577f0SRobert Mustacchi * packets and only care about EIPE. 1395da5577f0SRobert Mustacchi */ 1396da5577f0SRobert Mustacchi if (pinfo.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1397da5577f0SRobert Mustacchi pinfo.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) { 1398da5577f0SRobert Mustacchi if (pinfo.tunnel_type == I40E_RX_PTYPE_OUTER_NONE) { 1399da5577f0SRobert Mustacchi if ((err & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)) != 0) { 1400da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_iperr.value.ui64++; 1401da5577f0SRobert Mustacchi } else { 1402da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_v4hdrok.value.ui64++; 1403da5577f0SRobert Mustacchi cksum |= HCK_IPV4_HDRCKSUM_OK; 1404da5577f0SRobert Mustacchi } 1405da5577f0SRobert Mustacchi } else { 1406da5577f0SRobert Mustacchi if ((err & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)) != 0) { 1407da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_eiperr.value.ui64++; 1408da5577f0SRobert Mustacchi } else { 1409da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_v4hdrok.value.ui64++; 1410da5577f0SRobert Mustacchi cksum |= HCK_IPV4_HDRCKSUM_OK; 1411da5577f0SRobert Mustacchi } 1412da5577f0SRobert Mustacchi } 1413da5577f0SRobert Mustacchi } 1414da5577f0SRobert Mustacchi 1415da5577f0SRobert Mustacchi /* 1416da5577f0SRobert Mustacchi * We only have meaningful L4 checksums in the case of IP->L4 and 1417da5577f0SRobert Mustacchi * IP->IP->L4. There is not outer L4 checksum data available in any 1418da5577f0SRobert Mustacchi * other case. 
Further, we don't bother reporting the valid checksum in 1419da5577f0SRobert Mustacchi * the case of IP->IP->L4 set. 1420da5577f0SRobert Mustacchi */ 1421da5577f0SRobert Mustacchi if (pinfo.outer_ip == I40E_RX_PTYPE_OUTER_IP && 1422da5577f0SRobert Mustacchi pinfo.tunnel_type == I40E_RX_PTYPE_TUNNEL_NONE && 1423da5577f0SRobert Mustacchi (pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_UDP || 1424da5577f0SRobert Mustacchi pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_TCP || 1425da5577f0SRobert Mustacchi pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_ICMP || 1426da5577f0SRobert Mustacchi pinfo.inner_prot == I40E_RX_PTYPE_INNER_PROT_SCTP)) { 1427da5577f0SRobert Mustacchi ASSERT(pinfo.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4); 1428da5577f0SRobert Mustacchi if ((err & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)) != 0) { 1429da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_l4err.value.ui64++; 1430da5577f0SRobert Mustacchi } else { 1431da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_l4hdrok.value.ui64++; 1432da5577f0SRobert Mustacchi cksum |= HCK_FULLCKSUM_OK; 1433da5577f0SRobert Mustacchi } 1434da5577f0SRobert Mustacchi } 1435da5577f0SRobert Mustacchi 1436da5577f0SRobert Mustacchi if (cksum != 0) { 1437da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_set.value.ui64++; 1438da5577f0SRobert Mustacchi mac_hcksum_set(mp, 0, 0, 0, 0, cksum); 1439da5577f0SRobert Mustacchi } else { 1440da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_hck_miss.value.ui64++; 1441da5577f0SRobert Mustacchi } 1442da5577f0SRobert Mustacchi } 1443da5577f0SRobert Mustacchi 1444da5577f0SRobert Mustacchi mblk_t * 1445da5577f0SRobert Mustacchi i40e_ring_rx(i40e_trqpair_t *itrq, int poll_bytes) 1446da5577f0SRobert Mustacchi { 1447da5577f0SRobert Mustacchi i40e_t *i40e; 1448da5577f0SRobert Mustacchi i40e_hw_t *hw; 1449da5577f0SRobert Mustacchi i40e_rx_data_t *rxd; 1450da5577f0SRobert Mustacchi uint32_t cur_head; 1451da5577f0SRobert Mustacchi i40e_rx_desc_t *cur_desc; 1452da5577f0SRobert Mustacchi i40e_rx_control_block_t *rcb; 1453da5577f0SRobert Mustacchi uint64_t rx_bytes, rx_frames; 1454da5577f0SRobert Mustacchi uint64_t stword; 1455da5577f0SRobert Mustacchi mblk_t *mp, *mp_head, **mp_tail; 1456da5577f0SRobert Mustacchi 1457da5577f0SRobert Mustacchi ASSERT(MUTEX_HELD(&itrq->itrq_rx_lock)); 1458da5577f0SRobert Mustacchi rxd = itrq->itrq_rxdata; 1459da5577f0SRobert Mustacchi i40e = itrq->itrq_i40e; 1460da5577f0SRobert Mustacchi hw = &i40e->i40e_hw_space; 1461da5577f0SRobert Mustacchi 1462da5577f0SRobert Mustacchi if (!(i40e->i40e_state & I40E_STARTED) || 1463da5577f0SRobert Mustacchi (i40e->i40e_state & I40E_OVERTEMP) || 1464da5577f0SRobert Mustacchi (i40e->i40e_state & I40E_SUSPENDED) || 1465da5577f0SRobert Mustacchi (i40e->i40e_state & I40E_ERROR)) 1466da5577f0SRobert Mustacchi return (NULL); 1467da5577f0SRobert Mustacchi 1468da5577f0SRobert Mustacchi /* 1469da5577f0SRobert Mustacchi * Before we do anything else, we have to make sure that all of the DMA 1470da5577f0SRobert Mustacchi * buffers are synced up and then check to make sure that they're 1471da5577f0SRobert Mustacchi * actually good from an FM perspective. 
1472da5577f0SRobert Mustacchi */ 1473da5577f0SRobert Mustacchi I40E_DMA_SYNC(&rxd->rxd_desc_area, DDI_DMA_SYNC_FORKERNEL); 1474da5577f0SRobert Mustacchi if (i40e_check_dma_handle(rxd->rxd_desc_area.dmab_dma_handle) != 1475da5577f0SRobert Mustacchi DDI_FM_OK) { 1476da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 1477da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 1478da5577f0SRobert Mustacchi return (NULL); 1479da5577f0SRobert Mustacchi } 1480da5577f0SRobert Mustacchi 1481da5577f0SRobert Mustacchi /* 1482da5577f0SRobert Mustacchi * Prepare our stats. We do a limited amount of processing in both 1483da5577f0SRobert Mustacchi * polling and interrupt context. The limit in interrupt context is 1484da5577f0SRobert Mustacchi * based on frames, in polling context based on bytes. 1485da5577f0SRobert Mustacchi */ 1486da5577f0SRobert Mustacchi rx_bytes = rx_frames = 0; 1487da5577f0SRobert Mustacchi mp_head = NULL; 1488da5577f0SRobert Mustacchi mp_tail = &mp_head; 1489da5577f0SRobert Mustacchi 1490da5577f0SRobert Mustacchi /* 1491da5577f0SRobert Mustacchi * At this point, the descriptor ring is available to check. We'll try 1492da5577f0SRobert Mustacchi * and process until we either run out of poll_bytes or descriptors. 1493da5577f0SRobert Mustacchi */ 1494da5577f0SRobert Mustacchi cur_head = rxd->rxd_desc_next; 1495da5577f0SRobert Mustacchi cur_desc = &rxd->rxd_desc_ring[cur_head]; 1496da5577f0SRobert Mustacchi stword = LE64_TO_CPU(cur_desc->wb.qword1.status_error_len); 1497da5577f0SRobert Mustacchi 1498da5577f0SRobert Mustacchi /* 1499da5577f0SRobert Mustacchi * Note, the primary invariant of this loop should be that cur_head, 1500da5577f0SRobert Mustacchi * cur_desc, and stword always point to the currently processed 1501da5577f0SRobert Mustacchi * descriptor. When we leave the loop, it should point to a descriptor 1502da5577f0SRobert Mustacchi * that HAS NOT been processed. Meaning, that if we haven't consumed the 1503da5577f0SRobert Mustacchi * frame, the descriptor should not be advanced. 1504da5577f0SRobert Mustacchi */ 1505da5577f0SRobert Mustacchi while ((stword & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) != 0) { 1506da5577f0SRobert Mustacchi uint32_t error, eop, plen, ptype; 1507da5577f0SRobert Mustacchi 1508da5577f0SRobert Mustacchi /* 1509da5577f0SRobert Mustacchi * The DD, PLEN, and EOP bits are the only ones that are valid 1510da5577f0SRobert Mustacchi * in every frame. The error information is only valid when EOP 1511da5577f0SRobert Mustacchi * is set in the same frame. 1512da5577f0SRobert Mustacchi * 1513da5577f0SRobert Mustacchi * At this time, because we don't do any LRO or header 1514da5577f0SRobert Mustacchi * splitting. We expect that every frame should have EOP set in 1515da5577f0SRobert Mustacchi * it. When later functionality comes in, we'll want to 1516da5577f0SRobert Mustacchi * re-evaluate this. 
1517da5577f0SRobert Mustacchi */ 1518da5577f0SRobert Mustacchi eop = stword & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT); 1519da5577f0SRobert Mustacchi VERIFY(eop != 0); 1520da5577f0SRobert Mustacchi 1521da5577f0SRobert Mustacchi error = (stword & I40E_RXD_QW1_ERROR_MASK) >> 1522da5577f0SRobert Mustacchi I40E_RXD_QW1_ERROR_SHIFT; 1523da5577f0SRobert Mustacchi if (error & I40E_RX_ERR_BITS) { 1524da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_rx_desc_error.value.ui64++; 1525da5577f0SRobert Mustacchi goto discard; 1526da5577f0SRobert Mustacchi } 1527da5577f0SRobert Mustacchi 1528da5577f0SRobert Mustacchi plen = (stword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >> 1529da5577f0SRobert Mustacchi I40E_RXD_QW1_LENGTH_PBUF_SHIFT; 1530da5577f0SRobert Mustacchi 1531da5577f0SRobert Mustacchi ptype = (stword & I40E_RXD_QW1_PTYPE_MASK) >> 1532da5577f0SRobert Mustacchi I40E_RXD_QW1_PTYPE_SHIFT; 1533da5577f0SRobert Mustacchi 1534da5577f0SRobert Mustacchi /* 1535da5577f0SRobert Mustacchi * This packet contains valid data. We should check to see if 1536da5577f0SRobert Mustacchi * we're actually going to consume it based on its length (to 1537da5577f0SRobert Mustacchi * ensure that we don't overshoot our quota). We determine 1538da5577f0SRobert Mustacchi * whether to bcopy or bind the DMA resources based on the size 1539da5577f0SRobert Mustacchi * of the frame. However, if on debug, we allow it to be 1540da5577f0SRobert Mustacchi * overridden for testing purposes. 1541da5577f0SRobert Mustacchi * 1542da5577f0SRobert Mustacchi * We should be smarter about this and do DMA binding for 1543da5577f0SRobert Mustacchi * larger frames, but for now, it's really more important that 1544da5577f0SRobert Mustacchi * we actually just get something simple working. 1545da5577f0SRobert Mustacchi */ 1546da5577f0SRobert Mustacchi 1547da5577f0SRobert Mustacchi /* 1548da5577f0SRobert Mustacchi * Ensure we don't exceed our polling quota by reading this 1549da5577f0SRobert Mustacchi * frame. Note we only bump bytes now, we bump frames later. 1550da5577f0SRobert Mustacchi */ 1551da5577f0SRobert Mustacchi if ((poll_bytes != I40E_POLL_NULL) && 1552da5577f0SRobert Mustacchi (rx_bytes + plen) > poll_bytes) 1553da5577f0SRobert Mustacchi break; 1554da5577f0SRobert Mustacchi rx_bytes += plen; 1555da5577f0SRobert Mustacchi 1556da5577f0SRobert Mustacchi mp = NULL; 1557da5577f0SRobert Mustacchi if (plen >= i40e->i40e_rx_dma_min) 1558da5577f0SRobert Mustacchi mp = i40e_rx_bind(itrq, rxd, cur_head, plen); 1559da5577f0SRobert Mustacchi if (mp == NULL) 1560da5577f0SRobert Mustacchi mp = i40e_rx_copy(itrq, rxd, cur_head, plen); 1561da5577f0SRobert Mustacchi 1562da5577f0SRobert Mustacchi if (mp != NULL) { 1563da5577f0SRobert Mustacchi if (i40e->i40e_rx_hcksum_enable) 1564da5577f0SRobert Mustacchi i40e_rx_hcksum(itrq, mp, stword, error, ptype); 1565da5577f0SRobert Mustacchi *mp_tail = mp; 1566da5577f0SRobert Mustacchi mp_tail = &mp->b_next; 1567da5577f0SRobert Mustacchi } 1568da5577f0SRobert Mustacchi 1569da5577f0SRobert Mustacchi /* 1570da5577f0SRobert Mustacchi * Now we need to prepare this frame for use again. See the 1571da5577f0SRobert Mustacchi * discussion in the big theory statements. 1572da5577f0SRobert Mustacchi * 1573da5577f0SRobert Mustacchi * However, right now we're doing the simple version of this. 1574da5577f0SRobert Mustacchi * Normally what we'd do would depend on whether or not we were 1575da5577f0SRobert Mustacchi * doing DMA binding or bcopying. 
But because we're always doing 1576da5577f0SRobert Mustacchi * bcopying, we can just always use the current index as a key 1577da5577f0SRobert Mustacchi * for what to do and reassign the buffer based on the ring. 1578da5577f0SRobert Mustacchi */ 1579da5577f0SRobert Mustacchi discard: 1580da5577f0SRobert Mustacchi rcb = rxd->rxd_work_list[cur_head]; 1581da5577f0SRobert Mustacchi cur_desc->read.pkt_addr = 1582da5577f0SRobert Mustacchi CPU_TO_LE64((uintptr_t)rcb->rcb_dma.dmab_dma_address); 1583da5577f0SRobert Mustacchi cur_desc->read.hdr_addr = 0; 1584da5577f0SRobert Mustacchi 1585da5577f0SRobert Mustacchi /* 1586da5577f0SRobert Mustacchi * Finally, update our loop invariants. 1587da5577f0SRobert Mustacchi */ 1588da5577f0SRobert Mustacchi cur_head = i40e_next_desc(cur_head, 1, rxd->rxd_ring_size); 1589da5577f0SRobert Mustacchi cur_desc = &rxd->rxd_desc_ring[cur_head]; 1590da5577f0SRobert Mustacchi stword = LE64_TO_CPU(cur_desc->wb.qword1.status_error_len); 1591da5577f0SRobert Mustacchi 1592da5577f0SRobert Mustacchi /* 1593da5577f0SRobert Mustacchi * To help provide liveness, we limit the amount of data that 1594da5577f0SRobert Mustacchi * we'll end up counting. Note that in these cases, an interrupt 1595da5577f0SRobert Mustacchi * is not dissimilar from a polling request. 1596da5577f0SRobert Mustacchi */ 1597da5577f0SRobert Mustacchi rx_frames++; 1598da5577f0SRobert Mustacchi if (rx_frames > i40e->i40e_rx_limit_per_intr) { 1599da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_rx_intr_limit.value.ui64++; 1600da5577f0SRobert Mustacchi break; 1601da5577f0SRobert Mustacchi } 1602da5577f0SRobert Mustacchi } 1603da5577f0SRobert Mustacchi 1604da5577f0SRobert Mustacchi /* 1605da5577f0SRobert Mustacchi * As we've modified the ring, we need to make sure that we sync the 1606da5577f0SRobert Mustacchi * descriptor ring for the device. Next, we update the hardware and 1607da5577f0SRobert Mustacchi * update our notion of where the head for us to read from hardware is 1608da5577f0SRobert Mustacchi * next. 
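	 *
	 * For example (a sketch using a 1024-entry ring): if the loop above
	 * stopped with cur_head at descriptor 100, rxd_desc_next becomes 100
	 * and the value written to QRX_TAIL below is
	 * i40e_prev_desc(100, 1, 1024) == 99, i.e. one entry behind the next
	 * descriptor the driver itself will look at.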
1609da5577f0SRobert Mustacchi */ 1610da5577f0SRobert Mustacchi I40E_DMA_SYNC(&rxd->rxd_desc_area, DDI_DMA_SYNC_FORDEV); 1611da5577f0SRobert Mustacchi if (i40e_check_dma_handle(rxd->rxd_desc_area.dmab_dma_handle) != 1612da5577f0SRobert Mustacchi DDI_FM_OK) { 1613da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 1614da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 1615da5577f0SRobert Mustacchi } 1616da5577f0SRobert Mustacchi 1617da5577f0SRobert Mustacchi if (rx_frames != 0) { 1618da5577f0SRobert Mustacchi uint32_t tail; 1619da5577f0SRobert Mustacchi ddi_acc_handle_t rh = i40e->i40e_osdep_space.ios_reg_handle; 1620da5577f0SRobert Mustacchi rxd->rxd_desc_next = cur_head; 1621da5577f0SRobert Mustacchi tail = i40e_prev_desc(cur_head, 1, rxd->rxd_ring_size); 1622da5577f0SRobert Mustacchi 1623da5577f0SRobert Mustacchi I40E_WRITE_REG(hw, I40E_QRX_TAIL(itrq->itrq_index), tail); 1624da5577f0SRobert Mustacchi if (i40e_check_acc_handle(rh) != DDI_FM_OK) { 1625da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, 1626da5577f0SRobert Mustacchi DDI_SERVICE_DEGRADED); 1627da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 1628da5577f0SRobert Mustacchi } 1629da5577f0SRobert Mustacchi 1630da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_bytes.value.ui64 += rx_bytes; 1631da5577f0SRobert Mustacchi itrq->itrq_rxstat.irxs_packets.value.ui64 += rx_frames; 1632da5577f0SRobert Mustacchi } 1633da5577f0SRobert Mustacchi 1634da5577f0SRobert Mustacchi #ifdef DEBUG 1635da5577f0SRobert Mustacchi if (rx_frames == 0) { 1636da5577f0SRobert Mustacchi ASSERT(rx_bytes == 0); 1637da5577f0SRobert Mustacchi } 1638da5577f0SRobert Mustacchi #endif 1639da5577f0SRobert Mustacchi 1640da5577f0SRobert Mustacchi return (mp_head); 1641da5577f0SRobert Mustacchi } 1642da5577f0SRobert Mustacchi 1643da5577f0SRobert Mustacchi /* 1644da5577f0SRobert Mustacchi * This function is called by the GLDv3 when it wants to poll on a ring. The 1645da5577f0SRobert Mustacchi * only primary difference from when we call this during an interrupt is that we 1646da5577f0SRobert Mustacchi * have a limit on the number of bytes that we should consume. 1647da5577f0SRobert Mustacchi */ 1648da5577f0SRobert Mustacchi mblk_t * 1649da5577f0SRobert Mustacchi i40e_ring_rx_poll(void *arg, int poll_bytes) 1650da5577f0SRobert Mustacchi { 1651da5577f0SRobert Mustacchi i40e_trqpair_t *itrq = arg; 1652da5577f0SRobert Mustacchi mblk_t *mp; 1653da5577f0SRobert Mustacchi 1654da5577f0SRobert Mustacchi ASSERT(poll_bytes > 0); 1655da5577f0SRobert Mustacchi if (poll_bytes == 0) 1656da5577f0SRobert Mustacchi return (NULL); 1657da5577f0SRobert Mustacchi 1658da5577f0SRobert Mustacchi mutex_enter(&itrq->itrq_rx_lock); 1659da5577f0SRobert Mustacchi mp = i40e_ring_rx(itrq, poll_bytes); 1660da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_rx_lock); 1661da5577f0SRobert Mustacchi 1662da5577f0SRobert Mustacchi return (mp); 1663da5577f0SRobert Mustacchi } 1664da5577f0SRobert Mustacchi 1665da5577f0SRobert Mustacchi /* 1666da5577f0SRobert Mustacchi * This is a structure I wish someone would fill out for me for dorking with the 1667da5577f0SRobert Mustacchi * checksums. When we get some more experience with this, we should go ahead and 1668da5577f0SRobert Mustacchi * consider adding this to MAC. 
1669da5577f0SRobert Mustacchi */ 1670da5577f0SRobert Mustacchi typedef enum mac_ether_offload_flags { 1671da5577f0SRobert Mustacchi MEOI_L2INFO_SET = 0x01, 1672da5577f0SRobert Mustacchi MEOI_VLAN_TAGGED = 0x02, 1673da5577f0SRobert Mustacchi MEOI_L3INFO_SET = 0x04, 1674da5577f0SRobert Mustacchi MEOI_L3CKSUM_SET = 0x08, 1675da5577f0SRobert Mustacchi MEOI_L4INFO_SET = 0x10, 1676da5577f0SRobert Mustacchi MEOI_L4CKSUM_SET = 0x20 1677da5577f0SRobert Mustacchi } mac_ether_offload_flags_t; 1678da5577f0SRobert Mustacchi 1679da5577f0SRobert Mustacchi typedef struct mac_ether_offload_info { 1680da5577f0SRobert Mustacchi mac_ether_offload_flags_t meoi_flags; 1681da5577f0SRobert Mustacchi uint8_t meoi_l2hlen; /* How long is the Ethernet header? */ 1682da5577f0SRobert Mustacchi uint16_t meoi_l3proto; /* What's the Ethertype */ 1683da5577f0SRobert Mustacchi uint8_t meoi_l3hlen; /* How long is the header? */ 1684da5577f0SRobert Mustacchi uint8_t meoi_l4proto; /* What is the payload type? */ 1685da5577f0SRobert Mustacchi uint8_t meoi_l4hlen; /* How long is the L4 header */ 1686da5577f0SRobert Mustacchi mblk_t *meoi_l3ckmp; /* Which mblk has the l3 checksum */ 1687da5577f0SRobert Mustacchi off_t meoi_l3ckoff; /* What's the offset to it */ 1688da5577f0SRobert Mustacchi mblk_t *meoi_l4ckmp; /* Which mblk has the L4 checksum */ 1689da5577f0SRobert Mustacchi off_t meoi_l4off; /* What is the offset to it? */ 1690da5577f0SRobert Mustacchi } mac_ether_offload_info_t; 1691da5577f0SRobert Mustacchi 1692da5577f0SRobert Mustacchi /* 1693da5577f0SRobert Mustacchi * This is something that we'd like to make a general MAC function. Before we do 1694da5577f0SRobert Mustacchi * that, we should add support for TSO. 1695da5577f0SRobert Mustacchi * 1696da5577f0SRobert Mustacchi * We should really keep track of our offset and not walk everything every 1697da5577f0SRobert Mustacchi * time. I can't imagine that this will be kind to us at high packet rates; 1698da5577f0SRobert Mustacchi * however, for the moment, let's leave that. 1699da5577f0SRobert Mustacchi * 1700da5577f0SRobert Mustacchi * This walks a message block chain without pulling up to fill in the context 1701da5577f0SRobert Mustacchi * information. Note that the data we care about could be hidden across more 1702da5577f0SRobert Mustacchi * than one mblk_t. 
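 *
 * For example (a hypothetical chain): if the first mblk_t happens to be 13
 * bytes long, the two-byte ether_type at offset 12 straddles the boundary,
 * and i40e_meoi_get_uint16() reads its first byte from the tail of that
 * mblk_t and its second byte from the start of b_cont.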
1703da5577f0SRobert Mustacchi  */
1704da5577f0SRobert Mustacchi static int
1705da5577f0SRobert Mustacchi i40e_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out)
1706da5577f0SRobert Mustacchi {
1707da5577f0SRobert Mustacchi 	size_t mpsize;
1708da5577f0SRobert Mustacchi 	uint8_t *bp;
1709da5577f0SRobert Mustacchi 
1710da5577f0SRobert Mustacchi 	mpsize = msgsize(mp);
1711da5577f0SRobert Mustacchi 	/* Check for overflow */
1712da5577f0SRobert Mustacchi 	if (off + sizeof (uint8_t) > mpsize)
1713da5577f0SRobert Mustacchi 		return (-1);
1714da5577f0SRobert Mustacchi 
1715da5577f0SRobert Mustacchi 	mpsize = MBLKL(mp);
1716da5577f0SRobert Mustacchi 	while (off >= mpsize) {
1717da5577f0SRobert Mustacchi 		mp = mp->b_cont;
1718da5577f0SRobert Mustacchi 		off -= mpsize;
1719da5577f0SRobert Mustacchi 		mpsize = MBLKL(mp);
1720da5577f0SRobert Mustacchi 	}
1721da5577f0SRobert Mustacchi 
1722da5577f0SRobert Mustacchi 	bp = mp->b_rptr + off;
1723da5577f0SRobert Mustacchi 	*out = *bp;
1724da5577f0SRobert Mustacchi 	return (0);
1725da5577f0SRobert Mustacchi 
1726da5577f0SRobert Mustacchi }
1727da5577f0SRobert Mustacchi 
1728da5577f0SRobert Mustacchi static int
1729da5577f0SRobert Mustacchi i40e_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out)
1730da5577f0SRobert Mustacchi {
1731da5577f0SRobert Mustacchi 	size_t mpsize;
1732da5577f0SRobert Mustacchi 	uint8_t *bp;
1733da5577f0SRobert Mustacchi 
1734da5577f0SRobert Mustacchi 	mpsize = msgsize(mp);
1735da5577f0SRobert Mustacchi 	/* Check for overflow */
1736da5577f0SRobert Mustacchi 	if (off + sizeof (uint16_t) > mpsize)
1737da5577f0SRobert Mustacchi 		return (-1);
1738da5577f0SRobert Mustacchi 
1739da5577f0SRobert Mustacchi 	mpsize = MBLKL(mp);
1740da5577f0SRobert Mustacchi 	while (off >= mpsize) {
1741da5577f0SRobert Mustacchi 		mp = mp->b_cont;
1742da5577f0SRobert Mustacchi 		off -= mpsize;
1743da5577f0SRobert Mustacchi 		mpsize = MBLKL(mp);
1744da5577f0SRobert Mustacchi 	}
1745da5577f0SRobert Mustacchi 
1746da5577f0SRobert Mustacchi 	/*
1747da5577f0SRobert Mustacchi 	 * Data is in network order. Note the second byte of data might be in
1748da5577f0SRobert Mustacchi 	 * the next mp.
1749da5577f0SRobert Mustacchi */ 1750da5577f0SRobert Mustacchi bp = mp->b_rptr + off; 1751da5577f0SRobert Mustacchi *out = *bp << 8; 1752da5577f0SRobert Mustacchi if (off + 1 == mpsize) { 1753da5577f0SRobert Mustacchi mp = mp->b_cont; 1754da5577f0SRobert Mustacchi bp = mp->b_rptr; 1755da5577f0SRobert Mustacchi } else { 1756da5577f0SRobert Mustacchi bp++; 1757da5577f0SRobert Mustacchi } 1758da5577f0SRobert Mustacchi 1759da5577f0SRobert Mustacchi *out |= *bp; 1760da5577f0SRobert Mustacchi return (0); 1761da5577f0SRobert Mustacchi 1762da5577f0SRobert Mustacchi } 1763da5577f0SRobert Mustacchi 1764da5577f0SRobert Mustacchi static int 1765da5577f0SRobert Mustacchi mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi) 1766da5577f0SRobert Mustacchi { 1767da5577f0SRobert Mustacchi size_t off; 1768da5577f0SRobert Mustacchi uint16_t ether; 1769da5577f0SRobert Mustacchi uint8_t ipproto, iplen, l4len, maclen; 1770da5577f0SRobert Mustacchi 1771da5577f0SRobert Mustacchi bzero(meoi, sizeof (mac_ether_offload_info_t)); 1772da5577f0SRobert Mustacchi 1773da5577f0SRobert Mustacchi off = offsetof(struct ether_header, ether_type); 1774da5577f0SRobert Mustacchi if (i40e_meoi_get_uint16(mp, off, ðer) != 0) 1775da5577f0SRobert Mustacchi return (-1); 1776da5577f0SRobert Mustacchi 1777da5577f0SRobert Mustacchi if (ether == ETHERTYPE_VLAN) { 1778da5577f0SRobert Mustacchi off = offsetof(struct ether_vlan_header, ether_type); 1779da5577f0SRobert Mustacchi if (i40e_meoi_get_uint16(mp, off, ðer) != 0) 1780da5577f0SRobert Mustacchi return (-1); 1781da5577f0SRobert Mustacchi meoi->meoi_flags |= MEOI_VLAN_TAGGED; 1782da5577f0SRobert Mustacchi maclen = sizeof (struct ether_vlan_header); 1783da5577f0SRobert Mustacchi } else { 1784da5577f0SRobert Mustacchi maclen = sizeof (struct ether_header); 1785da5577f0SRobert Mustacchi } 1786da5577f0SRobert Mustacchi meoi->meoi_flags |= MEOI_L2INFO_SET; 1787da5577f0SRobert Mustacchi meoi->meoi_l2hlen = maclen; 1788da5577f0SRobert Mustacchi meoi->meoi_l3proto = ether; 1789da5577f0SRobert Mustacchi 1790da5577f0SRobert Mustacchi switch (ether) { 1791da5577f0SRobert Mustacchi case ETHERTYPE_IP: 1792da5577f0SRobert Mustacchi /* 1793da5577f0SRobert Mustacchi * For IPv4 we need to get the length of the header, as it can 1794da5577f0SRobert Mustacchi * be variable. 
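		 *
		 * As a worked example: an option-less IPv4 header carries a
		 * version-and-length byte of 0x45, so (0x45 & 0x0f) == 5 and
		 * 5 * 4 == 20 bytes; the largest encodable header is
		 * 0x0f * 4 == 60 bytes when options are present.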
1795da5577f0SRobert Mustacchi */ 1796da5577f0SRobert Mustacchi off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen; 1797da5577f0SRobert Mustacchi if (i40e_meoi_get_uint8(mp, off, &iplen) != 0) 1798da5577f0SRobert Mustacchi return (-1); 1799da5577f0SRobert Mustacchi iplen &= 0x0f; 1800da5577f0SRobert Mustacchi if (iplen < 5 || iplen > 0x0f) 1801da5577f0SRobert Mustacchi return (-1); 1802da5577f0SRobert Mustacchi iplen *= 4; 1803da5577f0SRobert Mustacchi off = offsetof(ipha_t, ipha_protocol) + maclen; 1804da5577f0SRobert Mustacchi if (i40e_meoi_get_uint8(mp, off, &ipproto) == -1) 1805da5577f0SRobert Mustacchi return (-1); 1806da5577f0SRobert Mustacchi break; 1807da5577f0SRobert Mustacchi case ETHERTYPE_IPV6: 1808da5577f0SRobert Mustacchi iplen = 40; 1809da5577f0SRobert Mustacchi off = offsetof(ip6_t, ip6_nxt) + maclen; 1810da5577f0SRobert Mustacchi if (i40e_meoi_get_uint8(mp, off, &ipproto) == -1) 1811da5577f0SRobert Mustacchi return (-1); 1812da5577f0SRobert Mustacchi break; 1813da5577f0SRobert Mustacchi default: 1814da5577f0SRobert Mustacchi return (0); 1815da5577f0SRobert Mustacchi } 1816da5577f0SRobert Mustacchi meoi->meoi_l3hlen = iplen; 1817da5577f0SRobert Mustacchi meoi->meoi_l4proto = ipproto; 1818da5577f0SRobert Mustacchi meoi->meoi_flags |= MEOI_L3INFO_SET; 1819da5577f0SRobert Mustacchi 1820da5577f0SRobert Mustacchi switch (ipproto) { 1821da5577f0SRobert Mustacchi case IPPROTO_TCP: 1822da5577f0SRobert Mustacchi off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen; 1823da5577f0SRobert Mustacchi if (i40e_meoi_get_uint8(mp, off, &l4len) == -1) 1824da5577f0SRobert Mustacchi return (-1); 1825da5577f0SRobert Mustacchi l4len = (l4len & 0xf0) >> 4; 1826da5577f0SRobert Mustacchi if (l4len < 5 || l4len > 0xf) 1827da5577f0SRobert Mustacchi return (-1); 1828da5577f0SRobert Mustacchi l4len *= 4; 1829da5577f0SRobert Mustacchi break; 1830da5577f0SRobert Mustacchi case IPPROTO_UDP: 1831da5577f0SRobert Mustacchi l4len = sizeof (struct udphdr); 1832da5577f0SRobert Mustacchi break; 1833da5577f0SRobert Mustacchi case IPPROTO_SCTP: 1834da5577f0SRobert Mustacchi l4len = sizeof (sctp_hdr_t); 1835da5577f0SRobert Mustacchi break; 1836da5577f0SRobert Mustacchi default: 1837da5577f0SRobert Mustacchi return (0); 1838da5577f0SRobert Mustacchi } 1839da5577f0SRobert Mustacchi 1840da5577f0SRobert Mustacchi meoi->meoi_l4hlen = l4len; 1841da5577f0SRobert Mustacchi meoi->meoi_flags |= MEOI_L4INFO_SET; 1842da5577f0SRobert Mustacchi return (0); 1843da5577f0SRobert Mustacchi } 1844da5577f0SRobert Mustacchi 1845da5577f0SRobert Mustacchi /* 1846da5577f0SRobert Mustacchi * Attempt to put togther the information we'll need to feed into a descriptor 1847da5577f0SRobert Mustacchi * to properly program the hardware for checksum offload as well as the 1848da5577f0SRobert Mustacchi * generally required flags. 1849da5577f0SRobert Mustacchi * 1850*8d5069bcSRyan Zezeski * The i40e_tx_context_t`itc_data_cmdflags contains the set of flags we need to 1851*8d5069bcSRyan Zezeski * 'or' into the descriptor based on the checksum flags for this mblk_t and the 1852da5577f0SRobert Mustacchi * actual information we care about. 1853*8d5069bcSRyan Zezeski * 1854*8d5069bcSRyan Zezeski * If the mblk requires LSO then we'll also gather the information that will be 1855*8d5069bcSRyan Zezeski * used to construct the Transmit Context Descriptor. 
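 *
 * As a worked example of how the offsets end up packed (assuming an untagged
 * IPv4/TCP frame with no IP or TCP options): the 14-byte MAC header is
 * recorded as 14 >> 1 == 7 in the MACLEN field, the 20-byte IP header as
 * 20 >> 2 == 5 in the IPLEN field, and the 20-byte TCP header as
 * 20 >> 2 == 5 in the L4_FC_LEN field of itc_data_offsets.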
1856da5577f0SRobert Mustacchi */ 1857da5577f0SRobert Mustacchi static int 1858da5577f0SRobert Mustacchi i40e_tx_context(i40e_t *i40e, i40e_trqpair_t *itrq, mblk_t *mp, 1859*8d5069bcSRyan Zezeski mac_ether_offload_info_t *meo, i40e_tx_context_t *tctx) 1860da5577f0SRobert Mustacchi { 1861*8d5069bcSRyan Zezeski uint32_t chkflags, start, mss, lsoflags; 1862da5577f0SRobert Mustacchi i40e_txq_stat_t *txs = &itrq->itrq_txstat; 1863da5577f0SRobert Mustacchi 1864da5577f0SRobert Mustacchi bzero(tctx, sizeof (i40e_tx_context_t)); 1865da5577f0SRobert Mustacchi 1866da5577f0SRobert Mustacchi if (i40e->i40e_tx_hcksum_enable != B_TRUE) 1867da5577f0SRobert Mustacchi return (0); 1868da5577f0SRobert Mustacchi 1869*8d5069bcSRyan Zezeski mac_hcksum_get(mp, &start, NULL, NULL, NULL, &chkflags); 1870*8d5069bcSRyan Zezeski mac_lso_get(mp, &mss, &lsoflags); 1871da5577f0SRobert Mustacchi 1872*8d5069bcSRyan Zezeski if (chkflags == 0 && lsoflags == 0) 1873*8d5069bcSRyan Zezeski return (0); 1874da5577f0SRobert Mustacchi 1875da5577f0SRobert Mustacchi /* 1876da5577f0SRobert Mustacchi * Have we been asked to checksum an IPv4 header. If so, verify that we 1877da5577f0SRobert Mustacchi * have sufficient information and then set the proper fields in the 1878da5577f0SRobert Mustacchi * command structure. 1879da5577f0SRobert Mustacchi */ 1880*8d5069bcSRyan Zezeski if (chkflags & HCK_IPV4_HDRCKSUM) { 1881*8d5069bcSRyan Zezeski if ((meo->meoi_flags & MEOI_L2INFO_SET) == 0) { 1882da5577f0SRobert Mustacchi txs->itxs_hck_nol2info.value.ui64++; 1883da5577f0SRobert Mustacchi return (-1); 1884da5577f0SRobert Mustacchi } 1885*8d5069bcSRyan Zezeski if ((meo->meoi_flags & MEOI_L3INFO_SET) == 0) { 1886da5577f0SRobert Mustacchi txs->itxs_hck_nol3info.value.ui64++; 1887da5577f0SRobert Mustacchi return (-1); 1888da5577f0SRobert Mustacchi } 1889*8d5069bcSRyan Zezeski if (meo->meoi_l3proto != ETHERTYPE_IP) { 1890da5577f0SRobert Mustacchi txs->itxs_hck_badl3.value.ui64++; 1891da5577f0SRobert Mustacchi return (-1); 1892da5577f0SRobert Mustacchi } 1893*8d5069bcSRyan Zezeski tctx->itc_data_cmdflags |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM; 1894*8d5069bcSRyan Zezeski tctx->itc_data_offsets |= (meo->meoi_l2hlen >> 1) << 1895da5577f0SRobert Mustacchi I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 1896*8d5069bcSRyan Zezeski tctx->itc_data_offsets |= (meo->meoi_l3hlen >> 2) << 1897da5577f0SRobert Mustacchi I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1898da5577f0SRobert Mustacchi } 1899da5577f0SRobert Mustacchi 1900da5577f0SRobert Mustacchi /* 1901da5577f0SRobert Mustacchi * We've been asked to provide an L4 header, first, set up the IP 1902da5577f0SRobert Mustacchi * information in the descriptor if we haven't already before moving 1903da5577f0SRobert Mustacchi * onto seeing if we have enough information for the L4 checksum 1904da5577f0SRobert Mustacchi * offload. 
1905da5577f0SRobert Mustacchi */ 1906*8d5069bcSRyan Zezeski if (chkflags & HCK_PARTIALCKSUM) { 1907*8d5069bcSRyan Zezeski if ((meo->meoi_flags & MEOI_L4INFO_SET) == 0) { 1908da5577f0SRobert Mustacchi txs->itxs_hck_nol4info.value.ui64++; 1909da5577f0SRobert Mustacchi return (-1); 1910da5577f0SRobert Mustacchi } 1911da5577f0SRobert Mustacchi 1912*8d5069bcSRyan Zezeski if (!(chkflags & HCK_IPV4_HDRCKSUM)) { 1913*8d5069bcSRyan Zezeski if ((meo->meoi_flags & MEOI_L2INFO_SET) == 0) { 1914da5577f0SRobert Mustacchi txs->itxs_hck_nol2info.value.ui64++; 1915da5577f0SRobert Mustacchi return (-1); 1916da5577f0SRobert Mustacchi } 1917*8d5069bcSRyan Zezeski if ((meo->meoi_flags & MEOI_L3INFO_SET) == 0) { 1918da5577f0SRobert Mustacchi txs->itxs_hck_nol3info.value.ui64++; 1919da5577f0SRobert Mustacchi return (-1); 1920da5577f0SRobert Mustacchi } 1921da5577f0SRobert Mustacchi 1922*8d5069bcSRyan Zezeski if (meo->meoi_l3proto == ETHERTYPE_IP) { 1923*8d5069bcSRyan Zezeski tctx->itc_data_cmdflags |= 1924da5577f0SRobert Mustacchi I40E_TX_DESC_CMD_IIPT_IPV4; 1925*8d5069bcSRyan Zezeski } else if (meo->meoi_l3proto == ETHERTYPE_IPV6) { 1926*8d5069bcSRyan Zezeski tctx->itc_data_cmdflags |= 1927da5577f0SRobert Mustacchi I40E_TX_DESC_CMD_IIPT_IPV6; 1928da5577f0SRobert Mustacchi } else { 1929da5577f0SRobert Mustacchi txs->itxs_hck_badl3.value.ui64++; 1930da5577f0SRobert Mustacchi return (-1); 1931da5577f0SRobert Mustacchi } 1932*8d5069bcSRyan Zezeski tctx->itc_data_offsets |= (meo->meoi_l2hlen >> 1) << 1933da5577f0SRobert Mustacchi I40E_TX_DESC_LENGTH_MACLEN_SHIFT; 1934*8d5069bcSRyan Zezeski tctx->itc_data_offsets |= (meo->meoi_l3hlen >> 2) << 1935da5577f0SRobert Mustacchi I40E_TX_DESC_LENGTH_IPLEN_SHIFT; 1936da5577f0SRobert Mustacchi } 1937da5577f0SRobert Mustacchi 1938*8d5069bcSRyan Zezeski switch (meo->meoi_l4proto) { 1939da5577f0SRobert Mustacchi case IPPROTO_TCP: 1940*8d5069bcSRyan Zezeski tctx->itc_data_cmdflags |= 1941*8d5069bcSRyan Zezeski I40E_TX_DESC_CMD_L4T_EOFT_TCP; 1942da5577f0SRobert Mustacchi break; 1943da5577f0SRobert Mustacchi case IPPROTO_UDP: 1944*8d5069bcSRyan Zezeski tctx->itc_data_cmdflags |= 1945*8d5069bcSRyan Zezeski I40E_TX_DESC_CMD_L4T_EOFT_UDP; 1946da5577f0SRobert Mustacchi break; 1947da5577f0SRobert Mustacchi case IPPROTO_SCTP: 1948*8d5069bcSRyan Zezeski tctx->itc_data_cmdflags |= 1949*8d5069bcSRyan Zezeski I40E_TX_DESC_CMD_L4T_EOFT_SCTP; 1950da5577f0SRobert Mustacchi break; 1951da5577f0SRobert Mustacchi default: 1952da5577f0SRobert Mustacchi txs->itxs_hck_badl4.value.ui64++; 1953da5577f0SRobert Mustacchi return (-1); 1954da5577f0SRobert Mustacchi } 1955da5577f0SRobert Mustacchi 1956*8d5069bcSRyan Zezeski tctx->itc_data_offsets |= (meo->meoi_l4hlen >> 2) << 1957da5577f0SRobert Mustacchi I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT; 1958da5577f0SRobert Mustacchi } 1959da5577f0SRobert Mustacchi 1960*8d5069bcSRyan Zezeski if (lsoflags & HW_LSO) { 1961*8d5069bcSRyan Zezeski /* 1962*8d5069bcSRyan Zezeski * LSO requires that checksum offloads are enabled. If for 1963*8d5069bcSRyan Zezeski * some reason they're not we bail out with an error. 
1964*8d5069bcSRyan Zezeski */ 1965*8d5069bcSRyan Zezeski if ((chkflags & HCK_IPV4_HDRCKSUM) == 0 || 1966*8d5069bcSRyan Zezeski (chkflags & HCK_PARTIALCKSUM) == 0) { 1967*8d5069bcSRyan Zezeski txs->itxs_lso_nohck.value.ui64++; 1968*8d5069bcSRyan Zezeski return (-1); 1969*8d5069bcSRyan Zezeski } 1970*8d5069bcSRyan Zezeski 1971*8d5069bcSRyan Zezeski tctx->itc_ctx_cmdflags |= I40E_TX_CTX_DESC_TSO; 1972*8d5069bcSRyan Zezeski tctx->itc_ctx_mss = mss; 1973*8d5069bcSRyan Zezeski tctx->itc_ctx_tsolen = msgsize(mp) - 1974*8d5069bcSRyan Zezeski (meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen); 1975*8d5069bcSRyan Zezeski } 1976*8d5069bcSRyan Zezeski 1977da5577f0SRobert Mustacchi return (0); 1978da5577f0SRobert Mustacchi } 1979da5577f0SRobert Mustacchi 1980da5577f0SRobert Mustacchi static void 1981da5577f0SRobert Mustacchi i40e_tcb_free(i40e_trqpair_t *itrq, i40e_tx_control_block_t *tcb) 1982da5577f0SRobert Mustacchi { 1983da5577f0SRobert Mustacchi ASSERT(tcb != NULL); 1984da5577f0SRobert Mustacchi 1985da5577f0SRobert Mustacchi mutex_enter(&itrq->itrq_tcb_lock); 1986da5577f0SRobert Mustacchi ASSERT(itrq->itrq_tcb_free < itrq->itrq_tx_free_list_size); 1987da5577f0SRobert Mustacchi itrq->itrq_tcb_free_list[itrq->itrq_tcb_free] = tcb; 1988da5577f0SRobert Mustacchi itrq->itrq_tcb_free++; 1989da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tcb_lock); 1990da5577f0SRobert Mustacchi } 1991da5577f0SRobert Mustacchi 1992da5577f0SRobert Mustacchi static i40e_tx_control_block_t * 1993da5577f0SRobert Mustacchi i40e_tcb_alloc(i40e_trqpair_t *itrq) 1994da5577f0SRobert Mustacchi { 1995da5577f0SRobert Mustacchi i40e_tx_control_block_t *ret; 1996da5577f0SRobert Mustacchi 1997da5577f0SRobert Mustacchi mutex_enter(&itrq->itrq_tcb_lock); 1998da5577f0SRobert Mustacchi if (itrq->itrq_tcb_free == 0) { 1999da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tcb_lock); 2000da5577f0SRobert Mustacchi return (NULL); 2001da5577f0SRobert Mustacchi } 2002da5577f0SRobert Mustacchi 2003da5577f0SRobert Mustacchi itrq->itrq_tcb_free--; 2004da5577f0SRobert Mustacchi ret = itrq->itrq_tcb_free_list[itrq->itrq_tcb_free]; 2005da5577f0SRobert Mustacchi itrq->itrq_tcb_free_list[itrq->itrq_tcb_free] = NULL; 2006da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tcb_lock); 2007da5577f0SRobert Mustacchi 2008da5577f0SRobert Mustacchi ASSERT(ret != NULL); 2009da5577f0SRobert Mustacchi return (ret); 2010da5577f0SRobert Mustacchi } 2011da5577f0SRobert Mustacchi 2012da5577f0SRobert Mustacchi /* 2013da5577f0SRobert Mustacchi * This should be used to free any DMA resources, associated mblk_t's, etc. It's 2014da5577f0SRobert Mustacchi * used as part of recycling the message blocks when we have either an interrupt 2015da5577f0SRobert Mustacchi * or other activity that indicates that we need to take a look. 
2016da5577f0SRobert Mustacchi */ 2017da5577f0SRobert Mustacchi static void 2018da5577f0SRobert Mustacchi i40e_tcb_reset(i40e_tx_control_block_t *tcb) 2019da5577f0SRobert Mustacchi { 2020da5577f0SRobert Mustacchi switch (tcb->tcb_type) { 2021da5577f0SRobert Mustacchi case I40E_TX_COPY: 2022da5577f0SRobert Mustacchi tcb->tcb_dma.dmab_len = 0; 2023da5577f0SRobert Mustacchi break; 2024da5577f0SRobert Mustacchi case I40E_TX_DMA: 2025*8d5069bcSRyan Zezeski if (tcb->tcb_used_lso == B_TRUE && tcb->tcb_bind_ncookies > 0) 2026*8d5069bcSRyan Zezeski (void) ddi_dma_unbind_handle(tcb->tcb_lso_dma_handle); 2027*8d5069bcSRyan Zezeski else if (tcb->tcb_bind_ncookies > 0) 2028da5577f0SRobert Mustacchi (void) ddi_dma_unbind_handle(tcb->tcb_dma_handle); 2029*8d5069bcSRyan Zezeski if (tcb->tcb_bind_info != NULL) { 2030*8d5069bcSRyan Zezeski kmem_free(tcb->tcb_bind_info, 2031*8d5069bcSRyan Zezeski tcb->tcb_bind_ncookies * 2032*8d5069bcSRyan Zezeski sizeof (struct i40e_dma_bind_info)); 2033*8d5069bcSRyan Zezeski } 2034*8d5069bcSRyan Zezeski tcb->tcb_bind_info = NULL; 2035*8d5069bcSRyan Zezeski tcb->tcb_bind_ncookies = 0; 2036*8d5069bcSRyan Zezeski tcb->tcb_used_lso = B_FALSE; 2037*8d5069bcSRyan Zezeski break; 2038*8d5069bcSRyan Zezeski case I40E_TX_DESC: 2039da5577f0SRobert Mustacchi break; 2040da5577f0SRobert Mustacchi case I40E_TX_NONE: 2041da5577f0SRobert Mustacchi /* Cast to pacify lint */ 2042da5577f0SRobert Mustacchi panic("trying to free tcb %p with bad type none", (void *)tcb); 2043da5577f0SRobert Mustacchi default: 2044da5577f0SRobert Mustacchi panic("unknown i40e tcb type: %d", tcb->tcb_type); 2045da5577f0SRobert Mustacchi } 2046da5577f0SRobert Mustacchi 2047da5577f0SRobert Mustacchi tcb->tcb_type = I40E_TX_NONE; 2048*8d5069bcSRyan Zezeski if (tcb->tcb_mp != NULL) { 2049da5577f0SRobert Mustacchi freemsg(tcb->tcb_mp); 2050da5577f0SRobert Mustacchi tcb->tcb_mp = NULL; 2051*8d5069bcSRyan Zezeski } 2052da5577f0SRobert Mustacchi tcb->tcb_next = NULL; 2053da5577f0SRobert Mustacchi } 2054da5577f0SRobert Mustacchi 2055da5577f0SRobert Mustacchi /* 2056da5577f0SRobert Mustacchi * This is called as part of shutting down to clean up all outstanding 2057da5577f0SRobert Mustacchi * descriptors. Similar to recycle, except we don't re-arm anything and instead 2058da5577f0SRobert Mustacchi * just return control blocks to the free list. 2059da5577f0SRobert Mustacchi */ 2060da5577f0SRobert Mustacchi void 2061da5577f0SRobert Mustacchi i40e_tx_cleanup_ring(i40e_trqpair_t *itrq) 2062da5577f0SRobert Mustacchi { 2063da5577f0SRobert Mustacchi uint32_t index; 2064da5577f0SRobert Mustacchi 2065da5577f0SRobert Mustacchi ASSERT(MUTEX_HELD(&itrq->itrq_tx_lock)); 2066da5577f0SRobert Mustacchi ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size); 2067da5577f0SRobert Mustacchi 2068da5577f0SRobert Mustacchi /* 2069da5577f0SRobert Mustacchi * Because we should have shut down the chip at this point, it should be 2070da5577f0SRobert Mustacchi * safe to just clean up all the entries between our head and tail. 
2071da5577f0SRobert Mustacchi */ 2072da5577f0SRobert Mustacchi #ifdef DEBUG 2073da5577f0SRobert Mustacchi index = I40E_READ_REG(&itrq->itrq_i40e->i40e_hw_space, 2074da5577f0SRobert Mustacchi I40E_QTX_ENA(itrq->itrq_index)); 2075da5577f0SRobert Mustacchi VERIFY0(index & (I40E_QTX_ENA_QENA_REQ_MASK | 2076da5577f0SRobert Mustacchi I40E_QTX_ENA_QENA_STAT_MASK)); 2077da5577f0SRobert Mustacchi #endif 2078da5577f0SRobert Mustacchi 2079da5577f0SRobert Mustacchi index = itrq->itrq_desc_head; 2080da5577f0SRobert Mustacchi while (itrq->itrq_desc_free < itrq->itrq_tx_ring_size) { 2081da5577f0SRobert Mustacchi i40e_tx_control_block_t *tcb; 2082da5577f0SRobert Mustacchi 2083da5577f0SRobert Mustacchi tcb = itrq->itrq_tcb_work_list[index]; 2084*8d5069bcSRyan Zezeski if (tcb != NULL) { 2085da5577f0SRobert Mustacchi itrq->itrq_tcb_work_list[index] = NULL; 2086da5577f0SRobert Mustacchi i40e_tcb_reset(tcb); 2087da5577f0SRobert Mustacchi i40e_tcb_free(itrq, tcb); 2088*8d5069bcSRyan Zezeski } 2089da5577f0SRobert Mustacchi 2090da5577f0SRobert Mustacchi bzero(&itrq->itrq_desc_ring[index], sizeof (i40e_tx_desc_t)); 2091da5577f0SRobert Mustacchi index = i40e_next_desc(index, 1, itrq->itrq_tx_ring_size); 2092da5577f0SRobert Mustacchi itrq->itrq_desc_free++; 2093da5577f0SRobert Mustacchi } 2094da5577f0SRobert Mustacchi 2095da5577f0SRobert Mustacchi ASSERT(index == itrq->itrq_desc_tail); 2096da5577f0SRobert Mustacchi itrq->itrq_desc_head = index; 2097da5577f0SRobert Mustacchi } 2098da5577f0SRobert Mustacchi 2099da5577f0SRobert Mustacchi /* 2100da5577f0SRobert Mustacchi * We're here either by hook or by crook. We need to see if there are transmit 2101da5577f0SRobert Mustacchi * descriptors available for us to go and clean up and return to the hardware. 2102da5577f0SRobert Mustacchi * We may also be blocked, and if so, we should make sure that we let it know 2103da5577f0SRobert Mustacchi * we're good to go. 2104da5577f0SRobert Mustacchi */ 2105da5577f0SRobert Mustacchi void 2106da5577f0SRobert Mustacchi i40e_tx_recycle_ring(i40e_trqpair_t *itrq) 2107da5577f0SRobert Mustacchi { 2108da5577f0SRobert Mustacchi uint32_t wbhead, toclean, count; 2109da5577f0SRobert Mustacchi i40e_tx_control_block_t *tcbhead; 2110da5577f0SRobert Mustacchi i40e_t *i40e = itrq->itrq_i40e; 2111*8d5069bcSRyan Zezeski uint_t desc_per_tcb, i; 2112da5577f0SRobert Mustacchi 2113da5577f0SRobert Mustacchi mutex_enter(&itrq->itrq_tx_lock); 2114da5577f0SRobert Mustacchi 2115da5577f0SRobert Mustacchi ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size); 2116da5577f0SRobert Mustacchi if (itrq->itrq_desc_free == itrq->itrq_tx_ring_size) { 2117da5577f0SRobert Mustacchi if (itrq->itrq_tx_blocked == B_TRUE) { 2118da5577f0SRobert Mustacchi itrq->itrq_tx_blocked = B_FALSE; 2119da5577f0SRobert Mustacchi mac_tx_ring_update(i40e->i40e_mac_hdl, 2120da5577f0SRobert Mustacchi itrq->itrq_mactxring); 2121da5577f0SRobert Mustacchi itrq->itrq_txstat.itxs_num_unblocked.value.ui64++; 2122da5577f0SRobert Mustacchi } 2123da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tx_lock); 2124da5577f0SRobert Mustacchi return; 2125da5577f0SRobert Mustacchi } 2126da5577f0SRobert Mustacchi 2127da5577f0SRobert Mustacchi /* 2128da5577f0SRobert Mustacchi * Now we need to try and see if there's anything available. The driver 2129da5577f0SRobert Mustacchi * will write to the head location and it guarantees that it does not 2130da5577f0SRobert Mustacchi * use relaxed ordering. 
2131da5577f0SRobert Mustacchi */ 2132da5577f0SRobert Mustacchi VERIFY0(ddi_dma_sync(itrq->itrq_desc_area.dmab_dma_handle, 2133da5577f0SRobert Mustacchi (uintptr_t)itrq->itrq_desc_wbhead, 2134da5577f0SRobert Mustacchi sizeof (uint32_t), DDI_DMA_SYNC_FORKERNEL)); 2135da5577f0SRobert Mustacchi 2136da5577f0SRobert Mustacchi if (i40e_check_dma_handle(itrq->itrq_desc_area.dmab_dma_handle) != 2137da5577f0SRobert Mustacchi DDI_FM_OK) { 2138da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tx_lock); 2139da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 2140da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 2141da5577f0SRobert Mustacchi return; 2142da5577f0SRobert Mustacchi } 2143da5577f0SRobert Mustacchi 2144da5577f0SRobert Mustacchi wbhead = *itrq->itrq_desc_wbhead; 2145da5577f0SRobert Mustacchi toclean = itrq->itrq_desc_head; 2146da5577f0SRobert Mustacchi count = 0; 2147da5577f0SRobert Mustacchi tcbhead = NULL; 2148da5577f0SRobert Mustacchi 2149da5577f0SRobert Mustacchi while (toclean != wbhead) { 2150da5577f0SRobert Mustacchi i40e_tx_control_block_t *tcb; 2151da5577f0SRobert Mustacchi 2152da5577f0SRobert Mustacchi tcb = itrq->itrq_tcb_work_list[toclean]; 2153da5577f0SRobert Mustacchi itrq->itrq_tcb_work_list[toclean] = NULL; 2154da5577f0SRobert Mustacchi ASSERT(tcb != NULL); 2155da5577f0SRobert Mustacchi tcb->tcb_next = tcbhead; 2156da5577f0SRobert Mustacchi tcbhead = tcb; 2157da5577f0SRobert Mustacchi 2158da5577f0SRobert Mustacchi /* 2159*8d5069bcSRyan Zezeski * In the DMA bind case, there may not necessarily be a 1:1 2160*8d5069bcSRyan Zezeski * mapping between tcb's and descriptors. If the tcb type 2161*8d5069bcSRyan Zezeski * indicates a DMA binding then check the number of DMA 2162*8d5069bcSRyan Zezeski * cookies to determine how many entries to clean in the 2163*8d5069bcSRyan Zezeski * descriptor ring. 2164*8d5069bcSRyan Zezeski */ 2165*8d5069bcSRyan Zezeski if (tcb->tcb_type == I40E_TX_DMA) 2166*8d5069bcSRyan Zezeski desc_per_tcb = tcb->tcb_bind_ncookies; 2167*8d5069bcSRyan Zezeski else 2168*8d5069bcSRyan Zezeski desc_per_tcb = 1; 2169*8d5069bcSRyan Zezeski 2170*8d5069bcSRyan Zezeski for (i = 0; i < desc_per_tcb; i++) { 2171*8d5069bcSRyan Zezeski /* 2172da5577f0SRobert Mustacchi * We zero this out for sanity purposes. 
2173da5577f0SRobert Mustacchi */ 2174*8d5069bcSRyan Zezeski bzero(&itrq->itrq_desc_ring[toclean], 2175*8d5069bcSRyan Zezeski sizeof (i40e_tx_desc_t)); 2176*8d5069bcSRyan Zezeski toclean = i40e_next_desc(toclean, 1, 2177*8d5069bcSRyan Zezeski itrq->itrq_tx_ring_size); 2178da5577f0SRobert Mustacchi count++; 2179da5577f0SRobert Mustacchi } 2180*8d5069bcSRyan Zezeski } 2181da5577f0SRobert Mustacchi 2182da5577f0SRobert Mustacchi itrq->itrq_desc_head = wbhead; 2183da5577f0SRobert Mustacchi itrq->itrq_desc_free += count; 2184da5577f0SRobert Mustacchi itrq->itrq_txstat.itxs_recycled.value.ui64 += count; 2185da5577f0SRobert Mustacchi ASSERT(itrq->itrq_desc_free <= itrq->itrq_tx_ring_size); 2186da5577f0SRobert Mustacchi 2187da5577f0SRobert Mustacchi if (itrq->itrq_tx_blocked == B_TRUE && 2188da5577f0SRobert Mustacchi itrq->itrq_desc_free > i40e->i40e_tx_block_thresh) { 2189da5577f0SRobert Mustacchi itrq->itrq_tx_blocked = B_FALSE; 2190da5577f0SRobert Mustacchi 2191da5577f0SRobert Mustacchi mac_tx_ring_update(i40e->i40e_mac_hdl, itrq->itrq_mactxring); 2192da5577f0SRobert Mustacchi itrq->itrq_txstat.itxs_num_unblocked.value.ui64++; 2193da5577f0SRobert Mustacchi } 2194da5577f0SRobert Mustacchi 2195da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tx_lock); 2196da5577f0SRobert Mustacchi 2197da5577f0SRobert Mustacchi /* 2198da5577f0SRobert Mustacchi * Now clean up the tcb. 2199da5577f0SRobert Mustacchi */ 2200da5577f0SRobert Mustacchi while (tcbhead != NULL) { 2201da5577f0SRobert Mustacchi i40e_tx_control_block_t *tcb = tcbhead; 2202da5577f0SRobert Mustacchi 2203da5577f0SRobert Mustacchi tcbhead = tcb->tcb_next; 2204da5577f0SRobert Mustacchi i40e_tcb_reset(tcb); 2205da5577f0SRobert Mustacchi i40e_tcb_free(itrq, tcb); 2206da5577f0SRobert Mustacchi } 2207da5577f0SRobert Mustacchi 2208da5577f0SRobert Mustacchi DTRACE_PROBE2(i40e__recycle, i40e_trqpair_t *, itrq, uint32_t, count); 2209da5577f0SRobert Mustacchi } 2210da5577f0SRobert Mustacchi 2211*8d5069bcSRyan Zezeski static void 2212*8d5069bcSRyan Zezeski i40e_tx_copy_fragment(i40e_tx_control_block_t *tcb, const mblk_t *mp, 2213*8d5069bcSRyan Zezeski const size_t off, const size_t len) 2214*8d5069bcSRyan Zezeski { 2215*8d5069bcSRyan Zezeski const void *soff = mp->b_rptr + off; 2216*8d5069bcSRyan Zezeski void *doff = tcb->tcb_dma.dmab_address + tcb->tcb_dma.dmab_len; 2217*8d5069bcSRyan Zezeski 2218*8d5069bcSRyan Zezeski ASSERT3U(len, >, 0); 2219*8d5069bcSRyan Zezeski ASSERT3P(soff, >=, mp->b_rptr); 2220*8d5069bcSRyan Zezeski ASSERT3P(soff, <=, mp->b_wptr); 2221*8d5069bcSRyan Zezeski ASSERT3U(len, <=, MBLKL(mp)); 2222*8d5069bcSRyan Zezeski ASSERT3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr); 2223*8d5069bcSRyan Zezeski ASSERT3U(tcb->tcb_dma.dmab_size - tcb->tcb_dma.dmab_len, >=, len); 2224*8d5069bcSRyan Zezeski bcopy(soff, doff, len); 2225*8d5069bcSRyan Zezeski tcb->tcb_type = I40E_TX_COPY; 2226*8d5069bcSRyan Zezeski tcb->tcb_dma.dmab_len += len; 2227*8d5069bcSRyan Zezeski I40E_DMA_SYNC(&tcb->tcb_dma, DDI_DMA_SYNC_FORDEV); 2228*8d5069bcSRyan Zezeski } 2229*8d5069bcSRyan Zezeski 2230*8d5069bcSRyan Zezeski static i40e_tx_control_block_t * 2231*8d5069bcSRyan Zezeski i40e_tx_bind_fragment(i40e_trqpair_t *itrq, const mblk_t *mp, 2232*8d5069bcSRyan Zezeski size_t off, boolean_t use_lso) 2233*8d5069bcSRyan Zezeski { 2234*8d5069bcSRyan Zezeski ddi_dma_handle_t dma_handle; 2235*8d5069bcSRyan Zezeski ddi_dma_cookie_t dma_cookie; 2236*8d5069bcSRyan Zezeski uint_t i = 0, ncookies = 0, dmaflags; 2237*8d5069bcSRyan Zezeski i40e_tx_control_block_t *tcb; 
2238*8d5069bcSRyan Zezeski i40e_txq_stat_t *txs = &itrq->itrq_txstat; 2239*8d5069bcSRyan Zezeski 2240*8d5069bcSRyan Zezeski if ((tcb = i40e_tcb_alloc(itrq)) == NULL) { 2241*8d5069bcSRyan Zezeski txs->itxs_err_notcb.value.ui64++; 2242*8d5069bcSRyan Zezeski return (NULL); 2243*8d5069bcSRyan Zezeski } 2244*8d5069bcSRyan Zezeski tcb->tcb_type = I40E_TX_DMA; 2245*8d5069bcSRyan Zezeski 2246*8d5069bcSRyan Zezeski if (use_lso == B_TRUE) 2247*8d5069bcSRyan Zezeski dma_handle = tcb->tcb_lso_dma_handle; 2248*8d5069bcSRyan Zezeski else 2249*8d5069bcSRyan Zezeski dma_handle = tcb->tcb_dma_handle; 2250*8d5069bcSRyan Zezeski 2251*8d5069bcSRyan Zezeski dmaflags = DDI_DMA_WRITE | DDI_DMA_STREAMING; 2252*8d5069bcSRyan Zezeski if (ddi_dma_addr_bind_handle(dma_handle, NULL, 2253*8d5069bcSRyan Zezeski (caddr_t)(mp->b_rptr + off), MBLKL(mp) - off, dmaflags, 2254*8d5069bcSRyan Zezeski DDI_DMA_DONTWAIT, NULL, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) { 2255*8d5069bcSRyan Zezeski txs->itxs_bind_fails.value.ui64++; 2256*8d5069bcSRyan Zezeski goto bffail; 2257*8d5069bcSRyan Zezeski } 2258*8d5069bcSRyan Zezeski 2259*8d5069bcSRyan Zezeski tcb->tcb_bind_ncookies = ncookies; 2260*8d5069bcSRyan Zezeski tcb->tcb_used_lso = use_lso; 2261*8d5069bcSRyan Zezeski 2262*8d5069bcSRyan Zezeski tcb->tcb_bind_info = 2263*8d5069bcSRyan Zezeski kmem_zalloc(ncookies * sizeof (struct i40e_dma_bind_info), 2264*8d5069bcSRyan Zezeski KM_NOSLEEP); 2265*8d5069bcSRyan Zezeski if (tcb->tcb_bind_info == NULL) 2266*8d5069bcSRyan Zezeski goto bffail; 2267*8d5069bcSRyan Zezeski 2268*8d5069bcSRyan Zezeski while (i < ncookies) { 2269*8d5069bcSRyan Zezeski if (i > 0) 2270*8d5069bcSRyan Zezeski ddi_dma_nextcookie(dma_handle, &dma_cookie); 2271*8d5069bcSRyan Zezeski 2272*8d5069bcSRyan Zezeski tcb->tcb_bind_info[i].dbi_paddr = 2273*8d5069bcSRyan Zezeski (caddr_t)dma_cookie.dmac_laddress; 2274*8d5069bcSRyan Zezeski tcb->tcb_bind_info[i++].dbi_len = dma_cookie.dmac_size; 2275*8d5069bcSRyan Zezeski } 2276*8d5069bcSRyan Zezeski 2277*8d5069bcSRyan Zezeski return (tcb); 2278*8d5069bcSRyan Zezeski 2279*8d5069bcSRyan Zezeski bffail: 2280*8d5069bcSRyan Zezeski i40e_tcb_reset(tcb); 2281*8d5069bcSRyan Zezeski i40e_tcb_free(itrq, tcb); 2282*8d5069bcSRyan Zezeski return (NULL); 2283*8d5069bcSRyan Zezeski } 2284*8d5069bcSRyan Zezeski 2285*8d5069bcSRyan Zezeski static void 2286*8d5069bcSRyan Zezeski i40e_tx_set_data_desc(i40e_trqpair_t *itrq, i40e_tx_context_t *tctx, 2287*8d5069bcSRyan Zezeski caddr_t buff, size_t len, boolean_t last_desc) 2288*8d5069bcSRyan Zezeski { 2289*8d5069bcSRyan Zezeski i40e_tx_desc_t *txdesc; 2290*8d5069bcSRyan Zezeski int cmd; 2291*8d5069bcSRyan Zezeski 2292*8d5069bcSRyan Zezeski ASSERT(MUTEX_HELD(&itrq->itrq_tx_lock)); 2293*8d5069bcSRyan Zezeski itrq->itrq_desc_free--; 2294*8d5069bcSRyan Zezeski txdesc = &itrq->itrq_desc_ring[itrq->itrq_desc_tail]; 2295*8d5069bcSRyan Zezeski itrq->itrq_desc_tail = i40e_next_desc(itrq->itrq_desc_tail, 1, 2296*8d5069bcSRyan Zezeski itrq->itrq_tx_ring_size); 2297*8d5069bcSRyan Zezeski 2298*8d5069bcSRyan Zezeski cmd = I40E_TX_DESC_CMD_ICRC | tctx->itc_data_cmdflags; 2299*8d5069bcSRyan Zezeski 2300*8d5069bcSRyan Zezeski /* 2301*8d5069bcSRyan Zezeski * The last data descriptor needs the EOP bit set, so that the HW knows 2302*8d5069bcSRyan Zezeski * that we're ready to send. 
Additionally, we set the RS (Report
2303*8d5069bcSRyan Zezeski * Status) bit, so that we are notified when the transmit engine has
2304*8d5069bcSRyan Zezeski * completed DMA'ing all of the data descriptors and data buffers
2305*8d5069bcSRyan Zezeski * associated with this frame.
2306*8d5069bcSRyan Zezeski */
2307*8d5069bcSRyan Zezeski if (last_desc == B_TRUE) {
2308*8d5069bcSRyan Zezeski cmd |= I40E_TX_DESC_CMD_EOP;
2309*8d5069bcSRyan Zezeski cmd |= I40E_TX_DESC_CMD_RS;
2310*8d5069bcSRyan Zezeski }
2311*8d5069bcSRyan Zezeski
2312*8d5069bcSRyan Zezeski /*
2313*8d5069bcSRyan Zezeski * Per the X710 manual, section 8.4.2.1.1, the buffer size
2314*8d5069bcSRyan Zezeski * must be a value from 1 to 16K minus 1, inclusive.
2315*8d5069bcSRyan Zezeski */
2316*8d5069bcSRyan Zezeski ASSERT3U(len, >=, 1);
2317*8d5069bcSRyan Zezeski ASSERT3U(len, <=, I40E_MAX_TX_BUFSZ);
2318*8d5069bcSRyan Zezeski
2319*8d5069bcSRyan Zezeski txdesc->buffer_addr = CPU_TO_LE64((uintptr_t)buff);
2320*8d5069bcSRyan Zezeski txdesc->cmd_type_offset_bsz =
2321*8d5069bcSRyan Zezeski LE_64(((uint64_t)I40E_TX_DESC_DTYPE_DATA |
2322*8d5069bcSRyan Zezeski ((uint64_t)tctx->itc_data_offsets << I40E_TXD_QW1_OFFSET_SHIFT) |
2323*8d5069bcSRyan Zezeski ((uint64_t)cmd << I40E_TXD_QW1_CMD_SHIFT) |
2324*8d5069bcSRyan Zezeski ((uint64_t)len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)));
2325*8d5069bcSRyan Zezeski }
2326*8d5069bcSRyan Zezeski
2327*8d5069bcSRyan Zezeski /*
2328*8d5069bcSRyan Zezeski * Place 'tcb' on the tail of the list represented by 'head'/'tail'.
2329*8d5069bcSRyan Zezeski */
2330*8d5069bcSRyan Zezeski static inline void
2331*8d5069bcSRyan Zezeski tcb_list_append(i40e_tx_control_block_t **head, i40e_tx_control_block_t **tail,
2332*8d5069bcSRyan Zezeski i40e_tx_control_block_t *tcb)
2333*8d5069bcSRyan Zezeski {
2334*8d5069bcSRyan Zezeski if (*head == NULL) {
2335*8d5069bcSRyan Zezeski *head = tcb;
2336*8d5069bcSRyan Zezeski *tail = *head;
2337*8d5069bcSRyan Zezeski } else {
2338*8d5069bcSRyan Zezeski ASSERT3P(*tail, !=, NULL);
2339*8d5069bcSRyan Zezeski ASSERT3P((*tail)->tcb_next, ==, NULL);
2340*8d5069bcSRyan Zezeski (*tail)->tcb_next = tcb;
2341*8d5069bcSRyan Zezeski *tail = tcb;
2342*8d5069bcSRyan Zezeski }
2343*8d5069bcSRyan Zezeski }
2344*8d5069bcSRyan Zezeski
2345*8d5069bcSRyan Zezeski /*
2346*8d5069bcSRyan Zezeski * This function takes a single packet, possibly consisting of
2347*8d5069bcSRyan Zezeski * multiple mblks, and creates a TCB chain to send to the controller.
2348*8d5069bcSRyan Zezeski * This TCB chain may span up to a maximum of 8 descriptors. A copy
2349*8d5069bcSRyan Zezeski * TCB consumes one descriptor, whereas a DMA TCB may consume 1 or
2350*8d5069bcSRyan Zezeski * more, depending on several factors. For each fragment (individual
2351*8d5069bcSRyan Zezeski * mblk making up the packet), we determine if its size dictates a
2352*8d5069bcSRyan Zezeski * copy to the TCB buffer or a DMA bind of the dblk buffer. We keep a
2353*8d5069bcSRyan Zezeski * count of descriptors used; when that count reaches the max we force
2354*8d5069bcSRyan Zezeski * all remaining fragments into a single TCB buffer. We have a
2355*8d5069bcSRyan Zezeski * guarantee that the TCB buffer is always larger than the MTU -- so
2356*8d5069bcSRyan Zezeski * there is always enough room. Consecutive fragments below the DMA
2357*8d5069bcSRyan Zezeski * threshold are copied into a single TCB. In the event of an error
2358*8d5069bcSRyan Zezeski * this function returns NULL but leaves 'mp' alone.
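 *
 * A sketch of the decision using hypothetical sizes: for a packet made
 * of three mblks of 40, 1400, and 60 bytes with a DMA threshold
 * (i40e_tx_dma_min) of 256 bytes, the 40-byte fragment is copied into a
 * TCB buffer, the 1400-byte fragment is DMA bound (one descriptor per
 * cookie), and the trailing 60-byte fragment starts a new copy TCB,
 * because the previous TCB was a DMA TCB. With a single-cookie bind
 * that comes to three descriptors in total.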
2359*8d5069bcSRyan Zezeski */ 2360*8d5069bcSRyan Zezeski static i40e_tx_control_block_t * 2361*8d5069bcSRyan Zezeski i40e_non_lso_chain(i40e_trqpair_t *itrq, mblk_t *mp, uint_t *ndesc) 2362*8d5069bcSRyan Zezeski { 2363*8d5069bcSRyan Zezeski const mblk_t *nmp = mp; 2364*8d5069bcSRyan Zezeski uint_t needed_desc = 0; 2365*8d5069bcSRyan Zezeski boolean_t force_copy = B_FALSE; 2366*8d5069bcSRyan Zezeski i40e_tx_control_block_t *tcb = NULL, *tcbhead = NULL, *tcbtail = NULL; 2367*8d5069bcSRyan Zezeski i40e_t *i40e = itrq->itrq_i40e; 2368*8d5069bcSRyan Zezeski i40e_txq_stat_t *txs = &itrq->itrq_txstat; 2369*8d5069bcSRyan Zezeski 2370*8d5069bcSRyan Zezeski /* TCB buffer is always larger than MTU. */ 2371*8d5069bcSRyan Zezeski ASSERT3U(msgsize(mp), <, i40e->i40e_tx_buf_size); 2372*8d5069bcSRyan Zezeski 2373*8d5069bcSRyan Zezeski while (nmp != NULL) { 2374*8d5069bcSRyan Zezeski const size_t nmp_len = MBLKL(nmp); 2375*8d5069bcSRyan Zezeski 2376*8d5069bcSRyan Zezeski /* Ignore zero-length mblks. */ 2377*8d5069bcSRyan Zezeski if (nmp_len == 0) { 2378*8d5069bcSRyan Zezeski nmp = nmp->b_cont; 2379*8d5069bcSRyan Zezeski continue; 2380*8d5069bcSRyan Zezeski } 2381*8d5069bcSRyan Zezeski 2382*8d5069bcSRyan Zezeski if (nmp_len < i40e->i40e_tx_dma_min || force_copy) { 2383*8d5069bcSRyan Zezeski /* Compress consecutive copies into one TCB. */ 2384*8d5069bcSRyan Zezeski if (tcb != NULL && tcb->tcb_type == I40E_TX_COPY) { 2385*8d5069bcSRyan Zezeski i40e_tx_copy_fragment(tcb, nmp, 0, nmp_len); 2386*8d5069bcSRyan Zezeski nmp = nmp->b_cont; 2387*8d5069bcSRyan Zezeski continue; 2388*8d5069bcSRyan Zezeski } 2389*8d5069bcSRyan Zezeski 2390*8d5069bcSRyan Zezeski if ((tcb = i40e_tcb_alloc(itrq)) == NULL) { 2391*8d5069bcSRyan Zezeski txs->itxs_err_notcb.value.ui64++; 2392*8d5069bcSRyan Zezeski goto fail; 2393*8d5069bcSRyan Zezeski } 2394*8d5069bcSRyan Zezeski 2395*8d5069bcSRyan Zezeski /* 2396*8d5069bcSRyan Zezeski * TCB DMA buffer is guaranteed to be one 2397*8d5069bcSRyan Zezeski * cookie by i40e_alloc_dma_buffer(). 2398*8d5069bcSRyan Zezeski */ 2399*8d5069bcSRyan Zezeski i40e_tx_copy_fragment(tcb, nmp, 0, nmp_len); 2400*8d5069bcSRyan Zezeski needed_desc++; 2401*8d5069bcSRyan Zezeski tcb_list_append(&tcbhead, &tcbtail, tcb); 2402*8d5069bcSRyan Zezeski } else { 2403*8d5069bcSRyan Zezeski uint_t total_desc; 2404*8d5069bcSRyan Zezeski 2405*8d5069bcSRyan Zezeski tcb = i40e_tx_bind_fragment(itrq, nmp, 0, B_FALSE); 2406*8d5069bcSRyan Zezeski if (tcb == NULL) { 2407*8d5069bcSRyan Zezeski i40e_error(i40e, "dma bind failed!"); 2408*8d5069bcSRyan Zezeski goto fail; 2409*8d5069bcSRyan Zezeski } 2410*8d5069bcSRyan Zezeski 2411*8d5069bcSRyan Zezeski /* 2412*8d5069bcSRyan Zezeski * If the new total exceeds the max or we've 2413*8d5069bcSRyan Zezeski * reached the limit and there's data left, 2414*8d5069bcSRyan Zezeski * then give up binding and copy the rest into 2415*8d5069bcSRyan Zezeski * the pre-allocated TCB buffer. 
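 *
 * For example (the counts here are hypothetical): if needed_desc is
 * already 6 and this bind produced 3 cookies, then total_desc is 9 and
 * exceeds I40E_TX_MAX_COOKIE (8), so we undo the bind and fall back to
 * copying. Likewise, a total of exactly 8 with more mblks still to come
 * forces the copy path, since the remaining fragments must share the
 * final TCB buffer.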
2416*8d5069bcSRyan Zezeski */ 2417*8d5069bcSRyan Zezeski total_desc = needed_desc + tcb->tcb_bind_ncookies; 2418*8d5069bcSRyan Zezeski if ((total_desc > I40E_TX_MAX_COOKIE) || 2419*8d5069bcSRyan Zezeski (total_desc == I40E_TX_MAX_COOKIE && 2420*8d5069bcSRyan Zezeski nmp->b_cont != NULL)) { 2421*8d5069bcSRyan Zezeski i40e_tcb_reset(tcb); 2422*8d5069bcSRyan Zezeski i40e_tcb_free(itrq, tcb); 2423*8d5069bcSRyan Zezeski 2424*8d5069bcSRyan Zezeski if (tcbtail != NULL && 2425*8d5069bcSRyan Zezeski tcbtail->tcb_type == I40E_TX_COPY) { 2426*8d5069bcSRyan Zezeski tcb = tcbtail; 2427*8d5069bcSRyan Zezeski } else { 2428*8d5069bcSRyan Zezeski tcb = NULL; 2429*8d5069bcSRyan Zezeski } 2430*8d5069bcSRyan Zezeski 2431*8d5069bcSRyan Zezeski force_copy = B_TRUE; 2432*8d5069bcSRyan Zezeski txs->itxs_force_copy.value.ui64++; 2433*8d5069bcSRyan Zezeski continue; 2434*8d5069bcSRyan Zezeski } 2435*8d5069bcSRyan Zezeski 2436*8d5069bcSRyan Zezeski needed_desc += tcb->tcb_bind_ncookies; 2437*8d5069bcSRyan Zezeski tcb_list_append(&tcbhead, &tcbtail, tcb); 2438*8d5069bcSRyan Zezeski } 2439*8d5069bcSRyan Zezeski 2440*8d5069bcSRyan Zezeski nmp = nmp->b_cont; 2441*8d5069bcSRyan Zezeski } 2442*8d5069bcSRyan Zezeski 2443*8d5069bcSRyan Zezeski ASSERT3P(nmp, ==, NULL); 2444*8d5069bcSRyan Zezeski ASSERT3U(needed_desc, <=, I40E_TX_MAX_COOKIE); 2445*8d5069bcSRyan Zezeski ASSERT3P(tcbhead, !=, NULL); 2446*8d5069bcSRyan Zezeski *ndesc += needed_desc; 2447*8d5069bcSRyan Zezeski return (tcbhead); 2448*8d5069bcSRyan Zezeski 2449*8d5069bcSRyan Zezeski fail: 2450*8d5069bcSRyan Zezeski tcb = tcbhead; 2451*8d5069bcSRyan Zezeski while (tcb != NULL) { 2452*8d5069bcSRyan Zezeski i40e_tx_control_block_t *next = tcb->tcb_next; 2453*8d5069bcSRyan Zezeski 2454*8d5069bcSRyan Zezeski ASSERT(tcb->tcb_type == I40E_TX_DMA || 2455*8d5069bcSRyan Zezeski tcb->tcb_type == I40E_TX_COPY); 2456*8d5069bcSRyan Zezeski 2457*8d5069bcSRyan Zezeski tcb->tcb_mp = NULL; 2458*8d5069bcSRyan Zezeski i40e_tcb_reset(tcb); 2459*8d5069bcSRyan Zezeski i40e_tcb_free(itrq, tcb); 2460*8d5069bcSRyan Zezeski tcb = next; 2461*8d5069bcSRyan Zezeski } 2462*8d5069bcSRyan Zezeski 2463*8d5069bcSRyan Zezeski return (NULL); 2464*8d5069bcSRyan Zezeski } 2465*8d5069bcSRyan Zezeski 2466*8d5069bcSRyan Zezeski /* 2467*8d5069bcSRyan Zezeski * Section 8.4.1 of the 700-series programming guide states that a 2468*8d5069bcSRyan Zezeski * segment may span up to 8 data descriptors; including both header 2469*8d5069bcSRyan Zezeski * and payload data. However, empirical evidence shows that the 2470*8d5069bcSRyan Zezeski * controller freezes the Tx queue when presented with a segment of 8 2471*8d5069bcSRyan Zezeski * descriptors. Or, at least, when the first segment contains 8 2472*8d5069bcSRyan Zezeski * descriptors. One explanation is that the controller counts the 2473*8d5069bcSRyan Zezeski * context descriptor against the first segment, even though the 2474*8d5069bcSRyan Zezeski * programming guide makes no mention of such a constraint. In any 2475*8d5069bcSRyan Zezeski * case, we limit TSO segments to 7 descriptors to prevent Tx queue 2476*8d5069bcSRyan Zezeski * freezes. We still allow non-TSO segments to utilize all 8 2477*8d5069bcSRyan Zezeski * descriptors as they have not demonstrated the faulty behavior. 
2478*8d5069bcSRyan Zezeski */
2479*8d5069bcSRyan Zezeski uint_t i40e_lso_num_descs = 7;
2480*8d5069bcSRyan Zezeski
2481*8d5069bcSRyan Zezeski #define I40E_TCB_LEFT(tcb) \
2482*8d5069bcSRyan Zezeski ((tcb)->tcb_dma.dmab_size - (tcb)->tcb_dma.dmab_len)
2483*8d5069bcSRyan Zezeski
2484*8d5069bcSRyan Zezeski /*
2485*8d5069bcSRyan Zezeski * This function is similar in spirit to i40e_non_lso_chain(), but
2486*8d5069bcSRyan Zezeski * much more complicated in reality. Like the previous function, it
2487*8d5069bcSRyan Zezeski * takes a packet (an LSO packet) as input and returns a chain of
2488*8d5069bcSRyan Zezeski * TCBs. The complication comes with the fact that we are no longer
2489*8d5069bcSRyan Zezeski * trying to fit the entire packet into 8 descriptors, but rather we
2490*8d5069bcSRyan Zezeski * must fit each MSS-sized segment of the LSO packet into 8 descriptors.
2491*8d5069bcSRyan Zezeski * Except it's really 7 descriptors, see i40e_lso_num_descs.
2492*8d5069bcSRyan Zezeski *
2493*8d5069bcSRyan Zezeski * Your first inclination might be to verify that a given segment
2494*8d5069bcSRyan Zezeski * spans no more than 7 mblks; but it's actually much more subtle than
2495*8d5069bcSRyan Zezeski * that. First, let's describe what the hardware expects, and then we
2496*8d5069bcSRyan Zezeski * can expound on the software side of things.
2497*8d5069bcSRyan Zezeski *
2498*8d5069bcSRyan Zezeski * For an LSO packet the hardware expects the following:
2499*8d5069bcSRyan Zezeski *
2500*8d5069bcSRyan Zezeski * o Each MSS-sized segment must span no more than 7 descriptors.
2501*8d5069bcSRyan Zezeski *
2502*8d5069bcSRyan Zezeski * o The header size does not count towards the segment size.
2503*8d5069bcSRyan Zezeski *
2504*8d5069bcSRyan Zezeski * o If header and payload share the first descriptor, then the
2505*8d5069bcSRyan Zezeski * controller will count the descriptor twice.
2506*8d5069bcSRyan Zezeski *
2507*8d5069bcSRyan Zezeski * The most important thing to keep in mind is that the hardware does
2508*8d5069bcSRyan Zezeski * not view the segments in terms of mblks, like we do. The hardware
2509*8d5069bcSRyan Zezeski * only sees descriptors. It will iterate each descriptor in turn,
2510*8d5069bcSRyan Zezeski * keeping a tally of bytes seen and descriptors visited. If the byte
2511*8d5069bcSRyan Zezeski * count hasn't reached MSS by the time the descriptor count reaches
2512*8d5069bcSRyan Zezeski * 7, then the controller freezes the queue and we are stuck.
2513*8d5069bcSRyan Zezeski * Furthermore, the hardware picks up its tally where it left off. So
2514*8d5069bcSRyan Zezeski * if it reached MSS in the middle of a descriptor, it will start
2515*8d5069bcSRyan Zezeski * tallying the next segment in the middle of that descriptor. The
2516*8d5069bcSRyan Zezeski * hardware's view is entirely removed from the mblk chain or even the
2517*8d5069bcSRyan Zezeski * descriptor layout. Consider these facts:
2518*8d5069bcSRyan Zezeski *
2519*8d5069bcSRyan Zezeski * o The MSS will vary depending on MTU and other factors.
2520*8d5069bcSRyan Zezeski *
2521*8d5069bcSRyan Zezeski * o The dblk allocation will sit at various offsets within a
2522*8d5069bcSRyan Zezeski * memory page.
2523*8d5069bcSRyan Zezeski *
2524*8d5069bcSRyan Zezeski * o The page size itself could vary in the future (i.e. not
2525*8d5069bcSRyan Zezeski * always 4K).
2526*8d5069bcSRyan Zezeski *
2527*8d5069bcSRyan Zezeski * o Just because a dblk is virtually contiguous doesn't mean
2528*8d5069bcSRyan Zezeski * it's physically contiguous. The number of cookies
2529*8d5069bcSRyan Zezeski * (descriptors) required by a DMA bind of a single dblk is at
2530*8d5069bcSRyan Zezeski * the mercy of the page size and physical layout.
2531*8d5069bcSRyan Zezeski *
2532*8d5069bcSRyan Zezeski * o The descriptors will most often NOT start/end on a MSS
2533*8d5069bcSRyan Zezeski * boundary. Thus the hardware will often start counting the
2534*8d5069bcSRyan Zezeski * MSS mid descriptor and finish mid descriptor.
2535*8d5069bcSRyan Zezeski *
2536*8d5069bcSRyan Zezeski * The upshot of all this is that the driver must learn to think like
2537*8d5069bcSRyan Zezeski * the controller, and verify that none of the constraints are broken.
2538*8d5069bcSRyan Zezeski * It does this by tallying up the segment just like the hardware
2539*8d5069bcSRyan Zezeski * would. This is handled by the two variables 'segsz' and 'segdesc'.
2540*8d5069bcSRyan Zezeski * After each attempt to bind a dblk, we check the constraints. If
2541*8d5069bcSRyan Zezeski * violated, we undo the DMA and force a copy until MSS is met. We
2542*8d5069bcSRyan Zezeski * have a guarantee that the TCB buffer is larger than MTU; thus
2543*8d5069bcSRyan Zezeski * ensuring we can always meet the MSS with a single copy buffer. We
2544*8d5069bcSRyan Zezeski * also copy consecutive non-DMA fragments into the same TCB buffer.
2545*8d5069bcSRyan Zezeski */
2546*8d5069bcSRyan Zezeski static i40e_tx_control_block_t *
2547*8d5069bcSRyan Zezeski i40e_lso_chain(i40e_trqpair_t *itrq, const mblk_t *mp,
2548*8d5069bcSRyan Zezeski const mac_ether_offload_info_t *meo, const i40e_tx_context_t *tctx,
2549*8d5069bcSRyan Zezeski uint_t *ndesc)
2550*8d5069bcSRyan Zezeski {
2551*8d5069bcSRyan Zezeski size_t mp_len = MBLKL(mp);
2552*8d5069bcSRyan Zezeski /*
2553*8d5069bcSRyan Zezeski * The cpoff (copy offset) variable tracks the offset inside
2554*8d5069bcSRyan Zezeski * the current mp. There are cases where the entire mp is not
2555*8d5069bcSRyan Zezeski * fully copied in one go: such as the header copy followed by
2556*8d5069bcSRyan Zezeski * a non-DMA mblk, or a TCB buffer that only has enough space
2557*8d5069bcSRyan Zezeski * to copy part of the current mp.
2558*8d5069bcSRyan Zezeski */
2559*8d5069bcSRyan Zezeski size_t cpoff = 0;
2560*8d5069bcSRyan Zezeski /*
2561*8d5069bcSRyan Zezeski * The segsz and segdesc variables track the controller's view
2562*8d5069bcSRyan Zezeski * of the segment. The needed_desc variable tracks the total
2563*8d5069bcSRyan Zezeski * number of data descriptors used by the driver.
2564*8d5069bcSRyan Zezeski */
2565*8d5069bcSRyan Zezeski size_t segsz = 0;
2566*8d5069bcSRyan Zezeski uint_t segdesc = 0;
2567*8d5069bcSRyan Zezeski uint_t needed_desc = 0;
2568*8d5069bcSRyan Zezeski size_t hdrcopied = 0;
2569*8d5069bcSRyan Zezeski const size_t hdrlen =
2570*8d5069bcSRyan Zezeski meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
2571*8d5069bcSRyan Zezeski const size_t mss = tctx->itc_ctx_mss;
2572*8d5069bcSRyan Zezeski boolean_t force_copy = B_FALSE;
2573*8d5069bcSRyan Zezeski i40e_tx_control_block_t *tcb = NULL, *tcbhead = NULL, *tcbtail = NULL;
2574*8d5069bcSRyan Zezeski i40e_t *i40e = itrq->itrq_i40e;
2575*8d5069bcSRyan Zezeski i40e_txq_stat_t *txs = &itrq->itrq_txstat;
2576*8d5069bcSRyan Zezeski
2577*8d5069bcSRyan Zezeski /*
2578*8d5069bcSRyan Zezeski * We always copy the header in order to avoid more
2579*8d5069bcSRyan Zezeski * complicated code dealing with various edge cases.
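 *
 * To make the controller's tally described above concrete, consider
 * some hypothetical numbers (purely illustrative): with an MSS of 1460
 * and data descriptors carrying 600, 900, and 1000 bytes of payload,
 * the controller reaches the MSS 860 bytes into the second descriptor;
 * the remaining 40 bytes of that descriptor begin the next segment, so
 * our mirror of that state restarts at segsz = 40 and segdesc = 1, just
 * as the (segsz % mss) reset further down does.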
2580*8d5069bcSRyan Zezeski */ 2581*8d5069bcSRyan Zezeski if ((tcb = i40e_tcb_alloc(itrq)) == NULL) { 2582*8d5069bcSRyan Zezeski txs->itxs_err_notcb.value.ui64++; 2583*8d5069bcSRyan Zezeski goto fail; 2584*8d5069bcSRyan Zezeski } 2585*8d5069bcSRyan Zezeski 2586*8d5069bcSRyan Zezeski needed_desc++; 2587*8d5069bcSRyan Zezeski tcb_list_append(&tcbhead, &tcbtail, tcb); 2588*8d5069bcSRyan Zezeski 2589*8d5069bcSRyan Zezeski while (hdrcopied < hdrlen) { 2590*8d5069bcSRyan Zezeski const size_t tocopy = MIN(hdrlen - hdrcopied, mp_len); 2591*8d5069bcSRyan Zezeski i40e_tx_copy_fragment(tcb, mp, 0, tocopy); 2592*8d5069bcSRyan Zezeski hdrcopied += tocopy; 2593*8d5069bcSRyan Zezeski cpoff += tocopy; 2594*8d5069bcSRyan Zezeski if (tocopy == mp_len) { 2595*8d5069bcSRyan Zezeski /* 2596*8d5069bcSRyan Zezeski * This is a bit of defensive programming. We 2597*8d5069bcSRyan Zezeski * should never have a chain too short to 2598*8d5069bcSRyan Zezeski * satisfy the headers -- but just in case. 2599*8d5069bcSRyan Zezeski */ 2600*8d5069bcSRyan Zezeski if ((mp = mp->b_cont) == NULL) { 2601*8d5069bcSRyan Zezeski txs->itxs_tx_short.value.ui64++; 2602*8d5069bcSRyan Zezeski goto fail; 2603*8d5069bcSRyan Zezeski } 2604*8d5069bcSRyan Zezeski 2605*8d5069bcSRyan Zezeski while ((mp_len = MBLKL(mp)) == 0) { 2606*8d5069bcSRyan Zezeski if ((mp = mp->b_cont) == NULL) { 2607*8d5069bcSRyan Zezeski txs->itxs_tx_short.value.ui64++; 2608*8d5069bcSRyan Zezeski goto fail; 2609*8d5069bcSRyan Zezeski } 2610*8d5069bcSRyan Zezeski } 2611*8d5069bcSRyan Zezeski cpoff = 0; 2612*8d5069bcSRyan Zezeski } 2613*8d5069bcSRyan Zezeski } 2614*8d5069bcSRyan Zezeski ASSERT3U(hdrcopied, ==, hdrlen); 2615*8d5069bcSRyan Zezeski 2616*8d5069bcSRyan Zezeski /* 2617*8d5069bcSRyan Zezeski * A single descriptor containing both header and data is 2618*8d5069bcSRyan Zezeski * counted twice by the controller. 2619*8d5069bcSRyan Zezeski */ 2620*8d5069bcSRyan Zezeski if (mp_len < i40e->i40e_tx_dma_min) { 2621*8d5069bcSRyan Zezeski segdesc = 2; 2622*8d5069bcSRyan Zezeski } else { 2623*8d5069bcSRyan Zezeski segdesc = 1; 2624*8d5069bcSRyan Zezeski } 2625*8d5069bcSRyan Zezeski 2626*8d5069bcSRyan Zezeski while (mp != NULL) { 2627*8d5069bcSRyan Zezeski mp_len = MBLKL(mp); 2628*8d5069bcSRyan Zezeski force_copy: 2629*8d5069bcSRyan Zezeski /* Ignore zero-length mblks. */ 2630*8d5069bcSRyan Zezeski if (mp_len == 0) { 2631*8d5069bcSRyan Zezeski mp = mp->b_cont; 2632*8d5069bcSRyan Zezeski cpoff = 0; 2633*8d5069bcSRyan Zezeski continue; 2634*8d5069bcSRyan Zezeski } 2635*8d5069bcSRyan Zezeski 2636*8d5069bcSRyan Zezeski /* 2637*8d5069bcSRyan Zezeski * We copy into the preallocated TCB buffer when the 2638*8d5069bcSRyan Zezeski * current fragment is less than the DMA threshold OR 2639*8d5069bcSRyan Zezeski * when the DMA bind can't meet the controller's 2640*8d5069bcSRyan Zezeski * segment descriptor limit. 2641*8d5069bcSRyan Zezeski */ 2642*8d5069bcSRyan Zezeski if (mp_len < i40e->i40e_tx_dma_min || force_copy) { 2643*8d5069bcSRyan Zezeski size_t tocopy; 2644*8d5069bcSRyan Zezeski 2645*8d5069bcSRyan Zezeski /* 2646*8d5069bcSRyan Zezeski * Our objective here is to compress 2647*8d5069bcSRyan Zezeski * consecutive copies into one TCB (until it 2648*8d5069bcSRyan Zezeski * is full). If there is no current TCB, or if 2649*8d5069bcSRyan Zezeski * it is a DMA TCB, then allocate a new one. 
2650*8d5069bcSRyan Zezeski */ 2651*8d5069bcSRyan Zezeski if (tcb == NULL || 2652*8d5069bcSRyan Zezeski (tcb != NULL && tcb->tcb_type != I40E_TX_COPY)) { 2653*8d5069bcSRyan Zezeski if ((tcb = i40e_tcb_alloc(itrq)) == NULL) { 2654*8d5069bcSRyan Zezeski txs->itxs_err_notcb.value.ui64++; 2655*8d5069bcSRyan Zezeski goto fail; 2656*8d5069bcSRyan Zezeski } 2657*8d5069bcSRyan Zezeski 2658*8d5069bcSRyan Zezeski /* 2659*8d5069bcSRyan Zezeski * The TCB DMA buffer is guaranteed to 2660*8d5069bcSRyan Zezeski * be one cookie by i40e_alloc_dma_buffer(). 2661*8d5069bcSRyan Zezeski */ 2662*8d5069bcSRyan Zezeski needed_desc++; 2663*8d5069bcSRyan Zezeski segdesc++; 2664*8d5069bcSRyan Zezeski ASSERT3U(segdesc, <=, i40e_lso_num_descs); 2665*8d5069bcSRyan Zezeski tcb_list_append(&tcbhead, &tcbtail, tcb); 2666*8d5069bcSRyan Zezeski } else if (segdesc == 0) { 2667*8d5069bcSRyan Zezeski /* 2668*8d5069bcSRyan Zezeski * We are copying into an existing TCB 2669*8d5069bcSRyan Zezeski * but we just crossed the MSS 2670*8d5069bcSRyan Zezeski * boundary. Make sure to increment 2671*8d5069bcSRyan Zezeski * segdesc to track the descriptor 2672*8d5069bcSRyan Zezeski * count as the hardware would. 2673*8d5069bcSRyan Zezeski */ 2674*8d5069bcSRyan Zezeski segdesc++; 2675*8d5069bcSRyan Zezeski } 2676*8d5069bcSRyan Zezeski 2677*8d5069bcSRyan Zezeski tocopy = MIN(I40E_TCB_LEFT(tcb), mp_len - cpoff); 2678*8d5069bcSRyan Zezeski i40e_tx_copy_fragment(tcb, mp, cpoff, tocopy); 2679*8d5069bcSRyan Zezeski cpoff += tocopy; 2680*8d5069bcSRyan Zezeski segsz += tocopy; 2681*8d5069bcSRyan Zezeski 2682*8d5069bcSRyan Zezeski /* We have consumed the current mp. */ 2683*8d5069bcSRyan Zezeski if (cpoff == mp_len) { 2684*8d5069bcSRyan Zezeski mp = mp->b_cont; 2685*8d5069bcSRyan Zezeski cpoff = 0; 2686*8d5069bcSRyan Zezeski } 2687*8d5069bcSRyan Zezeski 2688*8d5069bcSRyan Zezeski /* We have consumed the current TCB buffer. */ 2689*8d5069bcSRyan Zezeski if (I40E_TCB_LEFT(tcb) == 0) { 2690*8d5069bcSRyan Zezeski tcb = NULL; 2691*8d5069bcSRyan Zezeski } 2692*8d5069bcSRyan Zezeski 2693*8d5069bcSRyan Zezeski /* 2694*8d5069bcSRyan Zezeski * We have met MSS with this copy; restart the 2695*8d5069bcSRyan Zezeski * counters. 2696*8d5069bcSRyan Zezeski */ 2697*8d5069bcSRyan Zezeski if (segsz >= mss) { 2698*8d5069bcSRyan Zezeski segsz = segsz % mss; 2699*8d5069bcSRyan Zezeski segdesc = segsz == 0 ? 0 : 1; 2700*8d5069bcSRyan Zezeski force_copy = B_FALSE; 2701*8d5069bcSRyan Zezeski } 2702*8d5069bcSRyan Zezeski 2703*8d5069bcSRyan Zezeski /* 2704*8d5069bcSRyan Zezeski * We are at the controller's descriptor 2705*8d5069bcSRyan Zezeski * limit; we must copy into the current TCB 2706*8d5069bcSRyan Zezeski * until MSS is reached. The TCB buffer is 2707*8d5069bcSRyan Zezeski * always bigger than the MTU so we know it is 2708*8d5069bcSRyan Zezeski * big enough to meet the MSS. 
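 *
 * As a worked example with made-up numbers: if segdesc has reached 7
 * (i40e_lso_num_descs) while segsz is only 900 of a 1460-byte MSS, we
 * must not hand the controller another bound descriptor for this
 * segment; instead we copy the remaining 560 bytes of the segment into
 * the current TCB buffer before the tally can restart.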
2709*8d5069bcSRyan Zezeski */
2710*8d5069bcSRyan Zezeski if (segdesc == i40e_lso_num_descs) {
2711*8d5069bcSRyan Zezeski force_copy = B_TRUE;
2712*8d5069bcSRyan Zezeski }
2713*8d5069bcSRyan Zezeski } else {
2714*8d5069bcSRyan Zezeski uint_t tsegdesc = segdesc;
2715*8d5069bcSRyan Zezeski size_t tsegsz = segsz;
2716*8d5069bcSRyan Zezeski
2717*8d5069bcSRyan Zezeski ASSERT(force_copy == B_FALSE);
2718*8d5069bcSRyan Zezeski ASSERT3U(tsegdesc, <, i40e_lso_num_descs);
2719*8d5069bcSRyan Zezeski
2720*8d5069bcSRyan Zezeski tcb = i40e_tx_bind_fragment(itrq, mp, cpoff, B_TRUE);
2721*8d5069bcSRyan Zezeski if (tcb == NULL) {
2722*8d5069bcSRyan Zezeski i40e_error(i40e, "dma bind failed!");
2723*8d5069bcSRyan Zezeski goto fail;
2724*8d5069bcSRyan Zezeski }
2725*8d5069bcSRyan Zezeski
2726*8d5069bcSRyan Zezeski for (uint_t i = 0; i < tcb->tcb_bind_ncookies; i++) {
2727*8d5069bcSRyan Zezeski struct i40e_dma_bind_info dbi =
2728*8d5069bcSRyan Zezeski tcb->tcb_bind_info[i];
2729*8d5069bcSRyan Zezeski
2730*8d5069bcSRyan Zezeski tsegsz += dbi.dbi_len;
2731*8d5069bcSRyan Zezeski tsegdesc++;
2732*8d5069bcSRyan Zezeski ASSERT3U(tsegdesc, <=, i40e_lso_num_descs);
2733*8d5069bcSRyan Zezeski
2734*8d5069bcSRyan Zezeski /*
2735*8d5069bcSRyan Zezeski * We've met the MSS with this portion
2736*8d5069bcSRyan Zezeski * of the DMA.
2737*8d5069bcSRyan Zezeski */
2738*8d5069bcSRyan Zezeski if (tsegsz >= mss) {
2739*8d5069bcSRyan Zezeski tsegsz = tsegsz % mss;
2740*8d5069bcSRyan Zezeski tsegdesc = tsegsz == 0 ? 0 : 1;
2741*8d5069bcSRyan Zezeski }
2742*8d5069bcSRyan Zezeski
2743*8d5069bcSRyan Zezeski /*
2744*8d5069bcSRyan Zezeski * We've reached max descriptors but
2745*8d5069bcSRyan Zezeski * have not met the MSS. Undo the bind
2746*8d5069bcSRyan Zezeski * and instead copy.
2747*8d5069bcSRyan Zezeski */
2748*8d5069bcSRyan Zezeski if (tsegdesc == i40e_lso_num_descs) {
2749*8d5069bcSRyan Zezeski i40e_tcb_reset(tcb);
2750*8d5069bcSRyan Zezeski i40e_tcb_free(itrq, tcb);
2751*8d5069bcSRyan Zezeski
2752*8d5069bcSRyan Zezeski if (tcbtail != NULL &&
2753*8d5069bcSRyan Zezeski I40E_TCB_LEFT(tcbtail) > 0 &&
2754*8d5069bcSRyan Zezeski tcbtail->tcb_type == I40E_TX_COPY) {
2755*8d5069bcSRyan Zezeski tcb = tcbtail;
2756*8d5069bcSRyan Zezeski } else {
2757*8d5069bcSRyan Zezeski tcb = NULL;
2758*8d5069bcSRyan Zezeski }
2759*8d5069bcSRyan Zezeski
2760*8d5069bcSRyan Zezeski /*
2761*8d5069bcSRyan Zezeski * Remember, we are still on
2762*8d5069bcSRyan Zezeski * the same mp.
2763*8d5069bcSRyan Zezeski */
2764*8d5069bcSRyan Zezeski force_copy = B_TRUE;
2765*8d5069bcSRyan Zezeski txs->itxs_tso_force_copy.value.ui64++;
2766*8d5069bcSRyan Zezeski goto force_copy;
2767*8d5069bcSRyan Zezeski }
2768*8d5069bcSRyan Zezeski }
2769*8d5069bcSRyan Zezeski
2770*8d5069bcSRyan Zezeski ASSERT3U(tsegdesc, <=, i40e_lso_num_descs);
2771*8d5069bcSRyan Zezeski ASSERT3U(tsegsz, <, mss);
2772*8d5069bcSRyan Zezeski
2773*8d5069bcSRyan Zezeski /*
2774*8d5069bcSRyan Zezeski * We've made it through the loop without
2775*8d5069bcSRyan Zezeski * breaking the segment descriptor contract
2776*8d5069bcSRyan Zezeski * with the controller -- replace the segment
2777*8d5069bcSRyan Zezeski * tracking values with the temporary ones.
2778*8d5069bcSRyan Zezeski */ 2779*8d5069bcSRyan Zezeski segdesc = tsegdesc; 2780*8d5069bcSRyan Zezeski segsz = tsegsz; 2781*8d5069bcSRyan Zezeski needed_desc += tcb->tcb_bind_ncookies; 2782*8d5069bcSRyan Zezeski cpoff = 0; 2783*8d5069bcSRyan Zezeski tcb_list_append(&tcbhead, &tcbtail, tcb); 2784*8d5069bcSRyan Zezeski mp = mp->b_cont; 2785*8d5069bcSRyan Zezeski } 2786*8d5069bcSRyan Zezeski } 2787*8d5069bcSRyan Zezeski 2788*8d5069bcSRyan Zezeski ASSERT3P(mp, ==, NULL); 2789*8d5069bcSRyan Zezeski ASSERT3P(tcbhead, !=, NULL); 2790*8d5069bcSRyan Zezeski *ndesc += needed_desc; 2791*8d5069bcSRyan Zezeski return (tcbhead); 2792*8d5069bcSRyan Zezeski 2793*8d5069bcSRyan Zezeski fail: 2794*8d5069bcSRyan Zezeski tcb = tcbhead; 2795*8d5069bcSRyan Zezeski while (tcb != NULL) { 2796*8d5069bcSRyan Zezeski i40e_tx_control_block_t *next = tcb->tcb_next; 2797*8d5069bcSRyan Zezeski 2798*8d5069bcSRyan Zezeski ASSERT(tcb->tcb_type == I40E_TX_DMA || 2799*8d5069bcSRyan Zezeski tcb->tcb_type == I40E_TX_COPY); 2800*8d5069bcSRyan Zezeski 2801*8d5069bcSRyan Zezeski tcb->tcb_mp = NULL; 2802*8d5069bcSRyan Zezeski i40e_tcb_reset(tcb); 2803*8d5069bcSRyan Zezeski i40e_tcb_free(itrq, tcb); 2804*8d5069bcSRyan Zezeski tcb = next; 2805*8d5069bcSRyan Zezeski } 2806*8d5069bcSRyan Zezeski 2807*8d5069bcSRyan Zezeski return (NULL); 2808*8d5069bcSRyan Zezeski } 2809*8d5069bcSRyan Zezeski 2810da5577f0SRobert Mustacchi /* 2811da5577f0SRobert Mustacchi * We've been asked to send a message block on the wire. We'll only have a 2812da5577f0SRobert Mustacchi * single chain. There will not be any b_next pointers; however, there may be 2813*8d5069bcSRyan Zezeski * multiple b_cont blocks. The number of b_cont blocks may exceed the 2814*8d5069bcSRyan Zezeski * controller's Tx descriptor limit. 2815da5577f0SRobert Mustacchi * 2816da5577f0SRobert Mustacchi * We may do one of three things with any given mblk_t chain: 2817da5577f0SRobert Mustacchi * 2818da5577f0SRobert Mustacchi * 1) Drop it 2819da5577f0SRobert Mustacchi * 2) Transmit it 2820da5577f0SRobert Mustacchi * 3) Return it 2821da5577f0SRobert Mustacchi * 2822da5577f0SRobert Mustacchi * If we return it to MAC, then MAC will flow control on our behalf. In other 2823da5577f0SRobert Mustacchi * words, it won't send us anything until we tell it that it's okay to send us 2824da5577f0SRobert Mustacchi * something. 
2825da5577f0SRobert Mustacchi */ 2826da5577f0SRobert Mustacchi mblk_t * 2827da5577f0SRobert Mustacchi i40e_ring_tx(void *arg, mblk_t *mp) 2828da5577f0SRobert Mustacchi { 2829*8d5069bcSRyan Zezeski size_t msglen; 2830*8d5069bcSRyan Zezeski i40e_tx_control_block_t *tcb_ctx = NULL, *tcb = NULL, *tcbhead = NULL; 2831*8d5069bcSRyan Zezeski i40e_tx_context_desc_t *ctxdesc; 2832*8d5069bcSRyan Zezeski mac_ether_offload_info_t meo; 2833da5577f0SRobert Mustacchi i40e_tx_context_t tctx; 2834*8d5069bcSRyan Zezeski int type; 2835*8d5069bcSRyan Zezeski uint_t needed_desc = 0; 2836*8d5069bcSRyan Zezeski boolean_t do_ctx_desc = B_FALSE, use_lso = B_FALSE; 2837da5577f0SRobert Mustacchi 2838da5577f0SRobert Mustacchi i40e_trqpair_t *itrq = arg; 2839da5577f0SRobert Mustacchi i40e_t *i40e = itrq->itrq_i40e; 2840da5577f0SRobert Mustacchi i40e_hw_t *hw = &i40e->i40e_hw_space; 2841da5577f0SRobert Mustacchi i40e_txq_stat_t *txs = &itrq->itrq_txstat; 2842da5577f0SRobert Mustacchi 2843da5577f0SRobert Mustacchi ASSERT(mp->b_next == NULL); 2844da5577f0SRobert Mustacchi 2845da5577f0SRobert Mustacchi if (!(i40e->i40e_state & I40E_STARTED) || 2846da5577f0SRobert Mustacchi (i40e->i40e_state & I40E_OVERTEMP) || 2847da5577f0SRobert Mustacchi (i40e->i40e_state & I40E_SUSPENDED) || 2848da5577f0SRobert Mustacchi (i40e->i40e_state & I40E_ERROR) || 2849da5577f0SRobert Mustacchi (i40e->i40e_link_state != LINK_STATE_UP)) { 2850da5577f0SRobert Mustacchi freemsg(mp); 2851da5577f0SRobert Mustacchi return (NULL); 2852da5577f0SRobert Mustacchi } 2853da5577f0SRobert Mustacchi 2854*8d5069bcSRyan Zezeski if (mac_ether_offload_info(mp, &meo) != 0) { 2855*8d5069bcSRyan Zezeski freemsg(mp); 2856*8d5069bcSRyan Zezeski itrq->itrq_txstat.itxs_hck_meoifail.value.ui64++; 2857*8d5069bcSRyan Zezeski return (NULL); 2858*8d5069bcSRyan Zezeski } 2859*8d5069bcSRyan Zezeski 2860da5577f0SRobert Mustacchi /* 2861da5577f0SRobert Mustacchi * Figure out the relevant context about this frame that we might need 2862*8d5069bcSRyan Zezeski * for enabling checksum, LSO, etc. This also fills in information that 2863da5577f0SRobert Mustacchi * we might set around the packet type, etc. 2864da5577f0SRobert Mustacchi */ 2865*8d5069bcSRyan Zezeski if (i40e_tx_context(i40e, itrq, mp, &meo, &tctx) < 0) { 2866da5577f0SRobert Mustacchi freemsg(mp); 2867da5577f0SRobert Mustacchi itrq->itrq_txstat.itxs_err_context.value.ui64++; 2868da5577f0SRobert Mustacchi return (NULL); 2869da5577f0SRobert Mustacchi } 2870*8d5069bcSRyan Zezeski if (tctx.itc_ctx_cmdflags & I40E_TX_CTX_DESC_TSO) { 2871*8d5069bcSRyan Zezeski use_lso = B_TRUE; 2872*8d5069bcSRyan Zezeski do_ctx_desc = B_TRUE; 2873*8d5069bcSRyan Zezeski } 2874da5577f0SRobert Mustacchi 2875da5577f0SRobert Mustacchi /* 2876da5577f0SRobert Mustacchi * For the primordial driver we can punt on doing any recycling right 2877da5577f0SRobert Mustacchi * now; however, longer term we need to probably do some more pro-active 2878*8d5069bcSRyan Zezeski * recycling to cut back on stalls in the TX path. 2879da5577f0SRobert Mustacchi */ 2880da5577f0SRobert Mustacchi 2881*8d5069bcSRyan Zezeski msglen = msgsize(mp); 2882da5577f0SRobert Mustacchi 2883*8d5069bcSRyan Zezeski if (do_ctx_desc) { 2884da5577f0SRobert Mustacchi /* 2885*8d5069bcSRyan Zezeski * If we're doing tunneling or LSO, then we'll need a TX 2886*8d5069bcSRyan Zezeski * context descriptor in addition to one or more TX data 2887*8d5069bcSRyan Zezeski * descriptors. 
Since there's no data DMA block or handle 2888*8d5069bcSRyan Zezeski * associated with the context descriptor, we create a special 2889*8d5069bcSRyan Zezeski * control block that behaves effectively like a NOP. 2890da5577f0SRobert Mustacchi */ 2891*8d5069bcSRyan Zezeski if ((tcb_ctx = i40e_tcb_alloc(itrq)) == NULL) { 2892da5577f0SRobert Mustacchi txs->itxs_err_notcb.value.ui64++; 2893da5577f0SRobert Mustacchi goto txfail; 2894da5577f0SRobert Mustacchi } 2895*8d5069bcSRyan Zezeski tcb_ctx->tcb_type = I40E_TX_DESC; 2896*8d5069bcSRyan Zezeski needed_desc++; 2897da5577f0SRobert Mustacchi } 2898*8d5069bcSRyan Zezeski 2899*8d5069bcSRyan Zezeski if (!use_lso) { 2900*8d5069bcSRyan Zezeski tcbhead = i40e_non_lso_chain(itrq, mp, &needed_desc); 2901*8d5069bcSRyan Zezeski } else { 2902*8d5069bcSRyan Zezeski tcbhead = i40e_lso_chain(itrq, mp, &meo, &tctx, &needed_desc); 2903*8d5069bcSRyan Zezeski } 2904*8d5069bcSRyan Zezeski 2905*8d5069bcSRyan Zezeski if (tcbhead == NULL) 2906*8d5069bcSRyan Zezeski goto txfail; 2907*8d5069bcSRyan Zezeski 2908*8d5069bcSRyan Zezeski tcbhead->tcb_mp = mp; 2909da5577f0SRobert Mustacchi 2910da5577f0SRobert Mustacchi /* 2911*8d5069bcSRyan Zezeski * The second condition ensures that 'itrq_desc_tail' never 2912*8d5069bcSRyan Zezeski * equals 'itrq_desc_head'. This enforces the rule found in 2913*8d5069bcSRyan Zezeski * the second bullet point of section 8.4.3.1.5 of the XL710 2914*8d5069bcSRyan Zezeski * PG, which declares the TAIL pointer in I40E_QTX_TAIL should 2915*8d5069bcSRyan Zezeski * never overlap with the head. This means that we only ever 2916*8d5069bcSRyan Zezeski * have 'itrq_tx_ring_size - 1' total available descriptors. 2917da5577f0SRobert Mustacchi */ 2918da5577f0SRobert Mustacchi mutex_enter(&itrq->itrq_tx_lock); 2919*8d5069bcSRyan Zezeski if (itrq->itrq_desc_free < i40e->i40e_tx_block_thresh || 2920*8d5069bcSRyan Zezeski (itrq->itrq_desc_free - 1) < needed_desc) { 2921da5577f0SRobert Mustacchi txs->itxs_err_nodescs.value.ui64++; 2922da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tx_lock); 2923da5577f0SRobert Mustacchi goto txfail; 2924da5577f0SRobert Mustacchi } 2925da5577f0SRobert Mustacchi 2926*8d5069bcSRyan Zezeski if (do_ctx_desc) { 2927da5577f0SRobert Mustacchi /* 2928*8d5069bcSRyan Zezeski * If we're enabling any offloads for this frame, then we'll 2929*8d5069bcSRyan Zezeski * need to build up a transmit context descriptor, first. The 2930*8d5069bcSRyan Zezeski * context descriptor needs to be placed in the TX ring before 2931*8d5069bcSRyan Zezeski * the data descriptor(s). 
See section 8.4.2, table 8-16 2932da5577f0SRobert Mustacchi */ 2933*8d5069bcSRyan Zezeski uint_t tail = itrq->itrq_desc_tail; 2934da5577f0SRobert Mustacchi itrq->itrq_desc_free--; 2935*8d5069bcSRyan Zezeski ctxdesc = (i40e_tx_context_desc_t *)&itrq->itrq_desc_ring[tail]; 2936*8d5069bcSRyan Zezeski itrq->itrq_tcb_work_list[tail] = tcb_ctx; 2937*8d5069bcSRyan Zezeski itrq->itrq_desc_tail = i40e_next_desc(tail, 1, 2938da5577f0SRobert Mustacchi itrq->itrq_tx_ring_size); 2939da5577f0SRobert Mustacchi 2940*8d5069bcSRyan Zezeski /* QW0 */ 2941*8d5069bcSRyan Zezeski type = I40E_TX_DESC_DTYPE_CONTEXT; 2942*8d5069bcSRyan Zezeski ctxdesc->tunneling_params = 0; 2943*8d5069bcSRyan Zezeski ctxdesc->l2tag2 = 0; 2944*8d5069bcSRyan Zezeski 2945*8d5069bcSRyan Zezeski /* QW1 */ 2946*8d5069bcSRyan Zezeski ctxdesc->type_cmd_tso_mss = CPU_TO_LE64((uint64_t)type); 2947*8d5069bcSRyan Zezeski if (tctx.itc_ctx_cmdflags & I40E_TX_CTX_DESC_TSO) { 2948*8d5069bcSRyan Zezeski ctxdesc->type_cmd_tso_mss |= CPU_TO_LE64((uint64_t) 2949*8d5069bcSRyan Zezeski ((uint64_t)tctx.itc_ctx_cmdflags << 2950*8d5069bcSRyan Zezeski I40E_TXD_CTX_QW1_CMD_SHIFT) | 2951*8d5069bcSRyan Zezeski ((uint64_t)tctx.itc_ctx_tsolen << 2952*8d5069bcSRyan Zezeski I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) | 2953*8d5069bcSRyan Zezeski ((uint64_t)tctx.itc_ctx_mss << 2954*8d5069bcSRyan Zezeski I40E_TXD_CTX_QW1_MSS_SHIFT)); 2955*8d5069bcSRyan Zezeski } 2956*8d5069bcSRyan Zezeski } 2957*8d5069bcSRyan Zezeski 2958*8d5069bcSRyan Zezeski tcb = tcbhead; 2959*8d5069bcSRyan Zezeski while (tcb != NULL) { 2960*8d5069bcSRyan Zezeski 2961*8d5069bcSRyan Zezeski itrq->itrq_tcb_work_list[itrq->itrq_desc_tail] = tcb; 2962*8d5069bcSRyan Zezeski if (tcb->tcb_type == I40E_TX_COPY) { 2963*8d5069bcSRyan Zezeski boolean_t last_desc = (tcb->tcb_next == NULL); 2964*8d5069bcSRyan Zezeski 2965*8d5069bcSRyan Zezeski i40e_tx_set_data_desc(itrq, &tctx, 2966*8d5069bcSRyan Zezeski (caddr_t)tcb->tcb_dma.dmab_dma_address, 2967*8d5069bcSRyan Zezeski tcb->tcb_dma.dmab_len, last_desc); 2968*8d5069bcSRyan Zezeski } else { 2969*8d5069bcSRyan Zezeski boolean_t last_desc = B_FALSE; 2970*8d5069bcSRyan Zezeski ASSERT3S(tcb->tcb_type, ==, I40E_TX_DMA); 2971*8d5069bcSRyan Zezeski 2972*8d5069bcSRyan Zezeski for (uint_t c = 0; c < tcb->tcb_bind_ncookies; c++) { 2973*8d5069bcSRyan Zezeski last_desc = (c == tcb->tcb_bind_ncookies - 1) && 2974*8d5069bcSRyan Zezeski (tcb->tcb_next == NULL); 2975*8d5069bcSRyan Zezeski 2976*8d5069bcSRyan Zezeski i40e_tx_set_data_desc(itrq, &tctx, 2977*8d5069bcSRyan Zezeski tcb->tcb_bind_info[c].dbi_paddr, 2978*8d5069bcSRyan Zezeski tcb->tcb_bind_info[c].dbi_len, 2979*8d5069bcSRyan Zezeski last_desc); 2980*8d5069bcSRyan Zezeski } 2981*8d5069bcSRyan Zezeski } 2982*8d5069bcSRyan Zezeski 2983*8d5069bcSRyan Zezeski tcb = tcb->tcb_next; 2984*8d5069bcSRyan Zezeski } 2985da5577f0SRobert Mustacchi 2986da5577f0SRobert Mustacchi /* 2987da5577f0SRobert Mustacchi * Now, finally, sync the DMA data and alert hardware. 
2988da5577f0SRobert Mustacchi */ 2989da5577f0SRobert Mustacchi I40E_DMA_SYNC(&itrq->itrq_desc_area, DDI_DMA_SYNC_FORDEV); 2990da5577f0SRobert Mustacchi 2991da5577f0SRobert Mustacchi I40E_WRITE_REG(hw, I40E_QTX_TAIL(itrq->itrq_index), 2992da5577f0SRobert Mustacchi itrq->itrq_desc_tail); 2993*8d5069bcSRyan Zezeski 2994da5577f0SRobert Mustacchi if (i40e_check_acc_handle(i40e->i40e_osdep_space.ios_reg_handle) != 2995da5577f0SRobert Mustacchi DDI_FM_OK) { 2996da5577f0SRobert Mustacchi /* 2997da5577f0SRobert Mustacchi * Note, we can't really go through and clean this up very well, 2998da5577f0SRobert Mustacchi * because the memory has been given to the device, so just 2999da5577f0SRobert Mustacchi * indicate it's been transmitted. 3000da5577f0SRobert Mustacchi */ 3001da5577f0SRobert Mustacchi ddi_fm_service_impact(i40e->i40e_dip, DDI_SERVICE_DEGRADED); 3002da5577f0SRobert Mustacchi atomic_or_32(&i40e->i40e_state, I40E_ERROR); 3003da5577f0SRobert Mustacchi } 3004da5577f0SRobert Mustacchi 3005*8d5069bcSRyan Zezeski txs->itxs_bytes.value.ui64 += msglen; 3006da5577f0SRobert Mustacchi txs->itxs_packets.value.ui64++; 3007*8d5069bcSRyan Zezeski txs->itxs_descriptors.value.ui64 += needed_desc; 3008da5577f0SRobert Mustacchi 3009da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tx_lock); 3010da5577f0SRobert Mustacchi 3011da5577f0SRobert Mustacchi return (NULL); 3012da5577f0SRobert Mustacchi 3013da5577f0SRobert Mustacchi txfail: 3014da5577f0SRobert Mustacchi /* 3015da5577f0SRobert Mustacchi * We ran out of resources. Return it to MAC and indicate that we'll 3016da5577f0SRobert Mustacchi * need to signal MAC. If there are allocated tcb's, return them now. 3017da5577f0SRobert Mustacchi * Make sure to reset their message block's, since we'll return them 3018da5577f0SRobert Mustacchi * back to MAC. 3019da5577f0SRobert Mustacchi */ 3020*8d5069bcSRyan Zezeski if (tcb_ctx != NULL) { 3021*8d5069bcSRyan Zezeski tcb_ctx->tcb_mp = NULL; 3022*8d5069bcSRyan Zezeski i40e_tcb_reset(tcb_ctx); 3023*8d5069bcSRyan Zezeski i40e_tcb_free(itrq, tcb_ctx); 3024*8d5069bcSRyan Zezeski } 3025*8d5069bcSRyan Zezeski 3026*8d5069bcSRyan Zezeski tcb = tcbhead; 3027*8d5069bcSRyan Zezeski while (tcb != NULL) { 3028*8d5069bcSRyan Zezeski i40e_tx_control_block_t *next = tcb->tcb_next; 3029*8d5069bcSRyan Zezeski 3030*8d5069bcSRyan Zezeski ASSERT(tcb->tcb_type == I40E_TX_DMA || 3031*8d5069bcSRyan Zezeski tcb->tcb_type == I40E_TX_COPY); 3032*8d5069bcSRyan Zezeski 3033da5577f0SRobert Mustacchi tcb->tcb_mp = NULL; 3034da5577f0SRobert Mustacchi i40e_tcb_reset(tcb); 3035da5577f0SRobert Mustacchi i40e_tcb_free(itrq, tcb); 3036*8d5069bcSRyan Zezeski tcb = next; 3037da5577f0SRobert Mustacchi } 3038da5577f0SRobert Mustacchi 3039da5577f0SRobert Mustacchi mutex_enter(&itrq->itrq_tx_lock); 3040da5577f0SRobert Mustacchi itrq->itrq_tx_blocked = B_TRUE; 3041da5577f0SRobert Mustacchi mutex_exit(&itrq->itrq_tx_lock); 3042da5577f0SRobert Mustacchi 3043da5577f0SRobert Mustacchi return (mp); 3044da5577f0SRobert Mustacchi } 3045