/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "igb_sw.h"

static boolean_t igb_tx(igb_tx_ring_t *, mblk_t *);
static int igb_tx_copy(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int igb_tx_bind(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int igb_tx_fill_ring(igb_tx_ring_t *, link_list_t *, tx_context_t *,
    size_t);
static void igb_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *igb_get_free_list(igb_tx_ring_t *);
static int igb_get_tx_context(mblk_t *, tx_context_t *);
static boolean_t igb_check_tx_context(igb_tx_ring_t *, tx_context_t *);
static void igb_fill_tx_context(struct e1000_adv_tx_context_desc *,
    tx_context_t *, uint32_t);

#ifndef IGB_DEBUG
#pragma inline(igb_save_desc)
#pragma inline(igb_get_tx_context)
#pragma inline(igb_check_tx_context)
#pragma inline(igb_fill_tx_context)
#endif

mblk_t *
igb_tx_ring_send(void *arg, mblk_t *mp)
{
	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg;
	igb_t *igb;

	ASSERT(tx_ring != NULL);

	igb = tx_ring->igb;

	if ((igb->igb_state & IGB_SUSPENDED) ||
	    (igb->igb_state & IGB_ERROR) ||
	    !(igb->igb_state & IGB_STARTED) ||
	    igb->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	return ((igb_tx(tx_ring, mp)) ? NULL : mp);
}

/*
 * igb_tx - Main transmit processing
 *
 * Called from igb_m_tx with an mblk ready to transmit. This
 * routine sets up the transmit descriptors and sends data to
 * the wire.
 *
 * One mblk can consist of several fragments, each fragment
 * will be processed with different methods based on the size.
 * For the fragments with size less than the bcopy threshold,
 * they will be processed by using bcopy; otherwise, they will
 * be processed by using DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind an mblk fragment with DMA resource.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer will be transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks will
 * be put on the work list.
 */
static boolean_t
igb_tx(igb_tx_ring_t *tx_ring, mblk_t *mp)
{
	igb_t *igb = tx_ring->igb;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp;
	tx_control_block_t *tcb;
	tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_previous_mp = NULL;
	mblk_t *hdr_current_mp = NULL;
	uint32_t hdr_frag_len;
	uint32_t hdr_len, len;
	uint32_t copy_thresh;

	copy_thresh = igb->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (igb->tx_hcksum_enable) {
		ctx = &tx_context;
		/*
		 * Retrieve offloading context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		if (igb_get_tx_context(mp, ctx) != TX_CXT_SUCCESS) {
			freemsg(mp);
			return (B_TRUE);
		}

		if ((ctx->lso_flag &&
		    (mbsize > (ctx->mac_hdr_len + IGB_LSO_MAXLEN))) ||
		    (!ctx->lso_flag &&
		    (mbsize > (igb->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			igb_log(igb, IGB_LOG_INFO, "igb_tx: packet oversize");
			return (B_TRUE);
		}
	} else {
		ctx = NULL;
		if (mbsize > (igb->max_frame_size - ETHERFCSL)) {
			freemsg(mp);
			igb_log(igb, IGB_LOG_INFO, "igb_tx: packet oversize");
			return (B_TRUE);
		}
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < igb->tx_recycle_thresh)
		tx_ring->tx_recycle(tx_ring);

	/*
	 * After the recycling, if the tbd_free is less than the
	 * tx_overload_threshold, assert overload, return B_FALSE;
	 * and we need to re-schedule the tx again.
	 */
	if (tx_ring->tbd_free < igb->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IGB_DEBUG_STAT(tx_ring->stat_overload);
		return (B_FALSE);
	}

	/*
	 * The software should guarantee the LSO packet header (MAC+IP+TCP)
	 * fits within one descriptor - this is required by the h/w.
	 * The header is reallocated and refilled here if the headers
	 * (MAC+IP+TCP) are not physically contiguous.
	 */
	if (ctx && ctx->lso_flag) {
		hdr_len = ctx->mac_hdr_len + ctx->ip_hdr_len + ctx->l4_hdr_len;
		len = MBLKL(mp);
		hdr_current_mp = mp;
		while (len < hdr_len) {
			hdr_previous_mp = hdr_current_mp;
			hdr_current_mp = hdr_current_mp->b_cont;
			len += MBLKL(hdr_current_mp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into pre-allocated
		 * page-aligned buffer.
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_current_mp));
		/*
		 * There are two cases in which we will reallocate
		 * an mblk for the last header fragment:
		 * 1. the header is in multiple mblks and
		 *    the last fragment shares the same mblk
		 *    with the payload
		 * 2. the header is in a single mblk shared
		 *    with the payload but the header crosses
		 *    a page.
		 */
		if ((hdr_current_mp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_current_mp->b_rptr, igb->page_size)
		    < hdr_len)) {
			/*
			 * reallocate the mblk for the last header fragment,
			 * expect it to be copied into pre-allocated
			 * page-aligned buffer
			 */
			hdr_new_mp = allocb(hdr_frag_len, NULL);
			if (!hdr_new_mp) {
				return (B_FALSE);
			}

			/* link the new header fragment with the other parts */
			bcopy(hdr_current_mp->b_rptr,
			    hdr_new_mp->b_rptr, hdr_frag_len);
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_current_mp;
			if (hdr_previous_mp)
				hdr_previous_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_current_mp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is handled with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not put the data to the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is pending completion. We have
		 * to process this empty fragment in the tx_copy routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing is just completed, we can just skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = igb_get_free_list(tx_ring);

			if (tcb == NULL) {
				IGB_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid taking the lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = igb_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = igb_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycle will fail and cause a false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of free tx descriptors is not enough for the
	 * transmit, then return failure.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = igb_tx_fill_ring(tx_ring, &pending_list, ctx, mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	/* Update per-ring tx statistics */
	tx_ring->tx_pkts++;
	tx_ring->tx_bytes += mbsize;

	mutex_exit(&tx_ring->tx_lock);

	return (B_TRUE);

tx_failure:
	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_current_mp->b_rptr -= hdr_frag_len;
		if (hdr_previous_mp)
			hdr_previous_mp->b_cont = hdr_current_mp;
		else
			mp = hdr_current_mp;
	}

	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (B_FALSE);
}

/*
 * igb_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
igb_tx_copy(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * igb_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
igb_tx_bind(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. One cookie will have
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * igb_get_tx_context
 *
 * Get the tx context information from the mblk
 */
static int
igb_get_tx_context(mblk_t *mp, tx_context_t *ctx)
{
	uint32_t start;
	uint32_t flags;
	uint32_t lso_flag;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags);
	bzero(ctx, sizeof (tx_context_t));

	ctx->hcksum_flags = flags;

	if (flags == 0)
		return (TX_CXT_SUCCESS);

	mac_lso_get(mp, &mss, &lso_flag);
	ctx->mss = mss;
	ctx->lso_flag = (lso_flag == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so here the packet will be
	 * dropped if the h/w checksum flags are not set.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			igb_log(NULL, IGB_LOG_INFO, "igb_tx: h/w "
			    "checksum flags are not set for LSO");
			return (TX_CXT_E_LSO_CSUM);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we assume the IP(V6) header is fully included in one
	 * mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		offset = mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		if (ctx->lso_flag) {
			*((uint16_t *)(uintptr_t)(pos + offsetof(ipha_t,
			    ipha_length))) = 0;

			/*
			 * To utilize igb LSO, the tcp checksum field of
			 * the packet has to be filled with the following
			 * pseudo-header checksum:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * and the ip header checksum field has to be
			 * filled with zero. Currently the tcp/ip stack
			 * has already done this.
			 */
		}

		l4_proto = *(uint8_t *)(pos + offsetof(ipha_t, ipha_protocol));
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		igb_log(NULL, IGB_LOG_INFO, "Ethernet type field error with "
		    "tx hcksum flag set");
		return (TX_CXT_E_ETHER_TYPE);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (TX_CXT_SUCCESS);
}

/*
 * igb_check_tx_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
igb_check_tx_context(igb_tx_ring_t *tx_ring, tx_context_t *ctx)
{
	tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored context data of the last context descriptor. The data
	 * that need to be checked are:
	 *     hcksum_flags
	 *     l4_proto
	 *     mss (only checked for LSO)
	 *     l4_hdr_len (only checked for LSO)
	 *     ip_hdr_len
	 *     mac_hdr_len
	 * If any one of the above items changes, a new context descriptor
	 * will be needed.
	 */
	last = &tx_ring->tx_context;

	if (ctx->hcksum_flags != 0) {
		if ((ctx->hcksum_flags != last->hcksum_flags) ||
		    (ctx->l4_proto != last->l4_proto) ||
		    (ctx->lso_flag && ((ctx->mss != last->mss) ||
		    (ctx->l4_hdr_len != last->l4_hdr_len))) ||
		    (ctx->ip_hdr_len != last->ip_hdr_len) ||
		    (ctx->mac_hdr_len != last->mac_hdr_len)) {
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * igb_fill_tx_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
igb_fill_tx_context(struct e1000_adv_tx_context_desc *ctx_tbd,
    tx_context_t *ctx, uint32_t ring_index)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    E1000_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    E1000_ADVTXD_TUCMD_L4T_UDP;
			 * Because E1000_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			igb_log(NULL, IGB_LOG_INFO,
			    "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;
	ctx_tbd->mss_l4len_idx = ring_index << 4;
	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx |=
		    (ctx->l4_hdr_len << E1000_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << E1000_ADVTXD_MSS_SHIFT);
	}
}

/*
 * igb_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
igb_tx_fill_ring(igb_tx_ring_t *tx_ring, link_list_t *pending_list,
    tx_context_t *ctx, size_t mbsize)
{
	struct e1000_hw *hw = &tx_ring->igb->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union e1000_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;
	igb_t *igb = tx_ring->igb;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = igb_check_tx_context(tx_ring, ctx);
		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			igb_fill_tx_context(
			    (struct e1000_adv_tx_context_desc *)tbd,
			    ctx, tx_ring->index);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block will be added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_RS |
			    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_DATA |
			    E1000_ADVTXD_DCMD_IFCS;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the checksum context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * 82576 also requires the payload length setting even without LSO
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_IFCS;
	if (ctx != NULL && ctx->lso_flag) {
		first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
		first_tbd->read.olinfo_status |=
		    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
		    - ctx->l4_hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT;
	} else {
		if (hw->mac.type >= e1000_82576) {
			first_tbd->read.olinfo_status |=
			    (mbsize << E1000_ADVTXD_PAYLEN_SHIFT);
		}
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_IXSM << 8;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_TXSM << 8;
		first_tbd->read.olinfo_status |= tx_ring->index << 4;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS;

	IGB_DEBUG_STAT(tx_ring->stat_pkt_cnt);

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer to avoid the race condition -
	 * otherwise the transmit of the tx descriptors could complete and
	 * the counter tbd_free could be increased by the tx recycling before
	 * we decrement it here.
	 */
	i = igb_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), index);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
	}

	return (desc_num);
}

/*
 * igb_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
igb_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * igb_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
igb_tx_recycle_legacy(igb_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, next_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {

		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For some chips, LSO descriptors can not be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet level recycling is used here.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		next_index = NEXT_INDEX(last_index, 1, tx_ring->ring_size);

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    E1000_TXD_STAT_DD;
		if (desc_done) {
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from the work
				 * list, and add it to the pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx descriptors
				 * recycled.
				 */
				desc_num += tcb->desc_num;

				/*
				 * Advance the index of the tx descriptor ring
				 */
				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];
				if (index == next_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * igb_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
igb_tx_recycle_head_wb(igb_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
1240c869993eSxy150489 */
1241c869993eSxy150489 if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
1242c869993eSxy150489 return (0);
1243c869993eSxy150489
1244c869993eSxy150489 ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);
1245c869993eSxy150489
1246c869993eSxy150489 if (tx_ring->tbd_free == tx_ring->ring_size) {
1247c869993eSxy150489 tx_ring->recycle_fail = 0;
1248c869993eSxy150489 tx_ring->stall_watchdog = 0;
1249c869993eSxy150489 mutex_exit(&tx_ring->recycle_lock);
1250c869993eSxy150489 return (0);
1251c869993eSxy150489 }
1252c869993eSxy150489
1253c869993eSxy150489 /*
1254c869993eSxy150489 * Sync the DMA buffer of the tx descriptor ring
1255c869993eSxy150489 *
1256c869993eSxy150489 * Note: in head write-back mode, the tx descriptors will not
1257c869993eSxy150489 * be written back; instead, the head write-back value is stored
1258c869993eSxy150489 * in the extra tbd at the end of the DMA area, so we still need
1259c869993eSxy150489 * to sync that value for the kernel.
1260c869993eSxy150489 *
1261c869993eSxy150489 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
1262c869993eSxy150489 */
1263c869993eSxy150489 (void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
1264c869993eSxy150489 sizeof (union e1000_adv_tx_desc) * tx_ring->ring_size,
1265c869993eSxy150489 sizeof (uint32_t),
1266c869993eSxy150489 DDI_DMA_SYNC_FORKERNEL);
1267c869993eSxy150489
12688bb4b220Sgl147354 if (igb_check_dma_handle(
12698bb4b220Sgl147354 tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
1270b227c420Schenlu chen - Sun Microsystems - Beijing China mutex_exit(&tx_ring->recycle_lock);
12718bb4b220Sgl147354 ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
1272cf8dcc9bSzhefeng xu - Sun Microsystems - Beijing China atomic_or_32(&igb->igb_state, IGB_ERROR);
1273cf8dcc9bSzhefeng xu - Sun Microsystems - Beijing China return (0);
12748bb4b220Sgl147354 }
12758bb4b220Sgl147354
1276c869993eSxy150489 LINK_LIST_INIT(&pending_list);
1277c869993eSxy150489 desc_num = 0;
1278c869993eSxy150489 index = tx_ring->tbd_head; /* Next index to clean */
1279c869993eSxy150489
1280c869993eSxy150489 /*
1281c869993eSxy150489 * Get the value of head write-back
1282c869993eSxy150489 */
1283c869993eSxy150489 head_wb = *tx_ring->tbd_head_wb;
1284c869993eSxy150489 while (index != head_wb) {
1285c869993eSxy150489 tcb = tx_ring->work_list[index];
1286c869993eSxy150489 ASSERT(tcb != NULL);
1287c869993eSxy150489
1288c869993eSxy150489 if (OFFSET(index, head_wb, tx_ring->ring_size) <
1289c869993eSxy150489 tcb->desc_num) {
1290c869993eSxy150489 /*
1291c869993eSxy150489 * The current tx control block has not been
1292c869993eSxy150489 * completely transmitted; stop recycling
1293c869993eSxy150489 */
1294c869993eSxy150489 break;
1295c869993eSxy150489 }
1296c869993eSxy150489
1297c869993eSxy150489 /*
1298c869993eSxy150489 * Strip off the tx control block from the work list,
1299c869993eSxy150489 * and add it to the pending list.
1300c869993eSxy150489 */
1301c869993eSxy150489 tx_ring->work_list[index] = NULL;
1302c869993eSxy150489 LIST_PUSH_TAIL(&pending_list, &tcb->link);
1303c869993eSxy150489
1304c869993eSxy150489 /*
1305c869993eSxy150489 * Advance the index of the tx descriptor ring
1306c869993eSxy150489 */
1307c869993eSxy150489 index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);
1308c869993eSxy150489
1309c869993eSxy150489 /*
1310c869993eSxy150489 * Count the total number of the tx descriptors recycled
1311c869993eSxy150489 */
1312c869993eSxy150489 desc_num += tcb->desc_num;
1313c869993eSxy150489 }
1314c869993eSxy150489
1315c869993eSxy150489 /*
1316c869993eSxy150489 * If no tx descriptors are recycled, no need to do more processing
1317c869993eSxy150489 */
1318c869993eSxy150489 if (desc_num == 0) {
1319c869993eSxy150489 tx_ring->recycle_fail++;
1320c869993eSxy150489 mutex_exit(&tx_ring->recycle_lock);
1321c869993eSxy150489 return (0);
1322c869993eSxy150489 }
1323c869993eSxy150489
1324c869993eSxy150489 tx_ring->recycle_fail = 0;
1325c869993eSxy150489 tx_ring->stall_watchdog = 0;
1326c869993eSxy150489
1327c869993eSxy150489 /*
1328c869993eSxy150489 * Update the head index of the tx descriptor ring
1329c869993eSxy150489 */
1330c869993eSxy150489 tx_ring->tbd_head = index;
1331c869993eSxy150489
1332c869993eSxy150489 /*
1333c869993eSxy150489 * Update the number of the free tx descriptors with atomic operations
1334c869993eSxy150489 */
1335c869993eSxy150489 atomic_add_32(&tx_ring->tbd_free, desc_num);
1336c869993eSxy150489
1337c869993eSxy150489 mutex_exit(&tx_ring->recycle_lock);
1338c869993eSxy150489
1339c869993eSxy150489 /*
1340c869993eSxy150489 * Free the resources used by the tx control blocks
1341c869993eSxy150489 * in the pending list
1342c869993eSxy150489 */
1343c869993eSxy150489 tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
1344c869993eSxy150489 while (tcb != NULL) {
1345c869993eSxy150489 /*
1346c869993eSxy150489 * Release the resources occupied by the tx control block
1347c869993eSxy150489 */
1348c869993eSxy150489 igb_free_tcb(tcb);
1349c869993eSxy150489
1350c869993eSxy150489 tcb = (tx_control_block_t *)
1351c869993eSxy150489 LIST_GET_NEXT(&pending_list, &tcb->link);
1352c869993eSxy150489 }
1353c869993eSxy150489
1354c869993eSxy150489 /*
1355c869993eSxy150489 * Add the tx control blocks in the pending list to the free list.
1356c869993eSxy150489 */
1357c869993eSxy150489 igb_put_free_list(tx_ring, &pending_list);
1358c869993eSxy150489
1359c869993eSxy150489 return (desc_num);
1360c869993eSxy150489 }
1361c869993eSxy150489
1362c869993eSxy150489 /*
1363c869993eSxy150489 * igb_free_tcb - free up the tx control block
1364c869993eSxy150489 *
1365c869993eSxy150489 * Free the resources of the tx control block, including
1366c869993eSxy150489 * unbinding the previously bound DMA handle and resetting
1367c869993eSxy150489 * other control fields.
1368c869993eSxy150489 */
1369c869993eSxy150489 void
1370c869993eSxy150489 igb_free_tcb(tx_control_block_t *tcb)
1371c869993eSxy150489 {
1372c869993eSxy150489 switch (tcb->tx_type) {
1373c869993eSxy150489 case USE_COPY:
1374c869993eSxy150489 /*
1375c869993eSxy150489 * Reset the buffer length that is used for copy
1376c869993eSxy150489 */
1377c869993eSxy150489 tcb->tx_buf.len = 0;
1378c869993eSxy150489 break;
1379c869993eSxy150489 case USE_DMA:
1380c869993eSxy150489 /*
1381c869993eSxy150489 * Release the DMA resource that is used for
1382c869993eSxy150489 * DMA binding.
1383c869993eSxy150489 */
1384c869993eSxy150489 (void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
1385c869993eSxy150489 break;
1386c869993eSxy150489 default:
1387c869993eSxy150489 break;
1388c869993eSxy150489 }
1389c869993eSxy150489
1390c869993eSxy150489 /*
1391c869993eSxy150489 * Free the mblk
1392c869993eSxy150489 */
1393c869993eSxy150489 if (tcb->mp != NULL) {
1394c869993eSxy150489 freemsg(tcb->mp);
1395c869993eSxy150489 tcb->mp = NULL;
1396c869993eSxy150489 }
1397c869993eSxy150489
1398c869993eSxy150489 tcb->tx_type = USE_NONE;
139969b2d733SGuoqing Zhu tcb->last_index = MAX_TX_RING_SIZE;
1400c869993eSxy150489 tcb->frag_num = 0;
1401c869993eSxy150489 tcb->desc_num = 0;
1402c869993eSxy150489 }
1403c869993eSxy150489
1404c869993eSxy150489 /*
1405c869993eSxy150489 * igb_get_free_list - Get a free tx control block from the free list
1406c869993eSxy150489 *
1407c869993eSxy150489 * The atomic operation on the number of available tx control blocks
1408c869993eSxy150489 * in the free list is used to keep this routine mutually exclusive
1409c869993eSxy150489 * with the routine igb_put_free_list.
1410c869993eSxy150489 */
1411c869993eSxy150489 static tx_control_block_t *
1412c869993eSxy150489 igb_get_free_list(igb_tx_ring_t *tx_ring)
1413c869993eSxy150489 {
1414c869993eSxy150489 tx_control_block_t *tcb;
1415c869993eSxy150489
1416c869993eSxy150489 /*
1417c869993eSxy150489 * Check and update the number of free tx control blocks
1418c869993eSxy150489 * in the free list.
1419c869993eSxy150489 */
1420c869993eSxy150489 if (igb_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
1421c869993eSxy150489 return (NULL);
1422c869993eSxy150489
1423c869993eSxy150489 mutex_enter(&tx_ring->tcb_head_lock);
1424c869993eSxy150489
1425c869993eSxy150489 tcb = tx_ring->free_list[tx_ring->tcb_head];
1426c869993eSxy150489 ASSERT(tcb != NULL);
1427c869993eSxy150489 tx_ring->free_list[tx_ring->tcb_head] = NULL;
1428c869993eSxy150489 tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
1429c869993eSxy150489 tx_ring->free_list_size);
1430c869993eSxy150489
1431c869993eSxy150489 mutex_exit(&tx_ring->tcb_head_lock);
1432c869993eSxy150489
1433c869993eSxy150489 return (tcb);
1434c869993eSxy150489 }
1435c869993eSxy150489
1436c869993eSxy150489 /*
1437c869993eSxy150489 * igb_put_free_list
1438c869993eSxy150489 *
1439c869993eSxy150489 * Put a list of used tx control blocks back onto the free list
1440c869993eSxy150489 *
1441c869993eSxy150489 * A mutex is used here to ensure serialization. The mutual exclusion
1442c869993eSxy150489 * between igb_get_free_list and igb_put_free_list is implemented with
1443c869993eSxy150489 * the atomic operation on the counter tcb_free.
1444c869993eSxy150489 */
1445c869993eSxy150489 void
1446c869993eSxy150489 igb_put_free_list(igb_tx_ring_t *tx_ring, link_list_t *pending_list)
1447c869993eSxy150489 {
1448c869993eSxy150489 uint32_t index;
1449c869993eSxy150489 int tcb_num;
1450c869993eSxy150489 tx_control_block_t *tcb;
1451c869993eSxy150489
1452c869993eSxy150489 mutex_enter(&tx_ring->tcb_tail_lock);
1453c869993eSxy150489
1454c869993eSxy150489 index = tx_ring->tcb_tail;
1455c869993eSxy150489
1456c869993eSxy150489 tcb_num = 0;
1457c869993eSxy150489 tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
1458c869993eSxy150489 while (tcb != NULL) {
1459c869993eSxy150489 ASSERT(tx_ring->free_list[index] == NULL);
1460c869993eSxy150489 tx_ring->free_list[index] = tcb;
1461c869993eSxy150489
1462c869993eSxy150489 tcb_num++;
1463c869993eSxy150489
1464c869993eSxy150489 index = NEXT_INDEX(index, 1, tx_ring->free_list_size);
1465c869993eSxy150489
1466c869993eSxy150489 tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
1467c869993eSxy150489 }
1468c869993eSxy150489
1469c869993eSxy150489 tx_ring->tcb_tail = index;
1470c869993eSxy150489
1471c869993eSxy150489 /*
1472c869993eSxy150489 * Update the number of free tx control blocks
1473c869993eSxy150489 * in the free list. This operation must be done
1474c869993eSxy150489 * under the protection of the lock.
1475c869993eSxy150489 */
1476c869993eSxy150489 atomic_add_32(&tx_ring->tcb_free, tcb_num);
1477c869993eSxy150489
1478c869993eSxy150489 mutex_exit(&tx_ring->tcb_tail_lock);
1479c869993eSxy150489 }
1480
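/*
 * The free-list handling above pairs a per-end mutex (tcb_head_lock and
 * tcb_tail_lock) with an atomic reservation on the shared counter tcb_free:
 * a consumer reserves a credit before taking an entry off the head, and a
 * producer returns credits only after the entries are back on the tail.
 * The fragment below is a minimal, illustrative sketch of such a reserve
 * helper, assuming semantics similar to igb_atomic_reserve() (fail when
 * fewer than n credits remain); it is not compiled as part of the driver,
 * and the helper name is hypothetical.
 */
#if 0
static int
example_atomic_reserve(volatile uint32_t *count, uint32_t n)
{
	uint32_t oldval, newval;

	do {
		oldval = *count;
		if (oldval < n)
			return (-1);	/* not enough credits left */
		newval = oldval - n;
	} while (atomic_cas_32(count, oldval, newval) != oldval);

	return ((int)newval);	/* credits remaining after the reserve */
}
#endif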