/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "igb_sw.h"

static boolean_t igb_tx(igb_tx_ring_t *, mblk_t *);
static int igb_tx_copy(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int igb_tx_bind(igb_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int igb_tx_fill_ring(igb_tx_ring_t *, link_list_t *, tx_context_t *,
    size_t);
static void igb_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *igb_get_free_list(igb_tx_ring_t *);
static int igb_get_tx_context(mblk_t *, tx_context_t *);
static boolean_t igb_check_tx_context(igb_tx_ring_t *, tx_context_t *);
static void igb_fill_tx_context(struct e1000_adv_tx_context_desc *,
    tx_context_t *, uint32_t);

mblk_t *
igb_tx_ring_send(void *arg, mblk_t *mp)
{
	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg;
	igb_t *igb;

	ASSERT(tx_ring != NULL);

	igb = tx_ring->igb;

	if ((igb->igb_state & IGB_SUSPENDED) ||
	    (igb->igb_state & IGB_ERROR) ||
	    !(igb->igb_state & IGB_STARTED) ||
	    igb->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	return ((igb_tx(tx_ring, mp)) ? NULL : mp);
}

/*
 * igb_tx - Main transmit processing
 *
 * Called from igb_m_tx with an mblk ready to transmit. This
 * routine sets up the transmit descriptors and sends data to
 * the wire.
 *
 * One mblk can consist of several fragments, and each fragment
 * is processed with a method chosen by its size. Fragments
 * smaller than the bcopy threshold are processed with bcopy;
 * larger fragments are processed with DMA binding.
 *
 * To process the mblk, a tx control block is obtained from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data, and one tx DMA handle,
 * which is used to bind an mblk fragment with DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer will be transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks will
 * be put to the work list.
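 *
 * For example, a packet whose first fragment is a small protocol
 * header (below the bcopy threshold) followed by one large payload
 * fragment would typically use two tx control blocks: the header is
 * bcopy'd into the first tcb's buffer (one descriptor), and the
 * payload is DMA-bound by the second tcb (one descriptor per DMA
 * cookie).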
 */
static boolean_t
igb_tx(igb_tx_ring_t *tx_ring, mblk_t *mp)
{
	igb_t *igb = tx_ring->igb;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp;
	tx_control_block_t *tcb;
	tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_previous_mp = NULL;
	mblk_t *hdr_current_mp = NULL;
	uint32_t hdr_frag_len;
	uint32_t hdr_len, len;
	uint32_t copy_thresh;

	copy_thresh = igb->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (igb->tx_hcksum_enable) {
		ctx = &tx_context;
		/*
		 * Retrieve offloading context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		if (igb_get_tx_context(mp, ctx) != TX_CXT_SUCCESS) {
			freemsg(mp);
			return (B_TRUE);
		}

		if ((ctx->lso_flag &&
		    (mbsize > (ctx->mac_hdr_len + IGB_LSO_MAXLEN))) ||
		    (!ctx->lso_flag &&
		    (mbsize > (igb->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			igb_log(igb, IGB_LOG_INFO, "igb_tx: packet oversize");
			return (B_TRUE);
		}
	} else {
		ctx = NULL;
		if (mbsize > (igb->max_frame_size - ETHERFCSL)) {
			freemsg(mp);
			igb_log(igb, IGB_LOG_INFO, "igb_tx: packet oversize");
			return (B_TRUE);
		}
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < igb->tx_recycle_thresh)
		tx_ring->tx_recycle(tx_ring);

	/*
	 * After the recycling, if tbd_free is still less than the
	 * tx_overload_threshold, assert overload and return B_FALSE;
	 * the transmit will then need to be re-scheduled.
	 */
	if (tx_ring->tbd_free < igb->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IGB_DEBUG_STAT(tx_ring->stat_overload);
		return (B_FALSE);
	}

	/*
	 * The software must guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor - this is required
	 * by the hardware. Reallocate and refill the header here if
	 * the headers (MAC+IP+TCP) are not physically contiguous.
	 */
	if (ctx && ctx->lso_flag) {
		hdr_len = ctx->mac_hdr_len + ctx->ip_hdr_len + ctx->l4_hdr_len;
		len = MBLKL(mp);
		hdr_current_mp = mp;
		while (len < hdr_len) {
			hdr_previous_mp = hdr_current_mp;
			hdr_current_mp = hdr_current_mp->b_cont;
			len += MBLKL(hdr_current_mp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_current_mp));
		/*
		 * There are two cases in which we reallocate
		 * an mblk for the last header fragment:
		 * 1. the header is in multiple mblks and
		 *    the last fragment shares the same mblk
		 *    with the payload
		 * 2. the header is in a single mblk shared
		 *    with the payload but the header crosses
		 *    a page.
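		 *
		 * In either case the reallocated header fragment ends up
		 * being bcopy'd into the tcb's pre-allocated, page-aligned
		 * buffer, because copy_thresh is raised to at least hdr_len
		 * below, so the hardware sees the whole MAC+IP+TCP header
		 * in one physically contiguous buffer (one descriptor).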
		 */
		if ((hdr_current_mp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_current_mp->b_rptr, igb->page_size)
		    < hdr_len)) {
			/*
			 * Reallocate the mblk for the last header fragment;
			 * expect it to be copied into the pre-allocated
			 * page-aligned buffer.
			 */
			hdr_new_mp = allocb(hdr_frag_len, 0);
			if (!hdr_new_mp) {
				return (B_FALSE);
			}

			/* link the new header fragment with the other parts */
			bcopy(hdr_current_mp->b_rptr,
			    hdr_new_mp->b_rptr, hdr_frag_len);
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_current_mp;
			if (hdr_previous_mp)
				hdr_previous_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_current_mp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is handled with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not put the data to the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is still pending completion. We
		 * have to process this empty fragment in the tx_copy
		 * routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing is just completed, we can just skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = igb_get_free_list(tx_ring);

			if (tcb == NULL) {
				IGB_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid using lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = igb_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = igb_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycle will fail and cause a false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of free tx descriptors is not enough for transmit
	 * then return failure.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = igb_tx_fill_ring(tx_ring, &pending_list, ctx, mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	/* Update per-ring tx statistics */
	tx_ring->tx_pkts++;
	tx_ring->tx_bytes += mbsize;

	mutex_exit(&tx_ring->tx_lock);

	return (B_TRUE);

tx_failure:
	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_current_mp->b_rptr -= hdr_frag_len;
		if (hdr_previous_mp)
			hdr_previous_mp->b_cont = hdr_current_mp;
		else
			mp = hdr_current_mp;
	}

	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (B_FALSE);
}

/*
 * igb_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
igb_tx_copy(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * igb_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
igb_tx_bind(igb_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IGB_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. One cookie will have
	 * one tx descriptor to transmit.
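	 *
	 * For example, a fragment that the DMA engine maps onto three
	 * non-contiguous physical regions comes back as three cookies
	 * and therefore produces three tx descriptors.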
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		igb_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * igb_get_tx_context
 *
 * Get the tx context information from the mblk
 */
static int
igb_get_tx_context(mblk_t *mp, tx_context_t *ctx)
{
	uint32_t start;
	uint32_t flags;
	uint32_t lso_flag;
	uint32_t lso_cksum;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &flags);
	bzero(ctx, sizeof (tx_context_t));

	ctx->hcksum_flags = flags;

	if (flags == 0)
		return (TX_CXT_SUCCESS);

	mac_lso_get(mp, &mss, &lso_flag);
	ctx->mss = mss;
	ctx->lso_flag = (lso_flag == HW_LSO);

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we assume the IP(V6) header is fully included in one
	 * mblk fragment.
	 */
	lso_cksum = HCK_PARTIALCKSUM;
	ctx->l3_proto = etype;
	switch (etype) {
	case ETHERTYPE_IP:
		offset = mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		if (ctx->lso_flag) {
			*((uint16_t *)(uintptr_t)(pos + offsetof(ipha_t,
			    ipha_length))) = 0;

			/*
			 * To utilize igb LSO, the tcp checksum field of
			 * the packet needs to be filled with the following
			 * pseudo-header checksum:
			 * (ip_source_addr, ip_destination_addr, l4_proto),
			 * and the ip header checksum needs to be filled
			 * with zero. Currently the tcp/ip stack has
			 * already done this.
			 */
			lso_cksum |= HCK_IPV4_HDRCKSUM;
		}

		l4_proto = *(uint8_t *)(pos + offsetof(ipha_t, ipha_protocol));
		break;
	case ETHERTYPE_IPV6:
		/*
		 * We need to zero out the length in the header.
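		 * This zeroes ip6_plen for LSO, mirroring the ipha_length
		 * zeroing in the IPv4 case above, so the hardware can fill
		 * in the per-segment length.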
		 */
		if (ctx->lso_flag) {
			offset = offsetof(ip6_t, ip6_plen) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;
		}

		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		igb_log(NULL, IGB_LOG_INFO, "Ethernet type field error with "
		    "tx hcksum flag set");
		return (TX_CXT_E_ETHER_TYPE);
	}

	if (ctx->lso_flag) {
		/*
		 * LSO relies on tx h/w checksum, so here the packet will be
		 * dropped if the h/w checksum flags are not set.
		 */
		if ((ctx->hcksum_flags & lso_cksum) != lso_cksum) {
			igb_log(NULL, IGB_LOG_INFO, "igb_tx: h/w "
			    "checksum flags are not set for LSO, found "
			    "0x%x, needed bits 0x%x", ctx->hcksum_flags,
			    lso_cksum);
			return (TX_CXT_E_LSO_CSUM);
		}

		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (TX_CXT_SUCCESS);
}

/*
 * igb_check_tx_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
igb_check_tx_context(igb_tx_ring_t *tx_ring, tx_context_t *ctx)
{
	tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored context data of the last context descriptor. The data
	 * that need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	l3_proto
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 *	ip_hdr_len
	 *	mac_hdr_len
	 * If any of the above data changes, a new context descriptor
	 * will be needed.
	 */
	last = &tx_ring->tx_context;

	if (ctx->hcksum_flags != 0) {
		if ((ctx->hcksum_flags != last->hcksum_flags) ||
		    (ctx->l4_proto != last->l4_proto) ||
		    (ctx->l3_proto != last->l3_proto) ||
		    (ctx->lso_flag && ((ctx->mss != last->mss) ||
		    (ctx->l4_hdr_len != last->l4_hdr_len))) ||
		    (ctx->ip_hdr_len != last->ip_hdr_len) ||
		    (ctx->mac_hdr_len != last->mac_hdr_len)) {
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}

/*
 * igb_fill_tx_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
igb_fill_tx_context(struct e1000_adv_tx_context_desc *ctx_tbd,
    tx_context_t *ctx, uint32_t ring_index)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    E1000_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_CTXT;

	/*
	 * When we have a TX context set up, we enforce that the ethertype is
	 * either IPv4 or IPv6 in igb_get_tx_context().
	 */
	if (ctx->lso_flag || ctx->hcksum_flags & HCK_IPV4_HDRCKSUM) {
		if (ctx->l3_proto == ETHERTYPE_IP) {
			ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV4;
		} else {
			ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6;
		}
	}

	if (ctx->lso_flag || ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    E1000_ADVTXD_TUCMD_L4T_UDP;
			 * Because E1000_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			igb_log(NULL, IGB_LOG_INFO,
			    "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;
	ctx_tbd->mss_l4len_idx = ring_index << 4;
	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx |=
		    (ctx->l4_hdr_len << E1000_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << E1000_ADVTXD_MSS_SHIFT);
	}
}

/*
 * igb_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
igb_tx_fill_ring(igb_tx_ring_t *tx_ring, link_list_t *pending_list,
    tx_context_t *ctx, size_t mbsize)
{
	struct e1000_hw *hw = &tx_ring->igb->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union e1000_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;
	igb_t *igb = tx_ring->igb;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = igb_check_tx_context(tx_ring, ctx);
		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			igb_fill_tx_context(
			    (struct e1000_adv_tx_context_desc *)tbd,
			    ctx, tx_ring->index);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list is strictly 1:1 corresponding to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block will be added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
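	 *
	 * For example, if a tx control block's descriptors land at ring
	 * indexes 10 through 12, the tcb is stored at work_list[10] while
	 * work_list[11] and work_list[12] stay NULL.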
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_RS |
			    E1000_ADVTXD_DCMD_DEXT | E1000_ADVTXD_DTYP_DATA |
			    E1000_ADVTXD_DCMD_IFCS;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the checksum context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * 82576 also requires the payload length setting even without LSO.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_IFCS;
	if (ctx != NULL && ctx->lso_flag) {
		first_tbd->read.cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
		first_tbd->read.olinfo_status |=
		    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
		    - ctx->l4_hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT;
	} else {
		if (hw->mac.type >= e1000_82576) {
			first_tbd->read.olinfo_status |=
			    (mbsize << E1000_ADVTXD_PAYLEN_SHIFT);
		}
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_IXSM << 8;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    E1000_TXD_POPTS_TXSM << 8;
		first_tbd->read.olinfo_status |= tx_ring->index << 4;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS;

	IGB_DEBUG_STAT(tx_ring->stat_pkt_cnt);

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of free tx descriptors.
	 *
	 * Note: we must always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid a race condition:
	 * otherwise, the hardware could finish transmitting the descriptors
	 * and the tx recycling could increase tbd_free before the decrement
	 * here has taken effect.
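	 * That is, tbd_free would transiently overstate the number of
	 * free descriptors, because the recycle path would add desc_num
	 * back before we have subtracted it.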
	 */
	i = igb_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), index);

	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
	}

	return (desc_num);
}

/*
 * igb_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
igb_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * igb_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
igb_tx_recycle_legacy(igb_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, next_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {

		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For some chips, LSO descriptors can not be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet level recycling is used here.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
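		 * igb_free_tcb() resets last_index to MAX_TX_RING_SIZE,
		 * so a tcb that has been placed on the work list but whose
		 * last_index has not yet been set by igb_tx_fill_ring()
		 * is skipped here rather than recycled prematurely.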
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		next_index = NEXT_INDEX(last_index, 1, tx_ring->ring_size);

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    E1000_TXD_STAT_DD;
		if (desc_done) {
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from the work
				 * list, and add it to the pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx descriptors
				 * recycled.
				 */
				desc_num += tcb->desc_num;

				/*
				 * Advance the index of the tx descriptor ring
				 */
				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];
				if (index == next_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * igb_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
igb_tx_recycle_head_wb(igb_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	igb_t *igb = tx_ring->igb;

	/*
	 * The mutex_tryenter() is used to avoid unnecessary
	 * lock contention.
	 */
	if (mutex_tryenter(&tx_ring->recycle_lock) == 0)
		return (0);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area; we still need
	 * to sync the head write-back value for the kernel.
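	 *
	 * The head write-back word is stored right after the ring_size
	 * descriptors, which is why the partial ddi_dma_sync() below
	 * starts at offset ring_size * sizeof (union e1000_adv_tx_desc)
	 * and covers only sizeof (uint32_t).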
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union e1000_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (igb_check_dma_handle(
	    tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&igb->igb_state, IGB_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		igb_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	igb_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * igb_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle, and resetting
 * other control fields.
 */
void
igb_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * igb_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with the routine igb_put_free_list.
 */
static tx_control_block_t *
igb_get_free_list(igb_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of free tx control blocks
	 * in the free list.
	 */
	if (igb_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * igb_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure serialization. The mutual exclusion
 * between igb_get_free_list and igb_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
igb_put_free_list(igb_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}