/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through the specified tx ring.
 *
 * One mblk can consist of several fragments, and each fragment is
 * processed with a different method based on its size. Fragments
 * smaller than the bcopy threshold are processed with bcopy;
 * otherwise, they are processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the free
 * list. One tx control block contains one tx buffer, which is used
 * to copy mblk fragment data, and one tx DMA handle, which is used
 * to bind an mblk fragment to a DMA resource.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and the buffer is then transmitted with one tx
 * descriptor.
 *
 * A large fragment binds only to one tx control block's DMA handle,
 * and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks may be
 * used. After the processing, those tx control blocks are put on
 * the work list.
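 *
 * A purely illustrative example (hypothetical sizes, not taken from any
 * real trace): with a bcopy threshold of 512 bytes, an mblk chain of
 * three fragments of 20, 30 and 3000 bytes could be handled as
 *
 *	fragment 1 (20 bytes)   - bcopy into tcb A's buffer
 *	fragment 2 (30 bytes)   - bcopy appended to tcb A's buffer
 *	fragment 3 (3000 bytes) - DMA bound through tcb B's handle
 *
 * which yields one data descriptor for tcb A plus one descriptor per
 * DMA cookie for tcb B.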
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
        ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
        ixgbe_t *ixgbe = tx_ring->ixgbe;
        tx_type_t current_flag, next_flag;
        uint32_t current_len, next_len;
        uint32_t desc_total;
        size_t mbsize;
        int desc_num;
        boolean_t copy_done, eop;
        mblk_t *current_mp, *next_mp, *nmp;
        tx_control_block_t *tcb;
        ixgbe_tx_context_t tx_context, *ctx;
        link_list_t pending_list;
        uint32_t len, hdr_frag_len, hdr_len;
        uint32_t copy_thresh;
        mblk_t *new_mp;
        mblk_t *pre_mp;

        ASSERT(mp->b_next == NULL);

        copy_thresh = tx_ring->copy_thresh;

        /* Get the mblk size */
        mbsize = 0;
        for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
                mbsize += MBLKL(nmp);
        }

        if (ixgbe->tx_hcksum_enable) {
                /*
                 * Retrieve checksum context information from the mblk
                 * that will be used to decide whether/how to fill the
                 * context descriptor.
                 */
                ctx = &tx_context;
                if (ixgbe_get_context(mp, ctx) < 0) {
                        freemsg(mp);
                        return (NULL);
                }

                /*
                 * If the mblk size exceeds the max size ixgbe can
                 * process, then discard this mblk and return NULL.
                 */
                if ((ctx->lso_flag &&
                    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
                    (!ctx->lso_flag &&
                    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
                        freemsg(mp);
                        IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
                        return (NULL);
                }
        } else {
                ctx = NULL;
        }

        /*
         * Check and recycle tx descriptors.
         * The recycle threshold here should be selected carefully.
         */
        if (tx_ring->tbd_free < tx_ring->recycle_thresh) {
                tx_ring->tx_recycle(tx_ring);
        }

        /*
         * After the recycling, if tbd_free is still less than the
         * overload threshold, assert overload and return mp;
         * the transmit will need to be re-scheduled.
         */
        if (tx_ring->tbd_free < tx_ring->overload_thresh) {
                tx_ring->reschedule = B_TRUE;
                IXGBE_DEBUG_STAT(tx_ring->stat_overload);
                return (mp);
        }

        /*
         * The pending_list is a linked list that is used to save
         * the tx control blocks whose packet data has been processed
         * but has not yet been put on the tx descriptor ring.
         * It is used to reduce contention on tx_lock.
         */
        LINK_LIST_INIT(&pending_list);
        desc_num = 0;
        desc_total = 0;

        /*
         * The software should guarantee that the LSO packet header
         * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
         * and refill the header if it is physically non-contiguous.
         */
        if ((ctx != NULL) && ctx->lso_flag) {
                /* find the last fragment of the header */
                len = MBLKL(mp);
                ASSERT(len > 0);
                nmp = mp;
                pre_mp = NULL;
                hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
                while (len < hdr_len) {
                        pre_mp = nmp;
                        nmp = nmp->b_cont;
                        len += MBLKL(nmp);
                }
                /*
                 * If the header and the payload are in different mblks,
                 * we simply force the header to be copied into the
                 * pre-allocated page-aligned buffer.
                 */
                if (len == hdr_len)
                        goto adjust_threshold;

                hdr_frag_len = hdr_len - (len - MBLKL(nmp));
                /*
                 * There are two cases where we need to reallocate an mblk
                 * for the last header fragment:
                 * 1. the header is in multiple mblks and the last fragment
                 *    shares the same mblk with the payload
                 * 2. the header is in a single mblk shared with the payload
                 *    and the header is physically non-contiguous
                 */
                if ((nmp != mp) ||
                    (P2NPHASE((uintptr_t)nmp->b_rptr, ixgbe->sys_page_size)
                    < hdr_len)) {
                        IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
                        /*
                         * reallocate the mblk for the last header fragment,
                         * expecting to bcopy it into the pre-allocated
                         * page-aligned buffer
                         */
                        new_mp = allocb(hdr_frag_len, NULL);
                        if (!new_mp)
                                return (mp);
                        bcopy(nmp->b_rptr, new_mp->b_rptr, hdr_frag_len);
                        /* link the new header fragment with the other parts */
                        new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
                        new_mp->b_cont = nmp;
                        if (pre_mp)
                                pre_mp->b_cont = new_mp;
                        nmp->b_rptr += hdr_frag_len;
                        if (hdr_frag_len == hdr_len)
                                mp = new_mp;
                }
adjust_threshold:
                /*
                 * adjust the bcopy threshold to guarantee that the
                 * header uses the bcopy path
                 */
                if (copy_thresh < hdr_len)
                        copy_thresh = hdr_len;
        }

        current_mp = mp;
        current_len = MBLKL(current_mp);
        /*
         * Decide which method to use for the first fragment
         */
        current_flag = (current_len <= copy_thresh) ?
            USE_COPY : USE_DMA;
        /*
         * If the mblk includes several contiguous small fragments,
         * they may be copied into one buffer. This flag is used to
         * indicate whether there are pending fragments that need to
         * be copied to the current tx buffer.
         *
         * If this flag is B_TRUE, it indicates that a new tx control
         * block is needed to process the next fragment using either
         * copy or DMA binding.
         *
         * Otherwise, it indicates that the next fragment will be
         * copied to the current tx buffer that is maintained by the
         * current tx control block. No new tx control block is needed.
         */
        copy_done = B_TRUE;
        while (current_mp) {
                next_mp = current_mp->b_cont;
                eop = (next_mp == NULL); /* Last fragment of the packet? */
                next_len = eop ? 0: MBLKL(next_mp);

                /*
                 * When the current fragment is an empty fragment, and the
                 * next fragment will still be copied to the current tx
                 * buffer, we cannot skip this fragment here, because the
                 * copy processing is still pending completion. We have
                 * to process this empty fragment in the tx_copy routine.
                 *
                 * If the copy processing is completed, or a DMA binding
                 * has just completed, we can simply skip this empty
                 * fragment.
                 */
                if ((current_len == 0) && (copy_done)) {
                        current_mp = next_mp;
                        current_len = next_len;
                        current_flag = (current_len <= copy_thresh) ?
                            USE_COPY : USE_DMA;
                        continue;
                }

                if (copy_done) {
                        /*
                         * Get a new tx control block from the free list
                         */
                        tcb = ixgbe_get_free_list(tx_ring);

                        if (tcb == NULL) {
                                IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
                                goto tx_failure;
                        }

                        /*
                         * Push the tx control block to the pending list
                         * to avoid taking the lock too early
                         */
                        LIST_PUSH_TAIL(&pending_list, &tcb->link);
                }

                if (current_flag == USE_COPY) {
                        /*
                         * Check whether to use bcopy or DMA binding to process
                         * the next fragment, and if using bcopy, whether we
                         * need to continue copying the next fragment into the
                         * current tx buffer.
                         */
                        ASSERT((tcb->tx_buf.len + current_len) <=
                            tcb->tx_buf.size);

                        if (eop) {
                                /*
                                 * This is the last fragment of the packet, so
                                 * the copy processing will be completed with
                                 * this fragment.
                                 */
                                next_flag = USE_NONE;
                                copy_done = B_TRUE;
                        } else if ((tcb->tx_buf.len + current_len + next_len) >
                            tcb->tx_buf.size) {
                                /*
                                 * If the next fragment is too large to be
                                 * copied to the current tx buffer, we need
                                 * to complete the current copy processing.
                                 */
                                next_flag = (next_len > copy_thresh) ?
                                    USE_DMA: USE_COPY;
                                copy_done = B_TRUE;
                        } else if (next_len > copy_thresh) {
                                /*
                                 * The next fragment needs to be processed with
                                 * DMA binding. So the copy processing will be
                                 * completed with the current fragment.
                                 */
                                next_flag = USE_DMA;
                                copy_done = B_TRUE;
                        } else {
                                /*
                                 * Continue to copy the next fragment to the
                                 * current tx buffer.
                                 */
                                next_flag = USE_COPY;
                                copy_done = B_FALSE;
                        }

                        desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
                            current_len, copy_done);
                } else {
                        /*
                         * Check whether to use bcopy or DMA binding to process
                         * the next fragment.
                         */
                        next_flag = (next_len > copy_thresh) ?
                            USE_DMA: USE_COPY;
                        ASSERT(copy_done == B_TRUE);

                        desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
                            current_len);
                }

                if (desc_num > 0)
                        desc_total += desc_num;
                else if (desc_num < 0)
                        goto tx_failure;

                current_mp = next_mp;
                current_len = next_len;
                current_flag = next_flag;
        }

        /*
         * Attach the mblk to the last tx control block
         */
        ASSERT(tcb);
        ASSERT(tcb->mp == NULL);
        tcb->mp = mp;

        /*
         * Before filling the tx descriptor ring with the data, we need to
         * ensure there are adequate free descriptors for transmit
         * (including one context descriptor).
         */
        if (tx_ring->tbd_free < (desc_total + 1)) {
                tx_ring->tx_recycle(tx_ring);
        }

        mutex_enter(&tx_ring->tx_lock);

        /*
         * If the number of free tx descriptors is not enough for the
         * transmit, then return mp.
         *
         * Note: we must put this check under the mutex protection to
         * ensure correctness when multiple threads access it in
         * parallel.
         */
        if (tx_ring->tbd_free < (desc_total + 1)) {
                IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
                mutex_exit(&tx_ring->tx_lock);
                goto tx_failure;
        }

        desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
            mbsize);

        ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

        mutex_exit(&tx_ring->tx_lock);

        return (NULL);

tx_failure:
        /*
         * Discard the mblk and free the used resources
         */
        tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
        while (tcb) {
                tcb->mp = NULL;

                ixgbe_free_tcb(tcb);

                tcb = (tx_control_block_t *)
                    LIST_GET_NEXT(&pending_list, &tcb->link);
        }

        /*
         * Return the tx control blocks in the pending list to the free list.
         */
        ixgbe_put_free_list(tx_ring, &pending_list);

        /* Transmit failed, do not drop the mblk, reschedule the transmit */
        tx_ring->reschedule = B_TRUE;

        return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
        dma_buffer_t *tx_buf;
        uint32_t desc_num;
        _NOTE(ARGUNUSED(tx_ring));

        tx_buf = &tcb->tx_buf;

        /*
         * Copy the packet data of the mblk fragment into the
         * pre-allocated tx buffer, which is maintained by the
         * tx control block.
         *
         * Several mblk fragments can be copied into one tx buffer.
         * The destination address of the current copied fragment in
         * the tx buffer is next to the end of the previous copied
         * fragment.
         */
        if (len > 0) {
                bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

                tx_buf->len += len;
                tcb->frag_num++;
        }

        desc_num = 0;

        /*
         * If it is the last fragment copied to the current tx buffer,
         * in other words, if there's no remaining fragment or the remaining
         * fragment requires a new tx control block to process, we need to
         * complete the current copy processing by syncing up the current
         * DMA buffer and saving the descriptor data.
         */
        if (copy_done) {
                /*
                 * Sync the DMA buffer of the packet data
                 */
                DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

                tcb->tx_type = USE_COPY;

                /*
                 * Save the address and length to the private data structure
                 * of the tx control block, which will be used to fill the
                 * tx descriptor ring after all the fragments are processed.
                 */
                ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
                desc_num++;
        }

        return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
        int status, i;
        ddi_dma_cookie_t dma_cookie;
        uint_t ncookies;
        int desc_num;

        /*
         * Use DMA binding to process the mblk fragment
         */
        status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
            (caddr_t)mp->b_rptr, len,
            DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
            0, &dma_cookie, &ncookies);

        if (status != DDI_DMA_MAPPED) {
                IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
                return (-1);
        }

        tcb->frag_num++;
        tcb->tx_type = USE_DMA;
        /*
         * Each fragment can span several cookies. One cookie will have
         * one tx descriptor to transmit.
         */
        desc_num = 0;
        for (i = ncookies; i > 0; i--) {
                /*
                 * Save the address and length to the private data structure
                 * of the tx control block, which will be used to fill the
                 * tx descriptor ring after all the fragments are processed.
                 */
                ixgbe_save_desc(tcb,
                    dma_cookie.dmac_laddress,
                    dma_cookie.dmac_size);

                desc_num++;

                if (i > 1)
                        ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
        }

        return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
        uint32_t start;
        uint32_t hckflags;
        uint32_t lsoflags;
        uint32_t mss;
        uint32_t len;
        uint32_t size;
        uint32_t offset;
        unsigned char *pos;
        ushort_t etype;
        uint32_t mac_hdr_len;
        uint32_t l4_proto;
        uint32_t l4_hdr_len;

        ASSERT(mp != NULL);

        hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &hckflags);
        bzero(ctx, sizeof (ixgbe_tx_context_t));

        if (hckflags == 0) {
                return (0);
        }

        ctx->hcksum_flags = hckflags;

        lso_info_get(mp, &mss, &lsoflags);
        ctx->mss = mss;
        ctx->lso_flag = (lsoflags == HW_LSO);

        /*
         * LSO relies on tx h/w checksum, so the packet will be dropped
         * here if the h/w checksum flags are not declared.
         */
        if (ctx->lso_flag) {
                if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
                    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
                        IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
                            "checksum flags are not specified when doing LSO");
                        return (-1);
                }
        }

        etype = 0;
        mac_hdr_len = 0;
        l4_proto = 0;

        /*
         * First get the position of the ether_type/ether_tpid.
         * Here we don't assume the ether (VLAN) header is fully included
         * in one mblk fragment, so we go through the fragments to parse
         * the ether type.
         */
        size = len = MBLKL(mp);
        offset = offsetof(struct ether_header, ether_type);
        while (size <= offset) {
                mp = mp->b_cont;
                ASSERT(mp != NULL);
                len = MBLKL(mp);
                size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        etype = ntohs(*(ushort_t *)(uintptr_t)pos);
        if (etype == ETHERTYPE_VLAN) {
                /*
                 * Get the position of the ether_type in the VLAN header
                 */
                offset = offsetof(struct ether_vlan_header, ether_type);
                while (size <= offset) {
                        mp = mp->b_cont;
                        ASSERT(mp != NULL);
                        len = MBLKL(mp);
                        size += len;
                }
                pos = mp->b_rptr + offset + len - size;

                etype = ntohs(*(ushort_t *)(uintptr_t)pos);
                mac_hdr_len = sizeof (struct ether_vlan_header);
        } else {
                mac_hdr_len = sizeof (struct ether_header);
        }

        /*
         * Here we don't assume the IP(V6) header is fully included in
         * one mblk fragment.
         */
        switch (etype) {
        case ETHERTYPE_IP:
                if (ctx->lso_flag) {
                        offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
                        while (size <= offset) {
                                mp = mp->b_cont;
                                ASSERT(mp != NULL);
                                len = MBLKL(mp);
                                size += len;
                        }
                        pos = mp->b_rptr + offset + len - size;
                        *((uint16_t *)(uintptr_t)(pos)) = 0;

                        offset = offsetof(ipha_t, ipha_hdr_checksum) +
                            mac_hdr_len;
                        while (size <= offset) {
                                mp = mp->b_cont;
                                ASSERT(mp != NULL);
                                len = MBLKL(mp);
                                size += len;
                        }
                        pos = mp->b_rptr + offset + len - size;
                        *((uint16_t *)(uintptr_t)(pos)) = 0;

                        /*
                         * To perform ixgbe LSO, the tcp checksum field of
                         * the packet also needs to be filled with the
                         * pseudo-header checksum over:
                         * (ip_source_addr, ip_destination_addr, l4_proto)
                         * The tcp/ip stack has already done this.
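                         *
                         * As an illustrative sketch only (computed by the
                         * stack, not by this driver): the seed is the
                         * 16-bit ones'-complement sum of those
                         * pseudo-header words, conceptually
                         *
                         *	seed = fold16(src_hi + src_lo +
                         *	    dst_hi + dst_lo + proto);
                         *
                         * with the TCP length deliberately omitted,
                         * because the hardware inserts the per-segment
                         * length while segmenting.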
                         */
                }

                offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
                while (size <= offset) {
                        mp = mp->b_cont;
                        ASSERT(mp != NULL);
                        len = MBLKL(mp);
                        size += len;
                }
                pos = mp->b_rptr + offset + len - size;

                l4_proto = *(uint8_t *)pos;
                break;
        case ETHERTYPE_IPV6:
                offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
                while (size <= offset) {
                        mp = mp->b_cont;
                        ASSERT(mp != NULL);
                        len = MBLKL(mp);
                        size += len;
                }
                pos = mp->b_rptr + offset + len - size;

                l4_proto = *(uint8_t *)pos;
                break;
        default:
                /* Unrecoverable error */
                IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
                return (-2);
        }

        if (ctx->lso_flag) {
                offset = mac_hdr_len + start;
                while (size <= offset) {
                        mp = mp->b_cont;
                        ASSERT(mp != NULL);
                        len = MBLKL(mp);
                        size += len;
                }
                pos = mp->b_rptr + offset + len - size;

                l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
        } else {
                /*
                 * l4 header length is only required for LSO
                 */
                l4_hdr_len = 0;
        }

        ctx->mac_hdr_len = mac_hdr_len;
        ctx->ip_hdr_len = start;
        ctx->l4_proto = l4_proto;
        ctx->l4_hdr_len = l4_hdr_len;

        return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
        ixgbe_tx_context_t *last;

        if (ctx == NULL)
                return (B_FALSE);

        /*
         * Compare the context data retrieved from the mblk with the
         * stored data of the last context descriptor. The data that
         * need to be checked are:
         *	hcksum_flags
         *	l4_proto
         *	mac_hdr_len
         *	ip_hdr_len
         *	lso_flag
         *	mss (only checked for LSO)
         *	l4_hdr_len (only checked for LSO)
         * If any one of the above items has changed, a new context
         * descriptor is needed.
         */
        last = &tx_ring->tx_context;

        if ((ctx->hcksum_flags != last->hcksum_flags) ||
            (ctx->l4_proto != last->l4_proto) ||
            (ctx->mac_hdr_len != last->mac_hdr_len) ||
            (ctx->ip_hdr_len != last->ip_hdr_len) ||
            (ctx->lso_flag != last->lso_flag) ||
            (ctx->lso_flag && ((ctx->mss != last->mss) ||
            (ctx->l4_hdr_len != last->l4_hdr_len)))) {
                return (B_TRUE);
        }

        return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
        /*
         * Fill the context descriptor with the checksum
         * context information we've got.
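         *
         * For illustration (hypothetical header sizes): an untagged
         * IPv4/TCP packet with a 14-byte MAC header and a 20-byte IP
         * header would be encoded roughly as
         *
         *	vlan_macip_lens = 20 | (14 << IXGBE_ADVTXD_MACLEN_SHIFT);
         *
         * which is what the assignments below build from
         * ctx->ip_hdr_len and ctx->mac_hdr_len.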
         */
        ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
        ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
            IXGBE_ADVTXD_MACLEN_SHIFT;

        ctx_tbd->type_tucmd_mlhl =
            IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

        if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
                ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

        if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
                switch (ctx->l4_proto) {
                case IPPROTO_TCP:
                        ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
                        break;
                case IPPROTO_UDP:
                        /*
                         * We don't have to explicitly set:
                         *	ctx_tbd->type_tucmd_mlhl |=
                         *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
                         * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
                         */
                        break;
                default:
                        /* Unrecoverable error */
                        IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
                        break;
                }
        }

        ctx_tbd->seqnum_seed = 0;

        if (ctx->lso_flag) {
                ctx_tbd->mss_l4len_idx =
                    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
                    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
        } else {
                ctx_tbd->mss_l4len_idx = 0;
        }
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
        struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
        boolean_t load_context;
        uint32_t index, tcb_index, desc_num;
        union ixgbe_adv_tx_desc *tbd, *first_tbd;
        tx_control_block_t *tcb, *first_tcb;
        uint32_t hcksum_flags;
        int i;

        ASSERT(mutex_owned(&tx_ring->tx_lock));

        tbd = NULL;
        first_tbd = NULL;
        first_tcb = NULL;
        desc_num = 0;
        hcksum_flags = 0;
        load_context = B_FALSE;

        /*
         * Get the index of the first tx descriptor that will be filled,
         * and the index of the first work list item that will be associated
         * with the first used tx control block in the pending list.
         * Note: the two indexes are the same.
         */
        index = tx_ring->tbd_tail;
        tcb_index = tx_ring->tbd_tail;

        if (ctx != NULL) {
                hcksum_flags = ctx->hcksum_flags;

                /*
                 * Check if a new context descriptor is needed for this packet
                 */
                load_context = ixgbe_check_context(tx_ring, ctx);

                if (load_context) {
                        first_tcb = (tx_control_block_t *)
                            LIST_GET_HEAD(pending_list);
                        tbd = &tx_ring->tbd_ring[index];

                        /*
                         * Fill the context descriptor with the
                         * hardware checksum offload information.
                         */
                        ixgbe_fill_context(
                            (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

                        index = NEXT_INDEX(index, 1, tx_ring->ring_size);
                        desc_num++;

                        /*
                         * Store the checksum context data if
                         * a new context descriptor is added
                         */
                        tx_ring->tx_context = *ctx;
                }
        }

        first_tbd = &tx_ring->tbd_ring[index];

        /*
         * Fill tx data descriptors with the data saved in the pending list.
         * The tx control blocks in the pending list are added to the work list
         * at the same time.
         *
         * The work list is strictly 1:1 corresponding to the descriptor ring.
         * One item of the work list corresponds to one tx descriptor. Because
         * one tx control block can span multiple tx descriptors, the tx
         * control block is added to the first work list item that
         * corresponds to the first tx descriptor generated from that tx
         * control block.
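         *
         * A small illustrative layout (hypothetical indexes): if a tx
         * control block produces three data descriptors at ring slots
         * 10, 11 and 12, the tcb is stored only in work_list[10];
         * work_list[11] and work_list[12] remain NULL, and the next tcb
         * is attached at slot 13.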
         */
        tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
        while (tcb != NULL) {

                for (i = 0; i < tcb->desc_num; i++) {
                        tbd = &tx_ring->tbd_ring[index];

                        tbd->read.buffer_addr = tcb->desc[i].address;
                        tbd->read.cmd_type_len = tcb->desc[i].length;

                        tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_RS |
                            IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_DATA;

                        tbd->read.olinfo_status = 0;

                        index = NEXT_INDEX(index, 1, tx_ring->ring_size);
                        desc_num++;
                }

                if (first_tcb != NULL) {
                        /*
                         * Count the checksum context descriptor for
                         * the first tx control block.
                         */
                        first_tcb->desc_num++;
                        first_tcb = NULL;
                }

                /*
                 * Add the tx control block to the work list
                 */
                ASSERT(tx_ring->work_list[tcb_index] == NULL);
                tx_ring->work_list[tcb_index] = tcb;

                tcb_index = index;
                tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
        }

        /*
         * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
         * valid in the first descriptor of the packet.
         * Set paylen in the first_tbd in all cases: 82599 requires the packet
         * length in the paylen field with or without LSO, and 82598 ignores
         * it in non-LSO mode.
         */
        ASSERT(first_tbd != NULL);
        first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

        switch (hw->mac.type) {
        case ixgbe_mac_82599EB:
                if (ctx != NULL && ctx->lso_flag) {
                        first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
                        first_tbd->read.olinfo_status |=
                            (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
                            - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
                } else {
                        first_tbd->read.olinfo_status |=
                            (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
                }
                break;
        case ixgbe_mac_82598EB:
                if (ctx != NULL && ctx->lso_flag) {
                        first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
                        first_tbd->read.olinfo_status |=
                            (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
                            - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
                }
                break;
        default:
                break;
        }

        /* Set hardware checksum bits */
        if (hcksum_flags != 0) {
                if (hcksum_flags & HCK_IPV4_HDRCKSUM)
                        first_tbd->read.olinfo_status |=
                            IXGBE_ADVTXD_POPTS_IXSM;
                if (hcksum_flags & HCK_PARTIALCKSUM)
                        first_tbd->read.olinfo_status |=
                            IXGBE_ADVTXD_POPTS_TXSM;
        }

        /*
         * The last descriptor of the packet needs the End Of Packet (EOP)
         * and Report Status (RS) bits set.
         */
        ASSERT(tbd != NULL);
        tbd->read.cmd_type_len |=
            IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

        /*
         * Sync the DMA buffer of the tx descriptor ring
         */
        DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

        if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
                ddi_fm_service_impact(tx_ring->ixgbe->dip,
                    DDI_SERVICE_DEGRADED);
        }

        /*
         * Update the number of the free tx descriptors.
         * The mutual exclusion between the transmission and the recycling
         * (for the tx descriptor ring and the work list) is implemented
         * with the atomic operation on the number of the free tx descriptors.
         *
         * Note: we must always decrement the counter tbd_free before
         * advancing the hardware TDT pointer, to avoid the race condition
         * where the transmit of the tx descriptors completes and tbd_free
         * is increased by the tx recycling before the decrement is done here.
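         *
         * A hypothetical interleaving shows the problem: if the TDT were
         * advanced first, the hardware could complete the new descriptors
         * and the recycle path could add their count back to tbd_free
         * before the reservation below runs, so tbd_free could climb past
         * ring_size and corrupt the accounting.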
         */
        i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
        ASSERT(i >= 0);

        tx_ring->tbd_tail = index;

        /*
         * Advance the hardware TDT pointer of the tx descriptor ring
         */
        IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

        if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
            DDI_FM_OK) {
                ddi_fm_service_impact(tx_ring->ixgbe->dip,
                    DDI_SERVICE_DEGRADED);
        }

        return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
        sw_desc_t *desc;

        desc = &tcb->desc[tcb->desc_num];
        desc->address = address;
        desc->length = length;

        tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
        uint32_t index, last_index;
        int desc_num;
        boolean_t desc_done;
        tx_control_block_t *tcb;
        link_list_t pending_list;

        /*
         * Take the recycle lock to serialize the recycle processing
         * on this ring.
         */
        mutex_enter(&tx_ring->recycle_lock);

        ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

        if (tx_ring->tbd_free == tx_ring->ring_size) {
                tx_ring->recycle_fail = 0;
                tx_ring->stall_watchdog = 0;
                if (tx_ring->reschedule) {
                        tx_ring->reschedule = B_FALSE;
                        mac_tx_ring_update(tx_ring->ixgbe->mac_hdl,
                            tx_ring->ring_handle);
                }
                mutex_exit(&tx_ring->recycle_lock);
                return (0);
        }

        /*
         * Sync the DMA buffer of the tx descriptor ring
         */
        DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

        if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
                ddi_fm_service_impact(tx_ring->ixgbe->dip,
                    DDI_SERVICE_DEGRADED);
        }

        LINK_LIST_INIT(&pending_list);
        desc_num = 0;
        index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

        tcb = tx_ring->work_list[index];
        ASSERT(tcb != NULL);

        desc_done = B_TRUE;
        while (desc_done && (tcb != NULL)) {

                /*
                 * Get the last tx descriptor of the tx control block.
                 * If that descriptor is done, then all the tx descriptors
                 * of this tx control block are done, and the tx control
                 * block and all the corresponding tx descriptors can be
                 * recycled.
                 */
                last_index = NEXT_INDEX(index, tcb->desc_num - 1,
                    tx_ring->ring_size);

                /*
                 * Check if the Descriptor Done bit is set
                 */
                desc_done = tx_ring->tbd_ring[last_index].wb.status &
                    IXGBE_TXD_STAT_DD;
                if (desc_done) {
                        /*
                         * Strip off the tx control block from the work list,
                         * and add it to the pending list.
                         */
                        tx_ring->work_list[index] = NULL;
                        LIST_PUSH_TAIL(&pending_list, &tcb->link);

                        /*
                         * Count the total number of the tx descriptors
                         * recycled
                         */
                        desc_num += tcb->desc_num;

                        /*
                         * Advance the index of the tx descriptor ring
                         */
                        index = NEXT_INDEX(last_index, 1, tx_ring->ring_size);

                        tcb = tx_ring->work_list[index];
                }
        }

        /*
         * If no tx descriptors are recycled, no need to do more processing
         */
        if (desc_num == 0) {
                tx_ring->recycle_fail++;
                mutex_exit(&tx_ring->recycle_lock);
                return (0);
        }

        tx_ring->recycle_fail = 0;
        tx_ring->stall_watchdog = 0;

        /*
         * Update the head index of the tx descriptor ring
         */
        tx_ring->tbd_head = index;

        /*
         * Update the number of the free tx descriptors with atomic operations
         */
        atomic_add_32(&tx_ring->tbd_free, desc_num);

        if ((tx_ring->tbd_free >= tx_ring->resched_thresh) &&
            (tx_ring->reschedule)) {
                tx_ring->reschedule = B_FALSE;
                mac_tx_ring_update(tx_ring->ixgbe->mac_hdl,
                    tx_ring->ring_handle);
        }
        mutex_exit(&tx_ring->recycle_lock);

        /*
         * Free the resources used by the tx control blocks
         * in the pending list
         */
        tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
        while (tcb != NULL) {
                /*
                 * Release the resources occupied by the tx control block
                 */
                ixgbe_free_tcb(tcb);

                tcb = (tx_control_block_t *)
                    LIST_GET_NEXT(&pending_list, &tcb->link);
        }

        /*
         * Add the tx control blocks in the pending list to the free list.
         */
        ixgbe_put_free_list(tx_ring, &pending_list);

        return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
        uint32_t index;
        uint32_t head_wb;
        int desc_num;
        tx_control_block_t *tcb;
        link_list_t pending_list;

        /*
         * Take the recycle lock to serialize the recycle processing
         * on this ring.
         */
        mutex_enter(&tx_ring->recycle_lock);

        ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

        if (tx_ring->tbd_free == tx_ring->ring_size) {
                tx_ring->recycle_fail = 0;
                tx_ring->stall_watchdog = 0;
                if (tx_ring->reschedule) {
                        tx_ring->reschedule = B_FALSE;
                        mac_tx_ring_update(tx_ring->ixgbe->mac_hdl,
                            tx_ring->ring_handle);
                }
                mutex_exit(&tx_ring->recycle_lock);
                return (0);
        }

        /*
         * Sync the DMA buffer of the tx descriptor ring
         *
         * Note: for head write-back mode, the tx descriptors are not
         * written back, but the head write-back value is stored in
         * the last extra tbd at the end of the DMA area, so we still
         * need to sync that value for the kernel.
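         * That value occupies the first sizeof (uint32_t) bytes right
         * after the ring proper, i.e. at byte offset
         * ring_size * sizeof (union ixgbe_adv_tx_desc) within tbd_area,
         * so the partial ddi_dma_sync() below covers just that word
         * instead of syncing the whole area with: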
         *
         * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
         */
        (void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
            sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
            sizeof (uint32_t),
            DDI_DMA_SYNC_FORKERNEL);

        if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
                ddi_fm_service_impact(tx_ring->ixgbe->dip,
                    DDI_SERVICE_DEGRADED);
        }

        LINK_LIST_INIT(&pending_list);
        desc_num = 0;
        index = tx_ring->tbd_head;	/* Next index to clean */

        /*
         * Get the value of head write-back
         */
        head_wb = *tx_ring->tbd_head_wb;
        while (index != head_wb) {
                tcb = tx_ring->work_list[index];
                ASSERT(tcb != NULL);

                if (OFFSET(index, head_wb, tx_ring->ring_size) <
                    tcb->desc_num) {
                        /*
                         * The current tx control block is not
                         * completely transmitted, stop recycling
                         */
                        break;
                }

                /*
                 * Strip off the tx control block from the work list,
                 * and add it to the pending list.
                 */
                tx_ring->work_list[index] = NULL;
                LIST_PUSH_TAIL(&pending_list, &tcb->link);

                /*
                 * Advance the index of the tx descriptor ring
                 */
                index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

                /*
                 * Count the total number of the tx descriptors recycled
                 */
                desc_num += tcb->desc_num;
        }

        /*
         * If no tx descriptors are recycled, no need to do more processing
         */
        if (desc_num == 0) {
                tx_ring->recycle_fail++;
                mutex_exit(&tx_ring->recycle_lock);
                return (0);
        }

        tx_ring->recycle_fail = 0;
        tx_ring->stall_watchdog = 0;

        /*
         * Update the head index of the tx descriptor ring
         */
        tx_ring->tbd_head = index;

        /*
         * Update the number of the free tx descriptors with atomic operations
         */
        atomic_add_32(&tx_ring->tbd_free, desc_num);

        if ((tx_ring->tbd_free >= tx_ring->resched_thresh) &&
            (tx_ring->reschedule)) {
                tx_ring->reschedule = B_FALSE;
                mac_tx_ring_update(tx_ring->ixgbe->mac_hdl,
                    tx_ring->ring_handle);
        }
        mutex_exit(&tx_ring->recycle_lock);

        /*
         * Free the resources used by the tx control blocks
         * in the pending list
         */
        tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
        while (tcb) {
                /*
                 * Release the resources occupied by the tx control block
                 */
                ixgbe_free_tcb(tcb);

                tcb = (tx_control_block_t *)
                    LIST_GET_NEXT(&pending_list, &tcb->link);
        }

        /*
         * Add the tx control blocks in the pending list to the free list.
         */
        ixgbe_put_free_list(tx_ring, &pending_list);

        return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
        switch (tcb->tx_type) {
        case USE_COPY:
                /*
                 * Reset the buffer length that is used for copy
                 */
                tcb->tx_buf.len = 0;
                break;
        case USE_DMA:
                /*
                 * Release the DMA resource that is used for
                 * DMA binding.
                 */
                (void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
                break;
        default:
                break;
        }

        /*
         * Free the mblk
         */
        if (tcb->mp != NULL) {
                freemsg(tcb->mp);
                tcb->mp = NULL;
        }

        tcb->tx_type = USE_NONE;
        tcb->frag_num = 0;
        tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
        tx_control_block_t *tcb;

        /*
         * Check and update the number of the free tx control blocks
         * in the free list.
         */
        if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
                return (NULL);

        mutex_enter(&tx_ring->tcb_head_lock);

        tcb = tx_ring->free_list[tx_ring->tcb_head];
        ASSERT(tcb != NULL);
        tx_ring->free_list[tx_ring->tcb_head] = NULL;
        tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
            tx_ring->free_list_size);

        mutex_exit(&tx_ring->tcb_head_lock);

        return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back on the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
        uint32_t index;
        int tcb_num;
        tx_control_block_t *tcb;

        mutex_enter(&tx_ring->tcb_tail_lock);

        index = tx_ring->tcb_tail;

        tcb_num = 0;
        tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
        while (tcb != NULL) {
                ASSERT(tx_ring->free_list[index] == NULL);
                tx_ring->free_list[index] = tcb;

                tcb_num++;

                index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

                tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
        }

        tx_ring->tcb_tail = index;

        /*
         * Update the number of the free tx control blocks
         * in the free list. This operation must be placed
         * under the protection of the lock.
         */
        atomic_add_32(&tx_ring->tcb_free, tcb_num);

        mutex_exit(&tx_ring->tcb_tail_lock);
}