/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * To transmit one mblk through one specified ring.
 *
 * One mblk can consist of several fragments, and each fragment
 * is processed differently based on its size.
 * Fragments smaller than the bcopy threshold are processed with
 * bcopy; otherwise, they are processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind a mblk fragment with DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer is transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks are
 * put on the work list.
 */
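/*
 * Illustrative example (fragment sizes assumed): with a 512-byte bcopy
 * threshold, an mblk chain of 64 + 40 + 1460 bytes is handled as
 * follows: the 64-byte and 40-byte fragments are bcopy'd into the
 * buffer of a single tx control block and consume one tx descriptor;
 * the 1460-byte fragment is DMA-bound with a second tx control block
 * and consumes one tx descriptor per DMA cookie.
 */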
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
	tx_control_block_t *tcb;
	ixgbe_tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	uint32_t len, hdr_frag_len, hdr_len;
	uint32_t copy_thresh;
	mblk_t *new_mp;
	mblk_t *pre_mp;

	ASSERT(mp->b_next == NULL);

	copy_thresh = ixgbe->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe can
		 * process, discard this mblk and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	} else {
		ctx = NULL;
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, assert overload and return mp;
	 * the tx then needs to be rescheduled.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
		return (mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have their packet data processed
	 * but not yet placed on the tx descriptor ring.
	 * It is used to reduce the lock contention on tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	/*
	 * The software should guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		nmp = mp;
		pre_mp = NULL;
		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
		while (len < hdr_len) {
			pre_mp = nmp;
			nmp = nmp->b_cont;
			len += MBLKL(nmp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(nmp));
		/*
		 * There are two cases where we need to reallocate a mblk
		 * for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
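		/*
		 * Illustrative example (sizes assumed): with a 66-byte
		 * MAC+IP+TCP header split as 14 bytes in the first mblk
		 * and 52 bytes at the start of the second (case 1 above),
		 * hdr_frag_len is 52; a new 52-byte mblk is allocated,
		 * the 52 header bytes are bcopy'd into it, it is linked
		 * in between, and the payload mblk's b_rptr is advanced
		 * by 52 so that the whole header is contiguous.
		 */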
		if ((nmp != mp) ||
		    (P2NPHASE((uintptr_t)nmp->b_rptr, ixgbe->sys_page_size)
		    < hdr_len)) {
			IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * Reallocate an mblk for the last header fragment;
			 * it is expected to be bcopy'd into the
			 * pre-allocated page-aligned buffer.
			 */
			new_mp = allocb(hdr_frag_len, NULL);
			if (!new_mp)
				return (mp);
			bcopy(nmp->b_rptr, new_mp->b_rptr, hdr_frag_len);
			/* link the new header fragment with the other parts */
			new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
			new_mp->b_cont = nmp;
			if (pre_mp)
				pre_mp->b_cont = new_mp;
			nmp->b_rptr += hdr_frag_len;
			if (hdr_frag_len == hdr_len)
				mp = new_mp;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is processed with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is still pending for completion.
		 * We have to process this empty fragment in the tx_copy
		 * routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing is just completed, we can simply skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = ixgbe_get_free_list(tx_ring);

			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid taking the lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * The 82598/82599 chipset has a limitation that no more than 32 tx
	 * descriptors can be transmitted out at one time.
	 *
	 * Here is a workaround for it: pull up the mblk and then send it
	 * out using DMA binding. By doing so, no more than MAX_COOKIE (18)
	 * descriptors are needed.
	 */
	if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
		IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

		/*
		 * Discard the mblk and free the used resources
		 */
		tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
		while (tcb) {
			tcb->mp = NULL;
			ixgbe_free_tcb(tcb);
			tcb = (tx_control_block_t *)
			    LIST_GET_NEXT(&pending_list, &tcb->link);
		}

		/*
		 * Return the tx control blocks in the pending list to
		 * the free list.
		 */
		ixgbe_put_free_list(tx_ring, &pending_list);

		/*
		 * Pull up the mblk and send it out using DMA binding
		 */
		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
			tx_ring->reschedule = B_TRUE;
			return (mp);
		}

		LINK_LIST_INIT(&pending_list);
		desc_total = 0;

		/*
		 * If the packet is an LSO packet, we simply transmit
		 * the header in one descriptor using bcopy.
		 */
		if ((ctx != NULL) && ctx->lso_flag) {
			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
			    ctx->l4_hdr_len;

			tcb = ixgbe_get_free_list(tx_ring);
			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}
			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
			    hdr_len, B_TRUE);
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
			desc_total += desc_num;

			pull_mp->b_rptr += hdr_len;
		}

		tcb = ixgbe_get_free_list(tx_ring);
		if (tcb == NULL) {
			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
			goto tx_failure;
		}
		if ((ctx != NULL) && ctx->lso_flag) {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize - hdr_len);
		} else {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize);
		}
		if (desc_num < 0) {
			goto tx_failure;
		}
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		desc_total += desc_num;
		tcb->mp = pull_mp;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for the transmit
	 * (including one context descriptor).
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for the
	 * transmit, return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that the transmission succeeds, free the original mp
	 * if we used the pulled-up mblk for the transmission.
	 */
	if (pull_mp) {
		freemsg(mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If the transmission fails, free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. Each cookie needs
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	lso_info_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so drop the packet here
	 * if the h/w checksum flags are not declared.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
			    "checksum flags are not specified when doing LSO");
			return (-1);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, the tcp checksum field of
			 * the packet must also be filled with the
			 * pseudo-header checksum over:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * The tcp/ip stack has already done this.
			 */
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}
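
/*
 * Illustrative example (values assumed): for an untagged IPv4/TCP LSO
 * packet with a 20-byte IP header and a 20-byte TCP header,
 * ixgbe_get_context() fills in mac_hdr_len = sizeof (struct ether_header),
 * ip_hdr_len = 20 (the checksum start offset supplied by the stack),
 * l4_proto = IPPROTO_TCP and l4_hdr_len = 20, after zeroing ipha_length
 * and ipha_hdr_checksum in the packet as the LSO path above does.
 */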

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The fields that
	 * need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any of the above fields has changed, a new context
	 * descriptor is needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}
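
/*
 * Illustrative note: consecutive packets of the same flow normally carry
 * identical offload flags and header lengths (and, for LSO, the same MSS),
 * so ixgbe_check_context() returns B_FALSE for them and the context
 * descriptor loaded for an earlier packet is reused; a new context
 * descriptor is placed in the ring only when one of the compared fields
 * changes.
 */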

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
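	/*
	 * Illustrative example (indexes assumed): a tx control block that
	 * produced three descriptors starting at ring index 10 is stored
	 * in work_list[10], while work_list[11] and work_list[12] stay
	 * NULL; the recycle routines later step through the work list in
	 * tcb->desc_num increments.
	 */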
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * The paylen field is set in the first_tbd in all cases:
	 * 82599 requires the packet length in the paylen field with or
	 * without LSO, and 82598 ignores it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82599EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;
	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer to avoid the race condition
	 * where the transmit of the tx descriptors completes and the tx
	 * recycling increases tbd_free before tbd_free has been
	 * decremented here.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks are freed, and those
 * tx control blocks are returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head; /* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet,
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there's no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * Recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: in head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored in
	 * the last extra tbd at the end of the DMA area, so we still need
	 * to sync the head write-back value for the kernel.
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head; /* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}
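
/*
 * Summary of the free list scheme implemented by ixgbe_get_free_list()
 * and ixgbe_put_free_list() below: the free list is a circular array.
 * The consumer side advances tcb_head under tcb_head_lock, the producer
 * side advances tcb_tail under tcb_tail_lock, and the atomic counter
 * tcb_free keeps the two sides from overrunning each other, so get and
 * put can run concurrently without sharing a single lock.
 */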

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}