/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through one specified ring.
 *
 * One mblk can consist of several fragments; each fragment is
 * processed differently based on its size. Fragments smaller than
 * the bcopy threshold are processed with bcopy, otherwise they are
 * processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragment data, and one tx DMA handle,
 * which is used to bind an mblk fragment to DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer is transmitted with one
 * tx descriptor.
 *
 * A large fragment only binds to one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks are
 * put on the work list.
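 *
 * The routine returns NULL when the mblk has been consumed (either
 * transmitted or dropped), and returns the original mblk when the
 * packet cannot be handled right now, so the caller can resubmit
 * it later.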
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
	tx_control_block_t *tcb;
	ixgbe_tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	uint32_t len, hdr_frag_len, hdr_len;
	uint32_t copy_thresh;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_pre_mp = NULL;
	mblk_t *hdr_nmp = NULL;

	ASSERT(mp->b_next == NULL);

	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED)) {
		return (mp);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe can
		 * process, then discard this mblk and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	} else {
		ctx = NULL;
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After recycling, if tbd_free is still less than the overload
	 * threshold, treat it as an overload: set reschedule and return
	 * mp so the tx will be retried later.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
		return (mp);
	}

	/*
	 * The pending_list is a linked list used to save the tx control
	 * blocks that have had their packet data processed but not yet
	 * placed on the tx descriptor ring. It is used to reduce the
	 * lock contention on tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	/*
	 * The software should guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		hdr_nmp = mp;
		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
		while (len < hdr_len) {
			hdr_pre_mp = hdr_nmp;
			hdr_nmp = hdr_nmp->b_cont;
			len += MBLKL(hdr_nmp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
		/*
		 * There are two cases where we need to reallocate an mblk
		 * for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
		if ((hdr_nmp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
		    < hdr_len)) {
			IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * Reallocate the mblk for the last header fragment,
			 * expecting to bcopy it into the pre-allocated
			 * page-aligned buffer.
			 */
			hdr_new_mp = allocb(hdr_frag_len, NULL);
			if (!hdr_new_mp)
				return (mp);
			bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
			    hdr_frag_len);
			/* link the new header fragment with the other parts */
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_nmp;
			if (hdr_pre_mp)
				hdr_pre_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_nmp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is handled with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0 : MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is still pending completion. We
		 * have to process this empty fragment in the tx_copy
		 * routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing is just completed, we can just skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = ixgbe_get_free_list(tx_ring);

			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid taking the lock too early.
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA : USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA : USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * The 82598/82599 chipsets have a limitation that no more than
	 * 32 tx descriptors can be transmitted at one time.
	 *
	 * The workaround is to pull up the mblk and send it out with DMA
	 * binding. By doing so, no more than MAX_COOKIE (18) descriptors
	 * are needed.
	 */
	if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
		IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

		/*
		 * Discard the mblk and free the used resources
		 */
		tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
		while (tcb) {
			tcb->mp = NULL;
			ixgbe_free_tcb(tcb);
			tcb = (tx_control_block_t *)
			    LIST_GET_NEXT(&pending_list, &tcb->link);
		}

		/*
		 * Return the tx control blocks in the pending list to
		 * the free list.
		 */
		ixgbe_put_free_list(tx_ring, &pending_list);

		/*
		 * Pull up the mblk and send it out with DMA binding.
		 */
		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
			tx_ring->reschedule = B_TRUE;

			/*
			 * If a new mblk has been allocated for the last
			 * header fragment of an LSO packet, we should
			 * restore the modified mp.
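			 *
			 * The restoration matters because the mblk is handed
			 * back to the caller for a later retry, so it must be
			 * the original chain rather than one carrying the
			 * substituted header fragment.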
			 */
			if (hdr_new_mp) {
				hdr_new_mp->b_cont = NULL;
				freeb(hdr_new_mp);
				hdr_nmp->b_rptr -= hdr_frag_len;
				if (hdr_pre_mp)
					hdr_pre_mp->b_cont = hdr_nmp;
				else
					mp = hdr_nmp;
			}
			return (mp);
		}

		LINK_LIST_INIT(&pending_list);
		desc_total = 0;

		/*
		 * If the packet is an LSO packet, simply transmit the
		 * header in one descriptor using bcopy.
		 */
		if ((ctx != NULL) && ctx->lso_flag) {
			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
			    ctx->l4_hdr_len;

			tcb = ixgbe_get_free_list(tx_ring);
			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}
			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
			    hdr_len, B_TRUE);
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
			desc_total += desc_num;

			pull_mp->b_rptr += hdr_len;
		}

		tcb = ixgbe_get_free_list(tx_ring);
		if (tcb == NULL) {
			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
			goto tx_failure;
		}
		if ((ctx != NULL) && ctx->lso_flag) {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize - hdr_len);
		} else {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize);
		}
		if (desc_num < 0) {
			goto tx_failure;
		}
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		desc_total += desc_num;
		tcb->mp = pull_mp;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit,
	 * then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx, mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that the transmission succeeded, free the original mp if
	 * the pulled-up mblk was used for transmission.
	 */
	if (pull_mp) {
		freemsg(mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If the transmission fails, free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_nmp->b_rptr -= hdr_frag_len;
		if (hdr_pre_mp)
			hdr_pre_mp->b_cont = hdr_nmp;
		else
			mp = hdr_nmp;
	}
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
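	 * (Their mp pointers were cleared above, so the mblk itself is not
	 * freed here and can be handed back to the caller for a retry.)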
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previously copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. Each cookie is filled
	 * into one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	mac_lso_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so the packet will be dropped
	 * here if the h/w checksum flags are not set.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
			    "checksum flags are not specified when doing LSO");
			return (-1);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, the TCP checksum field of
			 * the packet also needs to be filled with the
			 * following pseudo-header checksum:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * Currently the tcp/ip stack has done it.
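			 *
			 * (The hardware inserts the final TCP checksum for
			 * each LSO segment, but it expects the field to be
			 * seeded with that pseudo-header sum rather than a
			 * full checksum.)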
			 */
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The fields that
	 * need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any of the above fields has changed, a new context
	 * descriptor is needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
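	 *
	 * vlan_macip_lens packs the IP header length in its low bits and
	 * the MAC header length shifted by IXGBE_ADVTXD_MACLEN_SHIFT, per
	 * the advanced context descriptor layout.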
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be associated
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work
	 * list at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor.
	 * Because one tx control block can span multiple tx descriptors,
	 * the tx control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
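	 *
	 * The remaining work list slots spanned by a multi-descriptor tx
	 * control block stay NULL; recycling steps over them using the
	 * tcb's desc_num.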
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * The paylen field is set in every first_tbd: 82599 requires the
	 * packet length in the paylen field with or without LSO, and 82598
	 * ignores it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82599EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;
	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set.
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx
	 * descriptors.
	 *
	 * Note: the counter tbd_free must always be decremented before
	 * advancing the hardware TDT pointer, to avoid the race where the
	 * descriptors are transmitted and recycled (increasing tbd_free)
	 * before the decrement is done here.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks are freed, and those
 * tx control blocks are returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet level recycling is used here.
		 * For 82598, there is no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is valid.
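		 *
		 * ixgbe_free_tcb() resets last_index to MAX_TX_RING_SIZE,
		 * and ixgbe_tx_fill_ring() only assigns a real value once
		 * all of a packet's descriptors have been filled, so this
		 * value means the packet at the head of the work list is
		 * not ready to be recycled yet.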
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * Recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area, so we still
	 * need to sync the head write-back value for the kernel.
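	 *
	 * The offset and length passed to ddi_dma_sync() below skip the
	 * ring_size data descriptors and cover just that trailing
	 * uint32_t head write-back word.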
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted; stop recycling.
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle, and reset other
 * control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
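		 *
		 * Only the binding is undone here; the DMA handle itself
		 * stays allocated so it can be reused by a later bind.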
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with ixgbe_put_free_list().
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back on the free list
 *
 * A mutex is used here to ensure serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of free tx control blocks in the free list.
	 * This operation must be placed under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}