/*
 * CDDL HEADER START
 *
 * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through the specified tx ring.
 *
 * An mblk can consist of several fragments, and each fragment is
 * processed differently based on its size: fragments smaller than
 * the bcopy threshold are copied with bcopy, while larger ones are
 * bound with DMA resources.
 *
 * To process the mblk, a tx control block is taken from the free
 * list. A tx control block contains one tx buffer, which is used to
 * copy the data of mblk fragments, and one tx DMA handle, which is
 * used to bind an mblk fragment to DMA resources.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and the buffer is then transmitted with a single
 * tx descriptor.
 *
 * A large fragment is bound with one tx control block's DMA handle
 * and may span several tx descriptors for transmission.
 *
 * Transmitting one packet (mblk) can therefore use several tx
 * control blocks. After processing, those tx control blocks are
 * put on the work list.
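 *
 * Returns NULL when the mblk has been consumed (either placed on the
 * ring or dropped); returns the original mblk when tx resources are
 * exhausted, in which case tx_ring->reschedule is set so that the
 * transmit is retried later.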
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
	tx_control_block_t *tcb;
	ixgbe_tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	uint32_t len, hdr_frag_len, hdr_len;
	uint32_t copy_thresh;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_pre_mp = NULL;
	mblk_t *hdr_nmp = NULL;

	ASSERT(mp->b_next == NULL);

	copy_thresh = ixgbe->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe can
		 * process, discard this mblk and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	} else {
		ctx = NULL;
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After recycling, if tbd_free is still less than the overload
	 * threshold, the ring is overloaded: return mp so that the
	 * transmit will be rescheduled.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
		return (mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not yet had that data placed on the tx descriptor
	 * ring. It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	/*
	 * The software must guarantee that the LSO packet header
	 * (MAC + IP + TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		hdr_nmp = mp;
		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
		while (len < hdr_len) {
			hdr_pre_mp = hdr_nmp;
			hdr_nmp = hdr_nmp->b_cont;
			len += MBLKL(hdr_nmp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
		/*
		 * There are two cases in which we need to reallocate an
		 * mblk for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
		if ((hdr_nmp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
		    < hdr_len)) {
			IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * reallocate the mblk for the last header fragment,
			 * expecting to bcopy it into the pre-allocated
			 * page-aligned buffer
			 */
			hdr_new_mp = allocb(hdr_frag_len, NULL);
			if (!hdr_new_mp)
				return (mp);
			bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
			    hdr_frag_len);
			/* link the new header fragment with the other parts */
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_nmp;
			if (hdr_pre_mp)
				hdr_pre_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_nmp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * adjust the bcopy threshold to guarantee that the
		 * header is handled with bcopy
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0: MBLKL(next_mp);

		/*
		 * When the current fragment is an empty fragment, if
		 * the next fragment will still be copied to the current
		 * tx buffer, we cannot skip this fragment here, because
		 * the copy processing is still pending completion. We
		 * have to process this empty fragment in the tx_copy
		 * routine.
		 *
		 * If the copy processing is completed or a DMA binding
		 * processing has just completed, we can simply skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}

		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = ixgbe_get_free_list(tx_ring);

			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid taking the lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA: USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA: USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * The 82598/82599 chipsets have a limitation that no more than
	 * 32 tx descriptors can be transmitted at one time.
	 *
	 * The workaround here is to pull up the mblk and send it out
	 * with DMA binding; that way no more than MAX_COOKIE (18)
	 * descriptors are needed.
	 */
	if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
		IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

		/*
		 * Discard the mblk and free the used resources
		 */
		tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
		while (tcb) {
			tcb->mp = NULL;
			ixgbe_free_tcb(tcb);
			tcb = (tx_control_block_t *)
			    LIST_GET_NEXT(&pending_list, &tcb->link);
		}

		/*
		 * Return the tx control blocks in the pending list to
		 * the free list.
		 */
		ixgbe_put_free_list(tx_ring, &pending_list);

		/*
		 * pull up the mblk and send it out with DMA binding
		 */
		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
			tx_ring->reschedule = B_TRUE;

			/*
			 * If a new mblk has been allocated for the last header
			 * fragment of an LSO packet, we should restore the
			 * modified mp.
			 */
			if (hdr_new_mp) {
				hdr_new_mp->b_cont = NULL;
				freeb(hdr_new_mp);
				hdr_nmp->b_rptr -= hdr_frag_len;
				if (hdr_pre_mp)
					hdr_pre_mp->b_cont = hdr_nmp;
				else
					mp = hdr_nmp;
			}
			return (mp);
		}

		LINK_LIST_INIT(&pending_list);
		desc_total = 0;

		/*
		 * If the packet is an LSO packet, simply transmit the
		 * header in one descriptor using bcopy.
		 */
		if ((ctx != NULL) && ctx->lso_flag) {
			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
			    ctx->l4_hdr_len;

			tcb = ixgbe_get_free_list(tx_ring);
			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}
			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
			    hdr_len, B_TRUE);
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
			desc_total += desc_num;

			pull_mp->b_rptr += hdr_len;
		}

		tcb = ixgbe_get_free_list(tx_ring);
		if (tcb == NULL) {
			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
			goto tx_failure;
		}
		if ((ctx != NULL) && ctx->lso_flag) {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize - hdr_len);
		} else {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize);
		}
		if (desc_num < 0) {
			goto tx_failure;
		}
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		desc_total += desc_num;
		tcb->mp = pull_mp;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit,
	 * then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free < (desc_total + 1)) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that the transmission has succeeded, free the original mp
	 * if the pulled-up mblk was used for the transmission.
	 */
	if (pull_mp) {
		freemsg(mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If the transmission fails, free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_nmp->b_rptr -= hdr_frag_len;
		if (hdr_pre_mp)
			hdr_pre_mp->b_cont = hdr_nmp;
		else
			mp = hdr_nmp;
	}
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previously copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies. One cookie will have
	 * one tx descriptor to transmit.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	hcksum_retrieve(mp, NULL, NULL, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	lso_info_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so the packet is dropped here
	 * if the h/w checksum flags are not set.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
			    "checksum flags are not specified when doing LSO");
			return (-1);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, the TCP checksum field of
			 * the packet also needs to be filled with the
			 * pseudo-header checksum over:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * Currently the tcp/ip stack has already done it.
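			 * That is why this routine only zeroes the IP
			 * total-length and IP header-checksum fields and
			 * leaves the TCP checksum field untouched.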
			 */
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The data that
	 * need to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any one of the above items changes, a new context descriptor
	 * is needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
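	 *
	 * vlan_macip_lens carries the IP header length in its low bits and
	 * the MAC header length shifted by IXGBE_ADVTXD_MACLEN_SHIFT;
	 * type_tucmd_mlhl selects the context descriptor type and the L4
	 * protocol type; mss_l4len_idx carries the MSS and the L4 header
	 * length, which are only meaningful for LSO.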
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list is strictly 1:1 corresponding to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
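	 *
	 * If a context descriptor was loaded above, it occupies the slot
	 * at tcb_index, so the packet's first tx control block is attached
	 * to that slot and its desc_num is incremented after the loop to
	 * account for the context descriptor during recycling.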
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * Set paylen in the first_tbd for all parts: 82599 requires the
	 * packet length in the paylen field with or without LSO, while
	 * 82598 ignores it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82599EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;
	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we must always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race condition
	 * in which the transmit of the tx descriptors completes and the
	 * tx recycling increases tbd_free before we have decremented it.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet,
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there's no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area, so we still
	 * need to sync the head write-back value for the kernel.
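	 * That value is a single uint32_t placed right after the last
	 * descriptor, at byte offset ring_size * sizeof (union
	 * ixgbe_adv_tx_desc) in tbd_area, which is what the partial
	 * ddi_dma_sync() below covers.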
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control blocks
 * in the free list is used to keep this routine mutually exclusive with
 * the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back on the free list
 *
 * A mutex is used here to ensure serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented
 * with the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}