/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through the specified tx ring.
 *
 * One mblk can consist of several fragments, and each fragment is
 * processed differently based on its size: fragments smaller than
 * the bcopy threshold are copied with bcopy, while larger fragments
 * are processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind a mblk fragment with DMA resource.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer will be transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, but it may span several tx descriptors.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks will
 * be put on the work list.
 */
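/*
 * Illustrative example (not from the original source): assume a copy
 * threshold of 512 bytes and an mblk chain of 100-, 60- and 1460-byte
 * fragments. The two small fragments are bcopy'd into one tx control
 * block's buffer and sent with a single descriptor (assuming the buffer
 * is large enough to hold both), while the 1460-byte fragment is bound
 * with a second tx control block's DMA handle and consumes one tx
 * descriptor per DMA cookie.
 */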
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
    ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
    ixgbe_t *ixgbe = tx_ring->ixgbe;
    tx_type_t current_flag, next_flag;
    uint32_t current_len, next_len;
    uint32_t desc_total;
    size_t mbsize;
    int desc_num;
    boolean_t copy_done, eop;
    mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
    tx_control_block_t *tcb;
    ixgbe_tx_context_t tx_context, *ctx;
    link_list_t pending_list;
    uint32_t len, hdr_frag_len, hdr_len;
    uint32_t copy_thresh;
    mblk_t *hdr_new_mp = NULL;
    mblk_t *hdr_pre_mp = NULL;
    mblk_t *hdr_nmp = NULL;

    ASSERT(mp->b_next == NULL);

    if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
        (ixgbe->ixgbe_state & IXGBE_ERROR) ||
        (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
        !(ixgbe->ixgbe_state & IXGBE_STARTED)) {
        return (mp);
    }

    copy_thresh = ixgbe->tx_copy_thresh;

    /* Get the mblk size */
    mbsize = 0;
    for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
        mbsize += MBLKL(nmp);
    }

    if (ixgbe->tx_hcksum_enable) {
        /*
         * Retrieve checksum context information from the mblk
         * that will be used to decide whether/how to fill the
         * context descriptor.
         */
        ctx = &tx_context;
        if (ixgbe_get_context(mp, ctx) < 0) {
            freemsg(mp);
            return (NULL);
        }

        /*
         * If the mblk size exceeds the maximum size ixgbe can
         * process, discard this mblk and return NULL.
         */
        if ((ctx->lso_flag &&
            ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
            (!ctx->lso_flag &&
            (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
            freemsg(mp);
            IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
            return (NULL);
        }
    } else {
        ctx = NULL;
    }

    /*
     * Check and recycle tx descriptors.
     * The recycle threshold here should be selected carefully.
     */
    if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
        tx_ring->tx_recycle(tx_ring);
    }

    /*
     * After the recycling, if tbd_free is still less than the
     * overload threshold, assert overload and return mp;
     * the transmit will need to be rescheduled.
     */
    if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
        tx_ring->reschedule = B_TRUE;
        IXGBE_DEBUG_STAT(tx_ring->stat_overload);
        return (mp);
    }

    /*
     * The pending_list is a linked list that is used to save
     * the tx control blocks whose packet data has been processed
     * but not yet placed on the tx descriptor ring. It is used
     * to reduce contention on tx_lock.
     */
    LINK_LIST_INIT(&pending_list);
    desc_num = 0;
    desc_total = 0;

    /*
     * The software should guarantee that the LSO packet header
     * (MAC + IP + TCP) fits within one descriptor. Here we reallocate
     * and refill the header if it is not physically contiguous.
     */
    if ((ctx != NULL) && ctx->lso_flag) {
        /* find the last fragment of the header */
        len = MBLKL(mp);
        ASSERT(len > 0);
        hdr_nmp = mp;
        hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
        while (len < hdr_len) {
            hdr_pre_mp = hdr_nmp;
            hdr_nmp = hdr_nmp->b_cont;
            len += MBLKL(hdr_nmp);
        }
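        /*
         * Worked example (illustrative, not from the original source):
         * with hdr_len = 54 (14 MAC + 20 IP + 20 TCP) and a chain of
         * 40- and 1400-byte fragments, the loop above exits with
         * hdr_nmp pointing at the second mblk and len = 1440, so below
         * hdr_frag_len = 54 - (1440 - 1400) = 14: the last 14 bytes of
         * the header live at the start of the payload mblk.
         */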
        /*
         * If the header and the payload are in different mblks,
         * we simply force the header to be copied into a
         * pre-allocated page-aligned buffer.
         */
        if (len == hdr_len)
            goto adjust_threshold;

        hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
        /*
         * There are two cases in which we need to reallocate a mblk
         * for the last header fragment:
         * 1. the header is in multiple mblks and the last fragment
         *    shares the same mblk with the payload
         * 2. the header is in a single mblk shared with the payload
         *    and the header is not physically contiguous
         */
        if ((hdr_nmp != mp) ||
            (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
            < hdr_len)) {
            IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
            /*
             * Reallocate the mblk for the last header fragment;
             * it is expected to be bcopy'd into the pre-allocated
             * page-aligned buffer.
             */
            hdr_new_mp = allocb(hdr_frag_len, NULL);
            if (!hdr_new_mp)
                return (mp);
            bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
                hdr_frag_len);
            /* link the new header fragment with the other parts */
            hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
            hdr_new_mp->b_cont = hdr_nmp;
            if (hdr_pre_mp)
                hdr_pre_mp->b_cont = hdr_new_mp;
            else
                mp = hdr_new_mp;
            hdr_nmp->b_rptr += hdr_frag_len;
        }
adjust_threshold:
        /*
         * Adjust the bcopy threshold to guarantee that the
         * header is processed with bcopy.
         */
        if (copy_thresh < hdr_len)
            copy_thresh = hdr_len;
    }

    current_mp = mp;
    current_len = MBLKL(current_mp);
    /*
     * Decide which method to use for the first fragment
     */
    current_flag = (current_len <= copy_thresh) ?
        USE_COPY : USE_DMA;
    /*
     * If the mblk includes several contiguous small fragments,
     * they may be copied into one buffer. This flag is used to
     * indicate whether there are pending fragments that need to
     * be copied to the current tx buffer.
     *
     * If this flag is B_TRUE, it indicates that a new tx control
     * block is needed to process the next fragment using either
     * copy or DMA binding.
     *
     * Otherwise, it indicates that the next fragment will be
     * copied to the current tx buffer that is maintained by the
     * current tx control block. No new tx control block is needed.
     */
    copy_done = B_TRUE;
    while (current_mp) {
        next_mp = current_mp->b_cont;
        eop = (next_mp == NULL); /* Last fragment of the packet? */
        next_len = eop ? 0 : MBLKL(next_mp);

        /*
         * If the current fragment is empty but the previous copy
         * processing is still pending (more fragments are to be
         * copied into the current tx buffer), we cannot skip it
         * here; it must be handled in the tx_copy routine so the
         * pending copy can be completed.
         *
         * If the copy processing has completed, or a DMA binding
         * has just completed, the empty fragment can simply be
         * skipped.
         */
        if ((current_len == 0) && (copy_done)) {
            current_mp = next_mp;
            current_len = next_len;
            current_flag = (current_len <= copy_thresh) ?
                USE_COPY : USE_DMA;
            continue;
        }

        if (copy_done) {
            /*
             * Get a new tx control block from the free list
             */
            tcb = ixgbe_get_free_list(tx_ring);

            if (tcb == NULL) {
                IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
                goto tx_failure;
            }

            /*
             * Push the tx control block to the pending list
             * to avoid taking the tx lock too early.
             */
            LIST_PUSH_TAIL(&pending_list, &tcb->link);
        }

        if (current_flag == USE_COPY) {
            /*
             * Check whether to use bcopy or DMA binding to process
             * the next fragment, and if using bcopy, whether we
             * need to continue copying the next fragment into the
             * current tx buffer.
             */
            ASSERT((tcb->tx_buf.len + current_len) <=
                tcb->tx_buf.size);

            if (eop) {
                /*
                 * This is the last fragment of the packet, so
                 * the copy processing will be completed with
                 * this fragment.
                 */
                next_flag = USE_NONE;
                copy_done = B_TRUE;
            } else if ((tcb->tx_buf.len + current_len + next_len) >
                tcb->tx_buf.size) {
                /*
                 * If the next fragment is too large to be
                 * copied to the current tx buffer, we need
                 * to complete the current copy processing.
                 */
                next_flag = (next_len > copy_thresh) ?
                    USE_DMA : USE_COPY;
                copy_done = B_TRUE;
            } else if (next_len > copy_thresh) {
                /*
                 * The next fragment needs to be processed with
                 * DMA binding. So the copy processing will be
                 * completed with the current fragment.
                 */
                next_flag = USE_DMA;
                copy_done = B_TRUE;
            } else {
                /*
                 * Continue to copy the next fragment to the
                 * current tx buffer.
                 */
                next_flag = USE_COPY;
                copy_done = B_FALSE;
            }

            desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
                current_len, copy_done);
        } else {
            /*
             * Check whether to use bcopy or DMA binding to process
             * the next fragment.
             */
            next_flag = (next_len > copy_thresh) ?
                USE_DMA : USE_COPY;
            ASSERT(copy_done == B_TRUE);

            desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
                current_len);
        }

        if (desc_num > 0)
            desc_total += desc_num;
        else if (desc_num < 0)
            goto tx_failure;

        current_mp = next_mp;
        current_len = next_len;
        current_flag = next_flag;
    }

    /*
     * Attach the mblk to the last tx control block
     */
    ASSERT(tcb);
    ASSERT(tcb->mp == NULL);
    tcb->mp = mp;

    /*
     * The 82598/82599 chipsets have a limitation that no more than
     * 32 tx descriptors can be transmitted at one time.
     *
     * The workaround is to pull up the mblk and send it out using
     * DMA binding, so that no more than MAX_COOKIE (18) descriptors
     * are needed.
     */
    if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
        IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

        /*
         * Discard the mblk and free the used resources
         */
        tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
        while (tcb) {
            tcb->mp = NULL;
            ixgbe_free_tcb(tcb);
            tcb = (tx_control_block_t *)
                LIST_GET_NEXT(&pending_list, &tcb->link);
        }

        /*
         * Return the tx control blocks in the pending list to
         * the free list.
         */
        ixgbe_put_free_list(tx_ring, &pending_list);

        /*
         * Pull up the mblk and send it out using DMA binding
         */
        if ((pull_mp = msgpullup(mp, -1)) == NULL) {
            tx_ring->reschedule = B_TRUE;

            /*
             * If a new mblk was allocated for the last header
             * fragment of an LSO packet, restore the
             * modified mp.
             */
            if (hdr_new_mp) {
                hdr_new_mp->b_cont = NULL;
                freeb(hdr_new_mp);
                hdr_nmp->b_rptr -= hdr_frag_len;
                if (hdr_pre_mp)
                    hdr_pre_mp->b_cont = hdr_nmp;
                else
                    mp = hdr_nmp;
            }
            return (mp);
        }
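
        /*
         * Note (illustrative, not from the original source): the
         * pulled-up message is virtually contiguous, so the single
         * bind below is expected to produce at most MAX_COOKIE
         * descriptors, plus one copy descriptor for the LSO header
         * and possibly one context descriptor, keeping the packet
         * under IXGBE_TX_DESC_LIMIT as noted above.
         */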

        LINK_LIST_INIT(&pending_list);
        desc_total = 0;

        /*
         * If the packet is an LSO packet, we simply transmit
         * the header in one descriptor using bcopy.
         */
        if ((ctx != NULL) && ctx->lso_flag) {
            hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
                ctx->l4_hdr_len;

            tcb = ixgbe_get_free_list(tx_ring);
            if (tcb == NULL) {
                IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
                goto tx_failure;
            }
            desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
                hdr_len, B_TRUE);
            LIST_PUSH_TAIL(&pending_list, &tcb->link);
            desc_total += desc_num;

            pull_mp->b_rptr += hdr_len;
        }

        tcb = ixgbe_get_free_list(tx_ring);
        if (tcb == NULL) {
            IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
            goto tx_failure;
        }
        if ((ctx != NULL) && ctx->lso_flag) {
            desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
                mbsize - hdr_len);
        } else {
            desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
                mbsize);
        }
        if (desc_num < 0) {
            goto tx_failure;
        }
        LIST_PUSH_TAIL(&pending_list, &tcb->link);

        desc_total += desc_num;
        tcb->mp = pull_mp;
    }

    /*
     * Before filling the tx descriptor ring with the data, we need to
     * ensure there are adequate free descriptors for the transmit
     * (including one context descriptor).
     * Do not use up all the tx descriptors.
     * Otherwise tx recycling will fail and cause a false hang.
     */
    if (tx_ring->tbd_free <= (desc_total + 1)) {
        tx_ring->tx_recycle(tx_ring);
    }

    mutex_enter(&tx_ring->tx_lock);
    /*
     * If the number of free tx descriptors is not enough for the
     * transmit, then return mp.
     *
     * Note: we must put this check under the mutex protection to
     * ensure correctness when multiple threads access it in
     * parallel.
     */
    if (tx_ring->tbd_free <= (desc_total + 1)) {
        IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
        mutex_exit(&tx_ring->tx_lock);
        goto tx_failure;
    }

    desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
        mbsize);

    ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

    tx_ring->stat_obytes += mbsize;
    tx_ring->stat_opackets++;

    mutex_exit(&tx_ring->tx_lock);

    /*
     * Now that the transmission has succeeded, free the original
     * mp if the pulled-up mblk was used for the transmission.
     */
    if (pull_mp) {
        freemsg(mp);
    }

    return (NULL);

tx_failure:
    /*
     * If the transmission fails, free the pulled-up mblk.
     */
    if (pull_mp) {
        freemsg(pull_mp);
    }

    /*
     * If a new mblk was allocated for the last header
     * fragment of an LSO packet, restore the
     * modified mp.
     */
    if (hdr_new_mp) {
        hdr_new_mp->b_cont = NULL;
        freeb(hdr_new_mp);
        hdr_nmp->b_rptr -= hdr_frag_len;
        if (hdr_pre_mp)
            hdr_pre_mp->b_cont = hdr_nmp;
        else
            mp = hdr_nmp;
    }
    /*
     * Discard the mblk and free the used resources
     */
    tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
    while (tcb) {
        tcb->mp = NULL;

        ixgbe_free_tcb(tcb);

        tcb = (tx_control_block_t *)
            LIST_GET_NEXT(&pending_list, &tcb->link);
    }

    /*
     * Return the tx control blocks in the pending list to the free list.
     */
    ixgbe_put_free_list(tx_ring, &pending_list);

    /* Transmit failed; do not drop the mblk, reschedule the transmit */
    tx_ring->reschedule = B_TRUE;

    return (mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
    dma_buffer_t *tx_buf;
    uint32_t desc_num;
    _NOTE(ARGUNUSED(tx_ring));

    tx_buf = &tcb->tx_buf;

    /*
     * Copy the packet data of the mblk fragment into the
     * pre-allocated tx buffer, which is maintained by the
     * tx control block.
     *
     * Several mblk fragments can be copied into one tx buffer.
     * The destination address of the current copied fragment in
     * the tx buffer is next to the end of the previous copied
     * fragment.
     */
    if (len > 0) {
        bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

        tx_buf->len += len;
        tcb->frag_num++;
    }

    desc_num = 0;

    /*
     * If it is the last fragment copied to the current tx buffer,
     * in other words, if there's no remaining fragment or the remaining
     * fragment requires a new tx control block to process, we need to
     * complete the current copy processing by syncing up the current
     * DMA buffer and saving the descriptor data.
     */
    if (copy_done) {
        /*
         * Sync the DMA buffer of the packet data
         */
        DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

        tcb->tx_type = USE_COPY;

        /*
         * Save the address and length to the private data structure
         * of the tx control block, which will be used to fill the
         * tx descriptor ring after all the fragments are processed.
         */
        ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
        desc_num++;
    }

    return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
    int status, i;
    ddi_dma_cookie_t dma_cookie;
    uint_t ncookies;
    int desc_num;

    /*
     * Use DMA binding to process the mblk fragment
     */
    status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
        (caddr_t)mp->b_rptr, len,
        DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
        0, &dma_cookie, &ncookies);

    if (status != DDI_DMA_MAPPED) {
        IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
        return (-1);
    }

    tcb->frag_num++;
    tcb->tx_type = USE_DMA;
    /*
     * Each fragment can span several cookies. One cookie will have
     * one tx descriptor to transmit.
     */
    desc_num = 0;
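    /*
     * Illustrative note (not from the original source): a bind can
     * return several cookies when the fragment crosses physically
     * non-contiguous pages; for example, a 9000-byte fragment might
     * come back as three cookies and therefore consume three tx
     * descriptors in the loop below.
     */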
    for (i = ncookies; i > 0; i--) {
        /*
         * Save the address and length to the private data structure
         * of the tx control block, which will be used to fill the
         * tx descriptor ring after all the fragments are processed.
         */
        ixgbe_save_desc(tcb,
            dma_cookie.dmac_laddress,
            dma_cookie.dmac_size);

        desc_num++;

        if (i > 1)
            ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
    }

    return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
    uint32_t start;
    uint32_t hckflags;
    uint32_t lsoflags;
    uint32_t mss;
    uint32_t len;
    uint32_t size;
    uint32_t offset;
    unsigned char *pos;
    ushort_t etype;
    uint32_t mac_hdr_len;
    uint32_t l4_proto;
    uint32_t l4_hdr_len;

    ASSERT(mp != NULL);

    mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
    bzero(ctx, sizeof (ixgbe_tx_context_t));

    if (hckflags == 0) {
        return (0);
    }

    ctx->hcksum_flags = hckflags;

    mac_lso_get(mp, &mss, &lsoflags);
    ctx->mss = mss;
    ctx->lso_flag = (lsoflags == HW_LSO);

    /*
     * LSO relies on tx h/w checksumming, so the packet is dropped
     * here if the h/w checksum flags are not declared.
     */
    if (ctx->lso_flag) {
        if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
            (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
            IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
                "checksum flags are not specified when doing LSO");
            return (-1);
        }
    }

    etype = 0;
    mac_hdr_len = 0;
    l4_proto = 0;

    /*
     * First get the position of the ether_type/ether_tpid.
     * Here we don't assume the ether (VLAN) header is fully included
     * in one mblk fragment, so we go through the fragments to parse
     * the ether type.
     */
    size = len = MBLKL(mp);
    offset = offsetof(struct ether_header, ether_type);
    while (size <= offset) {
        mp = mp->b_cont;
        ASSERT(mp != NULL);
        len = MBLKL(mp);
        size += len;
    }
    pos = mp->b_rptr + offset + len - size;

    etype = ntohs(*(ushort_t *)(uintptr_t)pos);
    if (etype == ETHERTYPE_VLAN) {
        /*
         * Get the position of the ether_type in VLAN header
         */
        offset = offsetof(struct ether_vlan_header, ether_type);
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        etype = ntohs(*(ushort_t *)(uintptr_t)pos);
        mac_hdr_len = sizeof (struct ether_vlan_header);
    } else {
        mac_hdr_len = sizeof (struct ether_header);
    }

    /*
     * Here we don't assume the IP(V6) header is fully included in
     * one mblk fragment.
     */
    switch (etype) {
    case ETHERTYPE_IP:
        if (ctx->lso_flag) {
            offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
            while (size <= offset) {
                mp = mp->b_cont;
                ASSERT(mp != NULL);
                len = MBLKL(mp);
                size += len;
            }
            pos = mp->b_rptr + offset + len - size;
            *((uint16_t *)(uintptr_t)(pos)) = 0;

            offset = offsetof(ipha_t, ipha_hdr_checksum) +
                mac_hdr_len;
            while (size <= offset) {
                mp = mp->b_cont;
                ASSERT(mp != NULL);
                len = MBLKL(mp);
                size += len;
            }
            pos = mp->b_rptr + offset + len - size;
            *((uint16_t *)(uintptr_t)(pos)) = 0;

            /*
             * To perform ixgbe LSO, the tcp checksum field of
             * the packet also needs to be filled with the
             * pseudo-header checksum over:
             * (ip_source_addr, ip_destination_addr, l4_proto)
             * The tcp/ip stack has already done this.
             */
        }
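
        /*
         * Note (added for clarity; hedged, not from the original
         * source): ipha_length and ipha_hdr_checksum are zeroed
         * above because, for LSO, the hardware is expected to insert
         * the per-segment IP total length and recompute the IP
         * header checksum for each segment it generates.
         */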
        offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        l4_proto = *(uint8_t *)pos;
        break;
    case ETHERTYPE_IPV6:
        offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        l4_proto = *(uint8_t *)pos;
        break;
    default:
        /* Unrecoverable error */
        IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
        return (-2);
    }

    if (ctx->lso_flag) {
        offset = mac_hdr_len + start;
        while (size <= offset) {
            mp = mp->b_cont;
            ASSERT(mp != NULL);
            len = MBLKL(mp);
            size += len;
        }
        pos = mp->b_rptr + offset + len - size;

        l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
    } else {
        /*
         * l4 header length is only required for LSO
         */
        l4_hdr_len = 0;
    }

    ctx->mac_hdr_len = mac_hdr_len;
    ctx->ip_hdr_len = start;
    ctx->l4_proto = l4_proto;
    ctx->l4_hdr_len = l4_hdr_len;

    return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
    ixgbe_tx_context_t *last;

    if (ctx == NULL)
        return (B_FALSE);

    /*
     * Compare the context data retrieved from the mblk with the
     * stored data of the last context descriptor. The fields that
     * need to be checked are:
     *     hcksum_flags
     *     l4_proto
     *     mac_hdr_len
     *     ip_hdr_len
     *     lso_flag
     *     mss (only checked for LSO)
     *     l4_hdr_len (only checked for LSO)
     * If any of these fields has changed, a new context descriptor
     * will be needed.
     */
    last = &tx_ring->tx_context;

    if ((ctx->hcksum_flags != last->hcksum_flags) ||
        (ctx->l4_proto != last->l4_proto) ||
        (ctx->mac_hdr_len != last->mac_hdr_len) ||
        (ctx->ip_hdr_len != last->ip_hdr_len) ||
        (ctx->lso_flag != last->lso_flag) ||
        (ctx->lso_flag && ((ctx->mss != last->mss) ||
        (ctx->l4_hdr_len != last->l4_hdr_len)))) {
        return (B_TRUE);
    }

    return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
    /*
     * Fill the context descriptor with the checksum
     * context information we've got.
     */
    ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
    ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
        IXGBE_ADVTXD_MACLEN_SHIFT;

    ctx_tbd->type_tucmd_mlhl =
        IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

    if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
        ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

    if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
        switch (ctx->l4_proto) {
        case IPPROTO_TCP:
            ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
            break;
        case IPPROTO_UDP:
            /*
             * We don't have to explicitly set:
             *     ctx_tbd->type_tucmd_mlhl |=
             *         IXGBE_ADVTXD_TUCMD_L4T_UDP;
             * because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
             */
            break;
        default:
            /* Unrecoverable error */
            IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
            break;
        }
    }

    ctx_tbd->seqnum_seed = 0;

    if (ctx->lso_flag) {
        ctx_tbd->mss_l4len_idx =
            (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
            (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
    } else {
        ctx_tbd->mss_l4len_idx = 0;
    }
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
    struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
    boolean_t load_context;
    uint32_t index, tcb_index, desc_num;
    union ixgbe_adv_tx_desc *tbd, *first_tbd;
    tx_control_block_t *tcb, *first_tcb;
    uint32_t hcksum_flags;
    int i;

    ASSERT(mutex_owned(&tx_ring->tx_lock));

    tbd = NULL;
    first_tbd = NULL;
    first_tcb = NULL;
    desc_num = 0;
    hcksum_flags = 0;
    load_context = B_FALSE;

    /*
     * Get the index of the first tx descriptor that will be filled,
     * and the index of the first work list item that will be attached
     * with the first used tx control block in the pending list.
     * Note: the two indexes are the same.
     */
    index = tx_ring->tbd_tail;
    tcb_index = tx_ring->tbd_tail;

    if (ctx != NULL) {
        hcksum_flags = ctx->hcksum_flags;

        /*
         * Check if a new context descriptor is needed for this packet
         */
        load_context = ixgbe_check_context(tx_ring, ctx);

        if (load_context) {
            tbd = &tx_ring->tbd_ring[index];

            /*
             * Fill the context descriptor with the
             * hardware checksum offload information.
             */
            ixgbe_fill_context(
                (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

            index = NEXT_INDEX(index, 1, tx_ring->ring_size);
            desc_num++;

            /*
             * Store the checksum context data if
             * a new context descriptor is added
             */
            tx_ring->tx_context = *ctx;
        }
    }

    first_tbd = &tx_ring->tbd_ring[index];

    /*
     * Fill tx data descriptors with the data saved in the pending list.
     * The tx control blocks in the pending list are added to the work list
     * at the same time.
     *
     * The work list corresponds strictly 1:1 to the descriptor ring.
     * One item of the work list corresponds to one tx descriptor. Because
     * one tx control block can span multiple tx descriptors, the tx
     * control block is added to the first work list item that
     * corresponds to the first tx descriptor generated from that tx
     * control block.
     */
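    /*
     * Worked example (illustrative, not from the original source):
     * suppose tbd_tail is 10, a context descriptor is loaded, and the
     * pending list holds two tx control blocks producing 1 and 3 data
     * descriptors respectively. The context descriptor goes to slot 10,
     * the first tcb's descriptor to slot 11 with work_list[10] = tcb0,
     * and the second tcb's descriptors to slots 12-14 with
     * work_list[12] = tcb1. first_tcb (tcb0) is then credited with the
     * context descriptor and its last_index becomes 14.
     */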
    tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
    first_tcb = tcb;
    while (tcb != NULL) {

        for (i = 0; i < tcb->desc_num; i++) {
            tbd = &tx_ring->tbd_ring[index];

            tbd->read.buffer_addr = tcb->desc[i].address;
            tbd->read.cmd_type_len = tcb->desc[i].length;

            tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
                | IXGBE_ADVTXD_DTYP_DATA;

            tbd->read.olinfo_status = 0;

            index = NEXT_INDEX(index, 1, tx_ring->ring_size);
            desc_num++;
        }

        /*
         * Add the tx control block to the work list
         */
        ASSERT(tx_ring->work_list[tcb_index] == NULL);
        tx_ring->work_list[tcb_index] = tcb;

        tcb_index = index;
        tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
    }

    if (load_context) {
        /*
         * Count the context descriptor for
         * the first tx control block.
         */
        first_tcb->desc_num++;
    }
    first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

    /*
     * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
     * valid in the first descriptor of the packet.
     * Set paylen in first_tbd as needed: the 82599 requires the packet
     * length in the paylen field with or without LSO, and the 82598
     * ignores it in non-LSO mode.
     */
    ASSERT(first_tbd != NULL);
    first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

    switch (hw->mac.type) {
    case ixgbe_mac_82598EB:
        if (ctx != NULL && ctx->lso_flag) {
            first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
            first_tbd->read.olinfo_status |=
                (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
                - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
        }
        break;

    case ixgbe_mac_82599EB:
        if (ctx != NULL && ctx->lso_flag) {
            first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
            first_tbd->read.olinfo_status |=
                (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
                - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
        } else {
            first_tbd->read.olinfo_status |=
                (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
        }
        break;

    default:
        break;
    }

    /* Set hardware checksum bits */
    if (hcksum_flags != 0) {
        if (hcksum_flags & HCK_IPV4_HDRCKSUM)
            first_tbd->read.olinfo_status |=
                IXGBE_ADVTXD_POPTS_IXSM;
        if (hcksum_flags & HCK_PARTIALCKSUM)
            first_tbd->read.olinfo_status |=
                IXGBE_ADVTXD_POPTS_TXSM;
    }

    /*
     * The last descriptor of the packet needs the End Of Packet (EOP)
     * and Report Status (RS) bits set.
     */
    ASSERT(tbd != NULL);
    tbd->read.cmd_type_len |=
        IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

    /*
     * Sync the DMA buffer of the tx descriptor ring
     */
    DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

    /*
     * Update the number of the free tx descriptors.
     * The mutual exclusion between the transmission and the recycling
     * (for the tx descriptor ring and the work list) is implemented
     * with the atomic operation on the number of the free tx descriptors.
     *
     * Note: the counter tbd_free must always be decremented before
     * advancing the hardware TDT pointer, to avoid the race where the
     * descriptors are transmitted and tbd_free is increased by the tx
     * recycling before the decrement has happened.
     */
    i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
    ASSERT(i >= 0);

    tx_ring->tbd_tail = index;

    /*
     * Advance the hardware TDT pointer of the tx descriptor ring
     */
    IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

    if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
        DDI_FM_OK) {
        ddi_fm_service_impact(tx_ring->ixgbe->dip,
            DDI_SERVICE_DEGRADED);
        atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
    }

    return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
    sw_desc_t *desc;

    desc = &tcb->desc[tcb->desc_num];
    desc->address = address;
    desc->length = length;

    tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
    uint32_t index, last_index, prev_index;
    int desc_num;
    boolean_t desc_done;
    tx_control_block_t *tcb;
    link_list_t pending_list;
    ixgbe_t *ixgbe = tx_ring->ixgbe;

    mutex_enter(&tx_ring->recycle_lock);

    ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

    if (tx_ring->tbd_free == tx_ring->ring_size) {
        tx_ring->recycle_fail = 0;
        tx_ring->stall_watchdog = 0;
        if (tx_ring->reschedule) {
            tx_ring->reschedule = B_FALSE;
            mac_tx_ring_update(ixgbe->mac_hdl,
                tx_ring->ring_handle);
        }
        mutex_exit(&tx_ring->recycle_lock);
        return (0);
    }

    /*
     * Sync the DMA buffer of the tx descriptor ring
     */
    DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

    if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
        mutex_exit(&tx_ring->recycle_lock);
        ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
        atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
        return (0);
    }

    LINK_LIST_INIT(&pending_list);
    desc_num = 0;
    index = tx_ring->tbd_head; /* Index of next tbd/tcb to recycle */

    tcb = tx_ring->work_list[index];
    ASSERT(tcb != NULL);

    while (tcb != NULL) {
        /*
         * Get the last tx descriptor of this packet.
         * If the last tx descriptor is done, then
         * we can recycle all descriptors of a packet
         * which usually includes several tx control blocks.
         * For 82599, LSO descriptors cannot be recycled
         * unless the whole packet's transmission is done.
         * That's why packet-level recycling is used here.
         * The 82598 has no such limit.
         */
        last_index = tcb->last_index;
        /*
         * MAX_TX_RING_SIZE is used to judge whether
         * the index is a valid value or not.
         */
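        /*
         * (Added note: last_index is only assigned, in
         * ixgbe_tx_fill_ring(), on the first tx control block of a
         * packet once its descriptors are on the ring; a tcb still
         * holding the reset value set by ixgbe_free_tcb() therefore
         * has no recorded last descriptor to check, so recycling
         * stops here.)
         */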
        if (last_index == MAX_TX_RING_SIZE)
            break;

        /*
         * Check if the Descriptor Done bit is set
         */
        desc_done = tx_ring->tbd_ring[last_index].wb.status &
            IXGBE_TXD_STAT_DD;
        if (desc_done) {
            /*
             * Recycle all descriptors of the packet
             */
            while (tcb != NULL) {
                /*
                 * Strip off the tx control block from
                 * the work list, and add it to the
                 * pending list.
                 */
                tx_ring->work_list[index] = NULL;
                LIST_PUSH_TAIL(&pending_list, &tcb->link);

                /*
                 * Count the total number of the tx
                 * descriptors recycled
                 */
                desc_num += tcb->desc_num;

                index = NEXT_INDEX(index, tcb->desc_num,
                    tx_ring->ring_size);

                tcb = tx_ring->work_list[index];

                prev_index = PREV_INDEX(index, 1,
                    tx_ring->ring_size);
                if (prev_index == last_index)
                    break;
            }
        } else {
            break;
        }
    }

    /*
     * If no tx descriptors are recycled, no need to do more processing
     */
    if (desc_num == 0) {
        tx_ring->recycle_fail++;
        mutex_exit(&tx_ring->recycle_lock);
        return (0);
    }

    tx_ring->recycle_fail = 0;
    tx_ring->stall_watchdog = 0;

    /*
     * Update the head index of the tx descriptor ring
     */
    tx_ring->tbd_head = index;

    /*
     * Update the number of the free tx descriptors with atomic operations
     */
    atomic_add_32(&tx_ring->tbd_free, desc_num);

    if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
        (tx_ring->reschedule)) {
        tx_ring->reschedule = B_FALSE;
        mac_tx_ring_update(ixgbe->mac_hdl,
            tx_ring->ring_handle);
    }
    mutex_exit(&tx_ring->recycle_lock);

    /*
     * Free the resources used by the tx control blocks
     * in the pending list
     */
    tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
    while (tcb != NULL) {
        /*
         * Release the resources occupied by the tx control block
         */
        ixgbe_free_tcb(tcb);

        tcb = (tx_control_block_t *)
            LIST_GET_NEXT(&pending_list, &tcb->link);
    }

    /*
     * Add the tx control blocks in the pending list to the free list.
     */
    ixgbe_put_free_list(tx_ring, &pending_list);

    return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
    uint32_t index;
    uint32_t head_wb;
    int desc_num;
    tx_control_block_t *tcb;
    link_list_t pending_list;
    ixgbe_t *ixgbe = tx_ring->ixgbe;

    mutex_enter(&tx_ring->recycle_lock);

    ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

    if (tx_ring->tbd_free == tx_ring->ring_size) {
        tx_ring->recycle_fail = 0;
        tx_ring->stall_watchdog = 0;
        if (tx_ring->reschedule) {
            tx_ring->reschedule = B_FALSE;
            mac_tx_ring_update(ixgbe->mac_hdl,
                tx_ring->ring_handle);
        }
        mutex_exit(&tx_ring->recycle_lock);
        return (0);
    }

    /*
     * Sync the DMA buffer of the tx descriptor ring.
     *
     * Note: in head write-back mode the tx descriptors are not
     * written back, but the head write-back value is stored in the
     * extra tbd at the end of the DMA area, so we still need to sync
     * that value for the kernel instead of doing
     * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
     */
    (void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
        sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
        sizeof (uint32_t),
        DDI_DMA_SYNC_FORKERNEL);

    if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
        mutex_exit(&tx_ring->recycle_lock);
        ddi_fm_service_impact(ixgbe->dip,
            DDI_SERVICE_DEGRADED);
        atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
        return (0);
    }

    LINK_LIST_INIT(&pending_list);
    desc_num = 0;
    index = tx_ring->tbd_head; /* Next index to clean */

    /*
     * Get the value of head write-back
     */
    head_wb = *tx_ring->tbd_head_wb;
    while (index != head_wb) {
        tcb = tx_ring->work_list[index];
        ASSERT(tcb != NULL);

        if (OFFSET(index, head_wb, tx_ring->ring_size) <
            tcb->desc_num) {
            /*
             * The current tx control block is not
             * completely transmitted, stop recycling
             */
            break;
        }

        /*
         * Strip off the tx control block from the work list,
         * and add it to the pending list.
         */
        tx_ring->work_list[index] = NULL;
        LIST_PUSH_TAIL(&pending_list, &tcb->link);

        /*
         * Advance the index of the tx descriptor ring
         */
        index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

        /*
         * Count the total number of the tx descriptors recycled
         */
        desc_num += tcb->desc_num;
    }

    /*
     * If no tx descriptors are recycled, no need to do more processing
     */
    if (desc_num == 0) {
        tx_ring->recycle_fail++;
        mutex_exit(&tx_ring->recycle_lock);
        return (0);
    }

    tx_ring->recycle_fail = 0;
    tx_ring->stall_watchdog = 0;

    /*
     * Update the head index of the tx descriptor ring
     */
    tx_ring->tbd_head = index;

    /*
     * Update the number of the free tx descriptors with atomic operations
     */
    atomic_add_32(&tx_ring->tbd_free, desc_num);

    if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
        (tx_ring->reschedule)) {
        tx_ring->reschedule = B_FALSE;
        mac_tx_ring_update(ixgbe->mac_hdl,
            tx_ring->ring_handle);
    }
    mutex_exit(&tx_ring->recycle_lock);

    /*
     * Free the resources used by the tx control blocks
     * in the pending list
     */
    tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
    while (tcb) {
        /*
         * Release the resources occupied by the tx control block
         */
        ixgbe_free_tcb(tcb);

        tcb = (tx_control_block_t *)
            LIST_GET_NEXT(&pending_list, &tcb->link);
    }

    /*
     * Add the tx control blocks in the pending list to the free list.
     */
    ixgbe_put_free_list(tx_ring, &pending_list);

    return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
    switch (tcb->tx_type) {
    case USE_COPY:
        /*
         * Reset the buffer length that is used for copy
         */
        tcb->tx_buf.len = 0;
        break;
    case USE_DMA:
        /*
         * Release the DMA resource that is used for
         * DMA binding.
         */
        (void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
        break;
    default:
        break;
    }

    /*
     * Free the mblk
     */
    if (tcb->mp != NULL) {
        freemsg(tcb->mp);
        tcb->mp = NULL;
    }

    tcb->tx_type = USE_NONE;
    tcb->last_index = MAX_TX_RING_SIZE;
    tcb->frag_num = 0;
    tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of the available tx control blocks
 * in the free list is used to keep this routine mutually exclusive with
 * the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
    tx_control_block_t *tcb;

    /*
     * Check and update the number of the free tx control blocks
     * in the free list.
     */
    if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
        return (NULL);

    mutex_enter(&tx_ring->tcb_head_lock);

    tcb = tx_ring->free_list[tx_ring->tcb_head];
    ASSERT(tcb != NULL);
    tx_ring->free_list[tx_ring->tcb_head] = NULL;
    tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
        tx_ring->free_list_size);

    mutex_exit(&tx_ring->tcb_head_lock);

    return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
    uint32_t index;
    int tcb_num;
    tx_control_block_t *tcb;

    mutex_enter(&tx_ring->tcb_tail_lock);

    index = tx_ring->tcb_tail;

    tcb_num = 0;
    tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
    while (tcb != NULL) {
        ASSERT(tx_ring->free_list[index] == NULL);
        tx_ring->free_list[index] = tcb;

        tcb_num++;

        index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

        tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
    }

    tx_ring->tcb_tail = index;

    /*
     * Update the number of the free tx control blocks
     * in the free list. This operation must be placed
     * under the protection of the lock.
     */
    atomic_add_32(&tx_ring->tcb_free, tcb_num);

    mutex_exit(&tx_ring->tcb_tail_lock);
}