/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t, boolean_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t *, mblk_t *,
    uint32_t);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * To transmit one mblk through one specified ring.
 *
 * One mblk can consist of several fragments; each fragment is
 * processed with a different method based on its size. Fragments
 * smaller than the bcopy threshold are processed with bcopy;
 * otherwise, they are processed with DMA binding.
 *
 * To process the mblk, a tx control block is taken from the
 * free list. One tx control block contains one tx buffer, which
 * is used to copy mblk fragments' data; and one tx DMA handle,
 * which is used to bind a mblk fragment with DMA resource.
 *
 * Several small mblk fragments can be copied into one tx control
 * block's buffer, and then the buffer will be transmitted with
 * one tx descriptor.
 *
 * A large fragment only binds with one tx control block's DMA
 * handle, and it can span several tx descriptors for transmitting.
 *
 * So to transmit a packet (mblk), several tx control blocks can
 * be used. After the processing, those tx control blocks will
 * be put on the work list.
 */
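
/*
 * A return value of NULL means the mblk was consumed (transmitted or
 * dropped); returning the original mblk means it was not transmitted
 * and may be retried later, after tx descriptors or tx control blocks
 * have been recycled.
 */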
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	tx_type_t current_flag, next_flag;
	uint32_t current_len, next_len;
	uint32_t desc_total;
	size_t mbsize;
	int desc_num;
	boolean_t copy_done, eop;
	mblk_t *current_mp, *next_mp, *nmp, *pull_mp = NULL;
	tx_control_block_t *tcb;
	ixgbe_tx_context_t tx_context, *ctx;
	link_list_t pending_list;
	uint32_t len, hdr_frag_len, hdr_len;
	uint32_t copy_thresh;
	mblk_t *hdr_new_mp = NULL;
	mblk_t *hdr_pre_mp = NULL;
	mblk_t *hdr_nmp = NULL;

	ASSERT(mp->b_next == NULL);

	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED)) {
		return (mp);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	/* Get the mblk size */
	mbsize = 0;
	for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
		mbsize += MBLKL(nmp);
	}

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe could
		 * process, then discard this mblk, and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	} else {
		ctx = NULL;
	}

	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, declare an overload condition, return mp,
	 * and re-schedule the tx later.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		IXGBE_DEBUG_STAT(tx_ring->stat_overload);
		return (mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have had their packet data
	 * processed but not yet placed on the tx descriptor ring.
	 * It is used to reduce the lock contention on tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	desc_total = 0;

	/*
	 * The software should guarantee that the LSO packet header
	 * (MAC+IP+TCP) fits within one descriptor. Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		hdr_nmp = mp;
		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;
		while (len < hdr_len) {
			hdr_pre_mp = hdr_nmp;
			hdr_nmp = hdr_nmp->b_cont;
			len += MBLKL(hdr_nmp);
		}
		/*
		 * If the header and the payload are in different mblks
		 * (the header ends exactly on an mblk boundary), no
		 * reallocation is needed; simply force the header to be
		 * copied into the pre-allocated page-aligned buffer by
		 * adjusting the bcopy threshold below.
		 */
		if (len == hdr_len)
			goto adjust_threshold;

		hdr_frag_len = hdr_len - (len - MBLKL(hdr_nmp));
		/*
		 * There are two cases where we need to reallocate a mblk
		 * for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
		if ((hdr_nmp != mp) ||
		    (P2NPHASE((uintptr_t)hdr_nmp->b_rptr, ixgbe->sys_page_size)
		    < hdr_len)) {
			IXGBE_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * Reallocate the mblk for the last header fragment,
			 * which is expected to be bcopied into the
			 * pre-allocated page-aligned buffer.
			 */
			hdr_new_mp = allocb(hdr_frag_len, NULL);
			if (!hdr_new_mp)
				return (mp);
			bcopy(hdr_nmp->b_rptr, hdr_new_mp->b_rptr,
			    hdr_frag_len);
			/* link the new header fragment with the other parts */
			hdr_new_mp->b_wptr = hdr_new_mp->b_rptr + hdr_frag_len;
			hdr_new_mp->b_cont = hdr_nmp;
			if (hdr_pre_mp)
				hdr_pre_mp->b_cont = hdr_new_mp;
			else
				mp = hdr_new_mp;
			hdr_nmp->b_rptr += hdr_frag_len;
		}
adjust_threshold:
		/*
		 * Adjust the bcopy threshold to guarantee that the
		 * header is processed with bcopy.
		 */
		if (copy_thresh < hdr_len)
			copy_thresh = hdr_len;
	}

	current_mp = mp;
	current_len = MBLKL(current_mp);
	/*
	 * Decide which method to use for the first fragment
	 */
	current_flag = (current_len <= copy_thresh) ?
	    USE_COPY : USE_DMA;
	/*
	 * If the mblk includes several contiguous small fragments,
	 * they may be copied into one buffer. This flag is used to
	 * indicate whether there are pending fragments that need to
	 * be copied to the current tx buffer.
	 *
	 * If this flag is B_TRUE, it indicates that a new tx control
	 * block is needed to process the next fragment using either
	 * copy or DMA binding.
	 *
	 * Otherwise, it indicates that the next fragment will be
	 * copied to the current tx buffer that is maintained by the
	 * current tx control block. No new tx control block is needed.
	 */
	copy_done = B_TRUE;
	while (current_mp) {
		next_mp = current_mp->b_cont;
		eop = (next_mp == NULL); /* Last fragment of the packet? */
		next_len = eop ? 0 : MBLKL(next_mp);

		/*
		 * If the current fragment is empty but the next fragment
		 * will still be copied to the current tx buffer, we cannot
		 * skip this fragment here, because the copy processing is
		 * still pending for completion; the empty fragment has to
		 * be processed in the tx_copy routine.
		 *
		 * If the copy processing is completed, or a DMA binding
		 * processing has just completed, we can simply skip this
		 * empty fragment.
		 */
		if ((current_len == 0) && (copy_done)) {
			current_mp = next_mp;
			current_len = next_len;
			current_flag = (current_len <= copy_thresh) ?
			    USE_COPY : USE_DMA;
			continue;
		}
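
		/*
		 * copy_done being B_TRUE here means the previous tx control
		 * block (if any) has been completed, so a fresh tx control
		 * block must be taken from the free list for this fragment.
		 */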
		if (copy_done) {
			/*
			 * Get a new tx control block from the free list
			 */
			tcb = ixgbe_get_free_list(tx_ring);

			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}

			/*
			 * Push the tx control block to the pending list
			 * to avoid using lock too early
			 */
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
		}

		if (current_flag == USE_COPY) {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment, and if using bcopy, whether we
			 * need to continue copying the next fragment into the
			 * current tx buffer.
			 */
			ASSERT((tcb->tx_buf.len + current_len) <=
			    tcb->tx_buf.size);

			if (eop) {
				/*
				 * This is the last fragment of the packet, so
				 * the copy processing will be completed with
				 * this fragment.
				 */
				next_flag = USE_NONE;
				copy_done = B_TRUE;
			} else if ((tcb->tx_buf.len + current_len + next_len) >
			    tcb->tx_buf.size) {
				/*
				 * If the next fragment is too large to be
				 * copied to the current tx buffer, we need
				 * to complete the current copy processing.
				 */
				next_flag = (next_len > copy_thresh) ?
				    USE_DMA : USE_COPY;
				copy_done = B_TRUE;
			} else if (next_len > copy_thresh) {
				/*
				 * The next fragment needs to be processed with
				 * DMA binding. So the copy processing will be
				 * completed with the current fragment.
				 */
				next_flag = USE_DMA;
				copy_done = B_TRUE;
			} else {
				/*
				 * Continue to copy the next fragment to the
				 * current tx buffer.
				 */
				next_flag = USE_COPY;
				copy_done = B_FALSE;
			}

			desc_num = ixgbe_tx_copy(tx_ring, tcb, current_mp,
			    current_len, copy_done);
		} else {
			/*
			 * Check whether to use bcopy or DMA binding to process
			 * the next fragment.
			 */
			next_flag = (next_len > copy_thresh) ?
			    USE_DMA : USE_COPY;
			ASSERT(copy_done == B_TRUE);

			desc_num = ixgbe_tx_bind(tx_ring, tcb, current_mp,
			    current_len);
		}

		if (desc_num > 0)
			desc_total += desc_num;
		else if (desc_num < 0)
			goto tx_failure;

		current_mp = next_mp;
		current_len = next_len;
		current_flag = next_flag;
	}

	/*
	 * Attach the mblk to the last tx control block
	 */
	ASSERT(tcb);
	ASSERT(tcb->mp == NULL);
	tcb->mp = mp;

	/*
	 * The 82598/82599 chipsets have a limitation that no more than
	 * 32 tx descriptors can be transmitted at one time.
	 *
	 * Here is a workaround for it: pull up the mblk and then send it
	 * out using DMA binding. By doing so, no more than MAX_COOKIE (18)
	 * descriptors are needed.
	 */
	if (desc_total + 1 > IXGBE_TX_DESC_LIMIT) {
		IXGBE_DEBUG_STAT(tx_ring->stat_break_tbd_limit);

		/*
		 * Discard the mblk and free the used resources
		 */
		tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
		while (tcb) {
			tcb->mp = NULL;
			ixgbe_free_tcb(tcb);
			tcb = (tx_control_block_t *)
			    LIST_GET_NEXT(&pending_list, &tcb->link);
		}

		/*
		 * Return the tx control blocks in the pending list to
		 * the free list.
		 */
		ixgbe_put_free_list(tx_ring, &pending_list);

		/*
		 * Pull up the mblk and send it out using DMA binding
		 */
		if ((pull_mp = msgpullup(mp, -1)) == NULL) {
			tx_ring->reschedule = B_TRUE;

			/*
			 * If a new mblk has been allocated for the last
			 * header fragment of an LSO packet, we should
			 * restore the modified mp.
			 */
			if (hdr_new_mp) {
				hdr_new_mp->b_cont = NULL;
				freeb(hdr_new_mp);
				hdr_nmp->b_rptr -= hdr_frag_len;
				if (hdr_pre_mp)
					hdr_pre_mp->b_cont = hdr_nmp;
				else
					mp = hdr_nmp;
			}
			return (mp);
		}

		LINK_LIST_INIT(&pending_list);
		desc_total = 0;

		/*
		 * If the packet is an LSO packet, we simply transmit
		 * the header in one descriptor using bcopy.
		 */
		if ((ctx != NULL) && ctx->lso_flag) {
			hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len +
			    ctx->l4_hdr_len;

			tcb = ixgbe_get_free_list(tx_ring);
			if (tcb == NULL) {
				IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
				goto tx_failure;
			}
			desc_num = ixgbe_tx_copy(tx_ring, tcb, pull_mp,
			    hdr_len, B_TRUE);
			LIST_PUSH_TAIL(&pending_list, &tcb->link);
			desc_total += desc_num;

			pull_mp->b_rptr += hdr_len;
		}

		tcb = ixgbe_get_free_list(tx_ring);
		if (tcb == NULL) {
			IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tcb);
			goto tx_failure;
		}
		if ((ctx != NULL) && ctx->lso_flag) {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize - hdr_len);
		} else {
			desc_num = ixgbe_tx_bind(tx_ring, tcb, pull_mp,
			    mbsize);
		}
		if (desc_num < 0) {
			goto tx_failure;
		}
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		desc_total += desc_num;
		tcb->mp = pull_mp;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure that there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycling will fail and cause a false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit
	 * then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_no_tbd);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that the transmission succeeded, free the original
	 * mp if the pulled-up mblk was used for the transmission.
	 */
	if (pull_mp) {
		freemsg(mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If the transmission fails, free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * If a new mblk has been allocated for the last header
	 * fragment of an LSO packet, we should restore the
	 * modified mp.
	 */
	if (hdr_new_mp) {
		hdr_new_mp->b_cont = NULL;
		freeb(hdr_new_mp);
		hdr_nmp->b_rptr -= hdr_frag_len;
		if (hdr_pre_mp)
			hdr_pre_mp->b_cont = hdr_nmp;
		else
			mp = hdr_nmp;
	}
	/*
	 * Discard the mblk and free the used resources
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		tcb->mp = NULL;

		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (mp);
}
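
/*
 * Lifecycle of a tx control block (tcb), as implemented above and below:
 * it is taken from the ring's free list (ixgbe_get_free_list), collected
 * on a local pending list while the packet is processed, moved to the
 * work list when its descriptors are placed on the ring
 * (ixgbe_tx_fill_ring), and finally released and returned to the free
 * list by the recycle routines (ixgbe_free_tcb/ixgbe_put_free_list).
 */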

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len, boolean_t copy_done)
{
	dma_buffer_t *tx_buf;
	uint32_t desc_num;
	_NOTE(ARGUNUSED(tx_ring));

	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(mp->b_rptr, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	desc_num = 0;

	/*
	 * If it is the last fragment copied to the current tx buffer,
	 * in other words, if there's no remaining fragment or the remaining
	 * fragment requires a new tx control block to process, we need to
	 * complete the current copy processing by syncing up the current
	 * DMA buffer and saving the descriptor data.
	 */
	if (copy_done) {
		/*
		 * Sync the DMA buffer of the packet data
		 */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);

		tcb->tx_type = USE_COPY;

		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t *tcb, mblk_t *mp,
    uint32_t len)
{
	int status, i;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_num;

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)mp->b_rptr, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, &dma_cookie, &ncookies);

	if (status != DDI_DMA_MAPPED) {
		IXGBE_DEBUG_STAT(tx_ring->stat_fail_dma_bind);
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;
	/*
	 * Each fragment can span several cookies; each cookie is
	 * transmitted with one tx descriptor.
	 */
	desc_num = 0;
	for (i = ncookies; i > 0; i--) {
		/*
		 * Save the address and length to the private data structure
		 * of the tx control block, which will be used to fill the
		 * tx descriptor ring after all the fragments are processed.
		 */
		ixgbe_save_desc(tcb,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		desc_num++;

		if (i > 1)
			ddi_dma_nextcookie(tcb->tx_dma_handle, &dma_cookie);
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	mac_lso_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	/*
	 * LSO relies on tx h/w checksum, so the packet will be dropped
	 * here if the h/w checksum flags are not declared.
	 */
	if (ctx->lso_flag) {
		if (!((ctx->hcksum_flags & HCK_PARTIALCKSUM) &&
		    (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM))) {
			IXGBE_DEBUGLOG_0(NULL, "ixgbe_tx: h/w "
			    "checksum flags are not specified when doing LSO");
			return (-1);
		}
	}

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, the tcp checksum field of
			 * the packet also needs to be filled with the
			 * pseudo-header checksum over:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * Currently the tcp/ip stack has done it.
			 */
		}

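		/*
		 * For both the LSO and non-LSO cases, locate the IPv4
		 * protocol field so that the L4 protocol (e.g. TCP or UDP)
		 * can be recorded for the context descriptor setup.
		 */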
		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk and the
	 * stored data of the last context descriptor. The data to
	 * be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any of the above data has changed, a new context
	 * descriptor will be needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}
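
/*
 * For example, a burst of TCP/IPv4 packets that share the same header
 * lengths and offload flags will typically load the context descriptor
 * only once; ixgbe_check_context() lets the following packets reuse it.
 */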

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	if (ctx->hcksum_flags & HCK_IPV4_HDRCKSUM)
		ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;

	if (ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * to the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block will be added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * The packet length (paylen) is set in the first_tbd: 82599 and X540
	 * require the packet length in the paylen field with or without LSO,
	 * while 82598 ignores it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;

	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;

	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race where the
	 * descriptors have already been transmitted and tbd_free has been
	 * increased by the tx recycling before the decrement happens.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there's no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb != NULL) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: in head write-back mode, the tx descriptors will not be
	 * written back, but the head write-back value is stored at the
	 * last extra tbd at the end of the DMA area; we still need to
	 * sync that value for the kernel.
	 *
	 *	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Free the resources used by the tx control blocks
	 * in the pending list
	 */
	tcb = (tx_control_block_t *)LIST_GET_HEAD(&pending_list);
	while (tcb) {
		/*
		 * Release the resources occupied by the tx control block
		 */
		ixgbe_free_tcb(tcb);

		tcb = (tx_control_block_t *)
		    LIST_GET_NEXT(&pending_list, &tcb->link);
	}

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}
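
/*
 * Note: ixgbe_tx_recycle_legacy() polls the Descriptor Done bit of each
 * packet's last descriptor, while ixgbe_tx_recycle_head_wb() relies on the
 * head value that the hardware writes back past the end of the ring. The
 * routine in use is reached through the ring's tx_recycle function pointer
 * (see ixgbe_ring_tx()).
 */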

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle, and reset other
 * control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0)
		return (NULL);

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented
 * with the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}