/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
 * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
 * Copyright 2020 Joyent, Inc.
 */

#include "ixgbe_sw.h"

static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t **,
    link_list_t *, const void *, size_t);
static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t **,
    link_list_t *, uint8_t *, size_t);
static uint_t ixgbe_tcb_done(tx_control_block_t *);
static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
    ixgbe_tx_context_t *, size_t);
static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *,
    link_list_t *);

static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
    ixgbe_tx_context_t *);
static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
    ixgbe_tx_context_t *);

#ifndef IXGBE_DEBUG
#pragma inline(ixgbe_save_desc)
#pragma inline(ixgbe_get_context)
#pragma inline(ixgbe_check_context)
#pragma inline(ixgbe_fill_context)
#endif

/*
 * ixgbe_ring_tx
 *
 * Transmit one mblk through one specified ring.
 *
 * One mblk can consist of several fragments; each fragment is processed
 * with a different method based on its size. Fragments smaller than the
 * bcopy threshold are processed with bcopy; otherwise, they are processed
 * with DMA binding.
 *
 * To process the mblk, for each fragment, we pass a pointer to the location
 * of the current transmit control block (tcb) (initialized to NULL) to either
 * ixgbe_tx_copy() or ixgbe_tx_bind() (based on the size of the mblk fragment).
 * ixgbe_tx_copy() and ixgbe_tx_bind() will either continue to use the current
 * tcb if possible, or close out the current tcb, allocate a new tcb, and
 * update the passed location (tx_control_block_t **) to reflect the new
 * current tcb.
 *
 * Since bound mblk fragments require their own tcb, the close, allocate new,
 * and update steps occur on every call to ixgbe_tx_bind(), but since
 * consecutive small mblk fragments can be combined into a single tcb, the
 * close, allocate new, and update steps may not occur on every call to
 * ixgbe_tx_copy().
 * If we call ixgbe_tx_copy() while the current tcb is already being used to
 * copy data and there is enough room in it for the current mblk fragment,
 * we append the data from the mblk fragment. If we call ixgbe_tx_copy() and
 * the current tcb isn't being used to copy (i.e. the previous iteration of
 * the loop called ixgbe_tx_bind()), or doesn't have enough space for the
 * mblk fragment, we close out the current tcb, grab a new tcb from the free
 * list, and update the current tcb to the newly obtained tcb.
 *
 * When LSO (large segment offload) is enabled, we first copy the packet
 * headers (ethernet, IP, and TCP/UDP) into their own descriptor before
 * processing the remainder of the packet. The remaining bytes of the packet
 * are then copied or mapped based on the fragment size as described above.
 *
 * Through the entire processing of a packet, we keep track of the number of
 * DMA descriptors being used (either bound or pre-bound buffers used for
 * copying) by this packet. Each tcb requires at least one DMA descriptor, but
 * may require more than one. When a tcb is closed by ixgbe_tx_bind() or
 * ixgbe_tx_copy(), it does so by calling ixgbe_tcb_done() which returns the
 * number of DMA descriptors that are closed (ready for the HW). Since the
 * hardware limits the number of descriptors that can be used to transmit a
 * single packet, if the total number of DMA descriptors required to transmit
 * this packet exceeds this limit, we perform a msgpullup() and try again.
 * Since our DMA attributes limit the number of DMA cookies allowed to
 * map a single span of memory to a value (MAX_COOKIE) less than the
 * maximum number of descriptors allowed for a packet (IXGBE_TX_DESC_LIMIT),
 * as long as sufficient tcbs are available, we should always be able to
 * process a packet that's contained in a single mblk_t (no additional
 * fragments).
 *
 * Once all of the tcbs have been set up, ixgbe_tx_fill_ring() is called to
 * set up the tx ring to transmit the tcbs and then tell the HW to start
 * transmitting. When transmission is complete, an interrupt is triggered
 * which calls the appropriate recycle routine to place the tcbs that were
 * used in transmission back in the free list. We may also try to
 * recycle any available tcbs when the size of the tcb free list gets low
 * or if the watchdog timer triggers.
 *
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *orig_mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	mblk_t *mp = orig_mp;
	mblk_t *pull_mp = NULL;
	tx_control_block_t *tcb;
	size_t mbsize, offset, len;
	uint32_t desc_total;
	uint32_t copy_thresh;
	int desc_num;
	ixgbe_tx_context_t tx_context, *ctx = NULL;
	link_list_t pending_list;
	boolean_t limit_retry = B_FALSE;

	ASSERT(mp->b_next == NULL);

	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED) ||
	    ixgbe->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	mbsize = msgsize(mp);

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the maximum size ixgbe can
		 * process, discard this mblk and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	}

	/*
	 * If we use too many descriptors (see comments below), we may do
	 * pull_mp = msgpullup(orig_mp, -1), and jump back to here. As such,
	 * any time we error return past here, we should check and free
	 * pull_mp if != NULL.
	 */
retry:
	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if tbd_free is still less than the
	 * overload threshold, assert overload, return mp, and
	 * reschedule the tx later.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		tx_ring->stat_overload++;
		if (pull_mp != NULL)
			freemsg(pull_mp);
		return (orig_mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks whose packet data has been processed
	 * but not yet placed on the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);

	tcb = NULL;
	desc_num = 0;
	desc_total = 0;
	offset = 0;

	/*
	 * For LSO, we always copy the packet header (Ethernet + IP + TCP/UDP)
	 * into a single descriptor separate from the remaining data.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		size_t hdr_len;

		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;

		/*
		 * Copy the first hdr_len bytes of mp (i.e. the Ethernet, IP,
		 * and TCP/UDP headers) into tcb.
		 */
		for (len = hdr_len; mp != NULL && len > 0; mp = mp->b_cont) {
			size_t mlen = MBLKL(mp);
			size_t amt = MIN(mlen, len);
			int ret;

			ret = ixgbe_tx_copy(tx_ring, &tcb, &pending_list,
			    mp->b_rptr, amt);
			/*
			 * Since we're trying to copy all of the headers into
			 * a single buffer in a single tcb, if ixgbe_tx_copy()
			 * returns anything but 0, it means either no tcbs
			 * are available (< 0), or while copying, we spilled
			 * over and couldn't fit all the headers into a
			 * single tcb.
			 */
			if (ret != 0) {
				if (ret > 0)
					tx_ring->stat_lso_header_fail++;
				goto tx_failure;
			}

			len -= amt;

			/*
			 * If we copy less than the full amount of this
			 * mblk_t, we have some amount to copy below.
			 */
			if (amt < mlen) {
				offset = amt;
				break;
			}
		}

		ASSERT0(len);

		/*
		 * Finish off the header tcb, and start anew for the
		 * rest of the packet.
		 */
		desc_total += ixgbe_tcb_done(tcb);
		tcb = NULL;
	}

	/*
	 * Process each remaining segment in the packet -- either binding
	 * the dblk_t or copying the contents of the dblk_t to an already
	 * bound buffer. When we copy, we will accumulate consecutive small
	 * (less than copy_thresh bytes) segments into a single tcb buffer
	 * until no more can fit (or we encounter a segment larger than
	 * copy_thresh and bind the dblk_t).
	 *
	 * Both ixgbe_tx_bind() and ixgbe_tx_copy() will allocate new
	 * transmit control blocks (tcbs) as needed (and append them onto
	 * 'pending_list'). Both functions also replace 'tcb' with the new
	 * tcb when they allocate a new tcb.
	 *
	 * We stop trying to process the packet once the number of descriptors
	 * used equals IXGBE_TX_DESC_LIMIT. Even if we're copying into the
	 * IXGBE_TX_DESC_LIMIT-th descriptor, we won't have room to add a
	 * context descriptor (since we're already at the limit), so there's
	 * no point in continuing. We'll pull up the mblk_t (see below)
	 * and try again.
	 */
	while (mp != NULL && desc_total < IXGBE_TX_DESC_LIMIT) {
		uint8_t *rptr = mp->b_rptr + offset;
		int ret;

		len = MBLKL(mp) - offset;
		offset = 0;

		if (len > copy_thresh) {
			ret = ixgbe_tx_bind(tx_ring, &tcb, &pending_list, rptr,
			    len);
		} else {
			ret = ixgbe_tx_copy(tx_ring, &tcb, &pending_list, rptr,
			    len);
		}

		if (ret < 0)
			goto tx_failure;

		desc_total += ret;
		mp = mp->b_cont;
	}

	/* Finish off the last tcb */
	desc_total += ixgbe_tcb_done(tcb);

	/*
	 * The 82598/82599 chipsets have a limitation that no more than 32 tx
	 * descriptors can be transmitted out at one time. As noted above,
	 * we need to include space for a context descriptor in case it's
	 * necessary, so we do this even if desc_total == IXGBE_TX_DESC_LIMIT
	 * as well as when it exceeds the limit.
	 *
	 * If we exceed this limit, we take the hit, do a msgpullup(), and
	 * then try again. Our DMA attributes guarantee we should never use
	 * more than MAX_COOKIE (18) descriptors to map a single mblk_t, so we
	 * should only need to retry once.
	 */
	if (desc_total >= IXGBE_TX_DESC_LIMIT) {
		/* We shouldn't hit this path twice */
		VERIFY0(limit_retry);

		tx_ring->stat_break_tbd_limit++;

		/* Release all the tcbs we used previously */
		ixgbe_put_free_list(tx_ring, &pending_list);
		desc_total = 0;
		offset = 0;

		pull_mp = msgpullup(orig_mp, -1);
		if (pull_mp == NULL) {
			tx_ring->reschedule = B_TRUE;
			return (orig_mp);
		}

		mp = pull_mp;
		limit_retry = B_TRUE;
		goto retry;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycle will fail and cause a false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for the
	 * transmit, then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->stat_fail_no_tbd++;
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	/*
	 * Attach the mblk_t we've set up to the last control block.
	 * This is only done once we know there are enough free descriptors
	 * to transmit so that the cleanup in tx_failure doesn't try to
	 * call freemsg() on mp (since we will want to return it).
	 */
	tcb->mp = (pull_mp != NULL) ? pull_mp : orig_mp;

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that tx is done, if we pulled up the original message, we
	 * can free the original message since it is no longer being
	 * used.
	 */
	if (pull_mp != NULL) {
		freemsg(orig_mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If transmission fails, we need to free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * tcb->mp should not be set until we know we can transmit (see above),
	 * so it should always be NULL if we get here.
	 */
	VERIFY3P(tcb->mp, ==, NULL);

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (orig_mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer. Return -1 on error,
 * otherwise return the number of descriptors we've completed in this call.
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t **tcbp,
    link_list_t *pending_list, const void *buf, size_t len)
{
	tx_control_block_t *tcb = *tcbp;
	dma_buffer_t *tx_buf;
	uint32_t desc_num = 0;

	/*
	 * We need a new tcb -- either the current one (tcb) is NULL because
	 * we just started, tcb is being used for DMA, or tcb isn't large
	 * enough to hold the contents we need to copy.
	 */
	if (tcb == NULL || tcb->tx_type == USE_DMA ||
	    tcb->tx_buf.len + len > tcb->tx_buf.size) {
		tx_control_block_t *newtcb;

		newtcb = ixgbe_get_free_list(tx_ring, pending_list);
		if (newtcb == NULL)
			return (-1);

		newtcb->tx_type = USE_COPY;

		if (tcb != NULL)
			desc_num += ixgbe_tcb_done(tcb);
		*tcbp = tcb = newtcb;
	}

	ASSERT3S(tcb->tx_type, ==, USE_COPY);
	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(buf, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA. Returns -1 on error, otherwise it
 * returns the number of descriptors completed in this call. This count
 * can include descriptors that weren't filled in by the current call to
 * ixgbe_tx_bind() but were being used (but not yet completed) in previous
 * calls to ixgbe_tx_bind() or ixgbe_tx_copy().
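 *
 * Note that the tcb allocated here is appended to 'pending_list' by
 * ixgbe_get_free_list() before the DMA bind is attempted, so on a bind
 * failure the caller's tx_failure path reclaims it via
 * ixgbe_put_free_list().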
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t **tcbp,
    link_list_t *pending_list, uint8_t *buf, size_t len)
{
	tx_control_block_t *tcb = NULL;
	uint_t desc_num = 0;
	int status;

	tcb = ixgbe_get_free_list(tx_ring, pending_list);
	if (tcb == NULL)
		return (-1);

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)buf, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, NULL, NULL);

	if (status != DDI_DMA_MAPPED) {
		tx_ring->stat_fail_dma_bind++;
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;

	/*
	 * If there was an old tcb, we're about to replace it. Finish
	 * setting up the old tcb so we can replace it with the new one.
	 */
	if (*tcbp != NULL)
		desc_num += ixgbe_tcb_done(*tcbp);

	*tcbp = tcb;
	return (desc_num);
}

/*
 * Once we're done populating a tcb (either by binding or copying into
 * a buffer in the tcb), get it ready for tx and return the number of
 * descriptors used.
 */
static uint_t
ixgbe_tcb_done(tx_control_block_t *tcb)
{
	uint_t desc_num = 0;

	if (tcb->tx_type == USE_DMA) {
		const ddi_dma_cookie_t *c;

		for (c = ddi_dma_cookie_iter(tcb->tx_dma_handle, NULL);
		    c != NULL;
		    c = ddi_dma_cookie_iter(tcb->tx_dma_handle, c)) {
			/*
			 * Save the address and length to the private data
			 * structure of the tx control block, which will be
			 * used to fill the tx descriptor ring after all the
			 * fragments are processed.
			 */
			ixgbe_save_desc(tcb, c->dmac_laddress, c->dmac_size);
			desc_num++;
		}
	} else if (tcb->tx_type == USE_COPY) {
		dma_buffer_t *tx_buf = &tcb->tx_buf;

		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	} else {
		panic("invalid tcb type");
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t lsocksum;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	mac_lso_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * First, get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
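	 *
	 * In the loops below, 'size' accumulates the number of bytes from
	 * the start of the packet through the end of the current fragment,
	 * and 'len' is the length of the current fragment, so the byte at
	 * packet offset 'offset' is found at mp->b_rptr + offset + len - size.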
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in the VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	lsocksum = HCK_PARTIALCKSUM;
	ctx->l3_proto = etype;
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, the TCP checksum field of
			 * the packet also needs to be filled with the
			 * pseudo-header checksum over:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * Currently the tcp/ip stack has already done it.
			 */
			lsocksum |= HCK_IPV4_HDRCKSUM;
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		/*
		 * We need to zero out the length in the header.
		 */
		if (ctx->lso_flag) {
			offset = offsetof(ip6_t, ip6_plen) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;
		}

		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		/*
		 * LSO relies on tx h/w checksum, so we drop the packet here
		 * if the h/w checksum flags are not set.
		 */
		if ((ctx->hcksum_flags & lsocksum) != lsocksum) {
			IXGBE_DEBUGLOG_2(NULL, "ixgbe_tx: h/w checksum flags "
			    "are not set for LSO, found 0x%x, needed bits 0x%x",
			    ctx->hcksum_flags, lsocksum);
			return (-1);
		}

		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk with the
	 * stored data of the last context descriptor. The data that
	 * need to be checked are:
	 *	hcksum_flags
	 *	l3_proto
	 *	l4_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * If any one of the above items changes, a new context descriptor
	 * will be needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->l3_proto != last->l3_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	/*
	 * When we have a TX context set up, we enforce that the ethertype is
	 * either IPv4 or IPv6 in ixgbe_get_context().
	 */
	if (ctx->lso_flag || ctx->hcksum_flags & HCK_IPV4_HDRCKSUM) {
		if (ctx->l3_proto == ETHERTYPE_IP) {
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		} else {
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		}
	}

	if (ctx->lso_flag || ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list corresponds strictly 1:1 to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block is added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
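	 *
	 * For example, a tx control block whose descriptors occupy ring
	 * slots i, i + 1 and i + 2 is stored only in work_list[i]; the
	 * other two work list slots remain NULL.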
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * The paylen field is set in first_tbd depending on the MAC type:
	 * 82599, X540 and X550 require the packet length in the paylen field
	 * with or without LSO, while 82598 ignores it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;

	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_X550EM_a:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;

	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs the End Of Packet (EOP)
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer, to avoid the race condition
	 * where the tx descriptors have been transmitted and tbd_free has
	 * been increased by the tx recycling before the counter is
	 * decremented here.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If that last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors cannot be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet-level recycling is used here.
		 * For 82598, there's no such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
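		 * (ixgbe_free_tcb() resets last_index to MAX_TX_RING_SIZE,
		 * which can never be a valid ring index, so it serves as a
		 * "not set" sentinel.)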
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * Recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area, so we still need
	 * to sync the head write-back value for the kernel.
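	 *
	 * Rather than syncing the whole descriptor area (as the DMA_SYNC()
	 * call shown below would), only the single uint32_t head write-back
	 * word that follows the last descriptor is synced.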
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbinding the previously bound DMA handle, and resetting
 * other control fields.
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	if (tcb == NULL)
		return;

	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list.
 * Returns the tx control block and appends it to list.
 *
 * The atomic operation on the number of available tx control blocks
 * in the free list is used to keep this routine mutually exclusive
 * with ixgbe_put_free_list().
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *list)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of free tx control blocks
	 * in the free list.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0) {
		tx_ring->stat_fail_no_tcb++;
		return (NULL);
	}

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	LIST_PUSH_TAIL(list, &tcb->link);
	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back on the free list.
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	for (tcb = (tx_control_block_t *)LIST_GET_HEAD(pending_list);
	    tcb != NULL;
	    tcb = (tx_control_block_t *)LIST_GET_NEXT(pending_list, tcb)) {
		/*
		 * Despite the name, ixgbe_free_tcb() just releases the
		 * resources in tcb, but does not free tcb itself.
		 */
		ixgbe_free_tcb(tcb);
	}

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of free tx control blocks
	 * in the free list. This operation must be placed
	 * under the protection of the lock.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}