1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright(c) 2007-2010 Intel Corporation. All rights reserved. 24 */ 25 26 /* 27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 28 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 29 * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved. 30 * Copyright 2021 Joyent, Inc. 
31 */ 32 33 #include "ixgbe_sw.h" 34 35 static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t **, 36 link_list_t *, const void *, size_t); 37 static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t **, 38 link_list_t *, uint8_t *, size_t); 39 static uint_t ixgbe_tcb_done(tx_control_block_t *); 40 static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *, 41 ixgbe_tx_context_t *, size_t); 42 static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t); 43 static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *, 44 link_list_t *); 45 46 static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *); 47 static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *, 48 ixgbe_tx_context_t *); 49 static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *, 50 ixgbe_tx_context_t *); 51 52 /* 53 * ixgbe_ring_tx 54 * 55 * To transmit one mblk through one specified ring. 56 * 57 * One mblk can consist of several fragments, each fragment 58 * will be processed with different methods based on the size. 59 * For the fragments with size less than the bcopy threshold, 60 * they will be processed by using bcopy; otherwise, they will 61 * be processed by using DMA binding. 62 * 63 * To process the mblk, for each fragment, we pass a pointer to the location 64 * of the current transmit control block (tcb) (initialized to NULL) to either 65 * ixgbe_tx_copy() or ixgbe_tx_bind() (based on the size of the mblk fragment). 66 * ixgbe_tx_copy() and ixgbe_tx_bind() will either continue to use the current 67 * if possible, or close out the current tcb, allocate a new tcb, and update 68 * the passed location (tx_control_block_t **) to reflect the new current tcb. 
 *
 * Since bound mblk fragments require their own tcb, the close, allocate new,
 * and update steps occur on every call to ixgbe_tx_bind(), but since
 * consecutive small mblk fragments can be combined into a single tcb, the
 * close, allocate new, and update steps may not occur on every call to
 * ixgbe_tx_copy(). If the current tcb is already being used to copy data and
 * we call ixgbe_tx_copy(), if there is enough room in the current tcb for
 * the current mblk fragment, we append the data from the mblk fragment. If
 * we call ixgbe_tx_copy() and the current tcb isn't being used to copy (i.e.
 * the previous iteration of the loop called ixgbe_tx_bind()), or doesn't
 * have enough space for the mblk fragment, we close out the current tcb,
 * grab a new tcb from the free list, and update the current tcb to the
 * newly obtained tcb.
 *
 * When LSO (large segment offload) is enabled, we first copy the packet
 * headers (ethernet, IP, and TCP/UDP) into their own descriptor before
 * processing the remainder of the packet. The remaining bytes of the packet
 * are then copied or mapped based on the fragment size as described above.
 *
 * Through the entire processing of a packet, we keep track of the number of
 * DMA descriptors being used (either bound or pre-bound buffers used for
 * copying) by this packet. Each tcb requires at least one DMA descriptor, but
 * may require more than one. When a tcb is closed by ixgbe_tx_bind() or
 * ixgbe_tx_copy(), it does so by calling ixgbe_tcb_done() which returns the
 * number of DMA descriptors that are closed (ready for the HW). Since the
 * hardware limits the number of descriptors that can be used to transmit a
 * single packet, if the total number of DMA descriptors required to transmit
 * this packet exceeds this limit, we perform a msgpullup() and try again.
 * Since our DMA attributes limit the number of DMA cookies allowed to
 * map a single span of memory to a value (MAX_COOKIE) less than the
 * maximum number of descriptors allowed for a packet (IXGBE_TX_DESC_LIMIT),
 * as long as sufficient tcbs are available, we should always be able to
 * process a packet that's contained in a single mblk_t (no additional
 * fragments).
 *
 * Once all of the tcbs have been setup, ixgbe_tx_fill_ring() is called to
 * setup the tx ring to transmit the tcbs and then tell the HW to start
 * transmitting. When transmission is complete, an interrupt is triggered
 * which calls the appropriate recycle routine to place the tcbs that were
 * used in transmission back in the free list. We may also try to
 * recycle any available tcbs when the size of the tcb free list gets low
 * or if the watchdog timer triggers.
 *
 */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *orig_mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	mblk_t *mp = orig_mp;
	mblk_t *pull_mp = NULL;
	tx_control_block_t *tcb;
	size_t mbsize, offset, len;
	uint32_t desc_total;
	uint32_t copy_thresh;
	int desc_num;
	ixgbe_tx_context_t tx_context, *ctx = NULL;
	link_list_t pending_list;
	boolean_t limit_retry = B_FALSE;

	ASSERT(mp->b_next == NULL);

	/*
	 * Drop the packet if the device is not in a state where it can
	 * transmit (suspended, errored, overtemp, not started, or link down).
	 */
	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED) ||
	    ixgbe->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	mbsize = msgsize(mp);

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe could
		 * process, then discard this mblk, and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	}

	/*
	 * If we use too many descriptors (see comments below), we may do
	 * pull_mp = msgpullup(orig_mp, -1), and jump back to here. As such,
	 * any time we error return past here, we should check and free
	 * pull_mp if != NULL.
	 */
retry:
	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if the tbd_free is less than the
	 * overload_threshold, assert overload, return mp;
	 * and we need to re-schedule the tx again.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		tx_ring->stat_overload++;
		if (pull_mp != NULL)
			freemsg(pull_mp);
		return (orig_mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not put the data to the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);

	tcb = NULL;
	desc_num = 0;
	desc_total = 0;
	offset = 0;

	/*
	 * For LSO, we always copy the packet header (Ethernet + IP + TCP/UDP)
	 * into a single descriptor separate from the remaining data.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		size_t hdr_len;

		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;

		/*
		 * copy the first hdr_len bytes of mp (i.e. the Ethernet, IP,
		 * and TCP/UDP headers) into tcb.
		 */
		for (len = hdr_len; mp != NULL && len > 0; mp = mp->b_cont) {
			size_t mlen = MBLKL(mp);
			size_t amt = MIN(mlen, len);
			int ret;

			ret = ixgbe_tx_copy(tx_ring, &tcb, &pending_list,
			    mp->b_rptr, amt);
			/*
			 * Since we're trying to copy all of the headers into
			 * a single buffer in a single tcb, if ixgbe_tx_copy()
			 * returns anything but 0, it means either no tcbs
			 * are available (< 0), or while copying, we spilled
			 * over and couldn't fit all the headers into a
			 * single tcb.
			 */
			if (ret != 0) {
				if (ret > 0)
					tx_ring->stat_lso_header_fail++;
				goto tx_failure;
			}

			len -= amt;

			/*
			 * If we copy less than the full amount of this
			 * mblk_t, we have some amount to copy below.
			 */
			if (amt < mlen) {
				offset = amt;
				break;
			}
		}

		ASSERT0(len);

		/*
		 * Finish off the header tcb, and start anew for the
		 * rest of the packet.
		 */
		desc_total += ixgbe_tcb_done(tcb);
		tcb = NULL;
	}

	/*
	 * Process each remaining segment in the packet -- either binding
	 * the dblk_t or copying the contents of the dblk_t to an already
	 * bound buffer. When we copy, we will accumulate consecutive small
	 * (less than copy_thresh bytes) segments into a single tcb buffer
	 * until no more can fit (or we encounter a segment larger than
	 * copy_thresh and bind the dblk_t).
	 *
	 * Both ixgbe_tx_bind() and ixgbe_tx_copy() will allocate new
	 * transmit control blocks (tcb)s as needed (and append them onto
	 * 'pending_list'). Both functions also replace 'tcb' with the new
	 * tcb when they allocate a new tcb.
	 *
	 * We stop trying to process the packet once the number of descriptors
	 * used equals IXGBE_TX_DESC_LIMIT. Even if we're copying into the
	 * IXGBE_TX_DESC_LIMIT-th descriptor, we won't have room to add a
	 * context descriptor (since we're already at the limit), so there's
	 * no point in continuing. We'll pull up the mblk_t (see below)
	 * and try again.
	 */
	while (mp != NULL && desc_total < IXGBE_TX_DESC_LIMIT) {
		uint8_t *rptr = mp->b_rptr + offset;
		int ret;

		len = MBLKL(mp) - offset;
		offset = 0;

		if (len > copy_thresh) {
			ret = ixgbe_tx_bind(tx_ring, &tcb, &pending_list, rptr,
			    len);
		} else {
			ret = ixgbe_tx_copy(tx_ring, &tcb, &pending_list, rptr,
			    len);
		}

		if (ret < 0)
			goto tx_failure;

		desc_total += ret;
		mp = mp->b_cont;
	}

	/* Finish off the last tcb */
	desc_total += ixgbe_tcb_done(tcb);

	/*
	 * 82598/82599 chipset has a limitation that no more than 32 tx
	 * descriptors can be transmitted out at one time. As noted above,
	 * we need to include space for a context descriptor in case it's
	 * necessary, so we do this even if desc_total == IXGBE_TX_DESC_LIMIT
	 * as well as when it exceeds the limit.
	 *
	 * If we exceed this limit, we take the hit, do a msgpullup(), and
	 * then try again. Our DMA attributes guarantee we should never use
	 * more than MAX_COOKIE (18) descriptors to map a single mblk_t, so we
	 * should only need to retry once.
	 */
	if (desc_total >= IXGBE_TX_DESC_LIMIT) {
		/* We shouldn't hit this path twice */
		VERIFY0(limit_retry);

		tx_ring->stat_break_tbd_limit++;

		/* Release all the tcbs we used previously */
		ixgbe_put_free_list(tx_ring, &pending_list);
		desc_total = 0;
		offset = 0;

		pull_mp = msgpullup(orig_mp, -1);
		if (pull_mp == NULL) {
			tx_ring->reschedule = B_TRUE;
			return (orig_mp);
		}

		mp = pull_mp;
		limit_retry = B_TRUE;
		goto retry;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycle will fail and cause false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit
	 * then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->stat_fail_no_tbd++;
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	/*
	 * Attach the mblk_t we've setup to the last control block.
	 * This is only done once we know there are enough free descriptors
	 * to transmit so that the cleanup in tx_failure doesn't try to
	 * call freemsg() on mp (since we will want to return it).
	 */
	tcb->mp = (pull_mp != NULL) ? pull_mp : orig_mp;

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that tx is done, if we pulled up the original message, we
	 * can free the original message since it is no longer being
	 * used.
	 */
	if (pull_mp != NULL) {
		freemsg(orig_mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If transmission fails, need to free the pulled-up mblk.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (orig_mp);
}

/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer. Return -1 on error,
 * otherwise return the number of descriptors we've completed in this call.
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t **tcbp,
    link_list_t *pending_list, const void *buf, size_t len)
{
	tx_control_block_t *tcb = *tcbp;
	dma_buffer_t *tx_buf;
	uint32_t desc_num = 0;

	/*
	 * We need a new tcb -- either the current one (tcb) is NULL because
	 * we just started, tcb is being used for DMA, or tcb isn't large enough
	 * to hold the contents we need to copy.
	 */
	if (tcb == NULL || tcb->tx_type == USE_DMA ||
	    tcb->tx_buf.len + len > tcb->tx_buf.size) {
		tx_control_block_t *newtcb;

		newtcb = ixgbe_get_free_list(tx_ring, pending_list);
		if (newtcb == NULL)
			return (-1);

		newtcb->tx_type = USE_COPY;

		/* Close out the old tcb (if any) before switching to the new */
		if (tcb != NULL)
			desc_num += ixgbe_tcb_done(tcb);
		*tcbp = tcb = newtcb;
	}

	ASSERT3S(tcb->tx_type, ==, USE_COPY);
	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(buf, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	return (desc_num);
}

/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA. Returns -1 on error, otherwise it
 * returns the number of descriptors completed in this call. This count
 * can include descriptors that weren't filled in by the current call to
 * ixgbe_tx_bind() but were being used (but not yet completed) in previous
 * calls to ixgbe_tx_bind() or ixgbe_tx_copy().
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t **tcbp,
    link_list_t *pending_list, uint8_t *buf, size_t len)
{
	tx_control_block_t *tcb = NULL;
	uint_t desc_num = 0;
	int status;

	tcb = ixgbe_get_free_list(tx_ring, pending_list);
	if (tcb == NULL)
		return (-1);

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)buf, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, NULL, NULL);

	if (status != DDI_DMA_MAPPED) {
		tx_ring->stat_fail_dma_bind++;
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;

	/*
	 * If there was an old tcb, we're about to replace it. Finish
	 * setting up the old tcb so we can replace it with the new one.
	 */
	if (*tcbp != NULL)
		desc_num += ixgbe_tcb_done(*tcbp);

	*tcbp = tcb;
	return (desc_num);
}

/*
 * Once we're done populating a tcb (either by binding or copying into
 * a buffer in the tcb), get it ready for tx and return the number of
 * descriptors used.
 */
static uint_t
ixgbe_tcb_done(tx_control_block_t *tcb)
{
	uint_t desc_num = 0;

	if (tcb->tx_type == USE_DMA) {
		const ddi_dma_cookie_t *c;

		for (c = ddi_dma_cookie_iter(tcb->tx_dma_handle, NULL);
		    c != NULL;
		    c = ddi_dma_cookie_iter(tcb->tx_dma_handle, c)) {
			/*
			 * Save the address and length to the private data
			 * structure of the tx control block, which will be
			 * used to fill the tx descriptor ring after all the
			 * fragments are processed.
			 */
			ixgbe_save_desc(tcb, c->dmac_laddress, c->dmac_size);
			desc_num++;
		}
	} else if (tcb->tx_type == USE_COPY) {
		dma_buffer_t *tx_buf = &tcb->tx_buf;

		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	} else {
		panic("invalid tcb type");
	}

	return (desc_num);
}

/*
 * ixgbe_get_context
 *
 * Get the context information from the mblk
 */
static int
ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
{
	uint32_t start;
	uint32_t hckflags;
	uint32_t lsoflags;
	uint32_t lsocksum;
	uint32_t mss;
	uint32_t len;
	uint32_t size;
	uint32_t offset;
	unsigned char *pos;
	ushort_t etype;
	uint32_t mac_hdr_len;
	uint32_t l4_proto;
	uint32_t l4_hdr_len;

	ASSERT(mp != NULL);

	mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
	bzero(ctx, sizeof (ixgbe_tx_context_t));

	if (hckflags == 0) {
		return (0);
	}

	ctx->hcksum_flags = hckflags;

	mac_lso_get(mp, &mss, &lsoflags);
	ctx->mss = mss;
	ctx->lso_flag = (lsoflags == HW_LSO);

	etype = 0;
	mac_hdr_len = 0;
	l4_proto = 0;

	/*
	 * Firstly get the position of the ether_type/ether_tpid.
	 * Here we don't assume the ether (VLAN) header is fully included
	 * in one mblk fragment, so we go through the fragments to parse
	 * the ether type.
	 */
	size = len = MBLKL(mp);
	offset = offsetof(struct ether_header, ether_type);
	while (size <= offset) {
		mp = mp->b_cont;
		ASSERT(mp != NULL);
		len = MBLKL(mp);
		size += len;
	}
	pos = mp->b_rptr + offset + len - size;

	etype = ntohs(*(ushort_t *)(uintptr_t)pos);
	if (etype == ETHERTYPE_VLAN) {
		/*
		 * Get the position of the ether_type in VLAN header
		 */
		offset = offsetof(struct ether_vlan_header, ether_type);
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		etype = ntohs(*(ushort_t *)(uintptr_t)pos);
		mac_hdr_len = sizeof (struct ether_vlan_header);
	} else {
		mac_hdr_len = sizeof (struct ether_header);
	}

	/*
	 * Here we don't assume the IP(V6) header is fully included in
	 * one mblk fragment.
	 */
	lsocksum = HCK_PARTIALCKSUM;
	ctx->l3_proto = etype;
	switch (etype) {
	case ETHERTYPE_IP:
		if (ctx->lso_flag) {
			offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			offset = offsetof(ipha_t, ipha_hdr_checksum) +
			    mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;

			/*
			 * To perform ixgbe LSO, here also need to fill
			 * the tcp checksum field of the packet with the
			 * following pseudo-header checksum:
			 * (ip_source_addr, ip_destination_addr, l4_proto)
			 * Currently the tcp/ip stack has done it.
			 */
			lsocksum |= HCK_IPV4_HDRCKSUM;
		}

		offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	case ETHERTYPE_IPV6:
		/*
		 * We need to zero out the length in the header.
		 */
		if (ctx->lso_flag) {
			offset = offsetof(ip6_t, ip6_plen) + mac_hdr_len;
			while (size <= offset) {
				mp = mp->b_cont;
				ASSERT(mp != NULL);
				len = MBLKL(mp);
				size += len;
			}
			pos = mp->b_rptr + offset + len - size;
			*((uint16_t *)(uintptr_t)(pos)) = 0;
		}

		offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_proto = *(uint8_t *)pos;
		break;
	default:
		/* Unrecoverable error */
		IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
		return (-2);
	}

	if (ctx->lso_flag) {
		/*
		 * LSO relies on tx h/w checksum, so here will drop the packet
		 * if h/w checksum flag is not declared.
		 */
		if ((ctx->hcksum_flags & lsocksum) != lsocksum) {
			IXGBE_DEBUGLOG_2(NULL, "ixgbe_tx: h/w checksum flags "
			    "are not set for LSO, found 0x%x, needed bits 0x%x",
			    ctx->hcksum_flags, lsocksum);
			return (-1);
		}


		offset = mac_hdr_len + start;
		while (size <= offset) {
			mp = mp->b_cont;
			ASSERT(mp != NULL);
			len = MBLKL(mp);
			size += len;
		}
		pos = mp->b_rptr + offset + len - size;

		l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
	} else {
		/*
		 * l4 header length is only required for LSO
		 */
		l4_hdr_len = 0;
	}

	ctx->mac_hdr_len = mac_hdr_len;
	ctx->ip_hdr_len = start;
	ctx->l4_proto = l4_proto;
	ctx->l4_hdr_len = l4_hdr_len;

	return (0);
}

/*
 * ixgbe_check_context
 *
 * Check if a new context descriptor is needed
 */
static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
{
	ixgbe_tx_context_t *last;

	if (ctx == NULL)
		return (B_FALSE);

	/*
	 * Compare the context data retrieved from the mblk and the
	 * stored data of the last context descriptor. The data need
	 * to be checked are:
	 *	hcksum_flags
	 *	l4_proto
	 *	l3_proto
	 *	mac_hdr_len
	 *	ip_hdr_len
	 *	lso_flag
	 *	mss (only checked for LSO)
	 *	l4_hdr_len (only checked for LSO)
	 * Either one of the above data is changed, a new context descriptor
	 * will be needed.
	 */
	last = &tx_ring->tx_context;

	if ((ctx->hcksum_flags != last->hcksum_flags) ||
	    (ctx->l4_proto != last->l4_proto) ||
	    (ctx->l3_proto != last->l3_proto) ||
	    (ctx->mac_hdr_len != last->mac_hdr_len) ||
	    (ctx->ip_hdr_len != last->ip_hdr_len) ||
	    (ctx->lso_flag != last->lso_flag) ||
	    (ctx->lso_flag && ((ctx->mss != last->mss) ||
	    (ctx->l4_hdr_len != last->l4_hdr_len)))) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * ixgbe_fill_context
 *
 * Fill the context descriptor with hardware checksum information
 */
static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
    ixgbe_tx_context_t *ctx)
{
	/*
	 * Fill the context descriptor with the checksum
	 * context information we've got.
	 */
	ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
	ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
	    IXGBE_ADVTXD_MACLEN_SHIFT;

	ctx_tbd->type_tucmd_mlhl =
	    IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	/*
	 * When we have a TX context set up, we enforce that the ethertype is
	 * either IPv4 or IPv6 in ixgbe_get_context().
	 */
	if (ctx->lso_flag || ctx->hcksum_flags & HCK_IPV4_HDRCKSUM) {
		if (ctx->l3_proto == ETHERTYPE_IP) {
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		} else {
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		}
	}

	if (ctx->lso_flag || ctx->hcksum_flags & HCK_PARTIALCKSUM) {
		switch (ctx->l4_proto) {
		case IPPROTO_TCP:
			ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			break;
		case IPPROTO_UDP:
			/*
			 * We don't have to explicitly set:
			 *	ctx_tbd->type_tucmd_mlhl |=
			 *	    IXGBE_ADVTXD_TUCMD_L4T_UDP;
			 * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
			 */
			break;
		default:
			/* Unrecoverable error */
			IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
			break;
		}
	}

	ctx_tbd->seqnum_seed = 0;

	if (ctx->lso_flag) {
		ctx_tbd->mss_l4len_idx =
		    (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
		    (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
	} else {
		ctx_tbd->mss_l4len_idx = 0;
	}
}

/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list is strictly 1:1 corresponding to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block will be added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * Setting paylen in every first_tbd for all parts.
	 * 82599, X540 and X550 require the packet length in paylen field
	 * with or without LSO and 82598 will ignore it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;

	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_X550EM_a:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;

	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of packet needs End Of Packet (EOP),
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer to avoid the race condition -
	 * otherwise, before the counter tbd_free is decremented, the transmit
	 * of the tx descriptors may complete and the counter tbd_free would be
	 * increased by the tx recycling.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}

/*
 * ixgbe_save_desc
 *
 * Save the address/length pair to the private array
 * of the tx control block. The address/length pairs
 * will be filled into the tx descriptor ring later.
 */
static void
ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
{
	sw_desc_t *desc;

	desc = &tcb->desc[tcb->desc_num];
	desc->address = address;
	desc->length = length;

	tcb->desc_num++;
}

/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	/* Fast path: the whole ring is already free, nothing to recycle */
	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	/* FMA: bail out and latch an error if the DMA handle went bad */
	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors can not be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet level recycling is used here.
		 * For 82598, there's not such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not: ixgbe_free_tcb()
		 * resets last_index to this sentinel.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				/*
				 * Stop once we have consumed the tcb that
				 * owns the packet's last descriptor.
				 */
				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	/* Wake a blocked MAC tx path once enough descriptors are free */
	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	/* Fast path: the whole ring is already free, nothing to recycle */
	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area, we still need
	 * to sync the head write-back value for kernel.
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	/* FMA: bail out and latch an error if the DMA handle went bad */
	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head;	/* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	/* Wake a blocked MAC tx path once enough descriptors are free */
	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}

/*
 * ixgbe_free_tcb - free up the tx control block
 *
 * Free the resources of the tx control block, including
 * unbind the previously bound DMA handle, and reset other
 * control fields.  Note: the tcb itself is NOT freed; it is
 * returned to the free list by ixgbe_put_free_list().
 */
void
ixgbe_free_tcb(tx_control_block_t *tcb)
{
	if (tcb == NULL)
		return;

	switch (tcb->tx_type) {
	case USE_COPY:
		/*
		 * Reset the buffer length that is used for copy
		 */
		tcb->tx_buf.len = 0;
		break;
	case USE_DMA:
		/*
		 * Release the DMA resource that is used for
		 * DMA binding.
		 */
		(void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * Free the mblk
	 */
	if (tcb->mp != NULL) {
		freemsg(tcb->mp);
		tcb->mp = NULL;
	}

	/*
	 * Reset the tcb to the "unused" state; MAX_TX_RING_SIZE marks
	 * last_index as invalid for the recycle routines.
	 */
	tcb->tx_type = USE_NONE;
	tcb->last_index = MAX_TX_RING_SIZE;
	tcb->frag_num = 0;
	tcb->desc_num = 0;
}

/*
 * ixgbe_get_free_list - Get a free tx control block from the free list.
 * Returns the tx control block and appends it to list.
 *
 * The atomic operation on the number of the available tx control block
 * in the free list is used to keep this routine mutual exclusive with
 * the routine ixgbe_put_free_list.
 */
static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *list)
{
	tx_control_block_t *tcb;

	/*
	 * Check and update the number of the free tx control block
	 * in the free list.  A negative reservation means the free
	 * list is exhausted.
	 */
	if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0) {
		tx_ring->stat_fail_no_tcb++;
		return (NULL);
	}

	mutex_enter(&tx_ring->tcb_head_lock);

	tcb = tx_ring->free_list[tx_ring->tcb_head];
	ASSERT(tcb != NULL);
	tx_ring->free_list[tx_ring->tcb_head] = NULL;
	tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
	    tx_ring->free_list_size);

	mutex_exit(&tx_ring->tcb_head_lock);

	LIST_PUSH_TAIL(list, &tcb->link);
	return (tcb);
}

/*
 * ixgbe_put_free_list
 *
 * Put a list of used tx control blocks back to the free list
 *
 * A mutex is used here to ensure the serialization. The mutual exclusion
 * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
 * the atomic operation on the counter tcb_free.
 */
void
ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
{
	uint32_t index;
	int tcb_num;
	tx_control_block_t *tcb;

	/* First release the resources held by every tcb on the list */
	for (tcb = (tx_control_block_t *)LIST_GET_HEAD(pending_list);
	    tcb != NULL;
	    tcb = (tx_control_block_t *)LIST_GET_NEXT(pending_list, tcb)) {
		/*
		 * Despite the name, ixgbe_free_tcb() just releases the
		 * resources in tcb, but does not free tcb itself.
		 */
		ixgbe_free_tcb(tcb);
	}

	mutex_enter(&tx_ring->tcb_tail_lock);

	index = tx_ring->tcb_tail;

	/* Append each tcb at the tail of the circular free list */
	tcb_num = 0;
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	while (tcb != NULL) {
		ASSERT(tx_ring->free_list[index] == NULL);
		tx_ring->free_list[index] = tcb;

		tcb_num++;

		index = NEXT_INDEX(index, 1, tx_ring->free_list_size);

		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	tx_ring->tcb_tail = index;

	/*
	 * Update the number of the free tx control block
	 * in the free list. This operation must be placed
	 * under the protection of the lock: tcb_free must not
	 * become visible before the slots are actually filled.
	 */
	atomic_add_32(&tx_ring->tcb_free, tcb_num);

	mutex_exit(&tx_ring->tcb_tail_lock);
}