/*
 * This file is provided under a CDDLv1 license.  When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 * http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * **********************************************************************
 *									*
 * Module Name:								*
 *   e1000g_tx.c							*
 *									*
 * Abstract:								*
 *   This file contains routines that handle transmit: they set up	*
 *   the hardware to send the data pointed to by the packet out	*
 *   onto the physical medium.						*
 *									*
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, boolean_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t e1000g_retrieve_context(mblk_t *, context_data_t *, size_t);
static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    context_data_t *);
static void e1000g_fill_context_descriptor(context_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

/*
 * e1000g_free_tx_swpkt - free up the tx sw packet
 *
 * Unbind the previously bound DMA handle for a given
 * transmit sw packet and reset the sw packet data.
 */
void
e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
{
	switch (packet->data_transfer_type) {
	case USE_BCOPY:
		packet->tx_buf->len = 0;
		break;
#ifdef __sparc
	case USE_DVMA:
		dvma_unload(packet->tx_dma_handle, 0, -1);
		break;
#endif
	case USE_DMA:
		(void) ddi_dma_unbind_handle(packet->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * The mblk has been stripped off the sw packet
	 * and will be freed in a triggered soft intr.
	 */
	ASSERT(packet->mp == NULL);

	packet->data_transfer_type = USE_NONE;
	packet->num_mblk_frag = 0;
	packet->num_desc = 0;
}
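
/*
 * e1000g_m_tx - GLDv3 transmit entry point
 *
 * Sends as many mblks from the chain as resources allow.  Any unsent
 * remainder is returned to the MAC layer, which holds on to it and
 * retries after the driver calls mac_tx_update() from its reclaim path.
 */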
mblk_t *
e1000g_m_tx(void *arg, mblk_t *mp)
{
	struct e1000g *Adapter = (struct e1000g *)arg;
	mblk_t *next;

	rw_enter(&Adapter->chip_lock, RW_READER);

	if ((Adapter->e1000g_state & E1000G_SUSPENDED) ||
	    !(Adapter->e1000g_state & E1000G_STARTED) ||
	    (Adapter->link_state != LINK_STATE_UP)) {
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if (!e1000g_send(Adapter, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	rw_exit(&Adapter->chip_lock);
	return (mp);
}

/*
 * e1000g_send - send packets onto the wire
 *
 * Called from e1000g_m_tx with an mblk ready to send.  This
 * routine sets up the transmit descriptors and sends the data to
 * the wire.  It also pushes the just-transmitted packet to
 * the used tx sw packet list.
 */
static boolean_t
e1000g_send(struct e1000g *Adapter, mblk_t *mp)
{
	p_tx_sw_packet_t packet;
	LIST_DESCRIBER pending_list;
	size_t len;
	size_t msg_size;
	uint32_t frag_count;
	int desc_count;
	uint32_t desc_total;
	uint32_t bcopy_thresh;
	uint32_t hdr_frag_len;
	boolean_t tx_undersize_flag;
	mblk_t *nmp;
	mblk_t *tmp;
	mblk_t *new_mp;
	mblk_t *pre_mp;
	mblk_t *next_mp;
	e1000g_tx_ring_t *tx_ring;
	context_data_t cur_context;

	tx_ring = Adapter->tx_ring;
	bcopy_thresh = Adapter->tx_bcopy_thresh;

	/* Get the total size and the number of fragments of the message */
	tx_undersize_flag = B_FALSE;
	frag_count = 0;
	msg_size = 0;
	for (nmp = mp; nmp; nmp = nmp->b_cont) {
		frag_count++;
		msg_size += MBLKL(nmp);
	}

	/* retrieve and compute information for the context descriptor */
	if (!e1000g_retrieve_context(mp, &cur_context, msg_size)) {
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Make sure the packet is less than the allowed size
	 */
	if (!cur_context.lso_flag &&
	    (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
		/*
		 * For an oversized packet, we'll just drop it.
		 * So we return B_TRUE here.
		 */
		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
		    "Tx packet out of bound. length = %d \n", msg_size);
		E1000G_STAT(tx_ring->stat_over_size);
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Check and reclaim tx descriptors.
	 * This low-water-mark check should be done all the time, as
	 * transmit interrupt delay can deliver transmit interrupts a
	 * little late, which can cause problems reaping Tx descriptors:
	 * we may run short of them before any transmit interrupt arrives.
	 */
	if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
		(void) e1000g_recycle(tx_ring);
		E1000G_DEBUG_STAT(tx_ring->stat_recycle);

		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
			goto tx_no_resource;
		}
	}

	/*
	 * If the message size is less than the minimum ethernet packet size,
	 * we'll use bcopy to send it, and pad it to 60 bytes later.
	 */
	if (msg_size < ETHERMIN) {
		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
		tx_undersize_flag = B_TRUE;
	}

	/* Initialize variables */
	desc_count = 1;	/* The initial value should be greater than 0 */
	desc_total = 0;
	new_mp = NULL;
	QUEUE_INIT_LIST(&pending_list);

	/* Process each mblk fragment and fill tx descriptors */
	/*
	 * The software should guarantee that the LSO packet header
	 * (MAC + IP + TCP) fits within one descriptor.  Here we reallocate
	 * and refill the header if it is not physically contiguous.
	 */
	if (cur_context.lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		next_mp = mp;
		pre_mp = NULL;
		while (len < cur_context.hdr_len) {
			pre_mp = next_mp;
			next_mp = next_mp->b_cont;
			len += MBLKL(next_mp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == cur_context.hdr_len)
			goto adjust_threshold;

		hdr_frag_len = cur_context.hdr_len - (len - MBLKL(next_mp));
		/*
		 * There are three cases in which we need to reallocate a mblk
		 * for the last header fragment:
		 *
		 * 1. the header is in multiple mblks and the last fragment
		 * shares the same mblk with the payload
		 *
		 * 2. the header is in a single mblk shared with the payload
		 * and the header is not physically contiguous
		 *
		 * 3. there is a 4 KB boundary within the header or within the
		 * 64 bytes following the end of the header.  This case may
		 * cause TCP data corruption.
		 *
		 * The workaround for cases #2 and #3 is:
		 *   Assuming standard Ethernet/IP/TCP headers of 54 bytes,
		 *   this means that the buffer (containing the headers) should
		 *   not start within 118 bytes before a 4 KB boundary.  For
		 *   example, 128-byte alignment for this buffer could be used
		 *   to fulfill this condition.
		 */
		if ((next_mp != mp) ||
		    (P2NPHASE((uintptr_t)next_mp->b_rptr,
		    E1000_LSO_FIRST_DESC_ALIGNMENT_BOUNDARY_4K)
		    < E1000_LSO_FIRST_DESC_ALIGNMENT)) {
			E1000G_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * reallocate the mblk for the last header fragment;
			 * it is expected to be bcopied into the pre-allocated
			 * page-aligned buffer
			 */
			new_mp = allocb(hdr_frag_len, 0);
			if (!new_mp)
				return (B_FALSE);
			bcopy(next_mp->b_rptr, new_mp->b_rptr, hdr_frag_len);
			/* link the new header fragment with the other parts */
			new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
			new_mp->b_cont = next_mp;
			if (pre_mp)
				pre_mp->b_cont = new_mp;
			else
				mp = new_mp;
			next_mp->b_rptr += hdr_frag_len;
			frag_count++;
		}
adjust_threshold:
		/*
		 * adjust the bcopy threshold to guarantee that the
		 * header is copied using bcopy
		 */
		if (bcopy_thresh < cur_context.hdr_len)
			bcopy_thresh = cur_context.hdr_len;
	}

	packet = NULL;
	nmp = mp;
	while (nmp) {
		tmp = nmp->b_cont;

		len = MBLKL(nmp);
		/* Check zero length mblks */
		if (len == 0) {
			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
			/*
			 * If no packet buffer has been used yet, or we just
			 * completed processing a buffer, then skip the empty
			 * mblk fragment.
			 * Otherwise, there's still a pending buffer that
			 * needs to be processed (tx_copy).
			 */
			if (desc_count > 0) {
				nmp = tmp;
				continue;
			}
		}

		/*
		 * Get a new TxSwPacket to process mblk buffers.
		 */
		if (desc_count > 0) {
			mutex_enter(&tx_ring->freelist_lock);
			packet = (p_tx_sw_packet_t)
			    QUEUE_POP_HEAD(&tx_ring->free_list);
			mutex_exit(&tx_ring->freelist_lock);

			if (packet == NULL) {
				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
				    "No Tx SwPacket available\n");
				E1000G_STAT(tx_ring->stat_no_swpkt);
				goto tx_send_failed;
			}
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
		}

		ASSERT(packet);
		/*
		 * If the size of the fragment is less than the tx_bcopy_thresh
		 * we'll use bcopy; otherwise, we'll use DMA binding.
		 */
		if ((len <= bcopy_thresh) || tx_undersize_flag) {
			desc_count =
			    e1000g_tx_copy(tx_ring, packet, nmp,
			    tx_undersize_flag);
			E1000G_DEBUG_STAT(tx_ring->stat_copy);
		} else {
			desc_count =
			    e1000g_tx_bind(tx_ring, packet, nmp);
			E1000G_DEBUG_STAT(tx_ring->stat_bind);
		}

		if (desc_count > 0)
			desc_total += desc_count;
		else if (desc_count < 0)
			goto tx_send_failed;

		nmp = tmp;
	}

	/* Assign the message to the last sw packet */
	ASSERT(packet);
	ASSERT(packet->mp == NULL);
	packet->mp = mp;

	/* Try to recycle the tx descriptors again */
	if (tx_ring->tbd_avail < (desc_total + 3)) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
		(void) e1000g_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of available tx descriptors is not enough for
	 * transmit (one redundant descriptor and one hw checksum context
	 * descriptor are included), then return failure.
	 */
	if (tx_ring->tbd_avail < (desc_total + 3)) {
		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
		    "No Enough Tx descriptors\n");
		E1000G_STAT(tx_ring->stat_no_desc);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_send_failed;
	}

	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);

	mutex_exit(&tx_ring->tx_lock);

	ASSERT(desc_count > 0);

	/* Send successful */
	return (B_TRUE);

tx_send_failed:
	/* Restore mp to original */
	if (new_mp) {
		if (pre_mp) {
			pre_mp->b_cont = next_mp;
		}
		new_mp->b_cont = NULL;
		freemsg(new_mp);

		next_mp->b_rptr -= hdr_frag_len;
	}

	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Free pending TxSwPackets */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	while (packet) {
		packet->mp = NULL;
		e1000g_free_tx_swpkt(packet);
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return pending TxSwPackets to the "Free" list */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	E1000G_STAT(tx_ring->stat_send_fail);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);

tx_no_resource:
	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);
}
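
/*
 * e1000g_retrieve_context - gather offload metadata for a packet
 *
 * Collects the LSO and hardware-checksum information attached to the
 * message and, for LSO, computes the header and payload lengths used to
 * build the context descriptor.  Returns B_FALSE for an LSO packet that
 * lacks the required checksum flags, in which case the caller drops the
 * packet.
 */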
static boolean_t
e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context,
    size_t msg_size)
{
	uintptr_t ip_start;
	uintptr_t tcp_start;
	mblk_t *nmp;
	uint32_t lsoflags;
	uint32_t mss;

	bzero(cur_context, sizeof (context_data_t));

	/* first check lso information */
	mac_lso_get(mp, &mss, &lsoflags);

	/* retrieve checksum info */
	mac_hcksum_get(mp, &cur_context->cksum_start,
	    &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
	/* retrieve ethernet header size */
	if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid ==
	    htons(ETHERTYPE_VLAN))
		cur_context->ether_header_size =
		    sizeof (struct ether_vlan_header);
	else
		cur_context->ether_header_size =
		    sizeof (struct ether_header);

	if (lsoflags & HW_LSO) {
		ASSERT(mss != 0);

		/* free the invalid packet */
		if (mss == 0 ||
		    !((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
		    (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
			return (B_FALSE);
		}
		cur_context->mss = (uint16_t)mss;
		cur_context->lso_flag = B_TRUE;

		/*
		 * Some fields are cleared for the hardware to fill in.
		 * We don't assume that the Ethernet, IP and TCP headers
		 * are always in the same mblk fragment, but we do assume
		 * that each header is contained within one mblk fragment
		 * and that the Ethernet header is in the first mblk
		 * fragment.
		 */
		nmp = mp;
		ip_start = (uintptr_t)(nmp->b_rptr)
		    + cur_context->ether_header_size;
		if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
			ip_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (ip_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		tcp_start = ip_start +
		    IPH_HDR_LENGTH((ipha_t *)ip_start);
		if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
			tcp_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (tcp_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		cur_context->hdr_len = cur_context->ether_header_size
		    + IPH_HDR_LENGTH((ipha_t *)ip_start)
		    + TCP_HDR_LENGTH((tcph_t *)tcp_start);
		((ipha_t *)ip_start)->ipha_length = 0;
		((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
		/* calculate the TCP packet payload length */
		cur_context->pay_len = msg_size - cur_context->hdr_len;
	}
	return (B_TRUE);
}
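
/*
 * e1000g_check_context - decide whether a new context descriptor is needed
 *
 * Compares the offload parameters of the current packet with those cached
 * from the last context descriptor written (tx_ring->pre_context) and
 * returns B_TRUE if the hardware context must be reloaded.
 */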
static boolean_t
e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
{
	boolean_t context_reload;
	context_data_t *pre_context;
	struct e1000g *Adapter;

	context_reload = B_FALSE;
	pre_context = &tx_ring->pre_context;
	Adapter = tx_ring->adapter;

	/*
	 * The following code determines whether the context descriptor
	 * needs to be reloaded.  The conditions are ordered by how
	 * likely they are to change.
	 */
	/*
	 * workaround for 82546EB: the context descriptor must be reloaded
	 * for every LSO/hw-checksum packet if LSO is enabled.
	 */
	if (Adapter->lso_premature_issue &&
	    Adapter->lso_enable &&
	    (cur_context->cksum_flags != 0)) {

		context_reload = B_TRUE;
	} else if (cur_context->lso_flag) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->pay_len != pre_context->pay_len) ||
		    (cur_context->mss != pre_context->mss) ||
		    (cur_context->hdr_len != pre_context->hdr_len) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	} else if (cur_context->cksum_flags != 0) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	}

	return (context_reload);
}

static int
e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
    context_data_t *cur_context)
{
	struct e1000g *Adapter;
	struct e1000_hw *hw;
	p_tx_sw_packet_t first_packet;
	p_tx_sw_packet_t packet;
	p_tx_sw_packet_t previous_packet;
	boolean_t context_reload;
	struct e1000_tx_desc *first_data_desc;
	struct e1000_tx_desc *next_desc;
	struct e1000_tx_desc *descriptor;
	struct e1000_data_desc zeroed;
	int desc_count;
	boolean_t buff_overrun_flag;
	int i;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	desc_count = 0;
	first_packet = NULL;
	first_data_desc = NULL;
	descriptor = NULL;
	first_packet = NULL;
	packet = NULL;
	buff_overrun_flag = B_FALSE;
	zeroed.upper.data = 0;

	next_desc = tx_ring->tbd_next;

	/* Context descriptor reload check */
	context_reload = e1000g_check_context(tx_ring, cur_context);

	if (context_reload) {
		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);

		descriptor = next_desc;

		e1000g_fill_context_descriptor(cur_context,
		    (struct e1000_context_desc *)descriptor);

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
	}

	first_data_desc = next_desc;

	/*
	 * According to the documentation, the packet options field (POPTS) is
	 * "ignored except on the first data descriptor of a packet."  However,
	 * there is a bug in QEMU (638955) whereby the POPTS field within a
	 * given data descriptor is used to interpret that data descriptor --
	 * regardless of whether or not the descriptor is the first in a
	 * packet.  For a packet that spans multiple descriptors, the (virtual)
	 * HW checksum (either TCP/UDP or IP or both) will therefore _not_ be
	 * performed on descriptors after the first, resulting in incorrect
	 * checksums and mysteriously dropped/retransmitted packets.  Other
	 * drivers do not have this issue because they (harmlessly) set the
	 * POPTS field on every data descriptor to be the intended options for
	 * the entire packet.  To circumvent this QEMU bug, we engage in this
	 * same behavior iff the subsystem vendor and device IDs indicate that
	 * this is an emulated QEMU device (1af4,1100).
	 */
	if (hw->subsystem_vendor_id == 0x1af4 &&
	    hw->subsystem_device_id == 0x1100 &&
	    cur_context->cksum_flags) {
		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
			zeroed.upper.fields.popts |= E1000_TXD_POPTS_IXSM;

		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
			zeroed.upper.fields.popts |= E1000_TXD_POPTS_TXSM;
	}

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
	while (packet) {
		ASSERT(packet->num_desc);

		for (i = 0; i < packet->num_desc; i++) {
			ASSERT(tx_ring->tbd_avail > 0);

			descriptor = next_desc;
			descriptor->buffer_addr =
			    packet->desc[i].address;
			descriptor->lower.data =
			    packet->desc[i].length;

			/* Zero out status */
			descriptor->upper.data = zeroed.upper.data;

			descriptor->lower.data |=
			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* must set RS on every outgoing descriptor */
			descriptor->lower.data |=
			    E1000_TXD_CMD_RS;

			if (cur_context->lso_flag)
				descriptor->lower.data |= E1000_TXD_CMD_TSE;

			/* Check the wrap-around case */
			if (descriptor == tx_ring->tbd_last)
				next_desc = tx_ring->tbd_first;
			else
				next_desc++;

			desc_count++;

			/*
			 * workaround for 82546EB errata 33, hang in PCI-X
			 * systems due to 2k Buffer Overrun during Transmit
			 * Operation.  The workaround applies to all the Intel
			 * PCI-X chips.
			 */
			if (hw->bus.type == e1000_bus_type_pcix &&
			    descriptor == first_data_desc &&
			    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
			    > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
				/* modified the first descriptor */
				descriptor->lower.data &=
				    ~E1000G_TBD_LENGTH_MASK;
				descriptor->lower.flags.length =
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* insert a new descriptor */
				ASSERT(tx_ring->tbd_avail > 0);
				next_desc->buffer_addr =
				    packet->desc[0].address +
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
				next_desc->lower.data =
				    packet->desc[0].length -
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* Zero out status */
				next_desc->upper.data = zeroed.upper.data;

				next_desc->lower.data |=
				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
				/* must set RS on every outgoing descriptor */
				next_desc->lower.data |=
				    E1000_TXD_CMD_RS;

				if (cur_context->lso_flag)
					next_desc->lower.data |=
					    E1000_TXD_CMD_TSE;

				descriptor = next_desc;

				/* Check the wrap-around case */
				if (next_desc == tx_ring->tbd_last)
					next_desc = tx_ring->tbd_first;
				else
					next_desc++;

				desc_count++;
				buff_overrun_flag = B_TRUE;
			}
		}

		if (buff_overrun_flag) {
			packet->num_desc++;
			buff_overrun_flag = B_FALSE;
		}

		if (first_packet != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first SwPacket.
			 */
			first_packet->num_desc++;
			first_packet = NULL;
		}
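
		/*
		 * Record when this packet was handed to the hardware so
		 * that e1000g_recycle() can detect a stalled transmitter.
		 */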
		packet->tickstamp = ddi_get_lbolt64();

		previous_packet = packet;
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(pending_list, &packet->Link);
	}

	/*
	 * workaround for 82546EB errata 21, LSO Premature Descriptor Write Back
	 */
	if (Adapter->lso_premature_issue && cur_context->lso_flag &&
	    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
		/* modify the previous descriptor */
		descriptor->lower.data -= 4;

		/* insert a new descriptor */
		ASSERT(tx_ring->tbd_avail > 0);
		/* the lower 20 bits of lower.data is the length field */
		next_desc->buffer_addr =
		    descriptor->buffer_addr +
		    (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
		next_desc->lower.data = 4;

		/* Zero out status */
		next_desc->upper.data = zeroed.upper.data;
		/* It must be part of a LSO packet */
		next_desc->lower.data |=
		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
		    E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;

		descriptor = next_desc;

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
		/* update the number of descriptors */
		previous_packet->num_desc++;
	}

	ASSERT(descriptor);

	if (cur_context->cksum_flags) {
		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP) and
	 * Report Status (RS) set.
	 */
	if (Adapter->tx_intr_delay) {
		descriptor->lower.data |= E1000_TXD_CMD_IDE |
		    E1000_TXD_CMD_EOP;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_EOP;
	}

	/* Set append Ethernet CRC (IFCS) bits */
	if (cur_context->lso_flag) {
		first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_IFCS;
	}

	/*
	 * Sync the Tx descriptors DMA buffer
	 */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	tx_ring->tbd_next = next_desc;

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	if (hw->mac.type == e1000_82547)
		e1000g_82547_tx_move_tail(tx_ring);
	else
		E1000_WRITE_REG(hw, E1000_TDT(0),
		    (uint32_t)(next_desc - tx_ring->tbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	/* Put the pending SwPackets to the "Used" list */
	mutex_enter(&tx_ring->usedlist_lock);
	QUEUE_APPEND(&tx_ring->used_list, pending_list);
	tx_ring->tbd_avail -= desc_count;
	mutex_exit(&tx_ring->usedlist_lock);

	/* update LSO related data */
	if (context_reload)
		tx_ring->pre_context = *cur_context;

	return (desc_count);
}

/*
 * e1000g_tx_setup - setup tx data structures
 *
 * This routine initializes all of the transmit related
 * structures.  This includes the Transmit descriptors and
 * the tx_sw_packet structures.
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_tx_sw_packet_t packet;
	uint32_t i;
	uint32_t buf_high;
	uint32_t buf_low;
	uint32_t reg_tipg;
	uint32_t reg_tctl;
	int size;
	e1000g_tx_ring_t *tx_ring;

	hw = &Adapter->shared;
	tx_ring = Adapter->tx_ring;

	/* init the lists */
	/*
	 * Here we don't need to protect the lists with usedlist_lock
	 * and freelist_lock, since they are already protected by
	 * chip_lock.
	 */
	QUEUE_INIT_LIST(&tx_ring->used_list);
	QUEUE_INIT_LIST(&tx_ring->free_list);

	/* Go through and set up each SW_Packet */
	packet = tx_ring->packet_area;
	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
		/* Initialize this tx_sw_packet area */
		e1000g_free_tx_swpkt(packet);
		/* Add this tx_sw_packet to the free list */
		QUEUE_PUSH_TAIL(&tx_ring->free_list,
		    &packet->Link);
	}

	/* Setup TX descriptor pointers */
	tx_ring->tbd_next = tx_ring->tbd_first;
	tx_ring->tbd_oldest = tx_ring->tbd_first;

	/*
	 * Setup Hardware TX Registers
	 */
	/* Setup the Transmit Control Register (TCTL). */
	reg_tctl = E1000_READ_REG(hw, E1000_TCTL);
	reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
	    E1000_TCTL_RTLC;

	/* Enable the MULR bit */
	if (hw->bus.type == e1000_bus_type_pci_express)
		reg_tctl |= E1000_TCTL_MULR;

	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

	/* Setup HW Base and Length of Tx descriptor area */
	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
	size = E1000_READ_REG(hw, E1000_TDLEN(0));

	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

	/*
	 * Write the highest location first and work backward to the lowest.
	 * This is necessary for some adapter types to
	 * prevent write combining from occurring.
	 */
	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);

	/* Setup our HW Tx Head & Tail descriptor pointers */
	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
	E1000_WRITE_REG(hw, E1000_TDT(0), 0);

	/* Set the default values for the Tx Inter Packet Gap timer */
	if ((hw->mac.type == e1000_82542) &&
	    ((hw->revision_id == E1000_REVISION_2) ||
	    (hw->revision_id == E1000_REVISION_3))) {
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	} else if (hw->mac.type == e1000_80003es2lan) {
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
	} else {
		if (hw->phy.media_type == e1000_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}
	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

	/* Setup Transmit Interrupt Delay Value */
	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
		    Adapter->tx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
	}

	tx_ring->tbd_avail = Adapter->tx_desc_num;

	/* Initialize stored context information */
	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}

/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
	struct e1000g *Adapter;
	LIST_DESCRIBER pending_list;
	p_tx_sw_packet_t packet;
	mblk_t *mp;
	mblk_t *nmp;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	int64_t delta;

	/*
	 * This function examines each TxSwPacket in the 'used' queue.  If
	 * the e1000g is done with it, the associated resources (Tx
	 * descriptors) are "freed" and the TxSwPacket is returned to the
	 * 'free' queue.
	 */
	Adapter = tx_ring->adapter;
	delta = 0;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
	if (packet == NULL) {
		Adapter->stall_flag = B_FALSE;
		return (0);
	}

	desc_count = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Sync the Tx descriptor DMA buffer */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);
	if (e1000g_check_dma_handle(
	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (0);
	}

	/*
	 * While there are still TxSwPackets in the used queue, check them
	 */
	mutex_enter(&tx_ring->usedlist_lock);
	while ((packet =
	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) {

		/*
		 * Get hold of the next descriptor that the e1000g will
		 * report status back to (this will be the last descriptor
		 * of a given sw packet).  We only want to free the
		 * sw packet (and its resources) if the e1000g is done
		 * with ALL of the descriptors.  If the e1000g is done
		 * with the last one then it is done with all of them.
		 */
		ASSERT(packet->num_desc);
		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

		/* Check for wrap case */
		if (descriptor > tx_ring->tbd_last)
			descriptor -= Adapter->tx_desc_num;

		/*
		 * If the descriptor done bit is set, free the TxSwPacket
		 * and associated resources
		 */
		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
			QUEUE_POP_HEAD(&tx_ring->used_list);
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

			if (descriptor == tx_ring->tbd_last)
				tx_ring->tbd_oldest =
				    tx_ring->tbd_first;
			else
				tx_ring->tbd_oldest =
				    descriptor + 1;

			desc_count += packet->num_desc;
		} else {
			/*
			 * We found a sw packet that the e1000g is not done
			 * with; there is no reason to check the rest of
			 * the queue.
			 */
			delta = ddi_get_lbolt64() - packet->tickstamp;
			break;
		}
	}

	tx_ring->tbd_avail += desc_count;
	Adapter->tx_pkt_cnt += desc_count;

	mutex_exit(&tx_ring->usedlist_lock);

	if (desc_count == 0) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
		/*
		 * If the packet hasn't been sent out for several seconds and
		 * the transmitter is not paused by flow control, the
		 * transmitter is considered to be stalled.
		 */
		if ((delta > Adapter->stall_threshold) &&
		    !(E1000_READ_REG(&Adapter->shared,
		    E1000_STATUS) & E1000_STATUS_TXOFF)) {
			Adapter->stall_flag = B_TRUE;
		}
		return (0);
	}

	Adapter->stall_flag = B_FALSE;

	mp = NULL;
	nmp = NULL;
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	ASSERT(packet != NULL);
	while (packet != NULL) {
		if (packet->mp != NULL) {
			ASSERT(packet->mp->b_next == NULL);
			/* Assemble the message chain */
			if (mp == NULL) {
				mp = packet->mp;
				nmp = packet->mp;
			} else {
				nmp->b_next = packet->mp;
				nmp = packet->mp;
			}
			/* Disconnect the message from the sw packet */
			packet->mp = NULL;
		}

		/* Free the TxSwPackets */
		e1000g_free_tx_swpkt(packet);

		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return the TxSwPackets back to the FreeList */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	if (mp != NULL)
		freemsgchain(mp);

	return (desc_count);
}

/*
 * 82544 Coexistence issue workaround:
 * There are 2 issues.
 * 1. If a 32 bit split completion happens from P64H2 and another
 * agent drives a 64 bit request/split completion after ONLY
 * 1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then the
 * 82544 has a problem: in order to clock all the data in, it
 * looks at the REQ64# signal, and since it has changed so fast (i.e. 1
 * idle clock turn around), it will fail to clock all the data in.
 * Data coming from certain ending addresses has exposure to this issue.
 *
 * To detect this issue, the following equation can be used:
 * SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 * If SUM[3:0] is between 1 and 4, we will have this issue.
 *
 * ROOT CAUSE:
 * The erratum involves the 82544 PCIX elasticity FIFO implementations as
 * 64-bit FIFO's and flushing of the final partial-bytes corresponding
 * to the end of a requested read burst.  Under a specific burst condition
 * of ending-data alignment and 32-byte split-completions, the final
 * byte(s) of split-completion data require an extra clock cycle to flush
 * into 64-bit FIFO orientation.  An incorrect logic dependency on the
 * REQ64# signal occurring during this clock cycle may cause the
 * residual byte(s) to be lost, thereby rendering the internal DMA client
 * forever awaiting the final byte(s) for an outbound data-fetch.  The
 * erratum is confirmed to *only* occur if certain subsequent external
 * 64-bit PCIX bus transactions occur immediately (minimum possible bus
 * turn-around) following the odd-aligned 32-bit split-completion
 * containing the final byte(s).  Intel has confirmed that this has been
 * seen only with chipset/bridges which have the capability to provide
 * 32-bit split-completion data, and in the presence of newer PCIX bus
 * agents which fully-optimize the inter-transaction turn-around (zero
 * additional initiator latency when pre-granted bus ownership).
 *
 * This issue does not exist in PCI bus mode, when any agent is operating
 * in 32 bit only mode or on chipsets that do not do 32 bit split
 * completions for 64 bit read requests (Serverworks chipsets).  P64H2 does
 * 32 bit split completions for any read request that has bit 2 set to 1
 * for the requested address and read request size is more than 8 bytes.
 *
 * 2. Another issue is related to the 82544 driving DACs under a similar
 * scenario (32 bit split completion followed by 64 bit transaction with
 * only 1 cycle turnaround).  This issue is still being root caused.  We
 * think that both of these issues can be avoided if the following
 * workaround is implemented.  It seems the DAC issue is related to ending
 * addresses being 0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries
 * in the elasticity FIFO which does not get flushed due to the REQ64#
 * dependency.  We will only know the full story after it has been
 * simulated successfully by the HW team.
 *
 * WORKAROUND:
 * Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
 */
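
/*
 * For example, a fragment whose address ends in 0x5 with a length of 12
 * gives SUM[3:0] = ((0x5 + 0xC) & 0xF) = 0x1, which lands in the unsafe
 * 1-4 range, so e1000g_fill_82544_desc() below carves the final 4 bytes
 * into a second descriptor.
 */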
static uint32_t
e1000g_fill_82544_desc(uint64_t address,
    size_t length, p_desc_array_t desc_array)
{
	/*
	 * Since the issue is sensitive to both length and address,
	 * check the address first...
	 */
	uint32_t safe_terminator;

	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/*
	 * if it does not fall in the unsafe ranges 0x1-0x4 or 0x9-0xC,
	 * a single descriptor is fine
	 */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

static int
e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
    mblk_t *mp, boolean_t tx_undersize_flag)
{
	size_t len;
	size_t len1;
	dma_buffer_t *tx_buf;
	mblk_t *nmp;
	boolean_t finished;
	int desc_count;

	desc_count = 0;
	tx_buf = packet->tx_buf;
	len = MBLKL(mp);

	ASSERT((tx_buf->len + len) <= tx_buf->size);

	if (len > 0) {
		bcopy(mp->b_rptr,
		    tx_buf->address + tx_buf->len,
		    len);
		tx_buf->len += len;

		packet->num_mblk_frag++;
	}

	nmp = mp->b_cont;
	if (nmp == NULL) {
		finished = B_TRUE;
	} else {
		len1 = MBLKL(nmp);
		if ((tx_buf->len + len1) > tx_buf->size)
			finished = B_TRUE;
		else if (tx_undersize_flag)
			finished = B_FALSE;
		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
			finished = B_TRUE;
		else
			finished = B_FALSE;
	}

	if (finished) {
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
		    (tx_buf->len > len));

		/*
		 * If the packet is smaller than 64 bytes, which is the
		 * minimum ethernet packet size, pad the packet to make
		 * it at least 60 bytes.  The hardware will add 4 bytes
		 * for CRC.
		 */
		if (tx_undersize_flag) {
			ASSERT(tx_buf->len < ETHERMIN);

			bzero(tx_buf->address + tx_buf->len,
			    ETHERMIN - tx_buf->len);
			tx_buf->len = ETHERMIN;
		}

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
		else
			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#else
		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#endif

		packet->data_transfer_type = USE_BCOPY;

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    tx_buf->dma_address,
		    tx_buf->len);

		if (desc_count <= 0)
			return (-1);
	}

	return (desc_count);
}
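
/*
 * e1000g_tx_bind - bind an mblk fragment for DMA
 *
 * Maps the fragment with ddi_dma_addr_bind_handle() (or DVMA on sparc)
 * and fills in one sw descriptor per DMA cookie.  Returns the number of
 * descriptors used, or -1 on failure, in which case the caller unwinds
 * the whole packet.
 */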
static int
e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
{
	int j;
	int mystat;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_count;
	uint32_t desc_total;

	desc_total = 0;
	len = MBLKL(mp);

	/*
	 * ddi_dma_addr_bind_handle() allocates DMA resources for a
	 * memory object such that a device can perform DMA to or from
	 * the object.  DMA resources are allocated considering the
	 * device's DMA attributes as expressed by ddi_dma_attr(9S)
	 * (see ddi_dma_alloc_handle(9F)).
	 *
	 * ddi_dma_addr_bind_handle() fills in the first DMA cookie
	 * pointed to by cookiep with the appropriate address, length,
	 * and bus type.  *ccountp is set to the number of DMA cookies
	 * representing this DMA object.  Subsequent DMA cookies must be
	 * retrieved by calling ddi_dma_nextcookie(9F) the number of
	 * times specified by *ccountp - 1.
	 */
	switch (packet->dma_type) {
#ifdef __sparc
	case USE_DVMA:
		dvma_kaddr_load(packet->tx_dma_handle,
		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);

		dvma_sync(packet->tx_dma_handle, 0,
		    DDI_DMA_SYNC_FORDEV);

		ncookies = 1;
		packet->data_transfer_type = USE_DVMA;
		break;
#endif
	case USE_DMA:
		if ((mystat = ddi_dma_addr_bind_handle(
		    packet->tx_dma_handle, NULL,
		    (caddr_t)mp->b_rptr, len,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
		    &ncookies)) != DDI_DMA_MAPPED) {

			e1000g_log(tx_ring->adapter, CE_WARN,
			    "Couldn't bind mblk buffer to Tx DMA handle: "
			    "return: %X, Pkt: %X\n",
			    mystat, packet);
			return (-1);
		}

		/*
		 * An implicit ddi_dma_sync() is done when the
		 * ddi_dma_addr_bind_handle() is called.  So we
		 * don't need to explicitly call ddi_dma_sync()
		 * here any more.
		 */
		ASSERT(ncookies);
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
		    (ncookies > 1));

		/*
		 * The data_transfer_type value must be set after the handle
		 * has been bound, for it will be used in e1000g_free_tx_swpkt()
		 * to decide whether we need to unbind the handle.
		 */
		packet->data_transfer_type = USE_DMA;
		break;
	default:
		ASSERT(B_FALSE);
		break;
	}

	packet->num_mblk_frag++;

	/*
	 * Each address could span multiple cookies;
	 * each cookie will have one descriptor.
	 */
	for (j = ncookies; j != 0; j--) {

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		if (desc_count <= 0)
			return (-1);

		desc_total += desc_count;

		/*
		 * ddi_dma_nextcookie() retrieves subsequent DMA
		 * cookies for a DMA object.
		 * ddi_dma_nextcookie() fills in the
		 * ddi_dma_cookie(9S) structure pointed to by
		 * cookiep.  The ddi_dma_cookie(9S) structure
		 * must be allocated prior to calling
		 * ddi_dma_nextcookie().  The DMA cookie count
		 * returned by ddi_dma_buf_bind_handle(9F),
		 * ddi_dma_addr_bind_handle(9F), or
		 * ddi_dma_getwin(9F) indicates the number of DMA
		 * cookies a DMA object consists of.  If the
		 * resulting cookie count, N, is larger than 1,
		 * ddi_dma_nextcookie() must be called N-1 times
		 * to retrieve all DMA cookies.
		 */
		if (j > 1) {
			ddi_dma_nextcookie(packet->tx_dma_handle,
			    &dma_cookie);
		}
	}

	return (desc_total);
}
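
/*
 * e1000g_fill_context_descriptor - build a TCP/IP context descriptor
 *
 * Translates the offload information gathered by e1000g_retrieve_context()
 * into the IP/TCP checksum offset fields and, for LSO, the MSS and header
 * length fields of the hardware context descriptor.
 */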
static void
e1000g_fill_context_descriptor(context_data_t *cur_context,
    struct e1000_context_desc *context_desc)
{
	if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) {
		context_desc->lower_setup.ip_fields.ipcss =
		    cur_context->ether_header_size;
		context_desc->lower_setup.ip_fields.ipcso =
		    cur_context->ether_header_size +
		    offsetof(struct ip, ip_sum);
		context_desc->lower_setup.ip_fields.ipcse =
		    cur_context->ether_header_size +
		    cur_context->cksum_start - 1;
	} else
		context_desc->lower_setup.ip_config = 0;

	if (cur_context->cksum_flags & HCK_PARTIALCKSUM) {
		/*
		 * Packets with the same protocol have the following
		 * stuff and start offsets:
		 *
		 * | Protocol   | Stuff  | Start  | Checksum
		 * |            | Offset | Offset | Enable
		 * | IPv4 + TCP |  0x24  |  0x14  |  Yes
		 * | IPv4 + UDP |  0x1A  |  0x14  |  Yes
		 * | IPv6 + TCP |  0x20  |  0x10  |  No
		 * | IPv6 + UDP |  0x14  |  0x10  |  No
		 */
		context_desc->upper_setup.tcp_fields.tucss =
		    cur_context->cksum_start + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucso =
		    cur_context->cksum_stuff + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucse = 0;
	} else
		context_desc->upper_setup.tcp_config = 0;

	if (cur_context->lso_flag) {
		context_desc->tcp_seg_setup.fields.mss = cur_context->mss;
		context_desc->tcp_seg_setup.fields.hdr_len =
		    cur_context->hdr_len;
		/*
		 * workaround for 82546EB errata 23, status-writeback
		 * reporting (RS) should not be set on context or
		 * Null descriptors
		 */
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP
		    | E1000_TXD_DTYP_C | cur_context->pay_len;
	} else {
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_DTYP_C;
		/*
		 * Zero out the options for TCP Segmentation Offload
		 */
		context_desc->tcp_seg_setup.data = 0;
	}
}

static int
e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
    p_tx_sw_packet_t packet, uint64_t address, size_t size)
{
	struct e1000_hw *hw = &tx_ring->adapter->shared;
	p_sw_desc_t desc;

	if (hw->mac.type == e1000_82544) {
		if (hw->bus.type == e1000_bus_type_pcix)
			return (e1000g_tx_workaround_PCIX_82544(packet,
			    address, size));

		if (size > JUMBO_FRAG_LENGTH)
			return (e1000g_tx_workaround_jumbo_82544(packet,
			    address, size));
	}

	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

	desc = &packet->desc[packet->num_desc];
	desc->address = address;
	desc->length = (uint32_t)size;

	packet->num_desc++;

	return (1);
}
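
/*
 * e1000g_tx_workaround_PCIX_82544 - 82544 PCI-X buffer split
 *
 * Breaks a DMA cookie into chunks of at most MAX_TX_BUF_SIZE and lets
 * e1000g_fill_82544_desc() split off the last 4 bytes of any chunk whose
 * ending alignment would trigger the coexistence erratum described above.
 */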
static int
e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	size_t len;
	uint32_t counter;
	uint32_t array_elements;
	desc_array_t desc_array;

	/*
	 * Coexist Workaround for cordova: RP: 07/04/03
	 *
	 * RP: ERRATA: Workaround ISSUE:
	 * 8kb_buffer_Lockup CONTROLLER: Cordova
	 * Break up each buffer into 8kb pieces until the
	 * remainder is < 8kb
	 */
	size_left = size;
	desc_count = 0;

	while (size_left > 0) {
		if (size_left > MAX_TX_BUF_SIZE)
			len = MAX_TX_BUF_SIZE;
		else
			len = size_left;

		array_elements = e1000g_fill_82544_desc(address,
		    len, &desc_array);

		for (counter = 0; counter < array_elements; counter++) {
			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
			/*
			 * Put in the buffer address
			 */
			desc = &packet->desc[packet->num_desc];

			desc->address =
			    desc_array.descriptor[counter].address;
			desc->length =
			    desc_array.descriptor[counter].length;

			packet->num_desc++;
			desc_count++;
		} /* for */

		/*
		 * Update the buffer address and length
		 */
		address += MAX_TX_BUF_SIZE;
		size_left -= MAX_TX_BUF_SIZE;
	} /* while */

	return (desc_count);
}

static int
e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	uint32_t offset;

	/*
	 * Workaround for Jumbo Frames on Cordova
	 * PSD 06/01/2001
	 */
	size_left = size;
	desc_count = 0;
	offset = 0;
	while (size_left > 0) {
		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

		desc = &packet->desc[packet->num_desc];

		desc->address = address + offset;

		if (size_left > JUMBO_FRAG_LENGTH)
			desc->length = JUMBO_FRAG_LENGTH;
		else
			desc->length = (uint32_t)size_left;

		packet->num_desc++;
		desc_count++;

		offset += desc->length;
		size_left -= JUMBO_FRAG_LENGTH;
	}

	return (desc_count);
}

static void
e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
{
	struct e1000_hw *hw;
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct e1000_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = B_FALSE;
	struct e1000g *Adapter;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;

	while (hw_tdt != sw_tdt) {
		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == Adapter->tx_desc_num)
			hw_tdt = 0;

		if (eop) {
			if ((Adapter->link_duplex == HALF_DUPLEX) &&
			    (e1000_fifo_workaround_82547(hw, length)
			    != E1000_SUCCESS)) {
				if (tx_ring->timer_enable_82547) {
					ASSERT(tx_ring->timer_id_82547 == 0);
					tx_ring->timer_id_82547 =
					    timeout(e1000g_82547_timeout,
					    (void *)tx_ring,
					    drv_usectohz(10000));
				}
				return;

			} else {
				E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
				e1000_update_tx_fifo_head_82547(hw, length);
				length = 0;
			}
		}
	}
}

static void
e1000g_82547_timeout(void *arg)
{
	e1000g_tx_ring_t *tx_ring;

	tx_ring = (e1000g_tx_ring_t *)arg;

	mutex_enter(&tx_ring->tx_lock);

	tx_ring->timer_id_82547 = 0;
	e1000g_82547_tx_move_tail_work(tx_ring);

	mutex_exit(&tx_ring->tx_lock);
}
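
/*
 * e1000g_82547_tx_move_tail - 82547 Tx FIFO workaround
 *
 * On the 82547 the tail pointer is only advanced when the on-chip Tx FIFO
 * has room for the frame (see e1000_fifo_workaround_82547()).  If that
 * check fails in half-duplex mode, e1000g_82547_tx_move_tail_work() arms a
 * timer and retries the tail update later instead of writing TDT
 * immediately.
 */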
static void
e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));

	tid = tx_ring->timer_id_82547;
	tx_ring->timer_id_82547 = 0;
	if (tid != 0) {
		tx_ring->timer_enable_82547 = B_FALSE;
		mutex_exit(&tx_ring->tx_lock);

		(void) untimeout(tid);

		mutex_enter(&tx_ring->tx_lock);
	}
	tx_ring->timer_enable_82547 = B_TRUE;
	e1000g_82547_tx_move_tail_work(tx_ring);
}

/*
 * This is part of a workaround for the I219, see e1000g_flush_desc_rings() for
 * more information.
 *
 * We need to clear any potential pending descriptors from the tx_ring.  As
 * we're about to reset the device, we don't care about the data that we hand
 * it.
 */
void
e1000g_flush_tx_ring(struct e1000g *Adapter)
{
	struct e1000_hw *hw = &Adapter->shared;
	e1000g_tx_ring_t *tx_ring = &Adapter->tx_ring[0];
	uint32_t tctl, txd_lower = E1000_TXD_CMD_IFCS;
	uint16_t size = 512;
	struct e1000_tx_desc *desc;

	tctl = E1000_READ_REG(hw, E1000_TCTL);
	E1000_WRITE_REG(hw, E1000_TCTL, tctl | E1000_TCTL_EN);

	desc = tx_ring->tbd_next;
	if (tx_ring->tbd_next == tx_ring->tbd_last)
		tx_ring->tbd_next = tx_ring->tbd_first;
	else
		tx_ring->tbd_next++;

	/* We just need to set any valid address, so we use the ring itself */
	desc->buffer_addr = tx_ring->tbd_dma_addr;
	desc->lower.data = LE_32(txd_lower | size);
	desc->upper.data = 0;

	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);
	E1000_WRITE_REG(hw, E1000_TDT(0),
	    (uint32_t)(tx_ring->tbd_next - tx_ring->tbd_first));
	(void) E1000_READ_REG(hw, E1000_STATUS);
	usec_delay(250);
}