1 /* 2 * This file is provided under a CDDLv1 license. When using or 3 * redistributing this file, you may do so under this license. 4 * In redistributing this file this license must be included 5 * and no other modification of this header file is permitted. 6 * 7 * CDDL LICENSE SUMMARY 8 * 9 * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved. 10 * 11 * The contents of this file are subject to the terms of Version 12 * 1.0 of the Common Development and Distribution License (the "License"). 13 * 14 * You should have received a copy of the License with this software. 15 * You can obtain a copy of the License at 16 * http://www.opensolaris.org/os/licensing. 17 * See the License for the specific language governing permissions 18 * and limitations under the License. 19 */ 20 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms of the CDDLv1. 24 */ 25 26 /* 27 * ********************************************************************** 28 * * 29 * Module Name: * 30 * e1000g_tx.c * 31 * * 32 * Abstract: * 33 * This file contains some routines that take care of Transmit, * 34 * make the hardware to send the data pointed by the packet out * 35 * on to the physical medium. * 36 * * 37 * ********************************************************************** 38 */ 39 40 #include "e1000g_sw.h" 41 #include "e1000g_debug.h" 42 43 static boolean_t e1000g_send(struct e1000g *, mblk_t *); 44 static int e1000g_tx_copy(e1000g_tx_ring_t *, 45 p_tx_sw_packet_t, mblk_t *, boolean_t); 46 static int e1000g_tx_bind(e1000g_tx_ring_t *, 47 p_tx_sw_packet_t, mblk_t *); 48 static boolean_t e1000g_retreive_context(mblk_t *, context_data_t *, size_t); 49 static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *); 50 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *, 51 context_data_t *); 52 static void e1000g_fill_context_descriptor(context_data_t *, 53 struct e1000_context_desc *); 54 static int e1000g_fill_tx_desc(e1000g_tx_ring_t *, 55 p_tx_sw_packet_t, uint64_t, size_t); 56 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length, 57 p_desc_array_t desc_array); 58 static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t); 59 static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t); 60 static void e1000g_82547_timeout(void *); 61 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *); 62 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *); 63 64 #ifndef E1000G_DEBUG 65 #pragma inline(e1000g_tx_copy) 66 #pragma inline(e1000g_tx_bind) 67 #pragma inline(e1000g_retreive_context) 68 #pragma inline(e1000g_check_context) 69 #pragma inline(e1000g_fill_tx_ring) 70 #pragma inline(e1000g_fill_context_descriptor) 71 #pragma inline(e1000g_fill_tx_desc) 72 #pragma inline(e1000g_fill_82544_desc) 73 #pragma inline(e1000g_tx_workaround_PCIX_82544) 74 #pragma inline(e1000g_tx_workaround_jumbo_82544) 75 #pragma inline(e1000g_free_tx_swpkt) 76 #endif 77 78 /* 79 * e1000g_free_tx_swpkt - free up the tx sw packet 80 * 81 * Unbind the previously bound DMA handle for a given 82 * transmit sw packet. And reset the sw packet data. 83 */ 84 void 85 e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet) 86 { 87 switch (packet->data_transfer_type) { 88 case USE_BCOPY: 89 packet->tx_buf->len = 0; 90 break; 91 #ifdef __sparc 92 case USE_DVMA: 93 dvma_unload(packet->tx_dma_handle, 0, -1); 94 break; 95 #endif 96 case USE_DMA: 97 (void) ddi_dma_unbind_handle(packet->tx_dma_handle); 98 break; 99 default: 100 break; 101 } 102 103 /* 104 * The mblk has been stripped off the sw packet 105 * and will be freed in a triggered soft intr. 106 */ 107 ASSERT(packet->mp == NULL); 108 109 packet->data_transfer_type = USE_NONE; 110 packet->num_mblk_frag = 0; 111 packet->num_desc = 0; 112 } 113 114 mblk_t * 115 e1000g_m_tx(void *arg, mblk_t *mp) 116 { 117 struct e1000g *Adapter = (struct e1000g *)arg; 118 mblk_t *next; 119 120 rw_enter(&Adapter->chip_lock, RW_READER); 121 122 if ((Adapter->chip_state != E1000G_START) || 123 (Adapter->link_state != LINK_STATE_UP)) { 124 freemsgchain(mp); 125 mp = NULL; 126 } 127 128 while (mp != NULL) { 129 next = mp->b_next; 130 mp->b_next = NULL; 131 132 if (!e1000g_send(Adapter, mp)) { 133 mp->b_next = next; 134 break; 135 } 136 137 mp = next; 138 } 139 140 rw_exit(&Adapter->chip_lock); 141 return (mp); 142 } 143 144 /* 145 * e1000g_send - send packets onto the wire 146 * 147 * Called from e1000g_m_tx with an mblk ready to send. this 148 * routine sets up the transmit descriptors and sends data to 149 * the wire. It also pushes the just transmitted packet to 150 * the used tx sw packet list. 151 */ 152 static boolean_t 153 e1000g_send(struct e1000g *Adapter, mblk_t *mp) 154 { 155 p_tx_sw_packet_t packet; 156 LIST_DESCRIBER pending_list; 157 size_t len; 158 size_t msg_size; 159 uint32_t frag_count; 160 int desc_count; 161 uint32_t desc_total; 162 boolean_t tx_undersize_flag; 163 mblk_t *nmp; 164 mblk_t *tmp; 165 e1000g_tx_ring_t *tx_ring; 166 context_data_t cur_context; 167 168 tx_ring = Adapter->tx_ring; 169 170 /* Get the total size and frags number of the message */ 171 tx_undersize_flag = B_FALSE; 172 frag_count = 0; 173 msg_size = 0; 174 for (nmp = mp; nmp; nmp = nmp->b_cont) { 175 frag_count++; 176 msg_size += MBLKL(nmp); 177 } 178 179 /* retreive and compute information for context descriptor */ 180 if (!e1000g_retreive_context(mp, &cur_context, msg_size)) { 181 freemsg(mp); 182 return (B_TRUE); 183 } 184 185 /* 186 * Make sure the packet is less than the allowed size 187 */ 188 if (!cur_context.lso_flag && 189 (msg_size > Adapter->max_frame_size - ETHERFCSL)) { 190 /* 191 * For the over size packet, we'll just drop it. 192 * So we return B_TRUE here. 193 */ 194 E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL, 195 "Tx packet out of bound. length = %d \n", msg_size); 196 E1000G_STAT(tx_ring->stat_over_size); 197 freemsg(mp); 198 return (B_TRUE); 199 } 200 201 /* 202 * Check and reclaim tx descriptors. 203 * This low water mark check should be done all the time as 204 * Transmit interrupt delay can produce Transmit interrupts little 205 * late and that may cause few problems related to reaping Tx 206 * Descriptors... As you may run short of them before getting any 207 * transmit interrupt... 208 */ 209 if (tx_ring->resched_needed || 210 (tx_ring->tbd_avail < Adapter->tx_recycle_thresh)) { 211 (void) e1000g_recycle(tx_ring); 212 E1000G_DEBUG_STAT(tx_ring->stat_recycle); 213 214 if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) { 215 E1000G_DEBUG_STAT(tx_ring->stat_lack_desc); 216 goto tx_no_resource; 217 } 218 } 219 220 /* 221 * If the message size is less than the minimum ethernet packet size, 222 * we'll use bcopy to send it, and padd it to 60 bytes later. 223 */ 224 if (msg_size < ETHERMIN) { 225 E1000G_DEBUG_STAT(tx_ring->stat_under_size); 226 tx_undersize_flag = B_TRUE; 227 } 228 229 /* Initialize variables */ 230 desc_count = 1; /* The initial value should be greater than 0 */ 231 desc_total = 0; 232 QUEUE_INIT_LIST(&pending_list); 233 234 /* Process each mblk fragment and fill tx descriptors */ 235 packet = NULL; 236 nmp = mp; 237 while (nmp) { 238 tmp = nmp->b_cont; 239 240 len = MBLKL(nmp); 241 /* Check zero length mblks */ 242 if (len == 0) { 243 E1000G_DEBUG_STAT(tx_ring->stat_empty_frags); 244 /* 245 * If there're no packet buffers have been used, 246 * or we just completed processing a buffer, then 247 * skip the empty mblk fragment. 248 * Otherwise, there's still a pending buffer that 249 * needs to be processed (tx_copy). 250 */ 251 if (desc_count > 0) { 252 nmp = tmp; 253 continue; 254 } 255 } 256 257 /* 258 * Get a new TxSwPacket to process mblk buffers. 259 */ 260 if (desc_count > 0) { 261 mutex_enter(&tx_ring->freelist_lock); 262 packet = (p_tx_sw_packet_t) 263 QUEUE_POP_HEAD(&tx_ring->free_list); 264 mutex_exit(&tx_ring->freelist_lock); 265 266 if (packet == NULL) { 267 E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL, 268 "No Tx SwPacket available\n"); 269 E1000G_STAT(tx_ring->stat_no_swpkt); 270 goto tx_send_failed; 271 } 272 QUEUE_PUSH_TAIL(&pending_list, &packet->Link); 273 } 274 275 ASSERT(packet); 276 /* 277 * If the size of the fragment is less than the tx_bcopy_thresh 278 * we'll use bcopy; Otherwise, we'll use DMA binding. 279 */ 280 if ((len <= Adapter->tx_bcopy_thresh) || tx_undersize_flag) { 281 desc_count = 282 e1000g_tx_copy(tx_ring, packet, nmp, 283 tx_undersize_flag); 284 E1000G_DEBUG_STAT(tx_ring->stat_copy); 285 } else { 286 desc_count = 287 e1000g_tx_bind(tx_ring, packet, nmp); 288 E1000G_DEBUG_STAT(tx_ring->stat_bind); 289 } 290 291 if (desc_count > 0) 292 desc_total += desc_count; 293 else if (desc_count < 0) 294 goto tx_send_failed; 295 296 nmp = tmp; 297 } 298 299 /* Assign the message to the last sw packet */ 300 ASSERT(packet); 301 ASSERT(packet->mp == NULL); 302 packet->mp = mp; 303 304 /* Try to recycle the tx descriptors again */ 305 if (tx_ring->tbd_avail < (desc_total + 2)) { 306 E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry); 307 (void) e1000g_recycle(tx_ring); 308 } 309 310 mutex_enter(&tx_ring->tx_lock); 311 312 /* 313 * If the number of available tx descriptors is not enough for transmit 314 * (one redundant descriptor and one hw checksum context descriptor are 315 * included), then return failure. 316 */ 317 if (tx_ring->tbd_avail < (desc_total + 2)) { 318 E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL, 319 "No Enough Tx descriptors\n"); 320 E1000G_STAT(tx_ring->stat_no_desc); 321 mutex_exit(&tx_ring->tx_lock); 322 goto tx_send_failed; 323 } 324 325 desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context); 326 327 mutex_exit(&tx_ring->tx_lock); 328 329 ASSERT(desc_count > 0); 330 331 /* Send successful */ 332 return (B_TRUE); 333 334 tx_send_failed: 335 /* 336 * Enable Transmit interrupts, so that the interrupt routine can 337 * call mac_tx_update() when transmit descriptors become available. 338 */ 339 tx_ring->resched_needed = B_TRUE; 340 if (!Adapter->tx_intr_enable) 341 e1000g_mask_tx_interrupt(Adapter); 342 343 /* Free pending TxSwPackets */ 344 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list); 345 while (packet) { 346 packet->mp = NULL; 347 e1000g_free_tx_swpkt(packet); 348 packet = (p_tx_sw_packet_t) 349 QUEUE_GET_NEXT(&pending_list, &packet->Link); 350 } 351 352 /* Return pending TxSwPackets to the "Free" list */ 353 mutex_enter(&tx_ring->freelist_lock); 354 QUEUE_APPEND(&tx_ring->free_list, &pending_list); 355 mutex_exit(&tx_ring->freelist_lock); 356 357 E1000G_STAT(tx_ring->stat_send_fail); 358 359 /* Message will be scheduled for re-transmit */ 360 return (B_FALSE); 361 362 tx_no_resource: 363 /* 364 * Enable Transmit interrupts, so that the interrupt routine can 365 * call mac_tx_update() when transmit descriptors become available. 366 */ 367 tx_ring->resched_needed = B_TRUE; 368 if (!Adapter->tx_intr_enable) 369 e1000g_mask_tx_interrupt(Adapter); 370 371 /* Message will be scheduled for re-transmit */ 372 return (B_FALSE); 373 } 374 375 static boolean_t 376 e1000g_retreive_context(mblk_t *mp, context_data_t *cur_context, 377 size_t msg_size) 378 { 379 uintptr_t ip_start; 380 uintptr_t tcp_start; 381 mblk_t *nmp; 382 383 bzero(cur_context, sizeof (context_data_t)); 384 385 /* retrieve checksum info */ 386 hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start, 387 &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags); 388 /* retreive ethernet header size */ 389 if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid == 390 htons(ETHERTYPE_VLAN)) 391 cur_context->ether_header_size = 392 sizeof (struct ether_vlan_header); 393 else 394 cur_context->ether_header_size = 395 sizeof (struct ether_header); 396 397 if (cur_context->cksum_flags & HW_LSO) { 398 if ((cur_context->mss = DB_LSOMSS(mp)) != 0) { 399 /* free the invaid packet */ 400 if (!((cur_context->cksum_flags & HCK_PARTIALCKSUM) && 401 (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) { 402 return (B_FALSE); 403 } 404 cur_context->lso_flag = B_TRUE; 405 /* 406 * Some fields are cleared for the hardware to fill 407 * in. We don't assume Ethernet header, IP header and 408 * TCP header are always in the same mblk fragment, 409 * while we assume each header is always within one 410 * mblk fragment and Ethernet header is always in the 411 * first mblk fragment. 412 */ 413 nmp = mp; 414 ip_start = (uintptr_t)(nmp->b_rptr) 415 + cur_context->ether_header_size; 416 if (ip_start >= (uintptr_t)(nmp->b_wptr)) { 417 ip_start = (uintptr_t)nmp->b_cont->b_rptr 418 + (ip_start - (uintptr_t)(nmp->b_wptr)); 419 nmp = nmp->b_cont; 420 } 421 tcp_start = ip_start + 422 IPH_HDR_LENGTH((ipha_t *)ip_start); 423 if (tcp_start >= (uintptr_t)(nmp->b_wptr)) { 424 tcp_start = (uintptr_t)nmp->b_cont->b_rptr 425 + (tcp_start - (uintptr_t)(nmp->b_wptr)); 426 nmp = nmp->b_cont; 427 } 428 cur_context->hdr_len = cur_context->ether_header_size 429 + IPH_HDR_LENGTH((ipha_t *)ip_start) 430 + TCP_HDR_LENGTH((tcph_t *)tcp_start); 431 ((ipha_t *)ip_start)->ipha_length = 0; 432 ((ipha_t *)ip_start)->ipha_hdr_checksum = 0; 433 /* calculate the TCP packet payload length */ 434 cur_context->pay_len = msg_size - cur_context->hdr_len; 435 } 436 } 437 return (B_TRUE); 438 } 439 440 static boolean_t 441 e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context) 442 { 443 boolean_t context_reload; 444 context_data_t *pre_context; 445 struct e1000g *Adapter; 446 447 context_reload = B_FALSE; 448 pre_context = &tx_ring->pre_context; 449 Adapter = tx_ring->adapter; 450 451 /* 452 * The following code determine if the context descriptor is 453 * needed to be reloaded. The sequence of the conditions is 454 * made by their possibilities of changing. 455 */ 456 /* 457 * workaround for 82546EB, context descriptor must be reloaded 458 * per LSO/hw_cksum packet if LSO is enabled. 459 */ 460 if (Adapter->lso_premature_issue && 461 Adapter->lso_enable && 462 (cur_context->cksum_flags != 0)) { 463 464 context_reload = B_TRUE; 465 } else if (cur_context->lso_flag) { 466 if ((cur_context->cksum_flags != pre_context->cksum_flags) || 467 (cur_context->pay_len != pre_context->pay_len) || 468 (cur_context->mss != pre_context->mss) || 469 (cur_context->hdr_len != pre_context->hdr_len) || 470 (cur_context->cksum_stuff != pre_context->cksum_stuff) || 471 (cur_context->cksum_start != pre_context->cksum_start) || 472 (cur_context->ether_header_size != 473 pre_context->ether_header_size)) { 474 475 context_reload = B_TRUE; 476 } 477 } else if (cur_context->cksum_flags != 0) { 478 if ((cur_context->cksum_flags != pre_context->cksum_flags) || 479 (cur_context->cksum_stuff != pre_context->cksum_stuff) || 480 (cur_context->cksum_start != pre_context->cksum_start) || 481 (cur_context->ether_header_size != 482 pre_context->ether_header_size)) { 483 484 context_reload = B_TRUE; 485 } 486 } 487 488 return (context_reload); 489 } 490 491 static int 492 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list, 493 context_data_t *cur_context) 494 { 495 struct e1000g *Adapter; 496 struct e1000_hw *hw; 497 p_tx_sw_packet_t first_packet; 498 p_tx_sw_packet_t packet; 499 p_tx_sw_packet_t previous_packet; 500 boolean_t context_reload; 501 struct e1000_tx_desc *first_data_desc; 502 struct e1000_tx_desc *next_desc; 503 struct e1000_tx_desc *descriptor; 504 int desc_count; 505 boolean_t buff_overrun_flag; 506 int i; 507 508 Adapter = tx_ring->adapter; 509 hw = &Adapter->shared; 510 511 desc_count = 0; 512 first_packet = NULL; 513 first_data_desc = NULL; 514 descriptor = NULL; 515 first_packet = NULL; 516 packet = NULL; 517 buff_overrun_flag = B_FALSE; 518 519 next_desc = tx_ring->tbd_next; 520 521 /* Context descriptor reload check */ 522 context_reload = e1000g_check_context(tx_ring, cur_context); 523 524 if (context_reload) { 525 first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list); 526 527 descriptor = next_desc; 528 529 e1000g_fill_context_descriptor(cur_context, 530 (struct e1000_context_desc *)descriptor); 531 532 /* Check the wrap-around case */ 533 if (descriptor == tx_ring->tbd_last) 534 next_desc = tx_ring->tbd_first; 535 else 536 next_desc++; 537 538 desc_count++; 539 } 540 541 first_data_desc = next_desc; 542 543 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list); 544 while (packet) { 545 ASSERT(packet->num_desc); 546 547 for (i = 0; i < packet->num_desc; i++) { 548 ASSERT(tx_ring->tbd_avail > 0); 549 550 descriptor = next_desc; 551 descriptor->buffer_addr = 552 packet->desc[i].address; 553 descriptor->lower.data = 554 packet->desc[i].length; 555 556 /* Zero out status */ 557 descriptor->upper.data = 0; 558 559 descriptor->lower.data |= 560 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 561 /* must set RS on every outgoing descriptor */ 562 descriptor->lower.data |= 563 E1000_TXD_CMD_RS; 564 565 if (cur_context->lso_flag) 566 descriptor->lower.data |= E1000_TXD_CMD_TSE; 567 568 /* Check the wrap-around case */ 569 if (descriptor == tx_ring->tbd_last) 570 next_desc = tx_ring->tbd_first; 571 else 572 next_desc++; 573 574 desc_count++; 575 576 /* 577 * workaround for 82546EB errata 33, hang in PCI-X 578 * systems due to 2k Buffer Overrun during Transmit 579 * Operation. The workaround applies to all the Intel 580 * PCI-X chips. 581 */ 582 if (hw->bus.type == e1000_bus_type_pcix && 583 descriptor == first_data_desc && 584 ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) 585 > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) { 586 /* modified the first descriptor */ 587 descriptor->lower.data &= 588 ~E1000G_TBD_LENGTH_MASK; 589 descriptor->lower.flags.length = 590 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 591 592 /* insert a new descriptor */ 593 ASSERT(tx_ring->tbd_avail > 0); 594 next_desc->buffer_addr = 595 packet->desc[0].address + 596 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 597 next_desc->lower.data = 598 packet->desc[0].length - 599 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 600 601 /* Zero out status */ 602 next_desc->upper.data = 0; 603 604 next_desc->lower.data |= 605 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 606 /* must set RS on every outgoing descriptor */ 607 next_desc->lower.data |= 608 E1000_TXD_CMD_RS; 609 610 if (cur_context->lso_flag) 611 next_desc->lower.data |= 612 E1000_TXD_CMD_TSE; 613 614 descriptor = next_desc; 615 616 /* Check the wrap-around case */ 617 if (next_desc == tx_ring->tbd_last) 618 next_desc = tx_ring->tbd_first; 619 else 620 next_desc++; 621 622 desc_count++; 623 buff_overrun_flag = B_TRUE; 624 } 625 } 626 627 if (buff_overrun_flag) { 628 packet->num_desc++; 629 buff_overrun_flag = B_FALSE; 630 } 631 632 if (first_packet != NULL) { 633 /* 634 * Count the checksum context descriptor for 635 * the first SwPacket. 636 */ 637 first_packet->num_desc++; 638 first_packet = NULL; 639 } 640 641 previous_packet = packet; 642 packet = (p_tx_sw_packet_t) 643 QUEUE_GET_NEXT(pending_list, &packet->Link); 644 } 645 646 /* 647 * workaround for 82546EB errata 21, LSO Premature Descriptor Write Back 648 */ 649 if (Adapter->lso_premature_issue && cur_context->lso_flag && 650 ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) { 651 /* modified the previous descriptor */ 652 descriptor->lower.data -= 4; 653 654 /* insert a new descriptor */ 655 ASSERT(tx_ring->tbd_avail > 0); 656 /* the lower 20 bits of lower.data is the length field */ 657 next_desc->buffer_addr = 658 descriptor->buffer_addr + 659 (descriptor->lower.data & E1000G_TBD_LENGTH_MASK); 660 next_desc->lower.data = 4; 661 662 /* Zero out status */ 663 next_desc->upper.data = 0; 664 /* It must be part of a LSO packet */ 665 next_desc->lower.data |= 666 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | 667 E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE; 668 669 descriptor = next_desc; 670 671 /* Check the wrap-around case */ 672 if (descriptor == tx_ring->tbd_last) 673 next_desc = tx_ring->tbd_first; 674 else 675 next_desc++; 676 677 desc_count++; 678 /* update the number of descriptors */ 679 previous_packet->num_desc++; 680 } 681 682 ASSERT(descriptor); 683 684 if (cur_context->cksum_flags) { 685 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) 686 ((struct e1000_data_desc *)first_data_desc)-> 687 upper.fields.popts |= E1000_TXD_POPTS_IXSM; 688 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) 689 ((struct e1000_data_desc *)first_data_desc)-> 690 upper.fields.popts |= E1000_TXD_POPTS_TXSM; 691 } 692 693 /* 694 * Last Descriptor of Packet needs End Of Packet (EOP), Report 695 * Status (RS) set. 696 */ 697 if (Adapter->tx_intr_delay) { 698 descriptor->lower.data |= E1000_TXD_CMD_IDE | 699 E1000_TXD_CMD_EOP; 700 } else { 701 descriptor->lower.data |= E1000_TXD_CMD_EOP; 702 } 703 704 /* Set append Ethernet CRC (IFCS) bits */ 705 if (cur_context->lso_flag) { 706 first_data_desc->lower.data |= E1000_TXD_CMD_IFCS; 707 } else { 708 descriptor->lower.data |= E1000_TXD_CMD_IFCS; 709 } 710 711 /* 712 * Sync the Tx descriptors DMA buffer 713 */ 714 (void) ddi_dma_sync(tx_ring->tbd_dma_handle, 715 0, 0, DDI_DMA_SYNC_FORDEV); 716 717 tx_ring->tbd_next = next_desc; 718 719 /* 720 * Advance the Transmit Descriptor Tail (Tdt), this tells the 721 * FX1000 that this frame is available to transmit. 722 */ 723 if (hw->mac.type == e1000_82547) 724 e1000g_82547_tx_move_tail(tx_ring); 725 else 726 E1000_WRITE_REG(hw, E1000_TDT(0), 727 (uint32_t)(next_desc - tx_ring->tbd_first)); 728 729 if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) { 730 ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED); 731 Adapter->chip_state = E1000G_ERROR; 732 } 733 734 /* Put the pending SwPackets to the "Used" list */ 735 mutex_enter(&tx_ring->usedlist_lock); 736 QUEUE_APPEND(&tx_ring->used_list, pending_list); 737 tx_ring->tbd_avail -= desc_count; 738 mutex_exit(&tx_ring->usedlist_lock); 739 740 /* update LSO related data */ 741 if (context_reload) 742 tx_ring->pre_context = *cur_context; 743 744 return (desc_count); 745 } 746 747 748 /* 749 * e1000g_tx_setup - setup tx data structures 750 * 751 * This routine initializes all of the transmit related 752 * structures. This includes the Transmit descriptors, 753 * and the tx_sw_packet structures. 754 */ 755 void 756 e1000g_tx_setup(struct e1000g *Adapter) 757 { 758 struct e1000_hw *hw; 759 p_tx_sw_packet_t packet; 760 UINT i; 761 uint32_t buf_high; 762 uint32_t buf_low; 763 uint32_t reg_tipg; 764 uint32_t reg_tctl; 765 int size; 766 e1000g_tx_ring_t *tx_ring; 767 768 hw = &Adapter->shared; 769 tx_ring = Adapter->tx_ring; 770 771 /* init the lists */ 772 /* 773 * Here we don't need to protect the lists using the 774 * usedlist_lock and freelist_lock, for they have 775 * been protected by the chip_lock. 776 */ 777 QUEUE_INIT_LIST(&tx_ring->used_list); 778 QUEUE_INIT_LIST(&tx_ring->free_list); 779 780 /* Go through and set up each SW_Packet */ 781 packet = tx_ring->packet_area; 782 for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) { 783 /* Initialize this tx_sw_apcket area */ 784 e1000g_free_tx_swpkt(packet); 785 /* Add this tx_sw_packet to the free list */ 786 QUEUE_PUSH_TAIL(&tx_ring->free_list, 787 &packet->Link); 788 } 789 790 /* Setup TX descriptor pointers */ 791 tx_ring->tbd_next = tx_ring->tbd_first; 792 tx_ring->tbd_oldest = tx_ring->tbd_first; 793 794 /* 795 * Setup Hardware TX Registers 796 */ 797 /* Setup the Transmit Control Register (TCTL). */ 798 reg_tctl = E1000_READ_REG(hw, E1000_TCTL); 799 reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN | 800 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) | 801 (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) | 802 E1000_TCTL_RTLC; 803 804 /* Enable the MULR bit */ 805 if (hw->bus.type == e1000_bus_type_pci_express) 806 reg_tctl |= E1000_TCTL_MULR; 807 808 E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl); 809 810 /* Setup HW Base and Length of Tx descriptor area */ 811 size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc)); 812 E1000_WRITE_REG(hw, E1000_TDLEN(0), size); 813 size = E1000_READ_REG(hw, E1000_TDLEN(0)); 814 815 buf_low = (uint32_t)tx_ring->tbd_dma_addr; 816 buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32); 817 818 E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low); 819 E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high); 820 821 /* Setup our HW Tx Head & Tail descriptor pointers */ 822 E1000_WRITE_REG(hw, E1000_TDH(0), 0); 823 E1000_WRITE_REG(hw, E1000_TDT(0), 0); 824 825 /* Set the default values for the Tx Inter Packet Gap timer */ 826 if ((hw->mac.type == e1000_82542) && 827 ((hw->revision_id == E1000_REVISION_2) || 828 (hw->revision_id == E1000_REVISION_3))) { 829 reg_tipg = DEFAULT_82542_TIPG_IPGT; 830 reg_tipg |= 831 DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 832 reg_tipg |= 833 DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 834 } else if (hw->mac.type == e1000_80003es2lan) { 835 reg_tipg = DEFAULT_82543_TIPG_IPGR1; 836 reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << 837 E1000_TIPG_IPGR2_SHIFT; 838 } else { 839 if (hw->phy.media_type == e1000_media_type_fiber) 840 reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER; 841 else 842 reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER; 843 reg_tipg |= 844 DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 845 reg_tipg |= 846 DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 847 } 848 E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg); 849 850 /* Setup Transmit Interrupt Delay Value */ 851 E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay); 852 E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL, 853 "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay); 854 855 if (hw->mac.type >= e1000_82540) { 856 E1000_WRITE_REG(&Adapter->shared, E1000_TADV, 857 Adapter->tx_intr_abs_delay); 858 E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL, 859 "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay); 860 } 861 862 tx_ring->tbd_avail = Adapter->tx_desc_num; 863 864 /* Initialize stored context information */ 865 bzero(&(tx_ring->pre_context), sizeof (context_data_t)); 866 } 867 868 /* 869 * e1000g_recycle - recycle the tx descriptors and tx sw packets 870 */ 871 int 872 e1000g_recycle(e1000g_tx_ring_t *tx_ring) 873 { 874 struct e1000g *Adapter; 875 LIST_DESCRIBER pending_list; 876 p_tx_sw_packet_t packet; 877 mblk_t *mp; 878 mblk_t *nmp; 879 struct e1000_tx_desc *descriptor; 880 int desc_count; 881 int is_intr; 882 883 /* 884 * This function will examine each TxSwPacket in the 'used' queue 885 * if the e1000g is done with it then the associated resources (Tx 886 * Descriptors) will be "freed" and the TxSwPacket will be 887 * returned to the 'free' queue. 888 */ 889 Adapter = tx_ring->adapter; 890 891 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list); 892 if (packet == NULL) { 893 tx_ring->recycle_fail = 0; 894 tx_ring->stall_watchdog = 0; 895 return (0); 896 } 897 898 is_intr = servicing_interrupt(); 899 900 if (is_intr) 901 mutex_enter(&tx_ring->usedlist_lock); 902 else if (mutex_tryenter(&tx_ring->usedlist_lock) == 0) 903 return (0); 904 905 desc_count = 0; 906 QUEUE_INIT_LIST(&pending_list); 907 908 /* Sync the Tx descriptor DMA buffer */ 909 (void) ddi_dma_sync(tx_ring->tbd_dma_handle, 910 0, 0, DDI_DMA_SYNC_FORKERNEL); 911 if (e1000g_check_dma_handle( 912 tx_ring->tbd_dma_handle) != DDI_FM_OK) { 913 mutex_exit(&tx_ring->usedlist_lock); 914 ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED); 915 Adapter->chip_state = E1000G_ERROR; 916 return (0); 917 } 918 919 /* 920 * While there are still TxSwPackets in the used queue check them 921 */ 922 while ((packet = 923 (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) { 924 925 /* 926 * Get hold of the next descriptor that the e1000g will 927 * report status back to (this will be the last descriptor 928 * of a given sw packet). We only want to free the 929 * sw packet (and it resources) if the e1000g is done 930 * with ALL of the descriptors. If the e1000g is done 931 * with the last one then it is done with all of them. 932 */ 933 ASSERT(packet->num_desc); 934 descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1); 935 936 /* Check for wrap case */ 937 if (descriptor > tx_ring->tbd_last) 938 descriptor -= Adapter->tx_desc_num; 939 940 /* 941 * If the descriptor done bit is set free TxSwPacket and 942 * associated resources 943 */ 944 if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) { 945 QUEUE_POP_HEAD(&tx_ring->used_list); 946 QUEUE_PUSH_TAIL(&pending_list, &packet->Link); 947 948 if (descriptor == tx_ring->tbd_last) 949 tx_ring->tbd_oldest = 950 tx_ring->tbd_first; 951 else 952 tx_ring->tbd_oldest = 953 descriptor + 1; 954 955 desc_count += packet->num_desc; 956 957 if (is_intr && (desc_count >= Adapter->tx_recycle_num)) 958 break; 959 } else { 960 /* 961 * Found a sw packet that the e1000g is not done 962 * with then there is no reason to check the rest 963 * of the queue. 964 */ 965 break; 966 } 967 } 968 969 tx_ring->tbd_avail += desc_count; 970 Adapter->tx_pkt_cnt += desc_count; 971 972 mutex_exit(&tx_ring->usedlist_lock); 973 974 if (desc_count == 0) { 975 tx_ring->recycle_fail++; 976 E1000G_DEBUG_STAT(tx_ring->stat_recycle_none); 977 return (0); 978 } 979 980 tx_ring->recycle_fail = 0; 981 tx_ring->stall_watchdog = 0; 982 983 mp = NULL; 984 nmp = NULL; 985 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list); 986 ASSERT(packet != NULL); 987 while (packet != NULL) { 988 if (packet->mp != NULL) { 989 ASSERT(packet->mp->b_next == NULL); 990 /* Assemble the message chain */ 991 if (mp == NULL) { 992 mp = packet->mp; 993 nmp = packet->mp; 994 } else { 995 nmp->b_next = packet->mp; 996 nmp = packet->mp; 997 } 998 /* Disconnect the message from the sw packet */ 999 packet->mp = NULL; 1000 } 1001 1002 /* Free the TxSwPackets */ 1003 e1000g_free_tx_swpkt(packet); 1004 1005 packet = (p_tx_sw_packet_t) 1006 QUEUE_GET_NEXT(&pending_list, &packet->Link); 1007 } 1008 1009 /* Return the TxSwPackets back to the FreeList */ 1010 mutex_enter(&tx_ring->freelist_lock); 1011 QUEUE_APPEND(&tx_ring->free_list, &pending_list); 1012 mutex_exit(&tx_ring->freelist_lock); 1013 1014 if (mp != NULL) 1015 freemsgchain(mp); 1016 1017 return (desc_count); 1018 } 1019 /* 1020 * 82544 Coexistence issue workaround: 1021 * There are 2 issues. 1022 * 1. If a 32 bit split completion happens from P64H2 and another 1023 * agent drives a 64 bit request/split completion after ONLY 1024 * 1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then 1025 * 82544 has a problem where in to clock all the data in, it 1026 * looks at REQ64# signal and since it has changed so fast (i.e. 1 1027 * idle clock turn around), it will fail to clock all the data in. 1028 * Data coming from certain ending addresses has exposure to this issue. 1029 * 1030 * To detect this issue, following equation can be used... 1031 * SIZE[3:0] + ADDR[2:0] = SUM[3:0]. 1032 * If SUM[3:0] is in between 1 to 4, we will have this issue. 1033 * 1034 * ROOT CAUSE: 1035 * The erratum involves the 82544 PCIX elasticity FIFO implementations as 1036 * 64-bit FIFO's and flushing of the final partial-bytes corresponding 1037 * to the end of a requested read burst. Under a specific burst condition 1038 * of ending-data alignment and 32-byte split-completions, the final 1039 * byte(s) of split-completion data require an extra clock cycle to flush 1040 * into 64-bit FIFO orientation. An incorrect logic dependency on the 1041 * REQ64# signal occurring during during this clock cycle may cause the 1042 * residual byte(s) to be lost, thereby rendering the internal DMA client 1043 * forever awaiting the final byte(s) for an outbound data-fetch. The 1044 * erratum is confirmed to *only* occur if certain subsequent external 1045 * 64-bit PCIX bus transactions occur immediately (minimum possible bus 1046 * turn- around) following the odd-aligned 32-bit split-completion 1047 * containing the final byte(s). Intel has confirmed that this has been 1048 * seen only with chipset/bridges which have the capability to provide 1049 * 32-bit split-completion data, and in the presence of newer PCIX bus 1050 * agents which fully-optimize the inter-transaction turn-around (zero 1051 * additional initiator latency when pre-granted bus ownership). 1052 * 1053 * This issue does not exist in PCI bus mode, when any agent is operating 1054 * in 32 bit only mode or on chipsets that do not do 32 bit split 1055 * completions for 64 bit read requests (Serverworks chipsets). P64H2 does 1056 * 32 bit split completions for any read request that has bit 2 set to 1 1057 * for the requested address and read request size is more than 8 bytes. 1058 * 1059 * 2. Another issue is related to 82544 driving DACs under the similar 1060 * scenario (32 bit split completion followed by 64 bit transaction with 1061 * only 1 cycle turnaround). This issue is still being root caused. We 1062 * think that both of these issues can be avoided if following workaround 1063 * is implemented. It seems DAC issues is related to ending addresses being 1064 * 0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity 1065 * FIFO which does not get flushed due to REQ64# dependency. We will only 1066 * know the full story after it has been simulated successfully by HW team. 1067 * 1068 * WORKAROUND: 1069 * Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC) 1070 */ 1071 static uint32_t 1072 e1000g_fill_82544_desc(uint64_t address, 1073 size_t length, p_desc_array_t desc_array) 1074 { 1075 /* 1076 * Since issue is sensitive to length and address. 1077 * Let us first check the address... 1078 */ 1079 uint32_t safe_terminator; 1080 1081 if (length <= 4) { 1082 desc_array->descriptor[0].address = address; 1083 desc_array->descriptor[0].length = (uint32_t)length; 1084 desc_array->elements = 1; 1085 return (desc_array->elements); 1086 } 1087 safe_terminator = 1088 (uint32_t)((((uint32_t)address & 0x7) + 1089 (length & 0xF)) & 0xF); 1090 /* 1091 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then 1092 * return 1093 */ 1094 if (safe_terminator == 0 || 1095 (safe_terminator > 4 && safe_terminator < 9) || 1096 (safe_terminator > 0xC && safe_terminator <= 0xF)) { 1097 desc_array->descriptor[0].address = address; 1098 desc_array->descriptor[0].length = (uint32_t)length; 1099 desc_array->elements = 1; 1100 return (desc_array->elements); 1101 } 1102 1103 desc_array->descriptor[0].address = address; 1104 desc_array->descriptor[0].length = length - 4; 1105 desc_array->descriptor[1].address = address + (length - 4); 1106 desc_array->descriptor[1].length = 4; 1107 desc_array->elements = 2; 1108 return (desc_array->elements); 1109 } 1110 1111 static int 1112 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, 1113 mblk_t *mp, boolean_t tx_undersize_flag) 1114 { 1115 size_t len; 1116 size_t len1; 1117 dma_buffer_t *tx_buf; 1118 mblk_t *nmp; 1119 boolean_t finished; 1120 int desc_count; 1121 1122 desc_count = 0; 1123 tx_buf = packet->tx_buf; 1124 len = MBLKL(mp); 1125 1126 ASSERT((tx_buf->len + len) <= tx_buf->size); 1127 1128 if (len > 0) { 1129 bcopy(mp->b_rptr, 1130 tx_buf->address + tx_buf->len, 1131 len); 1132 tx_buf->len += len; 1133 1134 packet->num_mblk_frag++; 1135 } 1136 1137 nmp = mp->b_cont; 1138 if (nmp == NULL) { 1139 finished = B_TRUE; 1140 } else { 1141 len1 = MBLKL(nmp); 1142 if ((tx_buf->len + len1) > tx_buf->size) 1143 finished = B_TRUE; 1144 else if (tx_undersize_flag) 1145 finished = B_FALSE; 1146 else if (len1 > tx_ring->adapter->tx_bcopy_thresh) 1147 finished = B_TRUE; 1148 else 1149 finished = B_FALSE; 1150 } 1151 1152 if (finished) { 1153 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy, 1154 (tx_buf->len > len)); 1155 1156 /* 1157 * If the packet is smaller than 64 bytes, which is the 1158 * minimum ethernet packet size, pad the packet to make 1159 * it at least 60 bytes. The hardware will add 4 bytes 1160 * for CRC. 1161 */ 1162 if (tx_undersize_flag) { 1163 ASSERT(tx_buf->len < ETHERMIN); 1164 1165 bzero(tx_buf->address + tx_buf->len, 1166 ETHERMIN - tx_buf->len); 1167 tx_buf->len = ETHERMIN; 1168 } 1169 1170 #ifdef __sparc 1171 if (packet->dma_type == USE_DVMA) 1172 dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV); 1173 else 1174 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1175 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1176 #else 1177 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1178 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1179 #endif 1180 1181 packet->data_transfer_type = USE_BCOPY; 1182 1183 desc_count = e1000g_fill_tx_desc(tx_ring, 1184 packet, 1185 tx_buf->dma_address, 1186 tx_buf->len); 1187 1188 if (desc_count <= 0) 1189 return (-1); 1190 } 1191 1192 return (desc_count); 1193 } 1194 1195 static int 1196 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp) 1197 { 1198 int j; 1199 int mystat; 1200 size_t len; 1201 ddi_dma_cookie_t dma_cookie; 1202 uint_t ncookies; 1203 int desc_count; 1204 uint32_t desc_total; 1205 1206 desc_total = 0; 1207 len = MBLKL(mp); 1208 1209 /* 1210 * ddi_dma_addr_bind_handle() allocates DMA resources for a 1211 * memory object such that a device can perform DMA to or from 1212 * the object. DMA resources are allocated considering the 1213 * device's DMA attributes as expressed by ddi_dma_attr(9S) 1214 * (see ddi_dma_alloc_handle(9F)). 1215 * 1216 * ddi_dma_addr_bind_handle() fills in the first DMA cookie 1217 * pointed to by cookiep with the appropriate address, length, 1218 * and bus type. *ccountp is set to the number of DMA cookies 1219 * representing this DMA object. Subsequent DMA cookies must be 1220 * retrieved by calling ddi_dma_nextcookie(9F) the number of 1221 * times specified by *countp - 1. 1222 */ 1223 switch (packet->dma_type) { 1224 #ifdef __sparc 1225 case USE_DVMA: 1226 dvma_kaddr_load(packet->tx_dma_handle, 1227 (caddr_t)mp->b_rptr, len, 0, &dma_cookie); 1228 1229 dvma_sync(packet->tx_dma_handle, 0, 1230 DDI_DMA_SYNC_FORDEV); 1231 1232 ncookies = 1; 1233 packet->data_transfer_type = USE_DVMA; 1234 break; 1235 #endif 1236 case USE_DMA: 1237 if ((mystat = ddi_dma_addr_bind_handle( 1238 packet->tx_dma_handle, NULL, 1239 (caddr_t)mp->b_rptr, len, 1240 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1241 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1242 &ncookies)) != DDI_DMA_MAPPED) { 1243 1244 e1000g_log(tx_ring->adapter, CE_WARN, 1245 "Couldn't bind mblk buffer to Tx DMA handle: " 1246 "return: %X, Pkt: %X\n", 1247 mystat, packet); 1248 return (-1); 1249 } 1250 1251 /* 1252 * An implicit ddi_dma_sync() is done when the 1253 * ddi_dma_addr_bind_handle() is called. So we 1254 * don't need to explicitly call ddi_dma_sync() 1255 * here any more. 1256 */ 1257 ASSERT(ncookies); 1258 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie, 1259 (ncookies > 1)); 1260 1261 /* 1262 * The data_transfer_type value must be set after the handle 1263 * has been bound, for it will be used in e1000g_free_tx_swpkt() 1264 * to decide whether we need to unbind the handle. 1265 */ 1266 packet->data_transfer_type = USE_DMA; 1267 break; 1268 default: 1269 ASSERT(B_FALSE); 1270 break; 1271 } 1272 1273 packet->num_mblk_frag++; 1274 1275 /* 1276 * Each address could span thru multpile cookie.. 1277 * Each cookie will have one descriptor 1278 */ 1279 for (j = ncookies; j != 0; j--) { 1280 1281 desc_count = e1000g_fill_tx_desc(tx_ring, 1282 packet, 1283 dma_cookie.dmac_laddress, 1284 dma_cookie.dmac_size); 1285 1286 if (desc_count <= 0) 1287 return (-1); 1288 1289 desc_total += desc_count; 1290 1291 /* 1292 * ddi_dma_nextcookie() retrieves subsequent DMA 1293 * cookies for a DMA object. 1294 * ddi_dma_nextcookie() fills in the 1295 * ddi_dma_cookie(9S) structure pointed to by 1296 * cookiep. The ddi_dma_cookie(9S) structure 1297 * must be allocated prior to calling 1298 * ddi_dma_nextcookie(). The DMA cookie count 1299 * returned by ddi_dma_buf_bind_handle(9F), 1300 * ddi_dma_addr_bind_handle(9F), or 1301 * ddi_dma_getwin(9F) indicates the number of DMA 1302 * cookies a DMA object consists of. If the 1303 * resulting cookie count, N, is larger than 1, 1304 * ddi_dma_nextcookie() must be called N-1 times 1305 * to retrieve all DMA cookies. 1306 */ 1307 if (j > 1) { 1308 ddi_dma_nextcookie(packet->tx_dma_handle, 1309 &dma_cookie); 1310 } 1311 } 1312 1313 return (desc_total); 1314 } 1315 1316 static void 1317 e1000g_fill_context_descriptor(context_data_t *cur_context, 1318 struct e1000_context_desc *context_desc) 1319 { 1320 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) { 1321 context_desc->lower_setup.ip_fields.ipcss = 1322 cur_context->ether_header_size; 1323 context_desc->lower_setup.ip_fields.ipcso = 1324 cur_context->ether_header_size + 1325 offsetof(struct ip, ip_sum); 1326 context_desc->lower_setup.ip_fields.ipcse = 1327 cur_context->ether_header_size + 1328 cur_context->cksum_start - 1; 1329 } else 1330 context_desc->lower_setup.ip_config = 0; 1331 1332 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) { 1333 /* 1334 * The packet with same protocol has the following 1335 * stuff and start offset: 1336 * | Protocol | Stuff | Start | Checksum 1337 * | | Offset | Offset | Enable 1338 * | IPv4 + TCP | 0x24 | 0x14 | Yes 1339 * | IPv4 + UDP | 0x1A | 0x14 | Yes 1340 * | IPv6 + TCP | 0x20 | 0x10 | No 1341 * | IPv6 + UDP | 0x14 | 0x10 | No 1342 */ 1343 context_desc->upper_setup.tcp_fields.tucss = 1344 cur_context->cksum_start + cur_context->ether_header_size; 1345 context_desc->upper_setup.tcp_fields.tucso = 1346 cur_context->cksum_stuff + cur_context->ether_header_size; 1347 context_desc->upper_setup.tcp_fields.tucse = 0; 1348 } else 1349 context_desc->upper_setup.tcp_config = 0; 1350 1351 if (cur_context->lso_flag) { 1352 context_desc->tcp_seg_setup.fields.mss = cur_context->mss; 1353 context_desc->tcp_seg_setup.fields.hdr_len = 1354 cur_context->hdr_len; 1355 /* 1356 * workaround for 82546EB errata 23, status-writeback 1357 * reporting (RS) should not be set on context or 1358 * Null descriptors 1359 */ 1360 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1361 | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP 1362 | E1000_TXD_DTYP_C | cur_context->pay_len; 1363 } else { 1364 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1365 | E1000_TXD_DTYP_C; 1366 /* 1367 * Zero out the options for TCP Segmentation Offload 1368 */ 1369 context_desc->tcp_seg_setup.data = 0; 1370 } 1371 } 1372 1373 static int 1374 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring, 1375 p_tx_sw_packet_t packet, uint64_t address, size_t size) 1376 { 1377 struct e1000_hw *hw = &tx_ring->adapter->shared; 1378 p_sw_desc_t desc; 1379 1380 if (hw->mac.type == e1000_82544) { 1381 if (hw->bus.type == e1000_bus_type_pcix) 1382 return (e1000g_tx_workaround_PCIX_82544(packet, 1383 address, size)); 1384 1385 if (size > JUMBO_FRAG_LENGTH) 1386 return (e1000g_tx_workaround_jumbo_82544(packet, 1387 address, size)); 1388 } 1389 1390 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1391 1392 desc = &packet->desc[packet->num_desc]; 1393 desc->address = address; 1394 desc->length = (uint32_t)size; 1395 1396 packet->num_desc++; 1397 1398 return (1); 1399 } 1400 1401 static int 1402 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet, 1403 uint64_t address, size_t size) 1404 { 1405 p_sw_desc_t desc; 1406 int desc_count; 1407 long size_left; 1408 size_t len; 1409 uint32_t counter; 1410 uint32_t array_elements; 1411 desc_array_t desc_array; 1412 1413 /* 1414 * Coexist Workaround for cordova: RP: 07/04/03 1415 * 1416 * RP: ERRATA: Workaround ISSUE: 1417 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup 1418 * Eachbuffer in to 8kb pieces until the 1419 * remainder is < 8kb 1420 */ 1421 size_left = size; 1422 desc_count = 0; 1423 1424 while (size_left > 0) { 1425 if (size_left > MAX_TX_BUF_SIZE) 1426 len = MAX_TX_BUF_SIZE; 1427 else 1428 len = size_left; 1429 1430 array_elements = e1000g_fill_82544_desc(address, 1431 len, &desc_array); 1432 1433 for (counter = 0; counter < array_elements; counter++) { 1434 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1435 /* 1436 * Put in the buffer address 1437 */ 1438 desc = &packet->desc[packet->num_desc]; 1439 1440 desc->address = 1441 desc_array.descriptor[counter].address; 1442 desc->length = 1443 desc_array.descriptor[counter].length; 1444 1445 packet->num_desc++; 1446 desc_count++; 1447 } /* for */ 1448 1449 /* 1450 * Update the buffer address and length 1451 */ 1452 address += MAX_TX_BUF_SIZE; 1453 size_left -= MAX_TX_BUF_SIZE; 1454 } /* while */ 1455 1456 return (desc_count); 1457 } 1458 1459 static int 1460 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet, 1461 uint64_t address, size_t size) 1462 { 1463 p_sw_desc_t desc; 1464 int desc_count; 1465 long size_left; 1466 uint32_t offset; 1467 1468 /* 1469 * Workaround for Jumbo Frames on Cordova 1470 * PSD 06/01/2001 1471 */ 1472 size_left = size; 1473 desc_count = 0; 1474 offset = 0; 1475 while (size_left > 0) { 1476 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1477 1478 desc = &packet->desc[packet->num_desc]; 1479 1480 desc->address = address + offset; 1481 1482 if (size_left > JUMBO_FRAG_LENGTH) 1483 desc->length = JUMBO_FRAG_LENGTH; 1484 else 1485 desc->length = (uint32_t)size_left; 1486 1487 packet->num_desc++; 1488 desc_count++; 1489 1490 offset += desc->length; 1491 size_left -= JUMBO_FRAG_LENGTH; 1492 } 1493 1494 return (desc_count); 1495 } 1496 1497 #pragma inline(e1000g_82547_tx_move_tail_work) 1498 1499 static void 1500 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring) 1501 { 1502 struct e1000_hw *hw; 1503 uint16_t hw_tdt; 1504 uint16_t sw_tdt; 1505 struct e1000_tx_desc *tx_desc; 1506 uint16_t length = 0; 1507 boolean_t eop = B_FALSE; 1508 struct e1000g *Adapter; 1509 1510 Adapter = tx_ring->adapter; 1511 hw = &Adapter->shared; 1512 1513 hw_tdt = E1000_READ_REG(hw, E1000_TDT(0)); 1514 sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first; 1515 1516 while (hw_tdt != sw_tdt) { 1517 tx_desc = &(tx_ring->tbd_first[hw_tdt]); 1518 length += tx_desc->lower.flags.length; 1519 eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; 1520 if (++hw_tdt == Adapter->tx_desc_num) 1521 hw_tdt = 0; 1522 1523 if (eop) { 1524 if ((Adapter->link_duplex == HALF_DUPLEX) && 1525 (e1000_fifo_workaround_82547(hw, length) 1526 != E1000_SUCCESS)) { 1527 if (tx_ring->timer_enable_82547) { 1528 ASSERT(tx_ring->timer_id_82547 == 0); 1529 tx_ring->timer_id_82547 = 1530 timeout(e1000g_82547_timeout, 1531 (void *)tx_ring, 1532 drv_usectohz(10000)); 1533 } 1534 return; 1535 1536 } else { 1537 E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt); 1538 e1000_update_tx_fifo_head_82547(hw, length); 1539 length = 0; 1540 } 1541 } 1542 } 1543 } 1544 1545 static void 1546 e1000g_82547_timeout(void *arg) 1547 { 1548 e1000g_tx_ring_t *tx_ring; 1549 1550 tx_ring = (e1000g_tx_ring_t *)arg; 1551 1552 mutex_enter(&tx_ring->tx_lock); 1553 1554 tx_ring->timer_id_82547 = 0; 1555 e1000g_82547_tx_move_tail_work(tx_ring); 1556 1557 mutex_exit(&tx_ring->tx_lock); 1558 } 1559 1560 static void 1561 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring) 1562 { 1563 timeout_id_t tid; 1564 1565 ASSERT(MUTEX_HELD(&tx_ring->tx_lock)); 1566 1567 tid = tx_ring->timer_id_82547; 1568 tx_ring->timer_id_82547 = 0; 1569 if (tid != 0) { 1570 tx_ring->timer_enable_82547 = B_FALSE; 1571 mutex_exit(&tx_ring->tx_lock); 1572 1573 (void) untimeout(tid); 1574 1575 mutex_enter(&tx_ring->tx_lock); 1576 } 1577 tx_ring->timer_enable_82547 = B_TRUE; 1578 e1000g_82547_tx_move_tail_work(tx_ring); 1579 } 1580