/*
 * This file is provided under a CDDLv1 license.  When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 *	http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms of the CDDLv1.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

/*
 * **********************************************************************
 *									*
 * Module Name:								*
 *   e1000g_tx.c							*
 *									*
 * Abstract:								*
 *   This file contains the routines that take care of transmit;	*
 *   they make the hardware send the data pointed to by the packet	*
 *   out onto the physical medium.					*
 *									*
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, boolean_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t e1000g_retreive_context(mblk_t *, context_data_t *, size_t);
static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    context_data_t *);
static void e1000g_fill_context_descriptor(context_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

#ifndef E1000G_DEBUG
#pragma inline(e1000g_tx_copy)
#pragma inline(e1000g_tx_bind)
#pragma inline(e1000g_retreive_context)
#pragma inline(e1000g_check_context)
#pragma inline(e1000g_fill_tx_ring)
#pragma inline(e1000g_fill_context_descriptor)
#pragma inline(e1000g_fill_tx_desc)
#pragma inline(e1000g_fill_82544_desc)
#pragma inline(e1000g_tx_workaround_PCIX_82544)
#pragma inline(e1000g_tx_workaround_jumbo_82544)
#pragma inline(e1000g_free_tx_swpkt)
#endif

/*
 * e1000g_free_tx_swpkt - free up the tx sw packet
 *
 * Unbind the previously bound DMA handle for a given
 * transmit sw packet and reset the sw packet data.
 */
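/*
 * The cleanup needed depends on how the buffer was handed to the hardware,
 * as recorded in data_transfer_type: USE_BCOPY simply resets the copy
 * buffer length, USE_DVMA (sparc only) unloads the fast DVMA mapping, and
 * USE_DMA unbinds the DDI DMA handle.
 */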
void
e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
{
    switch (packet->data_transfer_type) {
    case USE_BCOPY:
        packet->tx_buf->len = 0;
        break;
#ifdef __sparc
    case USE_DVMA:
        dvma_unload(packet->tx_dma_handle, 0, -1);
        break;
#endif
    case USE_DMA:
        ddi_dma_unbind_handle(packet->tx_dma_handle);
        break;
    default:
        break;
    }

    /*
     * The mblk has been stripped off the sw packet
     * and will be freed in a triggered soft intr.
     */
    ASSERT(packet->mp == NULL);

    packet->data_transfer_type = USE_NONE;
    packet->num_mblk_frag = 0;
    packet->num_desc = 0;
}

mblk_t *
e1000g_m_tx(void *arg, mblk_t *mp)
{
    struct e1000g *Adapter = (struct e1000g *)arg;
    mblk_t *next;

    rw_enter(&Adapter->chip_lock, RW_READER);

    if ((Adapter->chip_state != E1000G_START) ||
        (Adapter->link_state != LINK_STATE_UP)) {
        freemsgchain(mp);
        mp = NULL;
    }

    while (mp != NULL) {
        next = mp->b_next;
        mp->b_next = NULL;

        if (!e1000g_send(Adapter, mp)) {
            mp->b_next = next;
            break;
        }

        mp = next;
    }

    rw_exit(&Adapter->chip_lock);
    return (mp);
}

/*
 * e1000g_send - send packets onto the wire
 *
 * Called from e1000g_m_tx with an mblk ready to send. This
 * routine sets up the transmit descriptors and sends the data to
 * the wire. It also pushes the just-transmitted packet onto
 * the used tx sw packet list.
 */
static boolean_t
e1000g_send(struct e1000g *Adapter, mblk_t *mp)
{
    struct e1000_hw *hw;
    p_tx_sw_packet_t packet;
    LIST_DESCRIBER pending_list;
    size_t len;
    size_t msg_size;
    uint32_t frag_count;
    int desc_count;
    uint32_t desc_total;
    boolean_t tx_undersize_flag;
    mblk_t *nmp;
    mblk_t *tmp;
    e1000g_tx_ring_t *tx_ring;
    context_data_t cur_context;

    hw = &Adapter->shared;
    tx_ring = Adapter->tx_ring;

    /* Get the total size and number of fragments of the message */
    tx_undersize_flag = B_FALSE;
    frag_count = 0;
    msg_size = 0;
    for (nmp = mp; nmp; nmp = nmp->b_cont) {
        frag_count++;
        msg_size += MBLKL(nmp);
    }

    /* Retrieve and compute information for the context descriptor */
    if (!e1000g_retreive_context(mp, &cur_context, msg_size)) {
        freemsg(mp);
        return (B_TRUE);
    }

    /*
     * Make sure the packet is less than the allowed size
     */
    if (!cur_context.lso_flag &&
        (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
        /*
         * For an oversize packet, we'll just drop it.
         * So we return B_TRUE here.
         */
        E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
            "Tx packet out of bound. length = %d \n", msg_size);
        E1000G_STAT(tx_ring->stat_over_size);
        freemsg(mp);
        return (B_TRUE);
    }

    /*
     * Check and reclaim tx descriptors.
     * This low-water-mark check should be done all the time, because
     * the transmit interrupt delay can cause transmit interrupts to
     * arrive a little late, which may create problems reclaiming Tx
     * descriptors: we could run short of them before getting any
     * transmit interrupt.
     */
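    /*
     * Recycle proactively: either a reschedule is already pending or the
     * number of free descriptors has dropped below tx_recycle_thresh.
     * If, even after recycling, fewer than DEFAULT_TX_NO_RESOURCE
     * descriptors remain, give up now and let the packet be rescheduled
     * via the tx_no_resource path.
     */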
    if (tx_ring->resched_needed ||
        (tx_ring->tbd_avail < Adapter->tx_recycle_thresh)) {
        (void) e1000g_recycle(tx_ring);
        E1000G_DEBUG_STAT(tx_ring->stat_recycle);

        if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
            E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
            goto tx_no_resource;
        }
    }

    /*
     * If the message size is less than the minimum ethernet packet size,
     * we'll use bcopy to send it, and pad it to 60 bytes later.
     */
    if (msg_size < ETHERMIN) {
        E1000G_DEBUG_STAT(tx_ring->stat_under_size);
        tx_undersize_flag = B_TRUE;
    }

    /* Initialize variables */
    desc_count = 1;	/* The initial value should be greater than 0 */
    desc_total = 0;
    QUEUE_INIT_LIST(&pending_list);

    /* Process each mblk fragment and fill tx descriptors */
    packet = NULL;
    nmp = mp;
    while (nmp) {
        tmp = nmp->b_cont;

        len = MBLKL(nmp);
        /* Check zero length mblks */
        if (len == 0) {
            E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
            /*
             * If no packet buffer has been used yet, or we just
             * completed processing a buffer, then skip the empty
             * mblk fragment.
             * Otherwise, there's still a pending buffer that
             * needs to be processed (tx_copy).
             */
            if (desc_count > 0) {
                nmp = tmp;
                continue;
            }
        }

        /*
         * Get a new TxSwPacket to process mblk buffers.
         */
        if (desc_count > 0) {
            mutex_enter(&tx_ring->freelist_lock);
            packet = (p_tx_sw_packet_t)
                QUEUE_POP_HEAD(&tx_ring->free_list);
            mutex_exit(&tx_ring->freelist_lock);

            if (packet == NULL) {
                E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
                    "No Tx SwPacket available\n");
                E1000G_STAT(tx_ring->stat_no_swpkt);
                goto tx_send_failed;
            }
            QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
        }

        ASSERT(packet);
        /*
         * If the size of the fragment is less than the tx_bcopy_thresh
         * we'll use bcopy; otherwise, we'll use DMA binding.
         */
        if ((len <= Adapter->tx_bcopy_thresh) || tx_undersize_flag) {
            desc_count =
                e1000g_tx_copy(tx_ring, packet, nmp,
                tx_undersize_flag);
            E1000G_DEBUG_STAT(tx_ring->stat_copy);
        } else {
            desc_count =
                e1000g_tx_bind(tx_ring, packet, nmp);
            E1000G_DEBUG_STAT(tx_ring->stat_bind);
        }

        if (desc_count > 0)
            desc_total += desc_count;
        else if (desc_count < 0)
            goto tx_send_failed;

        nmp = tmp;
    }

    /* Assign the message to the last sw packet */
    ASSERT(packet);
    ASSERT(packet->mp == NULL);
    packet->mp = mp;

    /* Try to recycle the tx descriptors again */
    if (tx_ring->tbd_avail < (desc_total + 2)) {
        E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
        (void) e1000g_recycle(tx_ring);
    }

    mutex_enter(&tx_ring->tx_lock);

    /*
     * If the number of available tx descriptors is not enough for transmit
     * (one redundant descriptor and one hw checksum context descriptor are
     * included), then return failure.
     */
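    /*
     * For example, if the mblk chain above produced three data descriptors
     * (desc_total == 3), at least five free descriptors are required here:
     * three for the data, one for a possible context descriptor and one
     * redundant descriptor.
     */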
    if (tx_ring->tbd_avail < (desc_total + 2)) {
        E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
            "Not enough Tx descriptors\n");
        E1000G_STAT(tx_ring->stat_no_desc);
        mutex_exit(&tx_ring->tx_lock);
        goto tx_send_failed;
    }

    desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);

    mutex_exit(&tx_ring->tx_lock);

    ASSERT(desc_count > 0);

    /* Send successful */
    return (B_TRUE);

tx_send_failed:
    /*
     * Enable Transmit interrupts, so that the interrupt routine can
     * call mac_tx_update() when transmit descriptors become available.
     */
    tx_ring->resched_needed = B_TRUE;
    if (!Adapter->tx_intr_enable)
        e1000g_mask_tx_interrupt(Adapter);

    /* Free pending TxSwPackets */
    packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
    while (packet) {
        packet->mp = NULL;
        e1000g_free_tx_swpkt(packet);
        packet = (p_tx_sw_packet_t)
            QUEUE_GET_NEXT(&pending_list, &packet->Link);
    }

    /* Return pending TxSwPackets to the "Free" list */
    mutex_enter(&tx_ring->freelist_lock);
    QUEUE_APPEND(&tx_ring->free_list, &pending_list);
    mutex_exit(&tx_ring->freelist_lock);

    E1000G_STAT(tx_ring->stat_send_fail);

    /* Message will be scheduled for re-transmit */
    return (B_FALSE);

tx_no_resource:
    /*
     * Enable Transmit interrupts, so that the interrupt routine can
     * call mac_tx_update() when transmit descriptors become available.
     */
    tx_ring->resched_needed = B_TRUE;
    if (!Adapter->tx_intr_enable)
        e1000g_mask_tx_interrupt(Adapter);

    /* Message will be scheduled for re-transmit */
    return (B_FALSE);
}

static boolean_t
e1000g_retreive_context(mblk_t *mp, context_data_t *cur_context,
    size_t msg_size)
{
    uintptr_t ip_start;
    uintptr_t tcp_start;
    mblk_t *nmp;

    bzero(cur_context, sizeof (context_data_t));

    /* Retrieve checksum info */
    hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start,
        &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
    /* Retrieve ethernet header size */
    if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
        htons(ETHERTYPE_VLAN))
        cur_context->ether_header_size =
            sizeof (struct ether_vlan_header);
    else
        cur_context->ether_header_size =
            sizeof (struct ether_header);

    if (cur_context->cksum_flags & HW_LSO) {
        if ((cur_context->mss = DB_LSOMSS(mp)) != 0) {
            /* Free the invalid packet */
            if (!((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
                (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
                return (B_FALSE);
            }
            cur_context->lso_flag = B_TRUE;
            /*
             * Some fields are cleared for the hardware to fill
             * in. We don't assume the Ethernet header, IP header
             * and TCP header are always in the same mblk fragment,
             * but we do assume each header is always within one
             * mblk fragment and that the Ethernet header is always
             * in the first mblk fragment.
             */
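            /*
             * For example, with a 14-byte Ethernet header fully
             * contained in the first mblk, ip_start is b_rptr + 14;
             * if that offset runs past b_wptr, the same relative
             * offset is taken in b_cont instead. tcp_start is then
             * ip_start + IPH_HDR_LENGTH(), resolved across mblks in
             * the same way.
             */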
            nmp = mp;
            ip_start = (uintptr_t)(nmp->b_rptr)
                + cur_context->ether_header_size;
            if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
                ip_start = (uintptr_t)nmp->b_cont->b_rptr
                    + (ip_start - (uintptr_t)(nmp->b_wptr));
                nmp = nmp->b_cont;
            }
            tcp_start = ip_start +
                IPH_HDR_LENGTH((ipha_t *)ip_start);
            if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
                tcp_start = (uintptr_t)nmp->b_cont->b_rptr
                    + (tcp_start - (uintptr_t)(nmp->b_wptr));
                nmp = nmp->b_cont;
            }
            cur_context->hdr_len = cur_context->ether_header_size
                + IPH_HDR_LENGTH((ipha_t *)ip_start)
                + TCP_HDR_LENGTH((tcph_t *)tcp_start);
            ((ipha_t *)ip_start)->ipha_length = 0;
            ((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
            /* Calculate the TCP packet payload length */
            cur_context->pay_len = msg_size - cur_context->hdr_len;
        }
    }
    return (B_TRUE);
}

static boolean_t
e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
{
    boolean_t context_reload;
    context_data_t *pre_context;
    struct e1000g *Adapter;

    context_reload = B_FALSE;
    pre_context = &tx_ring->pre_context;
    Adapter = tx_ring->adapter;

    /*
     * The following code determines whether the context descriptor
     * needs to be reloaded. The conditions are ordered by how likely
     * they are to change.
     */
    /*
     * Workaround for 82546EB: the context descriptor must be reloaded
     * per LSO/hw_cksum packet if LSO is enabled.
     */
    if (Adapter->lso_premature_issue &&
        Adapter->lso_enable &&
        (cur_context->cksum_flags != 0)) {

        context_reload = B_TRUE;
    } else if (cur_context->lso_flag) {
        if ((cur_context->cksum_flags != pre_context->cksum_flags) ||
            (cur_context->pay_len != pre_context->pay_len) ||
            (cur_context->mss != pre_context->mss) ||
            (cur_context->hdr_len != pre_context->hdr_len) ||
            (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
            (cur_context->cksum_start != pre_context->cksum_start) ||
            (cur_context->ether_header_size !=
            pre_context->ether_header_size)) {

            context_reload = B_TRUE;
        }
    } else if (cur_context->cksum_flags != 0) {
        if ((cur_context->cksum_flags != pre_context->cksum_flags) ||
            (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
            (cur_context->cksum_start != pre_context->cksum_start) ||
            (cur_context->ether_header_size !=
            pre_context->ether_header_size)) {

            context_reload = B_TRUE;
        }
    }

    return (context_reload);
}

static int
e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
    context_data_t *cur_context)
{
    struct e1000g *Adapter;
    struct e1000_hw *hw;
    p_tx_sw_packet_t first_packet;
    p_tx_sw_packet_t packet;
    p_tx_sw_packet_t previous_packet;
    boolean_t context_reload;
    struct e1000_tx_desc *first_data_desc;
    struct e1000_tx_desc *next_desc;
    struct e1000_tx_desc *descriptor;
    int desc_count;
    boolean_t buff_overrun_flag;
    int i;

    Adapter = tx_ring->adapter;
    hw = &Adapter->shared;

    desc_count = 0;
    first_packet = NULL;
    first_data_desc = NULL;
    descriptor = NULL;
    first_packet = NULL;
    packet = NULL;
    buff_overrun_flag = B_FALSE;

    next_desc = tx_ring->tbd_next;

    /* Context descriptor reload check */
    context_reload = e1000g_check_context(tx_ring, cur_context);

    if (context_reload) {
        first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);

        descriptor = next_desc;

        e1000g_fill_context_descriptor(cur_context,
            (struct e1000_context_desc *)descriptor);

        /* Check the wrap-around case */
        if (descriptor == tx_ring->tbd_last)
            next_desc = tx_ring->tbd_first;
        else
            next_desc++;

        desc_count++;
    }

    first_data_desc = next_desc;

    packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
    while (packet) {
        ASSERT(packet->num_desc);

        for (i = 0; i < packet->num_desc; i++) {
            ASSERT(tx_ring->tbd_avail > 0);

            descriptor = next_desc;
            descriptor->buffer_addr =
                packet->desc[i].address;
            descriptor->lower.data =
                packet->desc[i].length;

            /* Zero out status */
            descriptor->upper.data = 0;

            descriptor->lower.data |=
                E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
            /* must set RS on every outgoing descriptor */
            descriptor->lower.data |=
                E1000_TXD_CMD_RS;

            if (cur_context->lso_flag)
                descriptor->lower.data |= E1000_TXD_CMD_TSE;

            /* Check the wrap-around case */
            if (descriptor == tx_ring->tbd_last)
                next_desc = tx_ring->tbd_first;
            else
                next_desc++;

            desc_count++;

            /*
             * Workaround for 82546EB errata 33: hang in PCI-X
             * systems due to a 2k buffer overrun during transmit
             * operation. The workaround applies to all the Intel
             * PCI-X chips.
             */
            if (hw->bus.type == e1000_bus_type_pcix &&
                descriptor == first_data_desc &&
                ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
                > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
                /* modify the first descriptor */
                descriptor->lower.data &=
                    ~E1000G_TBD_LENGTH_MASK;
                descriptor->lower.flags.length =
                    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

                /* insert a new descriptor */
                ASSERT(tx_ring->tbd_avail > 0);
                next_desc->buffer_addr =
                    packet->desc[0].address +
                    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
                next_desc->lower.data =
                    packet->desc[0].length -
                    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

                /* Zero out status */
                next_desc->upper.data = 0;

                next_desc->lower.data |=
                    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
                /* must set RS on every outgoing descriptor */
                next_desc->lower.data |=
                    E1000_TXD_CMD_RS;

                if (cur_context->lso_flag)
                    next_desc->lower.data |=
                        E1000_TXD_CMD_TSE;

                descriptor = next_desc;

                /* Check the wrap-around case */
                if (next_desc == tx_ring->tbd_last)
                    next_desc = tx_ring->tbd_first;
                else
                    next_desc++;

                desc_count++;
                buff_overrun_flag = B_TRUE;
            }
        }

        if (buff_overrun_flag) {
            packet->num_desc++;
            buff_overrun_flag = B_FALSE;
        }

        if (first_packet != NULL) {
            /*
             * Count the checksum context descriptor for
             * the first SwPacket.
             */
            first_packet->num_desc++;
            first_packet = NULL;
        }

        previous_packet = packet;
        packet = (p_tx_sw_packet_t)
            QUEUE_GET_NEXT(pending_list, &packet->Link);
    }

    /*
     * Workaround for 82546EB errata 21: LSO Premature Descriptor
     * Write Back.
     */
    if (Adapter->lso_premature_issue && cur_context->lso_flag &&
        ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
        /* modify the previous descriptor */
        descriptor->lower.data -= 4;

        /* insert a new descriptor */
        ASSERT(tx_ring->tbd_avail > 0);
        /* the lower 20 bits of lower.data is the length field */
        next_desc->buffer_addr =
            descriptor->buffer_addr +
            (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
        next_desc->lower.data = 4;

        /* Zero out status */
        next_desc->upper.data = 0;
        /* It must be part of a LSO packet */
        next_desc->lower.data |=
            E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
            E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;

        descriptor = next_desc;

        /* Check the wrap-around case */
        if (descriptor == tx_ring->tbd_last)
            next_desc = tx_ring->tbd_first;
        else
            next_desc++;

        desc_count++;
        /* update the number of descriptors */
        previous_packet->num_desc++;
    }

    ASSERT(descriptor);

    if (cur_context->cksum_flags) {
        if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
            ((struct e1000_data_desc *)first_data_desc)->
                upper.fields.popts |= E1000_TXD_POPTS_IXSM;
        if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
            ((struct e1000_data_desc *)first_data_desc)->
                upper.fields.popts |= E1000_TXD_POPTS_TXSM;
    }

    /*
     * The last descriptor of the packet needs End Of Packet (EOP) and
     * Report Status (RS) set.
     */
    if (Adapter->tx_intr_delay) {
        descriptor->lower.data |= E1000_TXD_CMD_IDE |
            E1000_TXD_CMD_EOP;
    } else {
        descriptor->lower.data |= E1000_TXD_CMD_EOP;
    }

    /* Set the "append Ethernet CRC" (IFCS) bit */
    if (cur_context->lso_flag) {
        first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
    } else {
        descriptor->lower.data |= E1000_TXD_CMD_IFCS;
    }

    /*
     * Sync the Tx descriptors DMA buffer
     */
    (void) ddi_dma_sync(tx_ring->tbd_dma_handle,
        0, 0, DDI_DMA_SYNC_FORDEV);

    tx_ring->tbd_next = next_desc;

    /*
     * Advance the Transmit Descriptor Tail (TDT); this tells the
     * hardware that this frame is available to transmit.
     */
    if (hw->mac.type == e1000_82547)
        e1000g_82547_tx_move_tail(tx_ring);
    else
        E1000_WRITE_REG(hw, E1000_TDT(0),
            (uint32_t)(next_desc - tx_ring->tbd_first));

    if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
        ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
        Adapter->chip_state = E1000G_ERROR;
    }

    /* Put the pending SwPackets onto the "Used" list */
    mutex_enter(&tx_ring->usedlist_lock);
    QUEUE_APPEND(&tx_ring->used_list, pending_list);
    tx_ring->tbd_avail -= desc_count;
    mutex_exit(&tx_ring->usedlist_lock);

    /* update LSO related data */
    if (context_reload)
        tx_ring->pre_context = *cur_context;

    return (desc_count);
}


/*
 * e1000g_tx_setup - setup tx data structures
 *
 * This routine initializes all of the transmit related
 * structures. This includes the Transmit descriptors
 * and the tx_sw_packet structures.
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
    struct e1000_hw *hw;
    p_tx_sw_packet_t packet;
    UINT i;
    uint32_t buf_high;
    uint32_t buf_low;
    uint32_t reg_tipg;
    uint32_t reg_tctl;
    uint32_t reg_tarc;
    uint16_t speed, duplex;
    int size;
    e1000g_tx_ring_t *tx_ring;

    hw = &Adapter->shared;
    tx_ring = Adapter->tx_ring;

    /* init the lists */
    /*
     * Here we don't need to protect the lists using the
     * usedlist_lock and freelist_lock, since they are
     * already protected by the chip_lock.
     */
    QUEUE_INIT_LIST(&tx_ring->used_list);
    QUEUE_INIT_LIST(&tx_ring->free_list);

    /* Go through and set up each SW_Packet */
    packet = tx_ring->packet_area;
    for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
        /* Initialize this tx_sw_packet area */
        e1000g_free_tx_swpkt(packet);
        /* Add this tx_sw_packet to the free list */
        QUEUE_PUSH_TAIL(&tx_ring->free_list,
            &packet->Link);
    }

    /* Setup TX descriptor pointers */
    tx_ring->tbd_next = tx_ring->tbd_first;
    tx_ring->tbd_oldest = tx_ring->tbd_first;

    /*
     * Setup Hardware TX Registers
     */
    /* Setup the Transmit Control Register (TCTL). */
    reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
        (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
        (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
        E1000_TCTL_RTLC;

    /* Enable the MULR bit */
    if (hw->bus.type == e1000_bus_type_pci_express)
        reg_tctl |= E1000_TCTL_MULR;

    E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

    if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
        e1000_get_speed_and_duplex(hw, &speed, &duplex);

        reg_tarc = E1000_READ_REG(hw, E1000_TARC(0));
        reg_tarc |= (1 << 25);
        if (speed == SPEED_1000)
            reg_tarc |= (1 << 21);
        E1000_WRITE_REG(hw, E1000_TARC(0), reg_tarc);

        reg_tarc = E1000_READ_REG(hw, E1000_TARC(1));
        reg_tarc |= (1 << 25);
        if (reg_tctl & E1000_TCTL_MULR)
            reg_tarc &= ~(1 << 28);
        else
            reg_tarc |= (1 << 28);
        E1000_WRITE_REG(hw, E1000_TARC(1), reg_tarc);

    } else if (hw->mac.type == e1000_80003es2lan) {
        reg_tarc = E1000_READ_REG(hw, E1000_TARC(0));
        reg_tarc |= 1;
        if (hw->phy.media_type == e1000_media_type_internal_serdes)
            reg_tarc |= (1 << 20);
        E1000_WRITE_REG(hw, E1000_TARC(0), reg_tarc);

        reg_tarc = E1000_READ_REG(hw, E1000_TARC(1));
        reg_tarc |= 1;
        E1000_WRITE_REG(hw, E1000_TARC(1), reg_tarc);
    }

    /* Setup HW Base and Length of Tx descriptor area */
    size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
    E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
    size = E1000_READ_REG(hw, E1000_TDLEN(0));

    buf_low = (uint32_t)tx_ring->tbd_dma_addr;
    buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

    E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);
    E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);

    /* Setup our HW Tx Head & Tail descriptor pointers */
    E1000_WRITE_REG(hw, E1000_TDH(0), 0);
    E1000_WRITE_REG(hw, E1000_TDT(0), 0);

    /* Set the default values for the Tx Inter Packet Gap timer */
    if ((hw->mac.type == e1000_82542) &&
        ((hw->revision_id == E1000_REVISION_2) ||
        (hw->revision_id == E1000_REVISION_3))) {
        reg_tipg = DEFAULT_82542_TIPG_IPGT;
        reg_tipg |=
            DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
        reg_tipg |=
            DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
    } else {
        if (hw->phy.media_type == e1000_media_type_fiber)
            reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
        else
            reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
        reg_tipg |=
            DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
        reg_tipg |=
            DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
    }
    E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

    /* Setup Transmit Interrupt Delay Value */
    E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
    E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
        "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

    if (hw->mac.type >= e1000_82540) {
        E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
            Adapter->tx_intr_abs_delay);
        E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
            "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
    }

    tx_ring->tbd_avail = Adapter->tx_desc_num;

    /* Initialize stored context information */
    bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}

/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
    struct e1000g *Adapter;
    LIST_DESCRIBER pending_list;
    p_tx_sw_packet_t packet;
    mblk_t *mp;
    mblk_t *nmp;
    struct e1000_tx_desc *descriptor;
    int desc_count;
    int is_intr;

    /*
     * This function examines each TxSwPacket in the 'used' queue.
     * If the e1000g is done with it, the associated resources (Tx
     * descriptors) are "freed" and the TxSwPacket is returned to
     * the 'free' queue.
     */
    Adapter = tx_ring->adapter;

    packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
    if (packet == NULL) {
        tx_ring->recycle_fail = 0;
        tx_ring->stall_watchdog = 0;
        return (0);
    }

    is_intr = servicing_interrupt();

    if (is_intr)
        mutex_enter(&tx_ring->usedlist_lock);
    else if (mutex_tryenter(&tx_ring->usedlist_lock) == 0)
        return (0);

    desc_count = 0;
    QUEUE_INIT_LIST(&pending_list);

    /* Sync the Tx descriptor DMA buffer */
    (void) ddi_dma_sync(tx_ring->tbd_dma_handle,
        0, 0, DDI_DMA_SYNC_FORKERNEL);
    if (e1000g_check_dma_handle(
        tx_ring->tbd_dma_handle) != DDI_FM_OK) {
        mutex_exit(&tx_ring->usedlist_lock);
        ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
        Adapter->chip_state = E1000G_ERROR;
        return (0);
    }

    /*
     * While there are still TxSwPackets in the used queue, check them
     */
    while (packet =
        (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) {

        /*
         * Get hold of the next descriptor that the e1000g will
         * report status back to (this will be the last descriptor
         * of a given sw packet). We only want to free the
         * sw packet (and its resources) if the e1000g is done
         * with ALL of the descriptors. If the e1000g is done
         * with the last one then it is done with all of them.
         */
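        /*
         * For example (illustrative values): with tx_desc_num of 512,
         * tbd_oldest at ring index 510 and num_desc == 4, the computed
         * descriptor would land at index 513; the wrap check below
         * subtracts tx_desc_num, giving index 1.
         */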
        ASSERT(packet->num_desc);
        descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

        /* Check for wrap case */
        if (descriptor > tx_ring->tbd_last)
            descriptor -= Adapter->tx_desc_num;

        /*
         * If the descriptor done bit is set, free the TxSwPacket and
         * associated resources
         */
        if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
            QUEUE_POP_HEAD(&tx_ring->used_list);
            QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

            if (descriptor == tx_ring->tbd_last)
                tx_ring->tbd_oldest =
                    tx_ring->tbd_first;
            else
                tx_ring->tbd_oldest =
                    descriptor + 1;

            desc_count += packet->num_desc;

            if (is_intr && (desc_count >= Adapter->tx_recycle_num))
                break;
        } else {
            /*
             * We found a sw packet that the e1000g is not done
             * with; there is no reason to check the rest of
             * the queue.
             */
            break;
        }
    }

    tx_ring->tbd_avail += desc_count;
    Adapter->tx_pkt_cnt += desc_count;

    mutex_exit(&tx_ring->usedlist_lock);

    if (desc_count == 0) {
        tx_ring->recycle_fail++;
        E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
        return (0);
    }

    tx_ring->recycle_fail = 0;
    tx_ring->stall_watchdog = 0;

    mp = NULL;
    nmp = NULL;
    packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
    ASSERT(packet != NULL);
    while (packet != NULL) {
        if (packet->mp != NULL) {
            ASSERT(packet->mp->b_next == NULL);
            /* Assemble the message chain */
            if (mp == NULL) {
                mp = packet->mp;
                nmp = packet->mp;
            } else {
                nmp->b_next = packet->mp;
                nmp = packet->mp;
            }
            /* Disconnect the message from the sw packet */
            packet->mp = NULL;
        }

        /* Free the TxSwPackets */
        e1000g_free_tx_swpkt(packet);

        packet = (p_tx_sw_packet_t)
            QUEUE_GET_NEXT(&pending_list, &packet->Link);
    }

    /* Return the TxSwPackets back to the FreeList */
    mutex_enter(&tx_ring->freelist_lock);
    QUEUE_APPEND(&tx_ring->free_list, &pending_list);
    mutex_exit(&tx_ring->freelist_lock);

    if (mp != NULL)
        freemsgchain(mp);

    return (desc_count);
}

/*
 * 82544 Coexistence issue workaround:
 * There are 2 issues.
 * 1. If a 32 bit split completion happens from P64H2 and another
 * agent drives a 64 bit request/split completion after ONLY
 * 1 idle clock (BRCM/Emulex/Adaptec fibre channel cards), then the
 * 82544 has a problem: in order to clock all the data in, it looks at
 * the REQ64# signal and, since it has changed so fast (i.e. 1 idle
 * clock turn around), it will fail to clock all the data in.
 * Data coming from certain ending addresses has exposure to this issue.
 *
 * To detect this issue, the following equation can be used:
 * SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 * If SUM[3:0] is in between 1 and 4, we will have this issue.
 *
 * ROOT CAUSE:
 * The erratum involves the 82544 PCIX elasticity FIFO implementations as
 * 64-bit FIFO's and flushing of the final partial-bytes corresponding
 * to the end of a requested read burst. Under a specific burst condition
 * of ending-data alignment and 32-byte split-completions, the final
 * byte(s) of split-completion data require an extra clock cycle to flush
 * into 64-bit FIFO orientation.
 * An incorrect logic dependency on the REQ64# signal occurring during
 * this clock cycle may cause the residual byte(s) to be lost, thereby
 * rendering the internal DMA client forever awaiting the final byte(s)
 * for an outbound data-fetch. The erratum is confirmed to *only* occur
 * if certain subsequent external 64-bit PCIX bus transactions occur
 * immediately (minimum possible bus turn-around) following the
 * odd-aligned 32-bit split-completion containing the final byte(s).
 * Intel has confirmed that this has been seen only with chipset/bridges
 * which have the capability to provide 32-bit split-completion data, and
 * in the presence of newer PCIX bus agents which fully-optimize the
 * inter-transaction turn-around (zero additional initiator latency when
 * pre-granted bus ownership).
 *
 * This issue does not exist in PCI bus mode, when any agent is operating
 * in 32 bit only mode, or on chipsets that do not do 32 bit split
 * completions for 64 bit read requests (Serverworks chipsets). P64H2 does
 * 32 bit split completions for any read request that has bit 2 set to 1
 * for the requested address and a read request size of more than 8 bytes.
 *
 * 2. Another issue is related to the 82544 driving DACs under a similar
 * scenario (32 bit split completion followed by a 64 bit transaction with
 * only 1 cycle turnaround). This issue is still being root caused. We
 * think that both of these issues can be avoided if the following
 * workaround is implemented. It seems the DAC issue is related to the
 * ending address being 0x9, 0xA, 0xB or 0xC and hence ending up at an odd
 * boundary in the elasticity FIFO, which does not get flushed due to the
 * REQ64# dependency. We will only know the full story after it has been
 * simulated successfully by the HW team.
 *
 * WORKAROUND:
 * Make sure the ending address is not 1, 2, 3, 4 (hang) or 9, A, B, C (DAC).
 */
static uint32_t
e1000g_fill_82544_desc(uint64_t address,
    size_t length, p_desc_array_t desc_array)
{
    /*
     * Since the issue is sensitive to both length and address,
     * let us first check the address...
     */
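    /*
     * Illustrative example: a buffer whose address ends in 0x6 and whose
     * length is 0x3E gives safe_terminator = (0x6 + 0xE) & 0xF = 0x4,
     * which falls in the problem range, so two descriptors are emitted:
     * one of length - 4 bytes and a final 4-byte descriptor.
     */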
    uint32_t safe_terminator;

    if (length <= 4) {
        desc_array->descriptor[0].address = address;
        desc_array->descriptor[0].length = length;
        desc_array->elements = 1;
        return (desc_array->elements);
    }
    safe_terminator =
        (uint32_t)((((uint32_t)address & 0x7) +
        (length & 0xF)) & 0xF);
    /*
     * If it does not fall between 0x1 and 0x4 or between 0x9 and 0xC,
     * a single descriptor is enough.
     */
    if (safe_terminator == 0 ||
        (safe_terminator > 4 && safe_terminator < 9) ||
        (safe_terminator > 0xC && safe_terminator <= 0xF)) {
        desc_array->descriptor[0].address = address;
        desc_array->descriptor[0].length = length;
        desc_array->elements = 1;
        return (desc_array->elements);
    }

    desc_array->descriptor[0].address = address;
    desc_array->descriptor[0].length = length - 4;
    desc_array->descriptor[1].address = address + (length - 4);
    desc_array->descriptor[1].length = 4;
    desc_array->elements = 2;
    return (desc_array->elements);
}

static int
e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
    mblk_t *mp, boolean_t tx_undersize_flag)
{
    size_t len;
    size_t len1;
    dma_buffer_t *tx_buf;
    mblk_t *nmp;
    boolean_t finished;
    int desc_count;

    desc_count = 0;
    tx_buf = packet->tx_buf;
    len = MBLKL(mp);

    ASSERT((tx_buf->len + len) <= tx_buf->size);

    if (len > 0) {
        bcopy(mp->b_rptr,
            tx_buf->address + tx_buf->len,
            len);
        tx_buf->len += len;

        packet->num_mblk_frag++;
    }

    nmp = mp->b_cont;
    if (nmp == NULL) {
        finished = B_TRUE;
    } else {
        len1 = MBLKL(nmp);
        if ((tx_buf->len + len1) > tx_buf->size)
            finished = B_TRUE;
        else if (tx_undersize_flag)
            finished = B_FALSE;
        else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
            finished = B_TRUE;
        else
            finished = B_FALSE;
    }

    if (finished) {
        E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
            (tx_buf->len > len));

        /*
         * If the packet is smaller than 64 bytes, which is the
         * minimum ethernet packet size, pad the packet to make
         * it at least 60 bytes. The hardware will add 4 bytes
         * for the CRC.
         */
        if (tx_undersize_flag) {
            ASSERT(tx_buf->len < ETHERMIN);

            bzero(tx_buf->address + tx_buf->len,
                ETHERMIN - tx_buf->len);
            tx_buf->len = ETHERMIN;
        }

#ifdef __sparc
        if (packet->dma_type == USE_DVMA)
            dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
        else
            (void) ddi_dma_sync(tx_buf->dma_handle, 0,
                tx_buf->len, DDI_DMA_SYNC_FORDEV);
#else
        (void) ddi_dma_sync(tx_buf->dma_handle, 0,
            tx_buf->len, DDI_DMA_SYNC_FORDEV);
#endif

        packet->data_transfer_type = USE_BCOPY;

        desc_count = e1000g_fill_tx_desc(tx_ring,
            packet,
            tx_buf->dma_address,
            tx_buf->len);

        if (desc_count <= 0)
            return (-1);
    }

    return (desc_count);
}

static int
e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
{
    int j;
    int mystat;
    size_t len;
    ddi_dma_cookie_t dma_cookie;
    uint_t ncookies;
    int desc_count;
    uint32_t desc_total;

    desc_total = 0;
    len = MBLKL(mp);

    /*
     * ddi_dma_addr_bind_handle() allocates DMA resources for a
     * memory object such that a device can perform DMA to or from
     * the object.
     * DMA resources are allocated considering the
     * device's DMA attributes as expressed by ddi_dma_attr(9S)
     * (see ddi_dma_alloc_handle(9F)).
     *
     * ddi_dma_addr_bind_handle() fills in the first DMA cookie
     * pointed to by cookiep with the appropriate address, length,
     * and bus type. *ccountp is set to the number of DMA cookies
     * representing this DMA object. Subsequent DMA cookies must be
     * retrieved by calling ddi_dma_nextcookie(9F) the number of
     * times specified by *countp - 1.
     */
    switch (packet->dma_type) {
#ifdef __sparc
    case USE_DVMA:
        dvma_kaddr_load(packet->tx_dma_handle,
            (caddr_t)mp->b_rptr, len, 0, &dma_cookie);

        dvma_sync(packet->tx_dma_handle, 0,
            DDI_DMA_SYNC_FORDEV);

        ncookies = 1;
        packet->data_transfer_type = USE_DVMA;
        break;
#endif
    case USE_DMA:
        if ((mystat = ddi_dma_addr_bind_handle(
            packet->tx_dma_handle, NULL,
            (caddr_t)mp->b_rptr, len,
            DDI_DMA_WRITE | DDI_DMA_STREAMING,
            DDI_DMA_DONTWAIT, 0, &dma_cookie,
            &ncookies)) != DDI_DMA_MAPPED) {

            e1000g_log(tx_ring->adapter, CE_WARN,
                "Couldn't bind mblk buffer to Tx DMA handle: "
                "return: %X, Pkt: %X\n",
                mystat, packet);
            return (-1);
        }

        /*
         * An implicit ddi_dma_sync() is done when
         * ddi_dma_addr_bind_handle() is called, so we
         * don't need to explicitly call ddi_dma_sync()
         * here any more.
         */
        ASSERT(ncookies);
        E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
            (ncookies > 1));

        /*
         * The data_transfer_type value must be set after the handle
         * has been bound, for it will be used in
         * e1000g_free_tx_swpkt() to decide whether we need to unbind
         * the handle.
         */
        packet->data_transfer_type = USE_DMA;
        break;
    default:
        ASSERT(B_FALSE);
        break;
    }

    packet->num_mblk_frag++;

    /*
     * Each address could span multiple cookies;
     * each cookie will have one descriptor.
     */
    for (j = ncookies; j != 0; j--) {

        desc_count = e1000g_fill_tx_desc(tx_ring,
            packet,
            dma_cookie.dmac_laddress,
            dma_cookie.dmac_size);

        if (desc_count <= 0)
            return (-1);

        desc_total += desc_count;

        /*
         * ddi_dma_nextcookie() retrieves subsequent DMA
         * cookies for a DMA object.
         * ddi_dma_nextcookie() fills in the
         * ddi_dma_cookie(9S) structure pointed to by
         * cookiep. The ddi_dma_cookie(9S) structure
         * must be allocated prior to calling
         * ddi_dma_nextcookie(). The DMA cookie count
         * returned by ddi_dma_buf_bind_handle(9F),
         * ddi_dma_addr_bind_handle(9F), or
         * ddi_dma_getwin(9F) indicates the number of DMA
         * cookies a DMA object consists of. If the
         * resulting cookie count, N, is larger than 1,
         * ddi_dma_nextcookie() must be called N-1 times
         * to retrieve all DMA cookies.
         */
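        /*
         * Only advance to the next cookie while more remain; the
         * first cookie was already filled in by
         * ddi_dma_addr_bind_handle() (or dvma_kaddr_load()) above.
         */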
        if (j > 1) {
            ddi_dma_nextcookie(packet->tx_dma_handle,
                &dma_cookie);
        }
    }

    return (desc_total);
}

static void
e1000g_fill_context_descriptor(context_data_t *cur_context,
    struct e1000_context_desc *context_desc)
{
    if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) {
        context_desc->lower_setup.ip_fields.ipcss =
            cur_context->ether_header_size;
        context_desc->lower_setup.ip_fields.ipcso =
            cur_context->ether_header_size +
            offsetof(struct ip, ip_sum);
        context_desc->lower_setup.ip_fields.ipcse =
            cur_context->ether_header_size +
            cur_context->cksum_start - 1;
    } else
        context_desc->lower_setup.ip_config = 0;

    if (cur_context->cksum_flags & HCK_PARTIALCKSUM) {
        /*
         * Packets of the same protocol have the following
         * stuff and start offsets:
         *
         *    Protocol     Stuff    Start    Checksum
         *                 Offset   Offset   Enable
         *    IPv4 + TCP   0x24     0x14     Yes
         *    IPv4 + UDP   0x1A     0x14     Yes
         *    IPv6 + TCP   0x20     0x10     No
         *    IPv6 + UDP   0x14     0x10     No
         */
        context_desc->upper_setup.tcp_fields.tucss =
            cur_context->cksum_start + cur_context->ether_header_size;
        context_desc->upper_setup.tcp_fields.tucso =
            cur_context->cksum_stuff + cur_context->ether_header_size;
        context_desc->upper_setup.tcp_fields.tucse = 0;
    } else
        context_desc->upper_setup.tcp_config = 0;

    if (cur_context->lso_flag) {
        context_desc->tcp_seg_setup.fields.mss = cur_context->mss;
        context_desc->tcp_seg_setup.fields.hdr_len =
            cur_context->hdr_len;
        /*
         * Workaround for 82546EB errata 23: status write-back
         * reporting (RS) should not be set on context or
         * null descriptors.
         */
        context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
            | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP
            | E1000_TXD_DTYP_C | cur_context->pay_len;
    } else {
        context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
            | E1000_TXD_DTYP_C;
        /*
         * Zero out the options for TCP Segmentation Offload
         */
        context_desc->tcp_seg_setup.data = 0;
    }
}

static int
e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
    p_tx_sw_packet_t packet, uint64_t address, size_t size)
{
    struct e1000_hw *hw = &tx_ring->adapter->shared;
    p_sw_desc_t desc;

    if (hw->mac.type == e1000_82544) {
        if (hw->bus.type == e1000_bus_type_pcix)
            return (e1000g_tx_workaround_PCIX_82544(packet,
                address, size));

        if (size > JUMBO_FRAG_LENGTH)
            return (e1000g_tx_workaround_jumbo_82544(packet,
                address, size));
    }

    ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

    desc = &packet->desc[packet->num_desc];
    desc->address = address;
    desc->length = size;

    packet->num_desc++;

    return (1);
}

static int
e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
    p_sw_desc_t desc;
    int desc_count;
    long size_left;
    size_t len;
    uint32_t counter;
    uint32_t array_elements;
    desc_array_t desc_array;

    /*
     * Coexist workaround for cordova: RP: 07/04/03
     *
     * RP: ERRATA: Workaround ISSUE:
     * 8kb_buffer_Lockup CONTROLLER: Cordova
     * Break each buffer into 8kb pieces until the
     * remainder is < 8kb.
     */
    size_left = size;
    desc_count = 0;

    while (size_left > 0) {
        if (size_left > MAX_TX_BUF_SIZE)
            len = MAX_TX_BUF_SIZE;
        else
            len = size_left;

        array_elements = e1000g_fill_82544_desc(address,
            len, &desc_array);

        for (counter = 0; counter < array_elements; counter++) {
            ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
            /*
             * Put in the buffer address
             */
            desc = &packet->desc[packet->num_desc];

            desc->address =
                desc_array.descriptor[counter].address;
            desc->length =
                desc_array.descriptor[counter].length;

            packet->num_desc++;
            desc_count++;
        } /* for */

        /*
         * Update the buffer address and length
         */
        address += MAX_TX_BUF_SIZE;
        size_left -= MAX_TX_BUF_SIZE;
    } /* while */

    return (desc_count);
}

static int
e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
    p_sw_desc_t desc;
    int desc_count;
    long size_left;
    uint32_t offset;

    /*
     * Workaround for Jumbo Frames on Cordova
     * PSD 06/01/2001
     */
    size_left = size;
    desc_count = 0;
    offset = 0;
    while (size_left > 0) {
        ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

        desc = &packet->desc[packet->num_desc];

        desc->address = address + offset;

        if (size_left > JUMBO_FRAG_LENGTH)
            desc->length = JUMBO_FRAG_LENGTH;
        else
            desc->length = size_left;

        packet->num_desc++;
        desc_count++;

        offset += desc->length;
        size_left -= JUMBO_FRAG_LENGTH;
    }

    return (desc_count);
}

#pragma inline(e1000g_82547_tx_move_tail_work)

static void
e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
{
    struct e1000_hw *hw;
    uint16_t hw_tdt;
    uint16_t sw_tdt;
    struct e1000_tx_desc *tx_desc;
    uint16_t length = 0;
    boolean_t eop = B_FALSE;
    struct e1000g *Adapter;

    Adapter = tx_ring->adapter;
    hw = &Adapter->shared;

    hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
    sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;

    while (hw_tdt != sw_tdt) {
        tx_desc = &(tx_ring->tbd_first[hw_tdt]);
        length += tx_desc->lower.flags.length;
        eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
        if (++hw_tdt == Adapter->tx_desc_num)
            hw_tdt = 0;

        if (eop) {
            if ((Adapter->link_duplex == HALF_DUPLEX) &&
                (e1000_fifo_workaround_82547(hw, length)
                != E1000_SUCCESS)) {
                if (tx_ring->timer_enable_82547) {
                    ASSERT(tx_ring->timer_id_82547 == 0);
                    tx_ring->timer_id_82547 =
                        timeout(e1000g_82547_timeout,
                        (void *)tx_ring,
                        drv_usectohz(10000));
                }
                return;

            } else {
                E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
                e1000_update_tx_fifo_head_82547(hw, length);
                length = 0;
            }
        }
    }
}

static void
e1000g_82547_timeout(void *arg)
{
    e1000g_tx_ring_t *tx_ring;

    tx_ring = (e1000g_tx_ring_t *)arg;

    mutex_enter(&tx_ring->tx_lock);

    tx_ring->timer_id_82547 = 0;
    e1000g_82547_tx_move_tail_work(tx_ring);

    mutex_exit(&tx_ring->tx_lock);
}

static void
e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
{
    timeout_id_t tid;

    ASSERT(MUTEX_HELD(&tx_ring->tx_lock));

    tid = tx_ring->timer_id_82547;
    tx_ring->timer_id_82547 = 0;
    if (tid != 0) {
        tx_ring->timer_enable_82547 = B_FALSE;
        mutex_exit(&tx_ring->tx_lock);

        (void) untimeout(tid);

        mutex_enter(&tx_ring->tx_lock);
    }
    tx_ring->timer_enable_82547 = B_TRUE;
    e1000g_82547_tx_move_tail_work(tx_ring);
}