1 /* 2 * This file is provided under a CDDLv1 license. When using or 3 * redistributing this file, you may do so under this license. 4 * In redistributing this file this license must be included 5 * and no other modification of this header file is permitted. 6 * 7 * CDDL LICENSE SUMMARY 8 * 9 * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved. 10 * 11 * The contents of this file are subject to the terms of Version 12 * 1.0 of the Common Development and Distribution License (the "License"). 13 * 14 * You should have received a copy of the License with this software. 15 * You can obtain a copy of the License at 16 * http://www.opensolaris.org/os/licensing. 17 * See the License for the specific language governing permissions 18 * and limitations under the License. 19 */ 20 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms of the CDDLv1. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * ********************************************************************** 30 * * 31 * Module Name: * 32 * e1000g_tx.c * 33 * * 34 * Abstract: * 35 * This file contains some routines that take care of Transmit, * 36 * make the hardware to send the data pointed by the packet out * 37 * on to the physical medium. * 38 * * 39 * ********************************************************************** 40 */ 41 42 #include "e1000g_sw.h" 43 #include "e1000g_debug.h" 44 45 static boolean_t e1000g_send(struct e1000g *, mblk_t *); 46 static int e1000g_tx_copy(e1000g_tx_ring_t *, 47 p_tx_sw_packet_t, mblk_t *, boolean_t); 48 static int e1000g_tx_bind(e1000g_tx_ring_t *, 49 p_tx_sw_packet_t, mblk_t *); 50 static boolean_t e1000g_retreive_context(mblk_t *, context_data_t *, size_t); 51 static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *); 52 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *, 53 context_data_t *); 54 static void e1000g_fill_context_descriptor(context_data_t *, 55 struct e1000_context_desc *); 56 static int e1000g_fill_tx_desc(e1000g_tx_ring_t *, 57 p_tx_sw_packet_t, uint64_t, size_t); 58 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length, 59 p_desc_array_t desc_array); 60 static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t); 61 static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t); 62 static void e1000g_82547_timeout(void *); 63 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *); 64 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *); 65 66 #ifndef E1000G_DEBUG 67 #pragma inline(e1000g_tx_copy) 68 #pragma inline(e1000g_tx_bind) 69 #pragma inline(e1000g_retreive_context) 70 #pragma inline(e1000g_check_context) 71 #pragma inline(e1000g_fill_tx_ring) 72 #pragma inline(e1000g_fill_context_descriptor) 73 #pragma inline(e1000g_fill_tx_desc) 74 #pragma inline(e1000g_fill_82544_desc) 75 #pragma inline(e1000g_tx_workaround_PCIX_82544) 76 #pragma inline(e1000g_tx_workaround_jumbo_82544) 77 #pragma inline(e1000g_free_tx_swpkt) 78 #endif 79 80 /* 81 * e1000g_free_tx_swpkt - free up the tx sw packet 82 * 83 * Unbind the previously bound DMA handle for a given 84 * transmit sw packet. And reset the sw packet data. 85 */ 86 void 87 e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet) 88 { 89 switch (packet->data_transfer_type) { 90 case USE_BCOPY: 91 packet->tx_buf->len = 0; 92 break; 93 #ifdef __sparc 94 case USE_DVMA: 95 dvma_unload(packet->tx_dma_handle, 0, -1); 96 break; 97 #endif 98 case USE_DMA: 99 ddi_dma_unbind_handle(packet->tx_dma_handle); 100 break; 101 default: 102 break; 103 } 104 105 /* 106 * The mblk has been stripped off the sw packet 107 * and will be freed in a triggered soft intr. 108 */ 109 ASSERT(packet->mp == NULL); 110 111 packet->data_transfer_type = USE_NONE; 112 packet->num_mblk_frag = 0; 113 packet->num_desc = 0; 114 } 115 116 mblk_t * 117 e1000g_m_tx(void *arg, mblk_t *mp) 118 { 119 struct e1000g *Adapter = (struct e1000g *)arg; 120 mblk_t *next; 121 122 rw_enter(&Adapter->chip_lock, RW_READER); 123 124 if ((Adapter->chip_state != E1000G_START) || 125 (Adapter->link_state != LINK_STATE_UP)) { 126 freemsgchain(mp); 127 mp = NULL; 128 } 129 130 while (mp != NULL) { 131 next = mp->b_next; 132 mp->b_next = NULL; 133 134 if (!e1000g_send(Adapter, mp)) { 135 mp->b_next = next; 136 break; 137 } 138 139 mp = next; 140 } 141 142 rw_exit(&Adapter->chip_lock); 143 return (mp); 144 } 145 146 /* 147 * e1000g_send - send packets onto the wire 148 * 149 * Called from e1000g_m_tx with an mblk ready to send. this 150 * routine sets up the transmit descriptors and sends data to 151 * the wire. It also pushes the just transmitted packet to 152 * the used tx sw packet list. 153 */ 154 static boolean_t 155 e1000g_send(struct e1000g *Adapter, mblk_t *mp) 156 { 157 struct e1000_hw *hw; 158 p_tx_sw_packet_t packet; 159 LIST_DESCRIBER pending_list; 160 size_t len; 161 size_t msg_size; 162 uint32_t frag_count; 163 int desc_count; 164 uint32_t desc_total; 165 boolean_t tx_undersize_flag; 166 mblk_t *nmp; 167 mblk_t *tmp; 168 e1000g_tx_ring_t *tx_ring; 169 context_data_t cur_context; 170 171 hw = &Adapter->shared; 172 tx_ring = Adapter->tx_ring; 173 174 /* Get the total size and frags number of the message */ 175 tx_undersize_flag = B_FALSE; 176 frag_count = 0; 177 msg_size = 0; 178 for (nmp = mp; nmp; nmp = nmp->b_cont) { 179 frag_count++; 180 msg_size += MBLKL(nmp); 181 } 182 183 /* retreive and compute information for context descriptor */ 184 if (!e1000g_retreive_context(mp, &cur_context, msg_size)) { 185 freemsg(mp); 186 return (B_TRUE); 187 } 188 189 /* 190 * Make sure the packet is less than the allowed size 191 */ 192 if (!cur_context.lso_flag && 193 (msg_size > Adapter->max_frame_size - ETHERFCSL)) { 194 /* 195 * For the over size packet, we'll just drop it. 196 * So we return B_TRUE here. 197 */ 198 E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL, 199 "Tx packet out of bound. length = %d \n", msg_size); 200 E1000G_STAT(tx_ring->stat_over_size); 201 freemsg(mp); 202 return (B_TRUE); 203 } 204 205 /* 206 * Check and reclaim tx descriptors. 207 * This low water mark check should be done all the time as 208 * Transmit interrupt delay can produce Transmit interrupts little 209 * late and that may cause few problems related to reaping Tx 210 * Descriptors... As you may run short of them before getting any 211 * transmit interrupt... 212 */ 213 if (tx_ring->resched_needed || 214 (tx_ring->tbd_avail < Adapter->tx_recycle_thresh)) { 215 (void) e1000g_recycle(tx_ring); 216 E1000G_DEBUG_STAT(tx_ring->stat_recycle); 217 218 if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) { 219 E1000G_DEBUG_STAT(tx_ring->stat_lack_desc); 220 goto tx_no_resource; 221 } 222 } 223 224 /* 225 * If the message size is less than the minimum ethernet packet size, 226 * we'll use bcopy to send it, and padd it to 60 bytes later. 227 */ 228 if (msg_size < ETHERMIN) { 229 E1000G_DEBUG_STAT(tx_ring->stat_under_size); 230 tx_undersize_flag = B_TRUE; 231 } 232 233 /* Initialize variables */ 234 desc_count = 1; /* The initial value should be greater than 0 */ 235 desc_total = 0; 236 QUEUE_INIT_LIST(&pending_list); 237 238 /* Process each mblk fragment and fill tx descriptors */ 239 packet = NULL; 240 nmp = mp; 241 while (nmp) { 242 tmp = nmp->b_cont; 243 244 len = MBLKL(nmp); 245 /* Check zero length mblks */ 246 if (len == 0) { 247 E1000G_DEBUG_STAT(tx_ring->stat_empty_frags); 248 /* 249 * If there're no packet buffers have been used, 250 * or we just completed processing a buffer, then 251 * skip the empty mblk fragment. 252 * Otherwise, there's still a pending buffer that 253 * needs to be processed (tx_copy). 254 */ 255 if (desc_count > 0) { 256 nmp = tmp; 257 continue; 258 } 259 } 260 261 /* 262 * Get a new TxSwPacket to process mblk buffers. 263 */ 264 if (desc_count > 0) { 265 mutex_enter(&tx_ring->freelist_lock); 266 packet = (p_tx_sw_packet_t) 267 QUEUE_POP_HEAD(&tx_ring->free_list); 268 mutex_exit(&tx_ring->freelist_lock); 269 270 if (packet == NULL) { 271 E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL, 272 "No Tx SwPacket available\n"); 273 E1000G_STAT(tx_ring->stat_no_swpkt); 274 goto tx_send_failed; 275 } 276 QUEUE_PUSH_TAIL(&pending_list, &packet->Link); 277 } 278 279 ASSERT(packet); 280 /* 281 * If the size of the fragment is less than the tx_bcopy_thresh 282 * we'll use bcopy; Otherwise, we'll use DMA binding. 283 */ 284 if ((len <= Adapter->tx_bcopy_thresh) || tx_undersize_flag) { 285 desc_count = 286 e1000g_tx_copy(tx_ring, packet, nmp, 287 tx_undersize_flag); 288 E1000G_DEBUG_STAT(tx_ring->stat_copy); 289 } else { 290 desc_count = 291 e1000g_tx_bind(tx_ring, packet, nmp); 292 E1000G_DEBUG_STAT(tx_ring->stat_bind); 293 } 294 295 if (desc_count > 0) 296 desc_total += desc_count; 297 else if (desc_count < 0) 298 goto tx_send_failed; 299 300 nmp = tmp; 301 } 302 303 /* Assign the message to the last sw packet */ 304 ASSERT(packet); 305 ASSERT(packet->mp == NULL); 306 packet->mp = mp; 307 308 /* Try to recycle the tx descriptors again */ 309 if (tx_ring->tbd_avail < (desc_total + 2)) { 310 E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry); 311 (void) e1000g_recycle(tx_ring); 312 } 313 314 mutex_enter(&tx_ring->tx_lock); 315 316 /* 317 * If the number of available tx descriptors is not enough for transmit 318 * (one redundant descriptor and one hw checksum context descriptor are 319 * included), then return failure. 320 */ 321 if (tx_ring->tbd_avail < (desc_total + 2)) { 322 E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL, 323 "No Enough Tx descriptors\n"); 324 E1000G_STAT(tx_ring->stat_no_desc); 325 mutex_exit(&tx_ring->tx_lock); 326 goto tx_send_failed; 327 } 328 329 desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context); 330 331 mutex_exit(&tx_ring->tx_lock); 332 333 ASSERT(desc_count > 0); 334 335 /* Send successful */ 336 return (B_TRUE); 337 338 tx_send_failed: 339 /* 340 * Enable Transmit interrupts, so that the interrupt routine can 341 * call mac_tx_update() when transmit descriptors become available. 342 */ 343 tx_ring->resched_needed = B_TRUE; 344 if (!Adapter->tx_intr_enable) 345 e1000g_mask_tx_interrupt(Adapter); 346 347 /* Free pending TxSwPackets */ 348 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list); 349 while (packet) { 350 packet->mp = NULL; 351 e1000g_free_tx_swpkt(packet); 352 packet = (p_tx_sw_packet_t) 353 QUEUE_GET_NEXT(&pending_list, &packet->Link); 354 } 355 356 /* Return pending TxSwPackets to the "Free" list */ 357 mutex_enter(&tx_ring->freelist_lock); 358 QUEUE_APPEND(&tx_ring->free_list, &pending_list); 359 mutex_exit(&tx_ring->freelist_lock); 360 361 E1000G_STAT(tx_ring->stat_send_fail); 362 363 /* Message will be scheduled for re-transmit */ 364 return (B_FALSE); 365 366 tx_no_resource: 367 /* 368 * Enable Transmit interrupts, so that the interrupt routine can 369 * call mac_tx_update() when transmit descriptors become available. 370 */ 371 tx_ring->resched_needed = B_TRUE; 372 if (!Adapter->tx_intr_enable) 373 e1000g_mask_tx_interrupt(Adapter); 374 375 /* Message will be scheduled for re-transmit */ 376 return (B_FALSE); 377 } 378 379 static boolean_t 380 e1000g_retreive_context(mblk_t *mp, context_data_t *cur_context, 381 size_t msg_size) 382 { 383 uintptr_t ip_start; 384 uintptr_t tcp_start; 385 mblk_t *nmp; 386 387 bzero(cur_context, sizeof (context_data_t)); 388 389 /* retrieve checksum info */ 390 hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start, 391 &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags); 392 /* retreive ethernet header size */ 393 if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid == 394 htons(ETHERTYPE_VLAN)) 395 cur_context->ether_header_size = 396 sizeof (struct ether_vlan_header); 397 else 398 cur_context->ether_header_size = 399 sizeof (struct ether_header); 400 401 if (cur_context->cksum_flags & HW_LSO) { 402 if ((cur_context->mss = DB_LSOMSS(mp)) != 0) { 403 /* free the invaid packet */ 404 if (!((cur_context->cksum_flags & HCK_PARTIALCKSUM) && 405 (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) { 406 return (B_FALSE); 407 } 408 cur_context->lso_flag = B_TRUE; 409 /* 410 * Some fields are cleared for the hardware to fill 411 * in. We don't assume Ethernet header, IP header and 412 * TCP header are always in the same mblk fragment, 413 * while we assume each header is always within one 414 * mblk fragment and Ethernet header is always in the 415 * first mblk fragment. 416 */ 417 nmp = mp; 418 ip_start = (uintptr_t)(nmp->b_rptr) 419 + cur_context->ether_header_size; 420 if (ip_start >= (uintptr_t)(nmp->b_wptr)) { 421 ip_start = (uintptr_t)nmp->b_cont->b_rptr 422 + (ip_start - (uintptr_t)(nmp->b_wptr)); 423 nmp = nmp->b_cont; 424 } 425 tcp_start = ip_start + 426 IPH_HDR_LENGTH((ipha_t *)ip_start); 427 if (tcp_start >= (uintptr_t)(nmp->b_wptr)) { 428 tcp_start = (uintptr_t)nmp->b_cont->b_rptr 429 + (tcp_start - (uintptr_t)(nmp->b_wptr)); 430 nmp = nmp->b_cont; 431 } 432 cur_context->hdr_len = cur_context->ether_header_size 433 + IPH_HDR_LENGTH((ipha_t *)ip_start) 434 + TCP_HDR_LENGTH((tcph_t *)tcp_start); 435 ((ipha_t *)ip_start)->ipha_length = 0; 436 ((ipha_t *)ip_start)->ipha_hdr_checksum = 0; 437 /* calculate the TCP packet payload length */ 438 cur_context->pay_len = msg_size - cur_context->hdr_len; 439 } 440 } 441 return (B_TRUE); 442 } 443 444 static boolean_t 445 e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context) 446 { 447 boolean_t context_reload; 448 context_data_t *pre_context; 449 struct e1000g *Adapter; 450 451 context_reload = B_FALSE; 452 pre_context = &tx_ring->pre_context; 453 Adapter = tx_ring->adapter; 454 455 /* 456 * The following code determine if the context descriptor is 457 * needed to be reloaded. The sequence of the conditions is 458 * made by their possibilities of changing. 459 */ 460 /* 461 * workaround for 82546EB, context descriptor must be reloaded 462 * per LSO/hw_cksum packet if LSO is enabled. 463 */ 464 if (Adapter->lso_premature_issue && 465 Adapter->lso_enable && 466 (cur_context->cksum_flags != 0)) { 467 468 context_reload = B_TRUE; 469 } else if (cur_context->lso_flag) { 470 if ((cur_context->cksum_flags != pre_context->cksum_flags) || 471 (cur_context->pay_len != pre_context->pay_len) || 472 (cur_context->mss != pre_context->mss) || 473 (cur_context->hdr_len != pre_context->hdr_len) || 474 (cur_context->cksum_stuff != pre_context->cksum_stuff) || 475 (cur_context->cksum_start != pre_context->cksum_start) || 476 (cur_context->ether_header_size != 477 pre_context->ether_header_size)) { 478 479 context_reload = B_TRUE; 480 } 481 } else if (cur_context->cksum_flags != 0) { 482 if ((cur_context->cksum_flags != pre_context->cksum_flags) || 483 (cur_context->cksum_stuff != pre_context->cksum_stuff) || 484 (cur_context->cksum_start != pre_context->cksum_start) || 485 (cur_context->ether_header_size != 486 pre_context->ether_header_size)) { 487 488 context_reload = B_TRUE; 489 } 490 } 491 492 return (context_reload); 493 } 494 495 static int 496 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list, 497 context_data_t *cur_context) 498 { 499 struct e1000g *Adapter; 500 struct e1000_hw *hw; 501 p_tx_sw_packet_t first_packet; 502 p_tx_sw_packet_t packet; 503 p_tx_sw_packet_t previous_packet; 504 boolean_t context_reload; 505 struct e1000_tx_desc *first_data_desc; 506 struct e1000_tx_desc *next_desc; 507 struct e1000_tx_desc *descriptor; 508 int desc_count; 509 boolean_t buff_overrun_flag; 510 int i; 511 512 Adapter = tx_ring->adapter; 513 hw = &Adapter->shared; 514 515 desc_count = 0; 516 first_packet = NULL; 517 first_data_desc = NULL; 518 descriptor = NULL; 519 first_packet = NULL; 520 packet = NULL; 521 buff_overrun_flag = B_FALSE; 522 523 next_desc = tx_ring->tbd_next; 524 525 /* Context descriptor reload check */ 526 context_reload = e1000g_check_context(tx_ring, cur_context); 527 528 if (context_reload) { 529 first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list); 530 531 descriptor = next_desc; 532 533 e1000g_fill_context_descriptor(cur_context, 534 (struct e1000_context_desc *)descriptor); 535 536 /* Check the wrap-around case */ 537 if (descriptor == tx_ring->tbd_last) 538 next_desc = tx_ring->tbd_first; 539 else 540 next_desc++; 541 542 desc_count++; 543 } 544 545 first_data_desc = next_desc; 546 547 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list); 548 while (packet) { 549 ASSERT(packet->num_desc); 550 551 for (i = 0; i < packet->num_desc; i++) { 552 ASSERT(tx_ring->tbd_avail > 0); 553 554 descriptor = next_desc; 555 descriptor->buffer_addr = 556 packet->desc[i].address; 557 descriptor->lower.data = 558 packet->desc[i].length; 559 560 /* Zero out status */ 561 descriptor->upper.data = 0; 562 563 descriptor->lower.data |= 564 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 565 /* must set RS on every outgoing descriptor */ 566 descriptor->lower.data |= 567 E1000_TXD_CMD_RS; 568 569 if (cur_context->lso_flag) 570 descriptor->lower.data |= E1000_TXD_CMD_TSE; 571 572 /* Check the wrap-around case */ 573 if (descriptor == tx_ring->tbd_last) 574 next_desc = tx_ring->tbd_first; 575 else 576 next_desc++; 577 578 desc_count++; 579 580 /* 581 * workaround for 82546EB errata 33, hang in PCI-X 582 * systems due to 2k Buffer Overrun during Transmit 583 * Operation. The workaround applies to all the Intel 584 * PCI-X chips. 585 */ 586 if (hw->bus.type == e1000_bus_type_pcix && 587 descriptor == first_data_desc && 588 ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) 589 > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) { 590 /* modified the first descriptor */ 591 descriptor->lower.data &= 592 ~E1000G_TBD_LENGTH_MASK; 593 descriptor->lower.flags.length = 594 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 595 596 /* insert a new descriptor */ 597 ASSERT(tx_ring->tbd_avail > 0); 598 next_desc->buffer_addr = 599 packet->desc[0].address + 600 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 601 next_desc->lower.data = 602 packet->desc[0].length - 603 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 604 605 /* Zero out status */ 606 next_desc->upper.data = 0; 607 608 next_desc->lower.data |= 609 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 610 /* must set RS on every outgoing descriptor */ 611 next_desc->lower.data |= 612 E1000_TXD_CMD_RS; 613 614 if (cur_context->lso_flag) 615 next_desc->lower.data |= 616 E1000_TXD_CMD_TSE; 617 618 descriptor = next_desc; 619 620 /* Check the wrap-around case */ 621 if (next_desc == tx_ring->tbd_last) 622 next_desc = tx_ring->tbd_first; 623 else 624 next_desc++; 625 626 desc_count++; 627 buff_overrun_flag = B_TRUE; 628 } 629 } 630 631 if (buff_overrun_flag) { 632 packet->num_desc++; 633 buff_overrun_flag = B_FALSE; 634 } 635 636 if (first_packet != NULL) { 637 /* 638 * Count the checksum context descriptor for 639 * the first SwPacket. 640 */ 641 first_packet->num_desc++; 642 first_packet = NULL; 643 } 644 645 previous_packet = packet; 646 packet = (p_tx_sw_packet_t) 647 QUEUE_GET_NEXT(pending_list, &packet->Link); 648 } 649 650 /* 651 * workaround for 82546EB errata 21, LSO Premature Descriptor Write Back 652 */ 653 if (Adapter->lso_premature_issue && cur_context->lso_flag && 654 ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) { 655 /* modified the previous descriptor */ 656 descriptor->lower.data -= 4; 657 658 /* insert a new descriptor */ 659 ASSERT(tx_ring->tbd_avail > 0); 660 /* the lower 20 bits of lower.data is the length field */ 661 next_desc->buffer_addr = 662 descriptor->buffer_addr + 663 (descriptor->lower.data & E1000G_TBD_LENGTH_MASK); 664 next_desc->lower.data = 4; 665 666 /* Zero out status */ 667 next_desc->upper.data = 0; 668 /* It must be part of a LSO packet */ 669 next_desc->lower.data |= 670 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | 671 E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE; 672 673 descriptor = next_desc; 674 675 /* Check the wrap-around case */ 676 if (descriptor == tx_ring->tbd_last) 677 next_desc = tx_ring->tbd_first; 678 else 679 next_desc++; 680 681 desc_count++; 682 /* update the number of descriptors */ 683 previous_packet->num_desc++; 684 } 685 686 ASSERT(descriptor); 687 688 if (cur_context->cksum_flags) { 689 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) 690 ((struct e1000_data_desc *)first_data_desc)-> 691 upper.fields.popts |= E1000_TXD_POPTS_IXSM; 692 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) 693 ((struct e1000_data_desc *)first_data_desc)-> 694 upper.fields.popts |= E1000_TXD_POPTS_TXSM; 695 } 696 697 /* 698 * Last Descriptor of Packet needs End Of Packet (EOP), Report 699 * Status (RS) set. 700 */ 701 if (Adapter->tx_intr_delay) { 702 descriptor->lower.data |= E1000_TXD_CMD_IDE | 703 E1000_TXD_CMD_EOP; 704 } else { 705 descriptor->lower.data |= E1000_TXD_CMD_EOP; 706 } 707 708 /* Set append Ethernet CRC (IFCS) bits */ 709 if (cur_context->lso_flag) { 710 first_data_desc->lower.data |= E1000_TXD_CMD_IFCS; 711 } else { 712 descriptor->lower.data |= E1000_TXD_CMD_IFCS; 713 } 714 715 /* 716 * Sync the Tx descriptors DMA buffer 717 */ 718 (void) ddi_dma_sync(tx_ring->tbd_dma_handle, 719 0, 0, DDI_DMA_SYNC_FORDEV); 720 721 tx_ring->tbd_next = next_desc; 722 723 /* 724 * Advance the Transmit Descriptor Tail (Tdt), this tells the 725 * FX1000 that this frame is available to transmit. 726 */ 727 if (hw->mac.type == e1000_82547) 728 e1000g_82547_tx_move_tail(tx_ring); 729 else 730 E1000_WRITE_REG(hw, E1000_TDT(0), 731 (uint32_t)(next_desc - tx_ring->tbd_first)); 732 733 if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) { 734 ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED); 735 Adapter->chip_state = E1000G_ERROR; 736 } 737 738 /* Put the pending SwPackets to the "Used" list */ 739 mutex_enter(&tx_ring->usedlist_lock); 740 QUEUE_APPEND(&tx_ring->used_list, pending_list); 741 tx_ring->tbd_avail -= desc_count; 742 mutex_exit(&tx_ring->usedlist_lock); 743 744 /* update LSO related data */ 745 if (context_reload) 746 tx_ring->pre_context = *cur_context; 747 748 return (desc_count); 749 } 750 751 752 /* 753 * e1000g_tx_setup - setup tx data structures 754 * 755 * This routine initializes all of the transmit related 756 * structures. This includes the Transmit descriptors, 757 * and the tx_sw_packet structures. 758 */ 759 void 760 e1000g_tx_setup(struct e1000g *Adapter) 761 { 762 struct e1000_hw *hw; 763 p_tx_sw_packet_t packet; 764 UINT i; 765 uint32_t buf_high; 766 uint32_t buf_low; 767 uint32_t reg_tipg; 768 uint32_t reg_tctl; 769 uint32_t reg_tarc; 770 uint16_t speed, duplex; 771 int size; 772 e1000g_tx_ring_t *tx_ring; 773 774 hw = &Adapter->shared; 775 tx_ring = Adapter->tx_ring; 776 777 /* init the lists */ 778 /* 779 * Here we don't need to protect the lists using the 780 * usedlist_lock and freelist_lock, for they have 781 * been protected by the chip_lock. 782 */ 783 QUEUE_INIT_LIST(&tx_ring->used_list); 784 QUEUE_INIT_LIST(&tx_ring->free_list); 785 786 /* Go through and set up each SW_Packet */ 787 packet = tx_ring->packet_area; 788 for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) { 789 /* Initialize this tx_sw_apcket area */ 790 e1000g_free_tx_swpkt(packet); 791 /* Add this tx_sw_packet to the free list */ 792 QUEUE_PUSH_TAIL(&tx_ring->free_list, 793 &packet->Link); 794 } 795 796 /* Setup TX descriptor pointers */ 797 tx_ring->tbd_next = tx_ring->tbd_first; 798 tx_ring->tbd_oldest = tx_ring->tbd_first; 799 800 /* 801 * Setup Hardware TX Registers 802 */ 803 /* Setup the Transmit Control Register (TCTL). */ 804 reg_tctl = E1000_READ_REG(hw, E1000_TCTL); 805 reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN | 806 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) | 807 (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) | 808 E1000_TCTL_RTLC; 809 810 /* Enable the MULR bit */ 811 if (hw->bus.type == e1000_bus_type_pci_express) 812 reg_tctl |= E1000_TCTL_MULR; 813 814 E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl); 815 816 /* Setup HW Base and Length of Tx descriptor area */ 817 size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc)); 818 E1000_WRITE_REG(hw, E1000_TDLEN(0), size); 819 size = E1000_READ_REG(hw, E1000_TDLEN(0)); 820 821 buf_low = (uint32_t)tx_ring->tbd_dma_addr; 822 buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32); 823 824 E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low); 825 E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high); 826 827 /* Setup our HW Tx Head & Tail descriptor pointers */ 828 E1000_WRITE_REG(hw, E1000_TDH(0), 0); 829 E1000_WRITE_REG(hw, E1000_TDT(0), 0); 830 831 /* Set the default values for the Tx Inter Packet Gap timer */ 832 if ((hw->mac.type == e1000_82542) && 833 ((hw->revision_id == E1000_REVISION_2) || 834 (hw->revision_id == E1000_REVISION_3))) { 835 reg_tipg = DEFAULT_82542_TIPG_IPGT; 836 reg_tipg |= 837 DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 838 reg_tipg |= 839 DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 840 } else if (hw->mac.type == e1000_80003es2lan) { 841 reg_tipg = DEFAULT_82543_TIPG_IPGR1; 842 reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 << 843 E1000_TIPG_IPGR2_SHIFT; 844 } else { 845 if (hw->phy.media_type == e1000_media_type_fiber) 846 reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER; 847 else 848 reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER; 849 reg_tipg |= 850 DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT; 851 reg_tipg |= 852 DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT; 853 } 854 E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg); 855 856 /* Setup Transmit Interrupt Delay Value */ 857 E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay); 858 E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL, 859 "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay); 860 861 if (hw->mac.type >= e1000_82540) { 862 E1000_WRITE_REG(&Adapter->shared, E1000_TADV, 863 Adapter->tx_intr_abs_delay); 864 E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL, 865 "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay); 866 } 867 868 tx_ring->tbd_avail = Adapter->tx_desc_num; 869 870 /* Initialize stored context information */ 871 bzero(&(tx_ring->pre_context), sizeof (context_data_t)); 872 } 873 874 /* 875 * e1000g_recycle - recycle the tx descriptors and tx sw packets 876 */ 877 int 878 e1000g_recycle(e1000g_tx_ring_t *tx_ring) 879 { 880 struct e1000g *Adapter; 881 LIST_DESCRIBER pending_list; 882 p_tx_sw_packet_t packet; 883 mblk_t *mp; 884 mblk_t *nmp; 885 struct e1000_tx_desc *descriptor; 886 int desc_count; 887 int is_intr; 888 889 /* 890 * This function will examine each TxSwPacket in the 'used' queue 891 * if the e1000g is done with it then the associated resources (Tx 892 * Descriptors) will be "freed" and the TxSwPacket will be 893 * returned to the 'free' queue. 894 */ 895 Adapter = tx_ring->adapter; 896 897 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list); 898 if (packet == NULL) { 899 tx_ring->recycle_fail = 0; 900 tx_ring->stall_watchdog = 0; 901 return (0); 902 } 903 904 is_intr = servicing_interrupt(); 905 906 if (is_intr) 907 mutex_enter(&tx_ring->usedlist_lock); 908 else if (mutex_tryenter(&tx_ring->usedlist_lock) == 0) 909 return (0); 910 911 desc_count = 0; 912 QUEUE_INIT_LIST(&pending_list); 913 914 /* Sync the Tx descriptor DMA buffer */ 915 (void) ddi_dma_sync(tx_ring->tbd_dma_handle, 916 0, 0, DDI_DMA_SYNC_FORKERNEL); 917 if (e1000g_check_dma_handle( 918 tx_ring->tbd_dma_handle) != DDI_FM_OK) { 919 mutex_exit(&tx_ring->usedlist_lock); 920 ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED); 921 Adapter->chip_state = E1000G_ERROR; 922 return (0); 923 } 924 925 /* 926 * While there are still TxSwPackets in the used queue check them 927 */ 928 while (packet = 929 (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) { 930 931 /* 932 * Get hold of the next descriptor that the e1000g will 933 * report status back to (this will be the last descriptor 934 * of a given sw packet). We only want to free the 935 * sw packet (and it resources) if the e1000g is done 936 * with ALL of the descriptors. If the e1000g is done 937 * with the last one then it is done with all of them. 938 */ 939 ASSERT(packet->num_desc); 940 descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1); 941 942 /* Check for wrap case */ 943 if (descriptor > tx_ring->tbd_last) 944 descriptor -= Adapter->tx_desc_num; 945 946 /* 947 * If the descriptor done bit is set free TxSwPacket and 948 * associated resources 949 */ 950 if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) { 951 QUEUE_POP_HEAD(&tx_ring->used_list); 952 QUEUE_PUSH_TAIL(&pending_list, &packet->Link); 953 954 if (descriptor == tx_ring->tbd_last) 955 tx_ring->tbd_oldest = 956 tx_ring->tbd_first; 957 else 958 tx_ring->tbd_oldest = 959 descriptor + 1; 960 961 desc_count += packet->num_desc; 962 963 if (is_intr && (desc_count >= Adapter->tx_recycle_num)) 964 break; 965 } else { 966 /* 967 * Found a sw packet that the e1000g is not done 968 * with then there is no reason to check the rest 969 * of the queue. 970 */ 971 break; 972 } 973 } 974 975 tx_ring->tbd_avail += desc_count; 976 Adapter->tx_pkt_cnt += desc_count; 977 978 mutex_exit(&tx_ring->usedlist_lock); 979 980 if (desc_count == 0) { 981 tx_ring->recycle_fail++; 982 E1000G_DEBUG_STAT(tx_ring->stat_recycle_none); 983 return (0); 984 } 985 986 tx_ring->recycle_fail = 0; 987 tx_ring->stall_watchdog = 0; 988 989 mp = NULL; 990 nmp = NULL; 991 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list); 992 ASSERT(packet != NULL); 993 while (packet != NULL) { 994 if (packet->mp != NULL) { 995 ASSERT(packet->mp->b_next == NULL); 996 /* Assemble the message chain */ 997 if (mp == NULL) { 998 mp = packet->mp; 999 nmp = packet->mp; 1000 } else { 1001 nmp->b_next = packet->mp; 1002 nmp = packet->mp; 1003 } 1004 /* Disconnect the message from the sw packet */ 1005 packet->mp = NULL; 1006 } 1007 1008 /* Free the TxSwPackets */ 1009 e1000g_free_tx_swpkt(packet); 1010 1011 packet = (p_tx_sw_packet_t) 1012 QUEUE_GET_NEXT(&pending_list, &packet->Link); 1013 } 1014 1015 /* Return the TxSwPackets back to the FreeList */ 1016 mutex_enter(&tx_ring->freelist_lock); 1017 QUEUE_APPEND(&tx_ring->free_list, &pending_list); 1018 mutex_exit(&tx_ring->freelist_lock); 1019 1020 if (mp != NULL) 1021 freemsgchain(mp); 1022 1023 return (desc_count); 1024 } 1025 /* 1026 * 82544 Coexistence issue workaround: 1027 * There are 2 issues. 1028 * 1. If a 32 bit split completion happens from P64H2 and another 1029 * agent drives a 64 bit request/split completion after ONLY 1030 * 1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then 1031 * 82544 has a problem where in to clock all the data in, it 1032 * looks at REQ64# signal and since it has changed so fast (i.e. 1 1033 * idle clock turn around), it will fail to clock all the data in. 1034 * Data coming from certain ending addresses has exposure to this issue. 1035 * 1036 * To detect this issue, following equation can be used... 1037 * SIZE[3:0] + ADDR[2:0] = SUM[3:0]. 1038 * If SUM[3:0] is in between 1 to 4, we will have this issue. 1039 * 1040 * ROOT CAUSE: 1041 * The erratum involves the 82544 PCIX elasticity FIFO implementations as 1042 * 64-bit FIFO's and flushing of the final partial-bytes corresponding 1043 * to the end of a requested read burst. Under a specific burst condition 1044 * of ending-data alignment and 32-byte split-completions, the final 1045 * byte(s) of split-completion data require an extra clock cycle to flush 1046 * into 64-bit FIFO orientation. An incorrect logic dependency on the 1047 * REQ64# signal occurring during during this clock cycle may cause the 1048 * residual byte(s) to be lost, thereby rendering the internal DMA client 1049 * forever awaiting the final byte(s) for an outbound data-fetch. The 1050 * erratum is confirmed to *only* occur if certain subsequent external 1051 * 64-bit PCIX bus transactions occur immediately (minimum possible bus 1052 * turn- around) following the odd-aligned 32-bit split-completion 1053 * containing the final byte(s). Intel has confirmed that this has been 1054 * seen only with chipset/bridges which have the capability to provide 1055 * 32-bit split-completion data, and in the presence of newer PCIX bus 1056 * agents which fully-optimize the inter-transaction turn-around (zero 1057 * additional initiator latency when pre-granted bus ownership). 1058 * 1059 * This issue does not exist in PCI bus mode, when any agent is operating 1060 * in 32 bit only mode or on chipsets that do not do 32 bit split 1061 * completions for 64 bit read requests (Serverworks chipsets). P64H2 does 1062 * 32 bit split completions for any read request that has bit 2 set to 1 1063 * for the requested address and read request size is more than 8 bytes. 1064 * 1065 * 2. Another issue is related to 82544 driving DACs under the similar 1066 * scenario (32 bit split completion followed by 64 bit transaction with 1067 * only 1 cycle turnaround). This issue is still being root caused. We 1068 * think that both of these issues can be avoided if following workaround 1069 * is implemented. It seems DAC issues is related to ending addresses being 1070 * 0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity 1071 * FIFO which does not get flushed due to REQ64# dependency. We will only 1072 * know the full story after it has been simulated successfully by HW team. 1073 * 1074 * WORKAROUND: 1075 * Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC) 1076 */ 1077 static uint32_t 1078 e1000g_fill_82544_desc(uint64_t address, 1079 size_t length, p_desc_array_t desc_array) 1080 { 1081 /* 1082 * Since issue is sensitive to length and address. 1083 * Let us first check the address... 1084 */ 1085 uint32_t safe_terminator; 1086 1087 if (length <= 4) { 1088 desc_array->descriptor[0].address = address; 1089 desc_array->descriptor[0].length = length; 1090 desc_array->elements = 1; 1091 return (desc_array->elements); 1092 } 1093 safe_terminator = 1094 (uint32_t)((((uint32_t)address & 0x7) + 1095 (length & 0xF)) & 0xF); 1096 /* 1097 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then 1098 * return 1099 */ 1100 if (safe_terminator == 0 || 1101 (safe_terminator > 4 && safe_terminator < 9) || 1102 (safe_terminator > 0xC && safe_terminator <= 0xF)) { 1103 desc_array->descriptor[0].address = address; 1104 desc_array->descriptor[0].length = length; 1105 desc_array->elements = 1; 1106 return (desc_array->elements); 1107 } 1108 1109 desc_array->descriptor[0].address = address; 1110 desc_array->descriptor[0].length = length - 4; 1111 desc_array->descriptor[1].address = address + (length - 4); 1112 desc_array->descriptor[1].length = 4; 1113 desc_array->elements = 2; 1114 return (desc_array->elements); 1115 } 1116 1117 static int 1118 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, 1119 mblk_t *mp, boolean_t tx_undersize_flag) 1120 { 1121 size_t len; 1122 size_t len1; 1123 dma_buffer_t *tx_buf; 1124 mblk_t *nmp; 1125 boolean_t finished; 1126 int desc_count; 1127 1128 desc_count = 0; 1129 tx_buf = packet->tx_buf; 1130 len = MBLKL(mp); 1131 1132 ASSERT((tx_buf->len + len) <= tx_buf->size); 1133 1134 if (len > 0) { 1135 bcopy(mp->b_rptr, 1136 tx_buf->address + tx_buf->len, 1137 len); 1138 tx_buf->len += len; 1139 1140 packet->num_mblk_frag++; 1141 } 1142 1143 nmp = mp->b_cont; 1144 if (nmp == NULL) { 1145 finished = B_TRUE; 1146 } else { 1147 len1 = MBLKL(nmp); 1148 if ((tx_buf->len + len1) > tx_buf->size) 1149 finished = B_TRUE; 1150 else if (tx_undersize_flag) 1151 finished = B_FALSE; 1152 else if (len1 > tx_ring->adapter->tx_bcopy_thresh) 1153 finished = B_TRUE; 1154 else 1155 finished = B_FALSE; 1156 } 1157 1158 if (finished) { 1159 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy, 1160 (tx_buf->len > len)); 1161 1162 /* 1163 * If the packet is smaller than 64 bytes, which is the 1164 * minimum ethernet packet size, pad the packet to make 1165 * it at least 60 bytes. The hardware will add 4 bytes 1166 * for CRC. 1167 */ 1168 if (tx_undersize_flag) { 1169 ASSERT(tx_buf->len < ETHERMIN); 1170 1171 bzero(tx_buf->address + tx_buf->len, 1172 ETHERMIN - tx_buf->len); 1173 tx_buf->len = ETHERMIN; 1174 } 1175 1176 #ifdef __sparc 1177 if (packet->dma_type == USE_DVMA) 1178 dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV); 1179 else 1180 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1181 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1182 #else 1183 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1184 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1185 #endif 1186 1187 packet->data_transfer_type = USE_BCOPY; 1188 1189 desc_count = e1000g_fill_tx_desc(tx_ring, 1190 packet, 1191 tx_buf->dma_address, 1192 tx_buf->len); 1193 1194 if (desc_count <= 0) 1195 return (-1); 1196 } 1197 1198 return (desc_count); 1199 } 1200 1201 static int 1202 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp) 1203 { 1204 int j; 1205 int mystat; 1206 size_t len; 1207 ddi_dma_cookie_t dma_cookie; 1208 uint_t ncookies; 1209 int desc_count; 1210 uint32_t desc_total; 1211 1212 desc_total = 0; 1213 len = MBLKL(mp); 1214 1215 /* 1216 * ddi_dma_addr_bind_handle() allocates DMA resources for a 1217 * memory object such that a device can perform DMA to or from 1218 * the object. DMA resources are allocated considering the 1219 * device's DMA attributes as expressed by ddi_dma_attr(9S) 1220 * (see ddi_dma_alloc_handle(9F)). 1221 * 1222 * ddi_dma_addr_bind_handle() fills in the first DMA cookie 1223 * pointed to by cookiep with the appropriate address, length, 1224 * and bus type. *ccountp is set to the number of DMA cookies 1225 * representing this DMA object. Subsequent DMA cookies must be 1226 * retrieved by calling ddi_dma_nextcookie(9F) the number of 1227 * times specified by *countp - 1. 1228 */ 1229 switch (packet->dma_type) { 1230 #ifdef __sparc 1231 case USE_DVMA: 1232 dvma_kaddr_load(packet->tx_dma_handle, 1233 (caddr_t)mp->b_rptr, len, 0, &dma_cookie); 1234 1235 dvma_sync(packet->tx_dma_handle, 0, 1236 DDI_DMA_SYNC_FORDEV); 1237 1238 ncookies = 1; 1239 packet->data_transfer_type = USE_DVMA; 1240 break; 1241 #endif 1242 case USE_DMA: 1243 if ((mystat = ddi_dma_addr_bind_handle( 1244 packet->tx_dma_handle, NULL, 1245 (caddr_t)mp->b_rptr, len, 1246 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1247 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1248 &ncookies)) != DDI_DMA_MAPPED) { 1249 1250 e1000g_log(tx_ring->adapter, CE_WARN, 1251 "Couldn't bind mblk buffer to Tx DMA handle: " 1252 "return: %X, Pkt: %X\n", 1253 mystat, packet); 1254 return (-1); 1255 } 1256 1257 /* 1258 * An implicit ddi_dma_sync() is done when the 1259 * ddi_dma_addr_bind_handle() is called. So we 1260 * don't need to explicitly call ddi_dma_sync() 1261 * here any more. 1262 */ 1263 ASSERT(ncookies); 1264 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie, 1265 (ncookies > 1)); 1266 1267 /* 1268 * The data_transfer_type value must be set after the handle 1269 * has been bound, for it will be used in e1000g_free_tx_swpkt() 1270 * to decide whether we need to unbind the handle. 1271 */ 1272 packet->data_transfer_type = USE_DMA; 1273 break; 1274 default: 1275 ASSERT(B_FALSE); 1276 break; 1277 } 1278 1279 packet->num_mblk_frag++; 1280 1281 /* 1282 * Each address could span thru multpile cookie.. 1283 * Each cookie will have one descriptor 1284 */ 1285 for (j = ncookies; j != 0; j--) { 1286 1287 desc_count = e1000g_fill_tx_desc(tx_ring, 1288 packet, 1289 dma_cookie.dmac_laddress, 1290 dma_cookie.dmac_size); 1291 1292 if (desc_count <= 0) 1293 return (-1); 1294 1295 desc_total += desc_count; 1296 1297 /* 1298 * ddi_dma_nextcookie() retrieves subsequent DMA 1299 * cookies for a DMA object. 1300 * ddi_dma_nextcookie() fills in the 1301 * ddi_dma_cookie(9S) structure pointed to by 1302 * cookiep. The ddi_dma_cookie(9S) structure 1303 * must be allocated prior to calling 1304 * ddi_dma_nextcookie(). The DMA cookie count 1305 * returned by ddi_dma_buf_bind_handle(9F), 1306 * ddi_dma_addr_bind_handle(9F), or 1307 * ddi_dma_getwin(9F) indicates the number of DMA 1308 * cookies a DMA object consists of. If the 1309 * resulting cookie count, N, is larger than 1, 1310 * ddi_dma_nextcookie() must be called N-1 times 1311 * to retrieve all DMA cookies. 1312 */ 1313 if (j > 1) { 1314 ddi_dma_nextcookie(packet->tx_dma_handle, 1315 &dma_cookie); 1316 } 1317 } 1318 1319 return (desc_total); 1320 } 1321 1322 static void 1323 e1000g_fill_context_descriptor(context_data_t *cur_context, 1324 struct e1000_context_desc *context_desc) 1325 { 1326 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) { 1327 context_desc->lower_setup.ip_fields.ipcss = 1328 cur_context->ether_header_size; 1329 context_desc->lower_setup.ip_fields.ipcso = 1330 cur_context->ether_header_size + 1331 offsetof(struct ip, ip_sum); 1332 context_desc->lower_setup.ip_fields.ipcse = 1333 cur_context->ether_header_size + 1334 cur_context->cksum_start - 1; 1335 } else 1336 context_desc->lower_setup.ip_config = 0; 1337 1338 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) { 1339 /* 1340 * The packet with same protocol has the following 1341 * stuff and start offset: 1342 * | Protocol | Stuff | Start | Checksum 1343 * | | Offset | Offset | Enable 1344 * | IPv4 + TCP | 0x24 | 0x14 | Yes 1345 * | IPv4 + UDP | 0x1A | 0x14 | Yes 1346 * | IPv6 + TCP | 0x20 | 0x10 | No 1347 * | IPv6 + UDP | 0x14 | 0x10 | No 1348 */ 1349 context_desc->upper_setup.tcp_fields.tucss = 1350 cur_context->cksum_start + cur_context->ether_header_size; 1351 context_desc->upper_setup.tcp_fields.tucso = 1352 cur_context->cksum_stuff + cur_context->ether_header_size; 1353 context_desc->upper_setup.tcp_fields.tucse = 0; 1354 } else 1355 context_desc->upper_setup.tcp_config = 0; 1356 1357 if (cur_context->lso_flag) { 1358 context_desc->tcp_seg_setup.fields.mss = cur_context->mss; 1359 context_desc->tcp_seg_setup.fields.hdr_len = 1360 cur_context->hdr_len; 1361 /* 1362 * workaround for 82546EB errata 23, status-writeback 1363 * reporting (RS) should not be set on context or 1364 * Null descriptors 1365 */ 1366 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1367 | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP 1368 | E1000_TXD_DTYP_C | cur_context->pay_len; 1369 } else { 1370 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1371 | E1000_TXD_DTYP_C; 1372 /* 1373 * Zero out the options for TCP Segmentation Offload 1374 */ 1375 context_desc->tcp_seg_setup.data = 0; 1376 } 1377 } 1378 1379 static int 1380 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring, 1381 p_tx_sw_packet_t packet, uint64_t address, size_t size) 1382 { 1383 struct e1000_hw *hw = &tx_ring->adapter->shared; 1384 p_sw_desc_t desc; 1385 1386 if (hw->mac.type == e1000_82544) { 1387 if (hw->bus.type == e1000_bus_type_pcix) 1388 return (e1000g_tx_workaround_PCIX_82544(packet, 1389 address, size)); 1390 1391 if (size > JUMBO_FRAG_LENGTH) 1392 return (e1000g_tx_workaround_jumbo_82544(packet, 1393 address, size)); 1394 } 1395 1396 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1397 1398 desc = &packet->desc[packet->num_desc]; 1399 desc->address = address; 1400 desc->length = size; 1401 1402 packet->num_desc++; 1403 1404 return (1); 1405 } 1406 1407 static int 1408 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet, 1409 uint64_t address, size_t size) 1410 { 1411 p_sw_desc_t desc; 1412 int desc_count; 1413 long size_left; 1414 size_t len; 1415 uint32_t counter; 1416 uint32_t array_elements; 1417 desc_array_t desc_array; 1418 1419 /* 1420 * Coexist Workaround for cordova: RP: 07/04/03 1421 * 1422 * RP: ERRATA: Workaround ISSUE: 1423 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup 1424 * Eachbuffer in to 8kb pieces until the 1425 * remainder is < 8kb 1426 */ 1427 size_left = size; 1428 desc_count = 0; 1429 1430 while (size_left > 0) { 1431 if (size_left > MAX_TX_BUF_SIZE) 1432 len = MAX_TX_BUF_SIZE; 1433 else 1434 len = size_left; 1435 1436 array_elements = e1000g_fill_82544_desc(address, 1437 len, &desc_array); 1438 1439 for (counter = 0; counter < array_elements; counter++) { 1440 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1441 /* 1442 * Put in the buffer address 1443 */ 1444 desc = &packet->desc[packet->num_desc]; 1445 1446 desc->address = 1447 desc_array.descriptor[counter].address; 1448 desc->length = 1449 desc_array.descriptor[counter].length; 1450 1451 packet->num_desc++; 1452 desc_count++; 1453 } /* for */ 1454 1455 /* 1456 * Update the buffer address and length 1457 */ 1458 address += MAX_TX_BUF_SIZE; 1459 size_left -= MAX_TX_BUF_SIZE; 1460 } /* while */ 1461 1462 return (desc_count); 1463 } 1464 1465 static int 1466 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet, 1467 uint64_t address, size_t size) 1468 { 1469 p_sw_desc_t desc; 1470 int desc_count; 1471 long size_left; 1472 uint32_t offset; 1473 1474 /* 1475 * Workaround for Jumbo Frames on Cordova 1476 * PSD 06/01/2001 1477 */ 1478 size_left = size; 1479 desc_count = 0; 1480 offset = 0; 1481 while (size_left > 0) { 1482 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1483 1484 desc = &packet->desc[packet->num_desc]; 1485 1486 desc->address = address + offset; 1487 1488 if (size_left > JUMBO_FRAG_LENGTH) 1489 desc->length = JUMBO_FRAG_LENGTH; 1490 else 1491 desc->length = size_left; 1492 1493 packet->num_desc++; 1494 desc_count++; 1495 1496 offset += desc->length; 1497 size_left -= JUMBO_FRAG_LENGTH; 1498 } 1499 1500 return (desc_count); 1501 } 1502 1503 #pragma inline(e1000g_82547_tx_move_tail_work) 1504 1505 static void 1506 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring) 1507 { 1508 struct e1000_hw *hw; 1509 uint16_t hw_tdt; 1510 uint16_t sw_tdt; 1511 struct e1000_tx_desc *tx_desc; 1512 uint16_t length = 0; 1513 boolean_t eop = B_FALSE; 1514 struct e1000g *Adapter; 1515 1516 Adapter = tx_ring->adapter; 1517 hw = &Adapter->shared; 1518 1519 hw_tdt = E1000_READ_REG(hw, E1000_TDT(0)); 1520 sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first; 1521 1522 while (hw_tdt != sw_tdt) { 1523 tx_desc = &(tx_ring->tbd_first[hw_tdt]); 1524 length += tx_desc->lower.flags.length; 1525 eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; 1526 if (++hw_tdt == Adapter->tx_desc_num) 1527 hw_tdt = 0; 1528 1529 if (eop) { 1530 if ((Adapter->link_duplex == HALF_DUPLEX) && 1531 (e1000_fifo_workaround_82547(hw, length) 1532 != E1000_SUCCESS)) { 1533 if (tx_ring->timer_enable_82547) { 1534 ASSERT(tx_ring->timer_id_82547 == 0); 1535 tx_ring->timer_id_82547 = 1536 timeout(e1000g_82547_timeout, 1537 (void *)tx_ring, 1538 drv_usectohz(10000)); 1539 } 1540 return; 1541 1542 } else { 1543 E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt); 1544 e1000_update_tx_fifo_head_82547(hw, length); 1545 length = 0; 1546 } 1547 } 1548 } 1549 } 1550 1551 static void 1552 e1000g_82547_timeout(void *arg) 1553 { 1554 e1000g_tx_ring_t *tx_ring; 1555 1556 tx_ring = (e1000g_tx_ring_t *)arg; 1557 1558 mutex_enter(&tx_ring->tx_lock); 1559 1560 tx_ring->timer_id_82547 = 0; 1561 e1000g_82547_tx_move_tail_work(tx_ring); 1562 1563 mutex_exit(&tx_ring->tx_lock); 1564 } 1565 1566 static void 1567 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring) 1568 { 1569 timeout_id_t tid; 1570 1571 ASSERT(MUTEX_HELD(&tx_ring->tx_lock)); 1572 1573 tid = tx_ring->timer_id_82547; 1574 tx_ring->timer_id_82547 = 0; 1575 if (tid != 0) { 1576 tx_ring->timer_enable_82547 = B_FALSE; 1577 mutex_exit(&tx_ring->tx_lock); 1578 1579 (void) untimeout(tid); 1580 1581 mutex_enter(&tx_ring->tx_lock); 1582 } 1583 tx_ring->timer_enable_82547 = B_TRUE; 1584 e1000g_82547_tx_move_tail_work(tx_ring); 1585 } 1586