1 /* 2 * This file is provided under a CDDLv1 license. When using or 3 * redistributing this file, you may do so under this license. 4 * In redistributing this file this license must be included 5 * and no other modification of this header file is permitted. 6 * 7 * CDDL LICENSE SUMMARY 8 * 9 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved. 10 * 11 * The contents of this file are subject to the terms of Version 12 * 1.0 of the Common Development and Distribution License (the "License"). 13 * 14 * You should have received a copy of the License with this software. 15 * You can obtain a copy of the License at 16 * http://www.opensolaris.org/os/licensing. 17 * See the License for the specific language governing permissions 18 * and limitations under the License. 19 */ 20 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * ********************************************************************** 28 * * 29 * Module Name: * 30 * e1000g_tx.c * 31 * * 32 * Abstract: * 33 * This file contains some routines that take care of Transmit, * 34 * make the hardware to send the data pointed by the packet out * 35 * on to the physical medium. 
* 36 * * 37 * ********************************************************************** 38 */ 39 40 #include "e1000g_sw.h" 41 #include "e1000g_debug.h" 42 43 static boolean_t e1000g_send(struct e1000g *, mblk_t *); 44 static int e1000g_tx_copy(e1000g_tx_ring_t *, 45 p_tx_sw_packet_t, mblk_t *, boolean_t); 46 static int e1000g_tx_bind(e1000g_tx_ring_t *, 47 p_tx_sw_packet_t, mblk_t *); 48 static boolean_t e1000g_retrieve_context(mblk_t *, context_data_t *, size_t); 49 static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *); 50 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *, 51 context_data_t *); 52 static void e1000g_fill_context_descriptor(context_data_t *, 53 struct e1000_context_desc *); 54 static int e1000g_fill_tx_desc(e1000g_tx_ring_t *, 55 p_tx_sw_packet_t, uint64_t, size_t); 56 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length, 57 p_desc_array_t desc_array); 58 static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t); 59 static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t); 60 static void e1000g_82547_timeout(void *); 61 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *); 62 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *); 63 64 #ifndef E1000G_DEBUG 65 #pragma inline(e1000g_tx_copy) 66 #pragma inline(e1000g_tx_bind) 67 #pragma inline(e1000g_retrieve_context) 68 #pragma inline(e1000g_check_context) 69 #pragma inline(e1000g_fill_tx_ring) 70 #pragma inline(e1000g_fill_context_descriptor) 71 #pragma inline(e1000g_fill_tx_desc) 72 #pragma inline(e1000g_fill_82544_desc) 73 #pragma inline(e1000g_tx_workaround_PCIX_82544) 74 #pragma inline(e1000g_tx_workaround_jumbo_82544) 75 #pragma inline(e1000g_free_tx_swpkt) 76 #endif 77 78 /* 79 * e1000g_free_tx_swpkt - free up the tx sw packet 80 * 81 * Unbind the previously bound DMA handle for a given 82 * transmit sw packet. And reset the sw packet data. 
83 */ 84 void 85 e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet) 86 { 87 switch (packet->data_transfer_type) { 88 case USE_BCOPY: 89 packet->tx_buf->len = 0; 90 break; 91 #ifdef __sparc 92 case USE_DVMA: 93 dvma_unload(packet->tx_dma_handle, 0, -1); 94 break; 95 #endif 96 case USE_DMA: 97 (void) ddi_dma_unbind_handle(packet->tx_dma_handle); 98 break; 99 default: 100 break; 101 } 102 103 /* 104 * The mblk has been stripped off the sw packet 105 * and will be freed in a triggered soft intr. 106 */ 107 ASSERT(packet->mp == NULL); 108 109 packet->data_transfer_type = USE_NONE; 110 packet->num_mblk_frag = 0; 111 packet->num_desc = 0; 112 } 113 114 mblk_t * 115 e1000g_m_tx(void *arg, mblk_t *mp) 116 { 117 struct e1000g *Adapter = (struct e1000g *)arg; 118 mblk_t *next; 119 120 rw_enter(&Adapter->chip_lock, RW_READER); 121 122 if ((Adapter->e1000g_state & E1000G_SUSPENDED) || 123 !(Adapter->e1000g_state & E1000G_STARTED) || 124 (Adapter->link_state != LINK_STATE_UP)) { 125 freemsgchain(mp); 126 mp = NULL; 127 } 128 129 while (mp != NULL) { 130 next = mp->b_next; 131 mp->b_next = NULL; 132 133 if (!e1000g_send(Adapter, mp)) { 134 mp->b_next = next; 135 break; 136 } 137 138 mp = next; 139 } 140 141 rw_exit(&Adapter->chip_lock); 142 return (mp); 143 } 144 145 /* 146 * e1000g_send - send packets onto the wire 147 * 148 * Called from e1000g_m_tx with an mblk ready to send. this 149 * routine sets up the transmit descriptors and sends data to 150 * the wire. It also pushes the just transmitted packet to 151 * the used tx sw packet list. 
152 */ 153 static boolean_t 154 e1000g_send(struct e1000g *Adapter, mblk_t *mp) 155 { 156 p_tx_sw_packet_t packet; 157 LIST_DESCRIBER pending_list; 158 size_t len; 159 size_t msg_size; 160 uint32_t frag_count; 161 int desc_count; 162 uint32_t desc_total; 163 uint32_t bcopy_thresh; 164 uint32_t hdr_frag_len; 165 boolean_t tx_undersize_flag; 166 mblk_t *nmp; 167 mblk_t *tmp; 168 mblk_t *new_mp; 169 mblk_t *pre_mp; 170 e1000g_tx_ring_t *tx_ring; 171 context_data_t cur_context; 172 173 tx_ring = Adapter->tx_ring; 174 bcopy_thresh = Adapter->tx_bcopy_thresh; 175 176 /* Get the total size and frags number of the message */ 177 tx_undersize_flag = B_FALSE; 178 frag_count = 0; 179 msg_size = 0; 180 for (nmp = mp; nmp; nmp = nmp->b_cont) { 181 frag_count++; 182 msg_size += MBLKL(nmp); 183 } 184 185 /* retrieve and compute information for context descriptor */ 186 if (!e1000g_retrieve_context(mp, &cur_context, msg_size)) { 187 freemsg(mp); 188 return (B_TRUE); 189 } 190 191 /* 192 * Make sure the packet is less than the allowed size 193 */ 194 if (!cur_context.lso_flag && 195 (msg_size > Adapter->max_frame_size - ETHERFCSL)) { 196 /* 197 * For the over size packet, we'll just drop it. 198 * So we return B_TRUE here. 199 */ 200 E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL, 201 "Tx packet out of bound. length = %d \n", msg_size); 202 E1000G_STAT(tx_ring->stat_over_size); 203 freemsg(mp); 204 return (B_TRUE); 205 } 206 207 /* 208 * Check and reclaim tx descriptors. 209 * This low water mark check should be done all the time as 210 * Transmit interrupt delay can produce Transmit interrupts little 211 * late and that may cause few problems related to reaping Tx 212 * Descriptors... As you may run short of them before getting any 213 * transmit interrupt... 
214 */ 215 if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) { 216 (void) e1000g_recycle(tx_ring); 217 E1000G_DEBUG_STAT(tx_ring->stat_recycle); 218 219 if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) { 220 E1000G_DEBUG_STAT(tx_ring->stat_lack_desc); 221 goto tx_no_resource; 222 } 223 } 224 225 /* 226 * If the message size is less than the minimum ethernet packet size, 227 * we'll use bcopy to send it, and padd it to 60 bytes later. 228 */ 229 if (msg_size < ETHERMIN) { 230 E1000G_DEBUG_STAT(tx_ring->stat_under_size); 231 tx_undersize_flag = B_TRUE; 232 } 233 234 /* Initialize variables */ 235 desc_count = 1; /* The initial value should be greater than 0 */ 236 desc_total = 0; 237 QUEUE_INIT_LIST(&pending_list); 238 239 /* Process each mblk fragment and fill tx descriptors */ 240 /* 241 * The software should guarantee LSO packet header(MAC+IP+TCP) 242 * to be within one descriptor. Here we reallocate and refill the 243 * the header if it's physical memory non-contiguous. 244 */ 245 if (cur_context.lso_flag) { 246 /* find the last fragment of the header */ 247 len = MBLKL(mp); 248 ASSERT(len > 0); 249 nmp = mp; 250 pre_mp = NULL; 251 while (len < cur_context.hdr_len) { 252 pre_mp = nmp; 253 nmp = nmp->b_cont; 254 len += MBLKL(nmp); 255 } 256 /* 257 * If the header and the payload are in different mblks, 258 * we simply force the header to be copied into pre-allocated 259 * page-aligned buffer. 260 */ 261 if (len == cur_context.hdr_len) 262 goto adjust_threshold; 263 264 hdr_frag_len = cur_context.hdr_len - (len - MBLKL(nmp)); 265 /* 266 * There are two cases we need to reallocate a mblk for the 267 * last header fragment: 268 * 1. the header is in multiple mblks and the last fragment 269 * share the same mblk with the payload 270 * 2. 
the header is in a single mblk shared with the payload 271 * and the header is physical memory non-contiguous 272 */ 273 if ((nmp != mp) || 274 (P2NPHASE((uintptr_t)nmp->b_rptr, Adapter->sys_page_sz) 275 < cur_context.hdr_len)) { 276 E1000G_DEBUG_STAT(tx_ring->stat_lso_header_fail); 277 /* 278 * reallocate the mblk for the last header fragment, 279 * expect to bcopy into pre-allocated page-aligned 280 * buffer 281 */ 282 new_mp = allocb(hdr_frag_len, NULL); 283 if (!new_mp) 284 return (B_FALSE); 285 bcopy(nmp->b_rptr, new_mp->b_rptr, hdr_frag_len); 286 /* link the new header fragment with the other parts */ 287 new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len; 288 new_mp->b_cont = nmp; 289 if (pre_mp) 290 pre_mp->b_cont = new_mp; 291 else 292 mp = new_mp; 293 nmp->b_rptr += hdr_frag_len; 294 frag_count ++; 295 } 296 adjust_threshold: 297 /* 298 * adjust the bcopy threshhold to guarantee 299 * the header to use bcopy way 300 */ 301 if (bcopy_thresh < cur_context.hdr_len) 302 bcopy_thresh = cur_context.hdr_len; 303 } 304 305 packet = NULL; 306 nmp = mp; 307 while (nmp) { 308 tmp = nmp->b_cont; 309 310 len = MBLKL(nmp); 311 /* Check zero length mblks */ 312 if (len == 0) { 313 E1000G_DEBUG_STAT(tx_ring->stat_empty_frags); 314 /* 315 * If there're no packet buffers have been used, 316 * or we just completed processing a buffer, then 317 * skip the empty mblk fragment. 318 * Otherwise, there's still a pending buffer that 319 * needs to be processed (tx_copy). 320 */ 321 if (desc_count > 0) { 322 nmp = tmp; 323 continue; 324 } 325 } 326 327 /* 328 * Get a new TxSwPacket to process mblk buffers. 
329 */ 330 if (desc_count > 0) { 331 mutex_enter(&tx_ring->freelist_lock); 332 packet = (p_tx_sw_packet_t) 333 QUEUE_POP_HEAD(&tx_ring->free_list); 334 mutex_exit(&tx_ring->freelist_lock); 335 336 if (packet == NULL) { 337 E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL, 338 "No Tx SwPacket available\n"); 339 E1000G_STAT(tx_ring->stat_no_swpkt); 340 goto tx_send_failed; 341 } 342 QUEUE_PUSH_TAIL(&pending_list, &packet->Link); 343 } 344 345 ASSERT(packet); 346 /* 347 * If the size of the fragment is less than the tx_bcopy_thresh 348 * we'll use bcopy; Otherwise, we'll use DMA binding. 349 */ 350 if ((len <= bcopy_thresh) || tx_undersize_flag) { 351 desc_count = 352 e1000g_tx_copy(tx_ring, packet, nmp, 353 tx_undersize_flag); 354 E1000G_DEBUG_STAT(tx_ring->stat_copy); 355 } else { 356 desc_count = 357 e1000g_tx_bind(tx_ring, packet, nmp); 358 E1000G_DEBUG_STAT(tx_ring->stat_bind); 359 } 360 361 if (desc_count > 0) 362 desc_total += desc_count; 363 else if (desc_count < 0) 364 goto tx_send_failed; 365 366 nmp = tmp; 367 } 368 369 /* Assign the message to the last sw packet */ 370 ASSERT(packet); 371 ASSERT(packet->mp == NULL); 372 packet->mp = mp; 373 374 /* Try to recycle the tx descriptors again */ 375 if (tx_ring->tbd_avail < (desc_total + 2)) { 376 E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry); 377 (void) e1000g_recycle(tx_ring); 378 } 379 380 mutex_enter(&tx_ring->tx_lock); 381 382 /* 383 * If the number of available tx descriptors is not enough for transmit 384 * (one redundant descriptor and one hw checksum context descriptor are 385 * included), then return failure. 
386 */ 387 if (tx_ring->tbd_avail < (desc_total + 2)) { 388 E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL, 389 "No Enough Tx descriptors\n"); 390 E1000G_STAT(tx_ring->stat_no_desc); 391 mutex_exit(&tx_ring->tx_lock); 392 goto tx_send_failed; 393 } 394 395 desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context); 396 397 mutex_exit(&tx_ring->tx_lock); 398 399 ASSERT(desc_count > 0); 400 401 /* Send successful */ 402 return (B_TRUE); 403 404 tx_send_failed: 405 /* 406 * Enable Transmit interrupts, so that the interrupt routine can 407 * call mac_tx_update() when transmit descriptors become available. 408 */ 409 tx_ring->resched_timestamp = ddi_get_lbolt(); 410 tx_ring->resched_needed = B_TRUE; 411 if (!Adapter->tx_intr_enable) 412 e1000g_mask_tx_interrupt(Adapter); 413 414 /* Free pending TxSwPackets */ 415 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list); 416 while (packet) { 417 packet->mp = NULL; 418 e1000g_free_tx_swpkt(packet); 419 packet = (p_tx_sw_packet_t) 420 QUEUE_GET_NEXT(&pending_list, &packet->Link); 421 } 422 423 /* Return pending TxSwPackets to the "Free" list */ 424 mutex_enter(&tx_ring->freelist_lock); 425 QUEUE_APPEND(&tx_ring->free_list, &pending_list); 426 mutex_exit(&tx_ring->freelist_lock); 427 428 E1000G_STAT(tx_ring->stat_send_fail); 429 430 /* Message will be scheduled for re-transmit */ 431 return (B_FALSE); 432 433 tx_no_resource: 434 /* 435 * Enable Transmit interrupts, so that the interrupt routine can 436 * call mac_tx_update() when transmit descriptors become available. 
437 */ 438 tx_ring->resched_timestamp = ddi_get_lbolt(); 439 tx_ring->resched_needed = B_TRUE; 440 if (!Adapter->tx_intr_enable) 441 e1000g_mask_tx_interrupt(Adapter); 442 443 /* Message will be scheduled for re-transmit */ 444 return (B_FALSE); 445 } 446 447 static boolean_t 448 e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context, 449 size_t msg_size) 450 { 451 uintptr_t ip_start; 452 uintptr_t tcp_start; 453 mblk_t *nmp; 454 uint32_t lsoflags; 455 uint32_t mss; 456 457 bzero(cur_context, sizeof (context_data_t)); 458 459 /* first check lso information */ 460 lso_info_get(mp, &mss, &lsoflags); 461 462 /* retrieve checksum info */ 463 hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start, 464 &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags); 465 /* retrieve ethernet header size */ 466 if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid == 467 htons(ETHERTYPE_VLAN)) 468 cur_context->ether_header_size = 469 sizeof (struct ether_vlan_header); 470 else 471 cur_context->ether_header_size = 472 sizeof (struct ether_header); 473 474 if (lsoflags & HW_LSO) { 475 ASSERT(mss != 0); 476 477 /* free the invalid packet */ 478 if (mss == 0 || 479 !((cur_context->cksum_flags & HCK_PARTIALCKSUM) && 480 (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) { 481 return (B_FALSE); 482 } 483 cur_context->mss = (uint16_t)mss; 484 cur_context->lso_flag = B_TRUE; 485 486 /* 487 * Some fields are cleared for the hardware to fill 488 * in. We don't assume Ethernet header, IP header and 489 * TCP header are always in the same mblk fragment, 490 * while we assume each header is always within one 491 * mblk fragment and Ethernet header is always in the 492 * first mblk fragment. 
493 */ 494 nmp = mp; 495 ip_start = (uintptr_t)(nmp->b_rptr) 496 + cur_context->ether_header_size; 497 if (ip_start >= (uintptr_t)(nmp->b_wptr)) { 498 ip_start = (uintptr_t)nmp->b_cont->b_rptr 499 + (ip_start - (uintptr_t)(nmp->b_wptr)); 500 nmp = nmp->b_cont; 501 } 502 tcp_start = ip_start + 503 IPH_HDR_LENGTH((ipha_t *)ip_start); 504 if (tcp_start >= (uintptr_t)(nmp->b_wptr)) { 505 tcp_start = (uintptr_t)nmp->b_cont->b_rptr 506 + (tcp_start - (uintptr_t)(nmp->b_wptr)); 507 nmp = nmp->b_cont; 508 } 509 cur_context->hdr_len = cur_context->ether_header_size 510 + IPH_HDR_LENGTH((ipha_t *)ip_start) 511 + TCP_HDR_LENGTH((tcph_t *)tcp_start); 512 ((ipha_t *)ip_start)->ipha_length = 0; 513 ((ipha_t *)ip_start)->ipha_hdr_checksum = 0; 514 /* calculate the TCP packet payload length */ 515 cur_context->pay_len = msg_size - cur_context->hdr_len; 516 } 517 return (B_TRUE); 518 } 519 520 static boolean_t 521 e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context) 522 { 523 boolean_t context_reload; 524 context_data_t *pre_context; 525 struct e1000g *Adapter; 526 527 context_reload = B_FALSE; 528 pre_context = &tx_ring->pre_context; 529 Adapter = tx_ring->adapter; 530 531 /* 532 * The following code determine if the context descriptor is 533 * needed to be reloaded. The sequence of the conditions is 534 * made by their possibilities of changing. 535 */ 536 /* 537 * workaround for 82546EB, context descriptor must be reloaded 538 * per LSO/hw_cksum packet if LSO is enabled. 
539 */ 540 if (Adapter->lso_premature_issue && 541 Adapter->lso_enable && 542 (cur_context->cksum_flags != 0)) { 543 544 context_reload = B_TRUE; 545 } else if (cur_context->lso_flag) { 546 if ((cur_context->lso_flag != pre_context->lso_flag) || 547 (cur_context->cksum_flags != pre_context->cksum_flags) || 548 (cur_context->pay_len != pre_context->pay_len) || 549 (cur_context->mss != pre_context->mss) || 550 (cur_context->hdr_len != pre_context->hdr_len) || 551 (cur_context->cksum_stuff != pre_context->cksum_stuff) || 552 (cur_context->cksum_start != pre_context->cksum_start) || 553 (cur_context->ether_header_size != 554 pre_context->ether_header_size)) { 555 556 context_reload = B_TRUE; 557 } 558 } else if (cur_context->cksum_flags != 0) { 559 if ((cur_context->lso_flag != pre_context->lso_flag) || 560 (cur_context->cksum_flags != pre_context->cksum_flags) || 561 (cur_context->cksum_stuff != pre_context->cksum_stuff) || 562 (cur_context->cksum_start != pre_context->cksum_start) || 563 (cur_context->ether_header_size != 564 pre_context->ether_header_size)) { 565 566 context_reload = B_TRUE; 567 } 568 } 569 570 return (context_reload); 571 } 572 573 static int 574 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list, 575 context_data_t *cur_context) 576 { 577 struct e1000g *Adapter; 578 struct e1000_hw *hw; 579 p_tx_sw_packet_t first_packet; 580 p_tx_sw_packet_t packet; 581 p_tx_sw_packet_t previous_packet; 582 boolean_t context_reload; 583 struct e1000_tx_desc *first_data_desc; 584 struct e1000_tx_desc *next_desc; 585 struct e1000_tx_desc *descriptor; 586 int desc_count; 587 boolean_t buff_overrun_flag; 588 int i; 589 590 Adapter = tx_ring->adapter; 591 hw = &Adapter->shared; 592 593 desc_count = 0; 594 first_packet = NULL; 595 first_data_desc = NULL; 596 descriptor = NULL; 597 first_packet = NULL; 598 packet = NULL; 599 buff_overrun_flag = B_FALSE; 600 601 next_desc = tx_ring->tbd_next; 602 603 /* Context descriptor reload check */ 604 
context_reload = e1000g_check_context(tx_ring, cur_context); 605 606 if (context_reload) { 607 first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list); 608 609 descriptor = next_desc; 610 611 e1000g_fill_context_descriptor(cur_context, 612 (struct e1000_context_desc *)descriptor); 613 614 /* Check the wrap-around case */ 615 if (descriptor == tx_ring->tbd_last) 616 next_desc = tx_ring->tbd_first; 617 else 618 next_desc++; 619 620 desc_count++; 621 } 622 623 first_data_desc = next_desc; 624 625 packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list); 626 while (packet) { 627 ASSERT(packet->num_desc); 628 629 for (i = 0; i < packet->num_desc; i++) { 630 ASSERT(tx_ring->tbd_avail > 0); 631 632 descriptor = next_desc; 633 descriptor->buffer_addr = 634 packet->desc[i].address; 635 descriptor->lower.data = 636 packet->desc[i].length; 637 638 /* Zero out status */ 639 descriptor->upper.data = 0; 640 641 descriptor->lower.data |= 642 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 643 /* must set RS on every outgoing descriptor */ 644 descriptor->lower.data |= 645 E1000_TXD_CMD_RS; 646 647 if (cur_context->lso_flag) 648 descriptor->lower.data |= E1000_TXD_CMD_TSE; 649 650 /* Check the wrap-around case */ 651 if (descriptor == tx_ring->tbd_last) 652 next_desc = tx_ring->tbd_first; 653 else 654 next_desc++; 655 656 desc_count++; 657 658 /* 659 * workaround for 82546EB errata 33, hang in PCI-X 660 * systems due to 2k Buffer Overrun during Transmit 661 * Operation. The workaround applies to all the Intel 662 * PCI-X chips. 
663 */ 664 if (hw->bus.type == e1000_bus_type_pcix && 665 descriptor == first_data_desc && 666 ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) 667 > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) { 668 /* modified the first descriptor */ 669 descriptor->lower.data &= 670 ~E1000G_TBD_LENGTH_MASK; 671 descriptor->lower.flags.length = 672 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 673 674 /* insert a new descriptor */ 675 ASSERT(tx_ring->tbd_avail > 0); 676 next_desc->buffer_addr = 677 packet->desc[0].address + 678 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 679 next_desc->lower.data = 680 packet->desc[0].length - 681 E1000_TX_BUFFER_OEVRRUN_THRESHOLD; 682 683 /* Zero out status */ 684 next_desc->upper.data = 0; 685 686 next_desc->lower.data |= 687 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D; 688 /* must set RS on every outgoing descriptor */ 689 next_desc->lower.data |= 690 E1000_TXD_CMD_RS; 691 692 if (cur_context->lso_flag) 693 next_desc->lower.data |= 694 E1000_TXD_CMD_TSE; 695 696 descriptor = next_desc; 697 698 /* Check the wrap-around case */ 699 if (next_desc == tx_ring->tbd_last) 700 next_desc = tx_ring->tbd_first; 701 else 702 next_desc++; 703 704 desc_count++; 705 buff_overrun_flag = B_TRUE; 706 } 707 } 708 709 if (buff_overrun_flag) { 710 packet->num_desc++; 711 buff_overrun_flag = B_FALSE; 712 } 713 714 if (first_packet != NULL) { 715 /* 716 * Count the checksum context descriptor for 717 * the first SwPacket. 
718 */ 719 first_packet->num_desc++; 720 first_packet = NULL; 721 } 722 723 packet->tickstamp = lbolt64; 724 725 previous_packet = packet; 726 packet = (p_tx_sw_packet_t) 727 QUEUE_GET_NEXT(pending_list, &packet->Link); 728 } 729 730 /* 731 * workaround for 82546EB errata 21, LSO Premature Descriptor Write Back 732 */ 733 if (Adapter->lso_premature_issue && cur_context->lso_flag && 734 ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) { 735 /* modified the previous descriptor */ 736 descriptor->lower.data -= 4; 737 738 /* insert a new descriptor */ 739 ASSERT(tx_ring->tbd_avail > 0); 740 /* the lower 20 bits of lower.data is the length field */ 741 next_desc->buffer_addr = 742 descriptor->buffer_addr + 743 (descriptor->lower.data & E1000G_TBD_LENGTH_MASK); 744 next_desc->lower.data = 4; 745 746 /* Zero out status */ 747 next_desc->upper.data = 0; 748 /* It must be part of a LSO packet */ 749 next_desc->lower.data |= 750 E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D | 751 E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE; 752 753 descriptor = next_desc; 754 755 /* Check the wrap-around case */ 756 if (descriptor == tx_ring->tbd_last) 757 next_desc = tx_ring->tbd_first; 758 else 759 next_desc++; 760 761 desc_count++; 762 /* update the number of descriptors */ 763 previous_packet->num_desc++; 764 } 765 766 ASSERT(descriptor); 767 768 if (cur_context->cksum_flags) { 769 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) 770 ((struct e1000_data_desc *)first_data_desc)-> 771 upper.fields.popts |= E1000_TXD_POPTS_IXSM; 772 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) 773 ((struct e1000_data_desc *)first_data_desc)-> 774 upper.fields.popts |= E1000_TXD_POPTS_TXSM; 775 } 776 777 /* 778 * Last Descriptor of Packet needs End Of Packet (EOP), Report 779 * Status (RS) set. 
780 */ 781 if (Adapter->tx_intr_delay) { 782 descriptor->lower.data |= E1000_TXD_CMD_IDE | 783 E1000_TXD_CMD_EOP; 784 } else { 785 descriptor->lower.data |= E1000_TXD_CMD_EOP; 786 } 787 788 /* Set append Ethernet CRC (IFCS) bits */ 789 if (cur_context->lso_flag) { 790 first_data_desc->lower.data |= E1000_TXD_CMD_IFCS; 791 } else { 792 descriptor->lower.data |= E1000_TXD_CMD_IFCS; 793 } 794 795 /* 796 * Sync the Tx descriptors DMA buffer 797 */ 798 (void) ddi_dma_sync(tx_ring->tbd_dma_handle, 799 0, 0, DDI_DMA_SYNC_FORDEV); 800 801 tx_ring->tbd_next = next_desc; 802 803 /* 804 * Advance the Transmit Descriptor Tail (Tdt), this tells the 805 * FX1000 that this frame is available to transmit. 806 */ 807 if (hw->mac.type == e1000_82547) 808 e1000g_82547_tx_move_tail(tx_ring); 809 else 810 E1000_WRITE_REG(hw, E1000_TDT(0), 811 (uint32_t)(next_desc - tx_ring->tbd_first)); 812 813 if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) { 814 ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED); 815 Adapter->e1000g_state |= E1000G_ERROR; 816 } 817 818 /* Put the pending SwPackets to the "Used" list */ 819 mutex_enter(&tx_ring->usedlist_lock); 820 QUEUE_APPEND(&tx_ring->used_list, pending_list); 821 tx_ring->tbd_avail -= desc_count; 822 mutex_exit(&tx_ring->usedlist_lock); 823 824 /* update LSO related data */ 825 if (context_reload) 826 tx_ring->pre_context = *cur_context; 827 828 return (desc_count); 829 } 830 831 /* 832 * e1000g_tx_setup - setup tx data structures 833 * 834 * This routine initializes all of the transmit related 835 * structures. This includes the Transmit descriptors, 836 * and the tx_sw_packet structures. 
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_tx_sw_packet_t packet;
	uint32_t i;
	uint32_t buf_high;
	uint32_t buf_low;
	uint32_t reg_tipg;
	uint32_t reg_tctl;
	int size;
	e1000g_tx_ring_t *tx_ring;

	hw = &Adapter->shared;
	tx_ring = Adapter->tx_ring;

	/* init the lists */
	/*
	 * Here we don't need to protect the lists using the
	 * usedlist_lock and freelist_lock, for they have
	 * been protected by the chip_lock.
	 */
	QUEUE_INIT_LIST(&tx_ring->used_list);
	QUEUE_INIT_LIST(&tx_ring->free_list);

	/* Go through and set up each SW_Packet */
	packet = tx_ring->packet_area;
	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
		/* Initialize this tx_sw_packet area */
		e1000g_free_tx_swpkt(packet);
		/* Add this tx_sw_packet to the free list */
		QUEUE_PUSH_TAIL(&tx_ring->free_list,
		    &packet->Link);
	}

	/* Setup TX descriptor pointers: the ring starts out empty */
	tx_ring->tbd_next = tx_ring->tbd_first;
	tx_ring->tbd_oldest = tx_ring->tbd_first;

	/*
	 * Setup Hardware TX Registers
	 */
	/*
	 * Setup the Transmit Control Register (TCTL).
	 * The bits are OR-ed into the value read back from the register,
	 * so whatever was already set there is preserved.
	 */
	reg_tctl = E1000_READ_REG(hw, E1000_TCTL);
	reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
	    E1000_TCTL_RTLC;

	/* Enable the MULR bit (PCI Express adapters only) */
	if (hw->bus.type == e1000_bus_type_pci_express)
		reg_tctl |= E1000_TCTL_MULR;

	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

	/* Setup HW Base and Length of Tx descriptor area */
	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
	/* The read-back value is not used afterwards */
	size = E1000_READ_REG(hw, E1000_TDLEN(0));

	/* Split the 64-bit ring DMA address into two 32-bit halves */
	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

	/*
	 * Write the highest location first and work backward to the lowest.
	 * This is necessary for some adapter types to
	 * prevent write combining from occurring.
	 */
	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);

	/* Setup our HW Tx Head & Tail descriptor pointers */
	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
	E1000_WRITE_REG(hw, E1000_TDT(0), 0);

	/* Set the default values for the Tx Inter Packet Gap timer */
	if ((hw->mac.type == e1000_82542) &&
	    ((hw->revision_id == E1000_REVISION_2) ||
	    (hw->revision_id == E1000_REVISION_3))) {
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	} else if (hw->mac.type == e1000_80003es2lan) {
		/*
		 * NOTE(review): unlike the other two branches the IPGR1
		 * default is used here unshifted and no IPGT value is set;
		 * confirm against the 80003ES2LAN documentation.
		 */
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
	} else {
		/* The IPGT default depends on the media type */
		if (hw->phy.media_type == e1000_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}
	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

	/* Setup Transmit Interrupt Delay Value */
	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

	/* TADV is only programmed for mac types from e1000_82540 on */
	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
		    Adapter->tx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
	}

	/* Every descriptor of the ring is available again */
	tx_ring->tbd_avail = Adapter->tx_desc_num;

	/* Initialize stored context information */
	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}

/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 *
 * Walks the used list from its head and reclaims every sw packet whose
 * last descriptor has the "descriptor done" status bit set, stopping at
 * the first packet the hardware has not finished.  The reclaimed sw
 * packets go back to the free list and their messages are freed.
 *
 * Also maintains Adapter->stall_flag: it is cleared when the used list
 * is empty or something was reclaimed, and set when nothing could be
 * reclaimed and the oldest outstanding packet has been waiting longer
 * than Adapter->stall_threshold.
 *
 * Returns the number of descriptors reclaimed; 0 if there were none or
 * if the descriptor DMA handle check failed.
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
	struct e1000g *Adapter;
	LIST_DESCRIBER pending_list;
	p_tx_sw_packet_t packet;
	mblk_t *mp;
	mblk_t *nmp;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	int64_t delta;

	/*
	 * This function will examine each TxSwPacket in the 'used' queue
	 * if the e1000g is done with it then the associated resources (Tx
	 * Descriptors) will be "freed" and the TxSwPacket will be
	 * returned to the 'free' queue.
	 */
	Adapter = tx_ring->adapter;
	delta = 0;

	/* Quick check, made without taking usedlist_lock */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
	if (packet == NULL) {
		Adapter->stall_flag = B_FALSE;
		return (0);
	}

	desc_count = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Sync the Tx descriptor DMA buffer */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);
	if (e1000g_check_dma_handle(
	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
		/* Flag the fault and leave the ring untouched */
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (0);
	}

	/*
	 * While there are still TxSwPackets in the used queue check them
	 */
	mutex_enter(&tx_ring->usedlist_lock);
	while ((packet =
	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) {

		/*
		 * Get hold of the next descriptor that the e1000g will
		 * report status back to (this will be the last descriptor
		 * of a given sw packet). We only want to free the
		 * sw packet (and its resources) if the e1000g is done
		 * with ALL of the descriptors. If the e1000g is done
		 * with the last one then it is done with all of them.
		 */
		ASSERT(packet->num_desc);
		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

		/* Check for wrap case */
		if (descriptor > tx_ring->tbd_last)
			descriptor -= Adapter->tx_desc_num;

		/*
		 * If the descriptor done bit is set free TxSwPacket and
		 * associated resources
		 */
		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
			/* Move the sw packet to the local pending list */
			QUEUE_POP_HEAD(&tx_ring->used_list);
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

			/* The oldest in-flight descriptor is now the next one */
			if (descriptor == tx_ring->tbd_last)
				tx_ring->tbd_oldest =
				    tx_ring->tbd_first;
			else
				tx_ring->tbd_oldest =
				    descriptor + 1;

			desc_count += packet->num_desc;
		} else {
			/*
			 * Found a sw packet that the e1000g is not done
			 * with then there is no reason to check the rest
			 * of the queue.
			 */
			/* Remember how long it has been outstanding */
			delta = lbolt64 - packet->tickstamp;
			break;
		}
	}

	tx_ring->tbd_avail += desc_count;
	/* NOTE(review): tx_pkt_cnt is advanced by descriptors, not packets */
	Adapter->tx_pkt_cnt += desc_count;

	mutex_exit(&tx_ring->usedlist_lock);

	if (desc_count == 0) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
		/*
		 * If the packet hasn't been sent out for seconds,
		 * the transmitter is considered to be stalled.
		 */
		if (delta > Adapter->stall_threshold) {
			Adapter->stall_flag = B_TRUE;
		}
		return (0);
	}

	Adapter->stall_flag = B_FALSE;

	/*
	 * Strip the messages off the reclaimed sw packets and chain them
	 * through b_next, so they can be freed in one call once the sw
	 * packets are back on the free list.
	 */
	mp = NULL;
	nmp = NULL;
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	ASSERT(packet != NULL);
	while (packet != NULL) {
		if (packet->mp != NULL) {
			ASSERT(packet->mp->b_next == NULL);
			/* Assemble the message chain */
			if (mp == NULL) {
				mp = packet->mp;
				nmp = packet->mp;
			} else {
				nmp->b_next = packet->mp;
				nmp = packet->mp;
			}
			/* Disconnect the message from the sw packet */
			packet->mp = NULL;
		}

		/* Free the TxSwPackets */
		e1000g_free_tx_swpkt(packet);

		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return the TxSwPackets back to the FreeList */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	if (mp != NULL)
		freemsgchain(mp);

	return (desc_count);
}
/*
 * 82544 Coexistence issue workaround:
 *    There are 2 issues.
 *    1. If a 32 bit split completion happens from P64H2 and another
 *	agent drives a 64 bit request/split completion after ONLY
 *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
 *	82544 has a problem where in to clock all the data in, it
 *	looks at REQ64# signal and since it has changed so fast (i.e. 1
 *	idle clock turn around), it will fail to clock all the data in.
 *	Data coming from certain ending addresses has exposure to this issue.
 *
 * To detect this issue, following equation can be used...
 *	SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *	If SUM[3:0] is in between 1 to 4, we will have this issue.
 *
 * ROOT CAUSE:
 * The erratum involves the 82544 PCIX elasticity FIFO implementations as
 * 64-bit FIFOs and flushing of the final partial-bytes corresponding
 * to the end of a requested read burst. Under a specific burst condition
 * of ending-data alignment and 32-byte split-completions, the final
 * byte(s) of split-completion data require an extra clock cycle to flush
 * into 64-bit FIFO orientation. An incorrect logic dependency on the
 * REQ64# signal occurring during this clock cycle may cause the
 * residual byte(s) to be lost, thereby rendering the internal DMA client
 * forever awaiting the final byte(s) for an outbound data-fetch. The
 * erratum is confirmed to *only* occur if certain subsequent external
 * 64-bit PCIX bus transactions occur immediately (minimum possible bus
 * turn-around) following the odd-aligned 32-bit split-completion
 * containing the final byte(s). Intel has confirmed that this has been
 * seen only with chipset/bridges which have the capability to provide
 * 32-bit split-completion data, and in the presence of newer PCIX bus
 * agents which fully-optimize the inter-transaction turn-around (zero
 * additional initiator latency when pre-granted bus ownership).
 *
 * This issue does not exist in PCI bus mode, when any agent is operating
 * in 32 bit only mode or on chipsets that do not do 32 bit split
 * completions for 64 bit read requests (Serverworks chipsets). P64H2 does
 * 32 bit split completions for any read request that has bit 2 set to 1
 * for the requested address and read request size is more than 8 bytes.
 *
 * 2. Another issue is related to 82544 driving DACs under a similar
 * scenario (32 bit split completion followed by 64 bit transaction with
 * only 1 cycle turnaround). This issue is still being root caused.
We 1146 * think that both of these issues can be avoided if following workaround 1147 * is implemented. It seems DAC issues is related to ending addresses being 1148 * 0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity 1149 * FIFO which does not get flushed due to REQ64# dependency. We will only 1150 * know the full story after it has been simulated successfully by HW team. 1151 * 1152 * WORKAROUND: 1153 * Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC) 1154 */ 1155 static uint32_t 1156 e1000g_fill_82544_desc(uint64_t address, 1157 size_t length, p_desc_array_t desc_array) 1158 { 1159 /* 1160 * Since issue is sensitive to length and address. 1161 * Let us first check the address... 1162 */ 1163 uint32_t safe_terminator; 1164 1165 if (length <= 4) { 1166 desc_array->descriptor[0].address = address; 1167 desc_array->descriptor[0].length = (uint32_t)length; 1168 desc_array->elements = 1; 1169 return (desc_array->elements); 1170 } 1171 safe_terminator = 1172 (uint32_t)((((uint32_t)address & 0x7) + 1173 (length & 0xF)) & 0xF); 1174 /* 1175 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then 1176 * return 1177 */ 1178 if (safe_terminator == 0 || 1179 (safe_terminator > 4 && safe_terminator < 9) || 1180 (safe_terminator > 0xC && safe_terminator <= 0xF)) { 1181 desc_array->descriptor[0].address = address; 1182 desc_array->descriptor[0].length = (uint32_t)length; 1183 desc_array->elements = 1; 1184 return (desc_array->elements); 1185 } 1186 1187 desc_array->descriptor[0].address = address; 1188 desc_array->descriptor[0].length = length - 4; 1189 desc_array->descriptor[1].address = address + (length - 4); 1190 desc_array->descriptor[1].length = 4; 1191 desc_array->elements = 2; 1192 return (desc_array->elements); 1193 } 1194 1195 static int 1196 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, 1197 mblk_t *mp, boolean_t tx_undersize_flag) 1198 { 1199 size_t len; 1200 size_t len1; 1201 dma_buffer_t 
*tx_buf; 1202 mblk_t *nmp; 1203 boolean_t finished; 1204 int desc_count; 1205 1206 desc_count = 0; 1207 tx_buf = packet->tx_buf; 1208 len = MBLKL(mp); 1209 1210 ASSERT((tx_buf->len + len) <= tx_buf->size); 1211 1212 if (len > 0) { 1213 bcopy(mp->b_rptr, 1214 tx_buf->address + tx_buf->len, 1215 len); 1216 tx_buf->len += len; 1217 1218 packet->num_mblk_frag++; 1219 } 1220 1221 nmp = mp->b_cont; 1222 if (nmp == NULL) { 1223 finished = B_TRUE; 1224 } else { 1225 len1 = MBLKL(nmp); 1226 if ((tx_buf->len + len1) > tx_buf->size) 1227 finished = B_TRUE; 1228 else if (tx_undersize_flag) 1229 finished = B_FALSE; 1230 else if (len1 > tx_ring->adapter->tx_bcopy_thresh) 1231 finished = B_TRUE; 1232 else 1233 finished = B_FALSE; 1234 } 1235 1236 if (finished) { 1237 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy, 1238 (tx_buf->len > len)); 1239 1240 /* 1241 * If the packet is smaller than 64 bytes, which is the 1242 * minimum ethernet packet size, pad the packet to make 1243 * it at least 60 bytes. The hardware will add 4 bytes 1244 * for CRC. 
1245 */ 1246 if (tx_undersize_flag) { 1247 ASSERT(tx_buf->len < ETHERMIN); 1248 1249 bzero(tx_buf->address + tx_buf->len, 1250 ETHERMIN - tx_buf->len); 1251 tx_buf->len = ETHERMIN; 1252 } 1253 1254 #ifdef __sparc 1255 if (packet->dma_type == USE_DVMA) 1256 dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV); 1257 else 1258 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1259 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1260 #else 1261 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1262 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1263 #endif 1264 1265 packet->data_transfer_type = USE_BCOPY; 1266 1267 desc_count = e1000g_fill_tx_desc(tx_ring, 1268 packet, 1269 tx_buf->dma_address, 1270 tx_buf->len); 1271 1272 if (desc_count <= 0) 1273 return (-1); 1274 } 1275 1276 return (desc_count); 1277 } 1278 1279 static int 1280 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp) 1281 { 1282 int j; 1283 int mystat; 1284 size_t len; 1285 ddi_dma_cookie_t dma_cookie; 1286 uint_t ncookies; 1287 int desc_count; 1288 uint32_t desc_total; 1289 1290 desc_total = 0; 1291 len = MBLKL(mp); 1292 1293 /* 1294 * ddi_dma_addr_bind_handle() allocates DMA resources for a 1295 * memory object such that a device can perform DMA to or from 1296 * the object. DMA resources are allocated considering the 1297 * device's DMA attributes as expressed by ddi_dma_attr(9S) 1298 * (see ddi_dma_alloc_handle(9F)). 1299 * 1300 * ddi_dma_addr_bind_handle() fills in the first DMA cookie 1301 * pointed to by cookiep with the appropriate address, length, 1302 * and bus type. *ccountp is set to the number of DMA cookies 1303 * representing this DMA object. Subsequent DMA cookies must be 1304 * retrieved by calling ddi_dma_nextcookie(9F) the number of 1305 * times specified by *countp - 1. 
1306 */ 1307 switch (packet->dma_type) { 1308 #ifdef __sparc 1309 case USE_DVMA: 1310 dvma_kaddr_load(packet->tx_dma_handle, 1311 (caddr_t)mp->b_rptr, len, 0, &dma_cookie); 1312 1313 dvma_sync(packet->tx_dma_handle, 0, 1314 DDI_DMA_SYNC_FORDEV); 1315 1316 ncookies = 1; 1317 packet->data_transfer_type = USE_DVMA; 1318 break; 1319 #endif 1320 case USE_DMA: 1321 if ((mystat = ddi_dma_addr_bind_handle( 1322 packet->tx_dma_handle, NULL, 1323 (caddr_t)mp->b_rptr, len, 1324 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1325 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1326 &ncookies)) != DDI_DMA_MAPPED) { 1327 1328 e1000g_log(tx_ring->adapter, CE_WARN, 1329 "Couldn't bind mblk buffer to Tx DMA handle: " 1330 "return: %X, Pkt: %X\n", 1331 mystat, packet); 1332 return (-1); 1333 } 1334 1335 /* 1336 * An implicit ddi_dma_sync() is done when the 1337 * ddi_dma_addr_bind_handle() is called. So we 1338 * don't need to explicitly call ddi_dma_sync() 1339 * here any more. 1340 */ 1341 ASSERT(ncookies); 1342 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie, 1343 (ncookies > 1)); 1344 1345 /* 1346 * The data_transfer_type value must be set after the handle 1347 * has been bound, for it will be used in e1000g_free_tx_swpkt() 1348 * to decide whether we need to unbind the handle. 1349 */ 1350 packet->data_transfer_type = USE_DMA; 1351 break; 1352 default: 1353 ASSERT(B_FALSE); 1354 break; 1355 } 1356 1357 packet->num_mblk_frag++; 1358 1359 /* 1360 * Each address could span thru multpile cookie.. 1361 * Each cookie will have one descriptor 1362 */ 1363 for (j = ncookies; j != 0; j--) { 1364 1365 desc_count = e1000g_fill_tx_desc(tx_ring, 1366 packet, 1367 dma_cookie.dmac_laddress, 1368 dma_cookie.dmac_size); 1369 1370 if (desc_count <= 0) 1371 return (-1); 1372 1373 desc_total += desc_count; 1374 1375 /* 1376 * ddi_dma_nextcookie() retrieves subsequent DMA 1377 * cookies for a DMA object. 1378 * ddi_dma_nextcookie() fills in the 1379 * ddi_dma_cookie(9S) structure pointed to by 1380 * cookiep. 
The ddi_dma_cookie(9S) structure 1381 * must be allocated prior to calling 1382 * ddi_dma_nextcookie(). The DMA cookie count 1383 * returned by ddi_dma_buf_bind_handle(9F), 1384 * ddi_dma_addr_bind_handle(9F), or 1385 * ddi_dma_getwin(9F) indicates the number of DMA 1386 * cookies a DMA object consists of. If the 1387 * resulting cookie count, N, is larger than 1, 1388 * ddi_dma_nextcookie() must be called N-1 times 1389 * to retrieve all DMA cookies. 1390 */ 1391 if (j > 1) { 1392 ddi_dma_nextcookie(packet->tx_dma_handle, 1393 &dma_cookie); 1394 } 1395 } 1396 1397 return (desc_total); 1398 } 1399 1400 static void 1401 e1000g_fill_context_descriptor(context_data_t *cur_context, 1402 struct e1000_context_desc *context_desc) 1403 { 1404 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) { 1405 context_desc->lower_setup.ip_fields.ipcss = 1406 cur_context->ether_header_size; 1407 context_desc->lower_setup.ip_fields.ipcso = 1408 cur_context->ether_header_size + 1409 offsetof(struct ip, ip_sum); 1410 context_desc->lower_setup.ip_fields.ipcse = 1411 cur_context->ether_header_size + 1412 cur_context->cksum_start - 1; 1413 } else 1414 context_desc->lower_setup.ip_config = 0; 1415 1416 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) { 1417 /* 1418 * The packet with same protocol has the following 1419 * stuff and start offset: 1420 * | Protocol | Stuff | Start | Checksum 1421 * | | Offset | Offset | Enable 1422 * | IPv4 + TCP | 0x24 | 0x14 | Yes 1423 * | IPv4 + UDP | 0x1A | 0x14 | Yes 1424 * | IPv6 + TCP | 0x20 | 0x10 | No 1425 * | IPv6 + UDP | 0x14 | 0x10 | No 1426 */ 1427 context_desc->upper_setup.tcp_fields.tucss = 1428 cur_context->cksum_start + cur_context->ether_header_size; 1429 context_desc->upper_setup.tcp_fields.tucso = 1430 cur_context->cksum_stuff + cur_context->ether_header_size; 1431 context_desc->upper_setup.tcp_fields.tucse = 0; 1432 } else 1433 context_desc->upper_setup.tcp_config = 0; 1434 1435 if (cur_context->lso_flag) { 1436 
context_desc->tcp_seg_setup.fields.mss = cur_context->mss; 1437 context_desc->tcp_seg_setup.fields.hdr_len = 1438 cur_context->hdr_len; 1439 /* 1440 * workaround for 82546EB errata 23, status-writeback 1441 * reporting (RS) should not be set on context or 1442 * Null descriptors 1443 */ 1444 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1445 | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP 1446 | E1000_TXD_DTYP_C | cur_context->pay_len; 1447 } else { 1448 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1449 | E1000_TXD_DTYP_C; 1450 /* 1451 * Zero out the options for TCP Segmentation Offload 1452 */ 1453 context_desc->tcp_seg_setup.data = 0; 1454 } 1455 } 1456 1457 static int 1458 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring, 1459 p_tx_sw_packet_t packet, uint64_t address, size_t size) 1460 { 1461 struct e1000_hw *hw = &tx_ring->adapter->shared; 1462 p_sw_desc_t desc; 1463 1464 if (hw->mac.type == e1000_82544) { 1465 if (hw->bus.type == e1000_bus_type_pcix) 1466 return (e1000g_tx_workaround_PCIX_82544(packet, 1467 address, size)); 1468 1469 if (size > JUMBO_FRAG_LENGTH) 1470 return (e1000g_tx_workaround_jumbo_82544(packet, 1471 address, size)); 1472 } 1473 1474 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1475 1476 desc = &packet->desc[packet->num_desc]; 1477 desc->address = address; 1478 desc->length = (uint32_t)size; 1479 1480 packet->num_desc++; 1481 1482 return (1); 1483 } 1484 1485 static int 1486 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet, 1487 uint64_t address, size_t size) 1488 { 1489 p_sw_desc_t desc; 1490 int desc_count; 1491 long size_left; 1492 size_t len; 1493 uint32_t counter; 1494 uint32_t array_elements; 1495 desc_array_t desc_array; 1496 1497 /* 1498 * Coexist Workaround for cordova: RP: 07/04/03 1499 * 1500 * RP: ERRATA: Workaround ISSUE: 1501 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup 1502 * Eachbuffer in to 8kb pieces until the 1503 * remainder is < 8kb 1504 */ 1505 size_left = size; 1506 desc_count = 0; 
1507 1508 while (size_left > 0) { 1509 if (size_left > MAX_TX_BUF_SIZE) 1510 len = MAX_TX_BUF_SIZE; 1511 else 1512 len = size_left; 1513 1514 array_elements = e1000g_fill_82544_desc(address, 1515 len, &desc_array); 1516 1517 for (counter = 0; counter < array_elements; counter++) { 1518 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1519 /* 1520 * Put in the buffer address 1521 */ 1522 desc = &packet->desc[packet->num_desc]; 1523 1524 desc->address = 1525 desc_array.descriptor[counter].address; 1526 desc->length = 1527 desc_array.descriptor[counter].length; 1528 1529 packet->num_desc++; 1530 desc_count++; 1531 } /* for */ 1532 1533 /* 1534 * Update the buffer address and length 1535 */ 1536 address += MAX_TX_BUF_SIZE; 1537 size_left -= MAX_TX_BUF_SIZE; 1538 } /* while */ 1539 1540 return (desc_count); 1541 } 1542 1543 static int 1544 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet, 1545 uint64_t address, size_t size) 1546 { 1547 p_sw_desc_t desc; 1548 int desc_count; 1549 long size_left; 1550 uint32_t offset; 1551 1552 /* 1553 * Workaround for Jumbo Frames on Cordova 1554 * PSD 06/01/2001 1555 */ 1556 size_left = size; 1557 desc_count = 0; 1558 offset = 0; 1559 while (size_left > 0) { 1560 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1561 1562 desc = &packet->desc[packet->num_desc]; 1563 1564 desc->address = address + offset; 1565 1566 if (size_left > JUMBO_FRAG_LENGTH) 1567 desc->length = JUMBO_FRAG_LENGTH; 1568 else 1569 desc->length = (uint32_t)size_left; 1570 1571 packet->num_desc++; 1572 desc_count++; 1573 1574 offset += desc->length; 1575 size_left -= JUMBO_FRAG_LENGTH; 1576 } 1577 1578 return (desc_count); 1579 } 1580 1581 #pragma inline(e1000g_82547_tx_move_tail_work) 1582 1583 static void 1584 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring) 1585 { 1586 struct e1000_hw *hw; 1587 uint16_t hw_tdt; 1588 uint16_t sw_tdt; 1589 struct e1000_tx_desc *tx_desc; 1590 uint16_t length = 0; 1591 boolean_t eop = B_FALSE; 1592 struct e1000g 
*Adapter; 1593 1594 Adapter = tx_ring->adapter; 1595 hw = &Adapter->shared; 1596 1597 hw_tdt = E1000_READ_REG(hw, E1000_TDT(0)); 1598 sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first; 1599 1600 while (hw_tdt != sw_tdt) { 1601 tx_desc = &(tx_ring->tbd_first[hw_tdt]); 1602 length += tx_desc->lower.flags.length; 1603 eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; 1604 if (++hw_tdt == Adapter->tx_desc_num) 1605 hw_tdt = 0; 1606 1607 if (eop) { 1608 if ((Adapter->link_duplex == HALF_DUPLEX) && 1609 (e1000_fifo_workaround_82547(hw, length) 1610 != E1000_SUCCESS)) { 1611 if (tx_ring->timer_enable_82547) { 1612 ASSERT(tx_ring->timer_id_82547 == 0); 1613 tx_ring->timer_id_82547 = 1614 timeout(e1000g_82547_timeout, 1615 (void *)tx_ring, 1616 drv_usectohz(10000)); 1617 } 1618 return; 1619 1620 } else { 1621 E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt); 1622 e1000_update_tx_fifo_head_82547(hw, length); 1623 length = 0; 1624 } 1625 } 1626 } 1627 } 1628 1629 static void 1630 e1000g_82547_timeout(void *arg) 1631 { 1632 e1000g_tx_ring_t *tx_ring; 1633 1634 tx_ring = (e1000g_tx_ring_t *)arg; 1635 1636 mutex_enter(&tx_ring->tx_lock); 1637 1638 tx_ring->timer_id_82547 = 0; 1639 e1000g_82547_tx_move_tail_work(tx_ring); 1640 1641 mutex_exit(&tx_ring->tx_lock); 1642 } 1643 1644 static void 1645 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring) 1646 { 1647 timeout_id_t tid; 1648 1649 ASSERT(MUTEX_HELD(&tx_ring->tx_lock)); 1650 1651 tid = tx_ring->timer_id_82547; 1652 tx_ring->timer_id_82547 = 0; 1653 if (tid != 0) { 1654 tx_ring->timer_enable_82547 = B_FALSE; 1655 mutex_exit(&tx_ring->tx_lock); 1656 1657 (void) untimeout(tid); 1658 1659 mutex_enter(&tx_ring->tx_lock); 1660 } 1661 tx_ring->timer_enable_82547 = B_TRUE; 1662 e1000g_82547_tx_move_tail_work(tx_ring); 1663 } 1664