1 /* 2 * This file is provided under a CDDLv1 license. When using or 3 * redistributing this file, you may do so under this license. 4 * In redistributing this file this license must be included 5 * and no other modification of this header file is permitted. 6 * 7 * CDDL LICENSE SUMMARY 8 * 9 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved. 10 * 11 * The contents of this file are subject to the terms of Version 12 * 1.0 of the Common Development and Distribution License (the "License"). 13 * 14 * You should have received a copy of the License with this software. 15 * You can obtain a copy of the License at 16 * http://www.opensolaris.org/os/licensing. 17 * See the License for the specific language governing permissions 18 * and limitations under the License. 19 */ 20 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * ********************************************************************** 28 * * 29 * Module Name: * 30 * e1000g_tx.c * 31 * * 32 * Abstract: * 33 * This file contains some routines that take care of Transmit, * 34 * make the hardware to send the data pointed by the packet out * 35 * on to the physical medium. 
* 36 * * 37 * ********************************************************************** 38 */ 39 40 #include "e1000g_sw.h" 41 #include "e1000g_debug.h" 42 43 static boolean_t e1000g_send(struct e1000g *, mblk_t *); 44 static int e1000g_tx_copy(e1000g_tx_ring_t *, 45 p_tx_sw_packet_t, mblk_t *, boolean_t); 46 static int e1000g_tx_bind(e1000g_tx_ring_t *, 47 p_tx_sw_packet_t, mblk_t *); 48 static boolean_t e1000g_retrieve_context(mblk_t *, context_data_t *, size_t); 49 static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *); 50 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *, 51 context_data_t *); 52 static void e1000g_fill_context_descriptor(context_data_t *, 53 struct e1000_context_desc *); 54 static int e1000g_fill_tx_desc(e1000g_tx_ring_t *, 55 p_tx_sw_packet_t, uint64_t, size_t); 56 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length, 57 p_desc_array_t desc_array); 58 static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t); 59 static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t); 60 static void e1000g_82547_timeout(void *); 61 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *); 62 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *); 63 64 #ifndef E1000G_DEBUG 65 #pragma inline(e1000g_tx_copy) 66 #pragma inline(e1000g_tx_bind) 67 #pragma inline(e1000g_retrieve_context) 68 #pragma inline(e1000g_check_context) 69 #pragma inline(e1000g_fill_tx_ring) 70 #pragma inline(e1000g_fill_context_descriptor) 71 #pragma inline(e1000g_fill_tx_desc) 72 #pragma inline(e1000g_fill_82544_desc) 73 #pragma inline(e1000g_tx_workaround_PCIX_82544) 74 #pragma inline(e1000g_tx_workaround_jumbo_82544) 75 #pragma inline(e1000g_free_tx_swpkt) 76 #endif 77 78 /* 79 * e1000g_free_tx_swpkt - free up the tx sw packet 80 * 81 * Unbind the previously bound DMA handle for a given 82 * transmit sw packet. And reset the sw packet data. 
 */
void
e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
{
	/* Release whatever mapping type was used for this packet's payload */
	switch (packet->data_transfer_type) {
	case USE_BCOPY:
		/* data was copied into the pre-allocated tx buffer */
		packet->tx_buf->len = 0;
		break;
#ifdef __sparc
	case USE_DVMA:
		/* unload the fast DVMA mapping (sparc only) */
		dvma_unload(packet->tx_dma_handle, 0, -1);
		break;
#endif
	case USE_DMA:
		(void) ddi_dma_unbind_handle(packet->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * The mblk has been stripped off the sw packet
	 * and will be freed in a triggered soft intr.
	 */
	ASSERT(packet->mp == NULL);

	packet->data_transfer_type = USE_NONE;
	packet->num_mblk_frag = 0;
	packet->num_desc = 0;
}

/*
 * e1000g_m_tx - transmit entry point
 *
 * Walks the b_next chain of messages and hands each one to
 * e1000g_send().  Returns the unsent remainder of the chain
 * (NULL when everything was consumed).  The whole chain is
 * freed up front when the device is suspended, not started,
 * or the link is down.
 */
mblk_t *
e1000g_m_tx(void *arg, mblk_t *mp)
{
	struct e1000g *Adapter = (struct e1000g *)arg;
	mblk_t *next;

	rw_enter(&Adapter->chip_lock, RW_READER);

	/* Drop the entire chain if the chip cannot transmit right now */
	if ((Adapter->e1000g_state & E1000G_SUSPENDED) ||
	    !(Adapter->e1000g_state & E1000G_STARTED) ||
	    (Adapter->link_state != LINK_STATE_UP)) {
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		/*
		 * On failure, re-link the unsent message and return the
		 * remainder so the framework can reschedule it later.
		 */
		if (!e1000g_send(Adapter, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	rw_exit(&Adapter->chip_lock);
	return (mp);
}

/*
 * e1000g_send - send packets onto the wire
 *
 * Called from e1000g_m_tx with an mblk ready to send. This
 * routine sets up the transmit descriptors and sends data to
 * the wire. It also pushes the just transmitted packet to
 * the used tx sw packet list.
 */
static boolean_t
e1000g_send(struct e1000g *Adapter, mblk_t *mp)
{
	p_tx_sw_packet_t packet;
	LIST_DESCRIBER pending_list;
	size_t len;
	size_t msg_size;
	uint32_t frag_count;
	int desc_count;
	uint32_t desc_total;
	uint32_t bcopy_thresh;
	uint32_t hdr_frag_len;
	boolean_t tx_undersize_flag;
	mblk_t *nmp;
	mblk_t *tmp;
	mblk_t *new_mp;
	mblk_t *pre_mp;
	mblk_t *next_mp;
	e1000g_tx_ring_t *tx_ring;
	context_data_t cur_context;

	tx_ring = Adapter->tx_ring;
	bcopy_thresh = Adapter->tx_bcopy_thresh;

	/* Get the total size and frags number of the message */
	tx_undersize_flag = B_FALSE;
	frag_count = 0;
	msg_size = 0;
	for (nmp = mp; nmp; nmp = nmp->b_cont) {
		frag_count++;
		msg_size += MBLKL(nmp);
	}

	/* retrieve and compute information for context descriptor */
	if (!e1000g_retrieve_context(mp, &cur_context, msg_size)) {
		/* invalid offload info: drop and report success to caller */
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Make sure the packet is less than the allowed size
	 */
	if (!cur_context.lso_flag &&
	    (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
		/*
		 * For the over size packet, we'll just drop it.
		 * So we return B_TRUE here.
		 */
		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
		    "Tx packet out of bound. length = %d \n", msg_size);
		E1000G_STAT(tx_ring->stat_over_size);
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Check and reclaim tx descriptors.
	 * This low water mark check should be done all the time as
	 * Transmit interrupt delay can produce Transmit interrupts little
	 * late and that may cause few problems related to reaping Tx
	 * Descriptors... As you may run short of them before getting any
	 * transmit interrupt...
	 */
	if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
		(void) e1000g_recycle(tx_ring);
		E1000G_DEBUG_STAT(tx_ring->stat_recycle);

		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
			goto tx_no_resource;
		}
	}

	/*
	 * If the message size is less than the minimum ethernet packet size,
	 * we'll use bcopy to send it, and padd it to 60 bytes later.
	 */
	if (msg_size < ETHERMIN) {
		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
		tx_undersize_flag = B_TRUE;
	}

	/* Initialize variables */
	desc_count = 1;	/* The initial value should be greater than 0 */
	desc_total = 0;
	new_mp = NULL;
	QUEUE_INIT_LIST(&pending_list);

	/* Process each mblk fragment and fill tx descriptors */
	/*
	 * The software should guarantee LSO packet header(MAC+IP+TCP)
	 * to be within one descriptor. Here we reallocate and refill the
	 * the header if it's physical memory non-contiguous.
	 */
	if (cur_context.lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		next_mp = mp;
		pre_mp = NULL;
		while (len < cur_context.hdr_len) {
			pre_mp = next_mp;
			next_mp = next_mp->b_cont;
			len += MBLKL(next_mp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into pre-allocated
		 * page-aligned buffer.
		 */
		if (len == cur_context.hdr_len)
			goto adjust_threshold;

		hdr_frag_len = cur_context.hdr_len - (len - MBLKL(next_mp));
		/*
		 * There are two cases we need to reallocate a mblk for the
		 * last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 * share the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 * and the header is physical memory non-contiguous
		 */
		if ((next_mp != mp) ||
		    (P2NPHASE((uintptr_t)next_mp->b_rptr, Adapter->sys_page_sz)
		    < cur_context.hdr_len)) {
			E1000G_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * reallocate the mblk for the last header fragment,
			 * expect to bcopy into pre-allocated page-aligned
			 * buffer
			 */
			new_mp = allocb(hdr_frag_len, NULL);
			if (!new_mp)
				return (B_FALSE);
			bcopy(next_mp->b_rptr, new_mp->b_rptr, hdr_frag_len);
			/* link the new header fragment with the other parts */
			new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
			new_mp->b_cont = next_mp;
			if (pre_mp)
				pre_mp->b_cont = new_mp;
			else
				mp = new_mp;
			next_mp->b_rptr += hdr_frag_len;
			frag_count++;
		}
adjust_threshold:
		/*
		 * adjust the bcopy threshhold to guarantee
		 * the header to use bcopy way
		 */
		if (bcopy_thresh < cur_context.hdr_len)
			bcopy_thresh = cur_context.hdr_len;
	}

	packet = NULL;
	nmp = mp;
	while (nmp) {
		tmp = nmp->b_cont;

		len = MBLKL(nmp);
		/* Check zero length mblks */
		if (len == 0) {
			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
			/*
			 * If there're no packet buffers have been used,
			 * or we just completed processing a buffer, then
			 * skip the empty mblk fragment.
			 * Otherwise, there's still a pending buffer that
			 * needs to be processed (tx_copy).
			 */
			if (desc_count > 0) {
				nmp = tmp;
				continue;
			}
		}

		/*
		 * Get a new TxSwPacket to process mblk buffers.
		 */
		if (desc_count > 0) {
			mutex_enter(&tx_ring->freelist_lock);
			packet = (p_tx_sw_packet_t)
			    QUEUE_POP_HEAD(&tx_ring->free_list);
			mutex_exit(&tx_ring->freelist_lock);

			if (packet == NULL) {
				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
				    "No Tx SwPacket available\n");
				E1000G_STAT(tx_ring->stat_no_swpkt);
				goto tx_send_failed;
			}
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
		}

		ASSERT(packet);
		/*
		 * If the size of the fragment is less than the tx_bcopy_thresh
		 * we'll use bcopy; Otherwise, we'll use DMA binding.
		 */
		if ((len <= bcopy_thresh) || tx_undersize_flag) {
			desc_count =
			    e1000g_tx_copy(tx_ring, packet, nmp,
			    tx_undersize_flag);
			E1000G_DEBUG_STAT(tx_ring->stat_copy);
		} else {
			desc_count =
			    e1000g_tx_bind(tx_ring, packet, nmp);
			E1000G_DEBUG_STAT(tx_ring->stat_bind);
		}

		if (desc_count > 0)
			desc_total += desc_count;
		else if (desc_count < 0)
			goto tx_send_failed;

		nmp = tmp;
	}

	/* Assign the message to the last sw packet */
	ASSERT(packet);
	ASSERT(packet->mp == NULL);
	packet->mp = mp;

	/* Try to recycle the tx descriptors again */
	if (tx_ring->tbd_avail < (desc_total + 2)) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
		(void) e1000g_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of available tx descriptors is not enough for transmit
	 * (one redundant descriptor and one hw checksum context descriptor are
	 * included), then return failure.
	 */
	if (tx_ring->tbd_avail < (desc_total + 2)) {
		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
		    "No Enough Tx descriptors\n");
		E1000G_STAT(tx_ring->stat_no_desc);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_send_failed;
	}

	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);

	mutex_exit(&tx_ring->tx_lock);

	ASSERT(desc_count > 0);

	/* Send successful */
	return (B_TRUE);

tx_send_failed:
	/* Restore mp to original */
	if (new_mp) {
		if (pre_mp) {
			pre_mp->b_cont = next_mp;
		}
		new_mp->b_cont = NULL;
		freemsg(new_mp);

		next_mp->b_rptr -= hdr_frag_len;
	}

	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Free pending TxSwPackets */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	while (packet) {
		packet->mp = NULL;
		e1000g_free_tx_swpkt(packet);
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return pending TxSwPackets to the "Free" list */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	E1000G_STAT(tx_ring->stat_send_fail);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);

tx_no_resource:
	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);
}

/*
 * e1000g_retrieve_context - extract offload info from an outbound mblk
 *
 * Fills cur_context with the hardware checksum start/stuff offsets, the
 * ethernet header size and, for LSO packets, the mss, header length and
 * TCP payload length.  Also zeroes the IP length/checksum fields that
 * the hardware fills in for LSO.  Returns B_FALSE when an LSO packet
 * carries inconsistent checksum flags (caller drops the packet).
 */
static boolean_t
e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context,
    size_t msg_size)
{
	uintptr_t ip_start;
	uintptr_t tcp_start;
	mblk_t *nmp;
	uint32_t lsoflags;
	uint32_t mss;

	bzero(cur_context, sizeof (context_data_t));

	/* first check lso information */
	lso_info_get(mp, &mss, &lsoflags);

	/* retrieve checksum info */
	hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start,
	    &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
	/* retrieve ethernet header size */
	if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid ==
	    htons(ETHERTYPE_VLAN))
		cur_context->ether_header_size =
		    sizeof (struct ether_vlan_header);
	else
		cur_context->ether_header_size =
		    sizeof (struct ether_header);

	if (lsoflags & HW_LSO) {
		ASSERT(mss != 0);

		/* free the invalid packet */
		if (mss == 0 ||
		    !((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
		    (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
			return (B_FALSE);
		}
		cur_context->mss = (uint16_t)mss;
		cur_context->lso_flag = B_TRUE;

		/*
		 * Some fields are cleared for the hardware to fill
		 * in. We don't assume Ethernet header, IP header and
		 * TCP header are always in the same mblk fragment,
		 * while we assume each header is always within one
		 * mblk fragment and Ethernet header is always in the
		 * first mblk fragment.
		 */
		nmp = mp;
		ip_start = (uintptr_t)(nmp->b_rptr)
		    + cur_context->ether_header_size;
		if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
			/* IP header starts in the next mblk fragment */
			ip_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (ip_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		tcp_start = ip_start +
		    IPH_HDR_LENGTH((ipha_t *)ip_start);
		if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
			/* TCP header starts in the next mblk fragment */
			tcp_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (tcp_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		cur_context->hdr_len = cur_context->ether_header_size
		    + IPH_HDR_LENGTH((ipha_t *)ip_start)
		    + TCP_HDR_LENGTH((tcph_t *)tcp_start);
		((ipha_t *)ip_start)->ipha_length = 0;
		((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
		/* calculate the TCP packet payload length */
		cur_context->pay_len = msg_size - cur_context->hdr_len;
	}
	return (B_TRUE);
}

/*
 * e1000g_check_context - decide whether a context descriptor is needed
 *
 * Compares the current packet's offload parameters against the ones
 * last programmed (tx_ring->pre_context) and returns B_TRUE when a
 * context descriptor must be (re)loaded into the hardware.
 */
static boolean_t
e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
{
	boolean_t context_reload;
	context_data_t *pre_context;
	struct e1000g *Adapter;

	context_reload = B_FALSE;
	pre_context = &tx_ring->pre_context;
	Adapter = tx_ring->adapter;

	/*
	 * The following code determine if the context descriptor is
	 * needed to be reloaded. The sequence of the conditions is
	 * made by their possibilities of changing.
	 */
	/*
	 * workaround for 82546EB, context descriptor must be reloaded
	 * per LSO/hw_cksum packet if LSO is enabled.
	 */
	if (Adapter->lso_premature_issue &&
	    Adapter->lso_enable &&
	    (cur_context->cksum_flags != 0)) {

		context_reload = B_TRUE;
	} else if (cur_context->lso_flag) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->pay_len != pre_context->pay_len) ||
		    (cur_context->mss != pre_context->mss) ||
		    (cur_context->hdr_len != pre_context->hdr_len) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	} else if (cur_context->cksum_flags != 0) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	}

	return (context_reload);
}

/*
 * e1000g_fill_tx_ring - program the hardware descriptor ring
 *
 * Writes an optional context descriptor plus one data descriptor per
 * mapped buffer of each pending sw packet, applies the 82546EB PCI-X
 * buffer-overrun and LSO premature-writeback errata workarounds, syncs
 * the descriptor DMA buffer and advances the hardware tail pointer.
 * Called from e1000g_send() with tx_ring->tx_lock held.  Returns the
 * number of descriptors consumed.
 */
static int
e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
    context_data_t *cur_context)
{
	struct e1000g *Adapter;
	struct e1000_hw *hw;
	p_tx_sw_packet_t first_packet;
	p_tx_sw_packet_t packet;
	p_tx_sw_packet_t previous_packet;
	boolean_t context_reload;
	struct e1000_tx_desc *first_data_desc;
	struct e1000_tx_desc *next_desc;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	boolean_t buff_overrun_flag;
	int i;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	desc_count = 0;
	first_packet = NULL;
	first_data_desc = NULL;
	descriptor = NULL;
	/* NOTE(review): duplicate initialization (also set above); harmless */
	first_packet = NULL;
	packet = NULL;
	buff_overrun_flag = B_FALSE;

	next_desc = tx_ring->tbd_next;

	/* Context descriptor reload check */
	context_reload = e1000g_check_context(tx_ring, cur_context);

	if (context_reload) {
		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);

		descriptor = next_desc;

		e1000g_fill_context_descriptor(cur_context,
		    (struct e1000_context_desc *)descriptor);

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
	}

	first_data_desc = next_desc;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
	while (packet) {
		ASSERT(packet->num_desc);

		for (i = 0; i < packet->num_desc; i++) {
			ASSERT(tx_ring->tbd_avail > 0);

			descriptor = next_desc;
			descriptor->buffer_addr =
			    packet->desc[i].address;
			descriptor->lower.data =
			    packet->desc[i].length;

			/* Zero out status */
			descriptor->upper.data = 0;

			descriptor->lower.data |=
			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* must set RS on every outgoing descriptor */
			descriptor->lower.data |=
			    E1000_TXD_CMD_RS;

			if (cur_context->lso_flag)
				descriptor->lower.data |= E1000_TXD_CMD_TSE;

			/* Check the wrap-around case */
			if (descriptor == tx_ring->tbd_last)
				next_desc = tx_ring->tbd_first;
			else
				next_desc++;

			desc_count++;

			/*
			 * workaround for 82546EB errata 33, hang in PCI-X
			 * systems due to 2k Buffer Overrun during Transmit
			 * Operation. The workaround applies to all the Intel
			 * PCI-X chips.
			 */
			if (hw->bus.type == e1000_bus_type_pcix &&
			    descriptor == first_data_desc &&
			    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
			    > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
				/* modified the first descriptor */
				descriptor->lower.data &=
				    ~E1000G_TBD_LENGTH_MASK;
				descriptor->lower.flags.length =
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* insert a new descriptor */
				ASSERT(tx_ring->tbd_avail > 0);
				next_desc->buffer_addr =
				    packet->desc[0].address +
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
				next_desc->lower.data =
				    packet->desc[0].length -
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* Zero out status */
				next_desc->upper.data = 0;

				next_desc->lower.data |=
				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
				/* must set RS on every outgoing descriptor */
				next_desc->lower.data |=
				    E1000_TXD_CMD_RS;

				if (cur_context->lso_flag)
					next_desc->lower.data |=
					    E1000_TXD_CMD_TSE;

				descriptor = next_desc;

				/* Check the wrap-around case */
				if (next_desc == tx_ring->tbd_last)
					next_desc = tx_ring->tbd_first;
				else
					next_desc++;

				desc_count++;
				buff_overrun_flag = B_TRUE;
			}
		}

		if (buff_overrun_flag) {
			/* account for the descriptor inserted above */
			packet->num_desc++;
			buff_overrun_flag = B_FALSE;
		}

		if (first_packet != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first SwPacket.
			 */
			first_packet->num_desc++;
			first_packet = NULL;
		}

		/* record submit time for stall detection (e1000g_recycle) */
		packet->tickstamp = lbolt64;

		previous_packet = packet;
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(pending_list, &packet->Link);
	}

	/*
	 * workaround for 82546EB errata 21, LSO Premature Descriptor Write Back
	 */
	if (Adapter->lso_premature_issue && cur_context->lso_flag &&
	    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
		/* modified the previous descriptor */
		descriptor->lower.data -= 4;

		/* insert a new descriptor */
		ASSERT(tx_ring->tbd_avail > 0);
		/* the lower 20 bits of lower.data is the length field */
		next_desc->buffer_addr =
		    descriptor->buffer_addr +
		    (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
		next_desc->lower.data = 4;

		/* Zero out status */
		next_desc->upper.data = 0;
		/* It must be part of a LSO packet */
		next_desc->lower.data |=
		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
		    E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;

		descriptor = next_desc;

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
		/* update the number of descriptors */
		previous_packet->num_desc++;
	}

	ASSERT(descriptor);

	if (cur_context->cksum_flags) {
		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
	}

	/*
	 * Last Descriptor of Packet needs End Of Packet (EOP), Report
	 * Status (RS) set.
	 */
	if (Adapter->tx_intr_delay) {
		descriptor->lower.data |= E1000_TXD_CMD_IDE |
		    E1000_TXD_CMD_EOP;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_EOP;
	}

	/* Set append Ethernet CRC (IFCS) bits */
	if (cur_context->lso_flag) {
		first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_IFCS;
	}

	/*
	 * Sync the Tx descriptors DMA buffer
	 */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	tx_ring->tbd_next = next_desc;

	/*
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * FX1000 that this frame is available to transmit.
	 */
	if (hw->mac.type == e1000_82547)
		e1000g_82547_tx_move_tail(tx_ring);
	else
		E1000_WRITE_REG(hw, E1000_TDT(0),
		    (uint32_t)(next_desc - tx_ring->tbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	/* Put the pending SwPackets to the "Used" list */
	mutex_enter(&tx_ring->usedlist_lock);
	QUEUE_APPEND(&tx_ring->used_list, pending_list);
	tx_ring->tbd_avail -= desc_count;
	mutex_exit(&tx_ring->usedlist_lock);

	/* update LSO related data */
	if (context_reload)
		tx_ring->pre_context = *cur_context;

	return (desc_count);
}

/*
 * e1000g_tx_setup - setup tx data structures
 *
 * This routine initializes all of the transmit related
 * structures. This includes the Transmit descriptors,
 * and the tx_sw_packet structures.
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_tx_sw_packet_t packet;
	uint32_t i;
	uint32_t buf_high;
	uint32_t buf_low;
	uint32_t reg_tipg;
	uint32_t reg_tctl;
	int size;
	e1000g_tx_ring_t *tx_ring;

	hw = &Adapter->shared;
	tx_ring = Adapter->tx_ring;

	/* init the lists */
	/*
	 * Here we don't need to protect the lists using the
	 * usedlist_lock and freelist_lock, for they have
	 * been protected by the chip_lock.
	 */
	QUEUE_INIT_LIST(&tx_ring->used_list);
	QUEUE_INIT_LIST(&tx_ring->free_list);

	/* Go through and set up each SW_Packet */
	packet = tx_ring->packet_area;
	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
		/* Initialize this tx_sw_apcket area */
		e1000g_free_tx_swpkt(packet);
		/* Add this tx_sw_packet to the free list */
		QUEUE_PUSH_TAIL(&tx_ring->free_list,
		    &packet->Link);
	}

	/* Setup TX descriptor pointers */
	tx_ring->tbd_next = tx_ring->tbd_first;
	tx_ring->tbd_oldest = tx_ring->tbd_first;

	/*
	 * Setup Hardware TX Registers
	 */
	/* Setup the Transmit Control Register (TCTL). */
	reg_tctl = E1000_READ_REG(hw, E1000_TCTL);
	reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
	    E1000_TCTL_RTLC;

	/* Enable the MULR bit */
	if (hw->bus.type == e1000_bus_type_pci_express)
		reg_tctl |= E1000_TCTL_MULR;

	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

	/* Setup HW Base and Length of Tx descriptor area */
	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
	/* read-back of TDLEN; presumably flushes the write - TODO confirm */
	size = E1000_READ_REG(hw, E1000_TDLEN(0));

	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

	/*
	 * Write the highest location first and work backward to the lowest.
	 * This is necessary for some adapter types to
	 * prevent write combining from occurring.
	 */
	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);

	/* Setup our HW Tx Head & Tail descriptor pointers */
	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
	E1000_WRITE_REG(hw, E1000_TDT(0), 0);

	/* Set the default values for the Tx Inter Packet Gap timer */
	if ((hw->mac.type == e1000_82542) &&
	    ((hw->revision_id == E1000_REVISION_2) ||
	    (hw->revision_id == E1000_REVISION_3))) {
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	} else if (hw->mac.type == e1000_80003es2lan) {
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
	} else {
		if (hw->phy.media_type == e1000_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}
	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

	/* Setup Transmit Interrupt Delay Value */
	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
		    Adapter->tx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
	}

	tx_ring->tbd_avail = Adapter->tx_desc_num;

	/* Initialize stored context information */
	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}

/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
	struct e1000g *Adapter;
	LIST_DESCRIBER pending_list;
	p_tx_sw_packet_t packet;
	mblk_t *mp;
	mblk_t *nmp;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	int64_t delta;

	/*
	 * This function will examine each TxSwPacket in the 'used' queue
	 * if the e1000g is done with it then the associated resources (Tx
	 * Descriptors) will be "freed" and the TxSwPacket will be
	 * returned to the 'free' queue.
	 */
	Adapter = tx_ring->adapter;
	delta = 0;

	/* Nothing outstanding: clear any stall indication and return */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
	if (packet == NULL) {
		Adapter->stall_flag = B_FALSE;
		return (0);
	}

	desc_count = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Sync the Tx descriptor DMA buffer */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);
	if (e1000g_check_dma_handle(
	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (0);
	}

	/*
	 * While there are still TxSwPackets in the used queue check them
	 */
	mutex_enter(&tx_ring->usedlist_lock);
	while ((packet =
	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) {

		/*
		 * Get hold of the next descriptor that the e1000g will
		 * report status back to (this will be the last descriptor
		 * of a given sw packet). We only want to free the
		 * sw packet (and it resources) if the e1000g is done
		 * with ALL of the descriptors. If the e1000g is done
		 * with the last one then it is done with all of them.
		 */
		ASSERT(packet->num_desc);
		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

		/* Check for wrap case */
		if (descriptor > tx_ring->tbd_last)
			descriptor -= Adapter->tx_desc_num;

		/*
		 * If the descriptor done bit is set free TxSwPacket and
		 * associated resources
		 */
		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
			QUEUE_POP_HEAD(&tx_ring->used_list);
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

			if (descriptor == tx_ring->tbd_last)
				tx_ring->tbd_oldest =
				    tx_ring->tbd_first;
			else
				tx_ring->tbd_oldest =
				    descriptor + 1;

			desc_count += packet->num_desc;
		} else {
			/*
			 * Found a sw packet that the e1000g is not done
			 * with then there is no reason to check the rest
			 * of the queue.
			 */
			delta = lbolt64 - packet->tickstamp;
			break;
		}
	}

	tx_ring->tbd_avail += desc_count;
	Adapter->tx_pkt_cnt += desc_count;

	mutex_exit(&tx_ring->usedlist_lock);

	if (desc_count == 0) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
		/*
		 * If the packet hasn't been sent out for seconds and
		 * the transmitter is not under paused flowctrl condition,
		 * the transmitter is considered to be stalled.
		 */
		if ((delta > Adapter->stall_threshold) &&
		    !(E1000_READ_REG(&Adapter->shared,
		    E1000_STATUS) & E1000_STATUS_TXOFF)) {
			Adapter->stall_flag = B_TRUE;
		}
		return (0);
	}

	Adapter->stall_flag = B_FALSE;

	mp = NULL;
	nmp = NULL;
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	ASSERT(packet != NULL);
	while (packet != NULL) {
		if (packet->mp != NULL) {
			ASSERT(packet->mp->b_next == NULL);
			/* Assemble the message chain */
			if (mp == NULL) {
				mp = packet->mp;
				nmp = packet->mp;
			} else {
				nmp->b_next = packet->mp;
				nmp = packet->mp;
			}
			/* Disconnect the message from the sw packet */
			packet->mp = NULL;
		}

		/* Free the TxSwPackets */
		e1000g_free_tx_swpkt(packet);

		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return the TxSwPackets back to the FreeList */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	/* Free the reclaimed mblk chain outside of any lock */
	if (mp != NULL)
		freemsgchain(mp);

	return (desc_count);
}
/*
 * 82544 Coexistence issue workaround:
 * There are 2 issues.
 * 1. If a 32 bit split completion happens from P64H2 and another
 * agent drives a 64 bit request/split completion after ONLY
 * 1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
 * 82544 has a problem where in to clock all the data in, it
 * looks at REQ64# signal and since it has changed so fast (i.e. 1
 * idle clock turn around), it will fail to clock all the data in.
 * Data coming from certain ending addresses has exposure to this issue.
 *
 * To detect this issue, following equation can be used...
 * SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 * If SUM[3:0] is in between 1 to 4, we will have this issue.
 *
 * ROOT CAUSE:
 * The erratum involves the 82544 PCIX elasticity FIFO implementations as
 * 64-bit FIFO's and flushing of the final partial-bytes corresponding
 * to the end of a requested read burst. Under a specific burst condition
 * of ending-data alignment and 32-byte split-completions, the final
 * byte(s) of split-completion data require an extra clock cycle to flush
 * into 64-bit FIFO orientation. An incorrect logic dependency on the
 * REQ64# signal occurring during this clock cycle may cause the
 * residual byte(s) to be lost, thereby rendering the internal DMA client
 * forever awaiting the final byte(s) for an outbound data-fetch. The
 * erratum is confirmed to *only* occur if certain subsequent external
 * 64-bit PCIX bus transactions occur immediately (minimum possible bus
 * turn-around) following the odd-aligned 32-bit split-completion
 * containing the final byte(s). Intel has confirmed that this has been
 * seen only with chipset/bridges which have the capability to provide
 * 32-bit split-completion data, and in the presence of newer PCIX bus
 * agents which fully-optimize the inter-transaction turn-around (zero
 * additional initiator latency when pre-granted bus ownership).
 *
 * This issue does not exist in PCI bus mode, when any agent is operating
 * in 32 bit only mode or on chipsets that do not do 32 bit split
 * completions for 64 bit read requests (Serverworks chipsets). P64H2 does
 * 32 bit split completions for any read request that has bit 2 set to 1
 * for the requested address and read request size is more than 8 bytes.
 *
 * 2. Another issue is related to 82544 driving DACs under the similar
 * scenario (32 bit split completion followed by 64 bit transaction with
 * only 1 cycle turnaround). This issue is still being root caused.
We 1162 * think that both of these issues can be avoided if following workaround 1163 * is implemented. It seems DAC issues is related to ending addresses being 1164 * 0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity 1165 * FIFO which does not get flushed due to REQ64# dependency. We will only 1166 * know the full story after it has been simulated successfully by HW team. 1167 * 1168 * WORKAROUND: 1169 * Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC) 1170 */ 1171 static uint32_t 1172 e1000g_fill_82544_desc(uint64_t address, 1173 size_t length, p_desc_array_t desc_array) 1174 { 1175 /* 1176 * Since issue is sensitive to length and address. 1177 * Let us first check the address... 1178 */ 1179 uint32_t safe_terminator; 1180 1181 if (length <= 4) { 1182 desc_array->descriptor[0].address = address; 1183 desc_array->descriptor[0].length = (uint32_t)length; 1184 desc_array->elements = 1; 1185 return (desc_array->elements); 1186 } 1187 safe_terminator = 1188 (uint32_t)((((uint32_t)address & 0x7) + 1189 (length & 0xF)) & 0xF); 1190 /* 1191 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then 1192 * return 1193 */ 1194 if (safe_terminator == 0 || 1195 (safe_terminator > 4 && safe_terminator < 9) || 1196 (safe_terminator > 0xC && safe_terminator <= 0xF)) { 1197 desc_array->descriptor[0].address = address; 1198 desc_array->descriptor[0].length = (uint32_t)length; 1199 desc_array->elements = 1; 1200 return (desc_array->elements); 1201 } 1202 1203 desc_array->descriptor[0].address = address; 1204 desc_array->descriptor[0].length = length - 4; 1205 desc_array->descriptor[1].address = address + (length - 4); 1206 desc_array->descriptor[1].length = 4; 1207 desc_array->elements = 2; 1208 return (desc_array->elements); 1209 } 1210 1211 static int 1212 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, 1213 mblk_t *mp, boolean_t tx_undersize_flag) 1214 { 1215 size_t len; 1216 size_t len1; 1217 dma_buffer_t 
*tx_buf; 1218 mblk_t *nmp; 1219 boolean_t finished; 1220 int desc_count; 1221 1222 desc_count = 0; 1223 tx_buf = packet->tx_buf; 1224 len = MBLKL(mp); 1225 1226 ASSERT((tx_buf->len + len) <= tx_buf->size); 1227 1228 if (len > 0) { 1229 bcopy(mp->b_rptr, 1230 tx_buf->address + tx_buf->len, 1231 len); 1232 tx_buf->len += len; 1233 1234 packet->num_mblk_frag++; 1235 } 1236 1237 nmp = mp->b_cont; 1238 if (nmp == NULL) { 1239 finished = B_TRUE; 1240 } else { 1241 len1 = MBLKL(nmp); 1242 if ((tx_buf->len + len1) > tx_buf->size) 1243 finished = B_TRUE; 1244 else if (tx_undersize_flag) 1245 finished = B_FALSE; 1246 else if (len1 > tx_ring->adapter->tx_bcopy_thresh) 1247 finished = B_TRUE; 1248 else 1249 finished = B_FALSE; 1250 } 1251 1252 if (finished) { 1253 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy, 1254 (tx_buf->len > len)); 1255 1256 /* 1257 * If the packet is smaller than 64 bytes, which is the 1258 * minimum ethernet packet size, pad the packet to make 1259 * it at least 60 bytes. The hardware will add 4 bytes 1260 * for CRC. 
1261 */ 1262 if (tx_undersize_flag) { 1263 ASSERT(tx_buf->len < ETHERMIN); 1264 1265 bzero(tx_buf->address + tx_buf->len, 1266 ETHERMIN - tx_buf->len); 1267 tx_buf->len = ETHERMIN; 1268 } 1269 1270 #ifdef __sparc 1271 if (packet->dma_type == USE_DVMA) 1272 dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV); 1273 else 1274 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1275 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1276 #else 1277 (void) ddi_dma_sync(tx_buf->dma_handle, 0, 1278 tx_buf->len, DDI_DMA_SYNC_FORDEV); 1279 #endif 1280 1281 packet->data_transfer_type = USE_BCOPY; 1282 1283 desc_count = e1000g_fill_tx_desc(tx_ring, 1284 packet, 1285 tx_buf->dma_address, 1286 tx_buf->len); 1287 1288 if (desc_count <= 0) 1289 return (-1); 1290 } 1291 1292 return (desc_count); 1293 } 1294 1295 static int 1296 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp) 1297 { 1298 int j; 1299 int mystat; 1300 size_t len; 1301 ddi_dma_cookie_t dma_cookie; 1302 uint_t ncookies; 1303 int desc_count; 1304 uint32_t desc_total; 1305 1306 desc_total = 0; 1307 len = MBLKL(mp); 1308 1309 /* 1310 * ddi_dma_addr_bind_handle() allocates DMA resources for a 1311 * memory object such that a device can perform DMA to or from 1312 * the object. DMA resources are allocated considering the 1313 * device's DMA attributes as expressed by ddi_dma_attr(9S) 1314 * (see ddi_dma_alloc_handle(9F)). 1315 * 1316 * ddi_dma_addr_bind_handle() fills in the first DMA cookie 1317 * pointed to by cookiep with the appropriate address, length, 1318 * and bus type. *ccountp is set to the number of DMA cookies 1319 * representing this DMA object. Subsequent DMA cookies must be 1320 * retrieved by calling ddi_dma_nextcookie(9F) the number of 1321 * times specified by *countp - 1. 
1322 */ 1323 switch (packet->dma_type) { 1324 #ifdef __sparc 1325 case USE_DVMA: 1326 dvma_kaddr_load(packet->tx_dma_handle, 1327 (caddr_t)mp->b_rptr, len, 0, &dma_cookie); 1328 1329 dvma_sync(packet->tx_dma_handle, 0, 1330 DDI_DMA_SYNC_FORDEV); 1331 1332 ncookies = 1; 1333 packet->data_transfer_type = USE_DVMA; 1334 break; 1335 #endif 1336 case USE_DMA: 1337 if ((mystat = ddi_dma_addr_bind_handle( 1338 packet->tx_dma_handle, NULL, 1339 (caddr_t)mp->b_rptr, len, 1340 DDI_DMA_WRITE | DDI_DMA_STREAMING, 1341 DDI_DMA_DONTWAIT, 0, &dma_cookie, 1342 &ncookies)) != DDI_DMA_MAPPED) { 1343 1344 e1000g_log(tx_ring->adapter, CE_WARN, 1345 "Couldn't bind mblk buffer to Tx DMA handle: " 1346 "return: %X, Pkt: %X\n", 1347 mystat, packet); 1348 return (-1); 1349 } 1350 1351 /* 1352 * An implicit ddi_dma_sync() is done when the 1353 * ddi_dma_addr_bind_handle() is called. So we 1354 * don't need to explicitly call ddi_dma_sync() 1355 * here any more. 1356 */ 1357 ASSERT(ncookies); 1358 E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie, 1359 (ncookies > 1)); 1360 1361 /* 1362 * The data_transfer_type value must be set after the handle 1363 * has been bound, for it will be used in e1000g_free_tx_swpkt() 1364 * to decide whether we need to unbind the handle. 1365 */ 1366 packet->data_transfer_type = USE_DMA; 1367 break; 1368 default: 1369 ASSERT(B_FALSE); 1370 break; 1371 } 1372 1373 packet->num_mblk_frag++; 1374 1375 /* 1376 * Each address could span thru multpile cookie.. 1377 * Each cookie will have one descriptor 1378 */ 1379 for (j = ncookies; j != 0; j--) { 1380 1381 desc_count = e1000g_fill_tx_desc(tx_ring, 1382 packet, 1383 dma_cookie.dmac_laddress, 1384 dma_cookie.dmac_size); 1385 1386 if (desc_count <= 0) 1387 return (-1); 1388 1389 desc_total += desc_count; 1390 1391 /* 1392 * ddi_dma_nextcookie() retrieves subsequent DMA 1393 * cookies for a DMA object. 1394 * ddi_dma_nextcookie() fills in the 1395 * ddi_dma_cookie(9S) structure pointed to by 1396 * cookiep. 
The ddi_dma_cookie(9S) structure 1397 * must be allocated prior to calling 1398 * ddi_dma_nextcookie(). The DMA cookie count 1399 * returned by ddi_dma_buf_bind_handle(9F), 1400 * ddi_dma_addr_bind_handle(9F), or 1401 * ddi_dma_getwin(9F) indicates the number of DMA 1402 * cookies a DMA object consists of. If the 1403 * resulting cookie count, N, is larger than 1, 1404 * ddi_dma_nextcookie() must be called N-1 times 1405 * to retrieve all DMA cookies. 1406 */ 1407 if (j > 1) { 1408 ddi_dma_nextcookie(packet->tx_dma_handle, 1409 &dma_cookie); 1410 } 1411 } 1412 1413 return (desc_total); 1414 } 1415 1416 static void 1417 e1000g_fill_context_descriptor(context_data_t *cur_context, 1418 struct e1000_context_desc *context_desc) 1419 { 1420 if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) { 1421 context_desc->lower_setup.ip_fields.ipcss = 1422 cur_context->ether_header_size; 1423 context_desc->lower_setup.ip_fields.ipcso = 1424 cur_context->ether_header_size + 1425 offsetof(struct ip, ip_sum); 1426 context_desc->lower_setup.ip_fields.ipcse = 1427 cur_context->ether_header_size + 1428 cur_context->cksum_start - 1; 1429 } else 1430 context_desc->lower_setup.ip_config = 0; 1431 1432 if (cur_context->cksum_flags & HCK_PARTIALCKSUM) { 1433 /* 1434 * The packet with same protocol has the following 1435 * stuff and start offset: 1436 * | Protocol | Stuff | Start | Checksum 1437 * | | Offset | Offset | Enable 1438 * | IPv4 + TCP | 0x24 | 0x14 | Yes 1439 * | IPv4 + UDP | 0x1A | 0x14 | Yes 1440 * | IPv6 + TCP | 0x20 | 0x10 | No 1441 * | IPv6 + UDP | 0x14 | 0x10 | No 1442 */ 1443 context_desc->upper_setup.tcp_fields.tucss = 1444 cur_context->cksum_start + cur_context->ether_header_size; 1445 context_desc->upper_setup.tcp_fields.tucso = 1446 cur_context->cksum_stuff + cur_context->ether_header_size; 1447 context_desc->upper_setup.tcp_fields.tucse = 0; 1448 } else 1449 context_desc->upper_setup.tcp_config = 0; 1450 1451 if (cur_context->lso_flag) { 1452 
context_desc->tcp_seg_setup.fields.mss = cur_context->mss; 1453 context_desc->tcp_seg_setup.fields.hdr_len = 1454 cur_context->hdr_len; 1455 /* 1456 * workaround for 82546EB errata 23, status-writeback 1457 * reporting (RS) should not be set on context or 1458 * Null descriptors 1459 */ 1460 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1461 | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP 1462 | E1000_TXD_DTYP_C | cur_context->pay_len; 1463 } else { 1464 context_desc->cmd_and_length = E1000_TXD_CMD_DEXT 1465 | E1000_TXD_DTYP_C; 1466 /* 1467 * Zero out the options for TCP Segmentation Offload 1468 */ 1469 context_desc->tcp_seg_setup.data = 0; 1470 } 1471 } 1472 1473 static int 1474 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring, 1475 p_tx_sw_packet_t packet, uint64_t address, size_t size) 1476 { 1477 struct e1000_hw *hw = &tx_ring->adapter->shared; 1478 p_sw_desc_t desc; 1479 1480 if (hw->mac.type == e1000_82544) { 1481 if (hw->bus.type == e1000_bus_type_pcix) 1482 return (e1000g_tx_workaround_PCIX_82544(packet, 1483 address, size)); 1484 1485 if (size > JUMBO_FRAG_LENGTH) 1486 return (e1000g_tx_workaround_jumbo_82544(packet, 1487 address, size)); 1488 } 1489 1490 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1491 1492 desc = &packet->desc[packet->num_desc]; 1493 desc->address = address; 1494 desc->length = (uint32_t)size; 1495 1496 packet->num_desc++; 1497 1498 return (1); 1499 } 1500 1501 static int 1502 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet, 1503 uint64_t address, size_t size) 1504 { 1505 p_sw_desc_t desc; 1506 int desc_count; 1507 long size_left; 1508 size_t len; 1509 uint32_t counter; 1510 uint32_t array_elements; 1511 desc_array_t desc_array; 1512 1513 /* 1514 * Coexist Workaround for cordova: RP: 07/04/03 1515 * 1516 * RP: ERRATA: Workaround ISSUE: 1517 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup 1518 * Eachbuffer in to 8kb pieces until the 1519 * remainder is < 8kb 1520 */ 1521 size_left = size; 1522 desc_count = 0; 
1523 1524 while (size_left > 0) { 1525 if (size_left > MAX_TX_BUF_SIZE) 1526 len = MAX_TX_BUF_SIZE; 1527 else 1528 len = size_left; 1529 1530 array_elements = e1000g_fill_82544_desc(address, 1531 len, &desc_array); 1532 1533 for (counter = 0; counter < array_elements; counter++) { 1534 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1535 /* 1536 * Put in the buffer address 1537 */ 1538 desc = &packet->desc[packet->num_desc]; 1539 1540 desc->address = 1541 desc_array.descriptor[counter].address; 1542 desc->length = 1543 desc_array.descriptor[counter].length; 1544 1545 packet->num_desc++; 1546 desc_count++; 1547 } /* for */ 1548 1549 /* 1550 * Update the buffer address and length 1551 */ 1552 address += MAX_TX_BUF_SIZE; 1553 size_left -= MAX_TX_BUF_SIZE; 1554 } /* while */ 1555 1556 return (desc_count); 1557 } 1558 1559 static int 1560 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet, 1561 uint64_t address, size_t size) 1562 { 1563 p_sw_desc_t desc; 1564 int desc_count; 1565 long size_left; 1566 uint32_t offset; 1567 1568 /* 1569 * Workaround for Jumbo Frames on Cordova 1570 * PSD 06/01/2001 1571 */ 1572 size_left = size; 1573 desc_count = 0; 1574 offset = 0; 1575 while (size_left > 0) { 1576 ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET); 1577 1578 desc = &packet->desc[packet->num_desc]; 1579 1580 desc->address = address + offset; 1581 1582 if (size_left > JUMBO_FRAG_LENGTH) 1583 desc->length = JUMBO_FRAG_LENGTH; 1584 else 1585 desc->length = (uint32_t)size_left; 1586 1587 packet->num_desc++; 1588 desc_count++; 1589 1590 offset += desc->length; 1591 size_left -= JUMBO_FRAG_LENGTH; 1592 } 1593 1594 return (desc_count); 1595 } 1596 1597 #pragma inline(e1000g_82547_tx_move_tail_work) 1598 1599 static void 1600 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring) 1601 { 1602 struct e1000_hw *hw; 1603 uint16_t hw_tdt; 1604 uint16_t sw_tdt; 1605 struct e1000_tx_desc *tx_desc; 1606 uint16_t length = 0; 1607 boolean_t eop = B_FALSE; 1608 struct e1000g 
*Adapter; 1609 1610 Adapter = tx_ring->adapter; 1611 hw = &Adapter->shared; 1612 1613 hw_tdt = E1000_READ_REG(hw, E1000_TDT(0)); 1614 sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first; 1615 1616 while (hw_tdt != sw_tdt) { 1617 tx_desc = &(tx_ring->tbd_first[hw_tdt]); 1618 length += tx_desc->lower.flags.length; 1619 eop = tx_desc->lower.data & E1000_TXD_CMD_EOP; 1620 if (++hw_tdt == Adapter->tx_desc_num) 1621 hw_tdt = 0; 1622 1623 if (eop) { 1624 if ((Adapter->link_duplex == HALF_DUPLEX) && 1625 (e1000_fifo_workaround_82547(hw, length) 1626 != E1000_SUCCESS)) { 1627 if (tx_ring->timer_enable_82547) { 1628 ASSERT(tx_ring->timer_id_82547 == 0); 1629 tx_ring->timer_id_82547 = 1630 timeout(e1000g_82547_timeout, 1631 (void *)tx_ring, 1632 drv_usectohz(10000)); 1633 } 1634 return; 1635 1636 } else { 1637 E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt); 1638 e1000_update_tx_fifo_head_82547(hw, length); 1639 length = 0; 1640 } 1641 } 1642 } 1643 } 1644 1645 static void 1646 e1000g_82547_timeout(void *arg) 1647 { 1648 e1000g_tx_ring_t *tx_ring; 1649 1650 tx_ring = (e1000g_tx_ring_t *)arg; 1651 1652 mutex_enter(&tx_ring->tx_lock); 1653 1654 tx_ring->timer_id_82547 = 0; 1655 e1000g_82547_tx_move_tail_work(tx_ring); 1656 1657 mutex_exit(&tx_ring->tx_lock); 1658 } 1659 1660 static void 1661 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring) 1662 { 1663 timeout_id_t tid; 1664 1665 ASSERT(MUTEX_HELD(&tx_ring->tx_lock)); 1666 1667 tid = tx_ring->timer_id_82547; 1668 tx_ring->timer_id_82547 = 0; 1669 if (tid != 0) { 1670 tx_ring->timer_enable_82547 = B_FALSE; 1671 mutex_exit(&tx_ring->tx_lock); 1672 1673 (void) untimeout(tid); 1674 1675 mutex_enter(&tx_ring->tx_lock); 1676 } 1677 tx_ring->timer_enable_82547 = B_TRUE; 1678 e1000g_82547_tx_move_tail_work(tx_ring); 1679 } 1680