/*
 * This file is provided under a CDDLv1 license.  When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2009 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 *	http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * **********************************************************************
 * *									*
 * * Module Name:							*
 * *	e1000g_tx.c							*
 * *									*
 * * Abstract:								*
 * *	This file contains routines that take care of Transmit: they	*
 * *	make the hardware send the data pointed to by the packet out	*
 * *	onto the physical medium.					*
 * *									*
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, boolean_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t e1000g_retrieve_context(mblk_t *, context_data_t *, size_t);
static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    context_data_t *);
static void e1000g_fill_context_descriptor(context_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

#ifndef E1000G_DEBUG
#pragma inline(e1000g_tx_copy)
#pragma inline(e1000g_tx_bind)
#pragma inline(e1000g_retrieve_context)
#pragma inline(e1000g_check_context)
#pragma inline(e1000g_fill_tx_ring)
#pragma inline(e1000g_fill_context_descriptor)
#pragma inline(e1000g_fill_tx_desc)
#pragma inline(e1000g_fill_82544_desc)
#pragma inline(e1000g_tx_workaround_PCIX_82544)
#pragma inline(e1000g_tx_workaround_jumbo_82544)
#pragma inline(e1000g_free_tx_swpkt)
#endif

/*
 * e1000g_free_tx_swpkt - free up the tx sw packet
 *
 * Unbind the previously bound DMA handle for a given
 * transmit sw packet, and reset the sw packet data.
 */
void
e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
{
	switch (packet->data_transfer_type) {
	case USE_BCOPY:
		packet->tx_buf->len = 0;
		break;
#ifdef __sparc
	case USE_DVMA:
		dvma_unload(packet->tx_dma_handle, 0, -1);
		break;
#endif
	case USE_DMA:
		(void) ddi_dma_unbind_handle(packet->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * The mblk has been stripped off the sw packet
	 * and will be freed in a triggered soft intr.
	 */
	ASSERT(packet->mp == NULL);

	packet->data_transfer_type = USE_NONE;
	packet->num_mblk_frag = 0;
	packet->num_desc = 0;
}

mblk_t *
e1000g_m_tx(void *arg, mblk_t *mp)
{
	struct e1000g *Adapter = (struct e1000g *)arg;
	mblk_t *next;

	rw_enter(&Adapter->chip_lock, RW_READER);

	if ((Adapter->e1000g_state & E1000G_SUSPENDED) ||
	    !(Adapter->e1000g_state & E1000G_STARTED) ||
	    (Adapter->link_state != LINK_STATE_UP)) {
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if (!e1000g_send(Adapter, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	rw_exit(&Adapter->chip_lock);
	return (mp);
}

/*
 * e1000g_send - send packets onto the wire
 *
 * Called from e1000g_m_tx with an mblk ready to send.  This
 * routine sets up the transmit descriptors and sends the data to
 * the wire.  It also pushes the just transmitted packet onto
 * the used tx sw packet list.
 */
static boolean_t
e1000g_send(struct e1000g *Adapter, mblk_t *mp)
{
	p_tx_sw_packet_t packet;
	LIST_DESCRIBER pending_list;
	size_t len;
	size_t msg_size;
	uint32_t frag_count;
	int desc_count;
	uint32_t desc_total;
	uint32_t bcopy_thresh;
	uint32_t hdr_frag_len;
	boolean_t tx_undersize_flag;
	mblk_t *nmp;
	mblk_t *tmp;
	mblk_t *new_mp;
	mblk_t *pre_mp;
	mblk_t *next_mp;
	e1000g_tx_ring_t *tx_ring;
	context_data_t cur_context;

	tx_ring = Adapter->tx_ring;
	bcopy_thresh = Adapter->tx_bcopy_thresh;

	/* Get the total size and number of fragments of the message */
	tx_undersize_flag = B_FALSE;
	frag_count = 0;
	msg_size = 0;
	for (nmp = mp; nmp; nmp = nmp->b_cont) {
		frag_count++;
		msg_size += MBLKL(nmp);
	}

	/* retrieve and compute information for the context descriptor */
	if (!e1000g_retrieve_context(mp, &cur_context, msg_size)) {
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Make sure the packet is less than the allowed size
	 */
	if (!cur_context.lso_flag &&
	    (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
		/*
		 * An oversized packet is simply dropped, so we
		 * return B_TRUE here.
		 */
		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
		    "Tx packet out of bound. length = %d \n", msg_size);
		E1000G_STAT(tx_ring->stat_over_size);
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Check and reclaim tx descriptors.
	 * This low-water-mark check should be done all the time, because
	 * Transmit interrupt delay can deliver Transmit interrupts a little
	 * late and cause problems reaping Tx descriptors: you may run short
	 * of them before getting any transmit interrupt.
	 */
	if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
		(void) e1000g_recycle(tx_ring);
		E1000G_DEBUG_STAT(tx_ring->stat_recycle);

		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
			goto tx_no_resource;
		}
	}

	/*
	 * If the message size is less than the minimum ethernet packet size,
	 * we'll use bcopy to send it, and pad it to 60 bytes later.
	 */
	if (msg_size < ETHERMIN) {
		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
		tx_undersize_flag = B_TRUE;
	}

	/* Initialize variables */
	desc_count = 1;	/* The initial value should be greater than 0 */
	desc_total = 0;
	new_mp = NULL;
	QUEUE_INIT_LIST(&pending_list);

	/* Process each mblk fragment and fill tx descriptors */
	/*
	 * The software should guarantee that the LSO packet header
	 * (MAC+IP+TCP) is within one descriptor.  Here we reallocate and
	 * refill the header if its physical memory is non-contiguous.
	 */
	if (cur_context.lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		next_mp = mp;
		pre_mp = NULL;
		while (len < cur_context.hdr_len) {
			pre_mp = next_mp;
			next_mp = next_mp->b_cont;
			len += MBLKL(next_mp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == cur_context.hdr_len)
			goto adjust_threshold;

		hdr_frag_len = cur_context.hdr_len - (len - MBLKL(next_mp));
		/*
		 * There are three cases where we need to reallocate an mblk
		 * for the last header fragment:
		 *
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 *
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is physically non-contiguous
		 *
		 * 3. there is a 4 KB boundary within the header and 64 bytes
		 *    following the end of the header bytes.  This case may
		 *    cause TCP data corruption.
		 *
		 * The workaround for case #2 and case #3 is:
		 *    Assuming standard Ethernet/IP/TCP headers of 54 bytes,
		 *    this means that the buffer (containing the headers)
		 *    should not start -118 bytes before a 4 KB boundary.  For
		 *    example, 128-byte alignment for this buffer could be
		 *    used to fulfill this condition.
		 */
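		/*
		 * Illustrative example with hypothetical values (assuming
		 * E1000_LSO_FIRST_DESC_ALIGNMENT is the 128 bytes implied by
		 * the comment above): if next_mp->b_rptr ended in 0xfc0,
		 * then P2NPHASE(b_rptr, 4K) would be 0x40, i.e. only 64
		 * bytes remain before the next 4 KB boundary.  Since 64 is
		 * less than 128, the check below copies the header fragment
		 * into the pre-allocated page-aligned buffer.
		 */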
		if ((next_mp != mp) ||
		    (P2NPHASE((uintptr_t)next_mp->b_rptr,
		    E1000_LSO_FIRST_DESC_ALIGNMENT_BOUNDARY_4K)
		    < E1000_LSO_FIRST_DESC_ALIGNMENT)) {
			E1000G_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * reallocate the mblk for the last header fragment,
			 * expecting to bcopy into the pre-allocated
			 * page-aligned buffer
			 */
			new_mp = allocb(hdr_frag_len, NULL);
			if (!new_mp)
				return (B_FALSE);
			bcopy(next_mp->b_rptr, new_mp->b_rptr, hdr_frag_len);
			/* link the new header fragment with the other parts */
			new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
			new_mp->b_cont = next_mp;
			if (pre_mp)
				pre_mp->b_cont = new_mp;
			else
				mp = new_mp;
			next_mp->b_rptr += hdr_frag_len;
			frag_count++;
		}
adjust_threshold:
		/*
		 * adjust the bcopy threshold to guarantee
		 * that the header takes the bcopy path
		 */
		if (bcopy_thresh < cur_context.hdr_len)
			bcopy_thresh = cur_context.hdr_len;
	}

	packet = NULL;
	nmp = mp;
	while (nmp) {
		tmp = nmp->b_cont;

		len = MBLKL(nmp);
		/* Check zero length mblks */
		if (len == 0) {
			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
			/*
			 * If no packet buffers have been used yet,
			 * or we just completed processing a buffer, then
			 * skip the empty mblk fragment.
			 * Otherwise, there's still a pending buffer that
			 * needs to be processed (tx_copy).
			 */
			if (desc_count > 0) {
				nmp = tmp;
				continue;
			}
		}

		/*
		 * Get a new TxSwPacket to process mblk buffers.
		 */
		if (desc_count > 0) {
			mutex_enter(&tx_ring->freelist_lock);
			packet = (p_tx_sw_packet_t)
			    QUEUE_POP_HEAD(&tx_ring->free_list);
			mutex_exit(&tx_ring->freelist_lock);

			if (packet == NULL) {
				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
				    "No Tx SwPacket available\n");
				E1000G_STAT(tx_ring->stat_no_swpkt);
				goto tx_send_failed;
			}
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
		}

		ASSERT(packet);
		/*
		 * If the size of the fragment is less than the
		 * tx_bcopy_thresh, we'll use bcopy; otherwise, we'll use
		 * DMA binding.
		 */
		if ((len <= bcopy_thresh) || tx_undersize_flag) {
			desc_count =
			    e1000g_tx_copy(tx_ring, packet, nmp,
			    tx_undersize_flag);
			E1000G_DEBUG_STAT(tx_ring->stat_copy);
		} else {
			desc_count =
			    e1000g_tx_bind(tx_ring, packet, nmp);
			E1000G_DEBUG_STAT(tx_ring->stat_bind);
		}

		if (desc_count > 0)
			desc_total += desc_count;
		else if (desc_count < 0)
			goto tx_send_failed;

		nmp = tmp;
	}

	/* Assign the message to the last sw packet */
	ASSERT(packet);
	ASSERT(packet->mp == NULL);
	packet->mp = mp;

	/* Try to recycle the tx descriptors again */
	if (tx_ring->tbd_avail < (desc_total + 3)) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
		(void) e1000g_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of available tx descriptors is not enough for
	 * transmit (one redundant descriptor and one hw checksum context
	 * descriptor are included), then return failure.
	 */
	if (tx_ring->tbd_avail < (desc_total + 3)) {
		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
		    "No Enough Tx descriptors\n");
		E1000G_STAT(tx_ring->stat_no_desc);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_send_failed;
	}

	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);

	mutex_exit(&tx_ring->tx_lock);

	ASSERT(desc_count > 0);

	/* Send successful */
	return (B_TRUE);

tx_send_failed:
	/* Restore mp to original */
	if (new_mp) {
		if (pre_mp) {
			pre_mp->b_cont = next_mp;
		}
		new_mp->b_cont = NULL;
		freemsg(new_mp);

		next_mp->b_rptr -= hdr_frag_len;
	}

	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Free pending TxSwPackets */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	while (packet) {
		packet->mp = NULL;
		e1000g_free_tx_swpkt(packet);
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return pending TxSwPackets to the "Free" list */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	E1000G_STAT(tx_ring->stat_send_fail);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);

tx_no_resource:
	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);
}

static boolean_t
e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context,
    size_t msg_size)
{
	uintptr_t ip_start;
	uintptr_t tcp_start;
	mblk_t *nmp;
	uint32_t lsoflags;
	uint32_t mss;

	bzero(cur_context, sizeof (context_data_t));

	/* first check lso information */
	mac_lso_get(mp, &mss, &lsoflags);

	/* retrieve checksum info */
	mac_hcksum_get(mp, &cur_context->cksum_start,
	    &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
	/* retrieve ethernet header size */
	if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid ==
	    htons(ETHERTYPE_VLAN))
		cur_context->ether_header_size =
		    sizeof (struct ether_vlan_header);
	else
		cur_context->ether_header_size =
		    sizeof (struct ether_header);

	if (lsoflags & HW_LSO) {
		ASSERT(mss != 0);

		/* free the invalid packet */
		if (mss == 0 ||
		    !((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
		    (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
			return (B_FALSE);
		}
		cur_context->mss = (uint16_t)mss;
		cur_context->lso_flag = B_TRUE;

		/*
		 * Some fields are cleared for the hardware to fill
		 * in.  We don't assume the Ethernet header, IP header and
		 * TCP header are always in the same mblk fragment,
		 * while we assume each header is always within one
		 * mblk fragment and the Ethernet header is always in the
		 * first mblk fragment.
		 */
		nmp = mp;
		ip_start = (uintptr_t)(nmp->b_rptr)
		    + cur_context->ether_header_size;
		if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
			ip_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (ip_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		tcp_start = ip_start +
		    IPH_HDR_LENGTH((ipha_t *)ip_start);
		if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
			tcp_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (tcp_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		cur_context->hdr_len = cur_context->ether_header_size
		    + IPH_HDR_LENGTH((ipha_t *)ip_start)
		    + TCP_HDR_LENGTH((tcph_t *)tcp_start);
		((ipha_t *)ip_start)->ipha_length = 0;
		((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
		/* calculate the TCP packet payload length */
		cur_context->pay_len = msg_size - cur_context->hdr_len;
	}
	return (B_TRUE);
}

static boolean_t
e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
{
	boolean_t context_reload;
	context_data_t *pre_context;
	struct e1000g *Adapter;

	context_reload = B_FALSE;
	pre_context = &tx_ring->pre_context;
	Adapter = tx_ring->adapter;

	/*
	 * The following code determines whether the context descriptor
	 * needs to be reloaded.  The conditions are ordered by how likely
	 * they are to change.
	 */
	/*
	 * workaround for 82546EB: the context descriptor must be reloaded
	 * per LSO/hw_cksum packet if LSO is enabled.
	 */
	if (Adapter->lso_premature_issue &&
	    Adapter->lso_enable &&
	    (cur_context->cksum_flags != 0)) {

		context_reload = B_TRUE;
	} else if (cur_context->lso_flag) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->pay_len != pre_context->pay_len) ||
		    (cur_context->mss != pre_context->mss) ||
		    (cur_context->hdr_len != pre_context->hdr_len) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	} else if (cur_context->cksum_flags != 0) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	}

	return (context_reload);
}

static int
e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
    context_data_t *cur_context)
{
	struct e1000g *Adapter;
	struct e1000_hw *hw;
	p_tx_sw_packet_t first_packet;
	p_tx_sw_packet_t packet;
	p_tx_sw_packet_t previous_packet;
	boolean_t context_reload;
	struct e1000_tx_desc *first_data_desc;
	struct e1000_tx_desc *next_desc;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	boolean_t buff_overrun_flag;
	int i;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	desc_count = 0;
	first_packet = NULL;
	first_data_desc = NULL;
	descriptor = NULL;
	first_packet = NULL;
	packet = NULL;
	buff_overrun_flag = B_FALSE;

	next_desc = tx_ring->tbd_next;

	/* Context descriptor reload check */
	context_reload = e1000g_check_context(tx_ring, cur_context);

	if (context_reload) {
		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);

		descriptor = next_desc;

		e1000g_fill_context_descriptor(cur_context,
		    (struct e1000_context_desc *)descriptor);

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
	}

	first_data_desc = next_desc;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
	while (packet) {
		ASSERT(packet->num_desc);

		for (i = 0; i < packet->num_desc; i++) {
			ASSERT(tx_ring->tbd_avail > 0);

			descriptor = next_desc;
			descriptor->buffer_addr =
			    packet->desc[i].address;
			descriptor->lower.data =
			    packet->desc[i].length;

			/* Zero out status */
			descriptor->upper.data = 0;

			descriptor->lower.data |=
			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* must set RS on every outgoing descriptor */
			descriptor->lower.data |=
			    E1000_TXD_CMD_RS;

			if (cur_context->lso_flag)
				descriptor->lower.data |= E1000_TXD_CMD_TSE;

			/* Check the wrap-around case */
			if (descriptor == tx_ring->tbd_last)
				next_desc = tx_ring->tbd_first;
			else
				next_desc++;

			desc_count++;

			/*
			 * workaround for 82546EB errata 33, hang in PCI-X
			 * systems due to 2k Buffer Overrun during Transmit
			 * Operation.  The workaround applies to all the Intel
			 * PCI-X chips.
			 */
			if (hw->bus.type == e1000_bus_type_pcix &&
			    descriptor == first_data_desc &&
			    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
			    > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
				/* modify the first descriptor */
				descriptor->lower.data &=
				    ~E1000G_TBD_LENGTH_MASK;
				descriptor->lower.flags.length =
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* insert a new descriptor */
				ASSERT(tx_ring->tbd_avail > 0);
				next_desc->buffer_addr =
				    packet->desc[0].address +
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
				next_desc->lower.data =
				    packet->desc[0].length -
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* Zero out status */
				next_desc->upper.data = 0;

				next_desc->lower.data |=
				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
				/* must set RS on every outgoing descriptor */
				next_desc->lower.data |=
				    E1000_TXD_CMD_RS;

				if (cur_context->lso_flag)
					next_desc->lower.data |=
					    E1000_TXD_CMD_TSE;

				descriptor = next_desc;

				/* Check the wrap-around case */
				if (next_desc == tx_ring->tbd_last)
					next_desc = tx_ring->tbd_first;
				else
					next_desc++;

				desc_count++;
				buff_overrun_flag = B_TRUE;
			}
		}

		if (buff_overrun_flag) {
			packet->num_desc++;
			buff_overrun_flag = B_FALSE;
		}

		if (first_packet != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first SwPacket.
			 */
			first_packet->num_desc++;
			first_packet = NULL;
		}

		packet->tickstamp = ddi_get_lbolt64();

		previous_packet = packet;
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(pending_list, &packet->Link);
	}

	/*
	 * workaround for 82546EB errata 21, LSO Premature Descriptor
	 * Write Back
	 */
	if (Adapter->lso_premature_issue && cur_context->lso_flag &&
	    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
		/* modify the previous descriptor */
		descriptor->lower.data -= 4;

		/* insert a new descriptor */
		ASSERT(tx_ring->tbd_avail > 0);
		/* the lower 20 bits of lower.data is the length field */
		next_desc->buffer_addr =
		    descriptor->buffer_addr +
		    (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
		next_desc->lower.data = 4;

		/* Zero out status */
		next_desc->upper.data = 0;
		/* It must be part of a LSO packet */
		next_desc->lower.data |=
		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
		    E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;

		descriptor = next_desc;

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
		/* update the number of descriptors */
		previous_packet->num_desc++;
	}

	ASSERT(descriptor);

	if (cur_context->cksum_flags) {
		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP) and
	 * Report Status (RS) set.
	 */
	if (Adapter->tx_intr_delay) {
		descriptor->lower.data |= E1000_TXD_CMD_IDE |
		    E1000_TXD_CMD_EOP;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_EOP;
	}

	/* Set the append Ethernet CRC (IFCS) bit */
	if (cur_context->lso_flag) {
		first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_IFCS;
	}

	/*
	 * Sync the Tx descriptors DMA buffer
	 */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	tx_ring->tbd_next = next_desc;

	/*
	 * Advance the Transmit Descriptor Tail (Tdt); this tells the
	 * FX1000 that this frame is available to transmit.
	 */
	if (hw->mac.type == e1000_82547)
		e1000g_82547_tx_move_tail(tx_ring);
	else
		E1000_WRITE_REG(hw, E1000_TDT(0),
		    (uint32_t)(next_desc - tx_ring->tbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
	}

	/* Put the pending SwPackets onto the "Used" list */
	mutex_enter(&tx_ring->usedlist_lock);
	QUEUE_APPEND(&tx_ring->used_list, pending_list);
	tx_ring->tbd_avail -= desc_count;
	mutex_exit(&tx_ring->usedlist_lock);

	/* update LSO related data */
	if (context_reload)
		tx_ring->pre_context = *cur_context;

	return (desc_count);
}

/*
 * e1000g_tx_setup - setup tx data structures
 *
 * This routine initializes all of the transmit related
 * structures.  This includes the Transmit descriptors
 * and the tx_sw_packet structures.
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_tx_sw_packet_t packet;
	uint32_t i;
	uint32_t buf_high;
	uint32_t buf_low;
	uint32_t reg_tipg;
	uint32_t reg_tctl;
	int size;
	e1000g_tx_ring_t *tx_ring;

	hw = &Adapter->shared;
	tx_ring = Adapter->tx_ring;

	/* init the lists */
	/*
	 * Here we don't need to protect the lists with the
	 * usedlist_lock and freelist_lock, because they are
	 * already protected by the chip_lock.
	 */
	QUEUE_INIT_LIST(&tx_ring->used_list);
	QUEUE_INIT_LIST(&tx_ring->free_list);

	/* Go through and set up each SW_Packet */
	packet = tx_ring->packet_area;
	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
		/* Initialize this tx_sw_packet area */
		e1000g_free_tx_swpkt(packet);
		/* Add this tx_sw_packet to the free list */
		QUEUE_PUSH_TAIL(&tx_ring->free_list,
		    &packet->Link);
	}

	/* Setup TX descriptor pointers */
	tx_ring->tbd_next = tx_ring->tbd_first;
	tx_ring->tbd_oldest = tx_ring->tbd_first;

	/*
	 * Setup Hardware TX Registers
	 */
	/* Setup the Transmit Control Register (TCTL). */
	reg_tctl = E1000_READ_REG(hw, E1000_TCTL);
	reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
	    E1000_TCTL_RTLC;

	/* Enable the MULR bit */
	if (hw->bus.type == e1000_bus_type_pci_express)
		reg_tctl |= E1000_TCTL_MULR;

	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

	/* Setup HW Base and Length of Tx descriptor area */
	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
	size = E1000_READ_REG(hw, E1000_TDLEN(0));

	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

	/*
	 * Write the highest location first and work backward to the lowest.
	 * This is necessary for some adapter types to
	 * prevent write combining from occurring.
	 */
	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);

	/* Setup our HW Tx Head & Tail descriptor pointers */
	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
	E1000_WRITE_REG(hw, E1000_TDT(0), 0);

	/* Set the default values for the Tx Inter Packet Gap timer */
	if ((hw->mac.type == e1000_82542) &&
	    ((hw->revision_id == E1000_REVISION_2) ||
	    (hw->revision_id == E1000_REVISION_3))) {
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	} else if (hw->mac.type == e1000_80003es2lan) {
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
	} else {
		if (hw->phy.media_type == e1000_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}
	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

	/* Setup Transmit Interrupt Delay Value */
	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
		    Adapter->tx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
	}

	tx_ring->tbd_avail = Adapter->tx_desc_num;

	/* Initialize stored context information */
	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}

/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
	struct e1000g *Adapter;
	LIST_DESCRIBER pending_list;
	p_tx_sw_packet_t packet;
	mblk_t *mp;
	mblk_t *nmp;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	int64_t delta;

	/*
	 * This function examines each TxSwPacket in the 'used' queue.
	 * If the e1000g is done with it, then the associated resources
	 * (Tx descriptors) are "freed" and the TxSwPacket is returned
	 * to the 'free' queue.
	 */
	Adapter = tx_ring->adapter;
	delta = 0;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
	if (packet == NULL) {
		Adapter->stall_flag = B_FALSE;
		return (0);
	}

	desc_count = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Sync the Tx descriptor DMA buffer */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);
	if (e1000g_check_dma_handle(
	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->e1000g_state |= E1000G_ERROR;
		return (0);
	}

	/*
	 * While there are still TxSwPackets in the used queue, check them
	 */
	mutex_enter(&tx_ring->usedlist_lock);
	while ((packet =
	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) {

		/*
		 * Get hold of the next descriptor that the e1000g will
		 * report status back to (this will be the last descriptor
		 * of a given sw packet).  We only want to free the
		 * sw packet (and its resources) if the e1000g is done
		 * with ALL of the descriptors.  If the e1000g is done
		 * with the last one, then it is done with all of them.
		 */
		ASSERT(packet->num_desc);
		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

		/* Check for wrap case */
		if (descriptor > tx_ring->tbd_last)
			descriptor -= Adapter->tx_desc_num;

		/*
		 * If the descriptor done bit is set, free the TxSwPacket
		 * and associated resources
		 */
		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
			QUEUE_POP_HEAD(&tx_ring->used_list);
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

			if (descriptor == tx_ring->tbd_last)
				tx_ring->tbd_oldest =
				    tx_ring->tbd_first;
			else
				tx_ring->tbd_oldest =
				    descriptor + 1;

			desc_count += packet->num_desc;
		} else {
			/*
			 * We found a sw packet that the e1000g is not done
			 * with, so there is no reason to check the rest
			 * of the queue.
			 */
			delta = ddi_get_lbolt64() - packet->tickstamp;
			break;
		}
	}

	tx_ring->tbd_avail += desc_count;
	Adapter->tx_pkt_cnt += desc_count;

	mutex_exit(&tx_ring->usedlist_lock);

	if (desc_count == 0) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
		/*
		 * If the packet hasn't been sent out within the stall
		 * threshold and the transmitter is not under a paused
		 * flowctrl condition, the transmitter is considered
		 * to be stalled.
		 */
		if ((delta > Adapter->stall_threshold) &&
		    !(E1000_READ_REG(&Adapter->shared,
		    E1000_STATUS) & E1000_STATUS_TXOFF)) {
			Adapter->stall_flag = B_TRUE;
		}
		return (0);
	}

	Adapter->stall_flag = B_FALSE;

	mp = NULL;
	nmp = NULL;
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	ASSERT(packet != NULL);
	while (packet != NULL) {
		if (packet->mp != NULL) {
			ASSERT(packet->mp->b_next == NULL);
			/* Assemble the message chain */
			if (mp == NULL) {
				mp = packet->mp;
				nmp = packet->mp;
			} else {
				nmp->b_next = packet->mp;
				nmp = packet->mp;
			}
			/* Disconnect the message from the sw packet */
			packet->mp = NULL;
		}

		/* Free the TxSwPackets */
		e1000g_free_tx_swpkt(packet);

		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return the TxSwPackets back to the FreeList */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	if (mp != NULL)
		freemsgchain(mp);

	return (desc_count);
}

/*
 * 82544 Coexistence issue workaround:
 *    There are 2 issues.
 *    1. If a 32 bit split completion happens from P64H2 and another
 *	agent drives a 64 bit request/split completion after ONLY
 *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards), then
 *	the 82544 has a problem where, in order to clock all the data in,
 *	it looks at the REQ64# signal and, since it has changed so fast
 *	(i.e. 1 idle clock turn around), it will fail to clock all the
 *	data in.  Data coming from certain ending addresses has exposure
 *	to this issue.
 *
 *	To detect this issue, the following equation can be used:
 *	    SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *	If SUM[3:0] is in between 1 to 4, we will have this issue.
 *
 * ROOT CAUSE:
 *	The erratum involves the 82544 PCIX elasticity FIFO implementations as
 *	64-bit FIFO's and flushing of the final partial-bytes corresponding
 *	to the end of a requested read burst.  Under a specific burst condition
 *	of ending-data alignment and 32-byte split-completions, the final
 *	byte(s) of split-completion data require an extra clock cycle to flush
 *	into 64-bit FIFO orientation.  An incorrect logic dependency on the
 *	REQ64# signal occurring during this clock cycle may cause the
 *	residual byte(s) to be lost, thereby rendering the internal DMA client
 *	forever awaiting the final byte(s) for an outbound data-fetch.  The
 *	erratum is confirmed to *only* occur if certain subsequent external
 *	64-bit PCIX bus transactions occur immediately (minimum possible bus
 *	turn-around) following the odd-aligned 32-bit split-completion
 *	containing the final byte(s).  Intel has confirmed that this has been
 *	seen only with chipset/bridges which have the capability to provide
 *	32-bit split-completion data, and in the presence of newer PCIX bus
 *	agents which fully-optimize the inter-transaction turn-around (zero
 *	additional initiator latency when pre-granted bus ownership).
 *
 *	This issue does not exist in PCI bus mode, when any agent is operating
 *	in 32 bit only mode, or on chipsets that do not do 32 bit split
 *	completions for 64 bit read requests (Serverworks chipsets).  P64H2
 *	does 32 bit split completions for any read request that has bit 2 set
 *	to 1 in the requested address and a read request size of more than
 *	8 bytes.
 *
 *    2. Another issue is related to the 82544 driving DACs under a similar
 *	scenario (32 bit split completion followed by a 64 bit transaction
 *	with only 1 cycle turnaround).  This issue is still being root caused.
 *	We think that both of these issues can be avoided if the following
 *	workaround is implemented.  It seems the DAC issue is related to
 *	ending addresses being 0x9, 0xA, 0xB, 0xC and hence ending up at odd
 *	boundaries in the elasticity FIFO which does not get flushed due to
 *	the REQ64# dependency.  We will only know the full story after it has
 *	been simulated successfully by the HW team.
 *
 * WORKAROUND:
 *	Make sure we do not have an ending address of 1,2,3,4 (Hang) or
 *	9,a,b,c (DAC).
 */
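/*
 * Worked example of the equation above (illustrative numbers only): for a
 * buffer whose address ends in 0x5 with a length of 0xF bytes,
 * SIZE[3:0] + ADDR[2:0] = 0xF + 0x5 = 0x14, so SUM[3:0] = 0x4, which falls
 * in the 1-4 (Hang) range.  e1000g_fill_82544_desc() below therefore splits
 * the buffer into a (length - 4)-byte descriptor followed by a 4-byte
 * descriptor; the first piece then ends with SUM[3:0] = (0x5 + 0xB) & 0xF
 * = 0x0, and buffers of 4 bytes or less are always emitted as a single
 * descriptor without further checks.
 */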
static uint32_t
e1000g_fill_82544_desc(uint64_t address,
    size_t length, p_desc_array_t desc_array)
{
	/*
	 * The issue is sensitive to both length and address,
	 * so let us first check the address...
	 */
	uint32_t safe_terminator;

	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/*
	 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then
	 * return
	 */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

static int
e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
    mblk_t *mp, boolean_t tx_undersize_flag)
{
	size_t len;
	size_t len1;
	dma_buffer_t *tx_buf;
	mblk_t *nmp;
	boolean_t finished;
	int desc_count;

	desc_count = 0;
	tx_buf = packet->tx_buf;
	len = MBLKL(mp);

	ASSERT((tx_buf->len + len) <= tx_buf->size);

	if (len > 0) {
		bcopy(mp->b_rptr,
		    tx_buf->address + tx_buf->len,
		    len);
		tx_buf->len += len;

		packet->num_mblk_frag++;
	}

	/*
	 * Decide whether this bcopy buffer is complete: stop coalescing
	 * when there is no next fragment, when the next fragment would
	 * overflow the buffer, or when the next fragment is large enough
	 * to be DMA-bound instead; while padding an undersized packet we
	 * keep copying.
	 */
	nmp = mp->b_cont;
	if (nmp == NULL) {
		finished = B_TRUE;
	} else {
		len1 = MBLKL(nmp);
		if ((tx_buf->len + len1) > tx_buf->size)
			finished = B_TRUE;
		else if (tx_undersize_flag)
			finished = B_FALSE;
		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
			finished = B_TRUE;
		else
			finished = B_FALSE;
	}

	if (finished) {
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
		    (tx_buf->len > len));

		/*
		 * If the packet is smaller than 64 bytes, which is the
		 * minimum ethernet packet size, pad the packet to make
		 * it at least 60 bytes.  The hardware will add 4 bytes
		 * for CRC.
		 */
		if (tx_undersize_flag) {
			ASSERT(tx_buf->len < ETHERMIN);

			bzero(tx_buf->address + tx_buf->len,
			    ETHERMIN - tx_buf->len);
			tx_buf->len = ETHERMIN;
		}

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
		else
			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#else
		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#endif

		packet->data_transfer_type = USE_BCOPY;

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    tx_buf->dma_address,
		    tx_buf->len);

		if (desc_count <= 0)
			return (-1);
	}

	return (desc_count);
}

static int
e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
{
	int j;
	int mystat;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_count;
	uint32_t desc_total;

	desc_total = 0;
	len = MBLKL(mp);

	/*
	 * ddi_dma_addr_bind_handle() allocates DMA resources for a
	 * memory object such that a device can perform DMA to or from
	 * the object.  DMA resources are allocated considering the
	 * device's DMA attributes, as expressed by ddi_dma_attr(9S)
	 * (see ddi_dma_alloc_handle(9F)).
	 *
	 * ddi_dma_addr_bind_handle() fills in the first DMA cookie
	 * pointed to by cookiep with the appropriate address, length,
	 * and bus type.  *ccountp is set to the number of DMA cookies
	 * representing this DMA object.  Subsequent DMA cookies must be
	 * retrieved by calling ddi_dma_nextcookie(9F) the number of
	 * times specified by *countp - 1.
	 */
	switch (packet->dma_type) {
#ifdef __sparc
	case USE_DVMA:
		dvma_kaddr_load(packet->tx_dma_handle,
		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);

		dvma_sync(packet->tx_dma_handle, 0,
		    DDI_DMA_SYNC_FORDEV);

		ncookies = 1;
		packet->data_transfer_type = USE_DVMA;
		break;
#endif
	case USE_DMA:
		if ((mystat = ddi_dma_addr_bind_handle(
		    packet->tx_dma_handle, NULL,
		    (caddr_t)mp->b_rptr, len,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
		    &ncookies)) != DDI_DMA_MAPPED) {

			e1000g_log(tx_ring->adapter, CE_WARN,
			    "Couldn't bind mblk buffer to Tx DMA handle: "
			    "return: %X, Pkt: %X\n",
			    mystat, packet);
			return (-1);
		}

		/*
		 * An implicit ddi_dma_sync() is done when
		 * ddi_dma_addr_bind_handle() is called, so we
		 * don't need to explicitly call ddi_dma_sync()
		 * here.
		 */
		ASSERT(ncookies);
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
		    (ncookies > 1));

		/*
		 * The data_transfer_type value must be set after the handle
		 * has been bound, for it will be used in
		 * e1000g_free_tx_swpkt() to decide whether we need to unbind
		 * the handle.
		 */
		packet->data_transfer_type = USE_DMA;
		break;
	default:
		ASSERT(B_FALSE);
		break;
	}

	packet->num_mblk_frag++;

	/*
	 * Each address could span multiple cookies.
	 * Each cookie will have one descriptor.
	 */
	for (j = ncookies; j != 0; j--) {

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		if (desc_count <= 0)
			return (-1);

		desc_total += desc_count;

		/*
		 * ddi_dma_nextcookie() retrieves subsequent DMA
		 * cookies for a DMA object.
		 * ddi_dma_nextcookie() fills in the
		 * ddi_dma_cookie(9S) structure pointed to by
		 * cookiep.  The ddi_dma_cookie(9S) structure
		 * must be allocated prior to calling
		 * ddi_dma_nextcookie().  The DMA cookie count
		 * returned by ddi_dma_buf_bind_handle(9F),
		 * ddi_dma_addr_bind_handle(9F), or
		 * ddi_dma_getwin(9F) indicates the number of DMA
		 * cookies a DMA object consists of.  If the
		 * resulting cookie count, N, is larger than 1,
		 * ddi_dma_nextcookie() must be called N-1 times
		 * to retrieve all DMA cookies.
		 */
		if (j > 1) {
			ddi_dma_nextcookie(packet->tx_dma_handle,
			    &dma_cookie);
		}
	}

	return (desc_total);
}

static void
e1000g_fill_context_descriptor(context_data_t *cur_context,
    struct e1000_context_desc *context_desc)
{
	if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) {
		context_desc->lower_setup.ip_fields.ipcss =
		    cur_context->ether_header_size;
		context_desc->lower_setup.ip_fields.ipcso =
		    cur_context->ether_header_size +
		    offsetof(struct ip, ip_sum);
		context_desc->lower_setup.ip_fields.ipcse =
		    cur_context->ether_header_size +
		    cur_context->cksum_start - 1;
	} else
		context_desc->lower_setup.ip_config = 0;

	if (cur_context->cksum_flags & HCK_PARTIALCKSUM) {
		/*
		 * A packet with the given protocol has the following
		 * stuff and start offsets:
		 *
		 *	| Protocol   | Stuff  | Start  | Checksum
		 *	|            | Offset | Offset | Enable
		 *	| IPv4 + TCP | 0x24   | 0x14   | Yes
		 *	| IPv4 + UDP | 0x1A   | 0x14   | Yes
		 *	| IPv6 + TCP | 0x20   | 0x10   | No
		 *	| IPv6 + UDP | 0x14   | 0x10   | No
		 */
		context_desc->upper_setup.tcp_fields.tucss =
		    cur_context->cksum_start + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucso =
		    cur_context->cksum_stuff + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucse = 0;
	} else
		context_desc->upper_setup.tcp_config = 0;

	if (cur_context->lso_flag) {
		context_desc->tcp_seg_setup.fields.mss = cur_context->mss;
		context_desc->tcp_seg_setup.fields.hdr_len =
		    cur_context->hdr_len;
		/*
		 * workaround for 82546EB errata 23: status-writeback
		 * reporting (RS) should not be set on context or
		 * Null descriptors
		 */
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP
		    | E1000_TXD_DTYP_C | cur_context->pay_len;
	} else {
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_DTYP_C;
		/*
		 * Zero out the options for TCP Segmentation Offload
		 */
		context_desc->tcp_seg_setup.data = 0;
	}
}

static int
e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
    p_tx_sw_packet_t packet, uint64_t address, size_t size)
{
	struct e1000_hw *hw = &tx_ring->adapter->shared;
	p_sw_desc_t desc;

	if (hw->mac.type == e1000_82544) {
		if (hw->bus.type == e1000_bus_type_pcix)
			return (e1000g_tx_workaround_PCIX_82544(packet,
			    address, size));

		if (size > JUMBO_FRAG_LENGTH)
			return (e1000g_tx_workaround_jumbo_82544(packet,
			    address, size));
	}

	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

	desc = &packet->desc[packet->num_desc];
	desc->address = address;
	desc->length = (uint32_t)size;

	packet->num_desc++;

	return (1);
}

static int
e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	size_t len;
	uint32_t counter;
	uint32_t array_elements;
	desc_array_t desc_array;

	/*
	 * Coexist Workaround for cordova: RP: 07/04/03
	 *
	 * RP: ERRATA: Workaround ISSUE:
	 * 8kb_buffer_Lockup CONTROLLER: Cordova
	 * Break up each buffer into 8kb pieces until the
	 * remainder is < 8kb
	 */
	size_left = size;
	desc_count = 0;

	while (size_left > 0) {
		if (size_left > MAX_TX_BUF_SIZE)
			len = MAX_TX_BUF_SIZE;
		else
			len = size_left;

		array_elements = e1000g_fill_82544_desc(address,
		    len, &desc_array);

		for (counter = 0; counter < array_elements; counter++) {
			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
			/*
			 * Put in the buffer address
			 */
			desc = &packet->desc[packet->num_desc];

			desc->address =
			    desc_array.descriptor[counter].address;
			desc->length =
			    desc_array.descriptor[counter].length;

			packet->num_desc++;
			desc_count++;
		} /* for */

		/*
		 * Update the buffer address and length
		 */
		address += MAX_TX_BUF_SIZE;
		size_left -= MAX_TX_BUF_SIZE;
	} /* while */

	return (desc_count);
}

static int
e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	uint32_t offset;

	/*
	 * Workaround for Jumbo Frames on Cordova
	 * PSD 06/01/2001
	 */
	size_left = size;
	desc_count = 0;
	offset = 0;
	while (size_left > 0) {
		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

		desc = &packet->desc[packet->num_desc];

		desc->address = address + offset;

		if (size_left > JUMBO_FRAG_LENGTH)
			desc->length = JUMBO_FRAG_LENGTH;
		else
			desc->length = (uint32_t)size_left;

		packet->num_desc++;
		desc_count++;

		offset += desc->length;
		size_left -= JUMBO_FRAG_LENGTH;
	}

	return (desc_count);
}

#pragma inline(e1000g_82547_tx_move_tail_work)

static void
e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
{
	struct e1000_hw *hw;
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct e1000_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = B_FALSE;
	struct e1000g *Adapter;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;

	while (hw_tdt != sw_tdt) {
		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == Adapter->tx_desc_num)
			hw_tdt = 0;

		if (eop) {
			if ((Adapter->link_duplex == HALF_DUPLEX) &&
			    (e1000_fifo_workaround_82547(hw, length)
			    != E1000_SUCCESS)) {
				if (tx_ring->timer_enable_82547) {
					ASSERT(tx_ring->timer_id_82547 == 0);
					tx_ring->timer_id_82547 =
					    timeout(e1000g_82547_timeout,
					    (void *)tx_ring,
					    drv_usectohz(10000));
				}
				return;

			} else {
				E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
				e1000_update_tx_fifo_head_82547(hw, length);
				length = 0;
			}
		}
	}
}

static void
e1000g_82547_timeout(void *arg)
{
	e1000g_tx_ring_t *tx_ring;

	tx_ring = (e1000g_tx_ring_t *)arg;

	mutex_enter(&tx_ring->tx_lock);

	tx_ring->timer_id_82547 = 0;
	e1000g_82547_tx_move_tail_work(tx_ring);

	mutex_exit(&tx_ring->tx_lock);
}

static void
e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));

	tid = tx_ring->timer_id_82547;
	tx_ring->timer_id_82547 = 0;
	if (tid != 0) {
		tx_ring->timer_enable_82547 = B_FALSE;
		mutex_exit(&tx_ring->tx_lock);

		(void) untimeout(tid);

		mutex_enter(&tx_ring->tx_lock);
	}
	tx_ring->timer_enable_82547 = B_TRUE;
	e1000g_82547_tx_move_tail_work(tx_ring);
}