/*
 * This file is provided under a CDDLv1 license.  When using or
 * redistributing this file, you may do so under this license.
 * In redistributing this file this license must be included
 * and no other modification of this header file is permitted.
 *
 * CDDL LICENSE SUMMARY
 *
 * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved.
 *
 * The contents of this file are subject to the terms of Version
 * 1.0 of the Common Development and Distribution License (the "License").
 *
 * You should have received a copy of the License with this software.
 * You can obtain a copy of the License at
 *	http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * **********************************************************************
 *
 * Module Name:
 *   e1000g_tx.c
 *
 * Abstract:
 *   This file contains the routines that take care of transmit;
 *   they make the hardware send the data pointed to by the packet
 *   out onto the physical medium.
 *
 * **********************************************************************
 */

#include "e1000g_sw.h"
#include "e1000g_debug.h"

static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, boolean_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t e1000g_retrieve_context(mblk_t *, context_data_t *, size_t);
static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    context_data_t *);
static void e1000g_fill_context_descriptor(context_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

#ifndef E1000G_DEBUG
#pragma inline(e1000g_tx_copy)
#pragma inline(e1000g_tx_bind)
#pragma inline(e1000g_retrieve_context)
#pragma inline(e1000g_check_context)
#pragma inline(e1000g_fill_tx_ring)
#pragma inline(e1000g_fill_context_descriptor)
#pragma inline(e1000g_fill_tx_desc)
#pragma inline(e1000g_fill_82544_desc)
#pragma inline(e1000g_tx_workaround_PCIX_82544)
#pragma inline(e1000g_tx_workaround_jumbo_82544)
#pragma inline(e1000g_free_tx_swpkt)
#endif
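
/*
 * Transmit path overview (roughly): e1000g_m_tx() is the mac entry point;
 * it calls e1000g_send() per mblk chain.  e1000g_send() builds a list of
 * tx sw packets via e1000g_tx_copy() (bcopy into a pre-allocated buffer)
 * or e1000g_tx_bind() (DMA binding), then e1000g_fill_tx_ring() turns the
 * list into hardware descriptors.  e1000g_recycle() later reclaims
 * completed descriptors and sw packets.
 */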

/*
 * e1000g_free_tx_swpkt - free up the tx sw packet
 *
 * Unbind the previously bound DMA handle for a given
 * transmit sw packet and reset the sw packet data.
 */
void
e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
{
	switch (packet->data_transfer_type) {
	case USE_BCOPY:
		packet->tx_buf->len = 0;
		break;
#ifdef __sparc
	case USE_DVMA:
		dvma_unload(packet->tx_dma_handle, 0, -1);
		break;
#endif
	case USE_DMA:
		(void) ddi_dma_unbind_handle(packet->tx_dma_handle);
		break;
	default:
		break;
	}

	/*
	 * The mblk has been stripped off the sw packet
	 * and will be freed in a triggered soft intr.
	 */
	ASSERT(packet->mp == NULL);

	packet->data_transfer_type = USE_NONE;
	packet->num_mblk_frag = 0;
	packet->num_desc = 0;
}

mblk_t *
e1000g_m_tx(void *arg, mblk_t *mp)
{
	struct e1000g *Adapter = (struct e1000g *)arg;
	mblk_t *next;

	rw_enter(&Adapter->chip_lock, RW_READER);

	if ((Adapter->chip_state != E1000G_START) ||
	    (Adapter->link_state != LINK_STATE_UP)) {
		freemsgchain(mp);
		mp = NULL;
	}

	while (mp != NULL) {
		next = mp->b_next;
		mp->b_next = NULL;

		if (!e1000g_send(Adapter, mp)) {
			mp->b_next = next;
			break;
		}

		mp = next;
	}

	rw_exit(&Adapter->chip_lock);
	return (mp);
}

/*
 * e1000g_send - send packets onto the wire
 *
 * Called from e1000g_m_tx with an mblk ready to send.  This
 * routine sets up the transmit descriptors and sends the data to
 * the wire.  It also pushes the just transmitted packet to
 * the used tx sw packet list.
 */
static boolean_t
e1000g_send(struct e1000g *Adapter, mblk_t *mp)
{
	p_tx_sw_packet_t packet;
	LIST_DESCRIBER pending_list;
	size_t len;
	size_t msg_size;
	uint32_t frag_count;
	int desc_count;
	uint32_t desc_total;
	uint32_t bcopy_thresh;
	uint32_t hdr_frag_len;
	boolean_t tx_undersize_flag;
	mblk_t *nmp;
	mblk_t *tmp;
	mblk_t *new_mp;
	mblk_t *pre_mp;
	e1000g_tx_ring_t *tx_ring;
	context_data_t cur_context;

	tx_ring = Adapter->tx_ring;
	bcopy_thresh = Adapter->tx_bcopy_thresh;

	/* Get the total size and number of fragments of the message */
	tx_undersize_flag = B_FALSE;
	frag_count = 0;
	msg_size = 0;
	for (nmp = mp; nmp; nmp = nmp->b_cont) {
		frag_count++;
		msg_size += MBLKL(nmp);
	}

	/* retrieve and compute information for the context descriptor */
	if (!e1000g_retrieve_context(mp, &cur_context, msg_size)) {
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Make sure the packet is less than the allowed size
	 */
	if (!cur_context.lso_flag &&
	    (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
		/*
		 * For an oversized packet, we'll just drop it.
		 * So we return B_TRUE here.
		 */
		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
		    "Tx packet out of bounds. length = %d\n", msg_size);
		E1000G_STAT(tx_ring->stat_over_size);
		freemsg(mp);
		return (B_TRUE);
	}

	/*
	 * Check and reclaim tx descriptors.
	 * This low water mark check should be done all the time, as
	 * transmit interrupt delay can deliver transmit interrupts a
	 * little late, which may cause problems reaping Tx descriptors:
	 * we may run short of them before getting any transmit interrupt.
	 */
	if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
		(void) e1000g_recycle(tx_ring);
		E1000G_DEBUG_STAT(tx_ring->stat_recycle);

		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
			goto tx_no_resource;
		}
	}

	/*
	 * If the message size is less than the minimum ethernet packet size,
	 * we'll use bcopy to send it, and pad it to 60 bytes later.
	 */
	if (msg_size < ETHERMIN) {
		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
		tx_undersize_flag = B_TRUE;
	}

	/* Initialize variables */
	desc_count = 1;	/* The initial value should be greater than 0 */
	desc_total = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Process each mblk fragment and fill tx descriptors */
	/*
	 * The software should guarantee the LSO packet header (MAC+IP+TCP)
	 * to be within one descriptor.  Here we reallocate and refill the
	 * header if it is not physically contiguous.
	 */
	if (cur_context.lso_flag) {
		/* find the last fragment of the header */
		len = MBLKL(mp);
		ASSERT(len > 0);
		nmp = mp;
		pre_mp = NULL;
		while (len < cur_context.hdr_len) {
			pre_mp = nmp;
			nmp = nmp->b_cont;
			len += MBLKL(nmp);
		}
		/*
		 * If the header and the payload are in different mblks,
		 * we simply force the header to be copied into the
		 * pre-allocated page-aligned buffer.
		 */
		if (len == cur_context.hdr_len)
			goto adjust_threshold;

		hdr_frag_len = cur_context.hdr_len - (len - MBLKL(nmp));
		/*
		 * There are two cases where we need to reallocate a mblk
		 * for the last header fragment:
		 * 1. the header is in multiple mblks and the last fragment
		 *    shares the same mblk with the payload
		 * 2. the header is in a single mblk shared with the payload
		 *    and the header is not physically contiguous
		 */
		if ((nmp != mp) ||
		    (P2NPHASE((uintptr_t)nmp->b_rptr, Adapter->sys_page_sz)
		    < len)) {
			E1000G_DEBUG_STAT(tx_ring->stat_lso_header_fail);
			/*
			 * reallocate the mblk for the last header fragment,
			 * expecting it to be bcopied into the pre-allocated
			 * page-aligned buffer
			 */
			new_mp = allocb(hdr_frag_len, NULL);
			if (!new_mp)
				return (B_FALSE);
			bcopy(nmp->b_rptr, new_mp->b_rptr, hdr_frag_len);
			/* link the new header fragment with the other parts */
			new_mp->b_wptr = new_mp->b_rptr + hdr_frag_len;
			new_mp->b_cont = nmp;
			if (pre_mp)
				pre_mp->b_cont = new_mp;
			nmp->b_rptr += hdr_frag_len;
			if (hdr_frag_len == cur_context.hdr_len)
				mp = new_mp;
			frag_count++;
		}
adjust_threshold:
		/*
		 * adjust the bcopy threshold to guarantee
		 * that the header is sent via bcopy
		 */
		if (bcopy_thresh < cur_context.hdr_len)
			bcopy_thresh = cur_context.hdr_len;
	}

	packet = NULL;
	nmp = mp;
	while (nmp) {
		tmp = nmp->b_cont;

		len = MBLKL(nmp);
		/* Check zero length mblks */
		if (len == 0) {
			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
			/*
			 * If no packet buffers have been used yet, or we
			 * just completed processing a buffer, then skip
			 * the empty mblk fragment.
			 * Otherwise, there is still a pending buffer that
			 * needs to be processed (tx_copy).
			 */
			if (desc_count > 0) {
				nmp = tmp;
				continue;
			}
		}
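
		/*
		 * Note: at this point desc_count is the return value of the
		 * previous e1000g_tx_copy()/e1000g_tx_bind() call (or the
		 * initial value 1).  A positive value means the previous sw
		 * packet has been completed, so a fresh one is popped from
		 * the free list below; zero means e1000g_tx_copy() is still
		 * accumulating fragments into the current bcopy buffer.
		 */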

		/*
		 * Get a new TxSwPacket to process mblk buffers.
		 */
		if (desc_count > 0) {
			mutex_enter(&tx_ring->freelist_lock);
			packet = (p_tx_sw_packet_t)
			    QUEUE_POP_HEAD(&tx_ring->free_list);
			mutex_exit(&tx_ring->freelist_lock);

			if (packet == NULL) {
				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
				    "No Tx SwPacket available\n");
				E1000G_STAT(tx_ring->stat_no_swpkt);
				goto tx_send_failed;
			}
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
		}

		ASSERT(packet);
		/*
		 * If the size of the fragment is less than the tx_bcopy_thresh
		 * we'll use bcopy; otherwise, we'll use DMA binding.
		 */
		if ((len <= bcopy_thresh) || tx_undersize_flag) {
			desc_count =
			    e1000g_tx_copy(tx_ring, packet, nmp,
			    tx_undersize_flag);
			E1000G_DEBUG_STAT(tx_ring->stat_copy);
		} else {
			desc_count =
			    e1000g_tx_bind(tx_ring, packet, nmp);
			E1000G_DEBUG_STAT(tx_ring->stat_bind);
		}

		if (desc_count > 0)
			desc_total += desc_count;
		else if (desc_count < 0)
			goto tx_send_failed;

		nmp = tmp;
	}

	/* Assign the message to the last sw packet */
	ASSERT(packet);
	ASSERT(packet->mp == NULL);
	packet->mp = mp;

	/* Try to recycle the tx descriptors again */
	if (tx_ring->tbd_avail < (desc_total + 2)) {
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
		(void) e1000g_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);

	/*
	 * If the number of available tx descriptors is not enough for transmit
	 * (one redundant descriptor and one hw checksum context descriptor are
	 * included), then return failure.
	 */
	if (tx_ring->tbd_avail < (desc_total + 2)) {
		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
		    "Not enough Tx descriptors\n");
		E1000G_STAT(tx_ring->stat_no_desc);
		mutex_exit(&tx_ring->tx_lock);
		goto tx_send_failed;
	}

	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);

	mutex_exit(&tx_ring->tx_lock);

	ASSERT(desc_count > 0);

	/* Send successful */
	return (B_TRUE);

tx_send_failed:
	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Free pending TxSwPackets */
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	while (packet) {
		packet->mp = NULL;
		e1000g_free_tx_swpkt(packet);
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return pending TxSwPackets to the "Free" list */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	E1000G_STAT(tx_ring->stat_send_fail);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);

tx_no_resource:
	/*
	 * Enable Transmit interrupts, so that the interrupt routine can
	 * call mac_tx_update() when transmit descriptors become available.
	 */
	tx_ring->resched_timestamp = ddi_get_lbolt();
	tx_ring->resched_needed = B_TRUE;
	if (!Adapter->tx_intr_enable)
		e1000g_mask_tx_interrupt(Adapter);

	/* Message will be scheduled for re-transmit */
	return (B_FALSE);
}
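
/*
 * For example (untagged IPv4/TCP LSO frame with no IP or TCP options):
 * ether_header_size = 14, IP header = 20 bytes, TCP header = 20 bytes,
 * so hdr_len = 14 + 20 + 20 = 54 and pay_len = msg_size - 54.
 */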
static boolean_t
e1000g_retrieve_context(mblk_t *mp, context_data_t *cur_context,
    size_t msg_size)
{
	uintptr_t ip_start;
	uintptr_t tcp_start;
	mblk_t *nmp;
	uint32_t lsoflags;
	uint32_t mss;

	bzero(cur_context, sizeof (context_data_t));

	/* first check lso information */
	lso_info_get(mp, &mss, &lsoflags);

	/* retrieve checksum info */
	hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start,
	    &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
	/* retrieve ethernet header size */
	if (((struct ether_vlan_header *)(uintptr_t)mp->b_rptr)->ether_tpid ==
	    htons(ETHERTYPE_VLAN))
		cur_context->ether_header_size =
		    sizeof (struct ether_vlan_header);
	else
		cur_context->ether_header_size =
		    sizeof (struct ether_header);

	if (lsoflags & HW_LSO) {
		ASSERT(mss != 0);

		/* free the invalid packet */
		if (mss == 0 ||
		    !((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
		    (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
			return (B_FALSE);
		}
		cur_context->mss = (uint16_t)mss;
		cur_context->lso_flag = B_TRUE;

		/*
		 * Some fields are cleared for the hardware to fill
		 * in.  We don't assume the Ethernet header, IP header and
		 * TCP header are always in the same mblk fragment, but we
		 * do assume each header is always within one mblk fragment
		 * and that the Ethernet header is always in the first
		 * mblk fragment.
		 */
		nmp = mp;
		ip_start = (uintptr_t)(nmp->b_rptr)
		    + cur_context->ether_header_size;
		if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
			ip_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (ip_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		tcp_start = ip_start +
		    IPH_HDR_LENGTH((ipha_t *)ip_start);
		if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
			tcp_start = (uintptr_t)nmp->b_cont->b_rptr
			    + (tcp_start - (uintptr_t)(nmp->b_wptr));
			nmp = nmp->b_cont;
		}
		cur_context->hdr_len = cur_context->ether_header_size
		    + IPH_HDR_LENGTH((ipha_t *)ip_start)
		    + TCP_HDR_LENGTH((tcph_t *)tcp_start);
		((ipha_t *)ip_start)->ipha_length = 0;
		((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
		/* calculate the TCP packet payload length */
		cur_context->pay_len = msg_size - cur_context->hdr_len;
	}
	return (B_TRUE);
}
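
/*
 * The last-loaded offload context is cached in tx_ring->pre_context;
 * e1000g_fill_tx_ring() only emits a new context descriptor when
 * e1000g_check_context() below sees that one of the offload parameters
 * has changed (or when the 82546EB LSO workaround forces a reload for
 * every offloaded packet).
 */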
static boolean_t
e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
{
	boolean_t context_reload;
	context_data_t *pre_context;
	struct e1000g *Adapter;

	context_reload = B_FALSE;
	pre_context = &tx_ring->pre_context;
	Adapter = tx_ring->adapter;

	/*
	 * The following code determines whether the context descriptor
	 * needs to be reloaded.  The conditions are ordered by how likely
	 * they are to change.
	 */
	/*
	 * workaround for 82546EB: the context descriptor must be reloaded
	 * per LSO/hw_cksum packet if LSO is enabled.
	 */
	if (Adapter->lso_premature_issue &&
	    Adapter->lso_enable &&
	    (cur_context->cksum_flags != 0)) {

		context_reload = B_TRUE;
	} else if (cur_context->lso_flag) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->pay_len != pre_context->pay_len) ||
		    (cur_context->mss != pre_context->mss) ||
		    (cur_context->hdr_len != pre_context->hdr_len) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	} else if (cur_context->cksum_flags != 0) {
		if ((cur_context->lso_flag != pre_context->lso_flag) ||
		    (cur_context->cksum_flags != pre_context->cksum_flags) ||
		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
		    (cur_context->cksum_start != pre_context->cksum_start) ||
		    (cur_context->ether_header_size !=
		    pre_context->ether_header_size)) {

			context_reload = B_TRUE;
		}
	}

	return (context_reload);
}

static int
e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
    context_data_t *cur_context)
{
	struct e1000g *Adapter;
	struct e1000_hw *hw;
	p_tx_sw_packet_t first_packet;
	p_tx_sw_packet_t packet;
	p_tx_sw_packet_t previous_packet;
	boolean_t context_reload;
	struct e1000_tx_desc *first_data_desc;
	struct e1000_tx_desc *next_desc;
	struct e1000_tx_desc *descriptor;
	int desc_count;
	boolean_t buff_overrun_flag;
	int i;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	desc_count = 0;
	first_packet = NULL;
	first_data_desc = NULL;
	descriptor = NULL;
	packet = NULL;
	buff_overrun_flag = B_FALSE;

	next_desc = tx_ring->tbd_next;

	/* Context descriptor reload check */
	context_reload = e1000g_check_context(tx_ring, cur_context);

	if (context_reload) {
		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);

		descriptor = next_desc;

		e1000g_fill_context_descriptor(cur_context,
		    (struct e1000_context_desc *)descriptor);

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
	}
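
	/*
	 * Remember the first data (non-context) descriptor of this frame;
	 * the checksum POPTS bits and, for LSO, the IFCS bit are applied
	 * to it after the descriptor loop below.
	 */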
	first_data_desc = next_desc;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
	while (packet) {
		ASSERT(packet->num_desc);

		for (i = 0; i < packet->num_desc; i++) {
			ASSERT(tx_ring->tbd_avail > 0);

			descriptor = next_desc;
			descriptor->buffer_addr =
			    packet->desc[i].address;
			descriptor->lower.data =
			    packet->desc[i].length;

			/* Zero out status */
			descriptor->upper.data = 0;

			descriptor->lower.data |=
			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			/* must set RS on every outgoing descriptor */
			descriptor->lower.data |=
			    E1000_TXD_CMD_RS;

			if (cur_context->lso_flag)
				descriptor->lower.data |= E1000_TXD_CMD_TSE;

			/* Check the wrap-around case */
			if (descriptor == tx_ring->tbd_last)
				next_desc = tx_ring->tbd_first;
			else
				next_desc++;

			desc_count++;

			/*
			 * workaround for 82546EB errata 33: hang in PCI-X
			 * systems due to a 2k Buffer Overrun during Transmit
			 * Operation.  The workaround applies to all the Intel
			 * PCI-X chips.
			 */
			if (hw->bus.type == e1000_bus_type_pcix &&
			    descriptor == first_data_desc &&
			    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
			    > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
				/* modify the first descriptor */
				descriptor->lower.data &=
				    ~E1000G_TBD_LENGTH_MASK;
				descriptor->lower.flags.length =
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* insert a new descriptor */
				ASSERT(tx_ring->tbd_avail > 0);
				next_desc->buffer_addr =
				    packet->desc[0].address +
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
				next_desc->lower.data =
				    packet->desc[0].length -
				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;

				/* Zero out status */
				next_desc->upper.data = 0;

				next_desc->lower.data |=
				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
				/* must set RS on every outgoing descriptor */
				next_desc->lower.data |=
				    E1000_TXD_CMD_RS;

				if (cur_context->lso_flag)
					next_desc->lower.data |=
					    E1000_TXD_CMD_TSE;

				descriptor = next_desc;

				/* Check the wrap-around case */
				if (next_desc == tx_ring->tbd_last)
					next_desc = tx_ring->tbd_first;
				else
					next_desc++;

				desc_count++;
				buff_overrun_flag = B_TRUE;
			}
		}

		if (buff_overrun_flag) {
			packet->num_desc++;
			buff_overrun_flag = B_FALSE;
		}

		if (first_packet != NULL) {
			/*
			 * Count the checksum context descriptor for
			 * the first SwPacket.
			 */
			first_packet->num_desc++;
			first_packet = NULL;
		}

		previous_packet = packet;
		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(pending_list, &packet->Link);
	}

	/*
	 * workaround for 82546EB errata 21: LSO Premature Descriptor
	 * Write Back
	 */
	if (Adapter->lso_premature_issue && cur_context->lso_flag &&
	    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
		/* modify the previous descriptor */
		descriptor->lower.data -= 4;

		/* insert a new descriptor */
		ASSERT(tx_ring->tbd_avail > 0);
		/* the lower 20 bits of lower.data is the length field */
		next_desc->buffer_addr =
		    descriptor->buffer_addr +
		    (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
		next_desc->lower.data = 4;

		/* Zero out status */
		next_desc->upper.data = 0;
		/* It must be part of an LSO packet */
		next_desc->lower.data |=
		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
		    E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;

		descriptor = next_desc;

		/* Check the wrap-around case */
		if (descriptor == tx_ring->tbd_last)
			next_desc = tx_ring->tbd_first;
		else
			next_desc++;

		desc_count++;
		/* update the number of descriptors */
		previous_packet->num_desc++;
	}

	ASSERT(descriptor);

	if (cur_context->cksum_flags) {
		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
			((struct e1000_data_desc *)first_data_desc)->
			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of the packet needs End Of Packet (EOP) and
	 * Report Status (RS) set.
	 */
	if (Adapter->tx_intr_delay) {
		descriptor->lower.data |= E1000_TXD_CMD_IDE |
		    E1000_TXD_CMD_EOP;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_EOP;
	}

	/* Set append Ethernet CRC (IFCS) bits */
	if (cur_context->lso_flag) {
		first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
	} else {
		descriptor->lower.data |= E1000_TXD_CMD_IFCS;
	}

	/*
	 * Sync the Tx descriptors DMA buffer
	 */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORDEV);

	tx_ring->tbd_next = next_desc;

	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	if (hw->mac.type == e1000_82547)
		e1000g_82547_tx_move_tail(tx_ring);
	else
		E1000_WRITE_REG(hw, E1000_TDT(0),
		    (uint32_t)(next_desc - tx_ring->tbd_first));

	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->chip_state = E1000G_ERROR;
	}

	/* Put the pending SwPackets onto the "Used" list */
	mutex_enter(&tx_ring->usedlist_lock);
	QUEUE_APPEND(&tx_ring->used_list, pending_list);
	tx_ring->tbd_avail -= desc_count;
	mutex_exit(&tx_ring->usedlist_lock);

	/* update LSO related data */
	if (context_reload)
		tx_ring->pre_context = *cur_context;

	return (desc_count);
}

/*
 * e1000g_tx_setup - setup tx data structures
 *
 * This routine initializes all of the transmit related
 * structures.  This includes the Transmit descriptors
 * and the tx_sw_packet structures.
 */
void
e1000g_tx_setup(struct e1000g *Adapter)
{
	struct e1000_hw *hw;
	p_tx_sw_packet_t packet;
	uint32_t i;
	uint32_t buf_high;
	uint32_t buf_low;
	uint32_t reg_tipg;
	uint32_t reg_tctl;
	int size;
	e1000g_tx_ring_t *tx_ring;

	hw = &Adapter->shared;
	tx_ring = Adapter->tx_ring;

	/* init the lists */
	/*
	 * Here we don't need to protect the lists using the
	 * usedlist_lock and freelist_lock, since they are
	 * already protected by the chip_lock.
	 */
	QUEUE_INIT_LIST(&tx_ring->used_list);
	QUEUE_INIT_LIST(&tx_ring->free_list);

	/* Go through and set up each SW_Packet */
	packet = tx_ring->packet_area;
	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
		/* Initialize this tx_sw_packet area */
		e1000g_free_tx_swpkt(packet);
		/* Add this tx_sw_packet to the free list */
		QUEUE_PUSH_TAIL(&tx_ring->free_list,
		    &packet->Link);
	}

	/* Setup TX descriptor pointers */
	tx_ring->tbd_next = tx_ring->tbd_first;
	tx_ring->tbd_oldest = tx_ring->tbd_first;
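
	/*
	 * The hardware setup below roughly follows this order: TCTL
	 * (transmitter enable, padding and collision settings), TDLEN and
	 * TDBAH/TDBAL (descriptor ring length and base address), TDH/TDT
	 * (head/tail pointers), TIPG (inter-packet gap) and TIDV/TADV
	 * (interrupt delay values).
	 */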

	/*
	 * Setup Hardware TX Registers
	 */
	/* Setup the Transmit Control Register (TCTL). */
	reg_tctl = E1000_READ_REG(hw, E1000_TCTL);
	reg_tctl |= E1000_TCTL_PSP | E1000_TCTL_EN |
	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
	    E1000_TCTL_RTLC;

	/* Enable the MULR bit */
	if (hw->bus.type == e1000_bus_type_pci_express)
		reg_tctl |= E1000_TCTL_MULR;

	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);

	/* Setup HW Base and Length of Tx descriptor area */
	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
	size = E1000_READ_REG(hw, E1000_TDLEN(0));

	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);

	/*
	 * Write the highest location first and work backward to the lowest.
	 * This is necessary for some adapter types to
	 * prevent write combining from occurring.
	 */
	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);

	/* Setup our HW Tx Head & Tail descriptor pointers */
	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
	E1000_WRITE_REG(hw, E1000_TDT(0), 0);

	/* Set the default values for the Tx Inter Packet Gap timer */
	if ((hw->mac.type == e1000_82542) &&
	    ((hw->revision_id == E1000_REVISION_2) ||
	    (hw->revision_id == E1000_REVISION_3))) {
		reg_tipg = DEFAULT_82542_TIPG_IPGT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	} else if (hw->mac.type == e1000_80003es2lan) {
		reg_tipg = DEFAULT_82543_TIPG_IPGR1;
		reg_tipg |= DEFAULT_80003ES2LAN_TIPG_IPGR2 <<
		    E1000_TIPG_IPGR2_SHIFT;
	} else {
		if (hw->phy.media_type == e1000_media_type_fiber)
			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		reg_tipg |=
		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}
	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);

	/* Setup Transmit Interrupt Delay Value */
	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);

	if (hw->mac.type >= e1000_82540) {
		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
		    Adapter->tx_intr_abs_delay);
		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
	}

	tx_ring->tbd_avail = Adapter->tx_desc_num;

	/* Initialize stored context information */
	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
}
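
/*
 * Note: e1000g_recycle() below frees a sw packet only once the DD
 * (descriptor done) bit is set in the last descriptor belonging to it;
 * the reclaimed mblks are chained together and released with
 * freemsgchain() after the list locks have been dropped.
 */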
/*
 * e1000g_recycle - recycle the tx descriptors and tx sw packets
 */
int
e1000g_recycle(e1000g_tx_ring_t *tx_ring)
{
	struct e1000g *Adapter;
	LIST_DESCRIBER pending_list;
	p_tx_sw_packet_t packet;
	mblk_t *mp;
	mblk_t *nmp;
	struct e1000_tx_desc *descriptor;
	int desc_count;

	/*
	 * This function examines each TxSwPacket in the 'used' queue; if
	 * the e1000g is done with it, the associated resources (Tx
	 * descriptors) are freed and the TxSwPacket is returned to the
	 * 'free' queue.
	 */
	Adapter = tx_ring->adapter;

	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
	if (packet == NULL) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		return (0);
	}

	desc_count = 0;
	QUEUE_INIT_LIST(&pending_list);

	/* Sync the Tx descriptor DMA buffer */
	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
	    0, 0, DDI_DMA_SYNC_FORKERNEL);
	if (e1000g_check_dma_handle(
	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
		Adapter->chip_state = E1000G_ERROR;
		return (0);
	}

	/*
	 * While there are still TxSwPackets in the used queue, check them
	 */
	mutex_enter(&tx_ring->usedlist_lock);
	while ((packet =
	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) != NULL) {

		/*
		 * Get hold of the next descriptor that the e1000g will
		 * report status back to (this will be the last descriptor
		 * of a given sw packet).  We only want to free the
		 * sw packet (and its resources) if the e1000g is done
		 * with ALL of the descriptors.  If the e1000g is done
		 * with the last one then it is done with all of them.
		 */
		ASSERT(packet->num_desc);
		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);

		/* Check for wrap case */
		if (descriptor > tx_ring->tbd_last)
			descriptor -= Adapter->tx_desc_num;

		/*
		 * If the descriptor done bit is set, free the TxSwPacket
		 * and associated resources
		 */
		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
			QUEUE_POP_HEAD(&tx_ring->used_list);
			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);

			if (descriptor == tx_ring->tbd_last)
				tx_ring->tbd_oldest =
				    tx_ring->tbd_first;
			else
				tx_ring->tbd_oldest =
				    descriptor + 1;

			desc_count += packet->num_desc;
		} else {
			/*
			 * We found a sw packet that the e1000g is not done
			 * with; there is no reason to check the rest
			 * of the queue.
			 */
			break;
		}
	}

	tx_ring->tbd_avail += desc_count;
	Adapter->tx_pkt_cnt += desc_count;

	mutex_exit(&tx_ring->usedlist_lock);

	if (desc_count == 0) {
		tx_ring->recycle_fail++;
		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	mp = NULL;
	nmp = NULL;
	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
	ASSERT(packet != NULL);
	while (packet != NULL) {
		if (packet->mp != NULL) {
			ASSERT(packet->mp->b_next == NULL);
			/* Assemble the message chain */
			if (mp == NULL) {
				mp = packet->mp;
				nmp = packet->mp;
			} else {
				nmp->b_next = packet->mp;
				nmp = packet->mp;
			}
			/* Disconnect the message from the sw packet */
			packet->mp = NULL;
		}

		/* Free the TxSwPackets */
		e1000g_free_tx_swpkt(packet);

		packet = (p_tx_sw_packet_t)
		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
	}

	/* Return the TxSwPackets back to the FreeList */
	mutex_enter(&tx_ring->freelist_lock);
	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
	mutex_exit(&tx_ring->freelist_lock);

	if (mp != NULL)
		freemsgchain(mp);

	return (desc_count);
}

/*
 * 82544 Coexistence issue workaround:
 *    There are 2 issues.
 *    1. If a 32 bit split completion happens from P64H2 and another
 *       agent drives a 64 bit request/split completion after ONLY
 *       1 idle clock (BRCM/Emulex/Adaptec fiber channel cards), then
 *       the 82544 has a problem where, in order to clock all the data in,
 *       it looks at the REQ64# signal and since it has changed so fast
 *       (i.e. 1 idle clock turn around), it will fail to clock all the
 *       data in.  Data coming from certain ending addresses has exposure
 *       to this issue.
 *
 *       To detect this issue, the following equation can be used:
 *           SIZE[3:0] + ADDR[2:0] = SUM[3:0].
 *       If SUM[3:0] is between 1 and 4, we will have this issue.
 *
 *       ROOT CAUSE:
 *       The erratum involves the 82544 PCIX elasticity FIFO implementations
 *       as 64-bit FIFOs and flushing of the final partial-bytes corresponding
 *       to the end of a requested read burst.  Under a specific burst
 *       condition of ending-data alignment and 32-byte split-completions,
 *       the final byte(s) of split-completion data require an extra clock
 *       cycle to flush into 64-bit FIFO orientation.  An incorrect logic
 *       dependency on the REQ64# signal occurring during this clock cycle
 *       may cause the residual byte(s) to be lost, thereby rendering the
 *       internal DMA client forever awaiting the final byte(s) for an
 *       outbound data-fetch.  The erratum is confirmed to *only* occur if
 *       certain subsequent external 64-bit PCIX bus transactions occur
 *       immediately (minimum possible bus turn-around) following the
 *       odd-aligned 32-bit split-completion containing the final byte(s).
 *       Intel has confirmed that this has been seen only with chipsets and
 *       bridges which have the capability to provide 32-bit split-completion
 *       data, and in the presence of newer PCIX bus agents which
 *       fully-optimize the inter-transaction turn-around (zero additional
 *       initiator latency when pre-granted bus ownership).
 *
 *       This issue does not exist in PCI bus mode, when any agent is
 *       operating in 32 bit only mode, or on chipsets that do not do 32 bit
 *       split completions for 64 bit read requests (Serverworks chipsets).
 *       P64H2 does 32 bit split completions for any read request that has
 *       bit 2 set to 1 for the requested address and the read request size
 *       is more than 8 bytes.
 *
 *    2. Another issue is related to the 82544 driving DACs under a similar
 *       scenario (32 bit split completion followed by a 64 bit transaction
 *       with only 1 cycle turnaround).  This issue is still being root
 *       caused.  We think that both of these issues can be avoided if the
 *       following workaround is implemented.  It seems the DAC issue is
 *       related to ending addresses being 0x9, 0xA, 0xB, 0xC and hence
 *       ending up at odd boundaries in the elasticity FIFO which does not
 *       get flushed due to the REQ64# dependency.  We will only know the
 *       full story after it has been simulated successfully by the HW team.
 *
 *       WORKAROUND:
 *       Make sure we do not have an ending address of 1,2,3,4 (hang) or
 *       9,a,b,c (DAC).
 */
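
/*
 * For example, a buffer whose low address bits are 0x2 and whose length
 * is 0x12 gives safe_terminator = ((0x2 & 0x7) + (0x12 & 0xF)) & 0xF = 0x4,
 * which falls in the 1-4 range, so the buffer is split below: the first
 * descriptor covers length - 4 bytes and a second 4-byte descriptor covers
 * the tail.
 */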
static uint32_t
e1000g_fill_82544_desc(uint64_t address,
    size_t length, p_desc_array_t desc_array)
{
	/*
	 * Since the issue is sensitive to length and address,
	 * let us first check the address...
	 */
	uint32_t safe_terminator;

	if (length <= 4) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}
	safe_terminator =
	    (uint32_t)((((uint32_t)address & 0x7) +
	    (length & 0xF)) & 0xF);
	/*
	 * if it does not fall between 0x1 to 0x4 or 0x9 to 0xC then
	 * return
	 */
	if (safe_terminator == 0 ||
	    (safe_terminator > 4 && safe_terminator < 9) ||
	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
		desc_array->descriptor[0].address = address;
		desc_array->descriptor[0].length = (uint32_t)length;
		desc_array->elements = 1;
		return (desc_array->elements);
	}

	desc_array->descriptor[0].address = address;
	desc_array->descriptor[0].length = length - 4;
	desc_array->descriptor[1].address = address + (length - 4);
	desc_array->descriptor[1].length = 4;
	desc_array->elements = 2;
	return (desc_array->elements);
}

static int
e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
    mblk_t *mp, boolean_t tx_undersize_flag)
{
	size_t len;
	size_t len1;
	dma_buffer_t *tx_buf;
	mblk_t *nmp;
	boolean_t finished;
	int desc_count;

	desc_count = 0;
	tx_buf = packet->tx_buf;
	len = MBLKL(mp);

	ASSERT((tx_buf->len + len) <= tx_buf->size);

	if (len > 0) {
		bcopy(mp->b_rptr,
		    tx_buf->address + tx_buf->len,
		    len);
		tx_buf->len += len;

		packet->num_mblk_frag++;
	}

	nmp = mp->b_cont;
	if (nmp == NULL) {
		finished = B_TRUE;
	} else {
		len1 = MBLKL(nmp);
		if ((tx_buf->len + len1) > tx_buf->size)
			finished = B_TRUE;
		else if (tx_undersize_flag)
			finished = B_FALSE;
		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
			finished = B_TRUE;
		else
			finished = B_FALSE;
	}

	if (finished) {
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
		    (tx_buf->len > len));

		/*
		 * If the packet is smaller than 64 bytes, which is the
		 * minimum ethernet packet size, pad the packet to make
		 * it at least 60 bytes.  The hardware will add 4 bytes
		 * for CRC.
		 */
		if (tx_undersize_flag) {
			ASSERT(tx_buf->len < ETHERMIN);

			bzero(tx_buf->address + tx_buf->len,
			    ETHERMIN - tx_buf->len);
			tx_buf->len = ETHERMIN;
		}

#ifdef __sparc
		if (packet->dma_type == USE_DVMA)
			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
		else
			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#else
		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
#endif

		packet->data_transfer_type = USE_BCOPY;

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    tx_buf->dma_address,
		    tx_buf->len);

		if (desc_count <= 0)
			return (-1);
	}

	return (desc_count);
}
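
/*
 * e1000g_tx_bind() maps a fragment with DMA binding instead of copying it;
 * each DMA cookie returned by the bind consumes one sw descriptor entry
 * (and hence at least one hardware descriptor) for the packet.
 */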
static int
e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
{
	int j;
	int mystat;
	size_t len;
	ddi_dma_cookie_t dma_cookie;
	uint_t ncookies;
	int desc_count;
	uint32_t desc_total;

	desc_total = 0;
	len = MBLKL(mp);

	/*
	 * ddi_dma_addr_bind_handle() allocates DMA resources for a
	 * memory object such that a device can perform DMA to or from
	 * the object.  DMA resources are allocated considering the
	 * device's DMA attributes as expressed by ddi_dma_attr(9S)
	 * (see ddi_dma_alloc_handle(9F)).
	 *
	 * ddi_dma_addr_bind_handle() fills in the first DMA cookie
	 * pointed to by cookiep with the appropriate address, length,
	 * and bus type.  *ccountp is set to the number of DMA cookies
	 * representing this DMA object.  Subsequent DMA cookies must be
	 * retrieved by calling ddi_dma_nextcookie(9F) the number of
	 * times specified by *ccountp - 1.
	 */
	switch (packet->dma_type) {
#ifdef __sparc
	case USE_DVMA:
		dvma_kaddr_load(packet->tx_dma_handle,
		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);

		dvma_sync(packet->tx_dma_handle, 0,
		    DDI_DMA_SYNC_FORDEV);

		ncookies = 1;
		packet->data_transfer_type = USE_DVMA;
		break;
#endif
	case USE_DMA:
		if ((mystat = ddi_dma_addr_bind_handle(
		    packet->tx_dma_handle, NULL,
		    (caddr_t)mp->b_rptr, len,
		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
		    &ncookies)) != DDI_DMA_MAPPED) {

			e1000g_log(tx_ring->adapter, CE_WARN,
			    "Couldn't bind mblk buffer to Tx DMA handle: "
			    "return: %X, Pkt: %X\n",
			    mystat, packet);
			return (-1);
		}

		/*
		 * An implicit ddi_dma_sync() is done when
		 * ddi_dma_addr_bind_handle() is called.  So we
		 * don't need to explicitly call ddi_dma_sync()
		 * here any more.
		 */
		ASSERT(ncookies);
		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
		    (ncookies > 1));

		/*
		 * The data_transfer_type value must be set after the handle
		 * has been bound, for it will be used in
		 * e1000g_free_tx_swpkt() to decide whether we need to unbind
		 * the handle.
		 */
		packet->data_transfer_type = USE_DMA;
		break;
	default:
		ASSERT(B_FALSE);
		break;
	}

	packet->num_mblk_frag++;

	/*
	 * Each address could span multiple cookies;
	 * each cookie will have one descriptor.
	 */
	for (j = ncookies; j != 0; j--) {

		desc_count = e1000g_fill_tx_desc(tx_ring,
		    packet,
		    dma_cookie.dmac_laddress,
		    dma_cookie.dmac_size);

		if (desc_count <= 0)
			return (-1);

		desc_total += desc_count;

		/*
		 * ddi_dma_nextcookie() retrieves subsequent DMA
		 * cookies for a DMA object.
		 * ddi_dma_nextcookie() fills in the
		 * ddi_dma_cookie(9S) structure pointed to by
		 * cookiep.  The ddi_dma_cookie(9S) structure
		 * must be allocated prior to calling
		 * ddi_dma_nextcookie().  The DMA cookie count
		 * returned by ddi_dma_buf_bind_handle(9F),
		 * ddi_dma_addr_bind_handle(9F), or
		 * ddi_dma_getwin(9F) indicates the number of DMA
		 * cookies a DMA object consists of.  If the
		 * resulting cookie count, N, is larger than 1,
		 * ddi_dma_nextcookie() must be called N-1 times
		 * to retrieve all DMA cookies.
		 */
		if (j > 1) {
			ddi_dma_nextcookie(packet->tx_dma_handle,
			    &dma_cookie);
		}
	}

	return (desc_total);
}
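
/*
 * Note: the cksum_start/cksum_stuff offsets retrieved by hcksum_retrieve()
 * are relative to the start of the IP header (see the protocol table
 * below), so ether_header_size is added to make the tucss/tucso values
 * frame-relative for the hardware.
 */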
static void
e1000g_fill_context_descriptor(context_data_t *cur_context,
    struct e1000_context_desc *context_desc)
{
	if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) {
		context_desc->lower_setup.ip_fields.ipcss =
		    cur_context->ether_header_size;
		context_desc->lower_setup.ip_fields.ipcso =
		    cur_context->ether_header_size +
		    offsetof(struct ip, ip_sum);
		context_desc->lower_setup.ip_fields.ipcse =
		    cur_context->ether_header_size +
		    cur_context->cksum_start - 1;
	} else
		context_desc->lower_setup.ip_config = 0;

	if (cur_context->cksum_flags & HCK_PARTIALCKSUM) {
		/*
		 * Packets with the following protocols carry these
		 * stuff and start offsets:
		 *
		 *	Protocol	Stuff	Start	Checksum
		 *			Offset	Offset	Enable
		 *	IPv4 + TCP	0x24	0x14	Yes
		 *	IPv4 + UDP	0x1A	0x14	Yes
		 *	IPv6 + TCP	0x20	0x10	No
		 *	IPv6 + UDP	0x14	0x10	No
		 */
		context_desc->upper_setup.tcp_fields.tucss =
		    cur_context->cksum_start + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucso =
		    cur_context->cksum_stuff + cur_context->ether_header_size;
		context_desc->upper_setup.tcp_fields.tucse = 0;
	} else
		context_desc->upper_setup.tcp_config = 0;

	if (cur_context->lso_flag) {
		context_desc->tcp_seg_setup.fields.mss = cur_context->mss;
		context_desc->tcp_seg_setup.fields.hdr_len =
		    cur_context->hdr_len;
		/*
		 * workaround for 82546EB errata 23: status write-back
		 * reporting (RS) should not be set on context or
		 * Null descriptors
		 */
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP
		    | E1000_TXD_DTYP_C | cur_context->pay_len;
	} else {
		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
		    | E1000_TXD_DTYP_C;
		/*
		 * Zero out the options for TCP Segmentation Offload
		 */
		context_desc->tcp_seg_setup.data = 0;
	}
}

static int
e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
    p_tx_sw_packet_t packet, uint64_t address, size_t size)
{
	struct e1000_hw *hw = &tx_ring->adapter->shared;
	p_sw_desc_t desc;

	if (hw->mac.type == e1000_82544) {
		if (hw->bus.type == e1000_bus_type_pcix)
			return (e1000g_tx_workaround_PCIX_82544(packet,
			    address, size));

		if (size > JUMBO_FRAG_LENGTH)
			return (e1000g_tx_workaround_jumbo_82544(packet,
			    address, size));
	}

	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

	desc = &packet->desc[packet->num_desc];
	desc->address = address;
	desc->length = (uint32_t)size;

	packet->num_desc++;

	return (1);
}
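
/*
 * The two 82544 helpers below may emit several sw descriptor entries for
 * a single buffer/cookie; the per-packet total is bounded by
 * MAX_TX_DESC_PER_PACKET (see the ASSERTs).
 */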
static int
e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	size_t len;
	uint32_t counter;
	uint32_t array_elements;
	desc_array_t desc_array;

	/*
	 * Coexist Workaround for cordova: RP: 07/04/03
	 *
	 * RP: ERRATA: Workaround ISSUE:
	 * 8kb_buffer_Lockup CONTROLLER: Cordova
	 * Break each buffer into 8kb pieces until the
	 * remainder is < 8kb
	 */
	size_left = size;
	desc_count = 0;

	while (size_left > 0) {
		if (size_left > MAX_TX_BUF_SIZE)
			len = MAX_TX_BUF_SIZE;
		else
			len = size_left;

		array_elements = e1000g_fill_82544_desc(address,
		    len, &desc_array);

		for (counter = 0; counter < array_elements; counter++) {
			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
			/*
			 * Put in the buffer address
			 */
			desc = &packet->desc[packet->num_desc];

			desc->address =
			    desc_array.descriptor[counter].address;
			desc->length =
			    desc_array.descriptor[counter].length;

			packet->num_desc++;
			desc_count++;
		} /* for */

		/*
		 * Update the buffer address and length
		 */
		address += MAX_TX_BUF_SIZE;
		size_left -= MAX_TX_BUF_SIZE;
	} /* while */

	return (desc_count);
}

static int
e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
    uint64_t address, size_t size)
{
	p_sw_desc_t desc;
	int desc_count;
	long size_left;
	uint32_t offset;

	/*
	 * Workaround for Jumbo Frames on Cordova
	 * PSD 06/01/2001
	 */
	size_left = size;
	desc_count = 0;
	offset = 0;
	while (size_left > 0) {
		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);

		desc = &packet->desc[packet->num_desc];

		desc->address = address + offset;

		if (size_left > JUMBO_FRAG_LENGTH)
			desc->length = JUMBO_FRAG_LENGTH;
		else
			desc->length = (uint32_t)size_left;

		packet->num_desc++;
		desc_count++;

		offset += desc->length;
		size_left -= JUMBO_FRAG_LENGTH;
	}

	return (desc_count);
}

#pragma inline(e1000g_82547_tx_move_tail_work)

static void
e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
{
	struct e1000_hw *hw;
	uint16_t hw_tdt;
	uint16_t sw_tdt;
	struct e1000_tx_desc *tx_desc;
	uint16_t length = 0;
	boolean_t eop = B_FALSE;
	struct e1000g *Adapter;

	Adapter = tx_ring->adapter;
	hw = &Adapter->shared;

	hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;

	while (hw_tdt != sw_tdt) {
		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == Adapter->tx_desc_num)
			hw_tdt = 0;

		if (eop) {
			if ((Adapter->link_duplex == HALF_DUPLEX) &&
			    (e1000_fifo_workaround_82547(hw, length)
			    != E1000_SUCCESS)) {
				if (tx_ring->timer_enable_82547) {
					ASSERT(tx_ring->timer_id_82547 == 0);
					tx_ring->timer_id_82547 =
					    timeout(e1000g_82547_timeout,
					    (void *)tx_ring,
					    drv_usectohz(10000));
				}
				return;

			} else {
				E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
				e1000_update_tx_fifo_head_82547(hw, length);
				length = 0;
			}
		}
	}
}

static void
e1000g_82547_timeout(void *arg)
{
	e1000g_tx_ring_t *tx_ring;

	tx_ring = (e1000g_tx_ring_t *)arg;

	mutex_enter(&tx_ring->tx_lock);

	tx_ring->timer_id_82547 = 0;
	e1000g_82547_tx_move_tail_work(tx_ring);

	mutex_exit(&tx_ring->tx_lock);
}

static void
e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
{
	timeout_id_t tid;

	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));

	tid = tx_ring->timer_id_82547;
	tx_ring->timer_id_82547 = 0;
	if (tid != 0) {
		tx_ring->timer_enable_82547 = B_FALSE;
		mutex_exit(&tx_ring->tx_lock);
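
		/*
		 * tx_lock is dropped above because untimeout(9F) waits for
		 * an in-progress callback to finish, and
		 * e1000g_82547_timeout() acquires tx_lock, so holding the
		 * lock across the untimeout() call could deadlock.
		 * timer_enable_82547 was cleared first, so the work routine
		 * will not re-arm the timer in the meantime.
		 */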
		(void) untimeout(tid);

		mutex_enter(&tx_ring->tx_lock);
	}
	tx_ring->timer_enable_82547 = B_TRUE;
	e1000g_82547_tx_move_tail_work(tx_ring);
}