xref: /titanic_50/usr/src/uts/common/io/e1000g/e1000g_tx.c (revision 6bb0858833ad931216a3c40682436131bec0007f)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2007 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_tx.c							*
33  *									*
34  * Abstract:								*
 *   This file contains routines that handle the transmit		*
 *   interrupt and make the hardware send the data pointed to		*
 *   by the packet out onto the physical medium.			*
38  *									*
39  *									*
40  * Environment:								*
41  *   Kernel Mode -							*
42  *									*
43  * Source History:							*
44  *   The code in this file is based somewhat on the "send" code		*
45  *   developed for the Intel Pro/100 family(Speedo1 and Speedo3) by	*
46  *   Steve Lindsay, and partly on some sample DDK code			*
47  *   of solaris.							*
48  *									*
49  *   March 12, 1997 Steve Lindsay					*
50  *   1st created - Ported from E100B send.c file			*
51  *									*
52  * **********************************************************************
53  */
54 
55 #include "e1000g_sw.h"
56 #include "e1000g_debug.h"
57 
58 static boolean_t e1000g_send(struct e1000g *, mblk_t *);
59 static int e1000g_tx_copy(struct e1000g *, PTX_SW_PACKET, mblk_t *, uint32_t);
60 static int e1000g_tx_bind(struct e1000g *, PTX_SW_PACKET, mblk_t *);
61 static boolean_t check_cksum_context(e1000g_tx_ring_t *, cksum_data_t *);
62 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
63     cksum_data_t *);
64 static void e1000g_fill_context_descriptor(cksum_data_t *,
65     struct e1000_context_desc *);
66 static int e1000g_fill_tx_desc(struct e1000g *,
67     PTX_SW_PACKET, uint64_t, size_t);
68 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
69     PDESC_ARRAY desc_array);
70 static int e1000g_tx_workaround_PCIX_82544(struct e1000g *,
71     PTX_SW_PACKET, uint64_t, size_t);
72 static int e1000g_tx_workaround_jumbo_82544(struct e1000g *,
73     PTX_SW_PACKET, uint64_t, size_t);
74 static uint32_t e1000g_tx_free_desc_num(e1000g_tx_ring_t *);
75 static void e1000g_82547_timeout(void *);
76 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
77 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);
78 
79 #ifndef e1000g_DEBUG
80 #pragma inline(e1000g_tx_copy)
81 #pragma inline(e1000g_tx_bind)
82 #pragma inline(check_cksum_context)
83 #pragma inline(e1000g_fill_tx_ring)
84 #pragma inline(e1000g_fill_context_descriptor)
85 #pragma inline(e1000g_fill_tx_desc)
86 #pragma inline(e1000g_fill_82544_desc)
87 #pragma inline(e1000g_tx_workaround_PCIX_82544)
88 #pragma inline(e1000g_tx_workaround_jumbo_82544)
89 #pragma inline(FreeTxSwPacket)
90 #pragma inline(e1000g_tx_free_desc_num)
91 #endif
92 
93 /*
94  * **********************************************************************
95  * Name:      FreeTxSwPacket						*
96  *									*
97  * Description:								*
98  *	       Frees up the previusly allocated Dma handle for given	*
99  *	       transmit sw packet.					*
100  *									*
101  * Parameter Passed:							*
102  *									*
103  * Return Value:							*
104  *									*
105  * Functions called:							*
106  *									*
107  * **********************************************************************
108  */
109 void
110 FreeTxSwPacket(register PTX_SW_PACKET packet)
111 {
112 	switch (packet->data_transfer_type) {
113 	case USE_BCOPY:
114 		packet->tx_buf->len = 0;
115 		break;
116 #ifdef __sparc
117 	case USE_DVMA:
118 		dvma_unload(packet->tx_dma_handle, 0, -1);
119 		break;
120 #endif
121 	case USE_DMA:
122 		ddi_dma_unbind_handle(packet->tx_dma_handle);
123 		break;
124 	default:
125 		break;
126 	}
127 
128 	/*
129 	 * The mblk has been stripped off the sw packet
130 	 * and will be freed in a triggered soft intr.
131 	 */
132 	ASSERT(packet->mp == NULL);
133 
134 	packet->data_transfer_type = USE_NONE;
135 	packet->num_mblk_frag = 0;
136 	packet->num_desc = 0;
137 }
138 
139 uint_t
140 e1000g_tx_freemsg(caddr_t arg1, caddr_t arg2)
141 {
142 	struct e1000g *Adapter;
143 	mblk_t *mp;
144 
145 	Adapter = (struct e1000g *)arg1;
146 
147 	if ((Adapter == NULL) || (arg2 != NULL))
148 		return (DDI_INTR_UNCLAIMED);
149 
150 	if (!mutex_tryenter(&Adapter->tx_msg_chain->lock))
151 		return (DDI_INTR_CLAIMED);
152 
153 	mp = Adapter->tx_msg_chain->head;
154 	Adapter->tx_msg_chain->head = NULL;
155 	Adapter->tx_msg_chain->tail = NULL;
156 
157 	mutex_exit(&Adapter->tx_msg_chain->lock);
158 
159 	freemsgchain(mp);
160 
161 	return (DDI_INTR_CLAIMED);
162 }
163 
164 static uint32_t
165 e1000g_tx_free_desc_num(e1000g_tx_ring_t *tx_ring)
166 {
167 	struct e1000g *Adapter;
168 	int num;
169 
170 	Adapter = tx_ring->adapter;
171 
172 	num = tx_ring->tbd_oldest - tx_ring->tbd_next;
173 	if (num <= 0)
174 		num += Adapter->NumTxDescriptors;
175 
176 	return (num);
177 }
178 
179 mblk_t *
180 e1000g_m_tx(void *arg, mblk_t *mp)
181 {
182 	struct e1000g *Adapter = (struct e1000g *)arg;
183 	mblk_t *next;
184 
185 	rw_enter(&Adapter->chip_lock, RW_READER);
186 
187 	if (!Adapter->started || (Adapter->link_state != LINK_STATE_UP)) {
188 		freemsgchain(mp);
189 		mp = NULL;
190 	}
191 
192 	while (mp != NULL) {
193 		next = mp->b_next;
194 		mp->b_next = NULL;
195 
196 		if (!e1000g_send(Adapter, mp)) {
197 			mp->b_next = next;
198 			break;
199 		}
200 
201 		mp = next;
202 	}
203 
204 	rw_exit(&Adapter->chip_lock);
205 	return (mp);
206 }
207 
208 /*
209  * **********************************************************************
210  * Name:	e1000g_send						*
211  *									*
212  * Description:								*
213  *	Called from e1000g_m_tx with an mp ready to send. this		*
214  *	routine sets up the transmit descriptors and sends to		*
215  *	the wire. It also pushes the just transmitted packet to		*
216  *	the used tx sw packet list					*
217  *									*
218  * Arguments:								*
219  *	Pointer to the mblk to be sent, pointer to this adapter		*
220  *									*
221  * Returns:								*
222  *	B_TRUE, B_FALSE							*
223  *									*
224  * Modification log:							*
225  * Date      Who  Description						*
226  * --------  ---  -----------------------------------------------------	*
227  * **********************************************************************
228  */
229 static boolean_t
230 e1000g_send(struct e1000g *Adapter, mblk_t *mp)
231 {
232 	PTX_SW_PACKET packet;
233 	LIST_DESCRIBER pending_list;
234 	size_t len;
235 	size_t msg_size;
236 	uint32_t frag_count;
237 	int desc_count;
238 	uint32_t desc_total;
239 	uint32_t force_bcopy;
240 	mblk_t *nmp;
241 	mblk_t *tmp;
242 	e1000g_tx_ring_t *tx_ring;
243 	cksum_data_t cksum;
244 
245 	/* Get the total size and frags number of the message */
246 	force_bcopy = 0;
247 	frag_count = 0;
248 	msg_size = 0;
249 	for (nmp = mp; nmp; nmp = nmp->b_cont) {
250 		frag_count++;
251 		msg_size += MBLKL(nmp);
252 	}
253 
254 	/* Empty packet */
255 	if (msg_size == 0) {
256 		freemsg(mp);
257 		return (B_TRUE);
258 	}
259 
260 	/* Make sure packet is less than the max frame size */
261 	if (msg_size > Adapter->Shared.max_frame_size + VLAN_TAGSZ) {
262 		/*
263 		 * For the over size packet, we'll just drop it.
264 		 * So we return B_TRUE here.
265 		 */
266 		e1000g_DEBUGLOG_1(Adapter, e1000g_INFO_LEVEL,
267 		    "Tx packet out of bound. length = %d \n", msg_size);
268 		freemsg(mp);
269 		Adapter->tx_over_size++;
270 		return (B_TRUE);
271 	}
272 
273 	tx_ring = Adapter->tx_ring;
274 
275 	/*
276 	 * Check and reclaim tx descriptors.
277 	 * This low water mark check should be done all the time as
278 	 * Transmit interrupt delay can produce Transmit interrupts little
279 	 * late and that may cause few problems related to reaping Tx
280 	 * Descriptors... As you may run short of them before getting any
281 	 * transmit interrupt...
282 	 */
283 	if ((Adapter->NumTxDescriptors - e1000g_tx_free_desc_num(tx_ring)) >
284 	    Adapter->tx_recycle_low_water) {
285 		if (Adapter->Shared.mac_type == e1000_82547) {
286 			mutex_enter(&tx_ring->tx_lock);
287 			e1000g_82547_tx_move_tail(tx_ring);
288 			mutex_exit(&tx_ring->tx_lock);
289 		}
290 		Adapter->tx_recycle++;
291 		(void) e1000g_recycle(tx_ring);
292 	}
293 
294 	if (e1000g_tx_free_desc_num(tx_ring) < MAX_TX_DESC_PER_PACKET) {
295 		Adapter->tx_lack_desc++;
296 		goto tx_no_resource;
297 	}
298 
299 	/*
300 	 * If there are many frags of the message, then bcopy them
301 	 * into one tx descriptor buffer will get better performance.
302 	 */
303 	if ((frag_count >= Adapter->tx_frags_limit) &&
304 	    (msg_size <= Adapter->TxBufferSize)) {
305 		Adapter->tx_exceed_frags++;
306 		force_bcopy |= FORCE_BCOPY_EXCEED_FRAGS;
307 	}
308 
309 	/*
310 	 * If the message size is less than the minimum ethernet packet size,
311 	 * we'll use bcopy to send it, and padd it to 60 bytes later.
312 	 */
313 	if (msg_size < MINIMUM_ETHERNET_PACKET_SIZE) {
314 		Adapter->tx_under_size++;
315 		force_bcopy |= FORCE_BCOPY_UNDER_SIZE;
316 	}
317 
318 	/* Initialize variables */
319 	desc_count = 1;	/* The initial value should be greater than 0 */
320 	desc_total = 0;
321 	QUEUE_INIT_LIST(&pending_list);
322 
323 	/* Retrieve checksum info */
324 	hcksum_retrieve(mp, NULL, NULL, &cksum.cksum_start, &cksum.cksum_stuff,
325 	    NULL, NULL, &cksum.cksum_flags);
326 
327 	if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
328 	    htons(ETHERTYPE_VLAN))
329 		cksum.ether_header_size = sizeof (struct ether_vlan_header);
330 	else
331 		cksum.ether_header_size = sizeof (struct ether_header);
332 
333 	/* Process each mblk fragment and fill tx descriptors */
334 	packet = NULL;
335 	nmp = mp;
336 	while (nmp) {
337 		tmp = nmp->b_cont;
338 
339 		len = MBLKL(nmp);
340 		/* Check zero length mblks */
341 		if (len == 0) {
342 			Adapter->tx_empty_frags++;
343 			/*
344 			 * If there're no packet buffers have been used,
345 			 * or we just completed processing a buffer, then
346 			 * skip the empty mblk fragment.
347 			 * Otherwise, there's still a pending buffer that
348 			 * needs to be processed (tx_copy).
349 			 */
350 			if (desc_count > 0) {
351 				nmp = tmp;
352 				continue;
353 			}
354 		}
355 
356 		/*
357 		 * Get a new TxSwPacket to process mblk buffers.
358 		 */
359 		if (desc_count > 0) {
360 
361 			mutex_enter(&tx_ring->freelist_lock);
362 			packet = (PTX_SW_PACKET)
363 			    QUEUE_POP_HEAD(&tx_ring->free_list);
364 			mutex_exit(&tx_ring->freelist_lock);
365 
366 			if (packet == NULL) {
367 				e1000g_DEBUGLOG_0(Adapter, e1000g_INFO_LEVEL,
368 				    "No Tx SwPacket available\n");
369 				Adapter->tx_no_swpkt++;
370 				goto tx_send_failed;
371 			}
372 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
373 		}
374 
375 		ASSERT(packet);
376 		/*
377 		 * If the size of the fragment is less than the tx_bcopy_thresh
378 		 * we'll use bcopy; Otherwise, we'll use DMA binding.
379 		 */
380 		if ((len <= Adapter->tx_bcopy_thresh) || force_bcopy) {
381 			desc_count =
382 			    e1000g_tx_copy(Adapter, packet, nmp, force_bcopy);
383 			Adapter->tx_copy++;
384 		} else {
385 			desc_count =
386 			    e1000g_tx_bind(Adapter, packet, nmp);
387 			Adapter->tx_bind++;
388 		}
389 
390 		if (desc_count < 0)
391 			goto tx_send_failed;
392 
393 		if (desc_count > 0)
394 			desc_total += desc_count;
395 
396 		nmp = tmp;
397 	}
398 
399 	/* Assign the message to the last sw packet */
400 	ASSERT(packet);
401 	ASSERT(packet->mp == NULL);
402 	packet->mp = mp;
403 
404 	/* Try to recycle the tx descriptors again */
405 	if (e1000g_tx_free_desc_num(tx_ring) < MAX_TX_DESC_PER_PACKET) {
406 		Adapter->tx_recycle_retry++;
407 		(void) e1000g_recycle(tx_ring);
408 	}
409 
410 	mutex_enter(&tx_ring->tx_lock);
411 
412 	/*
413 	 * If the number of available tx descriptors is not enough for transmit
414 	 * (one redundant descriptor and one hw checksum context descriptor are
415 	 * included), then return failure.
416 	 */
417 	if (e1000g_tx_free_desc_num(tx_ring) < (desc_total + 2)) {
418 		e1000g_DEBUGLOG_0(Adapter, e1000g_INFO_LEVEL,
419 		    "No Enough Tx descriptors\n");
420 		Adapter->tx_no_desc++;
421 		mutex_exit(&tx_ring->tx_lock);
422 		goto tx_send_failed;
423 	}
424 
425 	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cksum);
426 
427 	mutex_exit(&tx_ring->tx_lock);
428 
429 	ASSERT(desc_count > 0);
430 
431 	/* Update statistic counters */
432 	if (Adapter->ProfileJumboTraffic) {
433 		if ((msg_size > ETHERMAX) &&
434 		    (msg_size <= FRAME_SIZE_UPTO_4K))
435 			Adapter->JumboTx_4K++;
436 
437 		if ((msg_size > FRAME_SIZE_UPTO_4K) &&
438 		    (msg_size <= FRAME_SIZE_UPTO_8K))
439 			Adapter->JumboTx_8K++;
440 
441 		if ((msg_size > FRAME_SIZE_UPTO_8K) &&
442 		    (msg_size <= FRAME_SIZE_UPTO_16K))
443 			Adapter->JumboTx_16K++;
444 	}
445 
446 	/* Send successful */
447 	return (B_TRUE);
448 
449 tx_send_failed:
450 	/* Free pending TxSwPackets */
451 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&pending_list);
452 	while (packet) {
453 		packet->mp = NULL;
454 		FreeTxSwPacket(packet);
455 		packet = (PTX_SW_PACKET)
456 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
457 	}
458 
459 	/* Return pending TxSwPackets to the "Free" list */
460 	mutex_enter(&tx_ring->freelist_lock);
461 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
462 	mutex_exit(&tx_ring->freelist_lock);
463 
464 	Adapter->tx_send_fail++;
465 
466 	freemsg(mp);
467 
468 	/* Send failed, message dropped */
469 	return (B_TRUE);
470 
471 tx_no_resource:
472 	/*
473 	 * Enable Transmit interrupts, so that the interrupt routine can
474 	 * call mac_tx_update() when transmit descriptors become available.
475 	 */
476 	Adapter->resched_needed = B_TRUE;
477 	if (!Adapter->tx_intr_enable)
478 		e1000g_EnableTxInterrupt(Adapter);
479 
480 	/* Message will be scheduled for re-transmit */
481 	return (B_FALSE);
482 }
483 
484 static boolean_t
485 check_cksum_context(e1000g_tx_ring_t *tx_ring, cksum_data_t *cksum)
486 {
487 	boolean_t cksum_load;
488 	cksum_data_t *last;
489 
490 	cksum_load = B_FALSE;
491 	last = &tx_ring->cksum_data;
492 
493 	if (cksum->cksum_flags != 0) {
494 		if ((cksum->ether_header_size != last->ether_header_size) ||
495 		    (cksum->cksum_flags != last->cksum_flags) ||
496 		    (cksum->cksum_stuff != last->cksum_stuff) ||
497 		    (cksum->cksum_start != last->cksum_start)) {
498 
499 			cksum_load = B_TRUE;
500 		}
501 	}
502 
503 	return (cksum_load);
504 }
505 
506 static int
507 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
508     cksum_data_t *cksum)
509 {
510 	struct e1000g *Adapter;
511 	PTX_SW_PACKET first_packet;
512 	PTX_SW_PACKET packet;
513 	boolean_t cksum_load;
514 	struct e1000_tx_desc *first_data_desc;
515 	struct e1000_tx_desc *next_desc;
516 	struct e1000_tx_desc *descriptor;
517 	uint32_t sync_offset;
518 	int sync_len;
519 	int desc_count;
520 	int i;
521 
522 	Adapter = tx_ring->adapter;
523 
524 	desc_count = 0;
525 	first_packet = NULL;
526 	first_data_desc = NULL;
527 	descriptor = NULL;
528 
529 	next_desc = tx_ring->tbd_next;
530 
531 	/* IP Head/TCP/UDP checksum offload */
532 	cksum_load = check_cksum_context(tx_ring, cksum);
533 
534 	if (cksum_load) {
535 		first_packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
536 
537 		descriptor = next_desc;
538 
539 		e1000g_fill_context_descriptor(cksum,
540 		    (struct e1000_context_desc *)descriptor);
541 
542 		/* Check the wrap-around case */
543 		if (descriptor == tx_ring->tbd_last)
544 			next_desc = tx_ring->tbd_first;
545 		else
546 			next_desc++;
547 
548 		desc_count++;
549 	}
550 
551 	first_data_desc = next_desc;
552 
553 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
554 	while (packet) {
555 		ASSERT(packet->num_desc);
556 
557 		for (i = 0; i < packet->num_desc; i++) {
558 			ASSERT(e1000g_tx_free_desc_num(tx_ring) > 0);
559 
560 			descriptor = next_desc;
561 #ifdef __sparc
562 			descriptor->buffer_addr =
563 			    DWORD_SWAP(packet->desc[i].Address);
564 #else
565 			descriptor->buffer_addr =
566 			    packet->desc[i].Address;
567 #endif
568 			descriptor->lower.data =
569 			    packet->desc[i].Length;
570 
571 			/* Zero out status */
572 			descriptor->upper.data = 0;
573 
574 			descriptor->lower.data |=
575 			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
576 			/* must set RS on every outgoing descriptor */
577 			descriptor->lower.data |=
578 			    E1000_TXD_CMD_RS;
579 
580 			/* Check the wrap-around case */
581 			if (descriptor == tx_ring->tbd_last)
582 				next_desc = tx_ring->tbd_first;
583 			else
584 				next_desc++;
585 
586 			desc_count++;
587 		}
588 
589 		if (first_packet != NULL) {
590 			/*
591 			 * Count the checksum context descriptor for
592 			 * the first SwPacket.
593 			 */
594 			first_packet->num_desc++;
595 			first_packet = NULL;
596 		}
597 
598 		packet = (PTX_SW_PACKET)
599 		    QUEUE_GET_NEXT(pending_list, &packet->Link);
600 	}
601 
602 	ASSERT(descriptor);
603 
604 	if (cksum->cksum_flags) {
605 		if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM)
606 			((struct e1000_data_desc *)first_data_desc)->
607 				upper.fields.popts |= E1000_TXD_POPTS_IXSM;
608 		if (cksum->cksum_flags & HCK_PARTIALCKSUM)
609 			((struct e1000_data_desc *)first_data_desc)->
610 				upper.fields.popts |= E1000_TXD_POPTS_TXSM;
611 	}
612 
613 	/*
614 	 * Last Descriptor of Packet needs End Of Packet (EOP), Report
615 	 * Status (RS) and append Ethernet CRC (IFCS) bits set.
616 	 */
617 	if (Adapter->TxInterruptDelay) {
618 		descriptor->lower.data |= E1000_TXD_CMD_IDE |
619 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
620 	} else {
621 		descriptor->lower.data |=
622 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
623 	}
624 
625 	/*
626 	 * Sync the Tx descriptors DMA buffer
627 	 */
628 	sync_offset = tx_ring->tbd_next - tx_ring->tbd_first;
629 	sync_len = descriptor - tx_ring->tbd_next + 1;
630 	/* Check the wrap-around case */
631 	if (sync_len > 0) {
632 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
633 		    sync_offset * sizeof (struct e1000_tx_desc),
634 		    sync_len * sizeof (struct e1000_tx_desc),
635 		    DDI_DMA_SYNC_FORDEV);
636 	} else {
637 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
638 		    sync_offset * sizeof (struct e1000_tx_desc),
639 		    0,
640 		    DDI_DMA_SYNC_FORDEV);
641 		sync_len = descriptor - tx_ring->tbd_first + 1;
642 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
643 		    0,
644 		    sync_len * sizeof (struct e1000_tx_desc),
645 		    DDI_DMA_SYNC_FORDEV);
646 	}
647 
648 	tx_ring->tbd_next = next_desc;
649 
650 	/*
651 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
652 	 * FX1000 that this frame is available to transmit.
653 	 */
654 	if (Adapter->Shared.mac_type == e1000_82547)
655 		e1000g_82547_tx_move_tail(tx_ring);
656 	else
657 		E1000_WRITE_REG(&Adapter->Shared, TDT,
658 		    (uint32_t)(next_desc - tx_ring->tbd_first));
659 
660 	/* Put the pending SwPackets to the "Used" list */
661 	mutex_enter(&tx_ring->usedlist_lock);
662 	QUEUE_APPEND(&tx_ring->used_list, pending_list);
663 	mutex_exit(&tx_ring->usedlist_lock);
664 
665 	/* Store the cksum data */
666 	if (cksum_load)
667 		tx_ring->cksum_data = *cksum;
668 
669 	return (desc_count);
670 }
671 
672 
673 /*
674  * **********************************************************************
675  * Name:	SetupTransmitStructures					*
676  *									*
677  * Description: This routine initializes all of the transmit related	*
678  *	structures.  This includes the Transmit descriptors, the	*
679  *	coalesce buffers, and the TX_SW_PACKETs structures.		*
680  *									*
681  *	NOTE -- The device must have been reset before this		*
682  *		routine is called.					*
683  *									*
684  * Author:	Hari Seshadri						*
685  * Functions Called : get_32bit_value					*
686  *									*
687  *									*
688  *									*
689  * Arguments:								*
690  *	Adapter - A pointer to our context sensitive "Adapter"		*
691  *	structure.							*
692  *									*
693  * Returns:								*
694  *      (none)								*
695  *									*
696  * Modification log:							*
697  * Date      Who  Description						*
698  * --------  ---  -----------------------------------------------------	*
699  *									*
700  * **********************************************************************
701  */
702 void
703 SetupTransmitStructures(struct e1000g *Adapter)
704 {
705 	struct e1000_hw *hw;
706 	PTX_SW_PACKET packet;
707 	UINT i;
708 	uint32_t buf_high;
709 	uint32_t buf_low;
710 	uint32_t reg_tipg;
711 	uint32_t reg_tctl;
712 	uint32_t reg_tarc;
713 	uint16_t speed, duplex;
714 	int size;
715 	e1000g_tx_ring_t *tx_ring;
716 
717 	hw = &Adapter->Shared;
718 	tx_ring = Adapter->tx_ring;
719 
720 	/* init the lists */
721 	/*
722 	 * Here we don't need to protect the lists using the
723 	 * tx_usedlist_lock and tx_freelist_lock, for they have
724 	 * been protected by the chip_lock.
725 	 */
726 	QUEUE_INIT_LIST(&tx_ring->used_list);
727 	QUEUE_INIT_LIST(&tx_ring->free_list);
728 
729 	/* Go through and set up each SW_Packet */
730 	packet = tx_ring->packet_area;
731 	for (i = 0; i < Adapter->NumTxSwPacket; i++, packet++) {
732 		/* Initialize this TX_SW_PACKET area */
733 		FreeTxSwPacket(packet);
734 		/* Add this TX_SW_PACKET to the free list */
735 		QUEUE_PUSH_TAIL(&tx_ring->free_list,
736 		    &packet->Link);
737 	}
738 
739 	/* Setup TX descriptor pointers */
740 	tx_ring->tbd_next = tx_ring->tbd_first;
741 	tx_ring->tbd_oldest = tx_ring->tbd_first;
742 
743 	/*
744 	 * Setup Hardware TX Registers
745 	 */
746 	/* Setup the Transmit Control Register (TCTL). */
747 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
748 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
749 	    (E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT);
750 
751 	/* Enable the MULR bit */
752 	if (hw->bus_type == e1000_bus_type_pci_express)
753 		reg_tctl |= E1000_TCTL_MULR;
754 
755 	E1000_WRITE_REG(hw, TCTL, reg_tctl);
756 
757 	if ((hw->mac_type == e1000_82571) || (hw->mac_type == e1000_82572)) {
758 		e1000_get_speed_and_duplex(hw, &speed, &duplex);
759 
760 		reg_tarc = E1000_READ_REG(hw, TARC0);
761 		reg_tarc |= (1 << 25);
762 		if (speed == SPEED_1000)
763 			reg_tarc |= (1 << 21);
764 		E1000_WRITE_REG(hw, TARC0, reg_tarc);
765 
766 		reg_tarc = E1000_READ_REG(hw, TARC1);
767 		reg_tarc |= (1 << 25);
768 		if (reg_tctl & E1000_TCTL_MULR)
769 			reg_tarc &= ~(1 << 28);
770 		else
771 			reg_tarc |= (1 << 28);
772 		E1000_WRITE_REG(hw, TARC1, reg_tarc);
773 
774 	} else if (hw->mac_type == e1000_80003es2lan) {
775 		reg_tarc = E1000_READ_REG(hw, TARC0);
776 		reg_tarc |= 1;
777 		if (hw->media_type == e1000_media_type_internal_serdes)
778 			reg_tarc |= (1 << 20);
779 		E1000_WRITE_REG(hw, TARC0, reg_tarc);
780 
781 		reg_tarc = E1000_READ_REG(hw, TARC1);
782 		reg_tarc |= 1;
783 		E1000_WRITE_REG(hw, TARC1, reg_tarc);
784 	}
785 
786 	/* Setup HW Base and Length of Tx descriptor area */
787 	size = (Adapter->NumTxDescriptors * sizeof (struct e1000_tx_desc));
788 	E1000_WRITE_REG(hw, TDLEN, size);
789 	size = E1000_READ_REG(hw, TDLEN);
790 
791 	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
792 	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);
793 
794 	E1000_WRITE_REG(hw, TDBAL, buf_low);
795 	E1000_WRITE_REG(hw, TDBAH, buf_high);
796 
797 	/* Setup our HW Tx Head & Tail descriptor pointers */
798 	E1000_WRITE_REG(hw, TDH, 0);
799 	E1000_WRITE_REG(hw, TDT, 0);
800 
801 	/* Set the default values for the Tx Inter Packet Gap timer */
802 	switch (hw->mac_type) {
803 	case e1000_82542_rev2_0:
804 	case e1000_82542_rev2_1:
805 		reg_tipg = DEFAULT_82542_TIPG_IPGT;
806 		reg_tipg |=
807 		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
808 		reg_tipg |=
809 		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
810 		break;
811 	default:
812 		if (hw->media_type == e1000_media_type_fiber)
813 			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
814 		else
815 			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
816 		reg_tipg |=
817 		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
818 		reg_tipg |=
819 		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
820 		break;
821 	}
822 	E1000_WRITE_REG(hw, TIPG, reg_tipg);
823 
824 	/* Setup Transmit Interrupt Delay Value */
825 	if (Adapter->TxInterruptDelay) {
826 		E1000_WRITE_REG(hw, TIDV, Adapter->TxInterruptDelay);
827 	}
828 
829 	/* For TCP/UDP checksum offload */
830 	tx_ring->cksum_data.cksum_stuff = 0;
831 	tx_ring->cksum_data.cksum_start = 0;
832 	tx_ring->cksum_data.cksum_flags = 0;
833 	tx_ring->cksum_data.ether_header_size = 0;
834 
835 	/* Initialize tx parameters */
836 	Adapter->tx_bcopy_thresh = DEFAULTTXBCOPYTHRESHOLD;
837 	Adapter->tx_recycle_low_water = DEFAULTTXRECYCLELOWWATER;
838 	Adapter->tx_recycle_num = DEFAULTTXRECYCLENUM;
839 	Adapter->tx_intr_enable = B_TRUE;
840 	Adapter->tx_frags_limit =
841 	    (Adapter->Shared.max_frame_size / Adapter->tx_bcopy_thresh) + 2;
842 	if (Adapter->tx_frags_limit > (MAX_TX_DESC_PER_PACKET >> 1))
843 		Adapter->tx_frags_limit = (MAX_TX_DESC_PER_PACKET >> 1);
844 }
845 
846 /*
847  * **********************************************************************
848  * Name:	e1000g_recycle						*
849  *									*
850  * Description: This routine cleans transmit packets.			*
851  *									*
852  *									*
853  *									*
854  * Arguments:								*
855  *      Adapter - A pointer to our context sensitive "Adapter"		*
856  *      structure.							*
857  *									*
858  * Returns:								*
859  *      (none)								*
860  * Functions Called:							*
861  *	  None								*
862  *									*
863  * Modification log:							*
864  * Date      Who  Description						*
865  * --------  ---  -----------------------------------------------------	*
866  *									*
867  * **********************************************************************
868  */
869 int
870 e1000g_recycle(e1000g_tx_ring_t *tx_ring)
871 {
872 	struct e1000g *Adapter;
873 	LIST_DESCRIBER pending_list;
874 	PTX_SW_PACKET packet;
875 	e1000g_msg_chain_t *msg_chain;
876 	mblk_t *mp;
877 	mblk_t *nmp;
878 	struct e1000_tx_desc *descriptor;
879 	int desc_count;
880 
881 	/*
882 	 * This function will examine each TxSwPacket in the 'used' queue
883 	 * if the e1000g is done with it then the associated resources (Tx
884 	 * Descriptors) will be "freed" and the TxSwPacket will be
885 	 * returned to the 'free' queue.
886 	 */
887 	Adapter = tx_ring->adapter;
888 
889 	desc_count = 0;
890 	QUEUE_INIT_LIST(&pending_list);
891 
892 	mutex_enter(&tx_ring->usedlist_lock);
893 
894 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&tx_ring->used_list);
895 	if (packet == NULL) {
896 		mutex_exit(&tx_ring->usedlist_lock);
897 		Adapter->tx_recycle_fail = 0;
898 		Adapter->StallWatchdog = 0;
899 		return (0);
900 	}
901 
902 	/*
903 	 * While there are still TxSwPackets in the used queue check them
904 	 */
905 	while (packet =
906 	    (PTX_SW_PACKET) QUEUE_GET_HEAD(&tx_ring->used_list)) {
907 
908 		/*
909 		 * Get hold of the next descriptor that the e1000g will
910 		 * report status back to (this will be the last descriptor
911 		 * of a given TxSwPacket). We only want to free the
912 		 * TxSwPacket (and it resources) if the e1000g is done
913 		 * with ALL of the descriptors.  If the e1000g is done
914 		 * with the last one then it is done with all of them.
915 		 */
916 		ASSERT(packet->num_desc);
917 		descriptor = tx_ring->tbd_oldest +
918 		    (packet->num_desc - 1);
919 
920 		/* Check for wrap case */
921 		if (descriptor > tx_ring->tbd_last)
922 			descriptor -= Adapter->NumTxDescriptors;
923 
924 		/* Sync the Tx descriptor DMA buffer */
925 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
926 		    (descriptor - tx_ring->tbd_first) *
927 		    sizeof (struct e1000_tx_desc),
928 		    sizeof (struct e1000_tx_desc),
929 		    DDI_DMA_SYNC_FORCPU);
930 
931 		/*
932 		 * If the descriptor done bit is set free TxSwPacket and
933 		 * associated resources
934 		 */
935 		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
936 			QUEUE_POP_HEAD(&tx_ring->used_list);
937 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
938 
939 			if (descriptor == tx_ring->tbd_last)
940 				tx_ring->tbd_oldest =
941 				    tx_ring->tbd_first;
942 			else
943 				tx_ring->tbd_oldest =
944 				    descriptor + 1;
945 
946 			desc_count += packet->num_desc;
947 
948 			if (desc_count >= Adapter->tx_recycle_num)
949 				break;
950 		} else {
951 			/*
952 			 * Found a TxSwPacket that the e1000g is not done
953 			 * with then there is no reason to check the rest
954 			 * of the queue.
955 			 */
956 			break;
957 		}
958 	}
959 
960 	mutex_exit(&tx_ring->usedlist_lock);
961 
962 	if (desc_count == 0) {
963 		Adapter->tx_recycle_fail++;
964 		Adapter->tx_recycle_none++;
965 		return (0);
966 	}
967 
968 	Adapter->tx_recycle_fail = 0;
969 	Adapter->StallWatchdog = 0;
970 
971 	mp = NULL;
972 	nmp = NULL;
973 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&pending_list);
974 	ASSERT(packet != NULL);
975 	while (packet != NULL) {
976 		if (packet->mp != NULL) {
977 			ASSERT(packet->mp->b_next == NULL);
978 			/* Assemble the message chain */
979 			if (mp == NULL) {
980 				mp = packet->mp;
981 				nmp = packet->mp;
982 			} else {
983 				nmp->b_next = packet->mp;
984 				nmp = packet->mp;
985 			}
986 			/* Disconnect the message from the sw packet */
987 			packet->mp = NULL;
988 		}
989 
990 		/* Free the TxSwPackets */
991 		FreeTxSwPacket(packet);
992 
993 		packet = (PTX_SW_PACKET)
994 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
995 	}
996 
997 	/* Save the message chain */
998 	if (mp != NULL) {
999 		msg_chain = Adapter->tx_msg_chain;
1000 		mutex_enter(&msg_chain->lock);
1001 		if (msg_chain->head == NULL) {
1002 			msg_chain->head = mp;
1003 			msg_chain->tail = nmp;
1004 		} else {
1005 			msg_chain->tail->b_next = mp;
1006 			msg_chain->tail = nmp;
1007 		}
1008 		mutex_exit(&msg_chain->lock);
1009 
1010 		/*
1011 		 * If the tx interrupt is enabled, the messages will be freed
1012 		 * in the tx interrupt; Otherwise, they are freed here by
1013 		 * triggering a soft interrupt.
1014 		 */
1015 		if (!Adapter->tx_intr_enable)
1016 			ddi_intr_trigger_softint(Adapter->tx_softint_handle,
1017 			    NULL);
1018 	}
1019 
1020 	/* Return the TxSwPackets back to the FreeList */
1021 	mutex_enter(&tx_ring->freelist_lock);
1022 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
1023 	mutex_exit(&tx_ring->freelist_lock);
1024 
1025 	return (desc_count);
1026 }
1027 
1028 /*
1029  * 82544 Coexistence issue workaround:
1030  *    There are 2 issues.
1031  *    1. If a 32 bit split completion happens from P64H2 and another
1032  *	agent drives a 64 bit request/split completion after ONLY
1033  *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
1034  *	82544 has a problem where in to clock all the data in, it
1035  *	looks at REQ64# signal and since it has changed so fast (i.e. 1
1036  *	idle clock turn around), it will fail to clock all the data in.
1037  *	Data coming from certain ending addresses has exposure to this issue.
1038  *
1039  * To detect this issue, following equation can be used...
1040  *	SIZE[3:0] + ADDR[2:0] = SUM[3:0].
1041  *	If SUM[3:0] is in between 1 to 4, we will have this issue.
1042  *
1043  * ROOT CAUSE:
1044  *	The erratum involves the 82544 PCIX elasticity FIFO implementations as
1045  *	64-bit FIFO's and flushing of the final partial-bytes corresponding
1046  *	to the end of a requested read burst. Under a specific burst condition
1047  *	of ending-data alignment and 32-byte split-completions, the final
1048  *	byte(s) of split-completion data require an extra clock cycle to flush
1049  *	into 64-bit FIFO orientation.  An incorrect logic dependency on the
 *	REQ64# signal occurring during this clock cycle may cause the
1051  *	residual byte(s) to be lost, thereby rendering the internal DMA client
1052  *	forever awaiting the final byte(s) for an outbound data-fetch.  The
1053  *	erratum is confirmed to *only* occur if certain subsequent external
1054  *	64-bit PCIX bus transactions occur immediately (minimum possible bus
 *	turn-around) following the odd-aligned 32-bit split-completion
1056  *	containing the final byte(s).  Intel has confirmed that this has been
1057  *	seen only with chipset/bridges which have the capability to provide
1058  *	32-bit split-completion data, and in the presence of newer PCIX bus
1059  *	agents which fully-optimize the inter-transaction turn-around (zero
1060  *	additional initiator latency when pre-granted bus ownership).
1061  *
1062  *   	This issue does not exist in PCI bus mode, when any agent is operating
1063  *	in 32 bit only mode or on chipsets that do not do 32 bit split
1064  *	completions for 64 bit read requests (Serverworks chipsets). P64H2 does
1065  *	32 bit split completions for any read request that has bit 2 set to 1
1066  *	for the requested address and read request size is more than 8 bytes.
1067  *
1068  *   2. Another issue is related to 82544 driving DACs under the similar
1069  *	scenario (32 bit split completion followed by 64 bit transaction with
1070  *	only 1 cycle turnaround). This issue is still being root caused. We
1071  *	think that both of these issues can be avoided if following workaround
1072  *	is implemented. It seems DAC issues is related to ending addresses being
1073  *	0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity
1074  *	FIFO which does not get flushed due to REQ64# dependency. We will only
1075  *	know the full story after it has been simulated successfully by HW team.
1076  *
1077  * WORKAROUND:
1078  *	Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
1079  */
1080 static uint32_t
1081 e1000g_fill_82544_desc(uint64_t Address,
1082     size_t Length, PDESC_ARRAY desc_array)
1083 {
1084 	/*
1085 	 * Since issue is sensitive to length and address.
1086 	 * Let us first check the address...
1087 	 */
1088 	uint32_t safe_terminator;
1089 
1090 	if (Length <= 4) {
1091 		desc_array->Descriptor[0].Address = Address;
1092 		desc_array->Descriptor[0].Length = Length;
1093 		desc_array->Elements = 1;
1094 		return (desc_array->Elements);
1095 	}
1096 	safe_terminator =
1097 	    (uint32_t)((((uint32_t)Address & 0x7) +
1098 		(Length & 0xF)) & 0xF);
1099 	/*
1100 	 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then
1101 	 * return
1102 	 */
1103 	if (safe_terminator == 0 ||
1104 	    (safe_terminator > 4 &&
1105 		safe_terminator < 9) ||
1106 	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
1107 		desc_array->Descriptor[0].Address = Address;
1108 		desc_array->Descriptor[0].Length = Length;
1109 		desc_array->Elements = 1;
1110 		return (desc_array->Elements);
1111 	}
1112 
1113 	desc_array->Descriptor[0].Address = Address;
1114 	desc_array->Descriptor[0].Length = Length - 4;
1115 	desc_array->Descriptor[1].Address = Address + (Length - 4);
1116 	desc_array->Descriptor[1].Length = 4;
1117 	desc_array->Elements = 2;
1118 	return (desc_array->Elements);
1119 }
1120 
1121 static int
1122 e1000g_tx_copy(struct e1000g *Adapter, PTX_SW_PACKET packet,
1123     mblk_t *mp, uint32_t force_bcopy)
1124 {
1125 	size_t len;
1126 	size_t len1;
1127 	dma_buffer_t *tx_buf;
1128 	mblk_t *nmp;
1129 	boolean_t finished;
1130 	int desc_count;
1131 
1132 	desc_count = 0;
1133 	tx_buf = packet->tx_buf;
1134 	len = MBLKL(mp);
1135 
1136 	ASSERT((tx_buf->len + len) <= tx_buf->size);
1137 
1138 	if (len > 0) {
1139 		bcopy(mp->b_rptr,
1140 		    tx_buf->address + tx_buf->len,
1141 		    len);
1142 		tx_buf->len += len;
1143 
1144 		packet->num_mblk_frag++;
1145 	}
1146 
1147 	nmp = mp->b_cont;
1148 	if (nmp == NULL) {
1149 		finished = B_TRUE;
1150 	} else {
1151 		len1 = MBLKL(nmp);
1152 		if ((tx_buf->len + len1) > tx_buf->size)
1153 			finished = B_TRUE;
1154 		else if (force_bcopy)
1155 			finished = B_FALSE;
1156 		else if (len1 > Adapter->tx_bcopy_thresh)
1157 			finished = B_TRUE;
1158 		else
1159 			finished = B_FALSE;
1160 	}
1161 
1162 	if (finished) {
1163 		if (tx_buf->len > len)
1164 			Adapter->tx_multi_copy++;
1165 
1166 		/*
1167 		 * If the packet is smaller than 64 bytes, which is the
1168 		 * minimum ethernet packet size, pad the packet to make
1169 		 * it at least 60 bytes. The hardware will add 4 bytes
1170 		 * for CRC.
1171 		 */
1172 		if (force_bcopy & FORCE_BCOPY_UNDER_SIZE) {
1173 			ASSERT(tx_buf->len < MINIMUM_ETHERNET_PACKET_SIZE);
1174 
1175 			bzero(tx_buf->address + tx_buf->len,
1176 			    MINIMUM_ETHERNET_PACKET_SIZE - tx_buf->len);
1177 			tx_buf->len = MINIMUM_ETHERNET_PACKET_SIZE;
1178 		}
1179 
1180 		switch (packet->dma_type) {
1181 #ifdef __sparc
1182 		case USE_DVMA:
1183 			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
1184 			break;
1185 #endif
1186 		case USE_DMA:
1187 			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1188 			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1189 			break;
1190 		default:
1191 			ASSERT(B_FALSE);
1192 			break;
1193 		}
1194 
1195 		packet->data_transfer_type = USE_BCOPY;
1196 
1197 		desc_count = e1000g_fill_tx_desc(Adapter,
1198 		    packet,
1199 		    tx_buf->dma_address,
1200 		    tx_buf->len);
1201 
1202 		if (desc_count <= 0)
1203 			return (-1);
1204 	}
1205 
1206 	return (desc_count);
1207 }
1208 
1209 static int
1210 e1000g_tx_bind(struct e1000g *Adapter, PTX_SW_PACKET packet, mblk_t *mp)
1211 {
1212 	int j;
1213 	int mystat;
1214 	size_t len;
1215 	ddi_dma_cookie_t dma_cookie;
1216 	uint_t ncookies;
1217 	int desc_count;
1218 	uint32_t desc_total;
1219 
1220 	desc_total = 0;
1221 	len = MBLKL(mp);
1222 
1223 	/*
1224 	 * ddi_dma_addr_bind_handle() allocates  DMA  resources  for  a
1225 	 * memory  object such that a device can perform DMA to or from
1226 	 * the object.  DMA resources  are  allocated  considering  the
1227 	 * device's  DMA  attributes  as  expressed by ddi_dma_attr(9S)
1228 	 * (see ddi_dma_alloc_handle(9F)).
1229 	 *
1230 	 * ddi_dma_addr_bind_handle() fills in  the  first  DMA  cookie
1231 	 * pointed  to by cookiep with the appropriate address, length,
1232 	 * and bus type. *ccountp is set to the number of DMA  cookies
1233 	 * representing this DMA object. Subsequent DMA cookies must be
1234 	 * retrieved by calling ddi_dma_nextcookie(9F)  the  number  of
1235 	 * times specified by *countp - 1.
1236 	 */
1237 	switch (packet->dma_type) {
1238 #ifdef __sparc
1239 	case USE_DVMA:
1240 		dvma_kaddr_load(packet->tx_dma_handle,
1241 		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);
1242 
1243 		dvma_sync(packet->tx_dma_handle, 0,
1244 		    DDI_DMA_SYNC_FORDEV);
1245 
1246 		ncookies = 1;
1247 		packet->data_transfer_type = USE_DVMA;
1248 		break;
1249 #endif
1250 	case USE_DMA:
1251 		if ((mystat = ddi_dma_addr_bind_handle(
1252 			packet->tx_dma_handle, NULL,
1253 			(caddr_t)mp->b_rptr, len,
1254 			DDI_DMA_WRITE | DDI_DMA_STREAMING,
1255 			DDI_DMA_DONTWAIT, 0, &dma_cookie,
1256 			&ncookies)) != DDI_DMA_MAPPED) {
1257 
1258 			e1000g_log(Adapter, CE_WARN,
1259 			    "Couldn't bind mblk buffer to Tx DMA handle: "
1260 			    "return: %X, Pkt: %X\n",
1261 			    mystat, packet);
1262 			return (-1);
1263 		}
1264 
1265 		/*
1266 		 * An implicit ddi_dma_sync() is done when the
1267 		 * ddi_dma_addr_bind_handle() is called. So we
1268 		 * don't need to explicitly call ddi_dma_sync()
1269 		 * here any more.
1270 		 */
1271 		ASSERT(ncookies);
1272 		if (ncookies > 1)
1273 			Adapter->tx_multi_cookie++;
1274 
1275 		/*
1276 		 * The data_transfer_type value must be set after the handle
1277 		 * has been bound, for it will be used in FreeTxSwPacket()
1278 		 * to decide whether we need to unbind the handle.
1279 		 */
1280 		packet->data_transfer_type = USE_DMA;
1281 		break;
1282 	default:
1283 		ASSERT(B_FALSE);
1284 		break;
1285 	}
1286 
1287 	packet->num_mblk_frag++;
1288 
1289 	/*
1290 	 * Each address could span thru multpile cookie..
1291 	 * Each cookie will have one descriptor
1292 	 */
1293 	for (j = ncookies; j != 0; j--) {
1294 
1295 		desc_count = e1000g_fill_tx_desc(Adapter,
1296 		    packet,
1297 		    dma_cookie.dmac_laddress,
1298 		    dma_cookie.dmac_size);
1299 
1300 		if (desc_count <= 0)
1301 			return (-1);
1302 
1303 		desc_total += desc_count;
1304 
1305 		/*
1306 		 * ddi_dma_nextcookie() retrieves subsequent DMA
1307 		 * cookies for a DMA object.
1308 		 * ddi_dma_nextcookie() fills in the
1309 		 * ddi_dma_cookie(9S) structure pointed to by
1310 		 * cookiep.  The ddi_dma_cookie(9S) structure
1311 		 * must be allocated prior to calling
1312 		 * ddi_dma_nextcookie(). The DMA cookie count
1313 		 * returned by ddi_dma_buf_bind_handle(9F),
1314 		 * ddi_dma_addr_bind_handle(9F), or
1315 		 * ddi_dma_getwin(9F) indicates the number of DMA
1316 		 * cookies a DMA object consists of.  If the
1317 		 * resulting cookie count, N, is larger than 1,
1318 		 * ddi_dma_nextcookie() must be called N-1 times
1319 		 * to retrieve all DMA cookies.
1320 		 */
1321 		if (j > 1) {
1322 			ddi_dma_nextcookie(packet->tx_dma_handle,
1323 			    &dma_cookie);
1324 		}
1325 	}
1326 
1327 	return (desc_total);
1328 }
1329 
1330 static void
1331 e1000g_fill_context_descriptor(cksum_data_t *cksum,
1332     struct e1000_context_desc *cksum_desc)
1333 {
1334 	if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM) {
1335 		cksum_desc->lower_setup.ip_fields.ipcss =
1336 		    cksum->ether_header_size;
1337 		cksum_desc->lower_setup.ip_fields.ipcso =
1338 		    cksum->ether_header_size +
1339 		    offsetof(struct ip, ip_sum);
1340 		cksum_desc->lower_setup.ip_fields.ipcse =
1341 		    cksum->ether_header_size +
1342 		    sizeof (struct ip) - 1;
1343 	} else
1344 		cksum_desc->lower_setup.ip_config = 0;
1345 
1346 	if (cksum->cksum_flags & HCK_PARTIALCKSUM) {
1347 		/*
1348 		 * The packet with same protocol has the following
1349 		 * stuff and start offset:
1350 		 * |  Protocol  | Stuff  | Start  | Checksum
1351 		 * |		| Offset | Offset | Enable
1352 		 * | IPv4 + TCP |  0x24  |  0x14  |  Yes
1353 		 * | IPv4 + UDP |  0x1A  |  0x14  |  Yes
1354 		 * | IPv6 + TCP |  0x20  |  0x10  |  No
1355 		 * | IPv6 + UDP |  0x14  |  0x10  |  No
1356 		 */
1357 		cksum_desc->upper_setup.tcp_fields.tucss =
1358 		    cksum->cksum_start + cksum->ether_header_size;
1359 		cksum_desc->upper_setup.tcp_fields.tucso =
1360 		    cksum->cksum_stuff + cksum->ether_header_size;
1361 		cksum_desc->upper_setup.tcp_fields.tucse = 0;
1362 	} else
1363 		cksum_desc->upper_setup.tcp_config = 0;
1364 
1365 	cksum_desc->cmd_and_length = E1000_TXD_CMD_DEXT;
1366 
1367 	/*
1368 	 * Zero out the options for TCP Segmentation Offload,
1369 	 * since we don't support it in this version
1370 	 */
1371 	cksum_desc->tcp_seg_setup.data = 0;
1372 }
1373 
1374 static int
1375 e1000g_fill_tx_desc(struct e1000g *Adapter,
1376     PTX_SW_PACKET packet, uint64_t address, size_t size)
1377 {
1378 	PADDRESS_LENGTH_PAIR desc;
1379 	int desc_count;
1380 
1381 	desc_count = 0;
1382 
1383 	if ((Adapter->Shared.bus_type == e1000_bus_type_pcix) &&
1384 	    (Adapter->Shared.mac_type == e1000_82544)) {
1385 
1386 		desc_count = e1000g_tx_workaround_PCIX_82544(Adapter,
1387 		    packet, address, size);
1388 
1389 	} else if ((Adapter->Shared.mac_type == e1000_82544) &&
1390 	    (size > JUMBO_FRAG_LENGTH)) {
1391 
1392 		desc_count = e1000g_tx_workaround_jumbo_82544(Adapter,
1393 		    packet, address, size);
1394 
1395 	} else {
1396 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1397 
1398 		desc = &packet->desc[packet->num_desc];
1399 
1400 		desc->Address = address;
1401 		desc->Length = size;
1402 
1403 		packet->num_desc++;
1404 		desc_count++;
1405 	}
1406 
1407 	return (desc_count);
1408 }
1409 
1410 static int
1411 e1000g_tx_workaround_PCIX_82544(struct e1000g *Adapter,
1412     PTX_SW_PACKET packet, uint64_t address, size_t size)
1413 {
1414 	PADDRESS_LENGTH_PAIR desc;
1415 	int desc_count;
1416 	long size_left;
1417 	size_t len;
1418 	uint32_t counter;
1419 	uint32_t array_elements;
1420 	DESC_ARRAY desc_array;
1421 
1422 	/*
1423 	 * Coexist Workaround for cordova: RP: 07/04/03
1424 	 *
1425 	 * RP: ERRATA: Workaround ISSUE:
1426 	 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup
1427 	 * Eachbuffer in to 8kb pieces until the
1428 	 * remainder is < 8kb
1429 	 */
1430 	size_left = size;
1431 	desc_count = 0;
1432 
1433 	while (size_left > 0) {
1434 		if (size_left > MAX_TX_BUF_SIZE)
1435 			len = MAX_TX_BUF_SIZE;
1436 		else
1437 			len = size_left;
1438 
1439 		array_elements = e1000g_fill_82544_desc(address,
1440 		    len, &desc_array);
1441 
1442 		for (counter = 0; counter < array_elements; counter++) {
1443 			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1444 			if (packet->num_desc >= MAX_TX_DESC_PER_PACKET) {
1445 				e1000g_log(Adapter, CE_WARN,
1446 				    "No enough preparing tx descriptors");
1447 				return (-1);
1448 			}
1449 			/*
1450 			 * Put in the buffer address
1451 			 */
1452 			desc = &packet->desc[packet->num_desc];
1453 
1454 			desc->Address =
1455 			    desc_array.Descriptor[counter].Address;
1456 			desc->Length =
1457 			    desc_array.Descriptor[counter].Length;
1458 
1459 			packet->num_desc++;
1460 			desc_count++;
1461 		} /* for */
1462 
1463 		/*
1464 		 * Update the buffer address and length
1465 		 */
1466 		address += MAX_TX_BUF_SIZE;
1467 		size_left -= MAX_TX_BUF_SIZE;
1468 	} /* while */
1469 
1470 	return (desc_count);
1471 }
1472 
1473 static int
1474 e1000g_tx_workaround_jumbo_82544(struct e1000g *Adapter,
1475     PTX_SW_PACKET packet, uint64_t address, size_t size)
1476 {
1477 	PADDRESS_LENGTH_PAIR desc;
1478 	int desc_count;
1479 	long size_left;
1480 	uint32_t offset;
1481 
1482 	/*
1483 	 * Workaround for Jumbo Frames on Cordova
1484 	 * PSD 06/01/2001
1485 	 */
1486 	size_left = size;
1487 	desc_count = 0;
1488 	offset = 0;
1489 	while (size_left > 0) {
1490 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1491 		if (packet->num_desc >= MAX_TX_DESC_PER_PACKET) {
1492 			e1000g_log(Adapter, CE_WARN,
1493 			    "No enough preparing tx descriptors");
1494 			return (-1);
1495 		}
1496 
1497 		desc = &packet->desc[packet->num_desc];
1498 
1499 		desc->Address = address + offset;
1500 
1501 		if (size_left > JUMBO_FRAG_LENGTH)
1502 			desc->Length = JUMBO_FRAG_LENGTH;
1503 		else
1504 			desc->Length = size_left;
1505 
1506 		packet->num_desc++;
1507 		desc_count++;
1508 
1509 		offset += desc->Length;
1510 		size_left -= JUMBO_FRAG_LENGTH;
1511 	}
1512 
1513 	return (desc_count);
1514 }
1515 
1516 static void
1517 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
1518 {
1519 	uint16_t hw_tdt;
1520 	uint16_t sw_tdt;
1521 	struct e1000_tx_desc *tx_desc;
1522 	uint16_t length = 0;
1523 	boolean_t eop = B_FALSE;
1524 	struct e1000g *Adapter;
1525 
1526 	Adapter = tx_ring->adapter;
1527 
1528 	hw_tdt = E1000_READ_REG(&Adapter->Shared, TDT);
1529 	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;
1530 
1531 	while (hw_tdt != sw_tdt) {
1532 		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
1533 		length += tx_desc->lower.flags.length;
1534 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1535 		if (++hw_tdt == Adapter->NumTxDescriptors)
1536 			hw_tdt = 0;
1537 
1538 		if (eop) {
1539 			if ((Adapter->link_duplex == HALF_DUPLEX) &&
1540 			    e1000_82547_fifo_workaround(&Adapter->Shared,
1541 				length) != E1000_SUCCESS) {
1542 				if (tx_ring->timer_enable_82547) {
1543 					ASSERT(tx_ring->timer_id_82547 == 0);
1544 					tx_ring->timer_id_82547 =
1545 					    timeout(e1000g_82547_timeout,
1546 						(void *)Adapter,
1547 						drv_usectohz(10000));
1548 				}
1549 				return;
1550 
1551 			} else {
1552 				E1000_WRITE_REG(&Adapter->Shared, TDT,
1553 				    hw_tdt);
1554 				e1000_update_tx_fifo_head(&Adapter->Shared,
1555 				    length);
1556 				length = 0;
1557 			}
1558 		}
1559 	}
1560 }
1561 
1562 static void
1563 e1000g_82547_timeout(void *arg)
1564 {
1565 	e1000g_tx_ring_t *tx_ring;
1566 
1567 	tx_ring = (e1000g_tx_ring_t *)arg;
1568 
1569 	mutex_enter(&tx_ring->tx_lock);
1570 
1571 	tx_ring->timer_id_82547 = 0;
1572 	e1000g_82547_tx_move_tail_work(tx_ring);
1573 
1574 	mutex_exit(&tx_ring->tx_lock);
1575 }
1576 
1577 static void
1578 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
1579 {
1580 	timeout_id_t tid;
1581 
1582 	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));
1583 
1584 	tid = tx_ring->timer_id_82547;
1585 	tx_ring->timer_id_82547 = 0;
1586 	if (tid != 0) {
1587 		tx_ring->timer_enable_82547 = B_FALSE;
1588 		mutex_exit(&tx_ring->tx_lock);
1589 
1590 		(void) untimeout(tid);
1591 
1592 		mutex_enter(&tx_ring->tx_lock);
1593 	}
1594 	tx_ring->timer_enable_82547 = B_TRUE;
1595 	e1000g_82547_tx_move_tail_work(tx_ring);
1596 }
1597