xref: /titanic_50/usr/src/uts/common/io/e1000g/e1000g_tx.c (revision 794ca8fa32f239770e58f592fc80eceff7ec92f0)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2007 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_tx.c							*
33  *									*
34  * Abstract:								*
 *   This file contains routines that handle the Transmit		*
 *   interrupt and make the hardware send the data pointed to	*
 *   by the packet out onto the physical medium.			*
38  *									*
39  *									*
40  * Environment:								*
41  *   Kernel Mode -							*
42  *									*
43  * Source History:							*
44  *   The code in this file is based somewhat on the "send" code		*
45  *   developed for the Intel Pro/100 family(Speedo1 and Speedo3) by	*
46  *   Steve Lindsay, and partly on some sample DDK code			*
47  *   of solaris.							*
48  *									*
49  *   March 12, 1997 Steve Lindsay					*
50  *   1st created - Ported from E100B send.c file			*
51  *									*
52  * **********************************************************************
53  */
54 
55 #include "e1000g_sw.h"
56 #include "e1000g_debug.h"
57 
58 static boolean_t e1000g_send(struct e1000g *, mblk_t *);
59 static int e1000g_tx_copy(struct e1000g *, PTX_SW_PACKET, mblk_t *, uint32_t);
60 static int e1000g_tx_bind(struct e1000g *, PTX_SW_PACKET, mblk_t *);
61 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
62     uint_t, boolean_t);
63 static void e1000g_fill_context_descriptor(e1000g_tx_ring_t *,
64     struct e1000_context_desc *);
65 static int e1000g_fill_tx_desc(struct e1000g *,
66     PTX_SW_PACKET, uint64_t, size_t);
67 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
68     PDESC_ARRAY desc_array);
69 static int e1000g_tx_workaround_PCIX_82544(struct e1000g *,
70     PTX_SW_PACKET, uint64_t, size_t);
71 static int e1000g_tx_workaround_jumbo_82544(struct e1000g *,
72     PTX_SW_PACKET, uint64_t, size_t);
73 static uint32_t e1000g_tx_free_desc_num(e1000g_tx_ring_t *);
74 static void e1000g_82547_timeout(void *);
75 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
76 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);
77 
78 #ifndef e1000g_DEBUG
79 #pragma inline(e1000g_tx_copy)
80 #pragma inline(e1000g_tx_bind)
81 #pragma inline(e1000g_fill_tx_ring)
82 #pragma inline(e1000g_fill_context_descriptor)
83 #pragma inline(e1000g_fill_tx_desc)
84 #pragma inline(e1000g_fill_82544_desc)
85 #pragma inline(e1000g_tx_workaround_PCIX_82544)
86 #pragma inline(e1000g_tx_workaround_jumbo_82544)
87 #pragma inline(FreeTxSwPacket)
88 #pragma inline(e1000g_tx_free_desc_num)
89 #endif
90 
91 /*
92  * **********************************************************************
93  * Name:      FreeTxSwPacket						*
94  *									*
95  * Description:								*
96  *	       Frees up the previusly allocated Dma handle for given	*
97  *	       transmit sw packet.					*
98  *									*
99  * Parameter Passed:							*
100  *									*
101  * Return Value:							*
102  *									*
103  * Functions called:							*
104  *									*
105  * **********************************************************************
106  */
107 void
108 FreeTxSwPacket(register PTX_SW_PACKET packet)
109 {
110 	switch (packet->data_transfer_type) {
111 	case USE_BCOPY:
112 		packet->tx_buf->len = 0;
113 		break;
114 #ifdef __sparc
115 	case USE_DVMA:
116 		dvma_unload(packet->tx_dma_handle, 0, -1);
117 		break;
118 #endif
119 	case USE_DMA:
120 		ddi_dma_unbind_handle(packet->tx_dma_handle);
121 		break;
122 	default:
123 		break;
124 	}
125 
126 	/*
127 	 * The mblk has been stripped off the sw packet
128 	 * and will be freed in a triggered soft intr.
129 	 */
130 	ASSERT(packet->mp == NULL);
131 
132 	packet->data_transfer_type = USE_NONE;
133 	packet->num_mblk_frag = 0;
134 	packet->num_desc = 0;
135 }
136 
137 uint_t
138 e1000g_tx_freemsg(caddr_t arg1, caddr_t arg2)
139 {
140 	struct e1000g *Adapter;
141 	mblk_t *mp;
142 
143 	Adapter = (struct e1000g *)arg1;
144 
145 	if ((Adapter == NULL) || (arg2 != NULL))
146 		return (DDI_INTR_UNCLAIMED);
147 
148 	if (!mutex_tryenter(&Adapter->tx_msg_chain->lock))
149 		return (DDI_INTR_CLAIMED);
150 
151 	mp = Adapter->tx_msg_chain->head;
152 	Adapter->tx_msg_chain->head = NULL;
153 	Adapter->tx_msg_chain->tail = NULL;
154 
155 	mutex_exit(&Adapter->tx_msg_chain->lock);
156 
157 	freemsgchain(mp);
158 
159 	return (DDI_INTR_CLAIMED);
160 }
161 
162 static uint32_t
163 e1000g_tx_free_desc_num(e1000g_tx_ring_t *tx_ring)
164 {
165 	struct e1000g *Adapter;
166 	int num;
167 
168 	Adapter = tx_ring->adapter;
169 
170 	num = tx_ring->tbd_oldest - tx_ring->tbd_next;
171 	if (num <= 0)
172 		num += Adapter->NumTxDescriptors;
173 
174 	return (num);
175 }
176 
177 mblk_t *
178 e1000g_m_tx(void *arg, mblk_t *mp)
179 {
180 	struct e1000g *Adapter = (struct e1000g *)arg;
181 	mblk_t *next;
182 
183 	rw_enter(&Adapter->chip_lock, RW_READER);
184 
185 	if (!Adapter->started) {
186 		freemsgchain(mp);
187 		mp = NULL;
188 	}
189 
190 	while (mp != NULL) {
191 		next = mp->b_next;
192 		mp->b_next = NULL;
193 
194 		if (!e1000g_send(Adapter, mp)) {
195 			mp->b_next = next;
196 			break;
197 		}
198 
199 		mp = next;
200 	}
201 
202 	rw_exit(&Adapter->chip_lock);
203 	return (mp);
204 }
205 
206 /*
207  * **********************************************************************
208  * Name:	e1000g_send						*
209  *									*
210  * Description:								*
211  *	Called from e1000g_m_tx with an mp ready to send. this		*
212  *	routine sets up the transmit descriptors and sends to		*
213  *	the wire. It also pushes the just transmitted packet to		*
214  *	the used tx sw packet list					*
215  *									*
216  * Arguments:								*
217  *	Pointer to the mblk to be sent, pointer to this adapter		*
218  *									*
219  * Returns:								*
220  *	B_TRUE, B_FALSE							*
221  *									*
222  * Modification log:							*
223  * Date      Who  Description						*
224  * --------  ---  -----------------------------------------------------	*
225  * **********************************************************************
226  */
227 static boolean_t
228 e1000g_send(struct e1000g *Adapter, mblk_t *mp)
229 {
230 	PTX_SW_PACKET packet;
231 	LIST_DESCRIBER pending_list;
232 	size_t len;
233 	size_t msg_size;
234 	uint32_t frag_count;
235 	int desc_count;
236 	uint32_t desc_total;
237 	uint32_t force_bcopy;
238 	mblk_t *nmp;
239 	mblk_t *tmp;
240 	e1000g_tx_ring_t *tx_ring;
241 	/* IP Head/TCP/UDP checksum offload */
242 	uint_t cksum_start;
243 	uint_t cksum_stuff;
244 	uint_t cksum_flags;
245 	boolean_t cksum_load;
246 	uint8_t ether_header_size;
247 
248 	/* Get the total size and frags number of the message */
249 	force_bcopy = 0;
250 	frag_count = 0;
251 	msg_size = 0;
252 	for (nmp = mp; nmp; nmp = nmp->b_cont) {
253 		frag_count++;
254 		msg_size += MBLKL(nmp);
255 	}
256 
257 	/* Empty packet */
258 	if (msg_size == 0) {
259 		freemsg(mp);
260 		return (B_TRUE);
261 	}
262 
263 	/* Make sure packet is less than the max frame size */
264 	if (msg_size > Adapter->Shared.max_frame_size + VLAN_TAGSZ) {
265 		/*
266 		 * For the over size packet, we'll just drop it.
267 		 * So we return B_TRUE here.
268 		 */
269 		e1000g_DEBUGLOG_1(Adapter, e1000g_INFO_LEVEL,
270 		    "Tx packet out of bound. length = %d \n", msg_size);
271 		freemsg(mp);
272 		Adapter->tx_over_size++;
273 		return (B_TRUE);
274 	}
275 
276 	tx_ring = Adapter->tx_ring;
277 
278 	/*
279 	 * Check and reclaim tx descriptors.
280 	 * This low water mark check should be done all the time as
281 	 * Transmit interrupt delay can produce Transmit interrupts little
282 	 * late and that may cause few problems related to reaping Tx
283 	 * Descriptors... As you may run short of them before getting any
284 	 * transmit interrupt...
285 	 */
286 	if ((Adapter->NumTxDescriptors - e1000g_tx_free_desc_num(tx_ring)) >
287 	    Adapter->tx_recycle_low_water) {
288 		if (Adapter->Shared.mac_type == e1000_82547) {
289 			mutex_enter(&tx_ring->tx_lock);
290 			e1000g_82547_tx_move_tail(tx_ring);
291 			mutex_exit(&tx_ring->tx_lock);
292 		}
293 		Adapter->tx_recycle++;
294 		(void) e1000g_recycle(tx_ring);
295 	}
296 
297 	if (e1000g_tx_free_desc_num(tx_ring) < MAX_TX_DESC_PER_PACKET) {
298 		Adapter->tx_lack_desc++;
299 		goto tx_no_resource;
300 	}
301 
302 	/*
303 	 * If there are many frags of the message, then bcopy them
304 	 * into one tx descriptor buffer will get better performance.
305 	 */
306 	if (frag_count >= Adapter->tx_frags_limit) {
307 		Adapter->tx_exceed_frags++;
308 		force_bcopy |= FORCE_BCOPY_EXCEED_FRAGS;
309 	}
310 
311 	/*
312 	 * If the message size is less than the minimum ethernet packet size,
313 	 * we'll use bcopy to send it, and padd it to 60 bytes later.
314 	 */
315 	if (msg_size < MINIMUM_ETHERNET_PACKET_SIZE) {
316 		Adapter->tx_under_size++;
317 		force_bcopy |= FORCE_BCOPY_UNDER_SIZE;
318 	}
319 
320 	/* Initialize variables */
321 	desc_count = 1;	/* The initial value should be greater than 0 */
322 	desc_total = 0;
323 	QUEUE_INIT_LIST(&pending_list);
324 
325 	/* Retrieve checksum info */
326 	hcksum_retrieve(mp, NULL, NULL, &cksum_start, &cksum_stuff,
327 	    NULL, NULL, &cksum_flags);
328 
329 	cksum_load = B_FALSE;
330 	if (cksum_flags) {
331 		if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
332 		    htons(ETHERTYPE_VLAN))
333 			ether_header_size = sizeof (struct ether_vlan_header);
334 		else
335 			ether_header_size = sizeof (struct ether_header);
336 
337 		if ((ether_header_size != tx_ring->ether_header_size) ||
338 		    (cksum_flags != tx_ring->cksum_flags) ||
339 		    (cksum_stuff != tx_ring->cksum_stuff) ||
340 		    (cksum_start != tx_ring->cksum_start)) {
341 
342 			tx_ring->ether_header_size = ether_header_size;
343 			tx_ring->cksum_flags = cksum_flags;
344 			tx_ring->cksum_start = cksum_start;
345 			tx_ring->cksum_stuff = cksum_stuff;
346 
347 			cksum_load = B_TRUE;
348 		}
349 	}
350 
351 	/* Process each mblk fragment and fill tx descriptors */
352 	packet = NULL;
353 	nmp = mp;
354 	while (nmp) {
355 		tmp = nmp->b_cont;
356 
357 		len = MBLKL(nmp);
358 		/* Check zero length mblks */
359 		if (len == 0) {
360 			Adapter->tx_empty_frags++;
361 			/*
362 			 * If there're no packet buffers have been used,
363 			 * or we just completed processing a buffer, then
364 			 * skip the empty mblk fragment.
365 			 * Otherwise, there's still a pending buffer that
366 			 * needs to be processed (tx_copy).
367 			 */
368 			if (desc_count > 0) {
369 				nmp = tmp;
370 				continue;
371 			}
372 		}
373 
374 		/*
375 		 * Get a new TxSwPacket to process mblk buffers.
376 		 */
377 		if (desc_count > 0) {
378 
379 			mutex_enter(&tx_ring->freelist_lock);
380 			packet = (PTX_SW_PACKET)
381 			    QUEUE_POP_HEAD(&tx_ring->free_list);
382 			mutex_exit(&tx_ring->freelist_lock);
383 
384 			if (packet == NULL) {
385 				e1000g_DEBUGLOG_0(Adapter, e1000g_INFO_LEVEL,
386 				    "No Tx SwPacket available\n");
387 				Adapter->tx_no_swpkt++;
388 				goto tx_send_failed;
389 			}
390 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
391 		}
392 
393 		ASSERT(packet);
394 		/*
395 		 * If the size of the fragment is less than the tx_bcopy_thresh
396 		 * we'll use bcopy; Otherwise, we'll use DMA binding.
397 		 */
398 		if ((len <= Adapter->tx_bcopy_thresh) || force_bcopy) {
399 			desc_count =
400 			    e1000g_tx_copy(Adapter, packet, nmp, force_bcopy);
401 			Adapter->tx_copy++;
402 		} else {
403 			desc_count =
404 			    e1000g_tx_bind(Adapter, packet, nmp);
405 			Adapter->tx_bind++;
406 		}
407 
408 		if (desc_count < 0)
409 			goto tx_send_failed;
410 
411 		if (desc_count > 0)
412 			desc_total += desc_count;
413 
414 		nmp = tmp;
415 	}
416 
417 	/* Assign the message to the last sw packet */
418 	ASSERT(packet);
419 	ASSERT(packet->mp == NULL);
420 	packet->mp = mp;
421 
422 	/* Try to recycle the tx descriptors again */
423 	if (e1000g_tx_free_desc_num(tx_ring) < MAX_TX_DESC_PER_PACKET) {
424 		Adapter->tx_recycle_retry++;
425 		(void) e1000g_recycle(tx_ring);
426 	}
427 
428 	mutex_enter(&tx_ring->tx_lock);
429 
430 	/*
431 	 * If the number of available tx descriptors is not enough for transmit
432 	 * (one redundant descriptor and one hw checksum context descriptor are
433 	 * included), then return failure.
434 	 */
435 	if (e1000g_tx_free_desc_num(tx_ring) < (desc_total + 2)) {
436 		e1000g_DEBUGLOG_0(Adapter, e1000g_INFO_LEVEL,
437 		    "No Enough Tx descriptors\n");
438 		Adapter->tx_no_desc++;
439 		mutex_exit(&tx_ring->tx_lock);
440 		goto tx_send_failed;
441 	}
442 
443 	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list,
444 	    cksum_flags, cksum_load);
445 
446 	mutex_exit(&tx_ring->tx_lock);
447 
448 	ASSERT(desc_count > 0);
449 
450 	/* Update statistic counters */
451 	if (Adapter->ProfileJumboTraffic) {
452 		if ((msg_size > ETHERMAX) &&
453 		    (msg_size <= FRAME_SIZE_UPTO_4K))
454 			Adapter->JumboTx_4K++;
455 
456 		if ((msg_size > FRAME_SIZE_UPTO_4K) &&
457 		    (msg_size <= FRAME_SIZE_UPTO_8K))
458 			Adapter->JumboTx_8K++;
459 
460 		if ((msg_size > FRAME_SIZE_UPTO_8K) &&
461 		    (msg_size <= FRAME_SIZE_UPTO_16K))
462 			Adapter->JumboTx_16K++;
463 	}
464 
465 	/* Send successful */
466 	return (B_TRUE);
467 
468 tx_send_failed:
469 	/* Free pending TxSwPackets */
470 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&pending_list);
471 	while (packet) {
472 		packet->mp = NULL;
473 		FreeTxSwPacket(packet);
474 		packet = (PTX_SW_PACKET)
475 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
476 	}
477 
478 	/* Return pending TxSwPackets to the "Free" list */
479 	mutex_enter(&tx_ring->freelist_lock);
480 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
481 	mutex_exit(&tx_ring->freelist_lock);
482 
483 	Adapter->tx_send_fail++;
484 
485 	freemsg(mp);
486 
487 	/* Send failed, message dropped */
488 	return (B_TRUE);
489 
490 tx_no_resource:
491 	/*
492 	 * Enable Transmit interrupts, so that the interrupt routine can
493 	 * call mac_tx_update() when transmit descriptors become available.
494 	 */
495 	Adapter->resched_needed = B_TRUE;
496 	if (!Adapter->tx_intr_enable)
497 		e1000g_EnableTxInterrupt(Adapter);
498 
499 	/* Message will be scheduled for re-transmit */
500 	return (B_FALSE);
501 }
502 
503 static int
504 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
505     uint_t cksum_flags, boolean_t cksum_load)
506 {
507 	struct e1000g *Adapter;
508 	PTX_SW_PACKET first_packet;
509 	PTX_SW_PACKET packet;
510 	struct e1000_context_desc *cksum_desc;
511 	struct e1000_tx_desc *first_data_desc;
512 	struct e1000_tx_desc *next_desc;
513 	struct e1000_tx_desc *descriptor;
514 	uint32_t sync_offset;
515 	int sync_len;
516 	int desc_count;
517 	int i;
518 
519 	Adapter = tx_ring->adapter;
520 
521 	desc_count = 0;
522 	cksum_desc = NULL;
523 	first_data_desc = NULL;
524 	descriptor = NULL;
525 
526 	first_packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
527 	ASSERT(first_packet);
528 
529 	next_desc = tx_ring->tbd_next;
530 
531 	/* IP Head/TCP/UDP checksum offload */
532 	if (cksum_load) {
533 		descriptor = next_desc;
534 
535 		cksum_desc = (struct e1000_context_desc *)descriptor;
536 
537 		e1000g_fill_context_descriptor(tx_ring, cksum_desc);
538 
539 		/* Check the wrap-around case */
540 		if (descriptor == tx_ring->tbd_last)
541 			next_desc = tx_ring->tbd_first;
542 		else
543 			next_desc++;
544 
545 		desc_count++;
546 	}
547 
548 	if (cksum_desc == NULL)
549 		first_packet = NULL;
550 
551 	first_data_desc = next_desc;
552 
553 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(pending_list);
554 	while (packet) {
555 		ASSERT(packet->num_desc);
556 
557 		for (i = 0; i < packet->num_desc; i++) {
558 			ASSERT(e1000g_tx_free_desc_num(tx_ring) > 0);
559 
560 			descriptor = next_desc;
561 #ifdef __sparc
562 			descriptor->buffer_addr =
563 			    DWORD_SWAP(packet->desc[i].Address);
564 #else
565 			descriptor->buffer_addr =
566 			    packet->desc[i].Address;
567 #endif
568 			descriptor->lower.data =
569 			    packet->desc[i].Length;
570 
571 			/* Zero out status */
572 			descriptor->upper.data = 0;
573 
574 			descriptor->lower.data |=
575 			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
576 			/* must set RS on every outgoing descriptor */
577 			descriptor->lower.data |=
578 			    E1000_TXD_CMD_RS;
579 
580 			/* Check the wrap-around case */
581 			if (descriptor == tx_ring->tbd_last)
582 				next_desc = tx_ring->tbd_first;
583 			else
584 				next_desc++;
585 
586 			desc_count++;
587 		}
588 
589 		if (first_packet != NULL) {
590 			/*
591 			 * Count the checksum context descriptor for
592 			 * the first SwPacket.
593 			 */
594 			first_packet->num_desc++;
595 			first_packet = NULL;
596 		}
597 
598 		packet = (PTX_SW_PACKET)
599 		    QUEUE_GET_NEXT(pending_list, &packet->Link);
600 	}
601 
602 	ASSERT(descriptor);
603 
604 	if (cksum_flags) {
605 		if (cksum_flags & HCK_IPV4_HDRCKSUM)
606 			((struct e1000_data_desc *)first_data_desc)->
607 				upper.fields.popts |= E1000_TXD_POPTS_IXSM;
608 		if (cksum_flags & HCK_PARTIALCKSUM)
609 			((struct e1000_data_desc *)first_data_desc)->
610 				upper.fields.popts |= E1000_TXD_POPTS_TXSM;
611 	}
612 
613 	/*
614 	 * Last Descriptor of Packet needs End Of Packet (EOP), Report
615 	 * Status (RS) and append Ethernet CRC (IFCS) bits set.
616 	 */
617 	if (Adapter->TxInterruptDelay) {
618 		descriptor->lower.data |= E1000_TXD_CMD_IDE |
619 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
620 	} else {
621 		descriptor->lower.data |=
622 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
623 	}
624 
625 	/*
626 	 * Sync the Tx descriptors DMA buffer
627 	 */
628 	sync_offset = tx_ring->tbd_next - tx_ring->tbd_first;
629 	sync_len = descriptor - tx_ring->tbd_next + 1;
630 	/* Check the wrap-around case */
631 	if (sync_len > 0) {
632 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
633 		    sync_offset * sizeof (struct e1000_tx_desc),
634 		    sync_len * sizeof (struct e1000_tx_desc),
635 		    DDI_DMA_SYNC_FORDEV);
636 	} else {
637 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
638 		    sync_offset * sizeof (struct e1000_tx_desc),
639 		    0,
640 		    DDI_DMA_SYNC_FORDEV);
641 		sync_len = descriptor - tx_ring->tbd_first + 1;
642 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
643 		    0,
644 		    sync_len * sizeof (struct e1000_tx_desc),
645 		    DDI_DMA_SYNC_FORDEV);
646 	}
647 
648 	tx_ring->tbd_next = next_desc;
649 
650 	/*
651 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
652 	 * FX1000 that this frame is available to transmit.
653 	 */
654 	if (Adapter->Shared.mac_type == e1000_82547)
655 		e1000g_82547_tx_move_tail(tx_ring);
656 	else
657 		E1000_WRITE_REG(&Adapter->Shared, TDT,
658 		    (uint32_t)(next_desc - tx_ring->tbd_first));
659 
660 	/* Put the pending SwPackets to the "Used" list */
661 	mutex_enter(&tx_ring->usedlist_lock);
662 	QUEUE_APPEND(&tx_ring->used_list, pending_list);
663 	mutex_exit(&tx_ring->usedlist_lock);
664 
665 	return (desc_count);
666 }
667 
668 
669 /*
670  * **********************************************************************
671  * Name:	SetupTransmitStructures					*
672  *									*
673  * Description: This routine initializes all of the transmit related	*
674  *	structures.  This includes the Transmit descriptors, the	*
675  *	coalesce buffers, and the TX_SW_PACKETs structures.		*
676  *									*
677  *	NOTE -- The device must have been reset before this		*
678  *		routine is called.					*
679  *									*
680  * Author:	Hari Seshadri						*
681  * Functions Called : get_32bit_value					*
682  *									*
683  *									*
684  *									*
685  * Arguments:								*
686  *	Adapter - A pointer to our context sensitive "Adapter"		*
687  *	structure.							*
688  *									*
689  * Returns:								*
690  *      (none)								*
691  *									*
692  * Modification log:							*
693  * Date      Who  Description						*
694  * --------  ---  -----------------------------------------------------	*
695  *									*
696  * **********************************************************************
697  */
698 void
699 SetupTransmitStructures(struct e1000g *Adapter)
700 {
701 	struct e1000_hw *hw;
702 	PTX_SW_PACKET packet;
703 	UINT i;
704 	uint32_t buf_high;
705 	uint32_t buf_low;
706 	uint32_t reg_tipg;
707 	uint32_t reg_tctl;
708 	uint32_t reg_tarc;
709 	uint16_t speed, duplex;
710 	int size;
711 	e1000g_tx_ring_t *tx_ring;
712 
713 	hw = &Adapter->Shared;
714 	tx_ring = Adapter->tx_ring;
715 
716 	/* init the lists */
717 	/*
718 	 * Here we don't need to protect the lists using the
719 	 * tx_usedlist_lock and tx_freelist_lock, for they have
720 	 * been protected by the chip_lock.
721 	 */
722 	QUEUE_INIT_LIST(&tx_ring->used_list);
723 	QUEUE_INIT_LIST(&tx_ring->free_list);
724 
725 	/* Go through and set up each SW_Packet */
726 	packet = tx_ring->packet_area;
727 	for (i = 0; i < Adapter->NumTxSwPacket; i++, packet++) {
728 		/* Initialize this TX_SW_PACKET area */
729 		FreeTxSwPacket(packet);
730 		/* Add this TX_SW_PACKET to the free list */
731 		QUEUE_PUSH_TAIL(&tx_ring->free_list,
732 		    &packet->Link);
733 	}
734 
735 	/* Setup TX descriptor pointers */
736 	tx_ring->tbd_next = tx_ring->tbd_first;
737 	tx_ring->tbd_oldest = tx_ring->tbd_first;
738 
739 	/*
740 	 * Setup Hardware TX Registers
741 	 */
742 	/* Setup the Transmit Control Register (TCTL). */
743 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
744 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
745 	    (E1000_FDX_COLLISION_DISTANCE << E1000_COLD_SHIFT);
746 
747 	/* Enable the MULR bit */
748 	if (hw->bus_type == e1000_bus_type_pci_express)
749 		reg_tctl |= E1000_TCTL_MULR;
750 
751 	E1000_WRITE_REG(hw, TCTL, reg_tctl);
752 
753 	if ((hw->mac_type == e1000_82571) || (hw->mac_type == e1000_82572)) {
754 		e1000_get_speed_and_duplex(hw, &speed, &duplex);
755 
756 		reg_tarc = E1000_READ_REG(hw, TARC0);
757 		reg_tarc |= (1 << 25);
758 		if (speed == SPEED_1000)
759 			reg_tarc |= (1 << 21);
760 		E1000_WRITE_REG(hw, TARC0, reg_tarc);
761 
762 		reg_tarc = E1000_READ_REG(hw, TARC1);
763 		reg_tarc |= (1 << 25);
764 		if (reg_tctl & E1000_TCTL_MULR)
765 			reg_tarc &= ~(1 << 28);
766 		else
767 			reg_tarc |= (1 << 28);
768 		E1000_WRITE_REG(hw, TARC1, reg_tarc);
769 
770 	} else if (hw->mac_type == e1000_80003es2lan) {
771 		reg_tarc = E1000_READ_REG(hw, TARC0);
772 		reg_tarc |= 1;
773 		if (hw->media_type == e1000_media_type_internal_serdes)
774 			reg_tarc |= (1 << 20);
775 		E1000_WRITE_REG(hw, TARC0, reg_tarc);
776 
777 		reg_tarc = E1000_READ_REG(hw, TARC1);
778 		reg_tarc |= 1;
779 		E1000_WRITE_REG(hw, TARC1, reg_tarc);
780 	}
781 
782 	/* Setup HW Base and Length of Tx descriptor area */
783 	size = (Adapter->NumTxDescriptors * sizeof (struct e1000_tx_desc));
784 	E1000_WRITE_REG(hw, TDLEN, size);
785 	size = E1000_READ_REG(hw, TDLEN);
786 
787 	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
788 	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);
789 
790 	E1000_WRITE_REG(hw, TDBAL, buf_low);
791 	E1000_WRITE_REG(hw, TDBAH, buf_high);
792 
793 	/* Setup our HW Tx Head & Tail descriptor pointers */
794 	E1000_WRITE_REG(hw, TDH, 0);
795 	E1000_WRITE_REG(hw, TDT, 0);
796 
797 	/* Set the default values for the Tx Inter Packet Gap timer */
798 	switch (hw->mac_type) {
799 	case e1000_82542_rev2_0:
800 	case e1000_82542_rev2_1:
801 		reg_tipg = DEFAULT_82542_TIPG_IPGT;
802 		reg_tipg |=
803 		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
804 		reg_tipg |=
805 		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
806 		break;
807 	default:
808 		if (hw->media_type == e1000_media_type_fiber)
809 			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
810 		else
811 			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
812 		reg_tipg |=
813 		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
814 		reg_tipg |=
815 		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
816 		break;
817 	}
818 	E1000_WRITE_REG(hw, TIPG, reg_tipg);
819 
820 	/* Setup Transmit Interrupt Delay Value */
821 	if (Adapter->TxInterruptDelay) {
822 		E1000_WRITE_REG(hw, TIDV, Adapter->TxInterruptDelay);
823 	}
824 
825 	/* For TCP/UDP checksum offload */
826 	tx_ring->cksum_stuff = 0;
827 	tx_ring->cksum_start = 0;
828 	tx_ring->cksum_flags = 0;
829 
830 	/* Initialize tx parameters */
831 	Adapter->tx_bcopy_thresh = DEFAULTTXBCOPYTHRESHOLD;
832 	Adapter->tx_recycle_low_water = DEFAULTTXRECYCLELOWWATER;
833 	Adapter->tx_recycle_num = DEFAULTTXRECYCLENUM;
834 	Adapter->tx_intr_enable = B_TRUE;
835 	Adapter->tx_frags_limit =
836 	    (Adapter->Shared.max_frame_size / Adapter->tx_bcopy_thresh) + 2;
837 	if (Adapter->tx_frags_limit > (MAX_TX_DESC_PER_PACKET >> 1))
838 		Adapter->tx_frags_limit = (MAX_TX_DESC_PER_PACKET >> 1);
839 }
840 
841 /*
842  * **********************************************************************
843  * Name:	e1000g_recycle						*
844  *									*
845  * Description: This routine cleans transmit packets.			*
846  *									*
847  *									*
848  *									*
849  * Arguments:								*
850  *      Adapter - A pointer to our context sensitive "Adapter"		*
851  *      structure.							*
852  *									*
853  * Returns:								*
854  *      (none)								*
855  * Functions Called:							*
856  *	  None								*
857  *									*
858  * Modification log:							*
859  * Date      Who  Description						*
860  * --------  ---  -----------------------------------------------------	*
861  *									*
862  * **********************************************************************
863  */
864 int
865 e1000g_recycle(e1000g_tx_ring_t *tx_ring)
866 {
867 	struct e1000g *Adapter;
868 	LIST_DESCRIBER pending_list;
869 	PTX_SW_PACKET packet;
870 	e1000g_msg_chain_t *msg_chain;
871 	mblk_t *mp;
872 	mblk_t *nmp;
873 	struct e1000_tx_desc *descriptor;
874 	int desc_count;
875 
876 	/*
877 	 * This function will examine each TxSwPacket in the 'used' queue
878 	 * if the e1000g is done with it then the associated resources (Tx
879 	 * Descriptors) will be "freed" and the TxSwPacket will be
880 	 * returned to the 'free' queue.
881 	 */
882 	Adapter = tx_ring->adapter;
883 
884 	desc_count = 0;
885 	QUEUE_INIT_LIST(&pending_list);
886 
887 	mutex_enter(&tx_ring->usedlist_lock);
888 
889 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&tx_ring->used_list);
890 	if (packet == NULL) {
891 		mutex_exit(&tx_ring->usedlist_lock);
892 		Adapter->tx_recycle_fail = 0;
893 		Adapter->StallWatchdog = 0;
894 		return (0);
895 	}
896 
897 	/*
898 	 * While there are still TxSwPackets in the used queue check them
899 	 */
900 	while (packet =
901 	    (PTX_SW_PACKET) QUEUE_GET_HEAD(&tx_ring->used_list)) {
902 
903 		/*
904 		 * Get hold of the next descriptor that the e1000g will
905 		 * report status back to (this will be the last descriptor
906 		 * of a given TxSwPacket). We only want to free the
907 		 * TxSwPacket (and it resources) if the e1000g is done
908 		 * with ALL of the descriptors.  If the e1000g is done
909 		 * with the last one then it is done with all of them.
910 		 */
911 		ASSERT(packet->num_desc);
912 		descriptor = tx_ring->tbd_oldest +
913 		    (packet->num_desc - 1);
914 
915 		/* Check for wrap case */
916 		if (descriptor > tx_ring->tbd_last)
917 			descriptor -= Adapter->NumTxDescriptors;
918 
919 		/* Sync the Tx descriptor DMA buffer */
920 		(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
921 		    (descriptor - tx_ring->tbd_first) *
922 		    sizeof (struct e1000_tx_desc),
923 		    sizeof (struct e1000_tx_desc),
924 		    DDI_DMA_SYNC_FORCPU);
925 
926 		/*
927 		 * If the descriptor done bit is set free TxSwPacket and
928 		 * associated resources
929 		 */
930 		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
931 			QUEUE_POP_HEAD(&tx_ring->used_list);
932 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
933 
934 			if (descriptor == tx_ring->tbd_last)
935 				tx_ring->tbd_oldest =
936 				    tx_ring->tbd_first;
937 			else
938 				tx_ring->tbd_oldest =
939 				    descriptor + 1;
940 
941 			desc_count += packet->num_desc;
942 
943 			if (desc_count >= Adapter->tx_recycle_num)
944 				break;
945 		} else {
946 			/*
947 			 * Found a TxSwPacket that the e1000g is not done
948 			 * with then there is no reason to check the rest
949 			 * of the queue.
950 			 */
951 			break;
952 		}
953 	}
954 
955 	mutex_exit(&tx_ring->usedlist_lock);
956 
957 	if (desc_count == 0) {
958 		Adapter->tx_recycle_fail++;
959 		Adapter->tx_recycle_none++;
960 		return (0);
961 	}
962 
963 	Adapter->tx_recycle_fail = 0;
964 	Adapter->StallWatchdog = 0;
965 
966 	mp = NULL;
967 	nmp = NULL;
968 	packet = (PTX_SW_PACKET) QUEUE_GET_HEAD(&pending_list);
969 	ASSERT(packet != NULL);
970 	while (packet != NULL) {
971 		if (packet->mp != NULL) {
972 			ASSERT(packet->mp->b_next == NULL);
973 			/* Assemble the message chain */
974 			if (mp == NULL) {
975 				mp = packet->mp;
976 				nmp = packet->mp;
977 			} else {
978 				nmp->b_next = packet->mp;
979 				nmp = packet->mp;
980 			}
981 			/* Disconnect the message from the sw packet */
982 			packet->mp = NULL;
983 		}
984 
985 		/* Free the TxSwPackets */
986 		FreeTxSwPacket(packet);
987 
988 		packet = (PTX_SW_PACKET)
989 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
990 	}
991 
992 	/* Save the message chain */
993 	if (mp != NULL) {
994 		msg_chain = Adapter->tx_msg_chain;
995 		mutex_enter(&msg_chain->lock);
996 		if (msg_chain->head == NULL) {
997 			msg_chain->head = mp;
998 			msg_chain->tail = nmp;
999 		} else {
1000 			msg_chain->tail->b_next = mp;
1001 			msg_chain->tail = nmp;
1002 		}
1003 		mutex_exit(&msg_chain->lock);
1004 
1005 		/*
1006 		 * If the tx interrupt is enabled, the messages will be freed
1007 		 * in the tx interrupt; Otherwise, they are freed here by
1008 		 * triggering a soft interrupt.
1009 		 */
1010 		if (!Adapter->tx_intr_enable)
1011 			ddi_intr_trigger_softint(Adapter->tx_softint_handle,
1012 			    NULL);
1013 	}
1014 
1015 	/* Return the TxSwPackets back to the FreeList */
1016 	mutex_enter(&tx_ring->freelist_lock);
1017 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
1018 	mutex_exit(&tx_ring->freelist_lock);
1019 
1020 	return (desc_count);
1021 }
1022 
1023 /*
1024  * 82544 Coexistence issue workaround:
1025  *    There are 2 issues.
1026  *    1. If a 32 bit split completion happens from P64H2 and another
1027  *	agent drives a 64 bit request/split completion after ONLY
1028  *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
1029  *	82544 has a problem where in to clock all the data in, it
1030  *	looks at REQ64# signal and since it has changed so fast (i.e. 1
1031  *	idle clock turn around), it will fail to clock all the data in.
1032  *	Data coming from certain ending addresses has exposure to this issue.
1033  *
1034  * To detect this issue, following equation can be used...
1035  *	SIZE[3:0] + ADDR[2:0] = SUM[3:0].
1036  *	If SUM[3:0] is in between 1 to 4, we will have this issue.
1037  *
1038  * ROOT CAUSE:
1039  *	The erratum involves the 82544 PCIX elasticity FIFO implementations as
1040  *	64-bit FIFO's and flushing of the final partial-bytes corresponding
1041  *	to the end of a requested read burst. Under a specific burst condition
1042  *	of ending-data alignment and 32-byte split-completions, the final
1043  *	byte(s) of split-completion data require an extra clock cycle to flush
1044  *	into 64-bit FIFO orientation.  An incorrect logic dependency on the
1045  *	REQ64# signal occurring during this clock cycle may cause the
1046  *	residual byte(s) to be lost, thereby rendering the internal DMA client
1047  *	forever awaiting the final byte(s) for an outbound data-fetch.  The
1048  *	erratum is confirmed to *only* occur if certain subsequent external
1049  *	64-bit PCIX bus transactions occur immediately (minimum possible bus
1050  *	turn-around) following the odd-aligned 32-bit split-completion
1051  *	containing the final byte(s).  Intel has confirmed that this has been
1052  *	seen only with chipset/bridges which have the capability to provide
1053  *	32-bit split-completion data, and in the presence of newer PCIX bus
1054  *	agents which fully-optimize the inter-transaction turn-around (zero
1055  *	additional initiator latency when pre-granted bus ownership).
1056  *
1057  *   	This issue does not exist in PCI bus mode, when any agent is operating
1058  *	in 32 bit only mode or on chipsets that do not do 32 bit split
1059  *	completions for 64 bit read requests (Serverworks chipsets). P64H2 does
1060  *	32 bit split completions for any read request that has bit 2 set to 1
1061  *	for the requested address and read request size is more than 8 bytes.
1062  *
1063  *   2. Another issue is related to 82544 driving DACs under the similar
1064  *	scenario (32 bit split completion followed by 64 bit transaction with
1065  *	only 1 cycle turnaround). This issue is still being root caused. We
1066  *	think that both of these issues can be avoided if following workaround
1067  *	is implemented. It seems DAC issues is related to ending addresses being
1068  *	0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity
1069  *	FIFO which does not get flushed due to REQ64# dependency. We will only
1070  *	know the full story after it has been simulated successfully by HW team.
1071  *
1072  * WORKAROUND:
1073  *	Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
1074  */
1075 static uint32_t
1076 e1000g_fill_82544_desc(uint64_t Address,
1077     size_t Length, PDESC_ARRAY desc_array)
1078 {
1079 	/*
1080 	 * Since issue is sensitive to length and address.
1081 	 * Let us first check the address...
1082 	 */
1083 	uint32_t safe_terminator;
1084 
1085 	if (Length <= 4) {
1086 		desc_array->Descriptor[0].Address = Address;
1087 		desc_array->Descriptor[0].Length = Length;
1088 		desc_array->Elements = 1;
1089 		return (desc_array->Elements);
1090 	}
1091 	safe_terminator =
1092 	    (uint32_t)((((uint32_t)Address & 0x7) +
1093 		(Length & 0xF)) & 0xF);
1094 	/*
1095 	 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then
1096 	 * return
1097 	 */
1098 	if (safe_terminator == 0 ||
1099 	    (safe_terminator > 4 &&
1100 		safe_terminator < 9) ||
1101 	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
1102 		desc_array->Descriptor[0].Address = Address;
1103 		desc_array->Descriptor[0].Length = Length;
1104 		desc_array->Elements = 1;
1105 		return (desc_array->Elements);
1106 	}
1107 
1108 	desc_array->Descriptor[0].Address = Address;
1109 	desc_array->Descriptor[0].Length = Length - 4;
1110 	desc_array->Descriptor[1].Address = Address + (Length - 4);
1111 	desc_array->Descriptor[1].Length = 4;
1112 	desc_array->Elements = 2;
1113 	return (desc_array->Elements);
1114 }
1115 
1116 static int
1117 e1000g_tx_copy(struct e1000g *Adapter, PTX_SW_PACKET packet,
1118     mblk_t *mp, uint32_t force_bcopy)
1119 {
1120 	size_t len;
1121 	size_t len1;
1122 	dma_buffer_t *tx_buf;
1123 	mblk_t *nmp;
1124 	boolean_t finished;
1125 	int desc_count;
1126 
1127 	desc_count = 0;
1128 	tx_buf = packet->tx_buf;
1129 	len = MBLKL(mp);
1130 
1131 	ASSERT((tx_buf->len + len) <= tx_buf->size);
1132 
1133 	if (len > 0) {
1134 		bcopy(mp->b_rptr,
1135 		    tx_buf->address + tx_buf->len,
1136 		    len);
1137 		tx_buf->len += len;
1138 
1139 		packet->num_mblk_frag++;
1140 	}
1141 
1142 	nmp = mp->b_cont;
1143 	if (nmp == NULL) {
1144 		finished = B_TRUE;
1145 	} else {
1146 		len1 = MBLKL(nmp);
1147 		if ((tx_buf->len + len1) > tx_buf->size)
1148 			finished = B_TRUE;
1149 		else if (force_bcopy)
1150 			finished = B_FALSE;
1151 		else if (len1 > Adapter->tx_bcopy_thresh)
1152 			finished = B_TRUE;
1153 		else
1154 			finished = B_FALSE;
1155 	}
1156 
1157 	if (finished) {
1158 		if (tx_buf->len > len)
1159 			Adapter->tx_multi_copy++;
1160 
1161 		/*
1162 		 * If the packet is smaller than 64 bytes, which is the
1163 		 * minimum ethernet packet size, pad the packet to make
1164 		 * it at least 60 bytes. The hardware will add 4 bytes
1165 		 * for CRC.
1166 		 */
1167 		if (force_bcopy & FORCE_BCOPY_UNDER_SIZE) {
1168 			ASSERT(tx_buf->len < MINIMUM_ETHERNET_PACKET_SIZE);
1169 
1170 			bzero(tx_buf->address + tx_buf->len,
1171 			    MINIMUM_ETHERNET_PACKET_SIZE - tx_buf->len);
1172 			tx_buf->len = MINIMUM_ETHERNET_PACKET_SIZE;
1173 		}
1174 
1175 		switch (packet->dma_type) {
1176 #ifdef __sparc
1177 		case USE_DVMA:
1178 			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
1179 			break;
1180 #endif
1181 		case USE_DMA:
1182 			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1183 			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1184 			break;
1185 		default:
1186 			ASSERT(B_FALSE);
1187 			break;
1188 		}
1189 
1190 		packet->data_transfer_type = USE_BCOPY;
1191 
1192 		desc_count = e1000g_fill_tx_desc(Adapter,
1193 		    packet,
1194 		    tx_buf->dma_address,
1195 		    tx_buf->len);
1196 
1197 		if (desc_count <= 0)
1198 			return (-1);
1199 	}
1200 
1201 	return (desc_count);
1202 }
1203 
1204 static int
1205 e1000g_tx_bind(struct e1000g *Adapter, PTX_SW_PACKET packet, mblk_t *mp)
1206 {
1207 	int j;
1208 	int mystat;
1209 	size_t len;
1210 	ddi_dma_cookie_t dma_cookie;
1211 	uint_t ncookies;
1212 	int desc_count;
1213 	uint32_t desc_total;
1214 
1215 	desc_total = 0;
1216 	len = MBLKL(mp);
1217 
1218 	/*
1219 	 * ddi_dma_addr_bind_handle() allocates  DMA  resources  for  a
1220 	 * memory  object such that a device can perform DMA to or from
1221 	 * the object.  DMA resources  are  allocated  considering  the
1222 	 * device's  DMA  attributes  as  expressed by ddi_dma_attr(9S)
1223 	 * (see ddi_dma_alloc_handle(9F)).
1224 	 *
1225 	 * ddi_dma_addr_bind_handle() fills in  the  first  DMA  cookie
1226 	 * pointed  to by cookiep with the appropriate address, length,
1227 	 * and bus type. *ccountp is set to the number of DMA  cookies
1228 	 * representing this DMA object. Subsequent DMA cookies must be
1229 	 * retrieved by calling ddi_dma_nextcookie(9F)  the  number  of
1230 	 * times specified by *countp - 1.
1231 	 */
1232 	switch (packet->dma_type) {
1233 #ifdef __sparc
1234 	case USE_DVMA:
1235 		dvma_kaddr_load(packet->tx_dma_handle,
1236 		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);
1237 
1238 		dvma_sync(packet->tx_dma_handle, 0,
1239 		    DDI_DMA_SYNC_FORDEV);
1240 
1241 		ncookies = 1;
1242 		packet->data_transfer_type = USE_DVMA;
1243 		break;
1244 #endif
1245 	case USE_DMA:
1246 		if ((mystat = ddi_dma_addr_bind_handle(
1247 			packet->tx_dma_handle, NULL,
1248 			(caddr_t)mp->b_rptr, len,
1249 			DDI_DMA_WRITE | DDI_DMA_STREAMING,
1250 			DDI_DMA_DONTWAIT, 0, &dma_cookie,
1251 			&ncookies)) != DDI_DMA_MAPPED) {
1252 
1253 			e1000g_log(Adapter, CE_WARN,
1254 			    "Couldn't bind mblk buffer to Tx DMA handle: "
1255 			    "return: %X, Pkt: %X\n",
1256 			    mystat, packet);
1257 			return (-1);
1258 		}
1259 
1260 		/*
1261 		 * An implicit ddi_dma_sync() is done when the
1262 		 * ddi_dma_addr_bind_handle() is called. So we
1263 		 * don't need to explicitly call ddi_dma_sync()
1264 		 * here any more.
1265 		 */
1266 		ASSERT(ncookies);
1267 		if (ncookies > 1)
1268 			Adapter->tx_multi_cookie++;
1269 
1270 		/*
1271 		 * The data_transfer_type value must be set after the handle
1272 		 * has been bound, for it will be used in FreeTxSwPacket()
1273 		 * to decide whether we need to unbind the handle.
1274 		 */
1275 		packet->data_transfer_type = USE_DMA;
1276 		break;
1277 	default:
1278 		ASSERT(B_FALSE);
1279 		break;
1280 	}
1281 
1282 	packet->num_mblk_frag++;
1283 
1284 	/*
1285 	 * Each address could span thru multpile cookie..
1286 	 * Each cookie will have one descriptor
1287 	 */
1288 	for (j = ncookies; j != 0; j--) {
1289 
1290 		desc_count = e1000g_fill_tx_desc(Adapter,
1291 		    packet,
1292 		    dma_cookie.dmac_laddress,
1293 		    dma_cookie.dmac_size);
1294 
1295 		if (desc_count <= 0)
1296 			return (-1);
1297 
1298 		desc_total += desc_count;
1299 
1300 		/*
1301 		 * ddi_dma_nextcookie() retrieves subsequent DMA
1302 		 * cookies for a DMA object.
1303 		 * ddi_dma_nextcookie() fills in the
1304 		 * ddi_dma_cookie(9S) structure pointed to by
1305 		 * cookiep.  The ddi_dma_cookie(9S) structure
1306 		 * must be allocated prior to calling
1307 		 * ddi_dma_nextcookie(). The DMA cookie count
1308 		 * returned by ddi_dma_buf_bind_handle(9F),
1309 		 * ddi_dma_addr_bind_handle(9F), or
1310 		 * ddi_dma_getwin(9F) indicates the number of DMA
1311 		 * cookies a DMA object consists of.  If the
1312 		 * resulting cookie count, N, is larger than 1,
1313 		 * ddi_dma_nextcookie() must be called N-1 times
1314 		 * to retrieve all DMA cookies.
1315 		 */
1316 		if (j > 1) {
1317 			ddi_dma_nextcookie(packet->tx_dma_handle,
1318 			    &dma_cookie);
1319 		}
1320 	}
1321 
1322 	return (desc_total);
1323 }
1324 
1325 static void
1326 e1000g_fill_context_descriptor(e1000g_tx_ring_t *tx_ring,
1327     struct e1000_context_desc *cksum_desc)
1328 {
1329 	if (tx_ring->cksum_flags & HCK_IPV4_HDRCKSUM) {
1330 		cksum_desc->lower_setup.ip_fields.ipcss =
1331 		    tx_ring->ether_header_size;
1332 		cksum_desc->lower_setup.ip_fields.ipcso =
1333 		    tx_ring->ether_header_size +
1334 		    offsetof(struct ip, ip_sum);
1335 		cksum_desc->lower_setup.ip_fields.ipcse =
1336 		    tx_ring->ether_header_size +
1337 		    sizeof (struct ip) - 1;
1338 	} else
1339 		cksum_desc->lower_setup.ip_config = 0;
1340 
1341 	if (tx_ring->cksum_flags & HCK_PARTIALCKSUM) {
1342 		/*
1343 		 * The packet with same protocol has the following
1344 		 * stuff and start offset:
1345 		 * |  Protocol  | Stuff  | Start  | Checksum
1346 		 * |		| Offset | Offset | Enable
1347 		 * | IPv4 + TCP |  0x24  |  0x14  |  Yes
1348 		 * | IPv4 + UDP |  0x1A  |  0x14  |  Yes
1349 		 * | IPv6 + TCP |  0x20  |  0x10  |  No
1350 		 * | IPv6 + UDP |  0x14  |  0x10  |  No
1351 		 */
1352 		cksum_desc->upper_setup.tcp_fields.tucss =
1353 		    tx_ring->cksum_start + tx_ring->ether_header_size;
1354 		cksum_desc->upper_setup.tcp_fields.tucso =
1355 		    tx_ring->cksum_stuff + tx_ring->ether_header_size;
1356 		cksum_desc->upper_setup.tcp_fields.tucse = 0;
1357 	} else
1358 		cksum_desc->upper_setup.tcp_config = 0;
1359 
1360 	cksum_desc->cmd_and_length = E1000_TXD_CMD_DEXT;
1361 
1362 	/*
1363 	 * Zero out the options for TCP Segmentation Offload,
1364 	 * since we don't support it in this version
1365 	 */
1366 	cksum_desc->tcp_seg_setup.data = 0;
1367 }
1368 
1369 static int
1370 e1000g_fill_tx_desc(struct e1000g *Adapter,
1371     PTX_SW_PACKET packet, uint64_t address, size_t size)
1372 {
1373 	PADDRESS_LENGTH_PAIR desc;
1374 	int desc_count;
1375 
1376 	desc_count = 0;
1377 
1378 	if ((Adapter->Shared.bus_type == e1000_bus_type_pcix) &&
1379 	    (Adapter->Shared.mac_type == e1000_82544)) {
1380 
1381 		desc_count = e1000g_tx_workaround_PCIX_82544(Adapter,
1382 		    packet, address, size);
1383 
1384 	} else if ((Adapter->Shared.mac_type == e1000_82544) &&
1385 	    (size > JUMBO_FRAG_LENGTH)) {
1386 
1387 		desc_count = e1000g_tx_workaround_jumbo_82544(Adapter,
1388 		    packet, address, size);
1389 
1390 	} else {
1391 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1392 
1393 		desc = &packet->desc[packet->num_desc];
1394 
1395 		desc->Address = address;
1396 		desc->Length = size;
1397 
1398 		packet->num_desc++;
1399 		desc_count++;
1400 	}
1401 
1402 	return (desc_count);
1403 }
1404 
1405 static int
1406 e1000g_tx_workaround_PCIX_82544(struct e1000g *Adapter,
1407     PTX_SW_PACKET packet, uint64_t address, size_t size)
1408 {
1409 	PADDRESS_LENGTH_PAIR desc;
1410 	int desc_count;
1411 	long size_left;
1412 	size_t len;
1413 	uint32_t counter;
1414 	uint32_t array_elements;
1415 	DESC_ARRAY desc_array;
1416 
1417 	/*
1418 	 * Coexist Workaround for cordova: RP: 07/04/03
1419 	 *
1420 	 * RP: ERRATA: Workaround ISSUE:
1421 	 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup
1422 	 * Eachbuffer in to 8kb pieces until the
1423 	 * remainder is < 8kb
1424 	 */
1425 	size_left = size;
1426 	desc_count = 0;
1427 
1428 	while (size_left > 0) {
1429 		if (size_left > MAX_TX_BUF_SIZE)
1430 			len = MAX_TX_BUF_SIZE;
1431 		else
1432 			len = size_left;
1433 
1434 		array_elements = e1000g_fill_82544_desc(address,
1435 		    len, &desc_array);
1436 
1437 		for (counter = 0; counter < array_elements; counter++) {
1438 			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1439 			if (packet->num_desc >= MAX_TX_DESC_PER_PACKET) {
1440 				e1000g_log(Adapter, CE_WARN,
1441 				    "No enough preparing tx descriptors");
1442 				return (-1);
1443 			}
1444 			/*
1445 			 * Put in the buffer address
1446 			 */
1447 			desc = &packet->desc[packet->num_desc];
1448 
1449 			desc->Address =
1450 			    desc_array.Descriptor[counter].Address;
1451 			desc->Length =
1452 			    desc_array.Descriptor[counter].Length;
1453 
1454 			packet->num_desc++;
1455 			desc_count++;
1456 		} /* for */
1457 
1458 		/*
1459 		 * Update the buffer address and length
1460 		 */
1461 		address += MAX_TX_BUF_SIZE;
1462 		size_left -= MAX_TX_BUF_SIZE;
1463 	} /* while */
1464 
1465 	return (desc_count);
1466 }
1467 
1468 static int
1469 e1000g_tx_workaround_jumbo_82544(struct e1000g *Adapter,
1470     PTX_SW_PACKET packet, uint64_t address, size_t size)
1471 {
1472 	PADDRESS_LENGTH_PAIR desc;
1473 	int desc_count;
1474 	long size_left;
1475 	uint32_t offset;
1476 
1477 	/*
1478 	 * Workaround for Jumbo Frames on Cordova
1479 	 * PSD 06/01/2001
1480 	 */
1481 	size_left = size;
1482 	desc_count = 0;
1483 	offset = 0;
1484 	while (size_left > 0) {
1485 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1486 		if (packet->num_desc >= MAX_TX_DESC_PER_PACKET) {
1487 			e1000g_log(Adapter, CE_WARN,
1488 			    "No enough preparing tx descriptors");
1489 			return (-1);
1490 		}
1491 
1492 		desc = &packet->desc[packet->num_desc];
1493 
1494 		desc->Address = address + offset;
1495 
1496 		if (size_left > JUMBO_FRAG_LENGTH)
1497 			desc->Length = JUMBO_FRAG_LENGTH;
1498 		else
1499 			desc->Length = size_left;
1500 
1501 		packet->num_desc++;
1502 		desc_count++;
1503 
1504 		offset += desc->Length;
1505 		size_left -= JUMBO_FRAG_LENGTH;
1506 	}
1507 
1508 	return (desc_count);
1509 }
1510 
1511 static void
1512 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
1513 {
1514 	uint16_t hw_tdt;
1515 	uint16_t sw_tdt;
1516 	struct e1000_tx_desc *tx_desc;
1517 	uint16_t length = 0;
1518 	boolean_t eop = B_FALSE;
1519 	struct e1000g *Adapter;
1520 
1521 	Adapter = tx_ring->adapter;
1522 
1523 	hw_tdt = E1000_READ_REG(&Adapter->Shared, TDT);
1524 	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;
1525 
1526 	while (hw_tdt != sw_tdt) {
1527 		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
1528 		length += tx_desc->lower.flags.length;
1529 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1530 		if (++hw_tdt == Adapter->NumTxDescriptors)
1531 			hw_tdt = 0;
1532 
1533 		if (eop) {
1534 			if ((Adapter->link_duplex == HALF_DUPLEX) &&
1535 			    e1000_82547_fifo_workaround(&Adapter->Shared,
1536 				length) != E1000_SUCCESS) {
1537 				if (tx_ring->timer_enable_82547) {
1538 					ASSERT(tx_ring->timer_id_82547 == 0);
1539 					tx_ring->timer_id_82547 =
1540 					    timeout(e1000g_82547_timeout,
1541 						(void *)Adapter,
1542 						drv_usectohz(10000));
1543 				}
1544 				return;
1545 
1546 			} else {
1547 				E1000_WRITE_REG(&Adapter->Shared, TDT,
1548 				    hw_tdt);
1549 				e1000_update_tx_fifo_head(&Adapter->Shared,
1550 				    length);
1551 				length = 0;
1552 			}
1553 		}
1554 	}
1555 }
1556 
1557 static void
1558 e1000g_82547_timeout(void *arg)
1559 {
1560 	e1000g_tx_ring_t *tx_ring;
1561 
1562 	tx_ring = (e1000g_tx_ring_t *)arg;
1563 
1564 	mutex_enter(&tx_ring->tx_lock);
1565 
1566 	tx_ring->timer_id_82547 = 0;
1567 	e1000g_82547_tx_move_tail_work(tx_ring);
1568 
1569 	mutex_exit(&tx_ring->tx_lock);
1570 }
1571 
1572 static void
1573 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
1574 {
1575 	timeout_id_t tid;
1576 
1577 	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));
1578 
1579 	tid = tx_ring->timer_id_82547;
1580 	tx_ring->timer_id_82547 = 0;
1581 	if (tid != 0) {
1582 		tx_ring->timer_enable_82547 = B_FALSE;
1583 		mutex_exit(&tx_ring->tx_lock);
1584 
1585 		(void) untimeout(tid);
1586 
1587 		mutex_enter(&tx_ring->tx_lock);
1588 	}
1589 	tx_ring->timer_enable_82547 = B_TRUE;
1590 	e1000g_82547_tx_move_tail_work(tx_ring);
1591 }
1592