xref: /titanic_44/usr/src/uts/common/io/e1000g/e1000g_tx.c (revision ab4a9beb2e4d596be0b3288c7d92919e27781b57)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2007 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_tx.c							*
33  *									*
34  * Abstract:								*
35  *   This file contains some routines that take care of Transmit,	*
36  *   make the hardware to send the data pointed by the packet out	*
37  *   on to the physical medium.						*
38  *									*
39  * **********************************************************************
40  */
41 
42 #include "e1000g_sw.h"
43 #include "e1000g_debug.h"
44 
/*
 * Forward declarations of the file-local transmit helpers defined
 * later in this file.
 */
static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, uint32_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t check_cksum_context(e1000g_tx_ring_t *, cksum_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    cksum_data_t *);
static void e1000g_fill_context_descriptor(cksum_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

/*
 * The inline requests below are only issued when E1000G_DEBUG is not
 * defined; debug builds keep these routines as ordinary functions.
 */
#ifndef E1000G_DEBUG
#pragma inline(e1000g_tx_copy)
#pragma inline(e1000g_tx_bind)
#pragma inline(check_cksum_context)
#pragma inline(e1000g_fill_tx_ring)
#pragma inline(e1000g_fill_context_descriptor)
#pragma inline(e1000g_fill_tx_desc)
#pragma inline(e1000g_fill_82544_desc)
#pragma inline(e1000g_tx_workaround_PCIX_82544)
#pragma inline(e1000g_tx_workaround_jumbo_82544)
#pragma inline(e1000g_free_tx_swpkt)
#endif
77 
78 /*
79  * e1000g_free_tx_swpkt	- free up the tx sw packet
80  *
81  * Unbind the previously bound DMA handle for a given
82  * transmit sw packet. And reset the sw packet data.
83  */
84 void
85 e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
86 {
87 	switch (packet->data_transfer_type) {
88 	case USE_BCOPY:
89 		packet->tx_buf->len = 0;
90 		break;
91 #ifdef __sparc
92 	case USE_DVMA:
93 		dvma_unload(packet->tx_dma_handle, 0, -1);
94 		break;
95 #endif
96 	case USE_DMA:
97 		ddi_dma_unbind_handle(packet->tx_dma_handle);
98 		break;
99 	default:
100 		break;
101 	}
102 
103 	/*
104 	 * The mblk has been stripped off the sw packet
105 	 * and will be freed in a triggered soft intr.
106 	 */
107 	ASSERT(packet->mp == NULL);
108 
109 	packet->data_transfer_type = USE_NONE;
110 	packet->num_mblk_frag = 0;
111 	packet->num_desc = 0;
112 }
113 
114 #pragma inline(e1000g_tx_freemsg)
115 
116 void
117 e1000g_tx_freemsg(e1000g_tx_ring_t *tx_ring)
118 {
119 	mblk_t *mp;
120 
121 	if (mutex_tryenter(&tx_ring->mblks_lock) == 0)
122 		return;
123 
124 	mp = tx_ring->mblks.head;
125 
126 	tx_ring->mblks.head = NULL;
127 	tx_ring->mblks.tail = NULL;
128 
129 	mutex_exit(&tx_ring->mblks_lock);
130 
131 	if (mp != NULL)
132 		freemsgchain(mp);
133 }
134 
135 uint_t
136 e1000g_tx_softint_worker(caddr_t arg1, caddr_t arg2)
137 {
138 	struct e1000g *Adapter;
139 	mblk_t *mp;
140 
141 	Adapter = (struct e1000g *)arg1;
142 
143 	if (Adapter == NULL)
144 		return (DDI_INTR_UNCLAIMED);
145 
146 	e1000g_tx_freemsg(Adapter->tx_ring);
147 
148 	return (DDI_INTR_CLAIMED);
149 }
150 
151 mblk_t *
152 e1000g_m_tx(void *arg, mblk_t *mp)
153 {
154 	struct e1000g *Adapter = (struct e1000g *)arg;
155 	mblk_t *next;
156 
157 	rw_enter(&Adapter->chip_lock, RW_READER);
158 
159 	if ((Adapter->chip_state != E1000G_START) ||
160 	    (Adapter->link_state != LINK_STATE_UP)) {
161 		freemsgchain(mp);
162 		mp = NULL;
163 	}
164 
165 	while (mp != NULL) {
166 		next = mp->b_next;
167 		mp->b_next = NULL;
168 
169 		if (!e1000g_send(Adapter, mp)) {
170 			mp->b_next = next;
171 			break;
172 		}
173 
174 		mp = next;
175 	}
176 
177 	rw_exit(&Adapter->chip_lock);
178 	return (mp);
179 }
180 
181 /*
182  * e1000g_send -  send packets onto the wire
183  *
184  * Called from e1000g_m_tx with an mblk ready to send. this
185  * routine sets up the transmit descriptors and sends data to
186  * the wire. It also pushes the just transmitted packet to
187  * the used tx sw packet list.
188  */
189 static boolean_t
190 e1000g_send(struct e1000g *Adapter, mblk_t *mp)
191 {
192 	struct e1000_hw *hw;
193 	p_tx_sw_packet_t packet;
194 	LIST_DESCRIBER pending_list;
195 	size_t len;
196 	size_t msg_size;
197 	uint32_t frag_count;
198 	int desc_count;
199 	uint32_t desc_total;
200 	uint32_t force_bcopy;
201 	mblk_t *nmp;
202 	mblk_t *tmp;
203 	e1000g_tx_ring_t *tx_ring;
204 	cksum_data_t cksum;
205 
206 	hw = &Adapter->shared;
207 	tx_ring = Adapter->tx_ring;
208 
209 	/* Get the total size and frags number of the message */
210 	force_bcopy = 0;
211 	frag_count = 0;
212 	msg_size = 0;
213 	for (nmp = mp; nmp; nmp = nmp->b_cont) {
214 		frag_count++;
215 		msg_size += MBLKL(nmp);
216 	}
217 
218 	/* Empty packet */
219 	if (msg_size == 0) {
220 		freemsg(mp);
221 		return (B_TRUE);
222 	}
223 
224 	/* Make sure packet is less than the max frame size */
225 	if (msg_size > hw->mac.max_frame_size + VLAN_TAGSZ) {
226 		/*
227 		 * For the over size packet, we'll just drop it.
228 		 * So we return B_TRUE here.
229 		 */
230 		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
231 		    "Tx packet out of bound. length = %d \n", msg_size);
232 		E1000G_STAT(tx_ring->stat_over_size);
233 		freemsg(mp);
234 		return (B_TRUE);
235 	}
236 
237 	/*
238 	 * Check and reclaim tx descriptors.
239 	 * This low water mark check should be done all the time as
240 	 * Transmit interrupt delay can produce Transmit interrupts little
241 	 * late and that may cause few problems related to reaping Tx
242 	 * Descriptors... As you may run short of them before getting any
243 	 * transmit interrupt...
244 	 */
245 	if ((Adapter->tx_desc_num - tx_ring->tbd_avail) >
246 	    tx_ring->recycle_low_water) {
247 		E1000G_DEBUG_STAT(tx_ring->stat_recycle);
248 		(void) e1000g_recycle(tx_ring);
249 	}
250 
251 	if (tx_ring->tbd_avail < MAX_TX_DESC_PER_PACKET) {
252 		E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
253 		goto tx_no_resource;
254 	}
255 
256 	/*
257 	 * If there are many frags of the message, then bcopy them
258 	 * into one tx descriptor buffer will get better performance.
259 	 */
260 	if ((frag_count >= tx_ring->frags_limit) &&
261 	    (msg_size <= Adapter->tx_buffer_size)) {
262 		E1000G_DEBUG_STAT(tx_ring->stat_exceed_frags);
263 		force_bcopy |= FORCE_BCOPY_EXCEED_FRAGS;
264 	}
265 
266 	/*
267 	 * If the message size is less than the minimum ethernet packet size,
268 	 * we'll use bcopy to send it, and padd it to 60 bytes later.
269 	 */
270 	if (msg_size < MINIMUM_ETHERNET_PACKET_SIZE) {
271 		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
272 		force_bcopy |= FORCE_BCOPY_UNDER_SIZE;
273 	}
274 
275 	/* Initialize variables */
276 	desc_count = 1;	/* The initial value should be greater than 0 */
277 	desc_total = 0;
278 	QUEUE_INIT_LIST(&pending_list);
279 
280 	/* Retrieve checksum info */
281 	hcksum_retrieve(mp, NULL, NULL, &cksum.cksum_start, &cksum.cksum_stuff,
282 	    NULL, NULL, &cksum.cksum_flags);
283 
284 	if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
285 	    htons(ETHERTYPE_VLAN))
286 		cksum.ether_header_size = sizeof (struct ether_vlan_header);
287 	else
288 		cksum.ether_header_size = sizeof (struct ether_header);
289 
290 	/* Process each mblk fragment and fill tx descriptors */
291 	packet = NULL;
292 	nmp = mp;
293 	while (nmp) {
294 		tmp = nmp->b_cont;
295 
296 		len = MBLKL(nmp);
297 		/* Check zero length mblks */
298 		if (len == 0) {
299 			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
300 			/*
301 			 * If there're no packet buffers have been used,
302 			 * or we just completed processing a buffer, then
303 			 * skip the empty mblk fragment.
304 			 * Otherwise, there's still a pending buffer that
305 			 * needs to be processed (tx_copy).
306 			 */
307 			if (desc_count > 0) {
308 				nmp = tmp;
309 				continue;
310 			}
311 		}
312 
313 		/*
314 		 * Get a new TxSwPacket to process mblk buffers.
315 		 */
316 		if (desc_count > 0) {
317 
318 			mutex_enter(&tx_ring->freelist_lock);
319 			packet = (p_tx_sw_packet_t)
320 			    QUEUE_POP_HEAD(&tx_ring->free_list);
321 			mutex_exit(&tx_ring->freelist_lock);
322 
323 			if (packet == NULL) {
324 				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
325 				    "No Tx SwPacket available\n");
326 				E1000G_STAT(tx_ring->stat_no_swpkt);
327 				goto tx_send_failed;
328 			}
329 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
330 		}
331 
332 		ASSERT(packet);
333 		/*
334 		 * If the size of the fragment is less than the tx_bcopy_thresh
335 		 * we'll use bcopy; Otherwise, we'll use DMA binding.
336 		 */
337 		if ((len <= Adapter->tx_bcopy_thresh) || force_bcopy) {
338 			desc_count =
339 			    e1000g_tx_copy(tx_ring, packet, nmp, force_bcopy);
340 			E1000G_DEBUG_STAT(tx_ring->stat_copy);
341 		} else {
342 			desc_count =
343 			    e1000g_tx_bind(tx_ring, packet, nmp);
344 			E1000G_DEBUG_STAT(tx_ring->stat_bind);
345 		}
346 
347 		if (desc_count > 0)
348 			desc_total += desc_count;
349 		else if (desc_count < 0)
350 			goto tx_send_failed;
351 
352 		nmp = tmp;
353 	}
354 
355 	/* Assign the message to the last sw packet */
356 	ASSERT(packet);
357 	ASSERT(packet->mp == NULL);
358 	packet->mp = mp;
359 
360 	/* Try to recycle the tx descriptors again */
361 	if (tx_ring->tbd_avail < (desc_total + 2)) {
362 		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
363 		(void) e1000g_recycle(tx_ring);
364 	}
365 
366 	mutex_enter(&tx_ring->tx_lock);
367 
368 	/*
369 	 * If the number of available tx descriptors is not enough for transmit
370 	 * (one redundant descriptor and one hw checksum context descriptor are
371 	 * included), then return failure.
372 	 */
373 	if (tx_ring->tbd_avail < (desc_total + 2)) {
374 		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
375 		    "No Enough Tx descriptors\n");
376 		E1000G_STAT(tx_ring->stat_no_desc);
377 		mutex_exit(&tx_ring->tx_lock);
378 		goto tx_send_failed;
379 	}
380 
381 	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cksum);
382 
383 	mutex_exit(&tx_ring->tx_lock);
384 
385 	ASSERT(desc_count > 0);
386 
387 	/* Send successful */
388 	return (B_TRUE);
389 
390 tx_send_failed:
391 	/* Free pending TxSwPackets */
392 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
393 	while (packet) {
394 		packet->mp = NULL;
395 		e1000g_free_tx_swpkt(packet);
396 		packet = (p_tx_sw_packet_t)
397 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
398 	}
399 
400 	/* Return pending TxSwPackets to the "Free" list */
401 	mutex_enter(&tx_ring->freelist_lock);
402 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
403 	mutex_exit(&tx_ring->freelist_lock);
404 
405 	E1000G_STAT(tx_ring->stat_send_fail);
406 
407 	freemsg(mp);
408 
409 	/* Send failed, message dropped */
410 	return (B_TRUE);
411 
412 tx_no_resource:
413 	/*
414 	 * Enable Transmit interrupts, so that the interrupt routine can
415 	 * call mac_tx_update() when transmit descriptors become available.
416 	 */
417 	tx_ring->resched_needed = B_TRUE;
418 	if (!Adapter->tx_intr_enable)
419 		e1000g_mask_tx_interrupt(Adapter);
420 
421 	/* Message will be scheduled for re-transmit */
422 	return (B_FALSE);
423 }
424 
425 static boolean_t
426 check_cksum_context(e1000g_tx_ring_t *tx_ring, cksum_data_t *cksum)
427 {
428 	boolean_t cksum_load;
429 	cksum_data_t *last;
430 
431 	cksum_load = B_FALSE;
432 	last = &tx_ring->cksum_data;
433 
434 	if (cksum->cksum_flags != 0) {
435 		if ((cksum->ether_header_size != last->ether_header_size) ||
436 		    (cksum->cksum_flags != last->cksum_flags) ||
437 		    (cksum->cksum_stuff != last->cksum_stuff) ||
438 		    (cksum->cksum_start != last->cksum_start)) {
439 
440 			cksum_load = B_TRUE;
441 		}
442 	}
443 
444 	return (cksum_load);
445 }
446 
447 static int
448 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
449     cksum_data_t *cksum)
450 {
451 	struct e1000g *Adapter;
452 	struct e1000_hw *hw;
453 	p_tx_sw_packet_t first_packet;
454 	p_tx_sw_packet_t packet;
455 	boolean_t cksum_load;
456 	struct e1000_tx_desc *first_data_desc;
457 	struct e1000_tx_desc *next_desc;
458 	struct e1000_tx_desc *descriptor;
459 	int desc_count;
460 	int i;
461 
462 	Adapter = tx_ring->adapter;
463 	hw = &Adapter->shared;
464 
465 	desc_count = 0;
466 	first_packet = NULL;
467 	first_data_desc = NULL;
468 	descriptor = NULL;
469 
470 	next_desc = tx_ring->tbd_next;
471 
472 	/* IP Head/TCP/UDP checksum offload */
473 	cksum_load = check_cksum_context(tx_ring, cksum);
474 
475 	if (cksum_load) {
476 		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
477 
478 		descriptor = next_desc;
479 
480 		e1000g_fill_context_descriptor(cksum,
481 		    (struct e1000_context_desc *)descriptor);
482 
483 		/* Check the wrap-around case */
484 		if (descriptor == tx_ring->tbd_last)
485 			next_desc = tx_ring->tbd_first;
486 		else
487 			next_desc++;
488 
489 		desc_count++;
490 	}
491 
492 	first_data_desc = next_desc;
493 
494 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
495 	while (packet) {
496 		ASSERT(packet->num_desc);
497 
498 		for (i = 0; i < packet->num_desc; i++) {
499 			ASSERT(tx_ring->tbd_avail > 0);
500 
501 			descriptor = next_desc;
502 			descriptor->buffer_addr =
503 			    packet->desc[i].address;
504 			descriptor->lower.data =
505 			    packet->desc[i].length;
506 
507 			/* Zero out status */
508 			descriptor->upper.data = 0;
509 
510 			descriptor->lower.data |=
511 			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
512 			/* must set RS on every outgoing descriptor */
513 			descriptor->lower.data |=
514 			    E1000_TXD_CMD_RS;
515 
516 			/* Check the wrap-around case */
517 			if (descriptor == tx_ring->tbd_last)
518 				next_desc = tx_ring->tbd_first;
519 			else
520 				next_desc++;
521 
522 			desc_count++;
523 		}
524 
525 		if (first_packet != NULL) {
526 			/*
527 			 * Count the checksum context descriptor for
528 			 * the first SwPacket.
529 			 */
530 			first_packet->num_desc++;
531 			first_packet = NULL;
532 		}
533 
534 		packet = (p_tx_sw_packet_t)
535 		    QUEUE_GET_NEXT(pending_list, &packet->Link);
536 	}
537 
538 	ASSERT(descriptor);
539 
540 	if (cksum->cksum_flags) {
541 		if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM)
542 			((struct e1000_data_desc *)first_data_desc)->
543 			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
544 		if (cksum->cksum_flags & HCK_PARTIALCKSUM)
545 			((struct e1000_data_desc *)first_data_desc)->
546 			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
547 	}
548 
549 	/*
550 	 * Last Descriptor of Packet needs End Of Packet (EOP), Report
551 	 * Status (RS) and append Ethernet CRC (IFCS) bits set.
552 	 */
553 	if (Adapter->tx_intr_delay) {
554 		descriptor->lower.data |= E1000_TXD_CMD_IDE |
555 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
556 	} else {
557 		descriptor->lower.data |=
558 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
559 	}
560 
561 	/*
562 	 * Sync the Tx descriptors DMA buffer
563 	 */
564 	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
565 	    0, 0, DDI_DMA_SYNC_FORDEV);
566 
567 	tx_ring->tbd_next = next_desc;
568 
569 	/*
570 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
571 	 * FX1000 that this frame is available to transmit.
572 	 */
573 	if (hw->mac.type == e1000_82547)
574 		e1000g_82547_tx_move_tail(tx_ring);
575 	else
576 		E1000_WRITE_REG(hw, E1000_TDT,
577 		    (uint32_t)(next_desc - tx_ring->tbd_first));
578 
579 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
580 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
581 		Adapter->chip_state = E1000G_ERROR;
582 	}
583 
584 	/* Put the pending SwPackets to the "Used" list */
585 	mutex_enter(&tx_ring->usedlist_lock);
586 	QUEUE_APPEND(&tx_ring->used_list, pending_list);
587 	tx_ring->tbd_avail -= desc_count;
588 	mutex_exit(&tx_ring->usedlist_lock);
589 
590 	/* Store the cksum data */
591 	if (cksum_load)
592 		tx_ring->cksum_data = *cksum;
593 
594 	return (desc_count);
595 }
596 
597 
598 /*
599  * e1000g_tx_setup - setup tx data structures
600  *
601  * This routine initializes all of the transmit related
602  * structures. This includes the Transmit descriptors,
603  * and the tx_sw_packet structures.
604  */
605 void
606 e1000g_tx_setup(struct e1000g *Adapter)
607 {
608 	struct e1000_hw *hw;
609 	p_tx_sw_packet_t packet;
610 	UINT i;
611 	uint32_t buf_high;
612 	uint32_t buf_low;
613 	uint32_t reg_tipg;
614 	uint32_t reg_tctl;
615 	uint32_t reg_tarc;
616 	uint16_t speed, duplex;
617 	int size;
618 	e1000g_tx_ring_t *tx_ring;
619 
620 	hw = &Adapter->shared;
621 	tx_ring = Adapter->tx_ring;
622 
623 	/* init the lists */
624 	/*
625 	 * Here we don't need to protect the lists using the
626 	 * usedlist_lock and freelist_lock, for they have
627 	 * been protected by the chip_lock.
628 	 */
629 	QUEUE_INIT_LIST(&tx_ring->used_list);
630 	QUEUE_INIT_LIST(&tx_ring->free_list);
631 
632 	/* Go through and set up each SW_Packet */
633 	packet = tx_ring->packet_area;
634 	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
635 		/* Initialize this tx_sw_apcket area */
636 		e1000g_free_tx_swpkt(packet);
637 		/* Add this tx_sw_packet to the free list */
638 		QUEUE_PUSH_TAIL(&tx_ring->free_list,
639 		    &packet->Link);
640 	}
641 
642 	/* Setup TX descriptor pointers */
643 	tx_ring->tbd_next = tx_ring->tbd_first;
644 	tx_ring->tbd_oldest = tx_ring->tbd_first;
645 
646 	/*
647 	 * Setup Hardware TX Registers
648 	 */
649 	/* Setup the Transmit Control Register (TCTL). */
650 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
651 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
652 	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
653 	    E1000_TCTL_RTLC;
654 
655 	/* Enable the MULR bit */
656 	if (hw->bus.type == e1000_bus_type_pci_express)
657 		reg_tctl |= E1000_TCTL_MULR;
658 
659 	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);
660 
661 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
662 		e1000_get_speed_and_duplex(hw, &speed, &duplex);
663 
664 		reg_tarc = E1000_READ_REG(hw, E1000_TARC0);
665 		reg_tarc |= (1 << 25);
666 		if (speed == SPEED_1000)
667 			reg_tarc |= (1 << 21);
668 		E1000_WRITE_REG(hw, E1000_TARC0, reg_tarc);
669 
670 		reg_tarc = E1000_READ_REG(hw, E1000_TARC1);
671 		reg_tarc |= (1 << 25);
672 		if (reg_tctl & E1000_TCTL_MULR)
673 			reg_tarc &= ~(1 << 28);
674 		else
675 			reg_tarc |= (1 << 28);
676 		E1000_WRITE_REG(hw, E1000_TARC1, reg_tarc);
677 
678 	} else if (hw->mac.type == e1000_80003es2lan) {
679 		reg_tarc = E1000_READ_REG(hw, E1000_TARC0);
680 		reg_tarc |= 1;
681 		if (hw->media_type == e1000_media_type_internal_serdes)
682 			reg_tarc |= (1 << 20);
683 		E1000_WRITE_REG(hw, E1000_TARC0, reg_tarc);
684 
685 		reg_tarc = E1000_READ_REG(hw, E1000_TARC1);
686 		reg_tarc |= 1;
687 		E1000_WRITE_REG(hw, E1000_TARC1, reg_tarc);
688 	}
689 
690 	/* Setup HW Base and Length of Tx descriptor area */
691 	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
692 	E1000_WRITE_REG(hw, E1000_TDLEN, size);
693 	size = E1000_READ_REG(hw, E1000_TDLEN);
694 
695 	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
696 	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);
697 
698 	E1000_WRITE_REG(hw, E1000_TDBAL, buf_low);
699 	E1000_WRITE_REG(hw, E1000_TDBAH, buf_high);
700 
701 	/* Setup our HW Tx Head & Tail descriptor pointers */
702 	E1000_WRITE_REG(hw, E1000_TDH, 0);
703 	E1000_WRITE_REG(hw, E1000_TDT, 0);
704 
705 	/* Set the default values for the Tx Inter Packet Gap timer */
706 	if ((hw->mac.type == e1000_82542) &&
707 	    ((hw->revision_id == E1000_REVISION_2) ||
708 	    (hw->revision_id == E1000_REVISION_3))) {
709 		reg_tipg = DEFAULT_82542_TIPG_IPGT;
710 		reg_tipg |=
711 		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
712 		reg_tipg |=
713 		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
714 	} else {
715 		if (hw->media_type == e1000_media_type_fiber)
716 			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
717 		else
718 			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
719 		reg_tipg |=
720 		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
721 		reg_tipg |=
722 		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
723 	}
724 	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);
725 
726 	/* Setup Transmit Interrupt Delay Value */
727 	if (Adapter->tx_intr_delay) {
728 		E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
729 	}
730 
731 	tx_ring->tbd_avail = Adapter->tx_desc_num;
732 
733 	/* For TCP/UDP checksum offload */
734 	tx_ring->cksum_data.cksum_stuff = 0;
735 	tx_ring->cksum_data.cksum_start = 0;
736 	tx_ring->cksum_data.cksum_flags = 0;
737 	tx_ring->cksum_data.ether_header_size = 0;
738 }
739 
740 /*
741  * e1000g_recycle - recycle the tx descriptors and tx sw packets
742  */
743 int
744 e1000g_recycle(e1000g_tx_ring_t *tx_ring)
745 {
746 	struct e1000g *Adapter;
747 	LIST_DESCRIBER pending_list;
748 	p_tx_sw_packet_t packet;
749 	mblk_t *mp;
750 	mblk_t *nmp;
751 	struct e1000_tx_desc *descriptor;
752 	int desc_count;
753 
754 	/*
755 	 * This function will examine each TxSwPacket in the 'used' queue
756 	 * if the e1000g is done with it then the associated resources (Tx
757 	 * Descriptors) will be "freed" and the TxSwPacket will be
758 	 * returned to the 'free' queue.
759 	 */
760 	Adapter = tx_ring->adapter;
761 
762 	desc_count = 0;
763 	QUEUE_INIT_LIST(&pending_list);
764 
765 	mutex_enter(&tx_ring->usedlist_lock);
766 
767 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
768 	if (packet == NULL) {
769 		mutex_exit(&tx_ring->usedlist_lock);
770 		tx_ring->recycle_fail = 0;
771 		tx_ring->stall_watchdog = 0;
772 		return (0);
773 	}
774 
775 	/* Sync the Tx descriptor DMA buffer */
776 	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
777 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
778 	if (e1000g_check_dma_handle(
779 	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
780 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
781 		Adapter->chip_state = E1000G_ERROR;
782 		return (0);
783 	}
784 
785 	/*
786 	 * While there are still TxSwPackets in the used queue check them
787 	 */
788 	while (packet =
789 	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) {
790 
791 		/*
792 		 * Get hold of the next descriptor that the e1000g will
793 		 * report status back to (this will be the last descriptor
794 		 * of a given sw packet). We only want to free the
795 		 * sw packet (and it resources) if the e1000g is done
796 		 * with ALL of the descriptors.  If the e1000g is done
797 		 * with the last one then it is done with all of them.
798 		 */
799 		ASSERT(packet->num_desc);
800 		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);
801 
802 		/* Check for wrap case */
803 		if (descriptor > tx_ring->tbd_last)
804 			descriptor -= Adapter->tx_desc_num;
805 
806 		/*
807 		 * If the descriptor done bit is set free TxSwPacket and
808 		 * associated resources
809 		 */
810 		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
811 			QUEUE_POP_HEAD(&tx_ring->used_list);
812 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
813 
814 			if (descriptor == tx_ring->tbd_last)
815 				tx_ring->tbd_oldest =
816 				    tx_ring->tbd_first;
817 			else
818 				tx_ring->tbd_oldest =
819 				    descriptor + 1;
820 
821 			desc_count += packet->num_desc;
822 
823 			if (desc_count >= tx_ring->recycle_num)
824 				break;
825 		} else {
826 			/*
827 			 * Found a sw packet that the e1000g is not done
828 			 * with then there is no reason to check the rest
829 			 * of the queue.
830 			 */
831 			break;
832 		}
833 	}
834 
835 	tx_ring->tbd_avail += desc_count;
836 
837 	mutex_exit(&tx_ring->usedlist_lock);
838 
839 	if (desc_count == 0) {
840 		tx_ring->recycle_fail++;
841 		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
842 		return (0);
843 	}
844 
845 	tx_ring->recycle_fail = 0;
846 	tx_ring->stall_watchdog = 0;
847 
848 	mp = NULL;
849 	nmp = NULL;
850 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
851 	ASSERT(packet != NULL);
852 	while (packet != NULL) {
853 		if (packet->mp != NULL) {
854 			ASSERT(packet->mp->b_next == NULL);
855 			/* Assemble the message chain */
856 			if (mp == NULL) {
857 				mp = packet->mp;
858 				nmp = packet->mp;
859 			} else {
860 				nmp->b_next = packet->mp;
861 				nmp = packet->mp;
862 			}
863 			/* Disconnect the message from the sw packet */
864 			packet->mp = NULL;
865 		}
866 
867 		/* Free the TxSwPackets */
868 		e1000g_free_tx_swpkt(packet);
869 
870 		packet = (p_tx_sw_packet_t)
871 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
872 	}
873 
874 	/* Save the message chain */
875 	if (mp != NULL) {
876 		mutex_enter(&tx_ring->mblks_lock);
877 		if (tx_ring->mblks.head == NULL) {
878 			tx_ring->mblks.head = mp;
879 			tx_ring->mblks.tail = nmp;
880 		} else {
881 			tx_ring->mblks.tail->b_next = mp;
882 			tx_ring->mblks.tail = nmp;
883 		}
884 		mutex_exit(&tx_ring->mblks_lock);
885 
886 		/*
887 		 * If the tx interrupt is enabled, the messages will be freed
888 		 * in the tx interrupt; Otherwise, they are freed here by
889 		 * triggering a soft interrupt.
890 		 */
891 		if (!Adapter->tx_intr_enable)
892 			ddi_intr_trigger_softint(Adapter->tx_softint_handle,
893 			    NULL);
894 	}
895 
896 	/* Return the TxSwPackets back to the FreeList */
897 	mutex_enter(&tx_ring->freelist_lock);
898 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
899 	mutex_exit(&tx_ring->freelist_lock);
900 
901 	return (desc_count);
902 }
903 
904 /*
905  * 82544 Coexistence issue workaround:
906  *    There are 2 issues.
907  *    1. If a 32 bit split completion happens from P64H2 and another
908  *	agent drives a 64 bit request/split completion after ONLY
909  *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
910  *	82544 has a problem where in to clock all the data in, it
911  *	looks at REQ64# signal and since it has changed so fast (i.e. 1
912  *	idle clock turn around), it will fail to clock all the data in.
913  *	Data coming from certain ending addresses has exposure to this issue.
914  *
915  * To detect this issue, following equation can be used...
916  *	SIZE[3:0] + ADDR[2:0] = SUM[3:0].
917  *	If SUM[3:0] is in between 1 to 4, we will have this issue.
918  *
919  * ROOT CAUSE:
920  *	The erratum involves the 82544 PCIX elasticity FIFO implementations as
921  *	64-bit FIFO's and flushing of the final partial-bytes corresponding
922  *	to the end of a requested read burst. Under a specific burst condition
923  *	of ending-data alignment and 32-byte split-completions, the final
924  *	byte(s) of split-completion data require an extra clock cycle to flush
925  *	into 64-bit FIFO orientation.  An incorrect logic dependency on the
 *	REQ64# signal occurring during this clock cycle may cause the
927  *	residual byte(s) to be lost, thereby rendering the internal DMA client
928  *	forever awaiting the final byte(s) for an outbound data-fetch.  The
929  *	erratum is confirmed to *only* occur if certain subsequent external
930  *	64-bit PCIX bus transactions occur immediately (minimum possible bus
931  *	turn- around) following the odd-aligned 32-bit split-completion
932  *	containing the final byte(s).  Intel has confirmed that this has been
933  *	seen only with chipset/bridges which have the capability to provide
934  *	32-bit split-completion data, and in the presence of newer PCIX bus
935  *	agents which fully-optimize the inter-transaction turn-around (zero
936  *	additional initiator latency when pre-granted bus ownership).
937  *
938  *   	This issue does not exist in PCI bus mode, when any agent is operating
939  *	in 32 bit only mode or on chipsets that do not do 32 bit split
940  *	completions for 64 bit read requests (Serverworks chipsets). P64H2 does
941  *	32 bit split completions for any read request that has bit 2 set to 1
942  *	for the requested address and read request size is more than 8 bytes.
943  *
944  *   2. Another issue is related to 82544 driving DACs under the similar
945  *	scenario (32 bit split completion followed by 64 bit transaction with
946  *	only 1 cycle turnaround). This issue is still being root caused. We
947  *	think that both of these issues can be avoided if following workaround
948  *	is implemented. It seems DAC issues is related to ending addresses being
949  *	0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity
950  *	FIFO which does not get flushed due to REQ64# dependency. We will only
951  *	know the full story after it has been simulated successfully by HW team.
952  *
953  * WORKAROUND:
954  *	Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
955  */
956 static uint32_t
957 e1000g_fill_82544_desc(uint64_t address,
958     size_t length, p_desc_array_t desc_array)
959 {
960 	/*
961 	 * Since issue is sensitive to length and address.
962 	 * Let us first check the address...
963 	 */
964 	uint32_t safe_terminator;
965 
966 	if (length <= 4) {
967 		desc_array->descriptor[0].address = address;
968 		desc_array->descriptor[0].length = length;
969 		desc_array->elements = 1;
970 		return (desc_array->elements);
971 	}
972 	safe_terminator =
973 	    (uint32_t)((((uint32_t)address & 0x7) +
974 	    (length & 0xF)) & 0xF);
975 	/*
976 	 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then
977 	 * return
978 	 */
979 	if (safe_terminator == 0 ||
980 	    (safe_terminator > 4 && safe_terminator < 9) ||
981 	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
982 		desc_array->descriptor[0].address = address;
983 		desc_array->descriptor[0].length = length;
984 		desc_array->elements = 1;
985 		return (desc_array->elements);
986 	}
987 
988 	desc_array->descriptor[0].address = address;
989 	desc_array->descriptor[0].length = length - 4;
990 	desc_array->descriptor[1].address = address + (length - 4);
991 	desc_array->descriptor[1].length = 4;
992 	desc_array->elements = 2;
993 	return (desc_array->elements);
994 }
995 
996 static int
997 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
998     mblk_t *mp, uint32_t force_bcopy)
999 {
1000 	size_t len;
1001 	size_t len1;
1002 	dma_buffer_t *tx_buf;
1003 	mblk_t *nmp;
1004 	boolean_t finished;
1005 	int desc_count;
1006 
1007 	desc_count = 0;
1008 	tx_buf = packet->tx_buf;
1009 	len = MBLKL(mp);
1010 
1011 	ASSERT((tx_buf->len + len) <= tx_buf->size);
1012 
1013 	if (len > 0) {
1014 		bcopy(mp->b_rptr,
1015 		    tx_buf->address + tx_buf->len,
1016 		    len);
1017 		tx_buf->len += len;
1018 
1019 		packet->num_mblk_frag++;
1020 	}
1021 
1022 	nmp = mp->b_cont;
1023 	if (nmp == NULL) {
1024 		finished = B_TRUE;
1025 	} else {
1026 		len1 = MBLKL(nmp);
1027 		if ((tx_buf->len + len1) > tx_buf->size)
1028 			finished = B_TRUE;
1029 		else if (force_bcopy)
1030 			finished = B_FALSE;
1031 		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
1032 			finished = B_TRUE;
1033 		else
1034 			finished = B_FALSE;
1035 	}
1036 
1037 	if (finished) {
1038 		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
1039 		    (tx_buf->len > len));
1040 
1041 		/*
1042 		 * If the packet is smaller than 64 bytes, which is the
1043 		 * minimum ethernet packet size, pad the packet to make
1044 		 * it at least 60 bytes. The hardware will add 4 bytes
1045 		 * for CRC.
1046 		 */
1047 		if (force_bcopy & FORCE_BCOPY_UNDER_SIZE) {
1048 			ASSERT(tx_buf->len < MINIMUM_ETHERNET_PACKET_SIZE);
1049 
1050 			bzero(tx_buf->address + tx_buf->len,
1051 			    MINIMUM_ETHERNET_PACKET_SIZE - tx_buf->len);
1052 			tx_buf->len = MINIMUM_ETHERNET_PACKET_SIZE;
1053 		}
1054 
1055 #ifdef __sparc
1056 		if (packet->dma_type == USE_DVMA)
1057 			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
1058 		else
1059 			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1060 			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1061 #else
1062 		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1063 		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1064 #endif
1065 
1066 		packet->data_transfer_type = USE_BCOPY;
1067 
1068 		desc_count = e1000g_fill_tx_desc(tx_ring,
1069 		    packet,
1070 		    tx_buf->dma_address,
1071 		    tx_buf->len);
1072 
1073 		if (desc_count <= 0)
1074 			return (-1);
1075 	}
1076 
1077 	return (desc_count);
1078 }
1079 
1080 static int
1081 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
1082 {
1083 	int j;
1084 	int mystat;
1085 	size_t len;
1086 	ddi_dma_cookie_t dma_cookie;
1087 	uint_t ncookies;
1088 	int desc_count;
1089 	uint32_t desc_total;
1090 
1091 	desc_total = 0;
1092 	len = MBLKL(mp);
1093 
1094 	/*
1095 	 * ddi_dma_addr_bind_handle() allocates  DMA  resources  for  a
1096 	 * memory  object such that a device can perform DMA to or from
1097 	 * the object.  DMA resources  are  allocated  considering  the
1098 	 * device's  DMA  attributes  as  expressed by ddi_dma_attr(9S)
1099 	 * (see ddi_dma_alloc_handle(9F)).
1100 	 *
1101 	 * ddi_dma_addr_bind_handle() fills in  the  first  DMA  cookie
1102 	 * pointed  to by cookiep with the appropriate address, length,
1103 	 * and bus type. *ccountp is set to the number of DMA  cookies
1104 	 * representing this DMA object. Subsequent DMA cookies must be
1105 	 * retrieved by calling ddi_dma_nextcookie(9F)  the  number  of
1106 	 * times specified by *countp - 1.
1107 	 */
1108 	switch (packet->dma_type) {
1109 #ifdef __sparc
1110 	case USE_DVMA:
1111 		dvma_kaddr_load(packet->tx_dma_handle,
1112 		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);
1113 
1114 		dvma_sync(packet->tx_dma_handle, 0,
1115 		    DDI_DMA_SYNC_FORDEV);
1116 
1117 		ncookies = 1;
1118 		packet->data_transfer_type = USE_DVMA;
1119 		break;
1120 #endif
1121 	case USE_DMA:
1122 		if ((mystat = ddi_dma_addr_bind_handle(
1123 		    packet->tx_dma_handle, NULL,
1124 		    (caddr_t)mp->b_rptr, len,
1125 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
1126 		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
1127 		    &ncookies)) != DDI_DMA_MAPPED) {
1128 
1129 			e1000g_log(tx_ring->adapter, CE_WARN,
1130 			    "Couldn't bind mblk buffer to Tx DMA handle: "
1131 			    "return: %X, Pkt: %X\n",
1132 			    mystat, packet);
1133 			return (-1);
1134 		}
1135 
1136 		/*
1137 		 * An implicit ddi_dma_sync() is done when the
1138 		 * ddi_dma_addr_bind_handle() is called. So we
1139 		 * don't need to explicitly call ddi_dma_sync()
1140 		 * here any more.
1141 		 */
1142 		ASSERT(ncookies);
1143 		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
1144 		    (ncookies > 1));
1145 
1146 		/*
1147 		 * The data_transfer_type value must be set after the handle
1148 		 * has been bound, for it will be used in e1000g_free_tx_swpkt()
1149 		 * to decide whether we need to unbind the handle.
1150 		 */
1151 		packet->data_transfer_type = USE_DMA;
1152 		break;
1153 	default:
1154 		ASSERT(B_FALSE);
1155 		break;
1156 	}
1157 
1158 	packet->num_mblk_frag++;
1159 
1160 	/*
1161 	 * Each address could span thru multpile cookie..
1162 	 * Each cookie will have one descriptor
1163 	 */
1164 	for (j = ncookies; j != 0; j--) {
1165 
1166 		desc_count = e1000g_fill_tx_desc(tx_ring,
1167 		    packet,
1168 		    dma_cookie.dmac_laddress,
1169 		    dma_cookie.dmac_size);
1170 
1171 		if (desc_count <= 0)
1172 			return (-1);
1173 
1174 		desc_total += desc_count;
1175 
1176 		/*
1177 		 * ddi_dma_nextcookie() retrieves subsequent DMA
1178 		 * cookies for a DMA object.
1179 		 * ddi_dma_nextcookie() fills in the
1180 		 * ddi_dma_cookie(9S) structure pointed to by
1181 		 * cookiep.  The ddi_dma_cookie(9S) structure
1182 		 * must be allocated prior to calling
1183 		 * ddi_dma_nextcookie(). The DMA cookie count
1184 		 * returned by ddi_dma_buf_bind_handle(9F),
1185 		 * ddi_dma_addr_bind_handle(9F), or
1186 		 * ddi_dma_getwin(9F) indicates the number of DMA
1187 		 * cookies a DMA object consists of.  If the
1188 		 * resulting cookie count, N, is larger than 1,
1189 		 * ddi_dma_nextcookie() must be called N-1 times
1190 		 * to retrieve all DMA cookies.
1191 		 */
1192 		if (j > 1) {
1193 			ddi_dma_nextcookie(packet->tx_dma_handle,
1194 			    &dma_cookie);
1195 		}
1196 	}
1197 
1198 	return (desc_total);
1199 }
1200 
1201 static void
1202 e1000g_fill_context_descriptor(cksum_data_t *cksum,
1203     struct e1000_context_desc *cksum_desc)
1204 {
1205 	if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM) {
1206 		cksum_desc->lower_setup.ip_fields.ipcss =
1207 		    cksum->ether_header_size;
1208 		cksum_desc->lower_setup.ip_fields.ipcso =
1209 		    cksum->ether_header_size +
1210 		    offsetof(struct ip, ip_sum);
1211 		cksum_desc->lower_setup.ip_fields.ipcse =
1212 		    cksum->ether_header_size +
1213 		    sizeof (struct ip) - 1;
1214 	} else
1215 		cksum_desc->lower_setup.ip_config = 0;
1216 
1217 	if (cksum->cksum_flags & HCK_PARTIALCKSUM) {
1218 		/*
1219 		 * The packet with same protocol has the following
1220 		 * stuff and start offset:
1221 		 * |  Protocol  | Stuff  | Start  | Checksum
1222 		 * |		| Offset | Offset | Enable
1223 		 * | IPv4 + TCP |  0x24  |  0x14  |  Yes
1224 		 * | IPv4 + UDP |  0x1A  |  0x14  |  Yes
1225 		 * | IPv6 + TCP |  0x20  |  0x10  |  No
1226 		 * | IPv6 + UDP |  0x14  |  0x10  |  No
1227 		 */
1228 		cksum_desc->upper_setup.tcp_fields.tucss =
1229 		    cksum->cksum_start + cksum->ether_header_size;
1230 		cksum_desc->upper_setup.tcp_fields.tucso =
1231 		    cksum->cksum_stuff + cksum->ether_header_size;
1232 		cksum_desc->upper_setup.tcp_fields.tucse = 0;
1233 	} else
1234 		cksum_desc->upper_setup.tcp_config = 0;
1235 
1236 	cksum_desc->cmd_and_length = E1000_TXD_CMD_DEXT;
1237 
1238 	/*
1239 	 * Zero out the options for TCP Segmentation Offload,
1240 	 * since we don't support it in this version
1241 	 */
1242 	cksum_desc->tcp_seg_setup.data = 0;
1243 }
1244 
1245 static int
1246 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
1247     p_tx_sw_packet_t packet, uint64_t address, size_t size)
1248 {
1249 	struct e1000_hw *hw = &tx_ring->adapter->shared;
1250 	p_sw_desc_t desc;
1251 
1252 	if (hw->mac.type == e1000_82544) {
1253 		if (hw->bus.type == e1000_bus_type_pcix)
1254 			return (e1000g_tx_workaround_PCIX_82544(packet,
1255 			    address, size));
1256 
1257 		if (size > JUMBO_FRAG_LENGTH)
1258 			return (e1000g_tx_workaround_jumbo_82544(packet,
1259 			    address, size));
1260 	}
1261 
1262 	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1263 
1264 	desc = &packet->desc[packet->num_desc];
1265 	desc->address = address;
1266 	desc->length = size;
1267 
1268 	packet->num_desc++;
1269 
1270 	return (1);
1271 }
1272 
1273 static int
1274 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
1275     uint64_t address, size_t size)
1276 {
1277 	p_sw_desc_t desc;
1278 	int desc_count;
1279 	long size_left;
1280 	size_t len;
1281 	uint32_t counter;
1282 	uint32_t array_elements;
1283 	desc_array_t desc_array;
1284 
1285 	/*
1286 	 * Coexist Workaround for cordova: RP: 07/04/03
1287 	 *
1288 	 * RP: ERRATA: Workaround ISSUE:
1289 	 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup
1290 	 * Eachbuffer in to 8kb pieces until the
1291 	 * remainder is < 8kb
1292 	 */
1293 	size_left = size;
1294 	desc_count = 0;
1295 
1296 	while (size_left > 0) {
1297 		if (size_left > MAX_TX_BUF_SIZE)
1298 			len = MAX_TX_BUF_SIZE;
1299 		else
1300 			len = size_left;
1301 
1302 		array_elements = e1000g_fill_82544_desc(address,
1303 		    len, &desc_array);
1304 
1305 		for (counter = 0; counter < array_elements; counter++) {
1306 			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1307 			/*
1308 			 * Put in the buffer address
1309 			 */
1310 			desc = &packet->desc[packet->num_desc];
1311 
1312 			desc->address =
1313 			    desc_array.descriptor[counter].address;
1314 			desc->length =
1315 			    desc_array.descriptor[counter].length;
1316 
1317 			packet->num_desc++;
1318 			desc_count++;
1319 		} /* for */
1320 
1321 		/*
1322 		 * Update the buffer address and length
1323 		 */
1324 		address += MAX_TX_BUF_SIZE;
1325 		size_left -= MAX_TX_BUF_SIZE;
1326 	} /* while */
1327 
1328 	return (desc_count);
1329 }
1330 
1331 static int
1332 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
1333     uint64_t address, size_t size)
1334 {
1335 	p_sw_desc_t desc;
1336 	int desc_count;
1337 	long size_left;
1338 	uint32_t offset;
1339 
1340 	/*
1341 	 * Workaround for Jumbo Frames on Cordova
1342 	 * PSD 06/01/2001
1343 	 */
1344 	size_left = size;
1345 	desc_count = 0;
1346 	offset = 0;
1347 	while (size_left > 0) {
1348 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1349 
1350 		desc = &packet->desc[packet->num_desc];
1351 
1352 		desc->address = address + offset;
1353 
1354 		if (size_left > JUMBO_FRAG_LENGTH)
1355 			desc->length = JUMBO_FRAG_LENGTH;
1356 		else
1357 			desc->length = size_left;
1358 
1359 		packet->num_desc++;
1360 		desc_count++;
1361 
1362 		offset += desc->length;
1363 		size_left -= JUMBO_FRAG_LENGTH;
1364 	}
1365 
1366 	return (desc_count);
1367 }
1368 
1369 #pragma inline(e1000g_82547_tx_move_tail_work)
1370 
1371 static void
1372 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
1373 {
1374 	struct e1000_hw *hw;
1375 	uint16_t hw_tdt;
1376 	uint16_t sw_tdt;
1377 	struct e1000_tx_desc *tx_desc;
1378 	uint16_t length = 0;
1379 	boolean_t eop = B_FALSE;
1380 	struct e1000g *Adapter;
1381 
1382 	Adapter = tx_ring->adapter;
1383 	hw = &Adapter->shared;
1384 
1385 	hw_tdt = E1000_READ_REG(hw, E1000_TDT);
1386 	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;
1387 
1388 	while (hw_tdt != sw_tdt) {
1389 		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
1390 		length += tx_desc->lower.flags.length;
1391 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1392 		if (++hw_tdt == Adapter->tx_desc_num)
1393 			hw_tdt = 0;
1394 
1395 		if (eop) {
1396 			if ((Adapter->link_duplex == HALF_DUPLEX) &&
1397 			    (e1000_fifo_workaround_82547(hw, length)
1398 			    != E1000_SUCCESS)) {
1399 				if (tx_ring->timer_enable_82547) {
1400 					ASSERT(tx_ring->timer_id_82547 == 0);
1401 					tx_ring->timer_id_82547 =
1402 					    timeout(e1000g_82547_timeout,
1403 					    (void *)tx_ring,
1404 					    drv_usectohz(10000));
1405 				}
1406 				return;
1407 
1408 			} else {
1409 				E1000_WRITE_REG(hw, E1000_TDT, hw_tdt);
1410 				e1000_update_tx_fifo_head_82547(hw, length);
1411 				length = 0;
1412 			}
1413 		}
1414 	}
1415 }
1416 
1417 static void
1418 e1000g_82547_timeout(void *arg)
1419 {
1420 	e1000g_tx_ring_t *tx_ring;
1421 
1422 	tx_ring = (e1000g_tx_ring_t *)arg;
1423 
1424 	mutex_enter(&tx_ring->tx_lock);
1425 
1426 	tx_ring->timer_id_82547 = 0;
1427 	e1000g_82547_tx_move_tail_work(tx_ring);
1428 
1429 	mutex_exit(&tx_ring->tx_lock);
1430 }
1431 
1432 static void
1433 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
1434 {
1435 	timeout_id_t tid;
1436 
1437 	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));
1438 
1439 	tid = tx_ring->timer_id_82547;
1440 	tx_ring->timer_id_82547 = 0;
1441 	if (tid != 0) {
1442 		tx_ring->timer_enable_82547 = B_FALSE;
1443 		mutex_exit(&tx_ring->tx_lock);
1444 
1445 		(void) untimeout(tid);
1446 
1447 		mutex_enter(&tx_ring->tx_lock);
1448 	}
1449 	tx_ring->timer_enable_82547 = B_TRUE;
1450 	e1000g_82547_tx_move_tail_work(tx_ring);
1451 }
1452