xref: /titanic_51/usr/src/uts/common/io/e1000g/e1000g_tx.c (revision 52ccf843e173e2a4a657360b0a22853fd413905f)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_tx.c							*
33  *									*
34  * Abstract:								*
35  *   This file contains some routines that take care of Transmit,	*
36  *   make the hardware to send the data pointed by the packet out	*
37  *   on to the physical medium.						*
38  *									*
39  * **********************************************************************
40  */
41 
42 #include "e1000g_sw.h"
43 #include "e1000g_debug.h"
44 
45 static boolean_t e1000g_send(struct e1000g *, mblk_t *);
46 static int e1000g_tx_copy(e1000g_tx_ring_t *,
47     p_tx_sw_packet_t, mblk_t *, uint32_t);
48 static int e1000g_tx_bind(e1000g_tx_ring_t *,
49     p_tx_sw_packet_t, mblk_t *);
50 static boolean_t check_cksum_context(e1000g_tx_ring_t *, cksum_data_t *);
51 static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
52     cksum_data_t *);
53 static void e1000g_fill_context_descriptor(cksum_data_t *,
54     struct e1000_context_desc *);
55 static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
56     p_tx_sw_packet_t, uint64_t, size_t);
57 static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
58     p_desc_array_t desc_array);
59 static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
60 static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
61 static void e1000g_82547_timeout(void *);
62 static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
63 static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);
64 
65 #ifndef E1000G_DEBUG
66 #pragma inline(e1000g_tx_copy)
67 #pragma inline(e1000g_tx_bind)
68 #pragma inline(check_cksum_context)
69 #pragma inline(e1000g_fill_tx_ring)
70 #pragma inline(e1000g_fill_context_descriptor)
71 #pragma inline(e1000g_fill_tx_desc)
72 #pragma inline(e1000g_fill_82544_desc)
73 #pragma inline(e1000g_tx_workaround_PCIX_82544)
74 #pragma inline(e1000g_tx_workaround_jumbo_82544)
75 #pragma inline(e1000g_free_tx_swpkt)
76 #endif
77 
78 /*
79  * e1000g_free_tx_swpkt	- free up the tx sw packet
80  *
81  * Unbind the previously bound DMA handle for a given
82  * transmit sw packet. And reset the sw packet data.
83  */
84 void
85 e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
86 {
87 	switch (packet->data_transfer_type) {
88 	case USE_BCOPY:
89 		packet->tx_buf->len = 0;
90 		break;
91 #ifdef __sparc
92 	case USE_DVMA:
93 		dvma_unload(packet->tx_dma_handle, 0, -1);
94 		break;
95 #endif
96 	case USE_DMA:
97 		ddi_dma_unbind_handle(packet->tx_dma_handle);
98 		break;
99 	default:
100 		break;
101 	}
102 
103 	/*
104 	 * The mblk has been stripped off the sw packet
105 	 * and will be freed in a triggered soft intr.
106 	 */
107 	ASSERT(packet->mp == NULL);
108 
109 	packet->data_transfer_type = USE_NONE;
110 	packet->num_mblk_frag = 0;
111 	packet->num_desc = 0;
112 }
113 
114 mblk_t *
115 e1000g_m_tx(void *arg, mblk_t *mp)
116 {
117 	struct e1000g *Adapter = (struct e1000g *)arg;
118 	mblk_t *next;
119 
120 	rw_enter(&Adapter->chip_lock, RW_READER);
121 
122 	if ((Adapter->chip_state != E1000G_START) ||
123 	    (Adapter->link_state != LINK_STATE_UP)) {
124 		freemsgchain(mp);
125 		mp = NULL;
126 	}
127 
128 	while (mp != NULL) {
129 		next = mp->b_next;
130 		mp->b_next = NULL;
131 
132 		if (!e1000g_send(Adapter, mp)) {
133 			mp->b_next = next;
134 			break;
135 		}
136 
137 		mp = next;
138 	}
139 
140 	rw_exit(&Adapter->chip_lock);
141 	return (mp);
142 }
143 
144 /*
145  * e1000g_send -  send packets onto the wire
146  *
147  * Called from e1000g_m_tx with an mblk ready to send. this
148  * routine sets up the transmit descriptors and sends data to
149  * the wire. It also pushes the just transmitted packet to
150  * the used tx sw packet list.
151  */
152 static boolean_t
153 e1000g_send(struct e1000g *Adapter, mblk_t *mp)
154 {
155 	struct e1000_hw *hw;
156 	p_tx_sw_packet_t packet;
157 	LIST_DESCRIBER pending_list;
158 	size_t len;
159 	size_t msg_size;
160 	uint32_t frag_count;
161 	int desc_count;
162 	uint32_t desc_total;
163 	uint32_t force_bcopy;
164 	mblk_t *nmp;
165 	mblk_t *tmp;
166 	e1000g_tx_ring_t *tx_ring;
167 	cksum_data_t cksum;
168 
169 	hw = &Adapter->shared;
170 	tx_ring = Adapter->tx_ring;
171 
172 	/* Get the total size and frags number of the message */
173 	force_bcopy = 0;
174 	frag_count = 0;
175 	msg_size = 0;
176 	for (nmp = mp; nmp; nmp = nmp->b_cont) {
177 		frag_count++;
178 		msg_size += MBLKL(nmp);
179 	}
180 
181 	/* Make sure packet is less than the max frame size */
182 	if (msg_size > Adapter->max_frame_size - ETHERFCSL) {
183 		/*
184 		 * For the over size packet, we'll just drop it.
185 		 * So we return B_TRUE here.
186 		 */
187 		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
188 		    "Tx packet out of bound. length = %d \n", msg_size);
189 		E1000G_STAT(tx_ring->stat_over_size);
190 		freemsg(mp);
191 		return (B_TRUE);
192 	}
193 
194 	/*
195 	 * Check and reclaim tx descriptors.
196 	 * This low water mark check should be done all the time as
197 	 * Transmit interrupt delay can produce Transmit interrupts little
198 	 * late and that may cause few problems related to reaping Tx
199 	 * Descriptors... As you may run short of them before getting any
200 	 * transmit interrupt...
201 	 */
202 	if (tx_ring->resched_needed ||
203 	    (tx_ring->tbd_avail < Adapter->tx_recycle_thresh)) {
204 		(void) e1000g_recycle(tx_ring);
205 		E1000G_DEBUG_STAT(tx_ring->stat_recycle);
206 
207 		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
208 			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
209 			goto tx_no_resource;
210 		}
211 	}
212 
213 	/*
214 	 * If there are many frags of the message, then bcopy them
215 	 * into one tx descriptor buffer will get better performance.
216 	 */
217 	if ((frag_count >= tx_ring->frags_limit) &&
218 	    (msg_size <= Adapter->tx_buffer_size)) {
219 		E1000G_DEBUG_STAT(tx_ring->stat_exceed_frags);
220 		force_bcopy |= FORCE_BCOPY_EXCEED_FRAGS;
221 	}
222 
223 	/*
224 	 * If the message size is less than the minimum ethernet packet size,
225 	 * we'll use bcopy to send it, and padd it to 60 bytes later.
226 	 */
227 	if (msg_size < ETHERMIN) {
228 		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
229 		force_bcopy |= FORCE_BCOPY_UNDER_SIZE;
230 	}
231 
232 	/* Initialize variables */
233 	desc_count = 1;	/* The initial value should be greater than 0 */
234 	desc_total = 0;
235 	QUEUE_INIT_LIST(&pending_list);
236 
237 	/* Retrieve checksum info */
238 	hcksum_retrieve(mp, NULL, NULL, &cksum.cksum_start, &cksum.cksum_stuff,
239 	    NULL, NULL, &cksum.cksum_flags);
240 
241 	if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
242 	    htons(ETHERTYPE_VLAN))
243 		cksum.ether_header_size = sizeof (struct ether_vlan_header);
244 	else
245 		cksum.ether_header_size = sizeof (struct ether_header);
246 
247 	/* Process each mblk fragment and fill tx descriptors */
248 	packet = NULL;
249 	nmp = mp;
250 	while (nmp) {
251 		tmp = nmp->b_cont;
252 
253 		len = MBLKL(nmp);
254 		/* Check zero length mblks */
255 		if (len == 0) {
256 			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
257 			/*
258 			 * If there're no packet buffers have been used,
259 			 * or we just completed processing a buffer, then
260 			 * skip the empty mblk fragment.
261 			 * Otherwise, there's still a pending buffer that
262 			 * needs to be processed (tx_copy).
263 			 */
264 			if (desc_count > 0) {
265 				nmp = tmp;
266 				continue;
267 			}
268 		}
269 
270 		/*
271 		 * Get a new TxSwPacket to process mblk buffers.
272 		 */
273 		if (desc_count > 0) {
274 			mutex_enter(&tx_ring->freelist_lock);
275 			packet = (p_tx_sw_packet_t)
276 			    QUEUE_POP_HEAD(&tx_ring->free_list);
277 			mutex_exit(&tx_ring->freelist_lock);
278 
279 			if (packet == NULL) {
280 				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
281 				    "No Tx SwPacket available\n");
282 				E1000G_STAT(tx_ring->stat_no_swpkt);
283 				goto tx_send_failed;
284 			}
285 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
286 		}
287 
288 		ASSERT(packet);
289 		/*
290 		 * If the size of the fragment is less than the tx_bcopy_thresh
291 		 * we'll use bcopy; Otherwise, we'll use DMA binding.
292 		 */
293 		if ((len <= Adapter->tx_bcopy_thresh) || force_bcopy) {
294 			desc_count =
295 			    e1000g_tx_copy(tx_ring, packet, nmp, force_bcopy);
296 			E1000G_DEBUG_STAT(tx_ring->stat_copy);
297 		} else {
298 			desc_count =
299 			    e1000g_tx_bind(tx_ring, packet, nmp);
300 			E1000G_DEBUG_STAT(tx_ring->stat_bind);
301 		}
302 
303 		if (desc_count > 0)
304 			desc_total += desc_count;
305 		else if (desc_count < 0)
306 			goto tx_send_failed;
307 
308 		nmp = tmp;
309 	}
310 
311 	/* Assign the message to the last sw packet */
312 	ASSERT(packet);
313 	ASSERT(packet->mp == NULL);
314 	packet->mp = mp;
315 
316 	/* Try to recycle the tx descriptors again */
317 	if (tx_ring->tbd_avail < (desc_total + 2)) {
318 		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
319 		(void) e1000g_recycle(tx_ring);
320 	}
321 
322 	mutex_enter(&tx_ring->tx_lock);
323 
324 	/*
325 	 * If the number of available tx descriptors is not enough for transmit
326 	 * (one redundant descriptor and one hw checksum context descriptor are
327 	 * included), then return failure.
328 	 */
329 	if (tx_ring->tbd_avail < (desc_total + 2)) {
330 		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
331 		    "No Enough Tx descriptors\n");
332 		E1000G_STAT(tx_ring->stat_no_desc);
333 		mutex_exit(&tx_ring->tx_lock);
334 		goto tx_send_failed;
335 	}
336 
337 	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cksum);
338 
339 	mutex_exit(&tx_ring->tx_lock);
340 
341 	ASSERT(desc_count > 0);
342 
343 	/* Send successful */
344 	return (B_TRUE);
345 
346 tx_send_failed:
347 	/*
348 	 * Enable Transmit interrupts, so that the interrupt routine can
349 	 * call mac_tx_update() when transmit descriptors become available.
350 	 */
351 	tx_ring->resched_needed = B_TRUE;
352 	if (!Adapter->tx_intr_enable)
353 		e1000g_mask_tx_interrupt(Adapter);
354 
355 	/* Free pending TxSwPackets */
356 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
357 	while (packet) {
358 		packet->mp = NULL;
359 		e1000g_free_tx_swpkt(packet);
360 		packet = (p_tx_sw_packet_t)
361 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
362 	}
363 
364 	/* Return pending TxSwPackets to the "Free" list */
365 	mutex_enter(&tx_ring->freelist_lock);
366 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
367 	mutex_exit(&tx_ring->freelist_lock);
368 
369 	E1000G_STAT(tx_ring->stat_send_fail);
370 
371 	/* Message will be scheduled for re-transmit */
372 	return (B_FALSE);
373 
374 tx_no_resource:
375 	/*
376 	 * Enable Transmit interrupts, so that the interrupt routine can
377 	 * call mac_tx_update() when transmit descriptors become available.
378 	 */
379 	tx_ring->resched_needed = B_TRUE;
380 	if (!Adapter->tx_intr_enable)
381 		e1000g_mask_tx_interrupt(Adapter);
382 
383 	/* Message will be scheduled for re-transmit */
384 	return (B_FALSE);
385 }
386 
387 static boolean_t
388 check_cksum_context(e1000g_tx_ring_t *tx_ring, cksum_data_t *cksum)
389 {
390 	boolean_t cksum_load;
391 	cksum_data_t *last;
392 
393 	cksum_load = B_FALSE;
394 	last = &tx_ring->cksum_data;
395 
396 	if (cksum->cksum_flags != 0) {
397 		if ((cksum->ether_header_size != last->ether_header_size) ||
398 		    (cksum->cksum_flags != last->cksum_flags) ||
399 		    (cksum->cksum_stuff != last->cksum_stuff) ||
400 		    (cksum->cksum_start != last->cksum_start)) {
401 
402 			cksum_load = B_TRUE;
403 		}
404 	}
405 
406 	return (cksum_load);
407 }
408 
409 static int
410 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
411     cksum_data_t *cksum)
412 {
413 	struct e1000g *Adapter;
414 	struct e1000_hw *hw;
415 	p_tx_sw_packet_t first_packet;
416 	p_tx_sw_packet_t packet;
417 	boolean_t cksum_load;
418 	struct e1000_tx_desc *first_data_desc;
419 	struct e1000_tx_desc *next_desc;
420 	struct e1000_tx_desc *descriptor;
421 	int desc_count;
422 	int i;
423 
424 	Adapter = tx_ring->adapter;
425 	hw = &Adapter->shared;
426 
427 	desc_count = 0;
428 	first_packet = NULL;
429 	first_data_desc = NULL;
430 	descriptor = NULL;
431 
432 	next_desc = tx_ring->tbd_next;
433 
434 	/* IP Head/TCP/UDP checksum offload */
435 	cksum_load = check_cksum_context(tx_ring, cksum);
436 
437 	if (cksum_load) {
438 		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
439 
440 		descriptor = next_desc;
441 
442 		e1000g_fill_context_descriptor(cksum,
443 		    (struct e1000_context_desc *)descriptor);
444 
445 		/* Check the wrap-around case */
446 		if (descriptor == tx_ring->tbd_last)
447 			next_desc = tx_ring->tbd_first;
448 		else
449 			next_desc++;
450 
451 		desc_count++;
452 	}
453 
454 	first_data_desc = next_desc;
455 
456 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
457 	while (packet) {
458 		ASSERT(packet->num_desc);
459 
460 		for (i = 0; i < packet->num_desc; i++) {
461 			ASSERT(tx_ring->tbd_avail > 0);
462 
463 			descriptor = next_desc;
464 			descriptor->buffer_addr =
465 			    packet->desc[i].address;
466 			descriptor->lower.data =
467 			    packet->desc[i].length;
468 
469 			/* Zero out status */
470 			descriptor->upper.data = 0;
471 
472 			descriptor->lower.data |=
473 			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
474 			/* must set RS on every outgoing descriptor */
475 			descriptor->lower.data |=
476 			    E1000_TXD_CMD_RS;
477 
478 			/* Check the wrap-around case */
479 			if (descriptor == tx_ring->tbd_last)
480 				next_desc = tx_ring->tbd_first;
481 			else
482 				next_desc++;
483 
484 			desc_count++;
485 		}
486 
487 		if (first_packet != NULL) {
488 			/*
489 			 * Count the checksum context descriptor for
490 			 * the first SwPacket.
491 			 */
492 			first_packet->num_desc++;
493 			first_packet = NULL;
494 		}
495 
496 		packet = (p_tx_sw_packet_t)
497 		    QUEUE_GET_NEXT(pending_list, &packet->Link);
498 	}
499 
500 	ASSERT(descriptor);
501 
502 	if (cksum->cksum_flags) {
503 		if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM)
504 			((struct e1000_data_desc *)first_data_desc)->
505 			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
506 		if (cksum->cksum_flags & HCK_PARTIALCKSUM)
507 			((struct e1000_data_desc *)first_data_desc)->
508 			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
509 	}
510 
511 	/*
512 	 * Last Descriptor of Packet needs End Of Packet (EOP), Report
513 	 * Status (RS) and append Ethernet CRC (IFCS) bits set.
514 	 */
515 	if (Adapter->tx_intr_delay) {
516 		descriptor->lower.data |= E1000_TXD_CMD_IDE |
517 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
518 	} else {
519 		descriptor->lower.data |=
520 		    E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
521 	}
522 
523 	/*
524 	 * Sync the Tx descriptors DMA buffer
525 	 */
526 	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
527 	    0, 0, DDI_DMA_SYNC_FORDEV);
528 
529 	tx_ring->tbd_next = next_desc;
530 
531 	/*
532 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
533 	 * FX1000 that this frame is available to transmit.
534 	 */
535 	if (hw->mac.type == e1000_82547)
536 		e1000g_82547_tx_move_tail(tx_ring);
537 	else
538 		E1000_WRITE_REG(hw, E1000_TDT(0),
539 		    (uint32_t)(next_desc - tx_ring->tbd_first));
540 
541 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
542 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
543 		Adapter->chip_state = E1000G_ERROR;
544 	}
545 
546 	/* Put the pending SwPackets to the "Used" list */
547 	mutex_enter(&tx_ring->usedlist_lock);
548 	QUEUE_APPEND(&tx_ring->used_list, pending_list);
549 	tx_ring->tbd_avail -= desc_count;
550 	mutex_exit(&tx_ring->usedlist_lock);
551 
552 	/* Store the cksum data */
553 	if (cksum_load)
554 		tx_ring->cksum_data = *cksum;
555 
556 	return (desc_count);
557 }
558 
559 
560 /*
561  * e1000g_tx_setup - setup tx data structures
562  *
563  * This routine initializes all of the transmit related
564  * structures. This includes the Transmit descriptors,
565  * and the tx_sw_packet structures.
566  */
567 void
568 e1000g_tx_setup(struct e1000g *Adapter)
569 {
570 	struct e1000_hw *hw;
571 	p_tx_sw_packet_t packet;
572 	UINT i;
573 	uint32_t buf_high;
574 	uint32_t buf_low;
575 	uint32_t reg_tipg;
576 	uint32_t reg_tctl;
577 	uint32_t reg_tarc;
578 	uint16_t speed, duplex;
579 	int size;
580 	e1000g_tx_ring_t *tx_ring;
581 
582 	hw = &Adapter->shared;
583 	tx_ring = Adapter->tx_ring;
584 
585 	/* init the lists */
586 	/*
587 	 * Here we don't need to protect the lists using the
588 	 * usedlist_lock and freelist_lock, for they have
589 	 * been protected by the chip_lock.
590 	 */
591 	QUEUE_INIT_LIST(&tx_ring->used_list);
592 	QUEUE_INIT_LIST(&tx_ring->free_list);
593 
594 	/* Go through and set up each SW_Packet */
595 	packet = tx_ring->packet_area;
596 	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
597 		/* Initialize this tx_sw_apcket area */
598 		e1000g_free_tx_swpkt(packet);
599 		/* Add this tx_sw_packet to the free list */
600 		QUEUE_PUSH_TAIL(&tx_ring->free_list,
601 		    &packet->Link);
602 	}
603 
604 	/* Setup TX descriptor pointers */
605 	tx_ring->tbd_next = tx_ring->tbd_first;
606 	tx_ring->tbd_oldest = tx_ring->tbd_first;
607 
608 	/*
609 	 * Setup Hardware TX Registers
610 	 */
611 	/* Setup the Transmit Control Register (TCTL). */
612 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
613 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
614 	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
615 	    E1000_TCTL_RTLC;
616 
617 	/* Enable the MULR bit */
618 	if (hw->bus.type == e1000_bus_type_pci_express)
619 		reg_tctl |= E1000_TCTL_MULR;
620 
621 	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);
622 
623 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
624 		e1000_get_speed_and_duplex(hw, &speed, &duplex);
625 
626 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(0));
627 		reg_tarc |= (1 << 25);
628 		if (speed == SPEED_1000)
629 			reg_tarc |= (1 << 21);
630 		E1000_WRITE_REG(hw, E1000_TARC(0), reg_tarc);
631 
632 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(1));
633 		reg_tarc |= (1 << 25);
634 		if (reg_tctl & E1000_TCTL_MULR)
635 			reg_tarc &= ~(1 << 28);
636 		else
637 			reg_tarc |= (1 << 28);
638 		E1000_WRITE_REG(hw, E1000_TARC(1), reg_tarc);
639 
640 	} else if (hw->mac.type == e1000_80003es2lan) {
641 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(0));
642 		reg_tarc |= 1;
643 		if (hw->phy.media_type == e1000_media_type_internal_serdes)
644 			reg_tarc |= (1 << 20);
645 		E1000_WRITE_REG(hw, E1000_TARC(0), reg_tarc);
646 
647 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(1));
648 		reg_tarc |= 1;
649 		E1000_WRITE_REG(hw, E1000_TARC(1), reg_tarc);
650 	}
651 
652 	/* Setup HW Base and Length of Tx descriptor area */
653 	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
654 	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
655 	size = E1000_READ_REG(hw, E1000_TDLEN(0));
656 
657 	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
658 	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);
659 
660 	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);
661 	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
662 
663 	/* Setup our HW Tx Head & Tail descriptor pointers */
664 	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
665 	E1000_WRITE_REG(hw, E1000_TDT(0), 0);
666 
667 	/* Set the default values for the Tx Inter Packet Gap timer */
668 	if ((hw->mac.type == e1000_82542) &&
669 	    ((hw->revision_id == E1000_REVISION_2) ||
670 	    (hw->revision_id == E1000_REVISION_3))) {
671 		reg_tipg = DEFAULT_82542_TIPG_IPGT;
672 		reg_tipg |=
673 		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
674 		reg_tipg |=
675 		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
676 	} else {
677 		if (hw->phy.media_type == e1000_media_type_fiber)
678 			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
679 		else
680 			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
681 		reg_tipg |=
682 		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
683 		reg_tipg |=
684 		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
685 	}
686 	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);
687 
688 	/* Setup Transmit Interrupt Delay Value */
689 	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
690 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
691 	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);
692 
693 	if (hw->mac.type >= e1000_82540) {
694 		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
695 		    Adapter->tx_intr_abs_delay);
696 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
697 		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
698 	}
699 
700 	tx_ring->tbd_avail = Adapter->tx_desc_num;
701 
702 	/* For TCP/UDP checksum offload */
703 	tx_ring->cksum_data.cksum_stuff = 0;
704 	tx_ring->cksum_data.cksum_start = 0;
705 	tx_ring->cksum_data.cksum_flags = 0;
706 	tx_ring->cksum_data.ether_header_size = 0;
707 }
708 
709 /*
710  * e1000g_recycle - recycle the tx descriptors and tx sw packets
711  */
712 int
713 e1000g_recycle(e1000g_tx_ring_t *tx_ring)
714 {
715 	struct e1000g *Adapter;
716 	LIST_DESCRIBER pending_list;
717 	p_tx_sw_packet_t packet;
718 	mblk_t *mp;
719 	mblk_t *nmp;
720 	struct e1000_tx_desc *descriptor;
721 	int desc_count;
722 	int is_intr;
723 
724 	/*
725 	 * This function will examine each TxSwPacket in the 'used' queue
726 	 * if the e1000g is done with it then the associated resources (Tx
727 	 * Descriptors) will be "freed" and the TxSwPacket will be
728 	 * returned to the 'free' queue.
729 	 */
730 	Adapter = tx_ring->adapter;
731 
732 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
733 	if (packet == NULL) {
734 		tx_ring->recycle_fail = 0;
735 		tx_ring->stall_watchdog = 0;
736 		return (0);
737 	}
738 
739 	is_intr = servicing_interrupt();
740 
741 	if (is_intr)
742 		mutex_enter(&tx_ring->usedlist_lock);
743 	else if (mutex_tryenter(&tx_ring->usedlist_lock) == 0)
744 		return (0);
745 
746 	desc_count = 0;
747 	QUEUE_INIT_LIST(&pending_list);
748 
749 	/* Sync the Tx descriptor DMA buffer */
750 	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
751 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
752 	if (e1000g_check_dma_handle(
753 	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
754 		mutex_exit(&tx_ring->usedlist_lock);
755 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
756 		Adapter->chip_state = E1000G_ERROR;
757 		return (0);
758 	}
759 
760 	/*
761 	 * While there are still TxSwPackets in the used queue check them
762 	 */
763 	while (packet =
764 	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) {
765 
766 		/*
767 		 * Get hold of the next descriptor that the e1000g will
768 		 * report status back to (this will be the last descriptor
769 		 * of a given sw packet). We only want to free the
770 		 * sw packet (and it resources) if the e1000g is done
771 		 * with ALL of the descriptors.  If the e1000g is done
772 		 * with the last one then it is done with all of them.
773 		 */
774 		ASSERT(packet->num_desc);
775 		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);
776 
777 		/* Check for wrap case */
778 		if (descriptor > tx_ring->tbd_last)
779 			descriptor -= Adapter->tx_desc_num;
780 
781 		/*
782 		 * If the descriptor done bit is set free TxSwPacket and
783 		 * associated resources
784 		 */
785 		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
786 			QUEUE_POP_HEAD(&tx_ring->used_list);
787 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
788 
789 			if (descriptor == tx_ring->tbd_last)
790 				tx_ring->tbd_oldest =
791 				    tx_ring->tbd_first;
792 			else
793 				tx_ring->tbd_oldest =
794 				    descriptor + 1;
795 
796 			desc_count += packet->num_desc;
797 
798 			if (is_intr && (desc_count >= Adapter->tx_recycle_num))
799 				break;
800 		} else {
801 			/*
802 			 * Found a sw packet that the e1000g is not done
803 			 * with then there is no reason to check the rest
804 			 * of the queue.
805 			 */
806 			break;
807 		}
808 	}
809 
810 	tx_ring->tbd_avail += desc_count;
811 	Adapter->tx_pkt_cnt += desc_count;
812 
813 	mutex_exit(&tx_ring->usedlist_lock);
814 
815 	if (desc_count == 0) {
816 		tx_ring->recycle_fail++;
817 		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
818 		return (0);
819 	}
820 
821 	tx_ring->recycle_fail = 0;
822 	tx_ring->stall_watchdog = 0;
823 
824 	mp = NULL;
825 	nmp = NULL;
826 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
827 	ASSERT(packet != NULL);
828 	while (packet != NULL) {
829 		if (packet->mp != NULL) {
830 			ASSERT(packet->mp->b_next == NULL);
831 			/* Assemble the message chain */
832 			if (mp == NULL) {
833 				mp = packet->mp;
834 				nmp = packet->mp;
835 			} else {
836 				nmp->b_next = packet->mp;
837 				nmp = packet->mp;
838 			}
839 			/* Disconnect the message from the sw packet */
840 			packet->mp = NULL;
841 		}
842 
843 		/* Free the TxSwPackets */
844 		e1000g_free_tx_swpkt(packet);
845 
846 		packet = (p_tx_sw_packet_t)
847 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
848 	}
849 
850 	/* Return the TxSwPackets back to the FreeList */
851 	mutex_enter(&tx_ring->freelist_lock);
852 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
853 	mutex_exit(&tx_ring->freelist_lock);
854 
855 	if (mp != NULL)
856 		freemsgchain(mp);
857 
858 	return (desc_count);
859 }
860 /*
861  * 82544 Coexistence issue workaround:
862  *    There are 2 issues.
863  *    1. If a 32 bit split completion happens from P64H2 and another
864  *	agent drives a 64 bit request/split completion after ONLY
865  *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
866  *	82544 has a problem where in to clock all the data in, it
867  *	looks at REQ64# signal and since it has changed so fast (i.e. 1
868  *	idle clock turn around), it will fail to clock all the data in.
869  *	Data coming from certain ending addresses has exposure to this issue.
870  *
871  * To detect this issue, following equation can be used...
872  *	SIZE[3:0] + ADDR[2:0] = SUM[3:0].
873  *	If SUM[3:0] is in between 1 to 4, we will have this issue.
874  *
875  * ROOT CAUSE:
876  *	The erratum involves the 82544 PCIX elasticity FIFO implementations as
877  *	64-bit FIFO's and flushing of the final partial-bytes corresponding
878  *	to the end of a requested read burst. Under a specific burst condition
879  *	of ending-data alignment and 32-byte split-completions, the final
880  *	byte(s) of split-completion data require an extra clock cycle to flush
881  *	into 64-bit FIFO orientation.  An incorrect logic dependency on the
882  *	REQ64# signal occurring during this clock cycle may cause the
883  *	residual byte(s) to be lost, thereby rendering the internal DMA client
884  *	forever awaiting the final byte(s) for an outbound data-fetch.  The
885  *	erratum is confirmed to *only* occur if certain subsequent external
886  *	64-bit PCIX bus transactions occur immediately (minimum possible bus
887  *	turn- around) following the odd-aligned 32-bit split-completion
888  *	containing the final byte(s).  Intel has confirmed that this has been
889  *	seen only with chipset/bridges which have the capability to provide
890  *	32-bit split-completion data, and in the presence of newer PCIX bus
891  *	agents which fully-optimize the inter-transaction turn-around (zero
892  *	additional initiator latency when pre-granted bus ownership).
893  *
894  *   	This issue does not exist in PCI bus mode, when any agent is operating
895  *	in 32 bit only mode or on chipsets that do not do 32 bit split
896  *	completions for 64 bit read requests (Serverworks chipsets). P64H2 does
897  *	32 bit split completions for any read request that has bit 2 set to 1
898  *	for the requested address and read request size is more than 8 bytes.
899  *
900  *   2. Another issue is related to 82544 driving DACs under the similar
901  *	scenario (32 bit split completion followed by 64 bit transaction with
902  *	only 1 cycle turnaround). This issue is still being root caused. We
903  *	think that both of these issues can be avoided if following workaround
904  *	is implemented. It seems DAC issues is related to ending addresses being
905  *	0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity
906  *	FIFO which does not get flushed due to REQ64# dependency. We will only
907  *	know the full story after it has been simulated successfully by HW team.
908  *
909  * WORKAROUND:
910  *	Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
911  */
912 static uint32_t
913 e1000g_fill_82544_desc(uint64_t address,
914     size_t length, p_desc_array_t desc_array)
915 {
916 	/*
917 	 * Since issue is sensitive to length and address.
918 	 * Let us first check the address...
919 	 */
920 	uint32_t safe_terminator;
921 
922 	if (length <= 4) {
923 		desc_array->descriptor[0].address = address;
924 		desc_array->descriptor[0].length = length;
925 		desc_array->elements = 1;
926 		return (desc_array->elements);
927 	}
928 	safe_terminator =
929 	    (uint32_t)((((uint32_t)address & 0x7) +
930 	    (length & 0xF)) & 0xF);
931 	/*
932 	 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then
933 	 * return
934 	 */
935 	if (safe_terminator == 0 ||
936 	    (safe_terminator > 4 && safe_terminator < 9) ||
937 	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
938 		desc_array->descriptor[0].address = address;
939 		desc_array->descriptor[0].length = length;
940 		desc_array->elements = 1;
941 		return (desc_array->elements);
942 	}
943 
944 	desc_array->descriptor[0].address = address;
945 	desc_array->descriptor[0].length = length - 4;
946 	desc_array->descriptor[1].address = address + (length - 4);
947 	desc_array->descriptor[1].length = 4;
948 	desc_array->elements = 2;
949 	return (desc_array->elements);
950 }
951 
952 static int
953 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
954     mblk_t *mp, uint32_t force_bcopy)
955 {
956 	size_t len;
957 	size_t len1;
958 	dma_buffer_t *tx_buf;
959 	mblk_t *nmp;
960 	boolean_t finished;
961 	int desc_count;
962 
963 	desc_count = 0;
964 	tx_buf = packet->tx_buf;
965 	len = MBLKL(mp);
966 
967 	ASSERT((tx_buf->len + len) <= tx_buf->size);
968 
969 	if (len > 0) {
970 		bcopy(mp->b_rptr,
971 		    tx_buf->address + tx_buf->len,
972 		    len);
973 		tx_buf->len += len;
974 
975 		packet->num_mblk_frag++;
976 	}
977 
978 	nmp = mp->b_cont;
979 	if (nmp == NULL) {
980 		finished = B_TRUE;
981 	} else {
982 		len1 = MBLKL(nmp);
983 		if ((tx_buf->len + len1) > tx_buf->size)
984 			finished = B_TRUE;
985 		else if (force_bcopy)
986 			finished = B_FALSE;
987 		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
988 			finished = B_TRUE;
989 		else
990 			finished = B_FALSE;
991 	}
992 
993 	if (finished) {
994 		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
995 		    (tx_buf->len > len));
996 
997 		/*
998 		 * If the packet is smaller than 64 bytes, which is the
999 		 * minimum ethernet packet size, pad the packet to make
1000 		 * it at least 60 bytes. The hardware will add 4 bytes
1001 		 * for CRC.
1002 		 */
1003 		if (force_bcopy & FORCE_BCOPY_UNDER_SIZE) {
1004 			ASSERT(tx_buf->len < ETHERMIN);
1005 
1006 			bzero(tx_buf->address + tx_buf->len,
1007 			    ETHERMIN - tx_buf->len);
1008 			tx_buf->len = ETHERMIN;
1009 		}
1010 
1011 #ifdef __sparc
1012 		if (packet->dma_type == USE_DVMA)
1013 			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
1014 		else
1015 			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1016 			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1017 #else
1018 		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1019 		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1020 #endif
1021 
1022 		packet->data_transfer_type = USE_BCOPY;
1023 
1024 		desc_count = e1000g_fill_tx_desc(tx_ring,
1025 		    packet,
1026 		    tx_buf->dma_address,
1027 		    tx_buf->len);
1028 
1029 		if (desc_count <= 0)
1030 			return (-1);
1031 	}
1032 
1033 	return (desc_count);
1034 }
1035 
1036 static int
1037 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
1038 {
1039 	int j;
1040 	int mystat;
1041 	size_t len;
1042 	ddi_dma_cookie_t dma_cookie;
1043 	uint_t ncookies;
1044 	int desc_count;
1045 	uint32_t desc_total;
1046 
1047 	desc_total = 0;
1048 	len = MBLKL(mp);
1049 
1050 	/*
1051 	 * ddi_dma_addr_bind_handle() allocates  DMA  resources  for  a
1052 	 * memory  object such that a device can perform DMA to or from
1053 	 * the object.  DMA resources  are  allocated  considering  the
1054 	 * device's  DMA  attributes  as  expressed by ddi_dma_attr(9S)
1055 	 * (see ddi_dma_alloc_handle(9F)).
1056 	 *
1057 	 * ddi_dma_addr_bind_handle() fills in  the  first  DMA  cookie
1058 	 * pointed  to by cookiep with the appropriate address, length,
1059 	 * and bus type. *ccountp is set to the number of DMA  cookies
1060 	 * representing this DMA object. Subsequent DMA cookies must be
1061 	 * retrieved by calling ddi_dma_nextcookie(9F)  the  number  of
1062 	 * times specified by *countp - 1.
1063 	 */
1064 	switch (packet->dma_type) {
1065 #ifdef __sparc
1066 	case USE_DVMA:
1067 		dvma_kaddr_load(packet->tx_dma_handle,
1068 		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);
1069 
1070 		dvma_sync(packet->tx_dma_handle, 0,
1071 		    DDI_DMA_SYNC_FORDEV);
1072 
1073 		ncookies = 1;
1074 		packet->data_transfer_type = USE_DVMA;
1075 		break;
1076 #endif
1077 	case USE_DMA:
1078 		if ((mystat = ddi_dma_addr_bind_handle(
1079 		    packet->tx_dma_handle, NULL,
1080 		    (caddr_t)mp->b_rptr, len,
1081 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
1082 		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
1083 		    &ncookies)) != DDI_DMA_MAPPED) {
1084 
1085 			e1000g_log(tx_ring->adapter, CE_WARN,
1086 			    "Couldn't bind mblk buffer to Tx DMA handle: "
1087 			    "return: %X, Pkt: %X\n",
1088 			    mystat, packet);
1089 			return (-1);
1090 		}
1091 
1092 		/*
1093 		 * An implicit ddi_dma_sync() is done when the
1094 		 * ddi_dma_addr_bind_handle() is called. So we
1095 		 * don't need to explicitly call ddi_dma_sync()
1096 		 * here any more.
1097 		 */
1098 		ASSERT(ncookies);
1099 		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
1100 		    (ncookies > 1));
1101 
1102 		/*
1103 		 * The data_transfer_type value must be set after the handle
1104 		 * has been bound, for it will be used in e1000g_free_tx_swpkt()
1105 		 * to decide whether we need to unbind the handle.
1106 		 */
1107 		packet->data_transfer_type = USE_DMA;
1108 		break;
1109 	default:
1110 		ASSERT(B_FALSE);
1111 		break;
1112 	}
1113 
1114 	packet->num_mblk_frag++;
1115 
1116 	/*
1117 	 * Each address could span thru multpile cookie..
1118 	 * Each cookie will have one descriptor
1119 	 */
1120 	for (j = ncookies; j != 0; j--) {
1121 
1122 		desc_count = e1000g_fill_tx_desc(tx_ring,
1123 		    packet,
1124 		    dma_cookie.dmac_laddress,
1125 		    dma_cookie.dmac_size);
1126 
1127 		if (desc_count <= 0)
1128 			return (-1);
1129 
1130 		desc_total += desc_count;
1131 
1132 		/*
1133 		 * ddi_dma_nextcookie() retrieves subsequent DMA
1134 		 * cookies for a DMA object.
1135 		 * ddi_dma_nextcookie() fills in the
1136 		 * ddi_dma_cookie(9S) structure pointed to by
1137 		 * cookiep.  The ddi_dma_cookie(9S) structure
1138 		 * must be allocated prior to calling
1139 		 * ddi_dma_nextcookie(). The DMA cookie count
1140 		 * returned by ddi_dma_buf_bind_handle(9F),
1141 		 * ddi_dma_addr_bind_handle(9F), or
1142 		 * ddi_dma_getwin(9F) indicates the number of DMA
1143 		 * cookies a DMA object consists of.  If the
1144 		 * resulting cookie count, N, is larger than 1,
1145 		 * ddi_dma_nextcookie() must be called N-1 times
1146 		 * to retrieve all DMA cookies.
1147 		 */
1148 		if (j > 1) {
1149 			ddi_dma_nextcookie(packet->tx_dma_handle,
1150 			    &dma_cookie);
1151 		}
1152 	}
1153 
1154 	return (desc_total);
1155 }
1156 
1157 static void
1158 e1000g_fill_context_descriptor(cksum_data_t *cksum,
1159     struct e1000_context_desc *cksum_desc)
1160 {
1161 	if (cksum->cksum_flags & HCK_IPV4_HDRCKSUM) {
1162 		cksum_desc->lower_setup.ip_fields.ipcss =
1163 		    cksum->ether_header_size;
1164 		cksum_desc->lower_setup.ip_fields.ipcso =
1165 		    cksum->ether_header_size +
1166 		    offsetof(struct ip, ip_sum);
1167 		cksum_desc->lower_setup.ip_fields.ipcse =
1168 		    cksum->ether_header_size +
1169 		    sizeof (struct ip) - 1;
1170 	} else
1171 		cksum_desc->lower_setup.ip_config = 0;
1172 
1173 	if (cksum->cksum_flags & HCK_PARTIALCKSUM) {
1174 		/*
1175 		 * The packet with same protocol has the following
1176 		 * stuff and start offset:
1177 		 * |  Protocol  | Stuff  | Start  | Checksum
1178 		 * |		| Offset | Offset | Enable
1179 		 * | IPv4 + TCP |  0x24  |  0x14  |  Yes
1180 		 * | IPv4 + UDP |  0x1A  |  0x14  |  Yes
1181 		 * | IPv6 + TCP |  0x20  |  0x10  |  No
1182 		 * | IPv6 + UDP |  0x14  |  0x10  |  No
1183 		 */
1184 		cksum_desc->upper_setup.tcp_fields.tucss =
1185 		    cksum->cksum_start + cksum->ether_header_size;
1186 		cksum_desc->upper_setup.tcp_fields.tucso =
1187 		    cksum->cksum_stuff + cksum->ether_header_size;
1188 		cksum_desc->upper_setup.tcp_fields.tucse = 0;
1189 	} else
1190 		cksum_desc->upper_setup.tcp_config = 0;
1191 
1192 	cksum_desc->cmd_and_length = E1000_TXD_CMD_DEXT;
1193 
1194 	/*
1195 	 * Zero out the options for TCP Segmentation Offload,
1196 	 * since we don't support it in this version
1197 	 */
1198 	cksum_desc->tcp_seg_setup.data = 0;
1199 }
1200 
1201 static int
1202 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
1203     p_tx_sw_packet_t packet, uint64_t address, size_t size)
1204 {
1205 	struct e1000_hw *hw = &tx_ring->adapter->shared;
1206 	p_sw_desc_t desc;
1207 
1208 	if (hw->mac.type == e1000_82544) {
1209 		if (hw->bus.type == e1000_bus_type_pcix)
1210 			return (e1000g_tx_workaround_PCIX_82544(packet,
1211 			    address, size));
1212 
1213 		if (size > JUMBO_FRAG_LENGTH)
1214 			return (e1000g_tx_workaround_jumbo_82544(packet,
1215 			    address, size));
1216 	}
1217 
1218 	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1219 
1220 	desc = &packet->desc[packet->num_desc];
1221 	desc->address = address;
1222 	desc->length = size;
1223 
1224 	packet->num_desc++;
1225 
1226 	return (1);
1227 }
1228 
1229 static int
1230 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
1231     uint64_t address, size_t size)
1232 {
1233 	p_sw_desc_t desc;
1234 	int desc_count;
1235 	long size_left;
1236 	size_t len;
1237 	uint32_t counter;
1238 	uint32_t array_elements;
1239 	desc_array_t desc_array;
1240 
1241 	/*
1242 	 * Coexist Workaround for cordova: RP: 07/04/03
1243 	 *
1244 	 * RP: ERRATA: Workaround ISSUE:
1245 	 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup
1246 	 * Eachbuffer in to 8kb pieces until the
1247 	 * remainder is < 8kb
1248 	 */
1249 	size_left = size;
1250 	desc_count = 0;
1251 
1252 	while (size_left > 0) {
1253 		if (size_left > MAX_TX_BUF_SIZE)
1254 			len = MAX_TX_BUF_SIZE;
1255 		else
1256 			len = size_left;
1257 
1258 		array_elements = e1000g_fill_82544_desc(address,
1259 		    len, &desc_array);
1260 
1261 		for (counter = 0; counter < array_elements; counter++) {
1262 			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1263 			/*
1264 			 * Put in the buffer address
1265 			 */
1266 			desc = &packet->desc[packet->num_desc];
1267 
1268 			desc->address =
1269 			    desc_array.descriptor[counter].address;
1270 			desc->length =
1271 			    desc_array.descriptor[counter].length;
1272 
1273 			packet->num_desc++;
1274 			desc_count++;
1275 		} /* for */
1276 
1277 		/*
1278 		 * Update the buffer address and length
1279 		 */
1280 		address += MAX_TX_BUF_SIZE;
1281 		size_left -= MAX_TX_BUF_SIZE;
1282 	} /* while */
1283 
1284 	return (desc_count);
1285 }
1286 
1287 static int
1288 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
1289     uint64_t address, size_t size)
1290 {
1291 	p_sw_desc_t desc;
1292 	int desc_count;
1293 	long size_left;
1294 	uint32_t offset;
1295 
1296 	/*
1297 	 * Workaround for Jumbo Frames on Cordova
1298 	 * PSD 06/01/2001
1299 	 */
1300 	size_left = size;
1301 	desc_count = 0;
1302 	offset = 0;
1303 	while (size_left > 0) {
1304 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1305 
1306 		desc = &packet->desc[packet->num_desc];
1307 
1308 		desc->address = address + offset;
1309 
1310 		if (size_left > JUMBO_FRAG_LENGTH)
1311 			desc->length = JUMBO_FRAG_LENGTH;
1312 		else
1313 			desc->length = size_left;
1314 
1315 		packet->num_desc++;
1316 		desc_count++;
1317 
1318 		offset += desc->length;
1319 		size_left -= JUMBO_FRAG_LENGTH;
1320 	}
1321 
1322 	return (desc_count);
1323 }
1324 
1325 #pragma inline(e1000g_82547_tx_move_tail_work)
1326 
1327 static void
1328 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
1329 {
1330 	struct e1000_hw *hw;
1331 	uint16_t hw_tdt;
1332 	uint16_t sw_tdt;
1333 	struct e1000_tx_desc *tx_desc;
1334 	uint16_t length = 0;
1335 	boolean_t eop = B_FALSE;
1336 	struct e1000g *Adapter;
1337 
1338 	Adapter = tx_ring->adapter;
1339 	hw = &Adapter->shared;
1340 
1341 	hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
1342 	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;
1343 
1344 	while (hw_tdt != sw_tdt) {
1345 		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
1346 		length += tx_desc->lower.flags.length;
1347 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1348 		if (++hw_tdt == Adapter->tx_desc_num)
1349 			hw_tdt = 0;
1350 
1351 		if (eop) {
1352 			if ((Adapter->link_duplex == HALF_DUPLEX) &&
1353 			    (e1000_fifo_workaround_82547(hw, length)
1354 			    != E1000_SUCCESS)) {
1355 				if (tx_ring->timer_enable_82547) {
1356 					ASSERT(tx_ring->timer_id_82547 == 0);
1357 					tx_ring->timer_id_82547 =
1358 					    timeout(e1000g_82547_timeout,
1359 					    (void *)tx_ring,
1360 					    drv_usectohz(10000));
1361 				}
1362 				return;
1363 
1364 			} else {
1365 				E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
1366 				e1000_update_tx_fifo_head_82547(hw, length);
1367 				length = 0;
1368 			}
1369 		}
1370 	}
1371 }
1372 
1373 static void
1374 e1000g_82547_timeout(void *arg)
1375 {
1376 	e1000g_tx_ring_t *tx_ring;
1377 
1378 	tx_ring = (e1000g_tx_ring_t *)arg;
1379 
1380 	mutex_enter(&tx_ring->tx_lock);
1381 
1382 	tx_ring->timer_id_82547 = 0;
1383 	e1000g_82547_tx_move_tail_work(tx_ring);
1384 
1385 	mutex_exit(&tx_ring->tx_lock);
1386 }
1387 
1388 static void
1389 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
1390 {
1391 	timeout_id_t tid;
1392 
1393 	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));
1394 
1395 	tid = tx_ring->timer_id_82547;
1396 	tx_ring->timer_id_82547 = 0;
1397 	if (tid != 0) {
1398 		tx_ring->timer_enable_82547 = B_FALSE;
1399 		mutex_exit(&tx_ring->tx_lock);
1400 
1401 		(void) untimeout(tid);
1402 
1403 		mutex_enter(&tx_ring->tx_lock);
1404 	}
1405 	tx_ring->timer_enable_82547 = B_TRUE;
1406 	e1000g_82547_tx_move_tail_work(tx_ring);
1407 }
1408