xref: /titanic_50/usr/src/uts/common/io/e1000g/e1000g_tx.c (revision 64012b183780cacb63ca9686d771578f883ac119)
1 /*
2  * This file is provided under a CDDLv1 license.  When using or
3  * redistributing this file, you may do so under this license.
4  * In redistributing this file this license must be included
5  * and no other modification of this header file is permitted.
6  *
7  * CDDL LICENSE SUMMARY
8  *
9  * Copyright(c) 1999 - 2008 Intel Corporation. All rights reserved.
10  *
11  * The contents of this file are subject to the terms of Version
12  * 1.0 of the Common Development and Distribution License (the "License").
13  *
14  * You should have received a copy of the License with this software.
15  * You can obtain a copy of the License at
16  *	http://www.opensolaris.org/os/licensing.
17  * See the License for the specific language governing permissions
18  * and limitations under the License.
19  */
20 
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms of the CDDLv1.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * **********************************************************************
30  *									*
31  * Module Name:								*
32  *   e1000g_tx.c							*
33  *									*
34  * Abstract:								*
35  *   This file contains some routines that take care of Transmit,	*
36  *   make the hardware to send the data pointed by the packet out	*
37  *   on to the physical medium.						*
38  *									*
39  * **********************************************************************
40  */
41 
42 #include "e1000g_sw.h"
43 #include "e1000g_debug.h"
44 
/*
 * Prototypes for the static (file-local) transmit helpers used below.
 */
static boolean_t e1000g_send(struct e1000g *, mblk_t *);
static int e1000g_tx_copy(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *, boolean_t);
static int e1000g_tx_bind(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, mblk_t *);
static boolean_t e1000g_retreive_context(mblk_t *, context_data_t *, size_t);
static boolean_t e1000g_check_context(e1000g_tx_ring_t *, context_data_t *);
static int e1000g_fill_tx_ring(e1000g_tx_ring_t *, LIST_DESCRIBER *,
    context_data_t *);
static void e1000g_fill_context_descriptor(context_data_t *,
    struct e1000_context_desc *);
static int e1000g_fill_tx_desc(e1000g_tx_ring_t *,
    p_tx_sw_packet_t, uint64_t, size_t);
static uint32_t e1000g_fill_82544_desc(uint64_t Address, size_t Length,
    p_desc_array_t desc_array);
static int e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t, uint64_t, size_t);
static int e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t, uint64_t, size_t);
static void e1000g_82547_timeout(void *);
static void e1000g_82547_tx_move_tail(e1000g_tx_ring_t *);
static void e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *);

/*
 * When E1000G_DEBUG is not defined, ask the compiler to inline the
 * per-packet helpers listed here.
 */
#ifndef E1000G_DEBUG
#pragma inline(e1000g_tx_copy)
#pragma inline(e1000g_tx_bind)
#pragma inline(e1000g_retreive_context)
#pragma inline(e1000g_check_context)
#pragma inline(e1000g_fill_tx_ring)
#pragma inline(e1000g_fill_context_descriptor)
#pragma inline(e1000g_fill_tx_desc)
#pragma inline(e1000g_fill_82544_desc)
#pragma inline(e1000g_tx_workaround_PCIX_82544)
#pragma inline(e1000g_tx_workaround_jumbo_82544)
#pragma inline(e1000g_free_tx_swpkt)
#endif
79 
80 /*
81  * e1000g_free_tx_swpkt	- free up the tx sw packet
82  *
83  * Unbind the previously bound DMA handle for a given
84  * transmit sw packet. And reset the sw packet data.
85  */
86 void
87 e1000g_free_tx_swpkt(register p_tx_sw_packet_t packet)
88 {
89 	switch (packet->data_transfer_type) {
90 	case USE_BCOPY:
91 		packet->tx_buf->len = 0;
92 		break;
93 #ifdef __sparc
94 	case USE_DVMA:
95 		dvma_unload(packet->tx_dma_handle, 0, -1);
96 		break;
97 #endif
98 	case USE_DMA:
99 		ddi_dma_unbind_handle(packet->tx_dma_handle);
100 		break;
101 	default:
102 		break;
103 	}
104 
105 	/*
106 	 * The mblk has been stripped off the sw packet
107 	 * and will be freed in a triggered soft intr.
108 	 */
109 	ASSERT(packet->mp == NULL);
110 
111 	packet->data_transfer_type = USE_NONE;
112 	packet->num_mblk_frag = 0;
113 	packet->num_desc = 0;
114 }
115 
116 mblk_t *
117 e1000g_m_tx(void *arg, mblk_t *mp)
118 {
119 	struct e1000g *Adapter = (struct e1000g *)arg;
120 	mblk_t *next;
121 
122 	rw_enter(&Adapter->chip_lock, RW_READER);
123 
124 	if ((Adapter->chip_state != E1000G_START) ||
125 	    (Adapter->link_state != LINK_STATE_UP)) {
126 		freemsgchain(mp);
127 		mp = NULL;
128 	}
129 
130 	while (mp != NULL) {
131 		next = mp->b_next;
132 		mp->b_next = NULL;
133 
134 		if (!e1000g_send(Adapter, mp)) {
135 			mp->b_next = next;
136 			break;
137 		}
138 
139 		mp = next;
140 	}
141 
142 	rw_exit(&Adapter->chip_lock);
143 	return (mp);
144 }
145 
146 /*
147  * e1000g_send -  send packets onto the wire
148  *
149  * Called from e1000g_m_tx with an mblk ready to send. this
150  * routine sets up the transmit descriptors and sends data to
151  * the wire. It also pushes the just transmitted packet to
152  * the used tx sw packet list.
153  */
154 static boolean_t
155 e1000g_send(struct e1000g *Adapter, mblk_t *mp)
156 {
157 	struct e1000_hw *hw;
158 	p_tx_sw_packet_t packet;
159 	LIST_DESCRIBER pending_list;
160 	size_t len;
161 	size_t msg_size;
162 	uint32_t frag_count;
163 	int desc_count;
164 	uint32_t desc_total;
165 	boolean_t tx_undersize_flag;
166 	mblk_t *nmp;
167 	mblk_t *tmp;
168 	e1000g_tx_ring_t *tx_ring;
169 	context_data_t cur_context;
170 
171 	hw = &Adapter->shared;
172 	tx_ring = Adapter->tx_ring;
173 
174 	/* Get the total size and frags number of the message */
175 	tx_undersize_flag = B_FALSE;
176 	frag_count = 0;
177 	msg_size = 0;
178 	for (nmp = mp; nmp; nmp = nmp->b_cont) {
179 		frag_count++;
180 		msg_size += MBLKL(nmp);
181 	}
182 
183 	/* retreive and compute information for context descriptor */
184 	if (!e1000g_retreive_context(mp, &cur_context, msg_size)) {
185 		freemsg(mp);
186 		return (B_TRUE);
187 	}
188 
189 	/*
190 	 * Make sure the packet is less than the allowed size
191 	 */
192 	if (!cur_context.lso_flag &&
193 	    (msg_size > Adapter->max_frame_size - ETHERFCSL)) {
194 		/*
195 		 * For the over size packet, we'll just drop it.
196 		 * So we return B_TRUE here.
197 		 */
198 		E1000G_DEBUGLOG_1(Adapter, E1000G_WARN_LEVEL,
199 		    "Tx packet out of bound. length = %d \n", msg_size);
200 		E1000G_STAT(tx_ring->stat_over_size);
201 		freemsg(mp);
202 		return (B_TRUE);
203 	}
204 
205 	/*
206 	 * Check and reclaim tx descriptors.
207 	 * This low water mark check should be done all the time as
208 	 * Transmit interrupt delay can produce Transmit interrupts little
209 	 * late and that may cause few problems related to reaping Tx
210 	 * Descriptors... As you may run short of them before getting any
211 	 * transmit interrupt...
212 	 */
213 	if (tx_ring->resched_needed ||
214 	    (tx_ring->tbd_avail < Adapter->tx_recycle_thresh)) {
215 		(void) e1000g_recycle(tx_ring);
216 		E1000G_DEBUG_STAT(tx_ring->stat_recycle);
217 
218 		if (tx_ring->tbd_avail < DEFAULT_TX_NO_RESOURCE) {
219 			E1000G_DEBUG_STAT(tx_ring->stat_lack_desc);
220 			goto tx_no_resource;
221 		}
222 	}
223 
224 	/*
225 	 * If the message size is less than the minimum ethernet packet size,
226 	 * we'll use bcopy to send it, and padd it to 60 bytes later.
227 	 */
228 	if (msg_size < ETHERMIN) {
229 		E1000G_DEBUG_STAT(tx_ring->stat_under_size);
230 		tx_undersize_flag = B_TRUE;
231 	}
232 
233 	/* Initialize variables */
234 	desc_count = 1;	/* The initial value should be greater than 0 */
235 	desc_total = 0;
236 	QUEUE_INIT_LIST(&pending_list);
237 
238 	/* Process each mblk fragment and fill tx descriptors */
239 	packet = NULL;
240 	nmp = mp;
241 	while (nmp) {
242 		tmp = nmp->b_cont;
243 
244 		len = MBLKL(nmp);
245 		/* Check zero length mblks */
246 		if (len == 0) {
247 			E1000G_DEBUG_STAT(tx_ring->stat_empty_frags);
248 			/*
249 			 * If there're no packet buffers have been used,
250 			 * or we just completed processing a buffer, then
251 			 * skip the empty mblk fragment.
252 			 * Otherwise, there's still a pending buffer that
253 			 * needs to be processed (tx_copy).
254 			 */
255 			if (desc_count > 0) {
256 				nmp = tmp;
257 				continue;
258 			}
259 		}
260 
261 		/*
262 		 * Get a new TxSwPacket to process mblk buffers.
263 		 */
264 		if (desc_count > 0) {
265 			mutex_enter(&tx_ring->freelist_lock);
266 			packet = (p_tx_sw_packet_t)
267 			    QUEUE_POP_HEAD(&tx_ring->free_list);
268 			mutex_exit(&tx_ring->freelist_lock);
269 
270 			if (packet == NULL) {
271 				E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
272 				    "No Tx SwPacket available\n");
273 				E1000G_STAT(tx_ring->stat_no_swpkt);
274 				goto tx_send_failed;
275 			}
276 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
277 		}
278 
279 		ASSERT(packet);
280 		/*
281 		 * If the size of the fragment is less than the tx_bcopy_thresh
282 		 * we'll use bcopy; Otherwise, we'll use DMA binding.
283 		 */
284 		if ((len <= Adapter->tx_bcopy_thresh) || tx_undersize_flag) {
285 			desc_count =
286 			    e1000g_tx_copy(tx_ring, packet, nmp,
287 			    tx_undersize_flag);
288 			E1000G_DEBUG_STAT(tx_ring->stat_copy);
289 		} else {
290 			desc_count =
291 			    e1000g_tx_bind(tx_ring, packet, nmp);
292 			E1000G_DEBUG_STAT(tx_ring->stat_bind);
293 		}
294 
295 		if (desc_count > 0)
296 			desc_total += desc_count;
297 		else if (desc_count < 0)
298 			goto tx_send_failed;
299 
300 		nmp = tmp;
301 	}
302 
303 	/* Assign the message to the last sw packet */
304 	ASSERT(packet);
305 	ASSERT(packet->mp == NULL);
306 	packet->mp = mp;
307 
308 	/* Try to recycle the tx descriptors again */
309 	if (tx_ring->tbd_avail < (desc_total + 2)) {
310 		E1000G_DEBUG_STAT(tx_ring->stat_recycle_retry);
311 		(void) e1000g_recycle(tx_ring);
312 	}
313 
314 	mutex_enter(&tx_ring->tx_lock);
315 
316 	/*
317 	 * If the number of available tx descriptors is not enough for transmit
318 	 * (one redundant descriptor and one hw checksum context descriptor are
319 	 * included), then return failure.
320 	 */
321 	if (tx_ring->tbd_avail < (desc_total + 2)) {
322 		E1000G_DEBUGLOG_0(Adapter, E1000G_INFO_LEVEL,
323 		    "No Enough Tx descriptors\n");
324 		E1000G_STAT(tx_ring->stat_no_desc);
325 		mutex_exit(&tx_ring->tx_lock);
326 		goto tx_send_failed;
327 	}
328 
329 	desc_count = e1000g_fill_tx_ring(tx_ring, &pending_list, &cur_context);
330 
331 	mutex_exit(&tx_ring->tx_lock);
332 
333 	ASSERT(desc_count > 0);
334 
335 	/* Send successful */
336 	return (B_TRUE);
337 
338 tx_send_failed:
339 	/*
340 	 * Enable Transmit interrupts, so that the interrupt routine can
341 	 * call mac_tx_update() when transmit descriptors become available.
342 	 */
343 	tx_ring->resched_needed = B_TRUE;
344 	if (!Adapter->tx_intr_enable)
345 		e1000g_mask_tx_interrupt(Adapter);
346 
347 	/* Free pending TxSwPackets */
348 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
349 	while (packet) {
350 		packet->mp = NULL;
351 		e1000g_free_tx_swpkt(packet);
352 		packet = (p_tx_sw_packet_t)
353 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
354 	}
355 
356 	/* Return pending TxSwPackets to the "Free" list */
357 	mutex_enter(&tx_ring->freelist_lock);
358 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
359 	mutex_exit(&tx_ring->freelist_lock);
360 
361 	E1000G_STAT(tx_ring->stat_send_fail);
362 
363 	/* Message will be scheduled for re-transmit */
364 	return (B_FALSE);
365 
366 tx_no_resource:
367 	/*
368 	 * Enable Transmit interrupts, so that the interrupt routine can
369 	 * call mac_tx_update() when transmit descriptors become available.
370 	 */
371 	tx_ring->resched_needed = B_TRUE;
372 	if (!Adapter->tx_intr_enable)
373 		e1000g_mask_tx_interrupt(Adapter);
374 
375 	/* Message will be scheduled for re-transmit */
376 	return (B_FALSE);
377 }
378 
379 static boolean_t
380 e1000g_retreive_context(mblk_t *mp, context_data_t *cur_context,
381     size_t msg_size)
382 {
383 	uintptr_t ip_start;
384 	uintptr_t tcp_start;
385 	mblk_t *nmp;
386 
387 	bzero(cur_context, sizeof (context_data_t));
388 
389 	/* retrieve checksum info */
390 	hcksum_retrieve(mp, NULL, NULL, &cur_context->cksum_start,
391 	    &cur_context->cksum_stuff, NULL, NULL, &cur_context->cksum_flags);
392 	/* retreive ethernet header size */
393 	if (((struct ether_vlan_header *)mp->b_rptr)->ether_tpid ==
394 	    htons(ETHERTYPE_VLAN))
395 		cur_context->ether_header_size =
396 		    sizeof (struct ether_vlan_header);
397 	else
398 		cur_context->ether_header_size =
399 		    sizeof (struct ether_header);
400 
401 	if (cur_context->cksum_flags & HW_LSO) {
402 		if ((cur_context->mss = DB_LSOMSS(mp)) != 0) {
403 			/* free the invaid packet */
404 			if (!((cur_context->cksum_flags & HCK_PARTIALCKSUM) &&
405 			    (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM))) {
406 				return (B_FALSE);
407 			}
408 			cur_context->lso_flag = B_TRUE;
409 			/*
410 			 * Some fields are cleared for the hardware to fill
411 			 * in. We don't assume Ethernet header, IP header and
412 			 * TCP header are always in the same mblk fragment,
413 			 * while we assume each header is always within one
414 			 * mblk fragment and Ethernet header is always in the
415 			 * first mblk fragment.
416 			 */
417 			nmp = mp;
418 			ip_start = (uintptr_t)(nmp->b_rptr)
419 			    + cur_context->ether_header_size;
420 			if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
421 				ip_start = (uintptr_t)nmp->b_cont->b_rptr
422 				    + (ip_start - (uintptr_t)(nmp->b_wptr));
423 				nmp = nmp->b_cont;
424 			}
425 			tcp_start = ip_start +
426 			    IPH_HDR_LENGTH((ipha_t *)ip_start);
427 			if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
428 				tcp_start = (uintptr_t)nmp->b_cont->b_rptr
429 				    + (tcp_start - (uintptr_t)(nmp->b_wptr));
430 				nmp = nmp->b_cont;
431 			}
432 			cur_context->hdr_len = cur_context->ether_header_size
433 			    + IPH_HDR_LENGTH((ipha_t *)ip_start)
434 			    + TCP_HDR_LENGTH((tcph_t *)tcp_start);
435 			((ipha_t *)ip_start)->ipha_length = 0;
436 			((ipha_t *)ip_start)->ipha_hdr_checksum = 0;
437 			/* calculate the TCP packet payload length */
438 			cur_context->pay_len = msg_size - cur_context->hdr_len;
439 		}
440 	}
441 	return (B_TRUE);
442 }
443 
444 static boolean_t
445 e1000g_check_context(e1000g_tx_ring_t *tx_ring, context_data_t *cur_context)
446 {
447 	boolean_t context_reload;
448 	context_data_t *pre_context;
449 	struct e1000g *Adapter;
450 
451 	context_reload = B_FALSE;
452 	pre_context = &tx_ring->pre_context;
453 	Adapter = tx_ring->adapter;
454 
455 	/*
456 	 * The following code determine if the context descriptor is
457 	 * needed to be reloaded. The sequence of the conditions is
458 	 * made by their possibilities of changing.
459 	 */
460 	/*
461 	 * workaround for 82546EB, context descriptor must be reloaded
462 	 * per LSO/hw_cksum packet if LSO is enabled.
463 	 */
464 	if (Adapter->lso_premature_issue &&
465 	    Adapter->lso_enable &&
466 	    (cur_context->cksum_flags != 0)) {
467 
468 		context_reload = B_TRUE;
469 	} else if (cur_context->lso_flag) {
470 		if ((cur_context->cksum_flags != pre_context->cksum_flags) ||
471 		    (cur_context->pay_len != pre_context->pay_len) ||
472 		    (cur_context->mss != pre_context->mss) ||
473 		    (cur_context->hdr_len != pre_context->hdr_len) ||
474 		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
475 		    (cur_context->cksum_start != pre_context->cksum_start) ||
476 		    (cur_context->ether_header_size !=
477 		    pre_context->ether_header_size)) {
478 
479 			context_reload = B_TRUE;
480 		}
481 	} else if (cur_context->cksum_flags != 0) {
482 		if ((cur_context->cksum_flags != pre_context->cksum_flags) ||
483 		    (cur_context->cksum_stuff != pre_context->cksum_stuff) ||
484 		    (cur_context->cksum_start != pre_context->cksum_start) ||
485 		    (cur_context->ether_header_size !=
486 		    pre_context->ether_header_size)) {
487 
488 			context_reload = B_TRUE;
489 		}
490 	}
491 
492 	return (context_reload);
493 }
494 
495 static int
496 e1000g_fill_tx_ring(e1000g_tx_ring_t *tx_ring, LIST_DESCRIBER *pending_list,
497     context_data_t *cur_context)
498 {
499 	struct e1000g *Adapter;
500 	struct e1000_hw *hw;
501 	p_tx_sw_packet_t first_packet;
502 	p_tx_sw_packet_t packet;
503 	p_tx_sw_packet_t previous_packet;
504 	boolean_t context_reload;
505 	struct e1000_tx_desc *first_data_desc;
506 	struct e1000_tx_desc *next_desc;
507 	struct e1000_tx_desc *descriptor;
508 	int desc_count;
509 	boolean_t buff_overrun_flag;
510 	int i;
511 
512 	Adapter = tx_ring->adapter;
513 	hw = &Adapter->shared;
514 
515 	desc_count = 0;
516 	first_packet = NULL;
517 	first_data_desc = NULL;
518 	descriptor = NULL;
519 	first_packet = NULL;
520 	packet = NULL;
521 	buff_overrun_flag = B_FALSE;
522 
523 	next_desc = tx_ring->tbd_next;
524 
525 	/* Context descriptor reload check */
526 	context_reload = e1000g_check_context(tx_ring, cur_context);
527 
528 	if (context_reload) {
529 		first_packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
530 
531 		descriptor = next_desc;
532 
533 		e1000g_fill_context_descriptor(cur_context,
534 		    (struct e1000_context_desc *)descriptor);
535 
536 		/* Check the wrap-around case */
537 		if (descriptor == tx_ring->tbd_last)
538 			next_desc = tx_ring->tbd_first;
539 		else
540 			next_desc++;
541 
542 		desc_count++;
543 	}
544 
545 	first_data_desc = next_desc;
546 
547 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(pending_list);
548 	while (packet) {
549 		ASSERT(packet->num_desc);
550 
551 		for (i = 0; i < packet->num_desc; i++) {
552 			ASSERT(tx_ring->tbd_avail > 0);
553 
554 			descriptor = next_desc;
555 			descriptor->buffer_addr =
556 			    packet->desc[i].address;
557 			descriptor->lower.data =
558 			    packet->desc[i].length;
559 
560 			/* Zero out status */
561 			descriptor->upper.data = 0;
562 
563 			descriptor->lower.data |=
564 			    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
565 			/* must set RS on every outgoing descriptor */
566 			descriptor->lower.data |=
567 			    E1000_TXD_CMD_RS;
568 
569 			if (cur_context->lso_flag)
570 				descriptor->lower.data |= E1000_TXD_CMD_TSE;
571 
572 			/* Check the wrap-around case */
573 			if (descriptor == tx_ring->tbd_last)
574 				next_desc = tx_ring->tbd_first;
575 			else
576 				next_desc++;
577 
578 			desc_count++;
579 
580 			/*
581 			 * workaround for 82546EB errata 33, hang in PCI-X
582 			 * systems due to 2k Buffer Overrun during Transmit
583 			 * Operation. The workaround applies to all the Intel
584 			 * PCI-X chips.
585 			 */
586 			if (hw->bus.type == e1000_bus_type_pcix &&
587 			    descriptor == first_data_desc &&
588 			    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK)
589 			    > E1000_TX_BUFFER_OEVRRUN_THRESHOLD)) {
590 				/* modified the first descriptor */
591 				descriptor->lower.data &=
592 				    ~E1000G_TBD_LENGTH_MASK;
593 				descriptor->lower.flags.length =
594 				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
595 
596 				/* insert a new descriptor */
597 				ASSERT(tx_ring->tbd_avail > 0);
598 				next_desc->buffer_addr =
599 				    packet->desc[0].address +
600 				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
601 				next_desc->lower.data =
602 				    packet->desc[0].length -
603 				    E1000_TX_BUFFER_OEVRRUN_THRESHOLD;
604 
605 				/* Zero out status */
606 				next_desc->upper.data = 0;
607 
608 				next_desc->lower.data |=
609 				    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
610 				/* must set RS on every outgoing descriptor */
611 				next_desc->lower.data |=
612 				    E1000_TXD_CMD_RS;
613 
614 				if (cur_context->lso_flag)
615 					next_desc->lower.data |=
616 					    E1000_TXD_CMD_TSE;
617 
618 				descriptor = next_desc;
619 
620 				/* Check the wrap-around case */
621 				if (next_desc == tx_ring->tbd_last)
622 					next_desc = tx_ring->tbd_first;
623 				else
624 					next_desc++;
625 
626 				desc_count++;
627 				buff_overrun_flag = B_TRUE;
628 			}
629 		}
630 
631 		if (buff_overrun_flag) {
632 			packet->num_desc++;
633 			buff_overrun_flag = B_FALSE;
634 		}
635 
636 		if (first_packet != NULL) {
637 			/*
638 			 * Count the checksum context descriptor for
639 			 * the first SwPacket.
640 			 */
641 			first_packet->num_desc++;
642 			first_packet = NULL;
643 		}
644 
645 		previous_packet = packet;
646 		packet = (p_tx_sw_packet_t)
647 		    QUEUE_GET_NEXT(pending_list, &packet->Link);
648 	}
649 
650 	/*
651 	 * workaround for 82546EB errata 21, LSO Premature Descriptor Write Back
652 	 */
653 	if (Adapter->lso_premature_issue && cur_context->lso_flag &&
654 	    ((descriptor->lower.data & E1000G_TBD_LENGTH_MASK) > 8)) {
655 		/* modified the previous descriptor */
656 		descriptor->lower.data -= 4;
657 
658 		/* insert a new descriptor */
659 		ASSERT(tx_ring->tbd_avail > 0);
660 		/* the lower 20 bits of lower.data is the length field */
661 		next_desc->buffer_addr =
662 		    descriptor->buffer_addr +
663 		    (descriptor->lower.data & E1000G_TBD_LENGTH_MASK);
664 		next_desc->lower.data = 4;
665 
666 		/* Zero out status */
667 		next_desc->upper.data = 0;
668 		/* It must be part of a LSO packet */
669 		next_desc->lower.data |=
670 		    E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
671 		    E1000_TXD_CMD_RS | E1000_TXD_CMD_TSE;
672 
673 		descriptor = next_desc;
674 
675 		/* Check the wrap-around case */
676 		if (descriptor == tx_ring->tbd_last)
677 			next_desc = tx_ring->tbd_first;
678 		else
679 			next_desc++;
680 
681 		desc_count++;
682 		/* update the number of descriptors */
683 		previous_packet->num_desc++;
684 	}
685 
686 	ASSERT(descriptor);
687 
688 	if (cur_context->cksum_flags) {
689 		if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM)
690 			((struct e1000_data_desc *)first_data_desc)->
691 			    upper.fields.popts |= E1000_TXD_POPTS_IXSM;
692 		if (cur_context->cksum_flags & HCK_PARTIALCKSUM)
693 			((struct e1000_data_desc *)first_data_desc)->
694 			    upper.fields.popts |= E1000_TXD_POPTS_TXSM;
695 	}
696 
697 	/*
698 	 * Last Descriptor of Packet needs End Of Packet (EOP), Report
699 	 * Status (RS) set.
700 	 */
701 	if (Adapter->tx_intr_delay) {
702 		descriptor->lower.data |= E1000_TXD_CMD_IDE |
703 		    E1000_TXD_CMD_EOP;
704 	} else {
705 		descriptor->lower.data |= E1000_TXD_CMD_EOP;
706 	}
707 
708 	/* Set append Ethernet CRC (IFCS) bits */
709 	if (cur_context->lso_flag) {
710 		first_data_desc->lower.data |= E1000_TXD_CMD_IFCS;
711 	} else {
712 		descriptor->lower.data |= E1000_TXD_CMD_IFCS;
713 	}
714 
715 	/*
716 	 * Sync the Tx descriptors DMA buffer
717 	 */
718 	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
719 	    0, 0, DDI_DMA_SYNC_FORDEV);
720 
721 	tx_ring->tbd_next = next_desc;
722 
723 	/*
724 	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
725 	 * FX1000 that this frame is available to transmit.
726 	 */
727 	if (hw->mac.type == e1000_82547)
728 		e1000g_82547_tx_move_tail(tx_ring);
729 	else
730 		E1000_WRITE_REG(hw, E1000_TDT(0),
731 		    (uint32_t)(next_desc - tx_ring->tbd_first));
732 
733 	if (e1000g_check_acc_handle(Adapter->osdep.reg_handle) != DDI_FM_OK) {
734 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
735 		Adapter->chip_state = E1000G_ERROR;
736 	}
737 
738 	/* Put the pending SwPackets to the "Used" list */
739 	mutex_enter(&tx_ring->usedlist_lock);
740 	QUEUE_APPEND(&tx_ring->used_list, pending_list);
741 	tx_ring->tbd_avail -= desc_count;
742 	mutex_exit(&tx_ring->usedlist_lock);
743 
744 	/* update LSO related data */
745 	if (context_reload)
746 		tx_ring->pre_context = *cur_context;
747 
748 	return (desc_count);
749 }
750 
751 
752 /*
753  * e1000g_tx_setup - setup tx data structures
754  *
755  * This routine initializes all of the transmit related
756  * structures. This includes the Transmit descriptors,
757  * and the tx_sw_packet structures.
758  */
759 void
760 e1000g_tx_setup(struct e1000g *Adapter)
761 {
762 	struct e1000_hw *hw;
763 	p_tx_sw_packet_t packet;
764 	UINT i;
765 	uint32_t buf_high;
766 	uint32_t buf_low;
767 	uint32_t reg_tipg;
768 	uint32_t reg_tctl;
769 	uint32_t reg_tarc;
770 	uint16_t speed, duplex;
771 	int size;
772 	e1000g_tx_ring_t *tx_ring;
773 
774 	hw = &Adapter->shared;
775 	tx_ring = Adapter->tx_ring;
776 
777 	/* init the lists */
778 	/*
779 	 * Here we don't need to protect the lists using the
780 	 * usedlist_lock and freelist_lock, for they have
781 	 * been protected by the chip_lock.
782 	 */
783 	QUEUE_INIT_LIST(&tx_ring->used_list);
784 	QUEUE_INIT_LIST(&tx_ring->free_list);
785 
786 	/* Go through and set up each SW_Packet */
787 	packet = tx_ring->packet_area;
788 	for (i = 0; i < Adapter->tx_freelist_num; i++, packet++) {
789 		/* Initialize this tx_sw_apcket area */
790 		e1000g_free_tx_swpkt(packet);
791 		/* Add this tx_sw_packet to the free list */
792 		QUEUE_PUSH_TAIL(&tx_ring->free_list,
793 		    &packet->Link);
794 	}
795 
796 	/* Setup TX descriptor pointers */
797 	tx_ring->tbd_next = tx_ring->tbd_first;
798 	tx_ring->tbd_oldest = tx_ring->tbd_first;
799 
800 	/*
801 	 * Setup Hardware TX Registers
802 	 */
803 	/* Setup the Transmit Control Register (TCTL). */
804 	reg_tctl = E1000_TCTL_PSP | E1000_TCTL_EN |
805 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT) |
806 	    (E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT) |
807 	    E1000_TCTL_RTLC;
808 
809 	/* Enable the MULR bit */
810 	if (hw->bus.type == e1000_bus_type_pci_express)
811 		reg_tctl |= E1000_TCTL_MULR;
812 
813 	E1000_WRITE_REG(hw, E1000_TCTL, reg_tctl);
814 
815 	if ((hw->mac.type == e1000_82571) || (hw->mac.type == e1000_82572)) {
816 		e1000_get_speed_and_duplex(hw, &speed, &duplex);
817 
818 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(0));
819 		reg_tarc |= (1 << 25);
820 		if (speed == SPEED_1000)
821 			reg_tarc |= (1 << 21);
822 		E1000_WRITE_REG(hw, E1000_TARC(0), reg_tarc);
823 
824 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(1));
825 		reg_tarc |= (1 << 25);
826 		if (reg_tctl & E1000_TCTL_MULR)
827 			reg_tarc &= ~(1 << 28);
828 		else
829 			reg_tarc |= (1 << 28);
830 		E1000_WRITE_REG(hw, E1000_TARC(1), reg_tarc);
831 
832 	} else if (hw->mac.type == e1000_80003es2lan) {
833 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(0));
834 		reg_tarc |= 1;
835 		if (hw->phy.media_type == e1000_media_type_internal_serdes)
836 			reg_tarc |= (1 << 20);
837 		E1000_WRITE_REG(hw, E1000_TARC(0), reg_tarc);
838 
839 		reg_tarc = E1000_READ_REG(hw, E1000_TARC(1));
840 		reg_tarc |= 1;
841 		E1000_WRITE_REG(hw, E1000_TARC(1), reg_tarc);
842 	}
843 
844 	/* Setup HW Base and Length of Tx descriptor area */
845 	size = (Adapter->tx_desc_num * sizeof (struct e1000_tx_desc));
846 	E1000_WRITE_REG(hw, E1000_TDLEN(0), size);
847 	size = E1000_READ_REG(hw, E1000_TDLEN(0));
848 
849 	buf_low = (uint32_t)tx_ring->tbd_dma_addr;
850 	buf_high = (uint32_t)(tx_ring->tbd_dma_addr >> 32);
851 
852 	E1000_WRITE_REG(hw, E1000_TDBAL(0), buf_low);
853 	E1000_WRITE_REG(hw, E1000_TDBAH(0), buf_high);
854 
855 	/* Setup our HW Tx Head & Tail descriptor pointers */
856 	E1000_WRITE_REG(hw, E1000_TDH(0), 0);
857 	E1000_WRITE_REG(hw, E1000_TDT(0), 0);
858 
859 	/* Set the default values for the Tx Inter Packet Gap timer */
860 	if ((hw->mac.type == e1000_82542) &&
861 	    ((hw->revision_id == E1000_REVISION_2) ||
862 	    (hw->revision_id == E1000_REVISION_3))) {
863 		reg_tipg = DEFAULT_82542_TIPG_IPGT;
864 		reg_tipg |=
865 		    DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
866 		reg_tipg |=
867 		    DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
868 	} else {
869 		if (hw->phy.media_type == e1000_media_type_fiber)
870 			reg_tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
871 		else
872 			reg_tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
873 		reg_tipg |=
874 		    DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
875 		reg_tipg |=
876 		    DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
877 	}
878 	E1000_WRITE_REG(hw, E1000_TIPG, reg_tipg);
879 
880 	/* Setup Transmit Interrupt Delay Value */
881 	E1000_WRITE_REG(hw, E1000_TIDV, Adapter->tx_intr_delay);
882 	E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
883 	    "E1000_TIDV: 0x%x\n", Adapter->tx_intr_delay);
884 
885 	if (hw->mac.type >= e1000_82540) {
886 		E1000_WRITE_REG(&Adapter->shared, E1000_TADV,
887 		    Adapter->tx_intr_abs_delay);
888 		E1000G_DEBUGLOG_1(Adapter, E1000G_INFO_LEVEL,
889 		    "E1000_TADV: 0x%x\n", Adapter->tx_intr_abs_delay);
890 	}
891 
892 	tx_ring->tbd_avail = Adapter->tx_desc_num;
893 
894 	/* Initialize stored context information */
895 	bzero(&(tx_ring->pre_context), sizeof (context_data_t));
896 }
897 
898 /*
899  * e1000g_recycle - recycle the tx descriptors and tx sw packets
900  */
901 int
902 e1000g_recycle(e1000g_tx_ring_t *tx_ring)
903 {
904 	struct e1000g *Adapter;
905 	LIST_DESCRIBER pending_list;
906 	p_tx_sw_packet_t packet;
907 	mblk_t *mp;
908 	mblk_t *nmp;
909 	struct e1000_tx_desc *descriptor;
910 	int desc_count;
911 	int is_intr;
912 
913 	/*
914 	 * This function will examine each TxSwPacket in the 'used' queue
915 	 * if the e1000g is done with it then the associated resources (Tx
916 	 * Descriptors) will be "freed" and the TxSwPacket will be
917 	 * returned to the 'free' queue.
918 	 */
919 	Adapter = tx_ring->adapter;
920 
921 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list);
922 	if (packet == NULL) {
923 		tx_ring->recycle_fail = 0;
924 		tx_ring->stall_watchdog = 0;
925 		return (0);
926 	}
927 
928 	is_intr = servicing_interrupt();
929 
930 	if (is_intr)
931 		mutex_enter(&tx_ring->usedlist_lock);
932 	else if (mutex_tryenter(&tx_ring->usedlist_lock) == 0)
933 		return (0);
934 
935 	desc_count = 0;
936 	QUEUE_INIT_LIST(&pending_list);
937 
938 	/* Sync the Tx descriptor DMA buffer */
939 	(void) ddi_dma_sync(tx_ring->tbd_dma_handle,
940 	    0, 0, DDI_DMA_SYNC_FORKERNEL);
941 	if (e1000g_check_dma_handle(
942 	    tx_ring->tbd_dma_handle) != DDI_FM_OK) {
943 		mutex_exit(&tx_ring->usedlist_lock);
944 		ddi_fm_service_impact(Adapter->dip, DDI_SERVICE_DEGRADED);
945 		Adapter->chip_state = E1000G_ERROR;
946 		return (0);
947 	}
948 
949 	/*
950 	 * While there are still TxSwPackets in the used queue check them
951 	 */
952 	while (packet =
953 	    (p_tx_sw_packet_t)QUEUE_GET_HEAD(&tx_ring->used_list)) {
954 
955 		/*
956 		 * Get hold of the next descriptor that the e1000g will
957 		 * report status back to (this will be the last descriptor
958 		 * of a given sw packet). We only want to free the
959 		 * sw packet (and it resources) if the e1000g is done
960 		 * with ALL of the descriptors.  If the e1000g is done
961 		 * with the last one then it is done with all of them.
962 		 */
963 		ASSERT(packet->num_desc);
964 		descriptor = tx_ring->tbd_oldest + (packet->num_desc - 1);
965 
966 		/* Check for wrap case */
967 		if (descriptor > tx_ring->tbd_last)
968 			descriptor -= Adapter->tx_desc_num;
969 
970 		/*
971 		 * If the descriptor done bit is set free TxSwPacket and
972 		 * associated resources
973 		 */
974 		if (descriptor->upper.fields.status & E1000_TXD_STAT_DD) {
975 			QUEUE_POP_HEAD(&tx_ring->used_list);
976 			QUEUE_PUSH_TAIL(&pending_list, &packet->Link);
977 
978 			if (descriptor == tx_ring->tbd_last)
979 				tx_ring->tbd_oldest =
980 				    tx_ring->tbd_first;
981 			else
982 				tx_ring->tbd_oldest =
983 				    descriptor + 1;
984 
985 			desc_count += packet->num_desc;
986 
987 			if (is_intr && (desc_count >= Adapter->tx_recycle_num))
988 				break;
989 		} else {
990 			/*
991 			 * Found a sw packet that the e1000g is not done
992 			 * with then there is no reason to check the rest
993 			 * of the queue.
994 			 */
995 			break;
996 		}
997 	}
998 
999 	tx_ring->tbd_avail += desc_count;
1000 	Adapter->tx_pkt_cnt += desc_count;
1001 
1002 	mutex_exit(&tx_ring->usedlist_lock);
1003 
1004 	if (desc_count == 0) {
1005 		tx_ring->recycle_fail++;
1006 		E1000G_DEBUG_STAT(tx_ring->stat_recycle_none);
1007 		return (0);
1008 	}
1009 
1010 	tx_ring->recycle_fail = 0;
1011 	tx_ring->stall_watchdog = 0;
1012 
1013 	mp = NULL;
1014 	nmp = NULL;
1015 	packet = (p_tx_sw_packet_t)QUEUE_GET_HEAD(&pending_list);
1016 	ASSERT(packet != NULL);
1017 	while (packet != NULL) {
1018 		if (packet->mp != NULL) {
1019 			ASSERT(packet->mp->b_next == NULL);
1020 			/* Assemble the message chain */
1021 			if (mp == NULL) {
1022 				mp = packet->mp;
1023 				nmp = packet->mp;
1024 			} else {
1025 				nmp->b_next = packet->mp;
1026 				nmp = packet->mp;
1027 			}
1028 			/* Disconnect the message from the sw packet */
1029 			packet->mp = NULL;
1030 		}
1031 
1032 		/* Free the TxSwPackets */
1033 		e1000g_free_tx_swpkt(packet);
1034 
1035 		packet = (p_tx_sw_packet_t)
1036 		    QUEUE_GET_NEXT(&pending_list, &packet->Link);
1037 	}
1038 
1039 	/* Return the TxSwPackets back to the FreeList */
1040 	mutex_enter(&tx_ring->freelist_lock);
1041 	QUEUE_APPEND(&tx_ring->free_list, &pending_list);
1042 	mutex_exit(&tx_ring->freelist_lock);
1043 
1044 	if (mp != NULL)
1045 		freemsgchain(mp);
1046 
1047 	return (desc_count);
1048 }
1049 /*
1050  * 82544 Coexistence issue workaround:
1051  *    There are 2 issues.
1052  *    1. If a 32 bit split completion happens from P64H2 and another
1053  *	agent drives a 64 bit request/split completion after ONLY
1054  *	1 idle clock (BRCM/Emulex/Adaptec fiber channel cards) then
1055  *	82544 has a problem where in to clock all the data in, it
1056  *	looks at REQ64# signal and since it has changed so fast (i.e. 1
1057  *	idle clock turn around), it will fail to clock all the data in.
1058  *	Data coming from certain ending addresses has exposure to this issue.
1059  *
1060  * To detect this issue, following equation can be used...
1061  *	SIZE[3:0] + ADDR[2:0] = SUM[3:0].
1062  *	If SUM[3:0] is in between 1 to 4, we will have this issue.
1063  *
1064  * ROOT CAUSE:
1065  *	The erratum involves the 82544 PCIX elasticity FIFO implementations as
1066  *	64-bit FIFO's and flushing of the final partial-bytes corresponding
1067  *	to the end of a requested read burst. Under a specific burst condition
1068  *	of ending-data alignment and 32-byte split-completions, the final
1069  *	byte(s) of split-completion data require an extra clock cycle to flush
1070  *	into 64-bit FIFO orientation.  An incorrect logic dependency on the
1071  *	REQ64# signal occurring during this clock cycle may cause the
1072  *	residual byte(s) to be lost, thereby rendering the internal DMA client
1073  *	forever awaiting the final byte(s) for an outbound data-fetch.  The
1074  *	erratum is confirmed to *only* occur if certain subsequent external
1075  *	64-bit PCIX bus transactions occur immediately (minimum possible bus
1076  *	turn- around) following the odd-aligned 32-bit split-completion
1077  *	containing the final byte(s).  Intel has confirmed that this has been
1078  *	seen only with chipset/bridges which have the capability to provide
1079  *	32-bit split-completion data, and in the presence of newer PCIX bus
1080  *	agents which fully-optimize the inter-transaction turn-around (zero
1081  *	additional initiator latency when pre-granted bus ownership).
1082  *
1083  *   	This issue does not exist in PCI bus mode, when any agent is operating
1084  *	in 32 bit only mode or on chipsets that do not do 32 bit split
1085  *	completions for 64 bit read requests (Serverworks chipsets). P64H2 does
1086  *	32 bit split completions for any read request that has bit 2 set to 1
1087  *	for the requested address and read request size is more than 8 bytes.
1088  *
1089  *   2. Another issue is related to 82544 driving DACs under the similar
1090  *	scenario (32 bit split completion followed by 64 bit transaction with
1091  *	only 1 cycle turnaround). This issue is still being root caused. We
1092  *	think that both of these issues can be avoided if following workaround
1093  *	is implemented. It seems DAC issues is related to ending addresses being
1094  *	0x9, 0xA, 0xB, 0xC and hence ending up at odd boundaries in elasticity
1095  *	FIFO which does not get flushed due to REQ64# dependency. We will only
1096  *	know the full story after it has been simulated successfully by HW team.
1097  *
1098  * WORKAROUND:
1099  *	Make sure we do not have ending address as 1,2,3,4(Hang) or 9,a,b,c(DAC)
1100  */
1101 static uint32_t
1102 e1000g_fill_82544_desc(uint64_t address,
1103     size_t length, p_desc_array_t desc_array)
1104 {
1105 	/*
1106 	 * Since issue is sensitive to length and address.
1107 	 * Let us first check the address...
1108 	 */
1109 	uint32_t safe_terminator;
1110 
1111 	if (length <= 4) {
1112 		desc_array->descriptor[0].address = address;
1113 		desc_array->descriptor[0].length = length;
1114 		desc_array->elements = 1;
1115 		return (desc_array->elements);
1116 	}
1117 	safe_terminator =
1118 	    (uint32_t)((((uint32_t)address & 0x7) +
1119 	    (length & 0xF)) & 0xF);
1120 	/*
1121 	 * if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then
1122 	 * return
1123 	 */
1124 	if (safe_terminator == 0 ||
1125 	    (safe_terminator > 4 && safe_terminator < 9) ||
1126 	    (safe_terminator > 0xC && safe_terminator <= 0xF)) {
1127 		desc_array->descriptor[0].address = address;
1128 		desc_array->descriptor[0].length = length;
1129 		desc_array->elements = 1;
1130 		return (desc_array->elements);
1131 	}
1132 
1133 	desc_array->descriptor[0].address = address;
1134 	desc_array->descriptor[0].length = length - 4;
1135 	desc_array->descriptor[1].address = address + (length - 4);
1136 	desc_array->descriptor[1].length = 4;
1137 	desc_array->elements = 2;
1138 	return (desc_array->elements);
1139 }
1140 
1141 static int
1142 e1000g_tx_copy(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet,
1143     mblk_t *mp, boolean_t tx_undersize_flag)
1144 {
1145 	size_t len;
1146 	size_t len1;
1147 	dma_buffer_t *tx_buf;
1148 	mblk_t *nmp;
1149 	boolean_t finished;
1150 	int desc_count;
1151 
1152 	desc_count = 0;
1153 	tx_buf = packet->tx_buf;
1154 	len = MBLKL(mp);
1155 
1156 	ASSERT((tx_buf->len + len) <= tx_buf->size);
1157 
1158 	if (len > 0) {
1159 		bcopy(mp->b_rptr,
1160 		    tx_buf->address + tx_buf->len,
1161 		    len);
1162 		tx_buf->len += len;
1163 
1164 		packet->num_mblk_frag++;
1165 	}
1166 
1167 	nmp = mp->b_cont;
1168 	if (nmp == NULL) {
1169 		finished = B_TRUE;
1170 	} else {
1171 		len1 = MBLKL(nmp);
1172 		if ((tx_buf->len + len1) > tx_buf->size)
1173 			finished = B_TRUE;
1174 		else if (tx_undersize_flag)
1175 			finished = B_FALSE;
1176 		else if (len1 > tx_ring->adapter->tx_bcopy_thresh)
1177 			finished = B_TRUE;
1178 		else
1179 			finished = B_FALSE;
1180 	}
1181 
1182 	if (finished) {
1183 		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_copy,
1184 		    (tx_buf->len > len));
1185 
1186 		/*
1187 		 * If the packet is smaller than 64 bytes, which is the
1188 		 * minimum ethernet packet size, pad the packet to make
1189 		 * it at least 60 bytes. The hardware will add 4 bytes
1190 		 * for CRC.
1191 		 */
1192 		if (tx_undersize_flag) {
1193 			ASSERT(tx_buf->len < ETHERMIN);
1194 
1195 			bzero(tx_buf->address + tx_buf->len,
1196 			    ETHERMIN - tx_buf->len);
1197 			tx_buf->len = ETHERMIN;
1198 		}
1199 
1200 #ifdef __sparc
1201 		if (packet->dma_type == USE_DVMA)
1202 			dvma_sync(tx_buf->dma_handle, 0, DDI_DMA_SYNC_FORDEV);
1203 		else
1204 			(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1205 			    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1206 #else
1207 		(void) ddi_dma_sync(tx_buf->dma_handle, 0,
1208 		    tx_buf->len, DDI_DMA_SYNC_FORDEV);
1209 #endif
1210 
1211 		packet->data_transfer_type = USE_BCOPY;
1212 
1213 		desc_count = e1000g_fill_tx_desc(tx_ring,
1214 		    packet,
1215 		    tx_buf->dma_address,
1216 		    tx_buf->len);
1217 
1218 		if (desc_count <= 0)
1219 			return (-1);
1220 	}
1221 
1222 	return (desc_count);
1223 }
1224 
1225 static int
1226 e1000g_tx_bind(e1000g_tx_ring_t *tx_ring, p_tx_sw_packet_t packet, mblk_t *mp)
1227 {
1228 	int j;
1229 	int mystat;
1230 	size_t len;
1231 	ddi_dma_cookie_t dma_cookie;
1232 	uint_t ncookies;
1233 	int desc_count;
1234 	uint32_t desc_total;
1235 
1236 	desc_total = 0;
1237 	len = MBLKL(mp);
1238 
1239 	/*
1240 	 * ddi_dma_addr_bind_handle() allocates  DMA  resources  for  a
1241 	 * memory  object such that a device can perform DMA to or from
1242 	 * the object.  DMA resources  are  allocated  considering  the
1243 	 * device's  DMA  attributes  as  expressed by ddi_dma_attr(9S)
1244 	 * (see ddi_dma_alloc_handle(9F)).
1245 	 *
1246 	 * ddi_dma_addr_bind_handle() fills in  the  first  DMA  cookie
1247 	 * pointed  to by cookiep with the appropriate address, length,
1248 	 * and bus type. *ccountp is set to the number of DMA  cookies
1249 	 * representing this DMA object. Subsequent DMA cookies must be
1250 	 * retrieved by calling ddi_dma_nextcookie(9F)  the  number  of
1251 	 * times specified by *countp - 1.
1252 	 */
1253 	switch (packet->dma_type) {
1254 #ifdef __sparc
1255 	case USE_DVMA:
1256 		dvma_kaddr_load(packet->tx_dma_handle,
1257 		    (caddr_t)mp->b_rptr, len, 0, &dma_cookie);
1258 
1259 		dvma_sync(packet->tx_dma_handle, 0,
1260 		    DDI_DMA_SYNC_FORDEV);
1261 
1262 		ncookies = 1;
1263 		packet->data_transfer_type = USE_DVMA;
1264 		break;
1265 #endif
1266 	case USE_DMA:
1267 		if ((mystat = ddi_dma_addr_bind_handle(
1268 		    packet->tx_dma_handle, NULL,
1269 		    (caddr_t)mp->b_rptr, len,
1270 		    DDI_DMA_WRITE | DDI_DMA_STREAMING,
1271 		    DDI_DMA_DONTWAIT, 0, &dma_cookie,
1272 		    &ncookies)) != DDI_DMA_MAPPED) {
1273 
1274 			e1000g_log(tx_ring->adapter, CE_WARN,
1275 			    "Couldn't bind mblk buffer to Tx DMA handle: "
1276 			    "return: %X, Pkt: %X\n",
1277 			    mystat, packet);
1278 			return (-1);
1279 		}
1280 
1281 		/*
1282 		 * An implicit ddi_dma_sync() is done when the
1283 		 * ddi_dma_addr_bind_handle() is called. So we
1284 		 * don't need to explicitly call ddi_dma_sync()
1285 		 * here any more.
1286 		 */
1287 		ASSERT(ncookies);
1288 		E1000G_DEBUG_STAT_COND(tx_ring->stat_multi_cookie,
1289 		    (ncookies > 1));
1290 
1291 		/*
1292 		 * The data_transfer_type value must be set after the handle
1293 		 * has been bound, for it will be used in e1000g_free_tx_swpkt()
1294 		 * to decide whether we need to unbind the handle.
1295 		 */
1296 		packet->data_transfer_type = USE_DMA;
1297 		break;
1298 	default:
1299 		ASSERT(B_FALSE);
1300 		break;
1301 	}
1302 
1303 	packet->num_mblk_frag++;
1304 
1305 	/*
1306 	 * Each address could span thru multpile cookie..
1307 	 * Each cookie will have one descriptor
1308 	 */
1309 	for (j = ncookies; j != 0; j--) {
1310 
1311 		desc_count = e1000g_fill_tx_desc(tx_ring,
1312 		    packet,
1313 		    dma_cookie.dmac_laddress,
1314 		    dma_cookie.dmac_size);
1315 
1316 		if (desc_count <= 0)
1317 			return (-1);
1318 
1319 		desc_total += desc_count;
1320 
1321 		/*
1322 		 * ddi_dma_nextcookie() retrieves subsequent DMA
1323 		 * cookies for a DMA object.
1324 		 * ddi_dma_nextcookie() fills in the
1325 		 * ddi_dma_cookie(9S) structure pointed to by
1326 		 * cookiep.  The ddi_dma_cookie(9S) structure
1327 		 * must be allocated prior to calling
1328 		 * ddi_dma_nextcookie(). The DMA cookie count
1329 		 * returned by ddi_dma_buf_bind_handle(9F),
1330 		 * ddi_dma_addr_bind_handle(9F), or
1331 		 * ddi_dma_getwin(9F) indicates the number of DMA
1332 		 * cookies a DMA object consists of.  If the
1333 		 * resulting cookie count, N, is larger than 1,
1334 		 * ddi_dma_nextcookie() must be called N-1 times
1335 		 * to retrieve all DMA cookies.
1336 		 */
1337 		if (j > 1) {
1338 			ddi_dma_nextcookie(packet->tx_dma_handle,
1339 			    &dma_cookie);
1340 		}
1341 	}
1342 
1343 	return (desc_total);
1344 }
1345 
1346 static void
1347 e1000g_fill_context_descriptor(context_data_t *cur_context,
1348     struct e1000_context_desc *context_desc)
1349 {
1350 	if (cur_context->cksum_flags & HCK_IPV4_HDRCKSUM) {
1351 		context_desc->lower_setup.ip_fields.ipcss =
1352 		    cur_context->ether_header_size;
1353 		context_desc->lower_setup.ip_fields.ipcso =
1354 		    cur_context->ether_header_size +
1355 		    offsetof(struct ip, ip_sum);
1356 		context_desc->lower_setup.ip_fields.ipcse =
1357 		    cur_context->ether_header_size +
1358 		    cur_context->cksum_start - 1;
1359 	} else
1360 		context_desc->lower_setup.ip_config = 0;
1361 
1362 	if (cur_context->cksum_flags & HCK_PARTIALCKSUM) {
1363 		/*
1364 		 * The packet with same protocol has the following
1365 		 * stuff and start offset:
1366 		 * |  Protocol  | Stuff  | Start  | Checksum
1367 		 * |		| Offset | Offset | Enable
1368 		 * | IPv4 + TCP |  0x24  |  0x14  |  Yes
1369 		 * | IPv4 + UDP |  0x1A  |  0x14  |  Yes
1370 		 * | IPv6 + TCP |  0x20  |  0x10  |  No
1371 		 * | IPv6 + UDP |  0x14  |  0x10  |  No
1372 		 */
1373 		context_desc->upper_setup.tcp_fields.tucss =
1374 		    cur_context->cksum_start + cur_context->ether_header_size;
1375 		context_desc->upper_setup.tcp_fields.tucso =
1376 		    cur_context->cksum_stuff + cur_context->ether_header_size;
1377 		context_desc->upper_setup.tcp_fields.tucse = 0;
1378 	} else
1379 		context_desc->upper_setup.tcp_config = 0;
1380 
1381 	if (cur_context->lso_flag) {
1382 		context_desc->tcp_seg_setup.fields.mss = cur_context->mss;
1383 		context_desc->tcp_seg_setup.fields.hdr_len =
1384 		    cur_context->hdr_len;
1385 		/*
1386 		 * workaround for 82546EB errata 23, status-writeback
1387 		 * reporting (RS) should not be set on context or
1388 		 * Null descriptors
1389 		 */
1390 		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
1391 		    | E1000_TXD_CMD_TSE | E1000_TXD_CMD_IP | E1000_TXD_CMD_TCP
1392 		    | E1000_TXD_DTYP_C | cur_context->pay_len;
1393 	} else {
1394 		context_desc->cmd_and_length = E1000_TXD_CMD_DEXT
1395 		    | E1000_TXD_DTYP_C;
1396 		/*
1397 		 * Zero out the options for TCP Segmentation Offload
1398 		 */
1399 		context_desc->tcp_seg_setup.data = 0;
1400 	}
1401 }
1402 
1403 static int
1404 e1000g_fill_tx_desc(e1000g_tx_ring_t *tx_ring,
1405     p_tx_sw_packet_t packet, uint64_t address, size_t size)
1406 {
1407 	struct e1000_hw *hw = &tx_ring->adapter->shared;
1408 	p_sw_desc_t desc;
1409 
1410 	if (hw->mac.type == e1000_82544) {
1411 		if (hw->bus.type == e1000_bus_type_pcix)
1412 			return (e1000g_tx_workaround_PCIX_82544(packet,
1413 			    address, size));
1414 
1415 		if (size > JUMBO_FRAG_LENGTH)
1416 			return (e1000g_tx_workaround_jumbo_82544(packet,
1417 			    address, size));
1418 	}
1419 
1420 	ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1421 
1422 	desc = &packet->desc[packet->num_desc];
1423 	desc->address = address;
1424 	desc->length = size;
1425 
1426 	packet->num_desc++;
1427 
1428 	return (1);
1429 }
1430 
1431 static int
1432 e1000g_tx_workaround_PCIX_82544(p_tx_sw_packet_t packet,
1433     uint64_t address, size_t size)
1434 {
1435 	p_sw_desc_t desc;
1436 	int desc_count;
1437 	long size_left;
1438 	size_t len;
1439 	uint32_t counter;
1440 	uint32_t array_elements;
1441 	desc_array_t desc_array;
1442 
1443 	/*
1444 	 * Coexist Workaround for cordova: RP: 07/04/03
1445 	 *
1446 	 * RP: ERRATA: Workaround ISSUE:
1447 	 * 8kb_buffer_Lockup CONTROLLER: Cordova Breakup
1448 	 * Eachbuffer in to 8kb pieces until the
1449 	 * remainder is < 8kb
1450 	 */
1451 	size_left = size;
1452 	desc_count = 0;
1453 
1454 	while (size_left > 0) {
1455 		if (size_left > MAX_TX_BUF_SIZE)
1456 			len = MAX_TX_BUF_SIZE;
1457 		else
1458 			len = size_left;
1459 
1460 		array_elements = e1000g_fill_82544_desc(address,
1461 		    len, &desc_array);
1462 
1463 		for (counter = 0; counter < array_elements; counter++) {
1464 			ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1465 			/*
1466 			 * Put in the buffer address
1467 			 */
1468 			desc = &packet->desc[packet->num_desc];
1469 
1470 			desc->address =
1471 			    desc_array.descriptor[counter].address;
1472 			desc->length =
1473 			    desc_array.descriptor[counter].length;
1474 
1475 			packet->num_desc++;
1476 			desc_count++;
1477 		} /* for */
1478 
1479 		/*
1480 		 * Update the buffer address and length
1481 		 */
1482 		address += MAX_TX_BUF_SIZE;
1483 		size_left -= MAX_TX_BUF_SIZE;
1484 	} /* while */
1485 
1486 	return (desc_count);
1487 }
1488 
1489 static int
1490 e1000g_tx_workaround_jumbo_82544(p_tx_sw_packet_t packet,
1491     uint64_t address, size_t size)
1492 {
1493 	p_sw_desc_t desc;
1494 	int desc_count;
1495 	long size_left;
1496 	uint32_t offset;
1497 
1498 	/*
1499 	 * Workaround for Jumbo Frames on Cordova
1500 	 * PSD 06/01/2001
1501 	 */
1502 	size_left = size;
1503 	desc_count = 0;
1504 	offset = 0;
1505 	while (size_left > 0) {
1506 		ASSERT(packet->num_desc < MAX_TX_DESC_PER_PACKET);
1507 
1508 		desc = &packet->desc[packet->num_desc];
1509 
1510 		desc->address = address + offset;
1511 
1512 		if (size_left > JUMBO_FRAG_LENGTH)
1513 			desc->length = JUMBO_FRAG_LENGTH;
1514 		else
1515 			desc->length = size_left;
1516 
1517 		packet->num_desc++;
1518 		desc_count++;
1519 
1520 		offset += desc->length;
1521 		size_left -= JUMBO_FRAG_LENGTH;
1522 	}
1523 
1524 	return (desc_count);
1525 }
1526 
1527 #pragma inline(e1000g_82547_tx_move_tail_work)
1528 
1529 static void
1530 e1000g_82547_tx_move_tail_work(e1000g_tx_ring_t *tx_ring)
1531 {
1532 	struct e1000_hw *hw;
1533 	uint16_t hw_tdt;
1534 	uint16_t sw_tdt;
1535 	struct e1000_tx_desc *tx_desc;
1536 	uint16_t length = 0;
1537 	boolean_t eop = B_FALSE;
1538 	struct e1000g *Adapter;
1539 
1540 	Adapter = tx_ring->adapter;
1541 	hw = &Adapter->shared;
1542 
1543 	hw_tdt = E1000_READ_REG(hw, E1000_TDT(0));
1544 	sw_tdt = tx_ring->tbd_next - tx_ring->tbd_first;
1545 
1546 	while (hw_tdt != sw_tdt) {
1547 		tx_desc = &(tx_ring->tbd_first[hw_tdt]);
1548 		length += tx_desc->lower.flags.length;
1549 		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
1550 		if (++hw_tdt == Adapter->tx_desc_num)
1551 			hw_tdt = 0;
1552 
1553 		if (eop) {
1554 			if ((Adapter->link_duplex == HALF_DUPLEX) &&
1555 			    (e1000_fifo_workaround_82547(hw, length)
1556 			    != E1000_SUCCESS)) {
1557 				if (tx_ring->timer_enable_82547) {
1558 					ASSERT(tx_ring->timer_id_82547 == 0);
1559 					tx_ring->timer_id_82547 =
1560 					    timeout(e1000g_82547_timeout,
1561 					    (void *)tx_ring,
1562 					    drv_usectohz(10000));
1563 				}
1564 				return;
1565 
1566 			} else {
1567 				E1000_WRITE_REG(hw, E1000_TDT(0), hw_tdt);
1568 				e1000_update_tx_fifo_head_82547(hw, length);
1569 				length = 0;
1570 			}
1571 		}
1572 	}
1573 }
1574 
1575 static void
1576 e1000g_82547_timeout(void *arg)
1577 {
1578 	e1000g_tx_ring_t *tx_ring;
1579 
1580 	tx_ring = (e1000g_tx_ring_t *)arg;
1581 
1582 	mutex_enter(&tx_ring->tx_lock);
1583 
1584 	tx_ring->timer_id_82547 = 0;
1585 	e1000g_82547_tx_move_tail_work(tx_ring);
1586 
1587 	mutex_exit(&tx_ring->tx_lock);
1588 }
1589 
1590 static void
1591 e1000g_82547_tx_move_tail(e1000g_tx_ring_t *tx_ring)
1592 {
1593 	timeout_id_t tid;
1594 
1595 	ASSERT(MUTEX_HELD(&tx_ring->tx_lock));
1596 
1597 	tid = tx_ring->timer_id_82547;
1598 	tx_ring->timer_id_82547 = 0;
1599 	if (tid != 0) {
1600 		tx_ring->timer_enable_82547 = B_FALSE;
1601 		mutex_exit(&tx_ring->tx_lock);
1602 
1603 		(void) untimeout(tid);
1604 
1605 		mutex_enter(&tx_ring->tx_lock);
1606 	}
1607 	tx_ring->timer_enable_82547 = B_TRUE;
1608 	e1000g_82547_tx_move_tail_work(tx_ring);
1609 }
1610