xref: /titanic_44/usr/src/uts/common/io/rge/rge_rxtx.c (revision d30c532def6a53800f4c4926a0b726cb23b1e6df)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include "rge.h"
29 
30 #define	U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
31 #define	PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))
32 
33 /*
34  * ========== RX side routines ==========
35  */
36 
37 #define	RGE_DBG		RGE_DBG_RECV	/* debug flag for this code	*/
38 
static uint32_t rge_atomic_reserve(uint32_t *count_p, uint32_t n);
#pragma	inline(rge_atomic_reserve)

/*
 * Atomically take <n> units out of the counter at <count_p>.
 *
 * The reservation is refused, and 0 is returned, unless the counter
 * currently holds strictly more than <n>; a successful reservation
 * therefore always leaves a non-zero remainder, which is the value
 * returned to the caller.
 */
static uint32_t
rge_atomic_reserve(uint32_t *count_p, uint32_t n)
{
	uint32_t cur;
	uint32_t remaining;

	for (;;) {
		cur = *count_p;
		if (cur <= n)
			return (0);		/* no resources left	*/

		remaining = cur - n;

		/* Retry if the counter changed between the read and the swap */
		if (cas32(count_p, cur, remaining) == cur)
			return (remaining);
	}
}
58 
/*
 * Atomically add <n> to the counter at <count_p>
 */
static void rge_atomic_renounce(uint32_t *count_p, uint32_t n);
#pragma	inline(rge_atomic_renounce)

static void
rge_atomic_renounce(uint32_t *count_p, uint32_t n)
{
	uint32_t cur;

	/* Keep retrying until the swap is applied to an unchanged value */
	for (;;) {
		cur = *count_p;
		if (cas32(count_p, cur, cur + n) == cur)
			break;
	}
}
77 
78 /*
79  * Callback code invoked from STREAMs when the recv data buffer is free
80  * for recycling.
81  */
82 void
83 rge_rx_recycle(caddr_t arg)
84 {
85 	rge_t *rgep;
86 	dma_buf_t *rx_buf;
87 	sw_rbd_t *free_srbdp;
88 	uint32_t slot_recy;
89 
90 	rx_buf = (dma_buf_t *)arg;
91 	rgep = (rge_t *)rx_buf->private;
92 
93 	/*
94 	 * In rge_unattach() and rge_attach(), this callback function will
95 	 * also be called to free mp in rge_fini_rings() and rge_init_rings().
96 	 * In such situation, we shouldn't do below desballoc(), otherwise,
97 	 * there'll be memory leak.
98 	 */
99 	if (rgep->rge_mac_state == RGE_MAC_UNATTACH ||
100 	    rgep->rge_mac_state == RGE_MAC_ATTACH)
101 		return;
102 
103 	/*
104 	 * Recycle the data buffer again
105 	 * and fill them in free ring
106 	 */
107 	rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
108 	    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
109 	if (rx_buf->mp == NULL) {
110 		rge_problem(rgep, "rge_rx_recycle: desballoc() failed");
111 		return;
112 	}
113 	mutex_enter(rgep->rc_lock);
114 	slot_recy = rgep->rc_next;
115 	free_srbdp = &rgep->free_srbds[slot_recy];
116 
117 	ASSERT(free_srbdp->rx_buf == NULL);
118 	free_srbdp->rx_buf = rx_buf;
119 	rgep->rc_next = NEXT(slot_recy, RGE_BUF_SLOTS);
120 	rge_atomic_renounce(&rgep->rx_free, 1);
121 	if (rgep->rx_bcopy && rgep->rx_free == RGE_BUF_SLOTS)
122 		rgep->rx_bcopy = B_FALSE;
123 	ASSERT(rgep->rx_free <= RGE_BUF_SLOTS);
124 
125 	mutex_exit(rgep->rc_lock);
126 }
127 
128 static int rge_rx_refill(rge_t *rgep, uint32_t slot);
129 #pragma	inline(rge_rx_refill)
130 
131 static int
132 rge_rx_refill(rge_t *rgep, uint32_t slot)
133 {
134 	dma_buf_t *free_buf;
135 	rge_bd_t *hw_rbd_p;
136 	sw_rbd_t *srbdp;
137 	uint32_t free_slot;
138 
139 	srbdp = &rgep->sw_rbds[slot];
140 	hw_rbd_p = &rgep->rx_ring[slot];
141 	free_slot = rgep->rf_next;
142 	free_buf = rgep->free_srbds[free_slot].rx_buf;
143 	if (free_buf != NULL) {
144 		srbdp->rx_buf = free_buf;
145 		rgep->free_srbds[free_slot].rx_buf = NULL;
146 		hw_rbd_p->host_buf_addr = RGE_BSWAP_32(rgep->head_room +
147 		    + free_buf->pbuf.cookie.dmac_laddress);
148 		hw_rbd_p->host_buf_addr_hi =
149 		    RGE_BSWAP_32(free_buf->pbuf.cookie.dmac_laddress >> 32);
150 		rgep->rf_next = NEXT(free_slot, RGE_BUF_SLOTS);
151 		return (1);
152 	} else {
153 		/*
154 		 * This situation shouldn't happen
155 		 */
156 		rge_problem(rgep, "rge_rx_refill: free buffer %d is NULL",
157 		    free_slot);
158 		rgep->rx_bcopy = B_TRUE;
159 		return (0);
160 	}
161 }
162 
163 static mblk_t *rge_receive_packet(rge_t *rgep, uint32_t slot);
164 #pragma	inline(rge_receive_packet)
165 
166 static mblk_t *
167 rge_receive_packet(rge_t *rgep, uint32_t slot)
168 {
169 	rge_bd_t *hw_rbd_p;
170 	sw_rbd_t *srbdp;
171 	uchar_t *dp;
172 	mblk_t *mp;
173 	uint8_t *rx_ptr;
174 	uint32_t rx_status;
175 	uint_t packet_len;
176 	uint_t minsize;
177 	uint_t maxsize;
178 	uint32_t proto;
179 	uint32_t pflags;
180 	struct ether_vlan_header *ehp;
181 	uint16_t vtag = 0;
182 
183 	hw_rbd_p = &rgep->rx_ring[slot];
184 	srbdp = &rgep->sw_rbds[slot];
185 
186 	/*
187 	 * Read receive status
188 	 */
189 	rx_status = RGE_BSWAP_32(hw_rbd_p->flags_len) & RBD_FLAGS_MASK;
190 
191 	/*
192 	 * Handle error packet
193 	 */
194 	if (!(rx_status & BD_FLAG_PKT_END)) {
195 		RGE_DEBUG(("rge_receive_packet: not a complete packat"));
196 		return (NULL);
197 	}
198 	if (rx_status & RBD_FLAG_ERROR) {
199 		if (rx_status & RBD_FLAG_CRC_ERR)
200 			rgep->stats.crc_err++;
201 		if (rx_status & RBD_FLAG_RUNT)
202 			rgep->stats.in_short++;
203 		/*
204 		 * Set chip_error flag to reset chip:
205 		 * (suggested in Realtek programming guide.)
206 		 */
207 		RGE_DEBUG(("rge_receive_packet: error packet, status = %x",
208 		    rx_status));
209 		mutex_enter(rgep->genlock);
210 		rgep->rge_chip_state = RGE_CHIP_ERROR;
211 		mutex_exit(rgep->genlock);
212 		return (NULL);
213 	}
214 
215 	/*
216 	 * Handle size error packet
217 	 */
218 	packet_len = RGE_BSWAP_32(hw_rbd_p->flags_len) & RBD_LEN_MASK;
219 	packet_len -= ETHERFCSL;
220 	minsize = ETHERMIN;
221 	pflags = RGE_BSWAP_32(hw_rbd_p->vlan_tag);
222 	if (pflags & RBD_VLAN_PKT)
223 		minsize -= VLAN_TAGSZ;
224 	maxsize = rgep->ethmax_size;
225 	if (packet_len < minsize || packet_len > maxsize) {
226 		RGE_DEBUG(("rge_receive_packet: len err = %d", packet_len));
227 		return (NULL);
228 	}
229 
230 	DMA_SYNC(srbdp->rx_buf->pbuf, DDI_DMA_SYNC_FORKERNEL);
231 	if (rgep->rx_bcopy || packet_len <= RGE_RECV_COPY_SIZE ||
232 	    !rge_atomic_reserve(&rgep->rx_free, 1)) {
233 		/*
234 		 * Allocate buffer to receive this good packet
235 		 */
236 		mp = allocb(packet_len + RGE_HEADROOM, 0);
237 		if (mp == NULL) {
238 			RGE_DEBUG(("rge_receive_packet: allocate buffer fail"));
239 			rgep->stats.no_rcvbuf++;
240 			return (NULL);
241 		}
242 
243 		/*
244 		 * Copy the data found into the new cluster
245 		 */
246 		rx_ptr = DMA_VPTR(srbdp->rx_buf->pbuf);
247 		mp->b_rptr = dp = mp->b_rptr + RGE_HEADROOM;
248 		bcopy(rx_ptr + rgep->head_room, dp, packet_len);
249 		mp->b_wptr = dp + packet_len;
250 	} else {
251 		mp = srbdp->rx_buf->mp;
252 		mp->b_rptr += rgep->head_room;
253 		mp->b_wptr = mp->b_rptr + packet_len;
254 		mp->b_next = mp->b_cont = NULL;
255 		/*
256 		 * Refill the current receive bd buffer
257 		 *   if fails, will just keep the mp.
258 		 */
259 		if (!rge_rx_refill(rgep, slot))
260 			return (NULL);
261 	}
262 	rgep->stats.rbytes += packet_len;
263 	rgep->stats.rpackets ++;
264 
265 	/*
266 	 * VLAN packet ?
267 	 */
268 	if (pflags & RBD_VLAN_PKT)
269 		vtag = pflags & RBD_VLAN_TAG;
270 	if (vtag) {
271 		vtag = TCI_CHIP2OS(vtag);
272 		/*
273 		 * As h/w strips the VLAN tag from incoming packet, we need
274 		 * insert VLAN tag into this packet before send up here.
275 		 */
276 		(void) memmove(mp->b_rptr - VLAN_TAGSZ, mp->b_rptr,
277 		    2 * ETHERADDRL);
278 		mp->b_rptr -= VLAN_TAGSZ;
279 		ehp = (struct ether_vlan_header *)mp->b_rptr;
280 		ehp->ether_tpid = htons(ETHERTYPE_VLAN);
281 		ehp->ether_tci = htons(vtag);
282 		rgep->stats.rbytes += VLAN_TAGSZ;
283 	}
284 
285 	/*
286 	 * Check h/w checksum offload status
287 	 */
288 	pflags = 0;
289 	proto = rx_status & RBD_FLAG_PROTOCOL;
290 	if ((proto == RBD_FLAG_TCP && !(rx_status & RBD_TCP_CKSUM_ERR)) ||
291 	    (proto == RBD_FLAG_UDP && !(rx_status & RBD_UDP_CKSUM_ERR)))
292 		pflags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
293 	if (proto != RBD_FLAG_NONE_IP && !(rx_status & RBD_IP_CKSUM_ERR))
294 		pflags |= HCK_IPV4_HDRCKSUM;
295 	if (pflags != 0)  {
296 		(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, pflags, 0);
297 	}
298 
299 	return (mp);
300 }
301 
302 /*
303  * Accept the packets received in rx ring.
304  *
305  * Returns a chain of mblks containing the received data, to be
306  * passed up to mac_rx().
307  * The routine returns only when a complete scan has been performed
308  * without finding any packets to receive.
309  * This function must SET the OWN bit of BD to indicate the packets
310  * it has accepted from the ring.
311  */
312 static mblk_t *rge_receive_ring(rge_t *rgep);
313 #pragma	inline(rge_receive_ring)
314 
315 static mblk_t *
316 rge_receive_ring(rge_t *rgep)
317 {
318 	rge_bd_t *hw_rbd_p;
319 	mblk_t *head;
320 	mblk_t **tail;
321 	mblk_t *mp;
322 	uint32_t slot;
323 
324 	ASSERT(mutex_owned(rgep->rx_lock));
325 
326 	/*
327 	 * Sync (all) the receive ring descriptors
328 	 * before accepting the packets they describe
329 	 */
330 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORKERNEL);
331 	slot = rgep->rx_next;
332 	hw_rbd_p = &rgep->rx_ring[slot];
333 	head = NULL;
334 	tail = &head;
335 
336 	while (!(hw_rbd_p->flags_len & RGE_BSWAP_32(BD_FLAG_HW_OWN))) {
337 		if ((mp = rge_receive_packet(rgep, slot)) != NULL) {
338 			*tail = mp;
339 			tail = &mp->b_next;
340 		}
341 
342 		/*
343 		 * Clear RBD flags
344 		 */
345 		hw_rbd_p->flags_len =
346 		    RGE_BSWAP_32(rgep->rxbuf_size - rgep->head_room);
347 		HW_RBD_INIT(hw_rbd_p, slot);
348 		slot = NEXT(slot, RGE_RECV_SLOTS);
349 		hw_rbd_p = &rgep->rx_ring[slot];
350 	}
351 
352 	rgep->rx_next = slot;
353 	return (head);
354 }
355 
356 /*
357  * Receive all ready packets.
358  */
359 void rge_receive(rge_t *rgep);
360 #pragma	no_inline(rge_receive)
361 
362 void
363 rge_receive(rge_t *rgep)
364 {
365 	mblk_t *mp;
366 
367 	mutex_enter(rgep->rx_lock);
368 	mp = rge_receive_ring(rgep);
369 	mutex_exit(rgep->rx_lock);
370 
371 	if (mp != NULL)
372 		mac_rx(rgep->mh, rgep->handle, mp);
373 }
374 
375 
376 #undef	RGE_DBG
377 #define	RGE_DBG		RGE_DBG_SEND	/* debug flag for this code	*/
378 
379 
380 /*
381  * ========== Send-side recycle routines ==========
382  */
383 static uint32_t rge_send_claim(rge_t *rgep);
384 #pragma	inline(rge_send_claim)
385 
386 static uint32_t
387 rge_send_claim(rge_t *rgep)
388 {
389 	uint32_t slot;
390 	uint32_t next;
391 
392 	mutex_enter(rgep->tx_lock);
393 	slot = rgep->tx_next;
394 	next = NEXT(slot, RGE_SEND_SLOTS);
395 	rgep->tx_next = next;
396 	rgep->tx_flow++;
397 	mutex_exit(rgep->tx_lock);
398 
399 	/*
400 	 * We check that our invariants still hold:
401 	 * +	the slot and next indexes are in range
402 	 * +	the slot must not be the last one (i.e. the *next*
403 	 *	index must not match the next-recycle index), 'cos
404 	 *	there must always be at least one free slot in a ring
405 	 */
406 	ASSERT(slot < RGE_SEND_SLOTS);
407 	ASSERT(next < RGE_SEND_SLOTS);
408 	ASSERT(next != rgep->tc_next);
409 
410 	return (slot);
411 }
412 
413 /*
414  * We don't want to call this function every time after a successful
415  * h/w transmit done in ISR.  Instead, we call this function in the
416  * rge_send() when there're few or no free tx BDs remained.
417  */
418 static void rge_send_recycle(rge_t *rgep);
419 #pragma	inline(rge_send_recycle)
420 
421 static void
422 rge_send_recycle(rge_t *rgep)
423 {
424 	rge_bd_t *hw_sbd_p;
425 	uint32_t tc_tail;
426 	uint32_t tc_head;
427 	uint32_t n;
428 
429 	mutex_enter(rgep->tc_lock);
430 	tc_head = rgep->tc_next;
431 	tc_tail = rgep->tc_tail;
432 	if (tc_head == tc_tail)
433 		goto resched;
434 
435 	do {
436 		tc_tail = LAST(tc_tail, RGE_SEND_SLOTS);
437 		hw_sbd_p = &rgep->tx_ring[tc_tail];
438 		if (tc_tail == tc_head) {
439 			if (hw_sbd_p->flags_len &
440 			    RGE_BSWAP_32(BD_FLAG_HW_OWN)) {
441 				/*
442 				 * Recyled nothing: bump the watchdog counter,
443 				 * thus guaranteeing that it's nonzero
444 				 * (watchdog activated).
445 				 */
446 				rgep->watchdog += 1;
447 				mutex_exit(rgep->tc_lock);
448 				return;
449 			}
450 			break;
451 		}
452 	} while (hw_sbd_p->flags_len & RGE_BSWAP_32(BD_FLAG_HW_OWN));
453 
454 	/*
455 	 * Recyled something :-)
456 	 */
457 	rgep->tc_next = NEXT(tc_tail, RGE_SEND_SLOTS);
458 	n = rgep->tc_next - tc_head;
459 	if (rgep->tc_next < tc_head)
460 		n += RGE_SEND_SLOTS;
461 	rge_atomic_renounce(&rgep->tx_free, n);
462 	rgep->watchdog = 0;
463 	ASSERT(rgep->tx_free <= RGE_SEND_SLOTS);
464 
465 resched:
466 	mutex_exit(rgep->tc_lock);
467 	if (rgep->resched_needed &&
468 	    rgep->rge_mac_state == RGE_MAC_STARTED) {
469 		rgep->resched_needed = B_FALSE;
470 		mac_tx_update(rgep->mh);
471 	}
472 }
473 
474 /*
475  * Send a message by copying it into a preallocated (and premapped) buffer
476  */
477 static void rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci);
478 #pragma	inline(rge_send_copy)
479 
480 static void
481 rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci)
482 {
483 	rge_bd_t *hw_sbd_p;
484 	sw_sbd_t *ssbdp;
485 	mblk_t *bp;
486 	char *txb;
487 	uint32_t slot;
488 	size_t totlen;
489 	size_t mblen;
490 	uint32_t pflags;
491 	struct ether_header *ethhdr;
492 	struct ip *ip_hdr;
493 
494 	/*
495 	 * IMPORTANT:
496 	 *	Up to the point where it claims a place, a send_msg()
497 	 *	routine can indicate failure by returning B_FALSE.  Once it's
498 	 *	claimed a place, it mustn't fail.
499 	 *
500 	 * In this version, there's no setup to be done here, and there's
501 	 * nothing that can fail, so we can go straight to claiming our
502 	 * already-reserved place on the train.
503 	 *
504 	 * This is the point of no return!
505 	 */
506 	slot = rge_send_claim(rgep);
507 	ssbdp = &rgep->sw_sbds[slot];
508 
509 	/*
510 	 * Copy the data into a pre-mapped buffer, which avoids the
511 	 * overhead (and complication) of mapping/unmapping STREAMS
512 	 * buffers and keeping hold of them until the DMA has completed.
513 	 *
514 	 * Because all buffers are the same size, and larger than the
515 	 * longest single valid message, we don't have to bother about
516 	 * splitting the message across multiple buffers either.
517 	 */
518 	txb = DMA_VPTR(ssbdp->pbuf);
519 	totlen = 0;
520 	bp = mp;
521 	if (tci != 0) {
522 		/*
523 		 * Do not copy the vlan tag
524 		 */
525 		bcopy(bp->b_rptr, txb, 2 * ETHERADDRL);
526 		txb += 2 * ETHERADDRL;
527 		totlen += 2 * ETHERADDRL;
528 		mblen = bp->b_wptr - bp->b_rptr;
529 		ASSERT(mblen >= 2 * ETHERADDRL + VLAN_TAGSZ);
530 		mblen -= 2 * ETHERADDRL + VLAN_TAGSZ;
531 		if ((totlen += mblen) <= rgep->ethmax_size) {
532 			bcopy(bp->b_rptr + 2 * ETHERADDRL + VLAN_TAGSZ,
533 			    txb, mblen);
534 			txb += mblen;
535 		}
536 		bp = bp->b_cont;
537 		rgep->stats.obytes += VLAN_TAGSZ;
538 	}
539 	for (; bp != NULL; bp = bp->b_cont) {
540 		mblen = bp->b_wptr - bp->b_rptr;
541 		if ((totlen += mblen) <= rgep->ethmax_size) {
542 			bcopy(bp->b_rptr, txb, mblen);
543 			txb += mblen;
544 		}
545 	}
546 	rgep->stats.obytes += totlen;
547 	rgep->stats.tx_pre_ismax = rgep->stats.tx_cur_ismax;
548 	if (totlen == rgep->ethmax_size)
549 		rgep->stats.tx_cur_ismax = B_TRUE;
550 	else
551 		rgep->stats.tx_cur_ismax = B_FALSE;
552 
553 	/*
554 	 * We'e reached the end of the chain; and we should have
555 	 * collected no more than ETHERMAX bytes into our buffer.
556 	 */
557 	ASSERT(bp == NULL);
558 	ASSERT(totlen <= rgep->ethmax_size);
559 	DMA_SYNC(ssbdp->pbuf, DDI_DMA_SYNC_FORDEV);
560 
561 	/*
562 	 * Update the hardware send buffer descriptor flags
563 	 */
564 	hw_sbd_p = &rgep->tx_ring[slot];
565 	ASSERT(hw_sbd_p == ssbdp->desc.mem_va);
566 	hw_sbd_p->flags_len = RGE_BSWAP_32(totlen & SBD_LEN_MASK);
567 	if (tci != 0) {
568 		tci = TCI_OS2CHIP(tci);
569 		hw_sbd_p->vlan_tag = RGE_BSWAP_32(tci);
570 		hw_sbd_p->vlan_tag |= RGE_BSWAP_32(SBD_VLAN_PKT);
571 	} else {
572 		hw_sbd_p->vlan_tag = 0;
573 	}
574 
575 	/*
576 	 * h/w checksum offload flags
577 	 */
578 	hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
579 	if (pflags & HCK_FULLCKSUM) {
580 		ASSERT(totlen >= sizeof (struct ether_header) +
581 		    sizeof (struct ip));
582 		ethhdr = (struct ether_header *)(DMA_VPTR(ssbdp->pbuf));
583 		/*
584 		 * Is the packet an IP(v4) packet?
585 		 */
586 		if (ntohs(ethhdr->ether_type) == ETHERTYPE_IP) {
587 			ip_hdr = (struct ip *)
588 			    ((uint8_t *)DMA_VPTR(ssbdp->pbuf) +
589 			    sizeof (struct ether_header));
590 			if (ip_hdr->ip_p == IPPROTO_TCP)
591 				hw_sbd_p->flags_len |=
592 				    RGE_BSWAP_32(SBD_FLAG_TCP_CKSUM);
593 			else if (ip_hdr->ip_p == IPPROTO_UDP)
594 				hw_sbd_p->flags_len |=
595 				    RGE_BSWAP_32(SBD_FLAG_UDP_CKSUM);
596 		}
597 	}
598 	if (pflags & HCK_IPV4_HDRCKSUM)
599 		hw_sbd_p->flags_len |= RGE_BSWAP_32(SBD_FLAG_IP_CKSUM);
600 
601 	HW_SBD_SET(hw_sbd_p, slot);
602 
603 	/*
604 	 * We're done.
605 	 * The message can be freed right away, as we've already
606 	 * copied the contents ...
607 	 */
608 	freemsg(mp);
609 }
610 
611 static boolean_t
612 rge_send(rge_t *rgep, mblk_t *mp)
613 {
614 	struct ether_vlan_header *ehp;
615 	uint16_t tci;
616 	rge_hw_stats_t *bstp;
617 	uint8_t counter;
618 
619 	ASSERT(mp->b_next == NULL);
620 
621 	/*
622 	 * Try to reserve a place in the transmit ring.
623 	 */
624 	if (!rge_atomic_reserve(&rgep->tx_free, 1)) {
625 		RGE_DEBUG(("rge_send: no free slots"));
626 		rgep->stats.defer++;
627 		rgep->resched_needed = B_TRUE;
628 		(void) ddi_intr_trigger_softint(rgep->resched_hdl, NULL);
629 		return (B_FALSE);
630 	}
631 
632 	/*
633 	 * Determine if the packet is VLAN tagged.
634 	 */
635 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
636 	tci = 0;
637 	ehp = (struct ether_vlan_header *)mp->b_rptr;
638 	if (ehp->ether_tpid == htons(ETHERTYPE_VLAN))
639 		tci = ntohs(ehp->ether_tci);
640 
641 	/*
642 	 * We've reserved a place :-)
643 	 * These ASSERTions check that our invariants still hold:
644 	 *	there must still be at least one free place
645 	 *	there must be at least one place NOT free (ours!)
646 	 */
647 	ASSERT(rgep->tx_free < RGE_SEND_SLOTS);
648 	rge_send_copy(rgep, mp, tci);
649 
650 	/*
651 	 * Trigger chip h/w transmit ...
652 	 */
653 	mutex_enter(rgep->tx_lock);
654 	if (--rgep->tx_flow == 0) {
655 		DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
656 		rge_tx_trigger(rgep);
657 		rgep->stats.opackets ++;
658 		if (rgep->tx_free < RGE_SEND_SLOTS/2)
659 			rge_send_recycle(rgep);
660 		rgep->tc_tail = rgep->tx_next;
661 
662 		/*
663 		 * It's observed that in current Realtek PCI-E chips, tx
664 		 * request of the second fragment for upper layer packets
665 		 * will be ignored if the hardware transmission is in
666 		 * progress and will not be processed when the tx engine
667 		 * is idle. So one solution is to re-issue the requests
668 		 * if the hardware and the software tx packets statistics
669 		 * are inconsistent.
670 		 */
671 		if (rgep->chipid.is_pcie && rgep->stats.tx_pre_ismax) {
672 			for (counter = 0; counter < 10; counter ++) {
673 				mutex_enter(rgep->genlock);
674 				rge_hw_stats_dump(rgep);
675 				mutex_exit(rgep->genlock);
676 				bstp = rgep->hw_stats;
677 				if (rgep->stats.opackets
678 				    != RGE_BSWAP_64(bstp->rcv_ok))
679 					rge_tx_trigger(rgep);
680 				else
681 					break;
682 			}
683 		}
684 	}
685 	mutex_exit(rgep->tx_lock);
686 
687 	return (B_TRUE);
688 }
689 
690 uint_t
691 rge_reschedule(caddr_t arg1, caddr_t arg2)
692 {
693 	rge_t *rgep;
694 
695 	rgep = (rge_t *)arg1;
696 	_NOTE(ARGUNUSED(arg2))
697 
698 	rge_send_recycle(rgep);
699 
700 	return (DDI_INTR_CLAIMED);
701 }
702 
703 /*
704  * rge_m_tx() - send a chain of packets
705  */
706 mblk_t *
707 rge_m_tx(void *arg, mblk_t *mp)
708 {
709 	rge_t *rgep = arg;		/* private device info	*/
710 	mblk_t *next;
711 
712 	ASSERT(mp != NULL);
713 	ASSERT(rgep->rge_mac_state == RGE_MAC_STARTED);
714 
715 	if (rgep->rge_chip_state != RGE_CHIP_RUNNING) {
716 		RGE_DEBUG(("rge_m_tx: chip not running"));
717 		return (mp);
718 	}
719 
720 	rw_enter(rgep->errlock, RW_READER);
721 	while (mp != NULL) {
722 		next = mp->b_next;
723 		mp->b_next = NULL;
724 
725 		if (!rge_send(rgep, mp)) {
726 			mp->b_next = next;
727 			break;
728 		}
729 
730 		mp = next;
731 	}
732 	rw_exit(rgep->errlock);
733 
734 	return (mp);
735 }
736