xref: /titanic_51/usr/src/uts/common/io/rge/rge_rxtx.c (revision 33f2fefd46350ca5992567761c46a5b70f864340)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include "rge.h"
27 
/*
 * Conversions between a 32-bit integer and a pointer.  Both directions
 * go through uintptr_t so that any change of width is explicit.
 */
#define	U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
#define	PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))
30 
31 /*
32  * ========== RX side routines ==========
33  */
34 
35 #define	RGE_DBG		RGE_DBG_RECV	/* debug flag for this code	*/
36 
/*
 * Atomically take <n> units out of the counter at <count_p>.
 *
 * The reservation only succeeds while the counter is strictly greater
 * than <n>, so a successful call always leaves a nonzero remainder.
 * Returns that remainder on success, or 0 (counter left untouched)
 * when there is not enough left.
 */
static uint32_t rge_atomic_reserve(uint32_t *count_p, uint32_t n);
#pragma	inline(rge_atomic_reserve)

static uint32_t
rge_atomic_reserve(uint32_t *count_p, uint32_t n)
{
	uint32_t seen;
	uint32_t remaining;

	/* Retry until the swap lands on the value we sampled */
	for (;;) {
		seen = *count_p;
		if (seen <= n)
			return (0);		/* no resources left	*/
		remaining = seen - n;
		if (cas32(count_p, seen, remaining) == seen)
			return (remaining);
	}
}
56 
/*
 * Atomically add <n> to the counter at <count_p>
 */
static void rge_atomic_renounce(uint32_t *count_p, uint32_t n);
#pragma	inline(rge_atomic_renounce)

static void
rge_atomic_renounce(uint32_t *count_p, uint32_t n)
{
	uint32_t seen;

	/* Retry until the swap lands on the value we sampled */
	for (;;) {
		seen = *count_p;
		if (cas32(count_p, seen, seen + n) == seen)
			break;
	}
}
75 
76 /*
77  * Callback code invoked from STREAMs when the recv data buffer is free
78  * for recycling.
79  */
80 void
81 rge_rx_recycle(caddr_t arg)
82 {
83 	rge_t *rgep;
84 	dma_buf_t *rx_buf;
85 	sw_rbd_t *free_srbdp;
86 	uint32_t slot_recy;
87 
88 	rx_buf = (dma_buf_t *)arg;
89 	rgep = (rge_t *)rx_buf->private;
90 
91 	/*
92 	 * In rge_unattach() and rge_attach(), this callback function will
93 	 * also be called to free mp in rge_fini_rings() and rge_init_rings().
94 	 * In such situation, we shouldn't do below desballoc(), otherwise,
95 	 * there'll be memory leak.
96 	 */
97 	if (rgep->rge_mac_state == RGE_MAC_UNATTACH ||
98 	    rgep->rge_mac_state == RGE_MAC_ATTACH)
99 		return;
100 
101 	/*
102 	 * Recycle the data buffer again
103 	 * and fill them in free ring
104 	 */
105 	rx_buf->mp = desballoc(DMA_VPTR(rx_buf->pbuf),
106 	    rgep->rxbuf_size, 0, &rx_buf->rx_recycle);
107 	if (rx_buf->mp == NULL) {
108 		rge_problem(rgep, "rge_rx_recycle: desballoc() failed");
109 		return;
110 	}
111 	mutex_enter(rgep->rc_lock);
112 	slot_recy = rgep->rc_next;
113 	free_srbdp = &rgep->free_srbds[slot_recy];
114 
115 	ASSERT(free_srbdp->rx_buf == NULL);
116 	free_srbdp->rx_buf = rx_buf;
117 	rgep->rc_next = NEXT(slot_recy, RGE_BUF_SLOTS);
118 	rge_atomic_renounce(&rgep->rx_free, 1);
119 	if (rgep->rx_bcopy && rgep->rx_free == RGE_BUF_SLOTS)
120 		rgep->rx_bcopy = B_FALSE;
121 	ASSERT(rgep->rx_free <= RGE_BUF_SLOTS);
122 
123 	mutex_exit(rgep->rc_lock);
124 }
125 
126 static int rge_rx_refill(rge_t *rgep, uint32_t slot);
127 #pragma	inline(rge_rx_refill)
128 
129 static int
130 rge_rx_refill(rge_t *rgep, uint32_t slot)
131 {
132 	dma_buf_t *free_buf;
133 	rge_bd_t *hw_rbd_p;
134 	sw_rbd_t *srbdp;
135 	uint32_t free_slot;
136 
137 	srbdp = &rgep->sw_rbds[slot];
138 	hw_rbd_p = &rgep->rx_ring[slot];
139 	free_slot = rgep->rf_next;
140 	free_buf = rgep->free_srbds[free_slot].rx_buf;
141 	if (free_buf != NULL) {
142 		srbdp->rx_buf = free_buf;
143 		rgep->free_srbds[free_slot].rx_buf = NULL;
144 		hw_rbd_p->host_buf_addr = RGE_BSWAP_32(rgep->head_room +
145 		    + free_buf->pbuf.cookie.dmac_laddress);
146 		hw_rbd_p->host_buf_addr_hi =
147 		    RGE_BSWAP_32(free_buf->pbuf.cookie.dmac_laddress >> 32);
148 		rgep->rf_next = NEXT(free_slot, RGE_BUF_SLOTS);
149 		return (1);
150 	} else {
151 		/*
152 		 * This situation shouldn't happen
153 		 */
154 		rge_problem(rgep, "rge_rx_refill: free buffer %d is NULL",
155 		    free_slot);
156 		rgep->rx_bcopy = B_TRUE;
157 		return (0);
158 	}
159 }
160 
161 static mblk_t *rge_receive_packet(rge_t *rgep, uint32_t slot);
162 #pragma	inline(rge_receive_packet)
163 
164 static mblk_t *
165 rge_receive_packet(rge_t *rgep, uint32_t slot)
166 {
167 	rge_bd_t *hw_rbd_p;
168 	sw_rbd_t *srbdp;
169 	uchar_t *dp;
170 	mblk_t *mp;
171 	uint8_t *rx_ptr;
172 	uint32_t rx_status;
173 	uint_t packet_len;
174 	uint_t minsize;
175 	uint_t maxsize;
176 	uint32_t proto;
177 	uint32_t pflags;
178 	struct ether_vlan_header *ehp;
179 	uint16_t vtag = 0;
180 
181 	hw_rbd_p = &rgep->rx_ring[slot];
182 	srbdp = &rgep->sw_rbds[slot];
183 
184 	/*
185 	 * Read receive status
186 	 */
187 	rx_status = RGE_BSWAP_32(hw_rbd_p->flags_len) & RBD_FLAGS_MASK;
188 
189 	/*
190 	 * Handle error packet
191 	 */
192 	if (!(rx_status & BD_FLAG_PKT_END)) {
193 		RGE_DEBUG(("rge_receive_packet: not a complete packat"));
194 		return (NULL);
195 	}
196 	if (rx_status & RBD_FLAG_ERROR) {
197 		if (rx_status & RBD_FLAG_CRC_ERR)
198 			rgep->stats.crc_err++;
199 		if (rx_status & RBD_FLAG_RUNT)
200 			rgep->stats.in_short++;
201 		/*
202 		 * Set chip_error flag to reset chip:
203 		 * (suggested in Realtek programming guide.)
204 		 */
205 		RGE_DEBUG(("rge_receive_packet: error packet, status = %x",
206 		    rx_status));
207 		mutex_enter(rgep->genlock);
208 		rgep->rge_chip_state = RGE_CHIP_ERROR;
209 		mutex_exit(rgep->genlock);
210 		return (NULL);
211 	}
212 
213 	/*
214 	 * Handle size error packet
215 	 */
216 	packet_len = RGE_BSWAP_32(hw_rbd_p->flags_len) & RBD_LEN_MASK;
217 	packet_len -= ETHERFCSL;
218 	minsize = ETHERMIN;
219 	pflags = RGE_BSWAP_32(hw_rbd_p->vlan_tag);
220 	if (pflags & RBD_VLAN_PKT)
221 		minsize -= VLAN_TAGSZ;
222 	maxsize = rgep->ethmax_size;
223 	if (packet_len < minsize || packet_len > maxsize) {
224 		RGE_DEBUG(("rge_receive_packet: len err = %d", packet_len));
225 		return (NULL);
226 	}
227 
228 	DMA_SYNC(srbdp->rx_buf->pbuf, DDI_DMA_SYNC_FORKERNEL);
229 	if (rgep->rx_bcopy || packet_len <= RGE_RECV_COPY_SIZE ||
230 	    !rge_atomic_reserve(&rgep->rx_free, 1)) {
231 		/*
232 		 * Allocate buffer to receive this good packet
233 		 */
234 		mp = allocb(packet_len + RGE_HEADROOM, 0);
235 		if (mp == NULL) {
236 			RGE_DEBUG(("rge_receive_packet: allocate buffer fail"));
237 			rgep->stats.no_rcvbuf++;
238 			return (NULL);
239 		}
240 
241 		/*
242 		 * Copy the data found into the new cluster
243 		 */
244 		rx_ptr = DMA_VPTR(srbdp->rx_buf->pbuf);
245 		mp->b_rptr = dp = mp->b_rptr + RGE_HEADROOM;
246 		bcopy(rx_ptr + rgep->head_room, dp, packet_len);
247 		mp->b_wptr = dp + packet_len;
248 	} else {
249 		mp = srbdp->rx_buf->mp;
250 		mp->b_rptr += rgep->head_room;
251 		mp->b_wptr = mp->b_rptr + packet_len;
252 		mp->b_next = mp->b_cont = NULL;
253 		/*
254 		 * Refill the current receive bd buffer
255 		 *   if fails, will just keep the mp.
256 		 */
257 		if (!rge_rx_refill(rgep, slot))
258 			return (NULL);
259 	}
260 	rgep->stats.rbytes += packet_len;
261 	rgep->stats.rpackets ++;
262 
263 	/*
264 	 * VLAN packet ?
265 	 */
266 	if (pflags & RBD_VLAN_PKT)
267 		vtag = pflags & RBD_VLAN_TAG;
268 	if (vtag) {
269 		vtag = TCI_CHIP2OS(vtag);
270 		/*
271 		 * As h/w strips the VLAN tag from incoming packet, we need
272 		 * insert VLAN tag into this packet before send up here.
273 		 */
274 		(void) memmove(mp->b_rptr - VLAN_TAGSZ, mp->b_rptr,
275 		    2 * ETHERADDRL);
276 		mp->b_rptr -= VLAN_TAGSZ;
277 		ehp = (struct ether_vlan_header *)mp->b_rptr;
278 		ehp->ether_tpid = htons(ETHERTYPE_VLAN);
279 		ehp->ether_tci = htons(vtag);
280 		rgep->stats.rbytes += VLAN_TAGSZ;
281 	}
282 
283 	/*
284 	 * Check h/w checksum offload status
285 	 */
286 	pflags = 0;
287 	proto = rx_status & RBD_FLAG_PROTOCOL;
288 	if ((proto == RBD_FLAG_TCP && !(rx_status & RBD_TCP_CKSUM_ERR)) ||
289 	    (proto == RBD_FLAG_UDP && !(rx_status & RBD_UDP_CKSUM_ERR)))
290 		pflags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
291 	if (proto != RBD_FLAG_NONE_IP && !(rx_status & RBD_IP_CKSUM_ERR))
292 		pflags |= HCK_IPV4_HDRCKSUM;
293 	if (pflags != 0)  {
294 		(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, pflags, 0);
295 	}
296 
297 	return (mp);
298 }
299 
300 /*
301  * Accept the packets received in rx ring.
302  *
303  * Returns a chain of mblks containing the received data, to be
304  * passed up to mac_rx().
305  * The routine returns only when a complete scan has been performed
306  * without finding any packets to receive.
307  * This function must SET the OWN bit of BD to indicate the packets
308  * it has accepted from the ring.
309  */
310 static mblk_t *rge_receive_ring(rge_t *rgep);
311 #pragma	inline(rge_receive_ring)
312 
313 static mblk_t *
314 rge_receive_ring(rge_t *rgep)
315 {
316 	rge_bd_t *hw_rbd_p;
317 	mblk_t *head;
318 	mblk_t **tail;
319 	mblk_t *mp;
320 	uint32_t slot;
321 
322 	ASSERT(mutex_owned(rgep->rx_lock));
323 
324 	/*
325 	 * Sync (all) the receive ring descriptors
326 	 * before accepting the packets they describe
327 	 */
328 	DMA_SYNC(rgep->rx_desc, DDI_DMA_SYNC_FORKERNEL);
329 	slot = rgep->rx_next;
330 	hw_rbd_p = &rgep->rx_ring[slot];
331 	head = NULL;
332 	tail = &head;
333 
334 	while (!(hw_rbd_p->flags_len & RGE_BSWAP_32(BD_FLAG_HW_OWN))) {
335 		if ((mp = rge_receive_packet(rgep, slot)) != NULL) {
336 			*tail = mp;
337 			tail = &mp->b_next;
338 		}
339 
340 		/*
341 		 * Clear RBD flags
342 		 */
343 		hw_rbd_p->flags_len =
344 		    RGE_BSWAP_32(rgep->rxbuf_size - rgep->head_room);
345 		HW_RBD_INIT(hw_rbd_p, slot);
346 		slot = NEXT(slot, RGE_RECV_SLOTS);
347 		hw_rbd_p = &rgep->rx_ring[slot];
348 	}
349 
350 	rgep->rx_next = slot;
351 	return (head);
352 }
353 
354 /*
355  * Receive all ready packets.
356  */
357 void rge_receive(rge_t *rgep);
358 #pragma	no_inline(rge_receive)
359 
360 void
361 rge_receive(rge_t *rgep)
362 {
363 	mblk_t *mp;
364 
365 	mutex_enter(rgep->rx_lock);
366 	mp = rge_receive_ring(rgep);
367 	mutex_exit(rgep->rx_lock);
368 
369 	if (mp != NULL)
370 		mac_rx(rgep->mh, NULL, mp);
371 }
372 
373 
374 #undef	RGE_DBG
375 #define	RGE_DBG		RGE_DBG_SEND	/* debug flag for this code	*/
376 
377 
378 /*
379  * ========== Send-side recycle routines ==========
380  */
381 static uint32_t rge_send_claim(rge_t *rgep);
382 #pragma	inline(rge_send_claim)
383 
384 static uint32_t
385 rge_send_claim(rge_t *rgep)
386 {
387 	uint32_t slot;
388 	uint32_t next;
389 
390 	mutex_enter(rgep->tx_lock);
391 	slot = rgep->tx_next;
392 	next = NEXT(slot, RGE_SEND_SLOTS);
393 	rgep->tx_next = next;
394 	rgep->tx_flow++;
395 	mutex_exit(rgep->tx_lock);
396 
397 	/*
398 	 * We check that our invariants still hold:
399 	 * +	the slot and next indexes are in range
400 	 * +	the slot must not be the last one (i.e. the *next*
401 	 *	index must not match the next-recycle index), 'cos
402 	 *	there must always be at least one free slot in a ring
403 	 */
404 	ASSERT(slot < RGE_SEND_SLOTS);
405 	ASSERT(next < RGE_SEND_SLOTS);
406 	ASSERT(next != rgep->tc_next);
407 
408 	return (slot);
409 }
410 
411 /*
412  * We don't want to call this function every time after a successful
413  * h/w transmit done in ISR.  Instead, we call this function in the
414  * rge_send() when there're few or no free tx BDs remained.
415  */
416 static void rge_send_recycle(rge_t *rgep);
417 #pragma	inline(rge_send_recycle)
418 
419 static void
420 rge_send_recycle(rge_t *rgep)
421 {
422 	rge_bd_t *hw_sbd_p;
423 	uint32_t tc_tail;
424 	uint32_t tc_head;
425 	uint32_t n;
426 
427 	mutex_enter(rgep->tc_lock);
428 	tc_head = rgep->tc_next;
429 	tc_tail = rgep->tc_tail;
430 	if (tc_head == tc_tail)
431 		goto resched;
432 
433 	do {
434 		tc_tail = LAST(tc_tail, RGE_SEND_SLOTS);
435 		hw_sbd_p = &rgep->tx_ring[tc_tail];
436 		if (tc_tail == tc_head) {
437 			if (hw_sbd_p->flags_len &
438 			    RGE_BSWAP_32(BD_FLAG_HW_OWN)) {
439 				/*
440 				 * Recyled nothing: bump the watchdog counter,
441 				 * thus guaranteeing that it's nonzero
442 				 * (watchdog activated).
443 				 */
444 				rgep->watchdog += 1;
445 				mutex_exit(rgep->tc_lock);
446 				return;
447 			}
448 			break;
449 		}
450 	} while (hw_sbd_p->flags_len & RGE_BSWAP_32(BD_FLAG_HW_OWN));
451 
452 	/*
453 	 * Recyled something :-)
454 	 */
455 	rgep->tc_next = NEXT(tc_tail, RGE_SEND_SLOTS);
456 	n = rgep->tc_next - tc_head;
457 	if (rgep->tc_next < tc_head)
458 		n += RGE_SEND_SLOTS;
459 	rge_atomic_renounce(&rgep->tx_free, n);
460 	rgep->watchdog = 0;
461 	ASSERT(rgep->tx_free <= RGE_SEND_SLOTS);
462 
463 resched:
464 	mutex_exit(rgep->tc_lock);
465 	if (rgep->resched_needed &&
466 	    rgep->rge_mac_state == RGE_MAC_STARTED) {
467 		rgep->resched_needed = B_FALSE;
468 		mac_tx_update(rgep->mh);
469 	}
470 }
471 
472 /*
473  * Send a message by copying it into a preallocated (and premapped) buffer
474  */
475 static void rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci);
476 #pragma	inline(rge_send_copy)
477 
478 static void
479 rge_send_copy(rge_t *rgep, mblk_t *mp, uint16_t tci)
480 {
481 	rge_bd_t *hw_sbd_p;
482 	sw_sbd_t *ssbdp;
483 	mblk_t *bp;
484 	char *txb;
485 	uint32_t slot;
486 	size_t totlen;
487 	size_t mblen;
488 	uint32_t pflags;
489 	struct ether_header *ethhdr;
490 	struct ip *ip_hdr;
491 
492 	/*
493 	 * IMPORTANT:
494 	 *	Up to the point where it claims a place, a send_msg()
495 	 *	routine can indicate failure by returning B_FALSE.  Once it's
496 	 *	claimed a place, it mustn't fail.
497 	 *
498 	 * In this version, there's no setup to be done here, and there's
499 	 * nothing that can fail, so we can go straight to claiming our
500 	 * already-reserved place on the train.
501 	 *
502 	 * This is the point of no return!
503 	 */
504 	slot = rge_send_claim(rgep);
505 	ssbdp = &rgep->sw_sbds[slot];
506 
507 	/*
508 	 * Copy the data into a pre-mapped buffer, which avoids the
509 	 * overhead (and complication) of mapping/unmapping STREAMS
510 	 * buffers and keeping hold of them until the DMA has completed.
511 	 *
512 	 * Because all buffers are the same size, and larger than the
513 	 * longest single valid message, we don't have to bother about
514 	 * splitting the message across multiple buffers either.
515 	 */
516 	txb = DMA_VPTR(ssbdp->pbuf);
517 	totlen = 0;
518 	bp = mp;
519 	if (tci != 0) {
520 		/*
521 		 * Do not copy the vlan tag
522 		 */
523 		bcopy(bp->b_rptr, txb, 2 * ETHERADDRL);
524 		txb += 2 * ETHERADDRL;
525 		totlen += 2 * ETHERADDRL;
526 		mblen = MBLKL(bp);
527 		ASSERT(mblen >= 2 * ETHERADDRL + VLAN_TAGSZ);
528 		mblen -= 2 * ETHERADDRL + VLAN_TAGSZ;
529 		if ((totlen += mblen) <= rgep->ethmax_size) {
530 			bcopy(bp->b_rptr + 2 * ETHERADDRL + VLAN_TAGSZ,
531 			    txb, mblen);
532 			txb += mblen;
533 		}
534 		bp = bp->b_cont;
535 		rgep->stats.obytes += VLAN_TAGSZ;
536 	}
537 	for (; bp != NULL; bp = bp->b_cont) {
538 		mblen = MBLKL(bp);
539 		if ((totlen += mblen) <= rgep->ethmax_size) {
540 			bcopy(bp->b_rptr, txb, mblen);
541 			txb += mblen;
542 		}
543 	}
544 	rgep->stats.obytes += totlen;
545 	rgep->stats.tx_pre_ismax = rgep->stats.tx_cur_ismax;
546 	if (totlen == rgep->ethmax_size)
547 		rgep->stats.tx_cur_ismax = B_TRUE;
548 	else
549 		rgep->stats.tx_cur_ismax = B_FALSE;
550 
551 	/*
552 	 * We'e reached the end of the chain; and we should have
553 	 * collected no more than ETHERMAX bytes into our buffer.
554 	 */
555 	ASSERT(bp == NULL);
556 	ASSERT(totlen <= rgep->ethmax_size);
557 	DMA_SYNC(ssbdp->pbuf, DDI_DMA_SYNC_FORDEV);
558 
559 	/*
560 	 * Update the hardware send buffer descriptor flags
561 	 */
562 	hw_sbd_p = &rgep->tx_ring[slot];
563 	ASSERT(hw_sbd_p == ssbdp->desc.mem_va);
564 	hw_sbd_p->flags_len = RGE_BSWAP_32(totlen & SBD_LEN_MASK);
565 	if (tci != 0) {
566 		tci = TCI_OS2CHIP(tci);
567 		hw_sbd_p->vlan_tag = RGE_BSWAP_32(tci);
568 		hw_sbd_p->vlan_tag |= RGE_BSWAP_32(SBD_VLAN_PKT);
569 	} else {
570 		hw_sbd_p->vlan_tag = 0;
571 	}
572 
573 	/*
574 	 * h/w checksum offload flags
575 	 */
576 	hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, &pflags);
577 	if (pflags & HCK_FULLCKSUM) {
578 		ASSERT(totlen >= sizeof (struct ether_header) +
579 		    sizeof (struct ip));
580 		ethhdr = (struct ether_header *)(DMA_VPTR(ssbdp->pbuf));
581 		/*
582 		 * Is the packet an IP(v4) packet?
583 		 */
584 		if (ntohs(ethhdr->ether_type) == ETHERTYPE_IP) {
585 			ip_hdr = (struct ip *)
586 			    ((uint8_t *)DMA_VPTR(ssbdp->pbuf) +
587 			    sizeof (struct ether_header));
588 			if (ip_hdr->ip_p == IPPROTO_TCP)
589 				hw_sbd_p->flags_len |=
590 				    RGE_BSWAP_32(SBD_FLAG_TCP_CKSUM);
591 			else if (ip_hdr->ip_p == IPPROTO_UDP)
592 				hw_sbd_p->flags_len |=
593 				    RGE_BSWAP_32(SBD_FLAG_UDP_CKSUM);
594 		}
595 	}
596 	if (pflags & HCK_IPV4_HDRCKSUM)
597 		hw_sbd_p->flags_len |= RGE_BSWAP_32(SBD_FLAG_IP_CKSUM);
598 
599 	HW_SBD_SET(hw_sbd_p, slot);
600 
601 	/*
602 	 * We're done.
603 	 * The message can be freed right away, as we've already
604 	 * copied the contents ...
605 	 */
606 	freemsg(mp);
607 }
608 
609 static boolean_t
610 rge_send(rge_t *rgep, mblk_t *mp)
611 {
612 	struct ether_vlan_header *ehp;
613 	uint16_t tci;
614 	rge_hw_stats_t *bstp;
615 	uint8_t counter;
616 
617 	ASSERT(mp->b_next == NULL);
618 
619 	/*
620 	 * Try to reserve a place in the transmit ring.
621 	 */
622 	if (!rge_atomic_reserve(&rgep->tx_free, 1)) {
623 		RGE_DEBUG(("rge_send: no free slots"));
624 		rgep->stats.defer++;
625 		rgep->resched_needed = B_TRUE;
626 		(void) ddi_intr_trigger_softint(rgep->resched_hdl, NULL);
627 		return (B_FALSE);
628 	}
629 
630 	/*
631 	 * Determine if the packet is VLAN tagged.
632 	 */
633 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
634 	tci = 0;
635 	ehp = (struct ether_vlan_header *)mp->b_rptr;
636 	if (ehp->ether_tpid == htons(ETHERTYPE_VLAN))
637 		tci = ntohs(ehp->ether_tci);
638 
639 	/*
640 	 * We've reserved a place :-)
641 	 * These ASSERTions check that our invariants still hold:
642 	 *	there must still be at least one free place
643 	 *	there must be at least one place NOT free (ours!)
644 	 */
645 	ASSERT(rgep->tx_free < RGE_SEND_SLOTS);
646 	rge_send_copy(rgep, mp, tci);
647 
648 	/*
649 	 * Trigger chip h/w transmit ...
650 	 */
651 	mutex_enter(rgep->tx_lock);
652 	if (--rgep->tx_flow == 0) {
653 		DMA_SYNC(rgep->tx_desc, DDI_DMA_SYNC_FORDEV);
654 		rge_tx_trigger(rgep);
655 		rgep->stats.opackets ++;
656 		if (rgep->tx_free < RGE_SEND_SLOTS/2)
657 			rge_send_recycle(rgep);
658 		rgep->tc_tail = rgep->tx_next;
659 
660 		/*
661 		 * It's observed that in current Realtek PCI-E chips, tx
662 		 * request of the second fragment for upper layer packets
663 		 * will be ignored if the hardware transmission is in
664 		 * progress and will not be processed when the tx engine
665 		 * is idle. So one solution is to re-issue the requests
666 		 * if the hardware and the software tx packets statistics
667 		 * are inconsistent.
668 		 */
669 		if (rgep->chipid.is_pcie && rgep->stats.tx_pre_ismax) {
670 			for (counter = 0; counter < 10; counter ++) {
671 				mutex_enter(rgep->genlock);
672 				rge_hw_stats_dump(rgep);
673 				mutex_exit(rgep->genlock);
674 				bstp = rgep->hw_stats;
675 				if (rgep->stats.opackets
676 				    != RGE_BSWAP_64(bstp->rcv_ok))
677 					rge_tx_trigger(rgep);
678 				else
679 					break;
680 			}
681 		}
682 	}
683 	mutex_exit(rgep->tx_lock);
684 
685 	return (B_TRUE);
686 }
687 
688 uint_t
689 rge_reschedule(caddr_t arg1, caddr_t arg2)
690 {
691 	rge_t *rgep;
692 
693 	rgep = (rge_t *)arg1;
694 	_NOTE(ARGUNUSED(arg2))
695 
696 	rge_send_recycle(rgep);
697 
698 	return (DDI_INTR_CLAIMED);
699 }
700 
701 /*
702  * rge_m_tx() - send a chain of packets
703  */
704 mblk_t *
705 rge_m_tx(void *arg, mblk_t *mp)
706 {
707 	rge_t *rgep = arg;		/* private device info	*/
708 	mblk_t *next;
709 
710 	ASSERT(mp != NULL);
711 
712 	rw_enter(rgep->errlock, RW_READER);
713 	if ((rgep->rge_mac_state != RGE_MAC_STARTED) ||
714 	    (rgep->rge_chip_state != RGE_CHIP_RUNNING)) {
715 		RGE_DEBUG(("rge_m_tx: tx doesn't work"));
716 		rw_exit(rgep->errlock);
717 		return (mp);
718 	}
719 
720 	while (mp != NULL) {
721 		next = mp->b_next;
722 		mp->b_next = NULL;
723 
724 		if (!rge_send(rgep, mp)) {
725 			mp->b_next = next;
726 			break;
727 		}
728 
729 		mp = next;
730 	}
731 	rw_exit(rgep->errlock);
732 
733 	return (mp);
734 }
735