xref: /titanic_44/usr/src/uts/common/io/bge/bge_recv2.c (revision 20e6d5c536ad5b300e7fafb6a92e13040f492977)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "bge_impl.h"
30 
31 #define	U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
32 #define	PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))
33 
34 /*
35  * ========== RX side routines ==========
36  */
37 
38 #define	BGE_DBG		BGE_DBG_RECV	/* debug flag for this code	*/
39 
40 static void bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp);
41 #pragma	inline(bge_refill)
42 
43 /*
44  * Return the specified buffer (srbdp) to the ring it came from (brp).
45  *
46  * Note:
47  *	If the driver is compiled with only one buffer ring *and* one
48  *	return ring, then the buffers must be returned in sequence.
49  *	In this case, we don't have to consider anything about the
50  *	buffer at all; we can simply advance the cyclic counter.  And
51  *	we don't even need the refill mutex <rf_lock>, as the caller
52  *	will already be holding the (one-and-only) <rx_lock>.
53  *
54  *	If the driver supports multiple buffer rings, but only one
55  *	return ring, the same still applies (to each buffer ring
56  *	separately).
57  */
58 static void
59 bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp)
60 {
61 	uint64_t slot;
62 
63 	_NOTE(ARGUNUSED(srbdp))
64 
65 	slot = brp->rf_next;
66 	brp->rf_next = NEXT(slot, brp->desc.nslots);
67 	bge_mbx_put(bgep, brp->chip_mbx_reg, slot);
68 }
69 
70 static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p);
71 #pragma	inline(bge_receive_packet)
72 
73 static mblk_t *
74 bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p)
75 {
76 	bge_rbd_t hw_rbd;
77 	buff_ring_t *brp;
78 	sw_rbd_t *srbdp;
79 	uchar_t *dp;
80 	mblk_t *mp;
81 	uint_t len;
82 	uint_t minsize;
83 	uint_t maxsize;
84 	uint32_t pflags;
85 
86 	mp = NULL;
87 	hw_rbd = *hw_rbd_p;
88 
89 	switch (hw_rbd.flags & (RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING)) {
90 	case RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING:
91 	default:
92 		/* error, this shouldn't happen */
93 		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring flags!"));
94 		goto error;
95 
96 	case RBD_FLAG_JUMBO_RING:
97 		brp = &bgep->buff[BGE_JUMBO_BUFF_RING];
98 		break;
99 
100 #if	(BGE_BUFF_RINGS_USED > 2)
101 	case RBD_FLAG_MINI_RING:
102 		brp = &bgep->buff[BGE_MINI_BUFF_RING];
103 		break;
104 #endif	/* BGE_BUFF_RINGS_USED > 2 */
105 
106 	case 0:
107 		brp = &bgep->buff[BGE_STD_BUFF_RING];
108 		break;
109 	}
110 
111 	if (hw_rbd.index >= brp->desc.nslots) {
112 		/* error, this shouldn't happen */
113 		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring index!"));
114 		goto error;
115 	}
116 
117 	srbdp = &brp->sw_rbds[hw_rbd.index];
118 	if (hw_rbd.opaque != srbdp->pbuf.token) {
119 		/* bogus, drop the packet */
120 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "bad ring token"));
121 		goto refill;
122 	}
123 
124 	if ((hw_rbd.flags & RBD_FLAG_PACKET_END) == 0) {
125 		/* bogus, drop the packet */
126 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "unterminated packet"));
127 		goto refill;
128 	}
129 
130 	if (hw_rbd.flags & RBD_FLAG_FRAME_HAS_ERROR) {
131 		/* bogus, drop the packet */
132 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "errored packet"));
133 		goto refill;
134 	}
135 
136 	len = hw_rbd.len;
137 
138 #ifdef BGE_IPMI_ASF
139 	/*
140 	 * When IPMI/ASF is enabled, VLAN tag must be stripped.
141 	 */
142 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
143 		maxsize = bgep->chipid.ethmax_size + ETHERFCSL;
144 	else
145 #endif
146 		/*
147 		 * H/W will not strip the VLAN tag from incoming packet
148 		 * now, as RECEIVE_MODE_KEEP_VLAN_TAG bit is set in
149 		 * RECEIVE_MAC_MODE_REG register.
150 		 */
151 		maxsize = bgep->chipid.ethmax_size + VLAN_TAGSZ + ETHERFCSL;
152 	if (len > maxsize) {
153 		/* bogus, drop the packet */
154 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "oversize packet"));
155 		goto refill;
156 	}
157 
158 #ifdef BGE_IPMI_ASF
159 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
160 		minsize = ETHERMIN + ETHERFCSL - VLAN_TAGSZ;
161 	else
162 #endif
163 		minsize = ETHERMIN + ETHERFCSL;
164 	if (len < minsize) {
165 		/* bogus, drop the packet */
166 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "undersize packet"));
167 		goto refill;
168 	}
169 
170 	/*
171 	 * Packet looks good; get a buffer to copy it into.
172 	 * We want to leave some space at the front of the allocated
173 	 * buffer in case any upstream modules want to prepend some
174 	 * sort of header.  This also has the side-effect of making
175 	 * the packet *contents* 4-byte aligned, as required by NCA!
176 	 */
177 #ifdef BGE_IPMI_ASF
178 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
179 		mp = allocb(BGE_HEADROOM + len + VLAN_TAGSZ, 0);
180 	} else {
181 #endif
182 
183 		mp = allocb(BGE_HEADROOM + len, 0);
184 #ifdef BGE_IPMI_ASF
185 	}
186 #endif
187 	if (mp == NULL) {
188 		/* Nothing to do but drop the packet */
189 		goto refill;
190 	}
191 
192 	/*
193 	 * Sync the data and copy it to the STREAMS buffer.
194 	 */
195 	DMA_SYNC(srbdp->pbuf, DDI_DMA_SYNC_FORKERNEL);
196 	if (bge_check_dma_handle(bgep, srbdp->pbuf.dma_hdl) != DDI_FM_OK) {
197 		bgep->bge_dma_error = B_TRUE;
198 		bgep->bge_chip_state = BGE_CHIP_ERROR;
199 		return (NULL);
200 	}
201 #ifdef BGE_IPMI_ASF
202 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
203 		/*
204 		 * As VLAN tag has been stripped from incoming packet in ASF
205 		 * scenario, we insert it into this packet again.
206 		 */
207 		struct ether_vlan_header *ehp;
208 		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM - VLAN_TAGSZ;
209 		bcopy(DMA_VPTR(srbdp->pbuf), dp, 2 * ETHERADDRL);
210 		ehp = (void *)dp;
211 		ehp->ether_tpid = ntohs(ETHERTYPE_VLAN);
212 		ehp->ether_tci = ntohs(hw_rbd.vlan_tci);
213 		bcopy(((uchar_t *)(DMA_VPTR(srbdp->pbuf))) + 2 * ETHERADDRL,
214 		    dp + 2 * ETHERADDRL + VLAN_TAGSZ,
215 		    len - 2 * ETHERADDRL);
216 	} else {
217 #endif
218 		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM;
219 		bcopy(DMA_VPTR(srbdp->pbuf), dp, len);
220 #ifdef BGE_IPMI_ASF
221 	}
222 
223 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
224 		mp->b_wptr = dp + len + VLAN_TAGSZ - ETHERFCSL;
225 	} else
226 #endif
227 		mp->b_wptr = dp + len - ETHERFCSL;
228 
229 	/*
230 	 * Special check for one specific type of data corruption;
231 	 * in a good packet, the first 8 bytes are *very* unlikely
232 	 * to be the same as the second 8 bytes ... but we let the
233 	 * packet through just in case.
234 	 */
235 	if (bcmp(dp, dp+8, 8) == 0)
236 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "stuttered packet?"));
237 
238 	pflags = 0;
239 	if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM)
240 		pflags |= HCK_FULLCKSUM;
241 	if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM)
242 		pflags |= HCK_IPV4_HDRCKSUM;
243 	if (pflags != 0)
244 		(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0,
245 		    hw_rbd.tcp_udp_cksum, pflags, 0);
246 
247 refill:
248 	/*
249 	 * Replace the buffer in the ring it came from ...
250 	 */
251 	bge_refill(bgep, brp, srbdp);
252 	return (mp);
253 
254 error:
255 	/*
256 	 * We come here if the integrity of the ring descriptors
257 	 * (rather than merely packet data) appears corrupted.
258 	 * The factotum will attempt to reset-and-recover.
259 	 */
260 	bgep->bge_chip_state = BGE_CHIP_ERROR;
261 	bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
262 	return (NULL);
263 }
264 
265 /*
266  * Accept the packets received in the specified ring up to
267  * (but not including) the producer index in the status block.
268  *
269  * Returns a chain of mblks containing the received data, to be
270  * passed up to gld_recv() (we can't call gld_recv() from here,
271  * 'cos we're holding the per-ring receive lock at this point).
272  *
273  * This function must advance (rrp->rx_next) and write it back to
274  * the chip to indicate the packets it has accepted from the ring.
275  */
276 static mblk_t *bge_receive_ring(bge_t *bgep, recv_ring_t *rrp);
277 #pragma	inline(bge_receive_ring)
278 
279 static mblk_t *
280 bge_receive_ring(bge_t *bgep, recv_ring_t *rrp)
281 {
282 	bge_rbd_t *hw_rbd_p;
283 	uint64_t slot;
284 	mblk_t *head;
285 	mblk_t **tail;
286 	mblk_t *mp;
287 	int recv_cnt = 0;
288 
289 	ASSERT(mutex_owned(rrp->rx_lock));
290 
291 	/*
292 	 * Sync (all) the receive ring descriptors
293 	 * before accepting the packets they describe
294 	 */
295 	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
296 	if (*rrp->prod_index_p >= rrp->desc.nslots) {
297 		bgep->bge_chip_state = BGE_CHIP_ERROR;
298 		bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
299 		return (NULL);
300 	}
301 	if (bge_check_dma_handle(bgep, rrp->desc.dma_hdl) != DDI_FM_OK) {
302 		rrp->rx_next = *rrp->prod_index_p;
303 		bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
304 		bgep->bge_dma_error = B_TRUE;
305 		bgep->bge_chip_state = BGE_CHIP_ERROR;
306 		return (NULL);
307 	}
308 
309 	hw_rbd_p = DMA_VPTR(rrp->desc);
310 	head = NULL;
311 	tail = &head;
312 	slot = rrp->rx_next;
313 
314 	while ((slot != *rrp->prod_index_p) && /* Note: volatile	*/
315 	    (recv_cnt < BGE_MAXPKT_RCVED)) {
316 		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot])) != NULL) {
317 			*tail = mp;
318 			tail = &mp->b_next;
319 			recv_cnt++;
320 		}
321 		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
322 	}
323 
324 	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
325 	if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
326 		bgep->bge_chip_state = BGE_CHIP_ERROR;
327 	return (head);
328 }
329 
330 /*
331  * Receive all packets in all rings.
332  *
333  * To give priority to low-numbered rings, whenever we have received any
334  * packets in any ring except 0, we restart scanning again from ring 0.
335  * Thus, for example, if rings 0, 3, and 10 are carrying traffic, the
336  * pattern of receives might go 0, 3, 10, 3, 0, 10, 0:
337  *
338  *	0	found some - receive them
339  *	1..2					none found
340  *	3	found some - receive them	and restart scan
341  *	0..9					none found
342  *	10	found some - receive them	and restart scan
343  *	0..2					none found
344  *	3	found some more - receive them	and restart scan
345  *	0	found some more - receive them
346  *	1..9					none found
347  *	10	found some more - receive them	and restart scan
348  *	0	found some more - receive them
349  *	1..15					none found
350  *
351  * The routine returns only when a complete scan has been performed either
352  * without finding any packets to receive or BGE_MAXPKT_RCVED packets were
353  * received from ring 0 and other rings (if used) are empty.
354  *
355  * Note that driver-defined locks may *NOT* be held across calls
356  * to gld_recv().
357  *
358  * Note: the expression (BGE_RECV_RINGS_USED > 1), yields a compile-time
359  * constant and allows the compiler to optimise away the outer do-loop
360  * if only one receive ring is being used.
361  */
362 void bge_receive(bge_t *bgep, bge_status_t *bsp);
363 #pragma	no_inline(bge_receive)
364 
365 void
366 bge_receive(bge_t *bgep, bge_status_t *bsp)
367 {
368 	recv_ring_t *rrp;
369 	uint64_t ring;
370 	uint64_t rx_rings = bgep->chipid.rx_rings;
371 	mblk_t *mp;
372 
373 restart:
374 	ring = 0;
375 	rrp = &bgep->recv[ring];
376 	do {
377 		/*
378 		 * For each ring, (rrp->prod_index_p) points to the
379 		 * proper index within the status block (which has
380 		 * already been sync'd by the caller)
381 		 */
382 		ASSERT(rrp->prod_index_p == RECV_INDEX_P(bsp, ring));
383 
384 		if (*rrp->prod_index_p == rrp->rx_next)
385 			continue;		/* no packets		*/
386 		if (mutex_tryenter(rrp->rx_lock) == 0)
387 			continue;		/* already in process	*/
388 		mp = bge_receive_ring(bgep, rrp);
389 		mutex_exit(rrp->rx_lock);
390 
391 		if (mp != NULL) {
392 			mac_rx(bgep->mh, rrp->handle, mp);
393 
394 			/*
395 			 * Restart from ring 0, if the driver is compiled
396 			 * with multiple rings and we're not on ring 0 now
397 			 */
398 			if (rx_rings > 1 && ring > 0)
399 				goto restart;
400 		}
401 
402 		/*
403 		 * Loop over all rings (if there *are* multiple rings)
404 		 */
405 	} while (++rrp, ++ring < rx_rings);
406 }
407