xref: /illumos-gate/usr/src/uts/common/io/bge/bge_recv2.c (revision 51396a8ee7fb52fe0ab33bfe7b4f495ad431904a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "bge_impl.h"
28 
/*
 * Conversions between 32-bit integers and pointers.
 *
 * U32TOPTR narrows <x> to uint32_t and then widens it, via uintptr_t,
 * to a (void *).  PTRTOU32 goes the other way: the pointer is converted
 * to uintptr_t and then narrowed to uint32_t, so any bits above the low
 * 32 are discarded.
 */
#define	U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
#define	PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))
31 
32 /*
33  * ========== RX side routines ==========
34  */
35 
36 #define	BGE_DBG		BGE_DBG_RECV	/* debug flag for this code	*/
37 
38 static void bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp);
39 #pragma	inline(bge_refill)
40 
41 /*
42  * Return the specified buffer (srbdp) to the ring it came from (brp).
43  *
44  * Note:
45  *	If the driver is compiled with only one buffer ring *and* one
46  *	return ring, then the buffers must be returned in sequence.
47  *	In this case, we don't have to consider anything about the
48  *	buffer at all; we can simply advance the cyclic counter.  And
49  *	we don't even need the refill mutex <rf_lock>, as the caller
50  *	will already be holding the (one-and-only) <rx_lock>.
51  *
52  *	If the driver supports multiple buffer rings, but only one
53  *	return ring, the same still applies (to each buffer ring
54  *	separately).
55  */
56 static void
57 bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp)
58 {
59 	uint64_t slot;
60 
61 	_NOTE(ARGUNUSED(srbdp))
62 
63 	slot = brp->rf_next;
64 	brp->rf_next = NEXT(slot, brp->desc.nslots);
65 	bge_mbx_put(bgep, brp->chip_mbx_reg, slot);
66 }
67 
68 static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p,
69     recv_ring_t *rrp);
70 #pragma	inline(bge_receive_packet)
71 
72 static mblk_t *
73 bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, recv_ring_t *rrp)
74 {
75 	bge_rbd_t hw_rbd;
76 	buff_ring_t *brp;
77 	sw_rbd_t *srbdp;
78 	uchar_t *dp;
79 	mblk_t *mp;
80 	uint_t len;
81 	uint_t minsize;
82 	uint_t maxsize;
83 	uint32_t pflags;
84 
85 	mp = NULL;
86 	hw_rbd = *hw_rbd_p;
87 
88 	switch (hw_rbd.flags & (RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING)) {
89 	case RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING:
90 	default:
91 		/* error, this shouldn't happen */
92 		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring flags!"));
93 		goto error;
94 
95 	case RBD_FLAG_JUMBO_RING:
96 		brp = &bgep->buff[BGE_JUMBO_BUFF_RING];
97 		break;
98 
99 #if	(BGE_BUFF_RINGS_USED > 2)
100 	case RBD_FLAG_MINI_RING:
101 		brp = &bgep->buff[BGE_MINI_BUFF_RING];
102 		break;
103 #endif	/* BGE_BUFF_RINGS_USED > 2 */
104 
105 	case 0:
106 		brp = &bgep->buff[BGE_STD_BUFF_RING];
107 		break;
108 	}
109 
110 	if (hw_rbd.index >= brp->desc.nslots) {
111 		/* error, this shouldn't happen */
112 		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring index!"));
113 		goto error;
114 	}
115 
116 	srbdp = &brp->sw_rbds[hw_rbd.index];
117 	if (hw_rbd.opaque != srbdp->pbuf.token) {
118 		/* bogus, drop the packet */
119 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "bad ring token"));
120 		goto refill;
121 	}
122 
123 	if ((hw_rbd.flags & RBD_FLAG_PACKET_END) == 0) {
124 		/* bogus, drop the packet */
125 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "unterminated packet"));
126 		goto refill;
127 	}
128 
129 	if (hw_rbd.flags & RBD_FLAG_FRAME_HAS_ERROR) {
130 		/* bogus, drop the packet */
131 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "errored packet"));
132 		goto refill;
133 	}
134 
135 	len = hw_rbd.len;
136 
137 #ifdef BGE_IPMI_ASF
138 	/*
139 	 * When IPMI/ASF is enabled, VLAN tag must be stripped.
140 	 */
141 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
142 		maxsize = bgep->chipid.ethmax_size + ETHERFCSL;
143 	else
144 #endif
145 		/*
146 		 * H/W will not strip the VLAN tag from incoming packet
147 		 * now, as RECEIVE_MODE_KEEP_VLAN_TAG bit is set in
148 		 * RECEIVE_MAC_MODE_REG register.
149 		 */
150 		maxsize = bgep->chipid.ethmax_size + VLAN_TAGSZ + ETHERFCSL;
151 	if (len > maxsize) {
152 		/* bogus, drop the packet */
153 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "oversize packet"));
154 		goto refill;
155 	}
156 
157 #ifdef BGE_IPMI_ASF
158 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
159 		minsize = ETHERMIN + ETHERFCSL - VLAN_TAGSZ;
160 	else
161 #endif
162 		minsize = ETHERMIN + ETHERFCSL;
163 	if (len < minsize) {
164 		/* bogus, drop the packet */
165 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "undersize packet"));
166 		goto refill;
167 	}
168 
169 	/*
170 	 * Packet looks good; get a buffer to copy it into.
171 	 * We want to leave some space at the front of the allocated
172 	 * buffer in case any upstream modules want to prepend some
173 	 * sort of header.  This also has the side-effect of making
174 	 * the packet *contents* 4-byte aligned, as required by NCA!
175 	 */
176 #ifdef BGE_IPMI_ASF
177 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
178 		mp = allocb(BGE_HEADROOM + len + VLAN_TAGSZ, 0);
179 	} else {
180 #endif
181 
182 		mp = allocb(BGE_HEADROOM + len, 0);
183 #ifdef BGE_IPMI_ASF
184 	}
185 #endif
186 	if (mp == NULL) {
187 		/* Nothing to do but drop the packet */
188 		goto refill;
189 	}
190 
191 	/*
192 	 * Sync the data and copy it to the STREAMS buffer.
193 	 */
194 	DMA_SYNC(srbdp->pbuf, DDI_DMA_SYNC_FORKERNEL);
195 	if (bge_check_dma_handle(bgep, srbdp->pbuf.dma_hdl) != DDI_FM_OK) {
196 		bgep->bge_dma_error = B_TRUE;
197 		bgep->bge_chip_state = BGE_CHIP_ERROR;
198 		return (NULL);
199 	}
200 #ifdef BGE_IPMI_ASF
201 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
202 		/*
203 		 * As VLAN tag has been stripped from incoming packet in ASF
204 		 * scenario, we insert it into this packet again.
205 		 */
206 		struct ether_vlan_header *ehp;
207 		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM - VLAN_TAGSZ;
208 		bcopy(DMA_VPTR(srbdp->pbuf), dp, 2 * ETHERADDRL);
209 		ehp = (void *)dp;
210 		ehp->ether_tpid = ntohs(ETHERTYPE_VLAN);
211 		ehp->ether_tci = ntohs(hw_rbd.vlan_tci);
212 		bcopy(((uchar_t *)(DMA_VPTR(srbdp->pbuf))) + 2 * ETHERADDRL,
213 		    dp + 2 * ETHERADDRL + VLAN_TAGSZ,
214 		    len - 2 * ETHERADDRL);
215 	} else {
216 #endif
217 		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM;
218 		bcopy(DMA_VPTR(srbdp->pbuf), dp, len);
219 #ifdef BGE_IPMI_ASF
220 	}
221 
222 	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
223 		mp->b_wptr = dp + len + VLAN_TAGSZ - ETHERFCSL;
224 	} else
225 #endif
226 		mp->b_wptr = dp + len - ETHERFCSL;
227 
228 	/*
229 	 * Special check for one specific type of data corruption;
230 	 * in a good packet, the first 8 bytes are *very* unlikely
231 	 * to be the same as the second 8 bytes ... but we let the
232 	 * packet through just in case.
233 	 */
234 	if (bcmp(dp, dp+8, 8) == 0)
235 		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "stuttered packet?"));
236 
237 	pflags = 0;
238 	if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM)
239 		pflags |= HCK_FULLCKSUM;
240 	if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM)
241 		pflags |= HCK_IPV4_HDRCKSUM_OK;
242 	if (pflags != 0)
243 		mac_hcksum_set(mp, 0, 0, 0, hw_rbd.tcp_udp_cksum, pflags);
244 
245 	/* Update per-ring rx statistics */
246 	rrp->rx_pkts++;
247 	rrp->rx_bytes += len;
248 
249 refill:
250 	/*
251 	 * Replace the buffer in the ring it came from ...
252 	 */
253 	bge_refill(bgep, brp, srbdp);
254 	return (mp);
255 
256 error:
257 	/*
258 	 * We come here if the integrity of the ring descriptors
259 	 * (rather than merely packet data) appears corrupted.
260 	 * The factotum will attempt to reset-and-recover.
261 	 */
262 	bgep->bge_chip_state = BGE_CHIP_ERROR;
263 	bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
264 	return (NULL);
265 }
266 
267 /*
268  * Accept the packets received in the specified ring up to
269  * (but not including) the producer index in the status block.
270  *
271  * Returns a chain of mblks containing the received data, to be
272  * passed up to gld_recv() (we can't call gld_recv() from here,
273  * 'cos we're holding the per-ring receive lock at this point).
274  *
275  * This function must advance (rrp->rx_next) and write it back to
276  * the chip to indicate the packets it has accepted from the ring.
277  */
278 static mblk_t *bge_receive_ring(bge_t *bgep, recv_ring_t *rrp);
279 #ifndef	DEBUG
280 #pragma	inline(bge_receive_ring)
281 #endif
282 
283 static mblk_t *
284 bge_receive_ring(bge_t *bgep, recv_ring_t *rrp)
285 {
286 	bge_rbd_t *hw_rbd_p;
287 	uint64_t slot;
288 	mblk_t *head;
289 	mblk_t **tail;
290 	mblk_t *mp;
291 	int recv_cnt = 0;
292 
293 	ASSERT(mutex_owned(rrp->rx_lock));
294 
295 	/*
296 	 * Sync (all) the receive ring descriptors
297 	 * before accepting the packets they describe
298 	 */
299 	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
300 	if (*rrp->prod_index_p >= rrp->desc.nslots) {
301 		bgep->bge_chip_state = BGE_CHIP_ERROR;
302 		bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
303 		return (NULL);
304 	}
305 	if (bge_check_dma_handle(bgep, rrp->desc.dma_hdl) != DDI_FM_OK) {
306 		rrp->rx_next = *rrp->prod_index_p;
307 		bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
308 		bgep->bge_dma_error = B_TRUE;
309 		bgep->bge_chip_state = BGE_CHIP_ERROR;
310 		return (NULL);
311 	}
312 
313 	hw_rbd_p = DMA_VPTR(rrp->desc);
314 	head = NULL;
315 	tail = &head;
316 	slot = rrp->rx_next;
317 
318 	while ((slot != *rrp->prod_index_p) && /* Note: volatile	*/
319 	    (recv_cnt < BGE_MAXPKT_RCVED)) {
320 		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp))
321 		    != NULL) {
322 			*tail = mp;
323 			tail = &mp->b_next;
324 			recv_cnt++;
325 		}
326 		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
327 	}
328 
329 	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
330 	if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
331 		bgep->bge_chip_state = BGE_CHIP_ERROR;
332 	return (head);
333 }
334 
335 /*
336  * XXX: Poll a particular ring. The implementation is incomplete.
337  * Once the ring interrupts are disabled, we need to do bge_recyle()
338  * for the ring as well and re enable the ring interrupt automatically
339  * if the poll doesn't find any packets in the ring. We need to
340  * have MSI-X interrupts support for this.
341  *
342  * The basic poll policy is that rings that are dealing with explicit
343  * flows (like TCP or some service) and are marked as such should
344  * have their own MSI-X interrupt per ring. bge_intr() should leave
345  * that interrupt disabled after an upcall. The ring is in poll mode.
346  * When a poll thread comes down and finds nothing, the MSI-X interrupt
347  * is automatically enabled. Squeue needs to deal with the race of
348  * a new interrupt firing and reaching before poll thread returns.
349  */
350 mblk_t *
351 bge_poll_ring(void *arg, int bytes_to_pickup)
352 {
353 	recv_ring_t *rrp = arg;
354 	bge_t *bgep = rrp->bgep;
355 	bge_rbd_t *hw_rbd_p;
356 	uint64_t slot;
357 	mblk_t *head;
358 	mblk_t **tail;
359 	mblk_t *mp;
360 	size_t sz = 0;
361 
362 	mutex_enter(rrp->rx_lock);
363 
364 	/*
365 	 * Sync (all) the receive ring descriptors
366 	 * before accepting the packets they describe
367 	 */
368 	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
369 	if (*rrp->prod_index_p >= rrp->desc.nslots) {
370 		bgep->bge_chip_state = BGE_CHIP_ERROR;
371 		bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
372 		mutex_exit(rrp->rx_lock);
373 		return (NULL);
374 	}
375 	if (bge_check_dma_handle(bgep, rrp->desc.dma_hdl) != DDI_FM_OK) {
376 		rrp->rx_next = *rrp->prod_index_p;
377 		bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
378 		bgep->bge_dma_error = B_TRUE;
379 		bgep->bge_chip_state = BGE_CHIP_ERROR;
380 		mutex_exit(rrp->rx_lock);
381 		return (NULL);
382 	}
383 
384 	hw_rbd_p = DMA_VPTR(rrp->desc);
385 	head = NULL;
386 	tail = &head;
387 	slot = rrp->rx_next;
388 
389 	/* Note: volatile */
390 	while ((slot != *rrp->prod_index_p) && (sz <= bytes_to_pickup)) {
391 		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp))
392 		    != NULL) {
393 			*tail = mp;
394 			sz += msgdsize(mp);
395 			tail = &mp->b_next;
396 		}
397 		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
398 	}
399 
400 	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
401 	if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
402 		bgep->bge_chip_state = BGE_CHIP_ERROR;
403 	mutex_exit(rrp->rx_lock);
404 	return (head);
405 }
406 
407 /*
408  * Receive all packets in all rings.
409  */
410 void bge_receive(bge_t *bgep, bge_status_t *bsp);
411 #pragma	no_inline(bge_receive)
412 
413 void
414 bge_receive(bge_t *bgep, bge_status_t *bsp)
415 {
416 	recv_ring_t *rrp;
417 	uint64_t index;
418 	mblk_t *mp;
419 
420 	for (index = 0; index < bgep->chipid.rx_rings; index++) {
421 		/*
422 		 * Start from the first ring.
423 		 */
424 		rrp = &bgep->recv[index];
425 
426 		/*
427 		 * For each ring, (rrp->prod_index_p) points to the
428 		 * proper index within the status block (which has
429 		 * already been sync'd by the caller)
430 		 */
431 		ASSERT(rrp->prod_index_p == RECV_INDEX_P(bsp, index));
432 
433 		if (*rrp->prod_index_p == rrp->rx_next || rrp->poll_flag)
434 			continue;		/* no packets		*/
435 		if (mutex_tryenter(rrp->rx_lock) == 0)
436 			continue;		/* already in process	*/
437 		mp = bge_receive_ring(bgep, rrp);
438 		mutex_exit(rrp->rx_lock);
439 
440 		if (mp != NULL)
441 			mac_rx_ring(bgep->mh, rrp->ring_handle, mp,
442 			    rrp->ring_gen_num);
443 	}
444 }
445