/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "bge_impl.h"

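/*
 * Helper macros to convert a 32-bit value to a pointer and back,
 * casting through uintptr_t so the conversions are well defined on
 * both 32-bit and 64-bit kernels.
 */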
#define	U32TOPTR(x)	((void *)(uintptr_t)(uint32_t)(x))
#define	PTRTOU32(x)	((uint32_t)(uintptr_t)(void *)(x))

/*
 * ========== RX side routines ==========
 */

#define	BGE_DBG		BGE_DBG_RECV	/* debug flag for this code	*/

static void bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp);

/*
 * Return the specified buffer (srbdp) to the ring it came from (brp).
 *
 * Note:
 *	If the driver is compiled with only one buffer ring *and* one
 *	return ring, then the buffers must be returned in sequence.
 *	In this case, we don't have to consider anything about the
 *	buffer at all; we can simply advance the cyclic counter.  And
 *	we don't even need the refill mutex <rf_lock>, as the caller
 *	will already be holding the (one-and-only) <rx_lock>.
 *
 *	If the driver supports multiple buffer rings, but only one
 *	return ring, the same still applies (to each buffer ring
 *	separately).
 */
static void
bge_refill(bge_t *bgep, buff_ring_t *brp, sw_rbd_t *srbdp)
{
	uint64_t slot;

	_NOTE(ARGUNUSED(srbdp))

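	/*
	 * Advance the cyclic refill index for this buffer ring and
	 * write it to the ring's mailbox register, telling the chip
	 * that the slot is available to be filled again.
	 */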
	slot = brp->rf_next;
	brp->rf_next = NEXT(slot, brp->desc.nslots);
	bge_mbx_put(bgep, brp->chip_mbx_reg, slot);
}

static mblk_t *bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p,
    recv_ring_t *rrp);

static mblk_t *
bge_receive_packet(bge_t *bgep, bge_rbd_t *hw_rbd_p, recv_ring_t *rrp)
{
	bge_rbd_t hw_rbd;
	buff_ring_t *brp;
	sw_rbd_t *srbdp;
	uchar_t *dp;
	mblk_t *mp;
	uint_t len;
	uint_t minsize;
	uint_t maxsize;
	uint32_t pflags;

	mp = NULL;
	hw_rbd = *hw_rbd_p;

	switch (hw_rbd.flags & (RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING)) {
	case RBD_FLAG_MINI_RING|RBD_FLAG_JUMBO_RING:
	default:
		/* error, this shouldn't happen */
		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring flags!"));
		goto error;

	case RBD_FLAG_JUMBO_RING:
		brp = &bgep->buff[BGE_JUMBO_BUFF_RING];
		break;

#if (BGE_BUFF_RINGS_USED > 2)
	case RBD_FLAG_MINI_RING:
		brp = &bgep->buff[BGE_MINI_BUFF_RING];
		break;
#endif	/* BGE_BUFF_RINGS_USED > 2 */

	case 0:
		brp = &bgep->buff[BGE_STD_BUFF_RING];
		break;
	}

	if (hw_rbd.index >= brp->desc.nslots) {
		/* error, this shouldn't happen */
		BGE_PKTDUMP((bgep, &hw_rbd, NULL, "bad ring index!"));
		goto error;
	}

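	/*
	 * Look up the software descriptor for this slot.  The opaque
	 * value the chip hands back must match the token recorded in
	 * that descriptor; if it doesn't, the descriptor isn't
	 * describing the buffer we expect, so drop the packet.
	 */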
	srbdp = &brp->sw_rbds[hw_rbd.index];
	if (hw_rbd.opaque != srbdp->pbuf.token) {
		/* bogus, drop the packet */
		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "bad ring token"));
		goto refill;
	}

	if ((hw_rbd.flags & RBD_FLAG_PACKET_END) == 0) {
		/* bogus, drop the packet */
		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "unterminated packet"));
		goto refill;
	}

	if (hw_rbd.flags & RBD_FLAG_FRAME_HAS_ERROR) {
		/* bogus, drop the packet */
		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "errored packet"));
		goto refill;
	}

	len = hw_rbd.len;

#ifdef BGE_IPMI_ASF
	/*
	 * When IPMI/ASF is enabled, the hardware strips the VLAN tag
	 * from incoming packets, so the tag does not count towards
	 * the maximum frame size.
	 */
	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
		maxsize = bgep->chipid.ethmax_size + ETHERFCSL;
	else
#endif
		/*
		 * The hardware will not strip the VLAN tag from the
		 * incoming packet, as the RECEIVE_MODE_KEEP_VLAN_TAG bit
		 * is set in the RECEIVE_MAC_MODE_REG register, so allow
		 * room for the tag in the size check.
		 */
		maxsize = bgep->chipid.ethmax_size + VLAN_TAGSZ + ETHERFCSL;
	if (len > maxsize) {
		/* bogus, drop the packet */
		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "oversize packet"));
		goto refill;
	}

#ifdef BGE_IPMI_ASF
	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG))
		minsize = ETHERMIN + ETHERFCSL - VLAN_TAGSZ;
	else
#endif
		minsize = ETHERMIN + ETHERFCSL;
	if (len < minsize) {
		/* bogus, drop the packet */
		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "undersize packet"));
		goto refill;
	}

	/*
	 * Packet looks good; get a buffer to copy it into.
	 * We want to leave some space at the front of the allocated
	 * buffer in case any upstream modules want to prepend some
	 * sort of header.  This also has the side-effect of making
	 * the packet *contents* 4-byte aligned, as required by NCA!
	 */
#ifdef BGE_IPMI_ASF
	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
		mp = allocb(BGE_HEADROOM + len + VLAN_TAGSZ, 0);
	} else {
#endif

		mp = allocb(BGE_HEADROOM + len, 0);
#ifdef BGE_IPMI_ASF
	}
#endif
	if (mp == NULL) {
		/* Nothing to do but drop the packet */
		goto refill;
	}

	/*
	 * Sync the data and copy it to the STREAMS buffer.
	 */
	DMA_SYNC(srbdp->pbuf, DDI_DMA_SYNC_FORKERNEL);
	if (bge_check_dma_handle(bgep, srbdp->pbuf.dma_hdl) != DDI_FM_OK) {
		bgep->bge_dma_error = B_TRUE;
		bgep->bge_chip_state = BGE_CHIP_ERROR;
		return (NULL);
	}
#ifdef BGE_IPMI_ASF
	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
		/*
		 * Since the VLAN tag has been stripped from the incoming
		 * packet in the ASF case, reinsert it here: copy the
		 * destination and source MAC addresses, rebuild the
		 * 802.1Q header from the descriptor's vlan_tci, then
		 * copy the remainder of the frame after the tag.
		 */
		struct ether_vlan_header *ehp;
		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM - VLAN_TAGSZ;
		bcopy(DMA_VPTR(srbdp->pbuf), dp, 2 * ETHERADDRL);
		ehp = (void *)dp;
		ehp->ether_tpid = ntohs(ETHERTYPE_VLAN);
		ehp->ether_tci = ntohs(hw_rbd.vlan_tci);
		bcopy(((uchar_t *)(DMA_VPTR(srbdp->pbuf))) + 2 * ETHERADDRL,
		    dp + 2 * ETHERADDRL + VLAN_TAGSZ,
		    len - 2 * ETHERADDRL);
	} else {
#endif
		mp->b_rptr = dp = mp->b_rptr + BGE_HEADROOM;
		bcopy(DMA_VPTR(srbdp->pbuf), dp, len);
#ifdef BGE_IPMI_ASF
	}

	if (bgep->asf_enabled && (hw_rbd.flags & RBD_FLAG_VLAN_TAG)) {
		mp->b_wptr = dp + len + VLAN_TAGSZ - ETHERFCSL;
	} else
#endif
		mp->b_wptr = dp + len - ETHERFCSL;

	/*
	 * Special check for one specific type of data corruption;
	 * in a good packet, the first 8 bytes are *very* unlikely
	 * to be the same as the second 8 bytes ... but we let the
	 * packet through just in case.
	 */
	if (bcmp(dp, dp+8, 8) == 0)
		BGE_PKTDUMP((bgep, &hw_rbd, srbdp, "stuttered packet?"));

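	/*
	 * Translate the hardware checksum status into MAC-layer hcksum
	 * flags: the hardware-computed TCP/UDP checksum is passed up as
	 * a full checksum value for the stack to verify, and a good IP
	 * header checksum is flagged as already verified.
	 */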
	pflags = 0;
	if (hw_rbd.flags & RBD_FLAG_TCP_UDP_CHECKSUM)
		pflags |= HCK_FULLCKSUM;
	if (hw_rbd.flags & RBD_FLAG_IP_CHECKSUM)
		pflags |= HCK_IPV4_HDRCKSUM_OK;
	if (pflags != 0)
		mac_hcksum_set(mp, 0, 0, 0, hw_rbd.tcp_udp_cksum, pflags);

	/* Update per-ring rx statistics */
	rrp->rx_pkts++;
	rrp->rx_bytes += len;

refill:
	/*
	 * Replace the buffer in the ring it came from ...
	 */
	bge_refill(bgep, brp, srbdp);
	return (mp);

error:
	/*
	 * We come here if the integrity of the ring descriptors
	 * (rather than merely packet data) appears corrupted.
	 * The factotum will attempt to reset-and-recover.
	 */
	bgep->bge_chip_state = BGE_CHIP_ERROR;
	bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
	return (NULL);
}

/*
 * Accept the packets received in the specified ring up to
 * (but not including) the producer index in the status block.
 *
 * Returns a chain of mblks containing the received data, to be
 * passed up to mac_rx_ring() by the caller (we can't make that
 * call from here, because we're holding the per-ring receive
 * lock at this point).
 *
 * This function must advance (rrp->rx_next) and write it back to
 * the chip to indicate the packets it has accepted from the ring.
 */
static mblk_t *bge_receive_ring(bge_t *bgep, recv_ring_t *rrp);

static mblk_t *
bge_receive_ring(bge_t *bgep, recv_ring_t *rrp)
{
	bge_rbd_t *hw_rbd_p;
	uint64_t slot;
	mblk_t *head;
	mblk_t **tail;
	mblk_t *mp;
	int recv_cnt = 0;

	ASSERT(mutex_owned(rrp->rx_lock));

	/*
	 * Sync (all) the receive ring descriptors
	 * before accepting the packets they describe
	 */
	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
	if (*rrp->prod_index_p >= rrp->desc.nslots) {
		bgep->bge_chip_state = BGE_CHIP_ERROR;
		bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
		return (NULL);
	}
	if (bge_check_dma_handle(bgep, rrp->desc.dma_hdl) != DDI_FM_OK) {
		rrp->rx_next = *rrp->prod_index_p;
		bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
		bgep->bge_dma_error = B_TRUE;
		bgep->bge_chip_state = BGE_CHIP_ERROR;
		return (NULL);
	}

	hw_rbd_p = DMA_VPTR(rrp->desc);
	head = NULL;
	tail = &head;
	slot = rrp->rx_next;

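	/*
	 * Accept packets from rx_next up to (but not including) the
	 * producer index reported by the chip, building an mblk chain
	 * and capping the batch at BGE_MAXPKT_RCVED packets per call.
	 */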
	while ((slot != *rrp->prod_index_p) &&	/* Note: volatile	*/
	    (recv_cnt < BGE_MAXPKT_RCVED)) {
		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp))
		    != NULL) {
			*tail = mp;
			tail = &mp->b_next;
			recv_cnt++;
		}
		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
	}

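	/*
	 * Write the new consumer index back to the ring's mailbox
	 * register so the chip knows how far we have caught up.
	 */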
	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
	if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
		bgep->bge_chip_state = BGE_CHIP_ERROR;
	return (head);
}

/*
 * XXX: Poll a particular ring.  The implementation is incomplete.
 * Once the ring interrupts are disabled, we also need to do
 * bge_recycle() for the ring and re-enable the ring interrupt
 * automatically if the poll doesn't find any packets in the ring.
 * We need MSI-X interrupt support for this.
 *
 * The basic poll policy is that rings dealing with explicit flows
 * (like TCP or some service) and marked as such should have their
 * own MSI-X interrupt per ring.  bge_intr() should leave that
 * interrupt disabled after an upcall; the ring is then in poll mode.
 * When a poll thread comes down and finds nothing, the MSI-X interrupt
 * is automatically re-enabled.  Squeue needs to deal with the race of
 * a new interrupt firing and arriving before the poll thread returns.
 */
mblk_t *
bge_poll_ring(void *arg, int bytes_to_pickup)
{
	recv_ring_t *rrp = arg;
	bge_t *bgep = rrp->bgep;
	bge_rbd_t *hw_rbd_p;
	uint64_t slot;
	mblk_t *head;
	mblk_t **tail;
	mblk_t *mp;
	size_t sz = 0;

	mutex_enter(rrp->rx_lock);

	/*
	 * Sync (all) the receive ring descriptors
	 * before accepting the packets they describe
	 */
	DMA_SYNC(rrp->desc, DDI_DMA_SYNC_FORKERNEL);
	if (*rrp->prod_index_p >= rrp->desc.nslots) {
		bgep->bge_chip_state = BGE_CHIP_ERROR;
		bge_fm_ereport(bgep, DDI_FM_DEVICE_INVAL_STATE);
		mutex_exit(rrp->rx_lock);
		return (NULL);
	}
	if (bge_check_dma_handle(bgep, rrp->desc.dma_hdl) != DDI_FM_OK) {
		rrp->rx_next = *rrp->prod_index_p;
		bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
		bgep->bge_dma_error = B_TRUE;
		bgep->bge_chip_state = BGE_CHIP_ERROR;
		mutex_exit(rrp->rx_lock);
		return (NULL);
	}

	hw_rbd_p = DMA_VPTR(rrp->desc);
	head = NULL;
	tail = &head;
	slot = rrp->rx_next;

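	/*
	 * Accept packets until we either catch up with the chip's
	 * producer index or exceed the byte budget requested by the
	 * polling caller (bytes_to_pickup).
	 */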
	/* Note: volatile */
	while ((slot != *rrp->prod_index_p) && (sz <= bytes_to_pickup)) {
		if ((mp = bge_receive_packet(bgep, &hw_rbd_p[slot], rrp))
		    != NULL) {
			*tail = mp;
			sz += msgdsize(mp);
			tail = &mp->b_next;
		}
		rrp->rx_next = slot = NEXT(slot, rrp->desc.nslots);
	}

	bge_mbx_put(bgep, rrp->chip_mbx_reg, rrp->rx_next);
	if (bge_check_acc_handle(bgep, bgep->io_handle) != DDI_FM_OK)
		bgep->bge_chip_state = BGE_CHIP_ERROR;
	mutex_exit(rrp->rx_lock);
	return (head);
}

/*
 * Receive all packets in all rings.
 */
void bge_receive(bge_t *bgep, bge_status_t *bsp);

void
bge_receive(bge_t *bgep, bge_status_t *bsp)
{
	recv_ring_t *rrp;
	uint64_t index;
	mblk_t *mp;

	for (index = 0; index < bgep->chipid.rx_rings; index++) {
		/*
		 * Start from the first ring.
		 */
		rrp = &bgep->recv[index];

		/*
		 * For each ring, (rrp->prod_index_p) points to the
		 * proper index within the status block (which has
		 * already been sync'd by the caller)
		 */
		ASSERT(rrp->prod_index_p == RECV_INDEX_P(bsp, index));

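		/*
		 * Skip rings that have no new packets, and rings that
		 * are currently in poll mode; polled rings are drained
		 * by bge_poll_ring() instead.
		 */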
		if (*rrp->prod_index_p == rrp->rx_next || rrp->poll_flag)
			continue;		/* no packets	*/
		if (mutex_tryenter(rrp->rx_lock) == 0)
			continue;		/* already in process	*/
		mp = bge_receive_ring(bgep, rrp);
		mutex_exit(rrp->rx_lock);

		if (mp != NULL)
			mac_rx_ring(bgep->mh, rrp->ring_handle, mp,
			    rrp->ring_gen_num);
	}
}