xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c (revision 5093b3b62da799ea81b3a0f84f606266d06ce94e)
1 /*
2  * Copyright (C) 2007 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 
16 /*
17  * Copyright (c) 2013 by Delphix. All rights reserved.
18  */
19 
20 #include <vmxnet3.h>
21 
22 static void vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *rxBuf);
23 
24 /*
25  * vmxnet3_alloc_rxbuf --
26  *
27  *    Allocate a new rxBuf from memory. All its fields are set except
28  *    for its associated mblk which has to be allocated later.
29  *
30  * Results:
31  *    A new rxBuf or NULL.
32  *
33  * Side effects:
34  *    None.
35  */
36 static vmxnet3_rxbuf_t *
37 vmxnet3_alloc_rxbuf(vmxnet3_softc_t *dp, boolean_t canSleep)
38 {
39 	vmxnet3_rxbuf_t *rxBuf;
40 	int flag = canSleep ? KM_SLEEP : KM_NOSLEEP;
41 	int err;
42 
43 	atomic_inc_32(&dp->rx_alloc_buf);
44 	rxBuf = kmem_zalloc(sizeof (vmxnet3_rxbuf_t), flag);
45 	if (!rxBuf) {
46 		atomic_inc_32(&dp->rx_alloc_failed);
47 		return (NULL);
48 	}
49 
50 	if ((err = vmxnet3_alloc_dma_mem_1(dp, &rxBuf->dma, (dp->cur_mtu + 18),
51 	    canSleep)) != DDI_SUCCESS) {
52 		VMXNET3_DEBUG(dp, 0, "Failed to allocate %d bytes for rx buf, "
53 		    "err:%d\n", (dp->cur_mtu + 18), err);
54 		kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
55 		atomic_inc_32(&dp->rx_alloc_failed);
56 		return (NULL);
57 	}
58 
59 	rxBuf->freeCB.free_func = vmxnet3_put_rxbuf;
60 	rxBuf->freeCB.free_arg = (caddr_t)rxBuf;
61 	rxBuf->dp = dp;
62 
63 	atomic_inc_32(&dp->rxNumBufs);
64 
65 	return (rxBuf);
66 }
67 
68 /*
69  * vmxnet3_free_rxbuf --
70  *
71  *    Free a rxBuf.
72  *
73  * Results:
74  *    None.
75  *
76  * Side effects:
77  *    None.
78  */
79 static void
80 vmxnet3_free_rxbuf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf)
81 {
82 	vmxnet3_free_dma_mem(&rxBuf->dma);
83 	kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
84 
85 #ifndef DEBUG
86 	atomic_dec_32(&dp->rxNumBufs);
87 #else
88 	{
89 		uint32_t nv = atomic_dec_32_nv(&dp->rxNumBufs);
90 		ASSERT(nv != (uint32_t)-1);
91 	}
92 #endif
93 }
94 
95 /*
96  * vmxnet3_put_rxpool_buf --
97  *
98  *    Return a rxBuf to the pool.
99  *
100  * Results:
101  *    B_TRUE if there was room in the pool and the rxBuf was returned,
102  *    B_FALSE otherwise.
103  *
104  * Side effects:
105  *    None.
106  */
107 static boolean_t
108 vmxnet3_put_rxpool_buf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf)
109 {
110 	vmxnet3_rxpool_t *rxPool = &dp->rxPool;
111 	boolean_t returned = B_FALSE;
112 
113 	mutex_enter(&dp->rxPoolLock);
114 	ASSERT(rxPool->nBufs <= rxPool->nBufsLimit);
115 	if (dp->devEnabled && rxPool->nBufs < rxPool->nBufsLimit) {
116 		ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
117 		    (rxPool->listHead != NULL && rxPool->nBufs != 0));
118 		rxBuf->next = rxPool->listHead;
119 		rxPool->listHead = rxBuf;
120 		rxPool->nBufs++;
121 		returned = B_TRUE;
122 	}
123 	mutex_exit(&dp->rxPoolLock);
124 	return (returned);
125 }
126 
127 /*
128  * vmxnet3_put_rxbuf --
129  *
130  *    Return a rxBuf to the pool or free it.
131  *
132  * Results:
133  *    None.
134  *
135  * Side effects:
136  *    None.
137  */
138 static void
139 vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *rxBuf)
140 {
141 	vmxnet3_softc_t *dp = rxBuf->dp;
142 
143 	VMXNET3_DEBUG(dp, 5, "free 0x%p\n", rxBuf);
144 
145 	if (!vmxnet3_put_rxpool_buf(dp, rxBuf))
146 		vmxnet3_free_rxbuf(dp, rxBuf);
147 }
148 
149 /*
150  * vmxnet3_get_rxpool_buf --
151  *
152  *    Get an unused rxBuf from the pool.
153  *
154  * Results:
155  *    A rxBuf or NULL if there are no buffers in the pool.
156  *
157  * Side effects:
158  *    None.
159  */
160 static vmxnet3_rxbuf_t *
161 vmxnet3_get_rxpool_buf(vmxnet3_softc_t *dp)
162 {
163 	vmxnet3_rxpool_t *rxPool = &dp->rxPool;
164 	vmxnet3_rxbuf_t *rxBuf = NULL;
165 
166 	mutex_enter(&dp->rxPoolLock);
167 	if (rxPool->listHead) {
168 		rxBuf = rxPool->listHead;
169 		rxPool->listHead = rxBuf->next;
170 		rxPool->nBufs--;
171 		ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
172 		    (rxPool->listHead != NULL && rxPool->nBufs != 0));
173 	}
174 	mutex_exit(&dp->rxPoolLock);
175 	return (rxBuf);
176 }
177 
178 /*
179  * vmxnet3_get_rxbuf --
180  *
181  *    Get an unused rxBuf from either the pool or from memory.
182  *    The returned rxBuf has a mblk associated with it.
183  *
184  * Results:
185  *    A rxBuf or NULL.
186  *
187  * Side effects:
188  *    None.
189  */
190 static vmxnet3_rxbuf_t *
191 vmxnet3_get_rxbuf(vmxnet3_softc_t *dp, boolean_t canSleep)
192 {
193 	vmxnet3_rxbuf_t *rxBuf;
194 
195 	if ((rxBuf = vmxnet3_get_rxpool_buf(dp))) {
196 		VMXNET3_DEBUG(dp, 5, "alloc 0x%p from pool\n", rxBuf);
197 	} else if ((rxBuf = vmxnet3_alloc_rxbuf(dp, canSleep))) {
198 		VMXNET3_DEBUG(dp, 5, "alloc 0x%p from mem\n", rxBuf);
199 	}
200 
201 	if (rxBuf) {
202 		rxBuf->mblk = desballoc((uchar_t *)rxBuf->dma.buf,
203 		    rxBuf->dma.bufLen, BPRI_MED, &rxBuf->freeCB);
204 		if (!rxBuf->mblk) {
205 			vmxnet3_put_rxbuf(rxBuf);
206 			atomic_inc_32(&dp->rx_alloc_failed);
207 			rxBuf = NULL;
208 		}
209 	}
210 
211 	return (rxBuf);
212 }
213 
214 /*
215  * vmxnet3_rx_populate --
216  *
217  *    Populate a Rx descriptor with a new rxBuf.
218  *
219  * Results:
220  *    DDI_SUCCESS or DDI_FAILURE.
221  *
222  * Side effects:
223  *    None.
224  */
225 static int
226 vmxnet3_rx_populate(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq, uint16_t idx,
227     boolean_t canSleep)
228 {
229 	int ret = DDI_SUCCESS;
230 	vmxnet3_rxbuf_t *rxBuf = vmxnet3_get_rxbuf(dp, canSleep);
231 
232 	if (rxBuf) {
233 		vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
234 		Vmxnet3_GenericDesc *rxDesc = VMXNET3_GET_DESC(cmdRing, idx);
235 
236 		rxq->bufRing[idx].rxBuf = rxBuf;
237 		rxDesc->rxd.addr = rxBuf->dma.bufPA;
238 		rxDesc->rxd.len = rxBuf->dma.bufLen;
239 		/* rxDesc->rxd.btype = 0; */
240 		membar_producer();
241 		rxDesc->rxd.gen = cmdRing->gen;
242 	} else {
243 		ret = DDI_FAILURE;
244 	}
245 
246 	return (ret);
247 }
248 
249 /*
250  * vmxnet3_rxqueue_init --
251  *
252  *    Initialize a RxQueue by populating the whole Rx ring with rxBufs.
253  *
254  * Results:
255  *    DDI_SUCCESS or DDI_FAILURE.
256  *
257  * Side effects:
258  *    None.
259  */
260 int
261 vmxnet3_rxqueue_init(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
262 {
263 	vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
264 
265 	do {
266 		if (vmxnet3_rx_populate(dp, rxq, cmdRing->next2fill,
267 		    B_TRUE) != DDI_SUCCESS) {
268 			goto error;
269 		}
270 		VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
271 	} while (cmdRing->next2fill);
272 
273 	dp->rxPool.nBufsLimit = vmxnet3_getprop(dp, "RxBufPoolLimit", 0,
274 	    cmdRing->size * 10, cmdRing->size * 2);
275 
276 	return (DDI_SUCCESS);
277 
278 error:
279 	while (cmdRing->next2fill) {
280 		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
281 		vmxnet3_free_rxbuf(dp, rxq->bufRing[cmdRing->next2fill].rxBuf);
282 	}
283 
284 	return (DDI_FAILURE);
285 }
286 
287 /*
288  * vmxnet3_rxqueue_fini --
289  *
290  *    Finish a RxQueue by freeing all the related rxBufs.
291  *
292  * Results:
293  *    DDI_SUCCESS.
294  *
295  * Side effects:
296  *    None.
297  */
298 void
299 vmxnet3_rxqueue_fini(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
300 {
301 	vmxnet3_rxbuf_t *rxBuf;
302 	unsigned int i;
303 
304 	ASSERT(!dp->devEnabled);
305 
306 	/* First the rxPool */
307 	while ((rxBuf = vmxnet3_get_rxpool_buf(dp)))
308 		vmxnet3_free_rxbuf(dp, rxBuf);
309 
310 	/* Then the ring */
311 	for (i = 0; i < rxq->cmdRing.size; i++) {
312 		rxBuf = rxq->bufRing[i].rxBuf;
313 		ASSERT(rxBuf);
314 		ASSERT(rxBuf->mblk);
315 		/*
316 		 * Here, freemsg() will trigger a call to vmxnet3_put_rxbuf()
317 		 * which will then call vmxnet3_free_rxbuf() because the
318 		 * underlying device is disabled.
319 		 */
320 		freemsg(rxBuf->mblk);
321 	}
322 }
323 
324 /*
325  * vmxnet3_rx_hwcksum --
326  *
327  *    Determine if a received packet was checksummed by the Vmxnet3
328  *    device and tag the mp appropriately.
329  *
330  * Results:
331  *    None.
332  *
333  * Side effects:
334  *    The mp may get tagged.
335  */
336 static void
337 vmxnet3_rx_hwcksum(vmxnet3_softc_t *dp, mblk_t *mp,
338     Vmxnet3_GenericDesc *compDesc)
339 {
340 	uint32_t flags = 0;
341 
342 	if (!compDesc->rcd.cnc) {
343 		if (compDesc->rcd.v4 && compDesc->rcd.ipc) {
344 			flags |= HCK_IPV4_HDRCKSUM;
345 			if ((compDesc->rcd.tcp || compDesc->rcd.udp) &&
346 			    compDesc->rcd.tuc) {
347 				flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
348 			}
349 		}
350 
351 		VMXNET3_DEBUG(dp, 3, "rx cksum flags = 0x%x\n", flags);
352 
353 		(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0);
354 	}
355 }
356 
357 /*
358  * vmxnet3_rx_intr --
359  *
360  *    Interrupt handler for Rx. Look if there are any pending Rx and
361  *    put them in mplist.
362  *
363  * Results:
364  *    A list of messages to pass to the MAC subystem.
365  *
366  * Side effects:
367  *    None.
368  */
369 mblk_t *
370 vmxnet3_rx_intr(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
371 {
372 	vmxnet3_compring_t *compRing = &rxq->compRing;
373 	vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
374 	Vmxnet3_RxQueueCtrl *rxqCtrl = rxq->sharedCtrl;
375 	Vmxnet3_GenericDesc *compDesc;
376 	mblk_t *mplist = NULL, **mplistTail = &mplist;
377 
378 	ASSERT(mutex_owned(&dp->intrLock));
379 
380 	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
381 	while (compDesc->rcd.gen == compRing->gen) {
382 		mblk_t *mp = NULL, **mpTail = &mp;
383 		boolean_t mpValid = B_TRUE;
384 		boolean_t eop;
385 
386 		ASSERT(compDesc->rcd.sop);
387 
388 		do {
389 			uint16_t rxdIdx = compDesc->rcd.rxdIdx;
390 			vmxnet3_rxbuf_t *rxBuf = rxq->bufRing[rxdIdx].rxBuf;
391 			mblk_t *mblk = rxBuf->mblk;
392 			Vmxnet3_GenericDesc *rxDesc;
393 
394 			while (compDesc->rcd.gen != compRing->gen) {
395 				/*
396 				 * H/W may be still be in the middle of
397 				 * generating this entry, so hold on until
398 				 * the gen bit is flipped.
399 				 */
400 				membar_consumer();
401 			}
402 			ASSERT(compDesc->rcd.gen == compRing->gen);
403 			ASSERT(rxBuf);
404 			ASSERT(mblk);
405 
406 			/* Some Rx descriptors may have been skipped */
407 			while (cmdRing->next2fill != rxdIdx) {
408 				rxDesc = VMXNET3_GET_DESC(cmdRing,
409 				    cmdRing->next2fill);
410 				rxDesc->rxd.gen = cmdRing->gen;
411 				VMXNET3_INC_RING_IDX(cmdRing,
412 				    cmdRing->next2fill);
413 			}
414 
415 			eop = compDesc->rcd.eop;
416 
417 			/*
418 			 * Now we have a piece of the packet in the rxdIdx
419 			 * descriptor. Grab it only if we achieve to replace
420 			 * it with a fresh buffer.
421 			 */
422 			if (vmxnet3_rx_populate(dp, rxq, rxdIdx,
423 			    B_FALSE) == DDI_SUCCESS) {
424 				/* Success, we can chain the mblk with the mp */
425 				mblk->b_wptr = mblk->b_rptr + compDesc->rcd.len;
426 				*mpTail = mblk;
427 				mpTail = &mblk->b_cont;
428 				ASSERT(*mpTail == NULL);
429 
430 				VMXNET3_DEBUG(dp, 3, "rx 0x%p on [%u]\n", mblk,
431 				    rxdIdx);
432 
433 				if (eop) {
434 					if (!compDesc->rcd.err) {
435 						/*
436 						 * Tag the mp if it was
437 						 * checksummed by the H/W
438 						 */
439 						vmxnet3_rx_hwcksum(dp, mp,
440 						    compDesc);
441 					} else {
442 						mpValid = B_FALSE;
443 					}
444 				}
445 			} else {
446 				/*
447 				 * Keep the same buffer, we still need
448 				 * to flip the gen bit
449 				 */
450 				rxDesc = VMXNET3_GET_DESC(cmdRing, rxdIdx);
451 				rxDesc->rxd.gen = cmdRing->gen;
452 				mpValid = B_FALSE;
453 			}
454 
455 			VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
456 			VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
457 			compDesc = VMXNET3_GET_DESC(compRing,
458 			    compRing->next2comp);
459 		} while (!eop);
460 
461 		if (mp) {
462 			if (mpValid) {
463 				*mplistTail = mp;
464 				mplistTail = &mp->b_next;
465 				ASSERT(*mplistTail == NULL);
466 			} else {
467 				/* This message got holes, drop it */
468 				freemsg(mp);
469 			}
470 		}
471 	}
472 
473 	if (rxqCtrl->updateRxProd) {
474 		uint32_t rxprod;
475 
476 		/*
477 		 * All buffers are actually available, but we can't tell that to
478 		 * the device because it may interpret that as an empty ring.
479 		 * So skip one buffer.
480 		 */
481 		if (cmdRing->next2fill) {
482 			rxprod = cmdRing->next2fill - 1;
483 		} else {
484 			rxprod = cmdRing->size - 1;
485 		}
486 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD, rxprod);
487 	}
488 
489 	return (mplist);
490 }
491