xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c (revision 2f8bbd9dee64b0f32e2f0e385b450b0d7dca7e32)
1 /*
2  * Copyright (C) 2007 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 /*
16  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
17  */
18 
19 #include <vmxnet3.h>
20 
/*
 * Forward declaration: vmxnet3_alloc_rxbuf() installs this function as the
 * free callback of every rxBuf, which happens before its definition below.
 */
static void vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *);
22 
23 /*
24  * Allocate a new rxBuf from memory. All its fields are set except
25  * for its associated mblk which has to be allocated later.
26  *
27  * Returns:
28  *	A new rxBuf or NULL.
29  */
30 static vmxnet3_rxbuf_t *
31 vmxnet3_alloc_rxbuf(vmxnet3_softc_t *dp, boolean_t canSleep)
32 {
33 	vmxnet3_rxbuf_t *rxBuf;
34 	int flag = canSleep ? KM_SLEEP : KM_NOSLEEP;
35 	int err;
36 
37 	rxBuf = kmem_zalloc(sizeof (vmxnet3_rxbuf_t), flag);
38 	if (!rxBuf) {
39 		atomic_inc_32(&dp->rx_alloc_failed);
40 		return (NULL);
41 	}
42 
43 	if ((err = vmxnet3_alloc_dma_mem_1(dp, &rxBuf->dma, (dp->cur_mtu + 18),
44 	    canSleep)) != 0) {
45 		VMXNET3_DEBUG(dp, 0, "Failed to allocate %d bytes for rx buf, "
46 		    "err:%d\n", (dp->cur_mtu + 18), err);
47 		kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
48 		atomic_inc_32(&dp->rx_alloc_failed);
49 		return (NULL);
50 	}
51 
52 	rxBuf->freeCB.free_func = vmxnet3_put_rxbuf;
53 	rxBuf->freeCB.free_arg = (caddr_t)rxBuf;
54 	rxBuf->dp = dp;
55 
56 	atomic_inc_32(&dp->rx_num_bufs);
57 	atomic_inc_32(&dp->rx_alloc_buf);
58 	return (rxBuf);
59 }
60 
61 static void
62 vmxnet3_free_rxbuf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf)
63 {
64 	vmxnet3_free_dma_mem(&rxBuf->dma);
65 	kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
66 
67 #ifndef	DEBUG
68 	atomic_dec_32(&dp->rx_num_bufs);
69 #else
70 	{
71 		uint32_t nv = atomic_dec_32_nv(&dp->rx_num_bufs);
72 		ASSERT(nv != (uint32_t)-1);
73 	}
74 #endif
75 }
76 
77 /*
78  * Return a rxBuf to the pool. The init argument, when B_TRUE, indicates
79  * that we're being called for the purpose of pool initialization, and
80  * therefore, we should place the buffer in the pool even if the device
81  * isn't enabled.
82  *
83  * Returns:
84  *	B_TRUE if the buffer was returned to the pool, or B_FALSE if it
85  *	wasn't (e.g. if the device is stopped).
86  */
87 static boolean_t
88 vmxnet3_put_rxpool_buf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf,
89     boolean_t init)
90 {
91 	vmxnet3_rxpool_t *rxPool = &dp->rxPool;
92 	boolean_t returned = B_FALSE;
93 
94 	mutex_enter(&dp->rxPoolLock);
95 	ASSERT(rxPool->nBufs <= rxPool->nBufsLimit);
96 	if ((dp->devEnabled || init) && rxPool->nBufs < rxPool->nBufsLimit) {
97 		ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
98 		    (rxPool->listHead != NULL && rxPool->nBufs != 0));
99 		rxBuf->next = rxPool->listHead;
100 		rxPool->listHead = rxBuf;
101 		rxPool->nBufs++;
102 		returned = B_TRUE;
103 	}
104 	mutex_exit(&dp->rxPoolLock);
105 	return (returned);
106 }
107 
108 /*
109  * Return a rxBuf to the pool or free it.
110  */
111 static void
112 vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *rxBuf)
113 {
114 	vmxnet3_softc_t *dp = rxBuf->dp;
115 
116 	if (!vmxnet3_put_rxpool_buf(dp, rxBuf, B_FALSE))
117 		vmxnet3_free_rxbuf(dp, rxBuf);
118 }
119 
120 /*
121  * Get an unused rxBuf from the pool.
122  *
123  * Returns:
124  *	A rxBuf or NULL if there are no buffers in the pool.
125  */
126 static vmxnet3_rxbuf_t *
127 vmxnet3_get_rxpool_buf(vmxnet3_softc_t *dp)
128 {
129 	vmxnet3_rxpool_t *rxPool = &dp->rxPool;
130 	vmxnet3_rxbuf_t *rxBuf = NULL;
131 
132 	mutex_enter(&dp->rxPoolLock);
133 	if (rxPool->listHead != NULL) {
134 		rxBuf = rxPool->listHead;
135 		rxPool->listHead = rxBuf->next;
136 		rxPool->nBufs--;
137 		ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
138 		    (rxPool->listHead != NULL && rxPool->nBufs != 0));
139 	}
140 	mutex_exit(&dp->rxPoolLock);
141 	return (rxBuf);
142 }
143 
144 /*
145  * Fill a rxPool by allocating the maximum number of buffers.
146  *
147  * Returns:
148  *	0 on success, non-zero on failure.
149  */
150 static int
151 vmxnet3_rxpool_init(vmxnet3_softc_t *dp)
152 {
153 	int err = 0;
154 	vmxnet3_rxbuf_t *rxBuf;
155 
156 	ASSERT(dp->rxPool.nBufsLimit > 0);
157 	while (dp->rxPool.nBufs < dp->rxPool.nBufsLimit) {
158 		if ((rxBuf = vmxnet3_alloc_rxbuf(dp, B_FALSE)) == NULL) {
159 			err = ENOMEM;
160 			break;
161 		}
162 		VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf, B_TRUE));
163 	}
164 
165 	if (err != 0) {
166 		while ((rxBuf = vmxnet3_get_rxpool_buf(dp)) != NULL) {
167 			vmxnet3_free_rxbuf(dp, rxBuf);
168 		}
169 	}
170 
171 	return (err);
172 }
173 
174 /*
175  * Populate a Rx descriptor with a new rxBuf. If the pool argument is B_TRUE,
176  * then try to take a buffer from rxPool. If the pool is empty and the
177  * dp->alloc_ok is true, then fall back to dynamic allocation. If pool is
178  * B_FALSE, then always allocate a new buffer (this is only used when
179  * populating the initial set of buffers in the receive queue during start).
180  *
181  * Returns:
182  *	0 on success, non-zero on failure.
183  */
184 static int
185 vmxnet3_rx_populate(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq, uint16_t idx,
186     boolean_t canSleep, boolean_t pool)
187 {
188 	vmxnet3_rxbuf_t *rxBuf = NULL;
189 
190 	if (pool && (rxBuf = vmxnet3_get_rxpool_buf(dp)) == NULL) {
191 		/* The maximum number of pool buffers have been allocated. */
192 		atomic_inc_32(&dp->rx_pool_empty);
193 		if (!dp->alloc_ok) {
194 			atomic_inc_32(&dp->rx_alloc_failed);
195 		}
196 	}
197 
198 	if (rxBuf == NULL && (!pool || dp->alloc_ok)) {
199 		rxBuf = vmxnet3_alloc_rxbuf(dp, canSleep);
200 	}
201 
202 	if (rxBuf != NULL) {
203 		rxBuf->mblk = desballoc((uchar_t *)rxBuf->dma.buf,
204 		    rxBuf->dma.bufLen, BPRI_MED, &rxBuf->freeCB);
205 		if (rxBuf->mblk == NULL) {
206 			if (pool) {
207 				VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf,
208 				    B_FALSE));
209 			} else {
210 				vmxnet3_free_rxbuf(dp, rxBuf);
211 			}
212 			atomic_inc_32(&dp->rx_alloc_failed);
213 			return (ENOMEM);
214 		}
215 
216 		vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
217 		Vmxnet3_GenericDesc *rxDesc = VMXNET3_GET_DESC(cmdRing, idx);
218 
219 		rxq->bufRing[idx].rxBuf = rxBuf;
220 		rxDesc->rxd.addr = rxBuf->dma.bufPA;
221 		rxDesc->rxd.len = rxBuf->dma.bufLen;
222 		/* rxDesc->rxd.btype = 0; */
223 		membar_producer();
224 		rxDesc->rxd.gen = cmdRing->gen;
225 	} else {
226 		return (ENOMEM);
227 	}
228 
229 	return (0);
230 }
231 
232 /*
233  * Initialize a RxQueue by populating the whole Rx ring with rxBufs.
234  *
235  * Returns:
236  *	0 on success, non-zero on failure.
237  */
238 int
239 vmxnet3_rxqueue_init(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
240 {
241 	vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
242 	int err;
243 
244 	dp->rxPool.nBufsLimit = vmxnet3_getprop(dp, "RxBufPoolLimit", 0,
245 	    cmdRing->size * 10, cmdRing->size * 2);
246 
247 	do {
248 		if ((err = vmxnet3_rx_populate(dp, rxq, cmdRing->next2fill,
249 		    B_TRUE, B_FALSE)) != 0) {
250 			goto error;
251 		}
252 		VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
253 	} while (cmdRing->next2fill);
254 
255 	/*
256 	 * Pre-allocate rxPool buffers so that we never have to allocate
257 	 * new buffers from interrupt context when we need to replace a buffer
258 	 * in the rxqueue.
259 	 */
260 	if ((err = vmxnet3_rxpool_init(dp)) != 0) {
261 		goto error;
262 	}
263 
264 	return (0);
265 
266 error:
267 	while (cmdRing->next2fill) {
268 		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
269 		vmxnet3_free_rxbuf(dp, rxq->bufRing[cmdRing->next2fill].rxBuf);
270 	}
271 
272 	return (err);
273 }
274 
275 /*
276  * Finish a RxQueue by freeing all the related rxBufs.
277  */
278 void
279 vmxnet3_rxqueue_fini(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
280 {
281 	vmxnet3_rxbuf_t *rxBuf;
282 	unsigned int i;
283 
284 	ASSERT(!dp->devEnabled);
285 
286 	/* First the rxPool */
287 	while ((rxBuf = vmxnet3_get_rxpool_buf(dp)))
288 		vmxnet3_free_rxbuf(dp, rxBuf);
289 
290 	/* Then the ring */
291 	for (i = 0; i < rxq->cmdRing.size; i++) {
292 		rxBuf = rxq->bufRing[i].rxBuf;
293 		ASSERT(rxBuf);
294 		ASSERT(rxBuf->mblk);
295 		/*
296 		 * Here, freemsg() will trigger a call to vmxnet3_put_rxbuf()
297 		 * which will then call vmxnet3_free_rxbuf() because the
298 		 * underlying device is disabled.
299 		 */
300 		freemsg(rxBuf->mblk);
301 	}
302 }
303 
304 /*
305  * Determine if a received packet was checksummed by the Vmxnet3
306  * device and tag the mp appropriately.
307  */
308 static void
309 vmxnet3_rx_hwcksum(vmxnet3_softc_t *dp, mblk_t *mp,
310     Vmxnet3_GenericDesc *compDesc)
311 {
312 	uint32_t flags = 0;
313 
314 	if (!compDesc->rcd.cnc) {
315 		if (compDesc->rcd.v4 && compDesc->rcd.ipc) {
316 			flags |= HCK_IPV4_HDRCKSUM;
317 			if ((compDesc->rcd.tcp || compDesc->rcd.udp) &&
318 			    compDesc->rcd.tuc) {
319 				flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
320 			}
321 		}
322 
323 		VMXNET3_DEBUG(dp, 3, "rx cksum flags = 0x%x\n", flags);
324 
325 		(void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0);
326 	}
327 }
328 
329 /*
330  * Interrupt handler for Rx. Look if there are any pending Rx and
331  * put them in mplist.
332  *
333  * Returns:
334  *	A list of messages to pass to the MAC subystem.
335  */
336 mblk_t *
337 vmxnet3_rx_intr(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
338 {
339 	vmxnet3_compring_t *compRing = &rxq->compRing;
340 	vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
341 	Vmxnet3_RxQueueCtrl *rxqCtrl = rxq->sharedCtrl;
342 	Vmxnet3_GenericDesc *compDesc;
343 	mblk_t *mplist = NULL, **mplistTail = &mplist;
344 
345 	ASSERT(mutex_owned(&dp->intrLock));
346 
347 	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
348 	while (compDesc->rcd.gen == compRing->gen) {
349 		mblk_t *mp = NULL, **mpTail = &mp;
350 		boolean_t mpValid = B_TRUE;
351 		boolean_t eop;
352 
353 		ASSERT(compDesc->rcd.sop);
354 
355 		do {
356 			uint16_t rxdIdx = compDesc->rcd.rxdIdx;
357 			vmxnet3_rxbuf_t *rxBuf = rxq->bufRing[rxdIdx].rxBuf;
358 			mblk_t *mblk = rxBuf->mblk;
359 			Vmxnet3_GenericDesc *rxDesc;
360 
361 			while (compDesc->rcd.gen != compRing->gen) {
362 				/*
363 				 * H/W may be still be in the middle of
364 				 * generating this entry, so hold on until
365 				 * the gen bit is flipped.
366 				 */
367 				membar_consumer();
368 			}
369 			ASSERT(compDesc->rcd.gen == compRing->gen);
370 			ASSERT(rxBuf);
371 			ASSERT(mblk);
372 
373 			/* Some Rx descriptors may have been skipped */
374 			while (cmdRing->next2fill != rxdIdx) {
375 				rxDesc = VMXNET3_GET_DESC(cmdRing,
376 				    cmdRing->next2fill);
377 				rxDesc->rxd.gen = cmdRing->gen;
378 				VMXNET3_INC_RING_IDX(cmdRing,
379 				    cmdRing->next2fill);
380 			}
381 
382 			eop = compDesc->rcd.eop;
383 
384 			/*
385 			 * Now we have a piece of the packet in the rxdIdx
386 			 * descriptor. Grab it only if we achieve to replace
387 			 * it with a fresh buffer.
388 			 */
389 			if (vmxnet3_rx_populate(dp, rxq, rxdIdx, B_FALSE,
390 			    B_TRUE) == 0) {
391 				/* Success, we can chain the mblk with the mp */
392 				mblk->b_wptr = mblk->b_rptr + compDesc->rcd.len;
393 				*mpTail = mblk;
394 				mpTail = &mblk->b_cont;
395 				ASSERT(*mpTail == NULL);
396 
397 				VMXNET3_DEBUG(dp, 3, "rx 0x%p on [%u]\n",
398 				    (void *)mblk, rxdIdx);
399 
400 				if (eop) {
401 					if (!compDesc->rcd.err) {
402 						/*
403 						 * Tag the mp if it was
404 						 * checksummed by the H/W
405 						 */
406 						vmxnet3_rx_hwcksum(dp, mp,
407 						    compDesc);
408 					} else {
409 						mpValid = B_FALSE;
410 					}
411 				}
412 			} else {
413 				/*
414 				 * Keep the same buffer, we still need
415 				 * to flip the gen bit
416 				 */
417 				rxDesc = VMXNET3_GET_DESC(cmdRing, rxdIdx);
418 				rxDesc->rxd.gen = cmdRing->gen;
419 				mpValid = B_FALSE;
420 			}
421 
422 			VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
423 			VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
424 			compDesc = VMXNET3_GET_DESC(compRing,
425 			    compRing->next2comp);
426 		} while (!eop);
427 
428 		if (mp) {
429 			if (mpValid) {
430 				*mplistTail = mp;
431 				mplistTail = &mp->b_next;
432 				ASSERT(*mplistTail == NULL);
433 			} else {
434 				/* This message got holes, drop it */
435 				freemsg(mp);
436 			}
437 		}
438 	}
439 
440 	if (rxqCtrl->updateRxProd) {
441 		uint32_t rxprod;
442 
443 		/*
444 		 * All buffers are actually available, but we can't tell that to
445 		 * the device because it may interpret that as an empty ring.
446 		 * So skip one buffer.
447 		 */
448 		if (cmdRing->next2fill) {
449 			rxprod = cmdRing->next2fill - 1;
450 		} else {
451 			rxprod = cmdRing->size - 1;
452 		}
453 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD, rxprod);
454 	}
455 
456 	return (mplist);
457 }
458