xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_rx.c (revision 45ede40b2394db7967e59f19288fae9b62efd4aa)
1 /*
2  * Copyright (C) 2007 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 /*
16  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
17  * Copyright 2018 Joyent, Inc.
18  */
19 
20 #include <vmxnet3.h>
21 
22 static void vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *);
23 
24 /*
25  * Allocate a new rxBuf from memory. All its fields are set except
26  * for its associated mblk which has to be allocated later.
27  *
28  * Returns:
29  *	A new rxBuf or NULL.
30  */
31 static vmxnet3_rxbuf_t *
32 vmxnet3_alloc_rxbuf(vmxnet3_softc_t *dp, boolean_t canSleep)
33 {
34 	vmxnet3_rxbuf_t *rxBuf;
35 	int flag = canSleep ? KM_SLEEP : KM_NOSLEEP;
36 	int err;
37 
38 	rxBuf = kmem_zalloc(sizeof (vmxnet3_rxbuf_t), flag);
39 	if (!rxBuf) {
40 		atomic_inc_32(&dp->rx_alloc_failed);
41 		return (NULL);
42 	}
43 
44 	if ((err = vmxnet3_alloc_dma_mem_1(dp, &rxBuf->dma, (dp->cur_mtu + 18),
45 	    canSleep)) != 0) {
46 		VMXNET3_DEBUG(dp, 0, "Failed to allocate %d bytes for rx buf, "
47 		    "err:%d\n", (dp->cur_mtu + 18), err);
48 		kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
49 		atomic_inc_32(&dp->rx_alloc_failed);
50 		return (NULL);
51 	}
52 
53 	rxBuf->freeCB.free_func = vmxnet3_put_rxbuf;
54 	rxBuf->freeCB.free_arg = (caddr_t)rxBuf;
55 	rxBuf->dp = dp;
56 
57 	atomic_inc_32(&dp->rx_num_bufs);
58 	atomic_inc_32(&dp->rx_alloc_buf);
59 	return (rxBuf);
60 }
61 
62 static void
63 vmxnet3_free_rxbuf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf)
64 {
65 	vmxnet3_free_dma_mem(&rxBuf->dma);
66 	kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
67 
68 #ifndef	DEBUG
69 	atomic_dec_32(&dp->rx_num_bufs);
70 #else
71 	{
72 		uint32_t nv = atomic_dec_32_nv(&dp->rx_num_bufs);
73 		ASSERT(nv != (uint32_t)-1);
74 	}
75 #endif
76 }
77 
78 /*
79  * Return a rxBuf to the pool. The init argument, when B_TRUE, indicates
80  * that we're being called for the purpose of pool initialization, and
81  * therefore, we should place the buffer in the pool even if the device
82  * isn't enabled.
83  *
84  * Returns:
85  *	B_TRUE if the buffer was returned to the pool, or B_FALSE if it
86  *	wasn't (e.g. if the device is stopped).
87  */
88 static boolean_t
89 vmxnet3_put_rxpool_buf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf,
90     boolean_t init)
91 {
92 	vmxnet3_rxpool_t *rxPool = &dp->rxPool;
93 	boolean_t returned = B_FALSE;
94 
95 	mutex_enter(&dp->rxPoolLock);
96 	ASSERT(rxPool->nBufs <= rxPool->nBufsLimit);
97 	if ((dp->devEnabled || init) && rxPool->nBufs < rxPool->nBufsLimit) {
98 		ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
99 		    (rxPool->listHead != NULL && rxPool->nBufs != 0));
100 		rxBuf->next = rxPool->listHead;
101 		rxPool->listHead = rxBuf;
102 		rxPool->nBufs++;
103 		returned = B_TRUE;
104 	}
105 	mutex_exit(&dp->rxPoolLock);
106 	return (returned);
107 }
108 
109 /*
110  * Return a rxBuf to the pool or free it.
111  */
112 static void
113 vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *rxBuf)
114 {
115 	vmxnet3_softc_t *dp = rxBuf->dp;
116 
117 	if (!vmxnet3_put_rxpool_buf(dp, rxBuf, B_FALSE))
118 		vmxnet3_free_rxbuf(dp, rxBuf);
119 }
120 
121 /*
122  * Get an unused rxBuf from the pool.
123  *
124  * Returns:
125  *	A rxBuf or NULL if there are no buffers in the pool.
126  */
127 static vmxnet3_rxbuf_t *
128 vmxnet3_get_rxpool_buf(vmxnet3_softc_t *dp)
129 {
130 	vmxnet3_rxpool_t *rxPool = &dp->rxPool;
131 	vmxnet3_rxbuf_t *rxBuf = NULL;
132 
133 	mutex_enter(&dp->rxPoolLock);
134 	if (rxPool->listHead != NULL) {
135 		rxBuf = rxPool->listHead;
136 		rxPool->listHead = rxBuf->next;
137 		rxPool->nBufs--;
138 		ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
139 		    (rxPool->listHead != NULL && rxPool->nBufs != 0));
140 	}
141 	mutex_exit(&dp->rxPoolLock);
142 	return (rxBuf);
143 }
144 
145 /*
146  * Fill a rxPool by allocating the maximum number of buffers.
147  *
148  * Returns:
149  *	0 on success, non-zero on failure.
150  */
151 static int
152 vmxnet3_rxpool_init(vmxnet3_softc_t *dp)
153 {
154 	int err = 0;
155 	vmxnet3_rxbuf_t *rxBuf;
156 
157 	ASSERT(dp->rxPool.nBufsLimit > 0);
158 	while (dp->rxPool.nBufs < dp->rxPool.nBufsLimit) {
159 		if ((rxBuf = vmxnet3_alloc_rxbuf(dp, B_FALSE)) == NULL) {
160 			err = ENOMEM;
161 			break;
162 		}
163 		VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf, B_TRUE));
164 	}
165 
166 	if (err != 0) {
167 		while ((rxBuf = vmxnet3_get_rxpool_buf(dp)) != NULL) {
168 			vmxnet3_free_rxbuf(dp, rxBuf);
169 		}
170 	}
171 
172 	return (err);
173 }
174 
175 /*
176  * Populate a Rx descriptor with a new rxBuf. If the pool argument is B_TRUE,
177  * then try to take a buffer from rxPool. If the pool is empty and the
178  * dp->alloc_ok is true, then fall back to dynamic allocation. If pool is
179  * B_FALSE, then always allocate a new buffer (this is only used when
180  * populating the initial set of buffers in the receive queue during start).
181  *
182  * Returns:
183  *	0 on success, non-zero on failure.
184  */
185 static int
186 vmxnet3_rx_populate(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq, uint16_t idx,
187     boolean_t canSleep, boolean_t pool)
188 {
189 	vmxnet3_rxbuf_t *rxBuf = NULL;
190 
191 	if (pool && (rxBuf = vmxnet3_get_rxpool_buf(dp)) == NULL) {
192 		/* The maximum number of pool buffers have been allocated. */
193 		atomic_inc_32(&dp->rx_pool_empty);
194 		if (!dp->alloc_ok) {
195 			atomic_inc_32(&dp->rx_alloc_failed);
196 		}
197 	}
198 
199 	if (rxBuf == NULL && (!pool || dp->alloc_ok)) {
200 		rxBuf = vmxnet3_alloc_rxbuf(dp, canSleep);
201 	}
202 
203 	if (rxBuf != NULL) {
204 		rxBuf->mblk = desballoc((uchar_t *)rxBuf->dma.buf,
205 		    rxBuf->dma.bufLen, BPRI_MED, &rxBuf->freeCB);
206 		if (rxBuf->mblk == NULL) {
207 			if (pool) {
208 				VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf,
209 				    B_FALSE));
210 			} else {
211 				vmxnet3_free_rxbuf(dp, rxBuf);
212 			}
213 			atomic_inc_32(&dp->rx_alloc_failed);
214 			return (ENOMEM);
215 		}
216 
217 		vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
218 		Vmxnet3_GenericDesc *rxDesc = VMXNET3_GET_DESC(cmdRing, idx);
219 
220 		rxq->bufRing[idx].rxBuf = rxBuf;
221 		rxDesc->rxd.addr = rxBuf->dma.bufPA;
222 		rxDesc->rxd.len = rxBuf->dma.bufLen;
223 		/* rxDesc->rxd.btype = 0; */
224 		membar_producer();
225 		rxDesc->rxd.gen = cmdRing->gen;
226 	} else {
227 		return (ENOMEM);
228 	}
229 
230 	return (0);
231 }
232 
233 /*
234  * Initialize a RxQueue by populating the whole Rx ring with rxBufs.
235  *
236  * Returns:
237  *	0 on success, non-zero on failure.
238  */
239 int
240 vmxnet3_rxqueue_init(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
241 {
242 	vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
243 	int err;
244 
245 	dp->rxPool.nBufsLimit = vmxnet3_getprop(dp, "RxBufPoolLimit", 0,
246 	    cmdRing->size * 10, cmdRing->size * 2);
247 
248 	do {
249 		if ((err = vmxnet3_rx_populate(dp, rxq, cmdRing->next2fill,
250 		    B_TRUE, B_FALSE)) != 0) {
251 			goto error;
252 		}
253 		VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
254 	} while (cmdRing->next2fill);
255 
256 	/*
257 	 * Pre-allocate rxPool buffers so that we never have to allocate
258 	 * new buffers from interrupt context when we need to replace a buffer
259 	 * in the rxqueue.
260 	 */
261 	if ((err = vmxnet3_rxpool_init(dp)) != 0) {
262 		goto error;
263 	}
264 
265 	return (0);
266 
267 error:
268 	while (cmdRing->next2fill) {
269 		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
270 		vmxnet3_free_rxbuf(dp, rxq->bufRing[cmdRing->next2fill].rxBuf);
271 	}
272 
273 	return (err);
274 }
275 
276 /*
277  * Finish a RxQueue by freeing all the related rxBufs.
278  */
279 void
280 vmxnet3_rxqueue_fini(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
281 {
282 	vmxnet3_rxbuf_t *rxBuf;
283 	unsigned int i;
284 
285 	ASSERT(!dp->devEnabled);
286 
287 	/* First the rxPool */
288 	while ((rxBuf = vmxnet3_get_rxpool_buf(dp)))
289 		vmxnet3_free_rxbuf(dp, rxBuf);
290 
291 	/* Then the ring */
292 	for (i = 0; i < rxq->cmdRing.size; i++) {
293 		rxBuf = rxq->bufRing[i].rxBuf;
294 		ASSERT(rxBuf);
295 		ASSERT(rxBuf->mblk);
296 		/*
297 		 * Here, freemsg() will trigger a call to vmxnet3_put_rxbuf()
298 		 * which will then call vmxnet3_free_rxbuf() because the
299 		 * underlying device is disabled.
300 		 */
301 		freemsg(rxBuf->mblk);
302 	}
303 }
304 
305 /*
306  * Determine if a received packet was checksummed by the Vmxnet3
307  * device and tag the mp appropriately.
308  */
309 static void
310 vmxnet3_rx_hwcksum(vmxnet3_softc_t *dp, mblk_t *mp,
311     Vmxnet3_GenericDesc *compDesc)
312 {
313 	uint32_t flags = 0;
314 
315 	if (!compDesc->rcd.cnc) {
316 		if (compDesc->rcd.v4 && compDesc->rcd.ipc) {
317 			flags |= HCK_IPV4_HDRCKSUM;
318 			if ((compDesc->rcd.tcp || compDesc->rcd.udp) &&
319 			    compDesc->rcd.tuc) {
320 				flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
321 			}
322 		}
323 
324 		VMXNET3_DEBUG(dp, 3, "rx cksum flags = 0x%x\n", flags);
325 
326 		mac_hcksum_set(mp, 0, 0, 0, 0, flags);
327 	}
328 }
329 
330 /*
331  * Interrupt handler for Rx. Look if there are any pending Rx and
332  * put them in mplist.
333  *
334  * Returns:
335  *	A list of messages to pass to the MAC subystem.
336  */
337 mblk_t *
338 vmxnet3_rx_intr(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
339 {
340 	vmxnet3_compring_t *compRing = &rxq->compRing;
341 	vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
342 	Vmxnet3_RxQueueCtrl *rxqCtrl = rxq->sharedCtrl;
343 	Vmxnet3_GenericDesc *compDesc;
344 	mblk_t *mplist = NULL, **mplistTail = &mplist;
345 
346 	ASSERT(mutex_owned(&dp->intrLock));
347 
348 	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
349 	while (compDesc->rcd.gen == compRing->gen) {
350 		mblk_t *mp = NULL, **mpTail = &mp;
351 		boolean_t mpValid = B_TRUE;
352 		boolean_t eop;
353 
354 		ASSERT(compDesc->rcd.sop);
355 
356 		do {
357 			uint16_t rxdIdx = compDesc->rcd.rxdIdx;
358 			vmxnet3_rxbuf_t *rxBuf = rxq->bufRing[rxdIdx].rxBuf;
359 			mblk_t *mblk = rxBuf->mblk;
360 			Vmxnet3_GenericDesc *rxDesc;
361 
362 			while (compDesc->rcd.gen != compRing->gen) {
363 				/*
364 				 * H/W may be still be in the middle of
365 				 * generating this entry, so hold on until
366 				 * the gen bit is flipped.
367 				 */
368 				membar_consumer();
369 			}
370 			ASSERT(compDesc->rcd.gen == compRing->gen);
371 			ASSERT(rxBuf);
372 			ASSERT(mblk);
373 
374 			/* Some Rx descriptors may have been skipped */
375 			while (cmdRing->next2fill != rxdIdx) {
376 				rxDesc = VMXNET3_GET_DESC(cmdRing,
377 				    cmdRing->next2fill);
378 				rxDesc->rxd.gen = cmdRing->gen;
379 				VMXNET3_INC_RING_IDX(cmdRing,
380 				    cmdRing->next2fill);
381 			}
382 
383 			eop = compDesc->rcd.eop;
384 
385 			/*
386 			 * Now we have a piece of the packet in the rxdIdx
387 			 * descriptor. Grab it only if we achieve to replace
388 			 * it with a fresh buffer.
389 			 */
390 			if (vmxnet3_rx_populate(dp, rxq, rxdIdx, B_FALSE,
391 			    B_TRUE) == 0) {
392 				/* Success, we can chain the mblk with the mp */
393 				mblk->b_wptr = mblk->b_rptr + compDesc->rcd.len;
394 				*mpTail = mblk;
395 				mpTail = &mblk->b_cont;
396 				ASSERT(*mpTail == NULL);
397 
398 				VMXNET3_DEBUG(dp, 3, "rx 0x%p on [%u]\n",
399 				    (void *)mblk, rxdIdx);
400 
401 				if (eop) {
402 					if (!compDesc->rcd.err) {
403 						/*
404 						 * Tag the mp if it was
405 						 * checksummed by the H/W
406 						 */
407 						vmxnet3_rx_hwcksum(dp, mp,
408 						    compDesc);
409 					} else {
410 						mpValid = B_FALSE;
411 					}
412 				}
413 			} else {
414 				/*
415 				 * Keep the same buffer, we still need
416 				 * to flip the gen bit
417 				 */
418 				rxDesc = VMXNET3_GET_DESC(cmdRing, rxdIdx);
419 				rxDesc->rxd.gen = cmdRing->gen;
420 				mpValid = B_FALSE;
421 			}
422 
423 			VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
424 			VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
425 			compDesc = VMXNET3_GET_DESC(compRing,
426 			    compRing->next2comp);
427 		} while (!eop);
428 
429 		if (mp) {
430 			if (mpValid) {
431 				*mplistTail = mp;
432 				mplistTail = &mp->b_next;
433 				ASSERT(*mplistTail == NULL);
434 			} else {
435 				/* This message got holes, drop it */
436 				freemsg(mp);
437 			}
438 		}
439 	}
440 
441 	if (rxqCtrl->updateRxProd) {
442 		uint32_t rxprod;
443 
444 		/*
445 		 * All buffers are actually available, but we can't tell that to
446 		 * the device because it may interpret that as an empty ring.
447 		 * So skip one buffer.
448 		 */
449 		if (cmdRing->next2fill) {
450 			rxprod = cmdRing->next2fill - 1;
451 		} else {
452 			rxprod = cmdRing->size - 1;
453 		}
454 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD, rxprod);
455 	}
456 
457 	return (mplist);
458 }
459