1 /*
2 * Copyright (C) 2007 VMware, Inc. All rights reserved.
3 *
4 * The contents of this file are subject to the terms of the Common
5 * Development and Distribution License (the "License") version 1.0
6 * and no later version. You may not use this file except in
7 * compliance with the License.
8 *
9 * You can obtain a copy of the License at
10 * http://www.opensource.org/licenses/cddl1.php
11 *
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 */
15 /*
16 * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
17 * Copyright 2018 Joyent, Inc.
18 */
19
20 #include <vmxnet3.h>
21
22 static void vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *);
23
24 /*
25 * Allocate a new rxBuf from memory. All its fields are set except
26 * for its associated mblk which has to be allocated later.
27 *
28 * Returns:
29 * A new rxBuf or NULL.
30 */
31 static vmxnet3_rxbuf_t *
vmxnet3_alloc_rxbuf(vmxnet3_softc_t * dp,boolean_t canSleep)32 vmxnet3_alloc_rxbuf(vmxnet3_softc_t *dp, boolean_t canSleep)
33 {
34 vmxnet3_rxbuf_t *rxBuf;
35 int flag = canSleep ? KM_SLEEP : KM_NOSLEEP;
36 int err;
37
38 rxBuf = kmem_zalloc(sizeof (vmxnet3_rxbuf_t), flag);
39 if (!rxBuf) {
40 atomic_inc_32(&dp->rx_alloc_failed);
41 return (NULL);
42 }
43
44 if ((err = vmxnet3_alloc_dma_mem_1(dp, &rxBuf->dma, (dp->cur_mtu + 18),
45 canSleep)) != 0) {
46 VMXNET3_DEBUG(dp, 0, "Failed to allocate %d bytes for rx buf, "
47 "err:%d\n", (dp->cur_mtu + 18), err);
48 kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
49 atomic_inc_32(&dp->rx_alloc_failed);
50 return (NULL);
51 }
52
53 rxBuf->freeCB.free_func = vmxnet3_put_rxbuf;
54 rxBuf->freeCB.free_arg = (caddr_t)rxBuf;
55 rxBuf->dp = dp;
56
57 atomic_inc_32(&dp->rx_num_bufs);
58 atomic_inc_32(&dp->rx_alloc_buf);
59 return (rxBuf);
60 }
61
62 static void
vmxnet3_free_rxbuf(vmxnet3_softc_t * dp,vmxnet3_rxbuf_t * rxBuf)63 vmxnet3_free_rxbuf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf)
64 {
65 vmxnet3_free_dma_mem(&rxBuf->dma);
66 kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t));
67
68 #ifndef DEBUG
69 atomic_dec_32(&dp->rx_num_bufs);
70 #else
71 {
72 uint32_t nv = atomic_dec_32_nv(&dp->rx_num_bufs);
73 ASSERT(nv != (uint32_t)-1);
74 }
75 #endif
76 }
77
78 /*
79 * Return a rxBuf to the pool. The init argument, when B_TRUE, indicates
80 * that we're being called for the purpose of pool initialization, and
81 * therefore, we should place the buffer in the pool even if the device
82 * isn't enabled.
83 *
84 * Returns:
85 * B_TRUE if the buffer was returned to the pool, or B_FALSE if it
86 * wasn't (e.g. if the device is stopped).
87 */
88 static boolean_t
vmxnet3_put_rxpool_buf(vmxnet3_softc_t * dp,vmxnet3_rxbuf_t * rxBuf,boolean_t init)89 vmxnet3_put_rxpool_buf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf,
90 boolean_t init)
91 {
92 vmxnet3_rxpool_t *rxPool = &dp->rxPool;
93 boolean_t returned = B_FALSE;
94
95 mutex_enter(&dp->rxPoolLock);
96 ASSERT(rxPool->nBufs <= rxPool->nBufsLimit);
97 if ((dp->devEnabled || init) && rxPool->nBufs < rxPool->nBufsLimit) {
98 ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
99 (rxPool->listHead != NULL && rxPool->nBufs != 0));
100 rxBuf->next = rxPool->listHead;
101 rxPool->listHead = rxBuf;
102 rxPool->nBufs++;
103 returned = B_TRUE;
104 }
105 mutex_exit(&dp->rxPoolLock);
106 return (returned);
107 }
108
109 /*
110 * Return a rxBuf to the pool or free it.
111 */
112 static void
vmxnet3_put_rxbuf(vmxnet3_rxbuf_t * rxBuf)113 vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *rxBuf)
114 {
115 vmxnet3_softc_t *dp = rxBuf->dp;
116
117 if (!vmxnet3_put_rxpool_buf(dp, rxBuf, B_FALSE))
118 vmxnet3_free_rxbuf(dp, rxBuf);
119 }
120
121 /*
122 * Get an unused rxBuf from the pool.
123 *
124 * Returns:
125 * A rxBuf or NULL if there are no buffers in the pool.
126 */
127 static vmxnet3_rxbuf_t *
vmxnet3_get_rxpool_buf(vmxnet3_softc_t * dp)128 vmxnet3_get_rxpool_buf(vmxnet3_softc_t *dp)
129 {
130 vmxnet3_rxpool_t *rxPool = &dp->rxPool;
131 vmxnet3_rxbuf_t *rxBuf = NULL;
132
133 mutex_enter(&dp->rxPoolLock);
134 if (rxPool->listHead != NULL) {
135 rxBuf = rxPool->listHead;
136 rxPool->listHead = rxBuf->next;
137 rxPool->nBufs--;
138 ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) ||
139 (rxPool->listHead != NULL && rxPool->nBufs != 0));
140 }
141 mutex_exit(&dp->rxPoolLock);
142 return (rxBuf);
143 }
144
145 /*
146 * Fill a rxPool by allocating the maximum number of buffers.
147 *
148 * Returns:
149 * 0 on success, non-zero on failure.
150 */
151 static int
vmxnet3_rxpool_init(vmxnet3_softc_t * dp)152 vmxnet3_rxpool_init(vmxnet3_softc_t *dp)
153 {
154 int err = 0;
155 vmxnet3_rxbuf_t *rxBuf;
156
157 ASSERT(dp->rxPool.nBufsLimit > 0);
158 while (dp->rxPool.nBufs < dp->rxPool.nBufsLimit) {
159 if ((rxBuf = vmxnet3_alloc_rxbuf(dp, B_FALSE)) == NULL) {
160 err = ENOMEM;
161 break;
162 }
163 VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf, B_TRUE));
164 }
165
166 if (err != 0) {
167 while ((rxBuf = vmxnet3_get_rxpool_buf(dp)) != NULL) {
168 vmxnet3_free_rxbuf(dp, rxBuf);
169 }
170 }
171
172 return (err);
173 }
174
175 /*
176 * Populate a Rx descriptor with a new rxBuf. If the pool argument is B_TRUE,
177 * then try to take a buffer from rxPool. If the pool is empty and the
178 * dp->alloc_ok is true, then fall back to dynamic allocation. If pool is
179 * B_FALSE, then always allocate a new buffer (this is only used when
180 * populating the initial set of buffers in the receive queue during start).
181 *
182 * Returns:
183 * 0 on success, non-zero on failure.
184 */
185 static int
vmxnet3_rx_populate(vmxnet3_softc_t * dp,vmxnet3_rxqueue_t * rxq,uint16_t idx,boolean_t canSleep,boolean_t pool)186 vmxnet3_rx_populate(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq, uint16_t idx,
187 boolean_t canSleep, boolean_t pool)
188 {
189 vmxnet3_rxbuf_t *rxBuf = NULL;
190
191 if (pool && (rxBuf = vmxnet3_get_rxpool_buf(dp)) == NULL) {
192 /* The maximum number of pool buffers have been allocated. */
193 atomic_inc_32(&dp->rx_pool_empty);
194 if (!dp->alloc_ok) {
195 atomic_inc_32(&dp->rx_alloc_failed);
196 }
197 }
198
199 if (rxBuf == NULL && (!pool || dp->alloc_ok)) {
200 rxBuf = vmxnet3_alloc_rxbuf(dp, canSleep);
201 }
202
203 if (rxBuf != NULL) {
204 rxBuf->mblk = desballoc((uchar_t *)rxBuf->dma.buf,
205 rxBuf->dma.bufLen, BPRI_MED, &rxBuf->freeCB);
206 if (rxBuf->mblk == NULL) {
207 if (pool) {
208 VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf,
209 B_FALSE));
210 } else {
211 vmxnet3_free_rxbuf(dp, rxBuf);
212 }
213 atomic_inc_32(&dp->rx_alloc_failed);
214 return (ENOMEM);
215 }
216
217 vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
218 Vmxnet3_GenericDesc *rxDesc = VMXNET3_GET_DESC(cmdRing, idx);
219
220 rxq->bufRing[idx].rxBuf = rxBuf;
221 rxDesc->rxd.addr = rxBuf->dma.bufPA;
222 rxDesc->rxd.len = rxBuf->dma.bufLen;
223 /* rxDesc->rxd.btype = 0; */
224 membar_producer();
225 rxDesc->rxd.gen = cmdRing->gen;
226 } else {
227 return (ENOMEM);
228 }
229
230 return (0);
231 }
232
233 /*
234 * Initialize a RxQueue by populating the whole Rx ring with rxBufs.
235 *
236 * Returns:
237 * 0 on success, non-zero on failure.
238 */
239 int
vmxnet3_rxqueue_init(vmxnet3_softc_t * dp,vmxnet3_rxqueue_t * rxq)240 vmxnet3_rxqueue_init(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
241 {
242 vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
243 int err;
244
245 dp->rxPool.nBufsLimit = vmxnet3_getprop(dp, "RxBufPoolLimit", 0,
246 cmdRing->size * 10, cmdRing->size * 2);
247
248 do {
249 if ((err = vmxnet3_rx_populate(dp, rxq, cmdRing->next2fill,
250 B_TRUE, B_FALSE)) != 0) {
251 goto error;
252 }
253 VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
254 } while (cmdRing->next2fill);
255
256 /*
257 * Pre-allocate rxPool buffers so that we never have to allocate
258 * new buffers from interrupt context when we need to replace a buffer
259 * in the rxqueue.
260 */
261 if ((err = vmxnet3_rxpool_init(dp)) != 0) {
262 goto error;
263 }
264
265 return (0);
266
267 error:
268 while (cmdRing->next2fill) {
269 VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
270 vmxnet3_free_rxbuf(dp, rxq->bufRing[cmdRing->next2fill].rxBuf);
271 }
272
273 return (err);
274 }
275
276 /*
277 * Finish a RxQueue by freeing all the related rxBufs.
278 */
279 void
vmxnet3_rxqueue_fini(vmxnet3_softc_t * dp,vmxnet3_rxqueue_t * rxq)280 vmxnet3_rxqueue_fini(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
281 {
282 vmxnet3_rxbuf_t *rxBuf;
283 unsigned int i;
284
285 ASSERT(!dp->devEnabled);
286
287 /* First the rxPool */
288 while ((rxBuf = vmxnet3_get_rxpool_buf(dp)))
289 vmxnet3_free_rxbuf(dp, rxBuf);
290
291 /* Then the ring */
292 for (i = 0; i < rxq->cmdRing.size; i++) {
293 rxBuf = rxq->bufRing[i].rxBuf;
294 ASSERT(rxBuf);
295 ASSERT(rxBuf->mblk);
296 /*
297 * Here, freemsg() will trigger a call to vmxnet3_put_rxbuf()
298 * which will then call vmxnet3_free_rxbuf() because the
299 * underlying device is disabled.
300 */
301 freemsg(rxBuf->mblk);
302 }
303 }
304
305 /*
306 * Determine if a received packet was checksummed by the Vmxnet3
307 * device and tag the mp appropriately.
308 */
309 static void
vmxnet3_rx_hwcksum(vmxnet3_softc_t * dp,mblk_t * mp,Vmxnet3_GenericDesc * compDesc)310 vmxnet3_rx_hwcksum(vmxnet3_softc_t *dp, mblk_t *mp,
311 Vmxnet3_GenericDesc *compDesc)
312 {
313 uint32_t flags = 0;
314
315 if (!compDesc->rcd.cnc) {
316 if (compDesc->rcd.v4 && compDesc->rcd.ipc) {
317 flags |= HCK_IPV4_HDRCKSUM;
318 if ((compDesc->rcd.tcp || compDesc->rcd.udp) &&
319 compDesc->rcd.tuc) {
320 flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK;
321 }
322 }
323
324 VMXNET3_DEBUG(dp, 3, "rx cksum flags = 0x%x\n", flags);
325
326 mac_hcksum_set(mp, 0, 0, 0, 0, flags);
327 }
328 }
329
330 /*
331 * Interrupt handler for Rx. Look if there are any pending Rx and
332 * put them in mplist.
333 *
334 * Returns:
335 * A list of messages to pass to the MAC subystem.
336 */
337 mblk_t *
vmxnet3_rx_intr(vmxnet3_softc_t * dp,vmxnet3_rxqueue_t * rxq)338 vmxnet3_rx_intr(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq)
339 {
340 vmxnet3_compring_t *compRing = &rxq->compRing;
341 vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing;
342 Vmxnet3_RxQueueCtrl *rxqCtrl = rxq->sharedCtrl;
343 Vmxnet3_GenericDesc *compDesc;
344 mblk_t *mplist = NULL, **mplistTail = &mplist;
345
346 ASSERT(mutex_owned(&dp->intrLock));
347
348 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
349 while (compDesc->rcd.gen == compRing->gen) {
350 mblk_t *mp = NULL, **mpTail = ∓
351 boolean_t mpValid = B_TRUE;
352 boolean_t eop;
353
354 ASSERT(compDesc->rcd.sop);
355
356 do {
357 uint16_t rxdIdx = compDesc->rcd.rxdIdx;
358 vmxnet3_rxbuf_t *rxBuf = rxq->bufRing[rxdIdx].rxBuf;
359 mblk_t *mblk = rxBuf->mblk;
360 Vmxnet3_GenericDesc *rxDesc;
361
362 while (compDesc->rcd.gen != compRing->gen) {
363 /*
364 * H/W may be still be in the middle of
365 * generating this entry, so hold on until
366 * the gen bit is flipped.
367 */
368 membar_consumer();
369 }
370 ASSERT(compDesc->rcd.gen == compRing->gen);
371 ASSERT(rxBuf);
372 ASSERT(mblk);
373
374 /* Some Rx descriptors may have been skipped */
375 while (cmdRing->next2fill != rxdIdx) {
376 rxDesc = VMXNET3_GET_DESC(cmdRing,
377 cmdRing->next2fill);
378 rxDesc->rxd.gen = cmdRing->gen;
379 VMXNET3_INC_RING_IDX(cmdRing,
380 cmdRing->next2fill);
381 }
382
383 eop = compDesc->rcd.eop;
384
385 /*
386 * Now we have a piece of the packet in the rxdIdx
387 * descriptor. Grab it only if we achieve to replace
388 * it with a fresh buffer.
389 */
390 if (vmxnet3_rx_populate(dp, rxq, rxdIdx, B_FALSE,
391 B_TRUE) == 0) {
392 /* Success, we can chain the mblk with the mp */
393 mblk->b_wptr = mblk->b_rptr + compDesc->rcd.len;
394 *mpTail = mblk;
395 mpTail = &mblk->b_cont;
396 ASSERT(*mpTail == NULL);
397
398 VMXNET3_DEBUG(dp, 3, "rx 0x%p on [%u]\n",
399 (void *)mblk, rxdIdx);
400
401 if (eop) {
402 if (!compDesc->rcd.err) {
403 /*
404 * Tag the mp if it was
405 * checksummed by the H/W
406 */
407 vmxnet3_rx_hwcksum(dp, mp,
408 compDesc);
409 } else {
410 mpValid = B_FALSE;
411 }
412 }
413 } else {
414 /*
415 * Keep the same buffer, we still need
416 * to flip the gen bit
417 */
418 rxDesc = VMXNET3_GET_DESC(cmdRing, rxdIdx);
419 rxDesc->rxd.gen = cmdRing->gen;
420 mpValid = B_FALSE;
421 }
422
423 VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
424 VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill);
425 compDesc = VMXNET3_GET_DESC(compRing,
426 compRing->next2comp);
427 } while (!eop);
428
429 if (mp) {
430 if (mpValid) {
431 *mplistTail = mp;
432 mplistTail = &mp->b_next;
433 ASSERT(*mplistTail == NULL);
434 } else {
435 /* This message got holes, drop it */
436 freemsg(mp);
437 }
438 }
439 }
440
441 if (rxqCtrl->updateRxProd) {
442 uint32_t rxprod;
443
444 /*
445 * All buffers are actually available, but we can't tell that to
446 * the device because it may interpret that as an empty ring.
447 * So skip one buffer.
448 */
449 if (cmdRing->next2fill) {
450 rxprod = cmdRing->next2fill - 1;
451 } else {
452 rxprod = cmdRing->size - 1;
453 }
454 VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD, rxprod);
455 }
456
457 return (mplist);
458 }
459