1 /* 2 * Copyright (C) 2007 VMware, Inc. All rights reserved. 3 * 4 * The contents of this file are subject to the terms of the Common 5 * Development and Distribution License (the "License") version 1.0 6 * and no later version. You may not use this file except in 7 * compliance with the License. 8 * 9 * You can obtain a copy of the License at 10 * http://www.opensource.org/licenses/cddl1.php 11 * 12 * See the License for the specific language governing permissions 13 * and limitations under the License. 14 */ 15 /* 16 * Copyright (c) 2013, 2016 by Delphix. All rights reserved. 17 */ 18 19 #include <vmxnet3.h> 20 21 static void vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *); 22 23 /* 24 * Allocate a new rxBuf from memory. All its fields are set except 25 * for its associated mblk which has to be allocated later. 26 * 27 * Returns: 28 * A new rxBuf or NULL. 29 */ 30 static vmxnet3_rxbuf_t * 31 vmxnet3_alloc_rxbuf(vmxnet3_softc_t *dp, boolean_t canSleep) 32 { 33 vmxnet3_rxbuf_t *rxBuf; 34 int flag = canSleep ? KM_SLEEP : KM_NOSLEEP; 35 int err; 36 37 rxBuf = kmem_zalloc(sizeof (vmxnet3_rxbuf_t), flag); 38 if (!rxBuf) { 39 atomic_inc_32(&dp->rx_alloc_failed); 40 return (NULL); 41 } 42 43 if ((err = vmxnet3_alloc_dma_mem_1(dp, &rxBuf->dma, (dp->cur_mtu + 18), 44 canSleep)) != 0) { 45 VMXNET3_DEBUG(dp, 0, "Failed to allocate %d bytes for rx buf, " 46 "err:%d\n", (dp->cur_mtu + 18), err); 47 kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t)); 48 atomic_inc_32(&dp->rx_alloc_failed); 49 return (NULL); 50 } 51 52 rxBuf->freeCB.free_func = vmxnet3_put_rxbuf; 53 rxBuf->freeCB.free_arg = (caddr_t)rxBuf; 54 rxBuf->dp = dp; 55 56 atomic_inc_32(&dp->rx_num_bufs); 57 atomic_inc_32(&dp->rx_alloc_buf); 58 return (rxBuf); 59 } 60 61 static void 62 vmxnet3_free_rxbuf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf) 63 { 64 vmxnet3_free_dma_mem(&rxBuf->dma); 65 kmem_free(rxBuf, sizeof (vmxnet3_rxbuf_t)); 66 67 #ifndef DEBUG 68 atomic_dec_32(&dp->rx_num_bufs); 69 #else 70 { 71 uint32_t nv = atomic_dec_32_nv(&dp->rx_num_bufs); 72 ASSERT(nv != (uint32_t)-1); 73 } 74 #endif 75 } 76 77 /* 78 * Return a rxBuf to the pool. The init argument, when B_TRUE, indicates 79 * that we're being called for the purpose of pool initialization, and 80 * therefore, we should place the buffer in the pool even if the device 81 * isn't enabled. 82 * 83 * Returns: 84 * B_TRUE if the buffer was returned to the pool, or B_FALSE if it 85 * wasn't (e.g. if the device is stopped). 86 */ 87 static boolean_t 88 vmxnet3_put_rxpool_buf(vmxnet3_softc_t *dp, vmxnet3_rxbuf_t *rxBuf, 89 boolean_t init) 90 { 91 vmxnet3_rxpool_t *rxPool = &dp->rxPool; 92 boolean_t returned = B_FALSE; 93 94 mutex_enter(&dp->rxPoolLock); 95 ASSERT(rxPool->nBufs <= rxPool->nBufsLimit); 96 if ((dp->devEnabled || init) && rxPool->nBufs < rxPool->nBufsLimit) { 97 ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) || 98 (rxPool->listHead != NULL && rxPool->nBufs != 0)); 99 rxBuf->next = rxPool->listHead; 100 rxPool->listHead = rxBuf; 101 rxPool->nBufs++; 102 returned = B_TRUE; 103 } 104 mutex_exit(&dp->rxPoolLock); 105 return (returned); 106 } 107 108 /* 109 * Return a rxBuf to the pool or free it. 110 */ 111 static void 112 vmxnet3_put_rxbuf(vmxnet3_rxbuf_t *rxBuf) 113 { 114 vmxnet3_softc_t *dp = rxBuf->dp; 115 116 if (!vmxnet3_put_rxpool_buf(dp, rxBuf, B_FALSE)) 117 vmxnet3_free_rxbuf(dp, rxBuf); 118 } 119 120 /* 121 * Get an unused rxBuf from the pool. 122 * 123 * Returns: 124 * A rxBuf or NULL if there are no buffers in the pool. 125 */ 126 static vmxnet3_rxbuf_t * 127 vmxnet3_get_rxpool_buf(vmxnet3_softc_t *dp) 128 { 129 vmxnet3_rxpool_t *rxPool = &dp->rxPool; 130 vmxnet3_rxbuf_t *rxBuf = NULL; 131 132 mutex_enter(&dp->rxPoolLock); 133 if (rxPool->listHead != NULL) { 134 rxBuf = rxPool->listHead; 135 rxPool->listHead = rxBuf->next; 136 rxPool->nBufs--; 137 ASSERT((rxPool->listHead == NULL && rxPool->nBufs == 0) || 138 (rxPool->listHead != NULL && rxPool->nBufs != 0)); 139 } 140 mutex_exit(&dp->rxPoolLock); 141 return (rxBuf); 142 } 143 144 /* 145 * Fill a rxPool by allocating the maximum number of buffers. 146 * 147 * Returns: 148 * 0 on success, non-zero on failure. 149 */ 150 static int 151 vmxnet3_rxpool_init(vmxnet3_softc_t *dp) 152 { 153 int err = 0; 154 vmxnet3_rxbuf_t *rxBuf; 155 156 ASSERT(dp->rxPool.nBufsLimit > 0); 157 while (dp->rxPool.nBufs < dp->rxPool.nBufsLimit) { 158 if ((rxBuf = vmxnet3_alloc_rxbuf(dp, B_FALSE)) == NULL) { 159 err = ENOMEM; 160 break; 161 } 162 VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf, B_TRUE)); 163 } 164 165 if (err != 0) { 166 while ((rxBuf = vmxnet3_get_rxpool_buf(dp)) != NULL) { 167 vmxnet3_free_rxbuf(dp, rxBuf); 168 } 169 } 170 171 return (err); 172 } 173 174 /* 175 * Populate a Rx descriptor with a new rxBuf. If the pool argument is B_TRUE, 176 * then try to take a buffer from rxPool. If the pool is empty and the 177 * dp->alloc_ok is true, then fall back to dynamic allocation. If pool is 178 * B_FALSE, then always allocate a new buffer (this is only used when 179 * populating the initial set of buffers in the receive queue during start). 180 * 181 * Returns: 182 * 0 on success, non-zero on failure. 183 */ 184 static int 185 vmxnet3_rx_populate(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq, uint16_t idx, 186 boolean_t canSleep, boolean_t pool) 187 { 188 vmxnet3_rxbuf_t *rxBuf = NULL; 189 190 if (pool && (rxBuf = vmxnet3_get_rxpool_buf(dp)) == NULL) { 191 /* The maximum number of pool buffers have been allocated. */ 192 atomic_inc_32(&dp->rx_pool_empty); 193 if (!dp->alloc_ok) { 194 atomic_inc_32(&dp->rx_alloc_failed); 195 } 196 } 197 198 if (rxBuf == NULL && (!pool || dp->alloc_ok)) { 199 rxBuf = vmxnet3_alloc_rxbuf(dp, canSleep); 200 } 201 202 if (rxBuf != NULL) { 203 rxBuf->mblk = desballoc((uchar_t *)rxBuf->dma.buf, 204 rxBuf->dma.bufLen, BPRI_MED, &rxBuf->freeCB); 205 if (rxBuf->mblk == NULL) { 206 if (pool) { 207 VERIFY(vmxnet3_put_rxpool_buf(dp, rxBuf, 208 B_FALSE)); 209 } else { 210 vmxnet3_free_rxbuf(dp, rxBuf); 211 } 212 atomic_inc_32(&dp->rx_alloc_failed); 213 return (ENOMEM); 214 } 215 216 vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing; 217 Vmxnet3_GenericDesc *rxDesc = VMXNET3_GET_DESC(cmdRing, idx); 218 219 rxq->bufRing[idx].rxBuf = rxBuf; 220 rxDesc->rxd.addr = rxBuf->dma.bufPA; 221 rxDesc->rxd.len = rxBuf->dma.bufLen; 222 /* rxDesc->rxd.btype = 0; */ 223 membar_producer(); 224 rxDesc->rxd.gen = cmdRing->gen; 225 } else { 226 return (ENOMEM); 227 } 228 229 return (0); 230 } 231 232 /* 233 * Initialize a RxQueue by populating the whole Rx ring with rxBufs. 234 * 235 * Returns: 236 * 0 on success, non-zero on failure. 237 */ 238 int 239 vmxnet3_rxqueue_init(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq) 240 { 241 vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing; 242 int err; 243 244 dp->rxPool.nBufsLimit = vmxnet3_getprop(dp, "RxBufPoolLimit", 0, 245 cmdRing->size * 10, cmdRing->size * 2); 246 247 do { 248 if ((err = vmxnet3_rx_populate(dp, rxq, cmdRing->next2fill, 249 B_TRUE, B_FALSE)) != 0) { 250 goto error; 251 } 252 VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill); 253 } while (cmdRing->next2fill); 254 255 /* 256 * Pre-allocate rxPool buffers so that we never have to allocate 257 * new buffers from interrupt context when we need to replace a buffer 258 * in the rxqueue. 259 */ 260 if ((err = vmxnet3_rxpool_init(dp)) != 0) { 261 goto error; 262 } 263 264 return (0); 265 266 error: 267 while (cmdRing->next2fill) { 268 VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill); 269 vmxnet3_free_rxbuf(dp, rxq->bufRing[cmdRing->next2fill].rxBuf); 270 } 271 272 return (err); 273 } 274 275 /* 276 * Finish a RxQueue by freeing all the related rxBufs. 277 */ 278 void 279 vmxnet3_rxqueue_fini(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq) 280 { 281 vmxnet3_rxbuf_t *rxBuf; 282 unsigned int i; 283 284 ASSERT(!dp->devEnabled); 285 286 /* First the rxPool */ 287 while ((rxBuf = vmxnet3_get_rxpool_buf(dp))) 288 vmxnet3_free_rxbuf(dp, rxBuf); 289 290 /* Then the ring */ 291 for (i = 0; i < rxq->cmdRing.size; i++) { 292 rxBuf = rxq->bufRing[i].rxBuf; 293 ASSERT(rxBuf); 294 ASSERT(rxBuf->mblk); 295 /* 296 * Here, freemsg() will trigger a call to vmxnet3_put_rxbuf() 297 * which will then call vmxnet3_free_rxbuf() because the 298 * underlying device is disabled. 299 */ 300 freemsg(rxBuf->mblk); 301 } 302 } 303 304 /* 305 * Determine if a received packet was checksummed by the Vmxnet3 306 * device and tag the mp appropriately. 307 */ 308 static void 309 vmxnet3_rx_hwcksum(vmxnet3_softc_t *dp, mblk_t *mp, 310 Vmxnet3_GenericDesc *compDesc) 311 { 312 uint32_t flags = 0; 313 314 if (!compDesc->rcd.cnc) { 315 if (compDesc->rcd.v4 && compDesc->rcd.ipc) { 316 flags |= HCK_IPV4_HDRCKSUM; 317 if ((compDesc->rcd.tcp || compDesc->rcd.udp) && 318 compDesc->rcd.tuc) { 319 flags |= HCK_FULLCKSUM | HCK_FULLCKSUM_OK; 320 } 321 } 322 323 VMXNET3_DEBUG(dp, 3, "rx cksum flags = 0x%x\n", flags); 324 325 (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, flags, 0); 326 } 327 } 328 329 /* 330 * Interrupt handler for Rx. Look if there are any pending Rx and 331 * put them in mplist. 332 * 333 * Returns: 334 * A list of messages to pass to the MAC subystem. 335 */ 336 mblk_t * 337 vmxnet3_rx_intr(vmxnet3_softc_t *dp, vmxnet3_rxqueue_t *rxq) 338 { 339 vmxnet3_compring_t *compRing = &rxq->compRing; 340 vmxnet3_cmdring_t *cmdRing = &rxq->cmdRing; 341 Vmxnet3_RxQueueCtrl *rxqCtrl = rxq->sharedCtrl; 342 Vmxnet3_GenericDesc *compDesc; 343 mblk_t *mplist = NULL, **mplistTail = &mplist; 344 345 ASSERT(mutex_owned(&dp->intrLock)); 346 347 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp); 348 while (compDesc->rcd.gen == compRing->gen) { 349 mblk_t *mp = NULL, **mpTail = ∓ 350 boolean_t mpValid = B_TRUE; 351 boolean_t eop; 352 353 ASSERT(compDesc->rcd.sop); 354 355 do { 356 uint16_t rxdIdx = compDesc->rcd.rxdIdx; 357 vmxnet3_rxbuf_t *rxBuf = rxq->bufRing[rxdIdx].rxBuf; 358 mblk_t *mblk = rxBuf->mblk; 359 Vmxnet3_GenericDesc *rxDesc; 360 361 while (compDesc->rcd.gen != compRing->gen) { 362 /* 363 * H/W may be still be in the middle of 364 * generating this entry, so hold on until 365 * the gen bit is flipped. 366 */ 367 membar_consumer(); 368 } 369 ASSERT(compDesc->rcd.gen == compRing->gen); 370 ASSERT(rxBuf); 371 ASSERT(mblk); 372 373 /* Some Rx descriptors may have been skipped */ 374 while (cmdRing->next2fill != rxdIdx) { 375 rxDesc = VMXNET3_GET_DESC(cmdRing, 376 cmdRing->next2fill); 377 rxDesc->rxd.gen = cmdRing->gen; 378 VMXNET3_INC_RING_IDX(cmdRing, 379 cmdRing->next2fill); 380 } 381 382 eop = compDesc->rcd.eop; 383 384 /* 385 * Now we have a piece of the packet in the rxdIdx 386 * descriptor. Grab it only if we achieve to replace 387 * it with a fresh buffer. 388 */ 389 if (vmxnet3_rx_populate(dp, rxq, rxdIdx, B_FALSE, 390 B_TRUE) == 0) { 391 /* Success, we can chain the mblk with the mp */ 392 mblk->b_wptr = mblk->b_rptr + compDesc->rcd.len; 393 *mpTail = mblk; 394 mpTail = &mblk->b_cont; 395 ASSERT(*mpTail == NULL); 396 397 VMXNET3_DEBUG(dp, 3, "rx 0x%p on [%u]\n", 398 (void *)mblk, rxdIdx); 399 400 if (eop) { 401 if (!compDesc->rcd.err) { 402 /* 403 * Tag the mp if it was 404 * checksummed by the H/W 405 */ 406 vmxnet3_rx_hwcksum(dp, mp, 407 compDesc); 408 } else { 409 mpValid = B_FALSE; 410 } 411 } 412 } else { 413 /* 414 * Keep the same buffer, we still need 415 * to flip the gen bit 416 */ 417 rxDesc = VMXNET3_GET_DESC(cmdRing, rxdIdx); 418 rxDesc->rxd.gen = cmdRing->gen; 419 mpValid = B_FALSE; 420 } 421 422 VMXNET3_INC_RING_IDX(compRing, compRing->next2comp); 423 VMXNET3_INC_RING_IDX(cmdRing, cmdRing->next2fill); 424 compDesc = VMXNET3_GET_DESC(compRing, 425 compRing->next2comp); 426 } while (!eop); 427 428 if (mp) { 429 if (mpValid) { 430 *mplistTail = mp; 431 mplistTail = &mp->b_next; 432 ASSERT(*mplistTail == NULL); 433 } else { 434 /* This message got holes, drop it */ 435 freemsg(mp); 436 } 437 } 438 } 439 440 if (rxqCtrl->updateRxProd) { 441 uint32_t rxprod; 442 443 /* 444 * All buffers are actually available, but we can't tell that to 445 * the device because it may interpret that as an empty ring. 446 * So skip one buffer. 447 */ 448 if (cmdRing->next2fill) { 449 rxprod = cmdRing->next2fill - 1; 450 } else { 451 rxprod = cmdRing->size - 1; 452 } 453 VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_RXPROD, rxprod); 454 } 455 456 return (mplist); 457 } 458