/*
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common
 * Development and Distribution License (the "License") version 1.0
 * and no later version. You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at
 *         http://www.opensource.org/licenses/cddl1.php
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 */

#include <vmxnet3.h>

typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,
	VMXNET3_TX_FAILURE,
	VMXNET3_TX_PULLUP,
	VMXNET3_TX_RINGFULL
} vmxnet3_txstatus;

typedef struct vmxnet3_offload_t {
	uint16_t om;
	uint16_t hlen;
	uint16_t msscof;
} vmxnet3_offload_t;

/*
 * Initialize a TxQueue. Currently nothing needs to be done.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (0);
}

/*
 * Finish a TxQueue by freeing all pending Tx.
 */
void
vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	unsigned int i;

	ASSERT(!dp->devEnabled);

	for (i = 0; i < txq->cmdRing.size; i++) {
		mblk_t *mp = txq->metaRing[i].mp;
		if (mp) {
			freemsg(mp);
		}
	}
}

/*
 * Build the offload context of a msg.
 *
 * Returns:
 *	0 if everything went well.
 *	+n if n bytes need to be pulled up.
 *	-1 in case of error (not used).
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
    mblk_t *mp)
{
	int ret = 0;
	uint32_t start, stuff, value, flags, lso_flag, mss;

	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);

	mac_lso_get(mp, &mss, &lso_flag);

	if (flags || lso_flag) {
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t ethLen;

		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
			ethLen = sizeof (struct ether_vlan_header);
		} else {
			ethLen = sizeof (struct ether_header);
		}

		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);

		if (lso_flag & HW_LSO) {
			mblk_t *mblk = mp;
			uint8_t *ip, *tcp;
			uint8_t ipLen, tcpLen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 */
			ip = mblk->b_rptr + ethLen;
			if (ip >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + ipLen;
			if (tcp >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcpLen > mblk->b_wptr) {
				mblk = mblk->b_cont;
			}

			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethLen + ipLen + tcpLen;
			ol->msscof = mss;

			if (mblk != mp) {
				ret = ol->hlen;
			}
		} else if (flags & HCK_PARTIALCKSUM) {
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethLen;
			ol->msscof = stuff + ethLen;
		}
	}

	return (ret);
}
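
/*
 * Worked example for the HCK_PARTIALCKSUM branch above (illustrative
 * values, not taken from a trace): the stack's start/stuff offsets are
 * relative to the start of the IP header, hence the ethLen adjustment.
 * For an untagged IPv4/TCP frame with a 20-byte IP header,
 * hcksum_retrieve() yields start = 20 (first byte of the TCP header)
 * and stuff = 36 (the TCP checksum field), so with ethLen = 14 the
 * code programs hlen = 34 (absolute offset where the device begins
 * checksumming) and msscof = 50 (absolute offset where it stores the
 * result).
 */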

/*
 * Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Returns:
 *	VMXNET3_TX_OK if everything went well.
 *	VMXNET3_TX_RINGFULL if the ring is nearly full.
 *	VMXNET3_TX_PULLUP if the msg is overfragmented.
 *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *	The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	vmxnet3_txstatus ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	sopIdx = eopIdx = cmdRing->next2fill;
	sopGen = cmdRing->gen;
	curGen = !cmdRing->gen;

	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

		do {
			uint64_t addr = cookie.dmac_laddress;
			/* Note: shadows the mblk-level len above; this is */
			/* the number of bytes left in the current cookie. */
			size_t len = cookie.dmac_size;

			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				if (cmdRing->avail - frags <= 1) {
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = len;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
				/* txd.dw2 */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == len ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				curGen = cmdRing->gen;

				addr += chunkLen;
				len -= chunkLen;
			} while (len);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;
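
	/*
	 * Note on ordering (a reading of the code, not of the device
	 * spec): the SOP descriptor was written with an inverted
	 * generation bit (curGen starts as !cmdRing->gen), and the
	 * device consumes descriptors in ring order, so the whole chain
	 * stays invisible to it until the SOP gen bit is flipped to
	 * sopGen below.  The membar_producer() guarantees that every
	 * other field of the chain is globally visible before that flip
	 * publishes the packet.
	 */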

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
	if (ol->om == VMXNET3_OM_TSO) {
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
	    eopIdx);

	goto done;

error:
	/* Reverse the generation bits */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}

/*
 * Send packets on a vmxnet3 device.
 *
 * Returns:
 *	NULL in case of success or failure.
 *	The mps to be retransmitted later if the ring is full.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
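		/*
		 * VMXNET3_TX_PULLUP means the chain needed more
		 * descriptors than the device accepts for one packet
		 * (VMXNET3_MAX_TXD_PER_PKT for non-LSO frames).  The
		 * retry below flattens the whole chain with
		 * msgpullup(mp, -1), which copies every fragment into a
		 * single contiguous mblk, so the second
		 * vmxnet3_tx_one() should need far fewer descriptors.
		 */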
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/* Notify the device */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}

/*
 * Parse a transmit queue and complete packets.
 *
 * Returns:
 *	B_TRUE if Tx must be updated or B_FALSE if no action is required.
 */
boolean_t
vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	vmxnet3_compring_t *compRing = &txq->compRing;
	Vmxnet3_GenericDesc *compDesc;
	boolean_t completedTx = B_FALSE;
	boolean_t ret = B_FALSE;

	mutex_enter(&dp->txLock);

	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	while (compDesc->tcd.gen == compRing->gen) {
		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
		uint16_t sopIdx, eopIdx;
		mblk_t *mp;

		eopIdx = compDesc->tcd.txdIdx;
		eopMetaDesc = &txq->metaRing[eopIdx];
		sopIdx = eopMetaDesc->sopIdx;
		sopMetaDesc = &txq->metaRing[sopIdx];

		ASSERT(eopMetaDesc->frags);
		cmdRing->avail += eopMetaDesc->frags;

		ASSERT(sopMetaDesc->mp);
		mp = sopMetaDesc->mp;
		freemsg(mp);

		eopMetaDesc->sopIdx = 0;
		eopMetaDesc->frags = 0;
		sopMetaDesc->mp = NULL;

		completedTx = B_TRUE;

		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", (void *)mp,
		    sopIdx, eopIdx);

		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	}

	if (dp->txMustResched && completedTx) {
		dp->txMustResched = B_FALSE;
		ret = B_TRUE;
	}

	mutex_exit(&dp->txLock);

	return (ret);
}
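
/*
 * Note on the doorbell coalescing in vmxnet3_tx() above (a reading of
 * the code, not of VMware documentation): the VMXNET3_REG_TXPROD write
 * is only issued once txNumDeferred reaches the device-supplied
 * txThreshold.  A plain packet adds one credit, while an LSO packet
 * adds ceil((totLen - hlen) / mss) credits -- e.g. a 64 KB TSO payload
 * with an MSS of 1460 adds about 45, matching the number of segments
 * the device will put on the wire.
 */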