/*
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common
 * Development and Distribution License (the "License") version 1.0
 * and no later version. You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://www.opensource.org/licenses/cddl1.php
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 */

#include <vmxnet3.h>

/*
 * Outcome of an attempt to map one msg onto the Tx command ring
 * (see vmxnet3_tx_one()).
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,		/* msg mapped and queued on the ring */
	VMXNET3_TX_FAILURE,	/* fatal DMA/offload error; msg is dropped */
	VMXNET3_TX_PULLUP,	/* msg overfragmented; flatten and retry */
	VMXNET3_TX_RINGFULL	/* not enough descriptors; retry later */
} vmxnet3_txstatus;

/*
 * Hardware offload context computed for a msg before it is mapped on the
 * ring; vmxnet3_tx_one() copies these fields into the SOP descriptor.
 */
typedef struct vmxnet3_offload_t {
	uint16_t om;		/* offload mode, one of the VMXNET3_OM_* values */
	uint16_t hlen;		/* header length the offload applies to */
	uint16_t msscof;	/* MSS (TSO) or checksum stuff offset (CSUM) */
} vmxnet3_offload_t;

/*
 * Initialize a TxQueue. Currently nothing needs to be done.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (0);
}

/*
 * Finish a TxQueue by freeing all pending Tx.
 *
 * The device must already be disabled: the meta ring is walked without
 * the tx lock, so no concurrent transmit or completion may be running.
 */
void
vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	unsigned int i;

	ASSERT(!dp->devEnabled);

	/* Free every msg still attached to a SOP slot of the meta ring. */
	for (i = 0; i < txq->cmdRing.size; i++) {
		mblk_t *mp = txq->metaRing[i].mp;
		if (mp) {
			freemsg(mp);
		}
	}
}

/*
 * Build the offload context of a msg.
 *
 * Returns:
 *	0 if everything went well.
 *	+n if n bytes need to be pulled up.
 *	-1 in case of error (not used).
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
    mblk_t *mp)
{
	int ret = 0;
	uint32_t start, stuff, value, flags, lso_flag, mss;

	/* Default: no hardware offload requested for this msg. */
	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

	/* Fetch the checksum and LSO metadata attached to the msg. */
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);

	mac_lso_get(mp, &mss, &lso_flag);

	if (flags || lso_flag) {
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t ethLen;

		/*
		 * Offsets from the stack are relative to the end of the
		 * Ethernet header, which may or may not carry a VLAN tag.
		 */
		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
			ethLen = sizeof (struct ether_vlan_header);
		} else {
			ethLen = sizeof (struct ether_header);
		}

		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);

		if (lso_flag & HW_LSO) {
			mblk_t *mblk = mp;
			uint8_t *ip, *tcp;
			uint8_t ipLen, tcpLen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 */
			ip = mblk->b_rptr + ethLen;
			if (ip >= mblk->b_wptr) {
				/* IP header starts in the next mblk. */
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + ipLen;
			if (tcp >= mblk->b_wptr) {
				/* TCP header starts in the next mblk. */
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcpLen > mblk->b_wptr) {
				mblk = mblk->b_cont;
			}

			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethLen + ipLen + tcpLen;
			ol->msscof = mss;

			/*
			 * If the eth+IP+TCP headers do not all fit in the
			 * first mblk, ask the caller to pull up hlen bytes
			 * so the headers become contiguous (see "Returns"
			 * above).
			 */
			if (mblk != mp) {
				ret = ol->hlen;
			}
		} else if (flags & HCK_PARTIALCKSUM) {
			/*
			 * Partial checksum: hlen/msscof carry the start and
			 * stuff offsets rebased to the frame start.
			 */
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethLen;
			ol->msscof = stuff + ethLen;
		}
	}

	return (ret);
}

/*
 * Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Returns:
 *	VMXNET3_TX_OK if everything went well.
 *	VMXNET3_TX_RINGFULL if the ring is nearly full.
 *	VMXNET3_TX_PULLUP if the msg is overfragmented.
 *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *	The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	int ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	/*
	 * The SOP (start-of-packet) descriptor is initially written with
	 * the *inverted* generation bit (curGen) so the device ignores the
	 * packet until the real bit (sopGen) is flipped in at the very end,
	 * after all other descriptors are in place.
	 */
	sopIdx = eopIdx = cmdRing->next2fill;
	sopGen = cmdRing->gen;
	curGen = !cmdRing->gen;

	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		/* Skip zero-length mblks; they need no descriptor. */
		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

		/* One descriptor per DMA cookie chunk. */
		do {
			uint64_t addr = cookie.dmac_laddress;
			/*
			 * NOTE(review): this 'len' intentionally shadows the
			 * mblk-level 'len' above; from here on it tracks the
			 * unconsumed bytes of the current cookie.
			 */
			size_t len = cookie.dmac_size;

			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				/*
				 * Non-TSO packets may use at most
				 * VMXNET3_MAX_TXD_PER_PKT descriptors; any
				 * packet is bounded by the ring size itself.
				 */
				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				if (cmdRing->avail - frags <= 1) {
					/* Ask for a resched once Tx completes. */
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				/* A descriptor covers at most MAX_TX_BUF_SIZE. */
				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = len;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
				/*
				 * txd.dw2: a length field of 0 encodes a
				 * full-size (VMXNET3_MAX_TX_BUF_SIZE) buffer --
				 * presumably the device decodes 0 as the
				 * maximum; confirm against the vmxnet3 spec.
				 */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == len ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				/*
				 * Only the SOP descriptor keeps the inverted
				 * bit; the rest use the ring's current gen.
				 */
				curGen = cmdRing->gen;

				addr += chunkLen;
				len -= chunkLen;
			} while (len);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	/*
	 * TSO requires the first descriptor to cover at least the full
	 * eth+IP+TCP header (txd.len == 0 encodes a max-size buffer and
	 * therefore always suffices).
	 */
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	/*
	 * Make every prior descriptor write visible before the gen bit
	 * hands the packet over to the device.
	 */
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
	if (ol->om == VMXNET3_OM_TSO) {
		/* Account one deferred Tx per TSO segment (round up). */
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", mp, sopIdx, eopIdx);

	goto done;

error:
	/*
	 * Reverse the generation bits of every descriptor written so far,
	 * rolling next2fill back to the SOP slot so the ring state is as
	 * if this call never happened.
	 */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}

/*
 * Send packets on a vmxnet3 device.
 *
 * Returns:
 *	NULL in case of success or failure.
 *	The mps to be retransmitted later if the ring is full.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		/* Detach the head msg from the b_next chain. */
		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			/* Pull up just enough to make the headers contiguous. */
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		/*
		 * Re-chain the unmapped msg in front of the remainder and
		 * hand the whole chain back for a later retry.
		 */
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/*
	 * Notify the device: ring the TXPROD doorbell only once the number
	 * of deferred descriptors crosses the shared threshold, to batch
	 * register writes.
	 */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}

/*
 * Parse a transmit queue and complete packets.
 *
 * Returns:
 *	B_TRUE if Tx must be updated or B_FALSE if no action is required.
 */
boolean_t
vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	vmxnet3_compring_t *compRing = &txq->compRing;
	Vmxnet3_GenericDesc *compDesc;
	boolean_t completedTx = B_FALSE;
	boolean_t ret = B_FALSE;

	mutex_enter(&dp->txLock);

	/*
	 * Walk the completion ring while its descriptors carry the current
	 * generation bit (i.e. were written by the device this lap).
	 */
	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	while (compDesc->tcd.gen == compRing->gen) {
		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
		uint16_t sopIdx, eopIdx;
		mblk_t *mp;

		/* The completion names the EOP slot; it points back to SOP. */
		eopIdx = compDesc->tcd.txdIdx;
		eopMetaDesc = &txq->metaRing[eopIdx];
		sopIdx = eopMetaDesc->sopIdx;
		sopMetaDesc = &txq->metaRing[sopIdx];

		/* Return the packet's descriptors to the command ring. */
		ASSERT(eopMetaDesc->frags);
		cmdRing->avail += eopMetaDesc->frags;

		ASSERT(sopMetaDesc->mp);
		mp = sopMetaDesc->mp;
		freemsg(mp);

		eopMetaDesc->sopIdx = 0;
		eopMetaDesc->frags = 0;
		sopMetaDesc->mp = NULL;

		completedTx = B_TRUE;

		/* Logs only the pointer value; mp is already freed. */
		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", mp, sopIdx,
		    eopIdx);

		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	}

	/*
	 * If a transmit bailed out with RINGFULL earlier, tell the caller
	 * to resume Tx now that descriptors have been reclaimed.
	 */
	if (dp->txMustResched && completedTx) {
		dp->txMustResched = B_FALSE;
		ret = B_TRUE;
	}

	mutex_exit(&dp->txLock);

	return (ret);
}