/*
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common
 * Development and Distribution License (the "License") version 1.0
 * and no later version. You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://www.opensource.org/licenses/cddl1.php
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <vmxnet3.h>

/*
 * Outcome of an attempt to map one message onto the Tx command ring
 * (see vmxnet3_tx_one() and the caller's handling in vmxnet3_tx()).
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,		/* message mapped and queued on the ring */
	VMXNET3_TX_FAILURE,	/* fatal DMA/offload error; message dropped */
	VMXNET3_TX_PULLUP,	/* too fragmented; retried after msgpullup() */
	VMXNET3_TX_RINGFULL	/* no ring space; caller requeues the message */
} vmxnet3_txstatus;

/*
 * Per-message hardware-offload context, written into the SOP Tx
 * descriptor by vmxnet3_tx_one().
 */
typedef struct vmxnet3_offload_t {
	uint16_t om;		/* offload mode: VMXNET3_OM_{NONE,CSUM,TSO} */
	uint16_t hlen;		/* header length the device must inspect */
	uint16_t msscof;	/* MSS (TSO) or checksum stuff offset (CSUM) */
} vmxnet3_offload_t;

/*
 * vmxnet3_txqueue_init --
 *
 *    Initialize a TxQueue. Currently nothing needs to be done.
 *
 * Results:
 *    DDI_SUCCESS.
 *
 * Side effects:
 *    None.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (DDI_SUCCESS);
}

/*
 * vmxnet3_txqueue_fini --
 *
 *    Finish a TxQueue by freeing all pending Tx.
 *
 * Results:
 *    DDI_SUCCESS.
 *
 * Side effects:
 *    None.
 */
void
vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	unsigned int i;

	/* The device must be quiesced; no completions can race with us. */
	ASSERT(!dp->devEnabled);

	/*
	 * Any message still referenced by the meta ring was mapped but
	 * never completed by the device; release it now.
	 */
	for (i = 0; i < txq->cmdRing.size; i++) {
		mblk_t *mp = txq->metaRing[i].mp;
		if (mp) {
			freemsg(mp);
		}
	}
}

/*
 * vmxnet3_tx_prepare_offload --
 *
 *    Build the offload context of a msg.
 *
 * Results:
 *    0 if everything went well.
 *    +n if n bytes need to be pulled up.
 *    -1 in case of error (not used).
 *
 * Side effects:
 *    None.
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
    mblk_t *mp)
{
	int ret = 0;
	uint32_t start, stuff, value, flags, lsoflags, mss;

	/* Default: no hardware offload requested for this message. */
	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);

	/* Fold the LSO request into the same flags word as the cksum bits. */
	mac_lso_get(mp, &mss, &lsoflags);
	if (lsoflags & HW_LSO) {
		flags |= HW_LSO;
	}

	if (flags) {
		/*
		 * Determine the Ethernet header length by peeking at the
		 * TPID; VLAN-tagged frames carry a longer header.
		 * NOTE(review): assumes the full Ethernet header is in the
		 * first mblk (see the e1000g-derived assumptions below).
		 */
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t ethLen;

		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
			ethLen = sizeof (struct ether_vlan_header);
		} else {
			ethLen = sizeof (struct ether_header);
		}

		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);

		if (flags & HCK_PARTIALCKSUM) {
			/*
			 * Partial checksum: hlen is the offset where the
			 * device starts summing, msscof is where it stores
			 * the result (both relative to the frame start).
			 */
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethLen;
			ol->msscof = stuff + ethLen;
		}
		if (flags & HW_LSO) {
			mblk_t *mblk = mp;
			uint8_t *ip, *tcp;
			uint8_t ipLen, tcpLen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 */
			ip = mblk->b_rptr + ethLen;
			if (ip >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + ipLen;
			if (tcp >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcpLen > mblk->b_wptr) {
				mblk = mblk->b_cont;
			}

			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethLen + ipLen + tcpLen;
			ol->msscof = DB_LSOMSS(mp);

			/*
			 * If the headers spilled past the first mblk, ask the
			 * caller to pull up at least hlen bytes so the device
			 * sees contiguous headers.
			 */
			if (mblk != mp) {
				ret = ol->hlen;
			}
		}
	}

	return (ret);
}

/*
 * vmxnet3_tx_one --
 *
 *    Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Results:
 *    VMXNET3_TX_OK if everything went well.
 *    VMXNET3_TX_RINGFULL if the ring is nearly full.
 *    VMXNET3_TX_PULLUP if the msg is overfragmented.
 *    VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *    The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	int ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	/*
	 * Remember the start-of-packet slot and its generation.  All
	 * descriptors are first written with the INVERTED generation
	 * (curGen) so the device ignores them; the SOP descriptor is
	 * flipped to sopGen last (after membar_producer() below), which
	 * atomically publishes the whole packet.
	 */
	sopIdx = eopIdx = cmdRing->next2fill;
	sopGen = cmdRing->gen;
	curGen = !cmdRing->gen;

	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		/* Skip zero-length fragments entirely. */
		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

		/* One Tx descriptor per DMA cookie chunk. */
		do {
			uint64_t addr = cookie.dmac_laddress;
			size_t len = cookie.dmac_size;

			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				/*
				 * Non-TSO packets may use at most
				 * VMXNET3_MAX_TXD_PER_PKT descriptors; ask
				 * the caller to flatten the message if we
				 * would exceed that (or the ring size).
				 */
				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				/*
				 * Keep one slot in reserve; flag the ring
				 * full so the caller requeues and waits for
				 * completions.
				 */
				if (cmdRing->avail - frags <= 1) {
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				/*
				 * A single descriptor can address at most
				 * VMXNET3_MAX_TX_BUF_SIZE bytes; split
				 * larger cookies across descriptors.
				 */
				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = len;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
				/* txd.dw2: len==0 encodes the max size */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == len ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				/*
				 * After SOP, descriptors carry the live
				 * generation (which may have flipped if
				 * next2fill wrapped).
				 */
				curGen = cmdRing->gen;

				addr += chunkLen;
				len -= chunkLen;
			} while (len);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		/* TSO headers must fit inside the first descriptor. */
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	/* Ensure all descriptor stores are visible before publishing SOP. */
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
	if (ol->om == VMXNET3_OM_TSO) {
		/* Account one deferred Tx per resulting MSS-sized segment. */
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", mp, sopIdx, eopIdx);

	goto done;

error:
	/* Reverse the generation bits */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}

/*
 * vmxnet3_tx --
 *
 *    Send packets on a vmxnet3 device.
 *
 * Results:
 *    NULL in case of success or failure.
 *    The mps to be retransmitted later if the ring is full.
 *
 * Side effects:
 *    None.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		/* Detach the head message from the chain. */
		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			/* TSO headers span mblks: flatten the first bytes. */
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		/* Put the unsent head back; GLDv3 will retransmit later. */
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/* Notify the device */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}

/*
 * vmxnet3_tx_complete --
 *
 *    Parse a transmit queue and complete packets.
 *
 * Results:
 *    B_TRUE if Tx must be updated or B_FALSE if no action is required.
 *
 * Side effects:
 *    None.
462 */ 463 boolean_t 464 vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq) 465 { 466 vmxnet3_cmdring_t *cmdRing = &txq->cmdRing; 467 vmxnet3_compring_t *compRing = &txq->compRing; 468 Vmxnet3_GenericDesc *compDesc; 469 boolean_t completedTx = B_FALSE; 470 boolean_t ret = B_FALSE; 471 472 mutex_enter(&dp->txLock); 473 474 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp); 475 while (compDesc->tcd.gen == compRing->gen) { 476 vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc; 477 uint16_t sopIdx, eopIdx; 478 mblk_t *mp; 479 480 eopIdx = compDesc->tcd.txdIdx; 481 eopMetaDesc = &txq->metaRing[eopIdx]; 482 sopIdx = eopMetaDesc->sopIdx; 483 sopMetaDesc = &txq->metaRing[sopIdx]; 484 485 ASSERT(eopMetaDesc->frags); 486 cmdRing->avail += eopMetaDesc->frags; 487 488 ASSERT(sopMetaDesc->mp); 489 mp = sopMetaDesc->mp; 490 freemsg(mp); 491 492 eopMetaDesc->sopIdx = 0; 493 eopMetaDesc->frags = 0; 494 sopMetaDesc->mp = NULL; 495 496 completedTx = B_TRUE; 497 498 VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", mp, sopIdx, 499 eopIdx); 500 501 VMXNET3_INC_RING_IDX(compRing, compRing->next2comp); 502 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp); 503 } 504 505 if (dp->txMustResched && completedTx) { 506 dp->txMustResched = B_FALSE; 507 ret = B_TRUE; 508 } 509 510 mutex_exit(&dp->txLock); 511 512 return (ret); 513 } 514