/*
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common
 * Development and Distribution License (the "License") version 1.0
 * and no later version. You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at
 * http://www.opensource.org/licenses/cddl1.php
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright 2018 Joyent, Inc.
 */

#include <vmxnet3.h>

/*
 * Outcome of one attempt to map a message onto the Tx command ring
 * (see vmxnet3_tx_one()).
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,		/* message fully placed on the ring */
	VMXNET3_TX_FAILURE,	/* fatal DMA/offload error, drop the message */
	VMXNET3_TX_PULLUP,	/* too many fragments, retry after msgpullup */
	VMXNET3_TX_RINGFULL	/* not enough free descriptors, retry later */
} vmxnet3_txstatus;

/*
 * Hardware offload context computed once per message by
 * vmxnet3_tx_prepare_offload() and later written into the SOP
 * (start-of-packet) Tx descriptor by vmxnet3_tx_one().
 */
typedef struct vmxnet3_offload_t {
	uint16_t om;		/* offload mode: VMXNET3_OM_{NONE,CSUM,TSO} */
	uint16_t hlen;		/* header length the offload applies to */
	uint16_t msscof;	/* TSO: MSS; CSUM: checksum stuff offset */
} vmxnet3_offload_t;

/*
 * Initialize a TxQueue. Currently nothing needs to be done.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (0);
}

/*
 * Finish a TxQueue by freeing all pending Tx.
 *
 * The device must already be disabled (asserted below), so no completion
 * can race with us and every mblk still referenced by the meta ring is
 * simply freed.
 */
void
vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	unsigned int i;

	ASSERT(!dp->devEnabled);

	for (i = 0; i < txq->cmdRing.size; i++) {
		mblk_t *mp = txq->metaRing[i].mp;
		if (mp) {
			freemsg(mp);
		}
	}
}

/*
 * Build the offload context of a msg.
 *
 * Fills *ol from the checksum/LSO metadata attached to mp by the MAC
 * layer (mac_hcksum_get()/mac_lso_get()).
 *
 * Returns:
 *	0 if everything went well.
 *	+n if n bytes need to be pulled up.
 *	-1 in case of error (not used).
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
    mblk_t *mp)
{
	int ret = 0;
	uint32_t start, stuff, value, flags, lso_flag, mss;

	/* Default: no hardware offload requested for this message. */
	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

	mac_hcksum_get(mp, &start, &stuff, NULL, &value, &flags);

	mac_lso_get(mp, &mss, &lso_flag);

	if (flags || lso_flag) {
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t ethLen;

		/* VLAN-tagged frames have a 4-byte longer Ethernet header. */
		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
			ethLen = sizeof (struct ether_vlan_header);
		} else {
			ethLen = sizeof (struct ether_header);
		}

		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);

		if (lso_flag & HW_LSO) {
			mblk_t *mblk = mp;
			uint8_t *ip, *tcp;
			uint8_t ipLen, tcpLen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 */
			ip = mblk->b_rptr + ethLen;
			if (ip >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + ipLen;
			if (tcp >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcpLen > mblk->b_wptr) {
				mblk = mblk->b_cont;
			}

			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethLen + ipLen + tcpLen;
			ol->msscof = mss;

			/*
			 * If the combined L2/L3/L4 headers did not fit in the
			 * first mblk, ask the caller to pull up hlen bytes so
			 * the device sees contiguous headers.
			 */
			if (mblk != mp) {
				ret = ol->hlen;
			}
		} else if (flags & HCK_PARTIALCKSUM) {
			/*
			 * Partial checksum: start/stuff offsets from the MAC
			 * layer are relative to the IP header, so add the
			 * Ethernet header length for the device.
			 */
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethLen;
			ol->msscof = stuff + ethLen;
		}
	}

	return (ret);
}

/*
 * Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Returns:
 *	VMXNET3_TX_OK if everything went well.
 *	VMXNET3_TX_RINGFULL if the ring is nearly full.
 *	VMXNET3_TX_PULLUP if the msg is overfragmented.
 *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *	The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	int ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	sopIdx = eopIdx = cmdRing->next2fill;
	/*
	 * Generation-bit handshake: every descriptor except the SOP is
	 * written with the ring's current generation (curGen), but the SOP
	 * itself is initially written with the *inverted* generation so the
	 * device ignores the whole packet until the very end, when the SOP
	 * gen is flipped to sopGen below.
	 */
	sopGen = cmdRing->gen;
	curGen = !cmdRing->gen;

	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		/* Skip zero-length mblks; they consume no descriptor. */
		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

		/* One descriptor per DMA cookie chunk. */
		do {
			uint64_t addr = cookie.dmac_laddress;
			size_t len = cookie.dmac_size;

			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				/*
				 * Non-TSO packets may use at most
				 * VMXNET3_MAX_TXD_PER_PKT descriptors; ask the
				 * caller to flatten the message if exceeded.
				 */
				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				/*
				 * Keep one descriptor in reserve; flag the
				 * ring as full so mac is told to resched.
				 */
				if (cmdRing->avail - frags <= 1) {
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				/*
				 * A single descriptor can cover at most
				 * VMXNET3_MAX_TX_BUF_SIZE bytes; split larger
				 * cookies across several descriptors.
				 */
				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = len;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
				/*
				 * txd.dw2: a len field of 0 encodes the
				 * maximum buffer size in the descriptor
				 * format -- TODO confirm against the vmxnet3
				 * device spec.
				 */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == len ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				/*
				 * After the SOP descriptor, subsequent ones
				 * use the ring's real generation (which
				 * VMXNET3_INC_RING_IDX may have toggled on
				 * wrap-around).
				 */
				curGen = cmdRing->gen;

				addr += chunkLen;
				len -= chunkLen;
			} while (len);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	/*
	 * For TSO the headers must fit entirely within the first descriptor's
	 * buffer; a shorter SOP buffer would make the device read garbage
	 * headers, so fail the send instead.
	 */
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	/*
	 * Make all previous descriptor stores visible before flipping the
	 * SOP generation bit, which is what hands the packet to the device.
	 */
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
	if (ol->om == VMXNET3_OM_TSO) {
		/*
		 * A TSO packet counts as one deferred notification per
		 * resulting MSS-sized segment (payload / mss, rounded up).
		 */
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
	    eopIdx);

	goto done;

error:
	/*
	 * Reverse the generation bits of every descriptor we touched, walking
	 * next2fill back to the SOP, so the ring looks untouched to the
	 * device and to the next caller.
	 */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}

/*
 * Send packets on a vmxnet3 device.
 *
 * mac tx entry point: consumes the b_next-linked chain of messages,
 * mapping each onto the command ring via vmxnet3_tx_one().
 *
 * Returns:
 *	NULL in case of success or failure.
 *	The mps to be retransmitted later if the ring is full.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		/* Detach the head message from the chain. */
		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			/* Headers were split across mblks; flatten them. */
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		/*
		 * Put the unsent message back at the head of the chain and
		 * return the remainder so mac retries it later.
		 */
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/*
	 * Notify the device, but only once enough sends have been deferred
	 * (txThreshold), to batch doorbell writes.
	 */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}

/*
 * Parse a transmit queue and complete packets.
 *
 * Walks the completion ring while the device-owned generation bit matches,
 * freeing each completed message and returning its descriptors to the
 * command ring.
 *
 * Returns:
 *	B_TRUE if Tx must be updated or B_FALSE if no action is required.
 */
boolean_t
vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	vmxnet3_compring_t *compRing = &txq->compRing;
	Vmxnet3_GenericDesc *compDesc;
	boolean_t completedTx = B_FALSE;
	boolean_t ret = B_FALSE;

	mutex_enter(&dp->txLock);

	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	while (compDesc->tcd.gen == compRing->gen) {
		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
		uint16_t sopIdx, eopIdx;
		mblk_t *mp;

		/*
		 * The completion names the EOP command descriptor; its meta
		 * entry points back to the SOP, which owns the mblk.
		 */
		eopIdx = compDesc->tcd.txdIdx;
		eopMetaDesc = &txq->metaRing[eopIdx];
		sopIdx = eopMetaDesc->sopIdx;
		sopMetaDesc = &txq->metaRing[sopIdx];

		ASSERT(eopMetaDesc->frags);
		cmdRing->avail += eopMetaDesc->frags;

		ASSERT(sopMetaDesc->mp);
		mp = sopMetaDesc->mp;
		freemsg(mp);

		/* Reset the meta entries for reuse. */
		eopMetaDesc->sopIdx = 0;
		eopMetaDesc->frags = 0;
		sopMetaDesc->mp = NULL;

		completedTx = B_TRUE;

		/* NB: mp is logged by value only; it was freed above. */
		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
		    eopIdx);

		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	}

	/*
	 * If a previous send hit RINGFULL and we just released descriptors,
	 * tell the caller to ask mac to resume transmission.
	 */
	if (dp->txMustResched && completedTx) {
		dp->txMustResched = B_FALSE;
		ret = B_TRUE;
	}

	mutex_exit(&dp->txLock);

	return (ret);
}