1 /*
2 * Copyright (C) 2007 VMware, Inc. All rights reserved.
3 *
4 * The contents of this file are subject to the terms of the Common
5 * Development and Distribution License (the "License") version 1.0
6 * and no later version. You may not use this file except in
7 * compliance with the License.
8 *
9 * You can obtain a copy of the License at
10 * http://www.opensource.org/licenses/cddl1.php
11 *
12 * See the License for the specific language governing permissions
13 * and limitations under the License.
14 */
15
16 /*
17 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
18 * Copyright 2018 Joyent, Inc.
19 */
20
21 #include <vmxnet3.h>
22
/*
 * Outcome of an attempt to map one msg onto the Tx command ring
 * (see vmxnet3_tx_one()).
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,		/* msg mapped and queued on the ring */
	VMXNET3_TX_FAILURE,	/* fatal error (DMA bind or offload failure) */
	VMXNET3_TX_PULLUP,	/* msg too fragmented; caller must msgpullup() */
	VMXNET3_TX_RINGFULL	/* not enough free descriptors; retry later */
} vmxnet3_txstatus;
29
/*
 * Hardware offload context computed for one outgoing msg by
 * vmxnet3_tx_prepare_offload() and copied into the SOP Tx descriptor.
 */
typedef struct vmxnet3_offload_t {
	uint16_t om;		/* offload mode: VMXNET3_OM_{NONE,CSUM,TSO} */
	uint16_t hlen;		/* header length covered by the offload */
	uint16_t msscof;	/* TSO: MSS; CSUM: checksum "stuff" offset */
} vmxnet3_offload_t;
35
/*
 * Initialize a TxQueue. Currently nothing needs to be done.
 *
 * Returns 0 (success) unconditionally; both arguments are unused.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (0);
}
45
46 /*
47 * Finish a TxQueue by freeing all pending Tx.
48 */
49 void
vmxnet3_txqueue_fini(vmxnet3_softc_t * dp,vmxnet3_txqueue_t * txq)50 vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
51 {
52 unsigned int i;
53
54 ASSERT(!dp->devEnabled);
55
56 for (i = 0; i < txq->cmdRing.size; i++) {
57 mblk_t *mp = txq->metaRing[i].mp;
58 if (mp) {
59 freemsg(mp);
60 }
61 }
62 }
63
64 /*
65 * Build the offload context of a msg.
66 *
67 * Returns:
68 * 0 if everything went well.
69 * +n if n bytes need to be pulled up.
70 * -1 in case of error (not used).
71 */
72 static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t * dp,vmxnet3_offload_t * ol,mblk_t * mp)73 vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
74 mblk_t *mp)
75 {
76 int ret = 0;
77 uint32_t start, stuff, value, flags, lso_flag, mss;
78
79 ol->om = VMXNET3_OM_NONE;
80 ol->hlen = 0;
81 ol->msscof = 0;
82
83 mac_hcksum_get(mp, &start, &stuff, NULL, &value, &flags);
84
85 mac_lso_get(mp, &mss, &lso_flag);
86
87 if (flags || lso_flag) {
88 struct ether_vlan_header *eth = (void *) mp->b_rptr;
89 uint8_t ethLen;
90
91 if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
92 ethLen = sizeof (struct ether_vlan_header);
93 } else {
94 ethLen = sizeof (struct ether_header);
95 }
96
97 VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
98 "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);
99
100 if (lso_flag & HW_LSO) {
101 mblk_t *mblk = mp;
102 uint8_t *ip, *tcp;
103 uint8_t ipLen, tcpLen;
104
105 /*
106 * Copy e1000g's behavior:
107 * - Do not assume all the headers are in the same mblk.
108 * - Assume each header is always within one mblk.
109 * - Assume the ethernet header is in the first mblk.
110 */
111 ip = mblk->b_rptr + ethLen;
112 if (ip >= mblk->b_wptr) {
113 mblk = mblk->b_cont;
114 ip = mblk->b_rptr;
115 }
116 ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
117 tcp = ip + ipLen;
118 if (tcp >= mblk->b_wptr) {
119 mblk = mblk->b_cont;
120 tcp = mblk->b_rptr;
121 }
122 tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
123 /* Careful, '>' instead of '>=' here */
124 if (tcp + tcpLen > mblk->b_wptr) {
125 mblk = mblk->b_cont;
126 }
127
128 ol->om = VMXNET3_OM_TSO;
129 ol->hlen = ethLen + ipLen + tcpLen;
130 ol->msscof = mss;
131
132 if (mblk != mp) {
133 ret = ol->hlen;
134 }
135 } else if (flags & HCK_PARTIALCKSUM) {
136 ol->om = VMXNET3_OM_CSUM;
137 ol->hlen = start + ethLen;
138 ol->msscof = stuff + ethLen;
139 }
140 }
141
142 return (ret);
143 }
144
/*
 * Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Returns:
 *	VMXNET3_TX_OK if everything went well.
 *	VMXNET3_TX_RINGFULL if the ring is nearly full.
 *	VMXNET3_TX_PULLUP if the msg is overfragmented.
 *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *	The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	int ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	sopIdx = eopIdx = cmdRing->next2fill;
	sopGen = cmdRing->gen;
	/*
	 * All descriptors except the SOP are written with the current
	 * generation bit; the SOP is written with the *inverted* bit so
	 * the device ignores the packet until the SOP's bit is flipped
	 * to sopGen at the very end (after membar_producer() below),
	 * publishing the whole packet atomically.
	 */
	curGen = !cmdRing->gen;

	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		/* Zero-length mblks contribute nothing; skip them. */
		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

		/* One descriptor (or more, see below) per DMA cookie. */
		do {
			uint64_t addr = cookie.dmac_laddress;
			/* NB: shadows the outer mblk-length 'len'. */
			size_t len = cookie.dmac_size;

			/*
			 * A cookie larger than VMXNET3_MAX_TX_BUF_SIZE is
			 * split across multiple descriptors.
			 */
			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				/*
				 * Non-TSO packets are limited to
				 * VMXNET3_MAX_TXD_PER_PKT descriptors; any
				 * packet is limited by the ring size itself.
				 */
				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				if (cmdRing->avail - frags <= 1) {
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = len;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
				/*
				 * txd.dw2: a length field of 0 encodes
				 * VMXNET3_MAX_TX_BUF_SIZE.
				 */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == len ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				/*
				 * After the SOP, subsequent descriptors use
				 * the real generation bit (which the ring-
				 * index wrap may have just toggled).
				 */
				curGen = cmdRing->gen;

				addr += chunkLen;
				len -= chunkLen;
			} while (len);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	/*
	 * A TSO SOP descriptor must cover at least the full header
	 * (txd.len == 0 encodes the maximum buffer size, see above).
	 */
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	/* Ensure all descriptor stores are visible before publishing. */
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
	if (ol->om == VMXNET3_OM_TSO) {
		/* One deferred doorbell credit per resulting segment. */
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
	    eopIdx);

	goto done;

error:
	/* Reverse the generation bits */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}
315
/*
 * Send packets on a vmxnet3 device.
 *
 * Returns:
 *	NULL in case of success or failure.
 *	The mps to be retransmitted later if the ring is full.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		/* Detach the head packet from the chain. */
		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			/* TSO headers span mblks: flatten the first bytes. */
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				/* Allocation failed: drop this packet. */
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 *
			 * NB: if the message is already a single mblk
			 * (b_cont == NULL), flattening cannot help; status
			 * stays VMXNET3_TX_PULLUP and the packet is dropped
			 * as a fatal failure below.
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		/*
		 * Relink the current packet at the head of the remaining
		 * chain and hand it all back to the caller so nothing is
		 * lost; Tx resumes once completions free descriptors.
		 */
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/*
	 * Notify the device. The doorbell write is batched: it only
	 * happens once txNumDeferred (accumulated by vmxnet3_tx_one())
	 * reaches the shared txThreshold.
	 */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}
421
422 /*
423 * Parse a transmit queue and complete packets.
424 *
425 * Returns:
426 * B_TRUE if Tx must be updated or B_FALSE if no action is required.
427 */
428 boolean_t
vmxnet3_tx_complete(vmxnet3_softc_t * dp,vmxnet3_txqueue_t * txq)429 vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
430 {
431 vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
432 vmxnet3_compring_t *compRing = &txq->compRing;
433 Vmxnet3_GenericDesc *compDesc;
434 boolean_t completedTx = B_FALSE;
435 boolean_t ret = B_FALSE;
436
437 mutex_enter(&dp->txLock);
438
439 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
440 while (compDesc->tcd.gen == compRing->gen) {
441 vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
442 uint16_t sopIdx, eopIdx;
443 mblk_t *mp;
444
445 eopIdx = compDesc->tcd.txdIdx;
446 eopMetaDesc = &txq->metaRing[eopIdx];
447 sopIdx = eopMetaDesc->sopIdx;
448 sopMetaDesc = &txq->metaRing[sopIdx];
449
450 ASSERT(eopMetaDesc->frags);
451 cmdRing->avail += eopMetaDesc->frags;
452
453 ASSERT(sopMetaDesc->mp);
454 mp = sopMetaDesc->mp;
455 freemsg(mp);
456
457 eopMetaDesc->sopIdx = 0;
458 eopMetaDesc->frags = 0;
459 sopMetaDesc->mp = NULL;
460
461 completedTx = B_TRUE;
462
463 VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
464 eopIdx);
465
466 VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
467 compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
468 }
469
470 if (dp->txMustResched && completedTx) {
471 dp->txMustResched = B_FALSE;
472 ret = B_TRUE;
473 }
474
475 mutex_exit(&dp->txLock);
476
477 return (ret);
478 }
479