/*
 * Copyright (C) 2007 VMware, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of the Common
 * Development and Distribution License (the "License") version 1.0
 * and no later version.  You may not use this file except in
 * compliance with the License.
 *
 * You can obtain a copy of the License at
 *         http://www.opensource.org/licenses/cddl1.php
 *
 * See the License for the specific language governing permissions
 * and limitations under the License.
 */

/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <vmxnet3.h>

typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,
	VMXNET3_TX_FAILURE,
	VMXNET3_TX_PULLUP,
	VMXNET3_TX_RINGFULL
} vmxnet3_txstatus;

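/*
 * Per-packet offload context, filled by vmxnet3_tx_prepare_offload():
 *  - om: hardware offload mode (VMXNET3_OM_NONE, _CSUM or _TSO).
 *  - hlen: for _CSUM, the offset at which checksumming starts; for _TSO,
 *    the total length of the Ethernet, IP and TCP headers.
 *  - msscof: for _CSUM, the offset of the checksum field; for _TSO,
 *    the MSS.
 */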
typedef struct vmxnet3_offload_t {
	uint16_t om;
	uint16_t hlen;
	uint16_t msscof;
} vmxnet3_offload_t;

/*
 * vmxnet3_txqueue_init --
 *
 *    Initialize a TxQueue. Currently nothing needs to be done.
 *
 * Results:
 *    DDI_SUCCESS.
 *
 * Side effects:
 *    None.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (DDI_SUCCESS);
}

/*
 * vmxnet3_txqueue_fini --
 *
 *    Tear down a TxQueue by freeing any pending Tx mblks.
 *
 * Results:
 *    None.
 *
 * Side effects:
 *    None.
 */
void
vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	unsigned int i;

	ASSERT(!dp->devEnabled);

	for (i = 0; i < txq->cmdRing.size; i++) {
		mblk_t *mp = txq->metaRing[i].mp;
		if (mp) {
			freemsg(mp);
		}
	}
}

/*
 * vmxnet3_tx_prepare_offload --
 *
 *    Build the offload context of a msg.
 *
 * Results:
 *    0 if everything went well.
 *    +n if n bytes need to be pulled up.
 *    -1 in case of error (not used).
 *
 * Side effects:
 *    None.
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
    mblk_t *mp)
{
	int ret = 0;
	uint32_t start, stuff, value, flags, lsoflags, mss;

	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

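	/*
	 * For HCK_PARTIALCKSUM, 'start' and 'stuff' are the offsets, from
	 * the end of the MAC header, of the first byte covered by the
	 * checksum and of the checksum field itself, hence the ethLen
	 * adjustments below.
	 */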
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);

	mac_lso_get(mp, &mss, &lsoflags);
	if (lsoflags & HW_LSO) {
		flags |= HW_LSO;
	}

	if (flags) {
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t ethLen;

		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
			ethLen = sizeof (struct ether_vlan_header);
		} else {
			ethLen = sizeof (struct ether_header);
		}

		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);

		if (flags & HCK_PARTIALCKSUM) {
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethLen;
			ol->msscof = stuff + ethLen;
		}
		if (flags & HW_LSO) {
			mblk_t *mblk = mp;
			uint8_t *ip, *tcp;
			uint8_t ipLen, tcpLen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 */
			ip = mblk->b_rptr + ethLen;
			if (ip >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + ipLen;
			if (tcp >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcpLen > mblk->b_wptr) {
				mblk = mblk->b_cont;
			}

			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethLen + ipLen + tcpLen;
			ol->msscof = DB_LSOMSS(mp);

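			/*
			 * If the headers are not all within the first mblk,
			 * report ol->hlen so the caller can pull them up
			 * into a single contiguous block for the device.
			 */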
			if (mblk != mp) {
				ret = ol->hlen;
			}
		}
	}

	return (ret);
}

/*
 * vmxnet3_tx_one --
 *
 *    Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Results:
 *    VMXNET3_TX_OK if everything went well.
 *    VMXNET3_TX_RINGFULL if the ring is nearly full.
 *    VMXNET3_TX_PULLUP if the msg is overfragmented.
 *    VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *    The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	vmxnet3_txstatus ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	sopIdx = eopIdx = cmdRing->next2fill;
	sopGen = cmdRing->gen;
	curGen = !cmdRing->gen;

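	/*
	 * The SOP descriptor is first written with an inverted generation
	 * bit (curGen) so that the device ignores the packet while the rest
	 * of the chain is being filled in; the correct bit (sopGen) is only
	 * set on the SOP, after a producer barrier, once every descriptor
	 * is ready.
	 */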
	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

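		/*
		 * Turn each DMA cookie into one or more Tx descriptors; a
		 * cookie larger than VMXNET3_MAX_TX_BUF_SIZE is split
		 * across several descriptors.
		 */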
		do {
			uint64_t addr = cookie.dmac_laddress;
			size_t dmaLen = cookie.dmac_size;

			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				if (cmdRing->avail - frags <= 1) {
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				if (dmaLen > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = dmaLen;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
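				/*
				 * txd.len is a 14-bit field: a chunk of
				 * exactly VMXNET3_MAX_TX_BUF_SIZE, which
				 * does not fit, is encoded as 0.
				 */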
				/* txd.dw2 */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == dmaLen ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				curGen = cmdRing->gen;

				addr += chunkLen;
				dmaLen -= chunkLen;
			} while (dmaLen);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
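	/*
	 * txNumDeferred counts packets queued since the last doorbell
	 * write; a TSO packet counts once per MSS-sized segment that the
	 * device will generate from it.
	 */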
	if (ol->om == VMXNET3_OM_TSO) {
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", mp, sopIdx, eopIdx);

	goto done;

error:
	/* Reverse the generation bits */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}

/*
 * vmxnet3_tx --
 *
 *    Send packets on a vmxnet3 device.
 *
 * Results:
 *    NULL if all the packets were handled (transmitted or dropped).
 *    The unsent mps, to be retried later, if the ring was full.
 *
 * Side effects:
 *    None.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/*
	 * Notify the device. Doorbell writes are batched: the TXPROD
	 * register is only written once at least txThreshold packets (or
	 * TSO segments) have been queued since the last notification.
	 */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}

/*
 * vmxnet3_tx_complete --
 *
 *    Parse a transmit queue and complete packets.
 *
 * Results:
 *    B_TRUE if some descriptors were completed and a previously stalled
 *    Tx must be resumed, B_FALSE if no action is required.
 *
 * Side effects:
 *    None.
 */
boolean_t
vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	vmxnet3_compring_t *compRing = &txq->compRing;
	Vmxnet3_GenericDesc *compDesc;
	boolean_t completedTx = B_FALSE;
	boolean_t ret = B_FALSE;

	mutex_enter(&dp->txLock);

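	/*
	 * Each completion descriptor names the EOP command descriptor of a
	 * transmitted packet; the meta ring maps that back to the SOP entry
	 * so the whole fragment chain can be reclaimed and the mblk freed.
	 */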
	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	while (compDesc->tcd.gen == compRing->gen) {
		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
		uint16_t sopIdx, eopIdx;
		mblk_t *mp;

		eopIdx = compDesc->tcd.txdIdx;
		eopMetaDesc = &txq->metaRing[eopIdx];
		sopIdx = eopMetaDesc->sopIdx;
		sopMetaDesc = &txq->metaRing[sopIdx];

		ASSERT(eopMetaDesc->frags);
		cmdRing->avail += eopMetaDesc->frags;

		ASSERT(sopMetaDesc->mp);
		mp = sopMetaDesc->mp;
		freemsg(mp);

		eopMetaDesc->sopIdx = 0;
		eopMetaDesc->frags = 0;
		sopMetaDesc->mp = NULL;

		completedTx = B_TRUE;

		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", mp, sopIdx,
		    eopIdx);

		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
	}

	if (dp->txMustResched && completedTx) {
		dp->txMustResched = B_FALSE;
		ret = B_TRUE;
	}

	mutex_exit(&dp->txLock);

	return (ret);
}
514