xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c (revision 30165b7f6753bc3d48c52319bed7ec7b3ea36b3c)
1 /*
2  * Copyright (C) 2007 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 
16 /*
17  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
18  */
19 
20 #include <vmxnet3.h>
21 
/*
 * Outcome of an attempt to map one message into the Tx command ring.
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK = 0,		/* the message was placed on the ring */
	VMXNET3_TX_FAILURE = 1,		/* DMA or offload error */
	VMXNET3_TX_PULLUP = 2,		/* the message is overfragmented */
	VMXNET3_TX_RINGFULL = 3		/* the ring is nearly full */
} vmxnet3_txstatus;
28 
/*
 * Offload context of one outgoing message. The three fields are copied
 * verbatim into the om, hlen and msscof fields of the SOP Tx descriptor.
 */
typedef struct vmxnet3_offload_t vmxnet3_offload_t;

struct vmxnet3_offload_t {
	/* Offload mode: one of the VMXNET3_OM_* values. */
	uint16_t om;
	/*
	 * TSO: combined length of the Ethernet, IP and TCP headers.
	 * CSUM: checksum start offset plus the Ethernet header length.
	 */
	uint16_t hlen;
	/*
	 * TSO: the MSS.
	 * CSUM: checksum stuff offset plus the Ethernet header length.
	 */
	uint16_t msscof;
};
34 
35 /*
36  * Initialize a TxQueue. Currently nothing needs to be done.
37  */
38 /* ARGSUSED */
39 int
40 vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
41 {
42 	return (0);
43 }
44 
45 /*
46  * Finish a TxQueue by freeing all pending Tx.
47  */
48 void
49 vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
50 {
51 	unsigned int i;
52 
53 	ASSERT(!dp->devEnabled);
54 
55 	for (i = 0; i < txq->cmdRing.size; i++) {
56 		mblk_t *mp = txq->metaRing[i].mp;
57 		if (mp) {
58 			freemsg(mp);
59 		}
60 	}
61 }
62 
63 /*
64  * Build the offload context of a msg.
65  *
66  * Returns:
67  *	0 if everything went well.
68  *	+n if n bytes need to be pulled up.
69  *	-1 in case of error (not used).
70  */
71 static int
72 vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
73     mblk_t *mp)
74 {
75 	int ret = 0;
76 	uint32_t start, stuff, value, flags, lso_flag, mss;
77 
78 	ol->om = VMXNET3_OM_NONE;
79 	ol->hlen = 0;
80 	ol->msscof = 0;
81 
82 	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);
83 
84 	mac_lso_get(mp, &mss, &lso_flag);
85 
86 	if (flags || lso_flag) {
87 		struct ether_vlan_header *eth = (void *) mp->b_rptr;
88 		uint8_t ethLen;
89 
90 		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
91 			ethLen = sizeof (struct ether_vlan_header);
92 		} else {
93 			ethLen = sizeof (struct ether_header);
94 		}
95 
96 		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
97 		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);
98 
99 		if (lso_flag & HW_LSO) {
100 			mblk_t *mblk = mp;
101 			uint8_t *ip, *tcp;
102 			uint8_t ipLen, tcpLen;
103 
104 			/*
105 			 * Copy e1000g's behavior:
106 			 * - Do not assume all the headers are in the same mblk.
107 			 * - Assume each header is always within one mblk.
108 			 * - Assume the ethernet header is in the first mblk.
109 			 */
110 			ip = mblk->b_rptr + ethLen;
111 			if (ip >= mblk->b_wptr) {
112 				mblk = mblk->b_cont;
113 				ip = mblk->b_rptr;
114 			}
115 			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
116 			tcp = ip + ipLen;
117 			if (tcp >= mblk->b_wptr) {
118 				mblk = mblk->b_cont;
119 				tcp = mblk->b_rptr;
120 			}
121 			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
122 			/* Careful, '>' instead of '>=' here */
123 			if (tcp + tcpLen > mblk->b_wptr) {
124 				mblk = mblk->b_cont;
125 			}
126 
127 			ol->om = VMXNET3_OM_TSO;
128 			ol->hlen = ethLen + ipLen + tcpLen;
129 			ol->msscof = mss;
130 
131 			if (mblk != mp) {
132 				ret = ol->hlen;
133 			}
134 		} else if (flags & HCK_PARTIALCKSUM) {
135 			ol->om = VMXNET3_OM_CSUM;
136 			ol->hlen = start + ethLen;
137 			ol->msscof = stuff + ethLen;
138 		}
139 	}
140 
141 	return (ret);
142 }
143 
144 /*
145  * Map a msg into the Tx command ring of a vmxnet3 device.
146  *
147  * Returns:
148  *	VMXNET3_TX_OK if everything went well.
149  *	VMXNET3_TX_RINGFULL if the ring is nearly full.
150  *	VMXNET3_TX_PULLUP if the msg is overfragmented.
151  *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
152  *
153  * Side effects:
154  *	The ring is filled if VMXNET3_TX_OK is returned.
155  */
156 static vmxnet3_txstatus
157 vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
158     vmxnet3_offload_t *ol, mblk_t *mp)
159 {
160 	int ret = VMXNET3_TX_OK;
161 	unsigned int frags = 0, totLen = 0;
162 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
163 	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
164 	Vmxnet3_GenericDesc *txDesc;
165 	uint16_t sopIdx, eopIdx;
166 	uint8_t sopGen, curGen;
167 	mblk_t *mblk;
168 
169 	mutex_enter(&dp->txLock);
170 
171 	sopIdx = eopIdx = cmdRing->next2fill;
172 	sopGen = cmdRing->gen;
173 	curGen = !cmdRing->gen;
174 
175 	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
176 		unsigned int len = MBLKL(mblk);
177 		ddi_dma_cookie_t cookie;
178 		uint_t cookieCount;
179 
180 		if (len) {
181 			totLen += len;
182 		} else {
183 			continue;
184 		}
185 
186 		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
187 		    (caddr_t)mblk->b_rptr, len,
188 		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
189 		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
190 			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
191 			ret = VMXNET3_TX_FAILURE;
192 			goto error;
193 		}
194 
195 		ASSERT(cookieCount);
196 
197 		do {
198 			uint64_t addr = cookie.dmac_laddress;
199 			size_t len = cookie.dmac_size;
200 
201 			do {
202 				uint32_t dw2, dw3;
203 				size_t chunkLen;
204 
205 				ASSERT(!txq->metaRing[eopIdx].mp);
206 				ASSERT(cmdRing->avail - frags);
207 
208 				if (frags >= cmdRing->size - 1 ||
209 				    (ol->om != VMXNET3_OM_TSO &&
210 				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
211 					VMXNET3_DEBUG(dp, 2,
212 					    "overfragmented mp (%u)\n", frags);
213 					(void) ddi_dma_unbind_handle(
214 					    dp->txDmaHandle);
215 					ret = VMXNET3_TX_PULLUP;
216 					goto error;
217 				}
218 				if (cmdRing->avail - frags <= 1) {
219 					dp->txMustResched = B_TRUE;
220 					(void) ddi_dma_unbind_handle(
221 					    dp->txDmaHandle);
222 					ret = VMXNET3_TX_RINGFULL;
223 					goto error;
224 				}
225 
226 				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
227 					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
228 				} else {
229 					chunkLen = len;
230 				}
231 
232 				frags++;
233 				eopIdx = cmdRing->next2fill;
234 
235 				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
236 				ASSERT(txDesc->txd.gen != cmdRing->gen);
237 
238 				/* txd.addr */
239 				txDesc->txd.addr = addr;
240 				/* txd.dw2 */
241 				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
242 				    0 : chunkLen;
243 				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
244 				txDesc->dword[2] = dw2;
245 				ASSERT(txDesc->txd.len == len ||
246 				    txDesc->txd.len == 0);
247 				/* txd.dw3 */
248 				dw3 = 0;
249 				txDesc->dword[3] = dw3;
250 
251 				VMXNET3_INC_RING_IDX(cmdRing,
252 				    cmdRing->next2fill);
253 				curGen = cmdRing->gen;
254 
255 				addr += chunkLen;
256 				len -= chunkLen;
257 			} while (len);
258 
259 			if (--cookieCount) {
260 				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
261 			}
262 		} while (cookieCount);
263 
264 		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
265 	}
266 
267 	/* Update the EOP descriptor */
268 	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
269 	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;
270 
271 	/* Update the SOP descriptor. Must be done last */
272 	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
273 	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
274 	    txDesc->txd.len < ol->hlen) {
275 		ret = VMXNET3_TX_FAILURE;
276 		goto error;
277 	}
278 	txDesc->txd.om = ol->om;
279 	txDesc->txd.hlen = ol->hlen;
280 	txDesc->txd.msscof = ol->msscof;
281 	membar_producer();
282 	txDesc->txd.gen = sopGen;
283 
284 	/* Update the meta ring & metadata */
285 	txq->metaRing[sopIdx].mp = mp;
286 	txq->metaRing[eopIdx].sopIdx = sopIdx;
287 	txq->metaRing[eopIdx].frags = frags;
288 	cmdRing->avail -= frags;
289 	if (ol->om == VMXNET3_OM_TSO) {
290 		txqCtrl->txNumDeferred +=
291 		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
292 	} else {
293 		txqCtrl->txNumDeferred++;
294 	}
295 
296 	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
297 	    eopIdx);
298 
299 	goto done;
300 
301 error:
302 	/* Reverse the generation bits */
303 	while (sopIdx != cmdRing->next2fill) {
304 		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
305 		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
306 		txDesc->txd.gen = !cmdRing->gen;
307 	}
308 
309 done:
310 	mutex_exit(&dp->txLock);
311 
312 	return (ret);
313 }
314 
315 /*
316  * Send packets on a vmxnet3 device.
317  *
318  * Returns:
319  *	NULL in case of success or failure.
320  *	The mps to be retransmitted later if the ring is full.
321  */
322 mblk_t *
323 vmxnet3_tx(void *data, mblk_t *mps)
324 {
325 	vmxnet3_softc_t *dp = data;
326 	vmxnet3_txqueue_t *txq = &dp->txQueue;
327 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
328 	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
329 	vmxnet3_txstatus status = VMXNET3_TX_OK;
330 	mblk_t *mp;
331 
332 	ASSERT(mps != NULL);
333 
334 	do {
335 		vmxnet3_offload_t ol;
336 		int pullup;
337 
338 		mp = mps;
339 		mps = mp->b_next;
340 		mp->b_next = NULL;
341 
342 		if (DB_TYPE(mp) != M_DATA) {
343 			/*
344 			 * PR #315560: M_PROTO mblks could be passed for
345 			 * some reason. Drop them because we don't understand
346 			 * them and because their contents are not Ethernet
347 			 * frames anyway.
348 			 */
349 			ASSERT(B_FALSE);
350 			freemsg(mp);
351 			continue;
352 		}
353 
354 		/*
355 		 * Prepare the offload while we're still handling the original
356 		 * message -- msgpullup() discards the metadata afterwards.
357 		 */
358 		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
359 		if (pullup) {
360 			mblk_t *new_mp = msgpullup(mp, pullup);
361 			atomic_inc_32(&dp->tx_pullup_needed);
362 			freemsg(mp);
363 			if (new_mp) {
364 				mp = new_mp;
365 			} else {
366 				atomic_inc_32(&dp->tx_pullup_failed);
367 				continue;
368 			}
369 		}
370 
371 		/*
372 		 * Try to map the message in the Tx ring.
373 		 * This call might fail for non-fatal reasons.
374 		 */
375 		status = vmxnet3_tx_one(dp, txq, &ol, mp);
376 		if (status == VMXNET3_TX_PULLUP) {
377 			/*
378 			 * Try one more time after flattening
379 			 * the message with msgpullup().
380 			 */
381 			if (mp->b_cont != NULL) {
382 				mblk_t *new_mp = msgpullup(mp, -1);
383 				atomic_inc_32(&dp->tx_pullup_needed);
384 				freemsg(mp);
385 				if (new_mp) {
386 					mp = new_mp;
387 					status = vmxnet3_tx_one(dp, txq, &ol,
388 					    mp);
389 				} else {
390 					atomic_inc_32(&dp->tx_pullup_failed);
391 					continue;
392 				}
393 			}
394 		}
395 		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
396 			/* Fatal failure, drop it */
397 			atomic_inc_32(&dp->tx_error);
398 			freemsg(mp);
399 		}
400 	} while (mps && status != VMXNET3_TX_RINGFULL);
401 
402 	if (status == VMXNET3_TX_RINGFULL) {
403 		atomic_inc_32(&dp->tx_ring_full);
404 		mp->b_next = mps;
405 		mps = mp;
406 	} else {
407 		ASSERT(!mps);
408 	}
409 
410 	/* Notify the device */
411 	mutex_enter(&dp->txLock);
412 	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
413 		txqCtrl->txNumDeferred = 0;
414 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
415 	}
416 	mutex_exit(&dp->txLock);
417 
418 	return (mps);
419 }
420 
421 /*
422  * Parse a transmit queue and complete packets.
423  *
424  * Returns:
425  *	B_TRUE if Tx must be updated or B_FALSE if no action is required.
426  */
427 boolean_t
428 vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
429 {
430 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
431 	vmxnet3_compring_t *compRing = &txq->compRing;
432 	Vmxnet3_GenericDesc *compDesc;
433 	boolean_t completedTx = B_FALSE;
434 	boolean_t ret = B_FALSE;
435 
436 	mutex_enter(&dp->txLock);
437 
438 	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
439 	while (compDesc->tcd.gen == compRing->gen) {
440 		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
441 		uint16_t sopIdx, eopIdx;
442 		mblk_t *mp;
443 
444 		eopIdx = compDesc->tcd.txdIdx;
445 		eopMetaDesc = &txq->metaRing[eopIdx];
446 		sopIdx = eopMetaDesc->sopIdx;
447 		sopMetaDesc = &txq->metaRing[sopIdx];
448 
449 		ASSERT(eopMetaDesc->frags);
450 		cmdRing->avail += eopMetaDesc->frags;
451 
452 		ASSERT(sopMetaDesc->mp);
453 		mp = sopMetaDesc->mp;
454 		freemsg(mp);
455 
456 		eopMetaDesc->sopIdx = 0;
457 		eopMetaDesc->frags = 0;
458 		sopMetaDesc->mp = NULL;
459 
460 		completedTx = B_TRUE;
461 
462 		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
463 		    eopIdx);
464 
465 		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
466 		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
467 	}
468 
469 	if (dp->txMustResched && completedTx) {
470 		dp->txMustResched = B_FALSE;
471 		ret = B_TRUE;
472 	}
473 
474 	mutex_exit(&dp->txLock);
475 
476 	return (ret);
477 }
478