xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c (revision 2cdd73db19663a333ca54a47c4bdf7abe0a4a4dd)
1 /*
2  * Copyright (C) 2007 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 
16 /*
17  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
18  * Copyright 2018 Joyent, Inc.
19  */
20 
21 #include <vmxnet3.h>
22 
/*
 * Outcome of an attempt to map one message into the Tx command ring.
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK = 0,		/* everything went well */
	VMXNET3_TX_FAILURE = 1,		/* DMA or offload error */
	VMXNET3_TX_PULLUP = 2,		/* the msg is overfragmented */
	VMXNET3_TX_RINGFULL = 3		/* the ring is nearly full */
} vmxnet3_txstatus;
29 
/*
 * Offload context of an outgoing msg. It is filled in by
 * vmxnet3_tx_prepare_offload() and copied into the first (SOP) Tx
 * descriptor of the msg by vmxnet3_tx_one().
 */
typedef struct vmxnet3_offload_t {
	/* Offload mode: VMXNET3_OM_NONE, VMXNET3_OM_TSO or VMXNET3_OM_CSUM. */
	uint16_t om;
	/*
	 * TSO: Ethernet + IP + TCP header length.
	 * CSUM: checksum start offset plus the Ethernet header length.
	 */
	uint16_t hlen;
	/*
	 * TSO: the MSS.
	 * CSUM: checksum stuff offset plus the Ethernet header length.
	 */
	uint16_t msscof;
} vmxnet3_offload_t;
35 
/*
 * Initialize a TxQueue. Currently nothing needs to be done, so neither
 * argument is used.
 *
 * Returns:
 *	0, always.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (0);
}
45 
46 /*
47  * Finish a TxQueue by freeing all pending Tx.
48  */
49 void
50 vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
51 {
52 	unsigned int i;
53 
54 	ASSERT(!dp->devEnabled);
55 
56 	for (i = 0; i < txq->cmdRing.size; i++) {
57 		mblk_t *mp = txq->metaRing[i].mp;
58 		if (mp) {
59 			freemsg(mp);
60 		}
61 	}
62 }
63 
64 /*
65  * Build the offload context of a msg.
66  *
67  * Returns:
68  *	0 if everything went well.
69  *	+n if n bytes need to be pulled up.
70  *	-1 in case of error (not used).
71  */
72 static int
73 vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
74     mblk_t *mp)
75 {
76 	int ret = 0;
77 	uint32_t start, stuff, value, flags, lso_flag, mss;
78 
79 	ol->om = VMXNET3_OM_NONE;
80 	ol->hlen = 0;
81 	ol->msscof = 0;
82 
83 	mac_hcksum_get(mp, &start, &stuff, NULL, &value, &flags);
84 
85 	mac_lso_get(mp, &mss, &lso_flag);
86 
87 	if (flags || lso_flag) {
88 		struct ether_vlan_header *eth = (void *) mp->b_rptr;
89 		uint8_t ethLen;
90 
91 		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
92 			ethLen = sizeof (struct ether_vlan_header);
93 		} else {
94 			ethLen = sizeof (struct ether_header);
95 		}
96 
97 		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
98 		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);
99 
100 		if (lso_flag & HW_LSO) {
101 			mblk_t *mblk = mp;
102 			uint8_t *ip, *tcp;
103 			uint8_t ipLen, tcpLen;
104 
105 			/*
106 			 * Copy e1000g's behavior:
107 			 * - Do not assume all the headers are in the same mblk.
108 			 * - Assume each header is always within one mblk.
109 			 * - Assume the ethernet header is in the first mblk.
110 			 */
111 			ip = mblk->b_rptr + ethLen;
112 			if (ip >= mblk->b_wptr) {
113 				mblk = mblk->b_cont;
114 				ip = mblk->b_rptr;
115 			}
116 			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
117 			tcp = ip + ipLen;
118 			if (tcp >= mblk->b_wptr) {
119 				mblk = mblk->b_cont;
120 				tcp = mblk->b_rptr;
121 			}
122 			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
123 			/* Careful, '>' instead of '>=' here */
124 			if (tcp + tcpLen > mblk->b_wptr) {
125 				mblk = mblk->b_cont;
126 			}
127 
128 			ol->om = VMXNET3_OM_TSO;
129 			ol->hlen = ethLen + ipLen + tcpLen;
130 			ol->msscof = mss;
131 
132 			if (mblk != mp) {
133 				ret = ol->hlen;
134 			}
135 		} else if (flags & HCK_PARTIALCKSUM) {
136 			ol->om = VMXNET3_OM_CSUM;
137 			ol->hlen = start + ethLen;
138 			ol->msscof = stuff + ethLen;
139 		}
140 	}
141 
142 	return (ret);
143 }
144 
145 /*
146  * Map a msg into the Tx command ring of a vmxnet3 device.
147  *
148  * Returns:
149  *	VMXNET3_TX_OK if everything went well.
150  *	VMXNET3_TX_RINGFULL if the ring is nearly full.
151  *	VMXNET3_TX_PULLUP if the msg is overfragmented.
152  *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
153  *
154  * Side effects:
155  *	The ring is filled if VMXNET3_TX_OK is returned.
156  */
157 static vmxnet3_txstatus
158 vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
159     vmxnet3_offload_t *ol, mblk_t *mp)
160 {
161 	int ret = VMXNET3_TX_OK;
162 	unsigned int frags = 0, totLen = 0;
163 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
164 	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
165 	Vmxnet3_GenericDesc *txDesc;
166 	uint16_t sopIdx, eopIdx;
167 	uint8_t sopGen, curGen;
168 	mblk_t *mblk;
169 
170 	mutex_enter(&dp->txLock);
171 
172 	sopIdx = eopIdx = cmdRing->next2fill;
173 	sopGen = cmdRing->gen;
174 	curGen = !cmdRing->gen;
175 
176 	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
177 		unsigned int len = MBLKL(mblk);
178 		ddi_dma_cookie_t cookie;
179 		uint_t cookieCount;
180 
181 		if (len) {
182 			totLen += len;
183 		} else {
184 			continue;
185 		}
186 
187 		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
188 		    (caddr_t)mblk->b_rptr, len,
189 		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
190 		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
191 			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
192 			ret = VMXNET3_TX_FAILURE;
193 			goto error;
194 		}
195 
196 		ASSERT(cookieCount);
197 
198 		do {
199 			uint64_t addr = cookie.dmac_laddress;
200 			size_t len = cookie.dmac_size;
201 
202 			do {
203 				uint32_t dw2, dw3;
204 				size_t chunkLen;
205 
206 				ASSERT(!txq->metaRing[eopIdx].mp);
207 				ASSERT(cmdRing->avail - frags);
208 
209 				if (frags >= cmdRing->size - 1 ||
210 				    (ol->om != VMXNET3_OM_TSO &&
211 				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
212 					VMXNET3_DEBUG(dp, 2,
213 					    "overfragmented mp (%u)\n", frags);
214 					(void) ddi_dma_unbind_handle(
215 					    dp->txDmaHandle);
216 					ret = VMXNET3_TX_PULLUP;
217 					goto error;
218 				}
219 				if (cmdRing->avail - frags <= 1) {
220 					dp->txMustResched = B_TRUE;
221 					(void) ddi_dma_unbind_handle(
222 					    dp->txDmaHandle);
223 					ret = VMXNET3_TX_RINGFULL;
224 					goto error;
225 				}
226 
227 				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
228 					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
229 				} else {
230 					chunkLen = len;
231 				}
232 
233 				frags++;
234 				eopIdx = cmdRing->next2fill;
235 
236 				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
237 				ASSERT(txDesc->txd.gen != cmdRing->gen);
238 
239 				/* txd.addr */
240 				txDesc->txd.addr = addr;
241 				/* txd.dw2 */
242 				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
243 				    0 : chunkLen;
244 				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
245 				txDesc->dword[2] = dw2;
246 				ASSERT(txDesc->txd.len == len ||
247 				    txDesc->txd.len == 0);
248 				/* txd.dw3 */
249 				dw3 = 0;
250 				txDesc->dword[3] = dw3;
251 
252 				VMXNET3_INC_RING_IDX(cmdRing,
253 				    cmdRing->next2fill);
254 				curGen = cmdRing->gen;
255 
256 				addr += chunkLen;
257 				len -= chunkLen;
258 			} while (len);
259 
260 			if (--cookieCount) {
261 				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
262 			}
263 		} while (cookieCount);
264 
265 		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
266 	}
267 
268 	/* Update the EOP descriptor */
269 	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
270 	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;
271 
272 	/* Update the SOP descriptor. Must be done last */
273 	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
274 	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
275 	    txDesc->txd.len < ol->hlen) {
276 		ret = VMXNET3_TX_FAILURE;
277 		goto error;
278 	}
279 	txDesc->txd.om = ol->om;
280 	txDesc->txd.hlen = ol->hlen;
281 	txDesc->txd.msscof = ol->msscof;
282 	membar_producer();
283 	txDesc->txd.gen = sopGen;
284 
285 	/* Update the meta ring & metadata */
286 	txq->metaRing[sopIdx].mp = mp;
287 	txq->metaRing[eopIdx].sopIdx = sopIdx;
288 	txq->metaRing[eopIdx].frags = frags;
289 	cmdRing->avail -= frags;
290 	if (ol->om == VMXNET3_OM_TSO) {
291 		txqCtrl->txNumDeferred +=
292 		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
293 	} else {
294 		txqCtrl->txNumDeferred++;
295 	}
296 
297 	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
298 	    eopIdx);
299 
300 	goto done;
301 
302 error:
303 	/* Reverse the generation bits */
304 	while (sopIdx != cmdRing->next2fill) {
305 		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
306 		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
307 		txDesc->txd.gen = !cmdRing->gen;
308 	}
309 
310 done:
311 	mutex_exit(&dp->txLock);
312 
313 	return (ret);
314 }
315 
316 /*
317  * Send packets on a vmxnet3 device.
318  *
319  * Returns:
320  *	NULL in case of success or failure.
321  *	The mps to be retransmitted later if the ring is full.
322  */
323 mblk_t *
324 vmxnet3_tx(void *data, mblk_t *mps)
325 {
326 	vmxnet3_softc_t *dp = data;
327 	vmxnet3_txqueue_t *txq = &dp->txQueue;
328 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
329 	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
330 	vmxnet3_txstatus status = VMXNET3_TX_OK;
331 	mblk_t *mp;
332 
333 	ASSERT(mps != NULL);
334 
335 	do {
336 		vmxnet3_offload_t ol;
337 		int pullup;
338 
339 		mp = mps;
340 		mps = mp->b_next;
341 		mp->b_next = NULL;
342 
343 		if (DB_TYPE(mp) != M_DATA) {
344 			/*
345 			 * PR #315560: M_PROTO mblks could be passed for
346 			 * some reason. Drop them because we don't understand
347 			 * them and because their contents are not Ethernet
348 			 * frames anyway.
349 			 */
350 			ASSERT(B_FALSE);
351 			freemsg(mp);
352 			continue;
353 		}
354 
355 		/*
356 		 * Prepare the offload while we're still handling the original
357 		 * message -- msgpullup() discards the metadata afterwards.
358 		 */
359 		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
360 		if (pullup) {
361 			mblk_t *new_mp = msgpullup(mp, pullup);
362 			atomic_inc_32(&dp->tx_pullup_needed);
363 			freemsg(mp);
364 			if (new_mp) {
365 				mp = new_mp;
366 			} else {
367 				atomic_inc_32(&dp->tx_pullup_failed);
368 				continue;
369 			}
370 		}
371 
372 		/*
373 		 * Try to map the message in the Tx ring.
374 		 * This call might fail for non-fatal reasons.
375 		 */
376 		status = vmxnet3_tx_one(dp, txq, &ol, mp);
377 		if (status == VMXNET3_TX_PULLUP) {
378 			/*
379 			 * Try one more time after flattening
380 			 * the message with msgpullup().
381 			 */
382 			if (mp->b_cont != NULL) {
383 				mblk_t *new_mp = msgpullup(mp, -1);
384 				atomic_inc_32(&dp->tx_pullup_needed);
385 				freemsg(mp);
386 				if (new_mp) {
387 					mp = new_mp;
388 					status = vmxnet3_tx_one(dp, txq, &ol,
389 					    mp);
390 				} else {
391 					atomic_inc_32(&dp->tx_pullup_failed);
392 					continue;
393 				}
394 			}
395 		}
396 		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
397 			/* Fatal failure, drop it */
398 			atomic_inc_32(&dp->tx_error);
399 			freemsg(mp);
400 		}
401 	} while (mps && status != VMXNET3_TX_RINGFULL);
402 
403 	if (status == VMXNET3_TX_RINGFULL) {
404 		atomic_inc_32(&dp->tx_ring_full);
405 		mp->b_next = mps;
406 		mps = mp;
407 	} else {
408 		ASSERT(!mps);
409 	}
410 
411 	/* Notify the device */
412 	mutex_enter(&dp->txLock);
413 	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
414 		txqCtrl->txNumDeferred = 0;
415 		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
416 	}
417 	mutex_exit(&dp->txLock);
418 
419 	return (mps);
420 }
421 
422 /*
423  * Parse a transmit queue and complete packets.
424  *
425  * Returns:
426  *	B_TRUE if Tx must be updated or B_FALSE if no action is required.
427  */
428 boolean_t
429 vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
430 {
431 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
432 	vmxnet3_compring_t *compRing = &txq->compRing;
433 	Vmxnet3_GenericDesc *compDesc;
434 	boolean_t completedTx = B_FALSE;
435 	boolean_t ret = B_FALSE;
436 
437 	mutex_enter(&dp->txLock);
438 
439 	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
440 	while (compDesc->tcd.gen == compRing->gen) {
441 		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
442 		uint16_t sopIdx, eopIdx;
443 		mblk_t *mp;
444 
445 		eopIdx = compDesc->tcd.txdIdx;
446 		eopMetaDesc = &txq->metaRing[eopIdx];
447 		sopIdx = eopMetaDesc->sopIdx;
448 		sopMetaDesc = &txq->metaRing[sopIdx];
449 
450 		ASSERT(eopMetaDesc->frags);
451 		cmdRing->avail += eopMetaDesc->frags;
452 
453 		ASSERT(sopMetaDesc->mp);
454 		mp = sopMetaDesc->mp;
455 		freemsg(mp);
456 
457 		eopMetaDesc->sopIdx = 0;
458 		eopMetaDesc->frags = 0;
459 		sopMetaDesc->mp = NULL;
460 
461 		completedTx = B_TRUE;
462 
463 		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", (void *)mp, sopIdx,
464 		    eopIdx);
465 
466 		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
467 		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
468 	}
469 
470 	if (dp->txMustResched && completedTx) {
471 		dp->txMustResched = B_FALSE;
472 		ret = B_TRUE;
473 	}
474 
475 	mutex_exit(&dp->txLock);
476 
477 	return (ret);
478 }
479