xref: /illumos-gate/usr/src/uts/intel/io/vmxnet3s/vmxnet3_tx.c (revision c160bf3613805cfb4a89a0433ae896d3594f551f)
1 /*
2  * Copyright (C) 2007 VMware, Inc. All rights reserved.
3  *
4  * The contents of this file are subject to the terms of the Common
5  * Development and Distribution License (the "License") version 1.0
6  * and no later version.  You may not use this file except in
7  * compliance with the License.
8  *
9  * You can obtain a copy of the License at
10  *         http://www.opensource.org/licenses/cddl1.php
11  *
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  */
15 
16 /*
17  * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
18  */
19 
20 #include <vmxnet3.h>
21 
/*
 * Outcome of an attempt to map one packet onto the Tx command ring
 * (see vmxnet3_tx_one()).
 */
typedef enum vmxnet3_txstatus {
	VMXNET3_TX_OK,		/* packet queued on the ring */
	VMXNET3_TX_FAILURE,	/* fatal DMA/offload error; caller drops it */
	VMXNET3_TX_PULLUP,	/* overfragmented; retry after msgpullup() */
	VMXNET3_TX_RINGFULL	/* no room; caller retransmits later */
} vmxnet3_txstatus;

/*
 * Hardware offload context computed from a message's checksum/LSO
 * metadata and written into the SOP Tx descriptor.
 */
typedef struct vmxnet3_offload_t {
	uint16_t om;		/* offload mode (VMXNET3_OM_*) */
	uint16_t hlen;		/* header length; meaning depends on om */
	uint16_t msscof;	/* MSS (TSO) or checksum stuff offset (CSUM) */
} vmxnet3_offload_t;
34 
/*
 * Initialize a TxQueue. Currently nothing needs to be done.
 *
 * Returns:
 *	0 (DDI success), unconditionally.
 */
/* ARGSUSED */
int
vmxnet3_txqueue_init(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
{
	return (0);
}
44 
45 /*
46  * Finish a TxQueue by freeing all pending Tx.
47  */
48 void
49 vmxnet3_txqueue_fini(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
50 {
51 	unsigned int i;
52 
53 	ASSERT(!dp->devEnabled);
54 
55 	for (i = 0; i < txq->cmdRing.size; i++) {
56 		mblk_t *mp = txq->metaRing[i].mp;
57 		if (mp) {
58 			freemsg(mp);
59 		}
60 	}
61 }
62 
/*
 * Build the offload context of a msg.
 *
 * ol->om selects the offload mode; ol->hlen and ol->msscof carry
 * mode-dependent values (see the vmxnet3_offload_t definition).
 *
 * Returns:
 *	0 if everything went well.
 *	+n if n bytes need to be pulled up.
 *	-1 in case of error (not used).
 */
static int
vmxnet3_tx_prepare_offload(vmxnet3_softc_t *dp, vmxnet3_offload_t *ol,
    mblk_t *mp)
{
	int ret = 0;
	uint32_t start, stuff, value, flags, lso_flag, mss;

	/* Default: no hardware offload for this packet. */
	ol->om = VMXNET3_OM_NONE;
	ol->hlen = 0;
	ol->msscof = 0;

	/* Fetch the checksum and LSO metadata attached to the message. */
	hcksum_retrieve(mp, NULL, NULL, &start, &stuff, NULL, &value, &flags);

	mac_lso_get(mp, &mss, &lso_flag);

	if (flags || lso_flag) {
		struct ether_vlan_header *eth = (void *) mp->b_rptr;
		uint8_t ethLen;

		/* start/stuff are L3-relative; account for the L2 header. */
		if (eth->ether_tpid == htons(ETHERTYPE_VLAN)) {
			ethLen = sizeof (struct ether_vlan_header);
		} else {
			ethLen = sizeof (struct ether_header);
		}

		VMXNET3_DEBUG(dp, 4, "flags=0x%x, ethLen=%u, start=%u, "
		    "stuff=%u, value=%u\n", flags, ethLen, start, stuff, value);

		if (lso_flag & HW_LSO) {
			mblk_t *mblk = mp;
			uint8_t *ip, *tcp;
			uint8_t ipLen, tcpLen;

			/*
			 * Copy e1000g's behavior:
			 * - Do not assume all the headers are in the same mblk.
			 * - Assume each header is always within one mblk.
			 * - Assume the ethernet header is in the first mblk.
			 *
			 * NOTE(review): the casts below assume an IPv4/TCP
			 * packet; an LSO request over IPv6 would be mis-parsed
			 * here -- confirm the stack never asks for that.
			 */
			ip = mblk->b_rptr + ethLen;
			if (ip >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				ip = mblk->b_rptr;
			}
			ipLen = IPH_HDR_LENGTH((ipha_t *)ip);
			tcp = ip + ipLen;
			if (tcp >= mblk->b_wptr) {
				mblk = mblk->b_cont;
				tcp = mblk->b_rptr;
			}
			tcpLen = TCP_HDR_LENGTH((tcph_t *)tcp);
			/* Careful, '>' instead of '>=' here */
			if (tcp + tcpLen > mblk->b_wptr) {
				mblk = mblk->b_cont;
			}

			/* TSO: hlen spans all headers, msscof is the MSS. */
			ol->om = VMXNET3_OM_TSO;
			ol->hlen = ethLen + ipLen + tcpLen;
			ol->msscof = mss;

			/*
			 * Headers spill past the first mblk: ask the caller
			 * to pull up hlen bytes into a single mblk.
			 */
			if (mblk != mp) {
				ret = ol->hlen;
			}
		} else if (flags & HCK_PARTIALCKSUM) {
			/*
			 * Partial checksum: hlen is the frame-relative
			 * checksum start, msscof the stuff offset.
			 */
			ol->om = VMXNET3_OM_CSUM;
			ol->hlen = start + ethLen;
			ol->msscof = stuff + ethLen;
		}
	}

	return (ret);
}
143 
/*
 * Map a msg into the Tx command ring of a vmxnet3 device.
 *
 * Returns:
 *	VMXNET3_TX_OK if everything went well.
 *	VMXNET3_TX_RINGFULL if the ring is nearly full.
 *	VMXNET3_TX_PULLUP if the msg is overfragmented.
 *	VMXNET3_TX_FAILURE if there was a DMA or offload error.
 *
 * Side effects:
 *	The ring is filled if VMXNET3_TX_OK is returned.
 */
static vmxnet3_txstatus
vmxnet3_tx_one(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq,
    vmxnet3_offload_t *ol, mblk_t *mp)
{
	int ret = VMXNET3_TX_OK;
	unsigned int frags = 0, totLen = 0;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	Vmxnet3_GenericDesc *txDesc;
	uint16_t sopIdx, eopIdx;	/* first and last descriptor used */
	uint8_t sopGen, curGen;
	mblk_t *mblk;

	mutex_enter(&dp->txLock);

	sopIdx = eopIdx = cmdRing->next2fill;
	sopGen = cmdRing->gen;
	/*
	 * All descriptors except the SOP are written with the inverted
	 * generation bit first, so the device cannot start consuming the
	 * packet before the SOP descriptor is published below.
	 */
	curGen = !cmdRing->gen;

	for (mblk = mp; mblk != NULL; mblk = mblk->b_cont) {
		unsigned int len = MBLKL(mblk);
		ddi_dma_cookie_t cookie;
		uint_t cookieCount;

		/* Zero-length fragments need no descriptor. */
		if (len) {
			totLen += len;
		} else {
			continue;
		}

		if (ddi_dma_addr_bind_handle(dp->txDmaHandle, NULL,
		    (caddr_t)mblk->b_rptr, len,
		    DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, NULL,
		    &cookie, &cookieCount) != DDI_DMA_MAPPED) {
			VMXNET3_WARN(dp, "ddi_dma_addr_bind_handle() failed\n");
			ret = VMXNET3_TX_FAILURE;
			goto error;
		}

		ASSERT(cookieCount);

		/* One or more descriptors per DMA cookie. */
		do {
			uint64_t addr = cookie.dmac_laddress;
			/*
			 * NOTE(review): this inner 'len' intentionally
			 * shadows the fragment-level 'len' above; it tracks
			 * the bytes remaining in the current cookie.
			 */
			size_t len = cookie.dmac_size;

			do {
				uint32_t dw2, dw3;
				size_t chunkLen;

				ASSERT(!txq->metaRing[eopIdx].mp);
				ASSERT(cmdRing->avail - frags);

				/*
				 * Non-TSO packets may use at most
				 * VMXNET3_MAX_TXD_PER_PKT descriptors; ask
				 * the caller to flatten the msg otherwise.
				 */
				if (frags >= cmdRing->size - 1 ||
				    (ol->om != VMXNET3_OM_TSO &&
				    frags >= VMXNET3_MAX_TXD_PER_PKT)) {
					VMXNET3_DEBUG(dp, 2,
					    "overfragmented mp (%u)\n", frags);
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_PULLUP;
					goto error;
				}
				/* Out of descriptors: stop and resched. */
				if (cmdRing->avail - frags <= 1) {
					dp->txMustResched = B_TRUE;
					(void) ddi_dma_unbind_handle(
					    dp->txDmaHandle);
					ret = VMXNET3_TX_RINGFULL;
					goto error;
				}

				/*
				 * Split cookies larger than the maximum
				 * per-descriptor buffer size.
				 */
				if (len > VMXNET3_MAX_TX_BUF_SIZE) {
					chunkLen = VMXNET3_MAX_TX_BUF_SIZE;
				} else {
					chunkLen = len;
				}

				frags++;
				eopIdx = cmdRing->next2fill;

				txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
				ASSERT(txDesc->txd.gen != cmdRing->gen);

				/* txd.addr */
				txDesc->txd.addr = addr;
				/* txd.dw2: len field 0 encodes the max size */
				dw2 = chunkLen == VMXNET3_MAX_TX_BUF_SIZE ?
				    0 : chunkLen;
				dw2 |= curGen << VMXNET3_TXD_GEN_SHIFT;
				txDesc->dword[2] = dw2;
				ASSERT(txDesc->txd.len == len ||
				    txDesc->txd.len == 0);
				/* txd.dw3 */
				dw3 = 0;
				txDesc->dword[3] = dw3;

				VMXNET3_INC_RING_IDX(cmdRing,
				    cmdRing->next2fill);
				/* After the SOP, use the real generation. */
				curGen = cmdRing->gen;

				addr += chunkLen;
				len -= chunkLen;
			} while (len);

			if (--cookieCount) {
				ddi_dma_nextcookie(dp->txDmaHandle, &cookie);
			}
		} while (cookieCount);

		(void) ddi_dma_unbind_handle(dp->txDmaHandle);
	}

	/* Update the EOP descriptor */
	txDesc = VMXNET3_GET_DESC(cmdRing, eopIdx);
	txDesc->dword[3] |= VMXNET3_TXD_CQ | VMXNET3_TXD_EOP;

	/* Update the SOP descriptor. Must be done last */
	txDesc = VMXNET3_GET_DESC(cmdRing, sopIdx);
	/* TSO requires the whole header to fit in the first descriptor. */
	if (ol->om == VMXNET3_OM_TSO && txDesc->txd.len != 0 &&
	    txDesc->txd.len < ol->hlen) {
		ret = VMXNET3_TX_FAILURE;
		goto error;
	}
	txDesc->txd.om = ol->om;
	txDesc->txd.hlen = ol->hlen;
	txDesc->txd.msscof = ol->msscof;
	/* Publish the packet: the gen flip must be the last store. */
	membar_producer();
	txDesc->txd.gen = sopGen;

	/* Update the meta ring & metadata */
	txq->metaRing[sopIdx].mp = mp;
	txq->metaRing[eopIdx].sopIdx = sopIdx;
	txq->metaRing[eopIdx].frags = frags;
	cmdRing->avail -= frags;
	if (ol->om == VMXNET3_OM_TSO) {
		/* One deferred completion per produced TSO segment. */
		txqCtrl->txNumDeferred +=
		    (totLen - ol->hlen + ol->msscof - 1) / ol->msscof;
	} else {
		txqCtrl->txNumDeferred++;
	}

	VMXNET3_DEBUG(dp, 3, "tx 0x%p on [%u;%u]\n", mp, sopIdx, eopIdx);

	goto done;

error:
	/* Reverse the generation bits */
	while (sopIdx != cmdRing->next2fill) {
		VMXNET3_DEC_RING_IDX(cmdRing, cmdRing->next2fill);
		txDesc = VMXNET3_GET_DESC(cmdRing, cmdRing->next2fill);
		txDesc->txd.gen = !cmdRing->gen;
	}

done:
	mutex_exit(&dp->txLock);

	return (ret);
}
313 
/*
 * Send packets on a vmxnet3 device.
 *
 * Walks the b_next chain of messages, mapping each onto the Tx ring.
 * Fatal per-packet failures are counted and dropped; a full ring stops
 * the walk and hands the remaining chain back to the framework.
 *
 * Returns:
 *	NULL in case of success or failure.
 *	The mps to be retransmitted later if the ring is full.
 */
mblk_t *
vmxnet3_tx(void *data, mblk_t *mps)
{
	vmxnet3_softc_t *dp = data;
	vmxnet3_txqueue_t *txq = &dp->txQueue;
	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
	Vmxnet3_TxQueueCtrl *txqCtrl = txq->sharedCtrl;
	vmxnet3_txstatus status = VMXNET3_TX_OK;
	mblk_t *mp;

	ASSERT(mps != NULL);

	do {
		vmxnet3_offload_t ol;
		int pullup;

		/* Detach the head message from the chain. */
		mp = mps;
		mps = mp->b_next;
		mp->b_next = NULL;

		if (DB_TYPE(mp) != M_DATA) {
			/*
			 * PR #315560: M_PROTO mblks could be passed for
			 * some reason. Drop them because we don't understand
			 * them and because their contents are not Ethernet
			 * frames anyway.
			 */
			ASSERT(B_FALSE);
			freemsg(mp);
			continue;
		}

		/*
		 * Prepare the offload while we're still handling the original
		 * message -- msgpullup() discards the metadata afterwards.
		 */
		pullup = vmxnet3_tx_prepare_offload(dp, &ol, mp);
		if (pullup) {
			/* Headers span mblks; flatten the first bytes. */
			mblk_t *new_mp = msgpullup(mp, pullup);
			atomic_inc_32(&dp->tx_pullup_needed);
			freemsg(mp);
			if (new_mp) {
				mp = new_mp;
			} else {
				/* Allocation failed: drop this packet. */
				atomic_inc_32(&dp->tx_pullup_failed);
				continue;
			}
		}

		/*
		 * Try to map the message in the Tx ring.
		 * This call might fail for non-fatal reasons.
		 */
		status = vmxnet3_tx_one(dp, txq, &ol, mp);
		if (status == VMXNET3_TX_PULLUP) {
			/*
			 * Try one more time after flattening
			 * the message with msgpullup().
			 */
			if (mp->b_cont != NULL) {
				mblk_t *new_mp = msgpullup(mp, -1);
				atomic_inc_32(&dp->tx_pullup_needed);
				freemsg(mp);
				if (new_mp) {
					mp = new_mp;
					status = vmxnet3_tx_one(dp, txq, &ol,
					    mp);
				} else {
					atomic_inc_32(&dp->tx_pullup_failed);
					continue;
				}
			}
		}
		if (status != VMXNET3_TX_OK && status != VMXNET3_TX_RINGFULL) {
			/* Fatal failure, drop it */
			atomic_inc_32(&dp->tx_error);
			freemsg(mp);
		}
	} while (mps && status != VMXNET3_TX_RINGFULL);

	if (status == VMXNET3_TX_RINGFULL) {
		/* Re-attach the unsent message and return the chain. */
		atomic_inc_32(&dp->tx_ring_full);
		mp->b_next = mps;
		mps = mp;
	} else {
		ASSERT(!mps);
	}

	/* Notify the device */
	mutex_enter(&dp->txLock);
	if (txqCtrl->txNumDeferred >= txqCtrl->txThreshold) {
		txqCtrl->txNumDeferred = 0;
		VMXNET3_BAR0_PUT32(dp, VMXNET3_REG_TXPROD, cmdRing->next2fill);
	}
	mutex_exit(&dp->txLock);

	return (mps);
}
419 
420 /*
421  * Parse a transmit queue and complete packets.
422  *
423  * Returns:
424  *	B_TRUE if Tx must be updated or B_FALSE if no action is required.
425  */
426 boolean_t
427 vmxnet3_tx_complete(vmxnet3_softc_t *dp, vmxnet3_txqueue_t *txq)
428 {
429 	vmxnet3_cmdring_t *cmdRing = &txq->cmdRing;
430 	vmxnet3_compring_t *compRing = &txq->compRing;
431 	Vmxnet3_GenericDesc *compDesc;
432 	boolean_t completedTx = B_FALSE;
433 	boolean_t ret = B_FALSE;
434 
435 	mutex_enter(&dp->txLock);
436 
437 	compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
438 	while (compDesc->tcd.gen == compRing->gen) {
439 		vmxnet3_metatx_t *sopMetaDesc, *eopMetaDesc;
440 		uint16_t sopIdx, eopIdx;
441 		mblk_t *mp;
442 
443 		eopIdx = compDesc->tcd.txdIdx;
444 		eopMetaDesc = &txq->metaRing[eopIdx];
445 		sopIdx = eopMetaDesc->sopIdx;
446 		sopMetaDesc = &txq->metaRing[sopIdx];
447 
448 		ASSERT(eopMetaDesc->frags);
449 		cmdRing->avail += eopMetaDesc->frags;
450 
451 		ASSERT(sopMetaDesc->mp);
452 		mp = sopMetaDesc->mp;
453 		freemsg(mp);
454 
455 		eopMetaDesc->sopIdx = 0;
456 		eopMetaDesc->frags = 0;
457 		sopMetaDesc->mp = NULL;
458 
459 		completedTx = B_TRUE;
460 
461 		VMXNET3_DEBUG(dp, 3, "cp 0x%p on [%u;%u]\n", mp, sopIdx,
462 		    eopIdx);
463 
464 		VMXNET3_INC_RING_IDX(compRing, compRing->next2comp);
465 		compDesc = VMXNET3_GET_DESC(compRing, compRing->next2comp);
466 	}
467 
468 	if (dp->txMustResched && completedTx) {
469 		dp->txMustResched = B_FALSE;
470 		ret = B_TRUE;
471 	}
472 
473 	mutex_exit(&dp->txLock);
474 
475 	return (ret);
476 }
477