xref: /illumos-gate/usr/src/uts/common/io/ena/ena_tx.c (revision ddb365bfc9e868ad24ccdcb0dc91af18b10df082)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2021 Oxide Computer Company
14  */
15 #include "ena.h"
16 
17 void
18 ena_free_tx_dma(ena_txq_t *txq)
19 {
20 	if (txq->et_tcbs != NULL) {
21 		for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
22 			ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
23 			ena_dma_free(&tcb->etcb_dma);
24 		}
25 
26 		kmem_free(txq->et_tcbs,
27 		    sizeof (*txq->et_tcbs) * txq->et_sq_num_descs);
28 
29 		txq->et_tcbs = NULL;
30 
31 	}
32 
33 	ena_dma_free(&txq->et_cq_dma);
34 	txq->et_cq_descs = NULL;
35 
36 	ena_dma_free(&txq->et_sq_dma);
37 	txq->et_sq_descs = NULL;
38 
39 	txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC;
40 }
41 
42 static int
43 ena_alloc_tx_dma(ena_txq_t *txq)
44 {
45 	ena_t *ena = txq->et_ena;
46 	size_t cq_descs_sz;
47 	size_t sq_descs_sz;
48 	int err = 0;
49 	ena_dma_conf_t conf;
50 
51 	ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
52 	ASSERT3P(ena, !=, NULL);
53 
54 	cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs);
55 	sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs);
56 
57 	conf = (ena_dma_conf_t) {
58 		.edc_size = sq_descs_sz,
59 		.edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
60 		.edc_sgl = 1,
61 		.edc_endian = DDI_NEVERSWAP_ACC,
62 		.edc_stream = B_FALSE,
63 	};
64 
65 	if (!ena_dma_alloc(ena, &txq->et_sq_dma, &conf, sq_descs_sz)) {
66 		return (ENOMEM);
67 	}
68 
69 	bzero(txq->et_sq_dma.edb_va, sq_descs_sz);
70 	txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va;
71 	txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) *
72 	    txq->et_sq_num_descs, KM_SLEEP);
73 
74 	for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
75 		ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
76 		ena_dma_conf_t buf_conf = {
77 			.edc_size = ena->ena_tx_buf_sz,
78 			.edc_align = 1,
79 			.edc_sgl = ena->ena_tx_sgl_max_sz,
80 			.edc_endian = DDI_NEVERSWAP_ACC,
81 			.edc_stream = B_TRUE,
82 		};
83 
84 		if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf,
85 		    ena->ena_tx_buf_sz)) {
86 			err = ENOMEM;
87 			goto error;
88 		}
89 	}
90 
91 	conf = (ena_dma_conf_t) {
92 		.edc_size = cq_descs_sz,
93 		.edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
94 		.edc_sgl = 1,
95 		.edc_endian = DDI_NEVERSWAP_ACC,
96 		.edc_stream = B_FALSE,
97 	};
98 
99 	if (!ena_dma_alloc(ena, &txq->et_cq_dma, &conf, cq_descs_sz)) {
100 		err = ENOMEM;
101 		goto error;
102 	}
103 
104 	bzero(txq->et_cq_dma.edb_va, cq_descs_sz);
105 	txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va;
106 	txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC;
107 	return (0);
108 
109 error:
110 	ena_free_tx_dma(txq);
111 	return (err);
112 }
113 
114 boolean_t
115 ena_alloc_txq(ena_txq_t *txq)
116 {
117 	int ret = 0;
118 	ena_t *ena = txq->et_ena;
119 	uint16_t cq_hw_idx, sq_hw_idx;
120 	uint32_t *cq_unmask_addr, *cq_headdb, *cq_numanode;
121 	uint32_t *sq_db_addr;
122 
123 	ASSERT3U(txq->et_cq_num_descs, >, 0);
124 
125 	/*
126 	 * First, allocate the Tx data buffers.
127 	 */
128 	if ((ret = ena_alloc_tx_dma(txq)) != 0) {
129 		ena_err(ena, "failed to allocate Tx queue %u data buffers: %d",
130 		    txq->et_txqs_idx, ret);
131 		return (B_FALSE);
132 	}
133 
134 	ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
135 
136 	/*
137 	 * Second, create the Completion Queue.
138 	 */
139 	ret = ena_create_cq(ena, txq->et_cq_num_descs,
140 	    txq->et_cq_dma.edb_cookie->dmac_laddress, B_TRUE,
141 	    txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_headdb,
142 	    &cq_numanode);
143 
144 	if (ret != 0) {
145 		ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx,
146 		    ret);
147 		return (B_FALSE);
148 	}
149 
150 	txq->et_cq_hw_idx = cq_hw_idx;
151 	txq->et_cq_phase = 1;
152 	txq->et_cq_unmask_addr = cq_unmask_addr;
153 	txq->et_cq_head_db_addr = cq_headdb;
154 	txq->et_cq_numa_addr = cq_numanode;
155 	txq->et_state |= ENA_TXQ_STATE_CQ_CREATED;
156 
157 	/*
158 	 * Third, create the Submission Queue to match with the above
159 	 * CQ. At this time we force the SQ and CQ to have the same
160 	 * number of descriptors as we only use a 1:1 completion
161 	 * policy. However, in the future, we could loosen this and
162 	 * use an on-demand completion policy and the two could have a
163 	 * different number of descriptors.
164 	 */
165 	ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs);
166 
167 	ret = ena_create_sq(ena, txq->et_sq_num_descs,
168 	    txq->et_sq_dma.edb_cookie->dmac_laddress, B_TRUE, cq_hw_idx,
169 	    &sq_hw_idx, &sq_db_addr);
170 
171 	if (ret != 0) {
172 		ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx,
173 		    ret);
174 		return (B_FALSE);
175 	}
176 
177 	txq->et_sq_hw_idx = sq_hw_idx;
178 	txq->et_sq_db_addr = sq_db_addr;
179 	/* The phase must always start on 1. */
180 	txq->et_sq_phase = 1;
181 	txq->et_sq_avail_descs = txq->et_sq_num_descs;
182 	txq->et_blocked = B_FALSE;
183 	txq->et_state |= ENA_TXQ_STATE_SQ_CREATED;
184 
185 	return (B_TRUE);
186 }
187 
188 void
189 ena_cleanup_txq(ena_txq_t *txq)
190 {
191 	int ret = 0;
192 	ena_t *ena = txq->et_ena;
193 
194 	if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) {
195 		ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, B_TRUE);
196 
197 		if (ret != 0) {
198 			ena_err(ena, "failed to destroy Tx SQ %u: %d",
199 			    txq->et_txqs_idx, ret);
200 		}
201 
202 		txq->et_sq_hw_idx = 0;
203 		txq->et_sq_db_addr = NULL;
204 		txq->et_sq_tail_idx = 0;
205 		txq->et_sq_phase = 0;
206 		txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED;
207 	}
208 
209 	if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) {
210 		ret = ena_destroy_cq(ena, txq->et_cq_hw_idx);
211 
212 		if (ret != 0) {
213 			ena_err(ena, "failed to destroy Tx CQ %u: %d",
214 			    txq->et_txqs_idx, ret);
215 		}
216 
217 		txq->et_cq_hw_idx = 0;
218 		txq->et_cq_head_idx = 0;
219 		txq->et_cq_phase = 0;
220 		txq->et_cq_head_db_addr = NULL;
221 		txq->et_cq_unmask_addr = NULL;
222 		txq->et_cq_numa_addr = NULL;
223 		txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED;
224 	}
225 
226 	ena_free_tx_dma(txq);
227 	VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE);
228 }
229 
230 void
231 ena_ring_tx_stop(mac_ring_driver_t rh)
232 {
233 	ena_txq_t *txq = (ena_txq_t *)rh;
234 	uint32_t intr_ctrl;
235 
236 	intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr);
237 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
238 	ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl);
239 
240 	txq->et_state &= ~ENA_TXQ_STATE_RUNNING;
241 	txq->et_state &= ~ENA_TXQ_STATE_READY;
242 }
243 
244 int
245 ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num)
246 {
247 	ena_txq_t *txq = (ena_txq_t *)rh;
248 	ena_t *ena = txq->et_ena;
249 	uint32_t intr_ctrl;
250 
251 	mutex_enter(&txq->et_lock);
252 	txq->et_m_gen_num = gen_num;
253 	mutex_exit(&txq->et_lock);
254 
255 	txq->et_state |= ENA_TXQ_STATE_READY;
256 
257 	intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
258 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
259 	ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);
260 	txq->et_state |= ENA_TXQ_STATE_RUNNING;
261 	return (0);
262 }
263 
264 static void
265 ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp,
266     const size_t off, const size_t len)
267 {
268 	const void *soff = mp->b_rptr + off;
269 	void *doff =
270 	    (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len);
271 
272 	VERIFY3U(len, >, 0);
273 	VERIFY3P(soff, >=, mp->b_rptr);
274 	VERIFY3P(soff, <=, mp->b_wptr);
275 	VERIFY3U(len, <=, MBLKL(mp));
276 	VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr);
277 	VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len);
278 
279 	bcopy(soff, doff, len);
280 	tcb->etcb_type = ENA_TCB_COPY;
281 	tcb->etcb_dma.edb_used_len += len;
282 }
283 
284 ena_tx_control_block_t *
285 ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp)
286 {
287 	mblk_t *nmp = mp;
288 	ena_t *ena = txq->et_ena;
289 	ena_tx_control_block_t *tcb = NULL;
290 	const uint16_t tail_mod =
291 	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);
292 
293 	ASSERT(MUTEX_HELD(&txq->et_lock));
294 	VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz);
295 
296 	while (nmp != NULL) {
297 		const size_t nmp_len = MBLKL(nmp);
298 
299 		if (nmp_len == 0) {
300 			nmp = nmp->b_cont;
301 			continue;
302 		}
303 
304 		/* For now TCB is bound to SQ desc. */
305 		if (tcb == NULL) {
306 			tcb = &txq->et_tcbs[tail_mod];
307 		}
308 
309 		ena_tx_copy_fragment(tcb, nmp, 0, nmp_len);
310 		nmp = nmp->b_cont;
311 	}
312 
313 	ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV);
314 	VERIFY3P(nmp, ==, NULL);
315 	VERIFY3P(tcb, !=, NULL);
316 	return (tcb);
317 }
318 
319 static void
320 ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb,
321     uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc,
322     mac_ether_offload_info_t *meo, size_t mlen)
323 {
324 	VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK);
325 
326 #ifdef DEBUG
327 	/*
328 	 * If there is no header for the specific layer it will be set
329 	 * to zero, thus we elide the meoi_flags check here.
330 	 */
331 	size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
332 	ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len);
333 #endif
334 
335 	bzero(desc, sizeof (*desc));
336 	ENAHW_TX_DESC_FIRST_ON(desc);
337 	ENAHW_TX_DESC_LENGTH(desc, mlen);
338 	ENAHW_TX_DESC_REQID_HI(desc, tail);
339 	ENAHW_TX_DESC_REQID_LO(desc, tail);
340 	ENAHW_TX_DESC_PHASE(desc, phase);
341 	ENAHW_TX_DESC_DF_ON(desc);
342 	ENAHW_TX_DESC_LAST_ON(desc);
343 	ENAHW_TX_DESC_COMP_REQ_ON(desc);
344 	ENAHW_TX_DESC_META_DESC_OFF(desc);
345 	ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
346 	ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
347 	/*
348 	 * NOTE: Please see the block comment above
349 	 * etd_buff_addr_hi_hdr_sz to see why this is set to 0.
350 	 */
351 	ENAHW_TX_DESC_HEADER_LENGTH(desc, 0);
352 	ENAHW_TX_DESC_TSO_OFF(desc);
353 	ENAHW_TX_DESC_L3_CSUM_OFF(desc);
354 	ENAHW_TX_DESC_L4_CSUM_OFF(desc);
355 	/*
356 	 * Enabling this bit tells the device NOT to calculate the
357 	 * pseudo header checksum.
358 	 */
359 	ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc);
360 }
361 
362 static void
363 ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx)
364 {
365 	ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx);
366 }
367 
368 /*
369  * For now we do the simplest thing possible. All Tx uses bcopy to
370  * pre-allocated buffers, no checksum, no TSO, etc.
371  */
372 mblk_t *
373 ena_ring_tx(void *arg, mblk_t *mp)
374 {
375 	ena_txq_t *txq = arg;
376 	ena_t *ena = txq->et_ena;
377 	mac_ether_offload_info_t meo;
378 	enahw_tx_data_desc_t *desc;
379 	ena_tx_control_block_t *tcb;
380 	const uint16_t tail_mod =
381 	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);
382 
383 	VERIFY3P(mp->b_next, ==, NULL);
384 	VERIFY(txq->et_blocked == B_FALSE);
385 
386 	/*
387 	 * The ena_state value is written by atomic operations. The
388 	 * et_state value is currently Write Once, but if that changes
389 	 * it should also be written with atomics.
390 	 */
391 	if (!(ena->ena_state & ENA_STATE_RUNNING) ||
392 	    !(txq->et_state & ENA_TXQ_STATE_RUNNING)) {
393 		freemsg(mp);
394 		return (NULL);
395 	}
396 
397 	if (mac_ether_offload_info(mp, &meo) != 0) {
398 		freemsg(mp);
399 		mutex_enter(&txq->et_stat_lock);
400 		txq->et_stat.ets_hck_meoifail.value.ui64++;
401 		mutex_exit(&txq->et_stat_lock);
402 		return (NULL);
403 	}
404 
405 	mutex_enter(&txq->et_lock);
406 
407 	/*
408 	 * For the moment there is a 1:1 mapping between Tx descs and
409 	 * Tx contexts. Currently Tx is copy only, and each context
410 	 * buffer is guaranteed to be as large as MTU + frame header,
411 	 * see ena_update_buf_sizes().
412 	 */
413 	if (txq->et_sq_avail_descs == 0) {
414 		txq->et_blocked = B_TRUE;
415 		mutex_enter(&txq->et_stat_lock);
416 		txq->et_stat.ets_blocked.value.ui64++;
417 		mutex_exit(&txq->et_stat_lock);
418 		mutex_exit(&txq->et_lock);
419 		return (mp);
420 	}
421 
422 	ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total);
423 	tcb = ena_pull_tcb(txq, mp);
424 	ASSERT3P(tcb, !=, NULL);
425 	tcb->etcb_mp = mp;
426 	txq->et_sq_avail_descs--;
427 
428 	/* Fill in the Tx descriptor. */
429 	desc = &(txq->et_sq_descs[tail_mod].etd_data);
430 	ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo,
431 	    meo.meoi_len);
432 	DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t,
433 	    tail_mod, enahw_tx_data_desc_t *, desc);
434 
435 	/*
436 	 * Remember, we submit the raw tail value to the device, the
437 	 * hardware performs its own modulo (like we did to get
438 	 * tail_mod).
439 	 */
440 	txq->et_sq_tail_idx++;
441 	ena_submit_tx(txq, txq->et_sq_tail_idx);
442 
443 	mutex_enter(&txq->et_stat_lock);
444 	txq->et_stat.ets_packets.value.ui64++;
445 	txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len;
446 	mutex_exit(&txq->et_stat_lock);
447 
448 	if ((txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1)) == 0) {
449 		txq->et_sq_phase = !txq->et_sq_phase;
450 	}
451 
452 	mutex_exit(&txq->et_lock);
453 	return (NULL);
454 }
455 
456 void
457 ena_tx_intr_work(ena_txq_t *txq)
458 {
459 	uint16_t head_mod;
460 	enahw_tx_cdesc_t *cdesc;
461 	ena_tx_control_block_t *tcb;
462 	uint16_t req_id;
463 	uint64_t recycled = 0;
464 	boolean_t unblocked = B_FALSE;
465 
466 	mutex_enter(&txq->et_lock);
467 	head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
468 	ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL);
469 	cdesc = &txq->et_cq_descs[head_mod];
470 
471 	/* Recycle any completed descriptors. */
472 	while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) {
473 		mblk_t *mp;
474 
475 		/* Get the corresponding TCB. */
476 		req_id = cdesc->etc_req_id;
477 		/*
478 		 * It would be nice to make this a device reset
479 		 * instead.
480 		 */
481 		VERIFY3U(req_id, <=, txq->et_sq_num_descs);
482 		tcb = &txq->et_tcbs[req_id];
483 		DTRACE_PROBE2(tx__complete, uint16_t, req_id,
484 		    ena_tx_control_block_t *, tcb);
485 
486 		/* Free the associated mblk. */
487 		tcb->etcb_dma.edb_used_len = 0;
488 		mp = tcb->etcb_mp;
489 		/* Make this a device reset instead. */
490 		VERIFY3P(mp, !=, NULL);
491 		freemsg(mp);
492 		tcb->etcb_mp = NULL;
493 
494 		/* Add this descriptor back to the free list. */
495 		txq->et_sq_avail_descs++;
496 		txq->et_cq_head_idx++;
497 
498 		/* Check for phase rollover. */
499 		head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
500 
501 		if (head_mod == 0) {
502 			txq->et_cq_phase = !txq->et_cq_phase;
503 		}
504 
505 		if (txq->et_blocked) {
506 			txq->et_blocked = B_FALSE;
507 			unblocked = B_TRUE;
508 			mac_tx_ring_update(txq->et_ena->ena_mh, txq->et_mrh);
509 		}
510 
511 		recycled++;
512 		cdesc = &txq->et_cq_descs[head_mod];
513 	}
514 
515 	/*
516 	 * If the device provided a head doorbell register, then we
517 	 * need to update it to let the device know we are done
518 	 * reading these CQ entries.
519 	 */
520 	if (txq->et_cq_head_db_addr != NULL) {
521 		ena_hw_abs_write32(txq->et_ena, txq->et_cq_head_db_addr,
522 		    head_mod);
523 	}
524 
525 	mutex_exit(&txq->et_lock);
526 
527 	/* Update stats. */
528 	mutex_enter(&txq->et_stat_lock);
529 	txq->et_stat.ets_recycled.value.ui64 += recycled;
530 	if (unblocked) {
531 		txq->et_stat.ets_unblocked.value.ui64++;
532 	}
533 	mutex_exit(&txq->et_stat_lock);
534 }
535