xref: /illumos-gate/usr/src/uts/common/io/ena/ena_tx.c (revision 02ac56e010f18fc0c5aafe47377586d8ba8c897c)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2024 Oxide Computer Company
 */

#include "ena.h"

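/*
 * This file implements the Tx half of the ENA data path: allocation and
 * teardown of the Tx submission (SQ) and completion (CQ) rings, the mac
 * ring start/stop and transmit entry points, and the interrupt work
 * that recycles completed Tx descriptors.
 */
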
void
ena_free_tx_dma(ena_txq_t *txq)
{
	if (txq->et_tcbs != NULL) {
		for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
			ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
			ena_dma_free(&tcb->etcb_dma);
		}

		kmem_free(txq->et_tcbs,
		    sizeof (*txq->et_tcbs) * txq->et_sq_num_descs);

		txq->et_tcbs = NULL;
	}

	ena_dma_free(&txq->et_cq_dma);
	txq->et_cq_descs = NULL;

	ena_dma_free(&txq->et_sq_dma);
	txq->et_sq_descs = NULL;

	txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC;
}

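/*
 * Allocate the host DMA resources for a Tx queue: the SQ descriptor
 * ring, one bcopy buffer per SQ descriptor (hung off the Tx control
 * blocks), and the CQ descriptor ring. On failure, everything allocated
 * so far is released via ena_free_tx_dma().
 */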
static int
ena_alloc_tx_dma(ena_txq_t *txq)
{
	ena_t *ena = txq->et_ena;
	size_t cq_descs_sz;
	size_t sq_descs_sz;
	int err = 0;

	ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
	ASSERT3P(ena, !=, NULL);

	cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs);
	sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs);

	ena_dma_conf_t sq_conf = {
		.edc_size = sq_descs_sz,
		.edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
		.edc_sgl = 1,
		.edc_endian = DDI_NEVERSWAP_ACC,
		.edc_stream = false,
	};

	if (!ena_dma_alloc(ena, &txq->et_sq_dma, &sq_conf, sq_descs_sz)) {
		return (ENOMEM);
	}

	txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va;
	txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) *
	    txq->et_sq_num_descs, KM_SLEEP);

	for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
		ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
		ena_dma_conf_t buf_conf = {
			.edc_size = ena->ena_tx_buf_sz,
			.edc_align = 1,
			.edc_sgl = ena->ena_tx_sgl_max_sz,
			.edc_endian = DDI_NEVERSWAP_ACC,
			.edc_stream = true,
		};

		if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf,
		    ena->ena_tx_buf_sz)) {
			err = ENOMEM;
			goto error;
		}
	}

	ena_dma_conf_t cq_conf = {
		.edc_size = cq_descs_sz,
		.edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
		.edc_sgl = 1,
		.edc_endian = DDI_NEVERSWAP_ACC,
		.edc_stream = false,
	};

	if (!ena_dma_alloc(ena, &txq->et_cq_dma, &cq_conf, cq_descs_sz)) {
		err = ENOMEM;
		goto error;
	}

	txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va;
	txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC;
	return (0);

error:
	ena_free_tx_dma(txq);
	return (err);
}

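/*
 * Set up a Tx queue end-to-end: first allocate the host DMA resources,
 * then create the device-side completion and submission queues, in that
 * order, since the SQ must reference the hardware index of its CQ.
 */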
bool
ena_alloc_txq(ena_txq_t *txq)
{
	int ret = 0;
	ena_t *ena = txq->et_ena;
	uint16_t cq_hw_idx, sq_hw_idx;
	uint32_t *cq_unmask_addr, *cq_numanode;
	uint32_t *sq_db_addr;

	ASSERT3U(txq->et_cq_num_descs, >, 0);

	/*
	 * First, allocate the Tx data buffers.
	 */
	if ((ret = ena_alloc_tx_dma(txq)) != 0) {
		ena_err(ena, "failed to allocate Tx queue %u data buffers: %d",
		    txq->et_txqs_idx, ret);
		return (false);
	}

	ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);

	/*
	 * Second, create the Completion Queue.
	 */
	ret = ena_create_cq(ena, txq->et_cq_num_descs,
	    txq->et_cq_dma.edb_cookie->dmac_laddress, true,
	    txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_numanode);

	if (ret != 0) {
		ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx,
		    ret);
		return (false);
	}

	txq->et_cq_hw_idx = cq_hw_idx;
	txq->et_cq_phase = 1;
	txq->et_cq_unmask_addr = cq_unmask_addr;
	txq->et_cq_numa_addr = cq_numanode;
	txq->et_state |= ENA_TXQ_STATE_CQ_CREATED;

	/*
	 * Third, create the Submission Queue to match with the above
	 * CQ. At this time we force the SQ and CQ to have the same
	 * number of descriptors as we only use a 1:1 completion
	 * policy. However, in the future, we could loosen this and
	 * use an on-demand completion policy and the two could have a
	 * different number of descriptors.
	 */
	ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs);

	ret = ena_create_sq(ena, txq->et_sq_num_descs,
	    txq->et_sq_dma.edb_cookie->dmac_laddress, true, cq_hw_idx,
	    &sq_hw_idx, &sq_db_addr);

	if (ret != 0) {
		ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx,
		    ret);
		return (false);
	}

	txq->et_sq_hw_idx = sq_hw_idx;
	txq->et_sq_db_addr = sq_db_addr;
	/* The phase must always start on 1. */
	txq->et_sq_phase = 1;
	txq->et_sq_avail_descs = txq->et_sq_num_descs;
	txq->et_blocked = false;
	txq->et_stall_watchdog = 0;
	txq->et_state |= ENA_TXQ_STATE_SQ_CREATED;

	return (true);
}

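/*
 * Tear down a Tx queue. The device-side SQ and CQ are destroyed via
 * admin commands only when the queue is being torn down normally; when
 * the device is being reset those commands are skipped, as the queues
 * are assumed not to survive the reset. The host DMA resources are
 * always released.
 */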
void
ena_cleanup_txq(ena_txq_t *txq, bool resetting)
{
	int ret = 0;
	ena_t *ena = txq->et_ena;

	if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) {
		if (!resetting) {
			ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, true);

			if (ret != 0) {
				ena_err(ena, "failed to destroy Tx SQ %u: %d",
				    txq->et_txqs_idx, ret);
			}
		}

		txq->et_sq_hw_idx = 0;
		txq->et_sq_db_addr = NULL;
		txq->et_sq_tail_idx = 0;
		txq->et_sq_phase = 0;
		txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED;
	}

	if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) {
		if (!resetting) {
			ret = ena_destroy_cq(ena, txq->et_cq_hw_idx);

			if (ret != 0) {
				ena_err(ena, "failed to destroy Tx CQ %u: %d",
				    txq->et_txqs_idx, ret);
			}
		}

		txq->et_cq_hw_idx = 0;
		txq->et_cq_head_idx = 0;
		txq->et_cq_phase = 0;
		txq->et_cq_unmask_addr = NULL;
		txq->et_cq_numa_addr = NULL;
		txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED;
	}

	ena_free_tx_dma(txq);
	VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE);
}

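/*
 * mac ring stop entry point: mask the Tx CQ's interrupt and clear the
 * READY and RUNNING state bits. Once RUNNING is clear, ena_ring_tx()
 * drops any packets handed to this ring.
 */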
void
ena_ring_tx_stop(mac_ring_driver_t rh)
{
	ena_txq_t *txq = (ena_txq_t *)rh;
	uint32_t intr_ctrl;

	intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr);
	ENAHW_REG_INTR_MASK(intr_ctrl);
	ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl);

	txq->et_state &= ~ENA_TXQ_STATE_RUNNING;
	txq->et_state &= ~ENA_TXQ_STATE_READY;
}

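/*
 * mac ring start entry point: record the ring's generation number, mark
 * it ready, unmask the Tx CQ's interrupt, and mark it running.
 */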
int
ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num)
{
	ena_txq_t *txq = (ena_txq_t *)rh;
	ena_t *ena = txq->et_ena;
	uint32_t intr_ctrl;

	ena_dbg(ena, "ring_tx_start %p: state 0x%x", txq, txq->et_state);

	mutex_enter(&txq->et_lock);
	txq->et_m_gen_num = gen_num;
	mutex_exit(&txq->et_lock);

	txq->et_state |= ENA_TXQ_STATE_READY;

	intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
	ENAHW_REG_INTR_UNMASK(intr_ctrl);
	ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);
	txq->et_state |= ENA_TXQ_STATE_RUNNING;

	return (0);
}

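/*
 * Copy len bytes of the mblk fragment mp, starting at offset off, into
 * the Tx control block's pre-allocated DMA buffer, appending to any
 * data already copied there.
 */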
static void
ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp,
    const size_t off, const size_t len)
{
	const void *soff = mp->b_rptr + off;
	void *doff =
	    (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len);

	VERIFY3U(len, >, 0);
	VERIFY3P(soff, >=, mp->b_rptr);
	VERIFY3P(soff, <=, mp->b_wptr);
	VERIFY3U(len, <=, MBLKL(mp));
	VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr);
	VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len);

	bcopy(soff, doff, len);
	tcb->etcb_type = ENA_TCB_COPY;
	tcb->etcb_dma.edb_used_len += len;
}

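/*
 * Copy an entire message into the Tx control block bound to the SQ
 * descriptor at the current tail, skipping zero-length fragments, and
 * sync the buffer for the device. The caller must hold et_lock, and the
 * message must fit within a single copy buffer (ena_tx_buf_sz).
 */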
ena_tx_control_block_t *
ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp)
{
	mblk_t *nmp = mp;
	ena_t *ena = txq->et_ena;
	ena_tx_control_block_t *tcb = NULL;
	const uint16_t tail_mod =
	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);

	ASSERT(MUTEX_HELD(&txq->et_lock));
	VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz);

	while (nmp != NULL) {
		const size_t nmp_len = MBLKL(nmp);

		if (nmp_len == 0) {
			nmp = nmp->b_cont;
			continue;
		}

		/* For now TCB is bound to SQ desc. */
		if (tcb == NULL) {
			tcb = &txq->et_tcbs[tail_mod];
		}

		ena_tx_copy_fragment(tcb, nmp, 0, nmp_len);
		nmp = nmp->b_cont;
	}

	ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV);
	VERIFY3P(nmp, ==, NULL);
	VERIFY3P(tcb, !=, NULL);
	return (tcb);
}

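/*
 * Fill in a single Tx data descriptor for a bcopy'd frame: it is both
 * the first and last descriptor of the frame, points at the TCB's copy
 * buffer, requests a completion, and leaves TSO and checksum offload
 * disabled.
 */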
static void
ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb,
    uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc,
    mac_ether_offload_info_t *meo, size_t mlen)
{
	VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK);

#ifdef DEBUG
	/*
	 * If there is no header for the specific layer it will be set
	 * to zero, thus we elide the meoi_flags check here.
	 */
	size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
	ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len);
#endif

	bzero(desc, sizeof (*desc));
	ENAHW_TX_DESC_FIRST_ON(desc);
	ENAHW_TX_DESC_LENGTH(desc, mlen);
	ENAHW_TX_DESC_REQID_HI(desc, tail);
	ENAHW_TX_DESC_REQID_LO(desc, tail);
	ENAHW_TX_DESC_PHASE(desc, phase);
	ENAHW_TX_DESC_DF_ON(desc);
	ENAHW_TX_DESC_LAST_ON(desc);
	ENAHW_TX_DESC_COMP_REQ_ON(desc);
	ENAHW_TX_DESC_META_DESC_OFF(desc);
	ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
	ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
	/*
	 * NOTE: Please see the block comment above
	 * etd_buff_addr_hi_hdr_sz to see why this is set to 0.
	 */
	ENAHW_TX_DESC_HEADER_LENGTH(desc, 0);
	ENAHW_TX_DESC_TSO_OFF(desc);
	ENAHW_TX_DESC_L3_CSUM_OFF(desc);
	ENAHW_TX_DESC_L4_CSUM_OFF(desc);
	/*
	 * Enabling this bit tells the device NOT to calculate the
	 * pseudo header checksum.
	 */
	ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc);
}

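/*
 * Ring the SQ doorbell with the raw (un-moduloed) tail index, letting
 * the device know that new descriptors have been filled in.
 */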
static void
ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx)
{
	ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx);
}

/*
 * For now we do the simplest thing possible. All Tx uses bcopy to
 * pre-allocated buffers, no checksum, no TSO, etc.
 */
mblk_t *
ena_ring_tx(void *arg, mblk_t *mp)
{
	ena_txq_t *txq = arg;
	ena_t *ena = txq->et_ena;
	mac_ether_offload_info_t meo;
	enahw_tx_data_desc_t *desc;
	ena_tx_control_block_t *tcb;
	const uint16_t modulo_mask = txq->et_sq_num_descs - 1;
	uint16_t tail_mod;

	VERIFY3P(mp->b_next, ==, NULL);

	/*
	 * The ena_state value is written by atomic operations. The
	 * et_state value is currently Write Once, but if that changes
	 * it should also be written with atomics.
	 */
	if (!(ena->ena_state & ENA_STATE_STARTED) ||
	    !(txq->et_state & ENA_TXQ_STATE_RUNNING)) {
		freemsg(mp);
		return (NULL);
	}

	if (mac_ether_offload_info(mp, &meo) != 0) {
		freemsg(mp);
		mutex_enter(&txq->et_stat_lock);
		txq->et_stat.ets_hck_meoifail.value.ui64++;
		mutex_exit(&txq->et_stat_lock);
		return (NULL);
	}

	mutex_enter(&txq->et_lock);

	/*
	 * For the moment there is a 1:1 mapping between Tx descs and
	 * Tx contexts. Currently Tx is copy only, and each context
	 * buffer is guaranteed to be as large as MTU + frame header,
	 * see ena_update_buf_sizes().
	 */
	if (txq->et_blocked || txq->et_sq_avail_descs == 0) {
		txq->et_blocked = true;
		mutex_enter(&txq->et_stat_lock);
		txq->et_stat.ets_blocked.value.ui64++;
		mutex_exit(&txq->et_stat_lock);
		mutex_exit(&txq->et_lock);
		return (mp);
	}

	ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total);
	tcb = ena_pull_tcb(txq, mp);
	ASSERT3P(tcb, !=, NULL);
	tcb->etcb_mp = mp;
	txq->et_sq_avail_descs--;

	/* Fill in the Tx descriptor. */
	tail_mod = txq->et_sq_tail_idx & modulo_mask;
	desc = &txq->et_sq_descs[tail_mod].etd_data;
	ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo,
	    meo.meoi_len);
	DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t,
	    tail_mod, enahw_tx_data_desc_t *, desc);

	/*
	 * Remember, we submit the raw tail value to the device, the
	 * hardware performs its own modulo (like we did to get
	 * tail_mod).
	 */
	txq->et_sq_tail_idx++;
	ena_submit_tx(txq, txq->et_sq_tail_idx);

	mutex_enter(&txq->et_stat_lock);
	txq->et_stat.ets_packets.value.ui64++;
	txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len;
	mutex_exit(&txq->et_stat_lock);

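	/*
	 * The SQ phase bit flips each time the tail index wraps around
	 * the ring; descriptors written on the next pass over the ring
	 * carry the opposite phase value.
	 */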
	if ((txq->et_sq_tail_idx & modulo_mask) == 0)
		txq->et_sq_phase ^= 1;

	mutex_exit(&txq->et_lock);

	return (NULL);
}

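/*
 * Tx completion processing: walk the CQ from the current head and, for
 * each completed descriptor, free the associated mblk, return the SQ
 * slot, and advance the head and phase. If the ring was blocked, let
 * mac know it may resume sending. Stats are updated once the queue lock
 * has been dropped.
 */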
void
ena_tx_intr_work(ena_txq_t *txq)
{
	uint16_t head_mod;
	enahw_tx_cdesc_t *cdesc;
	ena_tx_control_block_t *tcb;
	uint16_t req_id;
	uint64_t recycled = 0;
	bool unblocked = false;
	const uint16_t modulo_mask = txq->et_cq_num_descs - 1;
	ena_t *ena = txq->et_ena;

	mutex_enter(&txq->et_lock);
	head_mod = txq->et_cq_head_idx & modulo_mask;
	ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL);
	cdesc = &txq->et_cq_descs[head_mod];

	/* Recycle any completed descriptors. */
	while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) {
		mblk_t *mp;

		/* Get the corresponding TCB. */
		req_id = cdesc->etc_req_id;
		if (req_id >= txq->et_sq_num_descs) {
			ena_err(ena, "invalid Tx request ID: 0x%x", req_id);
			ena_trigger_reset(ena, ENAHW_RESET_INV_TX_REQ_ID);
			break;
		}
		tcb = &txq->et_tcbs[req_id];
		DTRACE_PROBE2(tx__complete, uint16_t, req_id,
		    ena_tx_control_block_t *, tcb);

		/* Free the associated mblk. */
		tcb->etcb_dma.edb_used_len = 0;
		mp = tcb->etcb_mp;
		VERIFY3P(mp, !=, NULL);
		freemsg(mp);
		tcb->etcb_mp = NULL;

		/* Add this descriptor back to the free list. */
		txq->et_sq_avail_descs++;
		txq->et_cq_head_idx++;

		/* Check for phase rollover. */
		head_mod = txq->et_cq_head_idx & modulo_mask;
		if (head_mod == 0)
			txq->et_cq_phase ^= 1;

		if (txq->et_blocked) {
			txq->et_blocked = false;
			txq->et_stall_watchdog = 0;
			unblocked = true;
			mac_tx_ring_update(ena->ena_mh, txq->et_mrh);
		}

		recycled++;
		cdesc = &txq->et_cq_descs[head_mod];
	}

	mutex_exit(&txq->et_lock);

	if (recycled == 0)
		return;

	/* Update stats. */
	mutex_enter(&txq->et_stat_lock);
	txq->et_stat.ets_recycled.value.ui64 += recycled;
	if (unblocked) {
		txq->et_stat.ets_unblocked.value.ui64++;
	}
	mutex_exit(&txq->et_stat_lock);
}
530