xref: /illumos-gate/usr/src/uts/common/io/ena/ena_tx.c (revision 4774dff6a15e3052e75fd8a0fdd519521be2db59)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 #include "ena.h"
16 
17 void
18 ena_free_tx_dma(ena_txq_t *txq)
19 {
20 	if (txq->et_tcbs != NULL) {
21 		for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
22 			ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
23 			ena_dma_free(&tcb->etcb_dma);
24 		}
25 
26 		kmem_free(txq->et_tcbs,
27 		    sizeof (*txq->et_tcbs) * txq->et_sq_num_descs);
28 
29 		txq->et_tcbs = NULL;
30 
31 	}
32 
33 	ena_dma_free(&txq->et_cq_dma);
34 	txq->et_cq_descs = NULL;
35 
36 	ena_dma_free(&txq->et_sq_dma);
37 	txq->et_sq_descs = NULL;
38 
39 	txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC;
40 }
41 
42 static int
43 ena_alloc_tx_dma(ena_txq_t *txq)
44 {
45 	ena_t *ena = txq->et_ena;
46 	size_t cq_descs_sz;
47 	size_t sq_descs_sz;
48 	int err = 0;
49 	ena_dma_conf_t conf;
50 
51 	ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
52 	ASSERT3P(ena, !=, NULL);
53 
54 	cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs);
55 	sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs);
56 
57 	/* BEGIN CSTYLED */
58 	conf = (ena_dma_conf_t) {
59 		.edc_size = sq_descs_sz,
60 		.edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
61 		.edc_sgl = 1,
62 		.edc_endian = DDI_NEVERSWAP_ACC,
63 		.edc_stream = B_FALSE,
64 	};
65 	/* END CSTYLED */
66 
67 	if (!ena_dma_alloc(ena, &txq->et_sq_dma, &conf, sq_descs_sz)) {
68 		return (ENOMEM);
69 	}
70 
71 	bzero(txq->et_sq_dma.edb_va, sq_descs_sz);
72 	txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va;
73 	txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) *
74 	    txq->et_sq_num_descs, KM_SLEEP);
75 
76 	for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
77 		ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
78 		ena_dma_conf_t buf_conf = {
79 			.edc_size = ena->ena_tx_buf_sz,
80 			.edc_align = 1,
81 			.edc_sgl = ena->ena_tx_sgl_max_sz,
82 			.edc_endian = DDI_NEVERSWAP_ACC,
83 			.edc_stream = B_TRUE,
84 		};
85 
86 		if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf,
87 		    ena->ena_tx_buf_sz)) {
88 			err = ENOMEM;
89 			goto error;
90 		}
91 	}
92 
93 	/* BEGIN CSTYLED */
94 	conf = (ena_dma_conf_t) {
95 		.edc_size = cq_descs_sz,
96 		.edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
97 		.edc_sgl = 1,
98 		.edc_endian = DDI_NEVERSWAP_ACC,
99 		.edc_stream = B_FALSE,
100 	};
101 	/* END CSTYLED */
102 
103 	if (!ena_dma_alloc(ena, &txq->et_cq_dma, &conf, cq_descs_sz)) {
104 		err = ENOMEM;
105 		goto error;
106 	}
107 
108 	bzero(txq->et_cq_dma.edb_va, cq_descs_sz);
109 	txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va;
110 	txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC;
111 	return (0);
112 
113 error:
114 	ena_free_tx_dma(txq);
115 	return (err);
116 }
117 
118 boolean_t
119 ena_alloc_txq(ena_txq_t *txq)
120 {
121 	int ret = 0;
122 	ena_t *ena = txq->et_ena;
123 	uint16_t cq_hw_idx, sq_hw_idx;
124 	uint32_t *cq_unmask_addr, *cq_numanode;
125 	uint32_t *sq_db_addr;
126 
127 	ASSERT3U(txq->et_cq_num_descs, >, 0);
128 
129 	/*
130 	 * First, allocate the Tx data buffers.
131 	 */
132 	if ((ret = ena_alloc_tx_dma(txq)) != 0) {
133 		ena_err(ena, "failed to allocate Tx queue %u data buffers: %d",
134 		    txq->et_txqs_idx, ret);
135 		return (B_FALSE);
136 	}
137 
138 	ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
139 
140 	/*
141 	 * Second, create the Completion Queue.
142 	 */
143 	ret = ena_create_cq(ena, txq->et_cq_num_descs,
144 	    txq->et_cq_dma.edb_cookie->dmac_laddress, B_TRUE,
145 	    txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_numanode);
146 
147 	if (ret != 0) {
148 		ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx,
149 		    ret);
150 		return (B_FALSE);
151 	}
152 
153 	txq->et_cq_hw_idx = cq_hw_idx;
154 	txq->et_cq_phase = 1;
155 	txq->et_cq_unmask_addr = cq_unmask_addr;
156 	txq->et_cq_numa_addr = cq_numanode;
157 	txq->et_state |= ENA_TXQ_STATE_CQ_CREATED;
158 
159 	/*
160 	 * Third, create the Submission Queue to match with the above
161 	 * CQ. At this time we force the SQ and CQ to have the same
162 	 * number of descriptors as we only use a 1:1 completion
163 	 * policy. However, in the future, we could loosen this and
164 	 * use an on-demand completion policy and the two could have a
165 	 * different number of descriptors.
166 	 */
167 	ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs);
168 
169 	ret = ena_create_sq(ena, txq->et_sq_num_descs,
170 	    txq->et_sq_dma.edb_cookie->dmac_laddress, B_TRUE, cq_hw_idx,
171 	    &sq_hw_idx, &sq_db_addr);
172 
173 	if (ret != 0) {
174 		ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx,
175 		    ret);
176 		return (B_FALSE);
177 	}
178 
179 	txq->et_sq_hw_idx = sq_hw_idx;
180 	txq->et_sq_db_addr = sq_db_addr;
181 	/* The phase must always start on 1. */
182 	txq->et_sq_phase = 1;
183 	txq->et_sq_avail_descs = txq->et_sq_num_descs;
184 	txq->et_blocked = B_FALSE;
185 	txq->et_state |= ENA_TXQ_STATE_SQ_CREATED;
186 
187 	return (B_TRUE);
188 }
189 
190 void
191 ena_cleanup_txq(ena_txq_t *txq)
192 {
193 	int ret = 0;
194 	ena_t *ena = txq->et_ena;
195 
196 	if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) {
197 		ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, B_TRUE);
198 
199 		if (ret != 0) {
200 			ena_err(ena, "failed to destroy Tx SQ %u: %d",
201 			    txq->et_txqs_idx, ret);
202 		}
203 
204 		txq->et_sq_hw_idx = 0;
205 		txq->et_sq_db_addr = NULL;
206 		txq->et_sq_tail_idx = 0;
207 		txq->et_sq_phase = 0;
208 		txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED;
209 	}
210 
211 	if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) {
212 		ret = ena_destroy_cq(ena, txq->et_cq_hw_idx);
213 
214 		if (ret != 0) {
215 			ena_err(ena, "failed to destroy Tx CQ %u: %d",
216 			    txq->et_txqs_idx, ret);
217 		}
218 
219 		txq->et_cq_hw_idx = 0;
220 		txq->et_cq_head_idx = 0;
221 		txq->et_cq_phase = 0;
222 		txq->et_cq_unmask_addr = NULL;
223 		txq->et_cq_numa_addr = NULL;
224 		txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED;
225 	}
226 
227 	ena_free_tx_dma(txq);
228 	VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE);
229 }
230 
231 void
232 ena_ring_tx_stop(mac_ring_driver_t rh)
233 {
234 	ena_txq_t *txq = (ena_txq_t *)rh;
235 	uint32_t intr_ctrl;
236 
237 	intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr);
238 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
239 	ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl);
240 
241 	txq->et_state &= ~ENA_TXQ_STATE_RUNNING;
242 	txq->et_state &= ~ENA_TXQ_STATE_READY;
243 }
244 
245 int
246 ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num)
247 {
248 	ena_txq_t *txq = (ena_txq_t *)rh;
249 	ena_t *ena = txq->et_ena;
250 	uint32_t intr_ctrl;
251 
252 	mutex_enter(&txq->et_lock);
253 	txq->et_m_gen_num = gen_num;
254 	mutex_exit(&txq->et_lock);
255 
256 	txq->et_state |= ENA_TXQ_STATE_READY;
257 
258 	intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
259 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
260 	ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);
261 	txq->et_state |= ENA_TXQ_STATE_RUNNING;
262 	return (0);
263 }
264 
265 static void
266 ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp,
267     const size_t off, const size_t len)
268 {
269 	const void *soff = mp->b_rptr + off;
270 	void *doff =
271 	    (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len);
272 
273 	VERIFY3U(len, >, 0);
274 	VERIFY3P(soff, >=, mp->b_rptr);
275 	VERIFY3P(soff, <=, mp->b_wptr);
276 	VERIFY3U(len, <=, MBLKL(mp));
277 	VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr);
278 	VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len);
279 
280 	bcopy(soff, doff, len);
281 	tcb->etcb_type = ENA_TCB_COPY;
282 	tcb->etcb_dma.edb_used_len += len;
283 }
284 
285 ena_tx_control_block_t *
286 ena_pull_tcb(const ena_txq_t *txq, mblk_t *mp)
287 {
288 	mblk_t *nmp = mp;
289 	ena_t *ena = txq->et_ena;
290 	ena_tx_control_block_t *tcb = NULL;
291 	const uint16_t tail_mod =
292 	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);
293 
294 	ASSERT(MUTEX_HELD(&txq->et_lock));
295 	VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz);
296 
297 	while (nmp != NULL) {
298 		const size_t nmp_len = MBLKL(nmp);
299 
300 		if (nmp_len == 0) {
301 			nmp = nmp->b_cont;
302 			continue;
303 		}
304 
305 		/* For now TCB is bound to SQ desc. */
306 		if (tcb == NULL) {
307 			tcb = &txq->et_tcbs[tail_mod];
308 		}
309 
310 		ena_tx_copy_fragment(tcb, nmp, 0, nmp_len);
311 		nmp = nmp->b_cont;
312 	}
313 
314 	ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV);
315 	VERIFY3P(nmp, ==, NULL);
316 	VERIFY3P(tcb, !=, NULL);
317 	return (tcb);
318 }
319 
320 static void
321 ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb,
322     uint16_t tail, uint8_t phase, enahw_tx_data_desc_t *desc,
323     mac_ether_offload_info_t *meo, size_t mlen)
324 {
325 	VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK);
326 
327 #ifdef DEBUG
328 	/*
329 	 * If there is no header for the specific layer it will be set
330 	 * to zero, thus we elide the meoi_flags check here.
331 	 */
332 	size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
333 	ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len);
334 #endif
335 
336 	bzero(desc, sizeof (*desc));
337 	ENAHW_TX_DESC_FIRST_ON(desc);
338 	ENAHW_TX_DESC_LENGTH(desc, mlen);
339 	ENAHW_TX_DESC_REQID_HI(desc, tail);
340 	ENAHW_TX_DESC_REQID_LO(desc, tail);
341 	ENAHW_TX_DESC_PHASE(desc, phase);
342 	ENAHW_TX_DESC_DF_ON(desc);
343 	ENAHW_TX_DESC_LAST_ON(desc);
344 	ENAHW_TX_DESC_COMP_REQ_ON(desc);
345 	ENAHW_TX_DESC_META_DESC_OFF(desc);
346 	ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
347 	ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
348 	/*
349 	 * NOTE: Please see the block comment above
350 	 * etd_buff_addr_hi_hdr_sz to see why this is set to 0.
351 	 */
352 	ENAHW_TX_DESC_HEADER_LENGTH(desc, 0);
353 	ENAHW_TX_DESC_TSO_OFF(desc);
354 	ENAHW_TX_DESC_L3_CSUM_OFF(desc);
355 	ENAHW_TX_DESC_L4_CSUM_OFF(desc);
356 	/*
357 	 * Enabling this bit tells the device NOT to calculate the
358 	 * pseudo header checksum.
359 	 */
360 	ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc);
361 }
362 
363 static void
364 ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx)
365 {
366 	ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx);
367 }
368 
369 /*
370  * For now we do the simplest thing possible. All Tx uses bcopy to
371  * pre-allocated buffers, no checksum, no TSO, etc.
372  */
373 mblk_t *
374 ena_ring_tx(void *arg, mblk_t *mp)
375 {
376 	ena_txq_t *txq = arg;
377 	ena_t *ena = txq->et_ena;
378 	mac_ether_offload_info_t meo;
379 	enahw_tx_data_desc_t *desc;
380 	ena_tx_control_block_t *tcb;
381 	const uint16_t tail_mod =
382 	    txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1);
383 
384 	VERIFY3P(mp->b_next, ==, NULL);
385 
386 	/*
387 	 * The ena_state value is written by atomic operations. The
388 	 * et_state value is currently Write Once, but if that changes
389 	 * it should also be written with atomics.
390 	 */
391 	if (!(ena->ena_state & ENA_STATE_RUNNING) ||
392 	    !(txq->et_state & ENA_TXQ_STATE_RUNNING)) {
393 		freemsg(mp);
394 		return (NULL);
395 	}
396 
397 	if (mac_ether_offload_info(mp, &meo) != 0) {
398 		freemsg(mp);
399 		mutex_enter(&txq->et_stat_lock);
400 		txq->et_stat.ets_hck_meoifail.value.ui64++;
401 		mutex_exit(&txq->et_stat_lock);
402 		return (NULL);
403 	}
404 
405 	mutex_enter(&txq->et_lock);
406 
407 	/*
408 	 * For the moment there is a 1:1 mapping between Tx descs and
409 	 * Tx contexts. Currently Tx is copy only, and each context
410 	 * buffer is guaranteed to be as large as MTU + frame header,
411 	 * see ena_update_buf_sizes().
412 	 */
413 	if (txq->et_blocked || txq->et_sq_avail_descs == 0) {
414 		txq->et_blocked = B_TRUE;
415 		mutex_enter(&txq->et_stat_lock);
416 		txq->et_stat.ets_blocked.value.ui64++;
417 		mutex_exit(&txq->et_stat_lock);
418 		mutex_exit(&txq->et_lock);
419 		return (mp);
420 	}
421 
422 	ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total);
423 	tcb = ena_pull_tcb(txq, mp);
424 	ASSERT3P(tcb, !=, NULL);
425 	tcb->etcb_mp = mp;
426 	txq->et_sq_avail_descs--;
427 
428 	/* Fill in the Tx descriptor. */
429 	desc = &(txq->et_sq_descs[tail_mod].etd_data);
430 	ena_fill_tx_data_desc(txq, tcb, tail_mod, txq->et_sq_phase, desc, &meo,
431 	    meo.meoi_len);
432 	DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t,
433 	    tail_mod, enahw_tx_data_desc_t *, desc);
434 
435 	/*
436 	 * Remember, we submit the raw tail value to the device, the
437 	 * hardware performs its own modulo (like we did to get
438 	 * tail_mod).
439 	 */
440 	txq->et_sq_tail_idx++;
441 	ena_submit_tx(txq, txq->et_sq_tail_idx);
442 
443 	mutex_enter(&txq->et_stat_lock);
444 	txq->et_stat.ets_packets.value.ui64++;
445 	txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len;
446 	mutex_exit(&txq->et_stat_lock);
447 
448 	if ((txq->et_sq_tail_idx & (txq->et_sq_num_descs - 1)) == 0) {
449 		txq->et_sq_phase ^= 1;
450 	}
451 
452 	mutex_exit(&txq->et_lock);
453 	return (NULL);
454 }
455 
456 void
457 ena_tx_intr_work(ena_txq_t *txq)
458 {
459 	uint16_t head_mod;
460 	enahw_tx_cdesc_t *cdesc;
461 	ena_tx_control_block_t *tcb;
462 	uint16_t req_id;
463 	uint64_t recycled = 0;
464 	boolean_t unblocked = B_FALSE;
465 
466 	mutex_enter(&txq->et_lock);
467 	head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
468 	ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL);
469 	cdesc = &txq->et_cq_descs[head_mod];
470 
471 	/* Recycle any completed descriptors. */
472 	while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) {
473 		mblk_t *mp;
474 
475 		/* Get the corresponding TCB. */
476 		req_id = cdesc->etc_req_id;
477 		/*
478 		 * It would be nice to make this a device reset
479 		 * instead.
480 		 */
481 		VERIFY3U(req_id, <=, txq->et_sq_num_descs);
482 		tcb = &txq->et_tcbs[req_id];
483 		DTRACE_PROBE2(tx__complete, uint16_t, req_id,
484 		    ena_tx_control_block_t *, tcb);
485 
486 		/* Free the associated mblk. */
487 		tcb->etcb_dma.edb_used_len = 0;
488 		mp = tcb->etcb_mp;
489 		/* Make this a device reset instead. */
490 		VERIFY3P(mp, !=, NULL);
491 		freemsg(mp);
492 		tcb->etcb_mp = NULL;
493 
494 		/* Add this descriptor back to the free list. */
495 		txq->et_sq_avail_descs++;
496 		txq->et_cq_head_idx++;
497 
498 		/* Check for phase rollover. */
499 		head_mod = txq->et_cq_head_idx & (txq->et_cq_num_descs - 1);
500 
501 		if (head_mod == 0) {
502 			txq->et_cq_phase ^= 1;
503 		}
504 
505 		if (txq->et_blocked) {
506 			txq->et_blocked = B_FALSE;
507 			unblocked = B_TRUE;
508 			mac_tx_ring_update(txq->et_ena->ena_mh, txq->et_mrh);
509 		}
510 
511 		recycled++;
512 		cdesc = &txq->et_cq_descs[head_mod];
513 	}
514 
515 	if (recycled == 0) {
516 		mutex_exit(&txq->et_lock);
517 		return;
518 	}
519 
520 	mutex_exit(&txq->et_lock);
521 
522 	/* Update stats. */
523 	mutex_enter(&txq->et_stat_lock);
524 	txq->et_stat.ets_recycled.value.ui64 += recycled;
525 	if (unblocked) {
526 		txq->et_stat.ets_unblocked.value.ui64++;
527 	}
528 	mutex_exit(&txq->et_stat_lock);
529 }
530