xref: /illumos-gate/usr/src/uts/common/io/ena/ena_tx.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 #include "ena.h"
17 
18 void
19 ena_free_tx_dma(ena_txq_t *txq)
20 {
21 	if (txq->et_tcbs != NULL) {
22 		for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
23 			ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
24 			ena_dma_free(&tcb->etcb_dma);
25 			if (tcb->etcb_mp != NULL)
26 				freemsg(tcb->etcb_mp);
27 		}
28 
29 		kmem_free(txq->et_tcbs,
30 		    sizeof (*txq->et_tcbs) * txq->et_sq_num_descs);
31 		kmem_free(txq->et_tcbs_freelist,
32 		    sizeof (ena_tx_control_block_t *) * txq->et_sq_num_descs);
33 
34 		txq->et_tcbs = NULL;
35 		txq->et_tcbs_freelist = NULL;
36 		txq->et_tcbs_freelist_size = 0;
37 	}
38 
39 	ena_dma_free(&txq->et_cq_dma);
40 	txq->et_cq_descs = NULL;
41 
42 	ena_dma_free(&txq->et_sq_dma);
43 	txq->et_sq_descs = NULL;
44 
45 	txq->et_state &= ~ENA_TXQ_STATE_HOST_ALLOC;
46 }
47 
48 static int
49 ena_alloc_tx_dma(ena_txq_t *txq)
50 {
51 	ena_t *ena = txq->et_ena;
52 	size_t cq_descs_sz;
53 	size_t sq_descs_sz;
54 	int err = 0;
55 
56 	ASSERT0(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
57 	ASSERT3P(ena, !=, NULL);
58 
59 	cq_descs_sz = txq->et_cq_num_descs * sizeof (*txq->et_cq_descs);
60 	sq_descs_sz = txq->et_sq_num_descs * sizeof (*txq->et_sq_descs);
61 
62 	ena_dma_conf_t sq_conf = {
63 		.edc_size = sq_descs_sz,
64 		.edc_align = ENAHW_IO_SQ_DESC_BUF_ALIGNMENT,
65 		.edc_sgl = 1,
66 		.edc_endian = DDI_NEVERSWAP_ACC,
67 		.edc_stream = false,
68 	};
69 
70 	if (!ena_dma_alloc(ena, &txq->et_sq_dma, &sq_conf, sq_descs_sz)) {
71 		return (ENOMEM);
72 	}
73 
74 	txq->et_sq_descs = (void *)txq->et_sq_dma.edb_va;
75 	txq->et_tcbs = kmem_zalloc(sizeof (*txq->et_tcbs) *
76 	    txq->et_sq_num_descs, KM_SLEEP);
77 	txq->et_tcbs_freelist = kmem_zalloc(sizeof (ena_tx_control_block_t *) *
78 	    txq->et_sq_num_descs, KM_SLEEP);
79 
80 	for (uint_t i = 0; i < txq->et_sq_num_descs; i++) {
81 		ena_tx_control_block_t *tcb = &txq->et_tcbs[i];
82 		ena_dma_conf_t buf_conf = {
83 			.edc_size = ena->ena_tx_buf_sz,
84 			.edc_align = 1,
85 			.edc_sgl = ena->ena_tx_sgl_max_sz,
86 			.edc_endian = DDI_NEVERSWAP_ACC,
87 			.edc_stream = true,
88 		};
89 
90 		if (!ena_dma_alloc(ena, &tcb->etcb_dma, &buf_conf,
91 		    ena->ena_tx_buf_sz)) {
92 			err = ENOMEM;
93 			goto error;
94 		}
95 
96 		tcb->etcb_id = i;
97 		txq->et_tcbs_freelist[i] = tcb;
98 	}
99 	txq->et_tcbs_freelist_size = txq->et_sq_num_descs;
100 
101 	ena_dma_conf_t cq_conf = {
102 		.edc_size = cq_descs_sz,
103 		.edc_align = ENAHW_IO_CQ_DESC_BUF_ALIGNMENT,
104 		.edc_sgl = 1,
105 		.edc_endian = DDI_NEVERSWAP_ACC,
106 		.edc_stream = false,
107 	};
108 
109 	if (!ena_dma_alloc(ena, &txq->et_cq_dma, &cq_conf, cq_descs_sz)) {
110 		err = ENOMEM;
111 		goto error;
112 	}
113 
114 	txq->et_cq_descs = (void *)txq->et_cq_dma.edb_va;
115 	txq->et_state |= ENA_TXQ_STATE_HOST_ALLOC;
116 	return (0);
117 
118 error:
119 	ena_free_tx_dma(txq);
120 	return (err);
121 }
122 
123 bool
124 ena_alloc_txq(ena_txq_t *txq)
125 {
126 	int ret = 0;
127 	ena_t *ena = txq->et_ena;
128 	uint16_t cq_hw_idx, sq_hw_idx;
129 	uint32_t *cq_unmask_addr, *cq_numanode;
130 	uint32_t *sq_db_addr;
131 
132 	ASSERT3U(txq->et_cq_num_descs, >, 0);
133 
134 	/*
135 	 * First, allocate the Tx data buffers.
136 	 */
137 	if ((ret = ena_alloc_tx_dma(txq)) != 0) {
138 		ena_err(ena, "failed to allocate Tx queue %u data buffers: %d",
139 		    txq->et_txqs_idx, ret);
140 		return (false);
141 	}
142 
143 	ASSERT(txq->et_state & ENA_TXQ_STATE_HOST_ALLOC);
144 
145 	/*
146 	 * Second, create the Completion Queue.
147 	 */
148 	ret = ena_create_cq(ena, txq->et_cq_num_descs,
149 	    txq->et_cq_dma.edb_cookie->dmac_laddress, true,
150 	    txq->et_intr_vector, &cq_hw_idx, &cq_unmask_addr, &cq_numanode);
151 
152 	if (ret != 0) {
153 		ena_err(ena, "failed to create Tx CQ %u: %d", txq->et_txqs_idx,
154 		    ret);
155 		return (false);
156 	}
157 
158 	txq->et_cq_hw_idx = cq_hw_idx;
159 	txq->et_cq_phase = 1;
160 	txq->et_cq_unmask_addr = cq_unmask_addr;
161 	txq->et_cq_numa_addr = cq_numanode;
162 	txq->et_state |= ENA_TXQ_STATE_CQ_CREATED;
163 
164 	/*
165 	 * Third, create the Submission Queue to match with the above
166 	 * CQ. At this time we force the SQ and CQ to have the same
167 	 * number of descriptors as we only use a 1:1 completion
168 	 * policy. However, in the future, we could loosen this and
169 	 * use an on-demand completion policy and the two could have a
170 	 * different number of descriptors.
171 	 */
172 	ASSERT3U(txq->et_sq_num_descs, ==, txq->et_cq_num_descs);
173 
174 	ret = ena_create_sq(ena, txq->et_sq_num_descs,
175 	    txq->et_sq_dma.edb_cookie->dmac_laddress, true, cq_hw_idx,
176 	    &sq_hw_idx, &sq_db_addr);
177 
178 	if (ret != 0) {
179 		ena_err(ena, "failed to create Tx SQ %u: %d", txq->et_txqs_idx,
180 		    ret);
181 		return (false);
182 	}
183 
184 	txq->et_sq_hw_idx = sq_hw_idx;
185 	txq->et_sq_db_addr = sq_db_addr;
186 	/* The phase must always start on 1. */
187 	txq->et_sq_phase = 1;
188 	txq->et_sq_avail_descs = txq->et_sq_num_descs;
189 	txq->et_blocked = false;
190 	txq->et_stall_watchdog = 0;
191 	txq->et_state |= ENA_TXQ_STATE_SQ_CREATED;
192 
193 	return (true);
194 }
195 
196 void
197 ena_cleanup_txq(ena_txq_t *txq, bool resetting)
198 {
199 	int ret = 0;
200 	ena_t *ena = txq->et_ena;
201 
202 	if ((txq->et_state & ENA_TXQ_STATE_SQ_CREATED) != 0) {
203 		if (!resetting) {
204 			ret = ena_destroy_sq(ena, txq->et_sq_hw_idx, true);
205 
206 			if (ret != 0) {
207 				ena_err(ena, "failed to destroy Tx SQ %u: %d",
208 				    txq->et_txqs_idx, ret);
209 			}
210 		}
211 
212 		txq->et_sq_hw_idx = 0;
213 		txq->et_sq_db_addr = NULL;
214 		txq->et_sq_tail_idx = 0;
215 		txq->et_sq_phase = 0;
216 		txq->et_state &= ~ENA_TXQ_STATE_SQ_CREATED;
217 	}
218 
219 	if ((txq->et_state & ENA_TXQ_STATE_CQ_CREATED) != 0) {
220 		if (!resetting) {
221 			ret = ena_destroy_cq(ena, txq->et_cq_hw_idx);
222 
223 			if (ret != 0) {
224 				ena_err(ena, "failed to destroy Tx CQ %u: %d",
225 				    txq->et_txqs_idx, ret);
226 			}
227 		}
228 
229 		txq->et_cq_hw_idx = 0;
230 		txq->et_cq_head_idx = 0;
231 		txq->et_cq_phase = 0;
232 		txq->et_cq_unmask_addr = NULL;
233 		txq->et_cq_numa_addr = NULL;
234 		txq->et_state &= ~ENA_TXQ_STATE_CQ_CREATED;
235 	}
236 
237 	ena_free_tx_dma(txq);
238 	VERIFY3S(txq->et_state, ==, ENA_TXQ_STATE_NONE);
239 }
240 
241 void
242 ena_ring_tx_stop(mac_ring_driver_t rh)
243 {
244 	ena_txq_t *txq = (ena_txq_t *)rh;
245 	uint32_t intr_ctrl;
246 
247 	intr_ctrl = ena_hw_abs_read32(txq->et_ena, txq->et_cq_unmask_addr);
248 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
249 	ena_hw_abs_write32(txq->et_ena, txq->et_cq_unmask_addr, intr_ctrl);
250 
251 	txq->et_state &= ~ENA_TXQ_STATE_RUNNING;
252 	txq->et_state &= ~ENA_TXQ_STATE_READY;
253 }
254 
255 int
256 ena_ring_tx_start(mac_ring_driver_t rh, uint64_t gen_num)
257 {
258 	ena_txq_t *txq = (ena_txq_t *)rh;
259 	ena_t *ena = txq->et_ena;
260 	uint32_t intr_ctrl;
261 
262 	ena_dbg(ena, "ring_tx_start %p: state 0x%x", txq, txq->et_state);
263 
264 	mutex_enter(&txq->et_lock);
265 	txq->et_m_gen_num = gen_num;
266 	mutex_exit(&txq->et_lock);
267 
268 	txq->et_state |= ENA_TXQ_STATE_READY;
269 
270 	intr_ctrl = ena_hw_abs_read32(ena, txq->et_cq_unmask_addr);
271 	ENAHW_REG_INTR_UNMASK(intr_ctrl);
272 	ena_hw_abs_write32(ena, txq->et_cq_unmask_addr, intr_ctrl);
273 	txq->et_state |= ENA_TXQ_STATE_RUNNING;
274 
275 	return (0);
276 }
277 
278 static ena_tx_control_block_t *
279 ena_tcb_alloc(ena_txq_t *txq)
280 {
281 	ena_tx_control_block_t *tcb;
282 
283 	ASSERT(MUTEX_HELD(&txq->et_lock));
284 
285 	if (txq->et_tcbs_freelist_size == 0)
286 		return (NULL);
287 	txq->et_tcbs_freelist_size--;
288 	tcb = txq->et_tcbs_freelist[txq->et_tcbs_freelist_size];
289 	txq->et_tcbs_freelist[txq->et_tcbs_freelist_size] = NULL;
290 
291 	return (tcb);
292 }
293 
294 static void
295 ena_tcb_free(ena_txq_t *txq, ena_tx_control_block_t *tcb)
296 {
297 	ASSERT3P(tcb, !=, NULL);
298 	ASSERT(MUTEX_HELD(&txq->et_lock));
299 	ASSERT3U(txq->et_tcbs_freelist_size, <, txq->et_sq_num_descs);
300 	txq->et_tcbs_freelist[txq->et_tcbs_freelist_size++] = tcb;
301 }
302 
303 
304 static void
305 ena_tx_copy_fragment(ena_tx_control_block_t *tcb, const mblk_t *mp,
306     const size_t off, const size_t len)
307 {
308 	const void *soff = mp->b_rptr + off;
309 	void *doff =
310 	    (void *)(tcb->etcb_dma.edb_va + tcb->etcb_dma.edb_used_len);
311 
312 	VERIFY3U(len, >, 0);
313 	VERIFY3P(soff, >=, mp->b_rptr);
314 	VERIFY3P(soff, <=, mp->b_wptr);
315 	VERIFY3U(len, <=, MBLKL(mp));
316 	VERIFY3U((uintptr_t)soff + len, <=, (uintptr_t)mp->b_wptr);
317 	VERIFY3U(tcb->etcb_dma.edb_used_len + len, <, tcb->etcb_dma.edb_len);
318 
319 	bcopy(soff, doff, len);
320 	tcb->etcb_type = ENA_TCB_COPY;
321 	tcb->etcb_dma.edb_used_len += len;
322 }
323 
324 static void
325 ena_tcb_pull(const ena_txq_t *txq, ena_tx_control_block_t *tcb, mblk_t *mp)
326 {
327 	mblk_t *nmp = mp;
328 	ena_t *ena = txq->et_ena;
329 
330 	ASSERT(MUTEX_HELD(&txq->et_lock));
331 	VERIFY3U(msgsize(mp), <, ena->ena_tx_buf_sz);
332 	ASSERT3P(tcb, !=, NULL);
333 	VERIFY0(tcb->etcb_dma.edb_used_len);
334 
335 	while (nmp != NULL) {
336 		const size_t nmp_len = MBLKL(nmp);
337 
338 		if (nmp_len == 0) {
339 			nmp = nmp->b_cont;
340 			continue;
341 		}
342 
343 		ena_tx_copy_fragment(tcb, nmp, 0, nmp_len);
344 		nmp = nmp->b_cont;
345 	}
346 
347 	ENA_DMA_SYNC(tcb->etcb_dma, DDI_DMA_SYNC_FORDEV);
348 
349 	VERIFY3P(tcb->etcb_mp, ==, NULL);
350 	tcb->etcb_mp = mp;
351 }
352 
353 static void
354 ena_fill_tx_data_desc(ena_txq_t *txq, ena_tx_control_block_t *tcb,
355     uint16_t req_id, uint8_t phase, enahw_tx_data_desc_t *desc,
356     mac_ether_offload_info_t *meo, size_t mlen)
357 {
358 	VERIFY3U(mlen, <=, ENAHW_TX_DESC_LENGTH_MASK);
359 
360 #ifdef DEBUG
361 	/*
362 	 * If there is no header for the specific layer it will be set
363 	 * to zero, thus we elide the meoi_flags check here.
364 	 */
365 	size_t hdr_len = meo->meoi_l2hlen + meo->meoi_l3hlen + meo->meoi_l4hlen;
366 	ASSERT3U(hdr_len, <=, txq->et_ena->ena_tx_max_hdr_len);
367 #endif
368 
369 	bzero(desc, sizeof (*desc));
370 	ENAHW_TX_DESC_FIRST_ON(desc);
371 	ENAHW_TX_DESC_LENGTH(desc, mlen);
372 	ENAHW_TX_DESC_REQID_HI(desc, req_id);
373 	ENAHW_TX_DESC_REQID_LO(desc, req_id);
374 	ENAHW_TX_DESC_PHASE(desc, phase);
375 	ENAHW_TX_DESC_DF_ON(desc);
376 	ENAHW_TX_DESC_LAST_ON(desc);
377 	ENAHW_TX_DESC_COMP_REQ_ON(desc);
378 	ENAHW_TX_DESC_META_DESC_OFF(desc);
379 	ENAHW_TX_DESC_ADDR_LO(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
380 	ENAHW_TX_DESC_ADDR_HI(desc, tcb->etcb_dma.edb_cookie->dmac_laddress);
381 	/*
382 	 * NOTE: Please see the block comment above
383 	 * etd_buff_addr_hi_hdr_sz to see why this is set to 0.
384 	 */
385 	ENAHW_TX_DESC_HEADER_LENGTH(desc, 0);
386 	ENAHW_TX_DESC_TSO_OFF(desc);
387 	ENAHW_TX_DESC_L3_CSUM_OFF(desc);
388 	ENAHW_TX_DESC_L4_CSUM_OFF(desc);
389 	/*
390 	 * Enabling this bit tells the device NOT to calculate the
391 	 * pseudo header checksum.
392 	 */
393 	ENAHW_TX_DESC_L4_CSUM_PARTIAL_ON(desc);
394 }
395 
396 static void
397 ena_submit_tx(ena_txq_t *txq, uint16_t desc_idx)
398 {
399 	ena_hw_abs_write32(txq->et_ena, txq->et_sq_db_addr, desc_idx);
400 }
401 
402 /*
403  * For now we do the simplest thing possible. All Tx uses bcopy to
404  * pre-allocated buffers, no checksum, no TSO, etc.
405  */
406 mblk_t *
407 ena_ring_tx(void *arg, mblk_t *mp)
408 {
409 	ena_txq_t *txq = arg;
410 	ena_t *ena = txq->et_ena;
411 	mac_ether_offload_info_t meo;
412 	enahw_tx_data_desc_t *desc;
413 	ena_tx_control_block_t *tcb;
414 	const uint16_t modulo_mask = txq->et_sq_num_descs - 1;
415 	uint16_t tail_mod;
416 
417 	VERIFY3P(mp->b_next, ==, NULL);
418 
419 	/*
420 	 * The ena_state value is written by atomic operations. The
421 	 * et_state value is currently Write Once, but if that changes
422 	 * it should also be written with atomics.
423 	 */
424 	if (!(ena->ena_state & ENA_STATE_STARTED) ||
425 	    !(txq->et_state & ENA_TXQ_STATE_RUNNING)) {
426 		freemsg(mp);
427 		return (NULL);
428 	}
429 
430 	if (mac_ether_offload_info(mp, &meo) != 0) {
431 		freemsg(mp);
432 		mutex_enter(&txq->et_stat_lock);
433 		txq->et_stat.ets_hck_meoifail.value.ui64++;
434 		mutex_exit(&txq->et_stat_lock);
435 		return (NULL);
436 	}
437 
438 	mutex_enter(&txq->et_lock);
439 
440 	/*
441 	 * For the moment there are an equal number of Tx descs and Tx
442 	 * contexts. Currently Tx is copy only, and each context buffer is
443 	 * guaranteed to be as large as MTU + frame header, see
444 	 * ena_update_buf_sizes().
445 	 */
446 	if (txq->et_blocked || txq->et_sq_avail_descs == 0) {
447 		txq->et_blocked = true;
448 		mutex_enter(&txq->et_stat_lock);
449 		txq->et_stat.ets_blocked.value.ui64++;
450 		mutex_exit(&txq->et_stat_lock);
451 		mutex_exit(&txq->et_lock);
452 		return (mp);
453 	}
454 
455 	ASSERT3U(meo.meoi_len, <=, ena->ena_max_frame_total);
456 
457 	/*
458 	 * There are as many pre-allocated TCBs as there are Tx descs so we
459 	 * should never fail to get one.
460 	 */
461 	tcb = ena_tcb_alloc(txq);
462 	ASSERT3P(tcb, !=, NULL);
463 	ena_tcb_pull(txq, tcb, mp);
464 
465 	/* Fill in the Tx descriptor. */
466 	tail_mod = txq->et_sq_tail_idx & modulo_mask;
467 	desc = &txq->et_sq_descs[tail_mod].etd_data;
468 	ena_fill_tx_data_desc(txq, tcb, tcb->etcb_id, txq->et_sq_phase, desc,
469 	    &meo, meo.meoi_len);
470 	DTRACE_PROBE3(tx__submit, ena_tx_control_block_t *, tcb, uint16_t,
471 	    tcb->etcb_id, enahw_tx_data_desc_t *, desc);
472 
473 	txq->et_sq_avail_descs--;
474 
475 	/*
476 	 * Remember, we submit the raw tail value to the device, the
477 	 * hardware performs its own modulo (like we did to get
478 	 * tail_mod).
479 	 */
480 	txq->et_sq_tail_idx++;
481 	ena_submit_tx(txq, txq->et_sq_tail_idx);
482 
483 	mutex_enter(&txq->et_stat_lock);
484 	txq->et_stat.ets_packets.value.ui64++;
485 	txq->et_stat.ets_bytes.value.ui64 += meo.meoi_len;
486 	mutex_exit(&txq->et_stat_lock);
487 
488 	if ((txq->et_sq_tail_idx & modulo_mask) == 0)
489 		txq->et_sq_phase ^= 1;
490 
491 	mutex_exit(&txq->et_lock);
492 
493 	return (NULL);
494 }
495 
496 void
497 ena_tx_intr_work(ena_txq_t *txq)
498 {
499 	uint16_t head_mod;
500 	enahw_tx_cdesc_t *cdesc;
501 	ena_tx_control_block_t *tcb;
502 	uint16_t req_id;
503 	uint64_t recycled = 0;
504 	bool unblocked = false;
505 	const uint16_t modulo_mask = txq->et_cq_num_descs - 1;
506 	ena_t *ena = txq->et_ena;
507 
508 	mutex_enter(&txq->et_lock);
509 	head_mod = txq->et_cq_head_idx & modulo_mask;
510 	ENA_DMA_SYNC(txq->et_cq_dma, DDI_DMA_SYNC_FORKERNEL);
511 	cdesc = &txq->et_cq_descs[head_mod];
512 
513 	/* Recycle any completed descriptors. */
514 	while (ENAHW_TX_CDESC_GET_PHASE(cdesc) == txq->et_cq_phase) {
515 		mblk_t *mp;
516 
517 		/* Get the corresponding TCB. */
518 		req_id = cdesc->etc_req_id;
519 		if (req_id > txq->et_sq_num_descs) {
520 			ena_err(ena, "invalid Tx request ID: 0x%x", req_id);
521 			ena_trigger_reset(ena, ENAHW_RESET_INV_TX_REQ_ID);
522 			break;
523 		}
524 		tcb = &txq->et_tcbs[req_id];
525 		DTRACE_PROBE2(tx__complete, uint16_t, req_id,
526 		    ena_tx_control_block_t *, tcb);
527 
528 		/* Free the associated mblk. */
529 		tcb->etcb_dma.edb_used_len = 0;
530 		mp = tcb->etcb_mp;
531 		tcb->etcb_mp = NULL;
532 		VERIFY3P(mp, !=, NULL);
533 		freemsg(mp);
534 
535 		/* Add this descriptor back to the free list. */
536 		ena_tcb_free(txq, tcb);
537 		txq->et_sq_avail_descs++;
538 
539 		/* Move on and check for phase rollover. */
540 		txq->et_cq_head_idx++;
541 		head_mod = txq->et_cq_head_idx & modulo_mask;
542 		if (head_mod == 0)
543 			txq->et_cq_phase ^= 1;
544 
545 		if (txq->et_blocked) {
546 			txq->et_blocked = false;
547 			txq->et_stall_watchdog = 0;
548 			unblocked = true;
549 			mac_tx_ring_update(ena->ena_mh, txq->et_mrh);
550 		}
551 
552 		recycled++;
553 		cdesc = &txq->et_cq_descs[head_mod];
554 	}
555 
556 	mutex_exit(&txq->et_lock);
557 
558 	if (recycled == 0)
559 		return;
560 
561 	/* Update stats. */
562 	mutex_enter(&txq->et_stat_lock);
563 	txq->et_stat.ets_recycled.value.ui64 += recycled;
564 	if (unblocked) {
565 		txq->et_stat.ets_unblocked.value.ui64++;
566 	}
567 	mutex_exit(&txq->et_stat_lock);
568 }
569