xref: /freebsd/sys/dev/apple_bce/apple_bce_queue.c (revision 255538cd906045095d0c2113ae6c4731ce36c0cf)
1 /*
2  * Copyright (c) 2026 Abdelkader Boudih <freebsd@seuros.com>
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Apple BCE queue management -- CQ, SQ, and command queue operations.
7  */
8 
9 #include <sys/param.h>
10 #include <sys/bus.h>
11 #include <sys/kernel.h>
12 #include <sys/lock.h>
13 #include <sys/malloc.h>
14 #include <sys/mutex.h>
15 #include <sys/sema.h>
16 #include <sys/systm.h>
17 #include <machine/bus.h>
18 #include <machine/atomic.h>
19 
20 #include "apple_bce.h"
21 #include "apple_bce_queue.h"
22 
23 static MALLOC_DEFINE(M_BCE, "apple_bce", "Apple BCE driver");
24 
25 #define BCE_CMDQ_TIMEOUT_MS	5000
26 
/*
 * Argument block handed to bce_dma_cb() through bus_dmamap_load().
 * The callback fills it in so the caller can read back the outcome
 * of the load once bus_dmamap_load() has returned.
 */
struct bce_dma_cb_arg {
	bus_addr_t	addr;	/* ds_addr of segment 0; written only on success */
	int		error;	/* error code the callback was invoked with */
};
34 
35 static int
36 bce_cmdq_wait(struct bce_queue_cmdq *cmdq, uint32_t slot,
37     struct bce_queue_cmdq_result *res)
38 {
39 	int error;
40 
41 	error = sema_timedwait(&res->cmpl, hz * BCE_CMDQ_TIMEOUT_MS / 1000);
42 	if (error == 0)
43 		return (0);
44 
45 	mtx_lock(&cmdq->lck);
46 	if (cmdq->tres[slot] == res)
47 		cmdq->tres[slot] = NULL;
48 	mtx_unlock(&cmdq->lck);
49 
50 	return (ETIMEDOUT);
51 }
52 
53 static void
54 bce_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
55 {
56 	struct bce_dma_cb_arg *cb = arg;
57 
58 	cb->error = error;
59 	if (error == 0)
60 		cb->addr = segs[0].ds_addr;
61 }
62 
63 /*
64  * Allocate a completion queue with DMA-coherent memory.
65  */
66 struct bce_queue_cq *
67 bce_alloc_cq(struct apple_bce_softc *sc, int qid, uint32_t el_count)
68 {
69 	struct bce_queue_cq *cq;
70 	struct bce_dma_cb_arg cb;
71 	int error;
72 
73 	cq = malloc(sizeof(*cq), M_BCE, M_WAITOK | M_ZERO);
74 	cq->qid = qid;
75 	cq->el_count = el_count;
76 	cq->index = 0;
77 
78 	error = bus_dma_tag_create(sc->sc_dma_tag,
79 	    4, 0,			/* alignment, boundary */
80 	    BUS_SPACE_MAXADDR,		/* lowaddr */
81 	    BUS_SPACE_MAXADDR,		/* highaddr */
82 	    NULL, NULL,			/* filter */
83 	    el_count * sizeof(struct bce_qe_completion),
84 	    1,				/* nsegments */
85 	    el_count * sizeof(struct bce_qe_completion),
86 	    BUS_DMA_WAITOK,		/* flags */
87 	    NULL, NULL,			/* lockfunc */
88 	    &cq->dma_tag);
89 	if (error) {
90 		free(cq, M_BCE);
91 		return (NULL);
92 	}
93 
94 	error = bus_dmamem_alloc(cq->dma_tag, (void **)&cq->data,
95 	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &cq->dma_map);
96 	if (error) {
97 		bus_dma_tag_destroy(cq->dma_tag);
98 		free(cq, M_BCE);
99 		return (NULL);
100 	}
101 
102 	error = bus_dmamap_load(cq->dma_tag, cq->dma_map, cq->data,
103 	    el_count * sizeof(struct bce_qe_completion),
104 	    bce_dma_cb, &cb, BUS_DMA_WAITOK);
105 	if (error || cb.error) {
106 		bus_dmamem_free(cq->dma_tag, cq->data, cq->dma_map);
107 		bus_dma_tag_destroy(cq->dma_tag);
108 		free(cq, M_BCE);
109 		return (NULL);
110 	}
111 	cq->dma_addr = cb.addr;
112 
113 	return (cq);
114 }
115 
116 void
117 bce_free_cq(struct apple_bce_softc *sc, struct bce_queue_cq *cq)
118 {
119 	if (cq == NULL)
120 		return;
121 	bus_dmamap_unload(cq->dma_tag, cq->dma_map);
122 	bus_dmamem_free(cq->dma_tag, cq->data, cq->dma_map);
123 	bus_dma_tag_destroy(cq->dma_tag);
124 	free(cq, M_BCE);
125 }
126 
127 void
128 bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg)
129 {
130 	cfg->qid = (uint16_t)cq->qid;
131 	cfg->el_count = (uint16_t)cq->el_count;
132 	cfg->vector_or_cq = 0;
133 	cfg->_pad = 0;
134 	cfg->addr = cq->dma_addr;
135 	cfg->length = cq->el_count * sizeof(struct bce_qe_completion);
136 }
137 
138 /*
139  * Process completions from a CQ and dispatch to the target SQs.
140  */
141 void
142 bce_handle_cq_completions(struct apple_bce_softc *sc, struct bce_queue_cq *cq)
143 {
144 	struct bce_qe_completion *e;
145 	struct bce_queue_sq *sq;
146 	int i;
147 	bus_dmamap_sync(cq->dma_tag, cq->dma_map, BUS_DMASYNC_POSTREAD);
148 
149 	e = bce_cq_element(cq, cq->index);
150 	if (!(e->flags & BCE_CQ_FLAG_PENDING))
151 		return;
152 
153 	while ((e = bce_cq_element(cq, cq->index))->flags &
154 	    BCE_CQ_FLAG_PENDING) {
155 		/* Route completion to target SQ (skip qid 0 which is the CQ) */
156 		if (e->qid > 0 && e->qid < BCE_MAX_QUEUE_COUNT) {
157 			sq = (struct bce_queue_sq *)sc->sc_queues[e->qid];
158 			if (sq != NULL &&
159 			    e->completion_index < sq->el_count) {
160 				sq->completion_data[e->completion_index].status =
161 				    e->status;
162 				sq->completion_data[e->completion_index].data_size =
163 				    e->data_size;
164 				sq->completion_data[e->completion_index].result =
165 				    e->result;
166 				/* Advance tail so completion callback sees it */
167 				sq->completion_tail =
168 				    (e->completion_index + 1) % sq->el_count;
169 				sq->has_pending = 1;
170 			}
171 		}
172 
173 		e->flags = 0;
174 		cq->index = (cq->index + 1) % cq->el_count;
175 	}
176 
177 	bus_dmamap_sync(cq->dma_tag, cq->dma_map, BUS_DMASYNC_PREWRITE);
178 
179 	/* Ring doorbell with updated consumer index */
180 	bus_write_4(sc->sc_bar2, BCE_REG_DOORBELL_BASE + cq->qid * 4,
181 	    cq->index);
182 
183 	/* Fire callbacks on SQs that received completions */
184 	for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++) {
185 		sq = sc->sc_int_sq_list[i];
186 		if (sq != NULL && sq->has_pending) {
187 			sq->has_pending = 0;
188 			if (sq->completion != NULL)
189 				sq->completion(sq);
190 		}
191 	}
192 }
193 
194 /*
195  * Allocate a submission queue with DMA-coherent memory.
196  */
197 struct bce_queue_sq *
198 bce_alloc_sq(struct apple_bce_softc *sc, int qid, uint32_t el_size,
199     uint32_t el_count, bce_sq_completion_fn compl, void *userdata)
200 {
201 	struct bce_queue_sq *sq;
202 	struct bce_dma_cb_arg cb;
203 	int error;
204 
205 	sq = malloc(sizeof(*sq), M_BCE, M_WAITOK | M_ZERO);
206 	sq->qid = qid;
207 	sq->el_size = el_size;
208 	sq->el_count = el_count;
209 	sq->completion = compl;
210 	sq->userdata = userdata;
211 	sq->available_commands = el_count - 1;
212 
213 	error = bus_dma_tag_create(sc->sc_dma_tag,
214 	    4, 0,
215 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
216 	    NULL, NULL,
217 	    el_count * el_size, 1, el_count * el_size,
218 	    BUS_DMA_WAITOK,
219 	    NULL, NULL,
220 	    &sq->dma_tag);
221 	if (error) {
222 		free(sq, M_BCE);
223 		return (NULL);
224 	}
225 
226 	error = bus_dmamem_alloc(sq->dma_tag, &sq->data,
227 	    BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sq->dma_map);
228 	if (error) {
229 		bus_dma_tag_destroy(sq->dma_tag);
230 		free(sq, M_BCE);
231 		return (NULL);
232 	}
233 
234 	error = bus_dmamap_load(sq->dma_tag, sq->dma_map, sq->data,
235 	    el_count * el_size, bce_dma_cb, &cb, BUS_DMA_WAITOK);
236 	if (error || cb.error) {
237 		bus_dmamem_free(sq->dma_tag, sq->data, sq->dma_map);
238 		bus_dma_tag_destroy(sq->dma_tag);
239 		free(sq, M_BCE);
240 		return (NULL);
241 	}
242 	sq->dma_addr = cb.addr;
243 
244 	sq->completion_data = malloc(sizeof(*sq->completion_data) * el_count,
245 	    M_BCE, M_WAITOK | M_ZERO);
246 
247 	return (sq);
248 }
249 
250 void
251 bce_free_sq(struct apple_bce_softc *sc, struct bce_queue_sq *sq)
252 {
253 	if (sq == NULL)
254 		return;
255 	free(sq->completion_data, M_BCE);
256 	bus_dmamap_unload(sq->dma_tag, sq->dma_map);
257 	bus_dmamem_free(sq->dma_tag, sq->data, sq->dma_map);
258 	bus_dma_tag_destroy(sq->dma_tag);
259 	free(sq, M_BCE);
260 }
261 
262 void
263 bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq,
264     struct bce_queue_memcfg *cfg)
265 {
266 	cfg->qid = (uint16_t)sq->qid;
267 	cfg->el_count = (uint16_t)sq->el_count;
268 	cfg->vector_or_cq = (uint16_t)cq->qid;
269 	cfg->_pad = 0;
270 	cfg->addr = sq->dma_addr;
271 	cfg->length = sq->el_count * sq->el_size;
272 }
273 
274 int
275 bce_reserve_submission(struct bce_queue_sq *sq)
276 {
277 	int old, new;
278 
279 	do {
280 		old = atomic_load_int(&sq->available_commands);
281 		if (old <= 0)
282 			return (EAGAIN);
283 		new = old - 1;
284 	} while (!atomic_cmpset_int(&sq->available_commands, old, new));
285 
286 	return (0);
287 }
288 
289 void *
290 bce_next_submission(struct bce_queue_sq *sq)
291 {
292 	void *ret;
293 
294 	ret = bce_sq_element(sq, sq->tail);
295 	sq->tail = (sq->tail + 1) % sq->el_count;
296 	return (ret);
297 }
298 
299 void
300 bce_submit_to_device(struct apple_bce_softc *sc, struct bce_queue_sq *sq)
301 {
302 	bus_dmamap_sync(sq->dma_tag, sq->dma_map, BUS_DMASYNC_PREWRITE);
303 	bus_write_4(sc->sc_bar2, BCE_REG_DOORBELL_BASE + sq->qid * 4,
304 	    sq->tail);
305 }
306 
307 void
308 bce_notify_submission_complete(struct bce_queue_sq *sq)
309 {
310 	sq->head = (sq->head + 1) % sq->el_count;
311 	atomic_add_int(&sq->available_commands, 1);
312 }
313 
314 /*
315  * Command queue -- wraps an SQ for synchronous control operations.
316  */
317 static void	bce_cmdq_completion(struct bce_queue_sq *sq);
318 
319 struct bce_queue_cmdq *
320 bce_alloc_cmdq(struct apple_bce_softc *sc, struct bce_queue_sq *sq)
321 {
322 	struct bce_queue_cmdq *cmdq;
323 
324 	cmdq = malloc(sizeof(*cmdq), M_BCE, M_WAITOK | M_ZERO);
325 	cmdq->sq = sq;
326 	mtx_init(&cmdq->lck, "bce_cmdq", NULL, MTX_DEF);
327 	cmdq->tres = malloc(sizeof(void *) * sq->el_count, M_BCE,
328 	    M_WAITOK | M_ZERO);
329 
330 	/* Wire up completion callback */
331 	sq->completion = bce_cmdq_completion;
332 	sq->userdata = cmdq;
333 
334 	return (cmdq);
335 }
336 
337 void
338 bce_free_cmdq(struct bce_queue_cmdq *cmdq)
339 {
340 	if (cmdq == NULL)
341 		return;
342 	mtx_destroy(&cmdq->lck);
343 	free(cmdq->tres, M_BCE);
344 	free(cmdq, M_BCE);
345 }
346 
347 /*
348  * Command queue completion callback -- wake waiters.
349  */
350 static void
351 bce_cmdq_completion(struct bce_queue_sq *sq)
352 {
353 	struct bce_queue_cmdq *cmdq = sq->userdata;
354 	struct bce_queue_cmdq_result *res;
355 
356 	mtx_lock(&cmdq->lck);
357 	while (sq->completion_cidx != sq->completion_tail) {
358 		struct bce_sq_completion_data *cd;
359 
360 		cd = &sq->completion_data[sq->completion_cidx];
361 		res = cmdq->tres[sq->completion_cidx];
362 		if (res != NULL) {
363 			res->status = cd->status;
364 			res->result = cd->result;
365 			sema_post(&res->cmpl);
366 			cmdq->tres[sq->completion_cidx] = NULL;
367 		}
368 		sq->completion_cidx = (sq->completion_cidx + 1) %
369 		    sq->el_count;
370 		bce_notify_submission_complete(sq);
371 	}
372 	mtx_unlock(&cmdq->lck);
373 }
374 
375 uint32_t
376 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq,
377     struct apple_bce_softc *sc, struct bce_queue_memcfg *cfg,
378     const char *name, int isdirout)
379 {
380 	struct bce_queue_cmdq_result res;
381 	struct bce_cmdq_reg_cmd *cmd;
382 	uint32_t slot;
383 	int error;
384 
385 	sema_init(&res.cmpl, 0, "bce_cmd");
386 
387 	if (bce_reserve_submission(cmdq->sq) != 0) {
388 		sema_destroy(&res.cmpl);
389 		return (EAGAIN);
390 	}
391 
392 	mtx_lock(&cmdq->lck);
393 	slot = cmdq->sq->tail;
394 	cmdq->tres[slot] = &res;
395 	cmd = bce_next_submission(cmdq->sq);
396 
397 	memset(cmd, 0, BCE_CMD_SIZE);
398 	cmd->cmd = BCE_CMD_REGISTER_QUEUE;
399 	cmd->flags = (name ? BCE_CMDQ_FLAG_NAMED : 0) |
400 	    (isdirout ? BCE_CMDQ_FLAG_OUT : 0);
401 	cmd->qid = cfg->qid;
402 	cmd->el_count = cfg->el_count;
403 	cmd->vector_or_cq = cfg->vector_or_cq;
404 	if (name != NULL) {
405 		cmd->name_len = (uint16_t)MIN(strlen(name), sizeof(cmd->name));
406 		memcpy(cmd->name, name, cmd->name_len);
407 	}
408 	cmd->addr = cfg->addr;
409 	cmd->length = cfg->length;
410 
411 	bce_submit_to_device(sc, cmdq->sq);
412 	mtx_unlock(&cmdq->lck);
413 
414 	error = bce_cmdq_wait(cmdq, slot, &res);
415 	sema_destroy(&res.cmpl);
416 	if (error != 0)
417 		return (error);
418 
419 	return (res.status);
420 }
421 
422 uint32_t
423 bce_cmd_unregister_queue(struct bce_queue_cmdq *cmdq,
424     struct apple_bce_softc *sc, int qid)
425 {
426 	struct bce_queue_cmdq_result res;
427 	struct bce_cmdq_simple_cmd *cmd;
428 	uint32_t slot;
429 	int error;
430 
431 	sema_init(&res.cmpl, 0, "bce_cmd");
432 
433 	if (bce_reserve_submission(cmdq->sq) != 0) {
434 		sema_destroy(&res.cmpl);
435 		return (EAGAIN);
436 	}
437 
438 	mtx_lock(&cmdq->lck);
439 	slot = cmdq->sq->tail;
440 	cmdq->tres[slot] = &res;
441 	cmd = bce_next_submission(cmdq->sq);
442 
443 	memset(cmd, 0, BCE_CMD_SIZE);
444 	cmd->cmd = BCE_CMD_UNREGISTER_QUEUE;
445 	cmd->qid = (uint16_t)qid;
446 
447 	bce_submit_to_device(sc, cmdq->sq);
448 	mtx_unlock(&cmdq->lck);
449 
450 	error = bce_cmdq_wait(cmdq, slot, &res);
451 	sema_destroy(&res.cmpl);
452 	if (error != 0)
453 		return (error);
454 
455 	return (res.status);
456 }
457 
458 uint32_t
459 bce_cmd_flush_queue(struct bce_queue_cmdq *cmdq,
460     struct apple_bce_softc *sc, int qid)
461 {
462 	struct bce_queue_cmdq_result res;
463 	struct bce_cmdq_simple_cmd *cmd;
464 	uint32_t slot;
465 	int error;
466 
467 	sema_init(&res.cmpl, 0, "bce_cmd");
468 
469 	if (bce_reserve_submission(cmdq->sq) != 0) {
470 		sema_destroy(&res.cmpl);
471 		return (EAGAIN);
472 	}
473 
474 	mtx_lock(&cmdq->lck);
475 	slot = cmdq->sq->tail;
476 	cmdq->tres[slot] = &res;
477 	cmd = bce_next_submission(cmdq->sq);
478 
479 	memset(cmd, 0, BCE_CMD_SIZE);
480 	cmd->cmd = BCE_CMD_FLUSH_QUEUE;
481 	cmd->qid = (uint16_t)qid;
482 
483 	bce_submit_to_device(sc, cmdq->sq);
484 	mtx_unlock(&cmdq->lck);
485 
486 	error = bce_cmdq_wait(cmdq, slot, &res);
487 	sema_destroy(&res.cmpl);
488 	if (error != 0)
489 		return (error);
490 
491 	return (res.status);
492 }
493