1 /* 2 * Copyright (c) 2026 Abdelkader Boudih <freebsd@seuros.com> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Apple BCE queue management -- CQ, SQ, and command queue operations. 7 */ 8 9 #include <sys/param.h> 10 #include <sys/bus.h> 11 #include <sys/kernel.h> 12 #include <sys/lock.h> 13 #include <sys/malloc.h> 14 #include <sys/mutex.h> 15 #include <sys/sema.h> 16 #include <sys/systm.h> 17 #include <machine/bus.h> 18 #include <machine/atomic.h> 19 20 #include "apple_bce.h" 21 #include "apple_bce_queue.h" 22 23 static MALLOC_DEFINE(M_BCE, "apple_bce", "Apple BCE driver"); 24 25 #define BCE_CMDQ_TIMEOUT_MS 5000 26 27 /* 28 * DMA callback for bus_dmamap_load. 29 */ 30 struct bce_dma_cb_arg { 31 bus_addr_t addr; 32 int error; 33 }; 34 35 static int 36 bce_cmdq_wait(struct bce_queue_cmdq *cmdq, uint32_t slot, 37 struct bce_queue_cmdq_result *res) 38 { 39 int error; 40 41 error = sema_timedwait(&res->cmpl, hz * BCE_CMDQ_TIMEOUT_MS / 1000); 42 if (error == 0) 43 return (0); 44 45 mtx_lock(&cmdq->lck); 46 if (cmdq->tres[slot] == res) 47 cmdq->tres[slot] = NULL; 48 mtx_unlock(&cmdq->lck); 49 50 return (ETIMEDOUT); 51 } 52 53 static void 54 bce_dma_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 55 { 56 struct bce_dma_cb_arg *cb = arg; 57 58 cb->error = error; 59 if (error == 0) 60 cb->addr = segs[0].ds_addr; 61 } 62 63 /* 64 * Allocate a completion queue with DMA-coherent memory. 65 */ 66 struct bce_queue_cq * 67 bce_alloc_cq(struct apple_bce_softc *sc, int qid, uint32_t el_count) 68 { 69 struct bce_queue_cq *cq; 70 struct bce_dma_cb_arg cb; 71 int error; 72 73 cq = malloc(sizeof(*cq), M_BCE, M_WAITOK | M_ZERO); 74 cq->qid = qid; 75 cq->el_count = el_count; 76 cq->index = 0; 77 78 error = bus_dma_tag_create(sc->sc_dma_tag, 79 4, 0, /* alignment, boundary */ 80 BUS_SPACE_MAXADDR, /* lowaddr */ 81 BUS_SPACE_MAXADDR, /* highaddr */ 82 NULL, NULL, /* filter */ 83 el_count * sizeof(struct bce_qe_completion), 84 1, /* nsegments */ 85 el_count * sizeof(struct bce_qe_completion), 86 BUS_DMA_WAITOK, /* flags */ 87 NULL, NULL, /* lockfunc */ 88 &cq->dma_tag); 89 if (error) { 90 free(cq, M_BCE); 91 return (NULL); 92 } 93 94 error = bus_dmamem_alloc(cq->dma_tag, (void **)&cq->data, 95 BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &cq->dma_map); 96 if (error) { 97 bus_dma_tag_destroy(cq->dma_tag); 98 free(cq, M_BCE); 99 return (NULL); 100 } 101 102 error = bus_dmamap_load(cq->dma_tag, cq->dma_map, cq->data, 103 el_count * sizeof(struct bce_qe_completion), 104 bce_dma_cb, &cb, BUS_DMA_WAITOK); 105 if (error || cb.error) { 106 bus_dmamem_free(cq->dma_tag, cq->data, cq->dma_map); 107 bus_dma_tag_destroy(cq->dma_tag); 108 free(cq, M_BCE); 109 return (NULL); 110 } 111 cq->dma_addr = cb.addr; 112 113 return (cq); 114 } 115 116 void 117 bce_free_cq(struct apple_bce_softc *sc, struct bce_queue_cq *cq) 118 { 119 if (cq == NULL) 120 return; 121 bus_dmamap_unload(cq->dma_tag, cq->dma_map); 122 bus_dmamem_free(cq->dma_tag, cq->data, cq->dma_map); 123 bus_dma_tag_destroy(cq->dma_tag); 124 free(cq, M_BCE); 125 } 126 127 void 128 bce_get_cq_memcfg(struct bce_queue_cq *cq, struct bce_queue_memcfg *cfg) 129 { 130 cfg->qid = (uint16_t)cq->qid; 131 cfg->el_count = (uint16_t)cq->el_count; 132 cfg->vector_or_cq = 0; 133 cfg->_pad = 0; 134 cfg->addr = cq->dma_addr; 135 cfg->length = cq->el_count * sizeof(struct bce_qe_completion); 136 } 137 138 /* 139 * Process completions from a CQ and dispatch to the target SQs. 140 */ 141 void 142 bce_handle_cq_completions(struct apple_bce_softc *sc, struct bce_queue_cq *cq) 143 { 144 struct bce_qe_completion *e; 145 struct bce_queue_sq *sq; 146 int i; 147 bus_dmamap_sync(cq->dma_tag, cq->dma_map, BUS_DMASYNC_POSTREAD); 148 149 e = bce_cq_element(cq, cq->index); 150 if (!(e->flags & BCE_CQ_FLAG_PENDING)) 151 return; 152 153 while ((e = bce_cq_element(cq, cq->index))->flags & 154 BCE_CQ_FLAG_PENDING) { 155 /* Route completion to target SQ (skip qid 0 which is the CQ) */ 156 if (e->qid > 0 && e->qid < BCE_MAX_QUEUE_COUNT) { 157 sq = (struct bce_queue_sq *)sc->sc_queues[e->qid]; 158 if (sq != NULL && 159 e->completion_index < sq->el_count) { 160 sq->completion_data[e->completion_index].status = 161 e->status; 162 sq->completion_data[e->completion_index].data_size = 163 e->data_size; 164 sq->completion_data[e->completion_index].result = 165 e->result; 166 /* Advance tail so completion callback sees it */ 167 sq->completion_tail = 168 (e->completion_index + 1) % sq->el_count; 169 sq->has_pending = 1; 170 } 171 } 172 173 e->flags = 0; 174 cq->index = (cq->index + 1) % cq->el_count; 175 } 176 177 bus_dmamap_sync(cq->dma_tag, cq->dma_map, BUS_DMASYNC_PREWRITE); 178 179 /* Ring doorbell with updated consumer index */ 180 bus_write_4(sc->sc_bar2, BCE_REG_DOORBELL_BASE + cq->qid * 4, 181 cq->index); 182 183 /* Fire callbacks on SQs that received completions */ 184 for (i = 0; i < BCE_MAX_QUEUE_COUNT; i++) { 185 sq = sc->sc_int_sq_list[i]; 186 if (sq != NULL && sq->has_pending) { 187 sq->has_pending = 0; 188 if (sq->completion != NULL) 189 sq->completion(sq); 190 } 191 } 192 } 193 194 /* 195 * Allocate a submission queue with DMA-coherent memory. 196 */ 197 struct bce_queue_sq * 198 bce_alloc_sq(struct apple_bce_softc *sc, int qid, uint32_t el_size, 199 uint32_t el_count, bce_sq_completion_fn compl, void *userdata) 200 { 201 struct bce_queue_sq *sq; 202 struct bce_dma_cb_arg cb; 203 int error; 204 205 sq = malloc(sizeof(*sq), M_BCE, M_WAITOK | M_ZERO); 206 sq->qid = qid; 207 sq->el_size = el_size; 208 sq->el_count = el_count; 209 sq->completion = compl; 210 sq->userdata = userdata; 211 sq->available_commands = el_count - 1; 212 213 error = bus_dma_tag_create(sc->sc_dma_tag, 214 4, 0, 215 BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, 216 NULL, NULL, 217 el_count * el_size, 1, el_count * el_size, 218 BUS_DMA_WAITOK, 219 NULL, NULL, 220 &sq->dma_tag); 221 if (error) { 222 free(sq, M_BCE); 223 return (NULL); 224 } 225 226 error = bus_dmamem_alloc(sq->dma_tag, &sq->data, 227 BUS_DMA_WAITOK | BUS_DMA_ZERO | BUS_DMA_COHERENT, &sq->dma_map); 228 if (error) { 229 bus_dma_tag_destroy(sq->dma_tag); 230 free(sq, M_BCE); 231 return (NULL); 232 } 233 234 error = bus_dmamap_load(sq->dma_tag, sq->dma_map, sq->data, 235 el_count * el_size, bce_dma_cb, &cb, BUS_DMA_WAITOK); 236 if (error || cb.error) { 237 bus_dmamem_free(sq->dma_tag, sq->data, sq->dma_map); 238 bus_dma_tag_destroy(sq->dma_tag); 239 free(sq, M_BCE); 240 return (NULL); 241 } 242 sq->dma_addr = cb.addr; 243 244 sq->completion_data = malloc(sizeof(*sq->completion_data) * el_count, 245 M_BCE, M_WAITOK | M_ZERO); 246 247 return (sq); 248 } 249 250 void 251 bce_free_sq(struct apple_bce_softc *sc, struct bce_queue_sq *sq) 252 { 253 if (sq == NULL) 254 return; 255 free(sq->completion_data, M_BCE); 256 bus_dmamap_unload(sq->dma_tag, sq->dma_map); 257 bus_dmamem_free(sq->dma_tag, sq->data, sq->dma_map); 258 bus_dma_tag_destroy(sq->dma_tag); 259 free(sq, M_BCE); 260 } 261 262 void 263 bce_get_sq_memcfg(struct bce_queue_sq *sq, struct bce_queue_cq *cq, 264 struct bce_queue_memcfg *cfg) 265 { 266 cfg->qid = (uint16_t)sq->qid; 267 cfg->el_count = (uint16_t)sq->el_count; 268 cfg->vector_or_cq = (uint16_t)cq->qid; 269 cfg->_pad = 0; 270 cfg->addr = sq->dma_addr; 271 cfg->length = sq->el_count * sq->el_size; 272 } 273 274 int 275 bce_reserve_submission(struct bce_queue_sq *sq) 276 { 277 int old, new; 278 279 do { 280 old = atomic_load_int(&sq->available_commands); 281 if (old <= 0) 282 return (EAGAIN); 283 new = old - 1; 284 } while (!atomic_cmpset_int(&sq->available_commands, old, new)); 285 286 return (0); 287 } 288 289 void * 290 bce_next_submission(struct bce_queue_sq *sq) 291 { 292 void *ret; 293 294 ret = bce_sq_element(sq, sq->tail); 295 sq->tail = (sq->tail + 1) % sq->el_count; 296 return (ret); 297 } 298 299 void 300 bce_submit_to_device(struct apple_bce_softc *sc, struct bce_queue_sq *sq) 301 { 302 bus_dmamap_sync(sq->dma_tag, sq->dma_map, BUS_DMASYNC_PREWRITE); 303 bus_write_4(sc->sc_bar2, BCE_REG_DOORBELL_BASE + sq->qid * 4, 304 sq->tail); 305 } 306 307 void 308 bce_notify_submission_complete(struct bce_queue_sq *sq) 309 { 310 sq->head = (sq->head + 1) % sq->el_count; 311 atomic_add_int(&sq->available_commands, 1); 312 } 313 314 /* 315 * Command queue -- wraps an SQ for synchronous control operations. 316 */ 317 static void bce_cmdq_completion(struct bce_queue_sq *sq); 318 319 struct bce_queue_cmdq * 320 bce_alloc_cmdq(struct apple_bce_softc *sc, struct bce_queue_sq *sq) 321 { 322 struct bce_queue_cmdq *cmdq; 323 324 cmdq = malloc(sizeof(*cmdq), M_BCE, M_WAITOK | M_ZERO); 325 cmdq->sq = sq; 326 mtx_init(&cmdq->lck, "bce_cmdq", NULL, MTX_DEF); 327 cmdq->tres = malloc(sizeof(void *) * sq->el_count, M_BCE, 328 M_WAITOK | M_ZERO); 329 330 /* Wire up completion callback */ 331 sq->completion = bce_cmdq_completion; 332 sq->userdata = cmdq; 333 334 return (cmdq); 335 } 336 337 void 338 bce_free_cmdq(struct bce_queue_cmdq *cmdq) 339 { 340 if (cmdq == NULL) 341 return; 342 mtx_destroy(&cmdq->lck); 343 free(cmdq->tres, M_BCE); 344 free(cmdq, M_BCE); 345 } 346 347 /* 348 * Command queue completion callback -- wake waiters. 349 */ 350 static void 351 bce_cmdq_completion(struct bce_queue_sq *sq) 352 { 353 struct bce_queue_cmdq *cmdq = sq->userdata; 354 struct bce_queue_cmdq_result *res; 355 356 mtx_lock(&cmdq->lck); 357 while (sq->completion_cidx != sq->completion_tail) { 358 struct bce_sq_completion_data *cd; 359 360 cd = &sq->completion_data[sq->completion_cidx]; 361 res = cmdq->tres[sq->completion_cidx]; 362 if (res != NULL) { 363 res->status = cd->status; 364 res->result = cd->result; 365 sema_post(&res->cmpl); 366 cmdq->tres[sq->completion_cidx] = NULL; 367 } 368 sq->completion_cidx = (sq->completion_cidx + 1) % 369 sq->el_count; 370 bce_notify_submission_complete(sq); 371 } 372 mtx_unlock(&cmdq->lck); 373 } 374 375 uint32_t 376 bce_cmd_register_queue(struct bce_queue_cmdq *cmdq, 377 struct apple_bce_softc *sc, struct bce_queue_memcfg *cfg, 378 const char *name, int isdirout) 379 { 380 struct bce_queue_cmdq_result res; 381 struct bce_cmdq_reg_cmd *cmd; 382 uint32_t slot; 383 int error; 384 385 sema_init(&res.cmpl, 0, "bce_cmd"); 386 387 if (bce_reserve_submission(cmdq->sq) != 0) { 388 sema_destroy(&res.cmpl); 389 return (EAGAIN); 390 } 391 392 mtx_lock(&cmdq->lck); 393 slot = cmdq->sq->tail; 394 cmdq->tres[slot] = &res; 395 cmd = bce_next_submission(cmdq->sq); 396 397 memset(cmd, 0, BCE_CMD_SIZE); 398 cmd->cmd = BCE_CMD_REGISTER_QUEUE; 399 cmd->flags = (name ? BCE_CMDQ_FLAG_NAMED : 0) | 400 (isdirout ? BCE_CMDQ_FLAG_OUT : 0); 401 cmd->qid = cfg->qid; 402 cmd->el_count = cfg->el_count; 403 cmd->vector_or_cq = cfg->vector_or_cq; 404 if (name != NULL) { 405 cmd->name_len = (uint16_t)MIN(strlen(name), sizeof(cmd->name)); 406 memcpy(cmd->name, name, cmd->name_len); 407 } 408 cmd->addr = cfg->addr; 409 cmd->length = cfg->length; 410 411 bce_submit_to_device(sc, cmdq->sq); 412 mtx_unlock(&cmdq->lck); 413 414 error = bce_cmdq_wait(cmdq, slot, &res); 415 sema_destroy(&res.cmpl); 416 if (error != 0) 417 return (error); 418 419 return (res.status); 420 } 421 422 uint32_t 423 bce_cmd_unregister_queue(struct bce_queue_cmdq *cmdq, 424 struct apple_bce_softc *sc, int qid) 425 { 426 struct bce_queue_cmdq_result res; 427 struct bce_cmdq_simple_cmd *cmd; 428 uint32_t slot; 429 int error; 430 431 sema_init(&res.cmpl, 0, "bce_cmd"); 432 433 if (bce_reserve_submission(cmdq->sq) != 0) { 434 sema_destroy(&res.cmpl); 435 return (EAGAIN); 436 } 437 438 mtx_lock(&cmdq->lck); 439 slot = cmdq->sq->tail; 440 cmdq->tres[slot] = &res; 441 cmd = bce_next_submission(cmdq->sq); 442 443 memset(cmd, 0, BCE_CMD_SIZE); 444 cmd->cmd = BCE_CMD_UNREGISTER_QUEUE; 445 cmd->qid = (uint16_t)qid; 446 447 bce_submit_to_device(sc, cmdq->sq); 448 mtx_unlock(&cmdq->lck); 449 450 error = bce_cmdq_wait(cmdq, slot, &res); 451 sema_destroy(&res.cmpl); 452 if (error != 0) 453 return (error); 454 455 return (res.status); 456 } 457 458 uint32_t 459 bce_cmd_flush_queue(struct bce_queue_cmdq *cmdq, 460 struct apple_bce_softc *sc, int qid) 461 { 462 struct bce_queue_cmdq_result res; 463 struct bce_cmdq_simple_cmd *cmd; 464 uint32_t slot; 465 int error; 466 467 sema_init(&res.cmpl, 0, "bce_cmd"); 468 469 if (bce_reserve_submission(cmdq->sq) != 0) { 470 sema_destroy(&res.cmpl); 471 return (EAGAIN); 472 } 473 474 mtx_lock(&cmdq->lck); 475 slot = cmdq->sq->tail; 476 cmdq->tres[slot] = &res; 477 cmd = bce_next_submission(cmdq->sq); 478 479 memset(cmd, 0, BCE_CMD_SIZE); 480 cmd->cmd = BCE_CMD_FLUSH_QUEUE; 481 cmd->qid = (uint16_t)qid; 482 483 bce_submit_to_device(sc, cmdq->sq); 484 mtx_unlock(&cmdq->lck); 485 486 error = bce_cmdq_wait(cmdq, slot, &res); 487 sema_destroy(&res.cmpl); 488 if (error != 0) 489 return (error); 490 491 return (res.status); 492 } 493