/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/nv.h>
#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

struct nvmf_host_command {
	struct nvmf_request *req;
	TAILQ_ENTRY(nvmf_host_command) link;
	uint16_t cid;
};

struct nvmf_host_qpair {
	struct nvmf_softc *sc;
	struct nvmf_qpair *qp;

	bool	sq_flow_control;
	bool	shutting_down;
	u_int	allocating;
	u_int	num_commands;
	uint16_t sqhd;
	uint16_t sqtail;
	uint64_t submitted;

	struct mtx lock;

	TAILQ_HEAD(, nvmf_host_command) free_commands;
	STAILQ_HEAD(, nvmf_request) pending_requests;

	/* Indexed by cid. */
	struct nvmf_host_command **active_commands;

	char	name[16];
	struct sysctl_ctx_list sysctl_ctx;
};

struct nvmf_request *
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));

	req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
	if (req == NULL)
		return (NULL);

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		free(req, M_NVMF);
		return (NULL);
	}
	qp->allocating++;
	MPASS(qp->allocating != 0);
	mtx_unlock(&qp->lock);

	req->qp = qp;
	req->cb = cb;
	req->cb_arg = cb_arg;
	req->nc = nvmf_allocate_command(nq, sqe, how);
	if (req->nc == NULL) {
		free(req, M_NVMF);
		req = NULL;
	}

	mtx_lock(&qp->lock);
	qp->allocating--;
	if (qp->allocating == 0 && qp->shutting_down)
		wakeup(qp);
	mtx_unlock(&qp->lock);

	return (req);
}

static void
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
{
	struct nvme_completion cqe;

	memset(&cqe, 0, sizeof(cqe));
	cqe.cid = cid;
	cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
	    NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
	req->cb(req->cb_arg, &cqe);
}

void
nvmf_free_request(struct nvmf_request *req)
{
	if (req->nc != NULL)
		nvmf_free_capsule(req->nc);
	free(req, M_NVMF);
}

static void
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
{
	struct nvmf_softc *sc = qp->sc;
	struct nvme_command *sqe;
	struct nvmf_capsule *nc;
	uint16_t new_sqtail;
	int error;

	mtx_assert(&qp->lock, MA_OWNED);

	qp->submitted++;

	/*
	 * Update flow control tracking.  This is just a sanity check.
	 * Since num_commands == qsize - 1, there can never be too
	 * many commands in flight.
	 */
	new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1);
	KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp));
	qp->sqtail = new_sqtail;
	mtx_unlock(&qp->lock);

	nc = cmd->req->nc;
	sqe = nvmf_capsule_sqe(nc);

	/*
	 * NB: Don't bother byte-swapping the cid so that receive
	 * doesn't have to swap.
	 */
	sqe->cid = cmd->cid;

	error = nvmf_transmit_capsule(nc);
	if (error != 0) {
		device_printf(sc->dev,
		    "failed to transmit capsule: %d, disconnecting\n", error);
		nvmf_disconnect(sc);
		return;
	}

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_tx_traffic, 1);
}

static void
nvmf_qp_error(void *arg, int error)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;

	/* Ignore simple close of queue pairs during shutdown. */
	if (!(sc->detaching && error == 0))
		device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
		    qp->name);
	nvmf_disconnect(sc);
}

static void
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	const struct nvme_completion *cqe;
	uint16_t cid;

	cqe = nvmf_capsule_cqe(nc);

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_rx_traffic, 1);

	/*
	 * NB: Don't bother byte-swapping the cid as transmit doesn't
	 * swap either.
	 */
	cid = cqe->cid;

	/* Valid CIDs index active_commands[], which has num_commands slots. */
	if (cid >= qp->num_commands) {
		device_printf(sc->dev,
		    "received invalid CID %u, disconnecting\n", cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	/* Update flow control tracking. */
	mtx_lock(&qp->lock);
	if (qp->sq_flow_control) {
		if (nvmf_sqhd_valid(nc))
			qp->sqhd = le16toh(cqe->sqhd);
	} else {
		/*
		 * If SQ FC is disabled, just advance the head for
		 * each response capsule received.
		 */
		qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1);
	}

	/*
	 * If the queue has been shut down due to an error, silently
	 * drop the response.
	 */
	if (qp->qp == NULL) {
		device_printf(sc->dev,
		    "received completion for CID %u on shutdown %s\n", cid,
		    qp->name);
		mtx_unlock(&qp->lock);
		nvmf_free_capsule(nc);
		return;
	}

	cmd = qp->active_commands[cid];
	if (cmd == NULL) {
		mtx_unlock(&qp->lock);
		device_printf(sc->dev,
		    "received completion for inactive CID %u, disconnecting\n",
		    cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
	req = cmd->req;
	cmd->req = NULL;
	if (STAILQ_EMPTY(&qp->pending_requests)) {
		qp->active_commands[cid] = NULL;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
		mtx_unlock(&qp->lock);
	} else {
		cmd->req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		nvmf_dispatch_command(qp, cmd);
	}

	req->cb(req->cb_arg, cqe);
	nvmf_free_capsule(nc);
	nvmf_free_request(req);
}

static void
nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
    bool admin, u_int qid)
{
	struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *list;
	char name[8];

	if (admin) {
		oid = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
		    "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
	} else {
		snprintf(name, sizeof(name), "%u", qid);
		oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
	}
	list = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
	    NULL, qp->num_commands + 1, "Number of entries in queue");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
	    0, "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
	    0, "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
	    &qp->submitted, 0, "Number of commands submitted");
}

struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
    const nvlist_t *nvl, const char *name, u_int qid)
{
	struct nvmf_host_command *cmd, *ncmd;
	struct nvmf_host_qpair *qp;
	u_int i;
	bool admin;

	admin = nvlist_get_bool(nvl, "admin");
	qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
	qp->sc = sc;
	qp->sq_flow_control = nvlist_get_bool(nvl, "sq_flow_control");
	qp->sqhd = nvlist_get_number(nvl, "sqhd");
	qp->sqtail = nvlist_get_number(nvl, "sqtail");
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
	(void)sysctl_ctx_init(&qp->sysctl_ctx);

	/*
	 * Allocate a spare command slot for each pending AER command
	 * on the admin queue.
	 */
	qp->num_commands = nvlist_get_number(nvl, "qsize") - 1;
	if (admin)
		qp->num_commands += sc->num_aer;

	qp->active_commands = malloc(sizeof(*qp->active_commands) *
	    qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&qp->free_commands);
	for (i = 0; i < qp->num_commands; i++) {
		cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
		cmd->cid = i;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
	}
	STAILQ_INIT(&qp->pending_requests);

	qp->qp = nvmf_allocate_qpair(trtype, false, nvl, nvmf_qp_error, qp,
	    nvmf_receive_capsule, qp);
	if (qp->qp == NULL) {
		(void)sysctl_ctx_free(&qp->sysctl_ctx);
		TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
			TAILQ_REMOVE(&qp->free_commands, cmd, link);
			free(cmd, M_NVMF);
		}
		free(qp->active_commands, M_NVMF);
		mtx_destroy(&qp->lock);
		free(qp, M_NVMF);
		return (NULL);
	}

	nvmf_sysctls_qp(sc, qp, admin, qid);

	return (qp);
}

void
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;

	if (nq == NULL) {
		while (qp->shutting_down)
			mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
		mtx_unlock(&qp->lock);
		return;
	}
	qp->shutting_down = true;
	while (qp->allocating != 0)
		mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
	mtx_unlock(&qp->lock);

	nvmf_free_qpair(nq);

	/*
	 * Abort outstanding requests.  Active requests will have
	 * their I/O completions invoked and associated capsules freed
	 * by the transport layer via nvmf_free_qpair.  Pending
	 * requests must have their I/O completion invoked via
	 * nvmf_abort_capsule_data.
	 */
	for (u_int i = 0; i < qp->num_commands; i++) {
		cmd = qp->active_commands[i];
		if (cmd != NULL) {
			if (!cmd->req->aer)
				printf("%s: aborted active command %p (CID %u)\n",
				    __func__, cmd->req, cmd->cid);

			/* This was freed by nvmf_free_qpair. */
			cmd->req->nc = NULL;
			nvmf_abort_request(cmd->req, cmd->cid);
			nvmf_free_request(cmd->req);
			free(cmd, M_NVMF);
		}
	}
	while (!STAILQ_EMPTY(&qp->pending_requests)) {
		req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		if (!req->aer)
			printf("%s: aborted pending command %p\n", __func__,
			    req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
	}

	mtx_lock(&qp->lock);
	qp->shutting_down = false;
	mtx_unlock(&qp->lock);
	wakeup(qp);
}

void
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd, *ncmd;

	nvmf_shutdown_qp(qp);
	(void)sysctl_ctx_free(&qp->sysctl_ctx);

	TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
		TAILQ_REMOVE(&qp->free_commands, cmd, link);
		free(cmd, M_NVMF);
	}
	free(qp->active_commands, M_NVMF);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMF);
}

void
nvmf_submit_request(struct nvmf_request *req)
{
	struct nvmf_host_qpair *qp;
	struct nvmf_host_command *cmd;

	qp = req->qp;
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		printf("%s: aborted pending command %p\n", __func__, req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
		return;
	}
	cmd = TAILQ_FIRST(&qp->free_commands);
	if (cmd == NULL) {
		/*
		 * Queue this request.  Will be sent after enough
		 * in-flight requests have completed.
		 */
		STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
		mtx_unlock(&qp->lock);
		return;
	}

	TAILQ_REMOVE(&qp->free_commands, cmd, link);
	KASSERT(qp->active_commands[cmd->cid] == NULL,
	    ("%s: CID already busy", __func__));
	qp->active_commands[cmd->cid] = cmd;
	cmd->req = req;
	nvmf_dispatch_command(qp, cmd);
}
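
/*
 * Illustrative usage sketch, not compiled into the driver: how a caller
 * is expected to drive the request API above.  A caller builds an SQE,
 * allocates a request bound to a queue pair along with a completion
 * callback, and hands it to nvmf_submit_request(), which owns the request
 * from then on.  The callback runs either with the controller's completion
 * or with a synthesized COMMAND_ABORTED_BY_HOST status if the queue pair
 * is torn down first.  The example_* names and the example_ctx structure
 * below are hypothetical; real callers live elsewhere in nvmf(4).
 */
#if 0
struct example_ctx {
	uint16_t status;	/* completion status, host byte order */
	bool	done;
};

/* Matches the completion callback invoked as req->cb(req->cb_arg, cqe). */
static void
example_complete(void *arg, const struct nvme_completion *cqe)
{
	struct example_ctx *ctx = arg;

	ctx->status = le16toh(cqe->status);
	ctx->done = true;
	wakeup(ctx);
}

static int
example_send(struct nvmf_host_qpair *qp, struct example_ctx *ctx)
{
	struct nvme_command cmd;
	struct nvmf_request *req;

	/* Build the SQE; the CID is assigned later by nvmf_dispatch_command. */
	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = NVME_OPC_IDENTIFY;	/* hypothetical choice of opcode */

	req = nvmf_allocate_request(qp, &cmd, example_complete, ctx, M_WAITOK);
	if (req == NULL)
		return (ECONNABORTED);	/* queue pair is shutting down */

	/* Consumes req: it is dispatched, queued, or aborted and freed. */
	nvmf_submit_request(req);
	return (0);
}
#endif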