/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

struct nvmf_host_command {
	struct nvmf_request *req;
	TAILQ_ENTRY(nvmf_host_command) link;
	uint16_t cid;
};

struct nvmf_host_qpair {
	struct nvmf_softc *sc;
	struct nvmf_qpair *qp;

	bool sq_flow_control;
	bool shutting_down;
	u_int allocating;
	u_int num_commands;
	uint16_t sqhd;
	uint16_t sqtail;
	uint64_t submitted;

	struct mtx lock;

	TAILQ_HEAD(, nvmf_host_command) free_commands;
	STAILQ_HEAD(, nvmf_request) pending_requests;

	/* Indexed by cid. */
	struct nvmf_host_command **active_commands;

	char name[16];
	struct sysctl_ctx_list sysctl_ctx;
};

struct nvmf_request *
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));

	req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
	if (req == NULL)
		return (NULL);

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		free(req, M_NVMF);
		return (NULL);
	}
	qp->allocating++;
	MPASS(qp->allocating != 0);
	mtx_unlock(&qp->lock);

	req->qp = qp;
	req->cb = cb;
	req->cb_arg = cb_arg;
	req->nc = nvmf_allocate_command(nq, sqe, how);
	if (req->nc == NULL) {
		free(req, M_NVMF);
		req = NULL;
	}

	mtx_lock(&qp->lock);
	qp->allocating--;
	if (qp->allocating == 0 && qp->shutting_down)
		wakeup(qp);
	mtx_unlock(&qp->lock);

	return (req);
}

static void
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
{
	struct nvme_completion cqe;

	memset(&cqe, 0, sizeof(cqe));
	cqe.cid = cid;
	cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
	    NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
	req->cb(req->cb_arg, &cqe);
}

void
nvmf_free_request(struct nvmf_request *req)
{
	if (req->nc != NULL)
		nvmf_free_capsule(req->nc);
	free(req, M_NVMF);
}

static void
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
{
	struct nvmf_softc *sc = qp->sc;
	struct nvme_command *sqe;
	struct nvmf_capsule *nc;
	uint16_t new_sqtail;
	int error;

	mtx_assert(&qp->lock, MA_OWNED);

	qp->submitted++;

	/*
	 * Update flow control tracking.  This is just a sanity check.
	 * Since num_commands == qsize - 1, there can never be too
	 * many commands in flight.
	 */
	new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1);
	KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp));
	qp->sqtail = new_sqtail;
	mtx_unlock(&qp->lock);

	nc = cmd->req->nc;
	sqe = nvmf_capsule_sqe(nc);

	/*
	 * NB: Don't bother byte-swapping the cid so that receive
	 * doesn't have to swap.
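	 * The raw value is used as-is to index active_commands[] when
	 * the completion arrives in nvmf_receive_capsule().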
	 */
	sqe->cid = cmd->cid;

	error = nvmf_transmit_capsule(nc);
	if (error != 0) {
		device_printf(sc->dev,
		    "failed to transmit capsule: %d, disconnecting\n", error);
		nvmf_disconnect(sc);
		return;
	}

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_tx_traffic, 1);
}

static void
nvmf_qp_error(void *arg, int error)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;

	/* Ignore simple close of queue pairs during shutdown. */
	if (!(sc->detaching && error == 0))
		device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
		    qp->name);
	nvmf_disconnect(sc);
}

static void
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	const struct nvme_completion *cqe;
	uint16_t cid;

	cqe = nvmf_capsule_cqe(nc);

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_rx_traffic, 1);

	/*
	 * NB: Don't bother byte-swapping the cid as transmit doesn't
	 * swap either.
	 */
	cid = cqe->cid;

	/* CIDs are assigned in the range [0, num_commands). */
	if (cid >= qp->num_commands) {
		device_printf(sc->dev,
		    "received invalid CID %u, disconnecting\n", cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	/* Update flow control tracking. */
	mtx_lock(&qp->lock);
	if (qp->sq_flow_control) {
		if (nvmf_sqhd_valid(nc))
			qp->sqhd = le16toh(cqe->sqhd);
	} else {
		/*
		 * If SQ FC is disabled, just advance the head for
		 * each response capsule received.
		 */
		qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1);
	}

	/*
	 * If the queue has been shutdown due to an error, silently
	 * drop the response.
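	 * nvmf_shutdown_qp() clears qp->qp before the transport queue
	 * pair is freed, so a completion can still arrive here after
	 * the pointer has been cleared.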
	 */
	if (qp->qp == NULL) {
		device_printf(sc->dev,
		    "received completion for CID %u on shutdown %s\n", cid,
		    qp->name);
		mtx_unlock(&qp->lock);
		nvmf_free_capsule(nc);
		return;
	}

	cmd = qp->active_commands[cid];
	if (cmd == NULL) {
		mtx_unlock(&qp->lock);
		device_printf(sc->dev,
		    "received completion for inactive CID %u, disconnecting\n",
		    cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
	req = cmd->req;
	cmd->req = NULL;
	if (STAILQ_EMPTY(&qp->pending_requests)) {
		qp->active_commands[cid] = NULL;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
		mtx_unlock(&qp->lock);
	} else {
		cmd->req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		nvmf_dispatch_command(qp, cmd);
	}

	req->cb(req->cb_arg, cqe);
	nvmf_free_capsule(nc);
	nvmf_free_request(req);
}

static void
nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
    bool admin, u_int qid)
{
	struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *list;
	char name[8];

	if (admin) {
		oid = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
		    "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
	} else {
		snprintf(name, sizeof(name), "%u", qid);
		oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
	}
	list = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
	    NULL, qp->num_commands + 1, "Number of entries in queue");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
	    0, "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
	    0, "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
	    &qp->submitted, 0, "Number of commands submitted");
}

struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
    struct nvmf_handoff_qpair_params *handoff, const char *name, u_int qid)
{
	struct nvmf_host_command *cmd, *ncmd;
	struct nvmf_host_qpair *qp;
	u_int i;

	qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
	qp->sc = sc;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
	(void)sysctl_ctx_init(&qp->sysctl_ctx);

	/*
	 * Allocate a spare command slot for each pending AER command
	 * on the admin queue.
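	 * The extra slots keep outstanding AERs from consuming the
	 * slots needed for other admin commands.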
	 */
	qp->num_commands = handoff->qsize - 1;
	if (handoff->admin)
		qp->num_commands += sc->num_aer;

	qp->active_commands = malloc(sizeof(*qp->active_commands) *
	    qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&qp->free_commands);
	for (i = 0; i < qp->num_commands; i++) {
		cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
		cmd->cid = i;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
	}
	STAILQ_INIT(&qp->pending_requests);

	qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
	    qp, nvmf_receive_capsule, qp);
	if (qp->qp == NULL) {
		(void)sysctl_ctx_free(&qp->sysctl_ctx);
		TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
			TAILQ_REMOVE(&qp->free_commands, cmd, link);
			free(cmd, M_NVMF);
		}
		free(qp->active_commands, M_NVMF);
		mtx_destroy(&qp->lock);
		free(qp, M_NVMF);
		return (NULL);
	}

	nvmf_sysctls_qp(sc, qp, handoff->admin, qid);

	return (qp);
}

void
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;

	if (nq == NULL) {
		while (qp->shutting_down)
			mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
		mtx_unlock(&qp->lock);
		return;
	}
	qp->shutting_down = true;
	while (qp->allocating != 0)
		mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
	mtx_unlock(&qp->lock);

	nvmf_free_qpair(nq);

	/*
	 * Abort outstanding requests.  Active requests will have
	 * their I/O completions invoked and associated capsules freed
	 * by the transport layer via nvmf_free_qpair.  Pending
	 * requests must have their I/O completion invoked via
	 * nvmf_abort_capsule_data.
	 */
	for (u_int i = 0; i < qp->num_commands; i++) {
		cmd = qp->active_commands[i];
		if (cmd != NULL) {
			if (!cmd->req->aer)
				printf("%s: aborted active command %p (CID %u)\n",
				    __func__, cmd->req, cmd->cid);

			/* This was freed by nvmf_free_qpair.
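			 * Clear the pointer so nvmf_free_request() does
			 * not free the capsule a second time.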
			 */
			cmd->req->nc = NULL;
			nvmf_abort_request(cmd->req, cmd->cid);
			nvmf_free_request(cmd->req);
			free(cmd, M_NVMF);
		}
	}
	while (!STAILQ_EMPTY(&qp->pending_requests)) {
		req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		if (!req->aer)
			printf("%s: aborted pending command %p\n", __func__,
			    req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
	}

	mtx_lock(&qp->lock);
	qp->shutting_down = false;
	mtx_unlock(&qp->lock);
	wakeup(qp);
}

void
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd, *ncmd;

	nvmf_shutdown_qp(qp);
	(void)sysctl_ctx_free(&qp->sysctl_ctx);

	TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
		TAILQ_REMOVE(&qp->free_commands, cmd, link);
		free(cmd, M_NVMF);
	}
	free(qp->active_commands, M_NVMF);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMF);
}

void
nvmf_submit_request(struct nvmf_request *req)
{
	struct nvmf_host_qpair *qp;
	struct nvmf_host_command *cmd;

	qp = req->qp;
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		printf("%s: aborted pending command %p\n", __func__, req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
		return;
	}
	cmd = TAILQ_FIRST(&qp->free_commands);
	if (cmd == NULL) {
		/*
		 * Queue this request.  Will be sent after enough
		 * in-flight requests have completed.
		 */
		STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
		mtx_unlock(&qp->lock);
		return;
	}

	TAILQ_REMOVE(&qp->free_commands, cmd, link);
	KASSERT(qp->active_commands[cmd->cid] == NULL,
	    ("%s: CID already busy", __func__));
	qp->active_commands[cmd->cid] = cmd;
	cmd->req = req;
	nvmf_dispatch_command(qp, cmd);
}