/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

struct nvmf_host_command {
	struct nvmf_request *req;
	TAILQ_ENTRY(nvmf_host_command) link;
	uint16_t cid;
};

struct nvmf_host_qpair {
	struct nvmf_softc *sc;
	struct nvmf_qpair *qp;

	bool sq_flow_control;
	bool shutting_down;
	u_int allocating;
	u_int num_commands;
	uint16_t sqhd;
	uint16_t sqtail;
	uint64_t submitted;

	struct mtx lock;

	TAILQ_HEAD(, nvmf_host_command) free_commands;
	STAILQ_HEAD(, nvmf_request) pending_requests;

	/* Indexed by cid. */
	struct nvmf_host_command **active_commands;

	char name[16];
	struct sysctl_ctx_list sysctl_ctx;
};

/*
 * Allocate a request wrapping a command capsule for 'sqe' to be
 * submitted on 'qp'.  Returns NULL if the queue pair has been shut
 * down or if allocation fails.
 */
struct nvmf_request *
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));

	req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
	if (req == NULL)
		return (NULL);

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		free(req, M_NVMF);
		return (NULL);
	}
	qp->allocating++;
	MPASS(qp->allocating != 0);
	mtx_unlock(&qp->lock);

	req->qp = qp;
	req->cb = cb;
	req->cb_arg = cb_arg;
	req->nc = nvmf_allocate_command(nq, sqe, how);
	if (req->nc == NULL) {
		free(req, M_NVMF);
		req = NULL;
	}

	mtx_lock(&qp->lock);
	qp->allocating--;
	if (qp->allocating == 0 && qp->shutting_down)
		wakeup(qp);
	mtx_unlock(&qp->lock);

	return (req);
}

/* Complete a request with a synthetic "Command Aborted By Host" status. */
static void
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
{
	struct nvme_completion cqe;

	memset(&cqe, 0, sizeof(cqe));
	cqe.cid = cid;
	cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
	    NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
	req->cb(req->cb_arg, &cqe);
}

void
nvmf_free_request(struct nvmf_request *req)
{
	if (req->nc != NULL)
		nvmf_free_capsule(req->nc);
	free(req, M_NVMF);
}

/*
 * Fill in the command slot's CID and transmit the capsule, forcing a
 * disconnect if the transport rejects it.
 */
static void
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
{
	struct nvmf_softc *sc = qp->sc;
	struct nvme_command *sqe;
	struct nvmf_capsule *nc;
	int error;

	nc = cmd->req->nc;
	sqe = nvmf_capsule_sqe(nc);

	/*
	 * NB: Don't bother byte-swapping the cid so that receive
	 * doesn't have to swap.
	 */
	sqe->cid = cmd->cid;

	error = nvmf_transmit_capsule(nc);
	if (error != 0) {
		device_printf(sc->dev,
		    "failed to transmit capsule: %d, disconnecting\n", error);
		nvmf_disconnect(sc);
		return;
	}

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_tx_traffic, 1);
}
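/*
 * Transport error callback.  Log the error (unless it is the simple
 * close of a queue pair during shutdown) and disconnect the
 * association.
 */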
static void
nvmf_qp_error(void *arg, int error)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;

	/* Ignore simple close of queue pairs during shutdown. */
	if (!(sc->detaching && error == 0))
		device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
		    qp->name);
	nvmf_disconnect(sc);
}

/*
 * Completion handler: match the CQE's CID to an active command,
 * recycle the command slot (dispatching a queued request if one is
 * pending), and invoke the request's completion callback.
 */
static void
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	const struct nvme_completion *cqe;
	uint16_t cid;

	cqe = nvmf_capsule_cqe(nc);

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_rx_traffic, 1);

	/*
	 * NB: Don't bother byte-swapping the cid as transmit doesn't
	 * swap either.
	 */
	cid = cqe->cid;

	/* Valid CIDs range from 0 to num_commands - 1. */
	if (cid >= qp->num_commands) {
		device_printf(sc->dev,
		    "received invalid CID %u, disconnecting\n", cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	/*
	 * If the queue has been shut down due to an error, drop the
	 * response.
	 */
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		device_printf(sc->dev,
		    "received completion for CID %u on shutdown %s\n", cid,
		    qp->name);
		mtx_unlock(&qp->lock);
		nvmf_free_capsule(nc);
		return;
	}

	cmd = qp->active_commands[cid];
	if (cmd == NULL) {
		mtx_unlock(&qp->lock);
		device_printf(sc->dev,
		    "received completion for inactive CID %u, disconnecting\n",
		    cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
	req = cmd->req;
	cmd->req = NULL;
	if (STAILQ_EMPTY(&qp->pending_requests)) {
		qp->active_commands[cid] = NULL;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
		mtx_unlock(&qp->lock);
	} else {
		cmd->req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		qp->submitted++;
		mtx_unlock(&qp->lock);
		nvmf_dispatch_command(qp, cmd);
	}

	req->cb(req->cb_arg, cqe);
	nvmf_free_capsule(nc);
	nvmf_free_request(req);
}

/* Export per-queue state and statistics via sysctl. */
static void
nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
    bool admin, u_int qid)
{
	struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *list;
	char name[8];

	if (admin) {
		oid = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
		    "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
	} else {
		snprintf(name, sizeof(name), "%u", qid);
		oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
	}
	list = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
	    NULL, qp->num_commands + 1, "Number of entries in queue");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
	    0, "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
	    0, "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
	    &qp->submitted, 0, "Number of commands submitted");
}
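/*
 * Create a host queue pair: initialize the command slot table and the
 * free command list, then bind to a transport queue pair described by
 * the handoff parameters.  Returns NULL if the transport queue pair
 * cannot be allocated.
 */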
struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
    struct nvmf_handoff_qpair_params *handoff, const char *name, u_int qid)
{
	struct nvmf_host_command *cmd, *ncmd;
	struct nvmf_host_qpair *qp;
	u_int i;

	qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
	qp->sc = sc;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
	(void)sysctl_ctx_init(&qp->sysctl_ctx);

	/*
	 * Allocate a spare command slot for each pending AER command
	 * on the admin queue.
	 */
	qp->num_commands = handoff->qsize - 1;
	if (handoff->admin)
		qp->num_commands += sc->num_aer;

	qp->active_commands = malloc(sizeof(*qp->active_commands) *
	    qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&qp->free_commands);
	for (i = 0; i < qp->num_commands; i++) {
		cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
		cmd->cid = i;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
	}
	STAILQ_INIT(&qp->pending_requests);

	qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
	    qp, nvmf_receive_capsule, qp);
	if (qp->qp == NULL) {
		(void)sysctl_ctx_free(&qp->sysctl_ctx);
		TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
			TAILQ_REMOVE(&qp->free_commands, cmd, link);
			free(cmd, M_NVMF);
		}
		free(qp->active_commands, M_NVMF);
		mtx_destroy(&qp->lock);
		free(qp, M_NVMF);
		return (NULL);
	}

	nvmf_sysctls_qp(sc, qp, handoff->admin, qid);

	return (qp);
}
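/*
 * Detach the queue pair from its transport queue pair and abort all
 * outstanding requests.  If a shutdown is already in progress, wait
 * for it to complete.
 */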
void
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;

	if (nq == NULL) {
		while (qp->shutting_down)
			mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
		mtx_unlock(&qp->lock);
		return;
	}
	qp->shutting_down = true;
	while (qp->allocating != 0)
		mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
	mtx_unlock(&qp->lock);

	nvmf_free_qpair(nq);

	/*
	 * Abort outstanding requests.  Active requests will have
	 * their I/O completions invoked and associated capsules freed
	 * by the transport layer via nvmf_free_qpair.  Pending
	 * requests must have their I/O completion invoked via
	 * nvmf_abort_capsule_data.
	 */
	for (u_int i = 0; i < qp->num_commands; i++) {
		cmd = qp->active_commands[i];
		if (cmd != NULL) {
			if (!cmd->req->aer)
				printf("%s: aborted active command %p (CID %u)\n",
				    __func__, cmd->req, cmd->cid);

			/* This was freed by nvmf_free_qpair. */
			cmd->req->nc = NULL;
			nvmf_abort_request(cmd->req, cmd->cid);
			nvmf_free_request(cmd->req);
			free(cmd, M_NVMF);
		}
	}
	while (!STAILQ_EMPTY(&qp->pending_requests)) {
		req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		if (!req->aer)
			printf("%s: aborted pending command %p\n", __func__,
			    req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
	}

	mtx_lock(&qp->lock);
	qp->shutting_down = false;
	mtx_unlock(&qp->lock);
	wakeup(qp);
}

/* Shut down the queue pair and release all of its resources. */
void
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd, *ncmd;

	nvmf_shutdown_qp(qp);
	(void)sysctl_ctx_free(&qp->sysctl_ctx);

	TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
		TAILQ_REMOVE(&qp->free_commands, cmd, link);
		free(cmd, M_NVMF);
	}
	free(qp->active_commands, M_NVMF);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMF);
}

/*
 * Submit a request on its queue pair.  If a command slot is free, the
 * request is transmitted immediately; otherwise it is queued and sent
 * once an in-flight command completes.  If the queue pair has been
 * shut down, the request is completed with an abort status.
 */
void
nvmf_submit_request(struct nvmf_request *req)
{
	struct nvmf_host_qpair *qp;
	struct nvmf_host_command *cmd;

	qp = req->qp;
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		printf("%s: aborted pending command %p\n", __func__, req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
		return;
	}
	cmd = TAILQ_FIRST(&qp->free_commands);
	if (cmd == NULL) {
		/*
		 * Queue this request.  Will be sent after enough
		 * in-flight requests have completed.
		 */
		STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
		mtx_unlock(&qp->lock);
		return;
	}

	TAILQ_REMOVE(&qp->free_commands, cmd, link);
	KASSERT(qp->active_commands[cmd->cid] == NULL,
	    ("%s: CID already busy", __func__));
	qp->active_commands[cmd->cid] = cmd;
	cmd->req = req;
	qp->submitted++;
	mtx_unlock(&qp->lock);
	nvmf_dispatch_command(qp, cmd);
}
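/*
 * Illustrative usage sketch (assumptions, not part of this driver):
 * a caller builds an SQE, wraps it in a request with a completion
 * callback, and submits it on a queue pair.  The names 'example_done',
 * 'example_softc' and its 'admin' member, and 'arg' are hypothetical.
 *
 *	static void
 *	example_done(void *arg, const struct nvme_completion *cqe)
 *	{
 *		// Check le16toh(cqe->status) and wake up the waiter in 'arg'.
 *	}
 *
 *	struct nvme_command sqe;
 *	struct nvmf_request *req;
 *
 *	memset(&sqe, 0, sizeof(sqe));
 *	sqe.opc = NVME_OPC_KEEP_ALIVE;
 *	req = nvmf_allocate_request(example_softc->admin, &sqe, example_done,
 *	    arg, M_WAITOK);
 *	if (req != NULL)
 *		nvmf_submit_request(req);
 */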