/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/reboot.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

static struct cdevsw nvmf_cdevsw;

bool nvmf_fail_disconnect = false;
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
    &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");

MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");

static void	nvmf_disconnect_task(void *arg, int pending);
static void	nvmf_shutdown_pre_sync(void *arg, int howto);
static void	nvmf_shutdown_post_sync(void *arg, int howto);

void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}

static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}

static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete,
	    &status, M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}
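/*
 * Request a normal controller shutdown by setting CC.SHN via Fabrics
 * property commands.
 */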
static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}

static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}

static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}

static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}

int
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
{
	size_t len;
	u_int i;
	int error;

	memset(ivars, 0, sizeof(*ivars));

	if (!hh->admin.admin || hh->num_io_queues < 1)
		return (EINVAL);

	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
	if (error != 0)
		goto out;
	nvme_controller_data_swapbytes(ivars->cdata);

	len = hh->num_io_queues * sizeof(*ivars->io_params);
	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
	error = copyin(hh->io, ivars->io_params, len);
	if (error != 0)
		goto out;
	for (i = 0; i < hh->num_io_queues; i++) {
		if (ivars->io_params[i].admin) {
			error = EINVAL;
			goto out;
		}

		/* Require all I/O queues to be the same size. */
		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
			error = EINVAL;
			goto out;
		}
	}

	ivars->hh = hh;
	return (0);

out:
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
	return (error);
}

void
nvmf_free_ivars(struct nvmf_ivars *ivars)
{
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
}

static int
nvmf_probe(device_t dev)
{
	struct nvmf_ivars *ivars = device_get_ivars(dev);

	if (ivars == NULL)
		return (ENXIO);

	device_set_descf(dev, "Fabrics: %.256s", ivars->cdata->subnqn);
	return (BUS_PROBE_DEFAULT);
}
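/*
 * Create the admin and I/O queue pairs from the handed-off queue state
 * and start the KeepAlive timers if a KATO was negotiated.
 */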
static int
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
{
	char name[16];

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
	    "admin queue", 0);
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = ivars->hh->num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
		    &ivars->io_params[i], name, i);
		if (sc->io[i] == NULL) {
			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
			    i + 1);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (ivars->hh->kato != 0) {
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	return (0);
}

typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
    const struct nvme_namespace_data *, void *);

static bool
nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp,
    nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		nvme_namespace_data_swapbytes(data);
		if (!cb(sc, nsid, data, cb_arg))
			return (false);
	}

	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	if (nsid >= NVME_GLOBAL_NAMESPACE_TAG - 1)
		*nsidp = 0;
	else
		*nsidp = nsid;
	return (true);
}
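/*
 * Walk the entire active namespace ID list, invoking the callback for
 * each active namespace.
 */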
static bool
nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
    void *cb_arg)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
	for (;;) {
		if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
		    cb_arg)) {
			retval = false;
			break;
		}
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}

static bool
nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg __unused)
{
	if (sc->ns[nsid - 1] != NULL) {
		device_printf(sc->dev,
		    "duplicate namespace %u in active namespace list\n",
		    nsid);
		return (false);
	}

	/*
	 * As in nvme_ns_construct, a size of zero indicates an
	 * invalid namespace.
	 */
	if (data->nsze == 0) {
		device_printf(sc->dev,
		    "ignoring active namespace %u with zero size\n", nsid);
		return (true);
	}

	sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

	nvmf_sim_rescan_ns(sc, nsid);
	return (true);
}

static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
}
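/*
 * Device attach: claim the controller data from ivars, establish the
 * association, fetch CAP and VS, create the SIM and namespaces, and
 * expose the controller character device.
 */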
static int
nvmf_attach(device_t dev)
{
	struct make_dev_args mda;
	struct nvmf_softc *sc = device_get_softc(dev);
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	struct sysctl_oid *oid;
	uint64_t val;
	u_int i;
	int error;

	if (ivars == NULL)
		return (ENXIO);

	sc->dev = dev;
	sc->trtype = ivars->hh->trtype;
	callout_init(&sc->ka_rx_timer, 1);
	callout_init(&sc->ka_tx_timer, 1);
	sx_init(&sc->connection_lock, "nvmf connection");
	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);

	oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
	sc->ioq_oid_list = SYSCTL_CHILDREN(oid);

	/* Claim the cdata pointer from ivars. */
	sc->cdata = ivars->cdata;
	ivars->cdata = NULL;

	nvmf_init_aer(sc);

	/* TODO: Multiqueue support. */
	sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;

	error = nvmf_establish_connection(sc, ivars);
	if (error != 0)
		goto out;

	error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CAP\n");
		error = ENXIO;
		goto out;
	}

	error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch VS\n");
		error = ENXIO;
		goto out;
	}
	sc->vs = val;

	/* Honor MDTS if it is set. */
	sc->max_xfer_size = maxphys;
	if (sc->cdata->mdts != 0) {
		sc->max_xfer_size = ulmin(sc->max_xfer_size,
		    1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
		    NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
	}

	error = nvmf_init_sim(sc);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	if (!nvmf_add_namespaces(sc)) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
	    nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
	sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_FIRST);

	return (0);
out:
	if (sc->ns != NULL) {
		for (i = 0; i < sc->cdata->nn; i++) {
			if (sc->ns[i] != NULL)
				nvmf_destroy_ns(sc->ns[i]);
		}
		free(sc->ns, M_NVMF);
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		if (sc->io[i] != NULL)
			nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (error);
}

void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
}

static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach
		 * (nvmf_add_namespaces).  Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	sx_xunlock(&sc->connection_lock);
}
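/*
 * Re-establish an association using queue state handed off from userland
 * via the NVMF_RECONNECT_HOST ioctl, after verifying that the new
 * association connects to the same NVMe subsystem.
 */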
static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	u_int i;
	int error;

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != hh->trtype) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		return (EINVAL);
	}

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
	    sizeof(ivars.cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, &ivars);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);

	nvmf_rescan_all_ns(sc);
out:
	sx_xunlock(&sc->connection_lock);
	nvmf_free_ivars(&ivars);
	return (error);
}

static void
nvmf_shutdown_pre_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is disconnected, abort any pending
	 * requests with an error to permit filesystems to unmount
	 * without hanging.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	for (u_int i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_shutdown_ns(sc->ns[i]);
	}
	nvmf_shutdown_sim(sc);
	sx_xunlock(&sc->connection_lock);
}
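/*
 * After the final sync, shut down a connected controller and tear down
 * the queue pairs.
 */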
static void
nvmf_shutdown_post_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is connected, disconnect gracefully.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	nvmf_shutdown_controller(sc);
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;
	sx_xunlock(&sc->connection_lock);
}

static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
	EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (0);
}

static void
nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data)
{
	struct nvmf_namespace *ns;

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	nvmf_sim_rescan_ns(sc, nsid);
}

void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	nvmf_rescan_ns_1(sc, nsid, data);

	free(data, M_NVMF);
}
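/*
 * Destroy namespaces with IDs in the range [first_nsid, next_valid_nsid)
 * that are no longer present in the active namespace list.
 */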
static void
nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
    uint32_t next_valid_nsid)
{
	struct nvmf_namespace *ns;

	for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) {
		/* XXX: Needs locking around sc->ns[]. */
		ns = sc->ns[nsid - 1];
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;

			nvmf_sim_rescan_ns(sc, nsid);
		}
	}
}

static bool
nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg)
{
	uint32_t *last_nsid = arg;

	/* Check for any gaps prior to this namespace. */
	nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
	*last_nsid = nsid;

	nvmf_rescan_ns_1(sc, nsid, data);
	return (true);
}

void
nvmf_rescan_all_ns(struct nvmf_softc *sc)
{
	uint32_t last_nsid;

	last_nsid = 0;
	if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
		return;

	/*
	 * Check for any namespace devices after the last active
	 * namespace.
	 */
	nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}

int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	sx_slock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		device_printf(sc->dev,
		    "failed to send passthrough command\n");
		error = ECONNABORTED;
		sx_sunlock(&sc->connection_lock);
		goto error;
	}
	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	sx_sunlock(&sc->connection_lock);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	free(buf, M_NVMF);
	return (error);
}
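/*
 * ioctl handler for the per-controller character device: NVMe passthrough
 * commands, namespace and transfer-size queries, and reconnect support.
 */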
static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_reconnect_params *rp;
	struct nvmf_handoff_host *hh;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVMF_RECONNECT_PARAMS:
		rp = (struct nvmf_reconnect_params *)arg;
		if ((sc->cdata->fcatt & 1) == 0)
			rp->cntlid = NVMF_CNTLID_DYNAMIC;
		else
			rp->cntlid = sc->cdata->ctrlr_id;
		memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
		return (0);
	case NVMF_RECONNECT_HOST:
		hh = (struct nvmf_handoff_host *)arg;
		return (nvmf_reconnect_host(sc, hh));
	default:
		return (ENOTTY);
	}
}

static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};

static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (nvmf_ctl_load());
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		nvmf_probe),
	DEVMETHOD(device_attach,	nvmf_attach),
	DEVMETHOD(device_detach,	nvmf_detach),
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);