/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/reboot.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

static struct cdevsw nvmf_cdevsw;

bool nvmf_fail_disconnect = false;
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
    &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");

MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");

static void	nvmf_disconnect_task(void *arg, int pending);
static void	nvmf_shutdown_pre_sync(void *arg, int howto);
static void	nvmf_shutdown_post_sync(void *arg, int howto);

void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}

static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}

static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete,
	    &status, M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}

static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}

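/*
 * Keep Alive handling (summary of the scheme implemented below): when
 * an association is created with a non-zero KATO, two callouts run.
 * The TX timer fires every KATO/2 and sends a Keep Alive command
 * unless Traffic Based Keep Alive (TBKAS) is in effect and another
 * command was already transmitted during the interval.  The RX timer
 * fires every KATO and disconnects the association if no traffic
 * (including Keep Alive completions) was received from the controller
 * since the previous expiration.
 */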
static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}

static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}

static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}

int
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
{
	size_t len;
	u_int i;
	int error;

	memset(ivars, 0, sizeof(*ivars));

	if (!hh->admin.admin || hh->num_io_queues < 1)
		return (EINVAL);

	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
	if (error != 0)
		goto out;
	nvme_controller_data_swapbytes(ivars->cdata);

	len = hh->num_io_queues * sizeof(*ivars->io_params);
	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
	error = copyin(hh->io, ivars->io_params, len);
	if (error != 0)
		goto out;
	for (i = 0; i < hh->num_io_queues; i++) {
		if (ivars->io_params[i].admin) {
			error = EINVAL;
			goto out;
		}

		/* Require all I/O queues to be the same size. */
		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
			error = EINVAL;
			goto out;
		}
	}

	ivars->hh = hh;
	return (0);

out:
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
	return (error);
}

void
nvmf_free_ivars(struct nvmf_ivars *ivars)
{
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
}

static int
nvmf_probe(device_t dev)
{
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	char desc[260];

	if (ivars == NULL)
		return (ENXIO);

	snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn);
	device_set_desc_copy(dev, desc);
	return (BUS_PROBE_DEFAULT);
}

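/*
 * Bring up the queues for an association using the connection state
 * handed off in the ivars: create the admin queue pair, create each
 * I/O queue pair, and start the Keep Alive timers when a non-zero
 * KATO was negotiated.
 */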
static int
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
{
	char name[16];

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
	    "admin queue");
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = ivars->hh->num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
		    &ivars->io_params[i], name);
		if (sc->io[i] == NULL) {
			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
			    i + 1);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (ivars->hh->kato != 0) {
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	return (0);
}

typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
    const struct nvme_namespace_data *, void *);

static bool
nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp,
    nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		nvme_namespace_data_swapbytes(data);
		if (!cb(sc, nsid, data, cb_arg))
			return (false);
	}

	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	if (nsid >= 0xfffffffd)
		*nsidp = 0;
	else
		*nsidp = nsid + 1;
	return (true);
}

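/*
 * Walk the full set of active namespaces by repeatedly fetching the
 * active namespace ID list (a page of up to nitems(nslist->ns) IDs at
 * a time), issuing IDENTIFY for each listed namespace, and invoking
 * the callback with the byte-swapped namespace data.  The scan stops
 * when the callback returns false, a command fails, or the list is
 * exhausted.
 */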
static bool
nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
    void *cb_arg)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
	for (;;) {
		if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
		    cb_arg)) {
			retval = false;
			break;
		}
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}

static bool
nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg __unused)
{
	if (sc->ns[nsid - 1] != NULL) {
		device_printf(sc->dev,
		    "duplicate namespace %u in active namespace list\n",
		    nsid);
		return (false);
	}

	/*
	 * As in nvme_ns_construct, a size of zero indicates an
	 * invalid namespace.
	 */
	if (data->nsze == 0) {
		device_printf(sc->dev,
		    "ignoring active namespace %u with zero size\n", nsid);
		return (true);
	}

	sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

	nvmf_sim_rescan_ns(sc, nsid);
	return (true);
}

static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
}

static int
nvmf_attach(device_t dev)
{
	struct make_dev_args mda;
	struct nvmf_softc *sc = device_get_softc(dev);
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	uint64_t val;
	u_int i;
	int error;

	if (ivars == NULL)
		return (ENXIO);

	sc->dev = dev;
	sc->trtype = ivars->hh->trtype;
	callout_init(&sc->ka_rx_timer, 1);
	callout_init(&sc->ka_tx_timer, 1);
	sx_init(&sc->connection_lock, "nvmf connection");
	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);

	/* Claim the cdata pointer from ivars. */
	sc->cdata = ivars->cdata;
	ivars->cdata = NULL;

	nvmf_init_aer(sc);

	/* TODO: Multiqueue support. */
	sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;

	error = nvmf_establish_connection(sc, ivars);
	if (error != 0)
		goto out;

	error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CAP\n");
		error = ENXIO;
		goto out;
	}

	error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch VS\n");
		error = ENXIO;
		goto out;
	}
	sc->vs = val;

	/* Honor MDTS if it is set. */
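	/*
	 * MDTS is expressed as a power of two in units of the minimum
	 * memory page size (CAP.MPSMIN), so the clamp below works out
	 * to (1 << (NVME_MPS_SHIFT + MPSMIN)) << MDTS bytes, further
	 * limited to maxphys.
	 */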
	sc->max_xfer_size = maxphys;
	if (sc->cdata->mdts != 0) {
		sc->max_xfer_size = ulmin(sc->max_xfer_size,
		    1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
		    NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
	}

	error = nvmf_init_sim(sc);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	if (!nvmf_add_namespaces(sc)) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
	    nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
	sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_FIRST);

	return (0);
out:
	if (sc->ns != NULL) {
		for (i = 0; i < sc->cdata->nn; i++) {
			if (sc->ns[i] != NULL)
				nvmf_destroy_ns(sc->ns[i]);
		}
		free(sc->ns, M_NVMF);
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		if (sc->io[i] != NULL)
			nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (error);
}

void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
}

static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach
		 * (nvmf_add_namespaces).  Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	sx_xunlock(&sc->connection_lock);
}

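/*
 * Handle a reconnect request from userland (NVMF_RECONNECT_HOST):
 * after validating that the handoff uses the same transport type and
 * names the same subsystem NQN, and that no association is currently
 * active, re-establish the queues, restart the AERs, and wake the
 * namespace consumers and the SIM back up before rescanning all
 * namespaces.
 */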
static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	u_int i;
	int error;

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != hh->trtype) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		return (EINVAL);
	}

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
	    sizeof(ivars.cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, &ivars);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);

	nvmf_rescan_all_ns(sc);
out:
	sx_xunlock(&sc->connection_lock);
	nvmf_free_ivars(&ivars);
	return (error);
}

static void
nvmf_shutdown_pre_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is disconnected, abort any pending
	 * requests with an error to permit filesystems to unmount
	 * without hanging.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	for (u_int i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_shutdown_ns(sc->ns[i]);
	}
	nvmf_shutdown_sim(sc);
	sx_xunlock(&sc->connection_lock);
}

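/*
 * Once the final sync has completed there is no further use for the
 * association, so (if it is still connected) request a normal
 * controller shutdown via CC.SHN and tear down the admin and I/O
 * queue pairs.
 */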
static void
nvmf_shutdown_post_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is connected, disconnect gracefully.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	nvmf_shutdown_controller(sc);
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;
	sx_xunlock(&sc->connection_lock);
}

static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
	EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (0);
}

static void
nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data)
{
	struct nvmf_namespace *ns;

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	nvmf_sim_rescan_ns(sc, nsid);
}

void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	nvmf_rescan_ns_1(sc, nsid, data);

	free(data, M_NVMF);
}

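/*
 * Destroy any namespace devices whose NSIDs fall in the half-open
 * range [first_nsid, next_valid_nsid); such namespaces are no longer
 * present in the controller's active namespace list.
 */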
static void
nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
    uint32_t next_valid_nsid)
{
	struct nvmf_namespace *ns;

	for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++)
	{
		/* XXX: Needs locking around sc->ns[]. */
		ns = sc->ns[nsid - 1];
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;

			nvmf_sim_rescan_ns(sc, nsid);
		}
	}
}

static bool
nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg)
{
	uint32_t *last_nsid = arg;

	/* Check for any gaps prior to this namespace. */
	nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
	*last_nsid = nsid;

	nvmf_rescan_ns_1(sc, nsid, data);
	return (true);
}

void
nvmf_rescan_all_ns(struct nvmf_softc *sc)
{
	uint32_t last_nsid;

	last_nsid = 0;
	if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
		return;

	/*
	 * Check for any namespace devices after the last active
	 * namespace.
	 */
	nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}

int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	free(buf, M_NVMF);
	return (error);
}

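/*
 * ioctl handler for the controller character device created in
 * nvmf_attach().  It services NVMe passthrough of admin commands,
 * answers NVME_GET_NSID and NVME_GET_MAX_XFER_SIZE queries, and
 * implements the two nvmf-specific requests used by userland to
 * reconnect: fetching the parameters of the current association
 * (NVMF_RECONNECT_PARAMS) and handing off a re-established connection
 * (NVMF_RECONNECT_HOST).
 */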
static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_reconnect_params *rp;
	struct nvmf_handoff_host *hh;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVMF_RECONNECT_PARAMS:
		rp = (struct nvmf_reconnect_params *)arg;
		if ((sc->cdata->fcatt & 1) == 0)
			rp->cntlid = NVMF_CNTLID_DYNAMIC;
		else
			rp->cntlid = sc->cdata->ctrlr_id;
		memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
		return (0);
	case NVMF_RECONNECT_HOST:
		hh = (struct nvmf_handoff_host *)arg;
		return (nvmf_reconnect_host(sc, hh));
	default:
		return (ENOTTY);
	}
}

static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};

static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (nvmf_ctl_load());
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, nvmf_probe),
	DEVMETHOD(device_attach, nvmf_attach),
	DEVMETHOD(device_detach, nvmf_detach),
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);