/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/reboot.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

static struct cdevsw nvmf_cdevsw;

bool nvmf_fail_disconnect = false;
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
    &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");

MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");

static void	nvmf_disconnect_task(void *arg, int pending);
static void	nvmf_shutdown_pre_sync(void *arg, int howto);
static void	nvmf_shutdown_post_sync(void *arg, int howto);

void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}

static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}

static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}

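/*
 * Request a normal shutdown of the remote controller by setting CC.SHN
 * before the association is dropped.
 */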
static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}

static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}

static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}

static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}

int
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
{
	size_t len;
	u_int i;
	int error;

	memset(ivars, 0, sizeof(*ivars));

	if (!hh->admin.admin || hh->num_io_queues < 1)
		return (EINVAL);

	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
	if (error != 0)
		goto out;
	nvme_controller_data_swapbytes(ivars->cdata);

	len = hh->num_io_queues * sizeof(*ivars->io_params);
	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
	error = copyin(hh->io, ivars->io_params, len);
	if (error != 0)
		goto out;
	for (i = 0; i < hh->num_io_queues; i++) {
		if (ivars->io_params[i].admin) {
			error = EINVAL;
			goto out;
		}

		/* Require all I/O queues to be the same size. */
		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
			error = EINVAL;
			goto out;
		}
	}

	ivars->hh = hh;
	return (0);

out:
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
	return (error);
}

void
nvmf_free_ivars(struct nvmf_ivars *ivars)
{
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
}

static int
nvmf_probe(device_t dev)
{
	struct nvmf_ivars *ivars = device_get_ivars(dev);

	if (ivars == NULL)
		return (ENXIO);

	device_set_descf(dev, "Fabrics: %.256s", ivars->cdata->subnqn);
	return (BUS_PROBE_DEFAULT);
}

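/*
 * Create the admin and I/O queue pairs from the handoff data and start
 * the KeepAlive timers if a KATO was negotiated.
 */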
static int
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
{
	char name[16];

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
	    "admin queue");
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = ivars->hh->num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
		    &ivars->io_params[i], name);
		if (sc->io[i] == NULL) {
			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
			    i + 1);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (ivars->hh->kato != 0) {
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	return (0);
}

typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
    const struct nvme_namespace_data *, void *);

static bool
nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp,
    nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		nvme_namespace_data_swapbytes(data);
		if (!cb(sc, nsid, data, cb_arg))
			return (false);
	}

	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	if (nsid >= 0xfffffffd)
		*nsidp = 0;
	else
		*nsidp = nsid + 1;
	return (true);
}

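/*
 * Iterate over the controller's active namespace ID lists, invoking the
 * callback for each active namespace until the list is exhausted or the
 * callback fails.
 */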
static bool
nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
    void *cb_arg)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
	for (;;) {
		if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
		    cb_arg)) {
			retval = false;
			break;
		}
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}

static bool
nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg __unused)
{
	if (sc->ns[nsid - 1] != NULL) {
		device_printf(sc->dev,
		    "duplicate namespace %u in active namespace list\n",
		    nsid);
		return (false);
	}

	/*
	 * As in nvme_ns_construct, a size of zero indicates an
	 * invalid namespace.
	 */
	if (data->nsze == 0) {
		device_printf(sc->dev,
		    "ignoring active namespace %u with zero size\n", nsid);
		return (true);
	}

	sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

	nvmf_sim_rescan_ns(sc, nsid);
	return (true);
}

static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
}

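/*
 * Attach a new controller: establish the connection, query controller
 * properties, create the SIM and namespaces, and register the control
 * device and shutdown event handlers.
 */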
static int
nvmf_attach(device_t dev)
{
	struct make_dev_args mda;
	struct nvmf_softc *sc = device_get_softc(dev);
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	uint64_t val;
	u_int i;
	int error;

	if (ivars == NULL)
		return (ENXIO);

	sc->dev = dev;
	sc->trtype = ivars->hh->trtype;
	callout_init(&sc->ka_rx_timer, 1);
	callout_init(&sc->ka_tx_timer, 1);
	sx_init(&sc->connection_lock, "nvmf connection");
	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);

	/* Claim the cdata pointer from ivars. */
	sc->cdata = ivars->cdata;
	ivars->cdata = NULL;

	nvmf_init_aer(sc);

	/* TODO: Multiqueue support. */
	sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;

	error = nvmf_establish_connection(sc, ivars);
	if (error != 0)
		goto out;

	error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CAP\n");
		error = ENXIO;
		goto out;
	}

	error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch VS\n");
		error = ENXIO;
		goto out;
	}
	sc->vs = val;

	/* Honor MDTS if it is set. */
	sc->max_xfer_size = maxphys;
	if (sc->cdata->mdts != 0) {
		sc->max_xfer_size = ulmin(sc->max_xfer_size,
		    1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
		    NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
	}

	error = nvmf_init_sim(sc);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	if (!nvmf_add_namespaces(sc)) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
	    nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
	sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_FIRST);

	return (0);
out:
	if (sc->ns != NULL) {
		for (i = 0; i < sc->cdata->nn; i++) {
			if (sc->ns[i] != NULL)
				nvmf_destroy_ns(sc->ns[i]);
		}
		free(sc->ns, M_NVMF);
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		if (sc->io[i] != NULL)
			nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (error);
}

void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
}

static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach
		 * (nvmf_add_namespaces).  Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	sx_xunlock(&sc->connection_lock);
}

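/*
 * Handle a reconnect request from userland: verify the new handoff data
 * targets the same transport and NVMe subsystem, then establish a new
 * association and restart namespace consumers.
 */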
static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	u_int i;
	int error;

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != hh->trtype) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		return (EINVAL);
	}

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
	    sizeof(ivars.cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, &ivars);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);

	nvmf_rescan_all_ns(sc);
out:
	sx_xunlock(&sc->connection_lock);
	nvmf_free_ivars(&ivars);
	return (error);
}

static void
nvmf_shutdown_pre_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is disconnected, abort any pending
	 * requests with an error to permit filesystems to unmount
	 * without hanging.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	for (u_int i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_shutdown_ns(sc->ns[i]);
	}
	nvmf_shutdown_sim(sc);
	sx_xunlock(&sc->connection_lock);
}

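/*
 * After the final filesystem sync, cleanly shut down the remote
 * controller and tear down the queue pairs if the association is still
 * connected.
 */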
static void
nvmf_shutdown_post_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is connected, disconnect gracefully.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	nvmf_shutdown_controller(sc);
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;
	sx_xunlock(&sc->connection_lock);
}

static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
	EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (0);
}

static void
nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data)
{
	struct nvmf_namespace *ns;

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	nvmf_sim_rescan_ns(sc, nsid);
}

void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	nvmf_rescan_ns_1(sc, nsid, data);

	free(data, M_NVMF);
}

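/*
 * Destroy any namespaces with IDs in [first_nsid, next_valid_nsid) that
 * are no longer present in the active namespace list.
 */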
static void
nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
    uint32_t next_valid_nsid)
{
	struct nvmf_namespace *ns;

	for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) {
		/* XXX: Needs locking around sc->ns[]. */
		ns = sc->ns[nsid - 1];
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;

			nvmf_sim_rescan_ns(sc, nsid);
		}
	}
}

static bool
nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg)
{
	uint32_t *last_nsid = arg;

	/* Check for any gaps prior to this namespace. */
	nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
	*last_nsid = nsid;

	nvmf_rescan_ns_1(sc, nsid, data);
	return (true);
}

void
nvmf_rescan_all_ns(struct nvmf_softc *sc)
{
	uint32_t last_nsid;

	last_nsid = 0;
	if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
		return;

	/*
	 * Check for any namespace devices after the last active
	 * namespace.
	 */
	nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}

int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	sx_slock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		device_printf(sc->dev,
		    "failed to send passthrough command\n");
		error = ECONNABORTED;
		sx_sunlock(&sc->connection_lock);
		goto error;
	}
	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	sx_sunlock(&sc->connection_lock);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	free(buf, M_NVMF);
	return (error);
}

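/*
 * Character device ioctl handler: NVMe passthrough commands, NSID and
 * maximum transfer size queries, and reconnect support.
 */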
static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_reconnect_params *rp;
	struct nvmf_handoff_host *hh;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVMF_RECONNECT_PARAMS:
		rp = (struct nvmf_reconnect_params *)arg;
		if ((sc->cdata->fcatt & 1) == 0)
			rp->cntlid = NVMF_CNTLID_DYNAMIC;
		else
			rp->cntlid = sc->cdata->ctrlr_id;
		memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
		return (0);
	case NVMF_RECONNECT_HOST:
		hh = (struct nvmf_handoff_host *)arg;
		return (nvmf_reconnect_host(sc, hh));
	default:
		return (ENOTTY);
	}
}

static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};

static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (nvmf_ctl_load());
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, nvmf_probe),
	DEVMETHOD(device_attach, nvmf_attach),
	DEVMETHOD(device_detach, nvmf_detach),
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);