/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

static struct cdevsw nvmf_cdevsw;

bool nvmf_fail_disconnect = false;
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
    &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");

MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");

static void	nvmf_disconnect_task(void *arg, int pending);

void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}

static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}

static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete,
	    &status, M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}

static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}

static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}

static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}

static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}

int
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
{
	size_t len;
	u_int i;
	int error;

	memset(ivars, 0, sizeof(*ivars));

	if (!hh->admin.admin || hh->num_io_queues < 1)
		return (EINVAL);

	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
	if (error != 0)
		goto out;
	nvme_controller_data_swapbytes(ivars->cdata);

	len = hh->num_io_queues * sizeof(*ivars->io_params);
	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
	error = copyin(hh->io, ivars->io_params, len);
	if (error != 0)
		goto out;
	for (i = 0; i < hh->num_io_queues; i++) {
		if (ivars->io_params[i].admin) {
			error = EINVAL;
			goto out;
		}

		/* Require all I/O queues to be the same size. */
		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
			error = EINVAL;
			goto out;
		}
	}

	ivars->hh = hh;
	return (0);

out:
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
	return (error);
}

void
nvmf_free_ivars(struct nvmf_ivars *ivars)
{
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
}

static int
nvmf_probe(device_t dev)
{
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	char desc[260];

	if (ivars == NULL)
		return (ENXIO);

	snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn);
	device_set_desc_copy(dev, desc);
	return (BUS_PROBE_DEFAULT);
}

static int
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
{
	char name[16];

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
	    "admin queue");
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = ivars->hh->num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
		    &ivars->io_params[i], name);
		if (sc->io[i] == NULL) {
			device_printf(sc->dev,
			    "Failed to setup I/O queue %u\n", i + 1);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (ivars->hh->kato != 0) {
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	return (0);
}

typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
    const struct nvme_namespace_data *, void *);

static bool
nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp,
    nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		nvme_namespace_data_swapbytes(data);
		if (!cb(sc, nsid, data, cb_arg))
			return (false);
	}

	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	if (nsid >= 0xfffffffd)
		*nsidp = 0;
	else
		*nsidp = nsid + 1;
	return (true);
}

static bool
nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
    void *cb_arg)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
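	/*
	 * Walk the active namespace ID list in chunks; each call to
	 * nvmf_scan_active_nslist() invokes the callback for the
	 * namespaces it returns and sets nsid to 0 once the final
	 * entry has been processed.
	 */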
	for (;;) {
		if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
		    cb_arg)) {
			retval = false;
			break;
		}
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}

static bool
nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg __unused)
{
	if (sc->ns[nsid - 1] != NULL) {
		device_printf(sc->dev,
		    "duplicate namespace %u in active namespace list\n",
		    nsid);
		return (false);
	}

	/*
	 * As in nvme_ns_construct, a size of zero indicates an
	 * invalid namespace.
	 */
	if (data->nsze == 0) {
		device_printf(sc->dev,
		    "ignoring active namespace %u with zero size\n", nsid);
		return (true);
	}

	sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

	nvmf_sim_rescan_ns(sc, nsid);
	return (true);
}

static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
}

static int
nvmf_attach(device_t dev)
{
	struct make_dev_args mda;
	struct nvmf_softc *sc = device_get_softc(dev);
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	uint64_t val;
	u_int i;
	int error;

	if (ivars == NULL)
		return (ENXIO);

	sc->dev = dev;
	sc->trtype = ivars->hh->trtype;
	callout_init(&sc->ka_rx_timer, 1);
	callout_init(&sc->ka_tx_timer, 1);
	sx_init(&sc->connection_lock, "nvmf connection");
	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);

	/* Claim the cdata pointer from ivars. */
	sc->cdata = ivars->cdata;
	ivars->cdata = NULL;

	nvmf_init_aer(sc);

	/* TODO: Multiqueue support. */
	sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;

	error = nvmf_establish_connection(sc, ivars);
	if (error != 0)
		goto out;

	error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CAP\n");
		error = ENXIO;
		goto out;
	}

	error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch VS\n");
		error = ENXIO;
		goto out;
	}
	sc->vs = val;

	/* Honor MDTS if it is set. */
	sc->max_xfer_size = maxphys;
	if (sc->cdata->mdts != 0) {
		sc->max_xfer_size = ulmin(sc->max_xfer_size,
		    1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
		    NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
	}

	error = nvmf_init_sim(sc);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	if (!nvmf_add_namespaces(sc)) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	return (0);
out:
	if (sc->ns != NULL) {
		for (i = 0; i < sc->cdata->nn; i++) {
			if (sc->ns[i] != NULL)
				nvmf_destroy_ns(sc->ns[i]);
		}
		free(sc->ns, M_NVMF);
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		if (sc->io[i] != NULL)
			nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (error);
}

void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
}

static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach
		 * (nvmf_add_namespaces).  Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	sx_xunlock(&sc->connection_lock);
}

static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	u_int i;
	int error;

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != hh->trtype) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		return (EINVAL);
	}

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
	    sizeof(ivars.cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, &ivars);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);

	nvmf_rescan_all_ns(sc);
out:
	sx_xunlock(&sc->connection_lock);
	nvmf_free_ivars(&ivars);
	return (error);
}

static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (0);
}

static void
nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data)
{
	struct nvmf_namespace *ns;

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	nvmf_sim_rescan_ns(sc, nsid);
}

void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	nvmf_rescan_ns_1(sc, nsid, data);

	free(data, M_NVMF);
}

static void
nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
    uint32_t next_valid_nsid)
{
	struct nvmf_namespace *ns;

	for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) {
		/* XXX: Needs locking around sc->ns[]. */
		ns = sc->ns[nsid - 1];
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;

			nvmf_sim_rescan_ns(sc, nsid);
		}
	}
}

static bool
nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg)
{
	uint32_t *last_nsid = arg;

	/* Check for any gaps prior to this namespace. */
	nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
	*last_nsid = nsid;

	nvmf_rescan_ns_1(sc, nsid, data);
	return (true);
}

void
nvmf_rescan_all_ns(struct nvmf_softc *sc)
{
	uint32_t last_nsid;

	last_nsid = 0;
	if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
		return;

	/*
	 * Check for any namespace devices after the last active
	 * namespace.
	 */
	nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}

int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	free(buf, M_NVMF);
	return (error);
}

static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_reconnect_params *rp;
	struct nvmf_handoff_host *hh;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVMF_RECONNECT_PARAMS:
		rp = (struct nvmf_reconnect_params *)arg;
		if ((sc->cdata->fcatt & 1) == 0)
			rp->cntlid = NVMF_CNTLID_DYNAMIC;
		else
			rp->cntlid = sc->cdata->ctrlr_id;
		memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
		return (0);
	case NVMF_RECONNECT_HOST:
		hh = (struct nvmf_handoff_host *)arg;
		return (nvmf_reconnect_host(sc, hh));
	default:
		return (ENOTTY);
	}
}

static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};

static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (nvmf_ctl_load());
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		nvmf_probe),
	DEVMETHOD(device_attach,	nvmf_attach),
	DEVMETHOD(device_detach,	nvmf_detach),
#if 0
	DEVMETHOD(device_shutdown,	nvmf_shutdown),
#endif
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);