/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

static struct cdevsw nvmf_cdevsw;

MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");

static void nvmf_disconnect_task(void *arg, int pending);

void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}

static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}

static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}

static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}
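
/*
 * Keep-alive handling: when a KATO is negotiated, two callouts run.
 * The RX timer fires once per KATO interval and tears down the
 * association if no traffic was received from the controller in the
 * preceding interval.  The TX timer fires at half that interval and
 * sends a Keep Alive command unless traffic-based keep-alive (TBKAS)
 * is in effect and another command was already sent.
 */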
static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}

static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}

static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}

int
nvmf_init_ivars(struct nvmf_ivars *ivars, struct nvmf_handoff_host *hh)
{
	size_t len;
	u_int i;
	int error;

	memset(ivars, 0, sizeof(*ivars));

	if (!hh->admin.admin || hh->num_io_queues < 1)
		return (EINVAL);

	ivars->cdata = malloc(sizeof(*ivars->cdata), M_NVMF, M_WAITOK);
	error = copyin(hh->cdata, ivars->cdata, sizeof(*ivars->cdata));
	if (error != 0)
		goto out;
	nvme_controller_data_swapbytes(ivars->cdata);

	len = hh->num_io_queues * sizeof(*ivars->io_params);
	ivars->io_params = malloc(len, M_NVMF, M_WAITOK);
	error = copyin(hh->io, ivars->io_params, len);
	if (error != 0)
		goto out;
	for (i = 0; i < hh->num_io_queues; i++) {
		if (ivars->io_params[i].admin) {
			error = EINVAL;
			goto out;
		}

		/* Require all I/O queues to be the same size. */
		if (ivars->io_params[i].qsize != ivars->io_params[0].qsize) {
			error = EINVAL;
			goto out;
		}
	}

	ivars->hh = hh;
	return (0);

out:
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
	return (error);
}

void
nvmf_free_ivars(struct nvmf_ivars *ivars)
{
	free(ivars->io_params, M_NVMF);
	free(ivars->cdata, M_NVMF);
}

static int
nvmf_probe(device_t dev)
{
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	char desc[260];

	if (ivars == NULL)
		return (ENXIO);

	snprintf(desc, sizeof(desc), "Fabrics: %.256s", ivars->cdata->subnqn);
	device_set_desc_copy(dev, desc);
	return (BUS_PROBE_DEFAULT);
}
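
/*
 * The admin and I/O queue pairs are created from connection state
 * handed off from userland (see nvmf_init_ivars()).  Adopt those
 * queues here and start the keep-alive timers if a KATO was
 * negotiated.
 */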
static int
nvmf_establish_connection(struct nvmf_softc *sc, struct nvmf_ivars *ivars)
{
	char name[16];

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, ivars->hh->trtype, &ivars->hh->admin,
	    "admin queue");
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(ivars->hh->num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = ivars->hh->num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, ivars->hh->trtype,
		    &ivars->io_params[i], name);
		if (sc->io[i] == NULL) {
			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
			    i + 1);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (ivars->hh->kato != 0) {
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(ivars->hh->kato);
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	return (0);
}
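
/*
 * Fetch one page of the active namespace ID list starting at *nsidp
 * and issue an IDENTIFY namespace command for each returned NSID,
 * creating sc->ns[] entries for valid namespaces.  On return *nsidp
 * is either 0 (the list is exhausted) or the NSID to continue the
 * scan from.
 */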
static bool
nvmf_scan_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		if (sc->ns[nsid - 1] != NULL) {
			device_printf(sc->dev,
			    "duplicate namespace %u in active namespace list\n",
			    nsid);
			return (false);
		}

		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		/*
		 * As in nvme_ns_construct, a size of zero indicates an
		 * invalid namespace.
		 */
		nvme_namespace_data_swapbytes(data);
		if (data->nsze == 0) {
			device_printf(sc->dev,
			    "ignoring active namespace %u with zero size\n",
			    nsid);
			continue;
		}

		sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

		nvmf_sim_rescan_ns(sc, nsid);
	}

	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	if (nsid >= 0xfffffffd)
		*nsidp = 0;
	else
		*nsidp = nsid + 1;
	return (true);
}

static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
	for (;;) {
		if (!nvmf_scan_nslist(sc, nslist, data, &nsid)) {
			retval = false;
			break;
		}
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}
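
/*
 * Attach sequence: claim the controller data from the ivars, adopt
 * the handed-off queue pairs, read the CAP and VS properties, size
 * transfers from MDTS, register the CAM SIM, start asynchronous
 * event requests, enumerate active namespaces, and expose a control
 * device node for ioctls.
 */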
static int
nvmf_attach(device_t dev)
{
	struct make_dev_args mda;
	struct nvmf_softc *sc = device_get_softc(dev);
	struct nvmf_ivars *ivars = device_get_ivars(dev);
	uint64_t val;
	u_int i;
	int error;

	if (ivars == NULL)
		return (ENXIO);

	sc->dev = dev;
	sc->trtype = ivars->hh->trtype;
	callout_init(&sc->ka_rx_timer, 1);
	callout_init(&sc->ka_tx_timer, 1);
	sx_init(&sc->connection_lock, "nvmf connection");
	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);

	/* Claim the cdata pointer from ivars. */
	sc->cdata = ivars->cdata;
	ivars->cdata = NULL;

	nvmf_init_aer(sc);

	/* TODO: Multiqueue support. */
	sc->max_pending_io = ivars->io_params[0].qsize /* * sc->num_io_queues */;

	error = nvmf_establish_connection(sc, ivars);
	if (error != 0)
		goto out;

	error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CAP\n");
		error = ENXIO;
		goto out;
	}

	error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch VS\n");
		error = ENXIO;
		goto out;
	}
	sc->vs = val;

	/* Honor MDTS if it is set. */
	sc->max_xfer_size = maxphys;
	if (sc->cdata->mdts != 0) {
		sc->max_xfer_size = ulmin(sc->max_xfer_size,
		    1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
		    NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
	}

	error = nvmf_init_sim(sc);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	if (!nvmf_add_namespaces(sc)) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	return (0);
out:
	if (sc->ns != NULL) {
		for (i = 0; i < sc->cdata->nn; i++) {
			if (sc->ns[i] != NULL)
				nvmf_destroy_ns(sc->ns[i]);
		}
		free(sc->ns, M_NVMF);
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		if (sc->io[i] != NULL)
			nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);
	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (error);
}

void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(taskqueue_thread, &sc->disconnect_task);
}

static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach (nvmf_add_namespaces).
		 * Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	sx_xunlock(&sc->connection_lock);
}
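
/*
 * Handle the NVMF_RECONNECT_HOST ioctl: after userland establishes a
 * new association, verify it matches the original subsystem and
 * transport type, adopt the new queue pairs, restart AERs, and
 * resume namespace consumers.
 */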
static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_handoff_host *hh)
{
	struct nvmf_ivars ivars;
	u_int i;
	int error;

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != hh->trtype) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		return (EINVAL);
	}

	error = nvmf_init_ivars(&ivars, hh);
	if (error != 0)
		return (error);

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	if (memcmp(sc->cdata->subnqn, ivars.cdata->subnqn,
	    sizeof(ivars.cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, &ivars);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);
out:
	sx_xunlock(&sc->connection_lock);
	nvmf_free_ivars(&ivars);
	return (error);
}

static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	taskqueue_drain(taskqueue_thread, &sc->disconnect_task);

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	free(sc->cdata, M_NVMF);
	return (0);
}
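
/*
 * Re-IDENTIFY a single namespace (e.g. after a namespace attribute
 * change notification) and create, update, or destroy the matching
 * sc->ns[] entry to reflect the controller's current view, then
 * request a SIM rescan of that namespace.
 */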
void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;
	struct nvmf_namespace *ns;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	free(data, M_NVMF);

	nvmf_sim_rescan_ns(sc, nsid);
}

int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	free(buf, M_NVMF);
	return (error);
}
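
/*
 * ioctl handler for the per-controller /dev node created at attach
 * time: admin command passthrough, namespace and transfer-size
 * queries, and the reconnect handshake used after a transport
 * failure.
 */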
static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_reconnect_params *rp;
	struct nvmf_handoff_host *hh;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVMF_RECONNECT_PARAMS:
		rp = (struct nvmf_reconnect_params *)arg;
		if ((sc->cdata->fcatt & 1) == 0)
			rp->cntlid = NVMF_CNTLID_DYNAMIC;
		else
			rp->cntlid = sc->cdata->ctrlr_id;
		memcpy(rp->subnqn, sc->cdata->subnqn, sizeof(rp->subnqn));
		return (0);
	case NVMF_RECONNECT_HOST:
		hh = (struct nvmf_handoff_host *)arg;
		return (nvmf_reconnect_host(sc, hh));
	default:
		return (ENOTTY);
	}
}

static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};

static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	switch (what) {
	case MOD_LOAD:
		return (nvmf_ctl_load());
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, nvmf_probe),
	DEVMETHOD(device_attach, nvmf_attach),
	DEVMETHOD(device_detach, nvmf_detach),
#if 0
	DEVMETHOD(device_shutdown, nvmf_shutdown),
#endif
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);