/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/dnv.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/bus_dma.h>

#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_private.h>

/*
 * Store pointers to the capsule and qpair in the two pointer members
 * of CTL_PRIV_FRONTEND.
 */
#define	NVMFT_NC(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
#define	NVMFT_QP(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])

static void	nvmft_done(union ctl_io *io);
static int	nvmft_init(void);
static int	nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
		    int flag, struct thread *td);
static int	nvmft_shutdown(void);

extern struct ctl_softc *control_softc;

static struct taskqueue *nvmft_taskq;
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;

MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");

static struct ctl_frontend nvmft_frontend = {
	.name = "nvmf",
	.init = nvmft_init,
	.ioctl = nvmft_ioctl,
	.fe_dump = NULL,
	.shutdown = nvmft_shutdown,
};

static void
nvmft_online(void *arg)
{
	struct nvmft_port *np = arg;

	sx_xlock(&np->lock);
	np->online = true;
	sx_xunlock(&np->lock);
}

static void
nvmft_offline(void *arg)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;

	sx_xlock(&np->lock);
	np->online = false;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_printf(ctrlr,
		    "shutting down due to port going offline\n");
		nvmft_controller_error(ctrlr, NULL, ENODEV);
	}

	while (!TAILQ_EMPTY(&np->controllers))
		sx_sleep(np, &np->lock, 0, "nvmfoff", 0);
	sx_xunlock(&np->lock);
}

static int
nvmft_lun_enable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t *old_ns, *new_ns;
	uint32_t nsid;
	u_int i;

	if (lun_id >= le32toh(np->cdata.nn)) {
		printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
		    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
		return (EOPNOTSUPP);
	}
	nsid = lun_id + 1;

	sx_xlock(&np->lock);
	new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT,
	    M_WAITOK);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] < nsid)
			continue;
		if (np->active_ns[i] == nsid) {
			sx_xunlock(&np->lock);
			free(new_ns, M_NVMFT);
			printf("NVMFT: %s duplicate lun %d\n",
			    np->cdata.subnqn, lun_id);
			return (EINVAL);
		}
		break;
	}

	/* Copy over IDs smaller than nsid. */
	memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));

	/* Insert nsid. */
	new_ns[i] = nsid;

	/* Copy over IDs greater than nsid. */
	memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
	    sizeof(*np->active_ns));

	np->num_ns++;
	old_ns = np->active_ns;
	np->active_ns = new_ns;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	sx_xunlock(&np->lock);
	free(old_ns, M_NVMFT);

	return (0);
}

static int
nvmft_lun_disable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t nsid;
	u_int i;

	if (lun_id >= le32toh(np->cdata.nn))
		return (0);
	nsid = lun_id + 1;

	sx_xlock(&np->lock);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] == nsid)
			goto found;
	}
	sx_xunlock(&np->lock);
	printf("NVMFT: %s request to disable nonexistent lun %d\n",
	    np->cdata.subnqn, lun_id);
	return (EINVAL);

found:
	/* Move down IDs greater than nsid. */
	memmove(np->active_ns + i, np->active_ns + i + 1,
	    (np->num_ns - (i + 1)) * sizeof(*np->active_ns));
	np->num_ns--;

	/* NB: Don't bother freeing the old active_ns array. */

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	sx_xunlock(&np->lock);

	return (0);
}

void
nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
    struct nvme_ns_list *nslist)
{
	u_int i, count;

	sx_slock(&np->lock);
	count = 0;
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] <= nsid)
			continue;
		nslist->ns[count] = htole32(np->active_ns[i]);
		count++;
		if (count == nitems(nslist->ns))
			break;
	}
	sx_sunlock(&np->lock);
}

void
nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    bool admin)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	if (cmd->nsid == htole32(0)) {
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
		nvmf_free_capsule(nc);
		return;
	}

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_NC(io) = nc;
	NVMFT_QP(io) = qp;
	io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
	io->nvmeio.cmd = *cmd;
	error = ctl_run(io);
	if (error != 0) {
		nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
		    nvmft_qpair_name(qp), error);
		ctl_nvme_set_generic_error(&io->nvmeio,
		    NVME_SC_INTERNAL_DEVICE_ERROR);
		nvmft_done(io);

		nvmft_controller_error(ctrlr, qp, ENXIO);
	}
}

void
nvmft_terminate_commands(struct nvmft_controller *ctrlr)
{
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_QP(io) = ctrlr->admin;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
#ifdef INVARIANTS
		io->io_hdr.status = CTL_SUCCESS;
#endif
		nvmft_done(io);
	}
}

static void
nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
{
	struct ctl_nvmeio *ctnio = arg;

	if (error != 0) {
		ctl_nvme_set_data_transfer_error(ctnio);
	} else {
		MPASS(xfered == ctnio->kern_data_len);
		ctnio->kern_data_resid -= xfered;
	}

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, false);
}

static void
nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct memdesc mem;
	int error;

	MPASS(ctnio->ext_data_ptr == NULL);
	if (ctnio->kern_sg_entries > 0) {
		struct ctl_sg_entry *sgl;
		struct bus_dma_segment *vlist;

		vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
		    M_NVMFT, M_WAITOK);
		ctnio->ext_data_ptr = (void *)vlist;
		sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
		for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
			vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
			vlist[i].ds_len = sgl[i].len;
		}
		mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
	} else
		mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);

	error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
	    ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
	if (error == 0)
		return;

	nvmft_printf(nvmft_qpair_ctrlr(qp),
	    "Failed to request capsule data: %d\n", error);
	ctl_nvme_set_data_transfer_error(ctnio);

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, true);
}

static struct mbuf *
nvmft_copy_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;
	uint32_t resid, off, todo;
	int mlen;

	MPASS(ctnio->kern_data_len != 0);

	m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);

	if (ctnio->kern_sg_entries == 0) {
		m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
		return (m0);
	}

	resid = ctnio->kern_data_len;
	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	off = 0;
	m = m0;
	mlen = M_TRAILINGSPACE(m);
	for (;;) {
		todo = MIN(mlen, sgl->len - off);
		memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
		    todo);
		m->m_len += todo;
		resid -= todo;
		if (resid == 0) {
			MPASS(m->m_next == NULL);
			break;
		}

		off += todo;
		if (off == sgl->len) {
			sgl++;
			off = 0;
		}
		mlen -= todo;
		if (mlen == 0) {
			m = m->m_next;
			mlen = M_TRAILINGSPACE(m);
		}
	}

	return (m0);
}

static void
m_free_ref_data(struct mbuf *m)
{
	ctl_ref kern_data_ref = m->m_ext.ext_arg1;

	kern_data_ref(m->m_ext.ext_arg2, -1);
}

static struct mbuf *
m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
{
	struct mbuf *m;

	m = m_get(M_WAITOK, MT_DATA);
	m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref,
	    ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
	m->m_len = size;
	ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
	return (m);
}

static struct mbuf *
nvmft_ref_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;

	MPASS(ctnio->kern_data_len != 0);

	if (ctnio->kern_sg_entries == 0)
		return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
		    ctnio->kern_data_len));

	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
	m = m0;
	for (u_int i = 1; i < ctnio->kern_sg_entries; i++) {
		m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
		m = m->m_next;
	}
	return (m0);
}

static void
nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct mbuf *m;
	u_int status;

	if (ctnio->kern_data_ref != NULL)
		m = nvmft_ref_data(ctnio);
	else
		m = nvmft_copy_data(ctnio);
	status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
	    ctnio->kern_data_len);
	switch (status) {
	case NVMF_SUCCESS_SENT:
		ctnio->success_sent = true;
		nvmft_command_completed(qp, nc);
		/* FALLTHROUGH */
	case NVMF_MORE:
	case NVME_SC_SUCCESS:
		break;
	default:
		ctl_nvme_set_generic_error(ctnio, status);
		break;
	}
	ctl_datamove_done((union ctl_io *)ctnio, true);
}

void
nvmft_handle_datamove(union ctl_io *io)
{
	struct nvmf_capsule *nc;
	struct nvmft_qpair *qp;

	/* Some CTL commands preemptively set a success status. */
	MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
	    io->io_hdr.status == CTL_SUCCESS);
	MPASS(!io->nvmeio.success_sent);

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);

	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
		nvmft_datamove_in(&io->nvmeio, qp, nc);
	else
		nvmft_datamove_out(&io->nvmeio, qp, nc);
}

void
nvmft_abort_datamove(union ctl_io *io)
{
	io->io_hdr.port_status = 1;
	io->io_hdr.flags |= CTL_FLAG_ABORT;
	ctl_datamove_done(io, true);
}

static void
nvmft_datamove(union ctl_io *io)
{
	struct nvmft_qpair *qp;

	qp = NVMFT_QP(io);
	nvmft_qpair_datamove(qp, io);
}

void
nvmft_enqueue_task(struct task *task)
{
	taskqueue_enqueue(nvmft_taskq, task);
}

void
nvmft_drain_task(struct task *task)
{
	taskqueue_drain(nvmft_taskq, task);
}

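/*
 * Maintain one of the 128-bit little-endian counters kept for the
 * Health Information log page ("hip"): pair[0] holds the low 64 bits
 * and pair[1] the high 64 bits.  As a worked example, if pair[0] is
 * UINT64_MAX and pair[1] is 0, then hip_add(pair, 1) wraps pair[0]
 * to 0 and carries into pair[1], advancing the 128-bit value from
 * 2^64 - 1 to 2^64.
 */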
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] += htole64(1);
}

static void
nvmft_done(union ctl_io *io)
{
	struct nvmft_controller *ctrlr;
	const struct nvme_command *cmd;
	struct nvmft_qpair *qp;
	struct nvmf_capsule *nc;
	size_t len;

	KASSERT(io->io_hdr.status == CTL_SUCCESS ||
	    io->io_hdr.status == CTL_NVME_ERROR,
	    ("%s: bad status %u", __func__, io->io_hdr.status));

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);
	ctrlr = nvmft_qpair_ctrlr(qp);

	if (nc == NULL) {
		/* Completion of nvmft_terminate_commands. */
		goto end;
	}

	cmd = nvmf_capsule_sqe(nc);

	if (io->io_hdr.status == CTL_SUCCESS)
		len = nvmf_capsule_data_len(nc) / 512;
	else
		len = 0;
	switch (cmd->opc) {
	case NVME_OPC_WRITE:
		mtx_lock(&ctrlr->lock);
		hip_add(ctrlr->hip.host_write_commands, 1);
		len += ctrlr->partial_duw;
		if (len > 1000)
			hip_add(ctrlr->hip.data_units_written, len / 1000);
		ctrlr->partial_duw = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	case NVME_OPC_READ:
	case NVME_OPC_COMPARE:
	case NVME_OPC_VERIFY:
		mtx_lock(&ctrlr->lock);
		if (cmd->opc != NVME_OPC_VERIFY)
			hip_add(ctrlr->hip.host_read_commands, 1);
		len += ctrlr->partial_dur;
		if (len > 1000)
			hip_add(ctrlr->hip.data_units_read, len / 1000);
		ctrlr->partial_dur = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	}

	if (io->nvmeio.success_sent) {
		MPASS(io->io_hdr.status == CTL_SUCCESS);
	} else {
		io->nvmeio.cpl.cid = cmd->cid;
		nvmft_send_response(qp, &io->nvmeio.cpl);
	}
	nvmf_free_capsule(nc);
end:
	ctl_free_io(io);
	mtx_lock(&ctrlr->lock);
	ctrlr->pending_commands--;
	if (ctrlr->pending_commands == 0)
		ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
	mtx_unlock(&ctrlr->lock);
}

static int
nvmft_init(void)
{
	int error;

	nvmft_taskq = taskqueue_create("nvmft", M_WAITOK,
	    taskqueue_thread_enqueue, &nvmft_taskq);
	error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT,
	    control_softc->ctl_proc, "nvmft");
	if (error != 0) {
		taskqueue_free(nvmft_taskq);
		return (error);
	}

	TAILQ_INIT(&nvmft_ports);
	sx_init(&nvmft_ports_lock, "nvmft ports");
	return (0);
}

void
nvmft_port_free(struct nvmft_port *np)
{
	KASSERT(TAILQ_EMPTY(&np->controllers),
	    ("%s(%p): active controllers", __func__, np));

	if (np->port.targ_port != -1) {
		if (ctl_port_deregister(&np->port) != 0)
			printf("%s: ctl_port_deregister() failed\n", __func__);
	}

	free(np->active_ns, M_NVMFT);
	clean_unrhdr(np->ids);
	delete_unrhdr(np->ids);
	sx_destroy(&np->lock);
	free(np, M_NVMFT);
}

static struct nvmft_port *
nvmft_port_find(const char *subnqn)
{
	struct nvmft_port *np;

	KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));

	sx_assert(&nvmft_ports_lock, SA_LOCKED);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (strcmp(np->cdata.subnqn, subnqn) == 0)
			break;
	}
	return (np);
}

static struct nvmft_port *
nvmft_port_find_by_id(int port_id)
{
	struct nvmft_port *np;

	sx_assert(&nvmft_ports_lock, SA_LOCKED);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (np->port.targ_port == port_id)
			break;
	}
	return (np);
}

/*
 * Helper function to fetch a number stored as a string in an nv_list.
 * Returns false if the string was not a valid number.
 */
static bool
dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
    u_long *value)
{
	const char *str;
	char *cp;

	str = dnvlist_get_string(nvl, name, NULL);
	if (str == NULL) {
		*value = default_value;
		return (true);
	}
	if (*str == '\0')
		return (false);
	*value = strtoul(str, &cp, 0);
	if (*cp != '\0')
		return (false);
	return (true);
}

/*
 * NVMeoF ports support the following parameters:
 *
 * Mandatory:
 *
 * subnqn: subsystem NVMe Qualified Name
 * portid: integer port ID from Discovery Log Page entry
 *
 * Optional:
 * serial: Serial Number string
 * max_io_qsize: Maximum number of I/O queue entries
 * enable_timeout: Timeout for controller enable in milliseconds
 * ioccsz: Maximum command capsule size
 * iorcsz: Maximum response capsule size
 * nn: Number of namespaces
 */
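/*
 * As an illustration, a port could be created from userland with
 * something along these lines (sketch only; the exact ctladm syntax
 * and the NQN shown are examples, not taken from this source):
 *
 *	ctladm port -c -d "nvmf" -O subnqn=nqn.2014-08.org.freebsd:ctl \
 *	    -O portid=1
 */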
static void
nvmft_port_create(struct ctl_req *req)
{
	struct nvmft_port *np;
	struct ctl_port *port;
	const char *serial, *subnqn;
	char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
	u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
	int error;

	/* Required parameters. */
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Missing required argument");
		return;
	}
	if (!nvmf_nqn_valid(subnqn)) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid SubNQN");
		return;
	}
	if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
	    portid > UINT16_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid port ID");
		return;
	}

	/* Optional parameters. */
	if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
	    NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
	    max_io_qsize < NVME_MIN_IO_ENTRIES ||
	    max_io_qsize > NVME_MAX_IO_ENTRIES) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid maximum I/O queue size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
	    NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
	    (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid enable timeout");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
	    &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
	    (ioccsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Command Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
	    &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
	    (iorcsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Response Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
	    nn < 1 || nn > UINT32_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid number of namespaces");
		return;
	}

	serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
	if (serial == NULL) {
		getcredhostid(curthread->td_ucred, &hostid);
		nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
		serial = serial_buf;
	}

	sx_xlock(&nvmft_ports_lock);

	np = nvmft_port_find(subnqn);
	if (np != NULL) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "SubNQN \"%s\" already exists", subnqn);
		sx_xunlock(&nvmft_ports_lock);
		return;
	}

	np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
	refcount_init(&np->refs, 1);
	np->max_io_qsize = max_io_qsize;
	np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
	sx_init(&np->lock, "nvmft port");
	np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
	    NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
	TAILQ_INIT(&np->controllers);

	/* The controller ID is set later for individual controllers. */
	_nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
	    osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
	np->cdata.aerl = NVMFT_NUM_AER - 1;
	np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
	np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
	np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);

	np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));

	port = &np->port;

	port->frontend = &nvmft_frontend;
	port->port_type = CTL_PORT_NVMF;
	port->num_requested_ctl_io = max_io_qsize;
	port->port_name = "nvmf";
	port->physical_port = portid;
	port->virtual_port = 0;
	port->port_online = nvmft_online;
	port->port_offline = nvmft_offline;
	port->onoff_arg = np;
	port->lun_enable = nvmft_lun_enable;
	port->lun_disable = nvmft_lun_disable;
	port->targ_lun_arg = np;
	port->fe_datamove = nvmft_datamove;
	port->fe_done = nvmft_done;
	port->targ_port = -1;
	port->options = nvlist_clone(req->args_nvl);

	error = ctl_port_register(port);
	if (error != 0) {
		sx_xunlock(&nvmft_ports_lock);
		nvlist_destroy(port->options);
		nvmft_port_rele(np);
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Failed to register CTL port with error %d", error);
		return;
	}

	TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	req->status = CTL_LUN_OK;
	req->result_nvl = nvlist_create(0);
	nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
}

static void
nvmft_port_remove(struct ctl_req *req)
{
	struct nvmft_port *np;
	const char *subnqn;
	u_long port_id;

	/*
	 * ctladm port -r just provides the port_id, so permit looking
	 * up a port either by "subnqn" or "port_id".
	 */
	port_id = ULONG_MAX;
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL) {
		if (!nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Missing required argument");
			return;
		}
		if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
		    &port_id)) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Invalid CTL port ID");
			return;
		}
	} else {
		if (nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Ambiguous port removal request");
			return;
		}
	}

	sx_xlock(&nvmft_ports_lock);

	if (subnqn != NULL) {
		np = nvmft_port_find(subnqn);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "SubNQN \"%s\" does not exist", subnqn);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	} else {
		np = nvmft_port_find_by_id(port_id);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "CTL port %lu is not an NVMF port", port_id);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	}

	TAILQ_REMOVE(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	ctl_port_offline(&np->port);
	nvmft_port_rele(np);
	req->status = CTL_LUN_OK;
}

static void
nvmft_handoff(struct ctl_nvmf *cn)
{
	struct nvmf_fabric_connect_cmd cmd;
	struct nvmf_handoff_controller_qpair *handoff;
	struct nvmf_fabric_connect_data *data;
	struct nvmft_port *np;
	int error;

	np = NULL;
	data = NULL;
	handoff = &cn->data.handoff;
	error = copyin(handoff->cmd, &cmd, sizeof(cmd));
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin CONNECT SQE");
		return;
	}

	data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
	error = copyin(handoff->data, data, sizeof(*data));
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin CONNECT data");
		goto out;
	}

	if (!nvmf_nqn_valid(data->subnqn)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid SubNQN");
		goto out;
	}

	sx_slock(&nvmft_ports_lock);
	np = nvmft_port_find(data->subnqn);
	if (np == NULL) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Unknown SubNQN");
		goto out;
	}
	if (!np->online) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "CTL port offline");
		np = NULL;
		goto out;
	}
	nvmft_port_ref(np);
	sx_sunlock(&nvmft_ports_lock);

	if (handoff->params.admin) {
		error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff admin queue: %d", error);
			goto out;
		}
	} else {
		error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff I/O queue: %d", error);
			goto out;
		}
	}

	cn->status = CTL_NVMF_OK;
out:
	if (np != NULL)
		nvmft_port_rele(np);
	free(data, M_NVMFT);
}

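/*
 * Sketch of the XML emitted for a CTL_NVMF_LIST request; the shape
 * follows the sbuf_printf() calls below, with illustrative values:
 *
 *	<ctlnvmflist>
 *	<connection id="1"><hostnqn>...</hostnqn><subnqn>...</subnqn><trtype>3</trtype></connection>
 *	</ctlnvmflist>
 */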
static void
nvmft_list(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_list_params *lp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	struct sbuf *sb;
	int error;

	lp = &cn->data.list;

	sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
	    SBUF_INCLUDENUL);
	if (sb == NULL) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to allocate NVMeoF session list");
		return;
	}

	sbuf_printf(sb, "<ctlnvmflist>\n");
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		sx_slock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			sbuf_printf(sb, "<connection id=\"%d\">"
			    "<hostnqn>%s</hostnqn>"
			    "<subnqn>%s</subnqn>"
			    "<trtype>%u</trtype>"
			    "</connection>\n",
			    ctrlr->cntlid,
			    ctrlr->hostnqn,
			    np->cdata.subnqn,
			    ctrlr->trtype);
		}
		sx_sunlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);
	sbuf_printf(sb, "</ctlnvmflist>\n");
	if (sbuf_finish(sb) != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Out of space, %d bytes is too small", lp->alloc_len);
		return;
	}

	error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
	if (error != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyout session list: %d", error);
		return;
	}
	lp->fill_len = sbuf_len(sb);
	cn->status = CTL_NVMF_OK;
	sbuf_delete(sb);
}

static void
nvmft_terminate(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_terminate_params *tp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	bool found, match;

	tp = &cn->data.terminate;

	found = false;
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		sx_slock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			if (tp->all != 0)
				match = true;
			else if (tp->cntlid != -1)
				match = tp->cntlid == ctrlr->cntlid;
			else if (tp->hostnqn[0] != '\0')
				match = strncmp(tp->hostnqn, ctrlr->hostnqn,
				    sizeof(tp->hostnqn)) == 0;
			else
				match = false;
			if (!match)
				continue;
			nvmft_printf(ctrlr,
			    "disconnecting due to administrative request\n");
			nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
			found = true;
		}
		sx_sunlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);

	if (!found) {
		cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "No matching associations found");
		return;
	}
	cn->status = CTL_NVMF_OK;
}

static int
nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ctl_nvmf *cn;
	struct ctl_req *req;

	switch (cmd) {
	case CTL_PORT_REQ:
		req = (struct ctl_req *)data;
		switch (req->reqtype) {
		case CTL_REQ_CREATE:
			nvmft_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			nvmft_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
			break;
		}
		return (0);
	case CTL_NVMF:
		cn = (struct ctl_nvmf *)data;
		switch (cn->type) {
		case CTL_NVMF_HANDOFF:
			nvmft_handoff(cn);
			break;
		case CTL_NVMF_LIST:
			nvmft_list(cn);
			break;
		case CTL_NVMF_TERMINATE:
			nvmft_terminate(cn);
			break;
		default:
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Invalid NVMeoF request type %d", cn->type);
			break;
		}
		return (0);
	default:
		return (ENOTTY);
	}
}

static int
nvmft_shutdown(void)
{
	/* TODO: Need to check for active controllers. */
	if (!TAILQ_EMPTY(&nvmft_ports))
		return (EBUSY);

	taskqueue_free(nvmft_taskq);
	sx_destroy(&nvmft_ports_lock);
	return (0);
}

CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);