1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2023-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/param.h> 9 #include <sys/dnv.h> 10 #include <sys/jail.h> 11 #include <sys/kernel.h> 12 #include <sys/limits.h> 13 #include <sys/lock.h> 14 #include <sys/malloc.h> 15 #include <sys/mbuf.h> 16 #include <sys/memdesc.h> 17 #include <sys/module.h> 18 #include <sys/proc.h> 19 #include <sys/queue.h> 20 #include <sys/refcount.h> 21 #include <sys/sbuf.h> 22 #include <sys/smp.h> 23 #include <sys/sx.h> 24 #include <sys/taskqueue.h> 25 26 #include <machine/bus.h> 27 #include <machine/bus_dma.h> 28 29 #include <dev/nvmf/nvmf.h> 30 #include <dev/nvmf/nvmf_transport.h> 31 #include <dev/nvmf/controller/nvmft_subr.h> 32 #include <dev/nvmf/controller/nvmft_var.h> 33 34 #include <cam/ctl/ctl.h> 35 #include <cam/ctl/ctl_error.h> 36 #include <cam/ctl/ctl_ha.h> 37 #include <cam/ctl/ctl_io.h> 38 #include <cam/ctl/ctl_frontend.h> 39 #include <cam/ctl/ctl_private.h> 40 41 /* 42 * Store pointers to the capsule and qpair in the two pointer members 43 * of CTL_PRIV_FRONTEND. 44 */ 45 #define NVMFT_NC(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0]) 46 #define NVMFT_QP(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1]) 47 48 static void nvmft_done(union ctl_io *io); 49 static int nvmft_init(void); 50 static int nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, 51 int flag, struct thread *td); 52 static int nvmft_shutdown(void); 53 54 static struct taskqueue *nvmft_taskq; 55 static TAILQ_HEAD(, nvmft_port) nvmft_ports; 56 static struct sx nvmft_ports_lock; 57 58 MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller"); 59 60 static struct ctl_frontend nvmft_frontend = { 61 .name = "nvmf", 62 .init = nvmft_init, 63 .ioctl = nvmft_ioctl, 64 .fe_dump = NULL, 65 .shutdown = nvmft_shutdown, 66 }; 67 68 static void 69 nvmft_online(void *arg) 70 { 71 struct nvmft_port *np = arg; 72 73 sx_xlock(&np->lock); 74 np->online = true; 75 sx_xunlock(&np->lock); 76 } 77 78 static void 79 nvmft_offline(void *arg) 80 { 81 struct nvmft_port *np = arg; 82 struct nvmft_controller *ctrlr; 83 84 sx_xlock(&np->lock); 85 np->online = false; 86 87 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 88 nvmft_printf(ctrlr, 89 "shutting down due to port going offline\n"); 90 nvmft_controller_error(ctrlr, NULL, ENODEV); 91 } 92 93 while (!TAILQ_EMPTY(&np->controllers)) 94 sx_sleep(np, &np->lock, 0, "nvmfoff", 0); 95 sx_xunlock(&np->lock); 96 } 97 98 static int 99 nvmft_lun_enable(void *arg, int lun_id) 100 { 101 struct nvmft_port *np = arg; 102 struct nvmft_controller *ctrlr; 103 uint32_t *old_ns, *new_ns; 104 uint32_t nsid; 105 u_int i; 106 107 if (lun_id >= le32toh(np->cdata.nn)) { 108 printf("NVMFT: %s lun %d larger than maximum nsid %u\n", 109 np->cdata.subnqn, lun_id, le32toh(np->cdata.nn)); 110 return (EOPNOTSUPP); 111 } 112 nsid = lun_id + 1; 113 114 sx_xlock(&np->lock); 115 new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT, 116 M_WAITOK); 117 for (i = 0; i < np->num_ns; i++) { 118 if (np->active_ns[i] < nsid) 119 continue; 120 if (np->active_ns[i] == nsid) { 121 sx_xunlock(&np->lock); 122 free(new_ns, M_NVMFT); 123 printf("NVMFT: %s duplicate lun %d\n", 124 np->cdata.subnqn, lun_id); 125 return (EINVAL); 126 } 127 break; 128 } 129 130 /* Copy over IDs smaller than nsid. */ 131 memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns)); 132 133 /* Insert nsid. */ 134 new_ns[i] = nsid; 135 136 /* Copy over IDs greater than nsid. */ 137 memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) * 138 sizeof(*np->active_ns)); 139 140 np->num_ns++; 141 old_ns = np->active_ns; 142 np->active_ns = new_ns; 143 144 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 145 nvmft_controller_lun_changed(ctrlr, lun_id); 146 } 147 148 sx_xunlock(&np->lock); 149 free(old_ns, M_NVMFT); 150 151 return (0); 152 } 153 154 static int 155 nvmft_lun_disable(void *arg, int lun_id) 156 { 157 struct nvmft_port *np = arg; 158 struct nvmft_controller *ctrlr; 159 uint32_t nsid; 160 u_int i; 161 162 if (lun_id >= le32toh(np->cdata.nn)) 163 return (0); 164 nsid = lun_id + 1; 165 166 sx_xlock(&np->lock); 167 for (i = 0; i < np->num_ns; i++) { 168 if (np->active_ns[i] == nsid) 169 goto found; 170 } 171 sx_xunlock(&np->lock); 172 printf("NVMFT: %s request to disable nonexistent lun %d\n", 173 np->cdata.subnqn, lun_id); 174 return (EINVAL); 175 176 found: 177 /* Move down IDs greater than nsid. */ 178 memmove(np->active_ns + i, np->active_ns + i + 1, 179 (np->num_ns - (i + 1)) * sizeof(*np->active_ns)); 180 np->num_ns--; 181 182 /* NB: Don't bother freeing the old active_ns array. */ 183 184 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 185 nvmft_controller_lun_changed(ctrlr, lun_id); 186 } 187 188 sx_xunlock(&np->lock); 189 190 return (0); 191 } 192 193 void 194 nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid, 195 struct nvme_ns_list *nslist) 196 { 197 u_int i, count; 198 199 sx_slock(&np->lock); 200 count = 0; 201 for (i = 0; i < np->num_ns; i++) { 202 if (np->active_ns[i] <= nsid) 203 continue; 204 nslist->ns[count] = htole32(np->active_ns[i]); 205 count++; 206 if (count == nitems(nslist->ns)) 207 break; 208 } 209 sx_sunlock(&np->lock); 210 } 211 212 void 213 nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc, 214 bool admin) 215 { 216 struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp); 217 const struct nvme_command *cmd = nvmf_capsule_sqe(nc); 218 struct nvmft_port *np = ctrlr->np; 219 union ctl_io *io; 220 int error; 221 222 if (cmd->nsid == htole32(0)) { 223 nvmft_send_generic_error(qp, nc, 224 NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 225 nvmf_free_capsule(nc); 226 return; 227 } 228 229 mtx_lock(&ctrlr->lock); 230 if (ctrlr->pending_commands == 0) 231 ctrlr->start_busy = sbinuptime(); 232 ctrlr->pending_commands++; 233 mtx_unlock(&ctrlr->lock); 234 io = ctl_alloc_io(np->port.ctl_pool_ref); 235 ctl_zero_io(io); 236 NVMFT_NC(io) = nc; 237 NVMFT_QP(io) = qp; 238 io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME; 239 io->io_hdr.nexus.initid = ctrlr->cntlid; 240 io->io_hdr.nexus.targ_port = np->port.targ_port; 241 io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1; 242 io->nvmeio.cmd = *cmd; 243 error = ctl_run(io); 244 if (error != 0) { 245 nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n", 246 nvmft_qpair_name(qp), error); 247 ctl_nvme_set_generic_error(&io->nvmeio, 248 NVME_SC_INTERNAL_DEVICE_ERROR); 249 nvmft_done(io); 250 251 nvmft_controller_error(ctrlr, qp, ENXIO); 252 } 253 } 254 255 void 256 nvmft_terminate_commands(struct nvmft_controller *ctrlr) 257 { 258 struct nvmft_port *np = ctrlr->np; 259 union ctl_io *io; 260 int error; 261 262 mtx_lock(&ctrlr->lock); 263 if (ctrlr->pending_commands == 0) 264 ctrlr->start_busy = sbinuptime(); 265 ctrlr->pending_commands++; 266 mtx_unlock(&ctrlr->lock); 267 io = ctl_alloc_io(np->port.ctl_pool_ref); 268 ctl_zero_io(io); 269 NVMFT_QP(io) = ctrlr->admin; 270 io->io_hdr.io_type = CTL_IO_TASK; 271 io->io_hdr.nexus.initid = ctrlr->cntlid; 272 io->io_hdr.nexus.targ_port = np->port.targ_port; 273 io->io_hdr.nexus.targ_lun = 0; 274 io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */ 275 io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; 276 error = ctl_run(io); 277 if (error != CTL_RETVAL_COMPLETE) { 278 nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error); 279 #ifdef INVARIANTS 280 io->io_hdr.status = CTL_SUCCESS; 281 #endif 282 nvmft_done(io); 283 } 284 } 285 286 static void 287 nvmft_datamove_out_cb(void *arg, size_t xfered, int error) 288 { 289 struct ctl_nvmeio *ctnio = arg; 290 291 if (error != 0) { 292 ctl_nvme_set_data_transfer_error(ctnio); 293 } else { 294 MPASS(xfered == ctnio->kern_data_len); 295 ctnio->kern_data_resid -= xfered; 296 } 297 298 if (ctnio->kern_sg_entries) { 299 free(ctnio->ext_data_ptr, M_NVMFT); 300 ctnio->ext_data_ptr = NULL; 301 } else 302 MPASS(ctnio->ext_data_ptr == NULL); 303 ctl_datamove_done((union ctl_io *)ctnio, false); 304 } 305 306 static void 307 nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp, 308 struct nvmf_capsule *nc) 309 { 310 struct memdesc mem; 311 int error; 312 313 MPASS(ctnio->ext_data_ptr == NULL); 314 if (ctnio->kern_sg_entries > 0) { 315 struct ctl_sg_entry *sgl; 316 struct bus_dma_segment *vlist; 317 318 vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist), 319 M_NVMFT, M_WAITOK); 320 ctnio->ext_data_ptr = (void *)vlist; 321 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr; 322 for (u_int i = 0; i < ctnio->kern_sg_entries; i++) { 323 vlist[i].ds_addr = (uintptr_t)sgl[i].addr; 324 vlist[i].ds_len = sgl[i].len; 325 } 326 mem = memdesc_vlist(vlist, ctnio->kern_sg_entries); 327 } else 328 mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len); 329 330 error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem, 331 ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio); 332 if (error == 0) 333 return; 334 335 nvmft_printf(nvmft_qpair_ctrlr(qp), 336 "Failed to request capsule data: %d\n", error); 337 ctl_nvme_set_data_transfer_error(ctnio); 338 339 if (ctnio->kern_sg_entries) { 340 free(ctnio->ext_data_ptr, M_NVMFT); 341 ctnio->ext_data_ptr = NULL; 342 } else 343 MPASS(ctnio->ext_data_ptr == NULL); 344 ctl_datamove_done((union ctl_io *)ctnio, true); 345 } 346 347 static struct mbuf * 348 nvmft_copy_data(struct ctl_nvmeio *ctnio) 349 { 350 struct ctl_sg_entry *sgl; 351 struct mbuf *m0, *m; 352 uint32_t resid, off, todo; 353 int mlen; 354 355 MPASS(ctnio->kern_data_len != 0); 356 357 m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0); 358 359 if (ctnio->kern_sg_entries == 0) { 360 m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr); 361 return (m0); 362 } 363 364 resid = ctnio->kern_data_len; 365 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr; 366 off = 0; 367 m = m0; 368 mlen = M_TRAILINGSPACE(m); 369 for (;;) { 370 todo = MIN(mlen, sgl->len - off); 371 memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off, 372 todo); 373 m->m_len += todo; 374 resid -= todo; 375 if (resid == 0) { 376 MPASS(m->m_next == NULL); 377 break; 378 } 379 380 off += todo; 381 if (off == sgl->len) { 382 sgl++; 383 off = 0; 384 } 385 mlen -= todo; 386 if (mlen == 0) { 387 m = m->m_next; 388 mlen = M_TRAILINGSPACE(m); 389 } 390 } 391 392 return (m0); 393 } 394 395 static void 396 m_free_ref_data(struct mbuf *m) 397 { 398 ctl_ref kern_data_ref = m->m_ext.ext_arg1; 399 400 kern_data_ref(m->m_ext.ext_arg2, -1); 401 } 402 403 static struct mbuf * 404 m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size) 405 { 406 struct mbuf *m; 407 408 m = m_get(M_WAITOK, MT_DATA); 409 m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref, 410 ctnio->kern_data_arg, M_RDONLY, EXT_CTL); 411 m->m_len = size; 412 ctnio->kern_data_ref(ctnio->kern_data_arg, 1); 413 return (m); 414 } 415 416 static struct mbuf * 417 nvmft_ref_data(struct ctl_nvmeio *ctnio) 418 { 419 struct ctl_sg_entry *sgl; 420 struct mbuf *m0, *m; 421 422 MPASS(ctnio->kern_data_len != 0); 423 424 if (ctnio->kern_sg_entries == 0) 425 return (m_get_ref_data(ctnio, ctnio->kern_data_ptr, 426 ctnio->kern_data_len)); 427 428 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr; 429 m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len); 430 m = m0; 431 for (u_int i = 1; i < ctnio->kern_sg_entries; i++) { 432 m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len); 433 m = m->m_next; 434 } 435 return (m0); 436 } 437 438 static void 439 nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp, 440 struct nvmf_capsule *nc) 441 { 442 struct mbuf *m; 443 u_int status; 444 445 if (ctnio->kern_data_ref != NULL) 446 m = nvmft_ref_data(ctnio); 447 else 448 m = nvmft_copy_data(ctnio); 449 status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m, 450 ctnio->kern_data_len); 451 switch (status) { 452 case NVMF_SUCCESS_SENT: 453 ctnio->success_sent = true; 454 nvmft_command_completed(qp, nc); 455 /* FALLTHROUGH */ 456 case NVMF_MORE: 457 case NVME_SC_SUCCESS: 458 break; 459 default: 460 ctl_nvme_set_generic_error(ctnio, status); 461 break; 462 } 463 ctl_datamove_done((union ctl_io *)ctnio, true); 464 } 465 466 void 467 nvmft_handle_datamove(union ctl_io *io) 468 { 469 struct nvmf_capsule *nc; 470 struct nvmft_qpair *qp; 471 472 /* Some CTL commands preemptively set a success status. */ 473 MPASS(io->io_hdr.status == CTL_STATUS_NONE || 474 io->io_hdr.status == CTL_SUCCESS); 475 MPASS(!io->nvmeio.success_sent); 476 477 nc = NVMFT_NC(io); 478 qp = NVMFT_QP(io); 479 480 if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) 481 nvmft_datamove_in(&io->nvmeio, qp, nc); 482 else 483 nvmft_datamove_out(&io->nvmeio, qp, nc); 484 } 485 486 void 487 nvmft_abort_datamove(union ctl_io *io) 488 { 489 io->io_hdr.port_status = 1; 490 io->io_hdr.flags |= CTL_FLAG_ABORT; 491 ctl_datamove_done(io, true); 492 } 493 494 static void 495 nvmft_datamove(union ctl_io *io) 496 { 497 struct nvmft_qpair *qp; 498 499 qp = NVMFT_QP(io); 500 nvmft_qpair_datamove(qp, io); 501 } 502 503 void 504 nvmft_enqueue_task(struct task *task) 505 { 506 taskqueue_enqueue(nvmft_taskq, task); 507 } 508 509 void 510 nvmft_drain_task(struct task *task) 511 { 512 taskqueue_drain(nvmft_taskq, task); 513 } 514 515 static void 516 hip_add(uint64_t pair[2], uint64_t addend) 517 { 518 uint64_t old, new; 519 520 old = le64toh(pair[0]); 521 new = old + addend; 522 pair[0] = htole64(new); 523 if (new < old) 524 pair[1] += htole64(1); 525 } 526 527 static void 528 nvmft_done(union ctl_io *io) 529 { 530 struct nvmft_controller *ctrlr; 531 const struct nvme_command *cmd; 532 struct nvmft_qpair *qp; 533 struct nvmf_capsule *nc; 534 size_t len; 535 536 KASSERT(io->io_hdr.status == CTL_SUCCESS || 537 io->io_hdr.status == CTL_NVME_ERROR, 538 ("%s: bad status %u", __func__, io->io_hdr.status)); 539 540 nc = NVMFT_NC(io); 541 qp = NVMFT_QP(io); 542 ctrlr = nvmft_qpair_ctrlr(qp); 543 544 if (nc == NULL) { 545 /* Completion of nvmft_terminate_commands. */ 546 goto end; 547 } 548 549 cmd = nvmf_capsule_sqe(nc); 550 551 if (io->io_hdr.status == CTL_SUCCESS) 552 len = nvmf_capsule_data_len(nc) / 512; 553 else 554 len = 0; 555 switch (cmd->opc) { 556 case NVME_OPC_WRITE: 557 mtx_lock(&ctrlr->lock); 558 hip_add(ctrlr->hip.host_write_commands, 1); 559 len += ctrlr->partial_duw; 560 if (len > 1000) 561 hip_add(ctrlr->hip.data_units_written, len / 1000); 562 ctrlr->partial_duw = len % 1000; 563 mtx_unlock(&ctrlr->lock); 564 break; 565 case NVME_OPC_READ: 566 case NVME_OPC_COMPARE: 567 case NVME_OPC_VERIFY: 568 mtx_lock(&ctrlr->lock); 569 if (cmd->opc != NVME_OPC_VERIFY) 570 hip_add(ctrlr->hip.host_read_commands, 1); 571 len += ctrlr->partial_dur; 572 if (len > 1000) 573 hip_add(ctrlr->hip.data_units_read, len / 1000); 574 ctrlr->partial_dur = len % 1000; 575 mtx_unlock(&ctrlr->lock); 576 break; 577 } 578 579 if (io->nvmeio.success_sent) { 580 MPASS(io->io_hdr.status == CTL_SUCCESS); 581 } else { 582 io->nvmeio.cpl.cid = cmd->cid; 583 nvmft_send_response(qp, &io->nvmeio.cpl); 584 } 585 nvmf_free_capsule(nc); 586 end: 587 ctl_free_io(io); 588 mtx_lock(&ctrlr->lock); 589 ctrlr->pending_commands--; 590 if (ctrlr->pending_commands == 0) 591 ctrlr->busy_total += sbinuptime() - ctrlr->start_busy; 592 mtx_unlock(&ctrlr->lock); 593 } 594 595 static int 596 nvmft_init(void) 597 { 598 int error; 599 600 nvmft_taskq = taskqueue_create("nvmft", M_WAITOK, 601 taskqueue_thread_enqueue, &nvmft_taskq); 602 error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT, 603 control_softc->ctl_proc, "nvmft"); 604 if (error != 0) { 605 taskqueue_free(nvmft_taskq); 606 return (error); 607 } 608 609 TAILQ_INIT(&nvmft_ports); 610 sx_init(&nvmft_ports_lock, "nvmft ports"); 611 return (0); 612 } 613 614 void 615 nvmft_port_free(struct nvmft_port *np) 616 { 617 KASSERT(TAILQ_EMPTY(&np->controllers), 618 ("%s(%p): active controllers", __func__, np)); 619 620 if (np->port.targ_port != -1) { 621 if (ctl_port_deregister(&np->port) != 0) 622 printf("%s: ctl_port_deregister() failed\n", __func__); 623 } 624 625 free(np->active_ns, M_NVMFT); 626 clean_unrhdr(np->ids); 627 delete_unrhdr(np->ids); 628 sx_destroy(&np->lock); 629 free(np, M_NVMFT); 630 } 631 632 static struct nvmft_port * 633 nvmft_port_find(const char *subnqn) 634 { 635 struct nvmft_port *np; 636 637 KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__)); 638 639 sx_assert(&nvmft_ports_lock, SA_LOCKED); 640 TAILQ_FOREACH(np, &nvmft_ports, link) { 641 if (strcmp(np->cdata.subnqn, subnqn) == 0) 642 break; 643 } 644 return (np); 645 } 646 647 static struct nvmft_port * 648 nvmft_port_find_by_id(int port_id) 649 { 650 struct nvmft_port *np; 651 652 sx_assert(&nvmft_ports_lock, SA_LOCKED); 653 TAILQ_FOREACH(np, &nvmft_ports, link) { 654 if (np->port.targ_port == port_id) 655 break; 656 } 657 return (np); 658 } 659 660 /* 661 * Helper function to fetch a number stored as a string in an nv_list. 662 * Returns false if the string was not a valid number. 663 */ 664 static bool 665 dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value, 666 u_long *value) 667 { 668 const char *str; 669 char *cp; 670 671 str = dnvlist_get_string(nvl, name, NULL); 672 if (str == NULL) { 673 *value = default_value; 674 return (true); 675 } 676 if (*str == '\0') 677 return (false); 678 *value = strtoul(str, &cp, 0); 679 if (*cp != '\0') 680 return (false); 681 return (true); 682 } 683 684 /* 685 * NVMeoF ports support the following parameters: 686 * 687 * Mandatory: 688 * 689 * subnqn: subsystem NVMe Qualified Name 690 * portid: integer port ID from Discovery Log Page entry 691 * 692 * Optional: 693 * serial: Serial Number string 694 * max_io_qsize: Maximum number of I/O queue entries 695 * enable_timeout: Timeout for controller enable in milliseconds 696 * ioccsz: Maximum command capsule size 697 * iorcsz: Maximum response capsule size 698 * nn: Number of namespaces 699 */ 700 static void 701 nvmft_port_create(struct ctl_req *req) 702 { 703 struct nvmft_port *np; 704 struct ctl_port *port; 705 const char *serial, *subnqn; 706 char serial_buf[NVME_SERIAL_NUMBER_LENGTH]; 707 u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid; 708 int error; 709 710 /* Required parameters. */ 711 subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL); 712 if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) { 713 req->status = CTL_LUN_ERROR; 714 snprintf(req->error_str, sizeof(req->error_str), 715 "Missing required argument"); 716 return; 717 } 718 if (!nvmf_nqn_valid(subnqn)) { 719 req->status = CTL_LUN_ERROR; 720 snprintf(req->error_str, sizeof(req->error_str), 721 "Invalid SubNQN"); 722 return; 723 } 724 if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) || 725 portid > UINT16_MAX) { 726 req->status = CTL_LUN_ERROR; 727 snprintf(req->error_str, sizeof(req->error_str), 728 "Invalid port ID"); 729 return; 730 } 731 732 /* Optional parameters. */ 733 if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize", 734 NVMF_MAX_IO_ENTRIES, &max_io_qsize) || 735 max_io_qsize < NVME_MIN_IO_ENTRIES || 736 max_io_qsize > NVME_MAX_IO_ENTRIES) { 737 req->status = CTL_LUN_ERROR; 738 snprintf(req->error_str, sizeof(req->error_str), 739 "Invalid maximum I/O queue size"); 740 return; 741 } 742 743 if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout", 744 NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) || 745 (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) { 746 req->status = CTL_LUN_ERROR; 747 snprintf(req->error_str, sizeof(req->error_str), 748 "Invalid enable timeout"); 749 return; 750 } 751 752 if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ, 753 &ioccsz) || ioccsz < sizeof(struct nvme_command) || 754 (ioccsz % 16) != 0) { 755 req->status = CTL_LUN_ERROR; 756 snprintf(req->error_str, sizeof(req->error_str), 757 "Invalid Command Capsule size"); 758 return; 759 } 760 761 if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ, 762 &iorcsz) || iorcsz < sizeof(struct nvme_completion) || 763 (iorcsz % 16) != 0) { 764 req->status = CTL_LUN_ERROR; 765 snprintf(req->error_str, sizeof(req->error_str), 766 "Invalid Response Capsule size"); 767 return; 768 } 769 770 if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) || 771 nn < 1 || nn > UINT32_MAX) { 772 req->status = CTL_LUN_ERROR; 773 snprintf(req->error_str, sizeof(req->error_str), 774 "Invalid number of namespaces"); 775 return; 776 } 777 778 serial = dnvlist_get_string(req->args_nvl, "serial", NULL); 779 if (serial == NULL) { 780 getcredhostid(curthread->td_ucred, &hostid); 781 nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid); 782 serial = serial_buf; 783 } 784 785 sx_xlock(&nvmft_ports_lock); 786 787 np = nvmft_port_find(subnqn); 788 if (np != NULL) { 789 req->status = CTL_LUN_ERROR; 790 snprintf(req->error_str, sizeof(req->error_str), 791 "SubNQN \"%s\" already exists", subnqn); 792 sx_xunlock(&nvmft_ports_lock); 793 return; 794 } 795 796 np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO); 797 refcount_init(&np->refs, 1); 798 np->max_io_qsize = max_io_qsize; 799 np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500); 800 sx_init(&np->lock, "nvmft port"); 801 np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1, 802 NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX); 803 TAILQ_INIT(&np->controllers); 804 805 /* The controller ID is set later for individual controllers. */ 806 _nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype, 807 osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata); 808 np->cdata.aerl = NVMFT_NUM_AER - 1; 809 np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE); 810 np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) | 811 NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) | 812 NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) | 813 NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1)); 814 np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1); 815 816 np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1); 817 memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr)); 818 819 port = &np->port; 820 821 port->frontend = &nvmft_frontend; 822 port->port_type = CTL_PORT_NVMF; 823 port->num_requested_ctl_io = max_io_qsize; 824 port->port_name = "nvmf"; 825 port->physical_port = portid; 826 port->virtual_port = 0; 827 port->port_online = nvmft_online; 828 port->port_offline = nvmft_offline; 829 port->onoff_arg = np; 830 port->lun_enable = nvmft_lun_enable; 831 port->lun_disable = nvmft_lun_disable; 832 port->targ_lun_arg = np; 833 port->fe_datamove = nvmft_datamove; 834 port->fe_done = nvmft_done; 835 port->targ_port = -1; 836 port->options = nvlist_clone(req->args_nvl); 837 838 error = ctl_port_register(port); 839 if (error != 0) { 840 sx_xunlock(&nvmft_ports_lock); 841 nvlist_destroy(port->options); 842 nvmft_port_rele(np); 843 req->status = CTL_LUN_ERROR; 844 snprintf(req->error_str, sizeof(req->error_str), 845 "Failed to register CTL port with error %d", error); 846 return; 847 } 848 849 TAILQ_INSERT_TAIL(&nvmft_ports, np, link); 850 sx_xunlock(&nvmft_ports_lock); 851 852 req->status = CTL_LUN_OK; 853 req->result_nvl = nvlist_create(0); 854 nvlist_add_number(req->result_nvl, "port_id", port->targ_port); 855 } 856 857 static void 858 nvmft_port_remove(struct ctl_req *req) 859 { 860 struct nvmft_port *np; 861 const char *subnqn; 862 u_long port_id; 863 864 /* 865 * ctladm port -r just provides the port_id, so permit looking 866 * up a port either by "subnqn" or "port_id". 867 */ 868 port_id = ULONG_MAX; 869 subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL); 870 if (subnqn == NULL) { 871 if (!nvlist_exists_string(req->args_nvl, "port_id")) { 872 req->status = CTL_LUN_ERROR; 873 snprintf(req->error_str, sizeof(req->error_str), 874 "Missing required argument"); 875 return; 876 } 877 if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX, 878 &port_id)) { 879 req->status = CTL_LUN_ERROR; 880 snprintf(req->error_str, sizeof(req->error_str), 881 "Invalid CTL port ID"); 882 return; 883 } 884 } else { 885 if (nvlist_exists_string(req->args_nvl, "port_id")) { 886 req->status = CTL_LUN_ERROR; 887 snprintf(req->error_str, sizeof(req->error_str), 888 "Ambiguous port removal request"); 889 return; 890 } 891 } 892 893 sx_xlock(&nvmft_ports_lock); 894 895 if (subnqn != NULL) { 896 np = nvmft_port_find(subnqn); 897 if (np == NULL) { 898 req->status = CTL_LUN_ERROR; 899 snprintf(req->error_str, sizeof(req->error_str), 900 "SubNQN \"%s\" does not exist", subnqn); 901 sx_xunlock(&nvmft_ports_lock); 902 return; 903 } 904 } else { 905 np = nvmft_port_find_by_id(port_id); 906 if (np == NULL) { 907 req->status = CTL_LUN_ERROR; 908 snprintf(req->error_str, sizeof(req->error_str), 909 "CTL port %lu is not a NVMF port", port_id); 910 sx_xunlock(&nvmft_ports_lock); 911 return; 912 } 913 } 914 915 TAILQ_REMOVE(&nvmft_ports, np, link); 916 sx_xunlock(&nvmft_ports_lock); 917 918 ctl_port_offline(&np->port); 919 nvmft_port_rele(np); 920 req->status = CTL_LUN_OK; 921 } 922 923 static void 924 nvmft_handoff(struct ctl_nvmf *cn) 925 { 926 const struct nvmf_fabric_connect_cmd *cmd; 927 const struct nvmf_fabric_connect_data *data; 928 const nvlist_t *params; 929 struct nvmft_port *np; 930 nvlist_t *nvl; 931 size_t len; 932 enum nvmf_trtype trtype; 933 int error; 934 935 np = NULL; 936 error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl); 937 if (error != 0) { 938 cn->status = CTL_NVMF_ERROR; 939 snprintf(cn->error_str, sizeof(cn->error_str), 940 "Failed to copyin and unpack handoff arguments"); 941 return; 942 } 943 944 if (!nvlist_exists_number(nvl, "trtype") || 945 !nvlist_exists_nvlist(nvl, "params") || 946 !nvlist_exists_binary(nvl, "cmd") || 947 !nvlist_exists_binary(nvl, "data")) { 948 cn->status = CTL_NVMF_ERROR; 949 snprintf(cn->error_str, sizeof(cn->error_str), 950 "Handoff arguments missing required value"); 951 goto out; 952 } 953 954 params = nvlist_get_nvlist(nvl, "params"); 955 if (!nvmf_validate_qpair_nvlist(params, true)) { 956 cn->status = CTL_NVMF_ERROR; 957 snprintf(cn->error_str, sizeof(cn->error_str), 958 "Invalid queue pair parameters"); 959 goto out; 960 } 961 962 cmd = nvlist_get_binary(nvl, "cmd", &len); 963 if (len != sizeof(*cmd)) { 964 cn->status = CTL_NVMF_ERROR; 965 snprintf(cn->error_str, sizeof(cn->error_str), 966 "Wrong size for CONNECT SQE"); 967 goto out; 968 } 969 970 data = nvlist_get_binary(nvl, "data", &len); 971 if (len != sizeof(*data)) { 972 cn->status = CTL_NVMF_ERROR; 973 snprintf(cn->error_str, sizeof(cn->error_str), 974 "Wrong size for CONNECT data"); 975 goto out; 976 } 977 978 if (!nvmf_nqn_valid(data->subnqn)) { 979 cn->status = CTL_NVMF_ERROR; 980 snprintf(cn->error_str, sizeof(cn->error_str), 981 "Invalid SubNQN"); 982 goto out; 983 } 984 985 sx_slock(&nvmft_ports_lock); 986 np = nvmft_port_find(data->subnqn); 987 if (np == NULL) { 988 sx_sunlock(&nvmft_ports_lock); 989 cn->status = CTL_NVMF_ERROR; 990 snprintf(cn->error_str, sizeof(cn->error_str), 991 "Unknown SubNQN"); 992 goto out; 993 } 994 if (!np->online) { 995 sx_sunlock(&nvmft_ports_lock); 996 cn->status = CTL_NVMF_ERROR; 997 snprintf(cn->error_str, sizeof(cn->error_str), 998 "CTL port offline"); 999 np = NULL; 1000 goto out; 1001 } 1002 nvmft_port_ref(np); 1003 sx_sunlock(&nvmft_ports_lock); 1004 1005 trtype = nvlist_get_number(nvl, "trtype"); 1006 if (nvlist_get_bool(params, "admin")) { 1007 error = nvmft_handoff_admin_queue(np, trtype, params, cmd, 1008 data); 1009 if (error != 0) { 1010 cn->status = CTL_NVMF_ERROR; 1011 snprintf(cn->error_str, sizeof(cn->error_str), 1012 "Failed to handoff admin queue: %d", error); 1013 goto out; 1014 } 1015 } else { 1016 error = nvmft_handoff_io_queue(np, trtype, params, cmd, data); 1017 if (error != 0) { 1018 cn->status = CTL_NVMF_ERROR; 1019 snprintf(cn->error_str, sizeof(cn->error_str), 1020 "Failed to handoff I/O queue: %d", error); 1021 goto out; 1022 } 1023 } 1024 1025 cn->status = CTL_NVMF_OK; 1026 out: 1027 if (np != NULL) 1028 nvmft_port_rele(np); 1029 nvlist_destroy(nvl); 1030 } 1031 1032 static void 1033 nvmft_list(struct ctl_nvmf *cn) 1034 { 1035 struct ctl_nvmf_list_params *lp; 1036 struct nvmft_controller *ctrlr; 1037 struct nvmft_port *np; 1038 struct sbuf *sb; 1039 int error; 1040 1041 lp = &cn->data.list; 1042 1043 sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN | 1044 SBUF_INCLUDENUL); 1045 if (sb == NULL) { 1046 cn->status = CTL_NVMF_ERROR; 1047 snprintf(cn->error_str, sizeof(cn->error_str), 1048 "Failed to allocate NVMeoF session list"); 1049 return; 1050 } 1051 1052 sbuf_printf(sb, "<ctlnvmflist>\n"); 1053 sx_slock(&nvmft_ports_lock); 1054 TAILQ_FOREACH(np, &nvmft_ports, link) { 1055 sx_slock(&np->lock); 1056 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 1057 sbuf_printf(sb, "<connection id=\"%d\">" 1058 "<hostnqn>%s</hostnqn>" 1059 "<subnqn>%s</subnqn>" 1060 "<trtype>%u</trtype>" 1061 "</connection>\n", 1062 ctrlr->cntlid, 1063 ctrlr->hostnqn, 1064 np->cdata.subnqn, 1065 ctrlr->trtype); 1066 } 1067 sx_sunlock(&np->lock); 1068 } 1069 sx_sunlock(&nvmft_ports_lock); 1070 sbuf_printf(sb, "</ctlnvmflist>\n"); 1071 if (sbuf_finish(sb) != 0) { 1072 sbuf_delete(sb); 1073 cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE; 1074 snprintf(cn->error_str, sizeof(cn->error_str), 1075 "Out of space, %d bytes is too small", lp->alloc_len); 1076 return; 1077 } 1078 1079 error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb)); 1080 if (error != 0) { 1081 sbuf_delete(sb); 1082 cn->status = CTL_NVMF_ERROR; 1083 snprintf(cn->error_str, sizeof(cn->error_str), 1084 "Failed to copyout session list: %d", error); 1085 return; 1086 } 1087 lp->fill_len = sbuf_len(sb); 1088 cn->status = CTL_NVMF_OK; 1089 sbuf_delete(sb); 1090 } 1091 1092 static void 1093 nvmft_terminate(struct ctl_nvmf *cn) 1094 { 1095 struct ctl_nvmf_terminate_params *tp; 1096 struct nvmft_controller *ctrlr; 1097 struct nvmft_port *np; 1098 bool found, match; 1099 1100 tp = &cn->data.terminate; 1101 1102 found = false; 1103 sx_slock(&nvmft_ports_lock); 1104 TAILQ_FOREACH(np, &nvmft_ports, link) { 1105 sx_slock(&np->lock); 1106 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 1107 if (tp->all != 0) 1108 match = true; 1109 else if (tp->cntlid != -1) 1110 match = tp->cntlid == ctrlr->cntlid; 1111 else if (tp->hostnqn[0] != '\0') 1112 match = strncmp(tp->hostnqn, ctrlr->hostnqn, 1113 sizeof(tp->hostnqn)) == 0; 1114 else 1115 match = false; 1116 if (!match) 1117 continue; 1118 nvmft_printf(ctrlr, 1119 "disconnecting due to administrative request\n"); 1120 nvmft_controller_error(ctrlr, NULL, ECONNABORTED); 1121 found = true; 1122 } 1123 sx_sunlock(&np->lock); 1124 } 1125 sx_sunlock(&nvmft_ports_lock); 1126 1127 if (!found) { 1128 cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND; 1129 snprintf(cn->error_str, sizeof(cn->error_str), 1130 "No matching associations found"); 1131 return; 1132 } 1133 cn->status = CTL_NVMF_OK; 1134 } 1135 1136 static int 1137 nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag, 1138 struct thread *td) 1139 { 1140 struct ctl_nvmf *cn; 1141 struct ctl_req *req; 1142 1143 switch (cmd) { 1144 case CTL_PORT_REQ: 1145 req = (struct ctl_req *)data; 1146 switch (req->reqtype) { 1147 case CTL_REQ_CREATE: 1148 nvmft_port_create(req); 1149 break; 1150 case CTL_REQ_REMOVE: 1151 nvmft_port_remove(req); 1152 break; 1153 default: 1154 req->status = CTL_LUN_ERROR; 1155 snprintf(req->error_str, sizeof(req->error_str), 1156 "Unsupported request type %d", req->reqtype); 1157 break; 1158 } 1159 return (0); 1160 case CTL_NVMF: 1161 cn = (struct ctl_nvmf *)data; 1162 switch (cn->type) { 1163 case CTL_NVMF_HANDOFF: 1164 nvmft_handoff(cn); 1165 break; 1166 case CTL_NVMF_LIST: 1167 nvmft_list(cn); 1168 break; 1169 case CTL_NVMF_TERMINATE: 1170 nvmft_terminate(cn); 1171 break; 1172 default: 1173 cn->status = CTL_NVMF_ERROR; 1174 snprintf(cn->error_str, sizeof(cn->error_str), 1175 "Invalid NVMeoF request type %d", cn->type); 1176 break; 1177 } 1178 return (0); 1179 default: 1180 return (ENOTTY); 1181 } 1182 } 1183 1184 static int 1185 nvmft_shutdown(void) 1186 { 1187 /* TODO: Need to check for active controllers. */ 1188 if (!TAILQ_EMPTY(&nvmft_ports)) 1189 return (EBUSY); 1190 1191 taskqueue_free(nvmft_taskq); 1192 sx_destroy(&nvmft_ports_lock); 1193 return (0); 1194 } 1195 1196 CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend); 1197 MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1); 1198