1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2023-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/param.h> 9 #include <sys/dnv.h> 10 #include <sys/jail.h> 11 #include <sys/kernel.h> 12 #include <sys/limits.h> 13 #include <sys/lock.h> 14 #include <sys/malloc.h> 15 #include <sys/mbuf.h> 16 #include <sys/memdesc.h> 17 #include <sys/module.h> 18 #include <sys/proc.h> 19 #include <sys/queue.h> 20 #include <sys/refcount.h> 21 #include <sys/sbuf.h> 22 #include <sys/smp.h> 23 #include <sys/sx.h> 24 #include <sys/taskqueue.h> 25 26 #include <machine/bus.h> 27 #include <machine/bus_dma.h> 28 29 #include <dev/nvmf/nvmf.h> 30 #include <dev/nvmf/nvmf_transport.h> 31 #include <dev/nvmf/controller/nvmft_subr.h> 32 #include <dev/nvmf/controller/nvmft_var.h> 33 34 #include <cam/ctl/ctl.h> 35 #include <cam/ctl/ctl_error.h> 36 #include <cam/ctl/ctl_ha.h> 37 #include <cam/ctl/ctl_io.h> 38 #include <cam/ctl/ctl_frontend.h> 39 #include <cam/ctl/ctl_private.h> 40 41 /* 42 * Store pointers to the capsule and qpair in the two pointer members 43 * of CTL_PRIV_FRONTEND. 44 */ 45 #define NVMFT_NC(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0]) 46 #define NVMFT_QP(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1]) 47 48 static void nvmft_done(union ctl_io *io); 49 static int nvmft_init(void); 50 static int nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, 51 int flag, struct thread *td); 52 static int nvmft_shutdown(void); 53 54 static struct taskqueue *nvmft_taskq; 55 static TAILQ_HEAD(, nvmft_port) nvmft_ports; 56 static struct sx nvmft_ports_lock; 57 58 MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller"); 59 60 static struct ctl_frontend nvmft_frontend = { 61 .name = "nvmf", 62 .init = nvmft_init, 63 .ioctl = nvmft_ioctl, 64 .fe_dump = NULL, 65 .shutdown = nvmft_shutdown, 66 }; 67 68 static void 69 nvmft_online(void *arg) 70 { 71 struct nvmft_port *np = arg; 72 73 mtx_lock(&np->lock); 74 np->online = true; 75 mtx_unlock(&np->lock); 76 } 77 78 static void 79 nvmft_offline(void *arg) 80 { 81 struct nvmft_port *np = arg; 82 struct nvmft_controller *ctrlr; 83 84 mtx_lock(&np->lock); 85 np->online = false; 86 87 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 88 nvmft_printf(ctrlr, 89 "shutting down due to port going offline\n"); 90 nvmft_controller_error(ctrlr, NULL, ENODEV); 91 } 92 93 while (!TAILQ_EMPTY(&np->controllers)) 94 mtx_sleep(np, &np->lock, 0, "nvmfoff", 0); 95 mtx_unlock(&np->lock); 96 } 97 98 static int 99 nvmft_info(void *arg, struct sbuf *sb) 100 { 101 struct nvmft_port *np = arg; 102 struct nvmft_controller *ctrlr; 103 int retval; 104 105 mtx_lock(&np->lock); 106 retval = sbuf_printf(sb, "\t<port>%s,p,%u</port>\n", np->cdata.subnqn, 107 np->portid); 108 if (retval != 0) 109 goto out; 110 111 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 112 retval = sbuf_printf(sb, "\t<host id=\"%u\">%s</host>\n", 113 ctrlr->cntlid, ctrlr->hostnqn); 114 if (retval != 0) 115 break; 116 } 117 out: 118 mtx_unlock(&np->lock); 119 return (retval); 120 } 121 122 static int 123 nvmft_lun_enable(void *arg, int lun_id) 124 { 125 struct nvmft_port *np = arg; 126 struct nvmft_controller *ctrlr; 127 uint32_t *old_ns, *new_ns; 128 uint32_t nsid; 129 u_int i, new_count; 130 131 if (lun_id >= le32toh(np->cdata.nn)) { 132 printf("NVMFT: %s lun %d larger than maximum nsid %u\n", 133 np->cdata.subnqn, lun_id, le32toh(np->cdata.nn)); 134 return (EOPNOTSUPP); 135 } 136 nsid = lun_id + 1; 137 138 mtx_lock(&np->lock); 139 for (;;) { 140 new_count = np->num_ns + 1; 141 mtx_unlock(&np->lock); 142 new_ns = mallocarray(new_count, sizeof(*new_ns), M_NVMFT, 143 M_WAITOK); 144 mtx_lock(&np->lock); 145 if (np->num_ns + 1 <= new_count) 146 break; 147 free(new_ns, M_NVMFT); 148 } 149 for (i = 0; i < np->num_ns; i++) { 150 if (np->active_ns[i] < nsid) 151 continue; 152 if (np->active_ns[i] == nsid) { 153 mtx_unlock(&np->lock); 154 free(new_ns, M_NVMFT); 155 printf("NVMFT: %s duplicate lun %d\n", 156 np->cdata.subnqn, lun_id); 157 return (EINVAL); 158 } 159 break; 160 } 161 162 /* Copy over IDs smaller than nsid. */ 163 memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns)); 164 165 /* Insert nsid. */ 166 new_ns[i] = nsid; 167 168 /* Copy over IDs greater than nsid. */ 169 memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) * 170 sizeof(*np->active_ns)); 171 172 np->num_ns++; 173 old_ns = np->active_ns; 174 np->active_ns = new_ns; 175 176 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 177 nvmft_controller_lun_changed(ctrlr, lun_id); 178 } 179 180 mtx_unlock(&np->lock); 181 free(old_ns, M_NVMFT); 182 183 return (0); 184 } 185 186 static int 187 nvmft_lun_disable(void *arg, int lun_id) 188 { 189 struct nvmft_port *np = arg; 190 struct nvmft_controller *ctrlr; 191 uint32_t nsid; 192 u_int i; 193 194 if (lun_id >= le32toh(np->cdata.nn)) 195 return (0); 196 nsid = lun_id + 1; 197 198 mtx_lock(&np->lock); 199 for (i = 0; i < np->num_ns; i++) { 200 if (np->active_ns[i] == nsid) 201 goto found; 202 } 203 mtx_unlock(&np->lock); 204 printf("NVMFT: %s request to disable nonexistent lun %d\n", 205 np->cdata.subnqn, lun_id); 206 return (EINVAL); 207 208 found: 209 /* Move down IDs greater than nsid. */ 210 memmove(np->active_ns + i, np->active_ns + i + 1, 211 (np->num_ns - (i + 1)) * sizeof(*np->active_ns)); 212 np->num_ns--; 213 214 /* NB: Don't bother freeing the old active_ns array. */ 215 216 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 217 nvmft_controller_lun_changed(ctrlr, lun_id); 218 } 219 220 mtx_unlock(&np->lock); 221 222 return (0); 223 } 224 225 void 226 nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid, 227 struct nvme_ns_list *nslist) 228 { 229 u_int i, count; 230 231 mtx_lock(&np->lock); 232 count = 0; 233 for (i = 0; i < np->num_ns; i++) { 234 if (np->active_ns[i] <= nsid) 235 continue; 236 nslist->ns[count] = htole32(np->active_ns[i]); 237 count++; 238 if (count == nitems(nslist->ns)) 239 break; 240 } 241 mtx_unlock(&np->lock); 242 } 243 244 void 245 nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc, 246 bool admin) 247 { 248 struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp); 249 const struct nvme_command *cmd = nvmf_capsule_sqe(nc); 250 struct nvmft_port *np = ctrlr->np; 251 union ctl_io *io; 252 int error; 253 254 if (cmd->nsid == htole32(0)) { 255 nvmft_send_generic_error(qp, nc, 256 NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 257 nvmf_free_capsule(nc); 258 return; 259 } 260 261 mtx_lock(&ctrlr->lock); 262 if (ctrlr->pending_commands == 0) 263 ctrlr->start_busy = sbinuptime(); 264 ctrlr->pending_commands++; 265 mtx_unlock(&ctrlr->lock); 266 io = ctl_alloc_io(np->port.ctl_pool_ref); 267 ctl_zero_io(io); 268 NVMFT_NC(io) = nc; 269 NVMFT_QP(io) = qp; 270 io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME; 271 io->io_hdr.nexus.initid = ctrlr->cntlid; 272 io->io_hdr.nexus.targ_port = np->port.targ_port; 273 io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1; 274 io->nvmeio.cmd = *cmd; 275 error = ctl_run(io); 276 if (error != 0) { 277 nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n", 278 nvmft_qpair_name(qp), error); 279 ctl_nvme_set_generic_error(&io->nvmeio, 280 NVME_SC_INTERNAL_DEVICE_ERROR); 281 nvmft_done(io); 282 283 nvmft_controller_error(ctrlr, qp, ENXIO); 284 } 285 } 286 287 void 288 nvmft_terminate_commands(struct nvmft_controller *ctrlr) 289 { 290 struct nvmft_port *np = ctrlr->np; 291 union ctl_io *io; 292 int error; 293 294 mtx_lock(&ctrlr->lock); 295 if (ctrlr->pending_commands == 0) 296 ctrlr->start_busy = sbinuptime(); 297 ctrlr->pending_commands++; 298 mtx_unlock(&ctrlr->lock); 299 io = ctl_alloc_io(np->port.ctl_pool_ref); 300 ctl_zero_io(io); 301 NVMFT_QP(io) = ctrlr->admin; 302 io->io_hdr.io_type = CTL_IO_TASK; 303 io->io_hdr.nexus.initid = ctrlr->cntlid; 304 io->io_hdr.nexus.targ_port = np->port.targ_port; 305 io->io_hdr.nexus.targ_lun = 0; 306 io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */ 307 io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET; 308 error = ctl_run(io); 309 if (error != CTL_RETVAL_COMPLETE) { 310 nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error); 311 #ifdef INVARIANTS 312 io->io_hdr.status = CTL_SUCCESS; 313 #endif 314 nvmft_done(io); 315 } 316 } 317 318 static void 319 nvmft_datamove_out_cb(void *arg, size_t xfered, int error) 320 { 321 struct ctl_nvmeio *ctnio = arg; 322 323 if (error != 0) { 324 ctl_nvme_set_data_transfer_error(ctnio); 325 } else { 326 MPASS(xfered == ctnio->kern_data_len); 327 ctnio->kern_data_resid -= xfered; 328 } 329 330 if (ctnio->kern_sg_entries) { 331 free(ctnio->ext_data_ptr, M_NVMFT); 332 ctnio->ext_data_ptr = NULL; 333 } else 334 MPASS(ctnio->ext_data_ptr == NULL); 335 ctl_datamove_done((union ctl_io *)ctnio, false); 336 } 337 338 static void 339 nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp, 340 struct nvmf_capsule *nc) 341 { 342 struct memdesc mem; 343 int error; 344 345 MPASS(ctnio->ext_data_ptr == NULL); 346 if (ctnio->kern_sg_entries > 0) { 347 struct ctl_sg_entry *sgl; 348 struct bus_dma_segment *vlist; 349 350 vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist), 351 M_NVMFT, M_WAITOK); 352 ctnio->ext_data_ptr = (void *)vlist; 353 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr; 354 for (u_int i = 0; i < ctnio->kern_sg_entries; i++) { 355 vlist[i].ds_addr = (uintptr_t)sgl[i].addr; 356 vlist[i].ds_len = sgl[i].len; 357 } 358 mem = memdesc_vlist(vlist, ctnio->kern_sg_entries); 359 } else 360 mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len); 361 362 error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem, 363 ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio); 364 if (error == 0) 365 return; 366 367 nvmft_printf(nvmft_qpair_ctrlr(qp), 368 "Failed to request capsule data: %d\n", error); 369 ctl_nvme_set_data_transfer_error(ctnio); 370 371 if (ctnio->kern_sg_entries) { 372 free(ctnio->ext_data_ptr, M_NVMFT); 373 ctnio->ext_data_ptr = NULL; 374 } else 375 MPASS(ctnio->ext_data_ptr == NULL); 376 ctl_datamove_done((union ctl_io *)ctnio, true); 377 } 378 379 static struct mbuf * 380 nvmft_copy_data(struct ctl_nvmeio *ctnio) 381 { 382 struct ctl_sg_entry *sgl; 383 struct mbuf *m0, *m; 384 uint32_t resid, off, todo; 385 int mlen; 386 387 MPASS(ctnio->kern_data_len != 0); 388 389 m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0); 390 391 if (ctnio->kern_sg_entries == 0) { 392 m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr); 393 return (m0); 394 } 395 396 resid = ctnio->kern_data_len; 397 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr; 398 off = 0; 399 m = m0; 400 mlen = M_TRAILINGSPACE(m); 401 for (;;) { 402 todo = MIN(mlen, sgl->len - off); 403 memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off, 404 todo); 405 m->m_len += todo; 406 resid -= todo; 407 if (resid == 0) { 408 MPASS(m->m_next == NULL); 409 break; 410 } 411 412 off += todo; 413 if (off == sgl->len) { 414 sgl++; 415 off = 0; 416 } 417 mlen -= todo; 418 if (mlen == 0) { 419 m = m->m_next; 420 mlen = M_TRAILINGSPACE(m); 421 } 422 } 423 424 return (m0); 425 } 426 427 static void 428 m_free_ref_data(struct mbuf *m) 429 { 430 ctl_ref kern_data_ref = m->m_ext.ext_arg1; 431 432 kern_data_ref(m->m_ext.ext_arg2, -1); 433 } 434 435 static struct mbuf * 436 m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size) 437 { 438 struct mbuf *m; 439 440 m = m_get(M_WAITOK, MT_DATA); 441 m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref, 442 ctnio->kern_data_arg, M_RDONLY, EXT_CTL); 443 m->m_len = size; 444 ctnio->kern_data_ref(ctnio->kern_data_arg, 1); 445 return (m); 446 } 447 448 static struct mbuf * 449 nvmft_ref_data(struct ctl_nvmeio *ctnio) 450 { 451 struct ctl_sg_entry *sgl; 452 struct mbuf *m0, *m; 453 454 MPASS(ctnio->kern_data_len != 0); 455 456 if (ctnio->kern_sg_entries == 0) 457 return (m_get_ref_data(ctnio, ctnio->kern_data_ptr, 458 ctnio->kern_data_len)); 459 460 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr; 461 m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len); 462 m = m0; 463 for (u_int i = 1; i < ctnio->kern_sg_entries; i++) { 464 m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len); 465 m = m->m_next; 466 } 467 return (m0); 468 } 469 470 static void 471 nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp, 472 struct nvmf_capsule *nc) 473 { 474 struct mbuf *m; 475 u_int status; 476 477 if (ctnio->kern_data_ref != NULL) 478 m = nvmft_ref_data(ctnio); 479 else 480 m = nvmft_copy_data(ctnio); 481 status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m, 482 ctnio->kern_data_len); 483 switch (status) { 484 case NVMF_SUCCESS_SENT: 485 ctnio->success_sent = true; 486 nvmft_command_completed(qp, nc); 487 /* FALLTHROUGH */ 488 case NVMF_MORE: 489 case NVME_SC_SUCCESS: 490 break; 491 default: 492 ctl_nvme_set_generic_error(ctnio, status); 493 break; 494 } 495 ctl_datamove_done((union ctl_io *)ctnio, true); 496 } 497 498 void 499 nvmft_handle_datamove(union ctl_io *io) 500 { 501 struct nvmf_capsule *nc; 502 struct nvmft_qpair *qp; 503 504 /* Some CTL commands preemptively set a success status. */ 505 MPASS(io->io_hdr.status == CTL_STATUS_NONE || 506 io->io_hdr.status == CTL_SUCCESS); 507 MPASS(!io->nvmeio.success_sent); 508 509 nc = NVMFT_NC(io); 510 qp = NVMFT_QP(io); 511 512 if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN) 513 nvmft_datamove_in(&io->nvmeio, qp, nc); 514 else 515 nvmft_datamove_out(&io->nvmeio, qp, nc); 516 } 517 518 void 519 nvmft_abort_datamove(union ctl_io *io) 520 { 521 io->io_hdr.port_status = 1; 522 io->io_hdr.flags |= CTL_FLAG_ABORT; 523 ctl_datamove_done(io, true); 524 } 525 526 static void 527 nvmft_datamove(union ctl_io *io) 528 { 529 struct nvmft_qpair *qp; 530 531 qp = NVMFT_QP(io); 532 nvmft_qpair_datamove(qp, io); 533 } 534 535 void 536 nvmft_enqueue_task(struct task *task) 537 { 538 taskqueue_enqueue(nvmft_taskq, task); 539 } 540 541 void 542 nvmft_drain_task(struct task *task) 543 { 544 taskqueue_drain(nvmft_taskq, task); 545 } 546 547 static void 548 hip_add(uint64_t pair[2], uint64_t addend) 549 { 550 uint64_t old, new; 551 552 old = le64toh(pair[0]); 553 new = old + addend; 554 pair[0] = htole64(new); 555 if (new < old) 556 pair[1] += htole64(1); 557 } 558 559 static void 560 nvmft_done(union ctl_io *io) 561 { 562 struct nvmft_controller *ctrlr; 563 const struct nvme_command *cmd; 564 struct nvmft_qpair *qp; 565 struct nvmf_capsule *nc; 566 size_t len; 567 568 KASSERT(io->io_hdr.status == CTL_SUCCESS || 569 io->io_hdr.status == CTL_NVME_ERROR, 570 ("%s: bad status %u", __func__, io->io_hdr.status)); 571 572 nc = NVMFT_NC(io); 573 qp = NVMFT_QP(io); 574 ctrlr = nvmft_qpair_ctrlr(qp); 575 576 if (nc == NULL) { 577 /* Completion of nvmft_terminate_commands. */ 578 goto end; 579 } 580 581 cmd = nvmf_capsule_sqe(nc); 582 583 if (io->io_hdr.status == CTL_SUCCESS) 584 len = nvmf_capsule_data_len(nc) / 512; 585 else 586 len = 0; 587 switch (cmd->opc) { 588 case NVME_OPC_WRITE: 589 mtx_lock(&ctrlr->lock); 590 hip_add(ctrlr->hip.host_write_commands, 1); 591 len += ctrlr->partial_duw; 592 if (len > 1000) 593 hip_add(ctrlr->hip.data_units_written, len / 1000); 594 ctrlr->partial_duw = len % 1000; 595 mtx_unlock(&ctrlr->lock); 596 break; 597 case NVME_OPC_READ: 598 case NVME_OPC_COMPARE: 599 case NVME_OPC_VERIFY: 600 mtx_lock(&ctrlr->lock); 601 if (cmd->opc != NVME_OPC_VERIFY) 602 hip_add(ctrlr->hip.host_read_commands, 1); 603 len += ctrlr->partial_dur; 604 if (len > 1000) 605 hip_add(ctrlr->hip.data_units_read, len / 1000); 606 ctrlr->partial_dur = len % 1000; 607 mtx_unlock(&ctrlr->lock); 608 break; 609 } 610 611 if (io->nvmeio.success_sent) { 612 MPASS(io->io_hdr.status == CTL_SUCCESS); 613 } else { 614 io->nvmeio.cpl.cid = cmd->cid; 615 nvmft_send_response(qp, &io->nvmeio.cpl); 616 } 617 nvmf_free_capsule(nc); 618 end: 619 ctl_free_io(io); 620 mtx_lock(&ctrlr->lock); 621 ctrlr->pending_commands--; 622 if (ctrlr->pending_commands == 0) 623 ctrlr->busy_total += sbinuptime() - ctrlr->start_busy; 624 mtx_unlock(&ctrlr->lock); 625 } 626 627 static int 628 nvmft_init(void) 629 { 630 int error; 631 632 nvmft_taskq = taskqueue_create("nvmft", M_WAITOK, 633 taskqueue_thread_enqueue, &nvmft_taskq); 634 error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT, 635 control_softc->ctl_proc, "nvmft"); 636 if (error != 0) { 637 taskqueue_free(nvmft_taskq); 638 return (error); 639 } 640 641 TAILQ_INIT(&nvmft_ports); 642 sx_init(&nvmft_ports_lock, "nvmft ports"); 643 return (0); 644 } 645 646 void 647 nvmft_port_free(struct nvmft_port *np) 648 { 649 KASSERT(TAILQ_EMPTY(&np->controllers), 650 ("%s(%p): active controllers", __func__, np)); 651 652 if (np->port.targ_port != -1) { 653 if (ctl_port_deregister(&np->port) != 0) 654 printf("%s: ctl_port_deregister() failed\n", __func__); 655 } 656 657 free(np->active_ns, M_NVMFT); 658 clean_unrhdr(np->ids); 659 delete_unrhdr(np->ids); 660 mtx_destroy(&np->lock); 661 free(np, M_NVMFT); 662 } 663 664 static struct nvmft_port * 665 nvmft_port_find(const char *subnqn) 666 { 667 struct nvmft_port *np; 668 669 KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__)); 670 671 sx_assert(&nvmft_ports_lock, SA_LOCKED); 672 TAILQ_FOREACH(np, &nvmft_ports, link) { 673 if (strcmp(np->cdata.subnqn, subnqn) == 0) 674 break; 675 } 676 return (np); 677 } 678 679 static struct nvmft_port * 680 nvmft_port_find_by_id(int port_id) 681 { 682 struct nvmft_port *np; 683 684 sx_assert(&nvmft_ports_lock, SA_LOCKED); 685 TAILQ_FOREACH(np, &nvmft_ports, link) { 686 if (np->port.targ_port == port_id) 687 break; 688 } 689 return (np); 690 } 691 692 /* 693 * Helper function to fetch a number stored as a string in an nv_list. 694 * Returns false if the string was not a valid number. 695 */ 696 static bool 697 dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value, 698 u_long *value) 699 { 700 const char *str; 701 char *cp; 702 703 str = dnvlist_get_string(nvl, name, NULL); 704 if (str == NULL) { 705 *value = default_value; 706 return (true); 707 } 708 if (*str == '\0') 709 return (false); 710 *value = strtoul(str, &cp, 0); 711 if (*cp != '\0') 712 return (false); 713 return (true); 714 } 715 716 /* 717 * NVMeoF ports support the following parameters: 718 * 719 * Mandatory: 720 * 721 * subnqn: subsystem NVMe Qualified Name 722 * portid: integer port ID from Discovery Log Page entry 723 * 724 * Optional: 725 * serial: Serial Number string 726 * max_io_qsize: Maximum number of I/O queue entries 727 * enable_timeout: Timeout for controller enable in milliseconds 728 * ioccsz: Maximum command capsule size 729 * iorcsz: Maximum response capsule size 730 * nn: Number of namespaces 731 */ 732 static void 733 nvmft_port_create(struct ctl_req *req) 734 { 735 struct nvmft_port *np; 736 struct ctl_port *port; 737 const char *serial, *subnqn; 738 char serial_buf[NVME_SERIAL_NUMBER_LENGTH]; 739 u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid; 740 int error; 741 742 /* Required parameters. */ 743 subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL); 744 if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) { 745 req->status = CTL_LUN_ERROR; 746 snprintf(req->error_str, sizeof(req->error_str), 747 "Missing required argument"); 748 return; 749 } 750 if (!nvmf_nqn_valid(subnqn)) { 751 req->status = CTL_LUN_ERROR; 752 snprintf(req->error_str, sizeof(req->error_str), 753 "Invalid SubNQN"); 754 return; 755 } 756 if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) || 757 portid > UINT16_MAX) { 758 req->status = CTL_LUN_ERROR; 759 snprintf(req->error_str, sizeof(req->error_str), 760 "Invalid port ID"); 761 return; 762 } 763 764 /* Optional parameters. */ 765 if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize", 766 NVMF_MAX_IO_ENTRIES, &max_io_qsize) || 767 max_io_qsize < NVME_MIN_IO_ENTRIES || 768 max_io_qsize > NVME_MAX_IO_ENTRIES) { 769 req->status = CTL_LUN_ERROR; 770 snprintf(req->error_str, sizeof(req->error_str), 771 "Invalid maximum I/O queue size"); 772 return; 773 } 774 775 if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout", 776 NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) || 777 (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) { 778 req->status = CTL_LUN_ERROR; 779 snprintf(req->error_str, sizeof(req->error_str), 780 "Invalid enable timeout"); 781 return; 782 } 783 784 if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ, 785 &ioccsz) || ioccsz < sizeof(struct nvme_command) || 786 (ioccsz % 16) != 0) { 787 req->status = CTL_LUN_ERROR; 788 snprintf(req->error_str, sizeof(req->error_str), 789 "Invalid Command Capsule size"); 790 return; 791 } 792 793 if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ, 794 &iorcsz) || iorcsz < sizeof(struct nvme_completion) || 795 (iorcsz % 16) != 0) { 796 req->status = CTL_LUN_ERROR; 797 snprintf(req->error_str, sizeof(req->error_str), 798 "Invalid Response Capsule size"); 799 return; 800 } 801 802 if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) || 803 nn < 1 || nn > UINT32_MAX) { 804 req->status = CTL_LUN_ERROR; 805 snprintf(req->error_str, sizeof(req->error_str), 806 "Invalid number of namespaces"); 807 return; 808 } 809 810 serial = dnvlist_get_string(req->args_nvl, "serial", NULL); 811 if (serial == NULL) { 812 getcredhostid(curthread->td_ucred, &hostid); 813 nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid); 814 serial = serial_buf; 815 } 816 817 sx_xlock(&nvmft_ports_lock); 818 819 np = nvmft_port_find(subnqn); 820 if (np != NULL) { 821 req->status = CTL_LUN_ERROR; 822 snprintf(req->error_str, sizeof(req->error_str), 823 "SubNQN \"%s\" already exists", subnqn); 824 sx_xunlock(&nvmft_ports_lock); 825 return; 826 } 827 828 np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO); 829 refcount_init(&np->refs, 1); 830 np->portid = portid; 831 np->max_io_qsize = max_io_qsize; 832 np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500); 833 mtx_init(&np->lock, "nvmft port", NULL, MTX_DEF); 834 np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1, 835 NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX); 836 TAILQ_INIT(&np->controllers); 837 838 /* The controller ID is set later for individual controllers. */ 839 _nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype, 840 osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata); 841 np->cdata.aerl = NVMFT_NUM_AER - 1; 842 np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE); 843 np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) | 844 NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) | 845 NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) | 846 NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1)); 847 np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1); 848 849 np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1); 850 memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr)); 851 852 port = &np->port; 853 854 port->frontend = &nvmft_frontend; 855 port->port_type = CTL_PORT_NVMF; 856 port->num_requested_ctl_io = max_io_qsize; 857 port->port_name = "nvmf"; 858 port->physical_port = portid; 859 port->virtual_port = 0; 860 port->port_online = nvmft_online; 861 port->port_offline = nvmft_offline; 862 port->port_info = nvmft_info; 863 port->onoff_arg = np; 864 port->lun_enable = nvmft_lun_enable; 865 port->lun_disable = nvmft_lun_disable; 866 port->targ_lun_arg = np; 867 port->fe_datamove = nvmft_datamove; 868 port->fe_done = nvmft_done; 869 port->targ_port = -1; 870 port->options = nvlist_clone(req->args_nvl); 871 872 error = ctl_port_register(port); 873 if (error != 0) { 874 sx_xunlock(&nvmft_ports_lock); 875 nvlist_destroy(port->options); 876 nvmft_port_rele(np); 877 req->status = CTL_LUN_ERROR; 878 snprintf(req->error_str, sizeof(req->error_str), 879 "Failed to register CTL port with error %d", error); 880 return; 881 } 882 883 TAILQ_INSERT_TAIL(&nvmft_ports, np, link); 884 sx_xunlock(&nvmft_ports_lock); 885 886 req->status = CTL_LUN_OK; 887 req->result_nvl = nvlist_create(0); 888 nvlist_add_number(req->result_nvl, "port_id", port->targ_port); 889 } 890 891 static void 892 nvmft_port_remove(struct ctl_req *req) 893 { 894 struct nvmft_port *np; 895 const char *subnqn; 896 u_long port_id; 897 898 /* 899 * ctladm port -r just provides the port_id, so permit looking 900 * up a port either by "subnqn" or "port_id". 901 */ 902 port_id = ULONG_MAX; 903 subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL); 904 if (subnqn == NULL) { 905 if (!nvlist_exists_string(req->args_nvl, "port_id")) { 906 req->status = CTL_LUN_ERROR; 907 snprintf(req->error_str, sizeof(req->error_str), 908 "Missing required argument"); 909 return; 910 } 911 if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX, 912 &port_id)) { 913 req->status = CTL_LUN_ERROR; 914 snprintf(req->error_str, sizeof(req->error_str), 915 "Invalid CTL port ID"); 916 return; 917 } 918 } else { 919 if (nvlist_exists_string(req->args_nvl, "port_id")) { 920 req->status = CTL_LUN_ERROR; 921 snprintf(req->error_str, sizeof(req->error_str), 922 "Ambiguous port removal request"); 923 return; 924 } 925 } 926 927 sx_xlock(&nvmft_ports_lock); 928 929 if (subnqn != NULL) { 930 np = nvmft_port_find(subnqn); 931 if (np == NULL) { 932 req->status = CTL_LUN_ERROR; 933 snprintf(req->error_str, sizeof(req->error_str), 934 "SubNQN \"%s\" does not exist", subnqn); 935 sx_xunlock(&nvmft_ports_lock); 936 return; 937 } 938 } else { 939 np = nvmft_port_find_by_id(port_id); 940 if (np == NULL) { 941 req->status = CTL_LUN_ERROR; 942 snprintf(req->error_str, sizeof(req->error_str), 943 "CTL port %lu is not a NVMF port", port_id); 944 sx_xunlock(&nvmft_ports_lock); 945 return; 946 } 947 } 948 949 TAILQ_REMOVE(&nvmft_ports, np, link); 950 sx_xunlock(&nvmft_ports_lock); 951 952 mtx_lock(&np->lock); 953 if (np->online) { 954 mtx_unlock(&np->lock); 955 ctl_port_offline(&np->port); 956 } else 957 mtx_unlock(&np->lock); 958 959 nvmft_port_rele(np); 960 req->status = CTL_LUN_OK; 961 } 962 963 static void 964 nvmft_handoff(struct ctl_nvmf *cn) 965 { 966 const struct nvmf_fabric_connect_cmd *cmd; 967 const struct nvmf_fabric_connect_data *data; 968 const nvlist_t *params; 969 struct nvmft_port *np; 970 nvlist_t *nvl; 971 size_t len; 972 enum nvmf_trtype trtype; 973 int error; 974 975 np = NULL; 976 error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl); 977 if (error != 0) { 978 cn->status = CTL_NVMF_ERROR; 979 snprintf(cn->error_str, sizeof(cn->error_str), 980 "Failed to copyin and unpack handoff arguments"); 981 return; 982 } 983 984 if (!nvlist_exists_number(nvl, "trtype") || 985 !nvlist_exists_nvlist(nvl, "params") || 986 !nvlist_exists_binary(nvl, "cmd") || 987 !nvlist_exists_binary(nvl, "data")) { 988 cn->status = CTL_NVMF_ERROR; 989 snprintf(cn->error_str, sizeof(cn->error_str), 990 "Handoff arguments missing required value"); 991 goto out; 992 } 993 994 params = nvlist_get_nvlist(nvl, "params"); 995 if (!nvmf_validate_qpair_nvlist(params, true)) { 996 cn->status = CTL_NVMF_ERROR; 997 snprintf(cn->error_str, sizeof(cn->error_str), 998 "Invalid queue pair parameters"); 999 goto out; 1000 } 1001 1002 cmd = nvlist_get_binary(nvl, "cmd", &len); 1003 if (len != sizeof(*cmd)) { 1004 cn->status = CTL_NVMF_ERROR; 1005 snprintf(cn->error_str, sizeof(cn->error_str), 1006 "Wrong size for CONNECT SQE"); 1007 goto out; 1008 } 1009 1010 data = nvlist_get_binary(nvl, "data", &len); 1011 if (len != sizeof(*data)) { 1012 cn->status = CTL_NVMF_ERROR; 1013 snprintf(cn->error_str, sizeof(cn->error_str), 1014 "Wrong size for CONNECT data"); 1015 goto out; 1016 } 1017 1018 if (!nvmf_nqn_valid(data->subnqn)) { 1019 cn->status = CTL_NVMF_ERROR; 1020 snprintf(cn->error_str, sizeof(cn->error_str), 1021 "Invalid SubNQN"); 1022 goto out; 1023 } 1024 1025 sx_slock(&nvmft_ports_lock); 1026 np = nvmft_port_find(data->subnqn); 1027 if (np == NULL) { 1028 sx_sunlock(&nvmft_ports_lock); 1029 cn->status = CTL_NVMF_ERROR; 1030 snprintf(cn->error_str, sizeof(cn->error_str), 1031 "Unknown SubNQN"); 1032 goto out; 1033 } 1034 if (!np->online) { 1035 sx_sunlock(&nvmft_ports_lock); 1036 cn->status = CTL_NVMF_ERROR; 1037 snprintf(cn->error_str, sizeof(cn->error_str), 1038 "CTL port offline"); 1039 np = NULL; 1040 goto out; 1041 } 1042 nvmft_port_ref(np); 1043 sx_sunlock(&nvmft_ports_lock); 1044 1045 trtype = nvlist_get_number(nvl, "trtype"); 1046 if (nvlist_get_bool(params, "admin")) { 1047 error = nvmft_handoff_admin_queue(np, trtype, params, cmd, 1048 data); 1049 if (error != 0) { 1050 cn->status = CTL_NVMF_ERROR; 1051 snprintf(cn->error_str, sizeof(cn->error_str), 1052 "Failed to handoff admin queue: %d", error); 1053 goto out; 1054 } 1055 } else { 1056 error = nvmft_handoff_io_queue(np, trtype, params, cmd, data); 1057 if (error != 0) { 1058 cn->status = CTL_NVMF_ERROR; 1059 snprintf(cn->error_str, sizeof(cn->error_str), 1060 "Failed to handoff I/O queue: %d", error); 1061 goto out; 1062 } 1063 } 1064 1065 cn->status = CTL_NVMF_OK; 1066 out: 1067 if (np != NULL) 1068 nvmft_port_rele(np); 1069 nvlist_destroy(nvl); 1070 } 1071 1072 static void 1073 nvmft_list(struct ctl_nvmf *cn) 1074 { 1075 struct ctl_nvmf_list_params *lp; 1076 struct nvmft_controller *ctrlr; 1077 struct nvmft_port *np; 1078 struct sbuf *sb; 1079 int error; 1080 1081 lp = &cn->data.list; 1082 1083 sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN | 1084 SBUF_INCLUDENUL); 1085 if (sb == NULL) { 1086 cn->status = CTL_NVMF_ERROR; 1087 snprintf(cn->error_str, sizeof(cn->error_str), 1088 "Failed to allocate NVMeoF session list"); 1089 return; 1090 } 1091 1092 sbuf_printf(sb, "<ctlnvmflist>\n"); 1093 sx_slock(&nvmft_ports_lock); 1094 TAILQ_FOREACH(np, &nvmft_ports, link) { 1095 mtx_lock(&np->lock); 1096 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 1097 sbuf_printf(sb, "<connection id=\"%d\">" 1098 "<hostnqn>%s</hostnqn>" 1099 "<subnqn>%s</subnqn>" 1100 "<trtype>%u</trtype>" 1101 "</connection>\n", 1102 ctrlr->cntlid, 1103 ctrlr->hostnqn, 1104 np->cdata.subnqn, 1105 ctrlr->trtype); 1106 } 1107 mtx_unlock(&np->lock); 1108 } 1109 sx_sunlock(&nvmft_ports_lock); 1110 sbuf_printf(sb, "</ctlnvmflist>\n"); 1111 if (sbuf_finish(sb) != 0) { 1112 sbuf_delete(sb); 1113 cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE; 1114 snprintf(cn->error_str, sizeof(cn->error_str), 1115 "Out of space, %d bytes is too small", lp->alloc_len); 1116 return; 1117 } 1118 1119 error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb)); 1120 if (error != 0) { 1121 sbuf_delete(sb); 1122 cn->status = CTL_NVMF_ERROR; 1123 snprintf(cn->error_str, sizeof(cn->error_str), 1124 "Failed to copyout session list: %d", error); 1125 return; 1126 } 1127 lp->fill_len = sbuf_len(sb); 1128 cn->status = CTL_NVMF_OK; 1129 sbuf_delete(sb); 1130 } 1131 1132 static void 1133 nvmft_terminate(struct ctl_nvmf *cn) 1134 { 1135 struct ctl_nvmf_terminate_params *tp; 1136 struct nvmft_controller *ctrlr; 1137 struct nvmft_port *np; 1138 bool found, match; 1139 1140 tp = &cn->data.terminate; 1141 1142 found = false; 1143 sx_slock(&nvmft_ports_lock); 1144 TAILQ_FOREACH(np, &nvmft_ports, link) { 1145 mtx_lock(&np->lock); 1146 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 1147 if (tp->all != 0) 1148 match = true; 1149 else if (tp->cntlid != -1) 1150 match = tp->cntlid == ctrlr->cntlid; 1151 else if (tp->hostnqn[0] != '\0') 1152 match = strncmp(tp->hostnqn, ctrlr->hostnqn, 1153 sizeof(tp->hostnqn)) == 0; 1154 else 1155 match = false; 1156 if (!match) 1157 continue; 1158 nvmft_printf(ctrlr, 1159 "disconnecting due to administrative request\n"); 1160 nvmft_controller_error(ctrlr, NULL, ECONNABORTED); 1161 found = true; 1162 } 1163 mtx_unlock(&np->lock); 1164 } 1165 sx_sunlock(&nvmft_ports_lock); 1166 1167 if (!found) { 1168 cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND; 1169 snprintf(cn->error_str, sizeof(cn->error_str), 1170 "No matching associations found"); 1171 return; 1172 } 1173 cn->status = CTL_NVMF_OK; 1174 } 1175 1176 static int 1177 nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag, 1178 struct thread *td) 1179 { 1180 struct ctl_nvmf *cn; 1181 struct ctl_req *req; 1182 1183 switch (cmd) { 1184 case CTL_PORT_REQ: 1185 req = (struct ctl_req *)data; 1186 switch (req->reqtype) { 1187 case CTL_REQ_CREATE: 1188 nvmft_port_create(req); 1189 break; 1190 case CTL_REQ_REMOVE: 1191 nvmft_port_remove(req); 1192 break; 1193 default: 1194 req->status = CTL_LUN_ERROR; 1195 snprintf(req->error_str, sizeof(req->error_str), 1196 "Unsupported request type %d", req->reqtype); 1197 break; 1198 } 1199 return (0); 1200 case CTL_NVMF: 1201 cn = (struct ctl_nvmf *)data; 1202 switch (cn->type) { 1203 case CTL_NVMF_HANDOFF: 1204 nvmft_handoff(cn); 1205 break; 1206 case CTL_NVMF_LIST: 1207 nvmft_list(cn); 1208 break; 1209 case CTL_NVMF_TERMINATE: 1210 nvmft_terminate(cn); 1211 break; 1212 default: 1213 cn->status = CTL_NVMF_ERROR; 1214 snprintf(cn->error_str, sizeof(cn->error_str), 1215 "Invalid NVMeoF request type %d", cn->type); 1216 break; 1217 } 1218 return (0); 1219 default: 1220 return (ENOTTY); 1221 } 1222 } 1223 1224 static int 1225 nvmft_shutdown(void) 1226 { 1227 /* TODO: Need to check for active controllers. */ 1228 if (!TAILQ_EMPTY(&nvmft_ports)) 1229 return (EBUSY); 1230 1231 taskqueue_free(nvmft_taskq); 1232 sx_destroy(&nvmft_ports_lock); 1233 return (0); 1234 } 1235 1236 CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend); 1237 MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1); 1238