/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void	nvmft_controller_shutdown(void *arg, int pending);
static void	nvmft_controller_terminate(void *arg, int pending);

int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

int
nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	sx_xlock(&np->lock);
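	/* Allocate a CNTLID from the port's unit-number allocator. */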
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		sx_xunlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif

	ctrlr = nvmft_controller_alloc(np, cntlid, data);
	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}

	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_xunlock(&np->lock);

	return (0);
}

int
nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(trtype, params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	sx_slock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		sx_sunlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require trtype == ctrlr->trtype? */

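	/* Validate the new queue against the controller's current state. */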
	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_sunlock(&np->lock);

	return (0);
}

static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}
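
	/*
	 * Unless a fatal controller error is pending, clear RDY and
	 * allow the host to re-enable the controller.
	 */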
	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt,
			    0, C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	sx_xlock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	free_unr(np->ids, ctrlr->cntlid);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	sx_xunlock(&np->lock);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
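			/*
			 * If the terminate task is not already
			 * running, reschedule it to run immediately.
			 */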
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* Log page offsets must be dword-aligned. */
	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
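	/* The firmware slot log page is static data kept in the port. */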
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
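		/*
		 * NSIDs 0xfffffffe and 0xffffffff are reserved and
		 * cannot be used as the starting NSID.
		 */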
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

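	/* Apply the new value and note which fields changed. */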
	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
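	/* Each case above sent a completion; release the capsule. */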
	nvmf_free_capsule(nc);
}

void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}
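
	/* Consume the oldest pending AER command (FIFO order). */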
	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}