/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void	nvmft_controller_shutdown(void *arg, int pending);
static void	nvmft_controller_terminate(void *arg, int pending);

int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

int
nvmft_handoff_admin_queue(struct nvmft_port *np,
    const struct nvmf_handoff_controller_qpair *handoff,
    const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0,
	    "admin queue");

	sx_xlock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		sx_xunlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__,
		    cntlid));
	}
#endif

	ctrlr = nvmft_controller_alloc(np, cntlid, data);
	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = handoff->trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}

	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_xunlock(&np->lock);

	return (0);
}

int
nvmft_handoff_io_queue(struct nvmft_port *np,
    const struct nvmf_handoff_controller_qpair *handoff,
    const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name);

	sx_slock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		sx_sunlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require handoff->trtype == ctrlr->trtype? */
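	/*
	 * Validate the request against the controller's current state
	 * before installing the new queue pair.
	 */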
	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (cmd->qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_sunlock(&np->lock);

	return (0);
}

static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shut down all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
			    C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	sx_xlock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	free_unr(np->ids, ctrlr->cntlid);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	sx_xunlock(&np->lock);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
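	/*
	 * Every case above has sent either a response or an error for
	 * the command, so the capsule can be released here.
	 */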
	nvmf_free_capsule(nc);
}

void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}