/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void	nvmft_controller_shutdown(void *arg, int pending);
static void	nvmft_controller_terminate(void *arg, int pending);

int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

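/*
 * Keep Alive timer callback.  If no admin or I/O traffic has been
 * observed since the last expiration, treat it as a Keep Alive
 * Timeout and tear down the association; otherwise re-arm the timer.
 */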
static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

int
nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif
	mtx_unlock(&np->lock);

	ctrlr = nvmft_controller_alloc(np, cntlid, data);

	mtx_lock(&np->lock);
	if (!np->online) {
		mtx_unlock(&np->lock);
		nvmft_controller_free(ctrlr);
		free_unr(np->ids, cntlid);
		nvmft_qpair_destroy(qp);
		return (ENXIO);
	}
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);

	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}
	mtx_unlock(&np->lock);

	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

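/*
 * Handle a CONNECT command for an I/O queue: look up the controller
 * by CNTLID, validate the host identity and queue ID, and bind the
 * new queue pair to the controller.
 */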
int
nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(trtype, params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require trtype == ctrlr->trtype? */

	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	mtx_unlock(&np->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

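/*
 * Shutdown task: quiesce and destroy all I/O queues, wait for pending
 * CTL commands to drain, update CSTS, and arrange for the association
 * to be terminated.
 */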
static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
			    C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	mtx_lock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	mtx_unlock(&np->lock);
	free_unr(np->ids, ctrlr->cntlid);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

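/*
 * Handle closure of a queue pair (error == 0) or a transport error on
 * it.  Fatal errors mark the controller failed (CFS) and schedule the
 * shutdown task.
 */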
void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

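/*
 * GET_LOG_PAGE handler for the log pages implemented by the target:
 * Error, Health/SMART, Firmware Slot, and Changed Namespace.
 */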
static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* The log page offset must be dword-aligned. */
	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

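/*
 * IDENTIFY handler.  Controller data and the active namespace list
 * are answered directly; namespace-specific CNS values are dispatched
 * to the backend.
 */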
static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

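/*
 * Validate and apply a host write to the CC property.  Sets
 * *need_shutdown when a shutdown or reset must be processed by the
 * shutdown task.
 */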
static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
	nvmf_free_capsule(nc);
}

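/* Dispatch a command capsule received on the admin queue. */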
void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

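/*
 * Complete a pending Asynchronous Event Request command to report an
 * event, if events of this type are enabled and an AER command is
 * available.
 */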
static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}