/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void	nvmft_controller_shutdown(void *arg, int pending);
static void	nvmft_controller_terminate(void *arg, int pending);

int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}
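
/*
 * Handle a CONNECT command for an admin queue: allocate a controller ID
 * and a new controller, bind the admin queue pair to it, and start the
 * KeepAlive timer if the host requested one.  The requested KATO is
 * rounded up to whole seconds (e.g. 2500 ms becomes 3000 ms) to match
 * the granularity advertised in the controller data.
 */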
int
nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif
	mtx_unlock(&np->lock);

	ctrlr = nvmft_controller_alloc(np, cntlid, data);

	mtx_lock(&np->lock);
	if (!np->online) {
		mtx_unlock(&np->lock);
		nvmft_controller_free(ctrlr);
		free_unr(np->ids, cntlid);
		nvmft_qpair_destroy(qp);
		return (ENXIO);
	}
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);

	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}
	mtx_unlock(&np->lock);

	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

/*
 * Handle a CONNECT command for an I/O queue: look up the controller by
 * CNTLID, validate the host identity and queue ID, and record the new
 * queue pair.
 */
int
nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(trtype, params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require trtype == ctrlr->trtype? */

	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	mtx_unlock(&np->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}
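
/*
 * Shutdown task: tear down a controller after a shutdown or reset
 * request.  Drains the I/O queues and any pending CTL commands, marks
 * the shutdown complete in CSTS, and schedules termination of the
 * association, either immediately or after the 2 minute grace period
 * from NVMe-over-Fabrics 1.1 4.6.
 */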
static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}
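
/*
 * Timeout task: finish tearing down an association.  Destroys the
 * admin queue pair, releases the controller ID, and frees the
 * controller.  Does nothing if the host re-enabled the controller
 * before the termination timeout expired.
 */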
static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt,
			    0, C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	mtx_lock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	mtx_unlock(&np->lock);
	free_unr(np->ids, ctrlr->cntlid);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

/*
 * Handle an error or queue pair closure reported for this controller.
 */
void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

/* Zero 'len' bytes of an mbuf chain starting at byte 'offset'. */
static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}
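
/*
 * Handle GET LOG PAGE on the admin queue.  The log page ID, RAE bit,
 * dword count, and byte offset are decoded from CDW10-13.  The
 * requested range is copied into an mbuf chain and returned as
 * controller data; bytes past the end of a log page are zeroed.  Note
 * that NUMD is zero-based, so a value of 0 returns 4 bytes.
 */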
static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* The log page offset must be dword-aligned. */
	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}
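
/*
 * Handle IDENTIFY on the admin queue.  Namespace-related CNS values
 * (0 and 3) are handed off via nvmft_dispatch_command(); the
 * controller data structure and the active namespace ID list are
 * built here.
 */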
static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}
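
/*
 * Handle SET FEATURES on the admin queue.  Only the Number of Queues
 * and Asynchronous Event Configuration features are supported.
 */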
static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}
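
/*
 * Apply a host write to the CC property.  Returns false if the new
 * value is invalid or the controller is shutting down.  Sets
 * *need_shutdown when the write requests a shutdown or reset that the
 * caller must complete by queueing the shutdown task.
 */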
static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}
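
/*
 * Dispatch a Fabrics command received on the admin queue.  Only
 * Property Get and Property Set are accepted once the queue is
 * connected; CONNECT and DISCONNECT are rejected with errors.
 */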
static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
	nvmf_free_capsule(nc);
}

void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}
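
/*
 * Complete a pending Asynchronous Event Request with the given event
 * type, info, and log page ID, provided the event type is enabled in
 * the AER mask and an AER command is outstanding.
 */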
static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

/*
 * Record a namespace change in the Changed Namespace List log page and
 * report a Namespace Attribute Changed event to the host the first
 * time the list changes since it was last read.
 */
void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}