1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2023-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/param.h> 9 #include <sys/callout.h> 10 #include <sys/kernel.h> 11 #include <sys/lock.h> 12 #include <sys/malloc.h> 13 #include <sys/mbuf.h> 14 #include <sys/memdesc.h> 15 #include <sys/mutex.h> 16 #include <sys/sbuf.h> 17 #include <sys/sx.h> 18 #include <sys/taskqueue.h> 19 20 #include <dev/nvmf/nvmf_transport.h> 21 #include <dev/nvmf/controller/nvmft_subr.h> 22 #include <dev/nvmf/controller/nvmft_var.h> 23 24 static void nvmft_controller_shutdown(void *arg, int pending); 25 static void nvmft_controller_terminate(void *arg, int pending); 26 27 int 28 nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...) 29 { 30 char buf[128]; 31 struct sbuf sb; 32 va_list ap; 33 size_t retval; 34 35 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN); 36 sbuf_set_drain(&sb, sbuf_printf_drain, &retval); 37 38 sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid); 39 40 va_start(ap, fmt); 41 sbuf_vprintf(&sb, fmt, ap); 42 va_end(ap); 43 44 sbuf_finish(&sb); 45 sbuf_delete(&sb); 46 47 return (retval); 48 } 49 50 static struct nvmft_controller * 51 nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid, 52 const struct nvmf_fabric_connect_data *data) 53 { 54 struct nvmft_controller *ctrlr; 55 56 ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO); 57 ctrlr->cntlid = cntlid; 58 nvmft_port_ref(np); 59 TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link); 60 ctrlr->np = np; 61 mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF); 62 callout_init(&ctrlr->ka_timer, 1); 63 TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr); 64 TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0, 65 nvmft_controller_terminate, ctrlr); 66 67 ctrlr->cdata = np->cdata; 68 ctrlr->cdata.ctrlr_id = htole16(cntlid); 69 memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)); 70 memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)); 71 ctrlr->hip.power_cycles[0] = 1; 72 ctrlr->create_time = sbinuptime(); 73 74 ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT, 75 M_WAITOK | M_ZERO); 76 77 return (ctrlr); 78 } 79 80 static void 81 nvmft_controller_free(struct nvmft_controller *ctrlr) 82 { 83 mtx_destroy(&ctrlr->lock); 84 MPASS(ctrlr->io_qpairs == NULL); 85 free(ctrlr->changed_ns, M_NVMFT); 86 free(ctrlr, M_NVMFT); 87 } 88 89 static void 90 nvmft_keep_alive_timer(void *arg) 91 { 92 struct nvmft_controller *ctrlr = arg; 93 int traffic; 94 95 if (ctrlr->shutdown) 96 return; 97 98 traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic); 99 if (traffic == 0) { 100 nvmft_printf(ctrlr, 101 "disconnecting due to KeepAlive timeout\n"); 102 nvmft_controller_error(ctrlr, NULL, ETIMEDOUT); 103 return; 104 } 105 106 callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK); 107 } 108 109 int 110 nvmft_handoff_admin_queue(struct nvmft_port *np, 111 const struct nvmf_handoff_controller_qpair *handoff, 112 const struct nvmf_fabric_connect_cmd *cmd, 113 const struct nvmf_fabric_connect_data *data) 114 { 115 struct nvmft_controller *ctrlr; 116 struct nvmft_qpair *qp; 117 uint32_t kato; 118 int cntlid; 119 120 if (cmd->qid != htole16(0)) 121 return (EINVAL); 122 123 qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0, 124 "admin queue"); 125 if (qp == NULL) { 126 printf("NVMFT: Failed to setup admin queue from %.*s\n", 127 (int)sizeof(data->hostnqn), data->hostnqn); 128 return (ENXIO); 129 } 130 131 sx_xlock(&np->lock); 132 cntlid = alloc_unr(np->ids); 133 if (cntlid == -1) { 134 sx_xunlock(&np->lock); 135 printf("NVMFT: Unable to allocate controller for %.*s\n", 136 (int)sizeof(data->hostnqn), data->hostnqn); 137 nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC, 138 NVMF_FABRIC_SC_INVALID_HOST); 139 nvmft_qpair_destroy(qp); 140 return (ENOMEM); 141 } 142 143 #ifdef INVARIANTS 144 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 145 KASSERT(ctrlr->cntlid != cntlid, 146 ("%s: duplicate controllers with id %d", __func__, cntlid)); 147 } 148 #endif 149 150 ctrlr = nvmft_controller_alloc(np, cntlid, data); 151 nvmft_printf(ctrlr, "associated with %.*s\n", 152 (int)sizeof(data->hostnqn), data->hostnqn); 153 ctrlr->admin = qp; 154 ctrlr->trtype = handoff->trtype; 155 156 /* 157 * The spec requires a non-zero KeepAlive timer, but allow a 158 * zero KATO value to match Linux. 159 */ 160 kato = le32toh(cmd->kato); 161 if (kato != 0) { 162 /* 163 * Round up to 1 second matching granularity 164 * advertised in cdata. 165 */ 166 ctrlr->ka_sbt = mstosbt(roundup(kato, 1000)); 167 callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, 168 nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK); 169 } 170 171 nvmft_finish_accept(qp, cmd, ctrlr); 172 sx_xunlock(&np->lock); 173 174 return (0); 175 } 176 177 int 178 nvmft_handoff_io_queue(struct nvmft_port *np, 179 const struct nvmf_handoff_controller_qpair *handoff, 180 const struct nvmf_fabric_connect_cmd *cmd, 181 const struct nvmf_fabric_connect_data *data) 182 { 183 struct nvmft_controller *ctrlr; 184 struct nvmft_qpair *qp; 185 char name[16]; 186 uint16_t cntlid, qid; 187 188 qid = le16toh(cmd->qid); 189 if (qid == 0) 190 return (EINVAL); 191 cntlid = le16toh(data->cntlid); 192 193 snprintf(name, sizeof(name), "I/O queue %u", qid); 194 qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name); 195 if (qp == NULL) { 196 printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid, 197 (int)sizeof(data->hostnqn), data->hostnqn); 198 return (ENXIO); 199 } 200 201 sx_slock(&np->lock); 202 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 203 if (ctrlr->cntlid == cntlid) 204 break; 205 } 206 if (ctrlr == NULL) { 207 sx_sunlock(&np->lock); 208 printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n", 209 ctrlr->cntlid, qid, (int)sizeof(data->hostnqn), 210 data->hostnqn); 211 nvmft_connect_invalid_parameters(qp, cmd, true, 212 offsetof(struct nvmf_fabric_connect_data, cntlid)); 213 nvmft_qpair_destroy(qp); 214 return (ENOENT); 215 } 216 217 if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) { 218 sx_sunlock(&np->lock); 219 nvmft_printf(ctrlr, 220 "hostid mismatch for I/O queue %u from %.*s\n", qid, 221 (int)sizeof(data->hostnqn), data->hostnqn); 222 nvmft_connect_invalid_parameters(qp, cmd, true, 223 offsetof(struct nvmf_fabric_connect_data, hostid)); 224 nvmft_qpair_destroy(qp); 225 return (EINVAL); 226 } 227 if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) { 228 sx_sunlock(&np->lock); 229 nvmft_printf(ctrlr, 230 "hostnqn mismatch for I/O queue %u from %.*s\n", qid, 231 (int)sizeof(data->hostnqn), data->hostnqn); 232 nvmft_connect_invalid_parameters(qp, cmd, true, 233 offsetof(struct nvmf_fabric_connect_data, hostnqn)); 234 nvmft_qpair_destroy(qp); 235 return (EINVAL); 236 } 237 238 /* XXX: Require handoff->trtype == ctrlr->trtype? */ 239 240 mtx_lock(&ctrlr->lock); 241 if (ctrlr->shutdown) { 242 mtx_unlock(&ctrlr->lock); 243 sx_sunlock(&np->lock); 244 nvmft_printf(ctrlr, 245 "attempt to create I/O queue %u on disabled controller from %.*s\n", 246 qid, (int)sizeof(data->hostnqn), data->hostnqn); 247 nvmft_connect_invalid_parameters(qp, cmd, true, 248 offsetof(struct nvmf_fabric_connect_data, cntlid)); 249 nvmft_qpair_destroy(qp); 250 return (EINVAL); 251 } 252 if (ctrlr->num_io_queues == 0) { 253 mtx_unlock(&ctrlr->lock); 254 sx_sunlock(&np->lock); 255 nvmft_printf(ctrlr, 256 "attempt to create I/O queue %u without enabled queues from %.*s\n", 257 qid, (int)sizeof(data->hostnqn), data->hostnqn); 258 nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC, 259 NVME_SC_COMMAND_SEQUENCE_ERROR); 260 nvmft_qpair_destroy(qp); 261 return (EINVAL); 262 } 263 if (cmd->qid > ctrlr->num_io_queues) { 264 mtx_unlock(&ctrlr->lock); 265 sx_sunlock(&np->lock); 266 nvmft_printf(ctrlr, 267 "attempt to create invalid I/O queue %u from %.*s\n", qid, 268 (int)sizeof(data->hostnqn), data->hostnqn); 269 nvmft_connect_invalid_parameters(qp, cmd, false, 270 offsetof(struct nvmf_fabric_connect_cmd, qid)); 271 nvmft_qpair_destroy(qp); 272 return (EINVAL); 273 } 274 if (ctrlr->io_qpairs[qid - 1].qp != NULL) { 275 mtx_unlock(&ctrlr->lock); 276 sx_sunlock(&np->lock); 277 nvmft_printf(ctrlr, 278 "attempt to re-create I/O queue %u from %.*s\n", qid, 279 (int)sizeof(data->hostnqn), data->hostnqn); 280 nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC, 281 NVME_SC_COMMAND_SEQUENCE_ERROR); 282 nvmft_qpair_destroy(qp); 283 return (EINVAL); 284 } 285 286 ctrlr->io_qpairs[qid - 1].qp = qp; 287 mtx_unlock(&ctrlr->lock); 288 nvmft_finish_accept(qp, cmd, ctrlr); 289 sx_sunlock(&np->lock); 290 291 return (0); 292 } 293 294 static void 295 nvmft_controller_shutdown(void *arg, int pending) 296 { 297 struct nvmft_controller *ctrlr = arg; 298 299 MPASS(pending == 1); 300 301 /* 302 * Shutdown all I/O queues to terminate pending datamoves and 303 * stop receiving new commands. 304 */ 305 mtx_lock(&ctrlr->lock); 306 for (u_int i = 0; i < ctrlr->num_io_queues; i++) { 307 if (ctrlr->io_qpairs[i].qp != NULL) { 308 ctrlr->io_qpairs[i].shutdown = true; 309 mtx_unlock(&ctrlr->lock); 310 nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp); 311 mtx_lock(&ctrlr->lock); 312 } 313 } 314 mtx_unlock(&ctrlr->lock); 315 316 /* Terminate active CTL commands. */ 317 nvmft_terminate_commands(ctrlr); 318 319 /* Wait for all pending CTL commands to complete. */ 320 mtx_lock(&ctrlr->lock); 321 while (ctrlr->pending_commands != 0) 322 mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh", 323 hz / 100); 324 mtx_unlock(&ctrlr->lock); 325 326 /* Delete all of the I/O queues. */ 327 for (u_int i = 0; i < ctrlr->num_io_queues; i++) { 328 if (ctrlr->io_qpairs[i].qp != NULL) 329 nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp); 330 } 331 free(ctrlr->io_qpairs, M_NVMFT); 332 ctrlr->io_qpairs = NULL; 333 334 mtx_lock(&ctrlr->lock); 335 ctrlr->num_io_queues = 0; 336 337 /* Mark shutdown complete. */ 338 if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) { 339 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST); 340 ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE); 341 } 342 343 if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) { 344 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY); 345 ctrlr->shutdown = false; 346 } 347 mtx_unlock(&ctrlr->lock); 348 349 /* 350 * If the admin queue was closed while shutting down or a 351 * fatal controller error has occurred, terminate the 352 * association immediately, otherwise wait up to 2 minutes 353 * (NVMe-over-Fabrics 1.1 4.6). 354 */ 355 if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0) 356 nvmft_controller_terminate(ctrlr, 0); 357 else 358 taskqueue_enqueue_timeout(taskqueue_thread, 359 &ctrlr->terminate_task, hz * 60 * 2); 360 } 361 362 static void 363 nvmft_controller_terminate(void *arg, int pending) 364 { 365 struct nvmft_controller *ctrlr = arg; 366 struct nvmft_port *np; 367 bool wakeup_np; 368 369 /* If the controller has been re-enabled, nothing to do. */ 370 mtx_lock(&ctrlr->lock); 371 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) { 372 mtx_unlock(&ctrlr->lock); 373 374 if (ctrlr->ka_sbt != 0) 375 callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, 376 C_HARDCLOCK); 377 return; 378 } 379 380 /* Disable updates to CC while destroying admin qpair. */ 381 ctrlr->shutdown = true; 382 mtx_unlock(&ctrlr->lock); 383 384 nvmft_qpair_destroy(ctrlr->admin); 385 386 /* Remove association (CNTLID). */ 387 np = ctrlr->np; 388 sx_xlock(&np->lock); 389 TAILQ_REMOVE(&np->controllers, ctrlr, link); 390 free_unr(np->ids, ctrlr->cntlid); 391 wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers)); 392 sx_xunlock(&np->lock); 393 if (wakeup_np) 394 wakeup(np); 395 396 callout_drain(&ctrlr->ka_timer); 397 398 nvmft_printf(ctrlr, "association terminated\n"); 399 nvmft_controller_free(ctrlr); 400 nvmft_port_rele(np); 401 } 402 403 void 404 nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp, 405 int error) 406 { 407 /* 408 * If a queue pair is closed, that isn't an error per se. 409 * That just means additional commands cannot be received on 410 * that queue pair. 411 * 412 * If the admin queue pair is closed while idle or while 413 * shutting down, terminate the association immediately. 414 * 415 * If an I/O queue pair is closed, just ignore it. 416 */ 417 if (error == 0) { 418 if (qp != ctrlr->admin) 419 return; 420 421 mtx_lock(&ctrlr->lock); 422 if (ctrlr->shutdown) { 423 ctrlr->admin_closed = true; 424 mtx_unlock(&ctrlr->lock); 425 return; 426 } 427 428 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) { 429 MPASS(ctrlr->num_io_queues == 0); 430 mtx_unlock(&ctrlr->lock); 431 432 /* 433 * Ok to drop lock here since ctrlr->cc can't 434 * change if the admin queue pair has closed. 435 * This also means no new queues can be handed 436 * off, etc. Note that since there are no I/O 437 * queues, only the admin queue needs to be 438 * destroyed, so it is safe to skip 439 * nvmft_controller_shutdown and just schedule 440 * nvmft_controller_terminate. Note that we 441 * cannot call nvmft_controller_terminate from 442 * here directly as this is called from the 443 * transport layer and freeing the admin qpair 444 * might deadlock waiting for the current 445 * thread to exit. 446 */ 447 if (taskqueue_cancel_timeout(taskqueue_thread, 448 &ctrlr->terminate_task, NULL) == 0) 449 taskqueue_enqueue_timeout(taskqueue_thread, 450 &ctrlr->terminate_task, 0); 451 return; 452 } 453 454 /* 455 * Treat closing of the admin queue pair while enabled 456 * as a transport error. Note that the admin queue 457 * pair has been closed. 458 */ 459 ctrlr->admin_closed = true; 460 } else 461 mtx_lock(&ctrlr->lock); 462 463 /* Ignore transport errors if we are already shutting down. */ 464 if (ctrlr->shutdown) { 465 mtx_unlock(&ctrlr->lock); 466 return; 467 } 468 469 ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1); 470 ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN); 471 ctrlr->shutdown = true; 472 mtx_unlock(&ctrlr->lock); 473 474 callout_stop(&ctrlr->ka_timer); 475 taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task); 476 } 477 478 /* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */ 479 static struct mbuf * 480 m_getml(size_t len, int how) 481 { 482 struct mbuf *m, *n; 483 484 m = m_getm2(NULL, len, how, MT_DATA, 0); 485 if (m == NULL) 486 return (NULL); 487 for (n = m; len > 0; n = n->m_next) { 488 n->m_len = M_SIZE(n); 489 if (n->m_len >= len) { 490 n->m_len = len; 491 MPASS(n->m_next == NULL); 492 } 493 len -= n->m_len; 494 } 495 return (m); 496 } 497 498 static void 499 m_zero(struct mbuf *m, u_int offset, u_int len) 500 { 501 u_int todo; 502 503 if (len == 0) 504 return; 505 506 while (m->m_len <= offset) { 507 offset -= m->m_len; 508 m = m->m_next; 509 } 510 511 todo = m->m_len - offset; 512 if (todo > len) 513 todo = len; 514 memset(mtodo(m, offset), 0, todo); 515 m = m->m_next; 516 len -= todo; 517 518 while (len > 0) { 519 todo = m->m_len; 520 if (todo > len) 521 todo = len; 522 memset(mtod(m, void *), 0, todo); 523 m = m->m_next; 524 len -= todo; 525 } 526 } 527 528 static void 529 handle_get_log_page(struct nvmft_controller *ctrlr, 530 struct nvmf_capsule *nc, const struct nvme_command *cmd) 531 { 532 struct mbuf *m; 533 uint64_t offset; 534 uint32_t numd; 535 size_t len, todo; 536 u_int status; 537 uint8_t lid; 538 bool rae; 539 540 lid = le32toh(cmd->cdw10) & 0xff; 541 rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0; 542 numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16; 543 offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32; 544 545 if (offset % 3 != 0) { 546 status = NVME_SC_INVALID_FIELD; 547 goto done; 548 } 549 550 len = (numd + 1) * 4; 551 552 switch (lid) { 553 case NVME_LOG_ERROR: 554 todo = 0; 555 556 m = m_getml(len, M_WAITOK); 557 if (todo != len) 558 m_zero(m, todo, len - todo); 559 status = nvmf_send_controller_data(nc, 0, m, len); 560 MPASS(status != NVMF_MORE); 561 break; 562 case NVME_LOG_HEALTH_INFORMATION: 563 { 564 struct nvme_health_information_page hip; 565 566 if (offset >= sizeof(hip)) { 567 status = NVME_SC_INVALID_FIELD; 568 goto done; 569 } 570 todo = sizeof(hip) - offset; 571 if (todo > len) 572 todo = len; 573 574 mtx_lock(&ctrlr->lock); 575 hip = ctrlr->hip; 576 hip.controller_busy_time[0] = 577 sbintime_getsec(ctrlr->busy_total) / 60; 578 hip.power_on_hours[0] = 579 sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600; 580 mtx_unlock(&ctrlr->lock); 581 582 m = m_getml(len, M_WAITOK); 583 m_copyback(m, 0, todo, (char *)&hip + offset); 584 if (todo != len) 585 m_zero(m, todo, len - todo); 586 status = nvmf_send_controller_data(nc, 0, m, len); 587 MPASS(status != NVMF_MORE); 588 break; 589 } 590 case NVME_LOG_FIRMWARE_SLOT: 591 if (offset >= sizeof(ctrlr->np->fp)) { 592 status = NVME_SC_INVALID_FIELD; 593 goto done; 594 } 595 todo = sizeof(ctrlr->np->fp) - offset; 596 if (todo > len) 597 todo = len; 598 599 m = m_getml(len, M_WAITOK); 600 m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset); 601 if (todo != len) 602 m_zero(m, todo, len - todo); 603 status = nvmf_send_controller_data(nc, 0, m, len); 604 MPASS(status != NVMF_MORE); 605 break; 606 case NVME_LOG_CHANGED_NAMESPACE: 607 if (offset >= sizeof(*ctrlr->changed_ns)) { 608 status = NVME_SC_INVALID_FIELD; 609 goto done; 610 } 611 todo = sizeof(*ctrlr->changed_ns) - offset; 612 if (todo > len) 613 todo = len; 614 615 m = m_getml(len, M_WAITOK); 616 mtx_lock(&ctrlr->lock); 617 m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset); 618 if (offset == 0 && len == sizeof(*ctrlr->changed_ns)) 619 memset(ctrlr->changed_ns, 0, 620 sizeof(*ctrlr->changed_ns)); 621 if (!rae) 622 ctrlr->changed_ns_reported = false; 623 mtx_unlock(&ctrlr->lock); 624 if (todo != len) 625 m_zero(m, todo, len - todo); 626 status = nvmf_send_controller_data(nc, 0, m, len); 627 MPASS(status != NVMF_MORE); 628 break; 629 default: 630 nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n", 631 lid); 632 status = NVME_SC_INVALID_FIELD; 633 break; 634 } 635 636 done: 637 if (status == NVMF_SUCCESS_SENT) 638 nvmft_command_completed(ctrlr->admin, nc); 639 else 640 nvmft_send_generic_error(ctrlr->admin, nc, status); 641 nvmf_free_capsule(nc); 642 } 643 644 static void 645 m_free_nslist(struct mbuf *m) 646 { 647 free(m->m_ext.ext_arg1, M_NVMFT); 648 } 649 650 static void 651 handle_identify_command(struct nvmft_controller *ctrlr, 652 struct nvmf_capsule *nc, const struct nvme_command *cmd) 653 { 654 struct mbuf *m; 655 size_t data_len; 656 u_int status; 657 uint8_t cns; 658 659 cns = le32toh(cmd->cdw10) & 0xFF; 660 data_len = nvmf_capsule_data_len(nc); 661 if (data_len != sizeof(ctrlr->cdata)) { 662 nvmft_printf(ctrlr, 663 "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len, 664 cns); 665 nvmft_send_generic_error(ctrlr->admin, nc, 666 NVME_SC_INVALID_OPCODE); 667 nvmf_free_capsule(nc); 668 return; 669 } 670 671 switch (cns) { 672 case 0: /* Namespace data. */ 673 case 3: /* Namespace Identification Descriptor list. */ 674 nvmft_dispatch_command(ctrlr->admin, nc, true); 675 return; 676 case 1: 677 /* Controller data. */ 678 m = m_getml(sizeof(ctrlr->cdata), M_WAITOK); 679 m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata); 680 status = nvmf_send_controller_data(nc, 0, m, 681 sizeof(ctrlr->cdata)); 682 MPASS(status != NVMF_MORE); 683 break; 684 case 2: 685 { 686 /* Active namespace list. */ 687 struct nvme_ns_list *nslist; 688 uint32_t nsid; 689 690 nsid = le32toh(cmd->nsid); 691 if (nsid >= 0xfffffffe) { 692 status = NVME_SC_INVALID_FIELD; 693 break; 694 } 695 696 nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO); 697 nvmft_populate_active_nslist(ctrlr->np, nsid, nslist); 698 m = m_get(M_WAITOK, MT_DATA); 699 m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist, 700 nslist, NULL, 0, EXT_CTL); 701 m->m_len = sizeof(*nslist); 702 status = nvmf_send_controller_data(nc, 0, m, m->m_len); 703 MPASS(status != NVMF_MORE); 704 break; 705 } 706 default: 707 nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns); 708 status = NVME_SC_INVALID_FIELD; 709 break; 710 } 711 712 if (status == NVMF_SUCCESS_SENT) 713 nvmft_command_completed(ctrlr->admin, nc); 714 else 715 nvmft_send_generic_error(ctrlr->admin, nc, status); 716 nvmf_free_capsule(nc); 717 } 718 719 static void 720 handle_set_features(struct nvmft_controller *ctrlr, 721 struct nvmf_capsule *nc, const struct nvme_command *cmd) 722 { 723 struct nvme_completion cqe; 724 uint8_t fid; 725 726 fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10)); 727 switch (fid) { 728 case NVME_FEAT_NUMBER_OF_QUEUES: 729 { 730 uint32_t num_queues; 731 struct nvmft_io_qpair *io_qpairs; 732 733 num_queues = le32toh(cmd->cdw11) & 0xffff; 734 735 /* 5.12.1.7: 65535 is invalid. */ 736 if (num_queues == 65535) 737 goto error; 738 739 /* Fabrics requires the same number of SQs and CQs. */ 740 if (le32toh(cmd->cdw11) >> 16 != num_queues) 741 goto error; 742 743 /* Convert to 1's based */ 744 num_queues++; 745 746 io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs), 747 M_NVMFT, M_WAITOK | M_ZERO); 748 749 mtx_lock(&ctrlr->lock); 750 if (ctrlr->num_io_queues != 0) { 751 mtx_unlock(&ctrlr->lock); 752 free(io_qpairs, M_NVMFT); 753 nvmft_send_generic_error(ctrlr->admin, nc, 754 NVME_SC_COMMAND_SEQUENCE_ERROR); 755 nvmf_free_capsule(nc); 756 return; 757 } 758 759 ctrlr->num_io_queues = num_queues; 760 ctrlr->io_qpairs = io_qpairs; 761 mtx_unlock(&ctrlr->lock); 762 763 nvmft_init_cqe(&cqe, nc, 0); 764 cqe.cdw0 = cmd->cdw11; 765 nvmft_send_response(ctrlr->admin, &cqe); 766 nvmf_free_capsule(nc); 767 return; 768 } 769 case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: 770 { 771 uint32_t aer_mask; 772 773 aer_mask = le32toh(cmd->cdw11); 774 775 /* Check for any reserved or unimplemented feature bits. */ 776 if ((aer_mask & 0xffffc000) != 0) 777 goto error; 778 779 mtx_lock(&ctrlr->lock); 780 ctrlr->aer_mask = aer_mask; 781 mtx_unlock(&ctrlr->lock); 782 nvmft_send_success(ctrlr->admin, nc); 783 return; 784 } 785 default: 786 nvmft_printf(ctrlr, 787 "Unsupported feature ID %u for SET_FEATURES\n", fid); 788 goto error; 789 } 790 791 error: 792 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD); 793 nvmf_free_capsule(nc); 794 } 795 796 static bool 797 update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown) 798 { 799 struct nvmft_port *np = ctrlr->np; 800 uint32_t changes; 801 802 *need_shutdown = false; 803 804 mtx_lock(&ctrlr->lock); 805 806 /* Don't allow any changes while shutting down. */ 807 if (ctrlr->shutdown) { 808 mtx_unlock(&ctrlr->lock); 809 return (false); 810 } 811 812 if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) { 813 mtx_unlock(&ctrlr->lock); 814 return (false); 815 } 816 817 changes = ctrlr->cc ^ new_cc; 818 ctrlr->cc = new_cc; 819 820 /* Handle shutdown requests. */ 821 if (NVMEV(NVME_CC_REG_SHN, changes) != 0 && 822 NVMEV(NVME_CC_REG_SHN, new_cc) != 0) { 823 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST); 824 ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING); 825 ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN); 826 ctrlr->shutdown = true; 827 *need_shutdown = true; 828 nvmft_printf(ctrlr, "shutdown requested\n"); 829 } 830 831 if (NVMEV(NVME_CC_REG_EN, changes) != 0) { 832 if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) { 833 /* Controller reset. */ 834 nvmft_printf(ctrlr, "reset requested\n"); 835 ctrlr->shutdown = true; 836 *need_shutdown = true; 837 } else 838 ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1); 839 } 840 mtx_unlock(&ctrlr->lock); 841 842 return (true); 843 } 844 845 static void 846 handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc, 847 const struct nvmf_fabric_prop_get_cmd *pget) 848 { 849 struct nvmf_fabric_prop_get_rsp rsp; 850 851 nvmft_init_cqe(&rsp, nc, 0); 852 853 switch (le32toh(pget->ofst)) { 854 case NVMF_PROP_CAP: 855 if (pget->attrib.size != NVMF_PROP_SIZE_8) 856 goto error; 857 rsp.value.u64 = htole64(ctrlr->np->cap); 858 break; 859 case NVMF_PROP_VS: 860 if (pget->attrib.size != NVMF_PROP_SIZE_4) 861 goto error; 862 rsp.value.u32.low = ctrlr->cdata.ver; 863 break; 864 case NVMF_PROP_CC: 865 if (pget->attrib.size != NVMF_PROP_SIZE_4) 866 goto error; 867 rsp.value.u32.low = htole32(ctrlr->cc); 868 break; 869 case NVMF_PROP_CSTS: 870 if (pget->attrib.size != NVMF_PROP_SIZE_4) 871 goto error; 872 rsp.value.u32.low = htole32(ctrlr->csts); 873 break; 874 default: 875 goto error; 876 } 877 878 nvmft_send_response(ctrlr->admin, &rsp); 879 return; 880 error: 881 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD); 882 } 883 884 static void 885 handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc, 886 const struct nvmf_fabric_prop_set_cmd *pset) 887 { 888 bool need_shutdown; 889 890 need_shutdown = false; 891 switch (le32toh(pset->ofst)) { 892 case NVMF_PROP_CC: 893 if (pset->attrib.size != NVMF_PROP_SIZE_4) 894 goto error; 895 if (!update_cc(ctrlr, le32toh(pset->value.u32.low), 896 &need_shutdown)) 897 goto error; 898 break; 899 default: 900 goto error; 901 } 902 903 nvmft_send_success(ctrlr->admin, nc); 904 if (need_shutdown) { 905 callout_stop(&ctrlr->ka_timer); 906 taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task); 907 } 908 return; 909 error: 910 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD); 911 } 912 913 static void 914 handle_admin_fabrics_command(struct nvmft_controller *ctrlr, 915 struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc) 916 { 917 switch (fc->fctype) { 918 case NVMF_FABRIC_COMMAND_PROPERTY_GET: 919 handle_property_get(ctrlr, nc, 920 (const struct nvmf_fabric_prop_get_cmd *)fc); 921 break; 922 case NVMF_FABRIC_COMMAND_PROPERTY_SET: 923 handle_property_set(ctrlr, nc, 924 (const struct nvmf_fabric_prop_set_cmd *)fc); 925 break; 926 case NVMF_FABRIC_COMMAND_CONNECT: 927 nvmft_printf(ctrlr, 928 "CONNECT command on connected admin queue\n"); 929 nvmft_send_generic_error(ctrlr->admin, nc, 930 NVME_SC_COMMAND_SEQUENCE_ERROR); 931 break; 932 case NVMF_FABRIC_COMMAND_DISCONNECT: 933 nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n"); 934 nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC, 935 NVMF_FABRIC_SC_INVALID_QUEUE_TYPE); 936 break; 937 default: 938 nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n", 939 fc->fctype); 940 nvmft_send_generic_error(ctrlr->admin, nc, 941 NVME_SC_INVALID_OPCODE); 942 break; 943 } 944 nvmf_free_capsule(nc); 945 } 946 947 void 948 nvmft_handle_admin_command(struct nvmft_controller *ctrlr, 949 struct nvmf_capsule *nc) 950 { 951 const struct nvme_command *cmd = nvmf_capsule_sqe(nc); 952 953 /* Only permit Fabrics commands while a controller is disabled. */ 954 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 && 955 cmd->opc != NVME_OPC_FABRICS_COMMANDS) { 956 nvmft_printf(ctrlr, 957 "Unsupported admin opcode %#x while disabled\n", cmd->opc); 958 nvmft_send_generic_error(ctrlr->admin, nc, 959 NVME_SC_COMMAND_SEQUENCE_ERROR); 960 nvmf_free_capsule(nc); 961 return; 962 } 963 964 atomic_store_int(&ctrlr->ka_active_traffic, 1); 965 966 switch (cmd->opc) { 967 case NVME_OPC_GET_LOG_PAGE: 968 handle_get_log_page(ctrlr, nc, cmd); 969 break; 970 case NVME_OPC_IDENTIFY: 971 handle_identify_command(ctrlr, nc, cmd); 972 break; 973 case NVME_OPC_SET_FEATURES: 974 handle_set_features(ctrlr, nc, cmd); 975 break; 976 case NVME_OPC_ASYNC_EVENT_REQUEST: 977 mtx_lock(&ctrlr->lock); 978 if (ctrlr->aer_pending == NVMFT_NUM_AER) { 979 mtx_unlock(&ctrlr->lock); 980 nvmft_send_error(ctrlr->admin, nc, 981 NVME_SCT_COMMAND_SPECIFIC, 982 NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); 983 } else { 984 /* NB: Store the CID without byte-swapping. */ 985 ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid; 986 ctrlr->aer_pending++; 987 ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER; 988 mtx_unlock(&ctrlr->lock); 989 } 990 nvmf_free_capsule(nc); 991 break; 992 case NVME_OPC_KEEP_ALIVE: 993 nvmft_send_success(ctrlr->admin, nc); 994 nvmf_free_capsule(nc); 995 break; 996 case NVME_OPC_FABRICS_COMMANDS: 997 handle_admin_fabrics_command(ctrlr, nc, 998 (const struct nvmf_fabric_cmd *)cmd); 999 break; 1000 default: 1001 nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc); 1002 nvmft_send_generic_error(ctrlr->admin, nc, 1003 NVME_SC_INVALID_OPCODE); 1004 nvmf_free_capsule(nc); 1005 break; 1006 } 1007 } 1008 1009 void 1010 nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid, 1011 struct nvmf_capsule *nc) 1012 { 1013 struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp); 1014 const struct nvme_command *cmd = nvmf_capsule_sqe(nc); 1015 1016 atomic_store_int(&ctrlr->ka_active_traffic, 1); 1017 1018 switch (cmd->opc) { 1019 case NVME_OPC_FLUSH: 1020 if (cmd->nsid == htole32(0xffffffff)) { 1021 nvmft_send_generic_error(qp, nc, 1022 NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 1023 nvmf_free_capsule(nc); 1024 break; 1025 } 1026 /* FALLTHROUGH */ 1027 case NVME_OPC_WRITE: 1028 case NVME_OPC_READ: 1029 case NVME_OPC_WRITE_UNCORRECTABLE: 1030 case NVME_OPC_COMPARE: 1031 case NVME_OPC_WRITE_ZEROES: 1032 case NVME_OPC_DATASET_MANAGEMENT: 1033 case NVME_OPC_VERIFY: 1034 nvmft_dispatch_command(qp, nc, false); 1035 break; 1036 default: 1037 nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc); 1038 nvmft_send_generic_error(qp, nc, 1039 NVME_SC_INVALID_OPCODE); 1040 nvmf_free_capsule(nc); 1041 break; 1042 } 1043 } 1044 1045 static void 1046 nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask, 1047 u_int type, uint8_t info, uint8_t log_page_id) 1048 { 1049 struct nvme_completion cpl; 1050 1051 MPASS(type <= 7); 1052 1053 /* Drop events that are not enabled. */ 1054 mtx_lock(&ctrlr->lock); 1055 if ((ctrlr->aer_mask & aer_mask) == 0) { 1056 mtx_unlock(&ctrlr->lock); 1057 return; 1058 } 1059 1060 /* 1061 * If there is no pending AER command, drop it. 1062 * XXX: Should we queue these? 1063 */ 1064 if (ctrlr->aer_pending == 0) { 1065 mtx_unlock(&ctrlr->lock); 1066 nvmft_printf(ctrlr, 1067 "dropping AER type %u, info %#x, page %#x\n", 1068 type, info, log_page_id); 1069 return; 1070 } 1071 1072 memset(&cpl, 0, sizeof(cpl)); 1073 cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx]; 1074 ctrlr->aer_pending--; 1075 ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER; 1076 mtx_unlock(&ctrlr->lock); 1077 1078 cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) | 1079 NVMEF(NVME_ASYNC_EVENT_INFO, info) | 1080 NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id)); 1081 1082 nvmft_send_response(ctrlr->admin, &cpl); 1083 } 1084 1085 void 1086 nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id) 1087 { 1088 struct nvme_ns_list *nslist; 1089 uint32_t new_nsid, nsid; 1090 u_int i; 1091 1092 new_nsid = lun_id + 1; 1093 1094 mtx_lock(&ctrlr->lock); 1095 nslist = ctrlr->changed_ns; 1096 1097 /* If the first entry is 0xffffffff, the list is already full. */ 1098 if (nslist->ns[0] != 0xffffffff) { 1099 /* Find the insertion point for this namespace ID. */ 1100 for (i = 0; i < nitems(nslist->ns); i++) { 1101 nsid = le32toh(nslist->ns[i]); 1102 if (nsid == new_nsid) { 1103 /* Already reported, nothing to do. */ 1104 mtx_unlock(&ctrlr->lock); 1105 return; 1106 } 1107 1108 if (nsid == 0 || nsid > new_nsid) 1109 break; 1110 } 1111 1112 if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) { 1113 /* List is full. */ 1114 memset(ctrlr->changed_ns, 0, 1115 sizeof(*ctrlr->changed_ns)); 1116 ctrlr->changed_ns->ns[0] = 0xffffffff; 1117 } else if (nslist->ns[i] == htole32(0)) { 1118 /* 1119 * Optimize case where this ID is appended to 1120 * the end. 1121 */ 1122 nslist->ns[i] = htole32(new_nsid); 1123 } else { 1124 memmove(&nslist->ns[i + 1], &nslist->ns[i], 1125 (nitems(nslist->ns) - i - 1) * 1126 sizeof(nslist->ns[0])); 1127 nslist->ns[i] = htole32(new_nsid); 1128 } 1129 } 1130 1131 if (ctrlr->changed_ns_reported) { 1132 mtx_unlock(&ctrlr->lock); 1133 return; 1134 } 1135 ctrlr->changed_ns_reported = true; 1136 mtx_unlock(&ctrlr->lock); 1137 1138 nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0, 1139 NVME_LOG_CHANGED_NAMESPACE); 1140 } 1141