1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2023-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/param.h> 9 #include <sys/callout.h> 10 #include <sys/kernel.h> 11 #include <sys/lock.h> 12 #include <sys/malloc.h> 13 #include <sys/mbuf.h> 14 #include <sys/memdesc.h> 15 #include <sys/mutex.h> 16 #include <sys/sbuf.h> 17 #include <sys/taskqueue.h> 18 19 #include <dev/nvmf/nvmf_transport.h> 20 #include <dev/nvmf/controller/nvmft_subr.h> 21 #include <dev/nvmf/controller/nvmft_var.h> 22 23 static void nvmft_controller_shutdown(void *arg, int pending); 24 static void nvmft_controller_terminate(void *arg, int pending); 25 26 int 27 nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...) 28 { 29 char buf[128]; 30 struct sbuf sb; 31 va_list ap; 32 size_t retval; 33 34 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL); 35 sbuf_set_drain(&sb, sbuf_printf_drain, &retval); 36 37 sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid); 38 39 va_start(ap, fmt); 40 sbuf_vprintf(&sb, fmt, ap); 41 va_end(ap); 42 43 sbuf_finish(&sb); 44 sbuf_delete(&sb); 45 46 return (retval); 47 } 48 49 static struct nvmft_controller * 50 nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid, 51 const struct nvmf_fabric_connect_data *data) 52 { 53 struct nvmft_controller *ctrlr; 54 55 ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO); 56 ctrlr->cntlid = cntlid; 57 ctrlr->np = np; 58 mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF); 59 callout_init(&ctrlr->ka_timer, 1); 60 TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr); 61 TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0, 62 nvmft_controller_terminate, ctrlr); 63 64 ctrlr->cdata = np->cdata; 65 ctrlr->cdata.ctrlr_id = htole16(cntlid); 66 memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)); 67 memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)); 68 ctrlr->hip.power_cycles[0] = 1; 69 ctrlr->create_time = sbinuptime(); 70 71 ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT, 72 M_WAITOK | M_ZERO); 73 74 return (ctrlr); 75 } 76 77 static void 78 nvmft_controller_free(struct nvmft_controller *ctrlr) 79 { 80 mtx_destroy(&ctrlr->lock); 81 MPASS(ctrlr->io_qpairs == NULL); 82 free(ctrlr->changed_ns, M_NVMFT); 83 free(ctrlr, M_NVMFT); 84 } 85 86 static void 87 nvmft_keep_alive_timer(void *arg) 88 { 89 struct nvmft_controller *ctrlr = arg; 90 int traffic; 91 92 if (ctrlr->shutdown) 93 return; 94 95 traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic); 96 if (traffic == 0) { 97 nvmft_printf(ctrlr, 98 "disconnecting due to KeepAlive timeout\n"); 99 nvmft_controller_error(ctrlr, NULL, ETIMEDOUT); 100 return; 101 } 102 103 callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK); 104 } 105 106 static void 107 nvmft_update_cdata(struct nvmft_controller *ctrlr) 108 { 109 uint32_t ioccsz, val; 110 111 val = nvmft_max_ioccsz(ctrlr->admin); 112 if (val != 0) { 113 ioccsz = le32toh(ctrlr->cdata.ioccsz) * 16; 114 if (val < ioccsz) 115 ctrlr->cdata.ioccsz = htole32(val / 16); 116 } 117 } 118 119 int 120 nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype, 121 const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd, 122 const struct nvmf_fabric_connect_data *data) 123 { 124 struct nvmft_controller *ctrlr; 125 struct nvmft_qpair *qp; 126 uint32_t kato; 127 int cntlid; 128 129 if (cmd->qid != htole16(0)) 130 return (EINVAL); 131 132 qp = nvmft_qpair_init(trtype, params, 0, "admin queue"); 133 if (qp == NULL) { 134 printf("NVMFT: Failed to setup admin queue from %.*s\n", 135 (int)sizeof(data->hostnqn), data->hostnqn); 136 return (ENXIO); 137 } 138 139 mtx_lock(&np->lock); 140 cntlid = alloc_unr(np->ids); 141 if (cntlid == -1) { 142 mtx_unlock(&np->lock); 143 printf("NVMFT: Unable to allocate controller for %.*s\n", 144 (int)sizeof(data->hostnqn), data->hostnqn); 145 nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC, 146 NVMF_FABRIC_SC_INVALID_HOST); 147 nvmft_qpair_destroy(qp); 148 return (ENOMEM); 149 } 150 151 #ifdef INVARIANTS 152 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 153 KASSERT(ctrlr->cntlid != cntlid, 154 ("%s: duplicate controllers with id %d", __func__, cntlid)); 155 } 156 #endif 157 mtx_unlock(&np->lock); 158 159 ctrlr = nvmft_controller_alloc(np, cntlid, data); 160 161 mtx_lock(&np->lock); 162 if (!np->online) { 163 mtx_unlock(&np->lock); 164 nvmft_controller_free(ctrlr); 165 free_unr(np->ids, cntlid); 166 nvmft_qpair_destroy(qp); 167 return (ENXIO); 168 } 169 nvmft_port_ref(np); 170 TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link); 171 172 nvmft_printf(ctrlr, "associated with %.*s\n", 173 (int)sizeof(data->hostnqn), data->hostnqn); 174 ctrlr->admin = qp; 175 ctrlr->trtype = trtype; 176 nvmft_update_cdata(ctrlr); 177 178 /* 179 * The spec requires a non-zero KeepAlive timer, but allow a 180 * zero KATO value to match Linux. 181 */ 182 kato = le32toh(cmd->kato); 183 if (kato != 0) { 184 /* 185 * Round up to 1 second matching granularity 186 * advertised in cdata. 187 */ 188 ctrlr->ka_sbt = mstosbt(roundup(kato, 1000)); 189 callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, 190 nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK); 191 } 192 mtx_unlock(&np->lock); 193 194 nvmft_finish_accept(qp, cmd, ctrlr); 195 196 return (0); 197 } 198 199 int 200 nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype, 201 const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd, 202 const struct nvmf_fabric_connect_data *data) 203 { 204 struct nvmft_controller *ctrlr; 205 struct nvmft_qpair *qp; 206 char name[16]; 207 uint16_t cntlid, qid; 208 209 qid = le16toh(cmd->qid); 210 if (qid == 0) 211 return (EINVAL); 212 cntlid = le16toh(data->cntlid); 213 214 snprintf(name, sizeof(name), "I/O queue %u", qid); 215 qp = nvmft_qpair_init(trtype, params, qid, name); 216 if (qp == NULL) { 217 printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid, 218 (int)sizeof(data->hostnqn), data->hostnqn); 219 return (ENXIO); 220 } 221 222 mtx_lock(&np->lock); 223 TAILQ_FOREACH(ctrlr, &np->controllers, link) { 224 if (ctrlr->cntlid == cntlid) 225 break; 226 } 227 if (ctrlr == NULL) { 228 mtx_unlock(&np->lock); 229 printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n", 230 ctrlr->cntlid, qid, (int)sizeof(data->hostnqn), 231 data->hostnqn); 232 nvmft_connect_invalid_parameters(qp, cmd, true, 233 offsetof(struct nvmf_fabric_connect_data, cntlid)); 234 nvmft_qpair_destroy(qp); 235 return (ENOENT); 236 } 237 238 if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) { 239 mtx_unlock(&np->lock); 240 nvmft_printf(ctrlr, 241 "hostid mismatch for I/O queue %u from %.*s\n", qid, 242 (int)sizeof(data->hostnqn), data->hostnqn); 243 nvmft_connect_invalid_parameters(qp, cmd, true, 244 offsetof(struct nvmf_fabric_connect_data, hostid)); 245 nvmft_qpair_destroy(qp); 246 return (EINVAL); 247 } 248 if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) { 249 mtx_unlock(&np->lock); 250 nvmft_printf(ctrlr, 251 "hostnqn mismatch for I/O queue %u from %.*s\n", qid, 252 (int)sizeof(data->hostnqn), data->hostnqn); 253 nvmft_connect_invalid_parameters(qp, cmd, true, 254 offsetof(struct nvmf_fabric_connect_data, hostnqn)); 255 nvmft_qpair_destroy(qp); 256 return (EINVAL); 257 } 258 259 /* XXX: Require trtype == ctrlr->trtype? */ 260 261 mtx_lock(&ctrlr->lock); 262 if (ctrlr->shutdown) { 263 mtx_unlock(&ctrlr->lock); 264 mtx_unlock(&np->lock); 265 nvmft_printf(ctrlr, 266 "attempt to create I/O queue %u on disabled controller from %.*s\n", 267 qid, (int)sizeof(data->hostnqn), data->hostnqn); 268 nvmft_connect_invalid_parameters(qp, cmd, true, 269 offsetof(struct nvmf_fabric_connect_data, cntlid)); 270 nvmft_qpair_destroy(qp); 271 return (EINVAL); 272 } 273 if (ctrlr->num_io_queues == 0) { 274 mtx_unlock(&ctrlr->lock); 275 mtx_unlock(&np->lock); 276 nvmft_printf(ctrlr, 277 "attempt to create I/O queue %u without enabled queues from %.*s\n", 278 qid, (int)sizeof(data->hostnqn), data->hostnqn); 279 nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC, 280 NVME_SC_COMMAND_SEQUENCE_ERROR); 281 nvmft_qpair_destroy(qp); 282 return (EINVAL); 283 } 284 if (cmd->qid > ctrlr->num_io_queues) { 285 mtx_unlock(&ctrlr->lock); 286 mtx_unlock(&np->lock); 287 nvmft_printf(ctrlr, 288 "attempt to create invalid I/O queue %u from %.*s\n", qid, 289 (int)sizeof(data->hostnqn), data->hostnqn); 290 nvmft_connect_invalid_parameters(qp, cmd, false, 291 offsetof(struct nvmf_fabric_connect_cmd, qid)); 292 nvmft_qpair_destroy(qp); 293 return (EINVAL); 294 } 295 if (ctrlr->io_qpairs[qid - 1].qp != NULL) { 296 mtx_unlock(&ctrlr->lock); 297 mtx_unlock(&np->lock); 298 nvmft_printf(ctrlr, 299 "attempt to re-create I/O queue %u from %.*s\n", qid, 300 (int)sizeof(data->hostnqn), data->hostnqn); 301 nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC, 302 NVME_SC_COMMAND_SEQUENCE_ERROR); 303 nvmft_qpair_destroy(qp); 304 return (EINVAL); 305 } 306 307 ctrlr->io_qpairs[qid - 1].qp = qp; 308 mtx_unlock(&ctrlr->lock); 309 mtx_unlock(&np->lock); 310 nvmft_finish_accept(qp, cmd, ctrlr); 311 312 return (0); 313 } 314 315 static void 316 nvmft_controller_shutdown(void *arg, int pending) 317 { 318 struct nvmft_controller *ctrlr = arg; 319 320 MPASS(pending == 1); 321 322 /* 323 * Shutdown all I/O queues to terminate pending datamoves and 324 * stop receiving new commands. 325 */ 326 mtx_lock(&ctrlr->lock); 327 for (u_int i = 0; i < ctrlr->num_io_queues; i++) { 328 if (ctrlr->io_qpairs[i].qp != NULL) { 329 ctrlr->io_qpairs[i].shutdown = true; 330 mtx_unlock(&ctrlr->lock); 331 nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp); 332 mtx_lock(&ctrlr->lock); 333 } 334 } 335 mtx_unlock(&ctrlr->lock); 336 337 /* Terminate active CTL commands. */ 338 nvmft_terminate_commands(ctrlr); 339 340 /* Wait for all pending CTL commands to complete. */ 341 mtx_lock(&ctrlr->lock); 342 while (ctrlr->pending_commands != 0) 343 mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh", 344 hz / 100); 345 mtx_unlock(&ctrlr->lock); 346 347 /* Delete all of the I/O queues. */ 348 for (u_int i = 0; i < ctrlr->num_io_queues; i++) { 349 if (ctrlr->io_qpairs[i].qp != NULL) 350 nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp); 351 } 352 free(ctrlr->io_qpairs, M_NVMFT); 353 ctrlr->io_qpairs = NULL; 354 355 mtx_lock(&ctrlr->lock); 356 ctrlr->num_io_queues = 0; 357 358 /* Mark shutdown complete. */ 359 if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) { 360 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST); 361 ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE); 362 } 363 364 if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) { 365 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY); 366 ctrlr->shutdown = false; 367 } 368 mtx_unlock(&ctrlr->lock); 369 370 /* 371 * If the admin queue was closed while shutting down or a 372 * fatal controller error has occurred, terminate the 373 * association immediately, otherwise wait up to 2 minutes 374 * (NVMe-over-Fabrics 1.1 4.6). 375 */ 376 if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0) 377 nvmft_controller_terminate(ctrlr, 0); 378 else 379 taskqueue_enqueue_timeout(taskqueue_thread, 380 &ctrlr->terminate_task, hz * 60 * 2); 381 } 382 383 static void 384 nvmft_controller_terminate(void *arg, int pending) 385 { 386 struct nvmft_controller *ctrlr = arg; 387 struct nvmft_port *np; 388 bool wakeup_np; 389 390 /* If the controller has been re-enabled, nothing to do. */ 391 mtx_lock(&ctrlr->lock); 392 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) { 393 mtx_unlock(&ctrlr->lock); 394 395 if (ctrlr->ka_sbt != 0) 396 callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, 397 C_HARDCLOCK); 398 return; 399 } 400 401 /* Disable updates to CC while destroying admin qpair. */ 402 ctrlr->shutdown = true; 403 mtx_unlock(&ctrlr->lock); 404 405 nvmft_qpair_destroy(ctrlr->admin); 406 407 /* Remove association (CNTLID). */ 408 np = ctrlr->np; 409 mtx_lock(&np->lock); 410 TAILQ_REMOVE(&np->controllers, ctrlr, link); 411 wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers)); 412 mtx_unlock(&np->lock); 413 free_unr(np->ids, ctrlr->cntlid); 414 if (wakeup_np) 415 wakeup(np); 416 417 callout_drain(&ctrlr->ka_timer); 418 419 nvmft_printf(ctrlr, "association terminated\n"); 420 nvmft_controller_free(ctrlr); 421 nvmft_port_rele(np); 422 } 423 424 void 425 nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp, 426 int error) 427 { 428 /* 429 * If a queue pair is closed, that isn't an error per se. 430 * That just means additional commands cannot be received on 431 * that queue pair. 432 * 433 * If the admin queue pair is closed while idle or while 434 * shutting down, terminate the association immediately. 435 * 436 * If an I/O queue pair is closed, just ignore it. 437 */ 438 if (error == 0) { 439 if (qp != ctrlr->admin) 440 return; 441 442 mtx_lock(&ctrlr->lock); 443 if (ctrlr->shutdown) { 444 ctrlr->admin_closed = true; 445 mtx_unlock(&ctrlr->lock); 446 return; 447 } 448 449 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) { 450 MPASS(ctrlr->num_io_queues == 0); 451 mtx_unlock(&ctrlr->lock); 452 453 /* 454 * Ok to drop lock here since ctrlr->cc can't 455 * change if the admin queue pair has closed. 456 * This also means no new queues can be handed 457 * off, etc. Note that since there are no I/O 458 * queues, only the admin queue needs to be 459 * destroyed, so it is safe to skip 460 * nvmft_controller_shutdown and just schedule 461 * nvmft_controller_terminate. Note that we 462 * cannot call nvmft_controller_terminate from 463 * here directly as this is called from the 464 * transport layer and freeing the admin qpair 465 * might deadlock waiting for the current 466 * thread to exit. 467 */ 468 if (taskqueue_cancel_timeout(taskqueue_thread, 469 &ctrlr->terminate_task, NULL) == 0) 470 taskqueue_enqueue_timeout(taskqueue_thread, 471 &ctrlr->terminate_task, 0); 472 return; 473 } 474 475 /* 476 * Treat closing of the admin queue pair while enabled 477 * as a transport error. Note that the admin queue 478 * pair has been closed. 479 */ 480 ctrlr->admin_closed = true; 481 } else 482 mtx_lock(&ctrlr->lock); 483 484 /* Ignore transport errors if we are already shutting down. */ 485 if (ctrlr->shutdown) { 486 mtx_unlock(&ctrlr->lock); 487 return; 488 } 489 490 ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1); 491 ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN); 492 ctrlr->shutdown = true; 493 mtx_unlock(&ctrlr->lock); 494 495 callout_stop(&ctrlr->ka_timer); 496 taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task); 497 } 498 499 /* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */ 500 static struct mbuf * 501 m_getml(size_t len, int how) 502 { 503 struct mbuf *m, *n; 504 505 m = m_getm2(NULL, len, how, MT_DATA, 0); 506 if (m == NULL) 507 return (NULL); 508 for (n = m; len > 0; n = n->m_next) { 509 n->m_len = M_SIZE(n); 510 if (n->m_len >= len) { 511 n->m_len = len; 512 MPASS(n->m_next == NULL); 513 } 514 len -= n->m_len; 515 } 516 return (m); 517 } 518 519 static void 520 m_zero(struct mbuf *m, u_int offset, u_int len) 521 { 522 u_int todo; 523 524 if (len == 0) 525 return; 526 527 while (m->m_len <= offset) { 528 offset -= m->m_len; 529 m = m->m_next; 530 } 531 532 todo = m->m_len - offset; 533 if (todo > len) 534 todo = len; 535 memset(mtodo(m, offset), 0, todo); 536 m = m->m_next; 537 len -= todo; 538 539 while (len > 0) { 540 todo = m->m_len; 541 if (todo > len) 542 todo = len; 543 memset(mtod(m, void *), 0, todo); 544 m = m->m_next; 545 len -= todo; 546 } 547 } 548 549 static void 550 handle_get_log_page(struct nvmft_controller *ctrlr, 551 struct nvmf_capsule *nc, const struct nvme_command *cmd) 552 { 553 struct mbuf *m; 554 uint64_t offset; 555 uint32_t numd; 556 size_t len, todo; 557 u_int status; 558 uint8_t lid; 559 bool rae; 560 561 lid = le32toh(cmd->cdw10) & 0xff; 562 rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0; 563 numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16; 564 offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32; 565 566 if (offset % 3 != 0) { 567 status = NVME_SC_INVALID_FIELD; 568 goto done; 569 } 570 571 len = (numd + 1) * 4; 572 573 switch (lid) { 574 case NVME_LOG_ERROR: 575 todo = 0; 576 577 m = m_getml(len, M_WAITOK); 578 if (todo != len) 579 m_zero(m, todo, len - todo); 580 status = nvmf_send_controller_data(nc, 0, m, len); 581 MPASS(status != NVMF_MORE); 582 break; 583 case NVME_LOG_HEALTH_INFORMATION: 584 { 585 struct nvme_health_information_page hip; 586 587 if (offset >= sizeof(hip)) { 588 status = NVME_SC_INVALID_FIELD; 589 goto done; 590 } 591 todo = sizeof(hip) - offset; 592 if (todo > len) 593 todo = len; 594 595 mtx_lock(&ctrlr->lock); 596 hip = ctrlr->hip; 597 hip.controller_busy_time[0] = 598 sbintime_getsec(ctrlr->busy_total) / 60; 599 hip.power_on_hours[0] = 600 sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600; 601 mtx_unlock(&ctrlr->lock); 602 603 m = m_getml(len, M_WAITOK); 604 m_copyback(m, 0, todo, (char *)&hip + offset); 605 if (todo != len) 606 m_zero(m, todo, len - todo); 607 status = nvmf_send_controller_data(nc, 0, m, len); 608 MPASS(status != NVMF_MORE); 609 break; 610 } 611 case NVME_LOG_FIRMWARE_SLOT: 612 if (offset >= sizeof(ctrlr->np->fp)) { 613 status = NVME_SC_INVALID_FIELD; 614 goto done; 615 } 616 todo = sizeof(ctrlr->np->fp) - offset; 617 if (todo > len) 618 todo = len; 619 620 m = m_getml(len, M_WAITOK); 621 m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset); 622 if (todo != len) 623 m_zero(m, todo, len - todo); 624 status = nvmf_send_controller_data(nc, 0, m, len); 625 MPASS(status != NVMF_MORE); 626 break; 627 case NVME_LOG_CHANGED_NAMESPACE: 628 if (offset >= sizeof(*ctrlr->changed_ns)) { 629 status = NVME_SC_INVALID_FIELD; 630 goto done; 631 } 632 todo = sizeof(*ctrlr->changed_ns) - offset; 633 if (todo > len) 634 todo = len; 635 636 m = m_getml(len, M_WAITOK); 637 mtx_lock(&ctrlr->lock); 638 m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset); 639 if (offset == 0 && len == sizeof(*ctrlr->changed_ns)) 640 memset(ctrlr->changed_ns, 0, 641 sizeof(*ctrlr->changed_ns)); 642 if (!rae) 643 ctrlr->changed_ns_reported = false; 644 mtx_unlock(&ctrlr->lock); 645 if (todo != len) 646 m_zero(m, todo, len - todo); 647 status = nvmf_send_controller_data(nc, 0, m, len); 648 MPASS(status != NVMF_MORE); 649 break; 650 default: 651 nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n", 652 lid); 653 status = NVME_SC_INVALID_FIELD; 654 break; 655 } 656 657 done: 658 if (status == NVMF_SUCCESS_SENT) 659 nvmft_command_completed(ctrlr->admin, nc); 660 else 661 nvmft_send_generic_error(ctrlr->admin, nc, status); 662 nvmf_free_capsule(nc); 663 } 664 665 static void 666 m_free_nslist(struct mbuf *m) 667 { 668 free(m->m_ext.ext_arg1, M_NVMFT); 669 } 670 671 static void 672 handle_identify_command(struct nvmft_controller *ctrlr, 673 struct nvmf_capsule *nc, const struct nvme_command *cmd) 674 { 675 struct mbuf *m; 676 size_t data_len; 677 u_int status; 678 uint8_t cns; 679 680 cns = le32toh(cmd->cdw10) & 0xFF; 681 data_len = nvmf_capsule_data_len(nc); 682 if (data_len != sizeof(ctrlr->cdata)) { 683 nvmft_printf(ctrlr, 684 "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len, 685 cns); 686 nvmft_send_generic_error(ctrlr->admin, nc, 687 NVME_SC_INVALID_OPCODE); 688 nvmf_free_capsule(nc); 689 return; 690 } 691 692 switch (cns) { 693 case 0: /* Namespace data. */ 694 case 3: /* Namespace Identification Descriptor list. */ 695 nvmft_dispatch_command(ctrlr->admin, nc, true); 696 return; 697 case 1: 698 /* Controller data. */ 699 m = m_getml(sizeof(ctrlr->cdata), M_WAITOK); 700 m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata); 701 status = nvmf_send_controller_data(nc, 0, m, 702 sizeof(ctrlr->cdata)); 703 MPASS(status != NVMF_MORE); 704 break; 705 case 2: 706 { 707 /* Active namespace list. */ 708 struct nvme_ns_list *nslist; 709 uint32_t nsid; 710 711 nsid = le32toh(cmd->nsid); 712 if (nsid >= 0xfffffffe) { 713 status = NVME_SC_INVALID_FIELD; 714 break; 715 } 716 717 nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO); 718 nvmft_populate_active_nslist(ctrlr->np, nsid, nslist); 719 m = m_get(M_WAITOK, MT_DATA); 720 m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist, 721 nslist, NULL, 0, EXT_CTL); 722 m->m_len = sizeof(*nslist); 723 status = nvmf_send_controller_data(nc, 0, m, m->m_len); 724 MPASS(status != NVMF_MORE); 725 break; 726 } 727 default: 728 nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns); 729 status = NVME_SC_INVALID_FIELD; 730 break; 731 } 732 733 if (status == NVMF_SUCCESS_SENT) 734 nvmft_command_completed(ctrlr->admin, nc); 735 else 736 nvmft_send_generic_error(ctrlr->admin, nc, status); 737 nvmf_free_capsule(nc); 738 } 739 740 static void 741 handle_set_features(struct nvmft_controller *ctrlr, 742 struct nvmf_capsule *nc, const struct nvme_command *cmd) 743 { 744 struct nvme_completion cqe; 745 uint8_t fid; 746 747 fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10)); 748 switch (fid) { 749 case NVME_FEAT_NUMBER_OF_QUEUES: 750 { 751 uint32_t num_queues; 752 struct nvmft_io_qpair *io_qpairs; 753 754 num_queues = le32toh(cmd->cdw11) & 0xffff; 755 756 /* 5.12.1.7: 65535 is invalid. */ 757 if (num_queues == 65535) 758 goto error; 759 760 /* Fabrics requires the same number of SQs and CQs. */ 761 if (le32toh(cmd->cdw11) >> 16 != num_queues) 762 goto error; 763 764 /* Convert to 1's based */ 765 num_queues++; 766 767 io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs), 768 M_NVMFT, M_WAITOK | M_ZERO); 769 770 mtx_lock(&ctrlr->lock); 771 if (ctrlr->num_io_queues != 0) { 772 mtx_unlock(&ctrlr->lock); 773 free(io_qpairs, M_NVMFT); 774 nvmft_send_generic_error(ctrlr->admin, nc, 775 NVME_SC_COMMAND_SEQUENCE_ERROR); 776 nvmf_free_capsule(nc); 777 return; 778 } 779 780 ctrlr->num_io_queues = num_queues; 781 ctrlr->io_qpairs = io_qpairs; 782 mtx_unlock(&ctrlr->lock); 783 784 nvmft_init_cqe(&cqe, nc, 0); 785 cqe.cdw0 = cmd->cdw11; 786 nvmft_send_response(ctrlr->admin, &cqe); 787 nvmf_free_capsule(nc); 788 return; 789 } 790 case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: 791 { 792 uint32_t aer_mask; 793 794 aer_mask = le32toh(cmd->cdw11); 795 796 /* Check for any reserved or unimplemented feature bits. */ 797 if ((aer_mask & 0xffffc000) != 0) 798 goto error; 799 800 mtx_lock(&ctrlr->lock); 801 ctrlr->aer_mask = aer_mask; 802 mtx_unlock(&ctrlr->lock); 803 nvmft_send_success(ctrlr->admin, nc); 804 nvmf_free_capsule(nc); 805 return; 806 } 807 default: 808 nvmft_printf(ctrlr, 809 "Unsupported feature ID %u for SET_FEATURES\n", fid); 810 goto error; 811 } 812 813 error: 814 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD); 815 nvmf_free_capsule(nc); 816 } 817 818 static bool 819 update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown) 820 { 821 struct nvmft_port *np = ctrlr->np; 822 uint32_t changes; 823 824 *need_shutdown = false; 825 826 mtx_lock(&ctrlr->lock); 827 828 /* Don't allow any changes while shutting down. */ 829 if (ctrlr->shutdown) { 830 mtx_unlock(&ctrlr->lock); 831 return (false); 832 } 833 834 if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) { 835 mtx_unlock(&ctrlr->lock); 836 return (false); 837 } 838 839 changes = ctrlr->cc ^ new_cc; 840 ctrlr->cc = new_cc; 841 842 /* Handle shutdown requests. */ 843 if (NVMEV(NVME_CC_REG_SHN, changes) != 0 && 844 NVMEV(NVME_CC_REG_SHN, new_cc) != 0) { 845 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST); 846 ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING); 847 ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN); 848 ctrlr->shutdown = true; 849 *need_shutdown = true; 850 nvmft_printf(ctrlr, "shutdown requested\n"); 851 } 852 853 if (NVMEV(NVME_CC_REG_EN, changes) != 0) { 854 if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) { 855 /* Controller reset. */ 856 nvmft_printf(ctrlr, "reset requested\n"); 857 ctrlr->shutdown = true; 858 *need_shutdown = true; 859 } else 860 ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1); 861 } 862 mtx_unlock(&ctrlr->lock); 863 864 return (true); 865 } 866 867 static void 868 handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc, 869 const struct nvmf_fabric_prop_get_cmd *pget) 870 { 871 struct nvmf_fabric_prop_get_rsp rsp; 872 873 nvmft_init_cqe(&rsp, nc, 0); 874 875 switch (le32toh(pget->ofst)) { 876 case NVMF_PROP_CAP: 877 if (pget->attrib.size != NVMF_PROP_SIZE_8) 878 goto error; 879 rsp.value.u64 = htole64(ctrlr->np->cap); 880 break; 881 case NVMF_PROP_VS: 882 if (pget->attrib.size != NVMF_PROP_SIZE_4) 883 goto error; 884 rsp.value.u32.low = ctrlr->cdata.ver; 885 break; 886 case NVMF_PROP_CC: 887 if (pget->attrib.size != NVMF_PROP_SIZE_4) 888 goto error; 889 rsp.value.u32.low = htole32(ctrlr->cc); 890 break; 891 case NVMF_PROP_CSTS: 892 if (pget->attrib.size != NVMF_PROP_SIZE_4) 893 goto error; 894 rsp.value.u32.low = htole32(ctrlr->csts); 895 break; 896 default: 897 goto error; 898 } 899 900 nvmft_send_response(ctrlr->admin, &rsp); 901 return; 902 error: 903 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD); 904 } 905 906 static void 907 handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc, 908 const struct nvmf_fabric_prop_set_cmd *pset) 909 { 910 bool need_shutdown; 911 912 need_shutdown = false; 913 switch (le32toh(pset->ofst)) { 914 case NVMF_PROP_CC: 915 if (pset->attrib.size != NVMF_PROP_SIZE_4) 916 goto error; 917 if (!update_cc(ctrlr, le32toh(pset->value.u32.low), 918 &need_shutdown)) 919 goto error; 920 break; 921 default: 922 goto error; 923 } 924 925 nvmft_send_success(ctrlr->admin, nc); 926 if (need_shutdown) { 927 callout_stop(&ctrlr->ka_timer); 928 taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task); 929 } 930 return; 931 error: 932 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD); 933 } 934 935 static void 936 handle_admin_fabrics_command(struct nvmft_controller *ctrlr, 937 struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc) 938 { 939 switch (fc->fctype) { 940 case NVMF_FABRIC_COMMAND_PROPERTY_GET: 941 handle_property_get(ctrlr, nc, 942 (const struct nvmf_fabric_prop_get_cmd *)fc); 943 break; 944 case NVMF_FABRIC_COMMAND_PROPERTY_SET: 945 handle_property_set(ctrlr, nc, 946 (const struct nvmf_fabric_prop_set_cmd *)fc); 947 break; 948 case NVMF_FABRIC_COMMAND_CONNECT: 949 nvmft_printf(ctrlr, 950 "CONNECT command on connected admin queue\n"); 951 nvmft_send_generic_error(ctrlr->admin, nc, 952 NVME_SC_COMMAND_SEQUENCE_ERROR); 953 break; 954 case NVMF_FABRIC_COMMAND_DISCONNECT: 955 nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n"); 956 nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC, 957 NVMF_FABRIC_SC_INVALID_QUEUE_TYPE); 958 break; 959 default: 960 nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n", 961 fc->fctype); 962 nvmft_send_generic_error(ctrlr->admin, nc, 963 NVME_SC_INVALID_OPCODE); 964 break; 965 } 966 nvmf_free_capsule(nc); 967 } 968 969 void 970 nvmft_handle_admin_command(struct nvmft_controller *ctrlr, 971 struct nvmf_capsule *nc) 972 { 973 const struct nvme_command *cmd = nvmf_capsule_sqe(nc); 974 975 /* Only permit Fabrics commands while a controller is disabled. */ 976 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 && 977 cmd->opc != NVME_OPC_FABRICS_COMMANDS) { 978 nvmft_printf(ctrlr, 979 "Unsupported admin opcode %#x while disabled\n", cmd->opc); 980 nvmft_send_generic_error(ctrlr->admin, nc, 981 NVME_SC_COMMAND_SEQUENCE_ERROR); 982 nvmf_free_capsule(nc); 983 return; 984 } 985 986 atomic_store_int(&ctrlr->ka_active_traffic, 1); 987 988 switch (cmd->opc) { 989 case NVME_OPC_GET_LOG_PAGE: 990 handle_get_log_page(ctrlr, nc, cmd); 991 break; 992 case NVME_OPC_IDENTIFY: 993 handle_identify_command(ctrlr, nc, cmd); 994 break; 995 case NVME_OPC_SET_FEATURES: 996 handle_set_features(ctrlr, nc, cmd); 997 break; 998 case NVME_OPC_ASYNC_EVENT_REQUEST: 999 mtx_lock(&ctrlr->lock); 1000 if (ctrlr->aer_pending == NVMFT_NUM_AER) { 1001 mtx_unlock(&ctrlr->lock); 1002 nvmft_send_error(ctrlr->admin, nc, 1003 NVME_SCT_COMMAND_SPECIFIC, 1004 NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED); 1005 } else { 1006 /* NB: Store the CID without byte-swapping. */ 1007 ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid; 1008 ctrlr->aer_pending++; 1009 ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER; 1010 mtx_unlock(&ctrlr->lock); 1011 } 1012 nvmf_free_capsule(nc); 1013 break; 1014 case NVME_OPC_KEEP_ALIVE: 1015 nvmft_send_success(ctrlr->admin, nc); 1016 nvmf_free_capsule(nc); 1017 break; 1018 case NVME_OPC_FABRICS_COMMANDS: 1019 handle_admin_fabrics_command(ctrlr, nc, 1020 (const struct nvmf_fabric_cmd *)cmd); 1021 break; 1022 default: 1023 nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc); 1024 nvmft_send_generic_error(ctrlr->admin, nc, 1025 NVME_SC_INVALID_OPCODE); 1026 nvmf_free_capsule(nc); 1027 break; 1028 } 1029 } 1030 1031 void 1032 nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid, 1033 struct nvmf_capsule *nc) 1034 { 1035 struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp); 1036 const struct nvme_command *cmd = nvmf_capsule_sqe(nc); 1037 1038 atomic_store_int(&ctrlr->ka_active_traffic, 1); 1039 1040 switch (cmd->opc) { 1041 case NVME_OPC_FLUSH: 1042 if (cmd->nsid == htole32(0xffffffff)) { 1043 nvmft_send_generic_error(qp, nc, 1044 NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 1045 nvmf_free_capsule(nc); 1046 break; 1047 } 1048 /* FALLTHROUGH */ 1049 case NVME_OPC_WRITE: 1050 case NVME_OPC_READ: 1051 case NVME_OPC_WRITE_UNCORRECTABLE: 1052 case NVME_OPC_COMPARE: 1053 case NVME_OPC_WRITE_ZEROES: 1054 case NVME_OPC_DATASET_MANAGEMENT: 1055 case NVME_OPC_VERIFY: 1056 nvmft_dispatch_command(qp, nc, false); 1057 break; 1058 default: 1059 nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc); 1060 nvmft_send_generic_error(qp, nc, 1061 NVME_SC_INVALID_OPCODE); 1062 nvmf_free_capsule(nc); 1063 break; 1064 } 1065 } 1066 1067 static void 1068 nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask, 1069 u_int type, uint8_t info, uint8_t log_page_id) 1070 { 1071 struct nvme_completion cpl; 1072 1073 MPASS(type <= 7); 1074 1075 /* Drop events that are not enabled. */ 1076 mtx_lock(&ctrlr->lock); 1077 if ((ctrlr->aer_mask & aer_mask) == 0) { 1078 mtx_unlock(&ctrlr->lock); 1079 return; 1080 } 1081 1082 /* 1083 * If there is no pending AER command, drop it. 1084 * XXX: Should we queue these? 1085 */ 1086 if (ctrlr->aer_pending == 0) { 1087 mtx_unlock(&ctrlr->lock); 1088 nvmft_printf(ctrlr, 1089 "dropping AER type %u, info %#x, page %#x\n", 1090 type, info, log_page_id); 1091 return; 1092 } 1093 1094 memset(&cpl, 0, sizeof(cpl)); 1095 cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx]; 1096 ctrlr->aer_pending--; 1097 ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER; 1098 mtx_unlock(&ctrlr->lock); 1099 1100 cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) | 1101 NVMEF(NVME_ASYNC_EVENT_INFO, info) | 1102 NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id)); 1103 1104 nvmft_send_response(ctrlr->admin, &cpl); 1105 } 1106 1107 void 1108 nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id) 1109 { 1110 struct nvme_ns_list *nslist; 1111 uint32_t new_nsid, nsid; 1112 u_int i; 1113 1114 new_nsid = lun_id + 1; 1115 1116 mtx_lock(&ctrlr->lock); 1117 nslist = ctrlr->changed_ns; 1118 1119 /* If the first entry is 0xffffffff, the list is already full. */ 1120 if (nslist->ns[0] != 0xffffffff) { 1121 /* Find the insertion point for this namespace ID. */ 1122 for (i = 0; i < nitems(nslist->ns); i++) { 1123 nsid = le32toh(nslist->ns[i]); 1124 if (nsid == new_nsid) { 1125 /* Already reported, nothing to do. */ 1126 mtx_unlock(&ctrlr->lock); 1127 return; 1128 } 1129 1130 if (nsid == 0 || nsid > new_nsid) 1131 break; 1132 } 1133 1134 if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) { 1135 /* List is full. */ 1136 memset(ctrlr->changed_ns, 0, 1137 sizeof(*ctrlr->changed_ns)); 1138 ctrlr->changed_ns->ns[0] = 0xffffffff; 1139 } else if (nslist->ns[i] == htole32(0)) { 1140 /* 1141 * Optimize case where this ID is appended to 1142 * the end. 1143 */ 1144 nslist->ns[i] = htole32(new_nsid); 1145 } else { 1146 memmove(&nslist->ns[i + 1], &nslist->ns[i], 1147 (nitems(nslist->ns) - i - 1) * 1148 sizeof(nslist->ns[0])); 1149 nslist->ns[i] = htole32(new_nsid); 1150 } 1151 } 1152 1153 if (ctrlr->changed_ns_reported) { 1154 mtx_unlock(&ctrlr->lock); 1155 return; 1156 } 1157 ctrlr->changed_ns_reported = true; 1158 mtx_unlock(&ctrlr->lock); 1159 1160 nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0, 1161 NVME_LOG_CHANGED_NAMESPACE); 1162 } 1163