/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <libnvmf.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "internal.h"

/*
 * State for the single active I/O controller.  Fields that are
 * mutated after the admin queue is established (queue counts, qpair
 * and socket arrays) are protected by io_na_mutex.
 */
struct io_controller {
	struct controller *c;

	u_int num_io_queues;
	u_int active_io_queues;
	struct nvmf_qpair **io_qpairs;
	int *io_sockets;

	struct nvme_firmware_page fp;
	struct nvme_health_information_page hip;
	uint16_t partial_dur;	/* residual 512B units read, < 1000 */
	uint16_t partial_duw;	/* residual 512B units written, < 1000 */

	uint16_t cntlid;
	char hostid[16];
	char hostnqn[NVME_NQN_FIELD_SIZE];
};

static struct nvmf_association *io_na;
static pthread_cond_t io_cond;
static pthread_mutex_t io_na_mutex;
static struct io_controller *io_controller;
static const char *nqn;
static char serial[NVME_SERIAL_NUMBER_LENGTH];

/*
 * Create the I/O controller association and derive the controller
 * serial number from the kern.hostid sysctl.  Exits on failure.
 */
void
init_io(const char *subnqn)
{
	struct nvmf_association_params aparams;
	u_long hostid;
	size_t len;

	memset(&aparams, 0, sizeof(aparams));
	aparams.sq_flow_control = !flow_control_disable;
	aparams.dynamic_controller_model = true;
	aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
	aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES;
	aparams.tcp.pda = 0;
	aparams.tcp.header_digests = header_digests;
	aparams.tcp.data_digests = data_digests;
	aparams.tcp.maxh2cdata = maxh2cdata;
	io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true,
	    &aparams);
	if (io_na == NULL)
		err(1, "Failed to create I/O controller association");

	nqn = subnqn;

	/* Generate a serial number from the kern.hostid node. */
	len = sizeof(hostid);
	if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1)
		err(1, "sysctl: kern.hostid");

	nvmf_controller_serial(serial, sizeof(serial), hostid);

	pthread_cond_init(&io_cond, NULL);
	pthread_mutex_init(&io_na_mutex, NULL);

	if (kernel_io)
		init_ctl_port(subnqn, &aparams);
}

void
shutdown_io(void)
{
	if (kernel_io)
		shutdown_ctl_port(nqn);
}

/*
 * GET_LOG_PAGE supporting the Error, Health Information, and Firmware
 * Slot pages.  Unsupported pages, misaligned offsets, and truncated
 * reads fail with Invalid Field.
 */
static void
handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	uint64_t offset;
	uint32_t numd;
	size_t len;
	uint8_t lid;

	lid = le32toh(cmd->cdw10) & 0xff;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* The log page offset must be dword aligned. */
	if (offset % 4 != 0)
		goto error;

	/* NUMD is a 0's based count of dwords. */
	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
	{
		void *buf;

		if (len % sizeof(struct nvme_error_information_entry) != 0)
			goto error;

		/* No errors have been recorded; return zeroed entries. */
		buf = calloc(1, len);
		if (buf == NULL) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INTERNAL_DEVICE_ERROR);
			return;
		}
		nvmf_send_controller_data(nc, buf, len);
		free(buf);
		return;
	}
	case NVME_LOG_HEALTH_INFORMATION:
		if (len != sizeof(ioc->hip))
			goto error;

		nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
		return;
	case NVME_LOG_FIRMWARE_SLOT:
		if (len != sizeof(ioc->fp))
			goto error;

		nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
		return;
	default:
		warnx("Unsupported page %#x for GET_LOG_PAGE", lid);
		goto error;
	}

error:
	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}

/*
 * IDENTIFY with the CNS values a host needs when attached to an I/O
 * controller.  Returns false for unsupported CNS values so that the
 * generic controller code reports the failure.
 */
static bool
handle_io_identify_command(const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	struct nvme_namespace_data nsdata;
	struct nvme_ns_list nslist;
	uint32_t nsid;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	switch (cns) {
	case 0:	/* Namespace data. */
		if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			return (true);
		}

		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
		return (true);
	case 2:	/* Active namespace list. */
		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
			return (true);
		}

		device_active_nslist(nsid, &nslist);
		nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
		return (true);
	case 3:	/* Namespace Identification Descriptor list. */
		if (!device_identification_descriptor(le32toh(cmd->nsid),
		    &nsdata)) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			return (true);
		}

		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
		return (true);
	default:
		return (false);
	}
}

/*
 * SET_FEATURES supporting Number of Queues and Asynchronous Event
 * Configuration.  Anything else fails with Invalid Field.
 */
static void
handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;

		/* The queue count may only be set once per association. */
		if (ioc->num_io_queues != 0) {
			nvmf_send_generic_error(nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			return;
		}

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		/* Lock to synchronize with handle_io_qpair. */
		pthread_mutex_lock(&io_na_mutex);
		ioc->num_io_queues = num_queues;
		ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs));
		ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets));
		if (ioc->io_qpairs == NULL || ioc->io_sockets == NULL)
			err(1, "calloc");
		pthread_mutex_unlock(&io_na_mutex);

		nvmf_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmf_send_response(nc, &cqe);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		/* No AERs are generated by this daemon. */
		nvmf_send_success(nc);
		return;
	}
	default:
		warnx("Unsupported feature ID %u for SET_FEATURES", fid);
		goto error;
	}

error:
	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}

/*
 * Dispatch admin commands not handled by the generic controller code.
 * Returns false to let the caller fail the command.
 */
static bool
admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
    void *arg)
{
	struct io_controller *ioc = arg;

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ioc, nc, cmd);
		return (true);
	case NVME_OPC_IDENTIFY:
		return (handle_io_identify_command(nc, cmd));
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ioc, nc, cmd);
		return (true);
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		/* Ignore and never complete. */
		return (true);
	case NVME_OPC_KEEP_ALIVE:
		nvmf_send_success(nc);
		return (true);
	default:
		return (false);
	}
}

/*
 * Service the admin queue until the host disconnects, then tear down
 * any I/O queues and free the controller.
 */
static void
handle_admin_qpair(struct io_controller *ioc)
{
	pthread_setname_np(pthread_self(), "admin queue");

	controller_handle_admin_commands(ioc->c, admin_command, ioc);

	pthread_mutex_lock(&io_na_mutex);
	for (u_int i = 0; i < ioc->num_io_queues; i++) {
		if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
			continue;
		close(ioc->io_sockets[i]);
		ioc->io_sockets[i] = -1;
	}

	/* Wait for I/O threads to notice. */
	while (ioc->active_io_queues > 0)
		pthread_cond_wait(&io_cond, &io_na_mutex);

	io_controller = NULL;
	pthread_mutex_unlock(&io_na_mutex);

	free_controller(ioc->c);

	free(ioc);
}

/*
 * Handle a Fabrics command on an I/O queue.  Returns true if the host
 * requested a disconnect.
 */
static bool
handle_io_fabrics_command(const struct nvmf_capsule *nc,
    const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_CONNECT:
		warnx("CONNECT command on connected queue");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
	{
		const struct nvmf_fabric_disconnect_cmd *dis =
		    (const struct nvmf_fabric_disconnect_cmd *)fc;
		if (dis->recfmt != htole16(0)) {
			nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
			    NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
			break;
		}
		nvmf_send_success(nc);
		return (true);
	}
	default:
		warnx("Unsupported fabrics command %#x", fc->fctype);
		nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
		break;
	}

	return (false);
}

/*
 * Add 'addend' to a 128-bit little-endian counter stored as two
 * 64-bit halves, propagating a carry into the high half.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] = htole64(le64toh(pair[1]) + 1);
}

/* Starting LBA from CDW10 (low) and CDW11 (high). */
static uint64_t
cmd_lba(const struct nvme_command *cmd)
{
	return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10));
}

/* Number of logical blocks: CDW12 bits 15:0 are a 0's based count. */
static u_int
cmd_nlb(const struct nvme_command *cmd)
{
	return ((le32toh(cmd->cdw12) & 0xffff) + 1);
}

/*
 * Execute a READ and account for it in the health information page.
 * Data units are counted in thousands of 512 byte units, carrying the
 * remainder in partial_dur.
 */
static void
handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	size_t len;

	len = nvmf_capsule_data_len(nc);
	device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
	hip_add(ioc->hip.host_read_commands, 1);

	len /= 512;
	len += ioc->partial_dur;
	if (len >= 1000)
		hip_add(ioc->hip.data_units_read, len / 1000);
	ioc->partial_dur = len % 1000;
}

/*
 * Execute a WRITE and account for it in the health information page,
 * mirroring handle_read.
 */
static void
handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	size_t len;

	len = nvmf_capsule_data_len(nc);
	device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
	hip_add(ioc->hip.host_write_commands, 1);

	len /= 512;
	len += ioc->partial_duw;
	if (len >= 1000)
		hip_add(ioc->hip.data_units_written, len / 1000);
	ioc->partial_duw = len % 1000;
}

static void
handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	device_flush(le32toh(cmd->nsid), nc);
}

/*
 * Receive and dispatch capsules on an I/O queue until the connection
 * drops or the host sends a DISCONNECT.  Returns true if the host
 * disconnected cleanly.
 */
static bool
handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
{
	const struct nvme_command *cmd;
	struct nvmf_capsule *nc;
	int error;
	bool disconnect;

	disconnect = false;

	while (!disconnect) {
		error = nvmf_controller_receive_capsule(qp, &nc);
		if (error != 0) {
			if (error != ECONNRESET)
				warnc(error, "Failed to read command capsule");
			break;
		}

		cmd = nvmf_capsule_sqe(nc);

		switch (cmd->opc) {
		case NVME_OPC_FLUSH:
			if (cmd->nsid == htole32(0xffffffff)) {
				nvmf_send_generic_error(nc,
				    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
				break;
			}
			handle_flush(nc, cmd);
			break;
		case NVME_OPC_WRITE:
			handle_write(ioc, nc, cmd);
			break;
		case NVME_OPC_READ:
			handle_read(ioc, nc, cmd);
			break;
		case NVME_OPC_FABRICS_COMMANDS:
			disconnect = handle_io_fabrics_command(nc,
			    (const struct nvmf_fabric_cmd *)cmd);
			break;
		default:
			warnx("Unsupported NVM opcode %#x", cmd->opc);
			nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
			break;
		}
		nvmf_free_capsule(nc);
	}

	return (disconnect);
}

/*
 * Per-thread body for an I/O queue.  On exit, tears down this queue's
 * slot and wakes the admin thread when the last I/O queue drains.
 */
static void
handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
{
	char name[64];
	bool disconnect;

	snprintf(name, sizeof(name), "I/O queue %d", qid);
	pthread_setname_np(pthread_self(), name);

	disconnect = handle_io_commands(ioc, qp);

	pthread_mutex_lock(&io_na_mutex);
	if (disconnect)
		ioc->io_qpairs[qid - 1] = NULL;
	if (ioc->io_sockets[qid - 1] != -1) {
		close(ioc->io_sockets[qid - 1]);
		ioc->io_sockets[qid - 1] = -1;
	}
	ioc->active_io_queues--;
	if (ioc->active_io_queues == 0)
		pthread_cond_broadcast(&io_cond);
	pthread_mutex_unlock(&io_na_mutex);
}

/*
 * Complete an admin-queue CONNECT: allocate the controller state,
 * publish it as the single active controller, and run the admin loop
 * until the host goes away.
 */
static void
connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvme_controller_data cdata;
	struct io_controller *ioc;
	int error;

	/* Can only have one active I/O controller at a time. */
	pthread_mutex_lock(&io_na_mutex);
	if (io_controller != NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_CONTROLLER_BUSY);
		goto error;
	}

	error = nvmf_finish_accept(nc, 2);
	if (error != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnc(error, "Failed to send CONNECT response");
		goto error;
	}

	ioc = calloc(1, sizeof(*ioc));
	if (ioc == NULL)
		err(1, "calloc");
	ioc->cntlid = 2;
	memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
	memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));

	nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
	    NVMF_IOCCSZ, &cdata);

	ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));

	ioc->hip.power_cycles[0] = 1;

	ioc->c = init_controller(qp, &cdata);

	io_controller = ioc;
	pthread_mutex_unlock(&io_na_mutex);

	nvmf_free_capsule(nc);

	handle_admin_qpair(ioc);
	close(s);
	return;

error:
	nvmf_free_capsule(nc);
	close(s);
}

/*
 * Complete an I/O-queue CONNECT after validating it against the
 * active controller, then service the queue on this thread.
 */
static void
connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
    const struct nvmf_fabric_connect_data *data, uint16_t qid)
{
	struct io_controller *ioc;
	int error;

	pthread_mutex_lock(&io_na_mutex);
	if (io_controller == NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create I/O qpair without admin qpair");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}

	if (memcmp(io_controller->hostid, data->hostid,
	    sizeof(data->hostid)) != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("hostid mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		goto error;
	}
	if (le16toh(data->cntlid) != io_controller->cntlid) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("cntlid mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		goto error;
	}
	if (memcmp(io_controller->hostnqn, data->hostnqn,
	    sizeof(data->hostnqn)) != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("host NQN mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		goto error;
	}

	if (io_controller->num_io_queues == 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create I/O qpair without enabled queues");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}
	if (qid > io_controller->num_io_queues) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create invalid I/O qpair %u", qid);
		nvmf_connect_invalid_parameters(nc, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		goto error;
	}
	if (io_controller->io_qpairs[qid - 1] != NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to re-create I/O qpair %u", qid);
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}

	error = nvmf_finish_accept(nc, io_controller->cntlid);
	if (error != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnc(error, "Failed to send CONNECT response");
		goto error;
	}

	ioc = io_controller;
	ioc->active_io_queues++;
	ioc->io_qpairs[qid - 1] = qp;
	ioc->io_sockets[qid - 1] = s;
	pthread_mutex_unlock(&io_na_mutex);

	nvmf_free_capsule(nc);

	handle_io_qpair(ioc, qp, qid);
	return;

error:
	nvmf_free_capsule(nc);
	close(s);
}

/*
 * Detached per-connection thread: accept the CONNECT capsule and hand
 * the queue to the admin or I/O path (or to CTL for kernel mode).
 */
static void *
io_socket_thread(void *arg)
{
	struct nvmf_fabric_connect_data data;
	struct nvmf_qpair_params qparams;
	const struct nvmf_fabric_connect_cmd *cmd;
	struct nvmf_capsule *nc;
	struct nvmf_qpair *qp;
	int s;

	pthread_detach(pthread_self());

	s = (intptr_t)arg;
	memset(&qparams, 0, sizeof(qparams));
	qparams.tcp.fd = s;

	nc = NULL;
	qp = nvmf_accept(io_na, &qparams, &nc, &data);
	if (qp == NULL) {
		warnx("Failed to create I/O qpair: %s",
		    nvmf_association_error(io_na));
		goto error;
	}

	if (kernel_io) {
		ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
		goto error;
	}

	if (strcmp(data.subnqn, nqn) != 0) {
		/* warnx: no errno is associated with this failure. */
		warnx("I/O qpair with invalid SubNQN: %.*s",
		    (int)sizeof(data.subnqn), data.subnqn);
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, subnqn));
		goto error;
	}

	/* Is this an admin or I/O queue pair? */
	cmd = nvmf_capsule_sqe(nc);
	if (cmd->qid == 0)
		connect_admin_qpair(s, qp, nc, &data);
	else
		connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
	nvmf_free_qpair(qp);
	return (NULL);

error:
	if (nc != NULL)
		nvmf_free_capsule(nc);
	if (qp != NULL)
		nvmf_free_qpair(qp);
	close(s);
	return (NULL);
}

/*
 * Spin up a thread to service a freshly accepted TCP connection.
 * Closes the socket if the thread cannot be created.
 */
void
handle_io_socket(int s)
{
	pthread_t thr;
	int error;

	error = pthread_create(&thr, NULL, io_socket_thread,
	    (void *)(uintptr_t)s);
	if (error != 0) {
		warnc(error, "Failed to create I/O qpair thread");
		close(s);
	}
}