/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <libnvmf.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "internal.h"

/*
 * State for the single active I/O controller.  The pointer to the
 * active controller (io_controller) and the queue bookkeeping fields
 * are protected by io_na_mutex; io_cond is signalled when the last
 * I/O queue thread exits so the admin thread can tear down.
 */
struct io_controller {
	struct controller *c;

	u_int num_io_queues;	/* 0 until SET_FEATURES(NUMBER_OF_QUEUES) */
	u_int active_io_queues;	/* number of live I/O queue threads */
	struct nvmf_qpair **io_qpairs;
	int *io_sockets;

	struct nvme_firmware_page fp;
	struct nvme_health_information_page hip;
	uint16_t partial_dur;	/* leftover read 512-byte blocks (< 1000) */
	uint16_t partial_duw;	/* leftover write 512-byte blocks (< 1000) */

	uint16_t cntlid;
	char hostid[16];
	char hostnqn[NVME_NQN_FIELD_SIZE];
};

static struct nvmf_association *io_na;
static pthread_cond_t io_cond;
static pthread_mutex_t io_na_mutex;
static struct io_controller *io_controller;
static const char *nqn;
static char serial[NVME_SERIAL_NUMBER_LENGTH];

/*
 * Create the I/O controller association and derive a serial number
 * from the kern.hostid sysctl.  Must be called once before any
 * connections are accepted.
 */
void
init_io(const char *subnqn)
{
	struct nvmf_association_params aparams;
	u_long hostid;
	size_t len;

	memset(&aparams, 0, sizeof(aparams));
	aparams.sq_flow_control = !flow_control_disable;
	aparams.dynamic_controller_model = true;
	aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES;
	aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES;
	aparams.tcp.pda = 0;
	aparams.tcp.header_digests = header_digests;
	aparams.tcp.data_digests = data_digests;
	aparams.tcp.maxr2t = 1;
	aparams.tcp.maxh2cdata = 256 * 1024;
	io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true,
	    &aparams);
	if (io_na == NULL)
		err(1, "Failed to create I/O controller association");

	nqn = subnqn;

	/* Generate a serial number from the kern.hostid node. */
	len = sizeof(hostid);
	if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1)
		err(1, "sysctl: kern.hostid");

	nvmf_controller_serial(serial, sizeof(serial), hostid);

	pthread_cond_init(&io_cond, NULL);
	pthread_mutex_init(&io_na_mutex, NULL);

	if (kernel_io)
		init_ctl_port(subnqn, &aparams);
}

void
shutdown_io(void)
{
	if (kernel_io)
		shutdown_ctl_port(nqn);
}

/*
 * GET_LOG_PAGE: supports the Error, Health Information, and Firmware
 * Slot pages.  Invalid offsets, lengths, or page IDs are answered with
 * an Invalid Field status.
 */
static void
handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	uint64_t offset;
	uint32_t numd;
	size_t len;
	uint8_t lid;

	lid = le32toh(cmd->cdw10) & 0xff;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* The log page offset must be dword (4-byte) aligned. */
	if (offset % 4 != 0)
		goto error;

	/* NUMD is 0's based; widen before adding to avoid 32-bit wrap. */
	len = ((uint64_t)numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
	{
		void *buf;

		if (len % sizeof(struct nvme_error_information_entry) != 0)
			goto error;

		/* No errors are recorded; return zeroed entries. */
		buf = calloc(1, len);
		if (buf == NULL)
			goto error;
		nvmf_send_controller_data(nc, buf, len);
		free(buf);
		return;
	}
	case NVME_LOG_HEALTH_INFORMATION:
		if (len != sizeof(ioc->hip))
			goto error;

		nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip));
		return;
	case NVME_LOG_FIRMWARE_SLOT:
		if (len != sizeof(ioc->fp))
			goto error;

		nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp));
		return;
	default:
		warnx("Unsupported page %#x for GET_LOG_PAGE", lid);
		goto error;
	}

error:
	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}

/*
 * IDENTIFY subset needed by an I/O controller: namespace data (CNS 0),
 * active namespace list (CNS 2), and NS identification descriptor list
 * (CNS 3).  Returns false for CNS values left to the generic handler.
 */
static bool
handle_io_identify_command(const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	struct nvme_namespace_data nsdata;
	struct nvme_ns_list nslist;
	uint32_t nsid;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	switch (cns) {
	case 0:	/* Namespace data. */
		if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			return (true);
		}

		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
		return (true);
	case 2:	/* Active namespace list. */
		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
			return (true);
		}

		device_active_nslist(nsid, &nslist);
		nvmf_send_controller_data(nc, &nslist, sizeof(nslist));
		return (true);
	case 3:	/* Namespace Identification Descriptor list. */
		/* The descriptor list is returned in an nsdata-sized buffer. */
		if (!device_identification_descriptor(le32toh(cmd->nsid),
		    &nsdata)) {
			nvmf_send_generic_error(nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			return (true);
		}

		nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata));
		return (true);
	default:
		return (false);
	}
}

/*
 * SET_FEATURES: only Number of Queues and Async Event Configuration
 * are implemented.  Number of Queues may only be set once per
 * controller instance.
 */
static void
handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;

		if (ioc->num_io_queues != 0) {
			nvmf_send_generic_error(nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			return;
		}

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		/* Lock to synchronize with handle_io_qpair. */
		pthread_mutex_lock(&io_na_mutex);
		ioc->num_io_queues = num_queues;
		/* NOTE(review): calloc failures are not handled here. */
		ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs));
		ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets));
		pthread_mutex_unlock(&io_na_mutex);

		nvmf_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmf_send_response(nc, &cqe);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		/* No AERs are generated by this daemon. */
		nvmf_send_success(nc);
		return;
	}
	default:
		warnx("Unsupported feature ID %u for SET_FEATURES", fid);
		goto error;
	}

error:
	nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
}

/*
 * Admin command dispatcher registered with the generic controller
 * code.  Returns true if the command was consumed here.
 */
static bool
admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd,
    void *arg)
{
	struct io_controller *ioc = arg;

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ioc, nc, cmd);
		return (true);
	case NVME_OPC_IDENTIFY:
		return (handle_io_identify_command(nc, cmd));
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ioc, nc, cmd);
		return (true);
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		/* Ignore and never complete. */
		return (true);
	case NVME_OPC_KEEP_ALIVE:
		nvmf_send_success(nc);
		return (true);
	default:
		return (false);
	}
}

/*
 * Run the admin queue to completion, then force the I/O queue threads
 * to exit by closing their sockets, wait for them, and release the
 * controller.
 */
static void
handle_admin_qpair(struct io_controller *ioc)
{
	pthread_setname_np(pthread_self(), "admin queue");

	controller_handle_admin_commands(ioc->c, admin_command, ioc);

	pthread_mutex_lock(&io_na_mutex);
	for (u_int i = 0; i < ioc->num_io_queues; i++) {
		if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1)
			continue;
		close(ioc->io_sockets[i]);
		ioc->io_sockets[i] = -1;
	}

	/* Wait for I/O threads to notice. */
	while (ioc->active_io_queues > 0)
		pthread_cond_wait(&io_cond, &io_na_mutex);

	io_controller = NULL;
	pthread_mutex_unlock(&io_na_mutex);

	free_controller(ioc->c);

	free(ioc);
}

/*
 * Fabrics commands on a connected I/O queue.  Returns true when the
 * host requested a disconnect.
 */
static bool
handle_io_fabrics_command(const struct nvmf_capsule *nc,
    const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_CONNECT:
		warnx("CONNECT command on connected queue");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
	{
		const struct nvmf_fabric_disconnect_cmd *dis =
		    (const struct nvmf_fabric_disconnect_cmd *)fc;
		if (dis->recfmt != htole16(0)) {
			nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
			    NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT);
			break;
		}
		nvmf_send_success(nc);
		return (true);
	}
	default:
		warnx("Unsupported fabrics command %#x", fc->fctype);
		nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
		break;
	}

	return (false);
}

/*
 * Add to a little-endian 128-bit counter stored as two 64-bit words
 * (used by the SMART / Health Information page).
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] = htole64(le64toh(pair[1]) + 1);
}

/* Starting LBA from CDW10 (low) and CDW11 (high). */
static uint64_t
cmd_lba(const struct nvme_command *cmd)
{
	return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10));
}

/* Number of logical blocks (0's based in CDW12). */
static u_int
cmd_nlb(const struct nvme_command *cmd)
{
	return ((le32toh(cmd->cdw12) & 0xffff) + 1);
}

/*
 * Service a READ and update Health Information statistics.  Data
 * units are counted in multiples of 1000 512-byte blocks; the
 * remainder is carried in partial_dur.
 */
static void
handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	size_t len;

	len = nvmf_capsule_data_len(nc);
	device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
	hip_add(ioc->hip.host_read_commands, 1);

	len /= 512;
	len += ioc->partial_dur;
	if (len >= 1000)
		hip_add(ioc->hip.data_units_read, len / 1000);
	ioc->partial_dur = len % 1000;
}

/*
 * Service a WRITE and update Health Information statistics, carrying
 * partial data units in partial_duw.
 */
static void
handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc,
    const struct nvme_command *cmd)
{
	size_t len;

	len = nvmf_capsule_data_len(nc);
	device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc);
	hip_add(ioc->hip.host_write_commands, 1);

	len /= 512;
	len += ioc->partial_duw;
	if (len >= 1000)
		hip_add(ioc->hip.data_units_written, len / 1000);
	ioc->partial_duw = len % 1000;
}

static void
handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	device_flush(le32toh(cmd->nsid), nc);
}

/*
 * Receive and dispatch NVM command capsules on an I/O queue until the
 * connection drops or the host disconnects.  Returns true on an
 * explicit DISCONNECT.
 */
static bool
handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp)
{
	const struct nvme_command *cmd;
	struct nvmf_capsule *nc;
	int error;
	bool disconnect;

	disconnect = false;

	while (!disconnect) {
		error = nvmf_controller_receive_capsule(qp, &nc);
		if (error != 0) {
			if (error != ECONNRESET)
				warnc(error, "Failed to read command capsule");
			break;
		}

		cmd = nvmf_capsule_sqe(nc);

		switch (cmd->opc) {
		case NVME_OPC_FLUSH:
			/* The broadcast NSID is not supported for FLUSH. */
			if (cmd->nsid == htole32(0xffffffff)) {
				nvmf_send_generic_error(nc,
				    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
				break;
			}
			handle_flush(nc, cmd);
			break;
		case NVME_OPC_WRITE:
			handle_write(ioc, nc, cmd);
			break;
		case NVME_OPC_READ:
			handle_read(ioc, nc, cmd);
			break;
		case NVME_OPC_FABRICS_COMMANDS:
			disconnect = handle_io_fabrics_command(nc,
			    (const struct nvmf_fabric_cmd *)cmd);
			break;
		default:
			warnx("Unsupported NVM opcode %#x", cmd->opc);
			nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE);
			break;
		}
		nvmf_free_capsule(nc);
	}

	return (disconnect);
}

/*
 * Per-I/O-queue thread body: service commands, then unregister the
 * queue and wake the admin thread when the last queue exits.
 */
static void
handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid)
{
	char name[64];
	bool disconnect;

	snprintf(name, sizeof(name), "I/O queue %d", qid);
	pthread_setname_np(pthread_self(), name);

	disconnect = handle_io_commands(ioc, qp);

	pthread_mutex_lock(&io_na_mutex);
	if (disconnect)
		ioc->io_qpairs[qid - 1] = NULL;
	if (ioc->io_sockets[qid - 1] != -1) {
		close(ioc->io_sockets[qid - 1]);
		ioc->io_sockets[qid - 1] = -1;
	}
	ioc->active_io_queues--;
	if (ioc->active_io_queues == 0)
		pthread_cond_broadcast(&io_cond);
	pthread_mutex_unlock(&io_na_mutex);
}

/*
 * Complete a CONNECT for the admin queue: allocate the controller
 * state, publish it as the single active controller, and run the
 * admin queue in this thread.
 */
static void
connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvme_controller_data cdata;
	struct io_controller *ioc;
	int error;

	/* Can only have one active I/O controller at a time. */
	pthread_mutex_lock(&io_na_mutex);
	if (io_controller != NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_CONTROLLER_BUSY);
		goto error;
	}

	error = nvmf_finish_accept(nc, 2);
	if (error != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnc(error, "Failed to send CONNECT response");
		goto error;
	}

	ioc = calloc(1, sizeof(*ioc));
	ioc->cntlid = 2;
	memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid));
	memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn));

	nvmf_init_io_controller_data(qp, serial, nqn, device_count(),
	    NVMF_IOCCSZ, &cdata);

	ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr));

	ioc->hip.power_cycles[0] = 1;

	ioc->c = init_controller(qp, &cdata);

	io_controller = ioc;
	pthread_mutex_unlock(&io_na_mutex);

	nvmf_free_capsule(nc);

	handle_admin_qpair(ioc);
	close(s);
	return;

error:
	nvmf_free_capsule(nc);
	close(s);
}

/*
 * Complete a CONNECT for an I/O queue after validating it against the
 * active controller (same host identity, queues enabled, valid and
 * unused queue ID), then run the queue in this thread.
 */
static void
connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc,
    const struct nvmf_fabric_connect_data *data, uint16_t qid)
{
	struct io_controller *ioc;
	int error;

	pthread_mutex_lock(&io_na_mutex);
	if (io_controller == NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create I/O qpair without admin qpair");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}

	if (memcmp(io_controller->hostid, data->hostid,
	    sizeof(data->hostid)) != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("hostid mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		goto error;
	}
	if (le16toh(data->cntlid) != io_controller->cntlid) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("cntlid mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		goto error;
	}
	if (memcmp(io_controller->hostnqn, data->hostnqn,
	    sizeof(data->hostnqn)) != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("host NQN mismatch for I/O qpair CONNECT");
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		goto error;
	}

	if (io_controller->num_io_queues == 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create I/O qpair without enabled queues");
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}
	if (qid > io_controller->num_io_queues) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to create invalid I/O qpair %u", qid);
		nvmf_connect_invalid_parameters(nc, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		goto error;
	}
	if (io_controller->io_qpairs[qid - 1] != NULL) {
		pthread_mutex_unlock(&io_na_mutex);
		warnx("Attempt to re-create I/O qpair %u", qid);
		nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR);
		goto error;
	}

	error = nvmf_finish_accept(nc, io_controller->cntlid);
	if (error != 0) {
		pthread_mutex_unlock(&io_na_mutex);
		warnc(error, "Failed to send CONNECT response");
		goto error;
	}

	ioc = io_controller;
	ioc->active_io_queues++;
	ioc->io_qpairs[qid - 1] = qp;
	ioc->io_sockets[qid - 1] = s;
	pthread_mutex_unlock(&io_na_mutex);

	nvmf_free_capsule(nc);

	handle_io_qpair(ioc, qp, qid);
	return;

error:
	nvmf_free_capsule(nc);
	close(s);
}

/*
 * Detached per-connection thread: accept the CONNECT capsule, hand
 * the queue to CTL when kernel_io is set, otherwise route it to the
 * admin or I/O queue handler based on QID.
 */
static void *
io_socket_thread(void *arg)
{
	struct nvmf_fabric_connect_data data;
	struct nvmf_qpair_params qparams;
	const struct nvmf_fabric_connect_cmd *cmd;
	struct nvmf_capsule *nc;
	struct nvmf_qpair *qp;
	int s;

	pthread_detach(pthread_self());

	s = (intptr_t)arg;
	memset(&qparams, 0, sizeof(qparams));
	qparams.tcp.fd = s;

	nc = NULL;
	qp = nvmf_accept(io_na, &qparams, &nc, &data);
	if (qp == NULL) {
		warnx("Failed to create I/O qpair: %s",
		    nvmf_association_error(io_na));
		goto error;
	}

	if (kernel_io) {
		ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data);
		goto error;
	}

	if (strcmp(data.subnqn, nqn) != 0) {
		/* warnx: errno is not meaningful after strcmp. */
		warnx("I/O qpair with invalid SubNQN: %.*s",
		    (int)sizeof(data.subnqn), data.subnqn);
		nvmf_connect_invalid_parameters(nc, true,
		    offsetof(struct nvmf_fabric_connect_data, subnqn));
		goto error;
	}

	/* Is this an admin or I/O queue pair? */
	cmd = nvmf_capsule_sqe(nc);
	if (cmd->qid == 0)
		connect_admin_qpair(s, qp, nc, &data);
	else
		connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid));
	nvmf_free_qpair(qp);
	return (NULL);

error:
	if (nc != NULL)
		nvmf_free_capsule(nc);
	if (qp != NULL)
		nvmf_free_qpair(qp);
	close(s);
	return (NULL);
}

/*
 * Spawn a detached thread to service a newly-accepted TCP connection;
 * the socket is closed here only if the thread cannot be created.
 */
void
handle_io_socket(int s)
{
	pthread_t thr;
	int error;

	error = pthread_create(&thr, NULL, io_socket_thread,
	    (void *)(uintptr_t)s);
	if (error != 0) {
		warnc(error, "Failed to create I/O qpair thread");
		close(s);
	}
}