/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/sysctl.h>
#include <err.h>
#include <errno.h>
#include <libnvmf.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "internal.h"

/*
 * State for the single active I/O controller.  One of these is created
 * when an admin queue CONNECTs and freed once the admin queue and all
 * of its I/O queues have shut down.
 */
struct io_controller {
	struct controller *c;		/* generic admin-queue controller */

	u_int num_io_queues;		/* count granted via SET_FEATURES */
	u_int active_io_queues;		/* queues with a live handler thread */
	struct nvmf_qpair **io_qpairs;	/* one slot per granted queue */
	int *io_sockets;		/* TCP fd per queue, -1 when closed */

	struct nvme_firmware_page fp;	/* firmware slot log page */
	struct nvme_health_information_page hip; /* health log page */
	uint16_t partial_dur;		/* carry (<1000) for data_units_read */
	uint16_t partial_duw;		/* carry (<1000) for data_units_written */

	uint16_t cntlid;		/* controller ID returned on CONNECT */
	char hostid[16];		/* host ID from the admin CONNECT */
	char hostnqn[NVME_NQN_FIELD_SIZE]; /* host NQN from the admin CONNECT */
};

static struct nvmf_association *io_na;
/* Signaled when the last active I/O queue thread exits. */
static pthread_cond_t io_cond;
/* Protects io_controller and the queue/socket arrays inside it. */
static pthread_mutex_t io_na_mutex;
static struct io_controller *io_controller;
static const char *nqn;
static char serial[NVME_SERIAL_NUMBER_LENGTH];

45*a8089ea5SJohn Baldwin void 46*a8089ea5SJohn Baldwin init_io(const char *subnqn) 47*a8089ea5SJohn Baldwin { 48*a8089ea5SJohn Baldwin struct nvmf_association_params aparams; 49*a8089ea5SJohn Baldwin u_long hostid; 50*a8089ea5SJohn Baldwin size_t len; 51*a8089ea5SJohn Baldwin 52*a8089ea5SJohn Baldwin memset(&aparams, 0, sizeof(aparams)); 53*a8089ea5SJohn Baldwin aparams.sq_flow_control = !flow_control_disable; 54*a8089ea5SJohn Baldwin aparams.dynamic_controller_model = true; 55*a8089ea5SJohn Baldwin aparams.max_admin_qsize = NVME_MAX_ADMIN_ENTRIES; 56*a8089ea5SJohn Baldwin aparams.max_io_qsize = NVMF_MAX_IO_ENTRIES; 57*a8089ea5SJohn Baldwin aparams.tcp.pda = 0; 58*a8089ea5SJohn Baldwin aparams.tcp.header_digests = header_digests; 59*a8089ea5SJohn Baldwin aparams.tcp.data_digests = data_digests; 60*a8089ea5SJohn Baldwin aparams.tcp.maxr2t = 1; 61*a8089ea5SJohn Baldwin aparams.tcp.maxh2cdata = 256 * 1024; 62*a8089ea5SJohn Baldwin io_na = nvmf_allocate_association(NVMF_TRTYPE_TCP, true, 63*a8089ea5SJohn Baldwin &aparams); 64*a8089ea5SJohn Baldwin if (io_na == NULL) 65*a8089ea5SJohn Baldwin err(1, "Failed to create I/O controller association"); 66*a8089ea5SJohn Baldwin 67*a8089ea5SJohn Baldwin nqn = subnqn; 68*a8089ea5SJohn Baldwin 69*a8089ea5SJohn Baldwin /* Generate a serial number from the kern.hostid node. 
*/ 70*a8089ea5SJohn Baldwin len = sizeof(hostid); 71*a8089ea5SJohn Baldwin if (sysctlbyname("kern.hostid", &hostid, &len, NULL, 0) == -1) 72*a8089ea5SJohn Baldwin err(1, "sysctl: kern.hostid"); 73*a8089ea5SJohn Baldwin 74*a8089ea5SJohn Baldwin nvmf_controller_serial(serial, sizeof(serial), hostid); 75*a8089ea5SJohn Baldwin 76*a8089ea5SJohn Baldwin pthread_cond_init(&io_cond, NULL); 77*a8089ea5SJohn Baldwin pthread_mutex_init(&io_na_mutex, NULL); 78*a8089ea5SJohn Baldwin 79*a8089ea5SJohn Baldwin if (kernel_io) 80*a8089ea5SJohn Baldwin init_ctl_port(subnqn, &aparams); 81*a8089ea5SJohn Baldwin } 82*a8089ea5SJohn Baldwin 83*a8089ea5SJohn Baldwin void 84*a8089ea5SJohn Baldwin shutdown_io(void) 85*a8089ea5SJohn Baldwin { 86*a8089ea5SJohn Baldwin if (kernel_io) 87*a8089ea5SJohn Baldwin shutdown_ctl_port(nqn); 88*a8089ea5SJohn Baldwin } 89*a8089ea5SJohn Baldwin 90*a8089ea5SJohn Baldwin static void 91*a8089ea5SJohn Baldwin handle_get_log_page(struct io_controller *ioc, const struct nvmf_capsule *nc, 92*a8089ea5SJohn Baldwin const struct nvme_command *cmd) 93*a8089ea5SJohn Baldwin { 94*a8089ea5SJohn Baldwin uint64_t offset; 95*a8089ea5SJohn Baldwin uint32_t numd; 96*a8089ea5SJohn Baldwin size_t len; 97*a8089ea5SJohn Baldwin uint8_t lid; 98*a8089ea5SJohn Baldwin 99*a8089ea5SJohn Baldwin lid = le32toh(cmd->cdw10) & 0xff; 100*a8089ea5SJohn Baldwin numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16; 101*a8089ea5SJohn Baldwin offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32; 102*a8089ea5SJohn Baldwin 103*a8089ea5SJohn Baldwin if (offset % 3 != 0) 104*a8089ea5SJohn Baldwin goto error; 105*a8089ea5SJohn Baldwin 106*a8089ea5SJohn Baldwin len = (numd + 1) * 4; 107*a8089ea5SJohn Baldwin 108*a8089ea5SJohn Baldwin switch (lid) { 109*a8089ea5SJohn Baldwin case NVME_LOG_ERROR: 110*a8089ea5SJohn Baldwin { 111*a8089ea5SJohn Baldwin void *buf; 112*a8089ea5SJohn Baldwin 113*a8089ea5SJohn Baldwin if (len % sizeof(struct nvme_error_information_entry) != 0) 
114*a8089ea5SJohn Baldwin goto error; 115*a8089ea5SJohn Baldwin 116*a8089ea5SJohn Baldwin buf = calloc(1, len); 117*a8089ea5SJohn Baldwin nvmf_send_controller_data(nc, buf, len); 118*a8089ea5SJohn Baldwin free(buf); 119*a8089ea5SJohn Baldwin return; 120*a8089ea5SJohn Baldwin } 121*a8089ea5SJohn Baldwin case NVME_LOG_HEALTH_INFORMATION: 122*a8089ea5SJohn Baldwin if (len != sizeof(ioc->hip)) 123*a8089ea5SJohn Baldwin goto error; 124*a8089ea5SJohn Baldwin 125*a8089ea5SJohn Baldwin nvmf_send_controller_data(nc, &ioc->hip, sizeof(ioc->hip)); 126*a8089ea5SJohn Baldwin return; 127*a8089ea5SJohn Baldwin case NVME_LOG_FIRMWARE_SLOT: 128*a8089ea5SJohn Baldwin if (len != sizeof(ioc->fp)) 129*a8089ea5SJohn Baldwin goto error; 130*a8089ea5SJohn Baldwin 131*a8089ea5SJohn Baldwin nvmf_send_controller_data(nc, &ioc->fp, sizeof(ioc->fp)); 132*a8089ea5SJohn Baldwin return; 133*a8089ea5SJohn Baldwin default: 134*a8089ea5SJohn Baldwin warnx("Unsupported page %#x for GET_LOG_PAGE\n", lid); 135*a8089ea5SJohn Baldwin goto error; 136*a8089ea5SJohn Baldwin } 137*a8089ea5SJohn Baldwin 138*a8089ea5SJohn Baldwin error: 139*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 140*a8089ea5SJohn Baldwin } 141*a8089ea5SJohn Baldwin 142*a8089ea5SJohn Baldwin static bool 143*a8089ea5SJohn Baldwin handle_io_identify_command(const struct nvmf_capsule *nc, 144*a8089ea5SJohn Baldwin const struct nvme_command *cmd) 145*a8089ea5SJohn Baldwin { 146*a8089ea5SJohn Baldwin struct nvme_namespace_data nsdata; 147*a8089ea5SJohn Baldwin struct nvme_ns_list nslist; 148*a8089ea5SJohn Baldwin uint32_t nsid; 149*a8089ea5SJohn Baldwin uint8_t cns; 150*a8089ea5SJohn Baldwin 151*a8089ea5SJohn Baldwin cns = le32toh(cmd->cdw10) & 0xFF; 152*a8089ea5SJohn Baldwin switch (cns) { 153*a8089ea5SJohn Baldwin case 0: /* Namespace data. 
*/ 154*a8089ea5SJohn Baldwin if (!device_namespace_data(le32toh(cmd->nsid), &nsdata)) { 155*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, 156*a8089ea5SJohn Baldwin NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 157*a8089ea5SJohn Baldwin return (true); 158*a8089ea5SJohn Baldwin } 159*a8089ea5SJohn Baldwin 160*a8089ea5SJohn Baldwin nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata)); 161*a8089ea5SJohn Baldwin return (true); 162*a8089ea5SJohn Baldwin case 2: /* Active namespace list. */ 163*a8089ea5SJohn Baldwin nsid = le32toh(cmd->nsid); 164*a8089ea5SJohn Baldwin if (nsid >= 0xfffffffe) { 165*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 166*a8089ea5SJohn Baldwin return (true); 167*a8089ea5SJohn Baldwin } 168*a8089ea5SJohn Baldwin 169*a8089ea5SJohn Baldwin device_active_nslist(nsid, &nslist); 170*a8089ea5SJohn Baldwin nvmf_send_controller_data(nc, &nslist, sizeof(nslist)); 171*a8089ea5SJohn Baldwin return (true); 172*a8089ea5SJohn Baldwin case 3: /* Namespace Identification Descriptor list. 
*/ 173*a8089ea5SJohn Baldwin if (!device_identification_descriptor(le32toh(cmd->nsid), 174*a8089ea5SJohn Baldwin &nsdata)) { 175*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, 176*a8089ea5SJohn Baldwin NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 177*a8089ea5SJohn Baldwin return (true); 178*a8089ea5SJohn Baldwin } 179*a8089ea5SJohn Baldwin 180*a8089ea5SJohn Baldwin nvmf_send_controller_data(nc, &nsdata, sizeof(nsdata)); 181*a8089ea5SJohn Baldwin return (true); 182*a8089ea5SJohn Baldwin default: 183*a8089ea5SJohn Baldwin return (false); 184*a8089ea5SJohn Baldwin } 185*a8089ea5SJohn Baldwin } 186*a8089ea5SJohn Baldwin 187*a8089ea5SJohn Baldwin static void 188*a8089ea5SJohn Baldwin handle_set_features(struct io_controller *ioc, const struct nvmf_capsule *nc, 189*a8089ea5SJohn Baldwin const struct nvme_command *cmd) 190*a8089ea5SJohn Baldwin { 191*a8089ea5SJohn Baldwin struct nvme_completion cqe; 192*a8089ea5SJohn Baldwin uint8_t fid; 193*a8089ea5SJohn Baldwin 194*a8089ea5SJohn Baldwin fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10)); 195*a8089ea5SJohn Baldwin switch (fid) { 196*a8089ea5SJohn Baldwin case NVME_FEAT_NUMBER_OF_QUEUES: 197*a8089ea5SJohn Baldwin { 198*a8089ea5SJohn Baldwin uint32_t num_queues; 199*a8089ea5SJohn Baldwin 200*a8089ea5SJohn Baldwin if (ioc->num_io_queues != 0) { 201*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, 202*a8089ea5SJohn Baldwin NVME_SC_COMMAND_SEQUENCE_ERROR); 203*a8089ea5SJohn Baldwin return; 204*a8089ea5SJohn Baldwin } 205*a8089ea5SJohn Baldwin 206*a8089ea5SJohn Baldwin num_queues = le32toh(cmd->cdw11) & 0xffff; 207*a8089ea5SJohn Baldwin 208*a8089ea5SJohn Baldwin /* 5.12.1.7: 65535 is invalid. */ 209*a8089ea5SJohn Baldwin if (num_queues == 65535) 210*a8089ea5SJohn Baldwin goto error; 211*a8089ea5SJohn Baldwin 212*a8089ea5SJohn Baldwin /* Fabrics requires the same number of SQs and CQs. 
*/ 213*a8089ea5SJohn Baldwin if (le32toh(cmd->cdw11) >> 16 != num_queues) 214*a8089ea5SJohn Baldwin goto error; 215*a8089ea5SJohn Baldwin 216*a8089ea5SJohn Baldwin /* Convert to 1's based */ 217*a8089ea5SJohn Baldwin num_queues++; 218*a8089ea5SJohn Baldwin 219*a8089ea5SJohn Baldwin /* Lock to synchronize with handle_io_qpair. */ 220*a8089ea5SJohn Baldwin pthread_mutex_lock(&io_na_mutex); 221*a8089ea5SJohn Baldwin ioc->num_io_queues = num_queues; 222*a8089ea5SJohn Baldwin ioc->io_qpairs = calloc(num_queues, sizeof(*ioc->io_qpairs)); 223*a8089ea5SJohn Baldwin ioc->io_sockets = calloc(num_queues, sizeof(*ioc->io_sockets)); 224*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 225*a8089ea5SJohn Baldwin 226*a8089ea5SJohn Baldwin nvmf_init_cqe(&cqe, nc, 0); 227*a8089ea5SJohn Baldwin cqe.cdw0 = cmd->cdw11; 228*a8089ea5SJohn Baldwin nvmf_send_response(nc, &cqe); 229*a8089ea5SJohn Baldwin return; 230*a8089ea5SJohn Baldwin } 231*a8089ea5SJohn Baldwin case NVME_FEAT_ASYNC_EVENT_CONFIGURATION: 232*a8089ea5SJohn Baldwin { 233*a8089ea5SJohn Baldwin uint32_t aer_mask; 234*a8089ea5SJohn Baldwin 235*a8089ea5SJohn Baldwin aer_mask = le32toh(cmd->cdw11); 236*a8089ea5SJohn Baldwin 237*a8089ea5SJohn Baldwin /* Check for any reserved or unimplemented feature bits. */ 238*a8089ea5SJohn Baldwin if ((aer_mask & 0xffffc000) != 0) 239*a8089ea5SJohn Baldwin goto error; 240*a8089ea5SJohn Baldwin 241*a8089ea5SJohn Baldwin /* No AERs are generated by this daemon. 
*/ 242*a8089ea5SJohn Baldwin nvmf_send_success(nc); 243*a8089ea5SJohn Baldwin return; 244*a8089ea5SJohn Baldwin } 245*a8089ea5SJohn Baldwin default: 246*a8089ea5SJohn Baldwin warnx("Unsupported feature ID %u for SET_FEATURES", fid); 247*a8089ea5SJohn Baldwin goto error; 248*a8089ea5SJohn Baldwin } 249*a8089ea5SJohn Baldwin 250*a8089ea5SJohn Baldwin error: 251*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 252*a8089ea5SJohn Baldwin } 253*a8089ea5SJohn Baldwin 254*a8089ea5SJohn Baldwin static bool 255*a8089ea5SJohn Baldwin admin_command(const struct nvmf_capsule *nc, const struct nvme_command *cmd, 256*a8089ea5SJohn Baldwin void *arg) 257*a8089ea5SJohn Baldwin { 258*a8089ea5SJohn Baldwin struct io_controller *ioc = arg; 259*a8089ea5SJohn Baldwin 260*a8089ea5SJohn Baldwin switch (cmd->opc) { 261*a8089ea5SJohn Baldwin case NVME_OPC_GET_LOG_PAGE: 262*a8089ea5SJohn Baldwin handle_get_log_page(ioc, nc, cmd); 263*a8089ea5SJohn Baldwin return (true); 264*a8089ea5SJohn Baldwin case NVME_OPC_IDENTIFY: 265*a8089ea5SJohn Baldwin return (handle_io_identify_command(nc, cmd)); 266*a8089ea5SJohn Baldwin case NVME_OPC_SET_FEATURES: 267*a8089ea5SJohn Baldwin handle_set_features(ioc, nc, cmd); 268*a8089ea5SJohn Baldwin return (true); 269*a8089ea5SJohn Baldwin case NVME_OPC_ASYNC_EVENT_REQUEST: 270*a8089ea5SJohn Baldwin /* Ignore and never complete. 
*/ 271*a8089ea5SJohn Baldwin return (true); 272*a8089ea5SJohn Baldwin case NVME_OPC_KEEP_ALIVE: 273*a8089ea5SJohn Baldwin nvmf_send_success(nc); 274*a8089ea5SJohn Baldwin return (true); 275*a8089ea5SJohn Baldwin default: 276*a8089ea5SJohn Baldwin return (false); 277*a8089ea5SJohn Baldwin } 278*a8089ea5SJohn Baldwin } 279*a8089ea5SJohn Baldwin 280*a8089ea5SJohn Baldwin static void 281*a8089ea5SJohn Baldwin handle_admin_qpair(struct io_controller *ioc) 282*a8089ea5SJohn Baldwin { 283*a8089ea5SJohn Baldwin pthread_setname_np(pthread_self(), "admin queue"); 284*a8089ea5SJohn Baldwin 285*a8089ea5SJohn Baldwin controller_handle_admin_commands(ioc->c, admin_command, ioc); 286*a8089ea5SJohn Baldwin 287*a8089ea5SJohn Baldwin pthread_mutex_lock(&io_na_mutex); 288*a8089ea5SJohn Baldwin for (u_int i = 0; i < ioc->num_io_queues; i++) { 289*a8089ea5SJohn Baldwin if (ioc->io_qpairs[i] == NULL || ioc->io_sockets[i] == -1) 290*a8089ea5SJohn Baldwin continue; 291*a8089ea5SJohn Baldwin close(ioc->io_sockets[i]); 292*a8089ea5SJohn Baldwin ioc->io_sockets[i] = -1; 293*a8089ea5SJohn Baldwin } 294*a8089ea5SJohn Baldwin 295*a8089ea5SJohn Baldwin /* Wait for I/O threads to notice. 
*/ 296*a8089ea5SJohn Baldwin while (ioc->active_io_queues > 0) 297*a8089ea5SJohn Baldwin pthread_cond_wait(&io_cond, &io_na_mutex); 298*a8089ea5SJohn Baldwin 299*a8089ea5SJohn Baldwin io_controller = NULL; 300*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 301*a8089ea5SJohn Baldwin 302*a8089ea5SJohn Baldwin free_controller(ioc->c); 303*a8089ea5SJohn Baldwin 304*a8089ea5SJohn Baldwin free(ioc); 305*a8089ea5SJohn Baldwin } 306*a8089ea5SJohn Baldwin 307*a8089ea5SJohn Baldwin static bool 308*a8089ea5SJohn Baldwin handle_io_fabrics_command(const struct nvmf_capsule *nc, 309*a8089ea5SJohn Baldwin const struct nvmf_fabric_cmd *fc) 310*a8089ea5SJohn Baldwin { 311*a8089ea5SJohn Baldwin switch (fc->fctype) { 312*a8089ea5SJohn Baldwin case NVMF_FABRIC_COMMAND_CONNECT: 313*a8089ea5SJohn Baldwin warnx("CONNECT command on connected queue"); 314*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR); 315*a8089ea5SJohn Baldwin break; 316*a8089ea5SJohn Baldwin case NVMF_FABRIC_COMMAND_DISCONNECT: 317*a8089ea5SJohn Baldwin { 318*a8089ea5SJohn Baldwin const struct nvmf_fabric_disconnect_cmd *dis = 319*a8089ea5SJohn Baldwin (const struct nvmf_fabric_disconnect_cmd *)fc; 320*a8089ea5SJohn Baldwin if (dis->recfmt != htole16(0)) { 321*a8089ea5SJohn Baldwin nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC, 322*a8089ea5SJohn Baldwin NVMF_FABRIC_SC_INCOMPATIBLE_FORMAT); 323*a8089ea5SJohn Baldwin break; 324*a8089ea5SJohn Baldwin } 325*a8089ea5SJohn Baldwin nvmf_send_success(nc); 326*a8089ea5SJohn Baldwin return (true); 327*a8089ea5SJohn Baldwin } 328*a8089ea5SJohn Baldwin default: 329*a8089ea5SJohn Baldwin warnx("Unsupported fabrics command %#x", fc->fctype); 330*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE); 331*a8089ea5SJohn Baldwin break; 332*a8089ea5SJohn Baldwin } 333*a8089ea5SJohn Baldwin 334*a8089ea5SJohn Baldwin return (false); 335*a8089ea5SJohn Baldwin } 336*a8089ea5SJohn Baldwin 337*a8089ea5SJohn Baldwin static void 
338*a8089ea5SJohn Baldwin hip_add(uint64_t pair[2], uint64_t addend) 339*a8089ea5SJohn Baldwin { 340*a8089ea5SJohn Baldwin uint64_t old, new; 341*a8089ea5SJohn Baldwin 342*a8089ea5SJohn Baldwin old = le64toh(pair[0]); 343*a8089ea5SJohn Baldwin new = old + addend; 344*a8089ea5SJohn Baldwin pair[0] = htole64(new); 345*a8089ea5SJohn Baldwin if (new < old) 346*a8089ea5SJohn Baldwin pair[1] += htole64(1); 347*a8089ea5SJohn Baldwin } 348*a8089ea5SJohn Baldwin 349*a8089ea5SJohn Baldwin static uint64_t 350*a8089ea5SJohn Baldwin cmd_lba(const struct nvme_command *cmd) 351*a8089ea5SJohn Baldwin { 352*a8089ea5SJohn Baldwin return ((uint64_t)le32toh(cmd->cdw11) << 32 | le32toh(cmd->cdw10)); 353*a8089ea5SJohn Baldwin } 354*a8089ea5SJohn Baldwin 355*a8089ea5SJohn Baldwin static u_int 356*a8089ea5SJohn Baldwin cmd_nlb(const struct nvme_command *cmd) 357*a8089ea5SJohn Baldwin { 358*a8089ea5SJohn Baldwin return ((le32toh(cmd->cdw12) & 0xffff) + 1); 359*a8089ea5SJohn Baldwin } 360*a8089ea5SJohn Baldwin 361*a8089ea5SJohn Baldwin static void 362*a8089ea5SJohn Baldwin handle_read(struct io_controller *ioc, const struct nvmf_capsule *nc, 363*a8089ea5SJohn Baldwin const struct nvme_command *cmd) 364*a8089ea5SJohn Baldwin { 365*a8089ea5SJohn Baldwin size_t len; 366*a8089ea5SJohn Baldwin 367*a8089ea5SJohn Baldwin len = nvmf_capsule_data_len(nc); 368*a8089ea5SJohn Baldwin device_read(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc); 369*a8089ea5SJohn Baldwin hip_add(ioc->hip.host_read_commands, 1); 370*a8089ea5SJohn Baldwin 371*a8089ea5SJohn Baldwin len /= 512; 372*a8089ea5SJohn Baldwin len += ioc->partial_dur; 373*a8089ea5SJohn Baldwin if (len > 1000) 374*a8089ea5SJohn Baldwin hip_add(ioc->hip.data_units_read, len / 1000); 375*a8089ea5SJohn Baldwin ioc->partial_dur = len % 1000; 376*a8089ea5SJohn Baldwin } 377*a8089ea5SJohn Baldwin 378*a8089ea5SJohn Baldwin static void 379*a8089ea5SJohn Baldwin handle_write(struct io_controller *ioc, const struct nvmf_capsule *nc, 380*a8089ea5SJohn 
Baldwin const struct nvme_command *cmd) 381*a8089ea5SJohn Baldwin { 382*a8089ea5SJohn Baldwin size_t len; 383*a8089ea5SJohn Baldwin 384*a8089ea5SJohn Baldwin len = nvmf_capsule_data_len(nc); 385*a8089ea5SJohn Baldwin device_write(le32toh(cmd->nsid), cmd_lba(cmd), cmd_nlb(cmd), nc); 386*a8089ea5SJohn Baldwin hip_add(ioc->hip.host_write_commands, 1); 387*a8089ea5SJohn Baldwin 388*a8089ea5SJohn Baldwin len /= 512; 389*a8089ea5SJohn Baldwin len += ioc->partial_duw; 390*a8089ea5SJohn Baldwin if (len > 1000) 391*a8089ea5SJohn Baldwin hip_add(ioc->hip.data_units_written, len / 1000); 392*a8089ea5SJohn Baldwin ioc->partial_duw = len % 1000; 393*a8089ea5SJohn Baldwin } 394*a8089ea5SJohn Baldwin 395*a8089ea5SJohn Baldwin static void 396*a8089ea5SJohn Baldwin handle_flush(const struct nvmf_capsule *nc, const struct nvme_command *cmd) 397*a8089ea5SJohn Baldwin { 398*a8089ea5SJohn Baldwin device_flush(le32toh(cmd->nsid), nc); 399*a8089ea5SJohn Baldwin } 400*a8089ea5SJohn Baldwin 401*a8089ea5SJohn Baldwin static bool 402*a8089ea5SJohn Baldwin handle_io_commands(struct io_controller *ioc, struct nvmf_qpair *qp) 403*a8089ea5SJohn Baldwin { 404*a8089ea5SJohn Baldwin const struct nvme_command *cmd; 405*a8089ea5SJohn Baldwin struct nvmf_capsule *nc; 406*a8089ea5SJohn Baldwin int error; 407*a8089ea5SJohn Baldwin bool disconnect; 408*a8089ea5SJohn Baldwin 409*a8089ea5SJohn Baldwin disconnect = false; 410*a8089ea5SJohn Baldwin 411*a8089ea5SJohn Baldwin while (!disconnect) { 412*a8089ea5SJohn Baldwin error = nvmf_controller_receive_capsule(qp, &nc); 413*a8089ea5SJohn Baldwin if (error != 0) { 414*a8089ea5SJohn Baldwin if (error != ECONNRESET) 415*a8089ea5SJohn Baldwin warnc(error, "Failed to read command capsule"); 416*a8089ea5SJohn Baldwin break; 417*a8089ea5SJohn Baldwin } 418*a8089ea5SJohn Baldwin 419*a8089ea5SJohn Baldwin cmd = nvmf_capsule_sqe(nc); 420*a8089ea5SJohn Baldwin 421*a8089ea5SJohn Baldwin switch (cmd->opc) { 422*a8089ea5SJohn Baldwin case NVME_OPC_FLUSH: 423*a8089ea5SJohn 
Baldwin if (cmd->nsid == htole32(0xffffffff)) { 424*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, 425*a8089ea5SJohn Baldwin NVME_SC_INVALID_NAMESPACE_OR_FORMAT); 426*a8089ea5SJohn Baldwin break; 427*a8089ea5SJohn Baldwin } 428*a8089ea5SJohn Baldwin handle_flush(nc, cmd); 429*a8089ea5SJohn Baldwin break; 430*a8089ea5SJohn Baldwin case NVME_OPC_WRITE: 431*a8089ea5SJohn Baldwin handle_write(ioc, nc, cmd); 432*a8089ea5SJohn Baldwin break; 433*a8089ea5SJohn Baldwin case NVME_OPC_READ: 434*a8089ea5SJohn Baldwin handle_read(ioc, nc, cmd); 435*a8089ea5SJohn Baldwin break; 436*a8089ea5SJohn Baldwin case NVME_OPC_FABRICS_COMMANDS: 437*a8089ea5SJohn Baldwin disconnect = handle_io_fabrics_command(nc, 438*a8089ea5SJohn Baldwin (const struct nvmf_fabric_cmd *)cmd); 439*a8089ea5SJohn Baldwin break; 440*a8089ea5SJohn Baldwin default: 441*a8089ea5SJohn Baldwin warnx("Unsupported NVM opcode %#x", cmd->opc); 442*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_INVALID_OPCODE); 443*a8089ea5SJohn Baldwin break; 444*a8089ea5SJohn Baldwin } 445*a8089ea5SJohn Baldwin nvmf_free_capsule(nc); 446*a8089ea5SJohn Baldwin } 447*a8089ea5SJohn Baldwin 448*a8089ea5SJohn Baldwin return (disconnect); 449*a8089ea5SJohn Baldwin } 450*a8089ea5SJohn Baldwin 451*a8089ea5SJohn Baldwin static void 452*a8089ea5SJohn Baldwin handle_io_qpair(struct io_controller *ioc, struct nvmf_qpair *qp, int qid) 453*a8089ea5SJohn Baldwin { 454*a8089ea5SJohn Baldwin char name[64]; 455*a8089ea5SJohn Baldwin bool disconnect; 456*a8089ea5SJohn Baldwin 457*a8089ea5SJohn Baldwin snprintf(name, sizeof(name), "I/O queue %d", qid); 458*a8089ea5SJohn Baldwin pthread_setname_np(pthread_self(), name); 459*a8089ea5SJohn Baldwin 460*a8089ea5SJohn Baldwin disconnect = handle_io_commands(ioc, qp); 461*a8089ea5SJohn Baldwin 462*a8089ea5SJohn Baldwin pthread_mutex_lock(&io_na_mutex); 463*a8089ea5SJohn Baldwin if (disconnect) 464*a8089ea5SJohn Baldwin ioc->io_qpairs[qid - 1] = NULL; 465*a8089ea5SJohn Baldwin if 
(ioc->io_sockets[qid - 1] != -1) { 466*a8089ea5SJohn Baldwin close(ioc->io_sockets[qid - 1]); 467*a8089ea5SJohn Baldwin ioc->io_sockets[qid - 1] = -1; 468*a8089ea5SJohn Baldwin } 469*a8089ea5SJohn Baldwin ioc->active_io_queues--; 470*a8089ea5SJohn Baldwin if (ioc->active_io_queues == 0) 471*a8089ea5SJohn Baldwin pthread_cond_broadcast(&io_cond); 472*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 473*a8089ea5SJohn Baldwin } 474*a8089ea5SJohn Baldwin 475*a8089ea5SJohn Baldwin static void 476*a8089ea5SJohn Baldwin connect_admin_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc, 477*a8089ea5SJohn Baldwin const struct nvmf_fabric_connect_data *data) 478*a8089ea5SJohn Baldwin { 479*a8089ea5SJohn Baldwin struct nvme_controller_data cdata; 480*a8089ea5SJohn Baldwin struct io_controller *ioc; 481*a8089ea5SJohn Baldwin int error; 482*a8089ea5SJohn Baldwin 483*a8089ea5SJohn Baldwin /* Can only have one active I/O controller at a time. */ 484*a8089ea5SJohn Baldwin pthread_mutex_lock(&io_na_mutex); 485*a8089ea5SJohn Baldwin if (io_controller != NULL) { 486*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 487*a8089ea5SJohn Baldwin nvmf_send_error(nc, NVME_SCT_COMMAND_SPECIFIC, 488*a8089ea5SJohn Baldwin NVMF_FABRIC_SC_CONTROLLER_BUSY); 489*a8089ea5SJohn Baldwin goto error; 490*a8089ea5SJohn Baldwin } 491*a8089ea5SJohn Baldwin 492*a8089ea5SJohn Baldwin error = nvmf_finish_accept(nc, 2); 493*a8089ea5SJohn Baldwin if (error != 0) { 494*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 495*a8089ea5SJohn Baldwin warnc(error, "Failed to send CONNECT response"); 496*a8089ea5SJohn Baldwin goto error; 497*a8089ea5SJohn Baldwin } 498*a8089ea5SJohn Baldwin 499*a8089ea5SJohn Baldwin ioc = calloc(1, sizeof(*ioc)); 500*a8089ea5SJohn Baldwin ioc->cntlid = 2; 501*a8089ea5SJohn Baldwin memcpy(ioc->hostid, data->hostid, sizeof(ioc->hostid)); 502*a8089ea5SJohn Baldwin memcpy(ioc->hostnqn, data->hostnqn, sizeof(ioc->hostnqn)); 503*a8089ea5SJohn Baldwin 
504*a8089ea5SJohn Baldwin nvmf_init_io_controller_data(qp, serial, nqn, device_count(), 505*a8089ea5SJohn Baldwin NVMF_IOCCSZ, &cdata); 506*a8089ea5SJohn Baldwin 507*a8089ea5SJohn Baldwin ioc->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1); 508*a8089ea5SJohn Baldwin memcpy(ioc->fp.revision[0], cdata.fr, sizeof(cdata.fr)); 509*a8089ea5SJohn Baldwin 510*a8089ea5SJohn Baldwin ioc->hip.power_cycles[0] = 1; 511*a8089ea5SJohn Baldwin 512*a8089ea5SJohn Baldwin ioc->c = init_controller(qp, &cdata); 513*a8089ea5SJohn Baldwin 514*a8089ea5SJohn Baldwin io_controller = ioc; 515*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 516*a8089ea5SJohn Baldwin 517*a8089ea5SJohn Baldwin nvmf_free_capsule(nc); 518*a8089ea5SJohn Baldwin 519*a8089ea5SJohn Baldwin handle_admin_qpair(ioc); 520*a8089ea5SJohn Baldwin close(s); 521*a8089ea5SJohn Baldwin return; 522*a8089ea5SJohn Baldwin 523*a8089ea5SJohn Baldwin error: 524*a8089ea5SJohn Baldwin nvmf_free_capsule(nc); 525*a8089ea5SJohn Baldwin close(s); 526*a8089ea5SJohn Baldwin } 527*a8089ea5SJohn Baldwin 528*a8089ea5SJohn Baldwin static void 529*a8089ea5SJohn Baldwin connect_io_qpair(int s, struct nvmf_qpair *qp, struct nvmf_capsule *nc, 530*a8089ea5SJohn Baldwin const struct nvmf_fabric_connect_data *data, uint16_t qid) 531*a8089ea5SJohn Baldwin { 532*a8089ea5SJohn Baldwin struct io_controller *ioc; 533*a8089ea5SJohn Baldwin int error; 534*a8089ea5SJohn Baldwin 535*a8089ea5SJohn Baldwin pthread_mutex_lock(&io_na_mutex); 536*a8089ea5SJohn Baldwin if (io_controller == NULL) { 537*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 538*a8089ea5SJohn Baldwin warnx("Attempt to create I/O qpair without admin qpair"); 539*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR); 540*a8089ea5SJohn Baldwin goto error; 541*a8089ea5SJohn Baldwin } 542*a8089ea5SJohn Baldwin 543*a8089ea5SJohn Baldwin if (memcmp(io_controller->hostid, data->hostid, 544*a8089ea5SJohn Baldwin sizeof(data->hostid)) != 0) { 
545*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 546*a8089ea5SJohn Baldwin warnx("hostid mismatch for I/O qpair CONNECT"); 547*a8089ea5SJohn Baldwin nvmf_connect_invalid_parameters(nc, true, 548*a8089ea5SJohn Baldwin offsetof(struct nvmf_fabric_connect_data, hostid)); 549*a8089ea5SJohn Baldwin goto error; 550*a8089ea5SJohn Baldwin } 551*a8089ea5SJohn Baldwin if (le16toh(data->cntlid) != io_controller->cntlid) { 552*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 553*a8089ea5SJohn Baldwin warnx("cntlid mismatch for I/O qpair CONNECT"); 554*a8089ea5SJohn Baldwin nvmf_connect_invalid_parameters(nc, true, 555*a8089ea5SJohn Baldwin offsetof(struct nvmf_fabric_connect_data, cntlid)); 556*a8089ea5SJohn Baldwin goto error; 557*a8089ea5SJohn Baldwin } 558*a8089ea5SJohn Baldwin if (memcmp(io_controller->hostnqn, data->hostnqn, 559*a8089ea5SJohn Baldwin sizeof(data->hostid)) != 0) { 560*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 561*a8089ea5SJohn Baldwin warnx("host NQN mismatch for I/O qpair CONNECT"); 562*a8089ea5SJohn Baldwin nvmf_connect_invalid_parameters(nc, true, 563*a8089ea5SJohn Baldwin offsetof(struct nvmf_fabric_connect_data, hostnqn)); 564*a8089ea5SJohn Baldwin goto error; 565*a8089ea5SJohn Baldwin } 566*a8089ea5SJohn Baldwin 567*a8089ea5SJohn Baldwin if (io_controller->num_io_queues == 0) { 568*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 569*a8089ea5SJohn Baldwin warnx("Attempt to create I/O qpair without enabled queues"); 570*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR); 571*a8089ea5SJohn Baldwin goto error; 572*a8089ea5SJohn Baldwin } 573*a8089ea5SJohn Baldwin if (qid > io_controller->num_io_queues) { 574*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 575*a8089ea5SJohn Baldwin warnx("Attempt to create invalid I/O qpair %u", qid); 576*a8089ea5SJohn Baldwin nvmf_connect_invalid_parameters(nc, false, 577*a8089ea5SJohn Baldwin offsetof(struct nvmf_fabric_connect_cmd, 
qid)); 578*a8089ea5SJohn Baldwin goto error; 579*a8089ea5SJohn Baldwin } 580*a8089ea5SJohn Baldwin if (io_controller->io_qpairs[qid - 1] != NULL) { 581*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 582*a8089ea5SJohn Baldwin warnx("Attempt to re-create I/O qpair %u", qid); 583*a8089ea5SJohn Baldwin nvmf_send_generic_error(nc, NVME_SC_COMMAND_SEQUENCE_ERROR); 584*a8089ea5SJohn Baldwin goto error; 585*a8089ea5SJohn Baldwin } 586*a8089ea5SJohn Baldwin 587*a8089ea5SJohn Baldwin error = nvmf_finish_accept(nc, io_controller->cntlid); 588*a8089ea5SJohn Baldwin if (error != 0) { 589*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 590*a8089ea5SJohn Baldwin warnc(error, "Failed to send CONNECT response"); 591*a8089ea5SJohn Baldwin goto error; 592*a8089ea5SJohn Baldwin } 593*a8089ea5SJohn Baldwin 594*a8089ea5SJohn Baldwin ioc = io_controller; 595*a8089ea5SJohn Baldwin ioc->active_io_queues++; 596*a8089ea5SJohn Baldwin ioc->io_qpairs[qid - 1] = qp; 597*a8089ea5SJohn Baldwin ioc->io_sockets[qid - 1] = s; 598*a8089ea5SJohn Baldwin pthread_mutex_unlock(&io_na_mutex); 599*a8089ea5SJohn Baldwin 600*a8089ea5SJohn Baldwin nvmf_free_capsule(nc); 601*a8089ea5SJohn Baldwin 602*a8089ea5SJohn Baldwin handle_io_qpair(ioc, qp, qid); 603*a8089ea5SJohn Baldwin return; 604*a8089ea5SJohn Baldwin 605*a8089ea5SJohn Baldwin error: 606*a8089ea5SJohn Baldwin nvmf_free_capsule(nc); 607*a8089ea5SJohn Baldwin close(s); 608*a8089ea5SJohn Baldwin } 609*a8089ea5SJohn Baldwin 610*a8089ea5SJohn Baldwin static void * 611*a8089ea5SJohn Baldwin io_socket_thread(void *arg) 612*a8089ea5SJohn Baldwin { 613*a8089ea5SJohn Baldwin struct nvmf_fabric_connect_data data; 614*a8089ea5SJohn Baldwin struct nvmf_qpair_params qparams; 615*a8089ea5SJohn Baldwin const struct nvmf_fabric_connect_cmd *cmd; 616*a8089ea5SJohn Baldwin struct nvmf_capsule *nc; 617*a8089ea5SJohn Baldwin struct nvmf_qpair *qp; 618*a8089ea5SJohn Baldwin int s; 619*a8089ea5SJohn Baldwin 620*a8089ea5SJohn Baldwin 
pthread_detach(pthread_self()); 621*a8089ea5SJohn Baldwin 622*a8089ea5SJohn Baldwin s = (intptr_t)arg; 623*a8089ea5SJohn Baldwin memset(&qparams, 0, sizeof(qparams)); 624*a8089ea5SJohn Baldwin qparams.tcp.fd = s; 625*a8089ea5SJohn Baldwin 626*a8089ea5SJohn Baldwin nc = NULL; 627*a8089ea5SJohn Baldwin qp = nvmf_accept(io_na, &qparams, &nc, &data); 628*a8089ea5SJohn Baldwin if (qp == NULL) { 629*a8089ea5SJohn Baldwin warnx("Failed to create I/O qpair: %s", 630*a8089ea5SJohn Baldwin nvmf_association_error(io_na)); 631*a8089ea5SJohn Baldwin goto error; 632*a8089ea5SJohn Baldwin } 633*a8089ea5SJohn Baldwin 634*a8089ea5SJohn Baldwin if (kernel_io) { 635*a8089ea5SJohn Baldwin ctl_handoff_qpair(qp, nvmf_capsule_sqe(nc), &data); 636*a8089ea5SJohn Baldwin goto error; 637*a8089ea5SJohn Baldwin } 638*a8089ea5SJohn Baldwin 639*a8089ea5SJohn Baldwin if (strcmp(data.subnqn, nqn) != 0) { 640*a8089ea5SJohn Baldwin warn("I/O qpair with invalid SubNQN: %.*s", 641*a8089ea5SJohn Baldwin (int)sizeof(data.subnqn), data.subnqn); 642*a8089ea5SJohn Baldwin nvmf_connect_invalid_parameters(nc, true, 643*a8089ea5SJohn Baldwin offsetof(struct nvmf_fabric_connect_data, subnqn)); 644*a8089ea5SJohn Baldwin goto error; 645*a8089ea5SJohn Baldwin } 646*a8089ea5SJohn Baldwin 647*a8089ea5SJohn Baldwin /* Is this an admin or I/O queue pair? 
*/ 648*a8089ea5SJohn Baldwin cmd = nvmf_capsule_sqe(nc); 649*a8089ea5SJohn Baldwin if (cmd->qid == 0) 650*a8089ea5SJohn Baldwin connect_admin_qpair(s, qp, nc, &data); 651*a8089ea5SJohn Baldwin else 652*a8089ea5SJohn Baldwin connect_io_qpair(s, qp, nc, &data, le16toh(cmd->qid)); 653*a8089ea5SJohn Baldwin nvmf_free_qpair(qp); 654*a8089ea5SJohn Baldwin return (NULL); 655*a8089ea5SJohn Baldwin 656*a8089ea5SJohn Baldwin error: 657*a8089ea5SJohn Baldwin if (nc != NULL) 658*a8089ea5SJohn Baldwin nvmf_free_capsule(nc); 659*a8089ea5SJohn Baldwin if (qp != NULL) 660*a8089ea5SJohn Baldwin nvmf_free_qpair(qp); 661*a8089ea5SJohn Baldwin close(s); 662*a8089ea5SJohn Baldwin return (NULL); 663*a8089ea5SJohn Baldwin } 664*a8089ea5SJohn Baldwin 665*a8089ea5SJohn Baldwin void 666*a8089ea5SJohn Baldwin handle_io_socket(int s) 667*a8089ea5SJohn Baldwin { 668*a8089ea5SJohn Baldwin pthread_t thr; 669*a8089ea5SJohn Baldwin int error; 670*a8089ea5SJohn Baldwin 671*a8089ea5SJohn Baldwin error = pthread_create(&thr, NULL, io_socket_thread, 672*a8089ea5SJohn Baldwin (void *)(uintptr_t)s); 673*a8089ea5SJohn Baldwin if (error != 0) { 674*a8089ea5SJohn Baldwin warnc(error, "Failed to create I/O qpair thread"); 675*a8089ea5SJohn Baldwin close(s); 676*a8089ea5SJohn Baldwin } 677*a8089ea5SJohn Baldwin } 678