/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define	NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;
	struct nvmf_qpair *qp;
	struct cidset *cids;

	bool	admin;
	bool	sq_flow_control;
	uint16_t qid;
	u_int	qsize;
	uint16_t sqhd;
	uint16_t sqtail;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct task datamove_task;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

	struct mtx lock;

	char	name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void	nvmft_datamove_task(void *context, int pending);

static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}
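
/*
 * Dispatch a newly received command capsule: validate it, reserve its
 * CID in the per-queue bitmap to catch duplicate command IDs, and pass
 * it to the admin or I/O command handler.  Capsules that fail
 * validation or that reuse a busy CID are completed with an error
 * status here.
 */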
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	/* Don't bother byte-swapping CID. */
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype,
    const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = handoff->admin;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->qsize = handoff->qsize;
	qp->qid = qid;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	STAILQ_INIT(&qp->datamove_queue);
	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

	qp->qp = nvmf_allocate_qpair(trtype, true, handoff, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct nvmf_qpair *nq;
	union ctl_io *io;

	STAILQ_INIT(&datamove_queue);
	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);

	while (!STAILQ_EMPTY(&datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
		STAILQ_REMOVE_HEAD(&datamove_queue, links);
		nvmft_abort_datamove(io);
	}
	nvmft_drain_task(&qp->datamove_task);
}

void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}
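
/*
 * Common completion path: copy the caller's CQE, fill in SQHD when SQ
 * flow control is enabled, and transmit the completion capsule.  An
 * internal reference on the transport queue pair is held across the
 * transmit so that a concurrent shutdown cannot free it while it is
 * still in use.
 */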
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}
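
/*
 * CTL datamove requests are not handled in the caller's context.
 * Instead, each request is appended to the queue pair's datamove list
 * and a single task drains the list, aborting requests if the queue
 * pair has been shut down in the meantime.  The task is only enqueued
 * when the list transitions from empty to non-empty.
 */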
void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
	bool enqueue_task;

	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		nvmft_abort_datamove(io);
		return;
	}
	enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
	STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&qp->lock);
	if (enqueue_task)
		nvmft_enqueue_task(&qp->datamove_task);
}

static void
nvmft_datamove_task(void *context, int pending __unused)
{
	struct nvmft_qpair *qp = context;
	union ctl_io *io;
	bool abort;

	mtx_lock(&qp->lock);
	while (!STAILQ_EMPTY(&qp->datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
		STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
		abort = (qp->qp == NULL);
		mtx_unlock(&qp->lock);
		if (abort)
			nvmft_abort_datamove(io);
		else
			nvmft_handle_datamove(io);
		mtx_lock(&qp->lock);
	}
	mtx_unlock(&qp->lock);
}