/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;
	struct nvmf_qpair *qp;
	struct cidset *cids;

	bool admin;
	bool sq_flow_control;
	uint16_t qid;
	u_int qsize;
	uint16_t sqhd;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct task datamove_task;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

	struct mtx lock;

	char name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void	nvmft_datamove_task(void *context, int pending);

static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}

static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	/* Don't bother byte-swapping CID. */
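	/*
	 * BIT_TEST_SET_ATOMIC() sets the CID's bit in the bitmask and
	 * returns whether it was already set; a set bit means a
	 * command with the same ID is still outstanding on this queue.
	 */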
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = nvlist_get_bool(params, "admin");
	qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
	qp->qsize = nvlist_get_number(params, "qsize");
	qp->qid = qid;
	qp->sqhd = nvlist_get_number(params, "sqhd");
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	STAILQ_INIT(&qp->datamove_queue);
	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

	qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct nvmf_qpair *nq;
	union ctl_io *io;

	STAILQ_INIT(&datamove_queue);
	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);

	while (!STAILQ_EMPTY(&datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
		STAILQ_REMOVE_HEAD(&datamove_queue, links);
		nvmft_abort_datamove(io);
	}
	nvmft_drain_task(&qp->datamove_task);
}

void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}

static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

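/*
 * Mark a command's CID as no longer in use.  Unlike
 * nvmft_send_response(), no completion is transmitted to the host.
 */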
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	/* Use the referenced 'nq'; qp->qp may be cleared by a shutdown. */
	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

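/*
 * Fail a Fabrics CONNECT command due to an invalid parameter,
 * reporting the byte offset of the offending field.  'data' indicates
 * whether the offset refers to the CONNECT data (true) or to the SQE
 * itself (false).
 */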
void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}

void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
	bool enqueue_task;

	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		nvmft_abort_datamove(io);
		return;
	}
	enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
	STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&qp->lock);
	if (enqueue_task)
		nvmft_enqueue_task(&qp->datamove_task);
}

static void
nvmft_datamove_task(void *context, int pending __unused)
{
	struct nvmft_qpair *qp = context;
	union ctl_io *io;
	bool abort;

	mtx_lock(&qp->lock);
	while (!STAILQ_EMPTY(&qp->datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
		STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
		abort = (qp->qp == NULL);
		mtx_unlock(&qp->lock);
		if (abort)
			nvmft_abort_datamove(io);
		else
			nvmft_handle_datamove(io);
		mtx_lock(&qp->lock);
	}
	mtx_unlock(&qp->lock);
}