/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;
	struct nvmf_qpair *qp;
	struct cidset *cids;

	bool admin;
	bool sq_flow_control;
	uint16_t qid;
	u_int qsize;
	uint16_t sqhd;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct task datamove_task;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

	struct mtx lock;

	char name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void	nvmft_datamove_task(void *context, int pending);

static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}

static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

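	/*
	 * Mark the command ID busy.  BIT_TEST_SET_ATOMIC() returns true
	 * if the bit was already set, i.e. a command with the same CID
	 * is still outstanding on this queue.  The bit is cleared again
	 * in nvmft_send_response() or nvmft_command_completed().
	 */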
	/* Don't bother byte-swapping CID. */
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = nvlist_get_bool(params, "admin");
	qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
	qp->qsize = nvlist_get_number(params, "qsize");
	qp->qid = qid;
	qp->sqhd = nvlist_get_number(params, "sqhd");
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	STAILQ_INIT(&qp->datamove_queue);
	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

	qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct nvmf_qpair *nq;
	union ctl_io *io;

	STAILQ_INIT(&datamove_queue);
	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);

	while (!STAILQ_EMPTY(&datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
		STAILQ_REMOVE_HEAD(&datamove_queue, links);
		nvmft_abort_datamove(io);
	}
	nvmft_drain_task(&qp->datamove_task);
}

void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}

uint32_t
nvmft_max_ioccsz(struct nvmft_qpair *qp)
{
	return (nvmf_max_ioccsz(qp->qp));
}

static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

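	/*
	 * The reference taken above keeps 'nq' valid after the lock is
	 * dropped; it is released once the completion capsule has been
	 * transmitted.
	 */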
	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
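 * (for example, capsules that fail validation or reuse a CID that is
 * still in flight).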
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	/* Use the referenced 'nq'; qp->qp may be cleared once the lock is dropped. */
	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
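	/*
	 * IPO is the byte offset of the offending field; IATTR is 1 if
	 * that offset is relative to the start of the CONNECT data and
	 * 0 if it is relative to the start of the SQE.
	 */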
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}

void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
	bool enqueue_task;

	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		nvmft_abort_datamove(io);
		return;
	}
	enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
	STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&qp->lock);
	if (enqueue_task)
		nvmft_enqueue_task(&qp->datamove_task);
}

static void
nvmft_datamove_task(void *context, int pending __unused)
{
	struct nvmft_qpair *qp = context;
	union ctl_io *io;
	bool abort;

	mtx_lock(&qp->lock);
	while (!STAILQ_EMPTY(&qp->datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
		STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
		abort = (qp->qp == NULL);
		mtx_unlock(&qp->lock);
		if (abort)
			nvmft_abort_datamove(io);
		else
			nvmft_handle_datamove(io);
		mtx_lock(&qp->lock);
	}
	mtx_unlock(&qp->lock);
}