/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define	NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;
	struct nvmf_qpair *qp;
	struct cidset *cids;

	bool	admin;
	bool	sq_flow_control;
	uint16_t qid;
	u_int	qsize;
	uint16_t sqhd;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct task datamove_task;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

	struct mtx lock;

	char	name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void	nvmft_datamove_task(void *context, int pending);

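/*
 * Transport error callback for a queue pair.  Logs the error
 * (ignoring ECONNRESET, see below) and notifies the controller so it
 * can tear down the association.
 */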
static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}

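/*
 * Receive callback: validates an incoming command capsule, checks for
 * a command ID conflict, and dispatches the command to the admin or
 * I/O handler.  Capsules rejected here are completed with an error
 * status and freed.
 */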
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	/* Don't bother byte-swapping CID. */
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

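/*
 * Allocate and initialize a queue pair from transport parameters.
 * Returns NULL if the transport-level queue pair cannot be created.
 */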
struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = nvlist_get_bool(params, "admin");
	qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
	qp->qsize = nvlist_get_number(params, "qsize");
	qp->qid = qid;
	qp->sqhd = nvlist_get_number(params, "sqhd");
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	STAILQ_INIT(&qp->datamove_queue);
	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

	qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

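/*
 * Shut down a queue pair: detach the transport queue pair under the
 * lock, drop the initial reference (freeing it once any transient
 * references drain), and abort any queued datamove requests.
 */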
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct nvmf_qpair *nq;
	union ctl_io *io;

	STAILQ_INIT(&datamove_queue);
	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);

	while (!STAILQ_EMPTY(&datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
		STAILQ_REMOVE_HEAD(&datamove_queue, links);
		nvmft_abort_datamove(io);
	}
	nvmft_drain_task(&qp->datamove_task);
}

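/*
 * Release a queue pair and all of its resources.  Safe to call on a
 * queue pair that has already been shut down.
 */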
void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

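/* Simple accessors. */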
struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}

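/*
 * Transmit a completion back to the host.  Holds a transient
 * reference on the transport queue pair so that a concurrent
 * shutdown cannot free it mid-transmit, and updates SQHD when SQ
 * flow control is enabled.
 */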
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

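/*
 * Release a command's CID for reuse without transmitting a
 * completion capsule from this layer.
 */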
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

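/*
 * Send a completion for a command, releasing its CID for reuse by
 * subsequent commands.
 */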
int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

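/*
 * Initialize a completion queue entry, copying the CID from the
 * command capsule and setting the given status.
 */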
void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

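/*
 * Complete a command with an error of the given status code type
 * and status code.
 */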
int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

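/* Complete a command with an error of the generic status code type. */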
int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear the CID in qp->cids and is used for
 * errors sent before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

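/* Complete a command with a successful status. */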
int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

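/*
 * Initialize a Fabrics CONNECT response, copying the CID from the
 * CONNECT command and setting the given status.
 */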
static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

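/*
 * Transmit a CONNECT response capsule.  Like _nvmft_send_response(),
 * this holds a transient reference on the transport queue pair while
 * transmitting.
 */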
static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

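/*
 * Fail a CONNECT command with the given status code type and status
 * code.
 */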
void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

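/*
 * Fail a CONNECT command due to an invalid parameter, reporting the
 * offending offset and whether it lies in the SQE ('data' false) or
 * the data ('data' true).
 */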
void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

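/*
 * Complete a CONNECT command successfully, binding the queue pair to
 * its controller and reporting the controller ID (and the initial
 * SQHD when SQ flow control is enabled) to the host.
 */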
int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}

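/*
 * Queue a CTL datamove request for this queue pair, deferring it to
 * a task rather than handling it inline.  The task is only enqueued
 * when the queue transitions from empty to non-empty, since the task
 * drains the entire queue.
 */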
void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
	bool enqueue_task;

	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		nvmft_abort_datamove(io);
		return;
	}
	enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
	STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&qp->lock);
	if (enqueue_task)
		nvmft_enqueue_task(&qp->datamove_task);
}

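/*
 * Task handler that drains the datamove queue, dropping the lock
 * around each request.  Requests that race with a shutdown are
 * aborted rather than handled.
 */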
static void
nvmft_datamove_task(void *context, int pending __unused)
{
	struct nvmft_qpair *qp = context;
	union ctl_io *io;
	bool abort;

	mtx_lock(&qp->lock);
	while (!STAILQ_EMPTY(&qp->datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
		STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
		abort = (qp->qp == NULL);
		mtx_unlock(&qp->lock);
		if (abort)
			nvmft_abort_datamove(io);
		else
			nvmft_handle_datamove(io);
		mtx_lock(&qp->lock);
	}
	mtx_unlock(&qp->lock);
}