/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define	NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;	/* Set once CONNECT is accepted. */
	struct nvmf_qpair *qp;		/* Transport queue pair; NULL after shutdown. */
	struct cidset *cids;		/* Bitmask of in-flight command IDs. */

	bool	admin;			/* Admin queue rather than I/O queue. */
	bool	sq_flow_control;
	uint16_t qid;
	u_int	qsize;
	uint16_t sqhd;			/* SQ head reported when flow control is on. */
	uint16_t sqtail;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct task datamove_task;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

	struct mtx lock;

	char	name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void	nvmft_datamove_task(void *context, int pending);

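/*
 * Transport error callback.  An error of 0 indicates an orderly
 * connection shutdown (EOF) rather than a failure.
 */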
static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}

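/*
 * Transport receive callback: invoked for each command capsule that
 * arrives on the queue.  Validates the capsule, rejects reuse of a
 * command ID that is still in flight, and dispatches the command to
 * the admin or I/O handler.
 */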
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	/* Don't bother byte-swapping CID. */
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

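/*
 * Construct queue pair state around a connected transport queue pair,
 * presumably handed off from the userland daemon that performed the
 * CONNECT handshake.  Returns NULL if the transport rejects the
 * handoff parameters.
 */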
struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype,
    const struct nvmf_handoff_qpair_params *handoff, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = handoff->admin;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->qsize = handoff->qsize;
	qp->qid = qid;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	STAILQ_INIT(&qp->datamove_queue);
	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

	qp->qp = nvmf_allocate_qpair(trtype, true, handoff, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

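/*
 * Disassociate the transport queue pair, aborting any datamove
 * requests still queued for task context.  Responses sent after this
 * point fail with ENOTCONN.  Safe to call more than once; only the
 * first call finds a non-NULL 'qp->qp'.
 */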
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct nvmf_qpair *nq;
	union ctl_io *io;

	STAILQ_INIT(&datamove_queue);
	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);

	while (!STAILQ_EMPTY(&datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
		STAILQ_REMOVE_HEAD(&datamove_queue, links);
		nvmft_abort_datamove(io);
	}
	nvmft_drain_task(&qp->datamove_task);
}

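/*
 * Release all queue pair resources, shutting the queue down first in
 * case it is still connected.
 */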
void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}

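/*
 * Transmit a completion.  A transient reference on 'qp->qp' keeps the
 * transport queue pair alive across the transmit even if the queue is
 * shut down concurrently.  When SQ flow control is enabled, SQHD is
 * advanced and reported in the completion.
 */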
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

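/*
 * Release a command's CID for reuse without transmitting a completion
 * (the send path, nvmft_send_response(), clears the CID itself).
 */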
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

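/*
 * Release the CID named in the completion, then transmit it.
 */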
int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

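/*
 * Initialize a completion queue entry for the given command capsule
 * and 16-bit status (combined SCT and SC fields).
 */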
void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

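/*
 * Complete a command with an error of the given status code type and
 * status code.
 */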
int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

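/*
 * Transmit a Fabrics CONNECT response.  Like _nvmft_send_response()
 * this takes a transient reference on the transport queue pair, but
 * it does not touch SQHD or the CID set.
 */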
static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	/*
	 * Use the referenced 'nq' rather than re-reading 'qp->qp',
	 * which a concurrent shutdown may clear once the lock is
	 * dropped.
	 */
	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

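/*
 * Fail a CONNECT command with the given status code type and status
 * code.
 */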
void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

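/*
 * Fail a CONNECT command due to an invalid field, identifying the
 * offending byte offset in either the SQE ('data' == false) or the
 * CONNECT data ('data' == true).
 */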
void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

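/*
 * Complete a successful CONNECT: bind the controller to the queue and
 * report the initial SQHD (or 0xffff if SQ flow control is disabled)
 * along with the controller ID.
 */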
int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}

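/*
 * Queue a CTL datamove request for execution in task context.  The
 * task is only enqueued when the queue transitions from empty, as one
 * task run drains the entire queue.
 */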
void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
	bool enqueue_task;

	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		nvmft_abort_datamove(io);
		return;
	}
	enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
	STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&qp->lock);
	if (enqueue_task)
		nvmft_enqueue_task(&qp->datamove_task);
}

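/*
 * Drain the datamove queue, dropping the lock around each request.
 * Requests that raced with a queue shutdown are aborted rather than
 * handled.
 */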
static void
nvmft_datamove_task(void *context, int pending __unused)
{
	struct nvmft_qpair *qp = context;
	union ctl_io *io;
	bool abort;

	mtx_lock(&qp->lock);
	while (!STAILQ_EMPTY(&qp->datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
		STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
		abort = (qp->qp == NULL);
		mtx_unlock(&qp->lock);
		if (abort)
			nvmft_abort_datamove(io);
		else
			nvmft_handle_datamove(io);
		mtx_lock(&qp->lock);
	}
	mtx_unlock(&qp->lock);
}
419