/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/_bitset.h>
#include <sys/bitset.h>
#include <sys/lock.h>
#include <sys/mutex.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_var.h>

/*
 * A bitmask of command ID values.  This is used to detect duplicate
 * commands with the same ID.
 */
#define	NUM_CIDS	(UINT16_MAX + 1)
BITSET_DEFINE(cidset, NUM_CIDS);

struct nvmft_qpair {
	struct nvmft_controller *ctrlr;
	struct nvmf_qpair *qp;
	struct cidset *cids;

	bool	admin;
	bool	sq_flow_control;
	uint16_t qid;
	u_int	qsize;
	uint16_t sqhd;
	volatile u_int qp_refs;		/* Internal references on 'qp'. */

	struct task datamove_task;
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;

	struct mtx lock;

	char	name[16];
};

static int	_nvmft_send_generic_error(struct nvmft_qpair *qp,
    struct nvmf_capsule *nc, uint8_t sc_status);
static void	nvmft_datamove_task(void *context, int pending);

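/*
 * Transport error callback, invoked by the transport layer when an
 * error occurs on the queue pair's connection.
 */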
static void
nvmft_qpair_error(void *arg, int error)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;

	/*
	 * XXX: The Linux TCP initiator sends a RST immediately after
	 * the FIN, so treat ECONNRESET as plain EOF to avoid spurious
	 * errors on shutdown.
	 */
	if (error == ECONNRESET)
		error = 0;

	if (error != 0)
		nvmft_printf(ctrlr, "error %d on %s\n", error, qp->name);
	nvmft_controller_error(ctrlr, qp, error);
}

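/*
 * Receive callback.  Validates an incoming command capsule, rejects
 * commands whose CID is already in use, and dispatches the rest to
 * the admin or I/O command handlers.
 */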
static void
nvmft_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmft_qpair *qp = arg;
	struct nvmft_controller *ctrlr = qp->ctrlr;
	const struct nvme_command *cmd;
	uint8_t sc_status;

	cmd = nvmf_capsule_sqe(nc);
	if (ctrlr == NULL) {
		printf("NVMFT: %s received CID %u opcode %u on newborn queue\n",
		    qp->name, le16toh(cmd->cid), cmd->opc);
		nvmf_free_capsule(nc);
		return;
	}

	sc_status = nvmf_validate_command_capsule(nc);
	if (sc_status != NVME_SC_SUCCESS) {
		_nvmft_send_generic_error(qp, nc, sc_status);
		nvmf_free_capsule(nc);
		return;
	}

	/* Don't bother byte-swapping CID. */
	if (BIT_TEST_SET_ATOMIC(NUM_CIDS, cmd->cid, qp->cids)) {
		_nvmft_send_generic_error(qp, nc, NVME_SC_COMMAND_ID_CONFLICT);
		nvmf_free_capsule(nc);
		return;
	}

	if (qp->admin)
		nvmft_handle_admin_command(ctrlr, nc);
	else
		nvmft_handle_io_command(qp, qp->qid, nc);
}

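/*
 * Allocate and initialize a queue pair from the transport parameters
 * in 'params'.  Returns NULL if the transport-level queue pair cannot
 * be allocated.
 */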
struct nvmft_qpair *
nvmft_qpair_init(enum nvmf_trtype trtype, const nvlist_t *params, uint16_t qid,
    const char *name)
{
	struct nvmft_qpair *qp;

	qp = malloc(sizeof(*qp), M_NVMFT, M_WAITOK | M_ZERO);
	qp->admin = nvlist_get_bool(params, "admin");
	qp->sq_flow_control = nvlist_get_bool(params, "sq_flow_control");
	qp->qsize = nvlist_get_number(params, "qsize");
	qp->qid = qid;
	qp->sqhd = nvlist_get_number(params, "sqhd");
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmft qp", NULL, MTX_DEF);
	qp->cids = BITSET_ALLOC(NUM_CIDS, M_NVMFT, M_WAITOK | M_ZERO);
	STAILQ_INIT(&qp->datamove_queue);
	TASK_INIT(&qp->datamove_task, 0, nvmft_datamove_task, qp);

	qp->qp = nvmf_allocate_qpair(trtype, true, params, nvmft_qpair_error,
	    qp, nvmft_receive_capsule, qp);
	if (qp->qp == NULL) {
		mtx_destroy(&qp->lock);
		free(qp->cids, M_NVMFT);
		free(qp, M_NVMFT);
		return (NULL);
	}

	refcount_init(&qp->qp_refs, 1);
	return (qp);
}

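/*
 * Shut down a queue pair: drop the internal reference on the
 * transport queue pair, abort any queued datamove requests, and
 * drain the datamove task.
 */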
void
nvmft_qpair_shutdown(struct nvmft_qpair *qp)
{
	STAILQ_HEAD(, ctl_io_hdr) datamove_queue;
	struct nvmf_qpair *nq;
	union ctl_io *io;

	STAILQ_INIT(&datamove_queue);
	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;
	STAILQ_CONCAT(&datamove_queue, &qp->datamove_queue);
	mtx_unlock(&qp->lock);
	if (nq != NULL && refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);

	while (!STAILQ_EMPTY(&datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&datamove_queue);
		STAILQ_REMOVE_HEAD(&datamove_queue, links);
		nvmft_abort_datamove(io);
	}
	nvmft_drain_task(&qp->datamove_task);
}

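/*
 * Tear down a queue pair and free its resources.
 */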
void
nvmft_qpair_destroy(struct nvmft_qpair *qp)
{
	nvmft_qpair_shutdown(qp);
	mtx_destroy(&qp->lock);
	free(qp->cids, M_NVMFT);
	free(qp, M_NVMFT);
}

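/* Simple accessors. */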
struct nvmft_controller *
nvmft_qpair_ctrlr(struct nvmft_qpair *qp)
{
	return (qp->ctrlr);
}

uint16_t
nvmft_qpair_id(struct nvmft_qpair *qp)
{
	return (qp->qid);
}

const char *
nvmft_qpair_name(struct nvmft_qpair *qp)
{
	return (qp->name);
}

uint32_t
nvmft_max_ioccsz(struct nvmft_qpair *qp)
{
	return (nvmf_max_ioccsz(qp->qp));
}

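/*
 * Transmit a completion.  Holds a reference on the transport queue
 * pair so that a concurrent shutdown cannot free it mid-send, and
 * advances SQHD when SQ flow control is enabled.
 */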
static int
_nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	struct nvme_completion cpl;
	struct nvmf_qpair *nq;
	struct nvmf_capsule *rc;
	int error;

	memcpy(&cpl, cqe, sizeof(cpl));
	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);

	/* Set SQHD. */
	if (qp->sq_flow_control) {
		qp->sqhd = (qp->sqhd + 1) % qp->qsize;
		cpl.sqhd = htole16(qp->sqhd);
	} else
		cpl.sqhd = 0;
	mtx_unlock(&qp->lock);

	rc = nvmf_allocate_response(nq, &cpl, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

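/*
 * Mark a command as completed without sending a response, releasing
 * its CID for reuse.
 */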
void
nvmft_command_completed(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cmd->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cmd->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cmd->cid, qp->cids);
}

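/*
 * Send a completion for a command, releasing its CID for reuse.
 */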
int
nvmft_send_response(struct nvmft_qpair *qp, const void *cqe)
{
	const struct nvme_completion *cpl = cqe;

	/* Don't bother byte-swapping CID. */
	KASSERT(BIT_ISSET(NUM_CIDS, cpl->cid, qp->cids),
	    ("%s: CID %u not busy", __func__, cpl->cid));

	BIT_CLR_ATOMIC(NUM_CIDS, cpl->cid, qp->cids);
	return (_nvmft_send_response(qp, cqe));
}

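/*
 * Initialize a completion queue entry for the command in 'nc' with
 * the given status.
 */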
void
nvmft_init_cqe(void *cqe, struct nvmf_capsule *nc, uint16_t status)
{
	struct nvme_completion *cpl = cqe;
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	memset(cpl, 0, sizeof(*cpl));
	cpl->cid = cmd->cid;
	cpl->status = htole16(status);
}

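/*
 * Helpers to build and send error (or success) completions for a
 * command capsule.
 */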
int
nvmft_send_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_type, uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (nvmft_send_response(qp, &cpl));
}

int
nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	return (nvmft_send_error(qp, nc, NVME_SCT_GENERIC, sc_status));
}

/*
 * This version doesn't clear CID in qp->cids and is used for errors
 * before the CID is validated.
 */
static int
_nvmft_send_generic_error(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    uint8_t sc_status)
{
	struct nvme_completion cpl;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, NVME_SCT_GENERIC) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_cqe(&cpl, nc, status);
	return (_nvmft_send_response(qp, &cpl));
}

int
nvmft_send_success(struct nvmft_qpair *qp, struct nvmf_capsule *nc)
{
	return (nvmft_send_generic_error(qp, nc, NVME_SC_SUCCESS));
}

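/*
 * Initialize a Fabrics CONNECT response for the given CONNECT command
 * with the given status.
 */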
static void
nvmft_init_connect_rsp(struct nvmf_fabric_connect_rsp *rsp,
    const struct nvmf_fabric_connect_cmd *cmd, uint16_t status)
{
	memset(rsp, 0, sizeof(*rsp));
	rsp->cid = cmd->cid;
	rsp->status = htole16(status);
}

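/*
 * Transmit a CONNECT response.  CONNECT replies bypass the CID bitmap
 * and SQHD accounting in _nvmft_send_response(), but still hold a
 * reference on the transport queue pair across the send.
 */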
static int
nvmft_send_connect_response(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_rsp *rsp)
{
	struct nvmf_capsule *rc;
	struct nvmf_qpair *nq;
	int error;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		return (ENOTCONN);
	}
	refcount_acquire(&qp->qp_refs);
	mtx_unlock(&qp->lock);

	/*
	 * Use the referenced 'nq' rather than re-reading qp->qp, which
	 * a concurrent shutdown may have cleared after the unlock.
	 */
	rc = nvmf_allocate_response(nq, rsp, M_WAITOK);
	error = nvmf_transmit_capsule(rc);
	nvmf_free_capsule(rc);

	if (refcount_release(&qp->qp_refs))
		nvmf_free_qpair(nq);
	return (error);
}

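/*
 * Reply to a CONNECT command with the given error status.
 */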
void
nvmft_connect_error(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, uint8_t sc_type,
    uint8_t sc_status)
{
	struct nvmf_fabric_connect_rsp rsp;
	uint16_t status;

	status = NVMEF(NVME_STATUS_SCT, sc_type) |
	    NVMEF(NVME_STATUS_SC, sc_status);
	nvmft_init_connect_rsp(&rsp, cmd, status);
	nvmft_send_connect_response(qp, &rsp);
}

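/*
 * Reject a CONNECT command due to an invalid parameter, identifying
 * the offending field by its offset within the command ('data' is
 * false) or within the command's data buffer ('data' is true).
 */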
void
nvmft_connect_invalid_parameters(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, bool data, uint16_t offset)
{
	struct nvmf_fabric_connect_rsp rsp;

	nvmft_init_connect_rsp(&rsp, cmd,
	    NVMEF(NVME_STATUS_SCT, NVME_SCT_COMMAND_SPECIFIC) |
	    NVMEF(NVME_STATUS_SC, NVMF_FABRIC_SC_INVALID_PARAM));
	rsp.status_code_specific.invalid.ipo = htole16(offset);
	rsp.status_code_specific.invalid.iattr = data ? 1 : 0;
	nvmft_send_connect_response(qp, &rsp);
}

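/*
 * Complete a successful CONNECT: bind the queue pair to its
 * controller and send a CONNECT response reporting the controller ID.
 */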
int
nvmft_finish_accept(struct nvmft_qpair *qp,
    const struct nvmf_fabric_connect_cmd *cmd, struct nvmft_controller *ctrlr)
{
	struct nvmf_fabric_connect_rsp rsp;

	qp->ctrlr = ctrlr;
	nvmft_init_connect_rsp(&rsp, cmd, 0);
	if (qp->sq_flow_control)
		rsp.sqhd = htole16(qp->sqhd);
	else
		rsp.sqhd = htole16(0xffff);
	rsp.status_code_specific.success.cntlid = htole16(ctrlr->cntlid);
	return (nvmft_send_connect_response(qp, &rsp));
}

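/*
 * Queue a CTL datamove request for this queue pair.  Requests are
 * processed in order by the datamove task; the task is only enqueued
 * when the queue transitions from empty to non-empty.
 */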
void
nvmft_qpair_datamove(struct nvmft_qpair *qp, union ctl_io *io)
{
	bool enqueue_task;

	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		nvmft_abort_datamove(io);
		return;
	}
	enqueue_task = STAILQ_EMPTY(&qp->datamove_queue);
	STAILQ_INSERT_TAIL(&qp->datamove_queue, &io->io_hdr, links);
	mtx_unlock(&qp->lock);
	if (enqueue_task)
		nvmft_enqueue_task(&qp->datamove_task);
}

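/*
 * Task handler that drains the datamove queue, aborting requests if
 * the queue pair has been shut down in the meantime.
 */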
static void
nvmft_datamove_task(void *context, int pending __unused)
{
	struct nvmft_qpair *qp = context;
	union ctl_io *io;
	bool abort;

	mtx_lock(&qp->lock);
	while (!STAILQ_EMPTY(&qp->datamove_queue)) {
		io = (union ctl_io *)STAILQ_FIRST(&qp->datamove_queue);
		STAILQ_REMOVE_HEAD(&qp->datamove_queue, links);
		abort = (qp->qp == NULL);
		mtx_unlock(&qp->lock);
		if (abort)
			nvmft_abort_datamove(io);
		else
			nvmft_handle_datamove(io);
		mtx_lock(&qp->lock);
	}
	mtx_unlock(&qp->lock);
}
422