/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

struct nvmf_host_command {
	struct nvmf_request *req;
	TAILQ_ENTRY(nvmf_host_command) link;
	uint16_t cid;
};

struct nvmf_host_qpair {
	struct nvmf_softc *sc;
	struct nvmf_qpair *qp;

	bool	sq_flow_control;
	bool	shutting_down;
	u_int	allocating;
	u_int	num_commands;
	uint16_t sqhd;
	uint16_t sqtail;
	uint64_t submitted;

	struct mtx lock;

	TAILQ_HEAD(, nvmf_host_command) free_commands;
	STAILQ_HEAD(, nvmf_request) pending_requests;

	/* Indexed by cid. */
	struct nvmf_host_command **active_commands;

	char	name[16];
	struct sysctl_ctx_list sysctl_ctx;
};

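/*
 * Allocate a request and its transport command capsule for the given
 * SQE.  The qp->allocating counter keeps nvmf_shutdown_qp() from
 * tearing down the queue pair while an allocation against it is
 * still in progress.
 */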
struct nvmf_request *
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));

	req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
	if (req == NULL)
		return (NULL);

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		free(req, M_NVMF);
		return (NULL);
	}
	qp->allocating++;
	MPASS(qp->allocating != 0);
	mtx_unlock(&qp->lock);

	req->qp = qp;
	req->cb = cb;
	req->cb_arg = cb_arg;
	req->nc = nvmf_allocate_command(nq, sqe, how);
	if (req->nc == NULL) {
		free(req, M_NVMF);
		req = NULL;
	}

	mtx_lock(&qp->lock);
	qp->allocating--;
	if (qp->allocating == 0 && qp->shutting_down)
		wakeup(qp);
	mtx_unlock(&qp->lock);

	return (req);
}

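/*
 * Complete an aborted request by synthesizing a completion with a
 * path-related "Command Aborted By Host" status and invoking the
 * request's callback.
 */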
static void
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
{
	struct nvme_completion cqe;

	memset(&cqe, 0, sizeof(cqe));
	cqe.cid = cid;
	cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
	    NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
	req->cb(req->cb_arg, &cqe);
}

void
nvmf_free_request(struct nvmf_request *req)
{
	if (req->nc != NULL)
		nvmf_free_capsule(req->nc);
	free(req, M_NVMF);
}

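/*
 * Transmit a request on its reserved command slot.  The slot's CID is
 * stamped into the SQE before the capsule is handed to the transport;
 * a transmit failure disconnects the association.
 */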
static void
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
{
	struct nvmf_softc *sc = qp->sc;
	struct nvme_command *sqe;
	struct nvmf_capsule *nc;
	int error;

	nc = cmd->req->nc;
	sqe = nvmf_capsule_sqe(nc);

	/*
	 * NB: Don't bother byte-swapping the cid so that receive
	 * doesn't have to swap.
	 */
	sqe->cid = cmd->cid;

	error = nvmf_transmit_capsule(nc);
	if (error != 0) {
		device_printf(sc->dev,
		    "failed to transmit capsule: %d, disconnecting\n", error);
		nvmf_disconnect(sc);
		return;
	}

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_tx_traffic, 1);
}

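/* Transport callback invoked when an error occurs on the queue pair. */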
static void
nvmf_qp_error(void *arg, int error)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;

	/* Ignore simple close of queue pairs during shutdown. */
	if (!(sc->detaching && error == 0))
		device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
		    qp->name);
	nvmf_disconnect(sc);
}

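/*
 * Transport callback invoked for each received completion capsule.
 * Look up the active command by CID, complete the original request,
 * and either recycle the command slot or reuse it immediately for
 * the next pending request.
 */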
static void
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	const struct nvme_completion *cqe;
	uint16_t cid;

	cqe = nvmf_capsule_cqe(nc);

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_rx_traffic, 1);

	/*
	 * NB: Don't bother byte-swapping the cid as transmit doesn't
	 * swap either.
	 */
	cid = cqe->cid;

	if (cid >= qp->num_commands) {
		device_printf(sc->dev,
		    "received invalid CID %u, disconnecting\n", cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	/*
	 * If the queue has been shut down due to an error, silently
	 * drop the response.
	 */
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		device_printf(sc->dev,
		    "received completion for CID %u on shutdown %s\n", cid,
		    qp->name);
		mtx_unlock(&qp->lock);
		nvmf_free_capsule(nc);
		return;
	}

	cmd = qp->active_commands[cid];
	if (cmd == NULL) {
		mtx_unlock(&qp->lock);
		device_printf(sc->dev,
		    "received completion for inactive CID %u, disconnecting\n",
		    cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
	req = cmd->req;
	cmd->req = NULL;
	if (STAILQ_EMPTY(&qp->pending_requests)) {
		qp->active_commands[cid] = NULL;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
		mtx_unlock(&qp->lock);
	} else {
		cmd->req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		qp->submitted++;
		mtx_unlock(&qp->lock);
		nvmf_dispatch_command(qp, cmd);
	}

	req->cb(req->cb_arg, cqe);
	nvmf_free_capsule(nc);
	nvmf_free_request(req);
}

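/*
 * Export per-queue statistics: the admin queue under the device's
 * sysctl tree, I/O queues under the shared "ioq" node.
 */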
static void
nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
    bool admin, u_int qid)
{
	struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *list;
	char name[8];

	if (admin) {
		oid = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
		    "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
	} else {
		snprintf(name, sizeof(name), "%u", qid);
		oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
	}
	list = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
	    NULL, qp->num_commands + 1, "Number of entries in queue");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
	    0, "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
	    0, "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
	    &qp->submitted, 0, "Number of commands submitted");
}

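/*
 * Create a host queue pair from handed-off connection parameters,
 * pre-allocating a command slot for each usable queue entry.
 */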
struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
    struct nvmf_handoff_qpair_params *handoff, const char *name, u_int qid)
{
	struct nvmf_host_command *cmd, *ncmd;
	struct nvmf_host_qpair *qp;
	u_int i;

	qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
	qp->sc = sc;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
	(void)sysctl_ctx_init(&qp->sysctl_ctx);

	/*
	 * Allocate a spare command slot for each pending AER command
	 * on the admin queue.
	 */
	qp->num_commands = handoff->qsize - 1;
	if (handoff->admin)
		qp->num_commands += sc->num_aer;

	qp->active_commands = malloc(sizeof(*qp->active_commands) *
	    qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&qp->free_commands);
	for (i = 0; i < qp->num_commands; i++) {
		cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
		cmd->cid = i;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
	}
	STAILQ_INIT(&qp->pending_requests);

	qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
	    qp, nvmf_receive_capsule, qp);
	if (qp->qp == NULL) {
		(void)sysctl_ctx_free(&qp->sysctl_ctx);
		TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
			TAILQ_REMOVE(&qp->free_commands, cmd, link);
			free(cmd, M_NVMF);
		}
		free(qp->active_commands, M_NVMF);
		mtx_destroy(&qp->lock);
		free(qp, M_NVMF);
		return (NULL);
	}

	nvmf_sysctls_qp(sc, qp, handoff->admin, qid);

	return (qp);
}

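/*
 * Tear down the transport queue pair and abort all active and
 * pending requests.  Safe to call more than once; later callers
 * wait for an in-progress shutdown to complete.
 */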
void
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;

	if (nq == NULL) {
		while (qp->shutting_down)
			mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
		mtx_unlock(&qp->lock);
		return;
	}
	qp->shutting_down = true;
	while (qp->allocating != 0)
		mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
	mtx_unlock(&qp->lock);

	nvmf_free_qpair(nq);

	/*
	 * Abort outstanding requests.  Active requests will have
	 * their I/O completions invoked and associated capsules freed
	 * by the transport layer via nvmf_free_qpair.  Pending
	 * requests must have their I/O completion invoked via
	 * nvmf_abort_capsule_data.
	 */
	for (u_int i = 0; i < qp->num_commands; i++) {
		cmd = qp->active_commands[i];
		if (cmd != NULL) {
			if (!cmd->req->aer)
				printf("%s: aborted active command %p (CID %u)\n",
				    __func__, cmd->req, cmd->cid);

			/* This was freed by nvmf_free_qpair. */
			cmd->req->nc = NULL;
			nvmf_abort_request(cmd->req, cmd->cid);
			nvmf_free_request(cmd->req);
			free(cmd, M_NVMF);
		}
	}
	while (!STAILQ_EMPTY(&qp->pending_requests)) {
		req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		if (!req->aer)
			printf("%s: aborted pending command %p\n", __func__,
			    req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
	}

	mtx_lock(&qp->lock);
	qp->shutting_down = false;
	mtx_unlock(&qp->lock);
	wakeup(qp);
}

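/* Release all resources associated with a queue pair. */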
void
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd, *ncmd;

	nvmf_shutdown_qp(qp);
	(void)sysctl_ctx_free(&qp->sysctl_ctx);

	TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
		TAILQ_REMOVE(&qp->free_commands, cmd, link);
		free(cmd, M_NVMF);
	}
	free(qp->active_commands, M_NVMF);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMF);
}

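/*
 * Submit a request, either dispatching it on a free command slot or
 * queueing it until a slot is released by a completion.
 */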
void
nvmf_submit_request(struct nvmf_request *req)
{
	struct nvmf_host_qpair *qp;
	struct nvmf_host_command *cmd;

	qp = req->qp;
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		printf("%s: aborted pending command %p\n", __func__, req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
		return;
	}
	cmd = TAILQ_FIRST(&qp->free_commands);
	if (cmd == NULL) {
		/*
		 * Queue this request.  It will be sent once enough
		 * in-flight requests have completed.
		 */
		STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
		mtx_unlock(&qp->lock);
		return;
	}

	TAILQ_REMOVE(&qp->free_commands, cmd, link);
	KASSERT(qp->active_commands[cmd->cid] == NULL,
	    ("%s: CID already busy", __func__));
	qp->active_commands[cmd->cid] = cmd;
	cmd->req = req;
	qp->submitted++;
	mtx_unlock(&qp->lock);
	nvmf_dispatch_command(qp, cmd);
}
427