/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/types.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

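/*
 * A command slot on a queue pair.  Each slot owns a fixed command
 * identifier (CID) and tracks the request currently using that CID.
 */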
struct nvmf_host_command {
	struct nvmf_request *req;
	TAILQ_ENTRY(nvmf_host_command) link;
	uint16_t cid;
};

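/*
 * Host-side state for a single queue pair: submission queue flow
 * control tracking, free and active command slots, and requests
 * waiting for a free slot.
 */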
struct nvmf_host_qpair {
	struct nvmf_softc *sc;
	struct nvmf_qpair *qp;

	bool sq_flow_control;
	bool shutting_down;
	u_int allocating;
	u_int num_commands;
	uint16_t sqhd;
	uint16_t sqtail;
	uint64_t submitted;

	struct mtx lock;

	TAILQ_HEAD(, nvmf_host_command) free_commands;
	STAILQ_HEAD(, nvmf_request) pending_requests;

	/* Indexed by cid. */
	struct nvmf_host_command **active_commands;

	char name[16];
	struct sysctl_ctx_list sysctl_ctx;
};

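/*
 * Allocate a request and its transport capsule for the given SQE.
 * The 'allocating' counter keeps nvmf_shutdown_qp() from freeing the
 * transport queue pair while a capsule allocation that uses it is
 * still in progress.
 */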
struct nvmf_request *
nvmf_allocate_request(struct nvmf_host_qpair *qp, void *sqe,
    nvmf_request_complete_t *cb, void *cb_arg, int how)
{
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));

	req = malloc(sizeof(*req), M_NVMF, how | M_ZERO);
	if (req == NULL)
		return (NULL);

	mtx_lock(&qp->lock);
	nq = qp->qp;
	if (nq == NULL) {
		mtx_unlock(&qp->lock);
		free(req, M_NVMF);
		return (NULL);
	}
	qp->allocating++;
	MPASS(qp->allocating != 0);
	mtx_unlock(&qp->lock);

	req->qp = qp;
	req->cb = cb;
	req->cb_arg = cb_arg;
	req->nc = nvmf_allocate_command(nq, sqe, how);
	if (req->nc == NULL) {
		free(req, M_NVMF);
		req = NULL;
	}

	mtx_lock(&qp->lock);
	qp->allocating--;
	if (qp->allocating == 0 && qp->shutting_down)
		wakeup(qp);
	mtx_unlock(&qp->lock);

	return (req);
}

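/*
 * Complete a request with a synthesized "Command Aborted By Host"
 * status without involving the transport.
 */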
static void
nvmf_abort_request(struct nvmf_request *req, uint16_t cid)
{
	struct nvme_completion cqe;

	memset(&cqe, 0, sizeof(cqe));
	cqe.cid = cid;
	cqe.status = htole16(NVMEF(NVME_STATUS_SCT, NVME_SCT_PATH_RELATED) |
	    NVMEF(NVME_STATUS_SC, NVME_SC_COMMAND_ABORTED_BY_HOST));
	req->cb(req->cb_arg, &cqe);
}

void
nvmf_free_request(struct nvmf_request *req)
{
	if (req->nc != NULL)
		nvmf_free_capsule(req->nc);
	free(req, M_NVMF);
}

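/*
 * Transmit the capsule of a request that has been bound to a command
 * slot.  Called with the queue pair lock held; the lock is dropped
 * before the capsule is handed to the transport.
 */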
static void
nvmf_dispatch_command(struct nvmf_host_qpair *qp, struct nvmf_host_command *cmd)
{
	struct nvmf_softc *sc = qp->sc;
	struct nvme_command *sqe;
	struct nvmf_capsule *nc;
	uint16_t new_sqtail;
	int error;

	mtx_assert(&qp->lock, MA_OWNED);

	qp->submitted++;

	/*
	 * Update flow control tracking.  This is just a sanity check.
	 * Since num_commands == qsize - 1, there can never be too
	 * many commands in flight.
	 */
	new_sqtail = (qp->sqtail + 1) % (qp->num_commands + 1);
	KASSERT(new_sqtail != qp->sqhd, ("%s: qp %p is full", __func__, qp));
	qp->sqtail = new_sqtail;
	mtx_unlock(&qp->lock);

	nc = cmd->req->nc;
	sqe = nvmf_capsule_sqe(nc);

	/*
	 * NB: Don't bother byte-swapping the cid so that receive
	 * doesn't have to swap.
	 */
	sqe->cid = cmd->cid;

	error = nvmf_transmit_capsule(nc);
	if (error != 0) {
		device_printf(sc->dev,
		    "failed to transmit capsule: %d, disconnecting\n", error);
		nvmf_disconnect(sc);
		return;
	}

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_tx_traffic, 1);
}

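/*
 * Transport error callback: log the error (unless it is a clean close
 * during detach) and disconnect the association.
 */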
static void
nvmf_qp_error(void *arg, int error)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;

	/* Ignore simple close of queue pairs during shutdown. */
	if (!(sc->detaching && error == 0))
		device_printf(sc->dev, "error %d on %s, disconnecting\n", error,
		    qp->name);
	nvmf_disconnect(sc);
}

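/*
 * Transport receive callback for response capsules: validate the CID,
 * update SQ flow control state, complete the matching request, and
 * recycle its command slot (or reuse the slot immediately for a
 * queued pending request).
 */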
static void
nvmf_receive_capsule(void *arg, struct nvmf_capsule *nc)
{
	struct nvmf_host_qpair *qp = arg;
	struct nvmf_softc *sc = qp->sc;
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	const struct nvme_completion *cqe;
	uint16_t cid;

	cqe = nvmf_capsule_cqe(nc);

	if (sc->ka_traffic)
		atomic_store_int(&sc->ka_active_rx_traffic, 1);

	/*
	 * NB: Don't bother byte-swapping the cid as transmit doesn't
	 * swap either.
	 */
	cid = cqe->cid;

	if (cid >= qp->num_commands) {
		device_printf(sc->dev,
		    "received invalid CID %u, disconnecting\n", cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	/* Update flow control tracking. */
	mtx_lock(&qp->lock);
	if (qp->sq_flow_control) {
		if (nvmf_sqhd_valid(nc))
			qp->sqhd = le16toh(cqe->sqhd);
	} else {
		/*
		 * If SQ FC is disabled, just advance the head for
		 * each response capsule received.
		 */
		qp->sqhd = (qp->sqhd + 1) % (qp->num_commands + 1);
	}

	/*
	 * If the queue has been shut down due to an error, drop the
	 * response.
	 */
	if (qp->qp == NULL) {
		device_printf(sc->dev,
		    "received completion for CID %u on shutdown %s\n", cid,
		    qp->name);
		mtx_unlock(&qp->lock);
		nvmf_free_capsule(nc);
		return;
	}

	cmd = qp->active_commands[cid];
	if (cmd == NULL) {
		mtx_unlock(&qp->lock);
		device_printf(sc->dev,
		    "received completion for inactive CID %u, disconnecting\n",
		    cid);
		nvmf_disconnect(sc);
		nvmf_free_capsule(nc);
		return;
	}

	KASSERT(cmd->cid == cid, ("%s: CID mismatch", __func__));
	req = cmd->req;
	cmd->req = NULL;
	if (STAILQ_EMPTY(&qp->pending_requests)) {
		qp->active_commands[cid] = NULL;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
		mtx_unlock(&qp->lock);
	} else {
		cmd->req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		nvmf_dispatch_command(qp, cmd);
	}

	req->cb(req->cb_arg, cqe);
	nvmf_free_capsule(nc);
	nvmf_free_request(req);
}

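/*
 * Export per-queue statistics (queue size, SQ head and tail, number
 * of submitted commands) via sysctl.
 */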
static void
nvmf_sysctls_qp(struct nvmf_softc *sc, struct nvmf_host_qpair *qp,
    bool admin, u_int qid)
{
	struct sysctl_ctx_list *ctx = &qp->sysctl_ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *list;
	char name[8];

	if (admin) {
		oid = SYSCTL_ADD_NODE(ctx,
		    SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO,
		    "adminq", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Admin Queue");
	} else {
		snprintf(name, sizeof(name), "%u", qid);
		oid = SYSCTL_ADD_NODE(ctx, sc->ioq_oid_list, OID_AUTO, name,
		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queue");
	}
	list = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "num_entries", CTLFLAG_RD,
	    NULL, qp->num_commands + 1, "Number of entries in queue");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_head", CTLFLAG_RD, &qp->sqhd,
	    0, "Current head of submission queue (as observed by driver)");
	SYSCTL_ADD_U16(ctx, list, OID_AUTO, "sq_tail", CTLFLAG_RD, &qp->sqtail,
	    0, "Current tail of submission queue (as observed by driver)");
	SYSCTL_ADD_U64(ctx, list, OID_AUTO, "num_cmds", CTLFLAG_RD,
	    &qp->submitted, 0, "Number of commands submitted");
}

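/*
 * Construct the host-side state for a queue pair and the underlying
 * transport queue pair from the connection handoff parameters,
 * preallocating the command slots used to track in-flight requests.
 */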
struct nvmf_host_qpair *
nvmf_init_qp(struct nvmf_softc *sc, enum nvmf_trtype trtype,
    struct nvmf_handoff_qpair_params *handoff, const char *name, u_int qid)
{
	struct nvmf_host_command *cmd, *ncmd;
	struct nvmf_host_qpair *qp;
	u_int i;

	qp = malloc(sizeof(*qp), M_NVMF, M_WAITOK | M_ZERO);
	qp->sc = sc;
	qp->sq_flow_control = handoff->sq_flow_control;
	qp->sqhd = handoff->sqhd;
	qp->sqtail = handoff->sqtail;
	strlcpy(qp->name, name, sizeof(qp->name));
	mtx_init(&qp->lock, "nvmf qp", NULL, MTX_DEF);
	(void)sysctl_ctx_init(&qp->sysctl_ctx);

	/*
	 * Allocate a spare command slot for each pending AER command
	 * on the admin queue.
	 */
	qp->num_commands = handoff->qsize - 1;
	if (handoff->admin)
		qp->num_commands += sc->num_aer;

	qp->active_commands = malloc(sizeof(*qp->active_commands) *
	    qp->num_commands, M_NVMF, M_WAITOK | M_ZERO);
	TAILQ_INIT(&qp->free_commands);
	for (i = 0; i < qp->num_commands; i++) {
		cmd = malloc(sizeof(*cmd), M_NVMF, M_WAITOK | M_ZERO);
		cmd->cid = i;
		TAILQ_INSERT_TAIL(&qp->free_commands, cmd, link);
	}
	STAILQ_INIT(&qp->pending_requests);

	qp->qp = nvmf_allocate_qpair(trtype, false, handoff, nvmf_qp_error,
	    qp, nvmf_receive_capsule, qp);
	if (qp->qp == NULL) {
		(void)sysctl_ctx_free(&qp->sysctl_ctx);
		TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
			TAILQ_REMOVE(&qp->free_commands, cmd, link);
			free(cmd, M_NVMF);
		}
		free(qp->active_commands, M_NVMF);
		mtx_destroy(&qp->lock);
		free(qp, M_NVMF);
		return (NULL);
	}

	nvmf_sysctls_qp(sc, qp, handoff->admin, qid);

	return (qp);
}

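/*
 * Tear down the transport queue pair and abort all active and pending
 * requests.  Concurrent callers wait for an in-progress shutdown to
 * complete before returning.
 */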
void
nvmf_shutdown_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd;
	struct nvmf_request *req;
	struct nvmf_qpair *nq;

	mtx_lock(&qp->lock);
	nq = qp->qp;
	qp->qp = NULL;

	if (nq == NULL) {
		while (qp->shutting_down)
			mtx_sleep(qp, &qp->lock, 0, "nvmfqpsh", 0);
		mtx_unlock(&qp->lock);
		return;
	}
	qp->shutting_down = true;
	while (qp->allocating != 0)
		mtx_sleep(qp, &qp->lock, 0, "nvmfqpqu", 0);
	mtx_unlock(&qp->lock);

	nvmf_free_qpair(nq);

	/*
	 * Abort outstanding requests.  Active requests will have
	 * their I/O completions invoked and associated capsules freed
	 * by the transport layer via nvmf_free_qpair.  Pending
	 * requests must have their I/O completion invoked via
	 * nvmf_abort_capsule_data.
	 */
	for (u_int i = 0; i < qp->num_commands; i++) {
		cmd = qp->active_commands[i];
		if (cmd != NULL) {
			if (!cmd->req->aer)
				printf("%s: aborted active command %p (CID %u)\n",
				    __func__, cmd->req, cmd->cid);

			/* This was freed by nvmf_free_qpair. */
			cmd->req->nc = NULL;
			nvmf_abort_request(cmd->req, cmd->cid);
			nvmf_free_request(cmd->req);
			free(cmd, M_NVMF);
		}
	}
	while (!STAILQ_EMPTY(&qp->pending_requests)) {
		req = STAILQ_FIRST(&qp->pending_requests);
		STAILQ_REMOVE_HEAD(&qp->pending_requests, link);
		if (!req->aer)
			printf("%s: aborted pending command %p\n", __func__,
			    req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
	}

	mtx_lock(&qp->lock);
	qp->shutting_down = false;
	mtx_unlock(&qp->lock);
	wakeup(qp);
}

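/*
 * Shut down the queue pair and release all of its resources.
 */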
void
nvmf_destroy_qp(struct nvmf_host_qpair *qp)
{
	struct nvmf_host_command *cmd, *ncmd;

	nvmf_shutdown_qp(qp);
	(void)sysctl_ctx_free(&qp->sysctl_ctx);

	TAILQ_FOREACH_SAFE(cmd, &qp->free_commands, link, ncmd) {
		TAILQ_REMOVE(&qp->free_commands, cmd, link);
		free(cmd, M_NVMF);
	}
	free(qp->active_commands, M_NVMF);
	mtx_destroy(&qp->lock);
	free(qp, M_NVMF);
}

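/*
 * Submit a request on its queue pair.  If no command slot is free,
 * the request is queued until an in-flight command completes; if the
 * queue pair has already been shut down, the request is aborted
 * immediately.
 */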
void
nvmf_submit_request(struct nvmf_request *req)
{
	struct nvmf_host_qpair *qp;
	struct nvmf_host_command *cmd;

	qp = req->qp;
	mtx_lock(&qp->lock);
	if (qp->qp == NULL) {
		mtx_unlock(&qp->lock);
		printf("%s: aborted pending command %p\n", __func__, req);
		nvmf_abort_capsule_data(req->nc, ECONNABORTED);
		nvmf_abort_request(req, 0);
		nvmf_free_request(req);
		return;
	}
	cmd = TAILQ_FIRST(&qp->free_commands);
	if (cmd == NULL) {
		/*
		 * Queue this request.  Will be sent after enough
		 * in-flight requests have completed.
		 */
		STAILQ_INSERT_TAIL(&qp->pending_requests, req, link);
		mtx_unlock(&qp->lock);
		return;
	}

	TAILQ_REMOVE(&qp->free_commands, cmd, link);
	KASSERT(qp->active_commands[cmd->cid] == NULL,
	    ("%s: CID already busy", __func__));
	qp->active_commands[cmd->cid] = cmd;
	cmd->req = req;
	nvmf_dispatch_command(qp, cmd);
}