1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
5 * Written by: John Baldwin <jhb@FreeBSD.org>
6 */
7
8 #include <sys/param.h>
9 #include <sys/callout.h>
10 #include <sys/kernel.h>
11 #include <sys/lock.h>
12 #include <sys/malloc.h>
13 #include <sys/mbuf.h>
14 #include <sys/memdesc.h>
15 #include <sys/mutex.h>
16 #include <sys/sbuf.h>
17 #include <sys/taskqueue.h>
18
19 #include <dev/nvmf/nvmf_transport.h>
20 #include <dev/nvmf/controller/nvmft_subr.h>
21 #include <dev/nvmf/controller/nvmft_var.h>
22
/*
 * Task handlers that nvmft_controller_alloc() registers before their
 * definitions appear later in this file.
 */
static void	nvmft_controller_terminate(void *arg, int pending);
static void	nvmft_controller_shutdown(void *arg, int pending);
25
26 int
nvmft_printf(struct nvmft_controller * ctrlr,const char * fmt,...)27 nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
28 {
29 char buf[128];
30 struct sbuf sb;
31 va_list ap;
32 size_t retval;
33
34 sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
35 sbuf_set_drain(&sb, sbuf_printf_drain, &retval);
36
37 sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);
38
39 va_start(ap, fmt);
40 sbuf_vprintf(&sb, fmt, ap);
41 va_end(ap);
42
43 sbuf_finish(&sb);
44 sbuf_delete(&sb);
45
46 return (retval);
47 }
48
49 static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port * np,uint16_t cntlid,const struct nvmf_fabric_connect_data * data)50 nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
51 const struct nvmf_fabric_connect_data *data)
52 {
53 struct nvmft_controller *ctrlr;
54
55 ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
56 ctrlr->cntlid = cntlid;
57 ctrlr->np = np;
58 mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
59 callout_init(&ctrlr->ka_timer, 1);
60 TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
61 TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
62 nvmft_controller_terminate, ctrlr);
63
64 ctrlr->cdata = np->cdata;
65 ctrlr->cdata.ctrlr_id = htole16(cntlid);
66 memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
67 memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
68 ctrlr->hip.power_cycles[0] = 1;
69 ctrlr->create_time = sbinuptime();
70
71 ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
72 M_WAITOK | M_ZERO);
73
74 return (ctrlr);
75 }
76
77 static void
nvmft_controller_free(struct nvmft_controller * ctrlr)78 nvmft_controller_free(struct nvmft_controller *ctrlr)
79 {
80 mtx_destroy(&ctrlr->lock);
81 MPASS(ctrlr->io_qpairs == NULL);
82 free(ctrlr->changed_ns, M_NVMFT);
83 free(ctrlr, M_NVMFT);
84 }
85
86 static void
nvmft_keep_alive_timer(void * arg)87 nvmft_keep_alive_timer(void *arg)
88 {
89 struct nvmft_controller *ctrlr = arg;
90 int traffic;
91
92 if (ctrlr->shutdown)
93 return;
94
95 traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
96 if (traffic == 0) {
97 nvmft_printf(ctrlr,
98 "disconnecting due to KeepAlive timeout\n");
99 nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
100 return;
101 }
102
103 callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
104 }
105
106 int
nvmft_handoff_admin_queue(struct nvmft_port * np,enum nvmf_trtype trtype,const nvlist_t * params,const struct nvmf_fabric_connect_cmd * cmd,const struct nvmf_fabric_connect_data * data)107 nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
108 const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
109 const struct nvmf_fabric_connect_data *data)
110 {
111 struct nvmft_controller *ctrlr;
112 struct nvmft_qpair *qp;
113 uint32_t kato;
114 int cntlid;
115
116 if (cmd->qid != htole16(0))
117 return (EINVAL);
118
119 qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
120 if (qp == NULL) {
121 printf("NVMFT: Failed to setup admin queue from %.*s\n",
122 (int)sizeof(data->hostnqn), data->hostnqn);
123 return (ENXIO);
124 }
125
126 mtx_lock(&np->lock);
127 cntlid = alloc_unr(np->ids);
128 if (cntlid == -1) {
129 mtx_unlock(&np->lock);
130 printf("NVMFT: Unable to allocate controller for %.*s\n",
131 (int)sizeof(data->hostnqn), data->hostnqn);
132 nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
133 NVMF_FABRIC_SC_INVALID_HOST);
134 nvmft_qpair_destroy(qp);
135 return (ENOMEM);
136 }
137
138 #ifdef INVARIANTS
139 TAILQ_FOREACH(ctrlr, &np->controllers, link) {
140 KASSERT(ctrlr->cntlid != cntlid,
141 ("%s: duplicate controllers with id %d", __func__, cntlid));
142 }
143 #endif
144 mtx_unlock(&np->lock);
145
146 ctrlr = nvmft_controller_alloc(np, cntlid, data);
147
148 mtx_lock(&np->lock);
149 if (!np->online) {
150 mtx_unlock(&np->lock);
151 nvmft_controller_free(ctrlr);
152 free_unr(np->ids, cntlid);
153 nvmft_qpair_destroy(qp);
154 return (ENXIO);
155 }
156 nvmft_port_ref(np);
157 TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
158
159 nvmft_printf(ctrlr, "associated with %.*s\n",
160 (int)sizeof(data->hostnqn), data->hostnqn);
161 ctrlr->admin = qp;
162 ctrlr->trtype = trtype;
163
164 /*
165 * The spec requires a non-zero KeepAlive timer, but allow a
166 * zero KATO value to match Linux.
167 */
168 kato = le32toh(cmd->kato);
169 if (kato != 0) {
170 /*
171 * Round up to 1 second matching granularity
172 * advertised in cdata.
173 */
174 ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
175 callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
176 nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
177 }
178 mtx_unlock(&np->lock);
179
180 nvmft_finish_accept(qp, cmd, ctrlr);
181
182 return (0);
183 }
184
185 int
nvmft_handoff_io_queue(struct nvmft_port * np,enum nvmf_trtype trtype,const nvlist_t * params,const struct nvmf_fabric_connect_cmd * cmd,const struct nvmf_fabric_connect_data * data)186 nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
187 const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
188 const struct nvmf_fabric_connect_data *data)
189 {
190 struct nvmft_controller *ctrlr;
191 struct nvmft_qpair *qp;
192 char name[16];
193 uint16_t cntlid, qid;
194
195 qid = le16toh(cmd->qid);
196 if (qid == 0)
197 return (EINVAL);
198 cntlid = le16toh(data->cntlid);
199
200 snprintf(name, sizeof(name), "I/O queue %u", qid);
201 qp = nvmft_qpair_init(trtype, params, qid, name);
202 if (qp == NULL) {
203 printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
204 (int)sizeof(data->hostnqn), data->hostnqn);
205 return (ENXIO);
206 }
207
208 mtx_lock(&np->lock);
209 TAILQ_FOREACH(ctrlr, &np->controllers, link) {
210 if (ctrlr->cntlid == cntlid)
211 break;
212 }
213 if (ctrlr == NULL) {
214 mtx_unlock(&np->lock);
215 printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
216 ctrlr->cntlid, qid, (int)sizeof(data->hostnqn),
217 data->hostnqn);
218 nvmft_connect_invalid_parameters(qp, cmd, true,
219 offsetof(struct nvmf_fabric_connect_data, cntlid));
220 nvmft_qpair_destroy(qp);
221 return (ENOENT);
222 }
223
224 if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
225 mtx_unlock(&np->lock);
226 nvmft_printf(ctrlr,
227 "hostid mismatch for I/O queue %u from %.*s\n", qid,
228 (int)sizeof(data->hostnqn), data->hostnqn);
229 nvmft_connect_invalid_parameters(qp, cmd, true,
230 offsetof(struct nvmf_fabric_connect_data, hostid));
231 nvmft_qpair_destroy(qp);
232 return (EINVAL);
233 }
234 if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
235 mtx_unlock(&np->lock);
236 nvmft_printf(ctrlr,
237 "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
238 (int)sizeof(data->hostnqn), data->hostnqn);
239 nvmft_connect_invalid_parameters(qp, cmd, true,
240 offsetof(struct nvmf_fabric_connect_data, hostnqn));
241 nvmft_qpair_destroy(qp);
242 return (EINVAL);
243 }
244
245 /* XXX: Require trtype == ctrlr->trtype? */
246
247 mtx_lock(&ctrlr->lock);
248 if (ctrlr->shutdown) {
249 mtx_unlock(&ctrlr->lock);
250 mtx_unlock(&np->lock);
251 nvmft_printf(ctrlr,
252 "attempt to create I/O queue %u on disabled controller from %.*s\n",
253 qid, (int)sizeof(data->hostnqn), data->hostnqn);
254 nvmft_connect_invalid_parameters(qp, cmd, true,
255 offsetof(struct nvmf_fabric_connect_data, cntlid));
256 nvmft_qpair_destroy(qp);
257 return (EINVAL);
258 }
259 if (ctrlr->num_io_queues == 0) {
260 mtx_unlock(&ctrlr->lock);
261 mtx_unlock(&np->lock);
262 nvmft_printf(ctrlr,
263 "attempt to create I/O queue %u without enabled queues from %.*s\n",
264 qid, (int)sizeof(data->hostnqn), data->hostnqn);
265 nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
266 NVME_SC_COMMAND_SEQUENCE_ERROR);
267 nvmft_qpair_destroy(qp);
268 return (EINVAL);
269 }
270 if (cmd->qid > ctrlr->num_io_queues) {
271 mtx_unlock(&ctrlr->lock);
272 mtx_unlock(&np->lock);
273 nvmft_printf(ctrlr,
274 "attempt to create invalid I/O queue %u from %.*s\n", qid,
275 (int)sizeof(data->hostnqn), data->hostnqn);
276 nvmft_connect_invalid_parameters(qp, cmd, false,
277 offsetof(struct nvmf_fabric_connect_cmd, qid));
278 nvmft_qpair_destroy(qp);
279 return (EINVAL);
280 }
281 if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
282 mtx_unlock(&ctrlr->lock);
283 mtx_unlock(&np->lock);
284 nvmft_printf(ctrlr,
285 "attempt to re-create I/O queue %u from %.*s\n", qid,
286 (int)sizeof(data->hostnqn), data->hostnqn);
287 nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
288 NVME_SC_COMMAND_SEQUENCE_ERROR);
289 nvmft_qpair_destroy(qp);
290 return (EINVAL);
291 }
292
293 ctrlr->io_qpairs[qid - 1].qp = qp;
294 mtx_unlock(&ctrlr->lock);
295 mtx_unlock(&np->lock);
296 nvmft_finish_accept(qp, cmd, ctrlr);
297
298 return (0);
299 }
300
301 static void
nvmft_controller_shutdown(void * arg,int pending)302 nvmft_controller_shutdown(void *arg, int pending)
303 {
304 struct nvmft_controller *ctrlr = arg;
305
306 MPASS(pending == 1);
307
308 /*
309 * Shutdown all I/O queues to terminate pending datamoves and
310 * stop receiving new commands.
311 */
312 mtx_lock(&ctrlr->lock);
313 for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
314 if (ctrlr->io_qpairs[i].qp != NULL) {
315 ctrlr->io_qpairs[i].shutdown = true;
316 mtx_unlock(&ctrlr->lock);
317 nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
318 mtx_lock(&ctrlr->lock);
319 }
320 }
321 mtx_unlock(&ctrlr->lock);
322
323 /* Terminate active CTL commands. */
324 nvmft_terminate_commands(ctrlr);
325
326 /* Wait for all pending CTL commands to complete. */
327 mtx_lock(&ctrlr->lock);
328 while (ctrlr->pending_commands != 0)
329 mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
330 hz / 100);
331 mtx_unlock(&ctrlr->lock);
332
333 /* Delete all of the I/O queues. */
334 for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
335 if (ctrlr->io_qpairs[i].qp != NULL)
336 nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
337 }
338 free(ctrlr->io_qpairs, M_NVMFT);
339 ctrlr->io_qpairs = NULL;
340
341 mtx_lock(&ctrlr->lock);
342 ctrlr->num_io_queues = 0;
343
344 /* Mark shutdown complete. */
345 if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
346 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
347 ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
348 }
349
350 if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
351 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
352 ctrlr->shutdown = false;
353 }
354 mtx_unlock(&ctrlr->lock);
355
356 /*
357 * If the admin queue was closed while shutting down or a
358 * fatal controller error has occurred, terminate the
359 * association immediately, otherwise wait up to 2 minutes
360 * (NVMe-over-Fabrics 1.1 4.6).
361 */
362 if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
363 nvmft_controller_terminate(ctrlr, 0);
364 else
365 taskqueue_enqueue_timeout(taskqueue_thread,
366 &ctrlr->terminate_task, hz * 60 * 2);
367 }
368
369 static void
nvmft_controller_terminate(void * arg,int pending)370 nvmft_controller_terminate(void *arg, int pending)
371 {
372 struct nvmft_controller *ctrlr = arg;
373 struct nvmft_port *np;
374 bool wakeup_np;
375
376 /* If the controller has been re-enabled, nothing to do. */
377 mtx_lock(&ctrlr->lock);
378 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
379 mtx_unlock(&ctrlr->lock);
380
381 if (ctrlr->ka_sbt != 0)
382 callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
383 C_HARDCLOCK);
384 return;
385 }
386
387 /* Disable updates to CC while destroying admin qpair. */
388 ctrlr->shutdown = true;
389 mtx_unlock(&ctrlr->lock);
390
391 nvmft_qpair_destroy(ctrlr->admin);
392
393 /* Remove association (CNTLID). */
394 np = ctrlr->np;
395 mtx_lock(&np->lock);
396 TAILQ_REMOVE(&np->controllers, ctrlr, link);
397 wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
398 mtx_unlock(&np->lock);
399 free_unr(np->ids, ctrlr->cntlid);
400 if (wakeup_np)
401 wakeup(np);
402
403 callout_drain(&ctrlr->ka_timer);
404
405 nvmft_printf(ctrlr, "association terminated\n");
406 nvmft_controller_free(ctrlr);
407 nvmft_port_rele(np);
408 }
409
410 void
nvmft_controller_error(struct nvmft_controller * ctrlr,struct nvmft_qpair * qp,int error)411 nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
412 int error)
413 {
414 /*
415 * If a queue pair is closed, that isn't an error per se.
416 * That just means additional commands cannot be received on
417 * that queue pair.
418 *
419 * If the admin queue pair is closed while idle or while
420 * shutting down, terminate the association immediately.
421 *
422 * If an I/O queue pair is closed, just ignore it.
423 */
424 if (error == 0) {
425 if (qp != ctrlr->admin)
426 return;
427
428 mtx_lock(&ctrlr->lock);
429 if (ctrlr->shutdown) {
430 ctrlr->admin_closed = true;
431 mtx_unlock(&ctrlr->lock);
432 return;
433 }
434
435 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
436 MPASS(ctrlr->num_io_queues == 0);
437 mtx_unlock(&ctrlr->lock);
438
439 /*
440 * Ok to drop lock here since ctrlr->cc can't
441 * change if the admin queue pair has closed.
442 * This also means no new queues can be handed
443 * off, etc. Note that since there are no I/O
444 * queues, only the admin queue needs to be
445 * destroyed, so it is safe to skip
446 * nvmft_controller_shutdown and just schedule
447 * nvmft_controller_terminate. Note that we
448 * cannot call nvmft_controller_terminate from
449 * here directly as this is called from the
450 * transport layer and freeing the admin qpair
451 * might deadlock waiting for the current
452 * thread to exit.
453 */
454 if (taskqueue_cancel_timeout(taskqueue_thread,
455 &ctrlr->terminate_task, NULL) == 0)
456 taskqueue_enqueue_timeout(taskqueue_thread,
457 &ctrlr->terminate_task, 0);
458 return;
459 }
460
461 /*
462 * Treat closing of the admin queue pair while enabled
463 * as a transport error. Note that the admin queue
464 * pair has been closed.
465 */
466 ctrlr->admin_closed = true;
467 } else
468 mtx_lock(&ctrlr->lock);
469
470 /* Ignore transport errors if we are already shutting down. */
471 if (ctrlr->shutdown) {
472 mtx_unlock(&ctrlr->lock);
473 return;
474 }
475
476 ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
477 ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
478 ctrlr->shutdown = true;
479 mtx_unlock(&ctrlr->lock);
480
481 callout_stop(&ctrlr->ka_timer);
482 taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
483 }
484
485 /* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
486 static struct mbuf *
m_getml(size_t len,int how)487 m_getml(size_t len, int how)
488 {
489 struct mbuf *m, *n;
490
491 m = m_getm2(NULL, len, how, MT_DATA, 0);
492 if (m == NULL)
493 return (NULL);
494 for (n = m; len > 0; n = n->m_next) {
495 n->m_len = M_SIZE(n);
496 if (n->m_len >= len) {
497 n->m_len = len;
498 MPASS(n->m_next == NULL);
499 }
500 len -= n->m_len;
501 }
502 return (m);
503 }
504
505 static void
m_zero(struct mbuf * m,u_int offset,u_int len)506 m_zero(struct mbuf *m, u_int offset, u_int len)
507 {
508 u_int todo;
509
510 if (len == 0)
511 return;
512
513 while (m->m_len <= offset) {
514 offset -= m->m_len;
515 m = m->m_next;
516 }
517
518 todo = m->m_len - offset;
519 if (todo > len)
520 todo = len;
521 memset(mtodo(m, offset), 0, todo);
522 m = m->m_next;
523 len -= todo;
524
525 while (len > 0) {
526 todo = m->m_len;
527 if (todo > len)
528 todo = len;
529 memset(mtod(m, void *), 0, todo);
530 m = m->m_next;
531 len -= todo;
532 }
533 }
534
535 static void
handle_get_log_page(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc,const struct nvme_command * cmd)536 handle_get_log_page(struct nvmft_controller *ctrlr,
537 struct nvmf_capsule *nc, const struct nvme_command *cmd)
538 {
539 struct mbuf *m;
540 uint64_t offset;
541 uint32_t numd;
542 size_t len, todo;
543 u_int status;
544 uint8_t lid;
545 bool rae;
546
547 lid = le32toh(cmd->cdw10) & 0xff;
548 rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
549 numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
550 offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;
551
552 if (offset % 3 != 0) {
553 status = NVME_SC_INVALID_FIELD;
554 goto done;
555 }
556
557 len = (numd + 1) * 4;
558
559 switch (lid) {
560 case NVME_LOG_ERROR:
561 todo = 0;
562
563 m = m_getml(len, M_WAITOK);
564 if (todo != len)
565 m_zero(m, todo, len - todo);
566 status = nvmf_send_controller_data(nc, 0, m, len);
567 MPASS(status != NVMF_MORE);
568 break;
569 case NVME_LOG_HEALTH_INFORMATION:
570 {
571 struct nvme_health_information_page hip;
572
573 if (offset >= sizeof(hip)) {
574 status = NVME_SC_INVALID_FIELD;
575 goto done;
576 }
577 todo = sizeof(hip) - offset;
578 if (todo > len)
579 todo = len;
580
581 mtx_lock(&ctrlr->lock);
582 hip = ctrlr->hip;
583 hip.controller_busy_time[0] =
584 sbintime_getsec(ctrlr->busy_total) / 60;
585 hip.power_on_hours[0] =
586 sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
587 mtx_unlock(&ctrlr->lock);
588
589 m = m_getml(len, M_WAITOK);
590 m_copyback(m, 0, todo, (char *)&hip + offset);
591 if (todo != len)
592 m_zero(m, todo, len - todo);
593 status = nvmf_send_controller_data(nc, 0, m, len);
594 MPASS(status != NVMF_MORE);
595 break;
596 }
597 case NVME_LOG_FIRMWARE_SLOT:
598 if (offset >= sizeof(ctrlr->np->fp)) {
599 status = NVME_SC_INVALID_FIELD;
600 goto done;
601 }
602 todo = sizeof(ctrlr->np->fp) - offset;
603 if (todo > len)
604 todo = len;
605
606 m = m_getml(len, M_WAITOK);
607 m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
608 if (todo != len)
609 m_zero(m, todo, len - todo);
610 status = nvmf_send_controller_data(nc, 0, m, len);
611 MPASS(status != NVMF_MORE);
612 break;
613 case NVME_LOG_CHANGED_NAMESPACE:
614 if (offset >= sizeof(*ctrlr->changed_ns)) {
615 status = NVME_SC_INVALID_FIELD;
616 goto done;
617 }
618 todo = sizeof(*ctrlr->changed_ns) - offset;
619 if (todo > len)
620 todo = len;
621
622 m = m_getml(len, M_WAITOK);
623 mtx_lock(&ctrlr->lock);
624 m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
625 if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
626 memset(ctrlr->changed_ns, 0,
627 sizeof(*ctrlr->changed_ns));
628 if (!rae)
629 ctrlr->changed_ns_reported = false;
630 mtx_unlock(&ctrlr->lock);
631 if (todo != len)
632 m_zero(m, todo, len - todo);
633 status = nvmf_send_controller_data(nc, 0, m, len);
634 MPASS(status != NVMF_MORE);
635 break;
636 default:
637 nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
638 lid);
639 status = NVME_SC_INVALID_FIELD;
640 break;
641 }
642
643 done:
644 if (status == NVMF_SUCCESS_SENT)
645 nvmft_command_completed(ctrlr->admin, nc);
646 else
647 nvmft_send_generic_error(ctrlr->admin, nc, status);
648 nvmf_free_capsule(nc);
649 }
650
651 static void
m_free_nslist(struct mbuf * m)652 m_free_nslist(struct mbuf *m)
653 {
654 free(m->m_ext.ext_arg1, M_NVMFT);
655 }
656
657 static void
handle_identify_command(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc,const struct nvme_command * cmd)658 handle_identify_command(struct nvmft_controller *ctrlr,
659 struct nvmf_capsule *nc, const struct nvme_command *cmd)
660 {
661 struct mbuf *m;
662 size_t data_len;
663 u_int status;
664 uint8_t cns;
665
666 cns = le32toh(cmd->cdw10) & 0xFF;
667 data_len = nvmf_capsule_data_len(nc);
668 if (data_len != sizeof(ctrlr->cdata)) {
669 nvmft_printf(ctrlr,
670 "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
671 cns);
672 nvmft_send_generic_error(ctrlr->admin, nc,
673 NVME_SC_INVALID_OPCODE);
674 nvmf_free_capsule(nc);
675 return;
676 }
677
678 switch (cns) {
679 case 0: /* Namespace data. */
680 case 3: /* Namespace Identification Descriptor list. */
681 nvmft_dispatch_command(ctrlr->admin, nc, true);
682 return;
683 case 1:
684 /* Controller data. */
685 m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
686 m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
687 status = nvmf_send_controller_data(nc, 0, m,
688 sizeof(ctrlr->cdata));
689 MPASS(status != NVMF_MORE);
690 break;
691 case 2:
692 {
693 /* Active namespace list. */
694 struct nvme_ns_list *nslist;
695 uint32_t nsid;
696
697 nsid = le32toh(cmd->nsid);
698 if (nsid >= 0xfffffffe) {
699 status = NVME_SC_INVALID_FIELD;
700 break;
701 }
702
703 nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
704 nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
705 m = m_get(M_WAITOK, MT_DATA);
706 m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
707 nslist, NULL, 0, EXT_CTL);
708 m->m_len = sizeof(*nslist);
709 status = nvmf_send_controller_data(nc, 0, m, m->m_len);
710 MPASS(status != NVMF_MORE);
711 break;
712 }
713 default:
714 nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
715 status = NVME_SC_INVALID_FIELD;
716 break;
717 }
718
719 if (status == NVMF_SUCCESS_SENT)
720 nvmft_command_completed(ctrlr->admin, nc);
721 else
722 nvmft_send_generic_error(ctrlr->admin, nc, status);
723 nvmf_free_capsule(nc);
724 }
725
726 static void
handle_set_features(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc,const struct nvme_command * cmd)727 handle_set_features(struct nvmft_controller *ctrlr,
728 struct nvmf_capsule *nc, const struct nvme_command *cmd)
729 {
730 struct nvme_completion cqe;
731 uint8_t fid;
732
733 fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
734 switch (fid) {
735 case NVME_FEAT_NUMBER_OF_QUEUES:
736 {
737 uint32_t num_queues;
738 struct nvmft_io_qpair *io_qpairs;
739
740 num_queues = le32toh(cmd->cdw11) & 0xffff;
741
742 /* 5.12.1.7: 65535 is invalid. */
743 if (num_queues == 65535)
744 goto error;
745
746 /* Fabrics requires the same number of SQs and CQs. */
747 if (le32toh(cmd->cdw11) >> 16 != num_queues)
748 goto error;
749
750 /* Convert to 1's based */
751 num_queues++;
752
753 io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
754 M_NVMFT, M_WAITOK | M_ZERO);
755
756 mtx_lock(&ctrlr->lock);
757 if (ctrlr->num_io_queues != 0) {
758 mtx_unlock(&ctrlr->lock);
759 free(io_qpairs, M_NVMFT);
760 nvmft_send_generic_error(ctrlr->admin, nc,
761 NVME_SC_COMMAND_SEQUENCE_ERROR);
762 nvmf_free_capsule(nc);
763 return;
764 }
765
766 ctrlr->num_io_queues = num_queues;
767 ctrlr->io_qpairs = io_qpairs;
768 mtx_unlock(&ctrlr->lock);
769
770 nvmft_init_cqe(&cqe, nc, 0);
771 cqe.cdw0 = cmd->cdw11;
772 nvmft_send_response(ctrlr->admin, &cqe);
773 nvmf_free_capsule(nc);
774 return;
775 }
776 case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
777 {
778 uint32_t aer_mask;
779
780 aer_mask = le32toh(cmd->cdw11);
781
782 /* Check for any reserved or unimplemented feature bits. */
783 if ((aer_mask & 0xffffc000) != 0)
784 goto error;
785
786 mtx_lock(&ctrlr->lock);
787 ctrlr->aer_mask = aer_mask;
788 mtx_unlock(&ctrlr->lock);
789 nvmft_send_success(ctrlr->admin, nc);
790 return;
791 }
792 default:
793 nvmft_printf(ctrlr,
794 "Unsupported feature ID %u for SET_FEATURES\n", fid);
795 goto error;
796 }
797
798 error:
799 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
800 nvmf_free_capsule(nc);
801 }
802
803 static bool
update_cc(struct nvmft_controller * ctrlr,uint32_t new_cc,bool * need_shutdown)804 update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
805 {
806 struct nvmft_port *np = ctrlr->np;
807 uint32_t changes;
808
809 *need_shutdown = false;
810
811 mtx_lock(&ctrlr->lock);
812
813 /* Don't allow any changes while shutting down. */
814 if (ctrlr->shutdown) {
815 mtx_unlock(&ctrlr->lock);
816 return (false);
817 }
818
819 if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
820 mtx_unlock(&ctrlr->lock);
821 return (false);
822 }
823
824 changes = ctrlr->cc ^ new_cc;
825 ctrlr->cc = new_cc;
826
827 /* Handle shutdown requests. */
828 if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
829 NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
830 ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
831 ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
832 ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
833 ctrlr->shutdown = true;
834 *need_shutdown = true;
835 nvmft_printf(ctrlr, "shutdown requested\n");
836 }
837
838 if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
839 if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
840 /* Controller reset. */
841 nvmft_printf(ctrlr, "reset requested\n");
842 ctrlr->shutdown = true;
843 *need_shutdown = true;
844 } else
845 ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
846 }
847 mtx_unlock(&ctrlr->lock);
848
849 return (true);
850 }
851
852 static void
handle_property_get(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc,const struct nvmf_fabric_prop_get_cmd * pget)853 handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
854 const struct nvmf_fabric_prop_get_cmd *pget)
855 {
856 struct nvmf_fabric_prop_get_rsp rsp;
857
858 nvmft_init_cqe(&rsp, nc, 0);
859
860 switch (le32toh(pget->ofst)) {
861 case NVMF_PROP_CAP:
862 if (pget->attrib.size != NVMF_PROP_SIZE_8)
863 goto error;
864 rsp.value.u64 = htole64(ctrlr->np->cap);
865 break;
866 case NVMF_PROP_VS:
867 if (pget->attrib.size != NVMF_PROP_SIZE_4)
868 goto error;
869 rsp.value.u32.low = ctrlr->cdata.ver;
870 break;
871 case NVMF_PROP_CC:
872 if (pget->attrib.size != NVMF_PROP_SIZE_4)
873 goto error;
874 rsp.value.u32.low = htole32(ctrlr->cc);
875 break;
876 case NVMF_PROP_CSTS:
877 if (pget->attrib.size != NVMF_PROP_SIZE_4)
878 goto error;
879 rsp.value.u32.low = htole32(ctrlr->csts);
880 break;
881 default:
882 goto error;
883 }
884
885 nvmft_send_response(ctrlr->admin, &rsp);
886 return;
887 error:
888 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
889 }
890
891 static void
handle_property_set(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc,const struct nvmf_fabric_prop_set_cmd * pset)892 handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
893 const struct nvmf_fabric_prop_set_cmd *pset)
894 {
895 bool need_shutdown;
896
897 need_shutdown = false;
898 switch (le32toh(pset->ofst)) {
899 case NVMF_PROP_CC:
900 if (pset->attrib.size != NVMF_PROP_SIZE_4)
901 goto error;
902 if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
903 &need_shutdown))
904 goto error;
905 break;
906 default:
907 goto error;
908 }
909
910 nvmft_send_success(ctrlr->admin, nc);
911 if (need_shutdown) {
912 callout_stop(&ctrlr->ka_timer);
913 taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
914 }
915 return;
916 error:
917 nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
918 }
919
920 static void
handle_admin_fabrics_command(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc,const struct nvmf_fabric_cmd * fc)921 handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
922 struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
923 {
924 switch (fc->fctype) {
925 case NVMF_FABRIC_COMMAND_PROPERTY_GET:
926 handle_property_get(ctrlr, nc,
927 (const struct nvmf_fabric_prop_get_cmd *)fc);
928 break;
929 case NVMF_FABRIC_COMMAND_PROPERTY_SET:
930 handle_property_set(ctrlr, nc,
931 (const struct nvmf_fabric_prop_set_cmd *)fc);
932 break;
933 case NVMF_FABRIC_COMMAND_CONNECT:
934 nvmft_printf(ctrlr,
935 "CONNECT command on connected admin queue\n");
936 nvmft_send_generic_error(ctrlr->admin, nc,
937 NVME_SC_COMMAND_SEQUENCE_ERROR);
938 break;
939 case NVMF_FABRIC_COMMAND_DISCONNECT:
940 nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
941 nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
942 NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
943 break;
944 default:
945 nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
946 fc->fctype);
947 nvmft_send_generic_error(ctrlr->admin, nc,
948 NVME_SC_INVALID_OPCODE);
949 break;
950 }
951 nvmf_free_capsule(nc);
952 }
953
954 void
nvmft_handle_admin_command(struct nvmft_controller * ctrlr,struct nvmf_capsule * nc)955 nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
956 struct nvmf_capsule *nc)
957 {
958 const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
959
960 /* Only permit Fabrics commands while a controller is disabled. */
961 if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
962 cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
963 nvmft_printf(ctrlr,
964 "Unsupported admin opcode %#x while disabled\n", cmd->opc);
965 nvmft_send_generic_error(ctrlr->admin, nc,
966 NVME_SC_COMMAND_SEQUENCE_ERROR);
967 nvmf_free_capsule(nc);
968 return;
969 }
970
971 atomic_store_int(&ctrlr->ka_active_traffic, 1);
972
973 switch (cmd->opc) {
974 case NVME_OPC_GET_LOG_PAGE:
975 handle_get_log_page(ctrlr, nc, cmd);
976 break;
977 case NVME_OPC_IDENTIFY:
978 handle_identify_command(ctrlr, nc, cmd);
979 break;
980 case NVME_OPC_SET_FEATURES:
981 handle_set_features(ctrlr, nc, cmd);
982 break;
983 case NVME_OPC_ASYNC_EVENT_REQUEST:
984 mtx_lock(&ctrlr->lock);
985 if (ctrlr->aer_pending == NVMFT_NUM_AER) {
986 mtx_unlock(&ctrlr->lock);
987 nvmft_send_error(ctrlr->admin, nc,
988 NVME_SCT_COMMAND_SPECIFIC,
989 NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
990 } else {
991 /* NB: Store the CID without byte-swapping. */
992 ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
993 ctrlr->aer_pending++;
994 ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
995 mtx_unlock(&ctrlr->lock);
996 }
997 nvmf_free_capsule(nc);
998 break;
999 case NVME_OPC_KEEP_ALIVE:
1000 nvmft_send_success(ctrlr->admin, nc);
1001 nvmf_free_capsule(nc);
1002 break;
1003 case NVME_OPC_FABRICS_COMMANDS:
1004 handle_admin_fabrics_command(ctrlr, nc,
1005 (const struct nvmf_fabric_cmd *)cmd);
1006 break;
1007 default:
1008 nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
1009 nvmft_send_generic_error(ctrlr->admin, nc,
1010 NVME_SC_INVALID_OPCODE);
1011 nvmf_free_capsule(nc);
1012 break;
1013 }
1014 }
1015
1016 void
nvmft_handle_io_command(struct nvmft_qpair * qp,uint16_t qid,struct nvmf_capsule * nc)1017 nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
1018 struct nvmf_capsule *nc)
1019 {
1020 struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
1021 const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
1022
1023 atomic_store_int(&ctrlr->ka_active_traffic, 1);
1024
1025 switch (cmd->opc) {
1026 case NVME_OPC_FLUSH:
1027 if (cmd->nsid == htole32(0xffffffff)) {
1028 nvmft_send_generic_error(qp, nc,
1029 NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
1030 nvmf_free_capsule(nc);
1031 break;
1032 }
1033 /* FALLTHROUGH */
1034 case NVME_OPC_WRITE:
1035 case NVME_OPC_READ:
1036 case NVME_OPC_WRITE_UNCORRECTABLE:
1037 case NVME_OPC_COMPARE:
1038 case NVME_OPC_WRITE_ZEROES:
1039 case NVME_OPC_DATASET_MANAGEMENT:
1040 case NVME_OPC_VERIFY:
1041 nvmft_dispatch_command(qp, nc, false);
1042 break;
1043 default:
1044 nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
1045 nvmft_send_generic_error(qp, nc,
1046 NVME_SC_INVALID_OPCODE);
1047 nvmf_free_capsule(nc);
1048 break;
1049 }
1050 }
1051
1052 static void
nvmft_report_aer(struct nvmft_controller * ctrlr,uint32_t aer_mask,u_int type,uint8_t info,uint8_t log_page_id)1053 nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
1054 u_int type, uint8_t info, uint8_t log_page_id)
1055 {
1056 struct nvme_completion cpl;
1057
1058 MPASS(type <= 7);
1059
1060 /* Drop events that are not enabled. */
1061 mtx_lock(&ctrlr->lock);
1062 if ((ctrlr->aer_mask & aer_mask) == 0) {
1063 mtx_unlock(&ctrlr->lock);
1064 return;
1065 }
1066
1067 /*
1068 * If there is no pending AER command, drop it.
1069 * XXX: Should we queue these?
1070 */
1071 if (ctrlr->aer_pending == 0) {
1072 mtx_unlock(&ctrlr->lock);
1073 nvmft_printf(ctrlr,
1074 "dropping AER type %u, info %#x, page %#x\n",
1075 type, info, log_page_id);
1076 return;
1077 }
1078
1079 memset(&cpl, 0, sizeof(cpl));
1080 cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
1081 ctrlr->aer_pending--;
1082 ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
1083 mtx_unlock(&ctrlr->lock);
1084
1085 cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
1086 NVMEF(NVME_ASYNC_EVENT_INFO, info) |
1087 NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));
1088
1089 nvmft_send_response(ctrlr->admin, &cpl);
1090 }
1091
1092 void
nvmft_controller_lun_changed(struct nvmft_controller * ctrlr,int lun_id)1093 nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
1094 {
1095 struct nvme_ns_list *nslist;
1096 uint32_t new_nsid, nsid;
1097 u_int i;
1098
1099 new_nsid = lun_id + 1;
1100
1101 mtx_lock(&ctrlr->lock);
1102 nslist = ctrlr->changed_ns;
1103
1104 /* If the first entry is 0xffffffff, the list is already full. */
1105 if (nslist->ns[0] != 0xffffffff) {
1106 /* Find the insertion point for this namespace ID. */
1107 for (i = 0; i < nitems(nslist->ns); i++) {
1108 nsid = le32toh(nslist->ns[i]);
1109 if (nsid == new_nsid) {
1110 /* Already reported, nothing to do. */
1111 mtx_unlock(&ctrlr->lock);
1112 return;
1113 }
1114
1115 if (nsid == 0 || nsid > new_nsid)
1116 break;
1117 }
1118
1119 if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
1120 /* List is full. */
1121 memset(ctrlr->changed_ns, 0,
1122 sizeof(*ctrlr->changed_ns));
1123 ctrlr->changed_ns->ns[0] = 0xffffffff;
1124 } else if (nslist->ns[i] == htole32(0)) {
1125 /*
1126 * Optimize case where this ID is appended to
1127 * the end.
1128 */
1129 nslist->ns[i] = htole32(new_nsid);
1130 } else {
1131 memmove(&nslist->ns[i + 1], &nslist->ns[i],
1132 (nitems(nslist->ns) - i - 1) *
1133 sizeof(nslist->ns[0]));
1134 nslist->ns[i] = htole32(new_nsid);
1135 }
1136 }
1137
1138 if (ctrlr->changed_ns_reported) {
1139 mtx_unlock(&ctrlr->lock);
1140 return;
1141 }
1142 ctrlr->changed_ns_reported = true;
1143 mtx_unlock(&ctrlr->lock);
1144
1145 nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
1146 NVME_LOG_CHANGED_NAMESPACE);
1147 }
1148