/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void nvmft_controller_shutdown(void *arg, int pending);
static void nvmft_controller_terminate(void *arg, int pending);

int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

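/*
 * Allocate a controller for a new association.  The controller ID has
 * already been reserved by the caller; the host's ID and NQN are
 * copied out of the CONNECT data so that later I/O queue CONNECTs can
 * be matched against them.
 */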
static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

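/*
 * KeepAlive timer callback.  ka_active_traffic is set by the command
 * handlers whenever a command arrives; if no traffic was seen during
 * the last KATO interval, the association is torn down via
 * nvmft_controller_error().
 */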
static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

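/*
 * Handle a CONNECT on the admin queue: create the transport qpair,
 * reserve a controller ID, allocate the controller, and start the
 * KeepAlive timer before completing the CONNECT.
 */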
int
nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif
	mtx_unlock(&np->lock);

	ctrlr = nvmft_controller_alloc(np, cntlid, data);

	mtx_lock(&np->lock);
	if (!np->online) {
		mtx_unlock(&np->lock);
		nvmft_controller_free(ctrlr);
		free_unr(np->ids, cntlid);
		nvmft_qpair_destroy(qp);
		return (ENXIO);
	}
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);

	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}
	mtx_unlock(&np->lock);

	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

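/*
 * Handle a CONNECT for an I/O queue: look up the controller by the
 * CNTLID from the CONNECT data, verify that the host matches the one
 * that created the association, and validate the queue ID against the
 * count negotiated via SET_FEATURES (Number of Queues).
 */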
int
nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(trtype, params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require trtype == ctrlr->trtype? */

	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	mtx_unlock(&np->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

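/*
 * Controller shutdown task.  Runs when the host sets CC.SHN or clears
 * CC.EN (reset): quiesce and destroy the I/O queues, wait for any CTL
 * commands in flight to drain, then update CSTS and schedule the
 * association to terminate.
 */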
static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt,
			    0, C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	mtx_lock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	mtx_unlock(&np->lock);
	free_unr(np->ids, ctrlr->cntlid);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

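/*
 * Zero "len" bytes of an mbuf chain starting at "offset".  The caller
 * must ensure the chain is at least offset + len bytes long.
 */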
static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

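	/*
	 * The Log Page Identifier lives in CDW10[7:0] and Retain
	 * Asynchronous Event in CDW10[15].  The 0's based dword count
	 * (NUMD) spans CDW10[31:16] and CDW11[15:0]; the byte offset
	 * spans CDW12/CDW13.
	 */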
	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* The offset must be dword-aligned. */
	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

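		/*
		 * Echo the allocated queue counts back in CDW0 of the
		 * completion; cmd->cdw11 is already little-endian and
		 * 0's based, so it can be stored as-is.
		 */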
		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
	nvmf_free_capsule(nc);
}

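/*
 * Dispatch an admin command.  Fabrics commands are always allowed;
 * everything else requires an enabled controller.  Any admin command
 * counts as KeepAlive traffic.
 */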
void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

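/*
 * Complete a pending ASYNC_EVENT_REQUEST to report an asynchronous
 * event.  aer_cids[] is a small ring buffer of pending AER CIDs;
 * events that are masked or that arrive with no AER outstanding are
 * dropped.
 */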
static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

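	/*
	 * changed_ns is kept sorted in ascending NSID order and is
	 * zero-terminated; a first entry of 0xffffffff means more IDs
	 * changed than fit in the log page.
	 */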
	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}