/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void nvmft_controller_shutdown(void *arg, int pending);
static void nvmft_controller_terminate(void *arg, int pending);

int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

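	/*
	 * Clone the port's identify-controller data and then
	 * personalize the controller ID for this association.
	 */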
	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

int
nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

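	/* A CONNECT for the admin queue must use QID 0. */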
	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	sx_xlock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		sx_xunlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif

	ctrlr = nvmft_controller_alloc(np, cntlid, data);
	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}

	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_xunlock(&np->lock);

	return (0);
}

int
nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(trtype, params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	sx_slock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		sx_sunlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require trtype == ctrlr->trtype? */

	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_sunlock(&np->lock);

	return (0);
}

static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
			    C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	sx_xlock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	free_unr(np->ids, ctrlr->cntlid);
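	/*
	 * If the port has gone offline, a thread may be waiting for
	 * the last association to drain; wake it up once the
	 * controller list is empty.
	 */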
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	sx_xunlock(&np->lock);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
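	/*
	 * Mark each mbuf as full, trimming the final mbuf so the
	 * chain's m_len values sum to exactly len.
	 */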
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

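	/* Skip over any leading mbufs that end before the offset. */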
	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

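	/*
	 * Per the base spec: the Log Page Identifier is in
	 * CDW10[7:0], the Retain Asynchronous Event bit in CDW10[15],
	 * the 0's-based dword count in CDW10[31:16] (low) and
	 * CDW11[15:0] (high), and the byte offset in CDW12/CDW13.
	 */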
	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* Log page offsets must be dword-aligned. */
	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

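	/*
	 * Namespace-scoped CNS values are forwarded to CTL;
	 * controller-scoped values are answered here.
	 */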
	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

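	/* XOR the old and new values to find the bits that changed. */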
	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
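	/*
	 * The handlers above do not free the capsule; all fabrics
	 * command capsules are freed here.
	 */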
	nvmf_free_capsule(nc);
}

void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

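	/* Any admin command counts as keep-alive traffic. */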
	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
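		/*
		 * Reject the broadcast NSID (0xffffffff) rather than
		 * flushing all namespaces.
		 */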
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

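	/* Consume the oldest pending AER command (FIFO via aer_cidx). */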
	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

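	/* CTL LUN IDs map to NVMe namespace IDs offset by one. */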
	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}