/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void nvmft_controller_shutdown(void *arg, int pending);
static void nvmft_controller_terminate(void *arg, int pending);

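/*
 * Log a console message prefixed with the controller's name
 * ("nvmft%u: ").
 */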
int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

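/*
 * Allocate and initialize a controller for a new association and
 * link it into the port's list of controllers.
 */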
static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

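/*
 * KeepAlive timer: if no traffic has arrived since the last
 * expiration, drop the association, otherwise rearm the timer.
 */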
static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

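/*
 * Create a new association: allocate a controller for the admin
 * queue pair handed off from the transport.
 */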
int
nvmft_handoff_admin_queue(struct nvmft_port *np,
    const struct nvmf_handoff_controller_qpair *handoff,
    const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(handoff->trtype, &handoff->params, 0,
	    "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	sx_xlock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		sx_xunlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif

	ctrlr = nvmft_controller_alloc(np, cntlid, data);
	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = handoff->trtype;

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}

	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_xunlock(&np->lock);

	return (0);
}

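/*
 * Attach an I/O queue pair to an existing controller, validating
 * the CONNECT parameters against the original association.
 */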
int
nvmft_handoff_io_queue(struct nvmft_port *np,
    const struct nvmf_handoff_controller_qpair *handoff,
    const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(handoff->trtype, &handoff->params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	sx_slock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		sx_sunlock(&np->lock);
208 printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
209 ctrlr->cntlid, qid, (int)sizeof(data->hostnqn),
210 data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require handoff->trtype == ctrlr->trtype? */

	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
263 if (cmd->qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		sx_sunlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);
	sx_sunlock(&np->lock);

	return (0);
}

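/*
 * Shutdown task: drain and destroy all I/O queues, then update CSTS
 * to reflect the completed shutdown or reset.
 */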
static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

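/*
 * Terminate task: destroy the admin queue pair and release the
 * controller ID, ending the association.
 */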
static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
			    C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	sx_xlock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	free_unr(np->ids, ctrlr->cntlid);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	sx_xunlock(&np->lock);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc.  Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate.  Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error.  Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

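/* Zero 'len' bytes in an mbuf chain starting at 'offset'. */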
static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

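/* Handle a GET_LOG_PAGE admin command for the supported log pages. */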
static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	/* The log page offset must be dword-aligned. */
	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

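/* External free routine for the mbuf wrapping the namespace list. */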
static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

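/* Handle an IDENTIFY admin command for the supported CNS values. */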
static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:		/* Namespace data. */
	case 3:		/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

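/* Handle a SET_FEATURES admin command for the supported feature IDs. */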
static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

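/*
 * Validate and apply a host write to the CC register, noting any
 * requested shutdown or reset.  Returns false if the write is
 * rejected.
 */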
static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

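/* Handle a Fabrics Property Get command for the supported properties. */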
static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

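/* Handle a Fabrics Property Set command.  Only CC is writable. */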
static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

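/* Dispatch a Fabrics command received on the admin queue. */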
static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
	nvmf_free_capsule(nc);
}

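/*
 * Dispatch an admin command.  While the controller is disabled, only
 * Fabrics commands are permitted.
 */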
void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

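/* Dispatch an I/O command received on one of the I/O queues. */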
void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

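/*
 * Complete a pending AER command with the given event, or drop the
 * event if it is masked or no AER command is pending.
 */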
static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

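/*
 * Record a namespace change in the Changed Namespace List log page
 * and report an asynchronous event to the host.
 */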
void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}