/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/mutex.h>
#include <sys/sbuf.h>
#include <sys/taskqueue.h>

#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

static void nvmft_controller_shutdown(void *arg, int pending);
static void nvmft_controller_terminate(void *arg, int pending);

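/*
 * printf(9)-style helper that prefixes console messages with the
 * controller ID ("nvmftNN: ") and returns the number of characters
 * drained to the console.
 */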
int
nvmft_printf(struct nvmft_controller *ctrlr, const char *fmt, ...)
{
	char buf[128];
	struct sbuf sb;
	va_list ap;
	size_t retval;

	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL);
	sbuf_set_drain(&sb, sbuf_printf_drain, &retval);

	sbuf_printf(&sb, "nvmft%u: ", ctrlr->cntlid);

	va_start(ap, fmt);
	sbuf_vprintf(&sb, fmt, ap);
	va_end(ap);

	sbuf_finish(&sb);
	sbuf_delete(&sb);

	return (retval);
}

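/*
 * Allocate and initialize a controller structure for the given port
 * and controller ID, copying the host ID and host NQN from the
 * CONNECT data and setting up the keep-alive timer and the
 * shutdown/terminate tasks.
 */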
static struct nvmft_controller *
nvmft_controller_alloc(struct nvmft_port *np, uint16_t cntlid,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;

	ctrlr = malloc(sizeof(*ctrlr), M_NVMFT, M_WAITOK | M_ZERO);
	ctrlr->cntlid = cntlid;
	ctrlr->np = np;
	mtx_init(&ctrlr->lock, "nvmft controller", NULL, MTX_DEF);
	callout_init(&ctrlr->ka_timer, 1);
	TASK_INIT(&ctrlr->shutdown_task, 0, nvmft_controller_shutdown, ctrlr);
	TIMEOUT_TASK_INIT(taskqueue_thread, &ctrlr->terminate_task, 0,
	    nvmft_controller_terminate, ctrlr);

	ctrlr->cdata = np->cdata;
	ctrlr->cdata.ctrlr_id = htole16(cntlid);
	memcpy(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid));
	memcpy(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn));
	ctrlr->hip.power_cycles[0] = 1;
	ctrlr->create_time = sbinuptime();

	ctrlr->changed_ns = malloc(sizeof(*ctrlr->changed_ns), M_NVMFT,
	    M_WAITOK | M_ZERO);

	return (ctrlr);
}

static void
nvmft_controller_free(struct nvmft_controller *ctrlr)
{
	mtx_destroy(&ctrlr->lock);
	MPASS(ctrlr->io_qpairs == NULL);
	free(ctrlr->changed_ns, M_NVMFT);
	free(ctrlr, M_NVMFT);
}

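/*
 * Keep-alive callout handler: disconnect the association if no
 * traffic has been observed since the previous expiration, otherwise
 * re-arm the timer.
 */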
static void
nvmft_keep_alive_timer(void *arg)
{
	struct nvmft_controller *ctrlr = arg;
	int traffic;

	if (ctrlr->shutdown)
		return;

	traffic = atomic_readandclear_int(&ctrlr->ka_active_traffic);
	if (traffic == 0) {
		nvmft_printf(ctrlr,
		    "disconnecting due to KeepAlive timeout\n");
		nvmft_controller_error(ctrlr, NULL, ETIMEDOUT);
		return;
	}

	callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0, C_HARDCLOCK);
}

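/*
 * Clamp the advertised I/O command capsule size (IOCCSZ, stored in
 * 16-byte units) to the maximum supported by the admin queue's
 * transport.
 */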
static void
nvmft_update_cdata(struct nvmft_controller *ctrlr)
{
	uint32_t ioccsz, val;

	val = nvmft_max_ioccsz(ctrlr->admin);
	if (val != 0) {
		ioccsz = le32toh(ctrlr->cdata.ioccsz) * 16;
		if (val < ioccsz)
			ctrlr->cdata.ioccsz = htole32(val / 16);
	}
}

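/*
 * Create a new controller in response to a CONNECT on an admin queue:
 * allocate a controller ID, register the controller with the port,
 * start the keep-alive timer if one was requested, and complete the
 * CONNECT.
 */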
int
nvmft_handoff_admin_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	uint32_t kato;
	int cntlid;

	if (cmd->qid != htole16(0))
		return (EINVAL);

	qp = nvmft_qpair_init(trtype, params, 0, "admin queue");
	if (qp == NULL) {
		printf("NVMFT: Failed to setup admin queue from %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	cntlid = alloc_unr(np->ids);
	if (cntlid == -1) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Unable to allocate controller for %.*s\n",
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_HOST);
		nvmft_qpair_destroy(qp);
		return (ENOMEM);
	}

#ifdef INVARIANTS
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		KASSERT(ctrlr->cntlid != cntlid,
		    ("%s: duplicate controllers with id %d", __func__, cntlid));
	}
#endif
	mtx_unlock(&np->lock);

	ctrlr = nvmft_controller_alloc(np, cntlid, data);

	mtx_lock(&np->lock);
	if (!np->online) {
		mtx_unlock(&np->lock);
		nvmft_controller_free(ctrlr);
		free_unr(np->ids, cntlid);
		nvmft_qpair_destroy(qp);
		return (ENXIO);
	}
	nvmft_port_ref(np);
	TAILQ_INSERT_TAIL(&np->controllers, ctrlr, link);

	nvmft_printf(ctrlr, "associated with %.*s\n",
	    (int)sizeof(data->hostnqn), data->hostnqn);
	ctrlr->admin = qp;
	ctrlr->trtype = trtype;
	nvmft_update_cdata(ctrlr);

	/*
	 * The spec requires a non-zero KeepAlive timer, but allow a
	 * zero KATO value to match Linux.
	 */
	kato = le32toh(cmd->kato);
	if (kato != 0) {
		/*
		 * Round up to 1 second matching granularity
		 * advertised in cdata.
		 */
		ctrlr->ka_sbt = mstosbt(roundup(kato, 1000));
		callout_reset_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
		    nvmft_keep_alive_timer, ctrlr, C_HARDCLOCK);
	}
	mtx_unlock(&np->lock);

	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

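/*
 * Attach a new I/O queue to an existing controller in response to a
 * CONNECT, validating the controller ID, host identity, and queue ID
 * before accepting the queue pair.
 */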
int
nvmft_handoff_io_queue(struct nvmft_port *np, enum nvmf_trtype trtype,
    const nvlist_t *params, const struct nvmf_fabric_connect_cmd *cmd,
    const struct nvmf_fabric_connect_data *data)
{
	struct nvmft_controller *ctrlr;
	struct nvmft_qpair *qp;
	char name[16];
	uint16_t cntlid, qid;

	qid = le16toh(cmd->qid);
	if (qid == 0)
		return (EINVAL);
	cntlid = le16toh(data->cntlid);

	snprintf(name, sizeof(name), "I/O queue %u", qid);
	qp = nvmft_qpair_init(trtype, params, qid, name);
	if (qp == NULL) {
		printf("NVMFT: Failed to setup I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		return (ENXIO);
	}

	mtx_lock(&np->lock);
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		if (ctrlr->cntlid == cntlid)
			break;
	}
	if (ctrlr == NULL) {
		mtx_unlock(&np->lock);
		printf("NVMFT: Nonexistent controller %u for I/O queue %u from %.*s\n",
		    cntlid, qid, (int)sizeof(data->hostnqn),
		    data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (ENOENT);
	}

	if (memcmp(ctrlr->hostid, data->hostid, sizeof(ctrlr->hostid)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostid mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (memcmp(ctrlr->hostnqn, data->hostnqn, sizeof(ctrlr->hostnqn)) != 0) {
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "hostnqn mismatch for I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, hostnqn));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	/* XXX: Require trtype == ctrlr->trtype? */

	mtx_lock(&ctrlr->lock);
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u on disabled controller from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, true,
		    offsetof(struct nvmf_fabric_connect_data, cntlid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->num_io_queues == 0) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create I/O queue %u without enabled queues from %.*s\n",
		    qid, (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (qid > ctrlr->num_io_queues) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to create invalid I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_invalid_parameters(qp, cmd, false,
		    offsetof(struct nvmf_fabric_connect_cmd, qid));
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}
	if (ctrlr->io_qpairs[qid - 1].qp != NULL) {
		mtx_unlock(&ctrlr->lock);
		mtx_unlock(&np->lock);
		nvmft_printf(ctrlr,
		    "attempt to re-create I/O queue %u from %.*s\n", qid,
		    (int)sizeof(data->hostnqn), data->hostnqn);
		nvmft_connect_error(qp, cmd, NVME_SCT_GENERIC,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmft_qpair_destroy(qp);
		return (EINVAL);
	}

	ctrlr->io_qpairs[qid - 1].qp = qp;
	mtx_unlock(&ctrlr->lock);
	mtx_unlock(&np->lock);
	nvmft_finish_accept(qp, cmd, ctrlr);

	return (0);
}

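/*
 * Taskqueue handler for controller shutdown or reset: quiesce and
 * tear down the I/O queues, wait for pending CTL commands to drain,
 * update CSTS, and either terminate the association immediately or
 * schedule delayed termination.
 */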
static void
nvmft_controller_shutdown(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;

	MPASS(pending == 1);

	/*
	 * Shutdown all I/O queues to terminate pending datamoves and
	 * stop receiving new commands.
	 */
	mtx_lock(&ctrlr->lock);
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL) {
			ctrlr->io_qpairs[i].shutdown = true;
			mtx_unlock(&ctrlr->lock);
			nvmft_qpair_shutdown(ctrlr->io_qpairs[i].qp);
			mtx_lock(&ctrlr->lock);
		}
	}
	mtx_unlock(&ctrlr->lock);

	/* Terminate active CTL commands. */
	nvmft_terminate_commands(ctrlr);

	/* Wait for all pending CTL commands to complete. */
	mtx_lock(&ctrlr->lock);
	while (ctrlr->pending_commands != 0)
		mtx_sleep(&ctrlr->pending_commands, &ctrlr->lock, 0, "nvmftsh",
		    hz / 100);
	mtx_unlock(&ctrlr->lock);

	/* Delete all of the I/O queues. */
	for (u_int i = 0; i < ctrlr->num_io_queues; i++) {
		if (ctrlr->io_qpairs[i].qp != NULL)
			nvmft_qpair_destroy(ctrlr->io_qpairs[i].qp);
	}
	free(ctrlr->io_qpairs, M_NVMFT);
	ctrlr->io_qpairs = NULL;

	mtx_lock(&ctrlr->lock);
	ctrlr->num_io_queues = 0;

	/* Mark shutdown complete. */
	if (NVMEV(NVME_CSTS_REG_SHST, ctrlr->csts) == NVME_SHST_OCCURRING) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_COMPLETE);
	}

	if (NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) == 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_RDY);
		ctrlr->shutdown = false;
	}
	mtx_unlock(&ctrlr->lock);

	/*
	 * If the admin queue was closed while shutting down or a
	 * fatal controller error has occurred, terminate the
	 * association immediately, otherwise wait up to 2 minutes
	 * (NVMe-over-Fabrics 1.1 4.6).
	 */
	if (ctrlr->admin_closed || NVMEV(NVME_CSTS_REG_CFS, ctrlr->csts) != 0)
		nvmft_controller_terminate(ctrlr, 0);
	else
		taskqueue_enqueue_timeout(taskqueue_thread,
		    &ctrlr->terminate_task, hz * 60 * 2);
}

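/*
 * Taskqueue handler that tears down the association: unless the
 * controller has been re-enabled in the meantime, destroy the admin
 * queue, unlink the controller from its port, and release the
 * controller ID and port reference.
 */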
static void
nvmft_controller_terminate(void *arg, int pending)
{
	struct nvmft_controller *ctrlr = arg;
	struct nvmft_port *np;
	bool wakeup_np;

	/* If the controller has been re-enabled, nothing to do. */
	mtx_lock(&ctrlr->lock);
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) != 0) {
		mtx_unlock(&ctrlr->lock);

		if (ctrlr->ka_sbt != 0)
			callout_schedule_sbt(&ctrlr->ka_timer, ctrlr->ka_sbt, 0,
			    C_HARDCLOCK);
		return;
	}

	/* Disable updates to CC while destroying admin qpair. */
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_qpair_destroy(ctrlr->admin);

	/* Remove association (CNTLID). */
	np = ctrlr->np;
	mtx_lock(&np->lock);
	TAILQ_REMOVE(&np->controllers, ctrlr, link);
	wakeup_np = (!np->online && TAILQ_EMPTY(&np->controllers));
	mtx_unlock(&np->lock);
	free_unr(np->ids, ctrlr->cntlid);
	if (wakeup_np)
		wakeup(np);

	callout_drain(&ctrlr->ka_timer);

	nvmft_printf(ctrlr, "association terminated\n");
	nvmft_controller_free(ctrlr);
	nvmft_port_rele(np);
}

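/*
 * Handle a queue pair closure or transport error reported by the
 * transport layer (error == 0 means the queue pair was closed).
 * Fatal errors set CFS, clear EN, and kick off the shutdown task.
 */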
void
nvmft_controller_error(struct nvmft_controller *ctrlr, struct nvmft_qpair *qp,
    int error)
{
	/*
	 * If a queue pair is closed, that isn't an error per se.
	 * That just means additional commands cannot be received on
	 * that queue pair.
	 *
	 * If the admin queue pair is closed while idle or while
	 * shutting down, terminate the association immediately.
	 *
	 * If an I/O queue pair is closed, just ignore it.
	 */
	if (error == 0) {
		if (qp != ctrlr->admin)
			return;

		mtx_lock(&ctrlr->lock);
		if (ctrlr->shutdown) {
			ctrlr->admin_closed = true;
			mtx_unlock(&ctrlr->lock);
			return;
		}

		if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0) {
			MPASS(ctrlr->num_io_queues == 0);
			mtx_unlock(&ctrlr->lock);

			/*
			 * Ok to drop lock here since ctrlr->cc can't
			 * change if the admin queue pair has closed.
			 * This also means no new queues can be handed
			 * off, etc. Note that since there are no I/O
			 * queues, only the admin queue needs to be
			 * destroyed, so it is safe to skip
			 * nvmft_controller_shutdown and just schedule
			 * nvmft_controller_terminate. Note that we
			 * cannot call nvmft_controller_terminate from
			 * here directly as this is called from the
			 * transport layer and freeing the admin qpair
			 * might deadlock waiting for the current
			 * thread to exit.
			 */
			if (taskqueue_cancel_timeout(taskqueue_thread,
			    &ctrlr->terminate_task, NULL) == 0)
				taskqueue_enqueue_timeout(taskqueue_thread,
				    &ctrlr->terminate_task, 0);
			return;
		}

		/*
		 * Treat closing of the admin queue pair while enabled
		 * as a transport error. Note that the admin queue
		 * pair has been closed.
		 */
		ctrlr->admin_closed = true;
	} else
		mtx_lock(&ctrlr->lock);

	/* Ignore transport errors if we are already shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	ctrlr->csts |= NVMEF(NVME_CSTS_REG_CFS, 1);
	ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
	ctrlr->shutdown = true;
	mtx_unlock(&ctrlr->lock);

	callout_stop(&ctrlr->ka_timer);
	taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
}

/* Wrapper around m_getm2 that also sets m_len in the mbufs in the chain. */
static struct mbuf *
m_getml(size_t len, int how)
{
	struct mbuf *m, *n;

	m = m_getm2(NULL, len, how, MT_DATA, 0);
	if (m == NULL)
		return (NULL);
	for (n = m; len > 0; n = n->m_next) {
		n->m_len = M_SIZE(n);
		if (n->m_len >= len) {
			n->m_len = len;
			MPASS(n->m_next == NULL);
		}
		len -= n->m_len;
	}
	return (m);
}

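/* Zero 'len' bytes of an mbuf chain starting at byte 'offset'. */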
static void
m_zero(struct mbuf *m, u_int offset, u_int len)
{
	u_int todo;

	if (len == 0)
		return;

	while (m->m_len <= offset) {
		offset -= m->m_len;
		m = m->m_next;
	}

	todo = m->m_len - offset;
	if (todo > len)
		todo = len;
	memset(mtodo(m, offset), 0, todo);
	m = m->m_next;
	len -= todo;

	while (len > 0) {
		todo = m->m_len;
		if (todo > len)
			todo = len;
		memset(mtod(m, void *), 0, todo);
		m = m->m_next;
		len -= todo;
	}
}

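/*
 * GET LOG PAGE handler for the log pages implemented here (Error,
 * Health/SMART Information, Firmware Slot, and Changed Namespace).
 * The requested range is returned as an mbuf chain via
 * nvmf_send_controller_data().
 */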
static void
handle_get_log_page(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	uint64_t offset;
	uint32_t numd;
	size_t len, todo;
	u_int status;
	uint8_t lid;
	bool rae;

	lid = le32toh(cmd->cdw10) & 0xff;
	rae = (le32toh(cmd->cdw10) & (1U << 15)) != 0;
	numd = le32toh(cmd->cdw10) >> 16 | le32toh(cmd->cdw11) << 16;
	offset = le32toh(cmd->cdw12) | (uint64_t)le32toh(cmd->cdw13) << 32;

	if (offset % 4 != 0) {
		status = NVME_SC_INVALID_FIELD;
		goto done;
	}

	len = (numd + 1) * 4;

	switch (lid) {
	case NVME_LOG_ERROR:
		todo = 0;

		m = m_getml(len, M_WAITOK);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_HEALTH_INFORMATION:
	{
		struct nvme_health_information_page hip;

		if (offset >= sizeof(hip)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(hip) - offset;
		if (todo > len)
			todo = len;

		mtx_lock(&ctrlr->lock);
		hip = ctrlr->hip;
		hip.controller_busy_time[0] =
		    sbintime_getsec(ctrlr->busy_total) / 60;
		hip.power_on_hours[0] =
		    sbintime_getsec(sbinuptime() - ctrlr->create_time) / 3600;
		mtx_unlock(&ctrlr->lock);

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&hip + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	}
	case NVME_LOG_FIRMWARE_SLOT:
		if (offset >= sizeof(ctrlr->np->fp)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(ctrlr->np->fp) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		m_copyback(m, 0, todo, (char *)&ctrlr->np->fp + offset);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	case NVME_LOG_CHANGED_NAMESPACE:
		if (offset >= sizeof(*ctrlr->changed_ns)) {
			status = NVME_SC_INVALID_FIELD;
			goto done;
		}
		todo = sizeof(*ctrlr->changed_ns) - offset;
		if (todo > len)
			todo = len;

		m = m_getml(len, M_WAITOK);
		mtx_lock(&ctrlr->lock);
		m_copyback(m, 0, todo, (char *)ctrlr->changed_ns + offset);
		if (offset == 0 && len == sizeof(*ctrlr->changed_ns))
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
		if (!rae)
			ctrlr->changed_ns_reported = false;
		mtx_unlock(&ctrlr->lock);
		if (todo != len)
			m_zero(m, todo, len - todo);
		status = nvmf_send_controller_data(nc, 0, m, len);
		MPASS(status != NVMF_MORE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported page %#x for GET_LOG_PAGE\n",
		    lid);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

done:
	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

static void
m_free_nslist(struct mbuf *m)
{
	free(m->m_ext.ext_arg1, M_NVMFT);
}

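/*
 * IDENTIFY handler: controller data (CNS 1) and the active namespace
 * list (CNS 2) are served locally, while namespace-specific data
 * (CNS 0 and 3) is dispatched via nvmft_dispatch_command().
 */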
static void
handle_identify_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct mbuf *m;
	size_t data_len;
	u_int status;
	uint8_t cns;

	cns = le32toh(cmd->cdw10) & 0xFF;
	data_len = nvmf_capsule_data_len(nc);
	if (data_len != sizeof(ctrlr->cdata)) {
		nvmft_printf(ctrlr,
		    "Invalid length %zu for IDENTIFY with CNS %#x\n", data_len,
		    cns);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		return;
	}

	switch (cns) {
	case 0:	/* Namespace data. */
	case 3:	/* Namespace Identification Descriptor list. */
		nvmft_dispatch_command(ctrlr->admin, nc, true);
		return;
	case 1:
		/* Controller data. */
		m = m_getml(sizeof(ctrlr->cdata), M_WAITOK);
		m_copyback(m, 0, sizeof(ctrlr->cdata), (void *)&ctrlr->cdata);
		status = nvmf_send_controller_data(nc, 0, m,
		    sizeof(ctrlr->cdata));
		MPASS(status != NVMF_MORE);
		break;
	case 2:
	{
		/* Active namespace list. */
		struct nvme_ns_list *nslist;
		uint32_t nsid;

		nsid = le32toh(cmd->nsid);
		if (nsid >= 0xfffffffe) {
			status = NVME_SC_INVALID_FIELD;
			break;
		}

		nslist = malloc(sizeof(*nslist), M_NVMFT, M_WAITOK | M_ZERO);
		nvmft_populate_active_nslist(ctrlr->np, nsid, nslist);
		m = m_get(M_WAITOK, MT_DATA);
		m_extadd(m, (void *)nslist, sizeof(*nslist), m_free_nslist,
		    nslist, NULL, 0, EXT_CTL);
		m->m_len = sizeof(*nslist);
		status = nvmf_send_controller_data(nc, 0, m, m->m_len);
		MPASS(status != NVMF_MORE);
		break;
	}
	default:
		nvmft_printf(ctrlr, "Unsupported CNS %#x for IDENTIFY\n", cns);
		status = NVME_SC_INVALID_FIELD;
		break;
	}

	if (status == NVMF_SUCCESS_SENT)
		nvmft_command_completed(ctrlr->admin, nc);
	else
		nvmft_send_generic_error(ctrlr->admin, nc, status);
	nvmf_free_capsule(nc);
}

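/*
 * SET FEATURES handler; only Number of Queues and Asynchronous Event
 * Configuration are supported.
 */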
static void
handle_set_features(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvme_command *cmd)
{
	struct nvme_completion cqe;
	uint8_t fid;

	fid = NVMEV(NVME_FEAT_SET_FID, le32toh(cmd->cdw10));
	switch (fid) {
	case NVME_FEAT_NUMBER_OF_QUEUES:
	{
		uint32_t num_queues;
		struct nvmft_io_qpair *io_qpairs;

		num_queues = le32toh(cmd->cdw11) & 0xffff;

		/* 5.12.1.7: 65535 is invalid. */
		if (num_queues == 65535)
			goto error;

		/* Fabrics requires the same number of SQs and CQs. */
		if (le32toh(cmd->cdw11) >> 16 != num_queues)
			goto error;

		/* Convert to 1's based */
		num_queues++;

		io_qpairs = mallocarray(num_queues, sizeof(*io_qpairs),
		    M_NVMFT, M_WAITOK | M_ZERO);

		mtx_lock(&ctrlr->lock);
		if (ctrlr->num_io_queues != 0) {
			mtx_unlock(&ctrlr->lock);
			free(io_qpairs, M_NVMFT);
			nvmft_send_generic_error(ctrlr->admin, nc,
			    NVME_SC_COMMAND_SEQUENCE_ERROR);
			nvmf_free_capsule(nc);
			return;
		}

		ctrlr->num_io_queues = num_queues;
		ctrlr->io_qpairs = io_qpairs;
		mtx_unlock(&ctrlr->lock);

		nvmft_init_cqe(&cqe, nc, 0);
		cqe.cdw0 = cmd->cdw11;
		nvmft_send_response(ctrlr->admin, &cqe);
		nvmf_free_capsule(nc);
		return;
	}
	case NVME_FEAT_ASYNC_EVENT_CONFIGURATION:
	{
		uint32_t aer_mask;

		aer_mask = le32toh(cmd->cdw11);

		/* Check for any reserved or unimplemented feature bits. */
		if ((aer_mask & 0xffffc000) != 0)
			goto error;

		mtx_lock(&ctrlr->lock);
		ctrlr->aer_mask = aer_mask;
		mtx_unlock(&ctrlr->lock);
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		return;
	}
	default:
		nvmft_printf(ctrlr,
		    "Unsupported feature ID %u for SET_FEATURES\n", fid);
		goto error;
	}

error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
	nvmf_free_capsule(nc);
}

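/*
 * Apply a host write to the CC property.  Returns false if the new
 * value is invalid or the controller is shutting down; sets
 * *need_shutdown when the write requests a shutdown or reset.
 */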
static bool
update_cc(struct nvmft_controller *ctrlr, uint32_t new_cc, bool *need_shutdown)
{
	struct nvmft_port *np = ctrlr->np;
	uint32_t changes;

	*need_shutdown = false;

	mtx_lock(&ctrlr->lock);

	/* Don't allow any changes while shutting down. */
	if (ctrlr->shutdown) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	if (!_nvmf_validate_cc(np->max_io_qsize, np->cap, ctrlr->cc, new_cc)) {
		mtx_unlock(&ctrlr->lock);
		return (false);
	}

	changes = ctrlr->cc ^ new_cc;
	ctrlr->cc = new_cc;

	/* Handle shutdown requests. */
	if (NVMEV(NVME_CC_REG_SHN, changes) != 0 &&
	    NVMEV(NVME_CC_REG_SHN, new_cc) != 0) {
		ctrlr->csts &= ~NVMEM(NVME_CSTS_REG_SHST);
		ctrlr->csts |= NVMEF(NVME_CSTS_REG_SHST, NVME_SHST_OCCURRING);
		ctrlr->cc &= ~NVMEM(NVME_CC_REG_EN);
		ctrlr->shutdown = true;
		*need_shutdown = true;
		nvmft_printf(ctrlr, "shutdown requested\n");
	}

	if (NVMEV(NVME_CC_REG_EN, changes) != 0) {
		if (NVMEV(NVME_CC_REG_EN, new_cc) == 0) {
			/* Controller reset. */
			nvmft_printf(ctrlr, "reset requested\n");
			ctrlr->shutdown = true;
			*need_shutdown = true;
		} else
			ctrlr->csts |= NVMEF(NVME_CSTS_REG_RDY, 1);
	}
	mtx_unlock(&ctrlr->lock);

	return (true);
}

static void
handle_property_get(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_get_cmd *pget)
{
	struct nvmf_fabric_prop_get_rsp rsp;

	nvmft_init_cqe(&rsp, nc, 0);

	switch (le32toh(pget->ofst)) {
	case NVMF_PROP_CAP:
		if (pget->attrib.size != NVMF_PROP_SIZE_8)
			goto error;
		rsp.value.u64 = htole64(ctrlr->np->cap);
		break;
	case NVMF_PROP_VS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = ctrlr->cdata.ver;
		break;
	case NVMF_PROP_CC:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->cc);
		break;
	case NVMF_PROP_CSTS:
		if (pget->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		rsp.value.u32.low = htole32(ctrlr->csts);
		break;
	default:
		goto error;
	}

	nvmft_send_response(ctrlr->admin, &rsp);
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_property_set(struct nvmft_controller *ctrlr, struct nvmf_capsule *nc,
    const struct nvmf_fabric_prop_set_cmd *pset)
{
	bool need_shutdown;

	need_shutdown = false;
	switch (le32toh(pset->ofst)) {
	case NVMF_PROP_CC:
		if (pset->attrib.size != NVMF_PROP_SIZE_4)
			goto error;
		if (!update_cc(ctrlr, le32toh(pset->value.u32.low),
		    &need_shutdown))
			goto error;
		break;
	default:
		goto error;
	}

	nvmft_send_success(ctrlr->admin, nc);
	if (need_shutdown) {
		callout_stop(&ctrlr->ka_timer);
		taskqueue_enqueue(taskqueue_thread, &ctrlr->shutdown_task);
	}
	return;
error:
	nvmft_send_generic_error(ctrlr->admin, nc, NVME_SC_INVALID_FIELD);
}

static void
handle_admin_fabrics_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc, const struct nvmf_fabric_cmd *fc)
{
	switch (fc->fctype) {
	case NVMF_FABRIC_COMMAND_PROPERTY_GET:
		handle_property_get(ctrlr, nc,
		    (const struct nvmf_fabric_prop_get_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_PROPERTY_SET:
		handle_property_set(ctrlr, nc,
		    (const struct nvmf_fabric_prop_set_cmd *)fc);
		break;
	case NVMF_FABRIC_COMMAND_CONNECT:
		nvmft_printf(ctrlr,
		    "CONNECT command on connected admin queue\n");
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		break;
	case NVMF_FABRIC_COMMAND_DISCONNECT:
		nvmft_printf(ctrlr, "DISCONNECT command on admin queue\n");
		nvmft_send_error(ctrlr->admin, nc, NVME_SCT_COMMAND_SPECIFIC,
		    NVMF_FABRIC_SC_INVALID_QUEUE_TYPE);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported fabrics command %#x\n",
		    fc->fctype);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		break;
	}
	nvmf_free_capsule(nc);
}

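/*
 * Entry point for admin queue capsules.  Only Fabrics commands are
 * accepted while the controller is disabled; everything else is
 * dispatched to the appropriate handler above.
 */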
void
nvmft_handle_admin_command(struct nvmft_controller *ctrlr,
    struct nvmf_capsule *nc)
{
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	/* Only permit Fabrics commands while a controller is disabled. */
	if (NVMEV(NVME_CC_REG_EN, ctrlr->cc) == 0 &&
	    cmd->opc != NVME_OPC_FABRICS_COMMANDS) {
		nvmft_printf(ctrlr,
		    "Unsupported admin opcode %#x while disabled\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_COMMAND_SEQUENCE_ERROR);
		nvmf_free_capsule(nc);
		return;
	}

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_GET_LOG_PAGE:
		handle_get_log_page(ctrlr, nc, cmd);
		break;
	case NVME_OPC_IDENTIFY:
		handle_identify_command(ctrlr, nc, cmd);
		break;
	case NVME_OPC_SET_FEATURES:
		handle_set_features(ctrlr, nc, cmd);
		break;
	case NVME_OPC_ASYNC_EVENT_REQUEST:
		mtx_lock(&ctrlr->lock);
		if (ctrlr->aer_pending == NVMFT_NUM_AER) {
			mtx_unlock(&ctrlr->lock);
			nvmft_send_error(ctrlr->admin, nc,
			    NVME_SCT_COMMAND_SPECIFIC,
			    NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED);
		} else {
			/* NB: Store the CID without byte-swapping. */
			ctrlr->aer_cids[ctrlr->aer_pidx] = cmd->cid;
			ctrlr->aer_pending++;
			ctrlr->aer_pidx = (ctrlr->aer_pidx + 1) % NVMFT_NUM_AER;
			mtx_unlock(&ctrlr->lock);
		}
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_KEEP_ALIVE:
		nvmft_send_success(ctrlr->admin, nc);
		nvmf_free_capsule(nc);
		break;
	case NVME_OPC_FABRICS_COMMANDS:
		handle_admin_fabrics_command(ctrlr, nc,
		    (const struct nvmf_fabric_cmd *)cmd);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported admin opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(ctrlr->admin, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

void
nvmft_handle_io_command(struct nvmft_qpair *qp, uint16_t qid,
    struct nvmf_capsule *nc)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);

	atomic_store_int(&ctrlr->ka_active_traffic, 1);

	switch (cmd->opc) {
	case NVME_OPC_FLUSH:
		if (cmd->nsid == htole32(0xffffffff)) {
			nvmft_send_generic_error(qp, nc,
			    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
			nvmf_free_capsule(nc);
			break;
		}
		/* FALLTHROUGH */
	case NVME_OPC_WRITE:
	case NVME_OPC_READ:
	case NVME_OPC_WRITE_UNCORRECTABLE:
	case NVME_OPC_COMPARE:
	case NVME_OPC_WRITE_ZEROES:
	case NVME_OPC_DATASET_MANAGEMENT:
	case NVME_OPC_VERIFY:
		nvmft_dispatch_command(qp, nc, false);
		break;
	default:
		nvmft_printf(ctrlr, "Unsupported I/O opcode %#x\n", cmd->opc);
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_OPCODE);
		nvmf_free_capsule(nc);
		break;
	}
}

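/*
 * Complete a pending Asynchronous Event Request with the given event
 * type, info, and log page ID.  Events that are masked off or that
 * arrive with no AER outstanding are dropped.
 */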
static void
nvmft_report_aer(struct nvmft_controller *ctrlr, uint32_t aer_mask,
    u_int type, uint8_t info, uint8_t log_page_id)
{
	struct nvme_completion cpl;

	MPASS(type <= 7);

	/* Drop events that are not enabled. */
	mtx_lock(&ctrlr->lock);
	if ((ctrlr->aer_mask & aer_mask) == 0) {
		mtx_unlock(&ctrlr->lock);
		return;
	}

	/*
	 * If there is no pending AER command, drop it.
	 * XXX: Should we queue these?
	 */
	if (ctrlr->aer_pending == 0) {
		mtx_unlock(&ctrlr->lock);
		nvmft_printf(ctrlr,
		    "dropping AER type %u, info %#x, page %#x\n",
		    type, info, log_page_id);
		return;
	}

	memset(&cpl, 0, sizeof(cpl));
	cpl.cid = ctrlr->aer_cids[ctrlr->aer_cidx];
	ctrlr->aer_pending--;
	ctrlr->aer_cidx = (ctrlr->aer_cidx + 1) % NVMFT_NUM_AER;
	mtx_unlock(&ctrlr->lock);

	cpl.cdw0 = htole32(NVMEF(NVME_ASYNC_EVENT_TYPE, type) |
	    NVMEF(NVME_ASYNC_EVENT_INFO, info) |
	    NVMEF(NVME_ASYNC_EVENT_LOG_PAGE_ID, log_page_id));

	nvmft_send_response(ctrlr->admin, &cpl);
}

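/*
 * Record a changed namespace ID (lun_id + 1) in the Changed Namespace
 * log page and raise a Namespace Attribute Changed asynchronous event
 * if one has not already been reported since the log was last read.
 */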
void
nvmft_controller_lun_changed(struct nvmft_controller *ctrlr, int lun_id)
{
	struct nvme_ns_list *nslist;
	uint32_t new_nsid, nsid;
	u_int i;

	new_nsid = lun_id + 1;

	mtx_lock(&ctrlr->lock);
	nslist = ctrlr->changed_ns;

	/* If the first entry is 0xffffffff, the list is already full. */
	if (nslist->ns[0] != 0xffffffff) {
		/* Find the insertion point for this namespace ID. */
		for (i = 0; i < nitems(nslist->ns); i++) {
			nsid = le32toh(nslist->ns[i]);
			if (nsid == new_nsid) {
				/* Already reported, nothing to do. */
				mtx_unlock(&ctrlr->lock);
				return;
			}

			if (nsid == 0 || nsid > new_nsid)
				break;
		}

		if (nslist->ns[nitems(nslist->ns) - 1] != htole32(0)) {
			/* List is full. */
			memset(ctrlr->changed_ns, 0,
			    sizeof(*ctrlr->changed_ns));
			ctrlr->changed_ns->ns[0] = 0xffffffff;
		} else if (nslist->ns[i] == htole32(0)) {
			/*
			 * Optimize case where this ID is appended to
			 * the end.
			 */
			nslist->ns[i] = htole32(new_nsid);
		} else {
			memmove(&nslist->ns[i + 1], &nslist->ns[i],
			    (nitems(nslist->ns) - i - 1) *
			    sizeof(nslist->ns[0]));
			nslist->ns[i] = htole32(new_nsid);
		}
	}

	if (ctrlr->changed_ns_reported) {
		mtx_unlock(&ctrlr->lock);
		return;
	}
	ctrlr->changed_ns_reported = true;
	mtx_unlock(&ctrlr->lock);

	nvmft_report_aer(ctrlr, NVME_ASYNC_EVENT_NS_ATTRIBUTE, 0x2, 0x0,
	    NVME_LOG_CHANGED_NAMESPACE);
}