/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/dnv.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/bus_dma.h>

#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_private.h>

/*
 * Store pointers to the capsule and qpair in the two pointer members
 * of CTL_PRIV_FRONTEND.
 */
#define NVMFT_NC(io)    ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
#define NVMFT_QP(io)    ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])

static void nvmft_done(union ctl_io *io);
static int nvmft_init(void);
static int nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
    int flag, struct thread *td);
static int nvmft_shutdown(void);

static struct taskqueue *nvmft_taskq;
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;

MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");

static struct ctl_frontend nvmft_frontend = {
        .name = "nvmf",
        .init = nvmft_init,
        .ioctl = nvmft_ioctl,
        .fe_dump = NULL,
        .shutdown = nvmft_shutdown,
};

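/*
 * CTL port online hook: mark the port as accepting new associations.
 */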
static void
nvmft_online(void *arg)
{
        struct nvmft_port *np = arg;

        sx_xlock(&np->lock);
        np->online = true;
        sx_xunlock(&np->lock);
}

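/*
 * CTL port offline hook: fail all active controllers and wait for
 * them to shut down before returning.
 */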
static void
nvmft_offline(void *arg)
{
        struct nvmft_port *np = arg;
        struct nvmft_controller *ctrlr;

        sx_xlock(&np->lock);
        np->online = false;

        TAILQ_FOREACH(ctrlr, &np->controllers, link) {
                nvmft_printf(ctrlr,
                    "shutting down due to port going offline\n");
                nvmft_controller_error(ctrlr, NULL, ENODEV);
        }

        while (!TAILQ_EMPTY(&np->controllers))
                sx_sleep(np, &np->lock, 0, "nvmfoff", 0);
        sx_xunlock(&np->lock);
}

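/*
 * Insert the namespace ID for a newly-enabled LUN into the sorted
 * active_ns array and notify each controller of the changed
 * namespace.  LUN IDs map to namespace IDs as nsid = lun_id + 1.
 */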
static int
nvmft_lun_enable(void *arg, int lun_id)
{
        struct nvmft_port *np = arg;
        struct nvmft_controller *ctrlr;
        uint32_t *old_ns, *new_ns;
        uint32_t nsid;
        u_int i;

        if (lun_id >= le32toh(np->cdata.nn)) {
                printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
                    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
                return (EOPNOTSUPP);
        }
        nsid = lun_id + 1;

        sx_xlock(&np->lock);
        new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT,
            M_WAITOK);
        for (i = 0; i < np->num_ns; i++) {
                if (np->active_ns[i] < nsid)
                        continue;
                if (np->active_ns[i] == nsid) {
                        sx_xunlock(&np->lock);
                        free(new_ns, M_NVMFT);
                        printf("NVMFT: %s duplicate lun %d\n",
                            np->cdata.subnqn, lun_id);
                        return (EINVAL);
                }
                break;
        }

        /* Copy over IDs smaller than nsid. */
        memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));

        /* Insert nsid. */
        new_ns[i] = nsid;

        /* Copy over IDs greater than nsid. */
        memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
            sizeof(*np->active_ns));

        np->num_ns++;
        old_ns = np->active_ns;
        np->active_ns = new_ns;

        TAILQ_FOREACH(ctrlr, &np->controllers, link) {
                nvmft_controller_lun_changed(ctrlr, lun_id);
        }

        sx_xunlock(&np->lock);
        free(old_ns, M_NVMFT);

        return (0);
}

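/*
 * Remove a disabled LUN's namespace ID from the sorted active_ns
 * array and notify each controller of the changed namespace.
 */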
static int
nvmft_lun_disable(void *arg, int lun_id)
{
        struct nvmft_port *np = arg;
        struct nvmft_controller *ctrlr;
        uint32_t nsid;
        u_int i;

        if (lun_id >= le32toh(np->cdata.nn))
                return (0);
        nsid = lun_id + 1;

        sx_xlock(&np->lock);
        for (i = 0; i < np->num_ns; i++) {
                if (np->active_ns[i] == nsid)
                        goto found;
        }
        sx_xunlock(&np->lock);
        printf("NVMFT: %s request to disable nonexistent lun %d\n",
            np->cdata.subnqn, lun_id);
        return (EINVAL);

found:
        /* Move down IDs greater than nsid. */
        memmove(np->active_ns + i, np->active_ns + i + 1,
            (np->num_ns - (i + 1)) * sizeof(*np->active_ns));
        np->num_ns--;

        /* NB: Don't bother freeing the old active_ns array. */

        TAILQ_FOREACH(ctrlr, &np->controllers, link) {
                nvmft_controller_lun_changed(ctrlr, lun_id);
        }

        sx_xunlock(&np->lock);

        return (0);
}

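/*
 * Fill an Active Namespace ID list with the active namespace IDs
 * greater than nsid, as for the corresponding Identify CNS.
 */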
void
nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
    struct nvme_ns_list *nslist)
{
        u_int i, count;

        sx_slock(&np->lock);
        count = 0;
        for (i = 0; i < np->num_ns; i++) {
                if (np->active_ns[i] <= nsid)
                        continue;
                nslist->ns[count] = htole32(np->active_ns[i]);
                count++;
                if (count == nitems(nslist->ns))
                        break;
        }
        sx_sunlock(&np->lock);
}

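/*
 * Dispatch a command capsule received on a queue pair to CTL as a
 * new ctl_io, stashing the capsule and qpair pointers in the io's
 * frontend-private fields for use at completion.
 */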
void
nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    bool admin)
{
        struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
        const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
        struct nvmft_port *np = ctrlr->np;
        union ctl_io *io;
        int error;

        if (cmd->nsid == htole32(0)) {
                nvmft_send_generic_error(qp, nc,
                    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
                nvmf_free_capsule(nc);
                return;
        }

        mtx_lock(&ctrlr->lock);
        if (ctrlr->pending_commands == 0)
                ctrlr->start_busy = sbinuptime();
        ctrlr->pending_commands++;
        mtx_unlock(&ctrlr->lock);
        io = ctl_alloc_io(np->port.ctl_pool_ref);
        ctl_zero_io(io);
        NVMFT_NC(io) = nc;
        NVMFT_QP(io) = qp;
        io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
        io->io_hdr.nexus.initid = ctrlr->cntlid;
        io->io_hdr.nexus.targ_port = np->port.targ_port;
        io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
        io->nvmeio.cmd = *cmd;
        error = ctl_run(io);
        if (error != 0) {
                nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
                    nvmft_qpair_name(qp), error);
                ctl_nvme_set_generic_error(&io->nvmeio,
                    NVME_SC_INTERNAL_DEVICE_ERROR);
                nvmft_done(io);

                nvmft_controller_error(ctrlr, qp, ENXIO);
        }
}

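/*
 * Abort all outstanding commands for a controller by queueing an
 * I_T nexus reset task to CTL.  The task io has a NULL capsule so
 * that nvmft_done() can distinguish its completion.
 */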
void
nvmft_terminate_commands(struct nvmft_controller *ctrlr)
{
        struct nvmft_port *np = ctrlr->np;
        union ctl_io *io;
        int error;

        mtx_lock(&ctrlr->lock);
        if (ctrlr->pending_commands == 0)
                ctrlr->start_busy = sbinuptime();
        ctrlr->pending_commands++;
        mtx_unlock(&ctrlr->lock);
        io = ctl_alloc_io(np->port.ctl_pool_ref);
        ctl_zero_io(io);
        NVMFT_QP(io) = ctrlr->admin;
        io->io_hdr.io_type = CTL_IO_TASK;
        io->io_hdr.nexus.initid = ctrlr->cntlid;
        io->io_hdr.nexus.targ_port = np->port.targ_port;
        io->io_hdr.nexus.targ_lun = 0;
        io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */
        io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
        error = ctl_run(io);
        if (error != CTL_RETVAL_COMPLETE) {
                nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
#ifdef INVARIANTS
                io->io_hdr.status = CTL_SUCCESS;
#endif
                nvmft_done(io);
        }
}

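/*
 * Host to controller (write) data transfers.  The transport invokes
 * the callback once the requested capsule data has been received
 * into the CTL buffers described by the memory descriptor.
 */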
static void
nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
{
        struct ctl_nvmeio *ctnio = arg;

        if (error != 0) {
                ctl_nvme_set_data_transfer_error(ctnio);
        } else {
                MPASS(xfered == ctnio->kern_data_len);
                ctnio->kern_data_resid -= xfered;
        }

        if (ctnio->kern_sg_entries) {
                free(ctnio->ext_data_ptr, M_NVMFT);
                ctnio->ext_data_ptr = NULL;
        } else
                MPASS(ctnio->ext_data_ptr == NULL);
        ctl_datamove_done((union ctl_io *)ctnio, false);
}

static void
nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
        struct memdesc mem;
        int error;

        MPASS(ctnio->ext_data_ptr == NULL);
        if (ctnio->kern_sg_entries > 0) {
                struct ctl_sg_entry *sgl;
                struct bus_dma_segment *vlist;

                vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
                    M_NVMFT, M_WAITOK);
                ctnio->ext_data_ptr = (void *)vlist;
                sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
                for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
                        vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
                        vlist[i].ds_len = sgl[i].len;
                }
                mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
        } else
                mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);

        error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
            ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
        if (error == 0)
                return;

        nvmft_printf(nvmft_qpair_ctrlr(qp),
            "Failed to request capsule data: %d\n", error);
        ctl_nvme_set_data_transfer_error(ctnio);

        if (ctnio->kern_sg_entries) {
                free(ctnio->ext_data_ptr, M_NVMFT);
                ctnio->ext_data_ptr = NULL;
        } else
                MPASS(ctnio->ext_data_ptr == NULL);
        ctl_datamove_done((union ctl_io *)ctnio, true);
}

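/*
 * Copy the data in CTL's buffer (either a single virtual address or
 * a scatter/gather list) into a freshly-allocated mbuf chain.
 */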
static struct mbuf *
nvmft_copy_data(struct ctl_nvmeio *ctnio)
{
        struct ctl_sg_entry *sgl;
        struct mbuf *m0, *m;
        uint32_t resid, off, todo;
        int mlen;

        MPASS(ctnio->kern_data_len != 0);

        m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);

        if (ctnio->kern_sg_entries == 0) {
                m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
                return (m0);
        }

        resid = ctnio->kern_data_len;
        sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
        off = 0;
        m = m0;
        mlen = M_TRAILINGSPACE(m);
        for (;;) {
                todo = MIN(mlen, sgl->len - off);
                memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
                    todo);
                m->m_len += todo;
                resid -= todo;
                if (resid == 0) {
                        MPASS(m->m_next == NULL);
                        break;
                }

                off += todo;
                if (off == sgl->len) {
                        sgl++;
                        off = 0;
                }
                mlen -= todo;
                if (mlen == 0) {
                        m = m->m_next;
                        mlen = M_TRAILINGSPACE(m);
                }
        }

        return (m0);
}

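/*
 * Instead of copying, build an mbuf chain whose external buffers
 * point directly at CTL's data buffers.  Each mbuf holds a
 * reference on the buffers via kern_data_ref that is dropped by the
 * mbuf's free routine.
 */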
static void
m_free_ref_data(struct mbuf *m)
{
        ctl_ref kern_data_ref = m->m_ext.ext_arg1;

        kern_data_ref(m->m_ext.ext_arg2, -1);
}

static struct mbuf *
m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
{
        struct mbuf *m;

        m = m_get(M_WAITOK, MT_DATA);
        m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref,
            ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
        m->m_len = size;
        ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
        return (m);
}

static struct mbuf *
nvmft_ref_data(struct ctl_nvmeio *ctnio)
{
        struct ctl_sg_entry *sgl;
        struct mbuf *m0, *m;

        MPASS(ctnio->kern_data_len != 0);

        if (ctnio->kern_sg_entries == 0)
                return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
                    ctnio->kern_data_len));

        sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
        m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
        m = m0;
        for (u_int i = 1; i < ctnio->kern_sg_entries; i++) {
                m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
                m = m->m_next;
        }
        return (m0);
}

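/*
 * Controller to host (read) data transfers.  Use zero-copy mbufs
 * when CTL provides a reference callback, otherwise copy the data.
 */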
static void
nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
        struct mbuf *m;
        u_int status;

        if (ctnio->kern_data_ref != NULL)
                m = nvmft_ref_data(ctnio);
        else
                m = nvmft_copy_data(ctnio);
        status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
            ctnio->kern_data_len);
        switch (status) {
        case NVMF_SUCCESS_SENT:
                ctnio->success_sent = true;
                nvmft_command_completed(qp, nc);
                /* FALLTHROUGH */
        case NVMF_MORE:
        case NVME_SC_SUCCESS:
                break;
        default:
                ctl_nvme_set_generic_error(ctnio, status);
                break;
        }
        ctl_datamove_done((union ctl_io *)ctnio, true);
}

void
nvmft_handle_datamove(union ctl_io *io)
{
        struct nvmf_capsule *nc;
        struct nvmft_qpair *qp;

        /* Some CTL commands preemptively set a success status. */
        MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
            io->io_hdr.status == CTL_SUCCESS);
        MPASS(!io->nvmeio.success_sent);

        nc = NVMFT_NC(io);
        qp = NVMFT_QP(io);

        if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
                nvmft_datamove_in(&io->nvmeio, qp, nc);
        else
                nvmft_datamove_out(&io->nvmeio, qp, nc);
}

void
nvmft_abort_datamove(union ctl_io *io)
{
        io->io_hdr.port_status = 1;
        io->io_hdr.flags |= CTL_FLAG_ABORT;
        ctl_datamove_done(io, true);
}

static void
nvmft_datamove(union ctl_io *io)
{
        struct nvmft_qpair *qp;

        qp = NVMFT_QP(io);
        nvmft_qpair_datamove(qp, io);
}

void
nvmft_enqueue_task(struct task *task)
{
        taskqueue_enqueue(nvmft_taskq, task);
}

void
nvmft_drain_task(struct task *task)
{
        taskqueue_drain(nvmft_taskq, task);
}

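/*
 * Add to a 128-bit little-endian counter in the Health Information
 * Page, carrying into the upper 64 bits on overflow.
 */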
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
        uint64_t old, new;

        old = le64toh(pair[0]);
        new = old + addend;
        pair[0] = htole64(new);
        if (new < old)
                pair[1] = htole64(le64toh(pair[1]) + 1);
}

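/*
 * Completion hook invoked by CTL when an io is done: update I/O
 * statistics, send the completion back to the host, and release the
 * capsule and io.
 */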
static void
nvmft_done(union ctl_io *io)
{
        struct nvmft_controller *ctrlr;
        const struct nvme_command *cmd;
        struct nvmft_qpair *qp;
        struct nvmf_capsule *nc;
        size_t len;

        KASSERT(io->io_hdr.status == CTL_SUCCESS ||
            io->io_hdr.status == CTL_NVME_ERROR,
            ("%s: bad status %u", __func__, io->io_hdr.status));

        nc = NVMFT_NC(io);
        qp = NVMFT_QP(io);
        ctrlr = nvmft_qpair_ctrlr(qp);

        if (nc == NULL) {
                /* Completion of nvmft_terminate_commands. */
                goto end;
        }

        cmd = nvmf_capsule_sqe(nc);

        if (io->io_hdr.status == CTL_SUCCESS)
                len = nvmf_capsule_data_len(nc) / 512;
        else
                len = 0;
        switch (cmd->opc) {
        case NVME_OPC_WRITE:
                mtx_lock(&ctrlr->lock);
                hip_add(ctrlr->hip.host_write_commands, 1);
                len += ctrlr->partial_duw;
                if (len >= 1000)
                        hip_add(ctrlr->hip.data_units_written, len / 1000);
                ctrlr->partial_duw = len % 1000;
                mtx_unlock(&ctrlr->lock);
                break;
        case NVME_OPC_READ:
        case NVME_OPC_COMPARE:
        case NVME_OPC_VERIFY:
                mtx_lock(&ctrlr->lock);
                if (cmd->opc != NVME_OPC_VERIFY)
                        hip_add(ctrlr->hip.host_read_commands, 1);
                len += ctrlr->partial_dur;
                if (len >= 1000)
                        hip_add(ctrlr->hip.data_units_read, len / 1000);
                ctrlr->partial_dur = len % 1000;
                mtx_unlock(&ctrlr->lock);
                break;
        }

        if (io->nvmeio.success_sent) {
                MPASS(io->io_hdr.status == CTL_SUCCESS);
        } else {
                io->nvmeio.cpl.cid = cmd->cid;
                nvmft_send_response(qp, &io->nvmeio.cpl);
        }
        nvmf_free_capsule(nc);
end:
        ctl_free_io(io);
        mtx_lock(&ctrlr->lock);
        ctrlr->pending_commands--;
        if (ctrlr->pending_commands == 0)
                ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
        mtx_unlock(&ctrlr->lock);
}

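/*
 * CTL frontend init hook: create the taskqueue used to run
 * controller tasks in the CTL process.
 */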
static int
nvmft_init(void)
{
        int error;

        nvmft_taskq = taskqueue_create("nvmft", M_WAITOK,
            taskqueue_thread_enqueue, &nvmft_taskq);
        error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT,
            control_softc->ctl_proc, "nvmft");
        if (error != 0) {
                taskqueue_free(nvmft_taskq);
                return (error);
        }

        TAILQ_INIT(&nvmft_ports);
        sx_init(&nvmft_ports_lock, "nvmft ports");
        return (0);
}

void
nvmft_port_free(struct nvmft_port *np)
{
        KASSERT(TAILQ_EMPTY(&np->controllers),
            ("%s(%p): active controllers", __func__, np));

        if (np->port.targ_port != -1) {
                if (ctl_port_deregister(&np->port) != 0)
                        printf("%s: ctl_port_deregister() failed\n", __func__);
        }

        free(np->active_ns, M_NVMFT);
        clean_unrhdr(np->ids);
        delete_unrhdr(np->ids);
        sx_destroy(&np->lock);
        free(np, M_NVMFT);
}

static struct nvmft_port *
nvmft_port_find(const char *subnqn)
{
        struct nvmft_port *np;

        KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));

        sx_assert(&nvmft_ports_lock, SA_LOCKED);
        TAILQ_FOREACH(np, &nvmft_ports, link) {
                if (strcmp(np->cdata.subnqn, subnqn) == 0)
                        break;
        }
        return (np);
}

static struct nvmft_port *
nvmft_port_find_by_id(int port_id)
{
        struct nvmft_port *np;

        sx_assert(&nvmft_ports_lock, SA_LOCKED);
        TAILQ_FOREACH(np, &nvmft_ports, link) {
                if (np->port.targ_port == port_id)
                        break;
        }
        return (np);
}

/*
 * Helper function to fetch a number stored as a string in an nv_list.
 * Returns false if the string was not a valid number.
 */
static bool
dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
    u_long *value)
{
        const char *str;
        char *cp;

        str = dnvlist_get_string(nvl, name, NULL);
        if (str == NULL) {
                *value = default_value;
                return (true);
        }
        if (*str == '\0')
                return (false);
        *value = strtoul(str, &cp, 0);
        if (*cp != '\0')
                return (false);
        return (true);
}

/*
 * NVMeoF ports support the following parameters:
 *
 * Mandatory:
 *
 * subnqn: subsystem NVMe Qualified Name
 * portid: integer port ID from Discovery Log Page entry
 *
 * Optional:
 *
 * serial: Serial Number string
 * max_io_qsize: Maximum number of I/O queue entries
 * enable_timeout: Timeout for controller enable in milliseconds
 * ioccsz: Maximum command capsule size
 * iorcsz: Maximum response capsule size
 * nn: Number of namespaces
 */
static void
nvmft_port_create(struct ctl_req *req)
{
        struct nvmft_port *np;
        struct ctl_port *port;
        const char *serial, *subnqn;
        char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
        u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
        int error;

        /* Required parameters. */
        subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
        if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Missing required argument");
                return;
        }
        if (!nvmf_nqn_valid(subnqn)) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid SubNQN");
                return;
        }
        if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
            portid > UINT16_MAX) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid port ID");
                return;
        }

        /* Optional parameters. */
        if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
            NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
            max_io_qsize < NVME_MIN_IO_ENTRIES ||
            max_io_qsize > NVME_MAX_IO_ENTRIES) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid maximum I/O queue size");
                return;
        }

        if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
            NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
            (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid enable timeout");
                return;
        }

        if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
            &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
            (ioccsz % 16) != 0) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid Command Capsule size");
                return;
        }

        if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
            &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
            (iorcsz % 16) != 0) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid Response Capsule size");
                return;
        }

        if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
            nn < 1 || nn > UINT32_MAX) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Invalid number of namespaces");
                return;
        }

        serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
        if (serial == NULL) {
                getcredhostid(curthread->td_ucred, &hostid);
                nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
                serial = serial_buf;
        }

        sx_xlock(&nvmft_ports_lock);

        np = nvmft_port_find(subnqn);
        if (np != NULL) {
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "SubNQN \"%s\" already exists", subnqn);
                sx_xunlock(&nvmft_ports_lock);
                return;
        }

        np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
        refcount_init(&np->refs, 1);
        np->max_io_qsize = max_io_qsize;
        np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
        sx_init(&np->lock, "nvmft port");
        np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
            NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
        TAILQ_INIT(&np->controllers);

        /* The controller ID is set later for individual controllers. */
        _nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
            osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
        np->cdata.aerl = NVMFT_NUM_AER - 1;
        np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
        np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
            NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
            NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
            NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
        np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);

        np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
        memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));

        port = &np->port;

        port->frontend = &nvmft_frontend;
        port->port_type = CTL_PORT_NVMF;
        port->num_requested_ctl_io = max_io_qsize;
        port->port_name = "nvmf";
        port->physical_port = portid;
        port->virtual_port = 0;
        port->port_online = nvmft_online;
        port->port_offline = nvmft_offline;
        port->onoff_arg = np;
        port->lun_enable = nvmft_lun_enable;
        port->lun_disable = nvmft_lun_disable;
        port->targ_lun_arg = np;
        port->fe_datamove = nvmft_datamove;
        port->fe_done = nvmft_done;
        port->targ_port = -1;
        port->options = nvlist_clone(req->args_nvl);

        error = ctl_port_register(port);
        if (error != 0) {
                sx_xunlock(&nvmft_ports_lock);
                nvlist_destroy(port->options);
                nvmft_port_rele(np);
                req->status = CTL_LUN_ERROR;
                snprintf(req->error_str, sizeof(req->error_str),
                    "Failed to register CTL port with error %d", error);
                return;
        }

        TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
        sx_xunlock(&nvmft_ports_lock);

        req->status = CTL_LUN_OK;
        req->result_nvl = nvlist_create(0);
        nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
}

static void
nvmft_port_remove(struct ctl_req *req)
{
        struct nvmft_port *np;
        const char *subnqn;
        u_long port_id;

        /*
         * ctladm port -r just provides the port_id, so permit looking
         * up a port either by "subnqn" or "port_id".
         */
        port_id = ULONG_MAX;
        subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
        if (subnqn == NULL) {
                if (!nvlist_exists_string(req->args_nvl, "port_id")) {
                        req->status = CTL_LUN_ERROR;
                        snprintf(req->error_str, sizeof(req->error_str),
                            "Missing required argument");
                        return;
                }
                if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
                    &port_id)) {
                        req->status = CTL_LUN_ERROR;
                        snprintf(req->error_str, sizeof(req->error_str),
                            "Invalid CTL port ID");
                        return;
                }
        } else {
                if (nvlist_exists_string(req->args_nvl, "port_id")) {
                        req->status = CTL_LUN_ERROR;
                        snprintf(req->error_str, sizeof(req->error_str),
                            "Ambiguous port removal request");
                        return;
                }
        }

        sx_xlock(&nvmft_ports_lock);

        if (subnqn != NULL) {
                np = nvmft_port_find(subnqn);
                if (np == NULL) {
                        req->status = CTL_LUN_ERROR;
                        snprintf(req->error_str, sizeof(req->error_str),
                            "SubNQN \"%s\" does not exist", subnqn);
                        sx_xunlock(&nvmft_ports_lock);
                        return;
                }
        } else {
                np = nvmft_port_find_by_id(port_id);
                if (np == NULL) {
                        req->status = CTL_LUN_ERROR;
                        snprintf(req->error_str, sizeof(req->error_str),
                            "CTL port %lu is not an NVMF port", port_id);
                        sx_xunlock(&nvmft_ports_lock);
                        return;
                }
        }

        TAILQ_REMOVE(&nvmft_ports, np, link);
        sx_xunlock(&nvmft_ports_lock);

        ctl_port_offline(&np->port);
        nvmft_port_rele(np);
        req->status = CTL_LUN_OK;
}

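/*
 * Take over an NVMeoF queue pair from userland as either the admin
 * queue or an I/O queue of a controller, based on the CONNECT
 * command's parameters.
 */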
static void
nvmft_handoff(struct ctl_nvmf *cn)
{
        struct nvmf_fabric_connect_cmd cmd;
        struct nvmf_handoff_controller_qpair *handoff;
        struct nvmf_fabric_connect_data *data;
        struct nvmft_port *np;
        int error;

        np = NULL;
        data = NULL;
        handoff = &cn->data.handoff;
        error = copyin(handoff->cmd, &cmd, sizeof(cmd));
        if (error != 0) {
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Failed to copyin CONNECT SQE");
                return;
        }

        data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
        error = copyin(handoff->data, data, sizeof(*data));
        if (error != 0) {
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Failed to copyin CONNECT data");
                goto out;
        }

        if (!nvmf_nqn_valid(data->subnqn)) {
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Invalid SubNQN");
                goto out;
        }

        sx_slock(&nvmft_ports_lock);
        np = nvmft_port_find(data->subnqn);
        if (np == NULL) {
                sx_sunlock(&nvmft_ports_lock);
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Unknown SubNQN");
                goto out;
        }
        if (!np->online) {
                sx_sunlock(&nvmft_ports_lock);
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "CTL port offline");
                np = NULL;
                goto out;
        }
        nvmft_port_ref(np);
        sx_sunlock(&nvmft_ports_lock);

        if (handoff->params.admin) {
                error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
                if (error != 0) {
                        cn->status = CTL_NVMF_ERROR;
                        snprintf(cn->error_str, sizeof(cn->error_str),
                            "Failed to handoff admin queue: %d", error);
                        goto out;
                }
        } else {
                error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
                if (error != 0) {
                        cn->status = CTL_NVMF_ERROR;
                        snprintf(cn->error_str, sizeof(cn->error_str),
                            "Failed to handoff I/O queue: %d", error);
                        goto out;
                }
        }

        cn->status = CTL_NVMF_OK;
out:
        if (np != NULL)
                nvmft_port_rele(np);
        free(data, M_NVMFT);
}

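/*
 * Export the list of active controller associations to userland as
 * an XML document.
 */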
static void
nvmft_list(struct ctl_nvmf *cn)
{
        struct ctl_nvmf_list_params *lp;
        struct nvmft_controller *ctrlr;
        struct nvmft_port *np;
        struct sbuf *sb;
        int error;

        lp = &cn->data.list;

        sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
            SBUF_INCLUDENUL);
        if (sb == NULL) {
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Failed to allocate NVMeoF session list");
                return;
        }

        sbuf_printf(sb, "<ctlnvmflist>\n");
        sx_slock(&nvmft_ports_lock);
        TAILQ_FOREACH(np, &nvmft_ports, link) {
                sx_slock(&np->lock);
                TAILQ_FOREACH(ctrlr, &np->controllers, link) {
                        sbuf_printf(sb, "<connection id=\"%d\">"
                            "<hostnqn>%s</hostnqn>"
                            "<subnqn>%s</subnqn>"
                            "<trtype>%u</trtype>"
                            "</connection>\n",
                            ctrlr->cntlid,
                            ctrlr->hostnqn,
                            np->cdata.subnqn,
                            ctrlr->trtype);
                }
                sx_sunlock(&np->lock);
        }
        sx_sunlock(&nvmft_ports_lock);
        sbuf_printf(sb, "</ctlnvmflist>\n");
        if (sbuf_finish(sb) != 0) {
                sbuf_delete(sb);
                cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Out of space, %d bytes is too small", lp->alloc_len);
                return;
        }

        error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
        if (error != 0) {
                sbuf_delete(sb);
                cn->status = CTL_NVMF_ERROR;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "Failed to copyout session list: %d", error);
                return;
        }
        lp->fill_len = sbuf_len(sb);
        cn->status = CTL_NVMF_OK;
        sbuf_delete(sb);
}

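/*
 * Disconnect the controller associations matched by the terminate
 * parameters (all, by controller ID, or by host NQN).
 */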
static void
nvmft_terminate(struct ctl_nvmf *cn)
{
        struct ctl_nvmf_terminate_params *tp;
        struct nvmft_controller *ctrlr;
        struct nvmft_port *np;
        bool found, match;

        tp = &cn->data.terminate;

        found = false;
        sx_slock(&nvmft_ports_lock);
        TAILQ_FOREACH(np, &nvmft_ports, link) {
                sx_slock(&np->lock);
                TAILQ_FOREACH(ctrlr, &np->controllers, link) {
                        if (tp->all != 0)
                                match = true;
                        else if (tp->cntlid != -1)
                                match = tp->cntlid == ctrlr->cntlid;
                        else if (tp->hostnqn[0] != '\0')
                                match = strncmp(tp->hostnqn, ctrlr->hostnqn,
                                    sizeof(tp->hostnqn)) == 0;
                        else
                                match = false;
                        if (!match)
                                continue;
                        nvmft_printf(ctrlr,
                            "disconnecting due to administrative request\n");
                        nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
                        found = true;
                }
                sx_sunlock(&np->lock);
        }
        sx_sunlock(&nvmft_ports_lock);

        if (!found) {
                cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
                snprintf(cn->error_str, sizeof(cn->error_str),
                    "No matching associations found");
                return;
        }
        cn->status = CTL_NVMF_OK;
}

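/*
 * Handle the NVMeoF-specific ioctls on the CTL character device.
 */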
static int
nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
        struct ctl_nvmf *cn;
        struct ctl_req *req;

        switch (cmd) {
        case CTL_PORT_REQ:
                req = (struct ctl_req *)data;
                switch (req->reqtype) {
                case CTL_REQ_CREATE:
                        nvmft_port_create(req);
                        break;
                case CTL_REQ_REMOVE:
                        nvmft_port_remove(req);
                        break;
                default:
                        req->status = CTL_LUN_ERROR;
                        snprintf(req->error_str, sizeof(req->error_str),
                            "Unsupported request type %d", req->reqtype);
                        break;
                }
                return (0);
        case CTL_NVMF:
                cn = (struct ctl_nvmf *)data;
                switch (cn->type) {
                case CTL_NVMF_HANDOFF:
                        nvmft_handoff(cn);
                        break;
                case CTL_NVMF_LIST:
                        nvmft_list(cn);
                        break;
                case CTL_NVMF_TERMINATE:
                        nvmft_terminate(cn);
                        break;
                default:
                        cn->status = CTL_NVMF_ERROR;
                        snprintf(cn->error_str, sizeof(cn->error_str),
                            "Invalid NVMeoF request type %d", cn->type);
                        break;
                }
                return (0);
        default:
                return (ENOTTY);
        }
}

static int
nvmft_shutdown(void)
{
        /* TODO: Need to check for active controllers. */
        if (!TAILQ_EMPTY(&nvmft_ports))
                return (EBUSY);

        taskqueue_free(nvmft_taskq);
        sx_destroy(&nvmft_ports_lock);
        return (0);
}

CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);