1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
5 * Written by: John Baldwin <jhb@FreeBSD.org>
6 */
7
8 #include <sys/param.h>
9 #include <sys/dnv.h>
10 #include <sys/jail.h>
11 #include <sys/kernel.h>
12 #include <sys/limits.h>
13 #include <sys/lock.h>
14 #include <sys/malloc.h>
15 #include <sys/mbuf.h>
16 #include <sys/memdesc.h>
17 #include <sys/module.h>
18 #include <sys/proc.h>
19 #include <sys/queue.h>
20 #include <sys/refcount.h>
21 #include <sys/sbuf.h>
22 #include <sys/smp.h>
23 #include <sys/sx.h>
24 #include <sys/taskqueue.h>
25
26 #include <machine/bus.h>
27 #include <machine/bus_dma.h>
28
29 #include <dev/nvmf/nvmf.h>
30 #include <dev/nvmf/nvmf_transport.h>
31 #include <dev/nvmf/controller/nvmft_subr.h>
32 #include <dev/nvmf/controller/nvmft_var.h>
33
34 #include <cam/ctl/ctl.h>
35 #include <cam/ctl/ctl_error.h>
36 #include <cam/ctl/ctl_ha.h>
37 #include <cam/ctl/ctl_io.h>
38 #include <cam/ctl/ctl_frontend.h>
39 #include <cam/ctl/ctl_private.h>
40
41 /*
42 * Store pointers to the capsule and qpair in the two pointer members
43 * of CTL_PRIV_FRONTEND.
44 */
45 #define NVMFT_NC(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
46 #define NVMFT_QP(io) ((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])
47
48 static void nvmft_done(union ctl_io *io);
49 static int nvmft_init(void);
50 static int nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
51 int flag, struct thread *td);
52 static int nvmft_shutdown(void);
53
/* Taskqueue for deferred work queued via nvmft_enqueue_task(). */
static struct taskqueue *nvmft_taskq;

/* All registered NVMeoF ports; protected by nvmft_ports_lock. */
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;

MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");

/* Method table registering this file as the "nvmf" CTL frontend. */
static struct ctl_frontend nvmft_frontend = {
	.name = "nvmf",
	.init = nvmft_init,
	.ioctl = nvmft_ioctl,
	.fe_dump = NULL,
	.shutdown = nvmft_shutdown,
};
67
68 static void
nvmft_online(void * arg)69 nvmft_online(void *arg)
70 {
71 struct nvmft_port *np = arg;
72
73 mtx_lock(&np->lock);
74 np->online = true;
75 mtx_unlock(&np->lock);
76 }
77
/*
 * CTL port-offline callback: stop accepting new associations and
 * shut down every existing controller, then wait for the controller
 * list to drain.  The sleep channel is 'np'; the wakeup is posted by
 * the controller teardown path (not visible in this file).
 */
static void
nvmft_offline(void *arg)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;

	mtx_lock(&np->lock);
	np->online = false;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_printf(ctrlr,
		    "shutting down due to port going offline\n");
		nvmft_controller_error(ctrlr, NULL, ENODEV);
	}

	/* Block until every controller has detached from this port. */
	while (!TAILQ_EMPTY(&np->controllers))
		mtx_sleep(np, &np->lock, 0, "nvmfoff", 0);
	mtx_unlock(&np->lock);
}
97
98 static int
nvmft_info(void * arg,struct sbuf * sb)99 nvmft_info(void *arg, struct sbuf *sb)
100 {
101 struct nvmft_port *np = arg;
102 struct nvmft_controller *ctrlr;
103 int retval;
104
105 mtx_lock(&np->lock);
106 retval = sbuf_printf(sb, "\t<port>%s,p,%u</port>\n", np->cdata.subnqn,
107 np->portid);
108 if (retval != 0)
109 goto out;
110
111 TAILQ_FOREACH(ctrlr, &np->controllers, link) {
112 retval = sbuf_printf(sb, "\t<host id=\"%u\">%s</host>\n",
113 ctrlr->cntlid, ctrlr->hostnqn);
114 if (retval != 0)
115 break;
116 }
117 out:
118 mtx_unlock(&np->lock);
119 return (retval);
120 }
121
/*
 * CTL lun-enable callback: insert the namespace ID corresponding to
 * this LUN (nsid = lun_id + 1) into the sorted active_ns array and
 * notify every connected controller of the change.
 */
static int
nvmft_lun_enable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t *old_ns, *new_ns;
	uint32_t nsid;
	u_int i, new_count;

	/* NSIDs are 1-based, so LUN IDs at or beyond NN cannot map. */
	if (lun_id >= le32toh(np->cdata.nn)) {
		printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
		    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
		return (EOPNOTSUPP);
	}
	nsid = lun_id + 1;

	mtx_lock(&np->lock);
	for (;;) {
		/*
		 * M_WAITOK allocation can sleep, so size the new array
		 * with the lock dropped.  If more namespaces appeared
		 * while unlocked, the allocation is too small: free it
		 * and retry with the larger count.
		 */
		new_count = np->num_ns + 1;
		mtx_unlock(&np->lock);
		new_ns = mallocarray(new_count, sizeof(*new_ns), M_NVMFT,
		    M_WAITOK);
		mtx_lock(&np->lock);
		if (np->num_ns + 1 <= new_count)
			break;
		free(new_ns, M_NVMFT);
	}
	/* Locate the insertion point; reject a duplicate NSID. */
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] < nsid)
			continue;
		if (np->active_ns[i] == nsid) {
			mtx_unlock(&np->lock);
			free(new_ns, M_NVMFT);
			printf("NVMFT: %s duplicate lun %d\n",
			    np->cdata.subnqn, lun_id);
			return (EINVAL);
		}
		break;
	}

	/* Copy over IDs smaller than nsid. */
	memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));

	/* Insert nsid. */
	new_ns[i] = nsid;

	/* Copy over IDs greater than nsid. */
	memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
	    sizeof(*np->active_ns));

	np->num_ns++;
	old_ns = np->active_ns;
	np->active_ns = new_ns;

	/* Notify each connected controller of the namespace change. */
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	mtx_unlock(&np->lock);
	free(old_ns, M_NVMFT);

	return (0);
}
185
/*
 * CTL lun-disable callback: remove the namespace ID corresponding to
 * this LUN (nsid = lun_id + 1) from the sorted active_ns array and
 * notify every connected controller of the change.
 */
static int
nvmft_lun_disable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t nsid;
	u_int i;

	/* LUNs at or beyond NN were never enabled (see nvmft_lun_enable). */
	if (lun_id >= le32toh(np->cdata.nn))
		return (0);
	nsid = lun_id + 1;

	mtx_lock(&np->lock);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] == nsid)
			goto found;
	}
	mtx_unlock(&np->lock);
	printf("NVMFT: %s request to disable nonexistent lun %d\n",
	    np->cdata.subnqn, lun_id);
	return (EINVAL);

found:
	/* Move down IDs greater than nsid. */
	memmove(np->active_ns + i, np->active_ns + i + 1,
	    (np->num_ns - (i + 1)) * sizeof(*np->active_ns));
	np->num_ns--;

	/* NB: Don't bother freeing the old active_ns array. */

	/* Notify each connected controller of the namespace change. */
	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	mtx_unlock(&np->lock);

	return (0);
}
224
225 void
nvmft_populate_active_nslist(struct nvmft_port * np,uint32_t nsid,struct nvme_ns_list * nslist)226 nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
227 struct nvme_ns_list *nslist)
228 {
229 u_int i, count;
230
231 mtx_lock(&np->lock);
232 count = 0;
233 for (i = 0; i < np->num_ns; i++) {
234 if (np->active_ns[i] <= nsid)
235 continue;
236 nslist->ns[count] = htole32(np->active_ns[i]);
237 count++;
238 if (count == nitems(nslist->ns))
239 break;
240 }
241 mtx_unlock(&np->lock);
242 }
243
/*
 * Dispatch an NVMe command capsule received on 'qp' to CTL for
 * execution.  The capsule and qpair are stashed in the CTL I/O's
 * frontend-private pointers for use by nvmft_handle_datamove() and
 * nvmft_done().
 */
void
nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    bool admin)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	/* NSID 0 never maps to a LUN; fail the command immediately. */
	if (cmd->nsid == htole32(0)) {
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
		nvmf_free_capsule(nc);
		return;
	}

	/* Busy-time accounting starts with the first pending command. */
	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_NC(io) = nc;
	NVMFT_QP(io) = qp;
	io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	/* NSIDs are 1-based while LUN IDs are 0-based. */
	io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
	io->nvmeio.cmd = *cmd;
	error = ctl_run(io);
	if (error != 0) {
		/*
		 * Complete the command with an internal error and
		 * tear down the association.
		 */
		nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
		    nvmft_qpair_name(qp), error);
		ctl_nvme_set_generic_error(&io->nvmeio,
		    NVME_SC_INTERNAL_DEVICE_ERROR);
		nvmft_done(io);

		nvmft_controller_error(ctrlr, qp, ENXIO);
	}
}
286
/*
 * Queue an I_T nexus reset task to CTL to abort any outstanding
 * commands for this controller.  NVMFT_NC() is deliberately left
 * NULL so nvmft_done() can recognize the completion of this
 * internal task (it has no capsule to respond to).
 */
void
nvmft_terminate_commands(struct nvmft_controller *ctrlr)
{
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	/* Count this task as a pending command for busy accounting. */
	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_QP(io) = ctrlr->admin;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
		/* Satisfy nvmft_done()'s status assertion. */
#ifdef INVARIANTS
		io->io_hdr.status = CTL_SUCCESS;
#endif
		nvmft_done(io);
	}
}
317
318 static void
nvmft_datamove_out_cb(void * arg,size_t xfered,int error)319 nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
320 {
321 struct ctl_nvmeio *ctnio = arg;
322
323 if (error != 0) {
324 ctl_nvme_set_data_transfer_error(ctnio);
325 } else {
326 MPASS(xfered == ctnio->kern_data_len);
327 ctnio->kern_data_resid -= xfered;
328 }
329
330 if (ctnio->kern_sg_entries) {
331 free(ctnio->ext_data_ptr, M_NVMFT);
332 ctnio->ext_data_ptr = NULL;
333 } else
334 MPASS(ctnio->ext_data_ptr == NULL);
335 ctl_datamove_done((union ctl_io *)ctnio, false);
336 }
337
/*
 * DATA OUT (host -> controller) transfer: ask the transport to
 * deliver the capsule's data directly into CTL's buffer(s).
 * Completion is reported asynchronously via nvmft_datamove_out_cb().
 */
static void
nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct memdesc mem;
	int error;

	MPASS(ctnio->ext_data_ptr == NULL);
	if (ctnio->kern_sg_entries > 0) {
		struct ctl_sg_entry *sgl;
		struct bus_dma_segment *vlist;

		/*
		 * Shadow CTL's scatter/gather list as a bus_dma
		 * segment list for memdesc_vlist().  The shadow is
		 * stashed in ext_data_ptr and freed in the callback
		 * (or in the failure path below).
		 */
		vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
		    M_NVMFT, M_WAITOK);
		ctnio->ext_data_ptr = (void *)vlist;
		sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
		for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
			vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
			vlist[i].ds_len = sgl[i].len;
		}
		mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
	} else
		mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);

	error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
	    ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
	if (error == 0)
		return;

	/*
	 * Immediate failure: the callback will not run, so perform
	 * its cleanup here and complete the datamove with an error.
	 */
	nvmft_printf(nvmft_qpair_ctrlr(qp),
	    "Failed to request capsule data: %d\n", error);
	ctl_nvme_set_data_transfer_error(ctnio);

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, true);
}
378
/*
 * Copy CTL's data buffer (either a flat buffer or a scatter/gather
 * list, per kern_sg_entries) into a freshly allocated mbuf chain.
 * Used by nvmft_datamove_in() when the buffer is not
 * reference-counted.
 */
static struct mbuf *
nvmft_copy_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;
	uint32_t resid, off, todo;
	int mlen;

	MPASS(ctnio->kern_data_len != 0);

	m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);

	/* Flat buffer: a single m_copyback fills the whole chain. */
	if (ctnio->kern_sg_entries == 0) {
		m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
		return (m0);
	}

	/*
	 * Walk the S/G list and the mbuf chain in lockstep, copying
	 * at each step the smaller of the space left in the current
	 * mbuf and the data left in the current S/G entry.
	 */
	resid = ctnio->kern_data_len;
	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	off = 0;	/* consumed bytes of the current S/G entry */
	m = m0;
	mlen = M_TRAILINGSPACE(m);
	for (;;) {
		todo = MIN(mlen, sgl->len - off);
		memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
		    todo);
		m->m_len += todo;
		resid -= todo;
		if (resid == 0) {
			MPASS(m->m_next == NULL);
			break;
		}

		off += todo;
		if (off == sgl->len) {
			/* Current S/G entry exhausted; move to the next. */
			sgl++;
			off = 0;
		}
		mlen -= todo;
		if (mlen == 0) {
			/* Current mbuf full; move to the next in the chain. */
			m = m->m_next;
			mlen = M_TRAILINGSPACE(m);
		}
	}

	return (m0);
}
426
427 static void
m_free_ref_data(struct mbuf * m)428 m_free_ref_data(struct mbuf *m)
429 {
430 ctl_ref kern_data_ref = m->m_ext.ext_arg1;
431
432 kern_data_ref(m->m_ext.ext_arg2, -1);
433 }
434
/*
 * Wrap 'buf' (owned by CTL) in a read-only external mbuf without
 * copying.  Takes a reference on the CTL buffer via kern_data_ref;
 * the reference is dropped by m_free_ref_data() when the mbuf is
 * freed.
 */
static struct mbuf *
m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
{
	struct mbuf *m;

	m = m_get(M_WAITOK, MT_DATA);
	m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref,
	    ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
	m->m_len = size;
	ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
	return (m);
}
447
448 static struct mbuf *
nvmft_ref_data(struct ctl_nvmeio * ctnio)449 nvmft_ref_data(struct ctl_nvmeio *ctnio)
450 {
451 struct ctl_sg_entry *sgl;
452 struct mbuf *m0, *m;
453
454 MPASS(ctnio->kern_data_len != 0);
455
456 if (ctnio->kern_sg_entries == 0)
457 return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
458 ctnio->kern_data_len));
459
460 sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
461 m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
462 m = m0;
463 for (u_int i = 1; i < ctnio->kern_sg_entries; i++) {
464 m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
465 m = m->m_next;
466 }
467 return (m0);
468 }
469
/*
 * DATA IN (controller -> host) transfer: package CTL's buffer as an
 * mbuf chain and hand it to the transport.  Reference-counted
 * buffers are wrapped zero-copy; otherwise the data is copied.
 */
static void
nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct mbuf *m;
	u_int status;

	if (ctnio->kern_data_ref != NULL)
		m = nvmft_ref_data(ctnio);
	else
		m = nvmft_copy_data(ctnio);
	status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
	    ctnio->kern_data_len);
	switch (status) {
	case NVMF_SUCCESS_SENT:
		/*
		 * The transport sent a success completion along with
		 * the data; remember that so nvmft_done() does not
		 * send a second response.
		 */
		ctnio->success_sent = true;
		nvmft_command_completed(qp, nc);
		/* FALLTHROUGH */
	case NVMF_MORE:
	case NVME_SC_SUCCESS:
		break;
	default:
		/* Any other value is an NVMe status code. */
		ctl_nvme_set_generic_error(ctnio, status);
		break;
	}
	ctl_datamove_done((union ctl_io *)ctnio, true);
}
497
498 void
nvmft_handle_datamove(union ctl_io * io)499 nvmft_handle_datamove(union ctl_io *io)
500 {
501 struct nvmf_capsule *nc;
502 struct nvmft_qpair *qp;
503
504 /* Some CTL commands preemptively set a success status. */
505 MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
506 io->io_hdr.status == CTL_SUCCESS);
507 MPASS(!io->nvmeio.success_sent);
508
509 nc = NVMFT_NC(io);
510 qp = NVMFT_QP(io);
511
512 if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
513 nvmft_datamove_in(&io->nvmeio, qp, nc);
514 else
515 nvmft_datamove_out(&io->nvmeio, qp, nc);
516 }
517
518 void
nvmft_abort_datamove(union ctl_io * io)519 nvmft_abort_datamove(union ctl_io *io)
520 {
521 io->io_hdr.port_status = 1;
522 io->io_hdr.flags |= CTL_FLAG_ABORT;
523 ctl_datamove_done(io, true);
524 }
525
/*
 * CTL fe_datamove entry point: forward the transfer to the queue
 * pair the command arrived on.
 */
static void
nvmft_datamove(union ctl_io *io)
{
	nvmft_qpair_datamove(NVMFT_QP(io), io);
}
534
/* Queue deferred work to the shared nvmft taskqueue. */
void
nvmft_enqueue_task(struct task *task)
{
	taskqueue_enqueue(nvmft_taskq, task);
}

/* Wait for a previously queued task to finish running. */
void
nvmft_drain_task(struct task *task)
{
	taskqueue_drain(nvmft_taskq, task);
}
546
/*
 * Add 'addend' to a 128-bit counter stored as two little-endian
 * 64-bit words (the layout of the Health Information Page fields),
 * propagating any carry into the high word.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old) {
		/*
		 * Carry into the high word.  Do the increment in host
		 * order: the previous 'pair[1] += htole64(1)' added a
		 * byte-swapped constant to the raw little-endian
		 * representation and was only correct on
		 * little-endian hosts.
		 */
		pair[1] = htole64(le64toh(pair[1]) + 1);
	}
}
558
559 static void
nvmft_done(union ctl_io * io)560 nvmft_done(union ctl_io *io)
561 {
562 struct nvmft_controller *ctrlr;
563 const struct nvme_command *cmd;
564 struct nvmft_qpair *qp;
565 struct nvmf_capsule *nc;
566 size_t len;
567
568 KASSERT(io->io_hdr.status == CTL_SUCCESS ||
569 io->io_hdr.status == CTL_NVME_ERROR,
570 ("%s: bad status %u", __func__, io->io_hdr.status));
571
572 nc = NVMFT_NC(io);
573 qp = NVMFT_QP(io);
574 ctrlr = nvmft_qpair_ctrlr(qp);
575
576 if (nc == NULL) {
577 /* Completion of nvmft_terminate_commands. */
578 goto end;
579 }
580
581 cmd = nvmf_capsule_sqe(nc);
582
583 if (io->io_hdr.status == CTL_SUCCESS)
584 len = nvmf_capsule_data_len(nc) / 512;
585 else
586 len = 0;
587 switch (cmd->opc) {
588 case NVME_OPC_WRITE:
589 mtx_lock(&ctrlr->lock);
590 hip_add(ctrlr->hip.host_write_commands, 1);
591 len += ctrlr->partial_duw;
592 if (len > 1000)
593 hip_add(ctrlr->hip.data_units_written, len / 1000);
594 ctrlr->partial_duw = len % 1000;
595 mtx_unlock(&ctrlr->lock);
596 break;
597 case NVME_OPC_READ:
598 case NVME_OPC_COMPARE:
599 case NVME_OPC_VERIFY:
600 mtx_lock(&ctrlr->lock);
601 if (cmd->opc != NVME_OPC_VERIFY)
602 hip_add(ctrlr->hip.host_read_commands, 1);
603 len += ctrlr->partial_dur;
604 if (len > 1000)
605 hip_add(ctrlr->hip.data_units_read, len / 1000);
606 ctrlr->partial_dur = len % 1000;
607 mtx_unlock(&ctrlr->lock);
608 break;
609 }
610
611 if (io->nvmeio.success_sent) {
612 MPASS(io->io_hdr.status == CTL_SUCCESS);
613 } else {
614 io->nvmeio.cpl.cid = cmd->cid;
615 nvmft_send_response(qp, &io->nvmeio.cpl);
616 }
617 nvmf_free_capsule(nc);
618 end:
619 ctl_free_io(io);
620 mtx_lock(&ctrlr->lock);
621 ctrlr->pending_commands--;
622 if (ctrlr->pending_commands == 0)
623 ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
624 mtx_unlock(&ctrlr->lock);
625 }
626
/*
 * CTL frontend init callback: create the shared taskqueue (one
 * thread per CPU, run inside the CTL kernel process) and initialize
 * the global port list and its lock.
 */
static int
nvmft_init(void)
{
	int error;

	nvmft_taskq = taskqueue_create("nvmft", M_WAITOK,
	    taskqueue_thread_enqueue, &nvmft_taskq);
	error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT,
	    control_softc->ctl_proc, "nvmft");
	if (error != 0) {
		taskqueue_free(nvmft_taskq);
		return (error);
	}

	TAILQ_INIT(&nvmft_ports);
	sx_init(&nvmft_ports_lock, "nvmft ports");
	return (0);
}
645
/*
 * Release a port once its last reference is dropped: deregister the
 * CTL port (if it was ever registered), then free the namespace
 * array, the controller-ID unit number space, and the port itself.
 */
void
nvmft_port_free(struct nvmft_port *np)
{
	KASSERT(TAILQ_EMPTY(&np->controllers),
	    ("%s(%p): active controllers", __func__, np));

	/* targ_port is -1 until ctl_port_register() succeeds. */
	if (np->port.targ_port != -1) {
		if (ctl_port_deregister(&np->port) != 0)
			printf("%s: ctl_port_deregister() failed\n", __func__);
	}

	free(np->active_ns, M_NVMFT);
	clean_unrhdr(np->ids);
	delete_unrhdr(np->ids);
	mtx_destroy(&np->lock);
	free(np, M_NVMFT);
}
663
664 static struct nvmft_port *
nvmft_port_find(const char * subnqn)665 nvmft_port_find(const char *subnqn)
666 {
667 struct nvmft_port *np;
668
669 KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));
670
671 sx_assert(&nvmft_ports_lock, SA_LOCKED);
672 TAILQ_FOREACH(np, &nvmft_ports, link) {
673 if (strcmp(np->cdata.subnqn, subnqn) == 0)
674 break;
675 }
676 return (np);
677 }
678
679 static struct nvmft_port *
nvmft_port_find_by_id(int port_id)680 nvmft_port_find_by_id(int port_id)
681 {
682 struct nvmft_port *np;
683
684 sx_assert(&nvmft_ports_lock, SA_LOCKED);
685 TAILQ_FOREACH(np, &nvmft_ports, link) {
686 if (np->port.targ_port == port_id)
687 break;
688 }
689 return (np);
690 }
691
692 /*
693 * Helper function to fetch a number stored as a string in an nv_list.
694 * Returns false if the string was not a valid number.
695 */
696 static bool
dnvlist_get_strnum(nvlist_t * nvl,const char * name,u_long default_value,u_long * value)697 dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
698 u_long *value)
699 {
700 const char *str;
701 char *cp;
702
703 str = dnvlist_get_string(nvl, name, NULL);
704 if (str == NULL) {
705 *value = default_value;
706 return (true);
707 }
708 if (*str == '\0')
709 return (false);
710 *value = strtoul(str, &cp, 0);
711 if (*cp != '\0')
712 return (false);
713 return (true);
714 }
715
716 /*
717 * NVMeoF ports support the following parameters:
718 *
719 * Mandatory:
720 *
721 * subnqn: subsystem NVMe Qualified Name
722 * portid: integer port ID from Discovery Log Page entry
723 *
724 * Optional:
725 * serial: Serial Number string
726 * max_io_qsize: Maximum number of I/O queue entries
727 * enable_timeout: Timeout for controller enable in milliseconds
728 * ioccsz: Maximum command capsule size
729 * iorcsz: Maximum response capsule size
730 * nn: Number of namespaces
731 */
/*
 * Handle a CTL_REQ_CREATE port request: validate the nvlist
 * arguments described above, construct the nvmft_port with its
 * controller data template, and register it as a CTL port.  Status
 * and any error string are returned via 'req'.
 */
static void
nvmft_port_create(struct ctl_req *req)
{
	struct nvmft_port *np;
	struct ctl_port *port;
	const char *serial, *subnqn;
	char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
	u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
	int error;

	/* Required parameters. */
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Missing required argument");
		return;
	}
	if (!nvmf_nqn_valid(subnqn)) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid SubNQN");
		return;
	}
	if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
	    portid > UINT16_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid port ID");
		return;
	}

	/* Optional parameters. */
	if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
	    NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
	    max_io_qsize < NVME_MIN_IO_ENTRIES ||
	    max_io_qsize > NVME_MAX_IO_ENTRIES) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid maximum I/O queue size");
		return;
	}

	/* The enable timeout is stored in CAP.TO in 500ms units. */
	if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
	    NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
	    (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid enable timeout");
		return;
	}

	/* Capsule sizes must be multiples of 16 bytes. */
	if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
	    &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
	    (ioccsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Command Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
	    &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
	    (iorcsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Response Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
	    nn < 1 || nn > UINT32_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid number of namespaces");
		return;
	}

	/* Default serial number is derived from the kernel host ID. */
	serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
	if (serial == NULL) {
		getcredhostid(curthread->td_ucred, &hostid);
		nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
		serial = serial_buf;
	}

	sx_xlock(&nvmft_ports_lock);

	/* SubNQNs must be unique across ports. */
	np = nvmft_port_find(subnqn);
	if (np != NULL) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "SubNQN \"%s\" already exists", subnqn);
		sx_xunlock(&nvmft_ports_lock);
		return;
	}

	np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
	refcount_init(&np->refs, 1);
	np->portid = portid;
	np->max_io_qsize = max_io_qsize;
	np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
	mtx_init(&np->lock, "nvmft port", NULL, MTX_DEF);
	/* Unit number space used to allocate controller IDs. */
	np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
	    NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
	TAILQ_INIT(&np->controllers);

	/* The controller ID is set later for individual controllers. */
	_nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
	    osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
	np->cdata.aerl = NVMFT_NUM_AER - 1;
	np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
	np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
	np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);

	np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));

	/* Fill in the embedded CTL port and register it. */
	port = &np->port;

	port->frontend = &nvmft_frontend;
	port->port_type = CTL_PORT_NVMF;
	port->num_requested_ctl_io = max_io_qsize;
	port->port_name = "nvmf";
	port->physical_port = portid;
	port->virtual_port = 0;
	port->port_online = nvmft_online;
	port->port_offline = nvmft_offline;
	port->port_info = nvmft_info;
	port->onoff_arg = np;
	port->lun_enable = nvmft_lun_enable;
	port->lun_disable = nvmft_lun_disable;
	port->targ_lun_arg = np;
	port->fe_datamove = nvmft_datamove;
	port->fe_done = nvmft_done;
	port->targ_port = -1;
	port->options = nvlist_clone(req->args_nvl);

	error = ctl_port_register(port);
	if (error != 0) {
		sx_xunlock(&nvmft_ports_lock);
		nvlist_destroy(port->options);
		/* Drops the initial reference, freeing the port. */
		nvmft_port_rele(np);
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Failed to register CTL port with error %d", error);
		return;
	}

	TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	req->status = CTL_LUN_OK;
	req->result_nvl = nvlist_create(0);
	nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
}
890
/*
 * Handle a CTL_REQ_REMOVE port request: locate the port by SubNQN
 * or CTL port ID, unlink it from the global list, take it offline
 * if needed, and drop the list's reference.
 */
static void
nvmft_port_remove(struct ctl_req *req)
{
	struct nvmft_port *np;
	const char *subnqn;
	u_long port_id;

	/*
	 * ctladm port -r just provides the port_id, so permit looking
	 * up a port either by "subnqn" or "port_id".
	 */
	port_id = ULONG_MAX;
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL) {
		if (!nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Missing required argument");
			return;
		}
		if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
		    &port_id)) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Invalid CTL port ID");
			return;
		}
	} else {
		/* Supplying both selectors is an error. */
		if (nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Ambiguous port removal request");
			return;
		}
	}

	sx_xlock(&nvmft_ports_lock);

	if (subnqn != NULL) {
		np = nvmft_port_find(subnqn);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "SubNQN \"%s\" does not exist", subnqn);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	} else {
		np = nvmft_port_find_by_id(port_id);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "CTL port %lu is not a NVMF port", port_id);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	}

	TAILQ_REMOVE(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	/*
	 * An online port must be taken offline first, which shuts
	 * down its controllers (see nvmft_offline()).
	 */
	mtx_lock(&np->lock);
	if (np->online) {
		mtx_unlock(&np->lock);
		ctl_port_offline(&np->port);
	} else
		mtx_unlock(&np->lock);

	/* Drop the list's reference; may free the port. */
	nvmft_port_rele(np);
	req->status = CTL_LUN_OK;
}
962
/*
 * Handle a CTL_NVMF_HANDOFF request: adopt a queue pair whose
 * Fabrics CONNECT was accepted in userland.  Unpack and validate
 * the packed nvlist, look up the target port by SubNQN, and hand
 * the queue off as either an admin or an I/O queue.
 */
static void
nvmft_handoff(struct ctl_nvmf *cn)
{
	const struct nvmf_fabric_connect_cmd *cmd;
	const struct nvmf_fabric_connect_data *data;
	const nvlist_t *params;
	struct nvmft_port *np;
	nvlist_t *nvl;
	size_t len;
	enum nvmf_trtype trtype;
	int error;

	np = NULL;
	error = nvmf_unpack_ioc_nvlist(&cn->data.handoff, &nvl);
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin and unpack handoff arguments");
		return;
	}

	if (!nvlist_exists_number(nvl, "trtype") ||
	    !nvlist_exists_nvlist(nvl, "params") ||
	    !nvlist_exists_binary(nvl, "cmd") ||
	    !nvlist_exists_binary(nvl, "data")) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Handoff arguments missing required value");
		goto out;
	}

	params = nvlist_get_nvlist(nvl, "params");
	if (!nvmf_validate_qpair_nvlist(params, true)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid queue pair parameters");
		goto out;
	}

	/* The CONNECT SQE and its data must be exactly sized. */
	cmd = nvlist_get_binary(nvl, "cmd", &len);
	if (len != sizeof(*cmd)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Wrong size for CONNECT SQE");
		goto out;
	}

	data = nvlist_get_binary(nvl, "data", &len);
	if (len != sizeof(*data)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Wrong size for CONNECT data");
		goto out;
	}

	if (!nvmf_nqn_valid(data->subnqn)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid SubNQN");
		goto out;
	}

	sx_slock(&nvmft_ports_lock);
	np = nvmft_port_find(data->subnqn);
	if (np == NULL) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Unknown SubNQN");
		goto out;
	}
	if (!np->online) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "CTL port offline");
		/* No reference was taken yet; don't release in 'out'. */
		np = NULL;
		goto out;
	}
	/* Hold the port across the handoff after dropping the list lock. */
	nvmft_port_ref(np);
	sx_sunlock(&nvmft_ports_lock);

	trtype = nvlist_get_number(nvl, "trtype");
	if (nvlist_get_bool(params, "admin")) {
		error = nvmft_handoff_admin_queue(np, trtype, params, cmd,
		    data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff admin queue: %d", error);
			goto out;
		}
	} else {
		error = nvmft_handoff_io_queue(np, trtype, params, cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff I/O queue: %d", error);
			goto out;
		}
	}

	cn->status = CTL_NVMF_OK;
out:
	if (np != NULL)
		nvmft_port_rele(np);
	nvlist_destroy(nvl);
}
1071
/*
 * Handle a CTL_NVMF_LIST request: render every controller on every
 * port as XML into a fixed-size sbuf and copy it out to the
 * caller's buffer.  Reports CTL_NVMF_LIST_NEED_MORE_SPACE if the
 * caller's buffer was too small.
 */
static void
nvmft_list(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_list_params *lp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	struct sbuf *sb;
	int error;

	lp = &cn->data.list;

	/* Fixed-size sbuf so truncation is detected by sbuf_finish(). */
	sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
	    SBUF_INCLUDENUL);
	if (sb == NULL) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to allocate NVMeoF session list");
		return;
	}

	sbuf_printf(sb, "<ctlnvmflist>\n");
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		mtx_lock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			sbuf_printf(sb, "<connection id=\"%d\">"
			    "<hostnqn>%s</hostnqn>"
			    "<subnqn>%s</subnqn>"
			    "<trtype>%u</trtype>"
			    "</connection>\n",
			    ctrlr->cntlid,
			    ctrlr->hostnqn,
			    np->cdata.subnqn,
			    ctrlr->trtype);
		}
		mtx_unlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);
	sbuf_printf(sb, "</ctlnvmflist>\n");
	if (sbuf_finish(sb) != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Out of space, %d bytes is too small", lp->alloc_len);
		return;
	}

	error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
	if (error != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyout session list: %d", error);
		return;
	}
	lp->fill_len = sbuf_len(sb);
	cn->status = CTL_NVMF_OK;
	sbuf_delete(sb);
}
1131
/*
 * Handle a CTL_NVMF_TERMINATE request: disconnect every controller
 * matching the request's selector (all controllers, a specific
 * controller ID, or a host NQN).
 */
static void
nvmft_terminate(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_terminate_params *tp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	bool found, match;

	tp = &cn->data.terminate;

	found = false;
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		mtx_lock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			/* Selector precedence: all, then cntlid, then NQN. */
			if (tp->all != 0)
				match = true;
			else if (tp->cntlid != -1)
				match = tp->cntlid == ctrlr->cntlid;
			else if (tp->hostnqn[0] != '\0')
				match = strncmp(tp->hostnqn, ctrlr->hostnqn,
				    sizeof(tp->hostnqn)) == 0;
			else
				match = false;
			if (!match)
				continue;
			nvmft_printf(ctrlr,
			    "disconnecting due to administrative request\n");
			nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
			found = true;
		}
		mtx_unlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);

	if (!found) {
		cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "No matching associations found");
		return;
	}
	cn->status = CTL_NVMF_OK;
}
1175
/*
 * CTL frontend ioctl handler: dispatch port create/remove requests
 * (CTL_PORT_REQ) and NVMeoF-specific requests (CTL_NVMF) to their
 * handlers.  Errors are reported in the request structures, so 0 is
 * returned for all recognized commands.
 */
static int
nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ctl_nvmf *cn;
	struct ctl_req *req;

	switch (cmd) {
	case CTL_PORT_REQ:
		req = (struct ctl_req *)data;
		switch (req->reqtype) {
		case CTL_REQ_CREATE:
			nvmft_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			nvmft_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
			break;
		}
		return (0);
	case CTL_NVMF:
		cn = (struct ctl_nvmf *)data;
		switch (cn->type) {
		case CTL_NVMF_HANDOFF:
			nvmft_handoff(cn);
			break;
		case CTL_NVMF_LIST:
			nvmft_list(cn);
			break;
		case CTL_NVMF_TERMINATE:
			nvmft_terminate(cn);
			break;
		default:
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Invalid NVMeoF request type %d", cn->type);
			break;
		}
		return (0);
	default:
		return (ENOTTY);
	}
}
1223
/*
 * CTL frontend shutdown callback: refuse while any port still
 * exists, otherwise tear down the taskqueue and the port-list lock.
 */
static int
nvmft_shutdown(void)
{
	/* TODO: Need to check for active controllers. */
	if (!TAILQ_EMPTY(&nvmft_ports))
		return (EBUSY);

	taskqueue_free(nvmft_taskq);
	sx_destroy(&nvmft_ports_lock);
	return (0);
}
1235
/* Register the frontend with CTL and declare the transport dependency. */
CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);
1238