xref: /freebsd/sys/dev/nvmf/controller/ctl_frontend_nvmf.c (revision c7a33fe37d7688cb21c743f68256680e003210ad)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/dnv.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/refcount.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/taskqueue.h>

#include <machine/bus.h>
#include <machine/bus_dma.h>

#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/controller/nvmft_subr.h>
#include <dev/nvmf/controller/nvmft_var.h>

#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_private.h>

/*
 * Store pointers to the capsule and qpair in the two pointer members
 * of CTL_PRIV_FRONTEND.
 */
#define	NVMFT_NC(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
#define	NVMFT_QP(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])

static void	nvmft_done(union ctl_io *io);
static int	nvmft_init(void);
static int	nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
    int flag, struct thread *td);
static int	nvmft_shutdown(void);

extern struct ctl_softc *control_softc;

static struct taskqueue *nvmft_taskq;
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;

MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");

static struct ctl_frontend nvmft_frontend = {
	.name = "nvmf",
	.init = nvmft_init,
	.ioctl = nvmft_ioctl,
	.fe_dump = NULL,
	.shutdown = nvmft_shutdown,
};

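/*
 * CTL port online callback.  New associations are only accepted
 * while the port is marked online.
 */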
static void
nvmft_online(void *arg)
{
	struct nvmft_port *np = arg;

	sx_xlock(&np->lock);
	np->online = true;
	sx_xunlock(&np->lock);
}

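/*
 * CTL port offline callback.  Request that all active controllers
 * shut down, then sleep until the last one has gone away.
 */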
static void
nvmft_offline(void *arg)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;

	sx_xlock(&np->lock);
	np->online = false;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_printf(ctrlr,
		    "shutting down due to port going offline\n");
		nvmft_controller_error(ctrlr, NULL, ENODEV);
	}

	while (!TAILQ_EMPTY(&np->controllers))
		sx_sleep(np, &np->lock, 0, "nvmfoff", 0);
	sx_xunlock(&np->lock);
}

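/*
 * CTL LUN ids map to NVMe namespace ids: LUN N maps to NSID N + 1.
 * The active_ns array is kept sorted so that Active Namespace ID
 * lists can be built by a simple linear walk.
 */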
static int
nvmft_lun_enable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t *old_ns, *new_ns;
	uint32_t nsid;
	u_int i;

	if (lun_id >= le32toh(np->cdata.nn)) {
		printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
		    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
		return (EOPNOTSUPP);
	}
	nsid = lun_id + 1;

	sx_xlock(&np->lock);
	new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT,
	    M_WAITOK);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] < nsid)
			continue;
		if (np->active_ns[i] == nsid) {
			sx_xunlock(&np->lock);
			free(new_ns, M_NVMFT);
			printf("NVMFT: %s duplicate lun %d\n",
			    np->cdata.subnqn, lun_id);
			return (EINVAL);
		}
		break;
	}

	/* Copy over IDs smaller than nsid. */
	memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));

	/* Insert nsid. */
	new_ns[i] = nsid;

	/* Copy over IDs greater than nsid. */
	memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
	    sizeof(*np->active_ns));

	np->num_ns++;
	old_ns = np->active_ns;
	np->active_ns = new_ns;

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	sx_xunlock(&np->lock);
	free(old_ns, M_NVMFT);

	return (0);
}

static int
nvmft_lun_disable(void *arg, int lun_id)
{
	struct nvmft_port *np = arg;
	struct nvmft_controller *ctrlr;
	uint32_t nsid;
	u_int i;

	if (lun_id >= le32toh(np->cdata.nn))
		return (0);
	nsid = lun_id + 1;

	sx_xlock(&np->lock);
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] == nsid)
			goto found;
	}
	sx_xunlock(&np->lock);
	printf("NVMFT: %s request to disable nonexistent lun %d\n",
	    np->cdata.subnqn, lun_id);
	return (EINVAL);

found:
	/* Move down IDs greater than nsid. */
	memmove(np->active_ns + i, np->active_ns + i + 1,
	    (np->num_ns - (i + 1)) * sizeof(*np->active_ns));
	np->num_ns--;

	/* NB: Don't bother freeing the old active_ns array. */

	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
		nvmft_controller_lun_changed(ctrlr, lun_id);
	}

	sx_xunlock(&np->lock);

	return (0);
}

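/*
 * Fill out an Active Namespace ID list: the ids of active namespaces
 * greater than nsid, in increasing order, up to the capacity of the
 * list.
 */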
void
nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
    struct nvme_ns_list *nslist)
{
	u_int i, count;

	sx_slock(&np->lock);
	count = 0;
	for (i = 0; i < np->num_ns; i++) {
		if (np->active_ns[i] <= nsid)
			continue;
		nslist->ns[count] = htole32(np->active_ns[i]);
		count++;
		if (count == nitems(nslist->ns))
			break;
	}
	sx_sunlock(&np->lock);
}

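/*
 * Dispatch an admin or I/O command capsule to CTL as a new ctl_io
 * request, mapping the command's NSID to the target LUN.
 */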
void
nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
    bool admin)
{
	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	if (cmd->nsid == htole32(0)) {
		nvmft_send_generic_error(qp, nc,
		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
		nvmf_free_capsule(nc);
		return;
	}

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_NC(io) = nc;
	NVMFT_QP(io) = qp;
	io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
	io->nvmeio.cmd = *cmd;
	error = ctl_run(io);
	if (error != 0) {
		nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
		    nvmft_qpair_name(qp), error);
		ctl_nvme_set_generic_error(&io->nvmeio,
		    NVME_SC_INTERNAL_DEVICE_ERROR);
		nvmft_done(io);

		nvmft_controller_error(ctrlr, qp, ENXIO);
	}
}

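/*
 * Abort any outstanding commands for the controller by sending an
 * I_T nexus reset task to CTL.  The task's completion arrives in
 * nvmft_done with a NULL capsule pointer.
 */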
void
nvmft_terminate_commands(struct nvmft_controller *ctrlr)
{
	struct nvmft_port *np = ctrlr->np;
	union ctl_io *io;
	int error;

	mtx_lock(&ctrlr->lock);
	if (ctrlr->pending_commands == 0)
		ctrlr->start_busy = sbinuptime();
	ctrlr->pending_commands++;
	mtx_unlock(&ctrlr->lock);
	io = ctl_alloc_io(np->port.ctl_pool_ref);
	ctl_zero_io(io);
	NVMFT_QP(io) = ctrlr->admin;
	io->io_hdr.io_type = CTL_IO_TASK;
	io->io_hdr.nexus.initid = ctrlr->cntlid;
	io->io_hdr.nexus.targ_port = np->port.targ_port;
	io->io_hdr.nexus.targ_lun = 0;
	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */
	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
	error = ctl_run(io);
	if (error != CTL_RETVAL_COMPLETE) {
		nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
#ifdef INVARIANTS
		io->io_hdr.status = CTL_SUCCESS;
#endif
		nvmft_done(io);
	}
}

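/* Completion handler for the transfer requested in nvmft_datamove_out. */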
static void
nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
{
	struct ctl_nvmeio *ctnio = arg;

	if (error != 0) {
		ctl_nvme_set_data_transfer_error(ctnio);
	} else {
		MPASS(xfered == ctnio->kern_data_len);
		ctnio->kern_data_resid -= xfered;
	}

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, false);
}

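/*
 * Handle a DATAMOVE for a command that reads data from the host
 * (e.g. WRITE), wrapping the request's buffers in a memory descriptor
 * and asking the transport to fetch the data.
 */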
static void
nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct memdesc mem;
	int error;

	MPASS(ctnio->ext_data_ptr == NULL);
	if (ctnio->kern_sg_entries > 0) {
		struct ctl_sg_entry *sgl;
		struct bus_dma_segment *vlist;

		vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
		    M_NVMFT, M_WAITOK);
		ctnio->ext_data_ptr = (void *)vlist;
		sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
		for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
			vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
			vlist[i].ds_len = sgl[i].len;
		}
		mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
	} else
		mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);

	error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
	    ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
	if (error == 0)
		return;

	nvmft_printf(nvmft_qpair_ctrlr(qp),
	    "Failed to request capsule data: %d\n", error);
	ctl_nvme_set_data_transfer_error(ctnio);

	if (ctnio->kern_sg_entries) {
		free(ctnio->ext_data_ptr, M_NVMFT);
		ctnio->ext_data_ptr = NULL;
	} else
		MPASS(ctnio->ext_data_ptr == NULL);
	ctl_datamove_done((union ctl_io *)ctnio, true);
}

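/*
 * Build an mbuf chain holding a copy of the request's data buffer or
 * scatter/gather list.
 */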
static struct mbuf *
nvmft_copy_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;
	uint32_t resid, off, todo;
	int mlen;

	MPASS(ctnio->kern_data_len != 0);

	m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);

	if (ctnio->kern_sg_entries == 0) {
		m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
		return (m0);
	}

	resid = ctnio->kern_data_len;
	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	off = 0;
	m = m0;
	mlen = M_TRAILINGSPACE(m);
	for (;;) {
		todo = MIN(mlen, sgl->len - off);
		memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
		    todo);
		m->m_len += todo;
		resid -= todo;
		if (resid == 0) {
			MPASS(m->m_next == NULL);
			break;
		}

		off += todo;
		if (off == sgl->len) {
			sgl++;
			off = 0;
		}
		mlen -= todo;
		if (mlen == 0) {
			m = m->m_next;
			mlen = M_TRAILINGSPACE(m);
		}
	}

	return (m0);
}

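/* Drop the data buffer reference held by an EXT_CTL mbuf. */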
static void
m_free_ref_data(struct mbuf *m)
{
	ctl_ref kern_data_ref = m->m_ext.ext_arg1;

	kern_data_ref(m->m_ext.ext_arg2, -1);
}

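/*
 * Wrap a data buffer in an external mbuf that holds a reference on
 * the buffer via the request's kern_data_ref callback, avoiding a
 * copy.
 */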
static struct mbuf *
m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
{
	struct mbuf *m;

	m = m_get(M_WAITOK, MT_DATA);
	m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref,
	    ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
	m->m_len = size;
	ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
	return (m);
}

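/* Build an mbuf chain referencing the request's data buffers in place. */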
static struct mbuf *
nvmft_ref_data(struct ctl_nvmeio *ctnio)
{
	struct ctl_sg_entry *sgl;
	struct mbuf *m0, *m;

	MPASS(ctnio->kern_data_len != 0);

	if (ctnio->kern_sg_entries == 0)
		return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
		    ctnio->kern_data_len));

	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
	m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
	m = m0;
	for (u_int i = 1; i < ctnio->kern_sg_entries; i++) {
		m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
		m = m->m_next;
	}
	return (m0);
}

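/*
 * Handle a DATAMOVE for a command that returns data to the host
 * (e.g. READ).  If the transport transmitted a successful completion
 * along with the data, note that so nvmft_done does not send a
 * duplicate response.
 */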
static void
nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
    struct nvmf_capsule *nc)
{
	struct mbuf *m;
	u_int status;

	if (ctnio->kern_data_ref != NULL)
		m = nvmft_ref_data(ctnio);
	else
		m = nvmft_copy_data(ctnio);
	status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
	    ctnio->kern_data_len);
	switch (status) {
	case NVMF_SUCCESS_SENT:
		ctnio->success_sent = true;
		nvmft_command_completed(qp, nc);
		/* FALLTHROUGH */
	case NVMF_MORE:
	case NVME_SC_SUCCESS:
		break;
	default:
		ctl_nvme_set_generic_error(ctnio, status);
		break;
	}
	ctl_datamove_done((union ctl_io *)ctnio, true);
}

void
nvmft_handle_datamove(union ctl_io *io)
{
	struct nvmf_capsule *nc;
	struct nvmft_qpair *qp;

	/* Some CTL commands preemptively set a success status. */
	MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
	    io->io_hdr.status == CTL_SUCCESS);
	MPASS(!io->nvmeio.success_sent);

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);

	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
		nvmft_datamove_in(&io->nvmeio, qp, nc);
	else
		nvmft_datamove_out(&io->nvmeio, qp, nc);
}

void
nvmft_abort_datamove(union ctl_io *io)
{
	io->io_hdr.port_status = 1;
	io->io_hdr.flags |= CTL_FLAG_ABORT;
	ctl_datamove_done(io, true);
}

static void
nvmft_datamove(union ctl_io *io)
{
	struct nvmft_qpair *qp;

	qp = NVMFT_QP(io);
	nvmft_qpair_datamove(qp, io);
}

void
nvmft_enqueue_task(struct task *task)
{
	taskqueue_enqueue(nvmft_taskq, task);
}

void
nvmft_drain_task(struct task *task)
{
	taskqueue_drain(nvmft_taskq, task);
}

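/*
 * Add to one of the 128-bit counters in the Health Information log
 * page, stored as a pair of little-endian 64-bit words.
 */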
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, new;

	old = le64toh(pair[0]);
	new = old + addend;
	pair[0] = htole64(new);
	if (new < old)
		pair[1] = htole64(le64toh(pair[1]) + 1);
}

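/*
 * CTL completion callback: update SMART / Health Information
 * statistics and send the completion back to the host unless a
 * successful status was already sent along with the data.
 */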
static void
nvmft_done(union ctl_io *io)
{
	struct nvmft_controller *ctrlr;
	const struct nvme_command *cmd;
	struct nvmft_qpair *qp;
	struct nvmf_capsule *nc;
	size_t len;

	KASSERT(io->io_hdr.status == CTL_SUCCESS ||
	    io->io_hdr.status == CTL_NVME_ERROR,
	    ("%s: bad status %u", __func__, io->io_hdr.status));

	nc = NVMFT_NC(io);
	qp = NVMFT_QP(io);
	ctrlr = nvmft_qpair_ctrlr(qp);

	if (nc == NULL) {
		/* Completion of nvmft_terminate_commands. */
		goto end;
	}

	cmd = nvmf_capsule_sqe(nc);

	if (io->io_hdr.status == CTL_SUCCESS)
		len = nvmf_capsule_data_len(nc) / 512;
	else
		len = 0;
	switch (cmd->opc) {
	case NVME_OPC_WRITE:
		mtx_lock(&ctrlr->lock);
		hip_add(ctrlr->hip.host_write_commands, 1);
		len += ctrlr->partial_duw;
		if (len >= 1000)
			hip_add(ctrlr->hip.data_units_written, len / 1000);
		ctrlr->partial_duw = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	case NVME_OPC_READ:
	case NVME_OPC_COMPARE:
	case NVME_OPC_VERIFY:
		mtx_lock(&ctrlr->lock);
		if (cmd->opc != NVME_OPC_VERIFY)
			hip_add(ctrlr->hip.host_read_commands, 1);
		len += ctrlr->partial_dur;
		if (len >= 1000)
			hip_add(ctrlr->hip.data_units_read, len / 1000);
		ctrlr->partial_dur = len % 1000;
		mtx_unlock(&ctrlr->lock);
		break;
	}

	if (io->nvmeio.success_sent) {
		MPASS(io->io_hdr.status == CTL_SUCCESS);
	} else {
		io->nvmeio.cpl.cid = cmd->cid;
		nvmft_send_response(qp, &io->nvmeio.cpl);
	}
	nvmf_free_capsule(nc);
end:
	ctl_free_io(io);
	mtx_lock(&ctrlr->lock);
	ctrlr->pending_commands--;
	if (ctrlr->pending_commands == 0)
		ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
	mtx_unlock(&ctrlr->lock);
}

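/* Frontend init: create the task queue used for deferred controller work. */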
static int
nvmft_init(void)
{
	int error;

	nvmft_taskq = taskqueue_create("nvmft", M_WAITOK,
	    taskqueue_thread_enqueue, &nvmft_taskq);
	error = taskqueue_start_threads_in_proc(&nvmft_taskq, mp_ncpus, PWAIT,
	    control_softc->ctl_proc, "nvmft");
	if (error != 0) {
		taskqueue_free(nvmft_taskq);
		return (error);
	}

	TAILQ_INIT(&nvmft_ports);
	sx_init(&nvmft_ports_lock, "nvmft ports");
	return (0);
}

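/*
 * Final teardown of a port, called once its last reference has been
 * released.
 */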
void
nvmft_port_free(struct nvmft_port *np)
{
	KASSERT(TAILQ_EMPTY(&np->controllers),
	    ("%s(%p): active controllers", __func__, np));

	if (np->port.targ_port != -1) {
		if (ctl_port_deregister(&np->port) != 0)
			printf("%s: ctl_port_deregister() failed\n", __func__);
	}

	free(np->active_ns, M_NVMFT);
	clean_unrhdr(np->ids);
	delete_unrhdr(np->ids);
	sx_destroy(&np->lock);
	free(np, M_NVMFT);
}

static struct nvmft_port *
nvmft_port_find(const char *subnqn)
{
	struct nvmft_port *np;

	KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));

	sx_assert(&nvmft_ports_lock, SA_LOCKED);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (strcmp(np->cdata.subnqn, subnqn) == 0)
			break;
	}
	return (np);
}

static struct nvmft_port *
nvmft_port_find_by_id(int port_id)
{
	struct nvmft_port *np;

	sx_assert(&nvmft_ports_lock, SA_LOCKED);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		if (np->port.targ_port == port_id)
			break;
	}
	return (np);
}

/*
 * Helper function to fetch a number stored as a string in an nvlist.
 * Returns false if the string was not a valid number.
 */
static bool
dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
    u_long *value)
{
	const char *str;
	char *cp;

	str = dnvlist_get_string(nvl, name, NULL);
	if (str == NULL) {
		*value = default_value;
		return (true);
	}
	if (*str == '\0')
		return (false);
	*value = strtoul(str, &cp, 0);
	if (*cp != '\0')
		return (false);
	return (true);
}

/*
 * NVMeoF ports support the following parameters:
 *
 * Mandatory:
 *
 * subnqn: subsystem NVMe Qualified Name
 * portid: integer port ID from Discovery Log Page entry
 *
 * Optional:
 *
 * serial: Serial Number string
 * max_io_qsize: Maximum number of I/O queue entries
 * enable_timeout: Timeout for controller enable in milliseconds
 * ioccsz: Maximum command capsule size
 * iorcsz: Maximum response capsule size
 * nn: Number of namespaces
 */
static void
nvmft_port_create(struct ctl_req *req)
{
	struct nvmft_port *np;
	struct ctl_port *port;
	const char *serial, *subnqn;
	char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
	u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
	int error;

	/* Required parameters. */
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Missing required argument");
		return;
	}
	if (!nvmf_nqn_valid(subnqn)) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid SubNQN");
		return;
	}
	if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
	    portid > UINT16_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid port ID");
		return;
	}

	/* Optional parameters. */
	if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
	    NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
	    max_io_qsize < NVME_MIN_IO_ENTRIES ||
	    max_io_qsize > NVME_MAX_IO_ENTRIES) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid maximum I/O queue size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
	    NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
	    (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid enable timeout");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
	    &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
	    (ioccsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Command Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
	    &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
	    (iorcsz % 16) != 0) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid Response Capsule size");
		return;
	}

	if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
	    nn < 1 || nn > UINT32_MAX) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Invalid number of namespaces");
		return;
	}

	serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
	if (serial == NULL) {
		getcredhostid(curthread->td_ucred, &hostid);
		nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
		serial = serial_buf;
	}

	sx_xlock(&nvmft_ports_lock);

	np = nvmft_port_find(subnqn);
	if (np != NULL) {
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "SubNQN \"%s\" already exists", subnqn);
		sx_xunlock(&nvmft_ports_lock);
		return;
	}

	np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
	refcount_init(&np->refs, 1);
	np->max_io_qsize = max_io_qsize;
	np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
	sx_init(&np->lock, "nvmft port");
	np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
	    NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
	TAILQ_INIT(&np->controllers);

	/* The controller ID is set later for individual controllers. */
	_nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
	    osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
	np->cdata.aerl = NVMFT_NUM_AER - 1;
	np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
	np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
	    NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
	np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);

	np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
	memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));

	port = &np->port;

	port->frontend = &nvmft_frontend;
	port->port_type = CTL_PORT_NVMF;
	port->num_requested_ctl_io = max_io_qsize;
	port->port_name = "nvmf";
	port->physical_port = portid;
	port->virtual_port = 0;
	port->port_online = nvmft_online;
	port->port_offline = nvmft_offline;
	port->onoff_arg = np;
	port->lun_enable = nvmft_lun_enable;
	port->lun_disable = nvmft_lun_disable;
	port->targ_lun_arg = np;
	port->fe_datamove = nvmft_datamove;
	port->fe_done = nvmft_done;
	port->targ_port = -1;
	port->options = nvlist_clone(req->args_nvl);

	error = ctl_port_register(port);
	if (error != 0) {
		sx_xunlock(&nvmft_ports_lock);
		nvlist_destroy(port->options);
		nvmft_port_rele(np);
		req->status = CTL_LUN_ERROR;
		snprintf(req->error_str, sizeof(req->error_str),
		    "Failed to register CTL port with error %d", error);
		return;
	}

	TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	req->status = CTL_LUN_OK;
	req->result_nvl = nvlist_create(0);
	nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
}

static void
nvmft_port_remove(struct ctl_req *req)
{
	struct nvmft_port *np;
	const char *subnqn;
	u_long port_id;

	/*
	 * ctladm port -r just provides the port_id, so permit looking
	 * up a port either by "subnqn" or "port_id".
	 */
	port_id = ULONG_MAX;
	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
	if (subnqn == NULL) {
		if (!nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Missing required argument");
			return;
		}
		if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
		    &port_id)) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Invalid CTL port ID");
			return;
		}
	} else {
		if (nvlist_exists_string(req->args_nvl, "port_id")) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Ambiguous port removal request");
			return;
		}
	}

	sx_xlock(&nvmft_ports_lock);

	if (subnqn != NULL) {
		np = nvmft_port_find(subnqn);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "SubNQN \"%s\" does not exist", subnqn);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	} else {
		np = nvmft_port_find_by_id(port_id);
		if (np == NULL) {
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "CTL port %lu is not an NVMF port", port_id);
			sx_xunlock(&nvmft_ports_lock);
			return;
		}
	}

	TAILQ_REMOVE(&nvmft_ports, np, link);
	sx_xunlock(&nvmft_ports_lock);

	ctl_port_offline(&np->port);
	nvmft_port_rele(np);
	req->status = CTL_LUN_OK;
}

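/*
 * Take over a queue pair from userland, using the provided Fabrics
 * CONNECT command and data to locate the subsystem port and create or
 * extend the association.
 */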
static void
nvmft_handoff(struct ctl_nvmf *cn)
{
	struct nvmf_fabric_connect_cmd cmd;
	struct nvmf_handoff_controller_qpair *handoff;
	struct nvmf_fabric_connect_data *data;
	struct nvmft_port *np;
	int error;

	np = NULL;
	data = NULL;
	handoff = &cn->data.handoff;
	error = copyin(handoff->cmd, &cmd, sizeof(cmd));
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin CONNECT SQE");
		return;
	}

	data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
	error = copyin(handoff->data, data, sizeof(*data));
	if (error != 0) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyin CONNECT data");
		goto out;
	}

	if (!nvmf_nqn_valid(data->subnqn)) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Invalid SubNQN");
		goto out;
	}

	sx_slock(&nvmft_ports_lock);
	np = nvmft_port_find(data->subnqn);
	if (np == NULL) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Unknown SubNQN");
		goto out;
	}
	if (!np->online) {
		sx_sunlock(&nvmft_ports_lock);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "CTL port offline");
		np = NULL;
		goto out;
	}
	nvmft_port_ref(np);
	sx_sunlock(&nvmft_ports_lock);

	if (handoff->params.admin) {
		error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff admin queue: %d", error);
			goto out;
		}
	} else {
		error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
		if (error != 0) {
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Failed to handoff I/O queue: %d", error);
			goto out;
		}
	}

	cn->status = CTL_NVMF_OK;
out:
	if (np != NULL)
		nvmft_port_rele(np);
	free(data, M_NVMFT);
}

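/*
 * Export the list of active associations to userland as XML for the
 * CTL_NVMF_LIST ioctl.
 */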
static void
nvmft_list(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_list_params *lp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	struct sbuf *sb;
	int error;

	lp = &cn->data.list;

	sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
	    SBUF_INCLUDENUL);
	if (sb == NULL) {
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to allocate NVMeoF session list");
		return;
	}

	sbuf_printf(sb, "<ctlnvmflist>\n");
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		sx_slock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			sbuf_printf(sb, "<connection id=\"%d\">"
			    "<hostnqn>%s</hostnqn>"
			    "<subnqn>%s</subnqn>"
			    "<trtype>%u</trtype>"
			    "</connection>\n",
			    ctrlr->cntlid,
			    ctrlr->hostnqn,
			    np->cdata.subnqn,
			    ctrlr->trtype);
		}
		sx_sunlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);
	sbuf_printf(sb, "</ctlnvmflist>\n");
	if (sbuf_finish(sb) != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Out of space, %d bytes is too small", lp->alloc_len);
		return;
	}

	error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
	if (error != 0) {
		sbuf_delete(sb);
		cn->status = CTL_NVMF_ERROR;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "Failed to copyout session list: %d", error);
		return;
	}
	lp->fill_len = sbuf_len(sb);
	cn->status = CTL_NVMF_OK;
	sbuf_delete(sb);
}

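/*
 * Terminate matching associations (by controller ID, host NQN, or
 * all) for the CTL_NVMF_TERMINATE ioctl.
 */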
static void
nvmft_terminate(struct ctl_nvmf *cn)
{
	struct ctl_nvmf_terminate_params *tp;
	struct nvmft_controller *ctrlr;
	struct nvmft_port *np;
	bool found, match;

	tp = &cn->data.terminate;

	found = false;
	sx_slock(&nvmft_ports_lock);
	TAILQ_FOREACH(np, &nvmft_ports, link) {
		sx_slock(&np->lock);
		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
			if (tp->all != 0)
				match = true;
			else if (tp->cntlid != -1)
				match = tp->cntlid == ctrlr->cntlid;
			else if (tp->hostnqn[0] != '\0')
				match = strncmp(tp->hostnqn, ctrlr->hostnqn,
				    sizeof(tp->hostnqn)) == 0;
			else
				match = false;
			if (!match)
				continue;
			nvmft_printf(ctrlr,
			    "disconnecting due to administrative request\n");
			nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
			found = true;
		}
		sx_sunlock(&np->lock);
	}
	sx_sunlock(&nvmft_ports_lock);

	if (!found) {
		cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
		snprintf(cn->error_str, sizeof(cn->error_str),
		    "No matching associations found");
		return;
	}
	cn->status = CTL_NVMF_OK;
}

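/*
 * Frontend ioctl handler: port creation and removal requests from
 * ctladm along with the NVMeoF handoff, list, and terminate commands.
 */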
static int
nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
    struct thread *td)
{
	struct ctl_nvmf *cn;
	struct ctl_req *req;

	switch (cmd) {
	case CTL_PORT_REQ:
		req = (struct ctl_req *)data;
		switch (req->reqtype) {
		case CTL_REQ_CREATE:
			nvmft_port_create(req);
			break;
		case CTL_REQ_REMOVE:
			nvmft_port_remove(req);
			break;
		default:
			req->status = CTL_LUN_ERROR;
			snprintf(req->error_str, sizeof(req->error_str),
			    "Unsupported request type %d", req->reqtype);
			break;
		}
		return (0);
	case CTL_NVMF:
		cn = (struct ctl_nvmf *)data;
		switch (cn->type) {
		case CTL_NVMF_HANDOFF:
			nvmft_handoff(cn);
			break;
		case CTL_NVMF_LIST:
			nvmft_list(cn);
			break;
		case CTL_NVMF_TERMINATE:
			nvmft_terminate(cn);
			break;
		default:
			cn->status = CTL_NVMF_ERROR;
			snprintf(cn->error_str, sizeof(cn->error_str),
			    "Invalid NVMeoF request type %d", cn->type);
			break;
		}
		return (0);
	default:
		return (ENOTTY);
	}
}

static int
nvmft_shutdown(void)
{
	/* TODO: Need to check for active controllers. */
	if (!TAILQ_EMPTY(&nvmft_ports))
		return (EBUSY);

	taskqueue_free(nvmft_taskq);
	sx_destroy(&nvmft_ports_lock);
	return (0);
}

CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);