xref: /freebsd/sys/dev/nvmf/controller/ctl_frontend_nvmf.c (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2023-2024 Chelsio Communications, Inc.
5  * Written by: John Baldwin <jhb@FreeBSD.org>
6  */
7 
8 #include <sys/param.h>
9 #include <sys/dnv.h>
10 #include <sys/jail.h>
11 #include <sys/kernel.h>
12 #include <sys/limits.h>
13 #include <sys/lock.h>
14 #include <sys/malloc.h>
15 #include <sys/mbuf.h>
16 #include <sys/memdesc.h>
17 #include <sys/module.h>
18 #include <sys/proc.h>
19 #include <sys/queue.h>
20 #include <sys/refcount.h>
21 #include <sys/sbuf.h>
22 #include <sys/sx.h>
23 
24 #include <machine/bus.h>
25 #include <machine/bus_dma.h>
26 
27 #include <dev/nvmf/nvmf.h>
28 #include <dev/nvmf/nvmf_transport.h>
29 #include <dev/nvmf/controller/nvmft_subr.h>
30 #include <dev/nvmf/controller/nvmft_var.h>
31 
32 #include <cam/ctl/ctl.h>
33 #include <cam/ctl/ctl_error.h>
34 #include <cam/ctl/ctl_io.h>
35 #include <cam/ctl/ctl_frontend.h>
36 
/*
 * Store pointers to the capsule and qpair in the two pointer members
 * of CTL_PRIV_FRONTEND.
 *
 * NVMFT_NC(io) is the command capsule the I/O request was created for
 * and NVMFT_QP(io) is the queue pair it arrived on.  nvmft_done()
 * treats a NULL capsule as the completion of the task request issued
 * by nvmft_terminate_commands(), which only sets the qpair.
 */
#define	NVMFT_NC(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[0])
#define	NVMFT_QP(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptrs[1])
43 
/* Forward declarations of callbacks that are defined later in this file. */
static void	nvmft_done(union ctl_io *io);
static int	nvmft_init(void);
static int	nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data,
    int flag, struct thread *td);
static int	nvmft_shutdown(void);

/*
 * List of every port created by this frontend.  The lookup helpers
 * assert that nvmft_ports_lock is held, and ports are inserted and
 * removed with it held exclusively.
 */
static TAILQ_HEAD(, nvmft_port) nvmft_ports;
static struct sx nvmft_ports_lock;

/* Malloc type used for all allocations made by this frontend. */
MALLOC_DEFINE(M_NVMFT, "nvmft", "NVMe over Fabrics controller");

/* Frontend descriptor registered with CTL via CTL_FRONTEND_DECLARE. */
static struct ctl_frontend nvmft_frontend = {
	.name = "nvmf",
	.init = nvmft_init,
	.ioctl = nvmft_ioctl,
	.fe_dump = NULL,
	.shutdown = nvmft_shutdown,
};
62 
63 static void
64 nvmft_online(void *arg)
65 {
66 	struct nvmft_port *np = arg;
67 
68 	sx_xlock(&np->lock);
69 	np->online = true;
70 	sx_xunlock(&np->lock);
71 }
72 
73 static void
74 nvmft_offline(void *arg)
75 {
76 	struct nvmft_port *np = arg;
77 	struct nvmft_controller *ctrlr;
78 
79 	sx_xlock(&np->lock);
80 	np->online = false;
81 
82 	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
83 		nvmft_printf(ctrlr,
84 		    "shutting down due to port going offline\n");
85 		nvmft_controller_error(ctrlr, NULL, ENODEV);
86 	}
87 
88 	while (!TAILQ_EMPTY(&np->controllers))
89 		sx_sleep(np, &np->lock, 0, "nvmfoff", 0);
90 	sx_xunlock(&np->lock);
91 }
92 
93 static int
94 nvmft_lun_enable(void *arg, int lun_id)
95 {
96 	struct nvmft_port *np = arg;
97 	struct nvmft_controller *ctrlr;
98 	uint32_t *old_ns, *new_ns;
99 	uint32_t nsid;
100 	u_int i;
101 
102 	if (lun_id >= le32toh(np->cdata.nn)) {
103 		printf("NVMFT: %s lun %d larger than maximum nsid %u\n",
104 		    np->cdata.subnqn, lun_id, le32toh(np->cdata.nn));
105 		return (EOPNOTSUPP);
106 	}
107 	nsid = lun_id + 1;
108 
109 	sx_xlock(&np->lock);
110 	new_ns = mallocarray(np->num_ns + 1, sizeof(*new_ns), M_NVMFT,
111 	    M_WAITOK);
112 	for (i = 0; i < np->num_ns; i++) {
113 		if (np->active_ns[i] < nsid)
114 			continue;
115 		if (np->active_ns[i] == nsid) {
116 			sx_xunlock(&np->lock);
117 			free(new_ns, M_NVMFT);
118 			printf("NVMFT: %s duplicate lun %d\n",
119 			    np->cdata.subnqn, lun_id);
120 			return (EINVAL);
121 		}
122 		break;
123 	}
124 
125 	/* Copy over IDs smaller than nsid. */
126 	memcpy(new_ns, np->active_ns, i * sizeof(*np->active_ns));
127 
128 	/* Insert nsid. */
129 	new_ns[i] = nsid;
130 
131 	/* Copy over IDs greater than nsid. */
132 	memcpy(new_ns + i + 1, np->active_ns + i, (np->num_ns - i) *
133 	    sizeof(*np->active_ns));
134 
135 	np->num_ns++;
136 	old_ns = np->active_ns;
137 	np->active_ns = new_ns;
138 
139 	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
140 		nvmft_controller_lun_changed(ctrlr, lun_id);
141 	}
142 
143 	sx_xunlock(&np->lock);
144 	free(old_ns, M_NVMFT);
145 
146 	return (0);
147 }
148 
149 static int
150 nvmft_lun_disable(void *arg, int lun_id)
151 {
152 	struct nvmft_port *np = arg;
153 	struct nvmft_controller *ctrlr;
154 	uint32_t nsid;
155 	u_int i;
156 
157 	if (lun_id >= le32toh(np->cdata.nn))
158 		return (0);
159 	nsid = lun_id + 1;
160 
161 	sx_xlock(&np->lock);
162 	for (i = 0; i < np->num_ns; i++) {
163 		if (np->active_ns[i] == nsid)
164 			goto found;
165 	}
166 	sx_xunlock(&np->lock);
167 	printf("NVMFT: %s request to disable nonexistent lun %d\n",
168 	    np->cdata.subnqn, lun_id);
169 	return (EINVAL);
170 
171 found:
172 	/* Move down IDs greater than nsid. */
173 	memmove(np->active_ns + i, np->active_ns + i + 1,
174 	    (np->num_ns - (i + 1)) * sizeof(*np->active_ns));
175 	np->num_ns--;
176 
177 	/* NB: Don't bother freeing the old active_ns array. */
178 
179 	TAILQ_FOREACH(ctrlr, &np->controllers, link) {
180 		nvmft_controller_lun_changed(ctrlr, lun_id);
181 	}
182 
183 	sx_xunlock(&np->lock);
184 
185 	return (0);
186 }
187 
188 void
189 nvmft_populate_active_nslist(struct nvmft_port *np, uint32_t nsid,
190     struct nvme_ns_list *nslist)
191 {
192 	u_int i, count;
193 
194 	sx_slock(&np->lock);
195 	count = 0;
196 	for (i = 0; i < np->num_ns; i++) {
197 		if (np->active_ns[i] <= nsid)
198 			continue;
199 		nslist->ns[count] = htole32(np->active_ns[i]);
200 		count++;
201 		if (count == nitems(nslist->ns))
202 			break;
203 	}
204 	sx_sunlock(&np->lock);
205 }
206 
207 void
208 nvmft_dispatch_command(struct nvmft_qpair *qp, struct nvmf_capsule *nc,
209     bool admin)
210 {
211 	struct nvmft_controller *ctrlr = nvmft_qpair_ctrlr(qp);
212 	const struct nvme_command *cmd = nvmf_capsule_sqe(nc);
213 	struct nvmft_port *np = ctrlr->np;
214 	union ctl_io *io;
215 	int error;
216 
217 	if (cmd->nsid == htole32(0)) {
218 		nvmft_send_generic_error(qp, nc,
219 		    NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
220 		nvmf_free_capsule(nc);
221 		return;
222 	}
223 
224 	mtx_lock(&ctrlr->lock);
225 	if (ctrlr->pending_commands == 0)
226 		ctrlr->start_busy = sbinuptime();
227 	ctrlr->pending_commands++;
228 	mtx_unlock(&ctrlr->lock);
229 	io = ctl_alloc_io(np->port.ctl_pool_ref);
230 	ctl_zero_io(io);
231 	NVMFT_NC(io) = nc;
232 	NVMFT_QP(io) = qp;
233 	io->io_hdr.io_type = admin ? CTL_IO_NVME_ADMIN : CTL_IO_NVME;
234 	io->io_hdr.nexus.initid = ctrlr->cntlid;
235 	io->io_hdr.nexus.targ_port = np->port.targ_port;
236 	io->io_hdr.nexus.targ_lun = le32toh(cmd->nsid) - 1;
237 	io->nvmeio.cmd = *cmd;
238 	error = ctl_run(io);
239 	if (error != 0) {
240 		nvmft_printf(ctrlr, "ctl_run failed for command on %s: %d\n",
241 		    nvmft_qpair_name(qp), error);
242 		ctl_nvme_set_generic_error(&io->nvmeio,
243 		    NVME_SC_INTERNAL_DEVICE_ERROR);
244 		nvmft_done(io);
245 
246 		nvmft_controller_error(ctrlr, qp, ENXIO);
247 	}
248 }
249 
250 void
251 nvmft_terminate_commands(struct nvmft_controller *ctrlr)
252 {
253 	struct nvmft_port *np = ctrlr->np;
254 	union ctl_io *io;
255 	int error;
256 
257 	mtx_lock(&ctrlr->lock);
258 	if (ctrlr->pending_commands == 0)
259 		ctrlr->start_busy = sbinuptime();
260 	ctrlr->pending_commands++;
261 	mtx_unlock(&ctrlr->lock);
262 	io = ctl_alloc_io(np->port.ctl_pool_ref);
263 	ctl_zero_io(io);
264 	NVMFT_QP(io) = ctrlr->admin;
265 	io->io_hdr.io_type = CTL_IO_TASK;
266 	io->io_hdr.nexus.initid = ctrlr->cntlid;
267 	io->io_hdr.nexus.targ_port = np->port.targ_port;
268 	io->io_hdr.nexus.targ_lun = 0;
269 	io->taskio.tag_type = CTL_TAG_SIMPLE; /* XXX: unused? */
270 	io->taskio.task_action = CTL_TASK_I_T_NEXUS_RESET;
271 	error = ctl_run(io);
272 	if (error != CTL_RETVAL_COMPLETE) {
273 		nvmft_printf(ctrlr, "failed to terminate tasks: %d\n", error);
274 #ifdef INVARIANTS
275 		io->io_hdr.status = CTL_SUCCESS;
276 #endif
277 		nvmft_done(io);
278 	}
279 }
280 
281 static void
282 nvmft_datamove_out_cb(void *arg, size_t xfered, int error)
283 {
284 	struct ctl_nvmeio *ctnio = arg;
285 
286 	if (error != 0) {
287 		ctl_nvme_set_data_transfer_error(ctnio);
288 	} else {
289 		MPASS(xfered == ctnio->kern_data_len);
290 		ctnio->kern_data_resid -= xfered;
291 	}
292 
293 	if (ctnio->kern_sg_entries) {
294 		free(ctnio->ext_data_ptr, M_NVMFT);
295 		ctnio->ext_data_ptr = NULL;
296 	} else
297 		MPASS(ctnio->ext_data_ptr == NULL);
298 	ctl_datamove_done((union ctl_io *)ctnio, false);
299 }
300 
301 static void
302 nvmft_datamove_out(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
303     struct nvmf_capsule *nc)
304 {
305 	struct memdesc mem;
306 	int error;
307 
308 	MPASS(ctnio->ext_data_ptr == NULL);
309 	if (ctnio->kern_sg_entries > 0) {
310 		struct ctl_sg_entry *sgl;
311 		struct bus_dma_segment *vlist;
312 
313 		vlist = mallocarray(ctnio->kern_sg_entries, sizeof(*vlist),
314 		    M_NVMFT, M_WAITOK);
315 		ctnio->ext_data_ptr = (void *)vlist;
316 		sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
317 		for (u_int i = 0; i < ctnio->kern_sg_entries; i++) {
318 			vlist[i].ds_addr = (uintptr_t)sgl[i].addr;
319 			vlist[i].ds_len = sgl[i].len;
320 		}
321 		mem = memdesc_vlist(vlist, ctnio->kern_sg_entries);
322 	} else
323 		mem = memdesc_vaddr(ctnio->kern_data_ptr, ctnio->kern_data_len);
324 
325 	error = nvmf_receive_controller_data(nc, ctnio->kern_rel_offset, &mem,
326 	    ctnio->kern_data_len, nvmft_datamove_out_cb, ctnio);
327 	if (error == 0)
328 		return;
329 
330 	nvmft_printf(nvmft_qpair_ctrlr(qp),
331 	    "Failed to request capsule data: %d\n", error);
332 	ctl_nvme_set_data_transfer_error(ctnio);
333 
334 	if (ctnio->kern_sg_entries) {
335 		free(ctnio->ext_data_ptr, M_NVMFT);
336 		ctnio->ext_data_ptr = NULL;
337 	} else
338 		MPASS(ctnio->ext_data_ptr == NULL);
339 	ctl_datamove_done((union ctl_io *)ctnio, true);
340 }
341 
342 static struct mbuf *
343 nvmft_copy_data(struct ctl_nvmeio *ctnio)
344 {
345 	struct ctl_sg_entry *sgl;
346 	struct mbuf *m0, *m;
347 	uint32_t resid, off, todo;
348 	int mlen;
349 
350 	MPASS(ctnio->kern_data_len != 0);
351 
352 	m0 = m_getm2(NULL, ctnio->kern_data_len, M_WAITOK, MT_DATA, 0);
353 
354 	if (ctnio->kern_sg_entries == 0) {
355 		m_copyback(m0, 0, ctnio->kern_data_len, ctnio->kern_data_ptr);
356 		return (m0);
357 	}
358 
359 	resid = ctnio->kern_data_len;
360 	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
361 	off = 0;
362 	m = m0;
363 	mlen = M_TRAILINGSPACE(m);
364 	for (;;) {
365 		todo = MIN(mlen, sgl->len - off);
366 		memcpy(mtod(m, char *) + m->m_len, (char *)sgl->addr + off,
367 		    todo);
368 		m->m_len += todo;
369 		resid -= todo;
370 		if (resid == 0) {
371 			MPASS(m->m_next == NULL);
372 			break;
373 		}
374 
375 		off += todo;
376 		if (off == sgl->len) {
377 			sgl++;
378 			off = 0;
379 		}
380 		mlen -= todo;
381 		if (mlen == 0) {
382 			m = m->m_next;
383 			mlen = M_TRAILINGSPACE(m);
384 		}
385 	}
386 
387 	return (m0);
388 }
389 
390 static void
391 m_free_ref_data(struct mbuf *m)
392 {
393 	ctl_ref kern_data_ref = m->m_ext.ext_arg1;
394 
395 	kern_data_ref(m->m_ext.ext_arg2, -1);
396 }
397 
398 static struct mbuf *
399 m_get_ref_data(struct ctl_nvmeio *ctnio, void *buf, u_int size)
400 {
401 	struct mbuf *m;
402 
403 	m = m_get(M_WAITOK, MT_DATA);
404 	m_extadd(m, buf, size, m_free_ref_data, ctnio->kern_data_ref,
405 	    ctnio->kern_data_arg, M_RDONLY, EXT_CTL);
406 	m->m_len = size;
407 	ctnio->kern_data_ref(ctnio->kern_data_arg, 1);
408 	return (m);
409 }
410 
411 static struct mbuf *
412 nvmft_ref_data(struct ctl_nvmeio *ctnio)
413 {
414 	struct ctl_sg_entry *sgl;
415 	struct mbuf *m0, *m;
416 
417 	MPASS(ctnio->kern_data_len != 0);
418 
419 	if (ctnio->kern_sg_entries == 0)
420 		return (m_get_ref_data(ctnio, ctnio->kern_data_ptr,
421 		    ctnio->kern_data_len));
422 
423 	sgl = (struct ctl_sg_entry *)ctnio->kern_data_ptr;
424 	m0 = m_get_ref_data(ctnio, sgl[0].addr, sgl[0].len);
425 	m = m0;
426 	for (u_int i = 1; i < ctnio->kern_sg_entries; i++) {
427 		m->m_next = m_get_ref_data(ctnio, sgl[i].addr, sgl[i].len);
428 		m = m->m_next;
429 	}
430 	return (m0);
431 }
432 
433 static void
434 nvmft_datamove_in(struct ctl_nvmeio *ctnio, struct nvmft_qpair *qp,
435     struct nvmf_capsule *nc)
436 {
437 	struct mbuf *m;
438 	u_int status;
439 
440 	if (ctnio->kern_data_ref != NULL)
441 		m = nvmft_ref_data(ctnio);
442 	else
443 		m = nvmft_copy_data(ctnio);
444 	status = nvmf_send_controller_data(nc, ctnio->kern_rel_offset, m,
445 	    ctnio->kern_data_len);
446 	switch (status) {
447 	case NVMF_SUCCESS_SENT:
448 		ctnio->success_sent = true;
449 		nvmft_command_completed(qp, nc);
450 		/* FALLTHROUGH */
451 	case NVMF_MORE:
452 	case NVME_SC_SUCCESS:
453 		break;
454 	default:
455 		ctl_nvme_set_generic_error(ctnio, status);
456 		break;
457 	}
458 	ctl_datamove_done((union ctl_io *)ctnio, true);
459 }
460 
461 static void
462 nvmft_datamove(union ctl_io *io)
463 {
464 	struct nvmf_capsule *nc;
465 	struct nvmft_qpair *qp;
466 
467 	/* Some CTL commands preemptively set a success status. */
468 	MPASS(io->io_hdr.status == CTL_STATUS_NONE ||
469 	    io->io_hdr.status == CTL_SUCCESS);
470 	MPASS(!io->nvmeio.success_sent);
471 
472 	nc = NVMFT_NC(io);
473 	qp = NVMFT_QP(io);
474 
475 	if ((io->io_hdr.flags & CTL_FLAG_DATA_MASK) == CTL_FLAG_DATA_IN)
476 		nvmft_datamove_in(&io->nvmeio, qp, nc);
477 	else
478 		nvmft_datamove_out(&io->nvmeio, qp, nc);
479 }
480 
/*
 * Add 'addend' to a 128-bit counter stored as two little-endian 64-bit
 * words (pair[0] is the low word, pair[1] the high word).
 *
 * Both words are converted to host byte order before the arithmetic
 * and back to little-endian afterwards.  The carry in particular must
 * be added in host order: adding htole64(1) directly to the stored
 * high word would add a byte-swapped constant on big-endian hosts.
 */
static void
hip_add(uint64_t pair[2], uint64_t addend)
{
	uint64_t old, sum;

	old = le64toh(pair[0]);
	sum = old + addend;
	pair[0] = htole64(sum);

	/* Unsigned wrap-around of the low word means a carry is due. */
	if (sum < old)
		pair[1] = htole64(le64toh(pair[1]) + 1);
}
492 
493 static void
494 nvmft_done(union ctl_io *io)
495 {
496 	struct nvmft_controller *ctrlr;
497 	const struct nvme_command *cmd;
498 	struct nvmft_qpair *qp;
499 	struct nvmf_capsule *nc;
500 	size_t len;
501 
502 	KASSERT(io->io_hdr.status == CTL_SUCCESS ||
503 	    io->io_hdr.status == CTL_NVME_ERROR,
504 	    ("%s: bad status %u", __func__, io->io_hdr.status));
505 
506 	nc = NVMFT_NC(io);
507 	qp = NVMFT_QP(io);
508 	ctrlr = nvmft_qpair_ctrlr(qp);
509 
510 	if (nc == NULL) {
511 		/* Completion of nvmft_terminate_commands. */
512 		goto end;
513 	}
514 
515 	cmd = nvmf_capsule_sqe(nc);
516 
517 	if (io->io_hdr.status == CTL_SUCCESS)
518 		len = nvmf_capsule_data_len(nc) / 512;
519 	else
520 		len = 0;
521 	switch (cmd->opc) {
522 	case NVME_OPC_WRITE:
523 		mtx_lock(&ctrlr->lock);
524 		hip_add(ctrlr->hip.host_write_commands, 1);
525 		len += ctrlr->partial_duw;
526 		if (len > 1000)
527 			hip_add(ctrlr->hip.data_units_written, len / 1000);
528 		ctrlr->partial_duw = len % 1000;
529 		mtx_unlock(&ctrlr->lock);
530 		break;
531 	case NVME_OPC_READ:
532 	case NVME_OPC_COMPARE:
533 	case NVME_OPC_VERIFY:
534 		mtx_lock(&ctrlr->lock);
535 		if (cmd->opc != NVME_OPC_VERIFY)
536 			hip_add(ctrlr->hip.host_read_commands, 1);
537 		len += ctrlr->partial_dur;
538 		if (len > 1000)
539 			hip_add(ctrlr->hip.data_units_read, len / 1000);
540 		ctrlr->partial_dur = len % 1000;
541 		mtx_unlock(&ctrlr->lock);
542 		break;
543 	}
544 
545 	if (io->nvmeio.success_sent) {
546 		MPASS(io->io_hdr.status == CTL_SUCCESS);
547 	} else {
548 		io->nvmeio.cpl.cid = cmd->cid;
549 		nvmft_send_response(qp, &io->nvmeio.cpl);
550 	}
551 	nvmf_free_capsule(nc);
552 end:
553 	ctl_free_io(io);
554 	mtx_lock(&ctrlr->lock);
555 	ctrlr->pending_commands--;
556 	if (ctrlr->pending_commands == 0)
557 		ctrlr->busy_total += sbinuptime() - ctrlr->start_busy;
558 	mtx_unlock(&ctrlr->lock);
559 }
560 
561 static int
562 nvmft_init(void)
563 {
564 	TAILQ_INIT(&nvmft_ports);
565 	sx_init(&nvmft_ports_lock, "nvmft ports");
566 	return (0);
567 }
568 
569 void
570 nvmft_port_free(struct nvmft_port *np)
571 {
572 	KASSERT(TAILQ_EMPTY(&np->controllers),
573 	    ("%s(%p): active controllers", __func__, np));
574 
575 	if (np->port.targ_port != -1) {
576 		if (ctl_port_deregister(&np->port) != 0)
577 			printf("%s: ctl_port_deregister() failed\n", __func__);
578 	}
579 
580 	free(np->active_ns, M_NVMFT);
581 	clean_unrhdr(np->ids);
582 	delete_unrhdr(np->ids);
583 	sx_destroy(&np->lock);
584 	free(np, M_NVMFT);
585 }
586 
587 static struct nvmft_port *
588 nvmft_port_find(const char *subnqn)
589 {
590 	struct nvmft_port *np;
591 
592 	KASSERT(nvmf_nqn_valid(subnqn), ("%s: invalid nqn", __func__));
593 
594 	sx_assert(&nvmft_ports_lock, SA_LOCKED);
595 	TAILQ_FOREACH(np, &nvmft_ports, link) {
596 		if (strcmp(np->cdata.subnqn, subnqn) == 0)
597 			break;
598 	}
599 	return (np);
600 }
601 
602 static struct nvmft_port *
603 nvmft_port_find_by_id(int port_id)
604 {
605 	struct nvmft_port *np;
606 
607 	sx_assert(&nvmft_ports_lock, SA_LOCKED);
608 	TAILQ_FOREACH(np, &nvmft_ports, link) {
609 		if (np->port.targ_port == port_id)
610 			break;
611 	}
612 	return (np);
613 }
614 
615 /*
616  * Helper function to fetch a number stored as a string in an nv_list.
617  * Returns false if the string was not a valid number.
618  */
619 static bool
620 dnvlist_get_strnum(nvlist_t *nvl, const char *name, u_long default_value,
621 	u_long *value)
622 {
623 	const char *str;
624 	char *cp;
625 
626 	str = dnvlist_get_string(nvl, name, NULL);
627 	if (str == NULL) {
628 		*value = default_value;
629 		return (true);
630 	}
631 	if (*str == '\0')
632 		return (false);
633 	*value = strtoul(str, &cp, 0);
634 	if (*cp != '\0')
635 		return (false);
636 	return (true);
637 }
638 
639 /*
640  * NVMeoF ports support the following parameters:
641  *
642  * Mandatory:
643  *
644  * subnqn: subsystem NVMe Qualified Name
645  * portid: integer port ID from Discovery Log Page entry
646  *
647  * Optional:
648  * serial: Serial Number string
649  * max_io_qsize: Maximum number of I/O queue entries
650  * enable_timeout: Timeout for controller enable in milliseconds
651  * ioccsz: Maximum command capsule size
652  * iorcsz: Maximum response capsule size
653  * nn: Number of namespaces
654  */
655 static void
656 nvmft_port_create(struct ctl_req *req)
657 {
658 	struct nvmft_port *np;
659 	struct ctl_port *port;
660 	const char *serial, *subnqn;
661 	char serial_buf[NVME_SERIAL_NUMBER_LENGTH];
662 	u_long enable_timeout, hostid, ioccsz, iorcsz, max_io_qsize, nn, portid;
663 	int error;
664 
665 	/* Required parameters. */
666 	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
667 	if (subnqn == NULL || !nvlist_exists_string(req->args_nvl, "portid")) {
668 		req->status = CTL_LUN_ERROR;
669 		snprintf(req->error_str, sizeof(req->error_str),
670 		    "Missing required argument");
671 		return;
672 	}
673 	if (!nvmf_nqn_valid(subnqn)) {
674 		req->status = CTL_LUN_ERROR;
675 		snprintf(req->error_str, sizeof(req->error_str),
676 		    "Invalid SubNQN");
677 		return;
678 	}
679 	if (!dnvlist_get_strnum(req->args_nvl, "portid", UINT16_MAX, &portid) ||
680 	    portid > UINT16_MAX) {
681 		req->status = CTL_LUN_ERROR;
682 		snprintf(req->error_str, sizeof(req->error_str),
683 		    "Invalid port ID");
684 		return;
685 	}
686 
687 	/* Optional parameters. */
688 	if (!dnvlist_get_strnum(req->args_nvl, "max_io_qsize",
689 	    NVMF_MAX_IO_ENTRIES, &max_io_qsize) ||
690 	    max_io_qsize < NVME_MIN_IO_ENTRIES ||
691 	    max_io_qsize > NVME_MAX_IO_ENTRIES) {
692 		req->status = CTL_LUN_ERROR;
693 		snprintf(req->error_str, sizeof(req->error_str),
694 		    "Invalid maximum I/O queue size");
695 		return;
696 	}
697 
698 	if (!dnvlist_get_strnum(req->args_nvl, "enable_timeout",
699 	    NVMF_CC_EN_TIMEOUT * 500, &enable_timeout) ||
700 	    (enable_timeout % 500) != 0 || (enable_timeout / 500) > 255) {
701 		req->status = CTL_LUN_ERROR;
702 		snprintf(req->error_str, sizeof(req->error_str),
703 		    "Invalid enable timeout");
704 		return;
705 	}
706 
707 	if (!dnvlist_get_strnum(req->args_nvl, "ioccsz", NVMF_IOCCSZ,
708 	    &ioccsz) || ioccsz < sizeof(struct nvme_command) ||
709 	    (ioccsz % 16) != 0) {
710 		req->status = CTL_LUN_ERROR;
711 		snprintf(req->error_str, sizeof(req->error_str),
712 		    "Invalid Command Capsule size");
713 		return;
714 	}
715 
716 	if (!dnvlist_get_strnum(req->args_nvl, "iorcsz", NVMF_IORCSZ,
717 	    &iorcsz) || iorcsz < sizeof(struct nvme_completion) ||
718 	    (iorcsz % 16) != 0) {
719 		req->status = CTL_LUN_ERROR;
720 		snprintf(req->error_str, sizeof(req->error_str),
721 		    "Invalid Response Capsule size");
722 		return;
723 	}
724 
725 	if (!dnvlist_get_strnum(req->args_nvl, "nn", NVMF_NN, &nn) ||
726 	    nn < 1 || nn > UINT32_MAX) {
727 		req->status = CTL_LUN_ERROR;
728 		snprintf(req->error_str, sizeof(req->error_str),
729 		    "Invalid number of namespaces");
730 		return;
731 	}
732 
733 	serial = dnvlist_get_string(req->args_nvl, "serial", NULL);
734 	if (serial == NULL) {
735 		getcredhostid(curthread->td_ucred, &hostid);
736 		nvmf_controller_serial(serial_buf, sizeof(serial_buf), hostid);
737 		serial = serial_buf;
738 	}
739 
740 	sx_xlock(&nvmft_ports_lock);
741 
742 	np = nvmft_port_find(subnqn);
743 	if (np != NULL) {
744 		req->status = CTL_LUN_ERROR;
745 		snprintf(req->error_str, sizeof(req->error_str),
746 		    "SubNQN \"%s\" already exists", subnqn);
747 		sx_xunlock(&nvmft_ports_lock);
748 		return;
749 	}
750 
751 	np = malloc(sizeof(*np), M_NVMFT, M_WAITOK | M_ZERO);
752 	refcount_init(&np->refs, 1);
753 	np->max_io_qsize = max_io_qsize;
754 	np->cap = _nvmf_controller_cap(max_io_qsize, enable_timeout / 500);
755 	sx_init(&np->lock, "nvmft port");
756 	np->ids = new_unrhdr(0, MIN(CTL_MAX_INIT_PER_PORT - 1,
757 	    NVMF_CNTLID_STATIC_MAX), UNR_NO_MTX);
758 	TAILQ_INIT(&np->controllers);
759 
760 	/* The controller ID is set later for individual controllers. */
761 	_nvmf_init_io_controller_data(0, max_io_qsize, serial, ostype,
762 	    osrelease, subnqn, nn, ioccsz, iorcsz, &np->cdata);
763 	np->cdata.aerl = NVMFT_NUM_AER - 1;
764 	np->cdata.oaes = htole32(NVME_ASYNC_EVENT_NS_ATTRIBUTE);
765 	np->cdata.oncs = htole16(NVMEF(NVME_CTRLR_DATA_ONCS_VERIFY, 1) |
766 	    NVMEF(NVME_CTRLR_DATA_ONCS_WRZERO, 1) |
767 	    NVMEF(NVME_CTRLR_DATA_ONCS_DSM, 1) |
768 	    NVMEF(NVME_CTRLR_DATA_ONCS_COMPARE, 1));
769 	np->cdata.fuses = NVMEF(NVME_CTRLR_DATA_FUSES_CNW, 1);
770 
771 	np->fp.afi = NVMEF(NVME_FIRMWARE_PAGE_AFI_SLOT, 1);
772 	memcpy(np->fp.revision[0], np->cdata.fr, sizeof(np->cdata.fr));
773 
774 	port = &np->port;
775 
776 	port->frontend = &nvmft_frontend;
777 	port->port_type = CTL_PORT_NVMF;
778 	port->num_requested_ctl_io = max_io_qsize;
779 	port->port_name = "nvmf";
780 	port->physical_port = portid;
781 	port->virtual_port = 0;
782 	port->port_online = nvmft_online;
783 	port->port_offline = nvmft_offline;
784 	port->onoff_arg = np;
785 	port->lun_enable = nvmft_lun_enable;
786 	port->lun_disable = nvmft_lun_disable;
787 	port->targ_lun_arg = np;
788 	port->fe_datamove = nvmft_datamove;
789 	port->fe_done = nvmft_done;
790 	port->targ_port = -1;
791 	port->options = nvlist_clone(req->args_nvl);
792 
793 	error = ctl_port_register(port);
794 	if (error != 0) {
795 		sx_xunlock(&nvmft_ports_lock);
796 		nvlist_destroy(port->options);
797 		nvmft_port_rele(np);
798 		req->status = CTL_LUN_ERROR;
799 		snprintf(req->error_str, sizeof(req->error_str),
800 		    "Failed to register CTL port with error %d", error);
801 		return;
802 	}
803 
804 	TAILQ_INSERT_TAIL(&nvmft_ports, np, link);
805 	sx_xunlock(&nvmft_ports_lock);
806 
807 	req->status = CTL_LUN_OK;
808 	req->result_nvl = nvlist_create(0);
809 	nvlist_add_number(req->result_nvl, "port_id", port->targ_port);
810 }
811 
812 static void
813 nvmft_port_remove(struct ctl_req *req)
814 {
815 	struct nvmft_port *np;
816 	const char *subnqn;
817 	u_long port_id;
818 
819 	/*
820 	 * ctladm port -r just provides the port_id, so permit looking
821 	 * up a port either by "subnqn" or "port_id".
822 	 */
823 	port_id = ULONG_MAX;
824 	subnqn = dnvlist_get_string(req->args_nvl, "subnqn", NULL);
825 	if (subnqn == NULL) {
826 		if (!nvlist_exists_string(req->args_nvl, "port_id")) {
827 			req->status = CTL_LUN_ERROR;
828 			snprintf(req->error_str, sizeof(req->error_str),
829 			    "Missing required argument");
830 			return;
831 		}
832 		if (!dnvlist_get_strnum(req->args_nvl, "port_id", ULONG_MAX,
833 		    &port_id)) {
834 			req->status = CTL_LUN_ERROR;
835 			snprintf(req->error_str, sizeof(req->error_str),
836 			    "Invalid CTL port ID");
837 			return;
838 		}
839 	} else {
840 		if (nvlist_exists_string(req->args_nvl, "port_id")) {
841 			req->status = CTL_LUN_ERROR;
842 			snprintf(req->error_str, sizeof(req->error_str),
843 			    "Ambiguous port removal request");
844 			return;
845 		}
846 	}
847 
848 	sx_xlock(&nvmft_ports_lock);
849 
850 	if (subnqn != NULL) {
851 		np = nvmft_port_find(subnqn);
852 		if (np == NULL) {
853 			req->status = CTL_LUN_ERROR;
854 			snprintf(req->error_str, sizeof(req->error_str),
855 			    "SubNQN \"%s\" does not exist", subnqn);
856 			sx_xunlock(&nvmft_ports_lock);
857 			return;
858 		}
859 	} else {
860 		np = nvmft_port_find_by_id(port_id);
861 		if (np == NULL) {
862 			req->status = CTL_LUN_ERROR;
863 			snprintf(req->error_str, sizeof(req->error_str),
864 			    "CTL port %lu is not a NVMF port", port_id);
865 			sx_xunlock(&nvmft_ports_lock);
866 			return;
867 		}
868 	}
869 
870 	TAILQ_REMOVE(&nvmft_ports, np, link);
871 	sx_xunlock(&nvmft_ports_lock);
872 
873 	ctl_port_offline(&np->port);
874 	nvmft_port_rele(np);
875 	req->status = CTL_LUN_OK;
876 }
877 
878 static void
879 nvmft_handoff(struct ctl_nvmf *cn)
880 {
881 	struct nvmf_fabric_connect_cmd cmd;
882 	struct nvmf_handoff_controller_qpair *handoff;
883 	struct nvmf_fabric_connect_data *data;
884 	struct nvmft_port *np;
885 	int error;
886 
887 	np = NULL;
888 	data = NULL;
889 	handoff = &cn->data.handoff;
890 	error = copyin(handoff->cmd, &cmd, sizeof(cmd));
891 	if (error != 0) {
892 		cn->status = CTL_NVMF_ERROR;
893 		snprintf(cn->error_str, sizeof(cn->error_str),
894 		    "Failed to copyin CONNECT SQE");
895 		return;
896 	}
897 
898 	data = malloc(sizeof(*data), M_NVMFT, M_WAITOK);
899 	error = copyin(handoff->data, data, sizeof(*data));
900 	if (error != 0) {
901 		cn->status = CTL_NVMF_ERROR;
902 		snprintf(cn->error_str, sizeof(cn->error_str),
903 		    "Failed to copyin CONNECT data");
904 		goto out;
905 	}
906 
907 	if (!nvmf_nqn_valid(data->subnqn)) {
908 		cn->status = CTL_NVMF_ERROR;
909 		snprintf(cn->error_str, sizeof(cn->error_str),
910 		    "Invalid SubNQN");
911 		goto out;
912 	}
913 
914 	sx_slock(&nvmft_ports_lock);
915 	np = nvmft_port_find(data->subnqn);
916 	if (np == NULL) {
917 		sx_sunlock(&nvmft_ports_lock);
918 		cn->status = CTL_NVMF_ERROR;
919 		snprintf(cn->error_str, sizeof(cn->error_str),
920 		    "Unknown SubNQN");
921 		goto out;
922 	}
923 	if (!np->online) {
924 		sx_sunlock(&nvmft_ports_lock);
925 		cn->status = CTL_NVMF_ERROR;
926 		snprintf(cn->error_str, sizeof(cn->error_str),
927 		    "CTL port offline");
928 		np = NULL;
929 		goto out;
930 	}
931 	nvmft_port_ref(np);
932 	sx_sunlock(&nvmft_ports_lock);
933 
934 	if (handoff->params.admin) {
935 		error = nvmft_handoff_admin_queue(np, handoff, &cmd, data);
936 		if (error != 0) {
937 			cn->status = CTL_NVMF_ERROR;
938 			snprintf(cn->error_str, sizeof(cn->error_str),
939 			    "Failed to handoff admin queue: %d", error);
940 			goto out;
941 		}
942 	} else {
943 		error = nvmft_handoff_io_queue(np, handoff, &cmd, data);
944 		if (error != 0) {
945 			cn->status = CTL_NVMF_ERROR;
946 			snprintf(cn->error_str, sizeof(cn->error_str),
947 			    "Failed to handoff admin queue: %d", error);
948 			goto out;
949 		}
950 	}
951 
952 	cn->status = CTL_NVMF_OK;
953 out:
954 	if (np != NULL)
955 		nvmft_port_rele(np);
956 	free(data, M_NVMFT);
957 }
958 
959 static void
960 nvmft_list(struct ctl_nvmf *cn)
961 {
962 	struct ctl_nvmf_list_params *lp;
963 	struct nvmft_controller *ctrlr;
964 	struct nvmft_port *np;
965 	struct sbuf *sb;
966 	int error;
967 
968 	lp = &cn->data.list;
969 
970 	sb = sbuf_new(NULL, NULL, lp->alloc_len, SBUF_FIXEDLEN |
971 	    SBUF_INCLUDENUL);
972 	if (sb == NULL) {
973 		cn->status = CTL_NVMF_ERROR;
974 		snprintf(cn->error_str, sizeof(cn->error_str),
975 		    "Failed to allocate NVMeoF session list");
976 		return;
977 	}
978 
979 	sbuf_printf(sb, "<ctlnvmflist>\n");
980 	sx_slock(&nvmft_ports_lock);
981 	TAILQ_FOREACH(np, &nvmft_ports, link) {
982 		sx_slock(&np->lock);
983 		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
984 			sbuf_printf(sb, "<connection id=\"%d\">"
985 			    "<hostnqn>%s</hostnqn>"
986 			    "<subnqn>%s</subnqn>"
987 			    "<trtype>%u</trtype>"
988 			    "</connection>\n",
989 			    ctrlr->cntlid,
990 			    ctrlr->hostnqn,
991 			    np->cdata.subnqn,
992 			    ctrlr->trtype);
993 		}
994 		sx_sunlock(&np->lock);
995 	}
996 	sx_sunlock(&nvmft_ports_lock);
997 	sbuf_printf(sb, "</ctlnvmflist>\n");
998 	if (sbuf_finish(sb) != 0) {
999 		sbuf_delete(sb);
1000 		cn->status = CTL_NVMF_LIST_NEED_MORE_SPACE;
1001 		snprintf(cn->error_str, sizeof(cn->error_str),
1002 		    "Out of space, %d bytes is too small", lp->alloc_len);
1003 		return;
1004 	}
1005 
1006 	error = copyout(sbuf_data(sb), lp->conn_xml, sbuf_len(sb));
1007 	if (error != 0) {
1008 		sbuf_delete(sb);
1009 		cn->status = CTL_NVMF_ERROR;
1010 		snprintf(cn->error_str, sizeof(cn->error_str),
1011 		    "Failed to copyout session list: %d", error);
1012 		return;
1013 	}
1014 	lp->fill_len = sbuf_len(sb);
1015 	cn->status = CTL_NVMF_OK;
1016 	sbuf_delete(sb);
1017 }
1018 
1019 static void
1020 nvmft_terminate(struct ctl_nvmf *cn)
1021 {
1022 	struct ctl_nvmf_terminate_params *tp;
1023 	struct nvmft_controller *ctrlr;
1024 	struct nvmft_port *np;
1025 	bool found, match;
1026 
1027 	tp = &cn->data.terminate;
1028 
1029 	found = false;
1030 	sx_slock(&nvmft_ports_lock);
1031 	TAILQ_FOREACH(np, &nvmft_ports, link) {
1032 		sx_slock(&np->lock);
1033 		TAILQ_FOREACH(ctrlr, &np->controllers, link) {
1034 			if (tp->all != 0)
1035 				match = true;
1036 			else if (tp->cntlid != -1)
1037 				match = tp->cntlid == ctrlr->cntlid;
1038 			else if (tp->hostnqn[0] != '\0')
1039 				match = strncmp(tp->hostnqn, ctrlr->hostnqn,
1040 				    sizeof(tp->hostnqn)) == 0;
1041 			else
1042 				match = false;
1043 			if (!match)
1044 				continue;
1045 			nvmft_printf(ctrlr,
1046 			    "disconnecting due to administrative request\n");
1047 			nvmft_controller_error(ctrlr, NULL, ECONNABORTED);
1048 			found = true;
1049 		}
1050 		sx_sunlock(&np->lock);
1051 	}
1052 	sx_sunlock(&nvmft_ports_lock);
1053 
1054 	if (!found) {
1055 		cn->status = CTL_NVMF_ASSOCIATION_NOT_FOUND;
1056 		snprintf(cn->error_str, sizeof(cn->error_str),
1057 		    "No matching associations found");
1058 		return;
1059 	}
1060 	cn->status = CTL_NVMF_OK;
1061 }
1062 
1063 static int
1064 nvmft_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int flag,
1065     struct thread *td)
1066 {
1067 	struct ctl_nvmf *cn;
1068 	struct ctl_req *req;
1069 
1070 	switch (cmd) {
1071 	case CTL_PORT_REQ:
1072 		req = (struct ctl_req *)data;
1073 		switch (req->reqtype) {
1074 		case CTL_REQ_CREATE:
1075 			nvmft_port_create(req);
1076 			break;
1077 		case CTL_REQ_REMOVE:
1078 			nvmft_port_remove(req);
1079 			break;
1080 		default:
1081 			req->status = CTL_LUN_ERROR;
1082 			snprintf(req->error_str, sizeof(req->error_str),
1083 			    "Unsupported request type %d", req->reqtype);
1084 			break;
1085 		}
1086 		return (0);
1087 	case CTL_NVMF:
1088 		cn = (struct ctl_nvmf *)data;
1089 		switch (cn->type) {
1090 		case CTL_NVMF_HANDOFF:
1091 			nvmft_handoff(cn);
1092 			break;
1093 		case CTL_NVMF_LIST:
1094 			nvmft_list(cn);
1095 			break;
1096 		case CTL_NVMF_TERMINATE:
1097 			nvmft_terminate(cn);
1098 			break;
1099 		default:
1100 			cn->status = CTL_NVMF_ERROR;
1101 			snprintf(cn->error_str, sizeof(cn->error_str),
1102 			    "Invalid NVMeoF request type %d", cn->type);
1103 			break;
1104 		}
1105 		return (0);
1106 	default:
1107 		return (ENOTTY);
1108 	}
1109 }
1110 
1111 static int
1112 nvmft_shutdown(void)
1113 {
1114 	/* TODO: Need to check for active controllers. */
1115 	if (!TAILQ_EMPTY(&nvmft_ports))
1116 		return (EBUSY);
1117 
1118 	sx_destroy(&nvmft_ports_lock);
1119 	return (0);
1120 }
1121 
/*
 * Register the frontend with CTL under the module name "nvmft" and
 * declare a dependency on version 1 of the nvmf_transport module.
 */
CTL_FRONTEND_DECLARE(nvmft, nvmft_frontend);
MODULE_DEPEND(nvmft, nvmf_transport, 1, 1, 1);
1124