/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/dnv.h>
#include <sys/eventhandler.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/memdesc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/nv.h>
#include <sys/reboot.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/host/nvmf_var.h>

static struct cdevsw nvmf_cdevsw;
static struct taskqueue *nvmf_tq;

bool nvmf_fail_disconnect = false;
SYSCTL_BOOL(_kern_nvmf, OID_AUTO, fail_on_disconnection, CTLFLAG_RWTUN,
    &nvmf_fail_disconnect, 0, "Fail I/O requests on connection failure");

MALLOC_DEFINE(M_NVMF, "nvmf", "NVMe over Fabrics host");

static void	nvmf_controller_loss_task(void *arg, int pending);
static void	nvmf_disconnect_task(void *arg, int pending);
static void	nvmf_request_reconnect(struct nvmf_softc *sc);
static void	nvmf_request_reconnect_task(void *arg, int pending);
static void	nvmf_shutdown_pre_sync(void *arg, int howto);
static void	nvmf_shutdown_post_sync(void *arg, int howto);

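/*
 * Command completion callback: record the completion queue entry and
 * wake up the thread sleeping in nvmf_wait_for_reply().
 */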
void
nvmf_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->cqe = *cqe;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

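/*
 * Data transfer completion callback: record any I/O error and wake up
 * the thread sleeping in nvmf_wait_for_reply().
 */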
void
nvmf_io_complete(void *arg, size_t xfered, int error)
{
	struct nvmf_completion_status *status = arg;
	struct mtx *mtx;

	status->io_error = error;
	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	status->io_done = true;
	mtx_unlock(mtx);
	wakeup(status);
}

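/*
 * Sleep until both the command completion and any associated data
 * transfer have finished.  A mutex from the sleep mutex pool keyed on
 * the status structure avoids missed wakeups.
 */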
void
nvmf_wait_for_reply(struct nvmf_completion_status *status)
{
	struct mtx *mtx;

	mtx = mtx_pool_find(mtxpool_sleep, status);
	mtx_lock(mtx);
	while (!status->done || !status->io_done)
		mtx_sleep(status, mtx, 0, "nvmfcmd", 0);
	mtx_unlock(mtx);
}

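/*
 * Read a 4 or 8 byte controller property via a Fabrics PROPERTY_GET
 * command and return its value in host byte order.
 */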
static int
nvmf_read_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t *value)
{
	const struct nvmf_fabric_prop_get_rsp *rsp;
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_get_property(sc, offset, size, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_GET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}

	rsp = (const struct nvmf_fabric_prop_get_rsp *)&status.cqe;
	if (size == 8)
		*value = le64toh(rsp->value.u64);
	else
		*value = le32toh(rsp->value.u32.low);
	return (0);
}

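/*
 * Write a 4 or 8 byte controller property via a Fabrics PROPERTY_SET
 * command.
 */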
static int
nvmf_write_property(struct nvmf_softc *sc, uint32_t offset, uint8_t size,
    uint64_t value)
{
	struct nvmf_completion_status status;

	nvmf_status_init(&status);
	if (!nvmf_cmd_set_property(sc, offset, size, value, nvmf_complete, &status,
	    M_WAITOK))
		return (ECONNABORTED);
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev, "PROPERTY_SET failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (EIO);
	}
	return (0);
}

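/*
 * Request a normal controller shutdown by setting the shutdown
 * notification bits (CC.SHN) in the CC property.
 */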
static void
nvmf_shutdown_controller(struct nvmf_softc *sc)
{
	uint64_t cc;
	int error;

	error = nvmf_read_property(sc, NVMF_PROP_CC, 4, &cc);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CC for shutdown\n");
		return;
	}

	cc |= NVMEF(NVME_CC_REG_SHN, NVME_SHN_NORMAL);

	error = nvmf_write_property(sc, NVMF_PROP_CC, 4, cc);
	if (error != 0)
		device_printf(sc->dev,
		    "Failed to set CC to trigger shutdown\n");
}

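/*
 * Receive-side KeepAlive check: if no traffic arrived from the
 * controller during the last interval, treat the association as dead
 * and disconnect; otherwise rearm the timer.
 */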
static void
nvmf_check_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	traffic = atomic_readandclear_int(&sc->ka_active_rx_traffic);
	if (traffic == 0) {
		device_printf(sc->dev,
		    "disconnecting due to KeepAlive timeout\n");
		nvmf_disconnect(sc);
		return;
	}

	callout_schedule_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0, C_HARDCLOCK);
}

static void
nvmf_keep_alive_complete(void *arg, const struct nvme_completion *cqe)
{
	struct nvmf_softc *sc = arg;

	atomic_store_int(&sc->ka_active_rx_traffic, 1);
	if (cqe->status != 0) {
		device_printf(sc->dev,
		    "KeepAlive response reported status %#x\n",
		    le16toh(cqe->status));
	}
}

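/*
 * Transmit-side KeepAlive timer: send a KeepAlive command if no other
 * command provided traffic during the interval, then rearm the timer.
 */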
static void
nvmf_send_keep_alive(void *arg)
{
	struct nvmf_softc *sc = arg;
	int traffic;

	/*
	 * Don't bother sending a KeepAlive command if TKAS is active
	 * and another command has been sent during the interval.
	 */
	traffic = atomic_load_int(&sc->ka_active_tx_traffic);
	if (traffic == 0 && !nvmf_cmd_keep_alive(sc, nvmf_keep_alive_complete,
	    sc, M_NOWAIT))
		device_printf(sc->dev,
		    "Failed to allocate KeepAlive command\n");

	/* Clear ka_active_tx_traffic after sending the keep alive command. */
	atomic_store_int(&sc->ka_active_tx_traffic, 0);

	callout_schedule_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0, C_HARDCLOCK);
}

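/*
 * Copy in and validate the handoff nvlist from userland describing an
 * established association: transport type, admin and I/O queue state,
 * controller data, and reconnect parameters.
 */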
int
nvmf_copyin_handoff(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp)
{
	const struct nvme_discovery_log_entry *dle;
	const struct nvme_controller_data *cdata;
	const nvlist_t *const *io;
	const nvlist_t *admin, *rparams;
	nvlist_t *nvl;
	size_t i, num_io_queues;
	uint32_t qsize;
	int error;

	error = nvmf_unpack_ioc_nvlist(nv, &nvl);
	if (error != 0)
		return (error);

	if (!nvlist_exists_number(nvl, "trtype") ||
	    !nvlist_exists_nvlist(nvl, "admin") ||
	    !nvlist_exists_nvlist_array(nvl, "io") ||
	    !nvlist_exists_binary(nvl, "cdata") ||
	    !nvlist_exists_nvlist(nvl, "rparams"))
		goto invalid;

	rparams = nvlist_get_nvlist(nvl, "rparams");
	if (!nvlist_exists_binary(rparams, "dle") ||
	    !nvlist_exists_string(rparams, "hostnqn") ||
	    !nvlist_exists_number(rparams, "num_io_queues") ||
	    !nvlist_exists_number(rparams, "io_qsize"))
		goto invalid;

	admin = nvlist_get_nvlist(nvl, "admin");
	if (!nvmf_validate_qpair_nvlist(admin, false))
		goto invalid;
	if (!nvlist_get_bool(admin, "admin"))
		goto invalid;

	io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
	if (num_io_queues < 1 ||
	    num_io_queues != nvlist_get_number(rparams, "num_io_queues"))
		goto invalid;
	for (i = 0; i < num_io_queues; i++) {
		if (!nvmf_validate_qpair_nvlist(io[i], false))
			goto invalid;
	}

	/* Require all I/O queues to be the same size. */
	qsize = nvlist_get_number(rparams, "io_qsize");
	for (i = 0; i < num_io_queues; i++) {
		if (nvlist_get_number(io[i], "qsize") != qsize)
			goto invalid;
	}

	cdata = nvlist_get_binary(nvl, "cdata", &i);
	if (i != sizeof(*cdata))
		goto invalid;
	dle = nvlist_get_binary(rparams, "dle", &i);
	if (i != sizeof(*dle))
		goto invalid;

	if (memcmp(dle->subnqn, cdata->subnqn, sizeof(cdata->subnqn)) != 0)
		goto invalid;

	*nvlp = nvl;
	return (0);
invalid:
	nvlist_destroy(nvl);
	return (EINVAL);
}

static int
nvmf_probe(device_t dev)
{
	const nvlist_t *nvl = device_get_ivars(dev);
	const struct nvme_controller_data *cdata;

	if (nvl == NULL)
		return (ENXIO);

	cdata = nvlist_get_binary(nvl, "cdata", NULL);
	device_set_descf(dev, "Fabrics: %.256s", cdata->subnqn);
	return (BUS_PROBE_DEFAULT);
}

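/*
 * Adopt the queue pairs described by the handoff nvlist: create the
 * admin and I/O queue state, start KeepAlive timers if KATO is set,
 * and save the controller data and reconnect parameters.
 */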
static int
nvmf_establish_connection(struct nvmf_softc *sc, nvlist_t *nvl)
{
	const nvlist_t *const *io;
	const nvlist_t *admin;
	uint64_t kato;
	size_t num_io_queues;
	enum nvmf_trtype trtype;
	char name[16];

	trtype = nvlist_get_number(nvl, "trtype");
	admin = nvlist_get_nvlist(nvl, "admin");
	io = nvlist_get_nvlist_array(nvl, "io", &num_io_queues);
	kato = dnvlist_get_number(nvl, "kato", 0);
	sc->reconnect_delay = dnvlist_get_number(nvl, "reconnect_delay", 0);
	sc->controller_loss_timeout = dnvlist_get_number(nvl,
	    "controller_loss_timeout", 0);

	/* Setup the admin queue. */
	sc->admin = nvmf_init_qp(sc, trtype, admin, "admin queue", 0);
	if (sc->admin == NULL) {
		device_printf(sc->dev, "Failed to setup admin queue\n");
		return (ENXIO);
	}

	/* Setup I/O queues. */
	sc->io = malloc(num_io_queues * sizeof(*sc->io), M_NVMF,
	    M_WAITOK | M_ZERO);
	sc->num_io_queues = num_io_queues;
	for (u_int i = 0; i < sc->num_io_queues; i++) {
		snprintf(name, sizeof(name), "I/O queue %u", i);
		sc->io[i] = nvmf_init_qp(sc, trtype, io[i], name, i);
		if (sc->io[i] == NULL) {
			device_printf(sc->dev, "Failed to setup I/O queue %u\n",
			    i);
			return (ENXIO);
		}
	}

	/* Start KeepAlive timers. */
	if (kato != 0) {
		sc->ka_traffic = NVMEV(NVME_CTRLR_DATA_CTRATT_TBKAS,
		    sc->cdata->ctratt) != 0;
		sc->ka_rx_sbt = mstosbt(kato);
		sc->ka_tx_sbt = sc->ka_rx_sbt / 2;
		callout_reset_sbt(&sc->ka_rx_timer, sc->ka_rx_sbt, 0,
		    nvmf_check_keep_alive, sc, C_HARDCLOCK);
		callout_reset_sbt(&sc->ka_tx_timer, sc->ka_tx_sbt, 0,
		    nvmf_send_keep_alive, sc, C_HARDCLOCK);
	}

	memcpy(sc->cdata, nvlist_get_binary(nvl, "cdata", NULL),
	    sizeof(*sc->cdata));

	/* Save reconnect parameters. */
	nvlist_destroy(sc->rparams);
	sc->rparams = nvlist_take_nvlist(nvl, "rparams");

	return (0);
}

typedef bool nvmf_scan_active_ns_cb(struct nvmf_softc *, uint32_t,
    const struct nvme_namespace_data *, void *);

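/*
 * Fetch one page of the active namespace ID list starting after
 * *nsidp, identify each namespace in it, and invoke the callback for
 * each.  On return, *nsidp is 0 when the scan is complete or holds the
 * last NSID seen so the caller can request the next page.
 */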
static bool
nvmf_scan_active_nslist(struct nvmf_softc *sc, struct nvme_ns_list *nslist,
    struct nvme_namespace_data *data, uint32_t *nsidp,
    nvmf_scan_active_ns_cb *cb, void *cb_arg)
{
	struct nvmf_completion_status status;
	uint32_t nsid;

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_active_namespaces(sc, *nsidp, nslist,
	    nvmf_complete, &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY active namespaces command\n");
		return (false);
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed, status %#x\n",
		    le16toh(status.cqe.status));
		return (false);
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY active namespaces failed with I/O error %d\n",
		    status.io_error);
		return (false);
	}

	for (u_int i = 0; i < nitems(nslist->ns); i++) {
		nsid = nslist->ns[i];
		if (nsid == 0) {
			*nsidp = 0;
			return (true);
		}

		nvmf_status_init(&status);
		nvmf_status_wait_io(&status);
		if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
		    &status, nvmf_io_complete, &status, M_WAITOK)) {
			device_printf(sc->dev,
			    "failed to send IDENTIFY namespace %u command\n",
			    nsid);
			return (false);
		}
		nvmf_wait_for_reply(&status);

		if (status.cqe.status != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed, status %#x\n", nsid,
			    le16toh(status.cqe.status));
			return (false);
		}

		if (status.io_error != 0) {
			device_printf(sc->dev,
			    "IDENTIFY namespace %u failed with I/O error %d\n",
			    nsid, status.io_error);
			return (false);
		}

		nvme_namespace_data_swapbytes(data);
		if (!cb(sc, nsid, data, cb_arg))
			return (false);
	}

	MPASS(nsid == nslist->ns[nitems(nslist->ns) - 1] && nsid != 0);

	if (nsid >= NVME_GLOBAL_NAMESPACE_TAG - 1)
		*nsidp = 0;
	else
		*nsidp = nsid;
	return (true);
}

static bool
nvmf_scan_active_namespaces(struct nvmf_softc *sc, nvmf_scan_active_ns_cb *cb,
    void *cb_arg)
{
	struct nvme_namespace_data *data;
	struct nvme_ns_list *nslist;
	uint32_t nsid;
	bool retval;

	nslist = malloc(sizeof(*nslist), M_NVMF, M_WAITOK);
	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nsid = 0;
	retval = true;
	for (;;) {
		if (!nvmf_scan_active_nslist(sc, nslist, data, &nsid, cb,
		    cb_arg)) {
			retval = false;
			break;
		}
		if (nsid == 0)
			break;
	}

	free(data, M_NVMF);
	free(nslist, M_NVMF);
	return (retval);
}

static bool
nvmf_add_ns(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg __unused)
{
	if (sc->ns[nsid - 1] != NULL) {
		device_printf(sc->dev,
		    "duplicate namespace %u in active namespace list\n",
		    nsid);
		return (false);
	}

	/*
	 * As in nvme_ns_construct, a size of zero indicates an
	 * invalid namespace.
	 */
	if (data->nsze == 0) {
		device_printf(sc->dev,
		    "ignoring active namespace %u with zero size\n", nsid);
		return (true);
	}

	sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);

	nvmf_sim_rescan_ns(sc, nsid);
	return (true);
}

static bool
nvmf_add_namespaces(struct nvmf_softc *sc)
{
	sc->ns = mallocarray(sc->cdata->nn, sizeof(*sc->ns), M_NVMF,
	    M_WAITOK | M_ZERO);
	return (nvmf_scan_active_namespaces(sc, nvmf_add_ns, NULL));
}

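/*
 * Attach the host: adopt the association handed off via ivars, read
 * CAP and VS, size transfers from MDTS, create the SIM, start AER
 * handling, scan active namespaces, and create the control device.
 */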
static int
nvmf_attach(device_t dev)
{
	struct make_dev_args mda;
	struct nvmf_softc *sc = device_get_softc(dev);
	nvlist_t *nvl = device_get_ivars(dev);
	const nvlist_t * const *io;
	struct sysctl_oid *oid;
	uint64_t val;
	u_int i;
	int error;

	if (nvl == NULL)
		return (ENXIO);

	sc->dev = dev;
	sc->trtype = nvlist_get_number(nvl, "trtype");
	callout_init(&sc->ka_rx_timer, 1);
	callout_init(&sc->ka_tx_timer, 1);
	sx_init(&sc->connection_lock, "nvmf connection");
	TASK_INIT(&sc->disconnect_task, 0, nvmf_disconnect_task, sc);
	TIMEOUT_TASK_INIT(nvmf_tq, &sc->controller_loss_task, 0,
	    nvmf_controller_loss_task, sc);
	TIMEOUT_TASK_INIT(nvmf_tq, &sc->request_reconnect_task, 0,
	    nvmf_request_reconnect_task, sc);

	oid = SYSCTL_ADD_NODE(device_get_sysctl_ctx(dev),
	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ioq",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "I/O Queues");
	sc->ioq_oid_list = SYSCTL_CHILDREN(oid);

	sc->cdata = malloc(sizeof(*sc->cdata), M_NVMF, M_WAITOK);

	nvmf_init_aer(sc);

	error = nvmf_establish_connection(sc, nvl);
	if (error != 0)
		goto out;

	error = nvmf_read_property(sc, NVMF_PROP_CAP, 8, &sc->cap);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch CAP\n");
		error = ENXIO;
		goto out;
	}

	error = nvmf_read_property(sc, NVMF_PROP_VS, 4, &val);
	if (error != 0) {
		device_printf(sc->dev, "Failed to fetch VS\n");
		error = ENXIO;
		goto out;
	}
	sc->vs = val;

	/* Honor MDTS if it is set. */
	sc->max_xfer_size = maxphys;
	if (sc->cdata->mdts != 0) {
		sc->max_xfer_size = ulmin(sc->max_xfer_size,
		    1 << (sc->cdata->mdts + NVME_MPS_SHIFT +
		    NVME_CAP_HI_MPSMIN(sc->cap >> 32)));
	}

	io = nvlist_get_nvlist_array(nvl, "io", NULL);
	sc->max_pending_io = nvlist_get_number(io[0], "qsize") *
	    sc->num_io_queues;

	error = nvmf_init_sim(sc);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	if (!nvmf_add_namespaces(sc)) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	make_dev_args_init(&mda);
	mda.mda_devsw = &nvmf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	error = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (error != 0) {
		nvmf_destroy_sim(sc);
		goto out;
	}

	sc->shutdown_pre_sync_eh = EVENTHANDLER_REGISTER(shutdown_pre_sync,
	    nvmf_shutdown_pre_sync, sc, SHUTDOWN_PRI_FIRST);
	sc->shutdown_post_sync_eh = EVENTHANDLER_REGISTER(shutdown_post_sync,
	    nvmf_shutdown_post_sync, sc, SHUTDOWN_PRI_LAST);

	return (0);
out:
	if (sc->ns != NULL) {
		for (i = 0; i < sc->cdata->nn; i++) {
			if (sc->ns[i] != NULL)
				nvmf_destroy_ns(sc->ns[i]);
		}
		free(sc->ns, M_NVMF);
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		if (sc->io[i] != NULL)
			nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);
	taskqueue_drain_timeout(nvmf_tq, &sc->controller_loss_task);
	taskqueue_drain(nvmf_tq, &sc->disconnect_task);
	sx_destroy(&sc->connection_lock);
	nvlist_destroy(sc->rparams);
	free(sc->cdata, M_NVMF);
	return (error);
}

void
nvmf_disconnect(struct nvmf_softc *sc)
{
	taskqueue_enqueue(nvmf_tq, &sc->disconnect_task);
}

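/*
 * Tear down a failed association: stop KeepAlive timers, quiesce the
 * SIM and namespaces, destroy all queue pairs, and schedule the
 * reconnect and controller-loss timers as configured.
 */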
static void
nvmf_disconnect_task(void *arg, int pending __unused)
{
	struct nvmf_softc *sc = arg;
	u_int i;

	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL) {
		/*
		 * Ignore transport errors if there is no active
		 * association.
		 */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->detaching) {
		if (sc->admin != NULL) {
			/*
			 * This unsticks the detach process if a
			 * transport error occurs during detach.
			 */
			nvmf_shutdown_qp(sc->admin);
		}
		sx_xunlock(&sc->connection_lock);
		return;
	}

	if (sc->cdev == NULL) {
		/*
		 * Transport error occurred during attach (nvmf_add_namespaces).
		 * Shutdown the admin queue.
		 */
		nvmf_shutdown_qp(sc->admin);
		sx_xunlock(&sc->connection_lock);
		return;
	}

	nanotime(&sc->last_disconnect);
	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);
	sc->ka_traffic = false;

	/* Quiesce namespace consumers. */
	nvmf_disconnect_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_disconnect_ns(sc->ns[i]);
	}

	/* Shutdown the existing qpairs. */
	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);
	sc->io = NULL;
	sc->num_io_queues = 0;
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;

	if (sc->reconnect_delay != 0)
		nvmf_request_reconnect(sc);
	if (sc->controller_loss_timeout != 0)
		taskqueue_enqueue_timeout(nvmf_tq,
		    &sc->controller_loss_task, sc->controller_loss_timeout *
		    hz);

	sx_xunlock(&sc->connection_lock);
}

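/*
 * Controller-loss timeout: if the association has not been
 * re-established within the configured window, detach and delete the
 * device.
 */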
static void
nvmf_controller_loss_task(void *arg, int pending)
{
	struct nvmf_softc *sc = arg;
	device_t dev;
	int error;

	bus_topo_lock();
	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		/* Reconnected or already detaching. */
		sx_xunlock(&sc->connection_lock);
		bus_topo_unlock();
		return;
	}

	sc->controller_timedout = true;
	sx_xunlock(&sc->connection_lock);

	/*
	 * XXX: Doing this from here is a bit ugly.  We don't have an
	 * extra reference on `dev` but bus_topo_lock should block any
	 * concurrent device_delete_child invocations.
	 */
	dev = sc->dev;
	error = device_delete_child(root_bus, dev);
	if (error != 0)
		device_printf(dev,
		    "failed to detach after controller loss: %d\n", error);
	bus_topo_unlock();
}

static void
nvmf_request_reconnect(struct nvmf_softc *sc)
{
	char buf[64];

	sx_assert(&sc->connection_lock, SX_LOCKED);

	snprintf(buf, sizeof(buf), "name=\"%s\"", device_get_nameunit(sc->dev));
	devctl_notify("nvme", "controller", "RECONNECT", buf);
	taskqueue_enqueue_timeout(nvmf_tq, &sc->request_reconnect_task,
	    sc->reconnect_delay * hz);
}

static void
nvmf_request_reconnect_task(void *arg, int pending)
{
	struct nvmf_softc *sc = arg;

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
		/* Reconnected or already detaching. */
		sx_xunlock(&sc->connection_lock);
		return;
	}

	nvmf_request_reconnect(sc);
	sx_xunlock(&sc->connection_lock);
}

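/*
 * Handle the NVMF_RECONNECT_HOST ioctl: adopt a freshly negotiated
 * association from userland, verify it targets the same subsystem,
 * and resume namespace and SIM activity.
 */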
static int
nvmf_reconnect_host(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
{
	const struct nvme_controller_data *cdata;
	nvlist_t *nvl;
	u_int i;
	int error;

	error = nvmf_copyin_handoff(nv, &nvl);
	if (error != 0)
		return (error);

	/* XXX: Should we permit changing the transport type? */
	if (sc->trtype != nvlist_get_number(nvl, "trtype")) {
		device_printf(sc->dev,
		    "transport type mismatch on reconnect\n");
		nvlist_destroy(nvl);
		return (EINVAL);
	}

	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching || sc->controller_timedout) {
		error = EBUSY;
		goto out;
	}

	/*
	 * Ensure this is for the same controller.  Note that the
	 * controller ID can vary across associations if the remote
	 * system is using the dynamic controller model.  This merely
	 * ensures the new association is connected to the same NVMe
	 * subsystem.
	 */
	cdata = nvlist_get_binary(nvl, "cdata", NULL);
	if (memcmp(sc->cdata->subnqn, cdata->subnqn,
	    sizeof(cdata->subnqn)) != 0) {
		device_printf(sc->dev,
		    "controller subsystem NQN mismatch on reconnect\n");
		error = EINVAL;
		goto out;
	}

	/*
	 * XXX: Require same number and size of I/O queues so that
	 * max_pending_io is still correct?
	 */

	error = nvmf_establish_connection(sc, nvl);
	if (error != 0)
		goto out;

	error = nvmf_start_aer(sc);
	if (error != 0)
		goto out;

	device_printf(sc->dev,
	    "established new association with %u I/O queues\n",
	    sc->num_io_queues);

	/* Restart namespace consumers. */
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_reconnect_ns(sc->ns[i]);
	}
	nvmf_reconnect_sim(sc);

	nvmf_rescan_all_ns(sc);

	taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task, NULL);
	taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task, NULL);
out:
	sx_xunlock(&sc->connection_lock);
	nvlist_destroy(nvl);
	return (error);
}

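/*
 * shutdown_pre_sync hook: if the association is already down, fail
 * outstanding and future I/O so the sync phase of shutdown does not
 * hang on an unreachable controller.
 */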
static void
nvmf_shutdown_pre_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is disconnected, abort any pending
	 * requests with an error to permit filesystems to unmount
	 * without hanging.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin != NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	for (u_int i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_shutdown_ns(sc->ns[i]);
	}
	nvmf_shutdown_sim(sc);
	sx_xunlock(&sc->connection_lock);
}

static void
nvmf_shutdown_post_sync(void *arg, int howto)
{
	struct nvmf_softc *sc = arg;

	if ((howto & RB_NOSYNC) != 0 || SCHEDULER_STOPPED())
		return;

	/*
	 * If this association is connected, disconnect gracefully.
	 */
	sx_xlock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		sx_xunlock(&sc->connection_lock);
		return;
	}

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	nvmf_shutdown_controller(sc);

	/*
	 * Quiesce consumers so that any commands submitted after this
	 * fail with an error.  Notably, nda(4) calls nda_flush() from
	 * a post_sync handler that might be ordered after this one.
	 */
	for (u_int i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_shutdown_ns(sc->ns[i]);
	}
	nvmf_shutdown_sim(sc);

	for (u_int i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	nvmf_destroy_qp(sc->admin);
	sc->admin = NULL;
	sx_xunlock(&sc->connection_lock);
}

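/*
 * Detach the host: tear down the control device, SIM, namespaces,
 * timers, queue pairs, and any pending reconnect and controller-loss
 * tasks.
 */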
static int
nvmf_detach(device_t dev)
{
	struct nvmf_softc *sc = device_get_softc(dev);
	u_int i;

	destroy_dev(sc->cdev);

	sx_xlock(&sc->connection_lock);
	sc->detaching = true;
	sx_xunlock(&sc->connection_lock);

	EVENTHANDLER_DEREGISTER(shutdown_pre_sync, sc->shutdown_pre_sync_eh);
	EVENTHANDLER_DEREGISTER(shutdown_post_sync, sc->shutdown_post_sync_eh);

	nvmf_destroy_sim(sc);
	for (i = 0; i < sc->cdata->nn; i++) {
		if (sc->ns[i] != NULL)
			nvmf_destroy_ns(sc->ns[i]);
	}
	free(sc->ns, M_NVMF);

	callout_drain(&sc->ka_tx_timer);
	callout_drain(&sc->ka_rx_timer);

	if (sc->admin != NULL)
		nvmf_shutdown_controller(sc);

	for (i = 0; i < sc->num_io_queues; i++) {
		nvmf_destroy_qp(sc->io[i]);
	}
	free(sc->io, M_NVMF);

	taskqueue_drain(nvmf_tq, &sc->disconnect_task);
	if (taskqueue_cancel_timeout(nvmf_tq, &sc->request_reconnect_task,
	    NULL) != 0)
		taskqueue_drain_timeout(nvmf_tq, &sc->request_reconnect_task);

	/*
	 * Don't cancel/drain the controller loss task if that task
	 * has fired and is triggering the detach.
	 */
	if (!sc->controller_timedout) {
		if (taskqueue_cancel_timeout(nvmf_tq, &sc->controller_loss_task,
		    NULL) != 0)
			taskqueue_drain_timeout(nvmf_tq,
			    &sc->controller_loss_task);
	}

	if (sc->admin != NULL)
		nvmf_destroy_qp(sc->admin);

	nvmf_destroy_aer(sc);

	sx_destroy(&sc->connection_lock);
	nvlist_destroy(sc->rparams);
	free(sc->cdata, M_NVMF);
	return (0);
}

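/*
 * Apply fresh IDENTIFY data to a single namespace slot: create,
 * update, or destroy the namespace as needed and request a SIM rescan.
 */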
static void
nvmf_rescan_ns_1(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data)
{
	struct nvmf_namespace *ns;

	/* XXX: Needs locking around sc->ns[]. */
	ns = sc->ns[nsid - 1];
	if (data->nsze == 0) {
		/* XXX: Needs locking */
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;
		}
	} else {
		/* XXX: Needs locking */
		if (ns == NULL) {
			sc->ns[nsid - 1] = nvmf_init_ns(sc, nsid, data);
		} else {
			if (!nvmf_update_ns(ns, data)) {
				nvmf_destroy_ns(ns);
				sc->ns[nsid - 1] = NULL;
			}
		}
	}

	nvmf_sim_rescan_ns(sc, nsid);
}

void
nvmf_rescan_ns(struct nvmf_softc *sc, uint32_t nsid)
{
	struct nvmf_completion_status status;
	struct nvme_namespace_data *data;

	data = malloc(sizeof(*data), M_NVMF, M_WAITOK);

	nvmf_status_init(&status);
	nvmf_status_wait_io(&status);
	if (!nvmf_cmd_identify_namespace(sc, nsid, data, nvmf_complete,
	    &status, nvmf_io_complete, &status, M_WAITOK)) {
		device_printf(sc->dev,
		    "failed to send IDENTIFY namespace %u command\n", nsid);
		free(data, M_NVMF);
		return;
	}
	nvmf_wait_for_reply(&status);

	if (status.cqe.status != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed, status %#x\n", nsid,
		    le16toh(status.cqe.status));
		free(data, M_NVMF);
		return;
	}

	if (status.io_error != 0) {
		device_printf(sc->dev,
		    "IDENTIFY namespace %u failed with I/O error %d\n",
		    nsid, status.io_error);
		free(data, M_NVMF);
		return;
	}

	nvme_namespace_data_swapbytes(data);

	nvmf_rescan_ns_1(sc, nsid, data);

	free(data, M_NVMF);
}

static void
nvmf_purge_namespaces(struct nvmf_softc *sc, uint32_t first_nsid,
    uint32_t next_valid_nsid)
{
	struct nvmf_namespace *ns;

	for (uint32_t nsid = first_nsid; nsid < next_valid_nsid; nsid++) {
		/* XXX: Needs locking around sc->ns[]. */
		ns = sc->ns[nsid - 1];
		if (ns != NULL) {
			nvmf_destroy_ns(ns);
			sc->ns[nsid - 1] = NULL;

			nvmf_sim_rescan_ns(sc, nsid);
		}
	}
}

static bool
nvmf_rescan_ns_cb(struct nvmf_softc *sc, uint32_t nsid,
    const struct nvme_namespace_data *data, void *arg)
{
	uint32_t *last_nsid = arg;

	/* Check for any gaps prior to this namespace. */
	nvmf_purge_namespaces(sc, *last_nsid + 1, nsid);
	*last_nsid = nsid;

	nvmf_rescan_ns_1(sc, nsid, data);
	return (true);
}

void
nvmf_rescan_all_ns(struct nvmf_softc *sc)
{
	uint32_t last_nsid;

	last_nsid = 0;
	if (!nvmf_scan_active_namespaces(sc, nvmf_rescan_ns_cb, &last_nsid))
		return;

	/*
	 * Check for any namespace devices after the last active
	 * namespace.
	 */
	nvmf_purge_namespaces(sc, last_nsid + 1, sc->cdata->nn + 1);
}

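/*
 * Execute a userland passthrough command on the admin queue or a
 * selected I/O queue, bouncing any data through a kernel buffer.
 */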
int
nvmf_passthrough_cmd(struct nvmf_softc *sc, struct nvme_pt_command *pt,
    bool admin)
{
	struct nvmf_completion_status status;
	struct nvme_command cmd;
	struct memdesc mem;
	struct nvmf_host_qpair *qp;
	struct nvmf_request *req;
	void *buf;
	int error;

	if (pt->len > sc->max_xfer_size)
		return (EINVAL);

	buf = NULL;
	if (pt->len != 0) {
		/*
		 * XXX: Depending on the size we may want to pin the
		 * user pages and use a memdesc with vm_page_t's
		 * instead.
		 */
		buf = malloc(pt->len, M_NVMF, M_WAITOK);
		if (pt->is_read == 0) {
			error = copyin(pt->buf, buf, pt->len);
			if (error != 0) {
				free(buf, M_NVMF);
				return (error);
			}
		} else {
			/* Ensure no kernel data is leaked to userland. */
			memset(buf, 0, pt->len);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.opc = pt->cmd.opc;
	cmd.fuse = pt->cmd.fuse;
	cmd.nsid = pt->cmd.nsid;
	cmd.cdw10 = pt->cmd.cdw10;
	cmd.cdw11 = pt->cmd.cdw11;
	cmd.cdw12 = pt->cmd.cdw12;
	cmd.cdw13 = pt->cmd.cdw13;
	cmd.cdw14 = pt->cmd.cdw14;
	cmd.cdw15 = pt->cmd.cdw15;

	sx_slock(&sc->connection_lock);
	if (sc->admin == NULL || sc->detaching) {
		device_printf(sc->dev,
		    "failed to send passthrough command\n");
		error = ECONNABORTED;
		sx_sunlock(&sc->connection_lock);
		goto error;
	}
	if (admin)
		qp = sc->admin;
	else
		qp = nvmf_select_io_queue(sc);
	nvmf_status_init(&status);
	req = nvmf_allocate_request(qp, &cmd, nvmf_complete, &status, M_WAITOK);
	sx_sunlock(&sc->connection_lock);
	if (req == NULL) {
		device_printf(sc->dev, "failed to send passthrough command\n");
		error = ECONNABORTED;
		goto error;
	}

	if (pt->len != 0) {
		mem = memdesc_vaddr(buf, pt->len);
		nvmf_capsule_append_data(req->nc, &mem, pt->len,
		    pt->is_read == 0, nvmf_io_complete, &status);
		nvmf_status_wait_io(&status);
	}

	nvmf_submit_request(req);
	nvmf_wait_for_reply(&status);

	memset(&pt->cpl, 0, sizeof(pt->cpl));
	pt->cpl.cdw0 = status.cqe.cdw0;
	pt->cpl.status = status.cqe.status;

	error = status.io_error;
	if (error == 0 && pt->len != 0 && pt->is_read != 0)
		error = copyout(buf, pt->buf, pt->len);
error:
	free(buf, M_NVMF);
	return (error);
}

static int
nvmf_reconnect_params(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
{
	int error;

	sx_slock(&sc->connection_lock);
	error = nvmf_pack_ioc_nvlist(sc->rparams, nv);
	sx_sunlock(&sc->connection_lock);

	return (error);
}

static int
nvmf_connection_status(struct nvmf_softc *sc, struct nvmf_ioc_nv *nv)
{
	nvlist_t *nvl, *nvl_ts;
	int error;

	nvl = nvlist_create(0);
	nvl_ts = nvlist_create(0);

	sx_slock(&sc->connection_lock);
	nvlist_add_bool(nvl, "connected", sc->admin != NULL);
	nvlist_add_number(nvl_ts, "tv_sec", sc->last_disconnect.tv_sec);
	nvlist_add_number(nvl_ts, "tv_nsec", sc->last_disconnect.tv_nsec);
	sx_sunlock(&sc->connection_lock);
	nvlist_move_nvlist(nvl, "last_disconnect", nvl_ts);

	error = nvmf_pack_ioc_nvlist(nvl, nv);
	nvlist_destroy(nvl);
	return (error);
}

static int
nvmf_ioctl(struct cdev *cdev, u_long cmd, caddr_t arg, int flag,
    struct thread *td)
{
	struct nvmf_softc *sc = cdev->si_drv1;
	struct nvme_get_nsid *gnsid;
	struct nvme_pt_command *pt;
	struct nvmf_ioc_nv *nv;

	switch (cmd) {
	case NVME_PASSTHROUGH_CMD:
		pt = (struct nvme_pt_command *)arg;
		return (nvmf_passthrough_cmd(sc, pt, true));
	case NVME_GET_NSID:
		gnsid = (struct nvme_get_nsid *)arg;
		strlcpy(gnsid->cdev, device_get_nameunit(sc->dev),
		    sizeof(gnsid->cdev));
		gnsid->nsid = 0;
		return (0);
	case NVME_GET_MAX_XFER_SIZE:
		*(uint64_t *)arg = sc->max_xfer_size;
		return (0);
	case NVME_GET_CONTROLLER_DATA:
		memcpy(arg, sc->cdata, sizeof(*sc->cdata));
		return (0);
	case NVMF_RECONNECT_PARAMS:
		nv = (struct nvmf_ioc_nv *)arg;
		return (nvmf_reconnect_params(sc, nv));
	case NVMF_RECONNECT_HOST:
		nv = (struct nvmf_ioc_nv *)arg;
		return (nvmf_reconnect_host(sc, nv));
	case NVMF_CONNECTION_STATUS:
		nv = (struct nvmf_ioc_nv *)arg;
		return (nvmf_connection_status(sc, nv));
	default:
		return (ENOTTY);
	}
}

static struct cdevsw nvmf_cdevsw = {
	.d_version = D_VERSION,
	.d_ioctl = nvmf_ioctl
};

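/*
 * Module event handler: create the shared taskqueue on load and tear
 * it down, along with the control device, on unload.
 */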
static int
nvmf_modevent(module_t mod, int what, void *arg)
{
	int error;

	switch (what) {
	case MOD_LOAD:
		error = nvmf_ctl_load();
		if (error != 0)
			return (error);

		nvmf_tq = taskqueue_create("nvmf", M_WAITOK | M_ZERO,
		    taskqueue_thread_enqueue, &nvmf_tq);
		taskqueue_start_threads(&nvmf_tq, 1, PWAIT, "nvmf taskq");
		return (0);
	case MOD_QUIESCE:
		return (0);
	case MOD_UNLOAD:
		nvmf_ctl_unload();
		destroy_dev_drain(&nvmf_cdevsw);
		if (nvmf_tq != NULL)
			taskqueue_free(nvmf_tq);
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static device_method_t nvmf_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,     nvmf_probe),
	DEVMETHOD(device_attach,    nvmf_attach),
	DEVMETHOD(device_detach,    nvmf_detach),
	DEVMETHOD_END
};

driver_t nvme_nvmf_driver = {
	"nvme",
	nvmf_methods,
	sizeof(struct nvmf_softc),
};

DRIVER_MODULE(nvme, root, nvme_nvmf_driver, nvmf_modevent, NULL);
MODULE_DEPEND(nvmf, nvmf_transport, 1, 1, 1);