xref: /freebsd/sys/dev/nvd/nvd.c (revision 595e514d0df2bac5b813d35f83e32875dbf16a83)
1 /*-
2  * Copyright (C) 2012 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/param.h>
31 #include <sys/bio.h>
32 #include <sys/kernel.h>
33 #include <sys/malloc.h>
34 #include <sys/module.h>
35 #include <sys/systm.h>
36 #include <sys/taskqueue.h>
37 
38 #include <geom/geom.h>
39 #include <geom/geom_disk.h>
40 
41 #include <dev/nvme/nvme.h>
42 
43 struct nvd_disk;
44 
45 static disk_ioctl_t nvd_ioctl;
46 static disk_strategy_t nvd_strategy;
47 
48 static void *nvd_new_disk(struct nvme_namespace *ns, void *ctrlr);
49 static void destroy_geom_disk(struct nvd_disk *ndisk);
50 
51 static void *nvd_new_controller(struct nvme_controller *ctrlr);
52 static void nvd_controller_fail(void *ctrlr);
53 
54 static int nvd_load(void);
55 static void nvd_unload(void);
56 
57 MALLOC_DEFINE(M_NVD, "nvd", "nvd(4) allocations");
58 
59 struct nvme_consumer *consumer_handle;
60 
/*
 * Per-namespace state: one nvd_disk is created for each NVMe namespace
 * and exposes it to GEOM as a disk.
 */
struct nvd_disk {

	struct bio_queue_head	bioq;		/* bios awaiting submission */
	struct task		bioqtask;	/* drains bioq (nvd_bioq_process) */
	struct mtx		bioqlock;	/* protects bioq */

	struct disk		*disk;		/* GEOM disk */
	struct taskqueue	*tq;		/* runs bioqtask */
	struct nvme_namespace	*ns;		/* backing NVMe namespace */

	uint32_t		cur_depth;	/* I/Os submitted, not yet completed */

	TAILQ_ENTRY(nvd_disk)	global_tailq;	/* linkage on global disk_head */
	TAILQ_ENTRY(nvd_disk)	ctrlr_tailq;	/* linkage on controller disk_head */
};
76 
/*
 * Per-controller state: tracks the disks (namespaces) that sit behind
 * one NVMe controller so they can be torn down together on failure.
 */
struct nvd_controller {

	TAILQ_ENTRY(nvd_controller)	tailq;		/* linkage on ctrlr_head */
	TAILQ_HEAD(, nvd_disk)		disk_head;	/* this controller's disks */
};

/* Global lists of all known controllers and disks. */
static TAILQ_HEAD(, nvd_controller)	ctrlr_head;
static TAILQ_HEAD(disk_list, nvd_disk)	disk_head;
85 
86 static int nvd_modevent(module_t mod, int type, void *arg)
87 {
88 	int error = 0;
89 
90 	switch (type) {
91 	case MOD_LOAD:
92 		error = nvd_load();
93 		break;
94 	case MOD_UNLOAD:
95 		nvd_unload();
96 		break;
97 	default:
98 		break;
99 	}
100 
101 	return (error);
102 }
103 
104 moduledata_t nvd_mod = {
105 	"nvd",
106 	(modeventhand_t)nvd_modevent,
107 	0
108 };
109 
110 DECLARE_MODULE(nvd, nvd_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
111 MODULE_VERSION(nvd, 1);
112 MODULE_DEPEND(nvd, nvme, 1, 1, 1);
113 
114 static int
115 nvd_load()
116 {
117 
118 	TAILQ_INIT(&ctrlr_head);
119 	TAILQ_INIT(&disk_head);
120 
121 	consumer_handle = nvme_register_consumer(nvd_new_disk,
122 	    nvd_new_controller, NULL, nvd_controller_fail);
123 
124 	return (consumer_handle != NULL ? 0 : -1);
125 }
126 
127 static void
128 nvd_unload()
129 {
130 	struct nvd_controller	*ctrlr;
131 	struct nvd_disk		*disk;
132 
133 	while (!TAILQ_EMPTY(&ctrlr_head)) {
134 		ctrlr = TAILQ_FIRST(&ctrlr_head);
135 		TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
136 		free(ctrlr, M_NVD);
137 	}
138 
139 	while (!TAILQ_EMPTY(&disk_head)) {
140 		disk = TAILQ_FIRST(&disk_head);
141 		TAILQ_REMOVE(&disk_head, disk, global_tailq);
142 		destroy_geom_disk(disk);
143 		free(disk, M_NVD);
144 	}
145 
146 	nvme_unregister_consumer(consumer_handle);
147 }
148 
149 static void
150 nvd_strategy(struct bio *bp)
151 {
152 	struct nvd_disk *ndisk;
153 
154 	ndisk = (struct nvd_disk *)bp->bio_disk->d_drv1;
155 
156 	mtx_lock(&ndisk->bioqlock);
157 	bioq_insert_tail(&ndisk->bioq, bp);
158 	mtx_unlock(&ndisk->bioqlock);
159 	taskqueue_enqueue(ndisk->tq, &ndisk->bioqtask);
160 }
161 
162 static int
163 nvd_ioctl(struct disk *ndisk, u_long cmd, void *data, int fflag,
164     struct thread *td)
165 {
166 	int ret = 0;
167 
168 	switch (cmd) {
169 	default:
170 		ret = EIO;
171 	}
172 
173 	return (ret);
174 }
175 
176 static void
177 nvd_done(void *arg, const struct nvme_completion *cpl)
178 {
179 	struct bio *bp;
180 	struct nvd_disk *ndisk;
181 
182 	bp = (struct bio *)arg;
183 
184 	ndisk = bp->bio_disk->d_drv1;
185 
186 	atomic_add_int(&ndisk->cur_depth, -1);
187 
188 	/*
189 	 * TODO: add more extensive translation of NVMe status codes
190 	 *  to different bio error codes (i.e. EIO, EINVAL, etc.)
191 	 */
192 	if (nvme_completion_is_error(cpl)) {
193 		bp->bio_error = EIO;
194 		bp->bio_flags |= BIO_ERROR;
195 		bp->bio_resid = bp->bio_bcount;
196 	} else
197 		bp->bio_resid = 0;
198 
199 	biodone(bp);
200 }
201 
202 static void
203 nvd_bioq_process(void *arg, int pending)
204 {
205 	struct nvd_disk *ndisk = arg;
206 	struct bio *bp;
207 	int err;
208 
209 	for (;;) {
210 		mtx_lock(&ndisk->bioqlock);
211 		bp = bioq_takefirst(&ndisk->bioq);
212 		mtx_unlock(&ndisk->bioqlock);
213 		if (bp == NULL)
214 			break;
215 
216 #ifdef BIO_ORDERED
217 		/*
218 		 * BIO_ORDERED flag dictates that all outstanding bios
219 		 *  must be completed before processing the bio with
220 		 *  BIO_ORDERED flag set.
221 		 */
222 		if (bp->bio_flags & BIO_ORDERED) {
223 			while (ndisk->cur_depth > 0) {
224 				pause("nvd flush", 1);
225 			}
226 		}
227 #endif
228 
229 		bp->bio_driver1 = NULL;
230 		atomic_add_int(&ndisk->cur_depth, 1);
231 
232 		err = nvme_ns_bio_process(ndisk->ns, bp, nvd_done);
233 
234 		if (err) {
235 			atomic_add_int(&ndisk->cur_depth, -1);
236 			bp->bio_error = err;
237 			bp->bio_flags |= BIO_ERROR;
238 			bp->bio_resid = bp->bio_bcount;
239 			biodone(bp);
240 		}
241 
242 #ifdef BIO_ORDERED
243 		/*
244 		 * BIO_ORDERED flag dictates that the bio with BIO_ORDERED
245 		 *  flag set must be completed before proceeding with
246 		 *  additional bios.
247 		 */
248 		if (bp->bio_flags & BIO_ORDERED) {
249 			while (ndisk->cur_depth > 0) {
250 				pause("nvd flush", 1);
251 			}
252 		}
253 #endif
254 	}
255 }
256 
257 static void *
258 nvd_new_controller(struct nvme_controller *ctrlr)
259 {
260 	struct nvd_controller	*nvd_ctrlr;
261 
262 	nvd_ctrlr = malloc(sizeof(struct nvd_controller), M_NVD,
263 	    M_ZERO | M_WAITOK);
264 
265 	TAILQ_INIT(&nvd_ctrlr->disk_head);
266 	TAILQ_INSERT_TAIL(&ctrlr_head, nvd_ctrlr, tailq);
267 
268 	return (nvd_ctrlr);
269 }
270 
271 static void *
272 nvd_new_disk(struct nvme_namespace *ns, void *ctrlr_arg)
273 {
274 	struct nvd_disk		*ndisk;
275 	struct disk		*disk;
276 	struct nvd_controller	*ctrlr = ctrlr_arg;
277 
278 	ndisk = malloc(sizeof(struct nvd_disk), M_NVD, M_ZERO | M_WAITOK);
279 
280 	disk = disk_alloc();
281 	disk->d_strategy = nvd_strategy;
282 	disk->d_ioctl = nvd_ioctl;
283 	disk->d_name = "nvd";
284 	disk->d_drv1 = ndisk;
285 
286 	disk->d_maxsize = nvme_ns_get_max_io_xfer_size(ns);
287 	disk->d_sectorsize = nvme_ns_get_sector_size(ns);
288 	disk->d_mediasize = (off_t)nvme_ns_get_size(ns);
289 
290 	if (TAILQ_EMPTY(&disk_head))
291 		disk->d_unit = 0;
292 	else
293 		disk->d_unit =
294 		    TAILQ_LAST(&disk_head, disk_list)->disk->d_unit + 1;
295 
296 	disk->d_flags = 0;
297 
298 	if (nvme_ns_get_flags(ns) & NVME_NS_DEALLOCATE_SUPPORTED)
299 		disk->d_flags |= DISKFLAG_CANDELETE;
300 
301 	if (nvme_ns_get_flags(ns) & NVME_NS_FLUSH_SUPPORTED)
302 		disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
303 
304 /* ifdef used here to ease porting to stable branches at a later point. */
305 #ifdef DISKFLAG_UNMAPPED_BIO
306 	disk->d_flags |= DISKFLAG_UNMAPPED_BIO;
307 #endif
308 
309 	strlcpy(disk->d_ident, nvme_ns_get_serial_number(ns),
310 	    sizeof(disk->d_ident));
311 
312 #if __FreeBSD_version >= 900034
313 	strlcpy(disk->d_descr, nvme_ns_get_model_number(ns),
314 	    sizeof(disk->d_descr));
315 #endif
316 
317 	disk_create(disk, DISK_VERSION);
318 
319 	ndisk->ns = ns;
320 	ndisk->disk = disk;
321 	ndisk->cur_depth = 0;
322 
323 	mtx_init(&ndisk->bioqlock, "NVD bioq lock", NULL, MTX_DEF);
324 	bioq_init(&ndisk->bioq);
325 
326 	TASK_INIT(&ndisk->bioqtask, 0, nvd_bioq_process, ndisk);
327 	ndisk->tq = taskqueue_create("nvd_taskq", M_WAITOK,
328 	    taskqueue_thread_enqueue, &ndisk->tq);
329 	taskqueue_start_threads(&ndisk->tq, 1, PI_DISK, "nvd taskq");
330 
331 	TAILQ_INSERT_TAIL(&disk_head, ndisk, global_tailq);
332 	TAILQ_INSERT_TAIL(&ctrlr->disk_head, ndisk, ctrlr_tailq);
333 
334 	return (NULL);
335 }
336 
/*
 * Tear down one disk: stop the submission taskqueue, destroy the GEOM
 * disk, then fail any bios still sitting on the queue with EIO.
 *
 * taskqueue_free() runs before the queue is drained, so no concurrent
 * nvd_bioq_process() can race with the cleanup below.
 *
 * NOTE(review): bios already handed to the NVMe layer (cur_depth > 0)
 * are not waited for here -- presumably the NVMe layer completes or
 * fails them before/independently of this call; verify against the
 * controller-failure and unload paths.
 */
static void
destroy_geom_disk(struct nvd_disk *ndisk)
{
	struct bio *bp;

	taskqueue_free(ndisk->tq);
	disk_destroy(ndisk->disk);

	/* Complete every still-queued bio with an error. */
	mtx_lock(&ndisk->bioqlock);
	for (;;) {
		bp = bioq_takefirst(&ndisk->bioq);
		if (bp == NULL)
			break;
		bp->bio_error = EIO;
		bp->bio_flags |= BIO_ERROR;
		bp->bio_resid = bp->bio_bcount;

		biodone(bp);
	}
	mtx_unlock(&ndisk->bioqlock);

	mtx_destroy(&ndisk->bioqlock);
}
360 
361 static void
362 nvd_controller_fail(void *ctrlr_arg)
363 {
364 	struct nvd_controller	*ctrlr = ctrlr_arg;
365 	struct nvd_disk		*disk;
366 
367 	while (!TAILQ_EMPTY(&ctrlr->disk_head)) {
368 		disk = TAILQ_FIRST(&ctrlr->disk_head);
369 		TAILQ_REMOVE(&disk_head, disk, global_tailq);
370 		TAILQ_REMOVE(&ctrlr->disk_head, disk, ctrlr_tailq);
371 		destroy_geom_disk(disk);
372 		free(disk, M_NVD);
373 	}
374 
375 	TAILQ_REMOVE(&ctrlr_head, ctrlr, tailq);
376 	free(ctrlr, M_NVD);
377 }
378 
379