/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <geom/geom_disk.h>
#include <vm/uma.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

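/*
 * Bookkeeping for a single in-flight request: the header is read by
 * the host, the status byte (vbr_ack) is written back by the host,
 * and vbr_link chains the request on the softc's free or ready lists.
 */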
struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;

	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACHING	0x0004
#define VTBLK_FLAG_SUSPENDED	0x0008
#define VTBLK_FLAG_DUMPING	0x0010

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;

	struct taskqueue	*vtblk_tq;
	struct task		 vtblk_intr_task;

	int			 vtblk_sector_size;
	int			 vtblk_max_nsegs;
	int			 vtblk_unit;
	int			 vtblk_request_count;

	struct vtblk_request	 vtblk_dump_request;
};

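/*
 * Human-readable names for the block feature bits, registered with
 * virtio_set_feature_desc() so the bus can report negotiated features
 * symbolically rather than as raw bit values.
 */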
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_intr_task(void *, int);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_get_ident(struct vtblk_softc *);
static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void	vtblk_enqueue_ready(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_bio_error(struct bio *, int);

/* Tunables. */
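/* Set hw.vtblk.no_ident=1 in loader.conf(5) to skip the GET_ID request. */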
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER		| \
     VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH			| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VTBLK Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_TRYLOCK(_sc)	mtx_trylock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_BIO_SEGMENTS(_bp)	sglist_count((_bp)->bio_data, (_bp)->bio_bcount)

#define VTBLK_DISK_NAME		"vtbd"

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static uma_zone_t vtblk_req_zone;

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	{ 0, 0 }
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

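/*
 * Module event handler: create the request UMA zone on load, and
 * refuse to quiesce or unload while any requests remain allocated.
 */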
static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		vtblk_req_zone = uma_zcreate("vtblk_request",
		    sizeof(struct vtblk_request),
		    NULL, NULL, NULL, NULL, 0, 0);
		break;
	case MOD_QUIESCE:
	case MOD_UNLOAD:
		if (uma_zone_get_cur(vtblk_req_zone) > 0)
			error = EBUSY;
		else if (type == MOD_UNLOAD) {
			uma_zdestroy(vtblk_req_zone);
			vtblk_req_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

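/*
 * Attach: negotiate features, read the device configuration, size and
 * allocate the scatter/gather list, virtqueue, and request pool, then
 * create the disk and start the completion taskqueue.
 */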
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	sc->vtblk_unit = device_get_unit(dev);

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;

	/* Get local copy of config. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) == 0) {
		bzero(&blkcfg, sizeof(struct virtio_blk_config));
		virtio_read_device_config(dev, 0, &blkcfg,
		    offsetof(struct virtio_blk_config, physical_block_exp));
	} else
		virtio_read_device_config(dev, 0, &blkcfg,
		    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);

	/*
	 * Allocate working sglist. The number of segments may be too
	 * large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc);
	sc->vtblk_tq = taskqueue_create_fast("vtblk_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->vtblk_tq);
	if (sc->vtblk_tq == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate taskqueue\n");
		goto fail;
	}
	taskqueue_start_threads(&sc->vtblk_tq, 1, PI_DISK, "%s taskq",
	    device_get_nameunit(dev));

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	if (sc->vtblk_tq != NULL) {
		taskqueue_drain(sc->vtblk_tq, &sc->vtblk_intr_task);
		taskqueue_free(sc->vtblk_tq);
		sc->vtblk_tq = NULL;
	}

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
	/* TODO: Wait for any in-flight I/O to complete? */
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
	/* TODO: Resume I/O? */
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

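/*
 * Kernel dump entry point, typically reached from panic context with
 * interrupts unusable; the softc mutex is only trylocked and each
 * request is executed by polling the virtqueue.
 */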
static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	if (VTBLK_TRYLOCK(sc) == 0) {
		device_printf(sc->vtblk_dev,
		    "softc already locked, cannot dump...\n");
		return (EBUSY);
	}

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (length > 0)
		error = vtblk_write_dump(sc, virtual, offset, length);
	else if (virtual == NULL && offset == 0)
		error = vtblk_flush_dump(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}

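/*
 * GEOM strategy routine: validate the bio, sort it into the bio
 * queue, and try to start I/O immediately if the virtqueue has room.
 */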
static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_error(bp, EINVAL);
		return;
	}

	/*
	 * Fail any write if the device is read-only. Unfortunately,
	 * there does not seem to be a better way to report our
	 * read-only status to GEOM above.
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		vtblk_bio_error(bp, EROFS);
		return;
	}

	/*
	 * Prevent read/write buffers spanning too many segments from
	 * getting into the queue. This should only trip if d_maxsize
	 * was incorrectly set.
	 */
	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
		    VTBLK_MIN_SEGMENTS,
		    ("bio spanned too many segments: %d, max: %d",
		    VTBLK_BIO_SEGMENTS(bp),
		    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
	}

	VTBLK_LOCK(sc);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
		bioq_disksort(&sc->vtblk_bioq, bp);
		vtblk_startio(sc);
	} else
		vtblk_bio_error(bp, ENXIO);
	VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

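/*
 * Compute the most segments a request can need: VTBLK_MIN_SEGMENTS
 * for the header and status byte, plus one segment per data page,
 * capped by the host's seg_max and, when indirect descriptors are
 * negotiated, by what fits in an indirect descriptor chain.
 */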
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = sc->vtblk_unit;
	dp->d_drv1 = sc;

	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
		dp->d_dump = vtblk_dump;

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * 512;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		sc->vtblk_sector_size = blkcfg->blk_size;
	else
		sc->vtblk_sector_size = 512;
	dp->d_sectorsize = sc->vtblk_sector_size;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
	 * which is typically greater than MAXPHYS. Eventually we should
	 * just advertise MAXPHYS and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of
	 * non-page-aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
		dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	/*
	 * Retrieving the identification string must be done after
	 * the virtqueue interrupt is set up; otherwise it will hang.
	 */
	vtblk_get_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

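/*
 * Submit as many requests as the virtqueue will hold, preferring
 * previously deferred requests on the ready list over new bios, and
 * notify the host only once at the end.
 */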
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	VTBLK_LOCK_ASSERT(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
		return;

	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

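/*
 * Convert the head of the bio queue into a VirtIO block request.
 * Sector numbers are always in 512-byte units, regardless of the
 * advertised block size.
 */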
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BIO_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}

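/*
 * Build the descriptor chain for a request and enqueue it. Per the
 * VirtIO spec, all host-readable segments (the header, plus the data
 * buffer for writes) must precede the host-writable segments (the
 * data buffer for reads, plus the status byte).
 */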
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	writable = 0;

	VTBLK_LOCK_ASSERT(sc);

	sglist_reset(sg);
	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
	    sg->sg_nseg - writable, writable);

	return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;

	sc = xsc;

	virtqueue_disable_intr(sc->vtblk_vq);
	taskqueue_enqueue_fast(sc->vtblk_tq, &sc->vtblk_intr_task);

	return (1);
}

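/*
 * Completion path, run from the taskqueue: retire each finished
 * request with biodone(), restart pending I/O, and re-enable the
 * virtqueue interrupt, rescheduling ourselves if more completions
 * raced in.
 */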
static void
vtblk_intr_task(void *arg, int pending)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bp;

	sc = arg;
	vq = sc->vtblk_vq;

	VTBLK_LOCK(sc);
	if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
		VTBLK_UNLOCK(sc);
		return;
	}

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bp = req->vbr_bp;

		if (req->vbr_ack == VIRTIO_BLK_S_OK)
			bp->bio_resid = 0;
		else {
			bp->bio_flags |= BIO_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
				bp->bio_error = ENOTSUP;
			else
				bp->bio_error = EIO;
		}

		biodone(bp);
		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		VTBLK_UNLOCK(sc);
		taskqueue_enqueue_fast(sc->vtblk_tq,
		    &sc->vtblk_intr_task);
		return;
	}

	VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

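/*
 * Issue a VIRTIO_BLK_T_GET_ID request by polling to fill in the
 * disk's identification string. The bio is a throwaway stack copy
 * used only to carry the buffer through vtblk_execute_request().
 */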
static void
vtblk_get_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_no_ident != 0)
		return;

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	vtblk_enqueue_request(sc, req);
	VTBLK_UNLOCK(sc);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0)
		panic("cannot reinit VirtIO block device during dump");

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

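/*
 * Execute a request synchronously by spinning on the virtqueue, for
 * contexts where interrupts are unavailable or undesirable (dumping,
 * fetching the ident at attach time).
 */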
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	device_t dev;
	struct virtqueue *vq;
	struct vtblk_request *r;
	int error;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);

	r = virtqueue_poll(vq, NULL);
	KASSERT(r == req, ("unexpected request response"));

	if (req->vbr_ack != VIRTIO_BLK_S_OK) {
		error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
		if (bootverbose)
			device_printf(dev,
			    "vtblk_poll_request: IO error: %d\n", error);
	}

	return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_bio_error(req->vbr_bp, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
		vtblk_bio_error(req->vbr_bp, ENXIO);
		vtblk_enqueue_request(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_error(bp, ENXIO);
	}

	vtblk_free_requests(sc);
}

static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, size;

	size = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full.
	 * Each request consumes VTBLK_MIN_SEGMENTS or more descriptors,
	 * so reduce the number allocated when indirect descriptors are
	 * not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		size /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < size; i++) {
		req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		uma_zfree(vtblk_req_zone, req);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bp, int error)
{

	biofinish(bp, NULL, error);
}