xref: /freebsd/sys/dev/virtio/block/virtio_blk.c (revision 0d972b25f64dc1f52aff3fe09bc62cbaf332df83)
/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;
	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_BARRIER	0x0010
#define VTBLK_FLAG_WC_CONFIG	0x0020

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	struct vtblk_request	*vtblk_req_ordered;

	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct bio_queue	 vtblk_dump_queue;
	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_config_change(device_t);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static void	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_request_error(struct vtblk_request *);

static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *);
static void	vtblk_drain(struct vtblk_softc *);

static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_dump_quiesce(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);
static void	vtblk_dump_complete(struct vtblk_softc *);

static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
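/*
 * Example loader.conf(5) settings for the knobs above. These are the
 * global defaults; vtblk_tunable_int() below also checks a per-device
 * "hw.vtblk.<unit>.<knob>" override first.
 *
 *   hw.vtblk.no_ident="1"          # skip the GET_ID request at attach
 *   hw.vtblk.writecache_mode="0"   # 0 = writethrough, 1 = writeback,
 *                                  # -1 = use the device default
 */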

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER		| \
     VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME		"vtbd"
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
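
/*
 * A request is placed on the virtqueue as a single descriptor chain:
 *
 *   [ vbr_hdr ][ data segment(s), for BIO_READ/BIO_WRITE only ][ vbr_ack ]
 *
 * The header is always device-readable and the one-byte ack (status)
 * is always device-writable, hence the two-segment minimum above; the
 * data segments are device-writable for reads and device-readable for
 * writes (see vtblk_request_execute()).
 */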

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_mmio, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
	case MOD_QUIESCE:
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	virtio_set_feature_desc(dev, vtblk_feature_desc);

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_dump_queue);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);
	vtblk_setup_features(sc);

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * 512;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	vtblk_dump_quiesce(sc);

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	if (error || (virtual == NULL && offset == 0))
		vtblk_dump_complete(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}
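
/*
 * Note on vtblk_dump() above: the kernel dump code invokes it serially
 * with the rest of the system effectively stopped, which is why the
 * writes go through the polled vtblk_dump_write() path. A final call
 * with a NULL virtual address and zero offset (tested above) marks the
 * end of the dump, at which point the cache is flushed and any bios
 * parked by vtblk_dump_quiesce() are completed.
 */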

static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_done(NULL, bp, EINVAL);
		return;
	}

	/*
	 * Fail any write if read-only. Unfortunately, there does not
	 * seem to be a better way to report our read-only status to
	 * GEOM above.
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		vtblk_bio_done(sc, bp, EROFS);
		return;
	}

	VTBLK_LOCK(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		vtblk_bio_done(sc, bp, ENXIO);
		return;
	}

	bioq_insert_tail(&sc->vtblk_bioq, bp);
	vtblk_startio(sc);

	VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static void
vtblk_setup_features(struct vtblk_softc *sc)
{
	device_t dev;

	dev = sc->vtblk_dev;

	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}
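
/*
 * Worked example for vtblk_maximum_segments() above, assuming a
 * host-advertised seg_max of 128, 4KB pages, and the common MAXPHYS
 * of 128KB: MAXPHYS / PAGE_SIZE + 1 = 33, so
 * nsegs = 2 + MIN(128, 33) = 35 segments per request.
 */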

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
	device_t dev;
	struct disk *dp;
	int error;

	dev = sc->vtblk_dev;
	dp = sc->vtblk_disk;

	dp->d_mediasize = new_capacity;
	if (bootverbose) {
		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
		    (uintmax_t) dp->d_mediasize >> 20,
		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
		    dp->d_sectorsize);
	}

	error = disk_resize(dp, M_NOWAIT);
	if (error) {
		device_printf(dev,
		    "disk_resize(9) failed, error: %d\n", error);
	}
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO |
	    DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
		dp->d_dump = vtblk_dump;

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * 512;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = 512;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
	 * which is typically greater than MAXPHYS. Eventually we should
	 * just advertise MAXPHYS and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of
	 * non-page-aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */
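	/*
	 * Continuing the example above (35 segments per request), this
	 * yields d_maxsize = (35 - 2 - 1) * PAGE_SIZE = 32 pages, or
	 * 128KB with 4KB pages.
	 */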

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY)) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}
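
/*
 * For example, with a 128-entry virtqueue, vtblk_request_prealloc()
 * above allocates 128 requests when indirect descriptors are available
 * (each request then occupies a single ring slot), but only
 * 128 / 2 = 64 when every request consumes at least VTBLK_MIN_SEGMENTS
 * descriptors directly from the ring.
 */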

static void
vtblk_request_free(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

	while ((req = vtblk_request_dequeue(sc)) != NULL) {
		sc->vtblk_request_count--;
		free(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0,
	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL) {
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
		bzero(req, sizeof(struct vtblk_request));
	}

	return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = vtblk_request_next_ready(sc);
	if (req != NULL)
		return (req);

	return (vtblk_request_bio(sc));
}

static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BIO_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}

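/*
 * Per the VirtIO ring convention, all device-readable descriptors in a
 * chain must precede the device-writable ones. That is why
 * vtblk_request_execute() below appends the header first and the ack
 * byte last, and why the readable count can be derived by subtracting
 * the writable count from sg_nseg.
 */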
static int
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, error;

	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL)
			return (EBUSY);
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq))
				return (EBUSY);
			ordered = 1;
			req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER;
		}
	}

	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append_bio(sg, bp);
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	}

	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	return (error);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		vtblk_request_enqueue(sc, req);
	}
}

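/*
 * Completion is split into two phases: vtblk_queue_completed() above
 * collects finished bios while the softc lock is held, and
 * vtblk_done_completed() below calls biodone() on them only after the
 * lock has been dropped, since GEOM direct dispatch may reenter the
 * driver (see the assertion in vtblk_bio_done()).
 */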
static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct bio *bp, *tmp;

	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
		if (bp->bio_error != 0)
			disk_err(bp, "hard error", -1, 1);
		vtblk_bio_done(sc, bp, bp->bio_error);
	}
}

static void
vtblk_drain_vq(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue queue;
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;
	TAILQ_INIT(&queue);

	if (sc->vtblk_vq != NULL) {
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		vtblk_drain_vq(sc);
	}

	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		if (vtblk_request_execute(sc, req) != 0) {
			vtblk_request_requeue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

	/* Because of GEOM direct dispatch, we cannot hold any locks. */
	if (sc != NULL)
		VTBLK_LOCK_ASSERT_NOTOWNED(sc);

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	}

	biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
	if (virtio_with_feature(_dev, _feature)) {			\
		virtio_read_device_config(_dev,				\
		    offsetof(struct virtio_blk_config, _field),		\
		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
	}

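/*
 * For example, VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size,
 * blkcfg) below expands to a virtio_read_device_config() of the
 * blk_size field out of the device's config space, executed only when
 * the BlockSize feature was negotiated.
 */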
static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
}

#undef VTBLK_GET_CONFIG

static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_request_execute(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}
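
/*
 * vtblk_poll_request() above is only used where interrupt-driven
 * completion is unavailable: fetching the ident string at attach time
 * and writing kernel dumps. It requires the virtqueue to be otherwise
 * empty, and virtqueue_poll() spins until the host hands the lone
 * outstanding descriptor back.
 */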

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
	int error;

	VTBLK_LOCK_ASSERT(sc);
	error = 0;

	while (!virtqueue_empty(sc->vtblk_vq)) {
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
			error = EBUSY;
			break;
		}
	}

	return (error);
}

static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

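	/*
	 * Re-enabling interrupts can race with a completion that arrived
	 * after the final dequeue; virtqueue_enable_intr() returns nonzero
	 * in that case, so disable interrupts again and loop back to
	 * service the queue rather than lose the notification.
	 */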
again:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

	/*
	 * Spin here until all the requests in-flight at the time of the
	 * dump are completed and queued. The queued requests will be
	 * biodone'd once the dump is finished.
	 */
	while (!virtqueue_empty(sc->vtblk_vq))
		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}

static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

	vtblk_dump_flush(sc);

	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
	VTBLK_LOCK(sc);
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	VTBLK_LOCK(sc);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	VTBLK_UNLOCK(sc);

	return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}
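
/*
 * The node registered above lives under the device's sysctl tree, so
 * for the first unit the cache mode can be changed at runtime with:
 *
 *   sysctl dev.vtblk.0.writecache_mode=1
 *
 * Writes are rejected with EPERM unless the ConfigWCE feature was
 * negotiated (see vtblk_write_cache_sysctl() above).
 */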

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}