/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;
	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACH	0x0004
#define VTBLK_FLAG_SUSPEND	0x0008
#define VTBLK_FLAG_DUMPING	0x0010
#define VTBLK_FLAG_BARRIER	0x0020
#define VTBLK_FLAG_WC_CONFIG	0x0040

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	struct vtblk_request	*vtblk_req_ordered;

	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_WCE,		"WriteCache"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_config_change(device_t);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static void	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_request_error(struct vtblk_request *);

static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_dump_prepare(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);

static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);

/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER		| \
     VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_WCE			| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME		"vtbd"
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_mmio, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
	case MOD_QUIESCE:
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

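/*
 * Attach path: negotiate features, size the request sglist from the
 * host-provided limits, allocate the virtqueue and request pool, and
 * only then create the disk(9) instance and enable interrupts.
 */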
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	virtio_set_feature_desc(dev, vtblk_feature_desc);

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);
	vtblk_setup_features(sc);

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure the host's
	 * limit is at least as large as the maximum supported
	 * transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * 512;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_dump_prepare(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	else if (virtual == NULL && offset == 0)
		error = vtblk_dump_flush(sc);
	else {
		error = EINVAL;
		sc->vtblk_flags &= ~VTBLK_FLAG_DUMPING;
	}

	VTBLK_UNLOCK(sc);

	return (error);
}

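/*
 * disk(9) strategy routine: the entry point for every bio handed down
 * by GEOM. Requests are queued locally and submitted to the host by
 * vtblk_startio().
 */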
static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_done(NULL, bp, EINVAL);
		return;
	}

	/*
	 * Fail any write if the device is read-only. Unfortunately,
	 * there does not seem to be a better way to report our
	 * read-only state to GEOM above.
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		vtblk_bio_done(sc, bp, EROFS);
		return;
	}

	VTBLK_LOCK(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		vtblk_bio_done(sc, bp, ENXIO);
		return;
	}

	bioq_insert_tail(&sc->vtblk_bioq, bp);
	vtblk_startio(sc);

	VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static void
vtblk_setup_features(struct vtblk_softc *sc)
{
	device_t dev;

	dev = sc->vtblk_dev;

	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG;
}

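/*
 * Compute the virtqueue segment budget for a single request: the two
 * segments for the header and status byte, plus the data segments,
 * capped by the indirect descriptor limit when that feature is in use.
 */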
static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
	device_t dev;
	struct disk *dp;
	int error;

	dev = sc->vtblk_dev;
	dp = sc->vtblk_disk;

	dp->d_mediasize = new_capacity;
	if (bootverbose) {
		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
		    (uintmax_t) dp->d_mediasize >> 20,
		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
		    dp->d_sectorsize);
	}

	error = disk_resize(dp, M_NOWAIT);
	if (error) {
		device_printf(dev,
		    "disk_resize(9) failed, error: %d\n", error);
	}
}

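/*
 * Populate the disk(9) structure from the negotiated features and the
 * device configuration. disk_create() itself is deferred until attach
 * has set up interrupts; see vtblk_create_disk().
 */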
static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO |
	    DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
		dp->d_dump = vtblk_dump;

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * 512;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = 512;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
	 * which is typically greater than MAXPHYS. Eventually we should
	 * just advertise MAXPHYS and split buffers that are too big.
	 *
	 * Note that we must subtract one additional segment to allow
	 * for non-page-aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY)) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}

static void
vtblk_request_free(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

	while ((req = vtblk_request_dequeue(sc)) != NULL) {
		sc->vtblk_request_count--;
		free(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0,
	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL) {
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
		bzero(req, sizeof(struct vtblk_request));
	}

	return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = vtblk_request_next_ready(sc);
	if (req != NULL)
		return (req);

	return (vtblk_request_bio(sc));
}

static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BIO_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}

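/*
 * Enqueue a request into the virtqueue. The descriptor layout is the
 * header (device-readable), the optional data buffer, and the one-byte
 * status (device-writable); for reads, the data segments are writable
 * as well. Returns EBUSY when emulating a barrier that cannot yet be
 * submitted.
 */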
static int
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, error;

	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL)
			return (EBUSY);
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq))
				return (EBUSY);
			ordered = 1;
			req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER;
		}
	}

	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append_bio(sg, bp);
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	}

	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	return (error);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

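/*
 * Harvest completed requests from the virtqueue, recording each bio's
 * status and collecting the bios on the caller's queue. The request
 * structures themselves are recycled onto the free list.
 */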
static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		vtblk_request_enqueue(sc, req);
	}
}

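/*
 * Complete the collected bios. This runs without the softc lock held
 * since biodone() may be directly dispatched by GEOM.
 */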
static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct bio *bp, *tmp;

	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
		if (bp->bio_error != 0)
			disk_err(bp, "hard error", -1, 1);
		vtblk_bio_done(sc, bp, bp->bio_error);
	}
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_bio_done(sc, req->vbr_bp, ENXIO);

		vtblk_request_enqueue(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue queue;
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;
	TAILQ_INIT(&queue);

	if (sc->vtblk_vq != NULL) {
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		vtblk_drain_vq(sc, 0);
	}

	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}

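/*
 * Submit as many queued requests as the virtqueue will hold, then
 * notify the host once rather than per request.
 */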
static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		if (vtblk_request_execute(sc, req) != 0) {
			vtblk_request_requeue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

	/* Because of GEOM direct dispatch, we cannot hold any locks. */
	if (sc != NULL)
		VTBLK_LOCK_ASSERT_NOTOWNED(sc);

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	}

	biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
	if (virtio_with_feature(_dev, _feature)) {			\
		virtio_read_device_config(_dev,				\
		    offsetof(struct virtio_blk_config, _field),		\
		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
	}

static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg);
}

#undef VTBLK_GET_CONFIG

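/*
 * Issue a synchronous VIRTIO_BLK_T_GET_ID request to fill in the
 * disk's d_ident. Failure here is not fatal; the disk simply lacks
 * an identifier.
 */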
static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

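/*
 * Execute a single request synchronously by polling the virtqueue for
 * its completion. The virtqueue must otherwise be empty; this is used
 * only for the ident request and the kernel dump path.
 */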
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_request_execute(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

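/*
 * Wait, up to VTBLK_QUIESCE_TIMEOUT, for the in-flight requests to be
 * completed by the host; the interrupt handler performs the wakeup.
 */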
static int
vtblk_quiesce(struct vtblk_softc *sc)
{
	int error;

	VTBLK_LOCK_ASSERT(sc);
	error = 0;

	while (!virtqueue_empty(sc->vtblk_vq)) {
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
			error = EBUSY;
			break;
		}
	}

	return (error);
}

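/*
 * Virtqueue interrupt handler. The enable/retry loop closes the race
 * where a request completes between draining the virtqueue and
 * re-enabling interrupts.
 */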
static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

again:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

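/*
 * Ready the device for a kernel dump: interrupts are unusable at this
 * point, so the device is reset and reinitialized for strictly polled
 * operation.
 */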
static void
vtblk_dump_prepare(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0) {
		panic("%s: cannot reinit VirtIO block device during dump",
		    device_get_nameunit(dev));
	}

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, writeback), wc);
}

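/*
 * Determine the initial cache mode: prefer the writecache_mode tunable
 * when the cache is configurable, otherwise follow the mode reported
 * by the config space or the WCE feature.
 */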
static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->writeback;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	VTBLK_LOCK(sc);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	VTBLK_UNLOCK(sc);

	return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, vtblk_write_cache_sysctl,
	    "I", "Write cache mode (writethrough (0) or writeback (1))");
}

static int
vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
{
	char path[64];

	snprintf(path, sizeof(path),
	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
	TUNABLE_INT_FETCH(path, &def);

	return (def);
}