xref: /freebsd/sys/dev/virtio/block/virtio_blk.c (revision 3110d4ebd6c0848cf5e25890d01791bb407e2a9b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /* Driver for VirtIO block devices. */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/bio.h>
38 #include <sys/malloc.h>
39 #include <sys/module.h>
40 #include <sys/sglist.h>
41 #include <sys/sysctl.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/queue.h>
45 
46 #include <geom/geom.h>
47 #include <geom/geom_disk.h>
48 
49 #include <machine/bus.h>
50 #include <machine/resource.h>
51 #include <sys/bus.h>
52 #include <sys/rman.h>
53 
54 #include <dev/virtio/virtio.h>
55 #include <dev/virtio/virtqueue.h>
56 #include <dev/virtio/block/virtio_blk.h>
57 
58 #include "virtio_if.h"
59 
/*
 * Per-I/O bookkeeping for one request handed to the virtqueue.
 */
struct vtblk_request {
	/* Request header; appended as the first sglist segment. */
	struct virtio_blk_outhdr	 vbr_hdr;
	/* The bio this request carries out. */
	struct bio			*vbr_bp;
	/* Status byte; appended as the last, host-writable segment. */
	uint8_t				 vbr_ack;
	/* Linkage on the softc's free list or ready list. */
	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};
66 
/* Write cache modes recorded in the softc's vtblk_write_cache field. */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	/* NOTE(review): presumably an upper bound, not a real mode - confirm. */
	VTBLK_CACHE_MAX
};
72 
/*
 * Per-device driver state. The mutex is taken around the flag updates
 * and queue manipulation done by the strategy, suspend/resume, detach
 * and dump paths.
 */
struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	/* Feature bits returned by virtio_negotiate_features(). */
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
	/* Set at attach from the negotiated features, or by detach/suspend. */
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_DETACH	0x0002
#define VTBLK_FLAG_SUSPEND	0x0004
#define VTBLK_FLAG_BARRIER	0x0008
#define VTBLK_FLAG_WCE_CONFIG	0x0010

	/* The single request virtqueue. */
	struct virtqueue	*vtblk_vq;
	/* Scratch sglist, reset and refilled for every request. */
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	/* Bios accepted by vtblk_strategy() and not yet turned into requests. */
	struct bio_queue_head	 vtblk_bioq;
	/* Preallocated requests that are currently unused. */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	/* Requests that failed to enqueue and are retried first. */
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	/* In-flight request whose ordering is being emulated, if any. */
	struct vtblk_request	*vtblk_req_ordered;

	/* Segment limit; sizes the sglist and the virtqueue allocation. */
	int			 vtblk_max_nsegs;
	/* Number of preallocated requests not yet freed. */
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	/* NOTE(review): used by the dump helpers, which are outside this view. */
	struct bio_queue	 vtblk_dump_queue;
	struct vtblk_request	 vtblk_dump_request;
};
102 
/*
 * Human-readable names for the block device feature bits. The table is
 * handed to virtio_set_feature_desc() in vtblk_attach() and ends with a
 * { 0, NULL } entry.
 */
static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},

	{ 0, NULL }
};
120 
/* Module event handler. */
static int	vtblk_modevent(module_t, int, void *);

/* Device and VirtIO bus methods (see vtblk_methods). */
static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_config_change(device_t);

/* Disk entry points installed on the struct disk by vtblk_alloc_disk(). */
static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

/* Feature negotiation, virtqueue and disk setup. */
static int	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

/* Request pool management, request construction and submission. */
static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_request_error(struct vtblk_request *);

/* Completion collection and draining. */
static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *);
static void	vtblk_drain(struct vtblk_softc *);

/* I/O submission loop and bio completion. */
static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

/* Device configuration, polling, interrupt and stop helpers. */
static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

/* Helpers used by vtblk_dump(). */
static void	vtblk_dump_quiesce(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);
static void	vtblk_dump_complete(struct vtblk_softc *);

/* Write cache handling. */
static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

/* Sysctl and tunable helpers. */
static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
197 
/*
 * vtblk_modern() is true when VIRTIO_F_VERSION_1 is among the negotiated
 * features. The wrappers below pass that result to the virtio_htogNN() /
 * virtio_gtohNN() helpers along with the value to convert.
 * NOTE(review): presumably host-to-guest and guest-to-host byte order
 * conversions - confirm against the virtio headers.
 */
#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val)	virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val)	virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val)	virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val)	virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val)	virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val)	virtio_gtoh64(vtblk_modern(_sc), _val)
205 
/*
 * Tunables, registered as "hw.vtblk.no_ident" and
 * "hw.vtblk.writecache_mode". The code that consumes them is not part
 * of this section of the file.
 */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
211 
/*
 * Feature sets passed to virtio_negotiate_features(). Modern buses get
 * the common set; legacy buses additionally get VIRTIO_BLK_F_BARRIER.
 */
#define VTBLK_COMMON_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH			| \
     VIRTIO_BLK_F_TOPOLOGY		| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_BLK_F_DISCARD		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)
226 
/*
 * Softc mutex helpers. VTBLK_MTX() yields a pointer to the softc's mutex;
 * its expansion is fully parenthesized so it stays a single operand no
 * matter what expression it is used in. The remaining macros wrap the
 * mtx(9) calls on that mutex.
 */
#define VTBLK_MTX(_sc)		(&(_sc)->vtblk_mtx)
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
237 
/* Disk name assigned to d_name in vtblk_alloc_disk(). */
#define VTBLK_DISK_NAME		"vtbd"
/* NOTE(review): 30 * hz, presumably 30 seconds in ticks; used outside this view. */
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
/* Unit, in bytes, of the device capacity and of request sector numbers. */
#define VTBLK_BSIZE		512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2
247 
/* Method table referenced by vtblk_driver. */
static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};
262 
/* Driver description: name, method table and per-device softc size. */
static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

/* Module registration; vtblk_modevent() receives the module events. */
VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

/* PNP match information used by VIRTIO_SIMPLE_PROBE() in vtblk_probe(). */
VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");
276 
277 static int
278 vtblk_modevent(module_t mod, int type, void *unused)
279 {
280 	int error;
281 
282 	error = 0;
283 
284 	switch (type) {
285 	case MOD_LOAD:
286 	case MOD_QUIESCE:
287 	case MOD_UNLOAD:
288 	case MOD_SHUTDOWN:
289 		break;
290 	default:
291 		error = EOPNOTSUPP;
292 		break;
293 	}
294 
295 	return (error);
296 }
297 
298 static int
299 vtblk_probe(device_t dev)
300 {
301 	return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
302 }
303 
/*
 * Attach method. Initializes the softc, negotiates features, reads the
 * device configuration, then allocates the sglist, the virtqueue, the
 * request pool and the disk, sets up the interrupt and finally creates
 * the disk and enables virtqueue interrupts.
 *
 * Returns 0 on success or an errno value. Every failure funnels through
 * the "fail" label, where vtblk_detach() releases whatever was set up.
 */
static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	virtio_set_feature_desc(dev, vtblk_feature_desc);

	/* The lock and queues must exist before any path can reach detach. */
	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_dump_queue);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);

	error = vtblk_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot setup features\n");
		goto fail;
	}

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < maxphys) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	/* Need room for at least one data segment beyond header and status. */
	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	/* The pool is sized from the virtqueue, so it comes after it. */
	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

	/* Reached on success too; error is 0 there and nothing is undone. */
fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}
391 
/*
 * Detach method, also used by vtblk_attach() to unwind a failed attach.
 * Marks the softc as detaching (vtblk_open() and vtblk_strategy() return
 * ENXIO once the flag is set), stops the device if it was attached,
 * drains all queued and in-flight I/O, and then releases the disk, the
 * sglist and the mutex. Always returns 0.
 */
static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	/* Called unlocked: vtblk_bio_done() asserts the mutex is not held. */
	vtblk_drain(sc);

	/* Either pointer may still be NULL if attach failed part way. */
	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}
421 
422 static int
423 vtblk_suspend(device_t dev)
424 {
425 	struct vtblk_softc *sc;
426 	int error;
427 
428 	sc = device_get_softc(dev);
429 
430 	VTBLK_LOCK(sc);
431 	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
432 	/* XXX BMV: virtio_stop(), etc needed here? */
433 	error = vtblk_quiesce(sc);
434 	if (error)
435 		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
436 	VTBLK_UNLOCK(sc);
437 
438 	return (error);
439 }
440 
441 static int
442 vtblk_resume(device_t dev)
443 {
444 	struct vtblk_softc *sc;
445 
446 	sc = device_get_softc(dev);
447 
448 	VTBLK_LOCK(sc);
449 	/* XXX BMV: virtio_reinit(), etc needed here? */
450 	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
451 	vtblk_startio(sc);
452 	VTBLK_UNLOCK(sc);
453 
454 	return (0);
455 }
456 
/* Shutdown method: nothing is done here; always returns 0. */
static int
vtblk_shutdown(device_t dev)
{

	return (0);
}
463 
464 static int
465 vtblk_config_change(device_t dev)
466 {
467 	struct vtblk_softc *sc;
468 	struct virtio_blk_config blkcfg;
469 	uint64_t capacity;
470 
471 	sc = device_get_softc(dev);
472 
473 	vtblk_read_config(sc, &blkcfg);
474 
475 	/* Capacity is always in 512-byte units. */
476 	capacity = blkcfg.capacity * VTBLK_BSIZE;
477 
478 	if (sc->vtblk_disk->d_mediasize != capacity)
479 		vtblk_resize_disk(sc, capacity);
480 
481 	return (0);
482 }
483 
484 static int
485 vtblk_open(struct disk *dp)
486 {
487 	struct vtblk_softc *sc;
488 
489 	if ((sc = dp->d_drv1) == NULL)
490 		return (ENXIO);
491 
492 	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
493 }
494 
495 static int
496 vtblk_close(struct disk *dp)
497 {
498 	struct vtblk_softc *sc;
499 
500 	if ((sc = dp->d_drv1) == NULL)
501 		return (ENXIO);
502 
503 	return (0);
504 }
505 
506 static int
507 vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
508     struct thread *td)
509 {
510 	struct vtblk_softc *sc;
511 
512 	if ((sc = dp->d_drv1) == NULL)
513 		return (ENXIO);
514 
515 	return (ENOTTY);
516 }
517 
518 static int
519 vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
520     size_t length)
521 {
522 	struct disk *dp;
523 	struct vtblk_softc *sc;
524 	int error;
525 
526 	dp = arg;
527 	error = 0;
528 
529 	if ((sc = dp->d_drv1) == NULL)
530 		return (ENXIO);
531 
532 	VTBLK_LOCK(sc);
533 
534 	vtblk_dump_quiesce(sc);
535 
536 	if (length > 0)
537 		error = vtblk_dump_write(sc, virtual, offset, length);
538 	if (error || (virtual == NULL && offset == 0))
539 		vtblk_dump_complete(sc);
540 
541 	VTBLK_UNLOCK(sc);
542 
543 	return (error);
544 }
545 
546 static void
547 vtblk_strategy(struct bio *bp)
548 {
549 	struct vtblk_softc *sc;
550 
551 	if ((sc = bp->bio_disk->d_drv1) == NULL) {
552 		vtblk_bio_done(NULL, bp, EINVAL);
553 		return;
554 	}
555 
556 	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
557 	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
558 		vtblk_bio_done(sc, bp, EOPNOTSUPP);
559 		return;
560 	}
561 
562 	VTBLK_LOCK(sc);
563 
564 	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
565 		VTBLK_UNLOCK(sc);
566 		vtblk_bio_done(sc, bp, ENXIO);
567 		return;
568 	}
569 
570 	bioq_insert_tail(&sc->vtblk_bioq, bp);
571 	vtblk_startio(sc);
572 
573 	VTBLK_UNLOCK(sc);
574 }
575 
576 static int
577 vtblk_negotiate_features(struct vtblk_softc *sc)
578 {
579 	device_t dev;
580 	uint64_t features;
581 
582 	dev = sc->vtblk_dev;
583 	features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
584 	    VTBLK_LEGACY_FEATURES;
585 
586 	sc->vtblk_features = virtio_negotiate_features(dev, features);
587 	return (virtio_finalize_features(dev));
588 }
589 
590 static int
591 vtblk_setup_features(struct vtblk_softc *sc)
592 {
593 	device_t dev;
594 	int error;
595 
596 	dev = sc->vtblk_dev;
597 
598 	error = vtblk_negotiate_features(sc);
599 	if (error)
600 		return (error);
601 
602 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
603 		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
604 	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
605 		sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;
606 
607 	/* Legacy. */
608 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
609 		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
610 
611 	return (0);
612 }
613 
614 static int
615 vtblk_maximum_segments(struct vtblk_softc *sc,
616     struct virtio_blk_config *blkcfg)
617 {
618 	device_t dev;
619 	int nsegs;
620 
621 	dev = sc->vtblk_dev;
622 	nsegs = VTBLK_MIN_SEGMENTS;
623 
624 	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
625 		nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
626 		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
627 			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
628 	} else
629 		nsegs += 1;
630 
631 	return (nsegs);
632 }
633 
634 static int
635 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
636 {
637 	device_t dev;
638 	struct vq_alloc_info vq_info;
639 
640 	dev = sc->vtblk_dev;
641 
642 	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
643 	    vtblk_vq_intr, sc, &sc->vtblk_vq,
644 	    "%s request", device_get_nameunit(dev));
645 
646 	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
647 }
648 
649 static void
650 vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
651 {
652 	device_t dev;
653 	struct disk *dp;
654 	int error;
655 
656 	dev = sc->vtblk_dev;
657 	dp = sc->vtblk_disk;
658 
659 	dp->d_mediasize = new_capacity;
660 	if (bootverbose) {
661 		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
662 		    (uintmax_t) dp->d_mediasize >> 20,
663 		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
664 		    dp->d_sectorsize);
665 	}
666 
667 	error = disk_resize(dp, M_NOWAIT);
668 	if (error) {
669 		device_printf(dev,
670 		    "disk_resize(9) failed, error: %d\n", error);
671 	}
672 }
673 
674 static void
675 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
676 {
677 	device_t dev;
678 	struct disk *dp;
679 
680 	dev = sc->vtblk_dev;
681 
682 	sc->vtblk_disk = dp = disk_alloc();
683 	dp->d_open = vtblk_open;
684 	dp->d_close = vtblk_close;
685 	dp->d_ioctl = vtblk_ioctl;
686 	dp->d_strategy = vtblk_strategy;
687 	dp->d_name = VTBLK_DISK_NAME;
688 	dp->d_unit = device_get_unit(dev);
689 	dp->d_drv1 = sc;
690 	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
691 	dp->d_hba_vendor = virtio_get_vendor(dev);
692 	dp->d_hba_device = virtio_get_device(dev);
693 	dp->d_hba_subvendor = virtio_get_subvendor(dev);
694 	dp->d_hba_subdevice = virtio_get_subdevice(dev);
695 
696 	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
697 		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
698 	else {
699 		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
700 			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
701 		dp->d_dump = vtblk_dump;
702 	}
703 
704 	/* Capacity is always in 512-byte units. */
705 	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
706 
707 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
708 		dp->d_sectorsize = blkcfg->blk_size;
709 	else
710 		dp->d_sectorsize = VTBLK_BSIZE;
711 
712 	/*
713 	 * The VirtIO maximum I/O size is given in terms of segments.
714 	 * However, FreeBSD limits I/O size by logical buffer size, not
715 	 * by physically contiguous pages. Therefore, we have to assume
716 	 * no pages are contiguous. This may impose an artificially low
717 	 * maximum I/O size. But in practice, since QEMU advertises 128
718 	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
719 	 * which is typically greater than maxphys. Eventually we should
720 	 * just advertise maxphys and split buffers that are too big.
721 	 *
722 	 * Note we must subtract one additional segment in case of non
723 	 * page aligned buffers.
724 	 */
725 	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
726 	    PAGE_SIZE;
727 	if (dp->d_maxsize < PAGE_SIZE)
728 		dp->d_maxsize = PAGE_SIZE; /* XXX */
729 
730 	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
731 		dp->d_fwsectors = blkcfg->geometry.sectors;
732 		dp->d_fwheads = blkcfg->geometry.heads;
733 	}
734 
735 	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
736 	    blkcfg->topology.physical_block_exp > 0) {
737 		dp->d_stripesize = dp->d_sectorsize *
738 		    (1 << blkcfg->topology.physical_block_exp);
739 		dp->d_stripeoffset = (dp->d_stripesize -
740 		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
741 		    dp->d_stripesize;
742 	}
743 
744 	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
745 		dp->d_flags |= DISKFLAG_CANDELETE;
746 		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
747 	}
748 
749 	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
750 		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
751 	else
752 		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
753 }
754 
755 static void
756 vtblk_create_disk(struct vtblk_softc *sc)
757 {
758 	struct disk *dp;
759 
760 	dp = sc->vtblk_disk;
761 
762 	vtblk_ident(sc);
763 
764 	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
765 	    (uintmax_t) dp->d_mediasize >> 20,
766 	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
767 	    dp->d_sectorsize);
768 
769 	disk_create(dp, DISK_VERSION);
770 }
771 
772 static int
773 vtblk_request_prealloc(struct vtblk_softc *sc)
774 {
775 	struct vtblk_request *req;
776 	int i, nreqs;
777 
778 	nreqs = virtqueue_size(sc->vtblk_vq);
779 
780 	/*
781 	 * Preallocate sufficient requests to keep the virtqueue full. Each
782 	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
783 	 * the number allocated when indirect descriptors are not available.
784 	 */
785 	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
786 		nreqs /= VTBLK_MIN_SEGMENTS;
787 
788 	for (i = 0; i < nreqs; i++) {
789 		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
790 		if (req == NULL)
791 			return (ENOMEM);
792 
793 		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
794 		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
795 
796 		sc->vtblk_request_count++;
797 		vtblk_request_enqueue(sc, req);
798 	}
799 
800 	return (0);
801 }
802 
803 static void
804 vtblk_request_free(struct vtblk_softc *sc)
805 {
806 	struct vtblk_request *req;
807 
808 	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
809 
810 	while ((req = vtblk_request_dequeue(sc)) != NULL) {
811 		sc->vtblk_request_count--;
812 		free(req, M_DEVBUF);
813 	}
814 
815 	KASSERT(sc->vtblk_request_count == 0,
816 	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
817 }
818 
819 static struct vtblk_request *
820 vtblk_request_dequeue(struct vtblk_softc *sc)
821 {
822 	struct vtblk_request *req;
823 
824 	req = TAILQ_FIRST(&sc->vtblk_req_free);
825 	if (req != NULL) {
826 		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
827 		bzero(req, sizeof(struct vtblk_request));
828 	}
829 
830 	return (req);
831 }
832 
/* Return a request to the head of the free list. */
static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}
839 
840 static struct vtblk_request *
841 vtblk_request_next_ready(struct vtblk_softc *sc)
842 {
843 	struct vtblk_request *req;
844 
845 	req = TAILQ_FIRST(&sc->vtblk_req_ready);
846 	if (req != NULL)
847 		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
848 
849 	return (req);
850 }
851 
/*
 * Put a request that could not be executed at the head of the ready
 * list, so vtblk_request_next() hands it out before building new ones.
 */
static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}
859 
860 static struct vtblk_request *
861 vtblk_request_next(struct vtblk_softc *sc)
862 {
863 	struct vtblk_request *req;
864 
865 	req = vtblk_request_next_ready(sc);
866 	if (req != NULL)
867 		return (req);
868 
869 	return (vtblk_request_bio(sc));
870 }
871 
872 static struct vtblk_request *
873 vtblk_request_bio(struct vtblk_softc *sc)
874 {
875 	struct bio_queue_head *bioq;
876 	struct vtblk_request *req;
877 	struct bio *bp;
878 
879 	bioq = &sc->vtblk_bioq;
880 
881 	if (bioq_first(bioq) == NULL)
882 		return (NULL);
883 
884 	req = vtblk_request_dequeue(sc);
885 	if (req == NULL)
886 		return (NULL);
887 
888 	bp = bioq_takefirst(bioq);
889 	req->vbr_bp = bp;
890 	req->vbr_ack = -1;
891 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
892 
893 	switch (bp->bio_cmd) {
894 	case BIO_FLUSH:
895 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
896 		req->vbr_hdr.sector = 0;
897 		break;
898 	case BIO_READ:
899 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
900 		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
901 		break;
902 	case BIO_WRITE:
903 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
904 		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
905 		break;
906 	case BIO_DELETE:
907 		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
908 		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
909 		break;
910 	default:
911 		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
912 	}
913 
914 	if (bp->bio_flags & BIO_ORDERED)
915 		req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);
916 
917 	return (req);
918 }
919 
920 static int
921 vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
922 {
923 	struct virtqueue *vq;
924 	struct sglist *sg;
925 	struct bio *bp;
926 	int ordered, readable, writable, error;
927 
928 	vq = sc->vtblk_vq;
929 	sg = sc->vtblk_sglist;
930 	bp = req->vbr_bp;
931 	ordered = 0;
932 	writable = 0;
933 
934 	/*
935 	 * Some hosts (such as bhyve) do not implement the barrier feature,
936 	 * so we emulate it in the driver by allowing the barrier request
937 	 * to be the only one in flight.
938 	 */
939 	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
940 		if (sc->vtblk_req_ordered != NULL)
941 			return (EBUSY);
942 		if (bp->bio_flags & BIO_ORDERED) {
943 			if (!virtqueue_empty(vq))
944 				return (EBUSY);
945 			ordered = 1;
946 			req->vbr_hdr.type &= vtblk_gtoh32(sc,
947 				~VIRTIO_BLK_T_BARRIER);
948 		}
949 	}
950 
951 	sglist_reset(sg);
952 	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
953 
954 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
955 		error = sglist_append_bio(sg, bp);
956 		if (error || sg->sg_nseg == sg->sg_maxseg) {
957 			panic("%s: bio %p data buffer too big %d",
958 			    __func__, bp, error);
959 		}
960 
961 		/* BIO_READ means the host writes into our buffer. */
962 		if (bp->bio_cmd == BIO_READ)
963 			writable = sg->sg_nseg - 1;
964 	} else if (bp->bio_cmd == BIO_DELETE) {
965 		struct virtio_blk_discard_write_zeroes *discard;
966 
967 		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
968 		if (discard == NULL)
969 			return (ENOMEM);
970 
971 		bp->bio_driver1 = discard;
972 		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
973 		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
974 		error = sglist_append(sg, discard, sizeof(*discard));
975 		if (error || sg->sg_nseg == sg->sg_maxseg) {
976 			panic("%s: bio %p data buffer too big %d",
977 			    __func__, bp, error);
978 		}
979 	}
980 
981 	writable++;
982 	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
983 	readable = sg->sg_nseg - writable;
984 
985 	error = virtqueue_enqueue(vq, req, sg, readable, writable);
986 	if (error == 0 && ordered)
987 		sc->vtblk_req_ordered = req;
988 
989 	return (error);
990 }
991 
992 static int
993 vtblk_request_error(struct vtblk_request *req)
994 {
995 	int error;
996 
997 	switch (req->vbr_ack) {
998 	case VIRTIO_BLK_S_OK:
999 		error = 0;
1000 		break;
1001 	case VIRTIO_BLK_S_UNSUPP:
1002 		error = ENOTSUP;
1003 		break;
1004 	default:
1005 		error = EIO;
1006 		break;
1007 	}
1008 
1009 	return (error);
1010 }
1011 
1012 static void
1013 vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1014 {
1015 	struct vtblk_request *req;
1016 	struct bio *bp;
1017 
1018 	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
1019 		if (sc->vtblk_req_ordered != NULL) {
1020 			MPASS(sc->vtblk_req_ordered == req);
1021 			sc->vtblk_req_ordered = NULL;
1022 		}
1023 
1024 		bp = req->vbr_bp;
1025 		bp->bio_error = vtblk_request_error(req);
1026 		TAILQ_INSERT_TAIL(queue, bp, bio_queue);
1027 
1028 		vtblk_request_enqueue(sc, req);
1029 	}
1030 }
1031 
1032 static void
1033 vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1034 {
1035 	struct bio *bp, *tmp;
1036 
1037 	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
1038 		if (bp->bio_error != 0)
1039 			disk_err(bp, "hard error", -1, 1);
1040 		vtblk_bio_done(sc, bp, bp->bio_error);
1041 	}
1042 }
1043 
1044 static void
1045 vtblk_drain_vq(struct vtblk_softc *sc)
1046 {
1047 	struct virtqueue *vq;
1048 	struct vtblk_request *req;
1049 	int last;
1050 
1051 	vq = sc->vtblk_vq;
1052 	last = 0;
1053 
1054 	while ((req = virtqueue_drain(vq, &last)) != NULL) {
1055 		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1056 		vtblk_request_enqueue(sc, req);
1057 	}
1058 
1059 	sc->vtblk_req_ordered = NULL;
1060 	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1061 }
1062 
1063 static void
1064 vtblk_drain(struct vtblk_softc *sc)
1065 {
1066 	struct bio_queue_head *bioq;
1067 	struct vtblk_request *req;
1068 	struct bio *bp;
1069 
1070 	bioq = &sc->vtblk_bioq;
1071 
1072 	if (sc->vtblk_vq != NULL) {
1073 		struct bio_queue queue;
1074 
1075 		TAILQ_INIT(&queue);
1076 		vtblk_queue_completed(sc, &queue);
1077 		vtblk_done_completed(sc, &queue);
1078 
1079 		vtblk_drain_vq(sc);
1080 	}
1081 
1082 	while ((req = vtblk_request_next_ready(sc)) != NULL) {
1083 		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1084 		vtblk_request_enqueue(sc, req);
1085 	}
1086 
1087 	while (bioq_first(bioq) != NULL) {
1088 		bp = bioq_takefirst(bioq);
1089 		vtblk_bio_done(sc, bp, ENXIO);
1090 	}
1091 
1092 	vtblk_request_free(sc);
1093 }
1094 
1095 static void
1096 vtblk_startio(struct vtblk_softc *sc)
1097 {
1098 	struct virtqueue *vq;
1099 	struct vtblk_request *req;
1100 	int enq;
1101 
1102 	VTBLK_LOCK_ASSERT(sc);
1103 	vq = sc->vtblk_vq;
1104 	enq = 0;
1105 
1106 	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1107 		return;
1108 
1109 	while (!virtqueue_full(vq)) {
1110 		req = vtblk_request_next(sc);
1111 		if (req == NULL)
1112 			break;
1113 
1114 		if (vtblk_request_execute(sc, req) != 0) {
1115 			vtblk_request_requeue_ready(sc, req);
1116 			break;
1117 		}
1118 
1119 		enq++;
1120 	}
1121 
1122 	if (enq > 0)
1123 		virtqueue_notify(vq);
1124 }
1125 
1126 static void
1127 vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1128 {
1129 
1130 	/* Because of GEOM direct dispatch, we cannot hold any locks. */
1131 	if (sc != NULL)
1132 		VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1133 
1134 	if (error) {
1135 		bp->bio_resid = bp->bio_bcount;
1136 		bp->bio_error = error;
1137 		bp->bio_flags |= BIO_ERROR;
1138 	}
1139 
1140 	if (bp->bio_driver1 != NULL) {
1141 		free(bp->bio_driver1, M_DEVBUF);
1142 		bp->bio_driver1 = NULL;
1143 	}
1144 
1145 	biodone(bp);
1146 }
1147 
/*
 * Read one member of the device's struct virtio_blk_config into the
 * same-named member of '_cfg', but only when virtio_with_feature()
 * reports '_feature' for '_dev'; otherwise '_cfg' is left untouched.
 *
 * The body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement and can be used as the body
 * of an unbraced if/else without capturing the else.
 */
#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
	do {								\
		if (virtio_with_feature(_dev, _feature)) {		\
			virtio_read_device_config(_dev,			\
			    offsetof(struct virtio_blk_config, _field),	\
			    &(_cfg)->_field, sizeof((_cfg)->_field));	\
		}							\
	} while (0)
1154 
1155 static void
1156 vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
1157 {
1158 	device_t dev;
1159 
1160 	dev = sc->vtblk_dev;
1161 
1162 	bzero(blkcfg, sizeof(struct virtio_blk_config));
1163 
1164 	/* The capacity is always available. */
1165 	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
1166 	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));
1167 
1168 	/* Read the configuration if the feature was negotiated. */
1169 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
1170 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
1171 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1172 	    geometry.cylinders, blkcfg);
1173 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1174 	    geometry.heads, blkcfg);
1175 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1176 	    geometry.sectors, blkcfg);
1177 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
1178 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1179 	    topology.physical_block_exp, blkcfg);
1180 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1181 	    topology.alignment_offset, blkcfg);
1182 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1183 	    topology.min_io_size, blkcfg);
1184 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1185 	    topology.opt_io_size, blkcfg);
1186 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
1187 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
1188 	    blkcfg);
1189 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
1190 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
1191 	    blkcfg);
1192 }
1193 
1194 #undef VTBLK_GET_CONFIG
1195 
1196 static void
1197 vtblk_ident(struct vtblk_softc *sc)
1198 {
1199 	struct bio buf;
1200 	struct disk *dp;
1201 	struct vtblk_request *req;
1202 	int len, error;
1203 
1204 	dp = sc->vtblk_disk;
1205 	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);
1206 
1207 	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
1208 		return;
1209 
1210 	req = vtblk_request_dequeue(sc);
1211 	if (req == NULL)
1212 		return;
1213 
1214 	req->vbr_ack = -1;
1215 	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
1216 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1217 	req->vbr_hdr.sector = 0;
1218 
1219 	req->vbr_bp = &buf;
1220 	g_reset_bio(&buf);
1221 
1222 	buf.bio_cmd = BIO_READ;
1223 	buf.bio_data = dp->d_ident;
1224 	buf.bio_bcount = len;
1225 
1226 	VTBLK_LOCK(sc);
1227 	error = vtblk_poll_request(sc, req);
1228 	VTBLK_UNLOCK(sc);
1229 
1230 	vtblk_request_enqueue(sc, req);
1231 
1232 	if (error) {
1233 		device_printf(sc->vtblk_dev,
1234 		    "error getting device identifier: %d\n", error);
1235 	}
1236 }
1237 
1238 static int
1239 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
1240 {
1241 	struct virtqueue *vq;
1242 	int error;
1243 
1244 	vq = sc->vtblk_vq;
1245 
1246 	if (!virtqueue_empty(vq))
1247 		return (EBUSY);
1248 
1249 	error = vtblk_request_execute(sc, req);
1250 	if (error)
1251 		return (error);
1252 
1253 	virtqueue_notify(vq);
1254 	virtqueue_poll(vq, NULL);
1255 
1256 	error = vtblk_request_error(req);
1257 	if (error && bootverbose) {
1258 		device_printf(sc->vtblk_dev,
1259 		    "%s: IO error: %d\n", __func__, error);
1260 	}
1261 
1262 	return (error);
1263 }
1264 
1265 static int
1266 vtblk_quiesce(struct vtblk_softc *sc)
1267 {
1268 	int error;
1269 
1270 	VTBLK_LOCK_ASSERT(sc);
1271 	error = 0;
1272 
1273 	while (!virtqueue_empty(sc->vtblk_vq)) {
1274 		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1275 		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1276 			error = EBUSY;
1277 			break;
1278 		}
1279 	}
1280 
1281 	return (error);
1282 }
1283 
1284 static void
1285 vtblk_vq_intr(void *xsc)
1286 {
1287 	struct vtblk_softc *sc;
1288 	struct virtqueue *vq;
1289 	struct bio_queue queue;
1290 
1291 	sc = xsc;
1292 	vq = sc->vtblk_vq;
1293 	TAILQ_INIT(&queue);
1294 
1295 	VTBLK_LOCK(sc);
1296 
1297 again:
1298 	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
1299 		goto out;
1300 
1301 	vtblk_queue_completed(sc, &queue);
1302 	vtblk_startio(sc);
1303 
1304 	if (virtqueue_enable_intr(vq) != 0) {
1305 		virtqueue_disable_intr(vq);
1306 		goto again;
1307 	}
1308 
1309 	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1310 		wakeup(&sc->vtblk_vq);
1311 
1312 out:
1313 	VTBLK_UNLOCK(sc);
1314 	vtblk_done_completed(sc, &queue);
1315 }
1316 
1317 static void
1318 vtblk_stop(struct vtblk_softc *sc)
1319 {
1320 
1321 	virtqueue_disable_intr(sc->vtblk_vq);
1322 	virtio_stop(sc->vtblk_dev);
1323 }
1324 
1325 static void
1326 vtblk_dump_quiesce(struct vtblk_softc *sc)
1327 {
1328 
1329 	/*
1330 	 * Spin here until all the requests in-flight at the time of the
1331 	 * dump are completed and queued. The queued requests will be
1332 	 * biodone'd once the dump is finished.
1333 	 */
1334 	while (!virtqueue_empty(sc->vtblk_vq))
1335 		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
1336 }
1337 
1338 static int
1339 vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1340     size_t length)
1341 {
1342 	struct bio buf;
1343 	struct vtblk_request *req;
1344 
1345 	req = &sc->vtblk_dump_request;
1346 	req->vbr_ack = -1;
1347 	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1348 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1349 	req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);
1350 
1351 	req->vbr_bp = &buf;
1352 	g_reset_bio(&buf);
1353 
1354 	buf.bio_cmd = BIO_WRITE;
1355 	buf.bio_data = virtual;
1356 	buf.bio_bcount = length;
1357 
1358 	return (vtblk_poll_request(sc, req));
1359 }
1360 
1361 static int
1362 vtblk_dump_flush(struct vtblk_softc *sc)
1363 {
1364 	struct bio buf;
1365 	struct vtblk_request *req;
1366 
1367 	req = &sc->vtblk_dump_request;
1368 	req->vbr_ack = -1;
1369 	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
1370 	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1371 	req->vbr_hdr.sector = 0;
1372 
1373 	req->vbr_bp = &buf;
1374 	g_reset_bio(&buf);
1375 
1376 	buf.bio_cmd = BIO_FLUSH;
1377 
1378 	return (vtblk_poll_request(sc, req));
1379 }
1380 
1381 static void
1382 vtblk_dump_complete(struct vtblk_softc *sc)
1383 {
1384 
1385 	vtblk_dump_flush(sc);
1386 
1387 	VTBLK_UNLOCK(sc);
1388 	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
1389 	VTBLK_LOCK(sc);
1390 }
1391 
1392 static void
1393 vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1394 {
1395 
1396 	/* Set either writeback (1) or writethrough (0) mode. */
1397 	virtio_write_dev_config_1(sc->vtblk_dev,
1398 	    offsetof(struct virtio_blk_config, wce), wc);
1399 }
1400 
1401 static int
1402 vtblk_write_cache_enabled(struct vtblk_softc *sc,
1403     struct virtio_blk_config *blkcfg)
1404 {
1405 	int wc;
1406 
1407 	if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
1408 		wc = vtblk_tunable_int(sc, "writecache_mode",
1409 		    vtblk_writecache_mode);
1410 		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1411 			vtblk_set_write_cache(sc, wc);
1412 		else
1413 			wc = blkcfg->wce;
1414 	} else
1415 		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);
1416 
1417 	return (wc);
1418 }
1419 
1420 static int
1421 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1422 {
1423 	struct vtblk_softc *sc;
1424 	int wc, error;
1425 
1426 	sc = oidp->oid_arg1;
1427 	wc = sc->vtblk_write_cache;
1428 
1429 	error = sysctl_handle_int(oidp, &wc, 0, req);
1430 	if (error || req->newptr == NULL)
1431 		return (error);
1432 	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
1433 		return (EPERM);
1434 	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1435 		return (EINVAL);
1436 
1437 	VTBLK_LOCK(sc);
1438 	sc->vtblk_write_cache = wc;
1439 	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1440 	VTBLK_UNLOCK(sc);
1441 
1442 	return (0);
1443 }
1444 
1445 static void
1446 vtblk_setup_sysctl(struct vtblk_softc *sc)
1447 {
1448 	device_t dev;
1449 	struct sysctl_ctx_list *ctx;
1450 	struct sysctl_oid *tree;
1451 	struct sysctl_oid_list *child;
1452 
1453 	dev = sc->vtblk_dev;
1454 	ctx = device_get_sysctl_ctx(dev);
1455 	tree = device_get_sysctl_tree(dev);
1456 	child = SYSCTL_CHILDREN(tree);
1457 
1458 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1459 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1460 	    vtblk_write_cache_sysctl, "I",
1461 	    "Write cache mode (writethrough (0) or writeback (1))");
1462 }
1463 
1464 static int
1465 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1466 {
1467 	char path[64];
1468 
1469 	snprintf(path, sizeof(path),
1470 	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1471 	TUNABLE_INT_FETCH(path, &def);
1472 
1473 	return (def);
1474 }
1475