xref: /freebsd/sys/dev/virtio/block/virtio_blk.c (revision 1603881667360c015f6685131f2f25474fa67a72)
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#include <geom/geom.h>
#include <geom/geom_disk.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

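/*
 * A preallocated request tracks one in-flight virtio block command: the
 * out header that precedes the data, the bio it services, and the single
 * status ("ack") byte written back by the host.
 */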
struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;
	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH,
	VTBLK_CACHE_WRITEBACK,
	VTBLK_CACHE_MAX
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_DETACH	0x0002
#define VTBLK_FLAG_SUSPEND	0x0004
#define VTBLK_FLAG_BARRIER	0x0008
#define VTBLK_FLAG_WCE_CONFIG	0x0010

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;
	struct vtblk_request	*vtblk_req_ordered;

	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;
	enum vtblk_cache_mode	 vtblk_write_cache;

	struct bio_queue	 vtblk_dump_queue;
	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);
static int	vtblk_attach_completed(device_t);
static int	vtblk_config_change(device_t);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static int	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_setup_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_request_prealloc(struct vtblk_softc *);
static void	vtblk_request_free(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_dequeue(struct vtblk_softc *);
static void	vtblk_request_enqueue(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next_ready(struct vtblk_softc *);
static void	vtblk_request_requeue_ready(struct vtblk_softc *,
		    struct vtblk_request *);
static struct vtblk_request *
		vtblk_request_next(struct vtblk_softc *);
static struct vtblk_request *
		vtblk_request_bio(struct vtblk_softc *);
static int	vtblk_request_execute(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_request_error(struct vtblk_request *);

static void	vtblk_queue_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_done_completed(struct vtblk_softc *,
		    struct bio_queue *);
static void	vtblk_drain_vq(struct vtblk_softc *);
static void	vtblk_drain(struct vtblk_softc *);

static void	vtblk_startio(struct vtblk_softc *);
static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);

static void	vtblk_read_config(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_ident(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);
static int	vtblk_quiesce(struct vtblk_softc *);
static void	vtblk_vq_intr(void *);
static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_dump_quiesce(struct vtblk_softc *);
static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_dump_flush(struct vtblk_softc *);
static void	vtblk_dump_complete(struct vtblk_softc *);

static void	vtblk_set_write_cache(struct vtblk_softc *, int);
static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
		    struct virtio_blk_config *);
static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);

static void	vtblk_setup_sysctl(struct vtblk_softc *);
static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);

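/*
 * Byte order helpers: modern (VIRTIO_F_VERSION_1) devices are always
 * little-endian, while legacy devices use guest-native byte order, so
 * each conversion is keyed off the negotiated feature set.
 */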
#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
#define vtblk_htog16(_sc, _val)	virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val)	virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val)	virtio_htog64(vtblk_modern(_sc), _val)
#define vtblk_gtoh16(_sc, _val)	virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val)	virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val)	virtio_gtoh64(vtblk_modern(_sc), _val)

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
static int vtblk_writecache_mode = -1;
TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
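/*
 * Both knobs can be set from loader.conf(5), for example:
 *
 *	hw.vtblk.no_ident="1"
 *	hw.vtblk.writecache_mode="0"
 *
 * Per-device overrides (hw.vtblk.<unit>.<knob>) are also honored; see
 * vtblk_tunable_int() below.
 */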

#define VTBLK_COMMON_FEATURES \
    (VIRTIO_BLK_F_SIZE_MAX		| \
     VIRTIO_BLK_F_SEG_MAX		| \
     VIRTIO_BLK_F_GEOMETRY		| \
     VIRTIO_BLK_F_RO			| \
     VIRTIO_BLK_F_BLK_SIZE		| \
     VIRTIO_BLK_F_FLUSH			| \
     VIRTIO_BLK_F_TOPOLOGY		| \
     VIRTIO_BLK_F_CONFIG_WCE		| \
     VIRTIO_BLK_F_DISCARD		| \
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)

#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_DISK_NAME		"vtbd"
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
#define VTBLK_BSIZE		512

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
	DEVMETHOD(virtio_config_change,	vtblk_config_change),

	DEVMETHOD_END
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
	case MOD_QUIESCE:
	case MOD_UNLOAD:
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{
	return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;
	virtio_set_feature_desc(dev, vtblk_feature_desc);

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_dump_queue);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	vtblk_setup_sysctl(sc);

	error = vtblk_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot set up features\n");
		goto fail;
	}

	vtblk_read_config(sc, &blkcfg);

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it is at
	 * least as large as the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < maxphys) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
		error = EINVAL;
		device_printf(dev, "fewer than minimum number of segments "
		    "allowed: %d\n", sc->vtblk_max_nsegs);
		goto fail;
	}

	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_request_prealloc(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot set up virtqueue interrupt\n");
		goto fail;
	}

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;
	int error;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
	/* XXX BMV: virtio_stop(), etc needed here? */
	error = vtblk_quiesce(sc);
	if (error)
		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	VTBLK_UNLOCK(sc);

	return (error);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	/* XXX BMV: virtio_reinit(), etc needed here? */
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
	vtblk_startio(sc);
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_attach_completed(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	/*
	 * Create the disk only after attach has completed, as a
	 * VIRTIO_BLK_T_GET_ID request can be processed only once the
	 * device has seen VIRTIO_CONFIG_STATUS_DRIVER_OK.
	 */
	vtblk_create_disk(sc);
	return (0);
}

static int
vtblk_config_change(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	uint64_t capacity;

	sc = device_get_softc(dev);

	vtblk_read_config(sc, &blkcfg);

	/* Capacity is always in 512-byte units. */
	capacity = blkcfg.capacity * VTBLK_BSIZE;

	if (sc->vtblk_disk->d_mediasize != capacity)
		vtblk_resize_disk(sc, capacity);

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

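/*
 * Kernel crash dump entry point (GEOM d_dump). This runs while the
 * kernel is dumping, typically after a panic, so requests are issued
 * synchronously via vtblk_poll_request() rather than the interrupt path.
 */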
static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	vtblk_dump_quiesce(sc);

	if (length > 0)
		error = vtblk_dump_write(sc, virtual, offset, length);
	if (error || (virtual == NULL && offset == 0))
		vtblk_dump_complete(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_done(NULL, bp, EINVAL);
		return;
	}

	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
		vtblk_bio_done(sc, bp, EOPNOTSUPP);
		return;
	}

	VTBLK_LOCK(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
		VTBLK_UNLOCK(sc);
		vtblk_bio_done(sc, bp, ENXIO);
		return;
	}

	bioq_insert_tail(&sc->vtblk_bioq, bp);
	vtblk_startio(sc);

	VTBLK_UNLOCK(sc);
}

static int
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
	    VTBLK_LEGACY_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
	return (virtio_finalize_features(dev));
}

static int
vtblk_setup_features(struct vtblk_softc *sc)
{
	device_t dev;
	int error;

	dev = sc->vtblk_dev;

	error = vtblk_negotiate_features(sc);
	if (error)
		return (error);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
		sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;

	/* Legacy. */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;

	return (0);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

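	/*
	 * A maxphys-sized transfer of non-page-aligned memory can span
	 * maxphys / PAGE_SIZE + 1 pages, so cap the data segments at that
	 * many descriptors (and at seg_max, which bounds a single
	 * request). With indirect descriptors, the entire request must
	 * also fit within one indirect table.
	 */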
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

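	/* The block device uses a single request virtqueue (index 0). */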
	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
{
	device_t dev;
	struct disk *dp;
	int error;

	dev = sc->vtblk_dev;
	dp = sc->vtblk_disk;

	dp->d_mediasize = new_capacity;
	if (bootverbose) {
		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
		    (uintmax_t) dp->d_mediasize >> 20,
		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
		    dp->d_sectorsize);
	}

	error = disk_resize(dp, M_NOWAIT);
	if (error) {
		device_printf(dev,
		    "disk_resize(9) failed, error: %d\n", error);
	}
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;
	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
	dp->d_hba_vendor = virtio_get_vendor(dev);
	dp->d_hba_device = virtio_get_device(dev);
	dp->d_hba_subvendor = virtio_get_subvendor(dev);
	dp->d_hba_subdevice = virtio_get_subdevice(dev);

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
	else {
		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
		dp->d_dump = vtblk_dump;
	}

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = VTBLK_BSIZE;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
	 * which is typically greater than maxphys. Eventually we should
	 * just advertise maxphys and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of
	 * non-page-aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
	    blkcfg->topology.physical_block_exp > 0) {
		dp->d_stripesize = dp->d_sectorsize *
		    (1 << blkcfg->topology.physical_block_exp);
		dp->d_stripeoffset = (dp->d_stripesize -
		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
		    dp->d_stripesize;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
		dp->d_flags |= DISKFLAG_CANDELETE;
		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
	}

	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
	else
		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	vtblk_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

static int
vtblk_request_prealloc(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, nreqs;

	nreqs = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors, so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		nreqs /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < nreqs; i++) {
		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
		MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);

		sc->vtblk_request_count++;
		vtblk_request_enqueue(sc, req);
	}

	return (0);
}

static void
vtblk_request_free(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));

	while ((req = vtblk_request_dequeue(sc)) != NULL) {
		sc->vtblk_request_count--;
		free(req, M_DEVBUF);
	}

	KASSERT(sc->vtblk_request_count == 0,
	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
}

static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL) {
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
		bzero(req, sizeof(struct vtblk_request));
	}

	return (req);
}

static void
vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	/* NOTE: Currently, there will be at most one request in the queue. */
	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static struct vtblk_request *
vtblk_request_next(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = vtblk_request_next_ready(sc);
	if (req != NULL)
		return (req);

	return (vtblk_request_bio(sc));
}

static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);

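	/*
	 * Request sector numbers are always in 512-byte units,
	 * regardless of the block size advertised by the device.
	 */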
	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
		req->vbr_hdr.sector = 0;
		break;
	case BIO_READ:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	case BIO_DELETE:
		req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
		req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		break;
	default:
		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);

	return (req);
}

static int
vtblk_request_execute(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	struct sglist *sg;
	struct bio *bp;
	int ordered, readable, writable, error;

	vq = sc->vtblk_vq;
	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	ordered = 0;
	writable = 0;

	/*
	 * Some hosts (such as bhyve) do not implement the barrier feature,
	 * so we emulate it in the driver by allowing the barrier request
	 * to be the only one in flight.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
		if (sc->vtblk_req_ordered != NULL)
			return (EBUSY);
		if (bp->bio_flags & BIO_ORDERED) {
			if (!virtqueue_empty(vq))
				return (EBUSY);
			ordered = 1;
			req->vbr_hdr.type &= vtblk_gtoh32(sc,
				~VIRTIO_BLK_T_BARRIER);
		}
	}

	sglist_reset(sg);
	sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append_bio(sg, bp);
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable = sg->sg_nseg - 1;
	} else if (bp->bio_cmd == BIO_DELETE) {
		struct virtio_blk_discard_write_zeroes *discard;

		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
		if (discard == NULL)
			return (ENOMEM);

		bp->bio_driver1 = discard;
		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
		error = sglist_append(sg, discard, sizeof(*discard));
		if (error || sg->sg_nseg == sg->sg_maxseg) {
			panic("%s: bio %p data buffer too big %d",
			    __func__, bp, error);
		}
	}

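	/*
	 * The status byte is always the final, device-writable segment:
	 * device-readable segments must precede device-writable ones in
	 * the descriptor chain.
	 */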
	writable++;
	sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	readable = sg->sg_nseg - writable;

	error = virtqueue_enqueue(vq, req, sg, readable, writable);
	if (error == 0 && ordered)
		sc->vtblk_req_ordered = req;

	return (error);
}

static int
vtblk_request_error(struct vtblk_request *req)
{
	int error;

	switch (req->vbr_ack) {
	case VIRTIO_BLK_S_OK:
		error = 0;
		break;
	case VIRTIO_BLK_S_UNSUPP:
		error = ENOTSUP;
		break;
	default:
		error = EIO;
		break;
	}

	return (error);
}

static void
vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct vtblk_request *req;
	struct bio *bp;

	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
		if (sc->vtblk_req_ordered != NULL) {
			MPASS(sc->vtblk_req_ordered == req);
			sc->vtblk_req_ordered = NULL;
		}

		bp = req->vbr_bp;
		bp->bio_error = vtblk_request_error(req);
		TAILQ_INSERT_TAIL(queue, bp, bio_queue);

		vtblk_request_enqueue(sc, req);
	}
}

static void
vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
{
	struct bio *bp, *tmp;

	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
		if (bp->bio_error != 0)
			disk_err(bp, "hard error", -1, 1);
		vtblk_bio_done(sc, bp, bp->bio_error);
	}
}

static void
vtblk_drain_vq(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	sc->vtblk_req_ordered = NULL;
	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL) {
		struct bio_queue queue;

		TAILQ_INIT(&queue);
		vtblk_queue_completed(sc, &queue);
		vtblk_done_completed(sc, &queue);

		vtblk_drain_vq(sc);
	}

	while ((req = vtblk_request_next_ready(sc)) != NULL) {
		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
		vtblk_request_enqueue(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_done(sc, bp, ENXIO);
	}

	vtblk_request_free(sc);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	VTBLK_LOCK_ASSERT(sc);
	vq = sc->vtblk_vq;
	enq = 0;

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		return;

	while (!virtqueue_full(vq)) {
		req = vtblk_request_next(sc);
		if (req == NULL)
			break;

		if (vtblk_request_execute(sc, req) != 0) {
			vtblk_request_requeue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

static void
vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
{

	/* Because of GEOM direct dispatch, we cannot hold any locks. */
	if (sc != NULL)
		VTBLK_LOCK_ASSERT_NOTOWNED(sc);

	if (error) {
		bp->bio_resid = bp->bio_bcount;
		bp->bio_error = error;
		bp->bio_flags |= BIO_ERROR;
	}

	if (bp->bio_driver1 != NULL) {
		free(bp->bio_driver1, M_DEVBUF);
		bp->bio_driver1 = NULL;
	}

	biodone(bp);
}

#define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
	if (virtio_with_feature(_dev, _feature)) {			\
		virtio_read_device_config(_dev,				\
		    offsetof(struct virtio_blk_config, _field),		\
		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
	}

static void
vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;

	dev = sc->vtblk_dev;

	bzero(blkcfg, sizeof(struct virtio_blk_config));

	/* The capacity is always available. */
	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));

	/* Read the configuration if the feature was negotiated. */
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.cylinders, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.heads, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
	    geometry.sectors, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.physical_block_exp, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.alignment_offset, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.min_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
	    topology.opt_io_size, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
	    blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
	    blkcfg);
}

#undef VTBLK_GET_CONFIG

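/*
 * Fetch the device identity string with a VIRTIO_BLK_T_GET_ID request
 * and store it as the disk ident. The request is issued synchronously
 * via vtblk_poll_request(); the hw.vtblk.no_ident tunable skips this.
 */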
static void
vtblk_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
		return;

	req = vtblk_request_dequeue(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_request_enqueue(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

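/*
 * Execute a single request synchronously: enqueue it, notify the host,
 * and spin until it completes. Only valid when the virtqueue is
 * otherwise empty, e.g. during ident and crash dumps.
 */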
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct virtqueue *vq;
	int error;

	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_request_execute(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);
	virtqueue_poll(vq, NULL);

	error = vtblk_request_error(req);
	if (error && bootverbose) {
		device_printf(sc->vtblk_dev,
		    "%s: IO error: %d\n", __func__, error);
	}

	return (error);
}

static int
vtblk_quiesce(struct vtblk_softc *sc)
{
	int error;

	VTBLK_LOCK_ASSERT(sc);
	error = 0;

	while (!virtqueue_empty(sc->vtblk_vq)) {
		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
			error = EBUSY;
			break;
		}
	}

	return (error);
}

static void
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;
	struct virtqueue *vq;
	struct bio_queue queue;

	sc = xsc;
	vq = sc->vtblk_vq;
	TAILQ_INIT(&queue);

	VTBLK_LOCK(sc);

again:
	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
		goto out;

	vtblk_queue_completed(sc, &queue);
	vtblk_startio(sc);

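	/*
	 * New completions may race with re-enabling the interrupt; if
	 * the virtqueue gained entries after being re-enabled, disable
	 * it again and loop to process them.
	 */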
	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		goto again;
	}

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
		wakeup(&sc->vtblk_vq);

out:
	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &queue);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_dump_quiesce(struct vtblk_softc *sc)
{

	/*
	 * Spin here until all the requests in-flight at the time of the
	 * dump are completed and queued. The queued requests will be
	 * biodone'd once the dump is finished.
	 */
	while (!virtqueue_empty(sc->vtblk_vq))
		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
}

static int
vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_dump_flush(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
	req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	g_reset_bio(&buf);

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

static void
vtblk_dump_complete(struct vtblk_softc *sc)
{

	vtblk_dump_flush(sc);

	VTBLK_UNLOCK(sc);
	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
	VTBLK_LOCK(sc);
}

static void
vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
{

	/* Set either writeback (1) or writethrough (0) mode. */
	virtio_write_dev_config_1(sc->vtblk_dev,
	    offsetof(struct virtio_blk_config, wce), wc);
}

static int
vtblk_write_cache_enabled(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	int wc;

	if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
		wc = vtblk_tunable_int(sc, "writecache_mode",
		    vtblk_writecache_mode);
		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
			vtblk_set_write_cache(sc, wc);
		else
			wc = blkcfg->wce;
	} else
		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);

	return (wc);
}

static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct vtblk_softc *sc;
	int wc, error;

	sc = oidp->oid_arg1;
	wc = sc->vtblk_write_cache;

	error = sysctl_handle_int(oidp, &wc, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
		return (EPERM);
	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
		return (EINVAL);

	VTBLK_LOCK(sc);
	sc->vtblk_write_cache = wc;
	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
	VTBLK_UNLOCK(sc);

	return (0);
}

static void
vtblk_setup_sysctl(struct vtblk_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtblk_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

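	/*
	 * Exposed as dev.vtblk.<unit>.writecache_mode; writes require
	 * the CONFIG_WCE feature (see vtblk_write_cache_sysctl()).
	 */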
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    vtblk_write_cache_sysctl, "I",
	    "Write cache mode (writethrough (0) or writeback (1))");
}

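/*
 * Fetch a per-device knob, e.g. hw.vtblk.0.no_ident, falling back to
 * the given global default when the unit-specific tunable is not set.
 */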
1481 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1482 {
1483 	char path[64];
1484 
1485 	snprintf(path, sizeof(path),
1486 	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1487 	TUNABLE_INT_FETCH(path, &def);
1488 
1489 	return (def);
1490 }
1491