xref: /freebsd/sys/dev/virtio/block/virtio_blk.c (revision f54209510b1b30b1445792db7d33401f7c7a97d6)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /* Driver for VirtIO block devices. */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bio.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/msan.h>
38 #include <sys/sglist.h>
39 #include <sys/sysctl.h>
40 #include <sys/lock.h>
41 #include <sys/mutex.h>
42 #include <sys/queue.h>
43 
44 #include <geom/geom.h>
45 #include <geom/geom_disk.h>
46 
47 #include <machine/bus.h>
48 #include <machine/resource.h>
49 #include <sys/bus.h>
50 #include <sys/rman.h>
51 
52 #include <dev/virtio/virtio.h>
53 #include <dev/virtio/virtqueue.h>
54 #include <dev/virtio/block/virtio_blk.h>
55 
56 #include "virtio_if.h"
57 
58 struct vtblk_request {
59 	struct vtblk_softc		*vbr_sc;
60 	bus_dmamap_t			 vbr_mapp;
61 
62 	struct virtio_blk_outhdr	*vbr_hdr;
63 	vm_paddr_t			 vbr_hdr_paddr;
64 	bus_dmamap_t			 vbr_hdr_mapp;
65 	uint8_t				*vbr_ack;
66 	vm_paddr_t			 vbr_ack_paddr;
67 	bus_dmamap_t			 vbr_ack_mapp;
68 
69 	/* Fields after this point are zeroed for each request. */
70 	struct bio			*vbr_bp;
71 	uint8_t				 vbr_requeue_on_error;
72 	uint8_t				 vbr_busdma_wait;
73 	int				 vbr_error;
74 	TAILQ_ENTRY(vtblk_request)	 vbr_link;
75 };
76 
/* Write cache mode recorded in vtblk_softc.vtblk_write_cache. */
enum vtblk_cache_mode {
	VTBLK_CACHE_WRITETHROUGH = 0,
	VTBLK_CACHE_WRITEBACK = 1,
	VTBLK_CACHE_MAX = 2		/* count of the modes above */
};
82 
83 struct vtblk_softc {
84 	device_t		 vtblk_dev;
85 	struct mtx		 vtblk_mtx;
86 	struct mtx		 vtblk_hdr_mtx;
87 	struct mtx		 vtblk_ack_mtx;
88 	uint64_t		 vtblk_features;
89 	uint32_t		 vtblk_flags;
90 #define VTBLK_FLAG_INDIRECT	0x0001
91 #define VTBLK_FLAG_DETACH	0x0002
92 #define VTBLK_FLAG_SUSPEND	0x0004
93 #define VTBLK_FLAG_BARRIER	0x0008
94 #define VTBLK_FLAG_WCE_CONFIG	0x0010
95 #define VTBLK_FLAG_BUSDMA_WAIT	0x0020
96 #define VTBLK_FLAG_BUSDMA_ALIGN	0x0040
97 
98 	struct virtqueue	*vtblk_vq;
99 	struct sglist		*vtblk_sglist;
100 	bus_dma_tag_t		 vtblk_dmat;
101 	bus_dma_tag_t		 vtblk_hdr_dmat;
102 	bus_dma_tag_t		 vtblk_ack_dmat;
103 	struct disk		*vtblk_disk;
104 
105 	struct bio_queue_head	 vtblk_bioq;
106 	TAILQ_HEAD(, vtblk_request)
107 				 vtblk_req_free;
108 	TAILQ_HEAD(, vtblk_request)
109 				 vtblk_req_ready;
110 	struct vtblk_request	*vtblk_req_ordered;
111 
112 	int			 vtblk_max_nsegs;
113 	int			 vtblk_request_count;
114 	enum vtblk_cache_mode	 vtblk_write_cache;
115 
116 	struct bio_queue	 vtblk_dump_queue;
117 	struct vtblk_request	 vtblk_dump_request;
118 };
119 
120 static struct virtio_feature_desc vtblk_feature_desc[] = {
121 	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
122 	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
123 	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
124 	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
125 	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
126 	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
127 	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
128 	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
129 	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},
130 	{ VIRTIO_BLK_F_CONFIG_WCE,	"ConfigWCE"	},
131 	{ VIRTIO_BLK_F_MQ,		"Multiqueue"	},
132 	{ VIRTIO_BLK_F_DISCARD,		"Discard"	},
133 	{ VIRTIO_BLK_F_WRITE_ZEROES,	"WriteZeros"	},
134 
135 	{ 0, NULL }
136 };
137 
138 static int	vtblk_modevent(module_t, int, void *);
139 
140 static int	vtblk_probe(device_t);
141 static int	vtblk_attach(device_t);
142 static int	vtblk_detach(device_t);
143 static int	vtblk_suspend(device_t);
144 static int	vtblk_resume(device_t);
145 static int	vtblk_shutdown(device_t);
146 static int	vtblk_attach_completed(device_t);
147 static int	vtblk_config_change(device_t);
148 
149 static int	vtblk_open(struct disk *);
150 static int	vtblk_close(struct disk *);
151 static int	vtblk_ioctl(struct disk *, u_long, void *, int,
152 		    struct thread *);
153 static int	vtblk_dump(void *, void *, off_t, size_t);
154 static void	vtblk_strategy(struct bio *);
155 
156 static int	vtblk_negotiate_features(struct vtblk_softc *);
157 static int	vtblk_setup_features(struct vtblk_softc *);
158 static int	vtblk_maximum_segments(struct vtblk_softc *,
159 		    struct virtio_blk_config *);
160 static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
161 static void	vtblk_resize_disk(struct vtblk_softc *, uint64_t);
162 static void	vtblk_alloc_disk(struct vtblk_softc *,
163 		    struct virtio_blk_config *);
164 static void	vtblk_create_disk(struct vtblk_softc *);
165 
166 static int	vtblk_request_prealloc(struct vtblk_softc *);
167 static void	vtblk_request_free(struct vtblk_softc *);
168 static struct vtblk_request *
169 		vtblk_request_dequeue(struct vtblk_softc *);
170 static void	vtblk_request_enqueue(struct vtblk_softc *,
171 		    struct vtblk_request *);
172 static struct vtblk_request *
173 		vtblk_request_next_ready(struct vtblk_softc *);
174 static void	vtblk_request_requeue_ready(struct vtblk_softc *,
175 		    struct vtblk_request *);
176 static struct vtblk_request *
177 		vtblk_request_next(struct vtblk_softc *);
178 static struct vtblk_request *
179 		vtblk_request_bio(struct vtblk_softc *);
180 static int	vtblk_request_execute(struct vtblk_request *, int);
181 static void	vtblk_request_execute_cb(void *,
182 		    bus_dma_segment_t *, int, int);
183 static int	vtblk_request_error(struct vtblk_request *);
184 
185 static void	vtblk_queue_completed(struct vtblk_softc *,
186 		    struct bio_queue *);
187 static void	vtblk_done_completed(struct vtblk_softc *,
188 		    struct bio_queue *);
189 static void	vtblk_drain_vq(struct vtblk_softc *);
190 static void	vtblk_drain(struct vtblk_softc *);
191 
192 static void	vtblk_startio(struct vtblk_softc *);
193 static void	vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
194 
195 static void	vtblk_read_config(struct vtblk_softc *,
196 		    struct virtio_blk_config *);
197 static void	vtblk_ident(struct vtblk_softc *);
198 static int	vtblk_poll_request(struct vtblk_softc *,
199 		    struct vtblk_request *);
200 static int	vtblk_quiesce(struct vtblk_softc *);
201 static void	vtblk_vq_intr(void *);
202 static void	vtblk_stop(struct vtblk_softc *);
203 
204 static void	vtblk_dump_quiesce(struct vtblk_softc *);
205 static int	vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
206 static int	vtblk_dump_flush(struct vtblk_softc *);
207 static void	vtblk_dump_complete(struct vtblk_softc *);
208 
209 static void	vtblk_set_write_cache(struct vtblk_softc *, int);
210 static int	vtblk_write_cache_enabled(struct vtblk_softc *sc,
211 		    struct virtio_blk_config *);
212 static int	vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
213 
214 static void	vtblk_setup_sysctl(struct vtblk_softc *);
215 static int	vtblk_tunable_int(struct vtblk_softc *, const char *, int);
216 
/*
 * vtblk_modern() is true when VIRTIO_F_VERSION_1 is among the negotiated
 * features.  The htog/gtoh wrappers forward that to the generic virtio
 * conversion helpers of the matching width.
 */
#define vtblk_modern(_sc) \
    (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)

#define vtblk_htog16(_sc, _val) \
    virtio_htog16(vtblk_modern(_sc), _val)
#define vtblk_htog32(_sc, _val) \
    virtio_htog32(vtblk_modern(_sc), _val)
#define vtblk_htog64(_sc, _val) \
    virtio_htog64(vtblk_modern(_sc), _val)

#define vtblk_gtoh16(_sc, _val) \
    virtio_gtoh16(vtblk_modern(_sc), _val)
#define vtblk_gtoh32(_sc, _val) \
    virtio_gtoh32(vtblk_modern(_sc), _val)
#define vtblk_gtoh64(_sc, _val) \
    virtio_gtoh64(vtblk_modern(_sc), _val)
224 
225 /* Tunables. */
226 static int vtblk_no_ident = 0;
227 TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
228 static int vtblk_writecache_mode = -1;
229 TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
230 
/*
 * Feature bits offered during negotiation.  The common set is offered on
 * both transports; the legacy set additionally offers the host barrier
 * feature.  vtblk_negotiate_features() picks one of the two masks.
 */
#define VTBLK_COMMON_FEATURES		\
    (VIRTIO_BLK_F_SIZE_MAX	|	\
     VIRTIO_BLK_F_SEG_MAX	|	\
     VIRTIO_BLK_F_GEOMETRY	|	\
     VIRTIO_BLK_F_RO		|	\
     VIRTIO_BLK_F_BLK_SIZE	|	\
     VIRTIO_BLK_F_FLUSH		|	\
     VIRTIO_BLK_F_TOPOLOGY	|	\
     VIRTIO_BLK_F_CONFIG_WCE	|	\
     VIRTIO_BLK_F_DISCARD	|	\
     VIRTIO_RING_F_INDIRECT_DESC)

#define VTBLK_MODERN_FEATURES	(VTBLK_COMMON_FEATURES)
#define VTBLK_LEGACY_FEATURES	(VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)
245 
/*
 * Accessors for the per-device driver lock (vtblk_softc.vtblk_mtx).
 *
 * VTBLK_MTX() yields a pointer to the mutex.  Its expansion is fully
 * parenthesized so that it binds as one operand wherever it is used
 * (for example when followed by a postfix operator).
 */
#define VTBLK_MTX(_sc)		(&(_sc)->vtblk_mtx)
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VirtIO Block Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
256 
/* Name given to the disk(9) device (d_name). */
#define VTBLK_DISK_NAME		"vtbd"
/* Quiesce timeout, expressed in ticks. */
#define VTBLK_QUIESCE_TIMEOUT	(30 * hz)
/* Unit, in bytes, in which the device reports its capacity. */
#define VTBLK_BSIZE		512

/*
 * Every block request carries a header segment and a status segment in
 * addition to any data, so two segments is the minimum per request.
 */
#define VTBLK_MIN_SEGMENTS	2
266 
267 static device_method_t vtblk_methods[] = {
268 	/* Device methods. */
269 	DEVMETHOD(device_probe,		vtblk_probe),
270 	DEVMETHOD(device_attach,	vtblk_attach),
271 	DEVMETHOD(device_detach,	vtblk_detach),
272 	DEVMETHOD(device_suspend,	vtblk_suspend),
273 	DEVMETHOD(device_resume,	vtblk_resume),
274 	DEVMETHOD(device_shutdown,	vtblk_shutdown),
275 
276 	/* VirtIO methods. */
277 	DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
278 	DEVMETHOD(virtio_config_change,	vtblk_config_change),
279 
280 	DEVMETHOD_END
281 };
282 
283 static driver_t vtblk_driver = {
284 	"vtblk",
285 	vtblk_methods,
286 	sizeof(struct vtblk_softc)
287 };
288 
289 VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_modevent, NULL);
290 MODULE_VERSION(virtio_blk, 1);
291 MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
292 
293 VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");
294 
295 static int
vtblk_modevent(module_t mod,int type,void * unused)296 vtblk_modevent(module_t mod, int type, void *unused)
297 {
298 	int error;
299 
300 	error = 0;
301 
302 	switch (type) {
303 	case MOD_LOAD:
304 	case MOD_QUIESCE:
305 	case MOD_UNLOAD:
306 	case MOD_SHUTDOWN:
307 		break;
308 	default:
309 		error = EOPNOTSUPP;
310 		break;
311 	}
312 
313 	return (error);
314 }
315 
316 static int
vtblk_probe(device_t dev)317 vtblk_probe(device_t dev)
318 {
319 	return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
320 }
321 
322 static int
vtblk_attach(device_t dev)323 vtblk_attach(device_t dev)
324 {
325 	struct vtblk_softc *sc;
326 	struct virtio_blk_config blkcfg;
327 	int error;
328 
329 	sc = device_get_softc(dev);
330 	sc->vtblk_dev = dev;
331 	virtio_set_feature_desc(dev, vtblk_feature_desc);
332 
333 	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
334 	bioq_init(&sc->vtblk_bioq);
335 	TAILQ_INIT(&sc->vtblk_dump_queue);
336 	TAILQ_INIT(&sc->vtblk_req_free);
337 	TAILQ_INIT(&sc->vtblk_req_ready);
338 
339 	vtblk_setup_sysctl(sc);
340 
341 	error = vtblk_setup_features(sc);
342 	if (error) {
343 		device_printf(dev, "cannot setup features\n");
344 		goto fail;
345 	}
346 
347 	vtblk_read_config(sc, &blkcfg);
348 
349 	/*
350 	 * With the current sglist(9) implementation, it is not easy
351 	 * for us to support a maximum segment size as adjacent
352 	 * segments are coalesced. For now, just make sure it's larger
353 	 * than the maximum supported transfer size.
354 	 */
355 	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
356 		if (blkcfg.size_max < maxphys) {
357 			error = ENOTSUP;
358 			device_printf(dev, "host requires unsupported "
359 			    "maximum segment size feature\n");
360 			goto fail;
361 		}
362 	}
363 
364 	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
365 	if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
366 		error = EINVAL;
367 		device_printf(dev, "fewer than minimum number of segments "
368 		    "allowed: %d\n", sc->vtblk_max_nsegs);
369 		goto fail;
370 	}
371 
372 	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
373 	if (sc->vtblk_sglist == NULL) {
374 		error = ENOMEM;
375 		device_printf(dev, "cannot allocate sglist\n");
376 		goto fail;
377 	}
378 
379 	/*
380 	 * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
381 	 * supports a single data segment; in that case we need busdma to
382 	 * align to a page boundary so we can send a *contiguous* page size
383 	 * request to the host.
384 	 */
385 	if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
386 		sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
387 	error = bus_dma_tag_create(
388 	    bus_get_dma_tag(dev),			/* parent */
389 	    (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE : 1,
390 	    0,						/* boundary */
391 	    BUS_SPACE_MAXADDR,				/* lowaddr */
392 	    BUS_SPACE_MAXADDR,				/* highaddr */
393 	    NULL, NULL,					/* filter, filterarg */
394 	    maxphys,					/* max request size */
395 	    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS,	/* max # segments */
396 	    maxphys,					/* maxsegsize */
397 	    BUS_DMA_COHERENT,				/* flags */
398 	    busdma_lock_mutex,				/* lockfunc */
399 	    &sc->vtblk_mtx,				/* lockarg */
400 	    &sc->vtblk_dmat);
401 	if (error) {
402 		device_printf(dev, "cannot create bus dma tag\n");
403 		goto fail;
404 	}
405 
406 	error = bus_dma_tag_create(
407 	    bus_get_dma_tag(dev),			/* parent */
408 	    (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE :
409 	        sizeof(struct virtio_blk_outhdr),	/* alignment */
410 	    0,						/* boundary */
411 	    BUS_SPACE_MAXADDR,				/* lowaddr */
412 	    BUS_SPACE_MAXADDR,				/* highaddr */
413 	    NULL, NULL,					/* filter, filterarg */
414 	    sizeof(struct virtio_blk_outhdr),		/* max request size */
415 	    1,						/* max # segments */
416 	    sizeof(struct virtio_blk_outhdr),		/* maxsegsize */
417 	    BUS_DMA_COHERENT,				/* flags */
418 	    busdma_lock_mutex,				/* lockfunc */
419 	    &sc->vtblk_hdr_mtx,				/* lockarg */
420 	    &sc->vtblk_hdr_dmat);
421 	if (error) {
422 		device_printf(dev, "cannot create hdr bus dma tag\n");
423 		goto fail;
424 	}
425 
426 	error = bus_dma_tag_create(
427 	    bus_get_dma_tag(dev),			/* parent */
428 	    (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE :
429 	        sizeof(uint8_t),			/* alignment */
430 	    0,						/* boundary */
431 	    BUS_SPACE_MAXADDR,				/* lowaddr */
432 	    BUS_SPACE_MAXADDR,				/* highaddr */
433 	    NULL, NULL,					/* filter, filterarg */
434 	    sizeof(uint8_t),				/* max request size */
435 	    1,						/* max # segments */
436 	    sizeof(uint8_t),				/* maxsegsize */
437 	    BUS_DMA_COHERENT,				/* flags */
438 	    busdma_lock_mutex,				/* lockfunc */
439 	    &sc->vtblk_ack_mtx,				/* lockarg */
440 	    &sc->vtblk_ack_dmat);
441 	if (error) {
442 		device_printf(dev, "cannot create ack bus dma tag\n");
443 		goto fail;
444 	}
445 
446 #ifdef __powerpc__
447 	/*
448 	 * Virtio uses physical addresses rather than bus addresses, so we
449 	 * need to ask busdma to skip the iommu physical->bus mapping.  At
450 	 * present, this is only a thing on the powerpc architectures.
451 	 */
452 	bus_dma_tag_set_iommu(sc->vtblk_dmat, NULL, NULL);
453 	bus_dma_tag_set_iommu(sc->vtblk_hdr_dmat, NULL, NULL);
454 	bus_dma_tag_set_iommu(sc->vtblk_ack_dmat, NULL, NULL);
455 #endif
456 
457 	error = vtblk_alloc_virtqueue(sc);
458 	if (error) {
459 		device_printf(dev, "cannot allocate virtqueue\n");
460 		goto fail;
461 	}
462 
463 	error = vtblk_request_prealloc(sc);
464 	if (error) {
465 		device_printf(dev, "cannot preallocate requests\n");
466 		goto fail;
467 	}
468 
469 	vtblk_alloc_disk(sc, &blkcfg);
470 
471 	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
472 	if (error) {
473 		device_printf(dev, "cannot setup virtqueue interrupt\n");
474 		goto fail;
475 	}
476 
477 	virtqueue_enable_intr(sc->vtblk_vq);
478 
479 fail:
480 	if (error)
481 		vtblk_detach(dev);
482 
483 	return (error);
484 }
485 
486 static int
vtblk_detach(device_t dev)487 vtblk_detach(device_t dev)
488 {
489 	struct vtblk_softc *sc;
490 
491 	sc = device_get_softc(dev);
492 
493 	VTBLK_LOCK(sc);
494 	sc->vtblk_flags |= VTBLK_FLAG_DETACH;
495 	if (device_is_attached(dev))
496 		vtblk_stop(sc);
497 	VTBLK_UNLOCK(sc);
498 
499 	vtblk_drain(sc);
500 
501 	if (sc->vtblk_disk != NULL) {
502 		disk_destroy(sc->vtblk_disk);
503 		sc->vtblk_disk = NULL;
504 	}
505 
506 	if (sc->vtblk_ack_dmat != NULL) {
507 		bus_dma_tag_destroy(sc->vtblk_ack_dmat);
508 		sc->vtblk_ack_dmat = NULL;
509 	}
510 
511 	if (sc->vtblk_hdr_dmat != NULL) {
512 		bus_dma_tag_destroy(sc->vtblk_hdr_dmat);
513 		sc->vtblk_hdr_dmat = NULL;
514 	}
515 
516 	if (sc->vtblk_dmat != NULL) {
517 		bus_dma_tag_destroy(sc->vtblk_dmat);
518 		sc->vtblk_dmat = NULL;
519 	}
520 
521 	if (sc->vtblk_sglist != NULL) {
522 		sglist_free(sc->vtblk_sglist);
523 		sc->vtblk_sglist = NULL;
524 	}
525 
526 	VTBLK_LOCK_DESTROY(sc);
527 
528 	return (0);
529 }
530 
531 static int
vtblk_suspend(device_t dev)532 vtblk_suspend(device_t dev)
533 {
534 	struct vtblk_softc *sc;
535 	int error;
536 
537 	sc = device_get_softc(dev);
538 
539 	VTBLK_LOCK(sc);
540 	sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
541 	/* XXX BMV: virtio_stop(), etc needed here? */
542 	error = vtblk_quiesce(sc);
543 	if (error)
544 		sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
545 	VTBLK_UNLOCK(sc);
546 
547 	return (error);
548 }
549 
550 static int
vtblk_resume(device_t dev)551 vtblk_resume(device_t dev)
552 {
553 	struct vtblk_softc *sc;
554 
555 	sc = device_get_softc(dev);
556 
557 	VTBLK_LOCK(sc);
558 	/* XXX BMV: virtio_reinit(), etc needed here? */
559 	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
560 	vtblk_startio(sc);
561 	VTBLK_UNLOCK(sc);
562 
563 	return (0);
564 }
565 
566 static int
vtblk_shutdown(device_t dev)567 vtblk_shutdown(device_t dev)
568 {
569 
570 	return (0);
571 }
572 
573 static int
vtblk_attach_completed(device_t dev)574 vtblk_attach_completed(device_t dev)
575 {
576 	struct vtblk_softc *sc;
577 
578 	sc = device_get_softc(dev);
579 
580 	/*
581 	 * Create disk after attach as VIRTIO_BLK_T_GET_ID can only be
582 	 * processed after the device acknowledged
583 	 * VIRTIO_CONFIG_STATUS_DRIVER_OK.
584 	 */
585 	vtblk_create_disk(sc);
586 	return (0);
587 }
588 
589 static int
vtblk_config_change(device_t dev)590 vtblk_config_change(device_t dev)
591 {
592 	struct vtblk_softc *sc;
593 	struct virtio_blk_config blkcfg;
594 	uint64_t capacity;
595 
596 	sc = device_get_softc(dev);
597 
598 	vtblk_read_config(sc, &blkcfg);
599 
600 	/* Capacity is always in 512-byte units. */
601 	capacity = blkcfg.capacity * VTBLK_BSIZE;
602 
603 	if (sc->vtblk_disk->d_mediasize != capacity)
604 		vtblk_resize_disk(sc, capacity);
605 
606 	return (0);
607 }
608 
609 static int
vtblk_open(struct disk * dp)610 vtblk_open(struct disk *dp)
611 {
612 	struct vtblk_softc *sc;
613 
614 	if ((sc = dp->d_drv1) == NULL)
615 		return (ENXIO);
616 
617 	return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
618 }
619 
620 static int
vtblk_close(struct disk * dp)621 vtblk_close(struct disk *dp)
622 {
623 	struct vtblk_softc *sc;
624 
625 	if ((sc = dp->d_drv1) == NULL)
626 		return (ENXIO);
627 
628 	return (0);
629 }
630 
631 static int
vtblk_ioctl(struct disk * dp,u_long cmd,void * addr,int flag,struct thread * td)632 vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
633     struct thread *td)
634 {
635 	struct vtblk_softc *sc;
636 
637 	if ((sc = dp->d_drv1) == NULL)
638 		return (ENXIO);
639 
640 	return (ENOTTY);
641 }
642 
643 static int
vtblk_dump(void * arg,void * virtual,off_t offset,size_t length)644 vtblk_dump(void *arg, void *virtual, off_t offset, size_t length)
645 {
646 	struct disk *dp;
647 	struct vtblk_softc *sc;
648 	int error;
649 
650 	dp = arg;
651 	error = 0;
652 
653 	if ((sc = dp->d_drv1) == NULL)
654 		return (ENXIO);
655 
656 	VTBLK_LOCK(sc);
657 
658 	vtblk_dump_quiesce(sc);
659 
660 	if (length > 0)
661 		error = vtblk_dump_write(sc, virtual, offset, length);
662 	if (error || (virtual == NULL && offset == 0))
663 		vtblk_dump_complete(sc);
664 
665 	VTBLK_UNLOCK(sc);
666 
667 	return (error);
668 }
669 
670 static void
vtblk_strategy(struct bio * bp)671 vtblk_strategy(struct bio *bp)
672 {
673 	struct vtblk_softc *sc;
674 
675 	if ((sc = bp->bio_disk->d_drv1) == NULL) {
676 		vtblk_bio_done(NULL, bp, EINVAL);
677 		return;
678 	}
679 
680 	if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
681 	    (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
682 		vtblk_bio_done(sc, bp, EOPNOTSUPP);
683 		return;
684 	}
685 
686 	VTBLK_LOCK(sc);
687 
688 	if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
689 		VTBLK_UNLOCK(sc);
690 		vtblk_bio_done(sc, bp, ENXIO);
691 		return;
692 	}
693 
694 	bioq_insert_tail(&sc->vtblk_bioq, bp);
695 	vtblk_startio(sc);
696 
697 	VTBLK_UNLOCK(sc);
698 }
699 
700 static int
vtblk_negotiate_features(struct vtblk_softc * sc)701 vtblk_negotiate_features(struct vtblk_softc *sc)
702 {
703 	device_t dev;
704 	uint64_t features;
705 
706 	dev = sc->vtblk_dev;
707 	features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
708 	    VTBLK_LEGACY_FEATURES;
709 
710 	sc->vtblk_features = virtio_negotiate_features(dev, features);
711 	return (virtio_finalize_features(dev));
712 }
713 
714 static int
vtblk_setup_features(struct vtblk_softc * sc)715 vtblk_setup_features(struct vtblk_softc *sc)
716 {
717 	device_t dev;
718 	int error;
719 
720 	dev = sc->vtblk_dev;
721 
722 	error = vtblk_negotiate_features(sc);
723 	if (error)
724 		return (error);
725 
726 	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
727 		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
728 	if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
729 		sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;
730 
731 	/* Legacy. */
732 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
733 		sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
734 
735 	return (0);
736 }
737 
738 static int
vtblk_maximum_segments(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)739 vtblk_maximum_segments(struct vtblk_softc *sc,
740     struct virtio_blk_config *blkcfg)
741 {
742 	device_t dev;
743 	int nsegs;
744 
745 	dev = sc->vtblk_dev;
746 	nsegs = VTBLK_MIN_SEGMENTS;
747 
748 	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
749 		nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
750 		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
751 			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
752 	} else
753 		nsegs += 1;
754 
755 	return (nsegs);
756 }
757 
758 static int
vtblk_alloc_virtqueue(struct vtblk_softc * sc)759 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
760 {
761 	device_t dev;
762 	struct vq_alloc_info vq_info;
763 	int indir_segs;
764 
765 	dev = sc->vtblk_dev;
766 
767 	indir_segs = 0;
768 	if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
769 		indir_segs = sc->vtblk_max_nsegs;
770 	VQ_ALLOC_INFO_INIT(&vq_info, indir_segs,
771 	    vtblk_vq_intr, sc, &sc->vtblk_vq,
772 	    "%s request", device_get_nameunit(dev));
773 
774 	return (virtio_alloc_virtqueues(dev, 1, &vq_info));
775 }
776 
777 static void
vtblk_resize_disk(struct vtblk_softc * sc,uint64_t new_capacity)778 vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
779 {
780 	device_t dev;
781 	struct disk *dp;
782 	int error;
783 
784 	dev = sc->vtblk_dev;
785 	dp = sc->vtblk_disk;
786 
787 	dp->d_mediasize = new_capacity;
788 	if (bootverbose) {
789 		device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
790 		    (uintmax_t) dp->d_mediasize >> 20,
791 		    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
792 		    dp->d_sectorsize);
793 	}
794 
795 	error = disk_resize(dp, M_NOWAIT);
796 	if (error) {
797 		device_printf(dev,
798 		    "disk_resize(9) failed, error: %d\n", error);
799 	}
800 }
801 
802 static void
vtblk_alloc_disk(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)803 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
804 {
805 	device_t dev;
806 	struct disk *dp;
807 
808 	dev = sc->vtblk_dev;
809 
810 	sc->vtblk_disk = dp = disk_alloc();
811 	dp->d_open = vtblk_open;
812 	dp->d_close = vtblk_close;
813 	dp->d_ioctl = vtblk_ioctl;
814 	dp->d_strategy = vtblk_strategy;
815 	dp->d_name = VTBLK_DISK_NAME;
816 	dp->d_unit = device_get_unit(dev);
817 	dp->d_drv1 = sc;
818 	dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
819 	dp->d_hba_vendor = virtio_get_vendor(dev);
820 	dp->d_hba_device = virtio_get_device(dev);
821 	dp->d_hba_subvendor = virtio_get_subvendor(dev);
822 	dp->d_hba_subdevice = virtio_get_subdevice(dev);
823 	strlcpy(dp->d_attachment, device_get_nameunit(dev),
824 	    sizeof(dp->d_attachment));
825 
826 	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
827 		dp->d_flags |= DISKFLAG_WRITE_PROTECT;
828 	else {
829 		if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
830 			dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
831 		dp->d_dump = vtblk_dump;
832 	}
833 
834 	/* Capacity is always in 512-byte units. */
835 	dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
836 
837 	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
838 		dp->d_sectorsize = blkcfg->blk_size;
839 	else
840 		dp->d_sectorsize = VTBLK_BSIZE;
841 
842 	/*
843 	 * The VirtIO maximum I/O size is given in terms of segments.
844 	 * However, FreeBSD limits I/O size by logical buffer size, not
845 	 * by physically contiguous pages. Therefore, we have to assume
846 	 * no pages are contiguous. This may impose an artificially low
847 	 * maximum I/O size. But in practice, since QEMU advertises 128
848 	 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
849 	 * which is typically greater than maxphys. Eventually we should
850 	 * just advertise maxphys and split buffers that are too big.
851 	 *
852 	 * If we're not asking busdma to align data to page boundaries, the
853 	 * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
854 	 * unaligned I/Os.
855 	 */
856 	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
857 	    PAGE_SIZE;
858 	if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
859 		dp->d_maxsize -= PAGE_SIZE;
860 
861 	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
862 		dp->d_fwsectors = blkcfg->geometry.sectors;
863 		dp->d_fwheads = blkcfg->geometry.heads;
864 	}
865 
866 	if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
867 	    blkcfg->topology.physical_block_exp > 0) {
868 		dp->d_stripesize = dp->d_sectorsize *
869 		    (1 << blkcfg->topology.physical_block_exp);
870 		dp->d_stripeoffset = (dp->d_stripesize -
871 		    blkcfg->topology.alignment_offset * dp->d_sectorsize) %
872 		    dp->d_stripesize;
873 	}
874 
875 	if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
876 		dp->d_flags |= DISKFLAG_CANDELETE;
877 		dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
878 	}
879 
880 	if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
881 		sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
882 	else
883 		sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
884 }
885 
886 static void
vtblk_create_disk(struct vtblk_softc * sc)887 vtblk_create_disk(struct vtblk_softc *sc)
888 {
889 	struct disk *dp;
890 
891 	dp = sc->vtblk_disk;
892 
893 	vtblk_ident(sc);
894 
895 	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
896 	    (uintmax_t) dp->d_mediasize >> 20,
897 	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
898 	    dp->d_sectorsize);
899 
900 	disk_create(dp, DISK_VERSION);
901 }
902 
903 static void
vtblk_ack_load_callback(void * arg,bus_dma_segment_t * segs,int nsegs,int error)904 vtblk_ack_load_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
905     int error)
906 {
907 	struct vtblk_request *req;
908 
909 	if (error != 0)
910 		return;
911 
912 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
913 
914 	req = (struct vtblk_request *)arg;
915 	req->vbr_ack_paddr = segs[0].ds_addr;
916 }
917 
918 static void
vtblk_hdr_load_callback(void * arg,bus_dma_segment_t * segs,int nsegs,int error)919 vtblk_hdr_load_callback(void *arg, bus_dma_segment_t *segs, int nsegs,
920     int error)
921 {
922 	struct vtblk_request *req;
923 
924 	if (error != 0)
925 		return;
926 
927 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
928 
929 	req = (struct vtblk_request *)arg;
930 	req->vbr_hdr_paddr = segs[0].ds_addr;
931 }
932 
933 static int
vtblk_create_request(struct vtblk_softc * sc,struct vtblk_request * req)934 vtblk_create_request(struct vtblk_softc *sc, struct vtblk_request *req)
935 {
936 	req->vbr_sc = sc;
937 
938 	if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp))
939 		goto error_free;
940 
941 	if (bus_dmamem_alloc(sc->vtblk_hdr_dmat, (void **)&req->vbr_hdr,
942 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
943 	    &req->vbr_hdr_mapp))
944 		goto error_destroy;
945 
946 	if (bus_dmamem_alloc(sc->vtblk_ack_dmat, (void **)&req->vbr_ack,
947 	    BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT,
948 	    &req->vbr_ack_mapp))
949 		goto error_hdr_free;
950 
951 	MPASS(sglist_count(req->vbr_hdr, sizeof(*req->vbr_hdr)) == 1);
952 	MPASS(sglist_count(req->vbr_ack, sizeof(*req->vbr_ack)) == 1);
953 
954 	if (bus_dmamap_load(sc->vtblk_hdr_dmat, req->vbr_hdr_mapp,
955 	    req->vbr_hdr, sizeof(struct virtio_blk_outhdr),
956 	    vtblk_hdr_load_callback, req, BUS_DMA_NOWAIT))
957 		goto error_ack_free;
958 
959 	if (bus_dmamap_load(sc->vtblk_ack_dmat, req->vbr_ack_mapp,
960 	    req->vbr_ack, sizeof(uint8_t), vtblk_ack_load_callback,
961 	    req, BUS_DMA_NOWAIT))
962 		goto error_hdr_unload;
963 
964 	return (0);
965 
966 error_hdr_unload:
967 	bus_dmamap_unload(sc->vtblk_hdr_dmat, req->vbr_hdr_mapp);
968 error_ack_free:
969 	bus_dmamem_free(sc->vtblk_ack_dmat, req->vbr_ack, req->vbr_ack_mapp);
970 error_hdr_free:
971 	bus_dmamem_free(sc->vtblk_hdr_dmat, req->vbr_hdr, req->vbr_hdr_mapp);
972 error_destroy:
973 	bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
974 error_free:
975 
976 	return (ENOMEM);
977 }
978 
979 static int
vtblk_request_prealloc(struct vtblk_softc * sc)980 vtblk_request_prealloc(struct vtblk_softc *sc)
981 {
982 	struct vtblk_request *req;
983 	int i, nreqs;
984 	int error;
985 
986 	nreqs = virtqueue_size(sc->vtblk_vq);
987 
988 	/*
989 	 * Preallocate sufficient requests to keep the virtqueue full. Each
990 	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
991 	 * the number allocated when indirect descriptors are not available.
992 	 */
993 	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
994 		nreqs /= VTBLK_MIN_SEGMENTS;
995 
996 	for (i = 0; i < nreqs; i++) {
997 		req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
998 		if (req == NULL)
999 			return (ENOMEM);
1000 
1001 		error = vtblk_create_request(sc, req);
1002 		if (error) {
1003 			free(req, M_DEVBUF);
1004 			return (error);
1005 		}
1006 
1007 		sc->vtblk_request_count++;
1008 		vtblk_request_enqueue(sc, req);
1009 	}
1010 
1011 	error = vtblk_create_request(sc, &sc->vtblk_dump_request);
1012 
1013 	return (error);
1014 }
1015 
1016 static void
vtblk_request_free(struct vtblk_softc * sc)1017 vtblk_request_free(struct vtblk_softc *sc)
1018 {
1019 	struct vtblk_request *req;
1020 
1021 	MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
1022 
1023 	while ((req = vtblk_request_dequeue(sc)) != NULL) {
1024 		sc->vtblk_request_count--;
1025 		bus_dmamap_unload(sc->vtblk_ack_dmat, req->vbr_ack_mapp);
1026 		bus_dmamem_free(sc->vtblk_ack_dmat, req->vbr_ack,
1027 		    req->vbr_ack_mapp);
1028 		bus_dmamap_unload(sc->vtblk_hdr_dmat, req->vbr_hdr_mapp);
1029 		bus_dmamem_free(sc->vtblk_hdr_dmat, req->vbr_hdr,
1030 		    req->vbr_hdr_mapp);
1031 		bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
1032 	}
1033 
1034 	KASSERT(sc->vtblk_request_count == 0,
1035 	    ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
1036 }
1037 
1038 static struct vtblk_request *
vtblk_request_dequeue(struct vtblk_softc * sc)1039 vtblk_request_dequeue(struct vtblk_softc *sc)
1040 {
1041 	struct vtblk_request *req;
1042 
1043 	req = TAILQ_FIRST(&sc->vtblk_req_free);
1044 	if (req != NULL) {
1045 		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
1046 		bzero(req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
1047 		*req->vbr_ack = 0;
1048 		bzero(&req->vbr_bp, sizeof(struct vtblk_request) -
1049 		    offsetof(struct vtblk_request, vbr_bp));
1050 	}
1051 
1052 	return (req);
1053 }
1054 
1055 static void
vtblk_request_enqueue(struct vtblk_softc * sc,struct vtblk_request * req)1056 vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
1057 {
1058 
1059 	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
1060 }
1061 
1062 static struct vtblk_request *
vtblk_request_next_ready(struct vtblk_softc * sc)1063 vtblk_request_next_ready(struct vtblk_softc *sc)
1064 {
1065 	struct vtblk_request *req;
1066 
1067 	req = TAILQ_FIRST(&sc->vtblk_req_ready);
1068 	if (req != NULL)
1069 		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
1070 
1071 	return (req);
1072 }
1073 
1074 static void
vtblk_request_requeue_ready(struct vtblk_softc * sc,struct vtblk_request * req)1075 vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
1076 {
1077 
1078 	/* NOTE: Currently, there will be at most one request in the queue. */
1079 	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
1080 }
1081 
1082 static struct vtblk_request *
vtblk_request_next(struct vtblk_softc * sc)1083 vtblk_request_next(struct vtblk_softc *sc)
1084 {
1085 	struct vtblk_request *req;
1086 
1087 	req = vtblk_request_next_ready(sc);
1088 	if (req != NULL)
1089 		return (req);
1090 
1091 	return (vtblk_request_bio(sc));
1092 }
1093 
1094 static struct vtblk_request *
vtblk_request_bio(struct vtblk_softc * sc)1095 vtblk_request_bio(struct vtblk_softc *sc)
1096 {
1097 	struct bio_queue_head *bioq;
1098 	struct vtblk_request *req;
1099 	struct bio *bp;
1100 
1101 	bioq = &sc->vtblk_bioq;
1102 
1103 	if (bioq_first(bioq) == NULL)
1104 		return (NULL);
1105 
1106 	req = vtblk_request_dequeue(sc);
1107 	if (req == NULL)
1108 		return (NULL);
1109 
1110 	bp = bioq_takefirst(bioq);
1111 	req->vbr_bp = bp;
1112 	*req->vbr_ack = -1;
1113 	req->vbr_hdr->ioprio = vtblk_gtoh32(sc, 1);
1114 
1115 	switch (bp->bio_cmd) {
1116 	case BIO_FLUSH:
1117 		req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
1118 		req->vbr_hdr->sector = 0;
1119 		break;
1120 	case BIO_READ:
1121 		req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
1122 		req->vbr_hdr->sector = vtblk_gtoh64(sc, bp->bio_offset /
1123 		    VTBLK_BSIZE);
1124 		break;
1125 	case BIO_WRITE:
1126 		req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1127 		req->vbr_hdr->sector = vtblk_gtoh64(sc, bp->bio_offset /
1128 		    VTBLK_BSIZE);
1129 		break;
1130 	case BIO_DELETE:
1131 		req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
1132 		req->vbr_hdr->sector = vtblk_gtoh64(sc, bp->bio_offset /
1133 		    VTBLK_BSIZE);
1134 		break;
1135 	default:
1136 		panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
1137 	}
1138 
1139 	if (bp->bio_flags & BIO_ORDERED)
1140 		req->vbr_hdr->type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);
1141 
1142 	return (req);
1143 }
1144 
1145 static int
vtblk_request_execute(struct vtblk_request * req,int flags)1146 vtblk_request_execute(struct vtblk_request *req, int flags)
1147 {
1148 	struct vtblk_softc *sc = req->vbr_sc;
1149 	struct bio *bp = req->vbr_bp;
1150 	int error = 0;
1151 
1152 	/*
1153 	 * Call via bus_dmamap_load_bio or directly depending on whether we
1154 	 * have a buffer we need to map.  If we don't have a busdma map,
1155 	 * try to perform the I/O directly and hope that it works (this will
1156 	 * happen when dumping).
1157 	 */
1158 	if ((req->vbr_mapp != NULL) &&
1159 	    (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
1160 		error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
1161 		    req->vbr_bp, vtblk_request_execute_cb, req, flags);
1162 		if (error == EINPROGRESS) {
1163 			req->vbr_busdma_wait = 1;
1164 			sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
1165 		}
1166 	} else {
1167 		vtblk_request_execute_cb(req, NULL, 0, 0);
1168 	}
1169 
1170 	return (error ? error : req->vbr_error);
1171 }
1172 
1173 static void
vtblk_request_execute_cb(void * callback_arg,bus_dma_segment_t * segs,int nseg,int error)1174 vtblk_request_execute_cb(void * callback_arg, bus_dma_segment_t * segs,
1175     int nseg, int error)
1176 {
1177 	struct vtblk_request *req;
1178 	struct vtblk_softc *sc;
1179 	struct virtqueue *vq;
1180 	struct sglist *sg;
1181 	struct bio *bp;
1182 	int ordered, readable, writable, i;
1183 
1184 	req = (struct vtblk_request *)callback_arg;
1185 	sc = req->vbr_sc;
1186 	vq = sc->vtblk_vq;
1187 	sg = sc->vtblk_sglist;
1188 	bp = req->vbr_bp;
1189 	ordered = 0;
1190 	writable = 0;
1191 
1192 	/*
1193 	 * If we paused request queueing while we waited for busdma to call us
1194 	 * asynchronously, unpause it now; this request made it through so we
1195 	 * don't need to worry about others getting ahead of us.  (Note that we
1196 	 * hold the device mutex so nothing will happen until after we return
1197 	 * anyway.)
1198 	 */
1199 	if (req->vbr_busdma_wait)
1200 		sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;
1201 
1202 	/* Fail on errors from busdma. */
1203 	if (error)
1204 		goto out1;
1205 
1206 	/*
1207 	 * Some hosts (such as bhyve) do not implement the barrier feature,
1208 	 * so we emulate it in the driver by allowing the barrier request
1209 	 * to be the only one in flight.
1210 	 */
1211 	if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
1212 		if (sc->vtblk_req_ordered != NULL) {
1213 			error = EBUSY;
1214 			goto out;
1215 		}
1216 		if (bp->bio_flags & BIO_ORDERED) {
1217 			if (!virtqueue_empty(vq)) {
1218 				error = EBUSY;
1219 				goto out;
1220 			}
1221 			ordered = 1;
1222 			req->vbr_hdr->type &= vtblk_gtoh32(sc,
1223 				~VIRTIO_BLK_T_BARRIER);
1224 		}
1225 	}
1226 
1227 	bus_dmamap_sync(sc->vtblk_hdr_dmat, req->vbr_hdr_mapp,
1228 	    BUS_DMASYNC_PREWRITE);
1229 
1230 	sglist_reset(sg);
1231 	sglist_append_phys(sg, req->vbr_hdr_paddr,
1232 	    sizeof(struct virtio_blk_outhdr));
1233 
1234 	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
1235 		/*
1236 		 * We cast bus_addr_t to vm_paddr_t here; since we skip the
1237 		 * iommu mapping (see vtblk_attach) this should be safe.
1238 		 */
1239 		for (i = 0; i < nseg; i++) {
1240 			error = sglist_append_phys(sg,
1241 			    (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
1242 			if (error || sg->sg_nseg == sg->sg_maxseg) {
1243 				panic("%s: bio %p data buffer too big %d",
1244 				    __func__, bp, error);
1245 			}
1246 		}
1247 
1248 		/* Special handling for dump, which bypasses busdma. */
1249 		if (req->vbr_mapp == NULL) {
1250 			error = sglist_append_bio(sg, bp);
1251 			if (error || sg->sg_nseg == sg->sg_maxseg) {
1252 				panic("%s: bio %p data buffer too big %d",
1253 				    __func__, bp, error);
1254 			}
1255 		}
1256 
1257 		/* BIO_READ means the host writes into our buffer. */
1258 		if (bp->bio_cmd == BIO_READ)
1259 			writable = sg->sg_nseg - 1;
1260 	} else if (bp->bio_cmd == BIO_DELETE) {
1261 		struct virtio_blk_discard_write_zeroes *discard;
1262 
1263 		discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
1264 		if (discard == NULL) {
1265 			error = ENOMEM;
1266 			goto out;
1267 		}
1268 
1269 		bp->bio_driver1 = discard;
1270 		discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
1271 		discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
1272 		error = sglist_append(sg, discard, sizeof(*discard));
1273 		if (error || sg->sg_nseg == sg->sg_maxseg) {
1274 			panic("%s: bio %p data buffer too big %d",
1275 			    __func__, bp, error);
1276 		}
1277 	}
1278 
1279 	bus_dmamap_sync(sc->vtblk_ack_dmat, req->vbr_ack_mapp,
1280 	    BUS_DMASYNC_PREREAD);
1281 
1282 	writable++;
1283 	sglist_append_phys(sg, req->vbr_ack_paddr, sizeof(uint8_t));
1284 	readable = sg->sg_nseg - writable;
1285 
1286 	if (req->vbr_mapp != NULL) {
1287 		switch (bp->bio_cmd) {
1288 		case BIO_READ:
1289 			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1290 			    BUS_DMASYNC_PREREAD);
1291 			break;
1292 		case BIO_WRITE:
1293 			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1294 			    BUS_DMASYNC_PREWRITE);
1295 			break;
1296 		}
1297 	}
1298 
1299 	error = virtqueue_enqueue(vq, req, sg, readable, writable);
1300 	if (error == 0 && ordered)
1301 		sc->vtblk_req_ordered = req;
1302 
1303 	/*
1304 	 * If we were called asynchronously, we need to notify the queue that
1305 	 * we've added a new request, since the notification from startio was
1306 	 * performed already.
1307 	 */
1308 	if (error == 0 && req->vbr_busdma_wait)
1309 		virtqueue_notify(vq);
1310 
1311 out:
1312 	if (error && (req->vbr_mapp != NULL))
1313 		bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1314 out1:
1315 	if (error && req->vbr_requeue_on_error)
1316 		vtblk_request_requeue_ready(sc, req);
1317 	req->vbr_error = error;
1318 }
1319 
1320 static int
vtblk_request_error(struct vtblk_request * req)1321 vtblk_request_error(struct vtblk_request *req)
1322 {
1323 	int error;
1324 
1325 	bus_dmamap_sync(req->vbr_sc->vtblk_ack_dmat, req->vbr_ack_mapp,
1326 	    BUS_DMASYNC_POSTREAD);
1327 
1328 	switch (*req->vbr_ack) {
1329 	case VIRTIO_BLK_S_OK:
1330 		error = 0;
1331 		break;
1332 	case VIRTIO_BLK_S_UNSUPP:
1333 		error = ENOTSUP;
1334 		break;
1335 	default:
1336 		error = EIO;
1337 		break;
1338 	}
1339 
1340 	return (error);
1341 }
1342 
1343 static struct bio *
vtblk_queue_complete_one(struct vtblk_softc * sc,struct vtblk_request * req)1344 vtblk_queue_complete_one(struct vtblk_softc *sc, struct vtblk_request *req)
1345 {
1346 	struct bio *bp;
1347 
1348 	if (sc->vtblk_req_ordered != NULL) {
1349 		MPASS(sc->vtblk_req_ordered == req);
1350 		sc->vtblk_req_ordered = NULL;
1351 	}
1352 
1353 	bp = req->vbr_bp;
1354 	if (req->vbr_mapp != NULL) {
1355 		switch (bp->bio_cmd) {
1356 		case BIO_READ:
1357 			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1358 			    BUS_DMASYNC_POSTREAD);
1359 			bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1360 			break;
1361 		case BIO_WRITE:
1362 			bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1363 			    BUS_DMASYNC_POSTWRITE);
1364 			bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1365 			break;
1366 		}
1367 	}
1368 	bp->bio_error = vtblk_request_error(req);
1369 	return (bp);
1370 }
1371 
1372 static void
vtblk_queue_completed(struct vtblk_softc * sc,struct bio_queue * queue)1373 vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1374 {
1375 	struct vtblk_request *req;
1376 	struct bio *bp;
1377 
1378 	while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
1379 		bp = vtblk_queue_complete_one(sc, req);
1380 
1381 		TAILQ_INSERT_TAIL(queue, bp, bio_queue);
1382 		vtblk_request_enqueue(sc, req);
1383 	}
1384 }
1385 
1386 static void
vtblk_done_completed(struct vtblk_softc * sc,struct bio_queue * queue)1387 vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1388 {
1389 	struct bio *bp, *tmp;
1390 
1391 	TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
1392 		if (bp->bio_error != 0)
1393 			disk_err(bp, "hard error", -1, 1);
1394 		vtblk_bio_done(sc, bp, bp->bio_error);
1395 	}
1396 }
1397 
1398 static void
vtblk_drain_vq(struct vtblk_softc * sc)1399 vtblk_drain_vq(struct vtblk_softc *sc)
1400 {
1401 	struct virtqueue *vq;
1402 	struct vtblk_request *req;
1403 	int last;
1404 
1405 	vq = sc->vtblk_vq;
1406 	last = 0;
1407 
1408 	while ((req = virtqueue_drain(vq, &last)) != NULL) {
1409 		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1410 		vtblk_request_enqueue(sc, req);
1411 	}
1412 
1413 	sc->vtblk_req_ordered = NULL;
1414 	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1415 }
1416 
1417 static void
vtblk_drain(struct vtblk_softc * sc)1418 vtblk_drain(struct vtblk_softc *sc)
1419 {
1420 	struct bio_queue_head *bioq;
1421 	struct vtblk_request *req;
1422 	struct bio *bp;
1423 
1424 	bioq = &sc->vtblk_bioq;
1425 
1426 	if (sc->vtblk_vq != NULL) {
1427 		struct bio_queue queue;
1428 
1429 		TAILQ_INIT(&queue);
1430 		vtblk_queue_completed(sc, &queue);
1431 		vtblk_done_completed(sc, &queue);
1432 
1433 		vtblk_drain_vq(sc);
1434 	}
1435 
1436 	while ((req = vtblk_request_next_ready(sc)) != NULL) {
1437 		vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1438 		vtblk_request_enqueue(sc, req);
1439 	}
1440 
1441 	while (bioq_first(bioq) != NULL) {
1442 		bp = bioq_takefirst(bioq);
1443 		vtblk_bio_done(sc, bp, ENXIO);
1444 	}
1445 
1446 	vtblk_request_free(sc);
1447 }
1448 
1449 static void
vtblk_startio(struct vtblk_softc * sc)1450 vtblk_startio(struct vtblk_softc *sc)
1451 {
1452 	struct virtqueue *vq;
1453 	struct vtblk_request *req;
1454 	int enq;
1455 
1456 	VTBLK_LOCK_ASSERT(sc);
1457 	vq = sc->vtblk_vq;
1458 	enq = 0;
1459 
1460 	if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
1461 		return;
1462 
1463 	while (!virtqueue_full(vq)) {
1464 		req = vtblk_request_next(sc);
1465 		if (req == NULL)
1466 			break;
1467 
1468 		req->vbr_requeue_on_error = 1;
1469 		if (vtblk_request_execute(req, BUS_DMA_WAITOK))
1470 			break;
1471 
1472 		enq++;
1473 	}
1474 
1475 	if (enq > 0)
1476 		virtqueue_notify(vq);
1477 }
1478 
1479 static void
vtblk_bio_done(struct vtblk_softc * sc,struct bio * bp,int error)1480 vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1481 {
1482 
1483 	/* Because of GEOM direct dispatch, we cannot hold any locks. */
1484 	if (sc != NULL)
1485 		VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1486 
1487 	if (error) {
1488 		bp->bio_resid = bp->bio_bcount;
1489 		bp->bio_error = error;
1490 		bp->bio_flags |= BIO_ERROR;
1491 	} else {
1492 		kmsan_mark_bio(bp, KMSAN_STATE_INITED);
1493 	}
1494 
1495 	if (bp->bio_driver1 != NULL) {
1496 		free(bp->bio_driver1, M_DEVBUF);
1497 		bp->bio_driver1 = NULL;
1498 	}
1499 
1500 	biodone(bp);
1501 }
1502 
1503 #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)			\
1504 	if (virtio_with_feature(_dev, _feature)) {			\
1505 		virtio_read_device_config(_dev,				\
1506 		    offsetof(struct virtio_blk_config, _field),		\
1507 		    &(_cfg)->_field, sizeof((_cfg)->_field));		\
1508 	}
1509 
1510 static void
vtblk_read_config(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)1511 vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
1512 {
1513 	device_t dev;
1514 
1515 	dev = sc->vtblk_dev;
1516 
1517 	bzero(blkcfg, sizeof(struct virtio_blk_config));
1518 
1519 	/* The capacity is always available. */
1520 	virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
1521 	    capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));
1522 
1523 	/* Read the configuration if the feature was negotiated. */
1524 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
1525 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
1526 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1527 	    geometry.cylinders, blkcfg);
1528 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1529 	    geometry.heads, blkcfg);
1530 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1531 	    geometry.sectors, blkcfg);
1532 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
1533 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1534 	    topology.physical_block_exp, blkcfg);
1535 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1536 	    topology.alignment_offset, blkcfg);
1537 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1538 	    topology.min_io_size, blkcfg);
1539 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1540 	    topology.opt_io_size, blkcfg);
1541 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
1542 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
1543 	    blkcfg);
1544 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
1545 	VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
1546 	    blkcfg);
1547 }
1548 
1549 #undef VTBLK_GET_CONFIG
1550 
1551 static void
vtblk_ident(struct vtblk_softc * sc)1552 vtblk_ident(struct vtblk_softc *sc)
1553 {
1554 	struct bio buf;
1555 	struct disk *dp;
1556 	struct vtblk_request *req;
1557 	int len, error;
1558 
1559 	dp = sc->vtblk_disk;
1560 	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);
1561 
1562 	if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
1563 		return;
1564 
1565 	req = vtblk_request_dequeue(sc);
1566 	if (req == NULL)
1567 		return;
1568 
1569 	*req->vbr_ack = -1;
1570 	req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
1571 	req->vbr_hdr->ioprio = vtblk_gtoh32(sc, 1);
1572 	req->vbr_hdr->sector = 0;
1573 
1574 	req->vbr_bp = &buf;
1575 	g_reset_bio(&buf);
1576 
1577 	buf.bio_cmd = BIO_READ;
1578 	buf.bio_data = dp->d_ident;
1579 	buf.bio_bcount = len;
1580 
1581 	VTBLK_LOCK(sc);
1582 	error = vtblk_poll_request(sc, req);
1583 	VTBLK_UNLOCK(sc);
1584 
1585 	if (error) {
1586 		device_printf(sc->vtblk_dev,
1587 		    "error getting device identifier: %d\n", error);
1588 	}
1589 }
1590 
1591 static int
vtblk_poll_request(struct vtblk_softc * sc,struct vtblk_request * req)1592 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
1593 {
1594 	struct vtblk_request *req1 __diagused;
1595 	struct virtqueue *vq;
1596 	struct bio *bp;
1597 	int error;
1598 
1599 	vq = sc->vtblk_vq;
1600 
1601 	if (!virtqueue_empty(vq))
1602 		return (EBUSY);
1603 
1604 	error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
1605 	if (error)
1606 		return (error);
1607 
1608 	virtqueue_notify(vq);
1609 	req1 = virtqueue_poll(vq, NULL);
1610 	KASSERT(req == req1,
1611 	    ("%s: polling completed %p not %p", __func__, req1, req));
1612 
1613 	bp = vtblk_queue_complete_one(sc, req);
1614 	error = bp->bio_error;
1615 	if (error && bootverbose) {
1616 		device_printf(sc->vtblk_dev,
1617 		    "%s: IO error: %d\n", __func__, error);
1618 	}
1619 	if (req != &sc->vtblk_dump_request)
1620 		vtblk_request_enqueue(sc, req);
1621 
1622 	return (error);
1623 }
1624 
1625 static int
vtblk_quiesce(struct vtblk_softc * sc)1626 vtblk_quiesce(struct vtblk_softc *sc)
1627 {
1628 	int error;
1629 
1630 	VTBLK_LOCK_ASSERT(sc);
1631 	error = 0;
1632 
1633 	while (!virtqueue_empty(sc->vtblk_vq)) {
1634 		if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1635 		    VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1636 			error = EBUSY;
1637 			break;
1638 		}
1639 	}
1640 
1641 	return (error);
1642 }
1643 
1644 static void
vtblk_vq_intr(void * xsc)1645 vtblk_vq_intr(void *xsc)
1646 {
1647 	struct vtblk_softc *sc;
1648 	struct virtqueue *vq;
1649 	struct bio_queue queue;
1650 
1651 	sc = xsc;
1652 	vq = sc->vtblk_vq;
1653 	TAILQ_INIT(&queue);
1654 
1655 	VTBLK_LOCK(sc);
1656 
1657 again:
1658 	if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
1659 		goto out;
1660 
1661 	vtblk_queue_completed(sc, &queue);
1662 	vtblk_startio(sc);
1663 
1664 	if (virtqueue_enable_intr(vq) != 0) {
1665 		virtqueue_disable_intr(vq);
1666 		goto again;
1667 	}
1668 
1669 	if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1670 		wakeup(&sc->vtblk_vq);
1671 
1672 out:
1673 	VTBLK_UNLOCK(sc);
1674 	vtblk_done_completed(sc, &queue);
1675 }
1676 
1677 static void
vtblk_stop(struct vtblk_softc * sc)1678 vtblk_stop(struct vtblk_softc *sc)
1679 {
1680 
1681 	virtqueue_disable_intr(sc->vtblk_vq);
1682 	virtio_stop(sc->vtblk_dev);
1683 }
1684 
1685 static void
vtblk_dump_quiesce(struct vtblk_softc * sc)1686 vtblk_dump_quiesce(struct vtblk_softc *sc)
1687 {
1688 
1689 	/*
1690 	 * Spin here until all the requests in-flight at the time of the
1691 	 * dump are completed and queued. The queued requests will be
1692 	 * biodone'd once the dump is finished.
1693 	 */
1694 	while (!virtqueue_empty(sc->vtblk_vq))
1695 		vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
1696 }
1697 
1698 static int
vtblk_dump_write(struct vtblk_softc * sc,void * virtual,off_t offset,size_t length)1699 vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1700     size_t length)
1701 {
1702 	struct bio buf;
1703 	struct vtblk_request *req;
1704 
1705 	req = &sc->vtblk_dump_request;
1706 	req->vbr_sc = sc;
1707 	*req->vbr_ack = -1;
1708 	req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1709 	req->vbr_hdr->ioprio = vtblk_gtoh32(sc, 1);
1710 	req->vbr_hdr->sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);
1711 
1712 	req->vbr_bp = &buf;
1713 	g_reset_bio(&buf);
1714 
1715 	buf.bio_cmd = BIO_WRITE;
1716 	buf.bio_data = virtual;
1717 	buf.bio_bcount = length;
1718 
1719 	return (vtblk_poll_request(sc, req));
1720 }
1721 
1722 static int
vtblk_dump_flush(struct vtblk_softc * sc)1723 vtblk_dump_flush(struct vtblk_softc *sc)
1724 {
1725 	struct bio buf;
1726 	struct vtblk_request *req;
1727 
1728 	req = &sc->vtblk_dump_request;
1729 	req->vbr_sc = sc;
1730 	*req->vbr_ack = -1;
1731 	req->vbr_hdr->type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
1732 	req->vbr_hdr->ioprio = vtblk_gtoh32(sc, 1);
1733 	req->vbr_hdr->sector = 0;
1734 
1735 	req->vbr_bp = &buf;
1736 	g_reset_bio(&buf);
1737 
1738 	buf.bio_cmd = BIO_FLUSH;
1739 
1740 	return (vtblk_poll_request(sc, req));
1741 }
1742 
1743 static void
vtblk_dump_complete(struct vtblk_softc * sc)1744 vtblk_dump_complete(struct vtblk_softc *sc)
1745 {
1746 
1747 	vtblk_dump_flush(sc);
1748 
1749 	VTBLK_UNLOCK(sc);
1750 	vtblk_done_completed(sc, &sc->vtblk_dump_queue);
1751 	VTBLK_LOCK(sc);
1752 }
1753 
1754 static void
vtblk_set_write_cache(struct vtblk_softc * sc,int wc)1755 vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1756 {
1757 
1758 	/* Set either writeback (1) or writethrough (0) mode. */
1759 	virtio_write_dev_config_1(sc->vtblk_dev,
1760 	    offsetof(struct virtio_blk_config, wce), wc);
1761 }
1762 
1763 static int
vtblk_write_cache_enabled(struct vtblk_softc * sc,struct virtio_blk_config * blkcfg)1764 vtblk_write_cache_enabled(struct vtblk_softc *sc,
1765     struct virtio_blk_config *blkcfg)
1766 {
1767 	int wc;
1768 
1769 	if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
1770 		wc = vtblk_tunable_int(sc, "writecache_mode",
1771 		    vtblk_writecache_mode);
1772 		if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1773 			vtblk_set_write_cache(sc, wc);
1774 		else
1775 			wc = blkcfg->wce;
1776 	} else
1777 		wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);
1778 
1779 	return (wc);
1780 }
1781 
1782 static int
vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)1783 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1784 {
1785 	struct vtblk_softc *sc;
1786 	int wc, error;
1787 
1788 	sc = oidp->oid_arg1;
1789 	wc = sc->vtblk_write_cache;
1790 
1791 	error = sysctl_handle_int(oidp, &wc, 0, req);
1792 	if (error || req->newptr == NULL)
1793 		return (error);
1794 	if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
1795 		return (EPERM);
1796 	if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1797 		return (EINVAL);
1798 
1799 	VTBLK_LOCK(sc);
1800 	sc->vtblk_write_cache = wc;
1801 	vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1802 	VTBLK_UNLOCK(sc);
1803 
1804 	return (0);
1805 }
1806 
1807 static void
vtblk_setup_sysctl(struct vtblk_softc * sc)1808 vtblk_setup_sysctl(struct vtblk_softc *sc)
1809 {
1810 	device_t dev;
1811 	struct sysctl_ctx_list *ctx;
1812 	struct sysctl_oid *tree;
1813 	struct sysctl_oid_list *child;
1814 
1815 	dev = sc->vtblk_dev;
1816 	ctx = device_get_sysctl_ctx(dev);
1817 	tree = device_get_sysctl_tree(dev);
1818 	child = SYSCTL_CHILDREN(tree);
1819 
1820 	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1821 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1822 	    vtblk_write_cache_sysctl, "I",
1823 	    "Write cache mode (writethrough (0) or writeback (1))");
1824 }
1825 
1826 static int
vtblk_tunable_int(struct vtblk_softc * sc,const char * knob,int def)1827 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1828 {
1829 	char path[64];
1830 
1831 	snprintf(path, sizeof(path),
1832 	    "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1833 	TUNABLE_INT_FETCH(path, &def);
1834 
1835 	return (def);
1836 }
1837