1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /* Driver for VirtIO block devices. */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kernel.h>
34 #include <sys/bio.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/msan.h>
38 #include <sys/sglist.h>
39 #include <sys/sysctl.h>
40 #include <sys/lock.h>
41 #include <sys/mutex.h>
42 #include <sys/queue.h>
43
44 #include <geom/geom.h>
45 #include <geom/geom_disk.h>
46
47 #include <machine/bus.h>
48 #include <machine/resource.h>
49 #include <sys/bus.h>
50 #include <sys/rman.h>
51
52 #include <dev/virtio/virtio.h>
53 #include <dev/virtio/virtqueue.h>
54 #include <dev/virtio/block/virtio_blk.h>
55
56 #include "virtio_if.h"
57
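/*
 * A preallocated vtblk_request tracks a single in-flight bio along with
 * its busdma map. vbr_busdma_wait is set while a deferred busdma load
 * callback is outstanding; vbr_requeue_on_error places a request that
 * could not be enqueued back on the ready queue so it can be retried.
 */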
58 struct vtblk_request {
59 struct vtblk_softc *vbr_sc;
60 bus_dmamap_t vbr_mapp;
61
62 /* Fields after this point are zeroed for each request. */
63 struct virtio_blk_outhdr vbr_hdr;
64 struct bio *vbr_bp;
65 uint8_t vbr_ack;
66 uint8_t vbr_requeue_on_error;
67 uint8_t vbr_busdma_wait;
68 int vbr_error;
69 TAILQ_ENTRY(vtblk_request) vbr_link;
70 };
71
72 enum vtblk_cache_mode {
73 VTBLK_CACHE_WRITETHROUGH,
74 VTBLK_CACHE_WRITEBACK,
75 VTBLK_CACHE_MAX
76 };
77
78 struct vtblk_softc {
79 device_t vtblk_dev;
80 struct mtx vtblk_mtx;
81 uint64_t vtblk_features;
82 uint32_t vtblk_flags;
83 #define VTBLK_FLAG_INDIRECT 0x0001
84 #define VTBLK_FLAG_DETACH 0x0002
85 #define VTBLK_FLAG_SUSPEND 0x0004
86 #define VTBLK_FLAG_BARRIER 0x0008
87 #define VTBLK_FLAG_WCE_CONFIG 0x0010
88 #define VTBLK_FLAG_BUSDMA_WAIT 0x0020
89 #define VTBLK_FLAG_BUSDMA_ALIGN 0x0040
90
91 struct virtqueue *vtblk_vq;
92 struct sglist *vtblk_sglist;
93 bus_dma_tag_t vtblk_dmat;
94 struct disk *vtblk_disk;
95
96 struct bio_queue_head vtblk_bioq;
97 TAILQ_HEAD(, vtblk_request)
98 vtblk_req_free;
99 TAILQ_HEAD(, vtblk_request)
100 vtblk_req_ready;
101 struct vtblk_request *vtblk_req_ordered;
102
103 int vtblk_max_nsegs;
104 int vtblk_request_count;
105 enum vtblk_cache_mode vtblk_write_cache;
106
107 struct bio_queue vtblk_dump_queue;
108 struct vtblk_request vtblk_dump_request;
109 };
110
111 static struct virtio_feature_desc vtblk_feature_desc[] = {
112 { VIRTIO_BLK_F_BARRIER, "HostBarrier" },
113 { VIRTIO_BLK_F_SIZE_MAX, "MaxSegSize" },
114 { VIRTIO_BLK_F_SEG_MAX, "MaxNumSegs" },
115 { VIRTIO_BLK_F_GEOMETRY, "DiskGeometry" },
116 { VIRTIO_BLK_F_RO, "ReadOnly" },
117 { VIRTIO_BLK_F_BLK_SIZE, "BlockSize" },
118 { VIRTIO_BLK_F_SCSI, "SCSICmds" },
119 { VIRTIO_BLK_F_FLUSH, "FlushCmd" },
120 { VIRTIO_BLK_F_TOPOLOGY, "Topology" },
121 { VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" },
122 { VIRTIO_BLK_F_MQ, "Multiqueue" },
123 { VIRTIO_BLK_F_DISCARD, "Discard" },
124 { VIRTIO_BLK_F_WRITE_ZEROES, "WriteZeros" },
125
126 { 0, NULL }
127 };
128
129 static int vtblk_modevent(module_t, int, void *);
130
131 static int vtblk_probe(device_t);
132 static int vtblk_attach(device_t);
133 static int vtblk_detach(device_t);
134 static int vtblk_suspend(device_t);
135 static int vtblk_resume(device_t);
136 static int vtblk_shutdown(device_t);
137 static int vtblk_attach_completed(device_t);
138 static int vtblk_config_change(device_t);
139
140 static int vtblk_open(struct disk *);
141 static int vtblk_close(struct disk *);
142 static int vtblk_ioctl(struct disk *, u_long, void *, int,
143 struct thread *);
144 static int vtblk_dump(void *, void *, off_t, size_t);
145 static void vtblk_strategy(struct bio *);
146
147 static int vtblk_negotiate_features(struct vtblk_softc *);
148 static int vtblk_setup_features(struct vtblk_softc *);
149 static int vtblk_maximum_segments(struct vtblk_softc *,
150 struct virtio_blk_config *);
151 static int vtblk_alloc_virtqueue(struct vtblk_softc *);
152 static void vtblk_resize_disk(struct vtblk_softc *, uint64_t);
153 static void vtblk_alloc_disk(struct vtblk_softc *,
154 struct virtio_blk_config *);
155 static void vtblk_create_disk(struct vtblk_softc *);
156
157 static int vtblk_request_prealloc(struct vtblk_softc *);
158 static void vtblk_request_free(struct vtblk_softc *);
159 static struct vtblk_request *
160 vtblk_request_dequeue(struct vtblk_softc *);
161 static void vtblk_request_enqueue(struct vtblk_softc *,
162 struct vtblk_request *);
163 static struct vtblk_request *
164 vtblk_request_next_ready(struct vtblk_softc *);
165 static void vtblk_request_requeue_ready(struct vtblk_softc *,
166 struct vtblk_request *);
167 static struct vtblk_request *
168 vtblk_request_next(struct vtblk_softc *);
169 static struct vtblk_request *
170 vtblk_request_bio(struct vtblk_softc *);
171 static int vtblk_request_execute(struct vtblk_request *, int);
172 static void vtblk_request_execute_cb(void *,
173 bus_dma_segment_t *, int, int);
174 static int vtblk_request_error(struct vtblk_request *);
175
176 static void vtblk_queue_completed(struct vtblk_softc *,
177 struct bio_queue *);
178 static void vtblk_done_completed(struct vtblk_softc *,
179 struct bio_queue *);
180 static void vtblk_drain_vq(struct vtblk_softc *);
181 static void vtblk_drain(struct vtblk_softc *);
182
183 static void vtblk_startio(struct vtblk_softc *);
184 static void vtblk_bio_done(struct vtblk_softc *, struct bio *, int);
185
186 static void vtblk_read_config(struct vtblk_softc *,
187 struct virtio_blk_config *);
188 static void vtblk_ident(struct vtblk_softc *);
189 static int vtblk_poll_request(struct vtblk_softc *,
190 struct vtblk_request *);
191 static int vtblk_quiesce(struct vtblk_softc *);
192 static void vtblk_vq_intr(void *);
193 static void vtblk_stop(struct vtblk_softc *);
194
195 static void vtblk_dump_quiesce(struct vtblk_softc *);
196 static int vtblk_dump_write(struct vtblk_softc *, void *, off_t, size_t);
197 static int vtblk_dump_flush(struct vtblk_softc *);
198 static void vtblk_dump_complete(struct vtblk_softc *);
199
200 static void vtblk_set_write_cache(struct vtblk_softc *, int);
201 static int vtblk_write_cache_enabled(struct vtblk_softc *sc,
202 struct virtio_blk_config *);
203 static int vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS);
204
205 static void vtblk_setup_sysctl(struct vtblk_softc *);
206 static int vtblk_tunable_int(struct vtblk_softc *, const char *, int);
207
208 #define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0)
209 #define vtblk_htog16(_sc, _val) virtio_htog16(vtblk_modern(_sc), _val)
210 #define vtblk_htog32(_sc, _val) virtio_htog32(vtblk_modern(_sc), _val)
211 #define vtblk_htog64(_sc, _val) virtio_htog64(vtblk_modern(_sc), _val)
212 #define vtblk_gtoh16(_sc, _val) virtio_gtoh16(vtblk_modern(_sc), _val)
213 #define vtblk_gtoh32(_sc, _val) virtio_gtoh32(vtblk_modern(_sc), _val)
214 #define vtblk_gtoh64(_sc, _val) virtio_gtoh64(vtblk_modern(_sc), _val)
215
216 /* Tunables. */
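/* Skip the VIRTIO_BLK_T_GET_ID request used to set the disk ident. */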
217 static int vtblk_no_ident = 0;
218 TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);
219 static int vtblk_writecache_mode = -1;
220 TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode);
221
222 #define VTBLK_COMMON_FEATURES \
223 (VIRTIO_BLK_F_SIZE_MAX | \
224 VIRTIO_BLK_F_SEG_MAX | \
225 VIRTIO_BLK_F_GEOMETRY | \
226 VIRTIO_BLK_F_RO | \
227 VIRTIO_BLK_F_BLK_SIZE | \
228 VIRTIO_BLK_F_FLUSH | \
229 VIRTIO_BLK_F_TOPOLOGY | \
230 VIRTIO_BLK_F_CONFIG_WCE | \
231 VIRTIO_BLK_F_DISCARD | \
232 VIRTIO_RING_F_INDIRECT_DESC)
233
234 #define VTBLK_MODERN_FEATURES (VTBLK_COMMON_FEATURES)
235 #define VTBLK_LEGACY_FEATURES (VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES)
236
237 #define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx
238 #define VTBLK_LOCK_INIT(_sc, _name) \
239 mtx_init(VTBLK_MTX((_sc)), (_name), \
240 "VirtIO Block Lock", MTX_DEF)
241 #define VTBLK_LOCK(_sc) mtx_lock(VTBLK_MTX((_sc)))
242 #define VTBLK_UNLOCK(_sc) mtx_unlock(VTBLK_MTX((_sc)))
243 #define VTBLK_LOCK_DESTROY(_sc) mtx_destroy(VTBLK_MTX((_sc)))
244 #define VTBLK_LOCK_ASSERT(_sc) mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
245 #define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
246 mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)
247
248 #define VTBLK_DISK_NAME "vtbd"
249 #define VTBLK_QUIESCE_TIMEOUT (30 * hz)
250 #define VTBLK_BSIZE 512
251
252 /*
253 * Each block request uses at least two segments - one for the header
254 * and one for the status.
255 */
256 #define VTBLK_MIN_SEGMENTS 2
257
258 static device_method_t vtblk_methods[] = {
259 /* Device methods. */
260 DEVMETHOD(device_probe, vtblk_probe),
261 DEVMETHOD(device_attach, vtblk_attach),
262 DEVMETHOD(device_detach, vtblk_detach),
263 DEVMETHOD(device_suspend, vtblk_suspend),
264 DEVMETHOD(device_resume, vtblk_resume),
265 DEVMETHOD(device_shutdown, vtblk_shutdown),
266
267 /* VirtIO methods. */
268 DEVMETHOD(virtio_attach_completed, vtblk_attach_completed),
269 DEVMETHOD(virtio_config_change, vtblk_config_change),
270
271 DEVMETHOD_END
272 };
273
274 static driver_t vtblk_driver = {
275 "vtblk",
276 vtblk_methods,
277 sizeof(struct vtblk_softc)
278 };
279
280 VIRTIO_DRIVER_MODULE(virtio_blk, vtblk_driver, vtblk_modevent, NULL);
281 MODULE_VERSION(virtio_blk, 1);
282 MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);
283
284 VIRTIO_SIMPLE_PNPINFO(virtio_blk, VIRTIO_ID_BLOCK, "VirtIO Block Adapter");
285
286 static int
287 vtblk_modevent(module_t mod, int type, void *unused)
288 {
289 int error;
290
291 error = 0;
292
293 switch (type) {
294 case MOD_LOAD:
295 case MOD_QUIESCE:
296 case MOD_UNLOAD:
297 case MOD_SHUTDOWN:
298 break;
299 default:
300 error = EOPNOTSUPP;
301 break;
302 }
303
304 return (error);
305 }
306
307 static int
308 vtblk_probe(device_t dev)
309 {
310 return (VIRTIO_SIMPLE_PROBE(dev, virtio_blk));
311 }
312
313 static int
314 vtblk_attach(device_t dev)
315 {
316 struct vtblk_softc *sc;
317 struct virtio_blk_config blkcfg;
318 int error;
319
320 sc = device_get_softc(dev);
321 sc->vtblk_dev = dev;
322 virtio_set_feature_desc(dev, vtblk_feature_desc);
323
324 VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));
325 bioq_init(&sc->vtblk_bioq);
326 TAILQ_INIT(&sc->vtblk_dump_queue);
327 TAILQ_INIT(&sc->vtblk_req_free);
328 TAILQ_INIT(&sc->vtblk_req_ready);
329
330 vtblk_setup_sysctl(sc);
331
332 error = vtblk_setup_features(sc);
333 if (error) {
334 device_printf(dev, "cannot setup features\n");
335 goto fail;
336 }
337
338 vtblk_read_config(sc, &blkcfg);
339
340 /*
341 * With the current sglist(9) implementation, it is not easy
342 * for us to support a maximum segment size as adjacent
343 * segments are coalesced. For now, just make sure it's larger
344 * than the maximum supported transfer size.
345 */
346 if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
347 if (blkcfg.size_max < maxphys) {
348 error = ENOTSUP;
349 device_printf(dev, "host requires unsupported "
350 "maximum segment size feature\n");
351 goto fail;
352 }
353 }
354
355 sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);
356 if (sc->vtblk_max_nsegs <= VTBLK_MIN_SEGMENTS) {
357 error = EINVAL;
358 device_printf(dev, "fewer than minimum number of segments "
359 "allowed: %d\n", sc->vtblk_max_nsegs);
360 goto fail;
361 }
362
363 sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
364 if (sc->vtblk_sglist == NULL) {
365 error = ENOMEM;
366 device_printf(dev, "cannot allocate sglist\n");
367 goto fail;
368 }
369
370 /*
371 * If vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1, the device only
372 * supports a single data segment; in that case we need busdma to
373 * align to a page boundary so we can send a *contiguous* page size
374 * request to the host.
375 */
376 if (sc->vtblk_max_nsegs == VTBLK_MIN_SEGMENTS + 1)
377 sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_ALIGN;
378 error = bus_dma_tag_create(
379 bus_get_dma_tag(dev), /* parent */
380 (sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) ? PAGE_SIZE : 1,
381 0, /* boundary */
382 BUS_SPACE_MAXADDR, /* lowaddr */
383 BUS_SPACE_MAXADDR, /* highaddr */
384 NULL, NULL, /* filter, filterarg */
385 maxphys, /* max request size */
386 sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS, /* max # segments */
387 maxphys, /* maxsegsize */
388 0, /* flags */
389 busdma_lock_mutex, /* lockfunc */
390 &sc->vtblk_mtx, /* lockarg */
391 &sc->vtblk_dmat);
392 if (error) {
393 device_printf(dev, "cannot create bus dma tag\n");
394 goto fail;
395 }
396
397 #ifdef __powerpc__
398 /*
399 * Virtio uses physical addresses rather than bus addresses, so we
400 * need to ask busdma to skip the iommu physical->bus mapping. At
401 * present, this is only a thing on the powerpc architectures.
402 */
403 bus_dma_tag_set_iommu(sc->vtblk_dmat, NULL, NULL);
404 #endif
405
406 error = vtblk_alloc_virtqueue(sc);
407 if (error) {
408 device_printf(dev, "cannot allocate virtqueue\n");
409 goto fail;
410 }
411
412 error = vtblk_request_prealloc(sc);
413 if (error) {
414 device_printf(dev, "cannot preallocate requests\n");
415 goto fail;
416 }
417
418 vtblk_alloc_disk(sc, &blkcfg);
419
420 error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
421 if (error) {
422 device_printf(dev, "cannot setup virtqueue interrupt\n");
423 goto fail;
424 }
425
426 virtqueue_enable_intr(sc->vtblk_vq);
427
428 fail:
429 if (error)
430 vtblk_detach(dev);
431
432 return (error);
433 }
434
435 static int
436 vtblk_detach(device_t dev)
437 {
438 struct vtblk_softc *sc;
439
440 sc = device_get_softc(dev);
441
442 VTBLK_LOCK(sc);
443 sc->vtblk_flags |= VTBLK_FLAG_DETACH;
444 if (device_is_attached(dev))
445 vtblk_stop(sc);
446 VTBLK_UNLOCK(sc);
447
448 vtblk_drain(sc);
449
450 if (sc->vtblk_disk != NULL) {
451 disk_destroy(sc->vtblk_disk);
452 sc->vtblk_disk = NULL;
453 }
454
455 if (sc->vtblk_dmat != NULL) {
456 bus_dma_tag_destroy(sc->vtblk_dmat);
457 sc->vtblk_dmat = NULL;
458 }
459
460 if (sc->vtblk_sglist != NULL) {
461 sglist_free(sc->vtblk_sglist);
462 sc->vtblk_sglist = NULL;
463 }
464
465 VTBLK_LOCK_DESTROY(sc);
466
467 return (0);
468 }
469
470 static int
471 vtblk_suspend(device_t dev)
472 {
473 struct vtblk_softc *sc;
474 int error;
475
476 sc = device_get_softc(dev);
477
478 VTBLK_LOCK(sc);
479 sc->vtblk_flags |= VTBLK_FLAG_SUSPEND;
480 /* XXX BMV: virtio_stop(), etc needed here? */
481 error = vtblk_quiesce(sc);
482 if (error)
483 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
484 VTBLK_UNLOCK(sc);
485
486 return (error);
487 }
488
489 static int
490 vtblk_resume(device_t dev)
491 {
492 struct vtblk_softc *sc;
493
494 sc = device_get_softc(dev);
495
496 VTBLK_LOCK(sc);
497 /* XXX BMV: virtio_reinit(), etc needed here? */
498 sc->vtblk_flags &= ~VTBLK_FLAG_SUSPEND;
499 vtblk_startio(sc);
500 VTBLK_UNLOCK(sc);
501
502 return (0);
503 }
504
505 static int
506 vtblk_shutdown(device_t dev)
507 {
508
509 return (0);
510 }
511
512 static int
513 vtblk_attach_completed(device_t dev)
514 {
515 struct vtblk_softc *sc;
516
517 sc = device_get_softc(dev);
518
519 /*
520 * Create the disk only after attach has completed, as the
521 * VIRTIO_BLK_T_GET_ID request can only be processed once the driver
522 * has set VIRTIO_CONFIG_STATUS_DRIVER_OK.
523 */
524 vtblk_create_disk(sc);
525 return (0);
526 }
527
528 static int
529 vtblk_config_change(device_t dev)
530 {
531 struct vtblk_softc *sc;
532 struct virtio_blk_config blkcfg;
533 uint64_t capacity;
534
535 sc = device_get_softc(dev);
536
537 vtblk_read_config(sc, &blkcfg);
538
539 /* Capacity is always in 512-byte units. */
540 capacity = blkcfg.capacity * VTBLK_BSIZE;
541
542 if (sc->vtblk_disk->d_mediasize != capacity)
543 vtblk_resize_disk(sc, capacity);
544
545 return (0);
546 }
547
548 static int
549 vtblk_open(struct disk *dp)
550 {
551 struct vtblk_softc *sc;
552
553 if ((sc = dp->d_drv1) == NULL)
554 return (ENXIO);
555
556 return (sc->vtblk_flags & VTBLK_FLAG_DETACH ? ENXIO : 0);
557 }
558
559 static int
560 vtblk_close(struct disk *dp)
561 {
562 struct vtblk_softc *sc;
563
564 if ((sc = dp->d_drv1) == NULL)
565 return (ENXIO);
566
567 return (0);
568 }
569
570 static int
571 vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
572 struct thread *td)
573 {
574 struct vtblk_softc *sc;
575
576 if ((sc = dp->d_drv1) == NULL)
577 return (ENXIO);
578
579 return (ENOTTY);
580 }
581
582 static int
583 vtblk_dump(void *arg, void *virtual, off_t offset, size_t length)
584 {
585 struct disk *dp;
586 struct vtblk_softc *sc;
587 int error;
588
589 dp = arg;
590 error = 0;
591
592 if ((sc = dp->d_drv1) == NULL)
593 return (ENXIO);
594
595 VTBLK_LOCK(sc);
596
597 vtblk_dump_quiesce(sc);
598
599 if (length > 0)
600 error = vtblk_dump_write(sc, virtual, offset, length);
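/* A call with no buffer and a zero offset marks the end of the dump. */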
601 if (error || (virtual == NULL && offset == 0))
602 vtblk_dump_complete(sc);
603
604 VTBLK_UNLOCK(sc);
605
606 return (error);
607 }
608
609 static void
610 vtblk_strategy(struct bio *bp)
611 {
612 struct vtblk_softc *sc;
613
614 if ((sc = bp->bio_disk->d_drv1) == NULL) {
615 vtblk_bio_done(NULL, bp, EINVAL);
616 return;
617 }
618
619 if ((bp->bio_cmd != BIO_READ) && (bp->bio_cmd != BIO_WRITE) &&
620 (bp->bio_cmd != BIO_FLUSH) && (bp->bio_cmd != BIO_DELETE)) {
621 vtblk_bio_done(sc, bp, EOPNOTSUPP);
622 return;
623 }
624
625 VTBLK_LOCK(sc);
626
627 if (sc->vtblk_flags & VTBLK_FLAG_DETACH) {
628 VTBLK_UNLOCK(sc);
629 vtblk_bio_done(sc, bp, ENXIO);
630 return;
631 }
632
633 bioq_insert_tail(&sc->vtblk_bioq, bp);
634 vtblk_startio(sc);
635
636 VTBLK_UNLOCK(sc);
637 }
638
639 static int
640 vtblk_negotiate_features(struct vtblk_softc *sc)
641 {
642 device_t dev;
643 uint64_t features;
644
645 dev = sc->vtblk_dev;
646 features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES :
647 VTBLK_LEGACY_FEATURES;
648
649 sc->vtblk_features = virtio_negotiate_features(dev, features);
650 return (virtio_finalize_features(dev));
651 }
652
653 static int
654 vtblk_setup_features(struct vtblk_softc *sc)
655 {
656 device_t dev;
657 int error;
658
659 dev = sc->vtblk_dev;
660
661 error = vtblk_negotiate_features(sc);
662 if (error)
663 return (error);
664
665 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
666 sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;
667 if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE))
668 sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG;
669
670 /* Legacy. */
671 if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER))
672 sc->vtblk_flags |= VTBLK_FLAG_BARRIER;
673
674 return (0);
675 }
676
677 static int
678 vtblk_maximum_segments(struct vtblk_softc *sc,
679 struct virtio_blk_config *blkcfg)
680 {
681 device_t dev;
682 int nsegs;
683
684 dev = sc->vtblk_dev;
685 nsegs = VTBLK_MIN_SEGMENTS;
686
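/*
 * The data buffer spans at most maxphys bytes, which can cover up to
 * maxphys / PAGE_SIZE + 1 non-contiguous pages when it is not page
 * aligned, so clamp the host's seg_max accordingly.
 */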
687 if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
688 nsegs += MIN(blkcfg->seg_max, maxphys / PAGE_SIZE + 1);
689 if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
690 nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
691 } else
692 nsegs += 1;
693
694 return (nsegs);
695 }
696
697 static int
698 vtblk_alloc_virtqueue(struct vtblk_softc *sc)
699 {
700 device_t dev;
701 struct vq_alloc_info vq_info;
702 int indir_segs;
703
704 dev = sc->vtblk_dev;
705
706 indir_segs = 0;
707 if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
708 indir_segs = sc->vtblk_max_nsegs;
709 VQ_ALLOC_INFO_INIT(&vq_info, indir_segs,
710 vtblk_vq_intr, sc, &sc->vtblk_vq,
711 "%s request", device_get_nameunit(dev));
712
713 return (virtio_alloc_virtqueues(dev, 1, &vq_info));
714 }
715
716 static void
717 vtblk_resize_disk(struct vtblk_softc *sc, uint64_t new_capacity)
718 {
719 device_t dev;
720 struct disk *dp;
721 int error;
722
723 dev = sc->vtblk_dev;
724 dp = sc->vtblk_disk;
725
726 dp->d_mediasize = new_capacity;
727 if (bootverbose) {
728 device_printf(dev, "resized to %juMB (%ju %u byte sectors)\n",
729 (uintmax_t) dp->d_mediasize >> 20,
730 (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
731 dp->d_sectorsize);
732 }
733
734 error = disk_resize(dp, M_NOWAIT);
735 if (error) {
736 device_printf(dev,
737 "disk_resize(9) failed, error: %d\n", error);
738 }
739 }
740
741 static void
742 vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
743 {
744 device_t dev;
745 struct disk *dp;
746
747 dev = sc->vtblk_dev;
748
749 sc->vtblk_disk = dp = disk_alloc();
750 dp->d_open = vtblk_open;
751 dp->d_close = vtblk_close;
752 dp->d_ioctl = vtblk_ioctl;
753 dp->d_strategy = vtblk_strategy;
754 dp->d_name = VTBLK_DISK_NAME;
755 dp->d_unit = device_get_unit(dev);
756 dp->d_drv1 = sc;
757 dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION;
758 dp->d_hba_vendor = virtio_get_vendor(dev);
759 dp->d_hba_device = virtio_get_device(dev);
760 dp->d_hba_subvendor = virtio_get_subvendor(dev);
761 dp->d_hba_subdevice = virtio_get_subdevice(dev);
762 strlcpy(dp->d_attachment, device_get_nameunit(dev),
763 sizeof(dp->d_attachment));
764
765 if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
766 dp->d_flags |= DISKFLAG_WRITE_PROTECT;
767 else {
768 if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
769 dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
770 dp->d_dump = vtblk_dump;
771 }
772
773 /* Capacity is always in 512-byte units. */
774 dp->d_mediasize = blkcfg->capacity * VTBLK_BSIZE;
775
776 if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
777 dp->d_sectorsize = blkcfg->blk_size;
778 else
779 dp->d_sectorsize = VTBLK_BSIZE;
780
781 /*
782 * The VirtIO maximum I/O size is given in terms of segments.
783 * However, FreeBSD limits I/O size by logical buffer size, not
784 * by physically contiguous pages. Therefore, we have to assume
785 * no pages are contiguous. This may impose an artificially low
786 * maximum I/O size. But in practice, since QEMU advertises 128
787 * segments, this gives us a maximum IO size of 125 * PAGE_SIZE,
788 * which is typically greater than maxphys. Eventually we should
789 * just advertise maxphys and split buffers that are too big.
790 *
791 * If we're not asking busdma to align data to page boundaries, the
792 * maximum I/O size is reduced by PAGE_SIZE in order to accommodate
793 * unaligned I/Os.
794 */
795 dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS) *
796 PAGE_SIZE;
797 if ((sc->vtblk_flags & VTBLK_FLAG_BUSDMA_ALIGN) == 0)
798 dp->d_maxsize -= PAGE_SIZE;
799
800 if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
801 dp->d_fwsectors = blkcfg->geometry.sectors;
802 dp->d_fwheads = blkcfg->geometry.heads;
803 }
804
805 if (virtio_with_feature(dev, VIRTIO_BLK_F_TOPOLOGY) &&
806 blkcfg->topology.physical_block_exp > 0) {
807 dp->d_stripesize = dp->d_sectorsize *
808 (1 << blkcfg->topology.physical_block_exp);
809 dp->d_stripeoffset = (dp->d_stripesize -
810 blkcfg->topology.alignment_offset * dp->d_sectorsize) %
811 dp->d_stripesize;
812 }
813
814 if (virtio_with_feature(dev, VIRTIO_BLK_F_DISCARD)) {
815 dp->d_flags |= DISKFLAG_CANDELETE;
816 dp->d_delmaxsize = blkcfg->max_discard_sectors * VTBLK_BSIZE;
817 }
818
819 if (vtblk_write_cache_enabled(sc, blkcfg) != 0)
820 sc->vtblk_write_cache = VTBLK_CACHE_WRITEBACK;
821 else
822 sc->vtblk_write_cache = VTBLK_CACHE_WRITETHROUGH;
823 }
824
825 static void
826 vtblk_create_disk(struct vtblk_softc *sc)
827 {
828 struct disk *dp;
829
830 dp = sc->vtblk_disk;
831
832 vtblk_ident(sc);
833
834 device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
835 (uintmax_t) dp->d_mediasize >> 20,
836 (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
837 dp->d_sectorsize);
838
839 disk_create(dp, DISK_VERSION);
840 }
841
842 static int
843 vtblk_request_prealloc(struct vtblk_softc *sc)
844 {
845 struct vtblk_request *req;
846 int i, nreqs;
847
848 nreqs = virtqueue_size(sc->vtblk_vq);
849
850 /*
851 * Preallocate sufficient requests to keep the virtqueue full. Each
852 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
853 * the number allocated when indirect descriptors are not available.
854 */
855 if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
856 nreqs /= VTBLK_MIN_SEGMENTS;
857
858 for (i = 0; i < nreqs; i++) {
859 req = malloc(sizeof(struct vtblk_request), M_DEVBUF, M_NOWAIT);
860 if (req == NULL)
861 return (ENOMEM);
862
863 req->vbr_sc = sc;
864 if (bus_dmamap_create(sc->vtblk_dmat, 0, &req->vbr_mapp)) {
865 free(req, M_DEVBUF);
866 return (ENOMEM);
867 }
868
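/*
 * The header and ack are each expected to be physically contiguous,
 * occupying exactly one scatter-gather segment apiece.
 */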
869 MPASS(sglist_count(&req->vbr_hdr, sizeof(req->vbr_hdr)) == 1);
870 MPASS(sglist_count(&req->vbr_ack, sizeof(req->vbr_ack)) == 1);
871
872 sc->vtblk_request_count++;
873 vtblk_request_enqueue(sc, req);
874 }
875
876 return (0);
877 }
878
879 static void
880 vtblk_request_free(struct vtblk_softc *sc)
881 {
882 struct vtblk_request *req;
883
884 MPASS(TAILQ_EMPTY(&sc->vtblk_req_ready));
885
886 while ((req = vtblk_request_dequeue(sc)) != NULL) {
887 sc->vtblk_request_count--;
888 bus_dmamap_destroy(sc->vtblk_dmat, req->vbr_mapp);
889 free(req, M_DEVBUF);
890 }
891
892 KASSERT(sc->vtblk_request_count == 0,
893 ("%s: leaked %d requests", __func__, sc->vtblk_request_count));
894 }
895
896 static struct vtblk_request *
897 vtblk_request_dequeue(struct vtblk_softc *sc)
898 {
899 struct vtblk_request *req;
900
901 req = TAILQ_FIRST(&sc->vtblk_req_free);
902 if (req != NULL) {
903 TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);
904 bzero(&req->vbr_hdr, sizeof(struct vtblk_request) -
905 offsetof(struct vtblk_request, vbr_hdr));
906 }
907
908 return (req);
909 }
910
911 static void
912 vtblk_request_enqueue(struct vtblk_softc *sc, struct vtblk_request *req)
913 {
914
915 TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
916 }
917
918 static struct vtblk_request *
919 vtblk_request_next_ready(struct vtblk_softc *sc)
920 {
921 struct vtblk_request *req;
922
923 req = TAILQ_FIRST(&sc->vtblk_req_ready);
924 if (req != NULL)
925 TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);
926
927 return (req);
928 }
929
930 static void
931 vtblk_request_requeue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
932 {
933
934 /* NOTE: Currently, there will be at most one request in the queue. */
935 TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
936 }
937
938 static struct vtblk_request *
939 vtblk_request_next(struct vtblk_softc *sc)
940 {
941 struct vtblk_request *req;
942
943 req = vtblk_request_next_ready(sc);
944 if (req != NULL)
945 return (req);
946
947 return (vtblk_request_bio(sc));
948 }
949
950 static struct vtblk_request *
951 vtblk_request_bio(struct vtblk_softc *sc)
952 {
953 struct bio_queue_head *bioq;
954 struct vtblk_request *req;
955 struct bio *bp;
956
957 bioq = &sc->vtblk_bioq;
958
959 if (bioq_first(bioq) == NULL)
960 return (NULL);
961
962 req = vtblk_request_dequeue(sc);
963 if (req == NULL)
964 return (NULL);
965
966 bp = bioq_takefirst(bioq);
967 req->vbr_bp = bp;
968 req->vbr_ack = -1;
969 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
970
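/* Request sectors are always expressed in 512-byte units. */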
971 switch (bp->bio_cmd) {
972 case BIO_FLUSH:
973 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
974 req->vbr_hdr.sector = 0;
975 break;
976 case BIO_READ:
977 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN);
978 req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
979 break;
980 case BIO_WRITE:
981 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
982 req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
983 break;
984 case BIO_DELETE:
985 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_DISCARD);
986 req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
987 break;
988 default:
989 panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd);
990 }
991
992 if (bp->bio_flags & BIO_ORDERED)
993 req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER);
994
995 return (req);
996 }
997
998 static int
999 vtblk_request_execute(struct vtblk_request *req, int flags)
1000 {
1001 struct vtblk_softc *sc = req->vbr_sc;
1002 struct bio *bp = req->vbr_bp;
1003 int error = 0;
1004
1005 /*
1006 * Call via bus_dmamap_load_bio or directly depending on whether we
1007 * have a buffer we need to map. If we don't have a busdma map,
1008 * try to perform the I/O directly and hope that it works (this will
1009 * happen when dumping).
1010 */
1011 if ((req->vbr_mapp != NULL) &&
1012 (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) {
1013 error = bus_dmamap_load_bio(sc->vtblk_dmat, req->vbr_mapp,
1014 req->vbr_bp, vtblk_request_execute_cb, req, flags);
1015 if (error == EINPROGRESS) {
1016 req->vbr_busdma_wait = 1;
1017 sc->vtblk_flags |= VTBLK_FLAG_BUSDMA_WAIT;
1018 }
1019 } else {
1020 vtblk_request_execute_cb(req, NULL, 0, 0);
1021 }
1022
1023 return (error ? error : req->vbr_error);
1024 }
1025
1026 static void
1027 vtblk_request_execute_cb(void *callback_arg, bus_dma_segment_t *segs,
1028 int nseg, int error)
1029 {
1030 struct vtblk_request *req;
1031 struct vtblk_softc *sc;
1032 struct virtqueue *vq;
1033 struct sglist *sg;
1034 struct bio *bp;
1035 int ordered, readable, writable, i;
1036
1037 req = (struct vtblk_request *)callback_arg;
1038 sc = req->vbr_sc;
1039 vq = sc->vtblk_vq;
1040 sg = sc->vtblk_sglist;
1041 bp = req->vbr_bp;
1042 ordered = 0;
1043 writable = 0;
1044
1045 /*
1046 * If we paused request queueing while we waited for busdma to call us
1047 * asynchronously, unpause it now; this request made it through so we
1048 * don't need to worry about others getting ahead of us. (Note that we
1049 * hold the device mutex so nothing will happen until after we return
1050 * anyway.)
1051 */
1052 if (req->vbr_busdma_wait)
1053 sc->vtblk_flags &= ~VTBLK_FLAG_BUSDMA_WAIT;
1054
1055 /* Fail on errors from busdma. */
1056 if (error)
1057 goto out1;
1058
1059 /*
1060 * Some hosts (such as bhyve) do not implement the barrier feature,
1061 * so we emulate it in the driver by allowing the barrier request
1062 * to be the only one in flight.
1063 */
1064 if ((sc->vtblk_flags & VTBLK_FLAG_BARRIER) == 0) {
1065 if (sc->vtblk_req_ordered != NULL) {
1066 error = EBUSY;
1067 goto out;
1068 }
1069 if (bp->bio_flags & BIO_ORDERED) {
1070 if (!virtqueue_empty(vq)) {
1071 error = EBUSY;
1072 goto out;
1073 }
1074 ordered = 1;
1075 req->vbr_hdr.type &= vtblk_gtoh32(sc,
1076 ~VIRTIO_BLK_T_BARRIER);
1077 }
1078 }
1079
1080 sglist_reset(sg);
1081 sglist_append(sg, &req->vbr_hdr, sizeof(struct virtio_blk_outhdr));
1082
1083 if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
1084 /*
1085 * We cast bus_addr_t to vm_paddr_t here; since we skip the
1086 * iommu mapping (see vtblk_attach) this should be safe.
1087 */
1088 for (i = 0; i < nseg; i++) {
1089 error = sglist_append_phys(sg,
1090 (vm_paddr_t)segs[i].ds_addr, segs[i].ds_len);
1091 if (error || sg->sg_nseg == sg->sg_maxseg) {
1092 panic("%s: bio %p data buffer too big %d",
1093 __func__, bp, error);
1094 }
1095 }
1096
1097 /* Special handling for dump, which bypasses busdma. */
1098 if (req->vbr_mapp == NULL) {
1099 error = sglist_append_bio(sg, bp);
1100 if (error || sg->sg_nseg == sg->sg_maxseg) {
1101 panic("%s: bio %p data buffer too big %d",
1102 __func__, bp, error);
1103 }
1104 }
1105
1106 /* BIO_READ means the host writes into our buffer. */
1107 if (bp->bio_cmd == BIO_READ)
1108 writable = sg->sg_nseg - 1;
1109 } else if (bp->bio_cmd == BIO_DELETE) {
1110 struct virtio_blk_discard_write_zeroes *discard;
1111
1112 discard = malloc(sizeof(*discard), M_DEVBUF, M_NOWAIT | M_ZERO);
1113 if (discard == NULL) {
1114 error = ENOMEM;
1115 goto out;
1116 }
1117
1118 bp->bio_driver1 = discard;
1119 discard->sector = vtblk_gtoh64(sc, bp->bio_offset / VTBLK_BSIZE);
1120 discard->num_sectors = vtblk_gtoh32(sc, bp->bio_bcount / VTBLK_BSIZE);
1121 error = sglist_append(sg, discard, sizeof(*discard));
1122 if (error || sg->sg_nseg == sg->sg_maxseg) {
1123 panic("%s: bio %p data buffer too big %d",
1124 __func__, bp, error);
1125 }
1126 }
1127
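/*
 * The ack byte is always written by the host; together with any data
 * segments of a read it forms the device-writable part of the chain,
 * while the header and any write or discard payload remain read-only.
 */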
1128 writable++;
1129 sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
1130 readable = sg->sg_nseg - writable;
1131
1132 if (req->vbr_mapp != NULL) {
1133 switch (bp->bio_cmd) {
1134 case BIO_READ:
1135 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1136 BUS_DMASYNC_PREREAD);
1137 break;
1138 case BIO_WRITE:
1139 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1140 BUS_DMASYNC_PREWRITE);
1141 break;
1142 }
1143 }
1144
1145 error = virtqueue_enqueue(vq, req, sg, readable, writable);
1146 if (error == 0 && ordered)
1147 sc->vtblk_req_ordered = req;
1148
1149 /*
1150 * If we were called asynchronously, we need to notify the queue that
1151 * we've added a new request, since the notification from startio was
1152 * performed already.
1153 */
1154 if (error == 0 && req->vbr_busdma_wait)
1155 virtqueue_notify(vq);
1156
1157 out:
1158 if (error && (req->vbr_mapp != NULL))
1159 bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1160 out1:
1161 if (error && req->vbr_requeue_on_error)
1162 vtblk_request_requeue_ready(sc, req);
1163 req->vbr_error = error;
1164 }
1165
1166 static int
1167 vtblk_request_error(struct vtblk_request *req)
1168 {
1169 int error;
1170
1171 switch (req->vbr_ack) {
1172 case VIRTIO_BLK_S_OK:
1173 error = 0;
1174 break;
1175 case VIRTIO_BLK_S_UNSUPP:
1176 error = ENOTSUP;
1177 break;
1178 default:
1179 error = EIO;
1180 break;
1181 }
1182
1183 return (error);
1184 }
1185
1186 static struct bio *
1187 vtblk_queue_complete_one(struct vtblk_softc *sc, struct vtblk_request *req)
1188 {
1189 struct bio *bp;
1190
1191 if (sc->vtblk_req_ordered != NULL) {
1192 MPASS(sc->vtblk_req_ordered == req);
1193 sc->vtblk_req_ordered = NULL;
1194 }
1195
1196 bp = req->vbr_bp;
1197 if (req->vbr_mapp != NULL) {
1198 switch (bp->bio_cmd) {
1199 case BIO_READ:
1200 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1201 BUS_DMASYNC_POSTREAD);
1202 bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1203 break;
1204 case BIO_WRITE:
1205 bus_dmamap_sync(sc->vtblk_dmat, req->vbr_mapp,
1206 BUS_DMASYNC_POSTWRITE);
1207 bus_dmamap_unload(sc->vtblk_dmat, req->vbr_mapp);
1208 break;
1209 }
1210 }
1211 bp->bio_error = vtblk_request_error(req);
1212 return (bp);
1213 }
1214
1215 static void
1216 vtblk_queue_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1217 {
1218 struct vtblk_request *req;
1219 struct bio *bp;
1220
1221 while ((req = virtqueue_dequeue(sc->vtblk_vq, NULL)) != NULL) {
1222 bp = vtblk_queue_complete_one(sc, req);
1223
1224 TAILQ_INSERT_TAIL(queue, bp, bio_queue);
1225 vtblk_request_enqueue(sc, req);
1226 }
1227 }
1228
1229 static void
1230 vtblk_done_completed(struct vtblk_softc *sc, struct bio_queue *queue)
1231 {
1232 struct bio *bp, *tmp;
1233
1234 TAILQ_FOREACH_SAFE(bp, queue, bio_queue, tmp) {
1235 if (bp->bio_error != 0)
1236 disk_err(bp, "hard error", -1, 1);
1237 vtblk_bio_done(sc, bp, bp->bio_error);
1238 }
1239 }
1240
1241 static void
1242 vtblk_drain_vq(struct vtblk_softc *sc)
1243 {
1244 struct virtqueue *vq;
1245 struct vtblk_request *req;
1246 int last;
1247
1248 vq = sc->vtblk_vq;
1249 last = 0;
1250
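/* Fail any requests still sitting in the virtqueue with ENXIO. */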
1251 while ((req = virtqueue_drain(vq, &last)) != NULL) {
1252 vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1253 vtblk_request_enqueue(sc, req);
1254 }
1255
1256 sc->vtblk_req_ordered = NULL;
1257 KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
1258 }
1259
1260 static void
1261 vtblk_drain(struct vtblk_softc *sc)
1262 {
1263 struct bio_queue_head *bioq;
1264 struct vtblk_request *req;
1265 struct bio *bp;
1266
1267 bioq = &sc->vtblk_bioq;
1268
1269 if (sc->vtblk_vq != NULL) {
1270 struct bio_queue queue;
1271
1272 TAILQ_INIT(&queue);
1273 vtblk_queue_completed(sc, &queue);
1274 vtblk_done_completed(sc, &queue);
1275
1276 vtblk_drain_vq(sc);
1277 }
1278
1279 while ((req = vtblk_request_next_ready(sc)) != NULL) {
1280 vtblk_bio_done(sc, req->vbr_bp, ENXIO);
1281 vtblk_request_enqueue(sc, req);
1282 }
1283
1284 while (bioq_first(bioq) != NULL) {
1285 bp = bioq_takefirst(bioq);
1286 vtblk_bio_done(sc, bp, ENXIO);
1287 }
1288
1289 vtblk_request_free(sc);
1290 }
1291
1292 static void
1293 vtblk_startio(struct vtblk_softc *sc)
1294 {
1295 struct virtqueue *vq;
1296 struct vtblk_request *req;
1297 int enq;
1298
1299 VTBLK_LOCK_ASSERT(sc);
1300 vq = sc->vtblk_vq;
1301 enq = 0;
1302
1303 if (sc->vtblk_flags & (VTBLK_FLAG_SUSPEND | VTBLK_FLAG_BUSDMA_WAIT))
1304 return;
1305
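/*
 * Fill the virtqueue with pending bios; a request that cannot be
 * issued now is placed back on the ready queue and retried from the
 * completion path.
 */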
1306 while (!virtqueue_full(vq)) {
1307 req = vtblk_request_next(sc);
1308 if (req == NULL)
1309 break;
1310
1311 req->vbr_requeue_on_error = 1;
1312 if (vtblk_request_execute(req, BUS_DMA_WAITOK))
1313 break;
1314
1315 enq++;
1316 }
1317
1318 if (enq > 0)
1319 virtqueue_notify(vq);
1320 }
1321
1322 static void
1323 vtblk_bio_done(struct vtblk_softc *sc, struct bio *bp, int error)
1324 {
1325
1326 /* Because of GEOM direct dispatch, we cannot hold any locks. */
1327 if (sc != NULL)
1328 VTBLK_LOCK_ASSERT_NOTOWNED(sc);
1329
1330 if (error) {
1331 bp->bio_resid = bp->bio_bcount;
1332 bp->bio_error = error;
1333 bp->bio_flags |= BIO_ERROR;
1334 } else {
1335 kmsan_mark_bio(bp, KMSAN_STATE_INITED);
1336 }
1337
1338 if (bp->bio_driver1 != NULL) {
1339 free(bp->bio_driver1, M_DEVBUF);
1340 bp->bio_driver1 = NULL;
1341 }
1342
1343 biodone(bp);
1344 }
1345
1346 #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg) \
1347 if (virtio_with_feature(_dev, _feature)) { \
1348 virtio_read_device_config(_dev, \
1349 offsetof(struct virtio_blk_config, _field), \
1350 &(_cfg)->_field, sizeof((_cfg)->_field)); \
1351 }
1352
1353 static void
1354 vtblk_read_config(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
1355 {
1356 device_t dev;
1357
1358 dev = sc->vtblk_dev;
1359
1360 bzero(blkcfg, sizeof(struct virtio_blk_config));
1361
1362 /* The capacity is always available. */
1363 virtio_read_device_config(dev, offsetof(struct virtio_blk_config,
1364 capacity), &blkcfg->capacity, sizeof(blkcfg->capacity));
1365
1366 /* Read the configuration if the feature was negotiated. */
1367 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg);
1368 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg);
1369 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1370 geometry.cylinders, blkcfg);
1371 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1372 geometry.heads, blkcfg);
1373 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY,
1374 geometry.sectors, blkcfg);
1375 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg);
1376 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1377 topology.physical_block_exp, blkcfg);
1378 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1379 topology.alignment_offset, blkcfg);
1380 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1381 topology.min_io_size, blkcfg);
1382 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY,
1383 topology.opt_io_size, blkcfg);
1384 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg);
1385 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_sectors,
1386 blkcfg);
1387 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, max_discard_seg, blkcfg);
1388 VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_DISCARD, discard_sector_alignment,
1389 blkcfg);
1390 }
1391
1392 #undef VTBLK_GET_CONFIG
1393
1394 static void
1395 vtblk_ident(struct vtblk_softc *sc)
1396 {
1397 struct bio buf;
1398 struct disk *dp;
1399 struct vtblk_request *req;
1400 int len, error;
1401
1402 dp = sc->vtblk_disk;
1403 len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);
1404
1405 if (vtblk_tunable_int(sc, "no_ident", vtblk_no_ident) != 0)
1406 return;
1407
1408 req = vtblk_request_dequeue(sc);
1409 if (req == NULL)
1410 return;
1411
1412 req->vbr_ack = -1;
1413 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID);
1414 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1415 req->vbr_hdr.sector = 0;
1416
1417 req->vbr_bp = &buf;
1418 g_reset_bio(&buf);
1419
1420 buf.bio_cmd = BIO_READ;
1421 buf.bio_data = dp->d_ident;
1422 buf.bio_bcount = len;
1423
1424 VTBLK_LOCK(sc);
1425 error = vtblk_poll_request(sc, req);
1426 VTBLK_UNLOCK(sc);
1427
1428 if (error) {
1429 device_printf(sc->vtblk_dev,
1430 "error getting device identifier: %d\n", error);
1431 }
1432 }
1433
1434 static int
1435 vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
1436 {
1437 struct vtblk_request *req1 __diagused;
1438 struct virtqueue *vq;
1439 struct bio *bp;
1440 int error;
1441
1442 vq = sc->vtblk_vq;
1443
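/*
 * Polled requests (the device ident and crash dump writes) require
 * the virtqueue to be otherwise idle.
 */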
1444 if (!virtqueue_empty(vq))
1445 return (EBUSY);
1446
1447 error = vtblk_request_execute(req, BUS_DMA_NOWAIT);
1448 if (error)
1449 return (error);
1450
1451 virtqueue_notify(vq);
1452 req1 = virtqueue_poll(vq, NULL);
1453 KASSERT(req == req1,
1454 ("%s: polling completed %p not %p", __func__, req1, req));
1455
1456 bp = vtblk_queue_complete_one(sc, req);
1457 error = bp->bio_error;
1458 if (error && bootverbose) {
1459 device_printf(sc->vtblk_dev,
1460 "%s: IO error: %d\n", __func__, error);
1461 }
1462 if (req != &sc->vtblk_dump_request)
1463 vtblk_request_enqueue(sc, req);
1464
1465 return (error);
1466 }
1467
1468 static int
1469 vtblk_quiesce(struct vtblk_softc *sc)
1470 {
1471 int error;
1472
1473 VTBLK_LOCK_ASSERT(sc);
1474 error = 0;
1475
1476 while (!virtqueue_empty(sc->vtblk_vq)) {
1477 if (mtx_sleep(&sc->vtblk_vq, VTBLK_MTX(sc), PRIBIO, "vtblkq",
1478 VTBLK_QUIESCE_TIMEOUT) == EWOULDBLOCK) {
1479 error = EBUSY;
1480 break;
1481 }
1482 }
1483
1484 return (error);
1485 }
1486
1487 static void
1488 vtblk_vq_intr(void *xsc)
1489 {
1490 struct vtblk_softc *sc;
1491 struct virtqueue *vq;
1492 struct bio_queue queue;
1493
1494 sc = xsc;
1495 vq = sc->vtblk_vq;
1496 TAILQ_INIT(&queue);
1497
1498 VTBLK_LOCK(sc);
1499
1500 again:
1501 if (sc->vtblk_flags & VTBLK_FLAG_DETACH)
1502 goto out;
1503
1504 vtblk_queue_completed(sc, &queue);
1505 vtblk_startio(sc);
1506
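/*
 * Re-enabling interrupts can race with further completions; if any
 * arrived in the meantime, process them before returning.
 */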
1507 if (virtqueue_enable_intr(vq) != 0) {
1508 virtqueue_disable_intr(vq);
1509 goto again;
1510 }
1511
1512 if (sc->vtblk_flags & VTBLK_FLAG_SUSPEND)
1513 wakeup(&sc->vtblk_vq);
1514
1515 out:
1516 VTBLK_UNLOCK(sc);
1517 vtblk_done_completed(sc, &queue);
1518 }
1519
1520 static void
1521 vtblk_stop(struct vtblk_softc *sc)
1522 {
1523
1524 virtqueue_disable_intr(sc->vtblk_vq);
1525 virtio_stop(sc->vtblk_dev);
1526 }
1527
1528 static void
1529 vtblk_dump_quiesce(struct vtblk_softc *sc)
1530 {
1531
1532 /*
1533 * Spin here until all the requests in-flight at the time of the
1534 * dump are completed and queued. The queued requests will be
1535 * biodone'd once the dump is finished.
1536 */
1537 while (!virtqueue_empty(sc->vtblk_vq))
1538 vtblk_queue_completed(sc, &sc->vtblk_dump_queue);
1539 }
1540
1541 static int
1542 vtblk_dump_write(struct vtblk_softc *sc, void *virtual, off_t offset,
1543 size_t length)
1544 {
1545 struct bio buf;
1546 struct vtblk_request *req;
1547
1548 req = &sc->vtblk_dump_request;
1549 req->vbr_sc = sc;
1550 req->vbr_ack = -1;
1551 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT);
1552 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1553 req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / VTBLK_BSIZE);
1554
1555 req->vbr_bp = &buf;
1556 g_reset_bio(&buf);
1557
1558 buf.bio_cmd = BIO_WRITE;
1559 buf.bio_data = virtual;
1560 buf.bio_bcount = length;
1561
1562 return (vtblk_poll_request(sc, req));
1563 }
1564
1565 static int
1566 vtblk_dump_flush(struct vtblk_softc *sc)
1567 {
1568 struct bio buf;
1569 struct vtblk_request *req;
1570
1571 req = &sc->vtblk_dump_request;
1572 req->vbr_sc = sc;
1573 req->vbr_ack = -1;
1574 req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH);
1575 req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1);
1576 req->vbr_hdr.sector = 0;
1577
1578 req->vbr_bp = &buf;
1579 g_reset_bio(&buf);
1580
1581 buf.bio_cmd = BIO_FLUSH;
1582
1583 return (vtblk_poll_request(sc, req));
1584 }
1585
1586 static void
1587 vtblk_dump_complete(struct vtblk_softc *sc)
1588 {
1589
1590 vtblk_dump_flush(sc);
1591
1592 VTBLK_UNLOCK(sc);
1593 vtblk_done_completed(sc, &sc->vtblk_dump_queue);
1594 VTBLK_LOCK(sc);
1595 }
1596
1597 static void
1598 vtblk_set_write_cache(struct vtblk_softc *sc, int wc)
1599 {
1600
1601 /* Set either writeback (1) or writethrough (0) mode. */
1602 virtio_write_dev_config_1(sc->vtblk_dev,
1603 offsetof(struct virtio_blk_config, wce), wc);
1604 }
1605
1606 static int
1607 vtblk_write_cache_enabled(struct vtblk_softc *sc,
1608 struct virtio_blk_config *blkcfg)
1609 {
1610 int wc;
1611
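/*
 * Without VIRTIO_BLK_F_CONFIG_WCE the cache mode cannot be changed;
 * a host offering VIRTIO_BLK_F_FLUSH is assumed to be writeback.
 */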
1612 if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) {
1613 wc = vtblk_tunable_int(sc, "writecache_mode",
1614 vtblk_writecache_mode);
1615 if (wc >= 0 && wc < VTBLK_CACHE_MAX)
1616 vtblk_set_write_cache(sc, wc);
1617 else
1618 wc = blkcfg->wce;
1619 } else
1620 wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH);
1621
1622 return (wc);
1623 }
1624
1625 static int
1626 vtblk_write_cache_sysctl(SYSCTL_HANDLER_ARGS)
1627 {
1628 struct vtblk_softc *sc;
1629 int wc, error;
1630
1631 sc = oidp->oid_arg1;
1632 wc = sc->vtblk_write_cache;
1633
1634 error = sysctl_handle_int(oidp, &wc, 0, req);
1635 if (error || req->newptr == NULL)
1636 return (error);
1637 if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0)
1638 return (EPERM);
1639 if (wc < 0 || wc >= VTBLK_CACHE_MAX)
1640 return (EINVAL);
1641
1642 VTBLK_LOCK(sc);
1643 sc->vtblk_write_cache = wc;
1644 vtblk_set_write_cache(sc, sc->vtblk_write_cache);
1645 VTBLK_UNLOCK(sc);
1646
1647 return (0);
1648 }
1649
1650 static void
1651 vtblk_setup_sysctl(struct vtblk_softc *sc)
1652 {
1653 device_t dev;
1654 struct sysctl_ctx_list *ctx;
1655 struct sysctl_oid *tree;
1656 struct sysctl_oid_list *child;
1657
1658 dev = sc->vtblk_dev;
1659 ctx = device_get_sysctl_ctx(dev);
1660 tree = device_get_sysctl_tree(dev);
1661 child = SYSCTL_CHILDREN(tree);
1662
1663 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "writecache_mode",
1664 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
1665 vtblk_write_cache_sysctl, "I",
1666 "Write cache mode (writethrough (0) or writeback (1))");
1667 }
1668
1669 static int
1670 vtblk_tunable_int(struct vtblk_softc *sc, const char *knob, int def)
1671 {
1672 char path[64];
1673
1674 snprintf(path, sizeof(path),
1675 "hw.vtblk.%d.%s", device_get_unit(sc->vtblk_dev), knob);
1676 TUNABLE_INT_FETCH(path, &def);
1677
1678 return (def);
1679 }
1680