/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright 2020-2021 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2014 Pluribus Networks Inc.
 */


#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <sys/disk.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <md5.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "virtio.h"
#include "block_if.h"

#define	VTBLK_BSIZE	512
#define	VTBLK_RINGSZ	128

_Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX,
    "Each ring entry must be able to queue a request");

#define	VTBLK_S_OK	0
#define	VTBLK_S_IOERR	1
#define	VTBLK_S_UNSUPP	2

#define	VTBLK_BLK_ID_BYTES	(20 + 1)

/* Capability bits */
#define	VTBLK_F_BARRIER		(1 << 0)	/* Does host support barriers? */
#define	VTBLK_F_SIZE_MAX	(1 << 1)	/* Indicates maximum segment size */
#define	VTBLK_F_SEG_MAX		(1 << 2)	/* Indicates maximum # of segments */
#define	VTBLK_F_GEOMETRY	(1 << 4)	/* Legacy geometry available */
#define	VTBLK_F_RO		(1 << 5)	/* Disk is read-only */
#define	VTBLK_F_BLK_SIZE	(1 << 6)	/* Block size of disk is available */
#define	VTBLK_F_SCSI		(1 << 7)	/* Supports scsi command passthru */
#define	VTBLK_F_FLUSH		(1 << 9)	/* Writeback mode enabled after reset */
#define	VTBLK_F_WCE		(1 << 9)	/* Legacy alias for FLUSH */
#define	VTBLK_F_TOPOLOGY	(1 << 10)	/* Topology information is available */
#define	VTBLK_F_CONFIG_WCE	(1 << 11)	/* Writeback mode available in config */
#define	VTBLK_F_MQ		(1 << 12)	/* Multi-Queue */
#define	VTBLK_F_DISCARD		(1 << 13)	/* Trim blocks */
#define	VTBLK_F_WRITE_ZEROES	(1 << 14)	/* Write zeros */

/*
 * Host capabilities
 */
#define	VTBLK_S_HOSTCAPS      \
  ( VTBLK_F_SEG_MAX  |						    \
    VTBLK_F_BLK_SIZE |						    \
    VTBLK_F_FLUSH    |						    \
    VTBLK_F_TOPOLOGY |						    \
    VIRTIO_RING_F_INDIRECT_DESC )	/* indirect descriptors */
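
/*
 * Feature negotiation sketch (legacy virtio, as handled in virtio.c):
 * the host offers vc_hv_caps, the guest writes back the subset it
 * accepts, and vc_apply_features is then invoked with the negotiated
 * set; on illumos that is where the write cache is enabled only if
 * VTBLK_F_FLUSH was accepted (see pci_vtblk_apply_feats() below).
 */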

/*
 * The current blockif_delete() interface only allows a single delete
 * request at a time.
 */
#define	VTBLK_MAX_DISCARD_SEG	1

/*
 * An arbitrary limit to prevent excessive latency due to large
 * delete requests.
 */
#define	VTBLK_MAX_DISCARD_SECT	((16 << 20) / VTBLK_BSIZE)	/* 16 MiB */

/*
 * Config space "registers"
 */
struct vtblk_config {
	uint64_t	vbc_capacity;
	uint32_t	vbc_size_max;
	uint32_t	vbc_seg_max;
	struct {
		uint16_t cylinders;
		uint8_t heads;
		uint8_t sectors;
	} vbc_geometry;
	uint32_t	vbc_blk_size;
	struct {
		uint8_t physical_block_exp;
		uint8_t alignment_offset;
		uint16_t min_io_size;
		uint32_t opt_io_size;
	} vbc_topology;
	uint8_t		vbc_writeback;
	uint8_t		unused0[1];
	uint16_t	num_queues;
	uint32_t	max_discard_sectors;
	uint32_t	max_discard_seg;
	uint32_t	discard_sector_alignment;
	uint32_t	max_write_zeroes_sectors;
	uint32_t	max_write_zeroes_seg;
	uint8_t		write_zeroes_may_unmap;
	uint8_t		unused1[3];
} __packed;
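
/*
 * This layout mirrors the virtio block device configuration space
 * (virtio 1.1, section 5.2.4).  vbc_capacity is always expressed in
 * 512-byte sectors regardless of vbc_blk_size, and the fields past
 * vbc_writeback are only meaningful to a guest that has negotiated
 * the matching feature bits (VTBLK_F_MQ, VTBLK_F_DISCARD,
 * VTBLK_F_WRITE_ZEROES).
 */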

/*
 * Fixed-size block header
 */
struct virtio_blk_hdr {
#define	VBH_OP_READ		0
#define	VBH_OP_WRITE		1
#define	VBH_OP_SCSI_CMD		2
#define	VBH_OP_SCSI_CMD_OUT	3
#define	VBH_OP_FLUSH		4
#define	VBH_OP_FLUSH_OUT	5
#define	VBH_OP_IDENT		8
#define	VBH_OP_DISCARD		11
#define	VBH_OP_WRITE_ZEROES	13

#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
	uint32_t	vbh_type;
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;
} __packed;
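
/*
 * A block request arrives as a single descriptor chain shaped as follows
 * (pci_vtblk_proc() below asserts exactly this layout):
 *
 *	iov[0]		device-readable	struct virtio_blk_hdr
 *	iov[1..n-2]	data		payload, direction per vbh_type
 *	iov[n-1]	device-writable	1-byte status (VTBLK_S_*)
 */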

/*
 * Debug printf
 */
static int pci_vtblk_debug;
#define	DPRINTF(params) if (pci_vtblk_debug) PRINTLN params
#define	WPRINTF(params) PRINTLN params

struct pci_vtblk_ioreq {
	struct blockif_req		io_req;
	struct pci_vtblk_softc		*io_sc;
	uint8_t				*io_status;
	uint16_t			io_idx;
};

struct virtio_blk_discard_write_zeroes {
	uint64_t	sector;
	uint32_t	num_sectors;
	struct {
		uint32_t unmap:1;
		uint32_t reserved:31;
	} flags;
};
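
/*
 * Note that VTBLK_MAX_DISCARD_SEG is 1: a guest may pass only a single
 * struct virtio_blk_discard_write_zeroes per DISCARD request, and the
 * unmap flag must be clear; pci_vtblk_proc() rejects anything else.
 */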

/*
 * Per-device softc
 */
struct pci_vtblk_softc {
	struct virtio_softc vbsc_vs;
	pthread_mutex_t vsc_mtx;
	struct vqueue_info vbsc_vq;
	struct vtblk_config vbsc_cfg;
	struct virtio_consts vbsc_consts;
	struct blockif_ctxt *bc;
#ifndef __FreeBSD__
	int vbsc_wce;
#endif
	char vbsc_ident[VTBLK_BLK_ID_BYTES];
	struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
};

static void pci_vtblk_reset(void *);
static void pci_vtblk_notify(void *, struct vqueue_info *);
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
#ifndef __FreeBSD__
static void pci_vtblk_apply_feats(void *, uint64_t);
#endif

static struct virtio_consts vtblk_vi_consts = {
	.vc_name =	"vtblk",
	.vc_nvq =	1,
	.vc_cfgsize =	sizeof(struct vtblk_config),
	.vc_reset =	pci_vtblk_reset,
	.vc_qnotify =	pci_vtblk_notify,
	.vc_cfgread =	pci_vtblk_cfgread,
	.vc_cfgwrite =	pci_vtblk_cfgwrite,
#ifndef __FreeBSD__
	.vc_apply_features = pci_vtblk_apply_feats,
#else
	.vc_apply_features = NULL,
#endif
	.vc_hv_caps =	VTBLK_S_HOSTCAPS,
};
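
/*
 * The generic virtio layer (virtio.c) drives the device through these
 * callbacks: vc_qnotify runs when the guest kicks the queue, vc_cfgread
 * and vc_cfgwrite service config-space accesses, and vc_reset returns
 * the device to its pre-negotiation state.
 */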

static void
pci_vtblk_reset(void *vsc)
{
	struct pci_vtblk_softc *sc = vsc;

	DPRINTF(("vtblk: device reset requested !"));
	vi_reset_dev(&sc->vbsc_vs);
#ifndef __FreeBSD__
	/* Disable write cache until FLUSH feature is negotiated */
	(void) blockif_set_wce(sc->bc, 0);
	sc->vbsc_wce = 0;
#endif
}

static void
pci_vtblk_done_locked(struct pci_vtblk_ioreq *io, int err)
{
	struct pci_vtblk_softc *sc = io->io_sc;

	/*
	 * Convert errno into a virtio block error return.  Note that
	 * ENOTSUP and EOPNOTSUPP are distinct values on some systems,
	 * so check for both.
	 */
	if (err == EOPNOTSUPP || err == ENOTSUP || err == ENOSYS)
		*io->io_status = VTBLK_S_UNSUPP;
	else if (err != 0)
		*io->io_status = VTBLK_S_IOERR;
	else
		*io->io_status = VTBLK_S_OK;

	/*
	 * Return the descriptor back to the host.
	 * We wrote 1 byte (our status) to host.
	 */
	vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
	vq_endchains(&sc->vbsc_vq, 0);
}
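
/*
 * Completion callback handed to blockif as br_callback.  blockif invokes
 * it from one of its worker threads rather than from the vCPU thread
 * that notified the queue, hence the softc mutex.
 */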
static void
pci_vtblk_done(struct blockif_req *br, int err)
{
	struct pci_vtblk_ioreq *io = br->br_param;
	struct pci_vtblk_softc *sc = io->io_sc;

	pthread_mutex_lock(&sc->vsc_mtx);
	pci_vtblk_done_locked(io, err);
	pthread_mutex_unlock(&sc->vsc_mtx);
}

static void
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
{
	struct virtio_blk_hdr *vbh;
	struct pci_vtblk_ioreq *io;
	int i, n;
	int err;
	ssize_t iolen;
	int writeop, type;
	struct vi_req req;
	struct iovec iov[BLOCKIF_IOV_MAX + 2];
	struct virtio_blk_discard_write_zeroes *discard;

	n = vq_getchain(vq, iov, BLOCKIF_IOV_MAX + 2, &req);

	/*
	 * The first descriptor will be the read-only fixed header,
	 * and the last is for status (hence +2 above and below).
	 * The remaining iov's are the actual data I/O vectors.
	 *
	 * XXX - note - this fails on crash dump, which does a
	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
	 */
	assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);

	io = &sc->vbsc_ios[req.idx];
	assert(req.readable != 0);
	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
	vbh = (struct virtio_blk_hdr *)iov[0].iov_base;
	memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
	io->io_req.br_iovcnt = n - 2;
	io->io_req.br_offset = vbh->vbh_sector * VTBLK_BSIZE;
	io->io_status = (uint8_t *)iov[--n].iov_base;
	assert(req.writable != 0);
	assert(iov[n].iov_len == 1);

	/*
	 * XXX
	 * The guest should not be setting the BARRIER flag because
	 * we don't advertise the capability.
	 */
	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
	writeop = (type == VBH_OP_WRITE || type == VBH_OP_DISCARD);
	/*
	 * - Write op implies read-only descriptor
	 * - Read/ident op implies write-only descriptor
	 *
	 * By taking away either the read-only fixed header or the write-only
	 * status iovec, the following condition should hold true.
	 */
	assert(n == (writeop ? req.readable : req.writable));

	iolen = 0;
	for (i = 1; i < n; i++) {
		iolen += iov[i].iov_len;
	}
	io->io_req.br_resid = iolen;

	DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld",
		 writeop ? "write/discard" : "read/ident", iolen, i - 1,
		 io->io_req.br_offset));

	switch (type) {
	case VBH_OP_READ:
		err = blockif_read(sc->bc, &io->io_req);
		break;
	case VBH_OP_WRITE:
		err = blockif_write(sc->bc, &io->io_req);
		break;
	case VBH_OP_DISCARD:
		/*
		 * We currently only support a single discard segment per
		 * request; if the guest has submitted a request that does
		 * not conform to that requirement, we return an error.
		 */
		if (iov[1].iov_len != sizeof (*discard)) {
			pci_vtblk_done_locked(io, EINVAL);
			return;
		}

		/* The segments to discard are provided rather than data */
		discard = (struct virtio_blk_discard_write_zeroes *)
		    iov[1].iov_base;

		/*
		 * virtio v1.1 5.2.6.2:
		 * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP
		 * for discard and write zeroes commands if any unknown flag is
		 * set. Furthermore, the device MUST set the status byte to
		 * VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag
		 * is set.
		 *
		 * Currently there are no known flags for a DISCARD request.
		 */
		if (discard->flags.unmap != 0 || discard->flags.reserved != 0) {
			pci_vtblk_done_locked(io, ENOTSUP);
			return;
		}

		/* Make sure the request doesn't exceed our size limit */
		if (discard->num_sectors > VTBLK_MAX_DISCARD_SECT) {
			pci_vtblk_done_locked(io, EINVAL);
			return;
		}

		io->io_req.br_offset = discard->sector * VTBLK_BSIZE;
		io->io_req.br_resid = discard->num_sectors * VTBLK_BSIZE;
		err = blockif_delete(sc->bc, &io->io_req);
		break;
	case VBH_OP_FLUSH:
	case VBH_OP_FLUSH_OUT:
		err = blockif_flush(sc->bc, &io->io_req);
		break;
	case VBH_OP_IDENT:
		/*
		 * Assume a single buffer.  Note that the returned serial
		 * number is not NUL-terminated if it fills the buffer.
		 */
		memset(iov[1].iov_base, 0, iov[1].iov_len);
		strncpy(iov[1].iov_base, sc->vbsc_ident,
		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
		pci_vtblk_done_locked(io, 0);
		return;
	default:
		pci_vtblk_done_locked(io, EOPNOTSUPP);
		return;
	}
	assert(err == 0);
}

static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *sc = vsc;

	while (vq_has_descs(vq))
		pci_vtblk_proc(sc, vq);
}

static void
pci_vtblk_resized(struct blockif_ctxt *bctxt __unused, void *arg,
    size_t new_size)
{
	struct pci_vtblk_softc *sc;

	sc = arg;

	sc->vbsc_cfg.vbc_capacity = new_size / VTBLK_BSIZE; /* 512-byte units */
	vi_interrupt(&sc->vbsc_vs, VIRTIO_PCI_ISR_CONFIG,
	    sc->vbsc_vs.vs_msix_cfg_idx);
}

static int
pci_vtblk_init(struct pci_devinst *pi, nvlist_t *nvl)
{
	char bident[sizeof("XXX:XXX")];
	struct blockif_ctxt *bctxt;
	const char *path, *serial;
	MD5_CTX mdctx;
	u_char digest[16];
	struct pci_vtblk_softc *sc;
	off_t size;
	int i, sectsz, sts, sto;

	/*
	 * The supplied backing file has to exist
	 */
	snprintf(bident, sizeof(bident), "%u:%u", pi->pi_slot, pi->pi_func);
	bctxt = blockif_open(nvl, bident);
	if (bctxt == NULL) {
		perror("Could not open backing file");
		return (1);
	}

	if (blockif_add_boot_device(pi, bctxt)) {
		perror("Invalid boot device");
		return (1);
	}

	size = blockif_size(bctxt);
	sectsz = blockif_sectsz(bctxt);
	blockif_psectsz(bctxt, &sts, &sto);

	sc = calloc(1, sizeof(struct pci_vtblk_softc));
	sc->bc = bctxt;
	for (i = 0; i < VTBLK_RINGSZ; i++) {
		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
		io->io_req.br_callback = pci_vtblk_done;
		io->io_req.br_param = io;
		io->io_sc = sc;
		io->io_idx = i;
	}

	bcopy(&vtblk_vi_consts, &sc->vbsc_consts, sizeof (vtblk_vi_consts));
	if (blockif_candelete(sc->bc))
		sc->vbsc_consts.vc_hv_caps |= VTBLK_F_DISCARD;

#ifndef __FreeBSD__
	/* Disable write cache until FLUSH feature is negotiated */
	(void) blockif_set_wce(sc->bc, 0);
	sc->vbsc_wce = 0;
#endif

	pthread_mutex_init(&sc->vsc_mtx, NULL);

	/* init virtio softc and virtqueues */
	vi_softc_linkup(&sc->vbsc_vs, &sc->vbsc_consts, sc, pi, &sc->vbsc_vq);
	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;

	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */

	/*
	 * If an explicit identifier is not given, create an
	 * identifier using parts of the md5 sum of the filename.
	 */
	bzero(sc->vbsc_ident, VTBLK_BLK_ID_BYTES);
	if ((serial = get_config_value_node(nvl, "serial")) != NULL ||
	    (serial = get_config_value_node(nvl, "ser")) != NULL) {
		strlcpy(sc->vbsc_ident, serial, VTBLK_BLK_ID_BYTES);
	} else {
		path = get_config_value_node(nvl, "path");
		MD5Init(&mdctx);
		MD5Update(&mdctx, path, strlen(path));
		MD5Final(digest, &mdctx);
		snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
		    "BHYVE-%02X%02X-%02X%02X-%02X%02X",
		    digest[0], digest[1], digest[2], digest[3], digest[4],
		    digest[5]);
	}

	/* setup virtio block config space */
	sc->vbsc_cfg.vbc_capacity = size / VTBLK_BSIZE; /* 512-byte units */
	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */

	/*
	 * If Linux is presented with a seg_max greater than the virtio queue
	 * size, it can stumble into situations where it violates its own
	 * invariants and panics.  For safety, we keep seg_max clamped, paying
	 * heed to the two extra descriptors needed for the header and status
	 * of a request.
	 */
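	/*
	 * With VTBLK_RINGSZ at 128, for example, this advertises at most
	 * 126 data segments, further capped by BLOCKIF_IOV_MAX, the most
	 * that blockif will accept in a single request.
	 */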
	sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
	sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
	sc->vbsc_cfg.vbc_geometry.heads = 0;
	sc->vbsc_cfg.vbc_geometry.sectors = 0;
	sc->vbsc_cfg.vbc_blk_size = sectsz;
	sc->vbsc_cfg.vbc_topology.physical_block_exp =
	    (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
	sc->vbsc_cfg.vbc_topology.alignment_offset =
	    (sto != 0) ? ((sts - sto) / sectsz) : 0;
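	/*
	 * E.g. a 4096-byte physical sector size (sts) atop 512-byte logical
	 * sectors yields physical_block_exp = ffsll(8) - 1 = 3, i.e. eight
	 * logical blocks per physical block.
	 */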
	sc->vbsc_cfg.vbc_topology.min_io_size = 0;
	sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
	sc->vbsc_cfg.vbc_writeback = 0;
	sc->vbsc_cfg.max_discard_sectors = VTBLK_MAX_DISCARD_SECT;
	sc->vbsc_cfg.max_discard_seg = VTBLK_MAX_DISCARD_SEG;
	sc->vbsc_cfg.discard_sector_alignment = MAX(sectsz, sts) / VTBLK_BSIZE;

	/*
	 * Should we move some of this into virtio.c?  Could
	 * have the device, class, and subdev_0 as fields in
	 * the virtio constants structure.
	 */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_BLOCK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
		blockif_close(sc->bc);
		free(sc);
		return (1);
	}
	vi_set_io_bar(&sc->vbsc_vs, 0);
	blockif_register_resize_callback(sc->bc, pci_vtblk_resized, sc);
	return (0);
}

static int
pci_vtblk_cfgwrite(void *vsc __unused, int offset, int size __unused,
    uint32_t value __unused)
{

	DPRINTF(("vtblk: write to readonly reg %d", offset));
	return (1);
}

static int
pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtblk_softc *sc = vsc;
	void *ptr;

	/* our caller has already verified offset and size */
	ptr = (uint8_t *)&sc->vbsc_cfg + offset;
	memcpy(retval, ptr, size);
	return (0);
}

#ifndef __FreeBSD__
static void
pci_vtblk_apply_feats(void *vsc, uint64_t caps)
{
	struct pci_vtblk_softc *sc = vsc;
	const int wce_next = ((caps & VTBLK_F_FLUSH) != 0) ? 1 : 0;

	if (sc->vbsc_wce != wce_next) {
		(void) blockif_set_wce(sc->bc, wce_next);
		sc->vbsc_wce = wce_next;
	}
}
#endif /* __FreeBSD__ */

static const struct pci_devemu pci_de_vblk = {
	.pe_emu =	"virtio-blk",
	.pe_init =	pci_vtblk_init,
	.pe_legacy_config = blockif_legacy_config,
	.pe_barwrite =	vi_pci_write,
	.pe_barread =	vi_pci_read,
};
PCI_EMUL_SET(pci_de_vblk);
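
/*
 * Illustrative usage (slot number and path are examples only):
 *
 *	bhyve ... -s 4,virtio-blk,/dev/zvol/rdsk/rpool/vm0,serial=VM0DISK0 ...
 *
 * blockif_legacy_config() turns the comma-separated options following
 * "virtio-blk" into the config nvlist consumed by pci_vtblk_init() above.
 */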
599