xref: /freebsd/usr.sbin/bhyve/pci_virtio_block.c (revision 4d65a7c6951cea0333f1a0c1b32c38489cdfa6c5)
1366f6083SPeter Grehan /*-
24d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
31de7b4b8SPedro F. Giffuni  *
4366f6083SPeter Grehan  * Copyright (c) 2011 NetApp, Inc.
5366f6083SPeter Grehan  * All rights reserved.
62349cda4SJohn Baldwin  * Copyright 2020-2021 Joyent, Inc.
7366f6083SPeter Grehan  *
8366f6083SPeter Grehan  * Redistribution and use in source and binary forms, with or without
9366f6083SPeter Grehan  * modification, are permitted provided that the following conditions
10366f6083SPeter Grehan  * are met:
11366f6083SPeter Grehan  * 1. Redistributions of source code must retain the above copyright
12366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer.
13366f6083SPeter Grehan  * 2. Redistributions in binary form must reproduce the above copyright
14366f6083SPeter Grehan  *    notice, this list of conditions and the following disclaimer in the
15366f6083SPeter Grehan  *    documentation and/or other materials provided with the distribution.
16366f6083SPeter Grehan  *
17366f6083SPeter Grehan  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18366f6083SPeter Grehan  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19366f6083SPeter Grehan  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20366f6083SPeter Grehan  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21366f6083SPeter Grehan  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22366f6083SPeter Grehan  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23366f6083SPeter Grehan  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24366f6083SPeter Grehan  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25366f6083SPeter Grehan  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26366f6083SPeter Grehan  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27366f6083SPeter Grehan  * SUCH DAMAGE.
28366f6083SPeter Grehan  */
29366f6083SPeter Grehan 
30366f6083SPeter Grehan #include <sys/param.h>
31366f6083SPeter Grehan #include <sys/linker_set.h>
32366f6083SPeter Grehan #include <sys/stat.h>
33366f6083SPeter Grehan #include <sys/uio.h>
34366f6083SPeter Grehan #include <sys/ioctl.h>
352e325b33SPeter Grehan #include <sys/disk.h>
36366f6083SPeter Grehan 
37483d953aSJohn Baldwin #include <machine/vmm_snapshot.h>
38483d953aSJohn Baldwin 
39366f6083SPeter Grehan #include <errno.h>
40366f6083SPeter Grehan #include <fcntl.h>
41366f6083SPeter Grehan #include <stdio.h>
42366f6083SPeter Grehan #include <stdlib.h>
43366f6083SPeter Grehan #include <stdint.h>
44366f6083SPeter Grehan #include <string.h>
45366f6083SPeter Grehan #include <strings.h>
46366f6083SPeter Grehan #include <unistd.h>
47366f6083SPeter Grehan #include <assert.h>
48366f6083SPeter Grehan #include <pthread.h>
4964945a9eSPeter Grehan #include <md5.h>
50366f6083SPeter Grehan 
51e285ef8dSPeter Grehan #include "bhyverun.h"
52621b5090SJohn Baldwin #include "config.h"
53332eff95SVincenzo Maffione #include "debug.h"
54366f6083SPeter Grehan #include "pci_emul.h"
55366f6083SPeter Grehan #include "virtio.h"
56066a8f14SAlexander Motin #include "block_if.h"
57366f6083SPeter Grehan 
5822769bbeSAllan Jude #define	VTBLK_BSIZE	512
598c74ade8SJohn Baldwin #define	VTBLK_RINGSZ	128
608c74ade8SJohn Baldwin 
618c74ade8SJohn Baldwin _Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
62366f6083SPeter Grehan 
/* Values stored in the one-byte request status returned to the guest. */
#define	VTBLK_S_OK	0
#define	VTBLK_S_IOERR	1
#define	VTBLK_S_UNSUPP	2

/*
 * Size of the identifier buffer: 20 identifier bytes plus a terminating
 * NUL.  Parenthesized so that the macro expands correctly when it is used
 * inside a larger expression.
 */
#define	VTBLK_BLK_ID_BYTES	(20 + 1)
68366f6083SPeter Grehan 
/* Capability bits */
#define	VTBLK_F_BARRIER		(1 << 0)	/* Does host support barriers? */
#define	VTBLK_F_SIZE_MAX	(1 << 1)	/* Indicates maximum segment size */
#define	VTBLK_F_SEG_MAX		(1 << 2)	/* Indicates maximum # of segments */
#define	VTBLK_F_GEOMETRY	(1 << 4)	/* Legacy geometry available  */
#define	VTBLK_F_RO		(1 << 5)	/* Disk is read-only */
#define	VTBLK_F_BLK_SIZE	(1 << 6)	/* Block size of disk is available*/
#define	VTBLK_F_SCSI		(1 << 7)	/* Supports scsi command passthru */
#define	VTBLK_F_FLUSH		(1 << 9)	/* Writeback mode enabled after reset */
#define	VTBLK_F_WCE		(1 << 9)	/* Legacy alias for FLUSH */
#define	VTBLK_F_TOPOLOGY	(1 << 10)	/* Topology information is available */
#define	VTBLK_F_CONFIG_WCE	(1 << 11)	/* Writeback mode available in config */
#define	VTBLK_F_MQ		(1 << 12)	/* Multi-Queue */
#define	VTBLK_F_DISCARD		(1 << 13)	/* Trim blocks */
#define	VTBLK_F_WRITE_ZEROES	(1 << 14)	/* Write zeros */

/*
 * Host capabilities: the feature bits that are always offered.
 * VTBLK_F_DISCARD is OR'ed into the per-device copy at init time when the
 * backing store supports deletion.
 */
#define	VTBLK_S_HOSTCAPS      \
  ( VTBLK_F_SEG_MAX  |						    \
    VTBLK_F_BLK_SIZE |						    \
    VTBLK_F_FLUSH    |						    \
    VTBLK_F_TOPOLOGY |						    \
    VIRTIO_RING_F_INDIRECT_DESC )	/* indirect descriptors */
94366f6083SPeter Grehan 
/*
 * The current blockif_delete() interface only allows a single delete
 * request at a time.
 */
#define	VTBLK_MAX_DISCARD_SEG	1

/*
 * An arbitrary limit to prevent excessive latency due to large
 * delete requests.  Expressed in VTBLK_BSIZE-sized sectors.
 */
#define	VTBLK_MAX_DISCARD_SECT	((16 << 20) / VTBLK_BSIZE)	/* 16 MiB */
10622769bbeSAllan Jude 
/*
 * Config space "registers".
 *
 * This structure is copied out byte-for-byte by pci_vtblk_cfgread(), so the
 * field order and sizes define the layout the guest sees.
 */
struct vtblk_config {
	uint64_t	vbc_capacity;	/* device size in VTBLK_BSIZE units */
	uint32_t	vbc_size_max;	/* left at 0: not negotiated */
	uint32_t	vbc_seg_max;	/* max data segments per request */
	struct {
		uint16_t cylinders;
		uint8_t heads;
		uint8_t sectors;
	} vbc_geometry;			/* all zero: no geometry reported */
	uint32_t	vbc_blk_size;	/* logical sector size of the backend */
	struct {
		uint8_t physical_block_exp;
		uint8_t alignment_offset;
		uint16_t min_io_size;
		uint32_t opt_io_size;
	} vbc_topology;
	uint8_t		vbc_writeback;
	uint8_t		unused0[1];
	uint16_t	num_queues;
	uint32_t	max_discard_sectors;
	uint32_t	max_discard_seg;
	uint32_t	discard_sector_alignment;
	uint32_t	max_write_zeroes_sectors;
	uint32_t	max_write_zeroes_seg;
	uint8_t		write_zeroes_may_unmap;
	uint8_t		unused1[3];
} __packed;
137366f6083SPeter Grehan 
/*
 * Fixed-size block header.
 *
 * This is the first descriptor of every request; pci_vtblk_proc() asserts
 * that its length equals sizeof(struct virtio_blk_hdr).
 */
struct virtio_blk_hdr {
#define	VBH_OP_READ		0
#define	VBH_OP_WRITE		1
#define	VBH_OP_SCSI_CMD		2
#define	VBH_OP_SCSI_CMD_OUT	3
#define	VBH_OP_FLUSH		4
#define	VBH_OP_FLUSH_OUT	5
#define	VBH_OP_IDENT		8
#define	VBH_OP_DISCARD		11
#define	VBH_OP_WRITE_ZEROES	13

#define	VBH_FLAG_BARRIER	0x80000000	/* OR'ed into vbh_type */
	uint32_t	vbh_type;	/* VBH_OP_*, possibly with VBH_FLAG_BARRIER */
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;	/* start sector, in VTBLK_BSIZE units */
} __packed;
157366f6083SPeter Grehan 
/*
 * Debug printf
 *
 * Both macros take a parenthesized argument list, e.g.
 * DPRINTF(("fmt %d", x)).  DPRINTF only prints when pci_vtblk_debug is
 * non-zero; it is wrapped in do { } while (0) so that it behaves as a
 * single statement and cannot capture a following "else".
 */
static int pci_vtblk_debug;
#define	DPRINTF(params) do {						\
	if (pci_vtblk_debug)						\
		PRINTLN params;						\
} while (0)
#define	WPRINTF(params) PRINTLN params
164366f6083SPeter Grehan 
165066a8f14SAlexander Motin struct pci_vtblk_ioreq {
166066a8f14SAlexander Motin 	struct blockif_req		io_req;
167066a8f14SAlexander Motin 	struct pci_vtblk_softc		*io_sc;
168066a8f14SAlexander Motin 	uint8_t				*io_status;
169066a8f14SAlexander Motin 	uint16_t			io_idx;
170066a8f14SAlexander Motin };
171066a8f14SAlexander Motin 
/*
 * Payload of a VBH_OP_DISCARD request.  pci_vtblk_proc() requires the data
 * descriptor to be exactly sizeof(struct virtio_blk_discard_write_zeroes).
 */
struct virtio_blk_discard_write_zeroes {
	uint64_t	sector;		/* first sector, in VTBLK_BSIZE units */
	uint32_t	num_sectors;	/* length, in VTBLK_BSIZE units */
	struct {
		uint32_t unmap:1;
		uint32_t reserved:31;
	} flags;
};
18022769bbeSAllan Jude 
181366f6083SPeter Grehan /*
182366f6083SPeter Grehan  * Per-device softc
183366f6083SPeter Grehan  */
184366f6083SPeter Grehan struct pci_vtblk_softc {
185ba41c3c1SPeter Grehan 	struct virtio_softc vbsc_vs;
1863cbf3585SJohn Baldwin 	pthread_mutex_t vsc_mtx;
187ba41c3c1SPeter Grehan 	struct vqueue_info vbsc_vq;
188366f6083SPeter Grehan 	struct vtblk_config vbsc_cfg;
18922769bbeSAllan Jude 	struct virtio_consts vbsc_consts;
190066a8f14SAlexander Motin 	struct blockif_ctxt *bc;
19164945a9eSPeter Grehan 	char vbsc_ident[VTBLK_BLK_ID_BYTES];
192066a8f14SAlexander Motin 	struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
193366f6083SPeter Grehan };
194366f6083SPeter Grehan 
195ba41c3c1SPeter Grehan static void pci_vtblk_reset(void *);
196ba41c3c1SPeter Grehan static void pci_vtblk_notify(void *, struct vqueue_info *);
197ba41c3c1SPeter Grehan static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
198ba41c3c1SPeter Grehan static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
199483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT
200483d953aSJohn Baldwin static void pci_vtblk_pause(void *);
201483d953aSJohn Baldwin static void pci_vtblk_resume(void *);
202483d953aSJohn Baldwin static int pci_vtblk_snapshot(void *, struct vm_snapshot_meta *);
203483d953aSJohn Baldwin #endif
204445e089eSNeel Natu 
205ba41c3c1SPeter Grehan static struct virtio_consts vtblk_vi_consts = {
2066cb26162SMark Johnston 	.vc_name =	"vtblk",
2076cb26162SMark Johnston 	.vc_nvq =	1,
2086cb26162SMark Johnston 	.vc_cfgsize =	sizeof(struct vtblk_config),
2096cb26162SMark Johnston 	.vc_reset =	pci_vtblk_reset,
2106cb26162SMark Johnston 	.vc_qnotify =	pci_vtblk_notify,
2116cb26162SMark Johnston 	.vc_cfgread =	pci_vtblk_cfgread,
2126cb26162SMark Johnston 	.vc_cfgwrite =	pci_vtblk_cfgwrite,
2136cb26162SMark Johnston 	.vc_apply_features = NULL,
2146cb26162SMark Johnston 	.vc_hv_caps =	VTBLK_S_HOSTCAPS,
215483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT
2166cb26162SMark Johnston 	.vc_pause =	pci_vtblk_pause,
2176cb26162SMark Johnston 	.vc_resume =	pci_vtblk_resume,
2186cb26162SMark Johnston 	.vc_snapshot =	pci_vtblk_snapshot,
219483d953aSJohn Baldwin #endif
220ba41c3c1SPeter Grehan };
221366f6083SPeter Grehan 
222366f6083SPeter Grehan static void
pci_vtblk_reset(void * vsc)223ba41c3c1SPeter Grehan pci_vtblk_reset(void *vsc)
224366f6083SPeter Grehan {
225ba41c3c1SPeter Grehan 	struct pci_vtblk_softc *sc = vsc;
226ba41c3c1SPeter Grehan 
227332eff95SVincenzo Maffione 	DPRINTF(("vtblk: device reset requested !"));
228ba41c3c1SPeter Grehan 	vi_reset_dev(&sc->vbsc_vs);
229366f6083SPeter Grehan }
230366f6083SPeter Grehan 
231366f6083SPeter Grehan static void
pci_vtblk_done_locked(struct pci_vtblk_ioreq * io,int err)23222769bbeSAllan Jude pci_vtblk_done_locked(struct pci_vtblk_ioreq *io, int err)
233066a8f14SAlexander Motin {
234066a8f14SAlexander Motin 	struct pci_vtblk_softc *sc = io->io_sc;
235066a8f14SAlexander Motin 
236066a8f14SAlexander Motin 	/* convert errno into a virtio block error return */
237066a8f14SAlexander Motin 	if (err == EOPNOTSUPP || err == ENOSYS)
238066a8f14SAlexander Motin 		*io->io_status = VTBLK_S_UNSUPP;
239066a8f14SAlexander Motin 	else if (err != 0)
240066a8f14SAlexander Motin 		*io->io_status = VTBLK_S_IOERR;
241066a8f14SAlexander Motin 	else
242066a8f14SAlexander Motin 		*io->io_status = VTBLK_S_OK;
243066a8f14SAlexander Motin 
244066a8f14SAlexander Motin 	/*
245066a8f14SAlexander Motin 	 * Return the descriptor back to the host.
246066a8f14SAlexander Motin 	 * We wrote 1 byte (our status) to host.
247066a8f14SAlexander Motin 	 */
248066a8f14SAlexander Motin 	vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
249066a8f14SAlexander Motin 	vq_endchains(&sc->vbsc_vq, 0);
25022769bbeSAllan Jude }
25122769bbeSAllan Jude 
#ifdef BHYVE_SNAPSHOT
/*
 * Virtio pause callback (vc_pause): forwards to blockif_pause() on the
 * device's block interface.
 */
static void
pci_vtblk_pause(void *vsc)
{
	struct pci_vtblk_softc *sc = vsc;

	/* No trailing newline: matches the other DPRINTF calls in this file. */
	DPRINTF(("vtblk: device pause requested !"));
	blockif_pause(sc->bc);
}

/*
 * Virtio resume callback (vc_resume): forwards to blockif_resume() on the
 * device's block interface.
 */
static void
pci_vtblk_resume(void *vsc)
{
	struct pci_vtblk_softc *sc = vsc;

	/* No trailing newline: matches the other DPRINTF calls in this file. */
	DPRINTF(("vtblk: device resume requested !"));
	blockif_resume(sc->bc);
}

/*
 * Virtio snapshot callback (vc_snapshot): passes the config space and the
 * identifier string through the snapshot macros.
 *
 * Returns the value left in ret by the SNAPSHOT_* macros.
 */
static int
pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta)
{
	int ret;
	struct pci_vtblk_softc *sc = vsc;

	SNAPSHOT_VAR_OR_LEAVE(sc->vbsc_cfg, meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(sc->vbsc_ident, sizeof(sc->vbsc_ident),
			      meta, ret, done);

done:
	return (ret);
}
#endif
285483d953aSJohn Baldwin 
28622769bbeSAllan Jude static void
pci_vtblk_done(struct blockif_req * br,int err)28722769bbeSAllan Jude pci_vtblk_done(struct blockif_req *br, int err)
28822769bbeSAllan Jude {
28922769bbeSAllan Jude 	struct pci_vtblk_ioreq *io = br->br_param;
29022769bbeSAllan Jude 	struct pci_vtblk_softc *sc = io->io_sc;
29122769bbeSAllan Jude 
29222769bbeSAllan Jude 	pthread_mutex_lock(&sc->vsc_mtx);
29322769bbeSAllan Jude 	pci_vtblk_done_locked(io, err);
294066a8f14SAlexander Motin 	pthread_mutex_unlock(&sc->vsc_mtx);
295066a8f14SAlexander Motin }
296066a8f14SAlexander Motin 
297066a8f14SAlexander Motin static void
pci_vtblk_proc(struct pci_vtblk_softc * sc,struct vqueue_info * vq)298ba41c3c1SPeter Grehan pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
299366f6083SPeter Grehan {
300366f6083SPeter Grehan 	struct virtio_blk_hdr *vbh;
301066a8f14SAlexander Motin 	struct pci_vtblk_ioreq *io;
302ba41c3c1SPeter Grehan 	int i, n;
303366f6083SPeter Grehan 	int err;
304bb1524afSAlexander Motin 	ssize_t iolen;
305ba41c3c1SPeter Grehan 	int writeop, type;
306b0139127SKa Ho Ng 	struct vi_req req;
30754b7bb76SAlexander Motin 	struct iovec iov[BLOCKIF_IOV_MAX + 2];
30822769bbeSAllan Jude 	struct virtio_blk_discard_write_zeroes *discard;
309366f6083SPeter Grehan 
310b0139127SKa Ho Ng 	n = vq_getchain(vq, iov, BLOCKIF_IOV_MAX + 2, &req);
311366f6083SPeter Grehan 
312366f6083SPeter Grehan 	/*
313ba41c3c1SPeter Grehan 	 * The first descriptor will be the read-only fixed header,
314ba41c3c1SPeter Grehan 	 * and the last is for status (hence +2 above and below).
315ba41c3c1SPeter Grehan 	 * The remaining iov's are the actual data I/O vectors.
316ba41c3c1SPeter Grehan 	 *
317ba41c3c1SPeter Grehan 	 * XXX - note - this fails on crash dump, which does a
318ba41c3c1SPeter Grehan 	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
319366f6083SPeter Grehan 	 */
32054b7bb76SAlexander Motin 	assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
321ba41c3c1SPeter Grehan 
322b0139127SKa Ho Ng 	io = &sc->vbsc_ios[req.idx];
323b0139127SKa Ho Ng 	assert(req.readable != 0);
324ba41c3c1SPeter Grehan 	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
32522769bbeSAllan Jude 	vbh = (struct virtio_blk_hdr *)iov[0].iov_base;
326066a8f14SAlexander Motin 	memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
327066a8f14SAlexander Motin 	io->io_req.br_iovcnt = n - 2;
32822769bbeSAllan Jude 	io->io_req.br_offset = vbh->vbh_sector * VTBLK_BSIZE;
32922769bbeSAllan Jude 	io->io_status = (uint8_t *)iov[--n].iov_base;
330b0139127SKa Ho Ng 	assert(req.writable != 0);
331ba41c3c1SPeter Grehan 	assert(iov[n].iov_len == 1);
332366f6083SPeter Grehan 
33358a6b033SNeel Natu 	/*
33458a6b033SNeel Natu 	 * XXX
33558a6b033SNeel Natu 	 * The guest should not be setting the BARRIER flag because
33658a6b033SNeel Natu 	 * we don't advertise the capability.
33758a6b033SNeel Natu 	 */
33858a6b033SNeel Natu 	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
33922769bbeSAllan Jude 	writeop = (type == VBH_OP_WRITE || type == VBH_OP_DISCARD);
340b0139127SKa Ho Ng 	/*
341b0139127SKa Ho Ng 	 * - Write op implies read-only descriptor
342b0139127SKa Ho Ng 	 * - Read/ident op implies write-only descriptor
343b0139127SKa Ho Ng 	 *
344b0139127SKa Ho Ng 	 * By taking away either the read-only fixed header or the write-only
345b0139127SKa Ho Ng 	 * status iovec, the following condition should hold true.
346b0139127SKa Ho Ng 	 */
347b0139127SKa Ho Ng 	assert(n == (writeop ? req.readable : req.writable));
348366f6083SPeter Grehan 
349ba41c3c1SPeter Grehan 	iolen = 0;
350ba41c3c1SPeter Grehan 	for (i = 1; i < n; i++) {
351ba41c3c1SPeter Grehan 		iolen += iov[i].iov_len;
352366f6083SPeter Grehan 	}
353bb1524afSAlexander Motin 	io->io_req.br_resid = iolen;
354366f6083SPeter Grehan 
355332eff95SVincenzo Maffione 	DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld",
35622769bbeSAllan Jude 		 writeop ? "write/discard" : "read/ident", iolen, i - 1,
35740dbeed3SPedro F. Giffuni 		 io->io_req.br_offset));
358366f6083SPeter Grehan 
35964945a9eSPeter Grehan 	switch (type) {
36064945a9eSPeter Grehan 	case VBH_OP_READ:
361066a8f14SAlexander Motin 		err = blockif_read(sc->bc, &io->io_req);
362066a8f14SAlexander Motin 		break;
363066a8f14SAlexander Motin 	case VBH_OP_WRITE:
364066a8f14SAlexander Motin 		err = blockif_write(sc->bc, &io->io_req);
365066a8f14SAlexander Motin 		break;
36622769bbeSAllan Jude 	case VBH_OP_DISCARD:
36722769bbeSAllan Jude 		/*
36822769bbeSAllan Jude 		 * We currently only support a single request, if the guest
36922769bbeSAllan Jude 		 * has submitted a request that doesn't conform to the
37022769bbeSAllan Jude 		 * requirements, we return a error.
37122769bbeSAllan Jude 		 */
37222769bbeSAllan Jude 		if (iov[1].iov_len != sizeof (*discard)) {
37322769bbeSAllan Jude 			pci_vtblk_done_locked(io, EINVAL);
37422769bbeSAllan Jude 			return;
37522769bbeSAllan Jude 		}
37622769bbeSAllan Jude 
37722769bbeSAllan Jude 		/* The segments to discard are provided rather than data */
37822769bbeSAllan Jude 		discard = (struct virtio_blk_discard_write_zeroes *)
37922769bbeSAllan Jude 		    iov[1].iov_base;
38022769bbeSAllan Jude 
38122769bbeSAllan Jude 		/*
38222769bbeSAllan Jude 		 * virtio v1.1 5.2.6.2:
38322769bbeSAllan Jude 		 * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP
38422769bbeSAllan Jude 		 * for discard and write zeroes commands if any unknown flag is
38522769bbeSAllan Jude 		 * set. Furthermore, the device MUST set the status byte to
38622769bbeSAllan Jude 		 * VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag
38722769bbeSAllan Jude 		 * is set.
38822769bbeSAllan Jude 		 *
38922769bbeSAllan Jude 		 * Currently there are no known flags for a DISCARD request.
39022769bbeSAllan Jude 		 */
39122769bbeSAllan Jude 		if (discard->flags.unmap != 0 || discard->flags.reserved != 0) {
39222769bbeSAllan Jude 			pci_vtblk_done_locked(io, ENOTSUP);
39322769bbeSAllan Jude 			return;
39422769bbeSAllan Jude 		}
39522769bbeSAllan Jude 
39622769bbeSAllan Jude 		/* Make sure the request doesn't exceed our size limit */
39722769bbeSAllan Jude 		if (discard->num_sectors > VTBLK_MAX_DISCARD_SECT) {
39822769bbeSAllan Jude 			pci_vtblk_done_locked(io, EINVAL);
39922769bbeSAllan Jude 			return;
40022769bbeSAllan Jude 		}
40122769bbeSAllan Jude 
40222769bbeSAllan Jude 		io->io_req.br_offset = discard->sector * VTBLK_BSIZE;
40322769bbeSAllan Jude 		io->io_req.br_resid = discard->num_sectors * VTBLK_BSIZE;
40422769bbeSAllan Jude 		err = blockif_delete(sc->bc, &io->io_req);
40522769bbeSAllan Jude 		break;
406066a8f14SAlexander Motin 	case VBH_OP_FLUSH:
407066a8f14SAlexander Motin 	case VBH_OP_FLUSH_OUT:
408066a8f14SAlexander Motin 		err = blockif_flush(sc->bc, &io->io_req);
40964945a9eSPeter Grehan 		break;
41064945a9eSPeter Grehan 	case VBH_OP_IDENT:
41164945a9eSPeter Grehan 		/* Assume a single buffer */
412811a355fSAlexander Motin 		/* S/n equal to buffer is not zero-terminated. */
413811a355fSAlexander Motin 		memset(iov[1].iov_base, 0, iov[1].iov_len);
414811a355fSAlexander Motin 		strncpy(iov[1].iov_base, sc->vbsc_ident,
41526cdcdbeSNeel Natu 		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
41622769bbeSAllan Jude 		pci_vtblk_done_locked(io, 0);
417066a8f14SAlexander Motin 		return;
41864945a9eSPeter Grehan 	default:
41922769bbeSAllan Jude 		pci_vtblk_done_locked(io, EOPNOTSUPP);
420066a8f14SAlexander Motin 		return;
42164945a9eSPeter Grehan 	}
422066a8f14SAlexander Motin 	assert(err == 0);
4233bf0823cSNeel Natu }
424366f6083SPeter Grehan 
/*
 * Virtio queue-notify callback (vc_qnotify): process descriptor chains one
 * at a time for as long as vq_has_descs() reports more work.
 */
static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *sc = vsc;

	while (vq_has_descs(vq))
		pci_vtblk_proc(sc, vq);
}
433366f6083SPeter Grehan 
4342349cda4SJohn Baldwin static void
pci_vtblk_resized(struct blockif_ctxt * bctxt __unused,void * arg,size_t new_size)43598d920d9SMark Johnston pci_vtblk_resized(struct blockif_ctxt *bctxt __unused, void *arg,
43698d920d9SMark Johnston     size_t new_size)
4372349cda4SJohn Baldwin {
4382349cda4SJohn Baldwin 	struct pci_vtblk_softc *sc;
4392349cda4SJohn Baldwin 
4402349cda4SJohn Baldwin 	sc = arg;
4412349cda4SJohn Baldwin 
4422349cda4SJohn Baldwin 	sc->vbsc_cfg.vbc_capacity = new_size / VTBLK_BSIZE; /* 512-byte units */
4432349cda4SJohn Baldwin 	vi_interrupt(&sc->vbsc_vs, VIRTIO_PCI_ISR_CONFIG,
4442349cda4SJohn Baldwin 	    sc->vbsc_vs.vs_msix_cfg_idx);
4452349cda4SJohn Baldwin }
4462349cda4SJohn Baldwin 
447366f6083SPeter Grehan static int
pci_vtblk_init(struct pci_devinst * pi,nvlist_t * nvl)4486a284cacSJohn Baldwin pci_vtblk_init(struct pci_devinst *pi, nvlist_t *nvl)
449366f6083SPeter Grehan {
4505d805962SJohn Baldwin 	char bident[sizeof("XXX:XXX")];
451066a8f14SAlexander Motin 	struct blockif_ctxt *bctxt;
452c6efcb12SJohn Baldwin 	const char *path, *serial;
45364945a9eSPeter Grehan 	MD5_CTX mdctx;
45464945a9eSPeter Grehan 	u_char digest[16];
455366f6083SPeter Grehan 	struct pci_vtblk_softc *sc;
456066a8f14SAlexander Motin 	off_t size;
457066a8f14SAlexander Motin 	int i, sectsz, sts, sto;
458366f6083SPeter Grehan 
459366f6083SPeter Grehan 	/*
460366f6083SPeter Grehan 	 * The supplied backing file has to exist
461366f6083SPeter Grehan 	 */
4625d805962SJohn Baldwin 	snprintf(bident, sizeof(bident), "%u:%u", pi->pi_slot, pi->pi_func);
463621b5090SJohn Baldwin 	bctxt = blockif_open(nvl, bident);
464066a8f14SAlexander Motin 	if (bctxt == NULL) {
465366f6083SPeter Grehan 		perror("Could not open backing file");
466366f6083SPeter Grehan 		return (1);
467366f6083SPeter Grehan 	}
468366f6083SPeter Grehan 
469*480bef94SCorvin Köhne 	if (blockif_add_boot_device(pi, bctxt)) {
470*480bef94SCorvin Köhne 		perror("Invalid boot device");
471*480bef94SCorvin Köhne 		return (1);
472*480bef94SCorvin Köhne 	}
473*480bef94SCorvin Köhne 
474066a8f14SAlexander Motin 	size = blockif_size(bctxt);
475066a8f14SAlexander Motin 	sectsz = blockif_sectsz(bctxt);
476066a8f14SAlexander Motin 	blockif_psectsz(bctxt, &sts, &sto);
4772e325b33SPeter Grehan 
478994f858aSXin LI 	sc = calloc(1, sizeof(struct pci_vtblk_softc));
479066a8f14SAlexander Motin 	sc->bc = bctxt;
480066a8f14SAlexander Motin 	for (i = 0; i < VTBLK_RINGSZ; i++) {
481066a8f14SAlexander Motin 		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
482066a8f14SAlexander Motin 		io->io_req.br_callback = pci_vtblk_done;
483066a8f14SAlexander Motin 		io->io_req.br_param = io;
484066a8f14SAlexander Motin 		io->io_sc = sc;
485066a8f14SAlexander Motin 		io->io_idx = i;
486066a8f14SAlexander Motin 	}
487366f6083SPeter Grehan 
48822769bbeSAllan Jude 	bcopy(&vtblk_vi_consts, &sc->vbsc_consts, sizeof (vtblk_vi_consts));
48922769bbeSAllan Jude 	if (blockif_candelete(sc->bc))
49022769bbeSAllan Jude 		sc->vbsc_consts.vc_hv_caps |= VTBLK_F_DISCARD;
49122769bbeSAllan Jude 
4923cbf3585SJohn Baldwin 	pthread_mutex_init(&sc->vsc_mtx, NULL);
4933cbf3585SJohn Baldwin 
494ba41c3c1SPeter Grehan 	/* init virtio softc and virtqueues */
49522769bbeSAllan Jude 	vi_softc_linkup(&sc->vbsc_vs, &sc->vbsc_consts, sc, pi, &sc->vbsc_vq);
4963cbf3585SJohn Baldwin 	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
4973cbf3585SJohn Baldwin 
498ba41c3c1SPeter Grehan 	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
499ba41c3c1SPeter Grehan 	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
500ba41c3c1SPeter Grehan 
50164945a9eSPeter Grehan 	/*
502c6efcb12SJohn Baldwin 	 * If an explicit identifier is not given, create an
503c6efcb12SJohn Baldwin 	 * identifier using parts of the md5 sum of the filename.
50464945a9eSPeter Grehan 	 */
505c6efcb12SJohn Baldwin 	bzero(sc->vbsc_ident, VTBLK_BLK_ID_BYTES);
506c6efcb12SJohn Baldwin 	if ((serial = get_config_value_node(nvl, "serial")) != NULL ||
507c6efcb12SJohn Baldwin 	    (serial = get_config_value_node(nvl, "ser")) != NULL) {
508c6efcb12SJohn Baldwin 		strlcpy(sc->vbsc_ident, serial, VTBLK_BLK_ID_BYTES);
509c6efcb12SJohn Baldwin 	} else {
510621b5090SJohn Baldwin 		path = get_config_value_node(nvl, "path");
51164945a9eSPeter Grehan 		MD5Init(&mdctx);
512621b5090SJohn Baldwin 		MD5Update(&mdctx, path, strlen(path));
51364945a9eSPeter Grehan 		MD5Final(digest, &mdctx);
514edce78c2SMarcelo Araujo 		snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
515edce78c2SMarcelo Araujo 		    "BHYVE-%02X%02X-%02X%02X-%02X%02X",
516c6efcb12SJohn Baldwin 		    digest[0], digest[1], digest[2], digest[3], digest[4],
517c6efcb12SJohn Baldwin 		    digest[5]);
518c6efcb12SJohn Baldwin 	}
51964945a9eSPeter Grehan 
520366f6083SPeter Grehan 	/* setup virtio block config space */
52122769bbeSAllan Jude 	sc->vbsc_cfg.vbc_capacity = size / VTBLK_BSIZE; /* 512-byte units */
522366f6083SPeter Grehan 	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
5238c74ade8SJohn Baldwin 
5248c74ade8SJohn Baldwin 	/*
5258c74ade8SJohn Baldwin 	 * If Linux is presented with a seg_max greater than the virtio queue
5268c74ade8SJohn Baldwin 	 * size, it can stumble into situations where it violates its own
5278c74ade8SJohn Baldwin 	 * invariants and panics.  For safety, we keep seg_max clamped, paying
5288c74ade8SJohn Baldwin 	 * heed to the two extra descriptors needed for the header and status
5298c74ade8SJohn Baldwin 	 * of a request.
5308c74ade8SJohn Baldwin 	 */
5318c74ade8SJohn Baldwin 	sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
532297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
533297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_geometry.heads = 0;
534297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_geometry.sectors = 0;
535297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_blk_size = sectsz;
536297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_topology.physical_block_exp =
537297c4868SAlexander Motin 	    (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
538297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_topology.alignment_offset =
539297c4868SAlexander Motin 	    (sto != 0) ? ((sts - sto) / sectsz) : 0;
540297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_topology.min_io_size = 0;
541297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
542297c4868SAlexander Motin 	sc->vbsc_cfg.vbc_writeback = 0;
54322769bbeSAllan Jude 	sc->vbsc_cfg.max_discard_sectors = VTBLK_MAX_DISCARD_SECT;
54422769bbeSAllan Jude 	sc->vbsc_cfg.max_discard_seg = VTBLK_MAX_DISCARD_SEG;
545cc3568c1SAllan Jude 	sc->vbsc_cfg.discard_sector_alignment = MAX(sectsz, sts) / VTBLK_BSIZE;
546366f6083SPeter Grehan 
547ba41c3c1SPeter Grehan 	/*
548ba41c3c1SPeter Grehan 	 * Should we move some of this into virtio.c?  Could
549ba41c3c1SPeter Grehan 	 * have the device, class, and subdev_0 as fields in
550ba41c3c1SPeter Grehan 	 * the virtio constants structure.
551ba41c3c1SPeter Grehan 	 */
552366f6083SPeter Grehan 	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
553366f6083SPeter Grehan 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
554366f6083SPeter Grehan 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
55554ac6f72SKa Ho Ng 	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_BLOCK);
556604b5210SPeter Grehan 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
557445e089eSNeel Natu 
558066a8f14SAlexander Motin 	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
559066a8f14SAlexander Motin 		blockif_close(sc->bc);
560066a8f14SAlexander Motin 		free(sc);
561445e089eSNeel Natu 		return (1);
562066a8f14SAlexander Motin 	}
563ba41c3c1SPeter Grehan 	vi_set_io_bar(&sc->vbsc_vs, 0);
5642349cda4SJohn Baldwin 	blockif_register_resize_callback(sc->bc, pci_vtblk_resized, sc);
565366f6083SPeter Grehan 	return (0);
566366f6083SPeter Grehan }
567366f6083SPeter Grehan 
568ba41c3c1SPeter Grehan static int
pci_vtblk_cfgwrite(void * vsc __unused,int offset,int size __unused,uint32_t value __unused)56998d920d9SMark Johnston pci_vtblk_cfgwrite(void *vsc __unused, int offset, int size __unused,
57098d920d9SMark Johnston     uint32_t value __unused)
571445e089eSNeel Natu {
572ba41c3c1SPeter Grehan 
573332eff95SVincenzo Maffione 	DPRINTF(("vtblk: write to readonly reg %d", offset));
574ba41c3c1SPeter Grehan 	return (1);
575445e089eSNeel Natu }
576445e089eSNeel Natu 
577ba41c3c1SPeter Grehan static int
pci_vtblk_cfgread(void * vsc,int offset,int size,uint32_t * retval)578ba41c3c1SPeter Grehan pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
579366f6083SPeter Grehan {
580ba41c3c1SPeter Grehan 	struct pci_vtblk_softc *sc = vsc;
5816214e48cSPeter Grehan 	void *ptr;
582366f6083SPeter Grehan 
583ba41c3c1SPeter Grehan 	/* our caller has already verified offset and size */
584ba41c3c1SPeter Grehan 	ptr = (uint8_t *)&sc->vbsc_cfg + offset;
585ba41c3c1SPeter Grehan 	memcpy(retval, ptr, size);
586366f6083SPeter Grehan 	return (0);
587366f6083SPeter Grehan }
588366f6083SPeter Grehan 
58937045dfaSMark Johnston static const struct pci_devemu pci_de_vblk = {
590366f6083SPeter Grehan 	.pe_emu =	"virtio-blk",
591366f6083SPeter Grehan 	.pe_init =	pci_vtblk_init,
592621b5090SJohn Baldwin 	.pe_legacy_config = blockif_legacy_config,
593ba41c3c1SPeter Grehan 	.pe_barwrite =	vi_pci_write,
594483d953aSJohn Baldwin 	.pe_barread =	vi_pci_read,
595483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT
596483d953aSJohn Baldwin 	.pe_snapshot =	vi_pci_snapshot,
597a85bbbeaSVitaliy Gusev 	.pe_pause =     vi_pci_pause,
598a85bbbeaSVitaliy Gusev 	.pe_resume =    vi_pci_resume,
599483d953aSJohn Baldwin #endif
600366f6083SPeter Grehan };
601366f6083SPeter Grehan PCI_EMUL_SET(pci_de_vblk);
602