/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright 2020-2021 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
29366f6083SPeter Grehan
30366f6083SPeter Grehan #include <sys/param.h>
31366f6083SPeter Grehan #include <sys/linker_set.h>
32366f6083SPeter Grehan #include <sys/stat.h>
33366f6083SPeter Grehan #include <sys/uio.h>
34366f6083SPeter Grehan #include <sys/ioctl.h>
352e325b33SPeter Grehan #include <sys/disk.h>
36366f6083SPeter Grehan
37483d953aSJohn Baldwin #include <machine/vmm_snapshot.h>
38483d953aSJohn Baldwin
39366f6083SPeter Grehan #include <errno.h>
40366f6083SPeter Grehan #include <fcntl.h>
41366f6083SPeter Grehan #include <stdio.h>
42366f6083SPeter Grehan #include <stdlib.h>
43366f6083SPeter Grehan #include <stdint.h>
44366f6083SPeter Grehan #include <string.h>
45366f6083SPeter Grehan #include <strings.h>
46366f6083SPeter Grehan #include <unistd.h>
47366f6083SPeter Grehan #include <assert.h>
48366f6083SPeter Grehan #include <pthread.h>
4964945a9eSPeter Grehan #include <md5.h>
50366f6083SPeter Grehan
51e285ef8dSPeter Grehan #include "bhyverun.h"
52621b5090SJohn Baldwin #include "config.h"
53332eff95SVincenzo Maffione #include "debug.h"
54366f6083SPeter Grehan #include "pci_emul.h"
55366f6083SPeter Grehan #include "virtio.h"
56066a8f14SAlexander Motin #include "block_if.h"
57366f6083SPeter Grehan
5822769bbeSAllan Jude #define VTBLK_BSIZE 512
598c74ade8SJohn Baldwin #define VTBLK_RINGSZ 128
608c74ade8SJohn Baldwin
618c74ade8SJohn Baldwin _Static_assert(VTBLK_RINGSZ <= BLOCKIF_RING_MAX, "Each ring entry must be able to queue a request");
62366f6083SPeter Grehan
/* Status byte values written back to the guest when a request completes. */
#define	VTBLK_S_OK	0
#define	VTBLK_S_IOERR	1
#define	VTBLK_S_UNSUPP	2

/*
 * Size of the device identifier buffer: 20 identifier bytes plus one more
 * for a terminating NUL.  The expansion is parenthesized so that the value
 * stays 21 when the macro is used inside a larger expression.
 */
#define	VTBLK_BLK_ID_BYTES	(20 + 1)
68366f6083SPeter Grehan
/*
 * Feature (capability) bits for the block device.  Only a subset of these
 * is actually offered to the guest by this device model.
 */
#define	VTBLK_F_BARRIER		(1 << 0)	/* Host supports request barriers */
#define	VTBLK_F_SIZE_MAX	(1 << 1)	/* Maximum segment size is reported */
#define	VTBLK_F_SEG_MAX		(1 << 2)	/* Maximum segment count is reported */
#define	VTBLK_F_GEOMETRY	(1 << 4)	/* Legacy disk geometry is reported */
#define	VTBLK_F_RO		(1 << 5)	/* Disk is read-only */
#define	VTBLK_F_BLK_SIZE	(1 << 6)	/* Disk block size is reported */
#define	VTBLK_F_SCSI		(1 << 7)	/* SCSI command passthrough */
#define	VTBLK_F_FLUSH		(1 << 9)	/* Flush command is supported */
#define	VTBLK_F_WCE		(1 << 9)	/* Legacy name; same bit as FLUSH */
#define	VTBLK_F_TOPOLOGY	(1 << 10)	/* Topology information is reported */
#define	VTBLK_F_CONFIG_WCE	(1 << 11)	/* Writeback mode selectable in config */
#define	VTBLK_F_MQ		(1 << 12)	/* Multiple queues */
#define	VTBLK_F_DISCARD		(1 << 13)	/* Discard (trim) requests */
#define	VTBLK_F_WRITE_ZEROES	(1 << 14)	/* Write-zeroes requests */
84fce0413bSPeter Grehan
/*
 * Capabilities offered by every instance of this device.  Per-instance
 * additions are made on the device's own copy of the virtio constants.
 */
#define	VTBLK_S_HOSTCAPS						\
	(VTBLK_F_SEG_MAX |						\
	 VTBLK_F_BLK_SIZE |						\
	 VTBLK_F_FLUSH |						\
	 VTBLK_F_TOPOLOGY |						\
	 VIRTIO_RING_F_INDIRECT_DESC)	/* indirect descriptors */
94366f6083SPeter Grehan
/*
 * blockif_delete() currently takes only one delete request at a time, so
 * a discard may describe just a single segment.
 */
#define	VTBLK_MAX_DISCARD_SEG	1

/*
 * Upper bound on the length of one discard, in sectors (16 MiB worth).
 * The value is arbitrary; it exists to keep very large deletes from
 * causing excessive latency.
 */
#define	VTBLK_MAX_DISCARD_SECT	((16 << 20) / VTBLK_BSIZE)
10622769bbeSAllan Jude
/*
 * Device-specific configuration space "registers".  Guest reads are served
 * by copying bytes straight out of this structure, so its layout must not
 * change.
 */
struct vtblk_config {
	uint64_t	vbc_capacity;		/* in 512-byte units */
	uint32_t	vbc_size_max;
	uint32_t	vbc_seg_max;		/* data segments per request */
	struct {
		uint16_t	cylinders;
		uint8_t		heads;
		uint8_t		sectors;
	} vbc_geometry;
	uint32_t	vbc_blk_size;		/* backend sector size */
	struct {
		uint8_t		physical_block_exp;
		uint8_t		alignment_offset;
		uint16_t	min_io_size;
		uint32_t	opt_io_size;
	} vbc_topology;
	uint8_t		vbc_writeback;
	uint8_t		unused0[1];
	uint16_t	num_queues;
	uint32_t	max_discard_sectors;
	uint32_t	max_discard_seg;
	uint32_t	discard_sector_alignment;
	uint32_t	max_write_zeroes_sectors;
	uint32_t	max_write_zeroes_seg;
	uint8_t		write_zeroes_may_unmap;
	uint8_t		unused1[3];
} __packed;
137366f6083SPeter Grehan
/*
 * Request type codes carried in vbh_type.
 */
#define	VBH_OP_READ		0
#define	VBH_OP_WRITE		1
#define	VBH_OP_SCSI_CMD		2
#define	VBH_OP_SCSI_CMD_OUT	3
#define	VBH_OP_FLUSH		4
#define	VBH_OP_FLUSH_OUT	5
#define	VBH_OP_IDENT		8
#define	VBH_OP_DISCARD		11
#define	VBH_OP_WRITE_ZEROES	13

/* Flag bit that may be OR'ed into vbh_type alongside the type code. */
#define	VBH_FLAG_BARRIER	0x80000000

/*
 * Fixed-size header found in the first descriptor of every request.
 */
struct virtio_blk_hdr {
	uint32_t	vbh_type;	/* VBH_OP_*, possibly with VBH_FLAG_BARRIER */
	uint32_t	vbh_ioprio;
	uint64_t	vbh_sector;	/* start of the I/O, in VTBLK_BSIZE units */
} __packed;
157366f6083SPeter Grehan
/*
 * Debug printf
 *
 * Both macros take a complete, parenthesized PRINTLN argument list, e.g.
 * DPRINTF(("fmt %d", value)).  DPRINTF prints only while pci_vtblk_debug
 * is non-zero; WPRINTF always prints.
 */
static int pci_vtblk_debug;
/*
 * The conditional is wrapped in do/while (0) so that the macro behaves as
 * a single statement and its "if" can never pair with a caller's "else".
 */
#define	DPRINTF(params)	do { if (pci_vtblk_debug) PRINTLN params; } while (0)
#define	WPRINTF(params)	PRINTLN params
164366f6083SPeter Grehan
165066a8f14SAlexander Motin struct pci_vtblk_ioreq {
166066a8f14SAlexander Motin struct blockif_req io_req;
167066a8f14SAlexander Motin struct pci_vtblk_softc *io_sc;
168066a8f14SAlexander Motin uint8_t *io_status;
169066a8f14SAlexander Motin uint16_t io_idx;
170066a8f14SAlexander Motin };
171066a8f14SAlexander Motin
/*
 * Payload of a discard / write-zeroes request, read from the request's
 * first data segment.
 */
struct virtio_blk_discard_write_zeroes {
	uint64_t	sector;		/* first sector, in VTBLK_BSIZE units */
	uint32_t	num_sectors;	/* length, in VTBLK_BSIZE units */
	struct {
		uint32_t	unmap:1;
		uint32_t	reserved:31;
	} flags;
};
18022769bbeSAllan Jude
181366f6083SPeter Grehan /*
182366f6083SPeter Grehan * Per-device softc
183366f6083SPeter Grehan */
184366f6083SPeter Grehan struct pci_vtblk_softc {
185ba41c3c1SPeter Grehan struct virtio_softc vbsc_vs;
1863cbf3585SJohn Baldwin pthread_mutex_t vsc_mtx;
187ba41c3c1SPeter Grehan struct vqueue_info vbsc_vq;
188366f6083SPeter Grehan struct vtblk_config vbsc_cfg;
18922769bbeSAllan Jude struct virtio_consts vbsc_consts;
190066a8f14SAlexander Motin struct blockif_ctxt *bc;
19164945a9eSPeter Grehan char vbsc_ident[VTBLK_BLK_ID_BYTES];
192066a8f14SAlexander Motin struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];
193366f6083SPeter Grehan };
194366f6083SPeter Grehan
/* Callbacks referenced by vtblk_vi_consts before their definitions. */
static void	pci_vtblk_reset(void *vsc);
static void	pci_vtblk_notify(void *vsc, struct vqueue_info *vq);
static int	pci_vtblk_cfgread(void *vsc, int offset, int size,
		    uint32_t *retval);
static int	pci_vtblk_cfgwrite(void *vsc, int offset, int size,
		    uint32_t value);
#ifdef BHYVE_SNAPSHOT
static void	pci_vtblk_pause(void *vsc);
static void	pci_vtblk_resume(void *vsc);
static int	pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta);
#endif
204445e089eSNeel Natu
205ba41c3c1SPeter Grehan static struct virtio_consts vtblk_vi_consts = {
2066cb26162SMark Johnston .vc_name = "vtblk",
2076cb26162SMark Johnston .vc_nvq = 1,
2086cb26162SMark Johnston .vc_cfgsize = sizeof(struct vtblk_config),
2096cb26162SMark Johnston .vc_reset = pci_vtblk_reset,
2106cb26162SMark Johnston .vc_qnotify = pci_vtblk_notify,
2116cb26162SMark Johnston .vc_cfgread = pci_vtblk_cfgread,
2126cb26162SMark Johnston .vc_cfgwrite = pci_vtblk_cfgwrite,
2136cb26162SMark Johnston .vc_apply_features = NULL,
2146cb26162SMark Johnston .vc_hv_caps = VTBLK_S_HOSTCAPS,
215483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT
2166cb26162SMark Johnston .vc_pause = pci_vtblk_pause,
2176cb26162SMark Johnston .vc_resume = pci_vtblk_resume,
2186cb26162SMark Johnston .vc_snapshot = pci_vtblk_snapshot,
219483d953aSJohn Baldwin #endif
220ba41c3c1SPeter Grehan };
221366f6083SPeter Grehan
222366f6083SPeter Grehan static void
pci_vtblk_reset(void * vsc)223ba41c3c1SPeter Grehan pci_vtblk_reset(void *vsc)
224366f6083SPeter Grehan {
225ba41c3c1SPeter Grehan struct pci_vtblk_softc *sc = vsc;
226ba41c3c1SPeter Grehan
227332eff95SVincenzo Maffione DPRINTF(("vtblk: device reset requested !"));
228ba41c3c1SPeter Grehan vi_reset_dev(&sc->vbsc_vs);
229366f6083SPeter Grehan }
230366f6083SPeter Grehan
231366f6083SPeter Grehan static void
pci_vtblk_done_locked(struct pci_vtblk_ioreq * io,int err)23222769bbeSAllan Jude pci_vtblk_done_locked(struct pci_vtblk_ioreq *io, int err)
233066a8f14SAlexander Motin {
234066a8f14SAlexander Motin struct pci_vtblk_softc *sc = io->io_sc;
235066a8f14SAlexander Motin
236066a8f14SAlexander Motin /* convert errno into a virtio block error return */
237066a8f14SAlexander Motin if (err == EOPNOTSUPP || err == ENOSYS)
238066a8f14SAlexander Motin *io->io_status = VTBLK_S_UNSUPP;
239066a8f14SAlexander Motin else if (err != 0)
240066a8f14SAlexander Motin *io->io_status = VTBLK_S_IOERR;
241066a8f14SAlexander Motin else
242066a8f14SAlexander Motin *io->io_status = VTBLK_S_OK;
243066a8f14SAlexander Motin
244066a8f14SAlexander Motin /*
245066a8f14SAlexander Motin * Return the descriptor back to the host.
246066a8f14SAlexander Motin * We wrote 1 byte (our status) to host.
247066a8f14SAlexander Motin */
248066a8f14SAlexander Motin vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
249066a8f14SAlexander Motin vq_endchains(&sc->vbsc_vq, 0);
25022769bbeSAllan Jude }
25122769bbeSAllan Jude
#ifdef BHYVE_SNAPSHOT
/*
 * Pause callback: forwards the request to the block backend.
 */
static void
pci_vtblk_pause(void *vsc)
{
	struct pci_vtblk_softc *softc;

	softc = vsc;

	DPRINTF(("vtblk: device pause requested !\n"));
	blockif_pause(softc->bc);
}

/*
 * Resume callback: forwards the request to the block backend.
 */
static void
pci_vtblk_resume(void *vsc)
{
	struct pci_vtblk_softc *softc;

	softc = vsc;

	DPRINTF(("vtblk: device resume requested !\n"));
	blockif_resume(softc->bc);
}

/*
 * Snapshot callback: runs the config space contents and the identifier
 * buffer through the snapshot macros, which store their result in "ret"
 * and jump to "done" when they decide to leave early.
 *
 * Returns the value left in "ret" by the last macro executed.
 */
static int
pci_vtblk_snapshot(void *vsc, struct vm_snapshot_meta *meta)
{
	struct pci_vtblk_softc *sc;
	int ret;

	sc = vsc;

	SNAPSHOT_VAR_OR_LEAVE(sc->vbsc_cfg, meta, ret, done);
	SNAPSHOT_BUF_OR_LEAVE(sc->vbsc_ident, sizeof(sc->vbsc_ident),
	    meta, ret, done);

done:
	return (ret);
}
#endif
285483d953aSJohn Baldwin
28622769bbeSAllan Jude static void
pci_vtblk_done(struct blockif_req * br,int err)28722769bbeSAllan Jude pci_vtblk_done(struct blockif_req *br, int err)
28822769bbeSAllan Jude {
28922769bbeSAllan Jude struct pci_vtblk_ioreq *io = br->br_param;
29022769bbeSAllan Jude struct pci_vtblk_softc *sc = io->io_sc;
29122769bbeSAllan Jude
29222769bbeSAllan Jude pthread_mutex_lock(&sc->vsc_mtx);
29322769bbeSAllan Jude pci_vtblk_done_locked(io, err);
294066a8f14SAlexander Motin pthread_mutex_unlock(&sc->vsc_mtx);
295066a8f14SAlexander Motin }
296066a8f14SAlexander Motin
297066a8f14SAlexander Motin static void
pci_vtblk_proc(struct pci_vtblk_softc * sc,struct vqueue_info * vq)298ba41c3c1SPeter Grehan pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
299366f6083SPeter Grehan {
300366f6083SPeter Grehan struct virtio_blk_hdr *vbh;
301066a8f14SAlexander Motin struct pci_vtblk_ioreq *io;
302ba41c3c1SPeter Grehan int i, n;
303366f6083SPeter Grehan int err;
304bb1524afSAlexander Motin ssize_t iolen;
305ba41c3c1SPeter Grehan int writeop, type;
306b0139127SKa Ho Ng struct vi_req req;
30754b7bb76SAlexander Motin struct iovec iov[BLOCKIF_IOV_MAX + 2];
30822769bbeSAllan Jude struct virtio_blk_discard_write_zeroes *discard;
309366f6083SPeter Grehan
310b0139127SKa Ho Ng n = vq_getchain(vq, iov, BLOCKIF_IOV_MAX + 2, &req);
311366f6083SPeter Grehan
312366f6083SPeter Grehan /*
313ba41c3c1SPeter Grehan * The first descriptor will be the read-only fixed header,
314ba41c3c1SPeter Grehan * and the last is for status (hence +2 above and below).
315ba41c3c1SPeter Grehan * The remaining iov's are the actual data I/O vectors.
316ba41c3c1SPeter Grehan *
317ba41c3c1SPeter Grehan * XXX - note - this fails on crash dump, which does a
318ba41c3c1SPeter Grehan * VIRTIO_BLK_T_FLUSH with a zero transfer length
319366f6083SPeter Grehan */
32054b7bb76SAlexander Motin assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
321ba41c3c1SPeter Grehan
322b0139127SKa Ho Ng io = &sc->vbsc_ios[req.idx];
323b0139127SKa Ho Ng assert(req.readable != 0);
324ba41c3c1SPeter Grehan assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
32522769bbeSAllan Jude vbh = (struct virtio_blk_hdr *)iov[0].iov_base;
326066a8f14SAlexander Motin memcpy(&io->io_req.br_iov, &iov[1], sizeof(struct iovec) * (n - 2));
327066a8f14SAlexander Motin io->io_req.br_iovcnt = n - 2;
32822769bbeSAllan Jude io->io_req.br_offset = vbh->vbh_sector * VTBLK_BSIZE;
32922769bbeSAllan Jude io->io_status = (uint8_t *)iov[--n].iov_base;
330b0139127SKa Ho Ng assert(req.writable != 0);
331ba41c3c1SPeter Grehan assert(iov[n].iov_len == 1);
332366f6083SPeter Grehan
33358a6b033SNeel Natu /*
33458a6b033SNeel Natu * XXX
33558a6b033SNeel Natu * The guest should not be setting the BARRIER flag because
33658a6b033SNeel Natu * we don't advertise the capability.
33758a6b033SNeel Natu */
33858a6b033SNeel Natu type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
33922769bbeSAllan Jude writeop = (type == VBH_OP_WRITE || type == VBH_OP_DISCARD);
340b0139127SKa Ho Ng /*
341b0139127SKa Ho Ng * - Write op implies read-only descriptor
342b0139127SKa Ho Ng * - Read/ident op implies write-only descriptor
343b0139127SKa Ho Ng *
344b0139127SKa Ho Ng * By taking away either the read-only fixed header or the write-only
345b0139127SKa Ho Ng * status iovec, the following condition should hold true.
346b0139127SKa Ho Ng */
347b0139127SKa Ho Ng assert(n == (writeop ? req.readable : req.writable));
348366f6083SPeter Grehan
349ba41c3c1SPeter Grehan iolen = 0;
350ba41c3c1SPeter Grehan for (i = 1; i < n; i++) {
351ba41c3c1SPeter Grehan iolen += iov[i].iov_len;
352366f6083SPeter Grehan }
353bb1524afSAlexander Motin io->io_req.br_resid = iolen;
354366f6083SPeter Grehan
355332eff95SVincenzo Maffione DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld",
35622769bbeSAllan Jude writeop ? "write/discard" : "read/ident", iolen, i - 1,
35740dbeed3SPedro F. Giffuni io->io_req.br_offset));
358366f6083SPeter Grehan
35964945a9eSPeter Grehan switch (type) {
36064945a9eSPeter Grehan case VBH_OP_READ:
361066a8f14SAlexander Motin err = blockif_read(sc->bc, &io->io_req);
362066a8f14SAlexander Motin break;
363066a8f14SAlexander Motin case VBH_OP_WRITE:
364066a8f14SAlexander Motin err = blockif_write(sc->bc, &io->io_req);
365066a8f14SAlexander Motin break;
36622769bbeSAllan Jude case VBH_OP_DISCARD:
36722769bbeSAllan Jude /*
36822769bbeSAllan Jude * We currently only support a single request, if the guest
36922769bbeSAllan Jude * has submitted a request that doesn't conform to the
37022769bbeSAllan Jude * requirements, we return a error.
37122769bbeSAllan Jude */
37222769bbeSAllan Jude if (iov[1].iov_len != sizeof (*discard)) {
37322769bbeSAllan Jude pci_vtblk_done_locked(io, EINVAL);
37422769bbeSAllan Jude return;
37522769bbeSAllan Jude }
37622769bbeSAllan Jude
37722769bbeSAllan Jude /* The segments to discard are provided rather than data */
37822769bbeSAllan Jude discard = (struct virtio_blk_discard_write_zeroes *)
37922769bbeSAllan Jude iov[1].iov_base;
38022769bbeSAllan Jude
38122769bbeSAllan Jude /*
38222769bbeSAllan Jude * virtio v1.1 5.2.6.2:
38322769bbeSAllan Jude * The device MUST set the status byte to VIRTIO_BLK_S_UNSUPP
38422769bbeSAllan Jude * for discard and write zeroes commands if any unknown flag is
38522769bbeSAllan Jude * set. Furthermore, the device MUST set the status byte to
38622769bbeSAllan Jude * VIRTIO_BLK_S_UNSUPP for discard commands if the unmap flag
38722769bbeSAllan Jude * is set.
38822769bbeSAllan Jude *
38922769bbeSAllan Jude * Currently there are no known flags for a DISCARD request.
39022769bbeSAllan Jude */
39122769bbeSAllan Jude if (discard->flags.unmap != 0 || discard->flags.reserved != 0) {
39222769bbeSAllan Jude pci_vtblk_done_locked(io, ENOTSUP);
39322769bbeSAllan Jude return;
39422769bbeSAllan Jude }
39522769bbeSAllan Jude
39622769bbeSAllan Jude /* Make sure the request doesn't exceed our size limit */
39722769bbeSAllan Jude if (discard->num_sectors > VTBLK_MAX_DISCARD_SECT) {
39822769bbeSAllan Jude pci_vtblk_done_locked(io, EINVAL);
39922769bbeSAllan Jude return;
40022769bbeSAllan Jude }
40122769bbeSAllan Jude
40222769bbeSAllan Jude io->io_req.br_offset = discard->sector * VTBLK_BSIZE;
40322769bbeSAllan Jude io->io_req.br_resid = discard->num_sectors * VTBLK_BSIZE;
40422769bbeSAllan Jude err = blockif_delete(sc->bc, &io->io_req);
40522769bbeSAllan Jude break;
406066a8f14SAlexander Motin case VBH_OP_FLUSH:
407066a8f14SAlexander Motin case VBH_OP_FLUSH_OUT:
408066a8f14SAlexander Motin err = blockif_flush(sc->bc, &io->io_req);
40964945a9eSPeter Grehan break;
41064945a9eSPeter Grehan case VBH_OP_IDENT:
41164945a9eSPeter Grehan /* Assume a single buffer */
412811a355fSAlexander Motin /* S/n equal to buffer is not zero-terminated. */
413811a355fSAlexander Motin memset(iov[1].iov_base, 0, iov[1].iov_len);
414811a355fSAlexander Motin strncpy(iov[1].iov_base, sc->vbsc_ident,
41526cdcdbeSNeel Natu MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
41622769bbeSAllan Jude pci_vtblk_done_locked(io, 0);
417066a8f14SAlexander Motin return;
41864945a9eSPeter Grehan default:
41922769bbeSAllan Jude pci_vtblk_done_locked(io, EOPNOTSUPP);
420066a8f14SAlexander Motin return;
42164945a9eSPeter Grehan }
422066a8f14SAlexander Motin assert(err == 0);
4233bf0823cSNeel Natu }
424366f6083SPeter Grehan
/*
 * Queue notification callback: keep processing requests for as long as
 * the queue reports available descriptors.
 */
static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *softc;

	softc = vsc;

	while (vq_has_descs(vq)) {
		pci_vtblk_proc(softc, vq);
	}
}
433366f6083SPeter Grehan
4342349cda4SJohn Baldwin static void
pci_vtblk_resized(struct blockif_ctxt * bctxt __unused,void * arg,size_t new_size)43598d920d9SMark Johnston pci_vtblk_resized(struct blockif_ctxt *bctxt __unused, void *arg,
43698d920d9SMark Johnston size_t new_size)
4372349cda4SJohn Baldwin {
4382349cda4SJohn Baldwin struct pci_vtblk_softc *sc;
4392349cda4SJohn Baldwin
4402349cda4SJohn Baldwin sc = arg;
4412349cda4SJohn Baldwin
4422349cda4SJohn Baldwin sc->vbsc_cfg.vbc_capacity = new_size / VTBLK_BSIZE; /* 512-byte units */
4432349cda4SJohn Baldwin vi_interrupt(&sc->vbsc_vs, VIRTIO_PCI_ISR_CONFIG,
4442349cda4SJohn Baldwin sc->vbsc_vs.vs_msix_cfg_idx);
4452349cda4SJohn Baldwin }
4462349cda4SJohn Baldwin
447366f6083SPeter Grehan static int
pci_vtblk_init(struct pci_devinst * pi,nvlist_t * nvl)4486a284cacSJohn Baldwin pci_vtblk_init(struct pci_devinst *pi, nvlist_t *nvl)
449366f6083SPeter Grehan {
4505d805962SJohn Baldwin char bident[sizeof("XXX:XXX")];
451066a8f14SAlexander Motin struct blockif_ctxt *bctxt;
452c6efcb12SJohn Baldwin const char *path, *serial;
45364945a9eSPeter Grehan MD5_CTX mdctx;
45464945a9eSPeter Grehan u_char digest[16];
455366f6083SPeter Grehan struct pci_vtblk_softc *sc;
456066a8f14SAlexander Motin off_t size;
457066a8f14SAlexander Motin int i, sectsz, sts, sto;
458366f6083SPeter Grehan
459366f6083SPeter Grehan /*
460366f6083SPeter Grehan * The supplied backing file has to exist
461366f6083SPeter Grehan */
4625d805962SJohn Baldwin snprintf(bident, sizeof(bident), "%u:%u", pi->pi_slot, pi->pi_func);
463621b5090SJohn Baldwin bctxt = blockif_open(nvl, bident);
464066a8f14SAlexander Motin if (bctxt == NULL) {
465366f6083SPeter Grehan perror("Could not open backing file");
466366f6083SPeter Grehan return (1);
467366f6083SPeter Grehan }
468366f6083SPeter Grehan
469*480bef94SCorvin Köhne if (blockif_add_boot_device(pi, bctxt)) {
470*480bef94SCorvin Köhne perror("Invalid boot device");
471*480bef94SCorvin Köhne return (1);
472*480bef94SCorvin Köhne }
473*480bef94SCorvin Köhne
474066a8f14SAlexander Motin size = blockif_size(bctxt);
475066a8f14SAlexander Motin sectsz = blockif_sectsz(bctxt);
476066a8f14SAlexander Motin blockif_psectsz(bctxt, &sts, &sto);
4772e325b33SPeter Grehan
478994f858aSXin LI sc = calloc(1, sizeof(struct pci_vtblk_softc));
479066a8f14SAlexander Motin sc->bc = bctxt;
480066a8f14SAlexander Motin for (i = 0; i < VTBLK_RINGSZ; i++) {
481066a8f14SAlexander Motin struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
482066a8f14SAlexander Motin io->io_req.br_callback = pci_vtblk_done;
483066a8f14SAlexander Motin io->io_req.br_param = io;
484066a8f14SAlexander Motin io->io_sc = sc;
485066a8f14SAlexander Motin io->io_idx = i;
486066a8f14SAlexander Motin }
487366f6083SPeter Grehan
48822769bbeSAllan Jude bcopy(&vtblk_vi_consts, &sc->vbsc_consts, sizeof (vtblk_vi_consts));
48922769bbeSAllan Jude if (blockif_candelete(sc->bc))
49022769bbeSAllan Jude sc->vbsc_consts.vc_hv_caps |= VTBLK_F_DISCARD;
49122769bbeSAllan Jude
4923cbf3585SJohn Baldwin pthread_mutex_init(&sc->vsc_mtx, NULL);
4933cbf3585SJohn Baldwin
494ba41c3c1SPeter Grehan /* init virtio softc and virtqueues */
49522769bbeSAllan Jude vi_softc_linkup(&sc->vbsc_vs, &sc->vbsc_consts, sc, pi, &sc->vbsc_vq);
4963cbf3585SJohn Baldwin sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
4973cbf3585SJohn Baldwin
498ba41c3c1SPeter Grehan sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
499ba41c3c1SPeter Grehan /* sc->vbsc_vq.vq_notify = we have no per-queue notify */
500ba41c3c1SPeter Grehan
50164945a9eSPeter Grehan /*
502c6efcb12SJohn Baldwin * If an explicit identifier is not given, create an
503c6efcb12SJohn Baldwin * identifier using parts of the md5 sum of the filename.
50464945a9eSPeter Grehan */
505c6efcb12SJohn Baldwin bzero(sc->vbsc_ident, VTBLK_BLK_ID_BYTES);
506c6efcb12SJohn Baldwin if ((serial = get_config_value_node(nvl, "serial")) != NULL ||
507c6efcb12SJohn Baldwin (serial = get_config_value_node(nvl, "ser")) != NULL) {
508c6efcb12SJohn Baldwin strlcpy(sc->vbsc_ident, serial, VTBLK_BLK_ID_BYTES);
509c6efcb12SJohn Baldwin } else {
510621b5090SJohn Baldwin path = get_config_value_node(nvl, "path");
51164945a9eSPeter Grehan MD5Init(&mdctx);
512621b5090SJohn Baldwin MD5Update(&mdctx, path, strlen(path));
51364945a9eSPeter Grehan MD5Final(digest, &mdctx);
514edce78c2SMarcelo Araujo snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES,
515edce78c2SMarcelo Araujo "BHYVE-%02X%02X-%02X%02X-%02X%02X",
516c6efcb12SJohn Baldwin digest[0], digest[1], digest[2], digest[3], digest[4],
517c6efcb12SJohn Baldwin digest[5]);
518c6efcb12SJohn Baldwin }
51964945a9eSPeter Grehan
520366f6083SPeter Grehan /* setup virtio block config space */
52122769bbeSAllan Jude sc->vbsc_cfg.vbc_capacity = size / VTBLK_BSIZE; /* 512-byte units */
522366f6083SPeter Grehan sc->vbsc_cfg.vbc_size_max = 0; /* not negotiated */
5238c74ade8SJohn Baldwin
5248c74ade8SJohn Baldwin /*
5258c74ade8SJohn Baldwin * If Linux is presented with a seg_max greater than the virtio queue
5268c74ade8SJohn Baldwin * size, it can stumble into situations where it violates its own
5278c74ade8SJohn Baldwin * invariants and panics. For safety, we keep seg_max clamped, paying
5288c74ade8SJohn Baldwin * heed to the two extra descriptors needed for the header and status
5298c74ade8SJohn Baldwin * of a request.
5308c74ade8SJohn Baldwin */
5318c74ade8SJohn Baldwin sc->vbsc_cfg.vbc_seg_max = MIN(VTBLK_RINGSZ - 2, BLOCKIF_IOV_MAX);
532297c4868SAlexander Motin sc->vbsc_cfg.vbc_geometry.cylinders = 0; /* no geometry */
533297c4868SAlexander Motin sc->vbsc_cfg.vbc_geometry.heads = 0;
534297c4868SAlexander Motin sc->vbsc_cfg.vbc_geometry.sectors = 0;
535297c4868SAlexander Motin sc->vbsc_cfg.vbc_blk_size = sectsz;
536297c4868SAlexander Motin sc->vbsc_cfg.vbc_topology.physical_block_exp =
537297c4868SAlexander Motin (sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0;
538297c4868SAlexander Motin sc->vbsc_cfg.vbc_topology.alignment_offset =
539297c4868SAlexander Motin (sto != 0) ? ((sts - sto) / sectsz) : 0;
540297c4868SAlexander Motin sc->vbsc_cfg.vbc_topology.min_io_size = 0;
541297c4868SAlexander Motin sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
542297c4868SAlexander Motin sc->vbsc_cfg.vbc_writeback = 0;
54322769bbeSAllan Jude sc->vbsc_cfg.max_discard_sectors = VTBLK_MAX_DISCARD_SECT;
54422769bbeSAllan Jude sc->vbsc_cfg.max_discard_seg = VTBLK_MAX_DISCARD_SEG;
545cc3568c1SAllan Jude sc->vbsc_cfg.discard_sector_alignment = MAX(sectsz, sts) / VTBLK_BSIZE;
546366f6083SPeter Grehan
547ba41c3c1SPeter Grehan /*
548ba41c3c1SPeter Grehan * Should we move some of this into virtio.c? Could
549ba41c3c1SPeter Grehan * have the device, class, and subdev_0 as fields in
550ba41c3c1SPeter Grehan * the virtio constants structure.
551ba41c3c1SPeter Grehan */
552366f6083SPeter Grehan pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
553366f6083SPeter Grehan pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
554366f6083SPeter Grehan pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
55554ac6f72SKa Ho Ng pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_BLOCK);
556604b5210SPeter Grehan pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
557445e089eSNeel Natu
558066a8f14SAlexander Motin if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
559066a8f14SAlexander Motin blockif_close(sc->bc);
560066a8f14SAlexander Motin free(sc);
561445e089eSNeel Natu return (1);
562066a8f14SAlexander Motin }
563ba41c3c1SPeter Grehan vi_set_io_bar(&sc->vbsc_vs, 0);
5642349cda4SJohn Baldwin blockif_register_resize_callback(sc->bc, pci_vtblk_resized, sc);
565366f6083SPeter Grehan return (0);
566366f6083SPeter Grehan }
567366f6083SPeter Grehan
568ba41c3c1SPeter Grehan static int
pci_vtblk_cfgwrite(void * vsc __unused,int offset,int size __unused,uint32_t value __unused)56998d920d9SMark Johnston pci_vtblk_cfgwrite(void *vsc __unused, int offset, int size __unused,
57098d920d9SMark Johnston uint32_t value __unused)
571445e089eSNeel Natu {
572ba41c3c1SPeter Grehan
573332eff95SVincenzo Maffione DPRINTF(("vtblk: write to readonly reg %d", offset));
574ba41c3c1SPeter Grehan return (1);
575445e089eSNeel Natu }
576445e089eSNeel Natu
577ba41c3c1SPeter Grehan static int
pci_vtblk_cfgread(void * vsc,int offset,int size,uint32_t * retval)578ba41c3c1SPeter Grehan pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
579366f6083SPeter Grehan {
580ba41c3c1SPeter Grehan struct pci_vtblk_softc *sc = vsc;
5816214e48cSPeter Grehan void *ptr;
582366f6083SPeter Grehan
583ba41c3c1SPeter Grehan /* our caller has already verified offset and size */
584ba41c3c1SPeter Grehan ptr = (uint8_t *)&sc->vbsc_cfg + offset;
585ba41c3c1SPeter Grehan memcpy(retval, ptr, size);
586366f6083SPeter Grehan return (0);
587366f6083SPeter Grehan }
588366f6083SPeter Grehan
58937045dfaSMark Johnston static const struct pci_devemu pci_de_vblk = {
590366f6083SPeter Grehan .pe_emu = "virtio-blk",
591366f6083SPeter Grehan .pe_init = pci_vtblk_init,
592621b5090SJohn Baldwin .pe_legacy_config = blockif_legacy_config,
593ba41c3c1SPeter Grehan .pe_barwrite = vi_pci_write,
594483d953aSJohn Baldwin .pe_barread = vi_pci_read,
595483d953aSJohn Baldwin #ifdef BHYVE_SNAPSHOT
596483d953aSJohn Baldwin .pe_snapshot = vi_pci_snapshot,
597a85bbbeaSVitaliy Gusev .pe_pause = vi_pci_pause,
598a85bbbeaSVitaliy Gusev .pe_resume = vi_pci_resume,
599483d953aSJohn Baldwin #endif
600366f6083SPeter Grehan };
601366f6083SPeter Grehan PCI_EMUL_SET(pci_de_vblk);
602