xref: /illumos-gate/usr/src/uts/common/io/vioblk/vioblk.c (revision 1a5ae140ba142cafb59ab08b3212c4ebbce84f32)
11c5bc425SAlexey Zaytsev /*
21c5bc425SAlexey Zaytsev  * CDDL HEADER START
31c5bc425SAlexey Zaytsev  *
41c5bc425SAlexey Zaytsev  * The contents of this file are subject to the terms of the
51c5bc425SAlexey Zaytsev  * Common Development and Distribution License (the "License").
61c5bc425SAlexey Zaytsev  * You may not use this file except in compliance with the License.
71c5bc425SAlexey Zaytsev  *
81c5bc425SAlexey Zaytsev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91c5bc425SAlexey Zaytsev  * or http://www.opensolaris.org/os/licensing.
101c5bc425SAlexey Zaytsev  * See the License for the specific language governing permissions
111c5bc425SAlexey Zaytsev  * and limitations under the License.
121c5bc425SAlexey Zaytsev  *
131c5bc425SAlexey Zaytsev  * When distributing Covered Code, include this CDDL HEADER in each
141c5bc425SAlexey Zaytsev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151c5bc425SAlexey Zaytsev  * If applicable, add the following below this CDDL HEADER, with the
161c5bc425SAlexey Zaytsev  * fields enclosed by brackets "[]" replaced with your own identifying
171c5bc425SAlexey Zaytsev  * information: Portions Copyright [yyyy] [name of copyright owner]
181c5bc425SAlexey Zaytsev  *
191c5bc425SAlexey Zaytsev  * CDDL HEADER END
201c5bc425SAlexey Zaytsev  */
211c5bc425SAlexey Zaytsev 
221c5bc425SAlexey Zaytsev /*
23510a6847SHans Rosenfeld  * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
241c5bc425SAlexey Zaytsev  * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
25*1a5ae140SJason King  * Copyright 2020 Joyent Inc.
264d95620bSPaul Winder  * Copyright 2019 Western Digital Corporation.
271c5bc425SAlexey Zaytsev  */
281c5bc425SAlexey Zaytsev 
29f8296c60SJoshua M. Clulow /*
30f8296c60SJoshua M. Clulow  * VIRTIO BLOCK DRIVER
31f8296c60SJoshua M. Clulow  *
32f8296c60SJoshua M. Clulow  * This driver provides support for Virtio Block devices.  Each driver instance
33f8296c60SJoshua M. Clulow  * attaches to a single underlying block device.
34f8296c60SJoshua M. Clulow  *
35f8296c60SJoshua M. Clulow  * REQUEST CHAIN LAYOUT
36f8296c60SJoshua M. Clulow  *
37f8296c60SJoshua M. Clulow  * Every request chain sent to the I/O queue has the following structure.  Each
38f8296c60SJoshua M. Clulow  * box in the diagram represents a descriptor entry (i.e., a DMA cookie) within
39f8296c60SJoshua M. Clulow  * the chain:
40f8296c60SJoshua M. Clulow  *
41f8296c60SJoshua M. Clulow  *    +-0-----------------------------------------+
42f8296c60SJoshua M. Clulow  *    | struct virtio_blk_hdr                     |-----------------------\
43f8296c60SJoshua M. Clulow  *    |   (written by driver, read by device)     |                       |
44f8296c60SJoshua M. Clulow  *    +-1-----------------------------------------+                       |
45f8296c60SJoshua M. Clulow  *    | optional data payload                     |--\                    |
46f8296c60SJoshua M. Clulow  *    |   (written by driver for write requests,  |  |                    |
47f8296c60SJoshua M. Clulow  *    |    or by device for read requests)        |  |                    |
48f8296c60SJoshua M. Clulow  *    +-2-----------------------------------------+  |                    |
49f8296c60SJoshua M. Clulow  *    | ,~`           :                              |-cookies loaned     |
50f8296c60SJoshua M. Clulow  *    |/              :                        ,~`|  | from blkdev        |
51f8296c60SJoshua M. Clulow  *                    :                       /   |  |                    |
52f8296c60SJoshua M. Clulow  *    +-(N - 1)-----------------------------------+  |                    |
53f8296c60SJoshua M. Clulow  *    | ... end of data payload.                  |  |                    |
54f8296c60SJoshua M. Clulow  *    |                                           |  |                    |
55f8296c60SJoshua M. Clulow  *    |                                           |--/                    |
56f8296c60SJoshua M. Clulow  *    +-N-----------------------------------------+                       |
57f8296c60SJoshua M. Clulow  *    | status byte                               |                       |
58f8296c60SJoshua M. Clulow  *    |   (written by device, read by driver)     |--------------------\  |
59f8296c60SJoshua M. Clulow  *    +-------------------------------------------+                    |  |
60f8296c60SJoshua M. Clulow  *                                                                     |  |
61f8296c60SJoshua M. Clulow  * The memory for the header and status bytes (i.e., 0 and N above)    |  |
62f8296c60SJoshua M. Clulow  * is allocated as a single chunk by vioblk_alloc_reqs():              |  |
63f8296c60SJoshua M. Clulow  *                                                                     |  |
64f8296c60SJoshua M. Clulow  *    +-------------------------------------------+                    |  |
65f8296c60SJoshua M. Clulow  *    | struct virtio_blk_hdr                     |<----------------------/
66f8296c60SJoshua M. Clulow  *    +-------------------------------------------+                    |
67f8296c60SJoshua M. Clulow  *    | status byte                               |<-------------------/
68f8296c60SJoshua M. Clulow  *    +-------------------------------------------+
69f8296c60SJoshua M. Clulow  */
701c5bc425SAlexey Zaytsev 
711c5bc425SAlexey Zaytsev #include <sys/modctl.h>
721c5bc425SAlexey Zaytsev #include <sys/blkdev.h>
731c5bc425SAlexey Zaytsev #include <sys/types.h>
741c5bc425SAlexey Zaytsev #include <sys/errno.h>
751c5bc425SAlexey Zaytsev #include <sys/param.h>
761c5bc425SAlexey Zaytsev #include <sys/stropts.h>
771c5bc425SAlexey Zaytsev #include <sys/stream.h>
781c5bc425SAlexey Zaytsev #include <sys/strsubr.h>
791c5bc425SAlexey Zaytsev #include <sys/kmem.h>
801c5bc425SAlexey Zaytsev #include <sys/conf.h>
811c5bc425SAlexey Zaytsev #include <sys/devops.h>
821c5bc425SAlexey Zaytsev #include <sys/ksynch.h>
831c5bc425SAlexey Zaytsev #include <sys/stat.h>
841c5bc425SAlexey Zaytsev #include <sys/modctl.h>
851c5bc425SAlexey Zaytsev #include <sys/debug.h>
861c5bc425SAlexey Zaytsev #include <sys/pci.h>
8794c3dad2SToomas Soome #include <sys/containerof.h>
88f8296c60SJoshua M. Clulow #include <sys/ctype.h>
89f8296c60SJoshua M. Clulow #include <sys/sysmacros.h>
90*1a5ae140SJason King #include <sys/dkioc_free_util.h>
911c5bc425SAlexey Zaytsev 
92f8296c60SJoshua M. Clulow #include "virtio.h"
93f8296c60SJoshua M. Clulow #include "vioblk.h"
941c5bc425SAlexey Zaytsev 
95f8296c60SJoshua M. Clulow static void vioblk_get_id(vioblk_t *);
96f8296c60SJoshua M. Clulow uint_t vioblk_int_handler(caddr_t, caddr_t);
97f8296c60SJoshua M. Clulow static uint_t vioblk_poll(vioblk_t *);
981c5bc425SAlexey Zaytsev static int vioblk_quiesce(dev_info_t *);
991c5bc425SAlexey Zaytsev static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
1001c5bc425SAlexey Zaytsev static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);
1011c5bc425SAlexey Zaytsev 
102f8296c60SJoshua M. Clulow 
1031c5bc425SAlexey Zaytsev static struct dev_ops vioblk_dev_ops = {
104f8296c60SJoshua M. Clulow 	.devo_rev =			DEVO_REV,
105f8296c60SJoshua M. Clulow 	.devo_refcnt =			0,
106f8296c60SJoshua M. Clulow 
107f8296c60SJoshua M. Clulow 	.devo_attach =			vioblk_attach,
108f8296c60SJoshua M. Clulow 	.devo_detach =			vioblk_detach,
109f8296c60SJoshua M. Clulow 	.devo_quiesce =			vioblk_quiesce,
110f8296c60SJoshua M. Clulow 
111f8296c60SJoshua M. Clulow 	.devo_getinfo =			ddi_no_info,
112f8296c60SJoshua M. Clulow 	.devo_identify =		nulldev,
113f8296c60SJoshua M. Clulow 	.devo_probe =			nulldev,
114f8296c60SJoshua M. Clulow 	.devo_reset =			nodev,
115f8296c60SJoshua M. Clulow 	.devo_cb_ops =			NULL,
116f8296c60SJoshua M. Clulow 	.devo_bus_ops =			NULL,
117f8296c60SJoshua M. Clulow 	.devo_power =			NULL,
118f8296c60SJoshua M. Clulow };
119f8296c60SJoshua M. Clulow 
120f8296c60SJoshua M. Clulow static struct modldrv vioblk_modldrv = {
121f8296c60SJoshua M. Clulow 	.drv_modops =			&mod_driverops,
122f8296c60SJoshua M. Clulow 	.drv_linkinfo =			"VIRTIO block driver",
123f8296c60SJoshua M. Clulow 	.drv_dev_ops =			&vioblk_dev_ops
124f8296c60SJoshua M. Clulow };
125f8296c60SJoshua M. Clulow 
126f8296c60SJoshua M. Clulow static struct modlinkage vioblk_modlinkage = {
127f8296c60SJoshua M. Clulow 	.ml_rev =			MODREV_1,
128f8296c60SJoshua M. Clulow 	.ml_linkage =			{ &vioblk_modldrv, NULL }
129f8296c60SJoshua M. Clulow };
130f8296c60SJoshua M. Clulow 
131f8296c60SJoshua M. Clulow /*
132f8296c60SJoshua M. Clulow  * DMA attribute template for header and status blocks.  We also make a
133f8296c60SJoshua M. Clulow  * per-instance copy of this template with negotiated sizes from the device for
134f8296c60SJoshua M. Clulow  * blkdev.
135f8296c60SJoshua M. Clulow  */
136f8296c60SJoshua M. Clulow static const ddi_dma_attr_t vioblk_dma_attr = {
137f8296c60SJoshua M. Clulow 	.dma_attr_version =		DMA_ATTR_V0,
138f8296c60SJoshua M. Clulow 	.dma_attr_addr_lo =		0x0000000000000000,
139f8296c60SJoshua M. Clulow 	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
140f8296c60SJoshua M. Clulow 	.dma_attr_count_max =		0x00000000FFFFFFFF,
141f8296c60SJoshua M. Clulow 	.dma_attr_align =		1,
142f8296c60SJoshua M. Clulow 	.dma_attr_burstsizes =		1,
143f8296c60SJoshua M. Clulow 	.dma_attr_minxfer =		1,
144f8296c60SJoshua M. Clulow 	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
145f8296c60SJoshua M. Clulow 	.dma_attr_seg =			0x00000000FFFFFFFF,
146f8296c60SJoshua M. Clulow 	.dma_attr_sgllen =		1,
147f8296c60SJoshua M. Clulow 	.dma_attr_granular =		1,
148f8296c60SJoshua M. Clulow 	.dma_attr_flags =		0
1491c5bc425SAlexey Zaytsev };
1501c5bc425SAlexey Zaytsev 
151f8296c60SJoshua M. Clulow static vioblk_req_t *
152f8296c60SJoshua M. Clulow vioblk_req_alloc(vioblk_t *vib)
1531c5bc425SAlexey Zaytsev {
154f8296c60SJoshua M. Clulow 	vioblk_req_t *vbr;
1551c5bc425SAlexey Zaytsev 
156f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
1571c5bc425SAlexey Zaytsev 
158f8296c60SJoshua M. Clulow 	if ((vbr = list_remove_head(&vib->vib_reqs)) == NULL) {
159f8296c60SJoshua M. Clulow 		return (NULL);
160f8296c60SJoshua M. Clulow 	}
161f8296c60SJoshua M. Clulow 	vib->vib_nreqs_alloc++;
1621c5bc425SAlexey Zaytsev 
163f8296c60SJoshua M. Clulow 	VERIFY0(vbr->vbr_status);
164f8296c60SJoshua M. Clulow 	vbr->vbr_status |= VIOBLK_REQSTAT_ALLOCATED;
165f8296c60SJoshua M. Clulow 
166f8296c60SJoshua M. Clulow 	VERIFY3P(vbr->vbr_xfer, ==, NULL);
167f8296c60SJoshua M. Clulow 	VERIFY3S(vbr->vbr_error, ==, 0);
168f8296c60SJoshua M. Clulow 
169f8296c60SJoshua M. Clulow 	return (vbr);
170f8296c60SJoshua M. Clulow }
171f8296c60SJoshua M. Clulow 
172f8296c60SJoshua M. Clulow static void
173f8296c60SJoshua M. Clulow vioblk_req_free(vioblk_t *vib, vioblk_req_t *vbr)
174f8296c60SJoshua M. Clulow {
175f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
176f8296c60SJoshua M. Clulow 
177f8296c60SJoshua M. Clulow 	/*
178f8296c60SJoshua M. Clulow 	 * Check that this request was allocated, then zero the status field to
179f8296c60SJoshua M. Clulow 	 * clear all status bits.
180f8296c60SJoshua M. Clulow 	 */
181f8296c60SJoshua M. Clulow 	VERIFY(vbr->vbr_status & VIOBLK_REQSTAT_ALLOCATED);
182f8296c60SJoshua M. Clulow 	vbr->vbr_status = 0;
183f8296c60SJoshua M. Clulow 
184f8296c60SJoshua M. Clulow 	vbr->vbr_xfer = NULL;
185f8296c60SJoshua M. Clulow 	vbr->vbr_error = 0;
186f8296c60SJoshua M. Clulow 	vbr->vbr_type = 0;
187f8296c60SJoshua M. Clulow 
188f8296c60SJoshua M. Clulow 	list_insert_head(&vib->vib_reqs, vbr);
189f8296c60SJoshua M. Clulow 
190f8296c60SJoshua M. Clulow 	VERIFY3U(vib->vib_nreqs_alloc, >, 0);
191f8296c60SJoshua M. Clulow 	vib->vib_nreqs_alloc--;
192f8296c60SJoshua M. Clulow }
193f8296c60SJoshua M. Clulow 
194f8296c60SJoshua M. Clulow static void
195f8296c60SJoshua M. Clulow vioblk_complete(vioblk_t *vib, vioblk_req_t *vbr)
196f8296c60SJoshua M. Clulow {
197f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
198f8296c60SJoshua M. Clulow 
199f8296c60SJoshua M. Clulow 	VERIFY(!(vbr->vbr_status & VIOBLK_REQSTAT_COMPLETE));
200f8296c60SJoshua M. Clulow 	vbr->vbr_status |= VIOBLK_REQSTAT_COMPLETE;
201f8296c60SJoshua M. Clulow 
202f8296c60SJoshua M. Clulow 	if (vbr->vbr_type == VIRTIO_BLK_T_FLUSH) {
203f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_rw_cacheflush.value.ui64++;
204f8296c60SJoshua M. Clulow 	}
205f8296c60SJoshua M. Clulow 
206f8296c60SJoshua M. Clulow 	if (vbr->vbr_xfer != NULL) {
207f8296c60SJoshua M. Clulow 		/*
208f8296c60SJoshua M. Clulow 		 * This is a blkdev framework request.
209f8296c60SJoshua M. Clulow 		 */
210f8296c60SJoshua M. Clulow 		mutex_exit(&vib->vib_mutex);
211f8296c60SJoshua M. Clulow 		bd_xfer_done(vbr->vbr_xfer, vbr->vbr_error);
212f8296c60SJoshua M. Clulow 		mutex_enter(&vib->vib_mutex);
213f8296c60SJoshua M. Clulow 		vbr->vbr_xfer = NULL;
214f8296c60SJoshua M. Clulow 	}
215f8296c60SJoshua M. Clulow }
216f8296c60SJoshua M. Clulow 
217f8296c60SJoshua M. Clulow static virtio_chain_t *
218f8296c60SJoshua M. Clulow vioblk_common_start(vioblk_t *vib, int type, uint64_t sector,
219f8296c60SJoshua M. Clulow     boolean_t polled)
220f8296c60SJoshua M. Clulow {
221f8296c60SJoshua M. Clulow 	vioblk_req_t *vbr = NULL;
222f8296c60SJoshua M. Clulow 	virtio_chain_t *vic = NULL;
223f8296c60SJoshua M. Clulow 
224f8296c60SJoshua M. Clulow 	if ((vbr = vioblk_req_alloc(vib)) == NULL) {
225f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
226f8296c60SJoshua M. Clulow 		return (NULL);
227f8296c60SJoshua M. Clulow 	}
228f8296c60SJoshua M. Clulow 	vbr->vbr_type = type;
229f8296c60SJoshua M. Clulow 
230f8296c60SJoshua M. Clulow 	if (polled) {
231f8296c60SJoshua M. Clulow 		/*
232f8296c60SJoshua M. Clulow 		 * Mark this command as polled so that we can wait on it
233f8296c60SJoshua M. Clulow 		 * ourselves.
234f8296c60SJoshua M. Clulow 		 */
235f8296c60SJoshua M. Clulow 		vbr->vbr_status |= VIOBLK_REQSTAT_POLLED;
236f8296c60SJoshua M. Clulow 	}
237f8296c60SJoshua M. Clulow 
238f8296c60SJoshua M. Clulow 	if ((vic = virtio_chain_alloc(vib->vib_vq, KM_NOSLEEP)) == NULL) {
239f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
240f8296c60SJoshua M. Clulow 		goto fail;
241f8296c60SJoshua M. Clulow 	}
242f8296c60SJoshua M. Clulow 
243f8296c60SJoshua M. Clulow 	struct vioblk_req_hdr vbh;
244f8296c60SJoshua M. Clulow 	vbh.vbh_type = type;
245f8296c60SJoshua M. Clulow 	vbh.vbh_ioprio = 0;
246c5c712a8SToomas Soome 	vbh.vbh_sector = (sector * vib->vib_blk_size) / DEV_BSIZE;
247f8296c60SJoshua M. Clulow 	bcopy(&vbh, virtio_dma_va(vbr->vbr_dma, 0), sizeof (vbh));
248f8296c60SJoshua M. Clulow 
249f8296c60SJoshua M. Clulow 	virtio_chain_data_set(vic, vbr);
250f8296c60SJoshua M. Clulow 
251f8296c60SJoshua M. Clulow 	/*
252f8296c60SJoshua M. Clulow 	 * Put the header in the first descriptor.  See the block comment at
253f8296c60SJoshua M. Clulow 	 * the top of the file for more details on the chain layout.
254f8296c60SJoshua M. Clulow 	 */
255f8296c60SJoshua M. Clulow 	if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0),
256f8296c60SJoshua M. Clulow 	    sizeof (struct vioblk_req_hdr), VIRTIO_DIR_DEVICE_READS) !=
257f8296c60SJoshua M. Clulow 	    DDI_SUCCESS) {
258f8296c60SJoshua M. Clulow 		goto fail;
259f8296c60SJoshua M. Clulow 	}
260f8296c60SJoshua M. Clulow 
261f8296c60SJoshua M. Clulow 	return (vic);
262f8296c60SJoshua M. Clulow 
263f8296c60SJoshua M. Clulow fail:
264f8296c60SJoshua M. Clulow 	vbr->vbr_xfer = NULL;
265f8296c60SJoshua M. Clulow 	vioblk_req_free(vib, vbr);
266f8296c60SJoshua M. Clulow 	if (vic != NULL) {
267f8296c60SJoshua M. Clulow 		virtio_chain_free(vic);
268f8296c60SJoshua M. Clulow 	}
269f8296c60SJoshua M. Clulow 	return (NULL);
270f8296c60SJoshua M. Clulow }
2711c5bc425SAlexey Zaytsev 
2721c5bc425SAlexey Zaytsev static int
273f8296c60SJoshua M. Clulow vioblk_common_submit(vioblk_t *vib, virtio_chain_t *vic)
2741c5bc425SAlexey Zaytsev {
275f8296c60SJoshua M. Clulow 	int r;
276f8296c60SJoshua M. Clulow 	vioblk_req_t *vbr = virtio_chain_data(vic);
2771c5bc425SAlexey Zaytsev 
278f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
2791c5bc425SAlexey Zaytsev 
280f8296c60SJoshua M. Clulow 	/*
281f8296c60SJoshua M. Clulow 	 * The device will write the status byte into this last descriptor.
282f8296c60SJoshua M. Clulow 	 * See the block comment at the top of the file for more details on the
283f8296c60SJoshua M. Clulow 	 * chain layout.
284f8296c60SJoshua M. Clulow 	 */
285f8296c60SJoshua M. Clulow 	if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0) +
286f8296c60SJoshua M. Clulow 	    sizeof (struct vioblk_req_hdr), sizeof (uint8_t),
287f8296c60SJoshua M. Clulow 	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
288f8296c60SJoshua M. Clulow 		r = ENOMEM;
289f8296c60SJoshua M. Clulow 		goto out;
2901c5bc425SAlexey Zaytsev 	}
2911c5bc425SAlexey Zaytsev 
292f8296c60SJoshua M. Clulow 	virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORDEV);
293f8296c60SJoshua M. Clulow 	virtio_chain_submit(vic, B_TRUE);
2941c5bc425SAlexey Zaytsev 
295f8296c60SJoshua M. Clulow 	if (!(vbr->vbr_status & VIOBLK_REQSTAT_POLLED)) {
296f8296c60SJoshua M. Clulow 		/*
297f8296c60SJoshua M. Clulow 		 * This is not a polled request.  Our request will be freed and
298f8296c60SJoshua M. Clulow 		 * the caller notified later in vioblk_poll().
299f8296c60SJoshua M. Clulow 		 */
300f8296c60SJoshua M. Clulow 		return (0);
3011c5bc425SAlexey Zaytsev 	}
3021c5bc425SAlexey Zaytsev 
3031c5bc425SAlexey Zaytsev 	/*
304f8296c60SJoshua M. Clulow 	 * This is a polled request.  We need to block here and wait for the
305f8296c60SJoshua M. Clulow 	 * device to complete request processing.
3061c5bc425SAlexey Zaytsev 	 */
307f8296c60SJoshua M. Clulow 	while (!(vbr->vbr_status & VIOBLK_REQSTAT_POLL_COMPLETE)) {
308f8296c60SJoshua M. Clulow 		if (ddi_in_panic()) {
309f8296c60SJoshua M. Clulow 			/*
310f8296c60SJoshua M. Clulow 			 * When panicking, interrupts are disabled.  We must
311f8296c60SJoshua M. Clulow 			 * poll the queue manually.
312f8296c60SJoshua M. Clulow 			 */
3131c5bc425SAlexey Zaytsev 			drv_usecwait(10);
314f8296c60SJoshua M. Clulow 			(void) vioblk_poll(vib);
315f8296c60SJoshua M. Clulow 			continue;
3161c5bc425SAlexey Zaytsev 		}
3171c5bc425SAlexey Zaytsev 
318f8296c60SJoshua M. Clulow 		/*
319f8296c60SJoshua M. Clulow 		 * When not panicking, the device will interrupt on command
320f8296c60SJoshua M. Clulow 		 * completion and vioblk_poll() will be called to wake us up.
321f8296c60SJoshua M. Clulow 		 */
322f8296c60SJoshua M. Clulow 		cv_wait(&vib->vib_cv, &vib->vib_mutex);
3231c5bc425SAlexey Zaytsev 	}
3241c5bc425SAlexey Zaytsev 
325f8296c60SJoshua M. Clulow 	vioblk_complete(vib, vbr);
326f8296c60SJoshua M. Clulow 	r = vbr->vbr_error;
327f8296c60SJoshua M. Clulow 
328f8296c60SJoshua M. Clulow out:
329f8296c60SJoshua M. Clulow 	vioblk_req_free(vib, vbr);
330f8296c60SJoshua M. Clulow 	virtio_chain_free(vic);
331f8296c60SJoshua M. Clulow 	return (r);
3321c5bc425SAlexey Zaytsev }
3331c5bc425SAlexey Zaytsev 
3341c5bc425SAlexey Zaytsev static int
335f8296c60SJoshua M. Clulow vioblk_internal(vioblk_t *vib, int type, virtio_dma_t *dma,
336f8296c60SJoshua M. Clulow     uint64_t sector, virtio_direction_t dir)
3371c5bc425SAlexey Zaytsev {
338f8296c60SJoshua M. Clulow 	virtio_chain_t *vic;
339f8296c60SJoshua M. Clulow 	vioblk_req_t *vbr;
340f8296c60SJoshua M. Clulow 	int r;
3411c5bc425SAlexey Zaytsev 
342f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
343f8296c60SJoshua M. Clulow 
344f8296c60SJoshua M. Clulow 	/*
345f8296c60SJoshua M. Clulow 	 * Allocate a polled request.
346f8296c60SJoshua M. Clulow 	 */
347f8296c60SJoshua M. Clulow 	if ((vic = vioblk_common_start(vib, type, sector, B_TRUE)) == NULL) {
348f8296c60SJoshua M. Clulow 		return (ENOMEM);
349f8296c60SJoshua M. Clulow 	}
350f8296c60SJoshua M. Clulow 	vbr = virtio_chain_data(vic);
351f8296c60SJoshua M. Clulow 
352f8296c60SJoshua M. Clulow 	/*
353f8296c60SJoshua M. Clulow 	 * If there is a request payload, it goes between the header and the
354f8296c60SJoshua M. Clulow 	 * status byte.  See the block comment at the top of the file for more
355f8296c60SJoshua M. Clulow 	 * detail on the chain layout.
356f8296c60SJoshua M. Clulow 	 */
357f8296c60SJoshua M. Clulow 	if (dma != NULL) {
358f8296c60SJoshua M. Clulow 		for (uint_t n = 0; n < virtio_dma_ncookies(dma); n++) {
359f8296c60SJoshua M. Clulow 			if (virtio_chain_append(vic,
360f8296c60SJoshua M. Clulow 			    virtio_dma_cookie_pa(dma, n),
361f8296c60SJoshua M. Clulow 			    virtio_dma_cookie_size(dma, n), dir) !=
362f8296c60SJoshua M. Clulow 			    DDI_SUCCESS) {
363f8296c60SJoshua M. Clulow 				r = ENOMEM;
364f8296c60SJoshua M. Clulow 				goto out;
365f8296c60SJoshua M. Clulow 			}
366f8296c60SJoshua M. Clulow 		}
3671c5bc425SAlexey Zaytsev 	}
3681c5bc425SAlexey Zaytsev 
369f8296c60SJoshua M. Clulow 	return (vioblk_common_submit(vib, vic));
370f8296c60SJoshua M. Clulow 
371f8296c60SJoshua M. Clulow out:
372f8296c60SJoshua M. Clulow 	vioblk_req_free(vib, vbr);
373f8296c60SJoshua M. Clulow 	virtio_chain_free(vic);
374f8296c60SJoshua M. Clulow 	return (r);
375f8296c60SJoshua M. Clulow }
376f8296c60SJoshua M. Clulow 
377f8296c60SJoshua M. Clulow static int
378*1a5ae140SJason King vioblk_map_discard(vioblk_t *vib, virtio_chain_t *vic, const bd_xfer_t *xfer)
379*1a5ae140SJason King {
380*1a5ae140SJason King 	const dkioc_free_list_t *dfl = xfer->x_dfl;
381*1a5ae140SJason King 	const dkioc_free_list_ext_t *exts = dfl->dfl_exts;
382*1a5ae140SJason King 	virtio_dma_t *dma = NULL;
383*1a5ae140SJason King 	struct vioblk_discard_write_zeroes *wzp = NULL;
384*1a5ae140SJason King 
385*1a5ae140SJason King 	dma = virtio_dma_alloc(vib->vib_virtio,
386*1a5ae140SJason King 	    dfl->dfl_num_exts * sizeof (*wzp), &vioblk_dma_attr,
387*1a5ae140SJason King 	    DDI_DMA_CONSISTENT | DDI_DMA_WRITE, KM_SLEEP);
388*1a5ae140SJason King 	if (dma == NULL)
389*1a5ae140SJason King 		return (ENOMEM);
390*1a5ae140SJason King 
391*1a5ae140SJason King 	wzp = virtio_dma_va(dma, 0);
392*1a5ae140SJason King 
393*1a5ae140SJason King 	for (uint64_t i = 0; i < dfl->dfl_num_exts; i++, exts++, wzp++) {
394*1a5ae140SJason King 		uint64_t start = dfl->dfl_offset + exts->dfle_start;
395*1a5ae140SJason King 
396*1a5ae140SJason King 		const struct vioblk_discard_write_zeroes vdwz = {
397*1a5ae140SJason King 			.vdwz_sector = start >> DEV_BSHIFT,
398*1a5ae140SJason King 			.vdwz_num_sectors = exts->dfle_length >> DEV_BSHIFT,
399*1a5ae140SJason King 			.vdwz_flags = 0
400*1a5ae140SJason King 		};
401*1a5ae140SJason King 
402*1a5ae140SJason King 		bcopy(&vdwz, wzp, sizeof (*wzp));
403*1a5ae140SJason King 	}
404*1a5ae140SJason King 
405*1a5ae140SJason King 	if (virtio_chain_append(vic,
406*1a5ae140SJason King 	    virtio_dma_cookie_pa(dma, 0),
407*1a5ae140SJason King 	    virtio_dma_cookie_size(dma, 0),
408*1a5ae140SJason King 	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS) {
409*1a5ae140SJason King 		virtio_dma_free(dma);
410*1a5ae140SJason King 		return (ENOMEM);
411*1a5ae140SJason King 	}
412*1a5ae140SJason King 
413*1a5ae140SJason King 	return (0);
414*1a5ae140SJason King }
415*1a5ae140SJason King 
416*1a5ae140SJason King static int
417f8296c60SJoshua M. Clulow vioblk_request(vioblk_t *vib, bd_xfer_t *xfer, int type)
418f8296c60SJoshua M. Clulow {
419f8296c60SJoshua M. Clulow 	virtio_chain_t *vic = NULL;
420f8296c60SJoshua M. Clulow 	vioblk_req_t *vbr = NULL;
421f8296c60SJoshua M. Clulow 	uint_t total_cookies = 2;
422f8296c60SJoshua M. Clulow 	boolean_t polled = (xfer->x_flags & BD_XFER_POLL) != 0;
423f8296c60SJoshua M. Clulow 	int r;
424f8296c60SJoshua M. Clulow 
425f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
426f8296c60SJoshua M. Clulow 
427f8296c60SJoshua M. Clulow 	/*
428f8296c60SJoshua M. Clulow 	 * Ensure that this request falls within the advertised size of the
429f8296c60SJoshua M. Clulow 	 * block device.  Be careful to avoid overflow.
430f8296c60SJoshua M. Clulow 	 */
431f8296c60SJoshua M. Clulow 	if (xfer->x_nblks > SIZE_MAX - xfer->x_blkno ||
432f8296c60SJoshua M. Clulow 	    (xfer->x_blkno + xfer->x_nblks) > vib->vib_nblks) {
433f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_rw_badoffset.value.ui64++;
434f8296c60SJoshua M. Clulow 		return (EINVAL);
435f8296c60SJoshua M. Clulow 	}
436f8296c60SJoshua M. Clulow 
437f8296c60SJoshua M. Clulow 	if ((vic = vioblk_common_start(vib, type, xfer->x_blkno, polled)) ==
438f8296c60SJoshua M. Clulow 	    NULL) {
439f8296c60SJoshua M. Clulow 		return (ENOMEM);
440f8296c60SJoshua M. Clulow 	}
441f8296c60SJoshua M. Clulow 	vbr = virtio_chain_data(vic);
442f8296c60SJoshua M. Clulow 	vbr->vbr_xfer = xfer;
443f8296c60SJoshua M. Clulow 
444f8296c60SJoshua M. Clulow 	/*
445f8296c60SJoshua M. Clulow 	 * If there is a request payload, it goes between the header and the
446f8296c60SJoshua M. Clulow 	 * status byte.  See the block comment at the top of the file for more
447f8296c60SJoshua M. Clulow 	 * detail on the chain layout.
448f8296c60SJoshua M. Clulow 	 */
449f8296c60SJoshua M. Clulow 	if ((type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_OUT) &&
450f8296c60SJoshua M. Clulow 	    xfer->x_nblks > 0) {
451f8296c60SJoshua M. Clulow 		virtio_direction_t dir = (type == VIRTIO_BLK_T_OUT) ?
452f8296c60SJoshua M. Clulow 		    VIRTIO_DIR_DEVICE_READS : VIRTIO_DIR_DEVICE_WRITES;
453f8296c60SJoshua M. Clulow 
454f8296c60SJoshua M. Clulow 		for (uint_t n = 0; n < xfer->x_ndmac; n++) {
455f8296c60SJoshua M. Clulow 			ddi_dma_cookie_t dmac;
456f8296c60SJoshua M. Clulow 
457f8296c60SJoshua M. Clulow 			if (n == 0) {
458f8296c60SJoshua M. Clulow 				/*
459f8296c60SJoshua M. Clulow 				 * The first cookie is in the blkdev request.
460f8296c60SJoshua M. Clulow 				 */
461f8296c60SJoshua M. Clulow 				dmac = xfer->x_dmac;
4621c5bc425SAlexey Zaytsev 			} else {
463f8296c60SJoshua M. Clulow 				ddi_dma_nextcookie(xfer->x_dmah, &dmac);
4641c5bc425SAlexey Zaytsev 			}
4651c5bc425SAlexey Zaytsev 
466f8296c60SJoshua M. Clulow 			if (virtio_chain_append(vic, dmac.dmac_laddress,
467f8296c60SJoshua M. Clulow 			    dmac.dmac_size, dir) != DDI_SUCCESS) {
468f8296c60SJoshua M. Clulow 				r = ENOMEM;
469f8296c60SJoshua M. Clulow 				goto fail;
470f8296c60SJoshua M. Clulow 			}
4711c5bc425SAlexey Zaytsev 		}
4721c5bc425SAlexey Zaytsev 
473f8296c60SJoshua M. Clulow 		total_cookies += xfer->x_ndmac;
474f8296c60SJoshua M. Clulow 
475f8296c60SJoshua M. Clulow 	} else if (xfer->x_nblks > 0) {
476f8296c60SJoshua M. Clulow 		dev_err(vib->vib_dip, CE_PANIC,
477f8296c60SJoshua M. Clulow 		    "request of type %d had payload length of %lu blocks", type,
478f8296c60SJoshua M. Clulow 		    xfer->x_nblks);
479*1a5ae140SJason King 	} else if (type == VIRTIO_BLK_T_DISCARD) {
480*1a5ae140SJason King 		r = vioblk_map_discard(vib, vic, xfer);
481*1a5ae140SJason King 		if (r != 0) {
482*1a5ae140SJason King 			goto fail;
483*1a5ae140SJason King 		}
484f8296c60SJoshua M. Clulow 	}
485f8296c60SJoshua M. Clulow 
486f8296c60SJoshua M. Clulow 	if (vib->vib_stats->vbs_rw_cookiesmax.value.ui32 < total_cookies) {
487f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_rw_cookiesmax.value.ui32 = total_cookies;
488f8296c60SJoshua M. Clulow 	}
489f8296c60SJoshua M. Clulow 
490f8296c60SJoshua M. Clulow 	return (vioblk_common_submit(vib, vic));
491f8296c60SJoshua M. Clulow 
492f8296c60SJoshua M. Clulow fail:
493f8296c60SJoshua M. Clulow 	vbr->vbr_xfer = NULL;
494f8296c60SJoshua M. Clulow 	vioblk_req_free(vib, vbr);
495f8296c60SJoshua M. Clulow 	virtio_chain_free(vic);
496f8296c60SJoshua M. Clulow 	return (r);
4971c5bc425SAlexey Zaytsev }
4981c5bc425SAlexey Zaytsev 
4991c5bc425SAlexey Zaytsev static int
500f8296c60SJoshua M. Clulow vioblk_bd_read(void *arg, bd_xfer_t *xfer)
5011c5bc425SAlexey Zaytsev {
502f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
503f8296c60SJoshua M. Clulow 	int r;
5041c5bc425SAlexey Zaytsev 
505f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
506f8296c60SJoshua M. Clulow 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_IN);
507f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
5081c5bc425SAlexey Zaytsev 
509f8296c60SJoshua M. Clulow 	return (r);
5101c5bc425SAlexey Zaytsev }
5111c5bc425SAlexey Zaytsev 
5121c5bc425SAlexey Zaytsev static int
513f8296c60SJoshua M. Clulow vioblk_bd_write(void *arg, bd_xfer_t *xfer)
5141c5bc425SAlexey Zaytsev {
515f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
516f8296c60SJoshua M. Clulow 	int r;
5171c5bc425SAlexey Zaytsev 
518f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
519f8296c60SJoshua M. Clulow 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_OUT);
520f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
5211c5bc425SAlexey Zaytsev 
522f8296c60SJoshua M. Clulow 	return (r);
5231c5bc425SAlexey Zaytsev }
5241c5bc425SAlexey Zaytsev 
525f8296c60SJoshua M. Clulow static int
526f8296c60SJoshua M. Clulow vioblk_bd_flush(void *arg, bd_xfer_t *xfer)
527f8296c60SJoshua M. Clulow {
528f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
529f8296c60SJoshua M. Clulow 	int r;
530f8296c60SJoshua M. Clulow 
531f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
532f8296c60SJoshua M. Clulow 	if (!virtio_feature_present(vib->vib_virtio, VIRTIO_BLK_F_FLUSH)) {
533f8296c60SJoshua M. Clulow 		/*
534f8296c60SJoshua M. Clulow 		 * We don't really expect to get here, because if we did not
535f8296c60SJoshua M. Clulow 		 * negotiate the flush feature we would not have installed this
536f8296c60SJoshua M. Clulow 		 * function in the blkdev ops vector.
537f8296c60SJoshua M. Clulow 		 */
538f8296c60SJoshua M. Clulow 		mutex_exit(&vib->vib_mutex);
539f8296c60SJoshua M. Clulow 		return (ENOTSUP);
540f8296c60SJoshua M. Clulow 	}
541f8296c60SJoshua M. Clulow 
542f8296c60SJoshua M. Clulow 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_FLUSH);
543f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
544f8296c60SJoshua M. Clulow 
545f8296c60SJoshua M. Clulow 	return (r);
546f8296c60SJoshua M. Clulow }
5471c5bc425SAlexey Zaytsev 
5481c5bc425SAlexey Zaytsev static void
549f8296c60SJoshua M. Clulow vioblk_bd_driveinfo(void *arg, bd_drive_t *drive)
5501c5bc425SAlexey Zaytsev {
551f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
5521c5bc425SAlexey Zaytsev 
553f8296c60SJoshua M. Clulow 	drive->d_qsize = vib->vib_reqs_capacity;
5541c5bc425SAlexey Zaytsev 	drive->d_removable = B_FALSE;
5551c5bc425SAlexey Zaytsev 	drive->d_hotpluggable = B_TRUE;
5561c5bc425SAlexey Zaytsev 	drive->d_target = 0;
5571c5bc425SAlexey Zaytsev 	drive->d_lun = 0;
558510a6847SHans Rosenfeld 
559510a6847SHans Rosenfeld 	drive->d_vendor = "Virtio";
560510a6847SHans Rosenfeld 	drive->d_vendor_len = strlen(drive->d_vendor);
561510a6847SHans Rosenfeld 
562510a6847SHans Rosenfeld 	drive->d_product = "Block Device";
563510a6847SHans Rosenfeld 	drive->d_product_len = strlen(drive->d_product);
564510a6847SHans Rosenfeld 
565f8296c60SJoshua M. Clulow 	drive->d_serial = vib->vib_devid;
566510a6847SHans Rosenfeld 	drive->d_serial_len = strlen(drive->d_serial);
567510a6847SHans Rosenfeld 
568510a6847SHans Rosenfeld 	drive->d_revision = "0000";
569510a6847SHans Rosenfeld 	drive->d_revision_len = strlen(drive->d_revision);
570*1a5ae140SJason King 
571*1a5ae140SJason King 	if (vib->vib_can_discard) {
572*1a5ae140SJason King 		drive->d_free_align = vib->vib_discard_sector_align;
573*1a5ae140SJason King 		drive->d_max_free_seg = vib->vib_max_discard_seg;
574*1a5ae140SJason King 		drive->d_max_free_blks = vib->vib_max_discard_sectors;
575*1a5ae140SJason King 		/*
576*1a5ae140SJason King 		 * The virtio 1.1 spec doesn't specify a per segment sector
577*1a5ae140SJason King 		 * limit for discards -- only a limit on the total sectors in
578*1a5ae140SJason King 		 * a discard request. Therefore, we assume a vioblk device must
579*1a5ae140SJason King 		 * be able to accept a single segment of vib_max_discard_sectors
580*1a5ae140SJason King 		 * (when it supports discard requests) and use
581*1a5ae140SJason King 		 * vib_max_discard_sectors both for the overall limit for
582*1a5ae140SJason King 		 * a discard request, but also as the limit for a single
583*1a5ae140SJason King 		 * segment. blkdev will ensure we are never called with
584*1a5ae140SJason King 		 * a dkioc_free_list_t that violates either limit.
585*1a5ae140SJason King 		 */
586*1a5ae140SJason King 		drive->d_max_free_seg_blks = vib->vib_max_discard_sectors;
587*1a5ae140SJason King 	}
5881c5bc425SAlexey Zaytsev }
5891c5bc425SAlexey Zaytsev 
5901c5bc425SAlexey Zaytsev static int
591f8296c60SJoshua M. Clulow vioblk_bd_mediainfo(void *arg, bd_media_t *media)
5921c5bc425SAlexey Zaytsev {
593f8296c60SJoshua M. Clulow 	vioblk_t *vib = (void *)arg;
5941c5bc425SAlexey Zaytsev 
595f8296c60SJoshua M. Clulow 	/*
596f8296c60SJoshua M. Clulow 	 * The device protocol is specified in terms of 512 byte logical
597f8296c60SJoshua M. Clulow 	 * blocks, regardless of the recommended I/O size which might be
598f8296c60SJoshua M. Clulow 	 * larger.
599f8296c60SJoshua M. Clulow 	 */
600f8296c60SJoshua M. Clulow 	media->m_nblks = vib->vib_nblks;
601c5c712a8SToomas Soome 	media->m_blksize = vib->vib_blk_size;
6021c5bc425SAlexey Zaytsev 
603f8296c60SJoshua M. Clulow 	media->m_readonly = vib->vib_readonly;
604f8296c60SJoshua M. Clulow 	media->m_pblksize = vib->vib_pblk_size;
6051c5bc425SAlexey Zaytsev 	return (0);
6061c5bc425SAlexey Zaytsev }
6071c5bc425SAlexey Zaytsev 
6081c5bc425SAlexey Zaytsev static void
609f8296c60SJoshua M. Clulow vioblk_get_id(vioblk_t *vib)
6101c5bc425SAlexey Zaytsev {
611f8296c60SJoshua M. Clulow 	virtio_dma_t *dma;
612f8296c60SJoshua M. Clulow 	int r;
6131c5bc425SAlexey Zaytsev 
614f8296c60SJoshua M. Clulow 	if ((dma = virtio_dma_alloc(vib->vib_virtio, VIRTIO_BLK_ID_BYTES,
615f8296c60SJoshua M. Clulow 	    &vioblk_dma_attr, DDI_DMA_CONSISTENT | DDI_DMA_READ,
616f8296c60SJoshua M. Clulow 	    KM_SLEEP)) == NULL) {
617f8296c60SJoshua M. Clulow 		return;
618f8296c60SJoshua M. Clulow 	}
6191c5bc425SAlexey Zaytsev 
620f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
621f8296c60SJoshua M. Clulow 	if ((r = vioblk_internal(vib, VIRTIO_BLK_T_GET_ID, dma, 0,
622f8296c60SJoshua M. Clulow 	    VIRTIO_DIR_DEVICE_WRITES)) == 0) {
623f8296c60SJoshua M. Clulow 		const char *b = virtio_dma_va(dma, 0);
624f8296c60SJoshua M. Clulow 		uint_t pos = 0;
6251c5bc425SAlexey Zaytsev 
626f8296c60SJoshua M. Clulow 		/*
627f8296c60SJoshua M. Clulow 		 * Save the entire response for debugging purposes.
628f8296c60SJoshua M. Clulow 		 */
629f8296c60SJoshua M. Clulow 		bcopy(virtio_dma_va(dma, 0), vib->vib_rawid,
630f8296c60SJoshua M. Clulow 		    VIRTIO_BLK_ID_BYTES);
6311c5bc425SAlexey Zaytsev 
632f8296c60SJoshua M. Clulow 		/*
633f8296c60SJoshua M. Clulow 		 * Process the returned ID.
634f8296c60SJoshua M. Clulow 		 */
635f8296c60SJoshua M. Clulow 		bzero(vib->vib_devid, sizeof (vib->vib_devid));
636f8296c60SJoshua M. Clulow 		for (uint_t n = 0; n < VIRTIO_BLK_ID_BYTES; n++) {
637f8296c60SJoshua M. Clulow 			if (isalnum(b[n]) || b[n] == '-' || b[n] == '_') {
638f8296c60SJoshua M. Clulow 				/*
639f8296c60SJoshua M. Clulow 				 * Accept a subset of printable ASCII
640f8296c60SJoshua M. Clulow 				 * characters.
641f8296c60SJoshua M. Clulow 				 */
642f8296c60SJoshua M. Clulow 				vib->vib_devid[pos++] = b[n];
643f8296c60SJoshua M. Clulow 			} else {
644f8296c60SJoshua M. Clulow 				/*
645f8296c60SJoshua M. Clulow 				 * Stop processing at the first sign of
646f8296c60SJoshua M. Clulow 				 * trouble.
647f8296c60SJoshua M. Clulow 				 */
648f8296c60SJoshua M. Clulow 				break;
649f8296c60SJoshua M. Clulow 			}
650f8296c60SJoshua M. Clulow 		}
6511c5bc425SAlexey Zaytsev 
652f8296c60SJoshua M. Clulow 		vib->vib_devid_fetched = B_TRUE;
653f8296c60SJoshua M. Clulow 	}
654f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
6551c5bc425SAlexey Zaytsev 
656f8296c60SJoshua M. Clulow 	virtio_dma_free(dma);
6571c5bc425SAlexey Zaytsev }
6581c5bc425SAlexey Zaytsev 
6591c5bc425SAlexey Zaytsev static int
660f8296c60SJoshua M. Clulow vioblk_bd_devid(void *arg, dev_info_t *dip, ddi_devid_t *devid)
6611c5bc425SAlexey Zaytsev {
662f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
663f8296c60SJoshua M. Clulow 	size_t len;
6641c5bc425SAlexey Zaytsev 
665f8296c60SJoshua M. Clulow 	if ((len = strlen(vib->vib_devid)) == 0) {
666f8296c60SJoshua M. Clulow 		/*
667f8296c60SJoshua M. Clulow 		 * The device has no ID.
668f8296c60SJoshua M. Clulow 		 */
6691c5bc425SAlexey Zaytsev 		return (DDI_FAILURE);
6701c5bc425SAlexey Zaytsev 	}
6711c5bc425SAlexey Zaytsev 
672f8296c60SJoshua M. Clulow 	return (ddi_devid_init(dip, DEVID_ATA_SERIAL, len, vib->vib_devid,
673f8296c60SJoshua M. Clulow 	    devid));
6741c5bc425SAlexey Zaytsev }
6751c5bc425SAlexey Zaytsev 
676*1a5ae140SJason King static int
677*1a5ae140SJason King vioblk_bd_free_space(void *arg, bd_xfer_t *xfer)
678*1a5ae140SJason King {
679*1a5ae140SJason King 	vioblk_t *vib = arg;
680*1a5ae140SJason King 	int r = 0;
681*1a5ae140SJason King 
682*1a5ae140SJason King 	/*
683*1a5ae140SJason King 	 * Since vib_can_discard is write once (and set during attach),
684*1a5ae140SJason King 	 * we can check if it's enabled without taking the mutex.
685*1a5ae140SJason King 	 */
686*1a5ae140SJason King 	if (!vib->vib_can_discard) {
687*1a5ae140SJason King 		return (ENOTSUP);
688*1a5ae140SJason King 	}
689*1a5ae140SJason King 
690*1a5ae140SJason King 	mutex_enter(&vib->vib_mutex);
691*1a5ae140SJason King 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_DISCARD);
692*1a5ae140SJason King 	mutex_exit(&vib->vib_mutex);
693*1a5ae140SJason King 
694*1a5ae140SJason King 	return (r);
695*1a5ae140SJason King }
696*1a5ae140SJason King 
697f8296c60SJoshua M. Clulow /*
698f8296c60SJoshua M. Clulow  * As the device completes processing of a request, it returns the chain for
699f8296c60SJoshua M. Clulow  * that request to our I/O queue.  This routine is called in two contexts:
700f8296c60SJoshua M. Clulow  *   - from the interrupt handler, in response to notification from the device
701f8296c60SJoshua M. Clulow  *   - synchronously in line with request processing when panicking
702f8296c60SJoshua M. Clulow  */
703f8296c60SJoshua M. Clulow static uint_t
704f8296c60SJoshua M. Clulow vioblk_poll(vioblk_t *vib)
7051c5bc425SAlexey Zaytsev {
706f8296c60SJoshua M. Clulow 	virtio_chain_t *vic;
707f8296c60SJoshua M. Clulow 	uint_t count = 0;
708f8296c60SJoshua M. Clulow 	boolean_t wakeup = B_FALSE;
7091c5bc425SAlexey Zaytsev 
710f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
7111c5bc425SAlexey Zaytsev 
712f8296c60SJoshua M. Clulow 	while ((vic = virtio_queue_poll(vib->vib_vq)) != NULL) {
713f8296c60SJoshua M. Clulow 		vioblk_req_t *vbr = virtio_chain_data(vic);
714f8296c60SJoshua M. Clulow 		uint8_t status;
7151c5bc425SAlexey Zaytsev 
716f8296c60SJoshua M. Clulow 		virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORCPU);
7171c5bc425SAlexey Zaytsev 
718f8296c60SJoshua M. Clulow 		bcopy(virtio_dma_va(vbr->vbr_dma,
719f8296c60SJoshua M. Clulow 		    sizeof (struct vioblk_req_hdr)), &status, sizeof (status));
7201c5bc425SAlexey Zaytsev 
7211c5bc425SAlexey Zaytsev 		switch (status) {
7221c5bc425SAlexey Zaytsev 		case VIRTIO_BLK_S_OK:
723f8296c60SJoshua M. Clulow 			vbr->vbr_error = 0;
7241c5bc425SAlexey Zaytsev 			break;
7251c5bc425SAlexey Zaytsev 		case VIRTIO_BLK_S_IOERR:
726f8296c60SJoshua M. Clulow 			vbr->vbr_error = EIO;
727f8296c60SJoshua M. Clulow 			vib->vib_stats->vbs_io_errors.value.ui64++;
7281c5bc425SAlexey Zaytsev 			break;
7291c5bc425SAlexey Zaytsev 		case VIRTIO_BLK_S_UNSUPP:
730f8296c60SJoshua M. Clulow 			vbr->vbr_error = ENOTTY;
731f8296c60SJoshua M. Clulow 			vib->vib_stats->vbs_unsupp_errors.value.ui64++;
7321c5bc425SAlexey Zaytsev 			break;
7331c5bc425SAlexey Zaytsev 		default:
734f8296c60SJoshua M. Clulow 			vbr->vbr_error = ENXIO;
735f8296c60SJoshua M. Clulow 			vib->vib_stats->vbs_nxio_errors.value.ui64++;
7361c5bc425SAlexey Zaytsev 			break;
7371c5bc425SAlexey Zaytsev 		}
7381c5bc425SAlexey Zaytsev 
739f8296c60SJoshua M. Clulow 		count++;
7401c5bc425SAlexey Zaytsev 
741f8296c60SJoshua M. Clulow 		if (vbr->vbr_status & VIOBLK_REQSTAT_POLLED) {
742f8296c60SJoshua M. Clulow 			/*
743f8296c60SJoshua M. Clulow 			 * This request must not be freed as it is being held
744f8296c60SJoshua M. Clulow 			 * by a call to vioblk_common_submit().
745f8296c60SJoshua M. Clulow 			 */
746f8296c60SJoshua M. Clulow 			VERIFY(!(vbr->vbr_status &
747f8296c60SJoshua M. Clulow 			    VIOBLK_REQSTAT_POLL_COMPLETE));
748f8296c60SJoshua M. Clulow 			vbr->vbr_status |= VIOBLK_REQSTAT_POLL_COMPLETE;
749f8296c60SJoshua M. Clulow 			wakeup = B_TRUE;
750f8296c60SJoshua M. Clulow 			continue;
7511c5bc425SAlexey Zaytsev 		}
7521c5bc425SAlexey Zaytsev 
753f8296c60SJoshua M. Clulow 		vioblk_complete(vib, vbr);
7541c5bc425SAlexey Zaytsev 
755f8296c60SJoshua M. Clulow 		vioblk_req_free(vib, vbr);
756f8296c60SJoshua M. Clulow 		virtio_chain_free(vic);
7571c5bc425SAlexey Zaytsev 	}
7581c5bc425SAlexey Zaytsev 
759f8296c60SJoshua M. Clulow 	if (wakeup) {
760f8296c60SJoshua M. Clulow 		/*
761f8296c60SJoshua M. Clulow 		 * Signal anybody waiting for polled command completion.
762f8296c60SJoshua M. Clulow 		 */
763f8296c60SJoshua M. Clulow 		cv_broadcast(&vib->vib_cv);
764f8296c60SJoshua M. Clulow 	}
765f8296c60SJoshua M. Clulow 
766f8296c60SJoshua M. Clulow 	return (count);
767f8296c60SJoshua M. Clulow }
768f8296c60SJoshua M. Clulow 
7691c5bc425SAlexey Zaytsev uint_t
770f8296c60SJoshua M. Clulow vioblk_int_handler(caddr_t arg0, caddr_t arg1)
7711c5bc425SAlexey Zaytsev {
772f8296c60SJoshua M. Clulow 	vioblk_t *vib = (vioblk_t *)arg0;
773f8296c60SJoshua M. Clulow 	uint_t count;
774f8296c60SJoshua M. Clulow 
775f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
776f8296c60SJoshua M. Clulow 	if ((count = vioblk_poll(vib)) >
777f8296c60SJoshua M. Clulow 	    vib->vib_stats->vbs_intr_queuemax.value.ui32) {
778f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_intr_queuemax.value.ui32 = count;
7791c5bc425SAlexey Zaytsev 	}
7801c5bc425SAlexey Zaytsev 
781f8296c60SJoshua M. Clulow 	vib->vib_stats->vbs_intr_total.value.ui64++;
782f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
7831c5bc425SAlexey Zaytsev 
784f8296c60SJoshua M. Clulow 	return (DDI_INTR_CLAIMED);
7851c5bc425SAlexey Zaytsev }
7861c5bc425SAlexey Zaytsev 
7871c5bc425SAlexey Zaytsev static void
788f8296c60SJoshua M. Clulow vioblk_free_reqs(vioblk_t *vib)
7891c5bc425SAlexey Zaytsev {
790f8296c60SJoshua M. Clulow 	VERIFY3U(vib->vib_nreqs_alloc, ==, 0);
7911c5bc425SAlexey Zaytsev 
792f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
793f8296c60SJoshua M. Clulow 		struct vioblk_req *vbr = &vib->vib_reqs_mem[i];
7941c5bc425SAlexey Zaytsev 
795f8296c60SJoshua M. Clulow 		VERIFY(list_link_active(&vbr->vbr_link));
796f8296c60SJoshua M. Clulow 		list_remove(&vib->vib_reqs, vbr);
7971c5bc425SAlexey Zaytsev 
798f8296c60SJoshua M. Clulow 		VERIFY0(vbr->vbr_status);
7991c5bc425SAlexey Zaytsev 
800f8296c60SJoshua M. Clulow 		if (vbr->vbr_dma != NULL) {
801f8296c60SJoshua M. Clulow 			virtio_dma_free(vbr->vbr_dma);
802f8296c60SJoshua M. Clulow 			vbr->vbr_dma = NULL;
8031c5bc425SAlexey Zaytsev 		}
804f8296c60SJoshua M. Clulow 	}
805f8296c60SJoshua M. Clulow 	VERIFY(list_is_empty(&vib->vib_reqs));
8061c5bc425SAlexey Zaytsev 
807f8296c60SJoshua M. Clulow 	if (vib->vib_reqs_mem != NULL) {
808f8296c60SJoshua M. Clulow 		kmem_free(vib->vib_reqs_mem,
809f8296c60SJoshua M. Clulow 		    sizeof (struct vioblk_req) * vib->vib_reqs_capacity);
810f8296c60SJoshua M. Clulow 		vib->vib_reqs_mem = NULL;
811f8296c60SJoshua M. Clulow 		vib->vib_reqs_capacity = 0;
812f8296c60SJoshua M. Clulow 	}
8131c5bc425SAlexey Zaytsev }
8141c5bc425SAlexey Zaytsev 
8151c5bc425SAlexey Zaytsev static int
816f8296c60SJoshua M. Clulow vioblk_alloc_reqs(vioblk_t *vib)
8171c5bc425SAlexey Zaytsev {
818f8296c60SJoshua M. Clulow 	vib->vib_reqs_capacity = MIN(virtio_queue_size(vib->vib_vq),
819f8296c60SJoshua M. Clulow 	    VIRTIO_BLK_REQ_BUFS);
820f8296c60SJoshua M. Clulow 	vib->vib_reqs_mem = kmem_zalloc(
821f8296c60SJoshua M. Clulow 	    sizeof (struct vioblk_req) * vib->vib_reqs_capacity, KM_SLEEP);
822f8296c60SJoshua M. Clulow 	vib->vib_nreqs_alloc = 0;
8231c5bc425SAlexey Zaytsev 
824f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
825f8296c60SJoshua M. Clulow 		list_insert_tail(&vib->vib_reqs, &vib->vib_reqs_mem[i]);
8261c5bc425SAlexey Zaytsev 	}
8271c5bc425SAlexey Zaytsev 
828f8296c60SJoshua M. Clulow 	for (vioblk_req_t *vbr = list_head(&vib->vib_reqs); vbr != NULL;
829f8296c60SJoshua M. Clulow 	    vbr = list_next(&vib->vib_reqs, vbr)) {
830f8296c60SJoshua M. Clulow 		if ((vbr->vbr_dma = virtio_dma_alloc(vib->vib_virtio,
8311c5bc425SAlexey Zaytsev 		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
832f8296c60SJoshua M. Clulow 		    &vioblk_dma_attr, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
833f8296c60SJoshua M. Clulow 		    KM_SLEEP)) == NULL) {
834f8296c60SJoshua M. Clulow 			goto fail;
8351c5bc425SAlexey Zaytsev 		}
8361c5bc425SAlexey Zaytsev 	}
8371c5bc425SAlexey Zaytsev 
8381c5bc425SAlexey Zaytsev 	return (0);
8391c5bc425SAlexey Zaytsev 
840f8296c60SJoshua M. Clulow fail:
841f8296c60SJoshua M. Clulow 	vioblk_free_reqs(vib);
8421c5bc425SAlexey Zaytsev 	return (ENOMEM);
8431c5bc425SAlexey Zaytsev }
8441c5bc425SAlexey Zaytsev 
8451c5bc425SAlexey Zaytsev static int
846f8296c60SJoshua M. Clulow vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
8471c5bc425SAlexey Zaytsev {
848f8296c60SJoshua M. Clulow 	int instance = ddi_get_instance(dip);
849f8296c60SJoshua M. Clulow 	vioblk_t *vib;
850f8296c60SJoshua M. Clulow 	virtio_t *vio;
851f8296c60SJoshua M. Clulow 	boolean_t did_mutex = B_FALSE;
8521c5bc425SAlexey Zaytsev 
853f8296c60SJoshua M. Clulow 	if (cmd != DDI_ATTACH) {
854d48defc5SHans Rosenfeld 		return (DDI_FAILURE);
8551c5bc425SAlexey Zaytsev 	}
8561c5bc425SAlexey Zaytsev 
857f8296c60SJoshua M. Clulow 	if ((vio = virtio_init(dip, VIRTIO_BLK_WANTED_FEATURES, B_TRUE)) ==
858f8296c60SJoshua M. Clulow 	    NULL) {
859f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "failed to start Virtio init");
860f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
861f8296c60SJoshua M. Clulow 	}
8621c5bc425SAlexey Zaytsev 
863f8296c60SJoshua M. Clulow 	vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
864f8296c60SJoshua M. Clulow 	vib->vib_dip = dip;
865f8296c60SJoshua M. Clulow 	vib->vib_virtio = vio;
866f8296c60SJoshua M. Clulow 	ddi_set_driver_private(dip, vib);
867f8296c60SJoshua M. Clulow 	list_create(&vib->vib_reqs, sizeof (vioblk_req_t),
868f8296c60SJoshua M. Clulow 	    offsetof(vioblk_req_t, vbr_link));
8691c5bc425SAlexey Zaytsev 
8701c5bc425SAlexey Zaytsev 	/*
871f8296c60SJoshua M. Clulow 	 * Determine how many scatter-gather entries we can use in a single
872f8296c60SJoshua M. Clulow 	 * request.
8731c5bc425SAlexey Zaytsev 	 */
874f8296c60SJoshua M. Clulow 	vib->vib_seg_max = VIRTIO_BLK_DEFAULT_MAX_SEG;
875f8296c60SJoshua M. Clulow 	if (virtio_feature_present(vio, VIRTIO_BLK_F_SEG_MAX)) {
876f8296c60SJoshua M. Clulow 		vib->vib_seg_max = virtio_dev_get32(vio,
8771c5bc425SAlexey Zaytsev 		    VIRTIO_BLK_CONFIG_SEG_MAX);
8781c5bc425SAlexey Zaytsev 
879f8296c60SJoshua M. Clulow 		if (vib->vib_seg_max == 0 || vib->vib_seg_max == PCI_EINVAL32) {
880f8296c60SJoshua M. Clulow 			/*
881f8296c60SJoshua M. Clulow 			 * We need to be able to use at least one data segment,
882f8296c60SJoshua M. Clulow 			 * so we'll assume that this device is just poorly
883f8296c60SJoshua M. Clulow 			 * implemented and try for one.
884f8296c60SJoshua M. Clulow 			 */
885f8296c60SJoshua M. Clulow 			vib->vib_seg_max = 1;
886f8296c60SJoshua M. Clulow 		}
887f8296c60SJoshua M. Clulow 	}
8881c5bc425SAlexey Zaytsev 
889*1a5ae140SJason King 	if (virtio_feature_present(vio, VIRTIO_BLK_F_DISCARD)) {
890*1a5ae140SJason King 		vib->vib_max_discard_sectors = virtio_dev_get32(vio,
891*1a5ae140SJason King 		    VIRTIO_BLK_CONFIG_MAX_DISCARD_SECT);
892*1a5ae140SJason King 		vib->vib_max_discard_seg = virtio_dev_get32(vio,
893*1a5ae140SJason King 		    VIRTIO_BLK_CONFIG_MAX_DISCARD_SEG);
894*1a5ae140SJason King 		vib->vib_discard_sector_align = virtio_dev_get32(vio,
895*1a5ae140SJason King 		    VIRTIO_BLK_CONFIG_DISCARD_ALIGN);
896*1a5ae140SJason King 
897*1a5ae140SJason King 		if (vib->vib_max_discard_sectors == 0 ||
898*1a5ae140SJason King 		    vib->vib_max_discard_seg == 0 ||
899*1a5ae140SJason King 		    vib->vib_discard_sector_align == 0) {
900*1a5ae140SJason King 			vib->vib_can_discard = B_FALSE;
901*1a5ae140SJason King 
902*1a5ae140SJason King 			/*
903*1a5ae140SJason King 			 * The hypervisor shouldn't be giving us bad values.
904*1a5ae140SJason King 			 * If it is, it's probably worth notifying the
905*1a5ae140SJason King 			 * operator.
906*1a5ae140SJason King 			 */
907*1a5ae140SJason King 			dev_err(dip, CE_NOTE,
908*1a5ae140SJason King 			    "Host is advertising DISCARD support but with bad"
909*1a5ae140SJason King 			    "parameters: max_discard_sectors=%u, "
910*1a5ae140SJason King 			    "max_discard_segments=%u, discard_sector_align=%u",
911*1a5ae140SJason King 			    vib->vib_max_discard_sectors,
912*1a5ae140SJason King 			    vib->vib_max_discard_seg,
913*1a5ae140SJason King 			    vib->vib_discard_sector_align);
914*1a5ae140SJason King 		} else {
915*1a5ae140SJason King 			vib->vib_can_discard = B_TRUE;
916*1a5ae140SJason King 		}
917*1a5ae140SJason King 	}
918*1a5ae140SJason King 
9191c5bc425SAlexey Zaytsev 	/*
920f8296c60SJoshua M. Clulow 	 * When allocating the request queue, we include two additional
921f8296c60SJoshua M. Clulow 	 * descriptors (beyond those required for request data) to account for
922f8296c60SJoshua M. Clulow 	 * the header and the status byte.
9231c5bc425SAlexey Zaytsev 	 */
924f8296c60SJoshua M. Clulow 	if ((vib->vib_vq = virtio_queue_alloc(vio, VIRTIO_BLK_VIRTQ_IO, "io",
925f8296c60SJoshua M. Clulow 	    vioblk_int_handler, vib, B_FALSE, vib->vib_seg_max + 2)) == NULL) {
926f8296c60SJoshua M. Clulow 		goto fail;
9271c5bc425SAlexey Zaytsev 	}
9281c5bc425SAlexey Zaytsev 
929f8296c60SJoshua M. Clulow 	if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
930f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "failed to complete Virtio init");
931f8296c60SJoshua M. Clulow 		goto fail;
9321c5bc425SAlexey Zaytsev 	}
9331c5bc425SAlexey Zaytsev 
934f8296c60SJoshua M. Clulow 	cv_init(&vib->vib_cv, NULL, CV_DRIVER, NULL);
935f8296c60SJoshua M. Clulow 	mutex_init(&vib->vib_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
936f8296c60SJoshua M. Clulow 	did_mutex = B_TRUE;
937f8296c60SJoshua M. Clulow 
938f8296c60SJoshua M. Clulow 	if ((vib->vib_kstat = kstat_create("vioblk", instance,
939f8296c60SJoshua M. Clulow 	    "statistics", "controller", KSTAT_TYPE_NAMED,
940f8296c60SJoshua M. Clulow 	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
941f8296c60SJoshua M. Clulow 	    KSTAT_FLAG_PERSISTENT)) == NULL) {
942f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "kstat_create failed");
943f8296c60SJoshua M. Clulow 		goto fail;
944f8296c60SJoshua M. Clulow 	}
945f8296c60SJoshua M. Clulow 	vib->vib_stats = (vioblk_stats_t *)vib->vib_kstat->ks_data;
946f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_rw_outofmemory,
947f8296c60SJoshua M. Clulow 	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
948f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_rw_badoffset,
949f8296c60SJoshua M. Clulow 	    "total_rw_badoffset", KSTAT_DATA_UINT64);
950f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_intr_total,
951f8296c60SJoshua M. Clulow 	    "total_intr", KSTAT_DATA_UINT64);
952f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_io_errors,
953f8296c60SJoshua M. Clulow 	    "total_io_errors", KSTAT_DATA_UINT64);
954f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_unsupp_errors,
955f8296c60SJoshua M. Clulow 	    "total_unsupp_errors", KSTAT_DATA_UINT64);
956f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_nxio_errors,
957f8296c60SJoshua M. Clulow 	    "total_nxio_errors", KSTAT_DATA_UINT64);
958f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_rw_cacheflush,
959f8296c60SJoshua M. Clulow 	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
960f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_rw_cookiesmax,
961f8296c60SJoshua M. Clulow 	    "max_rw_cookies", KSTAT_DATA_UINT32);
962f8296c60SJoshua M. Clulow 	kstat_named_init(&vib->vib_stats->vbs_intr_queuemax,
963f8296c60SJoshua M. Clulow 	    "max_intr_queue", KSTAT_DATA_UINT32);
964f8296c60SJoshua M. Clulow 	kstat_install(vib->vib_kstat);
965f8296c60SJoshua M. Clulow 
966f8296c60SJoshua M. Clulow 	vib->vib_readonly = virtio_feature_present(vio, VIRTIO_BLK_F_RO);
967f8296c60SJoshua M. Clulow 	if ((vib->vib_nblks = virtio_dev_get64(vio,
968f8296c60SJoshua M. Clulow 	    VIRTIO_BLK_CONFIG_CAPACITY)) == UINT64_MAX) {
969f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "invalid capacity");
970f8296c60SJoshua M. Clulow 		goto fail;
9711c5bc425SAlexey Zaytsev 	}
9721c5bc425SAlexey Zaytsev 
973f8296c60SJoshua M. Clulow 	/*
974f8296c60SJoshua M. Clulow 	 * Determine the optimal logical block size recommended by the device.
975f8296c60SJoshua M. Clulow 	 * This size is advisory; the protocol always deals in 512 byte blocks.
976f8296c60SJoshua M. Clulow 	 */
977f8296c60SJoshua M. Clulow 	vib->vib_blk_size = DEV_BSIZE;
978f8296c60SJoshua M. Clulow 	if (virtio_feature_present(vio, VIRTIO_BLK_F_BLK_SIZE)) {
979f8296c60SJoshua M. Clulow 		uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_BLK_SIZE);
9801c5bc425SAlexey Zaytsev 
981f8296c60SJoshua M. Clulow 		if (v != 0 && v != PCI_EINVAL32) {
982f8296c60SJoshua M. Clulow 			vib->vib_blk_size = v;
983f8296c60SJoshua M. Clulow 		}
984f8296c60SJoshua M. Clulow 	}
9851c5bc425SAlexey Zaytsev 
986f8296c60SJoshua M. Clulow 	/*
987c5c712a8SToomas Soome 	 * Device capacity is always in 512-byte units, convert to
988c5c712a8SToomas Soome 	 * native blocks.
989c5c712a8SToomas Soome 	 */
990c5c712a8SToomas Soome 	vib->vib_nblks = (vib->vib_nblks * DEV_BSIZE) / vib->vib_blk_size;
991c5c712a8SToomas Soome 
992c5c712a8SToomas Soome 	/*
993f8296c60SJoshua M. Clulow 	 * The device may also provide an advisory physical block size.
994f8296c60SJoshua M. Clulow 	 */
995f8296c60SJoshua M. Clulow 	vib->vib_pblk_size = vib->vib_blk_size;
996f8296c60SJoshua M. Clulow 	if (virtio_feature_present(vio, VIRTIO_BLK_F_TOPOLOGY)) {
997f8296c60SJoshua M. Clulow 		uint8_t v = virtio_dev_get8(vio, VIRTIO_BLK_CONFIG_TOPO_PBEXP);
9981c5bc425SAlexey Zaytsev 
999f8296c60SJoshua M. Clulow 		if (v != PCI_EINVAL8) {
1000f8296c60SJoshua M. Clulow 			vib->vib_pblk_size <<= v;
1001f8296c60SJoshua M. Clulow 		}
1002f8296c60SJoshua M. Clulow 	}
10031c5bc425SAlexey Zaytsev 
1004f8296c60SJoshua M. Clulow 	/*
1005f8296c60SJoshua M. Clulow 	 * The maximum size for a cookie in a request.
1006f8296c60SJoshua M. Clulow 	 */
1007f8296c60SJoshua M. Clulow 	vib->vib_seg_size_max = VIRTIO_BLK_DEFAULT_MAX_SIZE;
1008f8296c60SJoshua M. Clulow 	if (virtio_feature_present(vio, VIRTIO_BLK_F_SIZE_MAX)) {
1009f8296c60SJoshua M. Clulow 		uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_SIZE_MAX);
1010f8296c60SJoshua M. Clulow 
1011f8296c60SJoshua M. Clulow 		if (v != 0 && v != PCI_EINVAL32) {
1012f8296c60SJoshua M. Clulow 			vib->vib_seg_size_max = v;
1013f8296c60SJoshua M. Clulow 		}
1014f8296c60SJoshua M. Clulow 	}
1015f8296c60SJoshua M. Clulow 
1016f8296c60SJoshua M. Clulow 	/*
1017f8296c60SJoshua M. Clulow 	 * Set up the DMA attributes for blkdev to use for request data.  The
1018f8296c60SJoshua M. Clulow 	 * specification is not extremely clear about whether DMA-related
1019f8296c60SJoshua M. Clulow 	 * parameters include or exclude the header and status descriptors.
1020f8296c60SJoshua M. Clulow 	 * For now, we assume they cover only the request data and not the
1021f8296c60SJoshua M. Clulow 	 * headers.
1022f8296c60SJoshua M. Clulow 	 */
1023f8296c60SJoshua M. Clulow 	vib->vib_bd_dma_attr = vioblk_dma_attr;
1024f8296c60SJoshua M. Clulow 	vib->vib_bd_dma_attr.dma_attr_sgllen = vib->vib_seg_max;
1025f8296c60SJoshua M. Clulow 	vib->vib_bd_dma_attr.dma_attr_count_max = vib->vib_seg_size_max;
1026f8296c60SJoshua M. Clulow 	vib->vib_bd_dma_attr.dma_attr_maxxfer = vib->vib_seg_max *
1027f8296c60SJoshua M. Clulow 	    vib->vib_seg_size_max;
1028f8296c60SJoshua M. Clulow 
1029f8296c60SJoshua M. Clulow 	if (vioblk_alloc_reqs(vib) != 0) {
1030f8296c60SJoshua M. Clulow 		goto fail;
1031f8296c60SJoshua M. Clulow 	}
1032f8296c60SJoshua M. Clulow 
1033f8296c60SJoshua M. Clulow 	/*
1034f8296c60SJoshua M. Clulow 	 * The blkdev framework does not provide a way to specify that the
1035f8296c60SJoshua M. Clulow 	 * device does not support write cache flushing, except by omitting the
1036f8296c60SJoshua M. Clulow 	 * "o_sync_cache" member from the ops vector.  As "bd_alloc_handle()"
1037f8296c60SJoshua M. Clulow 	 * makes a copy of the ops vector, we can safely assemble one on the
1038f8296c60SJoshua M. Clulow 	 * stack based on negotiated features.
1039*1a5ae140SJason King 	 *
1040*1a5ae140SJason King 	 * Similarly, the blkdev framework does not provide a way to indicate
1041*1a5ae140SJason King 	 * if a device supports an TRIM/UNMAP/DISCARD type operation except
1042*1a5ae140SJason King 	 * by omitting the "o_free_space" member from the ops vector.
1043f8296c60SJoshua M. Clulow 	 */
1044f8296c60SJoshua M. Clulow 	bd_ops_t vioblk_bd_ops = {
10454d95620bSPaul Winder 		.o_version =		BD_OPS_CURRENT_VERSION,
1046f8296c60SJoshua M. Clulow 		.o_drive_info =		vioblk_bd_driveinfo,
1047f8296c60SJoshua M. Clulow 		.o_media_info =		vioblk_bd_mediainfo,
1048f8296c60SJoshua M. Clulow 		.o_devid_init =		vioblk_bd_devid,
1049f8296c60SJoshua M. Clulow 		.o_sync_cache =		vioblk_bd_flush,
1050f8296c60SJoshua M. Clulow 		.o_read =		vioblk_bd_read,
1051f8296c60SJoshua M. Clulow 		.o_write =		vioblk_bd_write,
1052*1a5ae140SJason King 		.o_free_space =		vioblk_bd_free_space,
1053f8296c60SJoshua M. Clulow 	};
1054f8296c60SJoshua M. Clulow 	if (!virtio_feature_present(vio, VIRTIO_BLK_F_FLUSH)) {
1055f8296c60SJoshua M. Clulow 		vioblk_bd_ops.o_sync_cache = NULL;
1056f8296c60SJoshua M. Clulow 	}
1057*1a5ae140SJason King 	if (!vib->vib_can_discard) {
1058*1a5ae140SJason King 		vioblk_bd_ops.o_free_space = NULL;
1059*1a5ae140SJason King 	}
1060f8296c60SJoshua M. Clulow 
1061f8296c60SJoshua M. Clulow 	vib->vib_bd_h = bd_alloc_handle(vib, &vioblk_bd_ops,
1062f8296c60SJoshua M. Clulow 	    &vib->vib_bd_dma_attr, KM_SLEEP);
1063f8296c60SJoshua M. Clulow 
1064f8296c60SJoshua M. Clulow 	/*
1065f8296c60SJoshua M. Clulow 	 * Enable interrupts now so that we can request the device identity.
1066f8296c60SJoshua M. Clulow 	 */
1067f8296c60SJoshua M. Clulow 	if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
1068f8296c60SJoshua M. Clulow 		goto fail;
1069f8296c60SJoshua M. Clulow 	}
1070f8296c60SJoshua M. Clulow 
1071f8296c60SJoshua M. Clulow 	vioblk_get_id(vib);
1072f8296c60SJoshua M. Clulow 
1073f8296c60SJoshua M. Clulow 	if (bd_attach_handle(dip, vib->vib_bd_h) != DDI_SUCCESS) {
1074f8296c60SJoshua M. Clulow 		dev_err(dip, CE_WARN, "Failed to attach blkdev");
1075f8296c60SJoshua M. Clulow 		goto fail;
10761c5bc425SAlexey Zaytsev 	}
10771c5bc425SAlexey Zaytsev 
10781c5bc425SAlexey Zaytsev 	return (DDI_SUCCESS);
10791c5bc425SAlexey Zaytsev 
1080f8296c60SJoshua M. Clulow fail:
1081f8296c60SJoshua M. Clulow 	if (vib->vib_bd_h != NULL) {
1082f8296c60SJoshua M. Clulow 		(void) bd_detach_handle(vib->vib_bd_h);
1083f8296c60SJoshua M. Clulow 		bd_free_handle(vib->vib_bd_h);
1084f8296c60SJoshua M. Clulow 	}
1085f8296c60SJoshua M. Clulow 	if (vio != NULL) {
1086f8296c60SJoshua M. Clulow 		(void) virtio_fini(vio, B_TRUE);
1087f8296c60SJoshua M. Clulow 	}
1088f8296c60SJoshua M. Clulow 	if (did_mutex) {
1089f8296c60SJoshua M. Clulow 		mutex_destroy(&vib->vib_mutex);
1090f8296c60SJoshua M. Clulow 		cv_destroy(&vib->vib_cv);
1091f8296c60SJoshua M. Clulow 	}
1092f8296c60SJoshua M. Clulow 	if (vib->vib_kstat != NULL) {
1093f8296c60SJoshua M. Clulow 		kstat_delete(vib->vib_kstat);
1094f8296c60SJoshua M. Clulow 	}
1095f8296c60SJoshua M. Clulow 	vioblk_free_reqs(vib);
1096f8296c60SJoshua M. Clulow 	kmem_free(vib, sizeof (*vib));
1097d48defc5SHans Rosenfeld 	return (DDI_FAILURE);
10981c5bc425SAlexey Zaytsev }
10991c5bc425SAlexey Zaytsev 
11001c5bc425SAlexey Zaytsev static int
1101f8296c60SJoshua M. Clulow vioblk_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
11021c5bc425SAlexey Zaytsev {
1103f8296c60SJoshua M. Clulow 	vioblk_t *vib = ddi_get_driver_private(dip);
11041c5bc425SAlexey Zaytsev 
1105f8296c60SJoshua M. Clulow 	if (cmd != DDI_DETACH) {
11061c5bc425SAlexey Zaytsev 		return (DDI_FAILURE);
11071c5bc425SAlexey Zaytsev 	}
11081c5bc425SAlexey Zaytsev 
1109f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
1110f8296c60SJoshua M. Clulow 	if (vib->vib_nreqs_alloc > 0) {
1111f8296c60SJoshua M. Clulow 		/*
1112f8296c60SJoshua M. Clulow 		 * Cannot detach while there are still outstanding requests.
1113f8296c60SJoshua M. Clulow 		 */
1114f8296c60SJoshua M. Clulow 		mutex_exit(&vib->vib_mutex);
1115f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
1116f8296c60SJoshua M. Clulow 	}
1117f8296c60SJoshua M. Clulow 
1118f8296c60SJoshua M. Clulow 	if (bd_detach_handle(vib->vib_bd_h) != DDI_SUCCESS) {
1119f8296c60SJoshua M. Clulow 		mutex_exit(&vib->vib_mutex);
1120f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
1121f8296c60SJoshua M. Clulow 	}
1122f8296c60SJoshua M. Clulow 
1123f8296c60SJoshua M. Clulow 	/*
1124f8296c60SJoshua M. Clulow 	 * Tear down the Virtio framework before freeing the rest of the
1125f8296c60SJoshua M. Clulow 	 * resources.  This will ensure the interrupt handlers are no longer
1126f8296c60SJoshua M. Clulow 	 * running.
1127f8296c60SJoshua M. Clulow 	 */
1128f8296c60SJoshua M. Clulow 	virtio_fini(vib->vib_virtio, B_FALSE);
1129f8296c60SJoshua M. Clulow 
1130f8296c60SJoshua M. Clulow 	vioblk_free_reqs(vib);
1131f8296c60SJoshua M. Clulow 	kstat_delete(vib->vib_kstat);
1132f8296c60SJoshua M. Clulow 
1133f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
1134f8296c60SJoshua M. Clulow 	mutex_destroy(&vib->vib_mutex);
1135f8296c60SJoshua M. Clulow 
1136f8296c60SJoshua M. Clulow 	kmem_free(vib, sizeof (*vib));
11371c5bc425SAlexey Zaytsev 
11381c5bc425SAlexey Zaytsev 	return (DDI_SUCCESS);
11391c5bc425SAlexey Zaytsev }
11401c5bc425SAlexey Zaytsev 
11411c5bc425SAlexey Zaytsev static int
1142f8296c60SJoshua M. Clulow vioblk_quiesce(dev_info_t *dip)
11431c5bc425SAlexey Zaytsev {
1144f8296c60SJoshua M. Clulow 	vioblk_t *vib;
11451c5bc425SAlexey Zaytsev 
1146f8296c60SJoshua M. Clulow 	if ((vib = ddi_get_driver_private(dip)) == NULL) {
1147f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
1148f8296c60SJoshua M. Clulow 	}
11491c5bc425SAlexey Zaytsev 
1150f8296c60SJoshua M. Clulow 	return (virtio_quiesce(vib->vib_virtio));
11511c5bc425SAlexey Zaytsev }
11521c5bc425SAlexey Zaytsev 
11531c5bc425SAlexey Zaytsev int
11541c5bc425SAlexey Zaytsev _init(void)
11551c5bc425SAlexey Zaytsev {
11561c5bc425SAlexey Zaytsev 	int rv;
11571c5bc425SAlexey Zaytsev 
11581c5bc425SAlexey Zaytsev 	bd_mod_init(&vioblk_dev_ops);
11591c5bc425SAlexey Zaytsev 
1160f8296c60SJoshua M. Clulow 	if ((rv = mod_install(&vioblk_modlinkage)) != 0) {
11611c5bc425SAlexey Zaytsev 		bd_mod_fini(&vioblk_dev_ops);
11621c5bc425SAlexey Zaytsev 	}
11631c5bc425SAlexey Zaytsev 
11641c5bc425SAlexey Zaytsev 	return (rv);
11651c5bc425SAlexey Zaytsev }
11661c5bc425SAlexey Zaytsev 
11671c5bc425SAlexey Zaytsev int
11681c5bc425SAlexey Zaytsev _fini(void)
11691c5bc425SAlexey Zaytsev {
11701c5bc425SAlexey Zaytsev 	int rv;
11711c5bc425SAlexey Zaytsev 
1172f8296c60SJoshua M. Clulow 	if ((rv = mod_remove(&vioblk_modlinkage)) == 0) {
11731c5bc425SAlexey Zaytsev 		bd_mod_fini(&vioblk_dev_ops);
11741c5bc425SAlexey Zaytsev 	}
11751c5bc425SAlexey Zaytsev 
11761c5bc425SAlexey Zaytsev 	return (rv);
11771c5bc425SAlexey Zaytsev }
11781c5bc425SAlexey Zaytsev 
11791c5bc425SAlexey Zaytsev int
11801c5bc425SAlexey Zaytsev _info(struct modinfo *modinfop)
11811c5bc425SAlexey Zaytsev {
1182f8296c60SJoshua M. Clulow 	return (mod_info(&vioblk_modlinkage, modinfop));
11831c5bc425SAlexey Zaytsev }
1184