xref: /illumos-gate/usr/src/uts/common/io/vioblk/vioblk.c (revision f8296c60994fb27105f37ac6f75661e4a6bdbab7)
11c5bc425SAlexey Zaytsev /*
21c5bc425SAlexey Zaytsev  * CDDL HEADER START
31c5bc425SAlexey Zaytsev  *
41c5bc425SAlexey Zaytsev  * The contents of this file are subject to the terms of the
51c5bc425SAlexey Zaytsev  * Common Development and Distribution License (the "License").
61c5bc425SAlexey Zaytsev  * You may not use this file except in compliance with the License.
71c5bc425SAlexey Zaytsev  *
81c5bc425SAlexey Zaytsev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91c5bc425SAlexey Zaytsev  * or http://www.opensolaris.org/os/licensing.
101c5bc425SAlexey Zaytsev  * See the License for the specific language governing permissions
111c5bc425SAlexey Zaytsev  * and limitations under the License.
121c5bc425SAlexey Zaytsev  *
131c5bc425SAlexey Zaytsev  * When distributing Covered Code, include this CDDL HEADER in each
141c5bc425SAlexey Zaytsev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151c5bc425SAlexey Zaytsev  * If applicable, add the following below this CDDL HEADER, with the
161c5bc425SAlexey Zaytsev  * fields enclosed by brackets "[]" replaced with your own identifying
171c5bc425SAlexey Zaytsev  * information: Portions Copyright [yyyy] [name of copyright owner]
181c5bc425SAlexey Zaytsev  *
191c5bc425SAlexey Zaytsev  * CDDL HEADER END
201c5bc425SAlexey Zaytsev  */
211c5bc425SAlexey Zaytsev 
221c5bc425SAlexey Zaytsev /*
23510a6847SHans Rosenfeld  * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
241c5bc425SAlexey Zaytsev  * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
25*f8296c60SJoshua M. Clulow  * Copyright 2019 Joyent Inc.
261c5bc425SAlexey Zaytsev  */
271c5bc425SAlexey Zaytsev 
28*f8296c60SJoshua M. Clulow /*
29*f8296c60SJoshua M. Clulow  * VIRTIO BLOCK DRIVER
30*f8296c60SJoshua M. Clulow  *
31*f8296c60SJoshua M. Clulow  * This driver provides support for Virtio Block devices.  Each driver instance
32*f8296c60SJoshua M. Clulow  * attaches to a single underlying block device.
33*f8296c60SJoshua M. Clulow  *
34*f8296c60SJoshua M. Clulow  * REQUEST CHAIN LAYOUT
35*f8296c60SJoshua M. Clulow  *
36*f8296c60SJoshua M. Clulow  * Every request chain sent to the I/O queue has the following structure.  Each
37*f8296c60SJoshua M. Clulow  * box in the diagram represents a descriptor entry (i.e., a DMA cookie) within
38*f8296c60SJoshua M. Clulow  * the chain:
39*f8296c60SJoshua M. Clulow  *
40*f8296c60SJoshua M. Clulow  *    +-0-----------------------------------------+
41*f8296c60SJoshua M. Clulow  *    | struct virtio_blk_hdr                     |-----------------------\
42*f8296c60SJoshua M. Clulow  *    |   (written by driver, read by device)     |                       |
43*f8296c60SJoshua M. Clulow  *    +-1-----------------------------------------+                       |
44*f8296c60SJoshua M. Clulow  *    | optional data payload                     |--\                    |
45*f8296c60SJoshua M. Clulow  *    |   (written by driver for write requests,  |  |                    |
46*f8296c60SJoshua M. Clulow  *    |    or by device for read requests)        |  |                    |
47*f8296c60SJoshua M. Clulow  *    +-2-----------------------------------------+  |                    |
48*f8296c60SJoshua M. Clulow  *    | ,~`           :                              |-cookies loaned     |
49*f8296c60SJoshua M. Clulow  *    |/              :                        ,~`|  | from blkdev        |
50*f8296c60SJoshua M. Clulow  *                    :                       /   |  |                    |
51*f8296c60SJoshua M. Clulow  *    +-(N - 1)-----------------------------------+  |                    |
52*f8296c60SJoshua M. Clulow  *    | ... end of data payload.                  |  |                    |
53*f8296c60SJoshua M. Clulow  *    |                                           |  |                    |
54*f8296c60SJoshua M. Clulow  *    |                                           |--/                    |
55*f8296c60SJoshua M. Clulow  *    +-N-----------------------------------------+                       |
56*f8296c60SJoshua M. Clulow  *    | status byte                               |                       |
57*f8296c60SJoshua M. Clulow  *    |   (written by device, read by driver)     |--------------------\  |
58*f8296c60SJoshua M. Clulow  *    +-------------------------------------------+                    |  |
59*f8296c60SJoshua M. Clulow  *                                                                     |  |
60*f8296c60SJoshua M. Clulow  * The memory for the header and status bytes (i.e., 0 and N above)    |  |
61*f8296c60SJoshua M. Clulow  * is allocated as a single chunk by vioblk_alloc_reqs():              |  |
62*f8296c60SJoshua M. Clulow  *                                                                     |  |
63*f8296c60SJoshua M. Clulow  *    +-------------------------------------------+                    |  |
64*f8296c60SJoshua M. Clulow  *    | struct virtio_blk_hdr                     |<----------------------/
65*f8296c60SJoshua M. Clulow  *    +-------------------------------------------+                    |
66*f8296c60SJoshua M. Clulow  *    | status byte                               |<-------------------/
67*f8296c60SJoshua M. Clulow  *    +-------------------------------------------+
68*f8296c60SJoshua M. Clulow  */
691c5bc425SAlexey Zaytsev 
701c5bc425SAlexey Zaytsev #include <sys/modctl.h>
711c5bc425SAlexey Zaytsev #include <sys/blkdev.h>
721c5bc425SAlexey Zaytsev #include <sys/types.h>
731c5bc425SAlexey Zaytsev #include <sys/errno.h>
741c5bc425SAlexey Zaytsev #include <sys/param.h>
751c5bc425SAlexey Zaytsev #include <sys/stropts.h>
761c5bc425SAlexey Zaytsev #include <sys/stream.h>
771c5bc425SAlexey Zaytsev #include <sys/strsubr.h>
781c5bc425SAlexey Zaytsev #include <sys/kmem.h>
791c5bc425SAlexey Zaytsev #include <sys/conf.h>
801c5bc425SAlexey Zaytsev #include <sys/devops.h>
811c5bc425SAlexey Zaytsev #include <sys/ksynch.h>
821c5bc425SAlexey Zaytsev #include <sys/stat.h>
831c5bc425SAlexey Zaytsev #include <sys/modctl.h>
841c5bc425SAlexey Zaytsev #include <sys/debug.h>
851c5bc425SAlexey Zaytsev #include <sys/pci.h>
8694c3dad2SToomas Soome #include <sys/containerof.h>
87*f8296c60SJoshua M. Clulow #include <sys/ctype.h>
88*f8296c60SJoshua M. Clulow #include <sys/sysmacros.h>
891c5bc425SAlexey Zaytsev 
90*f8296c60SJoshua M. Clulow #include "virtio.h"
91*f8296c60SJoshua M. Clulow #include "vioblk.h"
921c5bc425SAlexey Zaytsev 
931c5bc425SAlexey Zaytsev 
94*f8296c60SJoshua M. Clulow static void vioblk_get_id(vioblk_t *);
95*f8296c60SJoshua M. Clulow uint_t vioblk_int_handler(caddr_t, caddr_t);
96*f8296c60SJoshua M. Clulow static uint_t vioblk_poll(vioblk_t *);
971c5bc425SAlexey Zaytsev static int vioblk_quiesce(dev_info_t *);
981c5bc425SAlexey Zaytsev static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
991c5bc425SAlexey Zaytsev static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);
1001c5bc425SAlexey Zaytsev 
101*f8296c60SJoshua M. Clulow 
/*
 * Device operations vector.  Note that devo_cb_ops is NULL: character and
 * block entry points for this driver are provided by the blkdev framework
 * rather than directly by vioblk.
 */
static struct dev_ops vioblk_dev_ops = {
	.devo_rev =			DEVO_REV,
	.devo_refcnt =			0,

	.devo_attach =			vioblk_attach,
	.devo_detach =			vioblk_detach,
	.devo_quiesce =			vioblk_quiesce,

	.devo_getinfo =			ddi_no_info,
	.devo_identify =		nulldev,
	.devo_probe =			nulldev,
	.devo_reset =			nodev,
	.devo_cb_ops =			NULL,
	.devo_bus_ops =			NULL,
	.devo_power =			NULL,
};
118*f8296c60SJoshua M. Clulow 
/*
 * Loadable driver module linkage for this driver.
 */
static struct modldrv vioblk_modldrv = {
	.drv_modops =			&mod_driverops,
	.drv_linkinfo =			"VIRTIO block driver",
	.drv_dev_ops =			&vioblk_dev_ops
};
124*f8296c60SJoshua M. Clulow 
/*
 * Module linkage; this module contains only the single driver above.
 */
static struct modlinkage vioblk_modlinkage = {
	.ml_rev =			MODREV_1,
	.ml_linkage =			{ &vioblk_modldrv, NULL }
};
129*f8296c60SJoshua M. Clulow 
130*f8296c60SJoshua M. Clulow /*
131*f8296c60SJoshua M. Clulow  * DMA attribute template for header and status blocks.  We also make a
132*f8296c60SJoshua M. Clulow  * per-instance copy of this template with negotiated sizes from the device for
133*f8296c60SJoshua M. Clulow  * blkdev.
134*f8296c60SJoshua M. Clulow  */
static const ddi_dma_attr_t vioblk_dma_attr = {
	.dma_attr_version =		DMA_ATTR_V0,
	.dma_attr_addr_lo =		0x0000000000000000,
	.dma_attr_addr_hi =		0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max =		0x00000000FFFFFFFF,
	.dma_attr_align =		1,
	.dma_attr_burstsizes =		1,
	.dma_attr_minxfer =		1,
	.dma_attr_maxxfer =		0x00000000FFFFFFFF,
	.dma_attr_seg =			0x00000000FFFFFFFF,
	.dma_attr_sgllen =		1,	/* single cookie per allocation */
	.dma_attr_granular =		1,
	.dma_attr_flags =		0
};
1491c5bc425SAlexey Zaytsev 
1501c5bc425SAlexey Zaytsev 
151*f8296c60SJoshua M. Clulow static vioblk_req_t *
152*f8296c60SJoshua M. Clulow vioblk_req_alloc(vioblk_t *vib)
1531c5bc425SAlexey Zaytsev {
154*f8296c60SJoshua M. Clulow 	vioblk_req_t *vbr;
1551c5bc425SAlexey Zaytsev 
156*f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
1571c5bc425SAlexey Zaytsev 
158*f8296c60SJoshua M. Clulow 	if ((vbr = list_remove_head(&vib->vib_reqs)) == NULL) {
159*f8296c60SJoshua M. Clulow 		return (NULL);
160*f8296c60SJoshua M. Clulow 	}
161*f8296c60SJoshua M. Clulow 	vib->vib_nreqs_alloc++;
1621c5bc425SAlexey Zaytsev 
163*f8296c60SJoshua M. Clulow 	VERIFY0(vbr->vbr_status);
164*f8296c60SJoshua M. Clulow 	vbr->vbr_status |= VIOBLK_REQSTAT_ALLOCATED;
165*f8296c60SJoshua M. Clulow 
166*f8296c60SJoshua M. Clulow 	VERIFY3P(vbr->vbr_xfer, ==, NULL);
167*f8296c60SJoshua M. Clulow 	VERIFY3S(vbr->vbr_error, ==, 0);
168*f8296c60SJoshua M. Clulow 
169*f8296c60SJoshua M. Clulow 	return (vbr);
170*f8296c60SJoshua M. Clulow }
171*f8296c60SJoshua M. Clulow 
172*f8296c60SJoshua M. Clulow static void
173*f8296c60SJoshua M. Clulow vioblk_req_free(vioblk_t *vib, vioblk_req_t *vbr)
174*f8296c60SJoshua M. Clulow {
175*f8296c60SJoshua M. Clulow 	VERIFY(MUTEX_HELD(&vib->vib_mutex));
176*f8296c60SJoshua M. Clulow 
177*f8296c60SJoshua M. Clulow 	/*
178*f8296c60SJoshua M. Clulow 	 * Check that this request was allocated, then zero the status field to
179*f8296c60SJoshua M. Clulow 	 * clear all status bits.
180*f8296c60SJoshua M. Clulow 	 */
181*f8296c60SJoshua M. Clulow 	VERIFY(vbr->vbr_status & VIOBLK_REQSTAT_ALLOCATED);
182*f8296c60SJoshua M. Clulow 	vbr->vbr_status = 0;
183*f8296c60SJoshua M. Clulow 
184*f8296c60SJoshua M. Clulow 	vbr->vbr_xfer = NULL;
185*f8296c60SJoshua M. Clulow 	vbr->vbr_error = 0;
186*f8296c60SJoshua M. Clulow 	vbr->vbr_type = 0;
187*f8296c60SJoshua M. Clulow 
188*f8296c60SJoshua M. Clulow 	list_insert_head(&vib->vib_reqs, vbr);
189*f8296c60SJoshua M. Clulow 
190*f8296c60SJoshua M. Clulow 	VERIFY3U(vib->vib_nreqs_alloc, >, 0);
191*f8296c60SJoshua M. Clulow 	vib->vib_nreqs_alloc--;
192*f8296c60SJoshua M. Clulow }
193*f8296c60SJoshua M. Clulow 
/*
 * Mark a request as complete, updating statistics and, for blkdev framework
 * requests, notifying the framework of the final disposition.  Must be
 * called with the instance mutex held; the mutex is briefly dropped around
 * the call to bd_xfer_done() so the blkdev completion callback does not run
 * with our lock held.
 */
static void
vioblk_complete(vioblk_t *vib, vioblk_req_t *vbr)
{
	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/* A request may be completed at most once. */
	VERIFY(!(vbr->vbr_status & VIOBLK_REQSTAT_COMPLETE));
	vbr->vbr_status |= VIOBLK_REQSTAT_COMPLETE;

	if (vbr->vbr_type == VIRTIO_BLK_T_FLUSH) {
		vib->vib_stats->vbs_rw_cacheflush.value.ui64++;
	}

	if (vbr->vbr_xfer != NULL) {
		/*
		 * This is a blkdev framework request.
		 */
		mutex_exit(&vib->vib_mutex);
		bd_xfer_done(vbr->vbr_xfer, vbr->vbr_error);
		mutex_enter(&vib->vib_mutex);
		vbr->vbr_xfer = NULL;
	}
}
216*f8296c60SJoshua M. Clulow 
/*
 * Allocate a request and a descriptor chain for it, and populate the
 * request header (type, priority, sector) in the request's own DMA memory.
 * The header is appended as the first descriptor in the chain; the caller
 * appends any data payload and then the status byte descriptor via
 * vioblk_common_submit().  Returns NULL (after bumping the out-of-memory
 * stat) if either allocation fails.  Must be called with the instance
 * mutex held.
 */
static virtio_chain_t *
vioblk_common_start(vioblk_t *vib, int type, uint64_t sector,
    boolean_t polled)
{
	vioblk_req_t *vbr = NULL;
	virtio_chain_t *vic = NULL;

	if ((vbr = vioblk_req_alloc(vib)) == NULL) {
		vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
		return (NULL);
	}
	vbr->vbr_type = type;

	if (polled) {
		/*
		 * Mark this command as polled so that we can wait on it
		 * ourselves.
		 */
		vbr->vbr_status |= VIOBLK_REQSTAT_POLLED;
	}

	if ((vic = virtio_chain_alloc(vib->vib_vq, KM_NOSLEEP)) == NULL) {
		vib->vib_stats->vbs_rw_outofmemory.value.ui64++;
		goto fail;
	}

	/*
	 * Build the device-visible request header in the per-request DMA
	 * buffer.
	 */
	struct vioblk_req_hdr vbh;
	vbh.vbh_type = type;
	vbh.vbh_ioprio = 0;
	vbh.vbh_sector = sector;
	bcopy(&vbh, virtio_dma_va(vbr->vbr_dma, 0), sizeof (vbh));

	/* Link the request to the chain so vioblk_poll() can find it. */
	virtio_chain_data_set(vic, vbr);

	/*
	 * Put the header in the first descriptor.  See the block comment at
	 * the top of the file for more details on the chain layout.
	 */
	if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0),
	    sizeof (struct vioblk_req_hdr), VIRTIO_DIR_DEVICE_READS) !=
	    DDI_SUCCESS) {
		goto fail;
	}

	return (vic);

fail:
	vbr->vbr_xfer = NULL;
	vioblk_req_free(vib, vbr);
	if (vic != NULL) {
		virtio_chain_free(vic);
	}
	return (NULL);
}
2711c5bc425SAlexey Zaytsev 
/*
 * Finish and submit a chain built by vioblk_common_start(): append the
 * final status byte descriptor, sync the request DMA memory, and push the
 * chain to the device.  For polled requests this routine blocks until the
 * device completes the request (busy-polling if the system is panicking)
 * and then frees the request and chain, returning the request's error
 * code.  Non-polled requests return 0 immediately; their cleanup happens
 * later in vioblk_poll().  Must be called with the instance mutex held.
 */
static int
vioblk_common_submit(vioblk_t *vib, virtio_chain_t *vic)
{
	int r;
	vioblk_req_t *vbr = virtio_chain_data(vic);

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * The device will write the status byte into this last descriptor.
	 * See the block comment at the top of the file for more details on the
	 * chain layout.
	 */
	if (virtio_chain_append(vic, virtio_dma_cookie_pa(vbr->vbr_dma, 0) +
	    sizeof (struct vioblk_req_hdr), sizeof (uint8_t),
	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
		r = ENOMEM;
		goto out;
	}

	virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORDEV);
	virtio_chain_submit(vic, B_TRUE);

	if (!(vbr->vbr_status & VIOBLK_REQSTAT_POLLED)) {
		/*
		 * This is not a polled request.  Our request will be freed and
		 * the caller notified later in vioblk_poll().
		 */
		return (0);
	}

	/*
	 * This is a polled request.  We need to block here and wait for the
	 * device to complete request processing.
	 */
	while (!(vbr->vbr_status & VIOBLK_REQSTAT_POLL_COMPLETE)) {
		if (ddi_in_panic()) {
			/*
			 * When panicking, interrupts are disabled.  We must
			 * poll the queue manually.
			 */
			drv_usecwait(10);
			(void) vioblk_poll(vib);
			continue;
		}

		/*
		 * When not panicking, the device will interrupt on command
		 * completion and vioblk_poll() will be called to wake us up.
		 */
		cv_wait(&vib->vib_cv, &vib->vib_mutex);
	}

	vioblk_complete(vib, vbr);
	r = vbr->vbr_error;

out:
	vioblk_req_free(vib, vbr);
	virtio_chain_free(vic);
	return (r);
}
3331c5bc425SAlexey Zaytsev 
/*
 * Issue a driver-internal (non-blkdev) command of the given type as a
 * polled request, with an optional data payload described by "dma".  Each
 * cookie of the payload is appended between the header and status
 * descriptors.  Returns 0 on success or an errno value.  Must be called
 * with the instance mutex held; blocks until the device completes the
 * request.
 */
static int
vioblk_internal(vioblk_t *vib, int type, virtio_dma_t *dma,
    uint64_t sector, virtio_direction_t dir)
{
	virtio_chain_t *vic;
	vioblk_req_t *vbr;
	int r;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * Allocate a polled request.
	 */
	if ((vic = vioblk_common_start(vib, type, sector, B_TRUE)) == NULL) {
		return (ENOMEM);
	}
	vbr = virtio_chain_data(vic);

	/*
	 * If there is a request payload, it goes between the header and the
	 * status byte.  See the block comment at the top of the file for more
	 * detail on the chain layout.
	 */
	if (dma != NULL) {
		for (uint_t n = 0; n < virtio_dma_ncookies(dma); n++) {
			if (virtio_chain_append(vic,
			    virtio_dma_cookie_pa(dma, n),
			    virtio_dma_cookie_size(dma, n), dir) !=
			    DDI_SUCCESS) {
				r = ENOMEM;
				goto out;
			}
		}
	}

	return (vioblk_common_submit(vib, vic));

out:
	vioblk_req_free(vib, vbr);
	virtio_chain_free(vic);
	return (r);
}
376*f8296c60SJoshua M. Clulow 
/*
 * Issue a blkdev framework request (read, write, or flush).  The transfer's
 * DMA cookies, if any, are appended between the header and status
 * descriptors.  For polled transfers (BD_XFER_POLL) this blocks until the
 * device completes the request.  Returns 0 on success or an errno value.
 * Must be called with the instance mutex held.
 */
static int
vioblk_request(vioblk_t *vib, bd_xfer_t *xfer, int type)
{
	virtio_chain_t *vic = NULL;
	vioblk_req_t *vbr = NULL;
	uint_t total_cookies = 2;	/* header + status descriptors */
	boolean_t polled = (xfer->x_flags & BD_XFER_POLL) != 0;
	int r;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	/*
	 * Ensure that this request falls within the advertised size of the
	 * block device.  Be careful to avoid overflow.
	 */
	if (xfer->x_nblks > SIZE_MAX - xfer->x_blkno ||
	    (xfer->x_blkno + xfer->x_nblks) > vib->vib_nblks) {
		vib->vib_stats->vbs_rw_badoffset.value.ui64++;
		return (EINVAL);
	}

	if ((vic = vioblk_common_start(vib, type, xfer->x_blkno, polled)) ==
	    NULL) {
		return (ENOMEM);
	}
	vbr = virtio_chain_data(vic);
	vbr->vbr_xfer = xfer;

	/*
	 * If there is a request payload, it goes between the header and the
	 * status byte.  See the block comment at the top of the file for more
	 * detail on the chain layout.
	 */
	if ((type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_OUT) &&
	    xfer->x_nblks > 0) {
		virtio_direction_t dir = (type == VIRTIO_BLK_T_OUT) ?
		    VIRTIO_DIR_DEVICE_READS : VIRTIO_DIR_DEVICE_WRITES;

		for (uint_t n = 0; n < xfer->x_ndmac; n++) {
			ddi_dma_cookie_t dmac;

			if (n == 0) {
				/*
				 * The first cookie is in the blkdev request.
				 */
				dmac = xfer->x_dmac;
			} else {
				/* Subsequent cookies come from the handle. */
				ddi_dma_nextcookie(xfer->x_dmah, &dmac);
			}

			if (virtio_chain_append(vic, dmac.dmac_laddress,
			    dmac.dmac_size, dir) != DDI_SUCCESS) {
				r = ENOMEM;
				goto fail;
			}
		}

		total_cookies += xfer->x_ndmac;

	} else if (xfer->x_nblks > 0) {
		/* Only reads and writes are allowed to carry a payload. */
		dev_err(vib->vib_dip, CE_PANIC,
		    "request of type %d had payload length of %lu blocks", type,
		    xfer->x_nblks);
	}

	/* Track the high-water mark for chain length in the kstats. */
	if (vib->vib_stats->vbs_rw_cookiesmax.value.ui32 < total_cookies) {
		vib->vib_stats->vbs_rw_cookiesmax.value.ui32 = total_cookies;
	}

	return (vioblk_common_submit(vib, vic));

fail:
	vbr->vbr_xfer = NULL;
	vioblk_req_free(vib, vbr);
	virtio_chain_free(vic);
	return (r);
}
4541c5bc425SAlexey Zaytsev 
4551c5bc425SAlexey Zaytsev static int
456*f8296c60SJoshua M. Clulow vioblk_bd_read(void *arg, bd_xfer_t *xfer)
4571c5bc425SAlexey Zaytsev {
458*f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
459*f8296c60SJoshua M. Clulow 	int r;
4601c5bc425SAlexey Zaytsev 
461*f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
462*f8296c60SJoshua M. Clulow 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_IN);
463*f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
4641c5bc425SAlexey Zaytsev 
465*f8296c60SJoshua M. Clulow 	return (r);
4661c5bc425SAlexey Zaytsev }
4671c5bc425SAlexey Zaytsev 
4681c5bc425SAlexey Zaytsev static int
469*f8296c60SJoshua M. Clulow vioblk_bd_write(void *arg, bd_xfer_t *xfer)
4701c5bc425SAlexey Zaytsev {
471*f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
472*f8296c60SJoshua M. Clulow 	int r;
4731c5bc425SAlexey Zaytsev 
474*f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
475*f8296c60SJoshua M. Clulow 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_OUT);
476*f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
4771c5bc425SAlexey Zaytsev 
478*f8296c60SJoshua M. Clulow 	return (r);
4791c5bc425SAlexey Zaytsev }
4801c5bc425SAlexey Zaytsev 
481*f8296c60SJoshua M. Clulow static int
482*f8296c60SJoshua M. Clulow vioblk_bd_flush(void *arg, bd_xfer_t *xfer)
483*f8296c60SJoshua M. Clulow {
484*f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
485*f8296c60SJoshua M. Clulow 	int r;
486*f8296c60SJoshua M. Clulow 
487*f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
488*f8296c60SJoshua M. Clulow 	if (!virtio_feature_present(vib->vib_virtio, VIRTIO_BLK_F_FLUSH)) {
489*f8296c60SJoshua M. Clulow 		/*
490*f8296c60SJoshua M. Clulow 		 * We don't really expect to get here, because if we did not
491*f8296c60SJoshua M. Clulow 		 * negotiate the flush feature we would not have installed this
492*f8296c60SJoshua M. Clulow 		 * function in the blkdev ops vector.
493*f8296c60SJoshua M. Clulow 		 */
494*f8296c60SJoshua M. Clulow 		mutex_exit(&vib->vib_mutex);
495*f8296c60SJoshua M. Clulow 		return (ENOTSUP);
496*f8296c60SJoshua M. Clulow 	}
497*f8296c60SJoshua M. Clulow 
498*f8296c60SJoshua M. Clulow 	r = vioblk_request(vib, xfer, VIRTIO_BLK_T_FLUSH);
499*f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
500*f8296c60SJoshua M. Clulow 
501*f8296c60SJoshua M. Clulow 	return (r);
502*f8296c60SJoshua M. Clulow }
5031c5bc425SAlexey Zaytsev 
5041c5bc425SAlexey Zaytsev static void
505*f8296c60SJoshua M. Clulow vioblk_bd_driveinfo(void *arg, bd_drive_t *drive)
5061c5bc425SAlexey Zaytsev {
507*f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
5081c5bc425SAlexey Zaytsev 
509*f8296c60SJoshua M. Clulow 	drive->d_qsize = vib->vib_reqs_capacity;
5101c5bc425SAlexey Zaytsev 	drive->d_removable = B_FALSE;
5111c5bc425SAlexey Zaytsev 	drive->d_hotpluggable = B_TRUE;
5121c5bc425SAlexey Zaytsev 	drive->d_target = 0;
5131c5bc425SAlexey Zaytsev 	drive->d_lun = 0;
514510a6847SHans Rosenfeld 
515510a6847SHans Rosenfeld 	drive->d_vendor = "Virtio";
516510a6847SHans Rosenfeld 	drive->d_vendor_len = strlen(drive->d_vendor);
517510a6847SHans Rosenfeld 
518510a6847SHans Rosenfeld 	drive->d_product = "Block Device";
519510a6847SHans Rosenfeld 	drive->d_product_len = strlen(drive->d_product);
520510a6847SHans Rosenfeld 
521*f8296c60SJoshua M. Clulow 	drive->d_serial = vib->vib_devid;
522510a6847SHans Rosenfeld 	drive->d_serial_len = strlen(drive->d_serial);
523510a6847SHans Rosenfeld 
524510a6847SHans Rosenfeld 	drive->d_revision = "0000";
525510a6847SHans Rosenfeld 	drive->d_revision_len = strlen(drive->d_revision);
5261c5bc425SAlexey Zaytsev }
5271c5bc425SAlexey Zaytsev 
5281c5bc425SAlexey Zaytsev static int
529*f8296c60SJoshua M. Clulow vioblk_bd_mediainfo(void *arg, bd_media_t *media)
5301c5bc425SAlexey Zaytsev {
531*f8296c60SJoshua M. Clulow 	vioblk_t *vib = (void *)arg;
5321c5bc425SAlexey Zaytsev 
533*f8296c60SJoshua M. Clulow 	/*
534*f8296c60SJoshua M. Clulow 	 * The device protocol is specified in terms of 512 byte logical
535*f8296c60SJoshua M. Clulow 	 * blocks, regardless of the recommended I/O size which might be
536*f8296c60SJoshua M. Clulow 	 * larger.
537*f8296c60SJoshua M. Clulow 	 */
538*f8296c60SJoshua M. Clulow 	media->m_nblks = vib->vib_nblks;
539*f8296c60SJoshua M. Clulow 	media->m_blksize = DEV_BSIZE;
5401c5bc425SAlexey Zaytsev 
541*f8296c60SJoshua M. Clulow 	media->m_readonly = vib->vib_readonly;
542*f8296c60SJoshua M. Clulow 	media->m_pblksize = vib->vib_pblk_size;
5431c5bc425SAlexey Zaytsev 	return (0);
5441c5bc425SAlexey Zaytsev }
5451c5bc425SAlexey Zaytsev 
/*
 * Fetch the device ID string via a polled VIRTIO_BLK_T_GET_ID command.  The
 * raw response is preserved in vib_rawid for debugging; a sanitized copy
 * (restricted to alphanumerics, '-' and '_') is stored in vib_devid for use
 * as a serial number / devid.  Failures are silent: vib_devid simply
 * remains empty.
 */
static void
vioblk_get_id(vioblk_t *vib)
{
	virtio_dma_t *dma;
	int r;

	if ((dma = virtio_dma_alloc(vib->vib_virtio, VIRTIO_BLK_ID_BYTES,
	    &vioblk_dma_attr, DDI_DMA_CONSISTENT | DDI_DMA_READ,
	    KM_SLEEP)) == NULL) {
		return;
	}

	mutex_enter(&vib->vib_mutex);
	if ((r = vioblk_internal(vib, VIRTIO_BLK_T_GET_ID, dma, 0,
	    VIRTIO_DIR_DEVICE_WRITES)) == 0) {
		const char *b = virtio_dma_va(dma, 0);
		uint_t pos = 0;

		/*
		 * Save the entire response for debugging purposes.
		 */
		bcopy(virtio_dma_va(dma, 0), vib->vib_rawid,
		    VIRTIO_BLK_ID_BYTES);

		/*
		 * Process the returned ID.
		 */
		bzero(vib->vib_devid, sizeof (vib->vib_devid));
		for (uint_t n = 0; n < VIRTIO_BLK_ID_BYTES; n++) {
			if (isalnum(b[n]) || b[n] == '-' || b[n] == '_') {
				/*
				 * Accept a subset of printable ASCII
				 * characters.
				 */
				vib->vib_devid[pos++] = b[n];
			} else {
				/*
				 * Stop processing at the first sign of
				 * trouble.
				 */
				break;
			}
		}

		vib->vib_devid_fetched = B_TRUE;
	}
	mutex_exit(&vib->vib_mutex);

	virtio_dma_free(dma);
}
5961c5bc425SAlexey Zaytsev 
5971c5bc425SAlexey Zaytsev static int
598*f8296c60SJoshua M. Clulow vioblk_bd_devid(void *arg, dev_info_t *dip, ddi_devid_t *devid)
5991c5bc425SAlexey Zaytsev {
600*f8296c60SJoshua M. Clulow 	vioblk_t *vib = arg;
601*f8296c60SJoshua M. Clulow 	size_t len;
6021c5bc425SAlexey Zaytsev 
603*f8296c60SJoshua M. Clulow 	if ((len = strlen(vib->vib_devid)) == 0) {
604*f8296c60SJoshua M. Clulow 		/*
605*f8296c60SJoshua M. Clulow 		 * The device has no ID.
606*f8296c60SJoshua M. Clulow 		 */
6071c5bc425SAlexey Zaytsev 		return (DDI_FAILURE);
6081c5bc425SAlexey Zaytsev 	}
6091c5bc425SAlexey Zaytsev 
610*f8296c60SJoshua M. Clulow 	return (ddi_devid_init(dip, DEVID_ATA_SERIAL, len, vib->vib_devid,
611*f8296c60SJoshua M. Clulow 	    devid));
6121c5bc425SAlexey Zaytsev }
6131c5bc425SAlexey Zaytsev 
/*
 * As the device completes processing of a request, it returns the chain for
 * that request to our I/O queue.  This routine is called in two contexts:
 *   - from the interrupt handler, in response to notification from the device
 *   - synchronously in line with request processing when panicking
 *
 * The caller must hold "vib_mutex".  Returns the number of completed chains
 * processed in this invocation.
 */
static uint_t
vioblk_poll(vioblk_t *vib)
{
	virtio_chain_t *vic;
	uint_t count = 0;
	boolean_t wakeup = B_FALSE;

	VERIFY(MUTEX_HELD(&vib->vib_mutex));

	while ((vic = virtio_queue_poll(vib->vib_vq)) != NULL) {
		vioblk_req_t *vbr = virtio_chain_data(vic);
		uint8_t status;

		/*
		 * Sync for the CPU so that we can read the status byte the
		 * device wrote immediately after the request header.
		 */
		virtio_dma_sync(vbr->vbr_dma, DDI_DMA_SYNC_FORCPU);

		bcopy(virtio_dma_va(vbr->vbr_dma,
		    sizeof (struct vioblk_req_hdr)), &status, sizeof (status));

		/*
		 * Translate the virtio status byte into an errno value,
		 * updating the matching error statistic as we go.
		 */
		switch (status) {
		case VIRTIO_BLK_S_OK:
			vbr->vbr_error = 0;
			break;
		case VIRTIO_BLK_S_IOERR:
			vbr->vbr_error = EIO;
			vib->vib_stats->vbs_io_errors.value.ui64++;
			break;
		case VIRTIO_BLK_S_UNSUPP:
			vbr->vbr_error = ENOTTY;
			vib->vib_stats->vbs_unsupp_errors.value.ui64++;
			break;
		default:
			vbr->vbr_error = ENXIO;
			vib->vib_stats->vbs_nxio_errors.value.ui64++;
			break;
		}

		count++;

		if (vbr->vbr_status & VIOBLK_REQSTAT_POLLED) {
			/*
			 * This request must not be freed as it is being held
			 * by a call to vioblk_common_submit().
			 */
			VERIFY(!(vbr->vbr_status &
			    VIOBLK_REQSTAT_POLL_COMPLETE));
			vbr->vbr_status |= VIOBLK_REQSTAT_POLL_COMPLETE;
			wakeup = B_TRUE;
			continue;
		}

		vioblk_complete(vib, vbr);

		vioblk_req_free(vib, vbr);
		virtio_chain_free(vic);
	}

	if (wakeup) {
		/*
		 * Signal anybody waiting for polled command completion.
		 */
		cv_broadcast(&vib->vib_cv);
	}

	return (count);
}
685*f8296c60SJoshua M. Clulow 
6861c5bc425SAlexey Zaytsev uint_t
687*f8296c60SJoshua M. Clulow vioblk_int_handler(caddr_t arg0, caddr_t arg1)
6881c5bc425SAlexey Zaytsev {
689*f8296c60SJoshua M. Clulow 	vioblk_t *vib = (vioblk_t *)arg0;
690*f8296c60SJoshua M. Clulow 	uint_t count;
691*f8296c60SJoshua M. Clulow 
692*f8296c60SJoshua M. Clulow 	mutex_enter(&vib->vib_mutex);
693*f8296c60SJoshua M. Clulow 	if ((count = vioblk_poll(vib)) >
694*f8296c60SJoshua M. Clulow 	    vib->vib_stats->vbs_intr_queuemax.value.ui32) {
695*f8296c60SJoshua M. Clulow 		vib->vib_stats->vbs_intr_queuemax.value.ui32 = count;
6961c5bc425SAlexey Zaytsev 	}
6971c5bc425SAlexey Zaytsev 
698*f8296c60SJoshua M. Clulow 	vib->vib_stats->vbs_intr_total.value.ui64++;
699*f8296c60SJoshua M. Clulow 	mutex_exit(&vib->vib_mutex);
7001c5bc425SAlexey Zaytsev 
701*f8296c60SJoshua M. Clulow 	return (DDI_INTR_CLAIMED);
7021c5bc425SAlexey Zaytsev }
7031c5bc425SAlexey Zaytsev 
7041c5bc425SAlexey Zaytsev static void
705*f8296c60SJoshua M. Clulow vioblk_free_reqs(vioblk_t *vib)
7061c5bc425SAlexey Zaytsev {
707*f8296c60SJoshua M. Clulow 	VERIFY3U(vib->vib_nreqs_alloc, ==, 0);
7081c5bc425SAlexey Zaytsev 
709*f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
710*f8296c60SJoshua M. Clulow 		struct vioblk_req *vbr = &vib->vib_reqs_mem[i];
7111c5bc425SAlexey Zaytsev 
712*f8296c60SJoshua M. Clulow 		VERIFY(list_link_active(&vbr->vbr_link));
713*f8296c60SJoshua M. Clulow 		list_remove(&vib->vib_reqs, vbr);
7141c5bc425SAlexey Zaytsev 
715*f8296c60SJoshua M. Clulow 		VERIFY0(vbr->vbr_status);
7161c5bc425SAlexey Zaytsev 
717*f8296c60SJoshua M. Clulow 		if (vbr->vbr_dma != NULL) {
718*f8296c60SJoshua M. Clulow 			virtio_dma_free(vbr->vbr_dma);
719*f8296c60SJoshua M. Clulow 			vbr->vbr_dma = NULL;
7201c5bc425SAlexey Zaytsev 		}
721*f8296c60SJoshua M. Clulow 	}
722*f8296c60SJoshua M. Clulow 	VERIFY(list_is_empty(&vib->vib_reqs));
7231c5bc425SAlexey Zaytsev 
724*f8296c60SJoshua M. Clulow 	if (vib->vib_reqs_mem != NULL) {
725*f8296c60SJoshua M. Clulow 		kmem_free(vib->vib_reqs_mem,
726*f8296c60SJoshua M. Clulow 		    sizeof (struct vioblk_req) * vib->vib_reqs_capacity);
727*f8296c60SJoshua M. Clulow 		vib->vib_reqs_mem = NULL;
728*f8296c60SJoshua M. Clulow 		vib->vib_reqs_capacity = 0;
729*f8296c60SJoshua M. Clulow 	}
7301c5bc425SAlexey Zaytsev }
7311c5bc425SAlexey Zaytsev 
7321c5bc425SAlexey Zaytsev static int
733*f8296c60SJoshua M. Clulow vioblk_alloc_reqs(vioblk_t *vib)
7341c5bc425SAlexey Zaytsev {
735*f8296c60SJoshua M. Clulow 	vib->vib_reqs_capacity = MIN(virtio_queue_size(vib->vib_vq),
736*f8296c60SJoshua M. Clulow 	    VIRTIO_BLK_REQ_BUFS);
737*f8296c60SJoshua M. Clulow 	vib->vib_reqs_mem = kmem_zalloc(
738*f8296c60SJoshua M. Clulow 	    sizeof (struct vioblk_req) * vib->vib_reqs_capacity, KM_SLEEP);
739*f8296c60SJoshua M. Clulow 	vib->vib_nreqs_alloc = 0;
7401c5bc425SAlexey Zaytsev 
741*f8296c60SJoshua M. Clulow 	for (uint_t i = 0; i < vib->vib_reqs_capacity; i++) {
742*f8296c60SJoshua M. Clulow 		list_insert_tail(&vib->vib_reqs, &vib->vib_reqs_mem[i]);
7431c5bc425SAlexey Zaytsev 	}
7441c5bc425SAlexey Zaytsev 
745*f8296c60SJoshua M. Clulow 	for (vioblk_req_t *vbr = list_head(&vib->vib_reqs); vbr != NULL;
746*f8296c60SJoshua M. Clulow 	    vbr = list_next(&vib->vib_reqs, vbr)) {
747*f8296c60SJoshua M. Clulow 		if ((vbr->vbr_dma = virtio_dma_alloc(vib->vib_virtio,
7481c5bc425SAlexey Zaytsev 		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
749*f8296c60SJoshua M. Clulow 		    &vioblk_dma_attr, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
750*f8296c60SJoshua M. Clulow 		    KM_SLEEP)) == NULL) {
751*f8296c60SJoshua M. Clulow 			goto fail;
7521c5bc425SAlexey Zaytsev 		}
7531c5bc425SAlexey Zaytsev 	}
7541c5bc425SAlexey Zaytsev 
7551c5bc425SAlexey Zaytsev 	return (0);
7561c5bc425SAlexey Zaytsev 
757*f8296c60SJoshua M. Clulow fail:
758*f8296c60SJoshua M. Clulow 	vioblk_free_reqs(vib);
7591c5bc425SAlexey Zaytsev 	return (ENOMEM);
7601c5bc425SAlexey Zaytsev }
7611c5bc425SAlexey Zaytsev 
/*
 * Attach entry point.  Negotiates features with the device, allocates the
 * I/O virtqueue and request trackers, publishes statistics kstats, and
 * finally attaches to the blkdev framework (which creates the minor nodes).
 * On any failure, the partially constructed state is unwound at "fail".
 */
static int
vioblk_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int instance = ddi_get_instance(dip);
	vioblk_t *vib;
	virtio_t *vio;
	boolean_t did_mutex = B_FALSE;

	if (cmd != DDI_ATTACH) {
		return (DDI_FAILURE);
	}

	/*
	 * Begin Virtio initialisation, negotiating the feature bits this
	 * driver knows how to use.
	 */
	if ((vio = virtio_init(dip, VIRTIO_BLK_WANTED_FEATURES, B_TRUE)) ==
	    NULL) {
		dev_err(dip, CE_WARN, "failed to start Virtio init");
		return (DDI_FAILURE);
	}

	vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
	vib->vib_dip = dip;
	vib->vib_virtio = vio;
	ddi_set_driver_private(dip, vib);
	list_create(&vib->vib_reqs, sizeof (vioblk_req_t),
	    offsetof(vioblk_req_t, vbr_link));

	/*
	 * Determine how many scatter-gather entries we can use in a single
	 * request.
	 */
	vib->vib_seg_max = VIRTIO_BLK_DEFAULT_MAX_SEG;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_SEG_MAX)) {
		vib->vib_seg_max = virtio_dev_get32(vio,
		    VIRTIO_BLK_CONFIG_SEG_MAX);

		if (vib->vib_seg_max == 0 || vib->vib_seg_max == PCI_EINVAL32) {
			/*
			 * We need to be able to use at least one data segment,
			 * so we'll assume that this device is just poorly
			 * implemented and try for one.
			 */
			vib->vib_seg_max = 1;
		}
	}

	/*
	 * When allocating the request queue, we include two additional
	 * descriptors (beyond those required for request data) to account for
	 * the header and the status byte.
	 */
	if ((vib->vib_vq = virtio_queue_alloc(vio, VIRTIO_BLK_VIRTQ_IO, "io",
	    vioblk_int_handler, vib, B_FALSE, vib->vib_seg_max + 2)) == NULL) {
		goto fail;
	}

	if (virtio_init_complete(vio, 0) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to complete Virtio init");
		goto fail;
	}

	/*
	 * The mutex is initialised at the interrupt priority returned by the
	 * framework, as it is taken from the interrupt handler.
	 */
	cv_init(&vib->vib_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&vib->vib_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	did_mutex = B_TRUE;

	/*
	 * Publish a named kstat carrying the driver statistics.
	 */
	if ((vib->vib_kstat = kstat_create("vioblk", instance,
	    "statistics", "controller", KSTAT_TYPE_NAMED,
	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT)) == NULL) {
		dev_err(dip, CE_WARN, "kstat_create failed");
		goto fail;
	}
	vib->vib_stats = (vioblk_stats_t *)vib->vib_kstat->ks_data;
	kstat_named_init(&vib->vib_stats->vbs_rw_outofmemory,
	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_rw_badoffset,
	    "total_rw_badoffset", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_intr_total,
	    "total_intr", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_io_errors,
	    "total_io_errors", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_unsupp_errors,
	    "total_unsupp_errors", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_nxio_errors,
	    "total_nxio_errors", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_rw_cacheflush,
	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
	kstat_named_init(&vib->vib_stats->vbs_rw_cookiesmax,
	    "max_rw_cookies", KSTAT_DATA_UINT32);
	kstat_named_init(&vib->vib_stats->vbs_intr_queuemax,
	    "max_intr_queue", KSTAT_DATA_UINT32);
	kstat_install(vib->vib_kstat);

	/*
	 * Read whether the device is read-only and its capacity (in units
	 * of 512 byte blocks).
	 */
	vib->vib_readonly = virtio_feature_present(vio, VIRTIO_BLK_F_RO);
	if ((vib->vib_nblks = virtio_dev_get64(vio,
	    VIRTIO_BLK_CONFIG_CAPACITY)) == UINT64_MAX) {
		dev_err(dip, CE_WARN, "invalid capacity");
		goto fail;
	}

	/*
	 * Determine the optimal logical block size recommended by the device.
	 * This size is advisory; the protocol always deals in 512 byte blocks.
	 */
	vib->vib_blk_size = DEV_BSIZE;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_BLK_SIZE)) {
		uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_BLK_SIZE);

		if (v != 0 && v != PCI_EINVAL32) {
			vib->vib_blk_size = v;
		}
	}

	/*
	 * The device may also provide an advisory physical block size.
	 */
	vib->vib_pblk_size = vib->vib_blk_size;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_TOPOLOGY)) {
		uint8_t v = virtio_dev_get8(vio, VIRTIO_BLK_CONFIG_TOPO_PBEXP);

		if (v != PCI_EINVAL8) {
			vib->vib_pblk_size <<= v;
		}
	}

	/*
	 * The maximum size for a cookie in a request.
	 */
	vib->vib_seg_size_max = VIRTIO_BLK_DEFAULT_MAX_SIZE;
	if (virtio_feature_present(vio, VIRTIO_BLK_F_SIZE_MAX)) {
		uint32_t v = virtio_dev_get32(vio, VIRTIO_BLK_CONFIG_SIZE_MAX);

		if (v != 0 && v != PCI_EINVAL32) {
			vib->vib_seg_size_max = v;
		}
	}

	/*
	 * Set up the DMA attributes for blkdev to use for request data.  The
	 * specification is not extremely clear about whether DMA-related
	 * parameters include or exclude the header and status descriptors.
	 * For now, we assume they cover only the request data and not the
	 * headers.
	 */
	vib->vib_bd_dma_attr = vioblk_dma_attr;
	vib->vib_bd_dma_attr.dma_attr_sgllen = vib->vib_seg_max;
	vib->vib_bd_dma_attr.dma_attr_count_max = vib->vib_seg_size_max;
	vib->vib_bd_dma_attr.dma_attr_maxxfer = vib->vib_seg_max *
	    vib->vib_seg_size_max;

	if (vioblk_alloc_reqs(vib) != 0) {
		goto fail;
	}

	/*
	 * The blkdev framework does not provide a way to specify that the
	 * device does not support write cache flushing, except by omitting the
	 * "o_sync_cache" member from the ops vector.  As "bd_alloc_handle()"
	 * makes a copy of the ops vector, we can safely assemble one on the
	 * stack based on negotiated features.
	 */
	bd_ops_t vioblk_bd_ops = {
		.o_version =		BD_OPS_VERSION_0,
		.o_drive_info =		vioblk_bd_driveinfo,
		.o_media_info =		vioblk_bd_mediainfo,
		.o_devid_init =		vioblk_bd_devid,
		.o_sync_cache =		vioblk_bd_flush,
		.o_read =		vioblk_bd_read,
		.o_write =		vioblk_bd_write,
	};
	if (!virtio_feature_present(vio, VIRTIO_BLK_F_FLUSH)) {
		vioblk_bd_ops.o_sync_cache = NULL;
	}

	vib->vib_bd_h = bd_alloc_handle(vib, &vioblk_bd_ops,
	    &vib->vib_bd_dma_attr, KM_SLEEP);

	/*
	 * Enable interrupts now so that we can request the device identity.
	 */
	if (virtio_interrupts_enable(vio) != DDI_SUCCESS) {
		goto fail;
	}

	vioblk_get_id(vib);

	if (bd_attach_handle(dip, vib->vib_bd_h) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "Failed to attach blkdev");
		goto fail;
	}

	return (DDI_SUCCESS);

fail:
	/*
	 * Release whichever resources were set up before the failure.
	 */
	if (vib->vib_bd_h != NULL) {
		(void) bd_detach_handle(vib->vib_bd_h);
		bd_free_handle(vib->vib_bd_h);
	}
	if (vio != NULL) {
		(void) virtio_fini(vio, B_TRUE);
	}
	if (did_mutex) {
		mutex_destroy(&vib->vib_mutex);
		cv_destroy(&vib->vib_cv);
	}
	if (vib->vib_kstat != NULL) {
		kstat_delete(vib->vib_kstat);
	}
	vioblk_free_reqs(vib);
	kmem_free(vib, sizeof (*vib));
	return (DDI_FAILURE);
}
9721c5bc425SAlexey Zaytsev 
/*
 * Detach entry point.  Refuses to detach while any requests remain
 * outstanding, then detaches from blkdev, shuts down the Virtio framework,
 * and releases the per-instance state allocated in vioblk_attach().
 */
static int
vioblk_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vioblk_t *vib = ddi_get_driver_private(dip);

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	mutex_enter(&vib->vib_mutex);
	if (vib->vib_nreqs_alloc > 0) {
		/*
		 * Cannot detach while there are still outstanding requests.
		 */
		mutex_exit(&vib->vib_mutex);
		return (DDI_FAILURE);
	}

	if (bd_detach_handle(vib->vib_bd_h) != DDI_SUCCESS) {
		mutex_exit(&vib->vib_mutex);
		return (DDI_FAILURE);
	}

	/*
	 * Tear down the Virtio framework before freeing the rest of the
	 * resources.  This will ensure the interrupt handlers are no longer
	 * running.
	 */
	virtio_fini(vib->vib_virtio, B_FALSE);

	vioblk_free_reqs(vib);
	kstat_delete(vib->vib_kstat);

	mutex_exit(&vib->vib_mutex);
	mutex_destroy(&vib->vib_mutex);

	kmem_free(vib, sizeof (*vib));

	return (DDI_SUCCESS);
}
10131c5bc425SAlexey Zaytsev 
10141c5bc425SAlexey Zaytsev static int
1015*f8296c60SJoshua M. Clulow vioblk_quiesce(dev_info_t *dip)
10161c5bc425SAlexey Zaytsev {
1017*f8296c60SJoshua M. Clulow 	vioblk_t *vib;
10181c5bc425SAlexey Zaytsev 
1019*f8296c60SJoshua M. Clulow 	if ((vib = ddi_get_driver_private(dip)) == NULL) {
1020*f8296c60SJoshua M. Clulow 		return (DDI_FAILURE);
1021*f8296c60SJoshua M. Clulow 	}
10221c5bc425SAlexey Zaytsev 
1023*f8296c60SJoshua M. Clulow 	return (virtio_quiesce(vib->vib_virtio));
10241c5bc425SAlexey Zaytsev }
10251c5bc425SAlexey Zaytsev 
10261c5bc425SAlexey Zaytsev int
10271c5bc425SAlexey Zaytsev _init(void)
10281c5bc425SAlexey Zaytsev {
10291c5bc425SAlexey Zaytsev 	int rv;
10301c5bc425SAlexey Zaytsev 
10311c5bc425SAlexey Zaytsev 	bd_mod_init(&vioblk_dev_ops);
10321c5bc425SAlexey Zaytsev 
1033*f8296c60SJoshua M. Clulow 	if ((rv = mod_install(&vioblk_modlinkage)) != 0) {
10341c5bc425SAlexey Zaytsev 		bd_mod_fini(&vioblk_dev_ops);
10351c5bc425SAlexey Zaytsev 	}
10361c5bc425SAlexey Zaytsev 
10371c5bc425SAlexey Zaytsev 	return (rv);
10381c5bc425SAlexey Zaytsev }
10391c5bc425SAlexey Zaytsev 
10401c5bc425SAlexey Zaytsev int
10411c5bc425SAlexey Zaytsev _fini(void)
10421c5bc425SAlexey Zaytsev {
10431c5bc425SAlexey Zaytsev 	int rv;
10441c5bc425SAlexey Zaytsev 
1045*f8296c60SJoshua M. Clulow 	if ((rv = mod_remove(&vioblk_modlinkage)) == 0) {
10461c5bc425SAlexey Zaytsev 		bd_mod_fini(&vioblk_dev_ops);
10471c5bc425SAlexey Zaytsev 	}
10481c5bc425SAlexey Zaytsev 
10491c5bc425SAlexey Zaytsev 	return (rv);
10501c5bc425SAlexey Zaytsev }
10511c5bc425SAlexey Zaytsev 
/*
 * Module information entry point, for modinfo(8).
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&vioblk_modlinkage, modinfop));
}
1057