/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
 */

#include <sys/modctl.h>
#include <sys/blkdev.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/sysmacros.h>
#include "virtiovar.h"
#include "virtioreg.h"

/* Feature bits */
#define	VIRTIO_BLK_F_BARRIER	(1<<0)
#define	VIRTIO_BLK_F_SIZE_MAX	(1<<1)
#define	VIRTIO_BLK_F_SEG_MAX	(1<<2)
#define	VIRTIO_BLK_F_GEOMETRY	(1<<4)
#define	VIRTIO_BLK_F_RO		(1<<5)
#define	VIRTIO_BLK_F_BLK_SIZE	(1<<6)
#define	VIRTIO_BLK_F_SCSI	(1<<7)
#define	VIRTIO_BLK_F_FLUSH	(1<<9)
#define	VIRTIO_BLK_F_TOPOLOGY	(1<<10)

/* Configuration registers */
#define	VIRTIO_BLK_CONFIG_CAPACITY	0 /* 64bit */
#define	VIRTIO_BLK_CONFIG_SIZE_MAX	8 /* 32bit */
#define	VIRTIO_BLK_CONFIG_SEG_MAX	12 /* 32bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_C	16 /* 16bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_H	18 /* 8bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_S	19 /* 8bit */
#define	VIRTIO_BLK_CONFIG_BLK_SIZE	20 /* 32bit */
#define	VIRTIO_BLK_CONFIG_TOPO_PBEXP	24 /* 8bit */
#define	VIRTIO_BLK_CONFIG_TOPO_ALIGN	25 /* 8bit */
#define	VIRTIO_BLK_CONFIG_TOPO_MIN_SZ	26 /* 16bit */
#define	VIRTIO_BLK_CONFIG_TOPO_OPT_SZ	28 /* 32bit */

/* Command */
#define	VIRTIO_BLK_T_IN			0
#define	VIRTIO_BLK_T_OUT		1
#define	VIRTIO_BLK_T_SCSI_CMD		2
#define	VIRTIO_BLK_T_SCSI_CMD_OUT	3
#define	VIRTIO_BLK_T_FLUSH		4
#define	VIRTIO_BLK_T_FLUSH_OUT		5
#define	VIRTIO_BLK_T_GET_ID		8
#define	VIRTIO_BLK_T_BARRIER		0x80000000

#define	VIRTIO_BLK_ID_BYTES	20 /* devid */

/* Statuses */
#define	VIRTIO_BLK_S_OK		0
#define	VIRTIO_BLK_S_IOERR	1
#define	VIRTIO_BLK_S_UNSUPP	2

#define	DEF_MAXINDIRECT		(128)
#define	DEF_MAXSECTOR		(4096)

#define	VIOBLK_POISON		0xdead0001dead0001

/*
 * Static Variables.
 */
static char vioblk_ident[] = "VirtIO block driver";

/* Request header structure */
struct vioblk_req_hdr {
	uint32_t		type;   /* VIRTIO_BLK_T_* */
	uint32_t		ioprio;
	uint64_t		sector;
};

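/*
 * Per-request state.  One vioblk_req is preallocated for every virtqueue
 * entry (see vioblk_alloc_reqs()) and looked up by descriptor index, so
 * the I/O path never allocates memory.  The header and the status byte
 * that follows it share a single DMA binding (req->dmac).
 */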
struct vioblk_req {
	struct vioblk_req_hdr	hdr;
	uint8_t			status;
	uint8_t			unused[3];
	unsigned int		ndmac;
	ddi_dma_handle_t	dmah;
	ddi_dma_handle_t	bd_dmah;
	ddi_dma_cookie_t	dmac;
	bd_xfer_t		*xfer;
};

struct vioblk_stats {
	struct kstat_named	sts_rw_outofmemory;
	struct kstat_named	sts_rw_badoffset;
	struct kstat_named	sts_rw_queuemax;
	struct kstat_named	sts_rw_cookiesmax;
	struct kstat_named	sts_rw_cacheflush;
	struct kstat_named	sts_intr_queuemax;
	struct kstat_named	sts_intr_total;
	struct kstat_named	sts_io_errors;
	struct kstat_named	sts_unsupp_errors;
	struct kstat_named	sts_nxio_errors;
};

struct vioblk_lstats {
	uint64_t		rw_cacheflush;
	uint64_t		intr_total;
	unsigned int		rw_cookiesmax;
	unsigned int		intr_queuemax;
	unsigned int		io_errors;
	unsigned int		unsupp_errors;
	unsigned int		nxio_errors;
};

struct vioblk_softc {
	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
	struct virtio_softc	sc_virtio;
	struct virtqueue	*sc_vq;
	bd_handle_t		bd_h;
	struct vioblk_req	*sc_reqs;
	struct vioblk_stats	*ks_data;
	kstat_t			*sc_intrstat;
	uint64_t		sc_capacity;
	uint64_t		sc_nblks;
	struct vioblk_lstats	sc_stats;
	short			sc_blkflags;
	boolean_t		sc_in_poll_mode;
	boolean_t		sc_readonly;
	int			sc_blk_size;
	int			sc_pblk_size;
	int			sc_seg_max;
	int			sc_seg_size_max;
	kmutex_t		lock_devid;
	kcondvar_t		cv_devid;
	char			devid[VIRTIO_BLK_ID_BYTES + 1];
};

static int vioblk_read(void *arg, bd_xfer_t *xfer);
static int vioblk_write(void *arg, bd_xfer_t *xfer);
static int vioblk_flush(void *arg, bd_xfer_t *xfer);
static void vioblk_driveinfo(void *arg, bd_drive_t *drive);
static int vioblk_mediainfo(void *arg, bd_media_t *media);
static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *);
uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2);

static bd_ops_t vioblk_ops = {
	BD_OPS_VERSION_0,
	vioblk_driveinfo,
	vioblk_mediainfo,
	vioblk_devid_init,
	vioblk_flush,
	vioblk_read,
	vioblk_write,
};

static int vioblk_quiesce(dev_info_t *);
static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);

static struct dev_ops vioblk_dev_ops = {
	DEVO_REV,
	0,
	ddi_no_info,
	nulldev,	/* identify */
	nulldev,	/* probe */
	vioblk_attach,	/* attach */
	vioblk_detach,	/* detach */
	nodev,		/* reset */
	NULL,		/* cb_ops */
	NULL,		/* bus_ops */
	NULL,		/* power */
	vioblk_quiesce	/* quiesce */
};

/* Standard module linkage initialization for a driver */
extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vioblk_ident,		/* short description */
	&vioblk_dev_ops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	{
		(void *)&modldrv,
		NULL,
	},
};

ddi_device_acc_attr_t vioblk_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};

/* DMA attr for the header/status blocks. */
static ddi_dma_attr_t vioblk_req_dma_attr = {
	DMA_ATTR_V0,			/* dma_attr version	*/
	0,				/* dma_attr_addr_lo	*/
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
	1,				/* dma_attr_align	*/
	1,				/* dma_attr_burstsizes	*/
	1,				/* dma_attr_minxfer	*/
	0xFFFFFFFFull,			/* dma_attr_maxxfer	*/
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
	1,				/* dma_attr_sgllen	*/
	1,				/* dma_attr_granular	*/
	0,				/* dma_attr_flags	*/
};

/* DMA attr for the data blocks. */
static ddi_dma_attr_t vioblk_bd_dma_attr = {
	DMA_ATTR_V0,			/* dma_attr version	*/
	0,				/* dma_attr_addr_lo	*/
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
	1,				/* dma_attr_align	*/
	1,				/* dma_attr_burstsizes	*/
	1,				/* dma_attr_minxfer	*/
	0,				/* dma_attr_maxxfer, set in attach */
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
	0,				/* dma_attr_sgllen, set in attach */
	1,				/* dma_attr_granular	*/
	0,				/* dma_attr_flags	*/
};

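/*
 * Build one request and hand it to the device.  Every request becomes a
 * three-part descriptor chain, as the virtio block protocol requires:
 *
 *	vioblk_req_hdr	(device reads:  type, ioprio, sector)
 *	data cookies	(device reads or writes, depending on the type)
 *	status byte	(device writes: VIRTIO_BLK_S_*)
 *
 * The header and status descriptors come from the request's premapped
 * DMA memory; the payload cookies come from the blkdev transfer itself.
 */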
static int
vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type,
    uint32_t len)
{
	struct vioblk_req *req;
	struct vq_entry *ve_hdr;
	int total_cookies, write;

	write = (type == VIRTIO_BLK_T_OUT ||
	    type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0;
	total_cookies = 2;

	if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) {
		sc->ks_data->sts_rw_badoffset.value.ui64++;
		return (EINVAL);
	}

	/* allocate top entry */
	ve_hdr = vq_alloc_entry(sc->sc_vq);
	if (!ve_hdr) {
		sc->ks_data->sts_rw_outofmemory.value.ui64++;
		return (ENOMEM);
	}

	/* set up the request */
	req = &sc->sc_reqs[ve_hdr->qe_index];
	req->hdr.type = type;
	req->hdr.ioprio = 0;
	req->hdr.sector = xfer->x_blkno;
	req->xfer = xfer;

	/* Header */
	virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress,
	    sizeof (struct vioblk_req_hdr), B_TRUE);

	/* Payload */
	if (len > 0) {
		virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac,
		    xfer->x_ndmac, write ? B_TRUE : B_FALSE);
		total_cookies += xfer->x_ndmac;
	}

	/* Status */
	virtio_ve_add_indirect_buf(ve_hdr,
	    req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr),
	    sizeof (uint8_t), B_FALSE);

	/* send the whole chain to the device */
	virtio_push_chain(ve_hdr, B_TRUE);

	if (sc->sc_stats.rw_cookiesmax < total_cookies)
		sc->sc_stats.rw_cookiesmax = total_cookies;

	return (DDI_SUCCESS);
}

/*
 * Now in polling mode. Interrupts are off, so we
 * 1) poll for the already queued requests to complete.
 * 2) push our request.
 * 3) wait for our request to complete.
 */
static int
vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer,
    int type, uint32_t len)
{
	clock_t tmout;
	int ret;

	ASSERT(xfer->x_flags & BD_XFER_POLL);

	/* Prevent a hard hang: give up after 30 seconds. */
	tmout = 30000000; /* microseconds */

	/* Poll for an empty queue */
	while (vq_num_used(sc->sc_vq)) {
		/* Check if any pending requests completed. */
		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
		if (ret != DDI_INTR_CLAIMED) {
			drv_usecwait(10);
			tmout -= 10;
			if (tmout <= 0)
				return (ETIMEDOUT);
		}
	}

	ret = vioblk_rw(sc, xfer, type, len);
	if (ret)
		return (ret);

	tmout = 30000000;
	/* Poll for an empty queue again. */
	while (vq_num_used(sc->sc_vq)) {
		/* Check if any pending requests completed. */
		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
		if (ret != DDI_INTR_CLAIMED) {
			drv_usecwait(10);
			tmout -= 10;
			if (tmout <= 0)
				return (ETIMEDOUT);
		}
	}

	return (DDI_SUCCESS);
}

static int
vioblk_read(void *arg, bd_xfer_t *xfer)
{
	int ret;
	struct vioblk_softc *sc = (void *)arg;

	if (xfer->x_flags & BD_XFER_POLL) {
		if (!sc->sc_in_poll_mode) {
			virtio_stop_vq_intr(sc->sc_vq);
			sc->sc_in_poll_mode = B_TRUE;
		}

		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN,
		    xfer->x_nblks * DEV_BSIZE);
	} else {
		if (sc->sc_in_poll_mode) {
			virtio_start_vq_intr(sc->sc_vq);
			sc->sc_in_poll_mode = B_FALSE;
		}

		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN,
		    xfer->x_nblks * DEV_BSIZE);
	}

	return (ret);
}

static int
vioblk_write(void *arg, bd_xfer_t *xfer)
{
	int ret;
	struct vioblk_softc *sc = (void *)arg;

	if (xfer->x_flags & BD_XFER_POLL) {
		if (!sc->sc_in_poll_mode) {
			virtio_stop_vq_intr(sc->sc_vq);
			sc->sc_in_poll_mode = B_TRUE;
		}

		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT,
		    xfer->x_nblks * DEV_BSIZE);
	} else {
		if (sc->sc_in_poll_mode) {
			virtio_start_vq_intr(sc->sc_vq);
			sc->sc_in_poll_mode = B_FALSE;
		}

		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT,
		    xfer->x_nblks * DEV_BSIZE);
	}

	return (ret);
}

static int
vioblk_flush(void *arg, bd_xfer_t *xfer)
{
	int ret;
	struct vioblk_softc *sc = (void *)arg;

	ASSERT((xfer->x_flags & BD_XFER_POLL) == 0);

	ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT,
	    xfer->x_nblks * DEV_BSIZE);

	if (!ret)
		sc->sc_stats.rw_cacheflush++;

	return (ret);
}

static void
vioblk_driveinfo(void *arg, bd_drive_t *drive)
{
	struct vioblk_softc *sc = (void *)arg;

	drive->d_qsize = sc->sc_vq->vq_num;
	drive->d_removable = B_FALSE;
	drive->d_hotpluggable = B_TRUE;
	drive->d_target = 0;
	drive->d_lun = 0;
}

static int
vioblk_mediainfo(void *arg, bd_media_t *media)
{
	struct vioblk_softc *sc = (void *)arg;

	media->m_nblks = sc->sc_nblks;
	media->m_blksize = sc->sc_blk_size;
	media->m_readonly = sc->sc_readonly;
	media->m_pblksize = sc->sc_pblk_size;
	return (0);
}

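/*
 * Fetch the 20-byte device ID with a VIRTIO_BLK_T_GET_ID request.  The
 * request travels through the regular I/O path; vioblk_int_handler()
 * broadcasts cv_devid when the reply arrives, so we sleep on the condvar
 * (with a deadline) instead of polling.
 */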
static int
vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
{
	struct vioblk_softc *sc = (void *)arg;
	clock_t deadline;
	int ret;
	bd_xfer_t xfer;

	deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000);
	(void) memset(&xfer, 0, sizeof (bd_xfer_t));
	xfer.x_nblks = 1;

	ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr,
	    DDI_DMA_SLEEP, NULL, &xfer.x_dmah);
	if (ret != DDI_SUCCESS)
		goto out_alloc;

	ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid,
	    VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT,
	    DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac);
	if (ret != DDI_DMA_MAPPED) {
		ret = DDI_FAILURE;
		goto out_map;
	}

	mutex_enter(&sc->lock_devid);

	ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID,
	    VIRTIO_BLK_ID_BYTES);
	if (ret) {
		mutex_exit(&sc->lock_devid);
		goto out_rw;
	}

	/* wait for reply */
	ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline);
	mutex_exit(&sc->lock_devid);

	(void) ddi_dma_unbind_handle(xfer.x_dmah);
	ddi_dma_free_handle(&xfer.x_dmah);

	/* timeout */
	if (ret < 0) {
		dev_err(devinfo, CE_WARN, "Cannot get devid from the device");
		return (DDI_FAILURE);
	}

	ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL,
	    VIRTIO_BLK_ID_BYTES, sc->devid, devid);
	if (ret != DDI_SUCCESS) {
		dev_err(devinfo, CE_WARN, "Cannot build devid from the device");
		return (ret);
	}

	dev_debug(sc->sc_dev, CE_NOTE,
	    "devid %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x",
	    sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3],
	    sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7],
	    sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11],
	    sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15],
	    sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]);

	return (0);

out_rw:
	(void) ddi_dma_unbind_handle(xfer.x_dmah);
out_map:
	ddi_dma_free_handle(&xfer.x_dmah);
out_alloc:
	return (ret);
}

static void
vioblk_show_features(struct vioblk_softc *sc, const char *prefix,
    uint32_t features)
{
	char buf[512];
	char *bufp = buf;
	char *bufend = buf + sizeof (buf);

	/* LINTED E_PTRDIFF_OVERFLOW */
	bufp += snprintf(bufp, bufend - bufp, "%s", prefix);

	/* LINTED E_PTRDIFF_OVERFLOW */
	bufp += virtio_show_features(features, bufp, bufend - bufp);

	/* LINTED E_PTRDIFF_OVERFLOW */
	bufp += snprintf(bufp, bufend - bufp, "Vioblk ( ");

	if (features & VIRTIO_BLK_F_BARRIER)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "BARRIER ");
	if (features & VIRTIO_BLK_F_SIZE_MAX)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX ");
	if (features & VIRTIO_BLK_F_SEG_MAX)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "SEG_MAX ");
	if (features & VIRTIO_BLK_F_GEOMETRY)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "GEOMETRY ");
	if (features & VIRTIO_BLK_F_RO)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "RO ");
	if (features & VIRTIO_BLK_F_BLK_SIZE)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE ");
	if (features & VIRTIO_BLK_F_SCSI)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "SCSI ");
	if (features & VIRTIO_BLK_F_FLUSH)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "FLUSH ");
	if (features & VIRTIO_BLK_F_TOPOLOGY)
		/* LINTED E_PTRDIFF_OVERFLOW */
		bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY ");

	/* LINTED E_PTRDIFF_OVERFLOW */
	bufp += snprintf(bufp, bufend - bufp, ")");
	*bufp = '\0';

	dev_debug(sc->sc_dev, CE_NOTE, "%s", buf);
}

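/*
 * Negotiate the feature set with the host.  Indirect descriptors are
 * not optional here: vioblk_rw() places the request header and status
 * in an indirect table, so attach fails if the host does not offer
 * RING_INDIRECT_DESC.
 */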
static int
vioblk_dev_features(struct vioblk_softc *sc)
{
	uint32_t host_features;

	host_features = virtio_negotiate_features(&sc->sc_virtio,
	    VIRTIO_BLK_F_RO |
	    VIRTIO_BLK_F_GEOMETRY |
	    VIRTIO_BLK_F_BLK_SIZE |
	    VIRTIO_BLK_F_FLUSH |
	    VIRTIO_BLK_F_TOPOLOGY |
	    VIRTIO_BLK_F_SEG_MAX |
	    VIRTIO_BLK_F_SIZE_MAX |
	    VIRTIO_F_RING_INDIRECT_DESC);

	vioblk_show_features(sc, "Host features: ", host_features);
	vioblk_show_features(sc, "Negotiated features: ",
	    sc->sc_virtio.sc_features);

	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
		dev_err(sc->sc_dev, CE_NOTE,
		    "Host does not support RING_INDIRECT_DESC, bye.");
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

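/*
 * Interrupt handler: drain every completed chain from the queue, map the
 * one-byte virtio status onto an errno, and pass the transfer back to
 * blkdev.  GET_ID completions instead wake the waiter in
 * vioblk_devid_init().
 */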
/* ARGSUSED */
uint_t
vioblk_int_handler(caddr_t arg1, caddr_t arg2)
{
	struct virtio_softc *vsc = (void *)arg1;
	struct vioblk_softc *sc = container_of(vsc,
	    struct vioblk_softc, sc_virtio);
	struct vq_entry *ve;
	uint32_t len;
	int i = 0, error;

	while ((ve = virtio_pull_chain(sc->sc_vq, &len))) {
		struct vioblk_req *req = &sc->sc_reqs[ve->qe_index];
		bd_xfer_t *xfer = req->xfer;
		uint8_t status = req->status;
		uint32_t type = req->hdr.type;

		if (req->xfer == (void *)VIOBLK_POISON) {
			dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!");
			virtio_free_chain(ve);
			return (DDI_INTR_CLAIMED);
		}

		req->xfer = (void *)VIOBLK_POISON;

		/* Note: blkdev tears down the payload mapping for us. */
		virtio_free_chain(ve);

		/* return the payload back to blkdev */
		switch (status) {
		case VIRTIO_BLK_S_OK:
			error = 0;
			break;
		case VIRTIO_BLK_S_IOERR:
			error = EIO;
			sc->sc_stats.io_errors++;
			break;
		case VIRTIO_BLK_S_UNSUPP:
			sc->sc_stats.unsupp_errors++;
			error = ENOTTY;
			break;
		default:
			sc->sc_stats.nxio_errors++;
			error = ENXIO;
			break;
		}

		if (type == VIRTIO_BLK_T_GET_ID) {
			/* notify devid_init */
			mutex_enter(&sc->lock_devid);
			cv_broadcast(&sc->cv_devid);
			mutex_exit(&sc->lock_devid);
		} else {
			bd_xfer_done(xfer, error);
		}

		i++;
	}

	/* update stats */
	if (sc->sc_stats.intr_queuemax < i)
		sc->sc_stats.intr_queuemax = i;
	sc->sc_stats.intr_total++;

	return (DDI_INTR_CLAIMED);
}

/* ARGSUSED */
uint_t
vioblk_config_handler(caddr_t arg1, caddr_t arg2)
{
	return (DDI_INTR_CLAIMED);
}

static int
vioblk_register_ints(struct vioblk_softc *sc)
{
	int ret;

	struct virtio_int_handler vioblk_conf_h = {
		vioblk_config_handler
	};

	struct virtio_int_handler vioblk_vq_h[] = {
		{ vioblk_int_handler },
		{ NULL },
	};

	ret = virtio_register_ints(&sc->sc_virtio,
	    &vioblk_conf_h, vioblk_vq_h);

	return (ret);
}

static void
vioblk_free_reqs(struct vioblk_softc *sc)
{
	int i, qsize;

	qsize = sc->sc_vq->vq_num;

	for (i = 0; i < qsize; i++) {
		struct vioblk_req *req = &sc->sc_reqs[i];

		if (req->ndmac)
			(void) ddi_dma_unbind_handle(req->dmah);

		if (req->dmah)
			ddi_dma_free_handle(&req->dmah);
	}

	kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize);
}

static int
vioblk_alloc_reqs(struct vioblk_softc *sc)
{
	int i, qsize;
	int ret;

	qsize = sc->sc_vq->vq_num;

	sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP);

	for (i = 0; i < qsize; i++) {
		struct vioblk_req *req = &sc->sc_reqs[i];

		ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr,
		    DDI_DMA_SLEEP, NULL, &req->dmah);
		if (ret != DDI_SUCCESS) {
			dev_err(sc->sc_dev, CE_WARN,
			    "Can't allocate dma handle for req "
			    "buffer %d", i);
			goto exit;
		}

		ret = ddi_dma_addr_bind_handle(req->dmah, NULL,
		    (caddr_t)&req->hdr,
		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
		    NULL, &req->dmac, &req->ndmac);
		if (ret != DDI_DMA_MAPPED) {
			dev_err(sc->sc_dev, CE_WARN,
			    "Can't bind req buffer %d", i);
			goto exit;
		}
	}

	return (0);

exit:
	vioblk_free_reqs(sc);
	return (ENOMEM);
}

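/*
 * kstat update callback.  The counters surface under the named kstat
 * vioblk:<instance>:intrs and can be read from userland with kstat(1M),
 * e.g. "kstat -m vioblk -n intrs".
 */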
static int
vioblk_ksupdate(kstat_t *ksp, int rw)
{
	struct vioblk_softc *sc = ksp->ks_private;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax;
	sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax;
	sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors;
	sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors;
	sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors;
	sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush;
	sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total;

	return (0);
}

static int
vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
	int ret = DDI_SUCCESS;
	int instance;
	struct vioblk_softc *sc;
	struct virtio_softc *vsc;
	struct vioblk_stats *ks_data;

	instance = ddi_get_instance(devinfo);

	switch (cmd) {
	case DDI_ATTACH:
		break;

	case DDI_RESUME:
	case DDI_PM_RESUME:
		dev_err(devinfo, CE_WARN, "resume not supported yet");
		ret = DDI_FAILURE;
		goto exit;

	default:
		dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd);
		ret = DDI_FAILURE;
		goto exit;
	}

	sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP);
	ddi_set_driver_private(devinfo, sc);

	vsc = &sc->sc_virtio;

	/* Duplicate for faster access / less typing */
	sc->sc_dev = devinfo;
	vsc->sc_dev = devinfo;

	cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL);
	mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Initialize the interrupt kstat up front so that the hot path
	 * never has to test for its presence at run time.
	 */
	sc->sc_intrstat = kstat_create("vioblk", instance,
	    "intrs", "controller", KSTAT_TYPE_NAMED,
	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_PERSISTENT);
	if (sc->sc_intrstat == NULL) {
		dev_err(devinfo, CE_WARN, "kstat_create failed");
		ret = DDI_FAILURE;
		goto exit_intrstat;
	}
	ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data;
	kstat_named_init(&ks_data->sts_rw_outofmemory,
	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
	kstat_named_init(&ks_data->sts_rw_badoffset,
	    "total_rw_badoffset", KSTAT_DATA_UINT64);
	kstat_named_init(&ks_data->sts_intr_total,
	    "total_intr", KSTAT_DATA_UINT64);
	kstat_named_init(&ks_data->sts_io_errors,
	    "total_io_errors", KSTAT_DATA_UINT32);
	kstat_named_init(&ks_data->sts_unsupp_errors,
	    "total_unsupp_errors", KSTAT_DATA_UINT32);
	kstat_named_init(&ks_data->sts_nxio_errors,
	    "total_nxio_errors", KSTAT_DATA_UINT32);
	kstat_named_init(&ks_data->sts_rw_cacheflush,
	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
	kstat_named_init(&ks_data->sts_rw_cookiesmax,
	    "max_rw_cookies", KSTAT_DATA_UINT32);
	kstat_named_init(&ks_data->sts_intr_queuemax,
	    "max_intr_queue", KSTAT_DATA_UINT32);
	sc->ks_data = ks_data;
	sc->sc_intrstat->ks_private = sc;
	sc->sc_intrstat->ks_update = vioblk_ksupdate;
	kstat_install(sc->sc_intrstat);

	/* Map BAR0 (register set 1; set 0 names PCI config space). */
	ret = ddi_regs_map_setup(devinfo, 1,
	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
	    0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh);
	if (ret != DDI_SUCCESS) {
		dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret);
		goto exit_map;
	}

	virtio_device_reset(&sc->sc_virtio);
	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

	if (vioblk_register_ints(sc)) {
		dev_err(devinfo, CE_WARN, "Unable to add interrupt");
		ret = DDI_FAILURE;
		goto exit_int;
	}

	ret = vioblk_dev_features(sc);
	if (ret)
		goto exit_features;

	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO)
		sc->sc_readonly = B_TRUE;
	else
		sc->sc_readonly = B_FALSE;

	sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio,
	    VIRTIO_BLK_CONFIG_CAPACITY);
	sc->sc_nblks = sc->sc_capacity;

	sc->sc_blk_size = DEV_BSIZE;
	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) {
		sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio,
		    VIRTIO_BLK_CONFIG_BLK_SIZE);
	}

	sc->sc_pblk_size = sc->sc_blk_size;
	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_TOPOLOGY) {
		sc->sc_pblk_size <<= virtio_read_device_config_1(&sc->sc_virtio,
		    VIRTIO_BLK_CONFIG_TOPO_PBEXP);
	}

	/* If the host does not support cache flush, don't advertise it. */
	if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) {
		vioblk_ops.o_sync_cache = NULL;
	}

	/* The max number of segments (cookies) in a request */
	sc->sc_seg_max = DEF_MAXINDIRECT;
	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) {
		sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio,
		    VIRTIO_BLK_CONFIG_SEG_MAX);

		/* That's what Linux does. */
		if (!sc->sc_seg_max)
			sc->sc_seg_max = 1;

		/*
		 * SEG_MAX corresponds to the number of _data_
		 * blocks in a request.
		 */
		sc->sc_seg_max += 2;
	}
	/* 2 descriptors taken for header/status */
	vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2;
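	/*
	 * Worked example: a host advertising SEG_MAX = 126 gives
	 * sc_seg_max = 128 descriptors per request (126 data cookies plus
	 * the header and status), which is exactly the DEF_MAXINDIRECT
	 * default used when the feature is absent.
	 */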

	/* The maximum size for a cookie in a request. */
	sc->sc_seg_size_max = DEF_MAXSECTOR;
	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) {
		sc->sc_seg_size_max = virtio_read_device_config_4(
		    &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX);
	}

	/* The maximum request size */
	vioblk_bd_dma_attr.dma_attr_maxxfer =
	    vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max;

	dev_debug(devinfo, CE_NOTE,
	    "nblks=%" PRIu64 " blksize=%d (%d) num_seg=%d, "
	    "seg_size=%d, maxxfer=%" PRIu64,
	    sc->sc_nblks, sc->sc_blk_size, sc->sc_pblk_size,
	    vioblk_bd_dma_attr.dma_attr_sgllen,
	    sc->sc_seg_size_max,
	    vioblk_bd_dma_attr.dma_attr_maxxfer);

	sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0,
	    sc->sc_seg_max, "I/O request");
	if (sc->sc_vq == NULL) {
		ret = DDI_FAILURE;
		goto exit_alloc1;
	}

	ret = vioblk_alloc_reqs(sc);
	if (ret) {
		ret = DDI_FAILURE;
		goto exit_alloc2;
	}

	sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr,
	    KM_SLEEP);

	virtio_set_status(&sc->sc_virtio,
	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
	virtio_start_vq_intr(sc->sc_vq);

	ret = virtio_enable_ints(&sc->sc_virtio);
	if (ret)
		goto exit_enable_ints;

	ret = bd_attach_handle(devinfo, sc->bd_h);
	if (ret != DDI_SUCCESS) {
		dev_err(devinfo, CE_WARN, "Failed to attach blkdev");
		goto exit_attach_bd;
	}

	return (DDI_SUCCESS);

exit_attach_bd:
	/*
	 * There is no virtio_disable_ints(); it's done in
	 * virtio_release_ints().  If they ever get split, don't forget
	 * to add a call here.
	 */
exit_enable_ints:
	virtio_stop_vq_intr(sc->sc_vq);
	bd_free_handle(sc->bd_h);
	vioblk_free_reqs(sc);
exit_alloc2:
	virtio_free_vq(sc->sc_vq);
exit_alloc1:
exit_features:
	virtio_release_ints(&sc->sc_virtio);
exit_int:
	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
exit_map:
	kstat_delete(sc->sc_intrstat);
exit_intrstat:
	mutex_destroy(&sc->lock_devid);
	cv_destroy(&sc->cv_devid);
	kmem_free(sc, sizeof (struct vioblk_softc));
exit:
	return (ret);
}

static int
vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
{
	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_PM_SUSPEND:
		cmn_err(CE_WARN, "suspend not supported yet");
		return (DDI_FAILURE);

	default:
		cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
		return (DDI_FAILURE);
	}

	(void) bd_detach_handle(sc->bd_h);
	virtio_stop_vq_intr(sc->sc_vq);
	virtio_release_ints(&sc->sc_virtio);
	vioblk_free_reqs(sc);
	virtio_free_vq(sc->sc_vq);
	virtio_device_reset(&sc->sc_virtio);
	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
	kstat_delete(sc->sc_intrstat);
	bd_free_handle(sc->bd_h);
	mutex_destroy(&sc->lock_devid);
	cv_destroy(&sc->cv_devid);
	kmem_free(sc, sizeof (struct vioblk_softc));

	return (DDI_SUCCESS);
}

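/*
 * quiesce(9E) entry point: called in a context (fast reboot, panic)
 * where no locks may be taken and no interrupts are delivered, so only
 * silence the queue and reset the device.
 */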
static int
vioblk_quiesce(dev_info_t *devinfo)
{
	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);

	virtio_stop_vq_intr(sc->sc_vq);
	virtio_device_reset(&sc->sc_virtio);

	return (DDI_SUCCESS);
}

int
_init(void)
{
	int rv;

	bd_mod_init(&vioblk_dev_ops);

	if ((rv = mod_install(&modlinkage)) != 0) {
		bd_mod_fini(&vioblk_dev_ops);
	}

	return (rv);
}

int
_fini(void)
{
	int rv;

	if ((rv = mod_remove(&modlinkage)) == 0) {
		bd_mod_fini(&vioblk_dev_ops);
	}

	return (rv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}