xref: /illumos-gate/usr/src/uts/common/io/vioblk/vioblk.c (revision 8c6ffd5964f28b15919c0a4ad3d120f84cedbc3d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
25  */
26 
27 
28 #include <sys/modctl.h>
29 #include <sys/blkdev.h>
30 #include <sys/types.h>
31 #include <sys/errno.h>
32 #include <sys/param.h>
33 #include <sys/stropts.h>
34 #include <sys/stream.h>
35 #include <sys/strsubr.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/devops.h>
39 #include <sys/ksynch.h>
40 #include <sys/stat.h>
41 #include <sys/modctl.h>
42 #include <sys/debug.h>
43 #include <sys/pci.h>
44 #include <sys/sysmacros.h>
45 #include "virtiovar.h"
46 #include "virtioreg.h"
47 
/*
 * Device feature bits.  The subset this driver asks for is listed in
 * vioblk_dev_features().
 */
#define	VIRTIO_BLK_F_BARRIER	(1 << 0)
#define	VIRTIO_BLK_F_SIZE_MAX	(1 << 1)
#define	VIRTIO_BLK_F_SEG_MAX	(1 << 2)
#define	VIRTIO_BLK_F_GEOMETRY	(1 << 4)
#define	VIRTIO_BLK_F_RO		(1 << 5)
#define	VIRTIO_BLK_F_BLK_SIZE	(1 << 6)
#define	VIRTIO_BLK_F_SCSI	(1 << 7)
#define	VIRTIO_BLK_F_FLUSH	(1 << 9)
#define	VIRTIO_BLK_F_TOPOLOGY	(1 << 10)

/*
 * Offsets of the device configuration registers, as handed to the
 * virtio_read_device_config_*() accessors.  The trailing comment gives
 * the width of each register.
 */
#define	VIRTIO_BLK_CONFIG_CAPACITY	0	/* 64bit */
#define	VIRTIO_BLK_CONFIG_SIZE_MAX	8	/* 32bit */
#define	VIRTIO_BLK_CONFIG_SEG_MAX	12	/* 32bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_C	16	/* 16bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_H	18	/* 8bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_S	19	/* 8bit */
#define	VIRTIO_BLK_CONFIG_BLK_SIZE	20	/* 32bit */
#define	VIRTIO_BLK_CONFIG_TOPO_PBEXP	24	/* 8bit */
#define	VIRTIO_BLK_CONFIG_TOPO_ALIGN	25	/* 8bit */
#define	VIRTIO_BLK_CONFIG_TOPO_MIN_SZ	26	/* 16bit */
#define	VIRTIO_BLK_CONFIG_TOPO_OPT_SZ	28	/* 32bit */

/* Request types, stored in vioblk_req_hdr.type. */
#define	VIRTIO_BLK_T_IN			0
#define	VIRTIO_BLK_T_OUT		1
#define	VIRTIO_BLK_T_SCSI_CMD		2
#define	VIRTIO_BLK_T_SCSI_CMD_OUT	3
#define	VIRTIO_BLK_T_FLUSH		4
#define	VIRTIO_BLK_T_FLUSH_OUT		5
#define	VIRTIO_BLK_T_GET_ID		8
#define	VIRTIO_BLK_T_BARRIER		0x80000000

/* Number of bytes transferred by a VIRTIO_BLK_T_GET_ID request (devid). */
#define	VIRTIO_BLK_ID_BYTES	20

/* Completion codes found in the request status byte. */
#define	VIRTIO_BLK_S_OK		0
#define	VIRTIO_BLK_S_IOERR	1
#define	VIRTIO_BLK_S_UNSUPP	2

/*
 * Fallbacks used by vioblk_attach() when the device does not offer
 * SEG_MAX / SIZE_MAX.
 */
#define	DEF_MAXINDIRECT		(128)
#define	DEF_MAXSECTOR		(4096)

/*
 * Marker written into vioblk_req.xfer once a request has been completed,
 * so that a second completion of the same slot can be detected.
 */
#define	VIOBLK_POISON		0xdead0001dead0001

/*
 * Static Variables.
 */

/* Module description string referenced from modldrv. */
static char vioblk_ident[] = "VirtIO block driver";
98 
/*
 * Request header.  vioblk_rw() fills one in for every request and adds it
 * as the first buffer of the descriptor chain.
 */
struct vioblk_req_hdr {
	uint32_t		type;	/* VIRTIO_BLK_T_* */
	uint32_t		ioprio;	/* always written as 0 by vioblk_rw() */
	uint64_t		sector;	/* vioblk_rw() stores xfer->x_blkno */
};
105 
106 struct vioblk_req {
107 	struct vioblk_req_hdr	hdr;
108 	uint8_t			status;
109 	uint8_t			unused[3];
110 	unsigned int		ndmac;
111 	ddi_dma_handle_t	dmah;
112 	ddi_dma_handle_t	bd_dmah;
113 	ddi_dma_cookie_t	dmac;
114 	bd_xfer_t		*xfer;
115 };
116 
117 struct vioblk_stats {
118 	struct kstat_named	sts_rw_outofmemory;
119 	struct kstat_named	sts_rw_badoffset;
120 	struct kstat_named	sts_rw_queuemax;
121 	struct kstat_named	sts_rw_cookiesmax;
122 	struct kstat_named	sts_rw_cacheflush;
123 	struct kstat_named	sts_intr_queuemax;
124 	struct kstat_named	sts_intr_total;
125 	struct kstat_named	sts_io_errors;
126 	struct kstat_named	sts_unsupp_errors;
127 	struct kstat_named	sts_nxio_errors;
128 };
129 
/*
 * Driver-local counters, bumped on the I/O paths and copied into the
 * named kstat by vioblk_ksupdate().
 */
struct vioblk_lstats {
	uint64_t		rw_cacheflush;	/* flushes queued successfully */
	uint64_t		intr_total;	/* vioblk_int_handler() calls */
	unsigned int		rw_cookiesmax;	/* largest chain submitted */
	unsigned int		intr_queuemax;	/* most requests reaped at once */
	unsigned int		io_errors;	/* VIRTIO_BLK_S_IOERR results */
	unsigned int		unsupp_errors;	/* VIRTIO_BLK_S_UNSUPP results */
	unsigned int		nxio_errors;	/* any other status value */
};
139 
140 struct vioblk_softc {
141 	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
142 	struct virtio_softc	sc_virtio;
143 	struct virtqueue	*sc_vq;
144 	bd_handle_t		bd_h;
145 	struct vioblk_req	*sc_reqs;
146 	struct vioblk_stats	*ks_data;
147 	kstat_t			*sc_intrstat;
148 	uint64_t		sc_capacity;
149 	uint64_t		sc_nblks;
150 	struct vioblk_lstats	sc_stats;
151 	short			sc_blkflags;
152 	boolean_t		sc_in_poll_mode;
153 	boolean_t		sc_readonly;
154 	int			sc_blk_size;
155 	int			sc_pblk_size;
156 	int			sc_seg_max;
157 	int			sc_seg_size_max;
158 	kmutex_t		lock_devid;
159 	kcondvar_t		cv_devid;
160 	char			devid[VIRTIO_BLK_ID_BYTES + 1];
161 };
162 
163 static int vioblk_get_id(struct vioblk_softc *sc);
164 
165 static int vioblk_read(void *arg, bd_xfer_t *xfer);
166 static int vioblk_write(void *arg, bd_xfer_t *xfer);
167 static int vioblk_flush(void *arg, bd_xfer_t *xfer);
168 static void vioblk_driveinfo(void *arg, bd_drive_t *drive);
169 static int vioblk_mediainfo(void *arg, bd_media_t *media);
170 static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *);
171 uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2);
172 
173 static bd_ops_t vioblk_ops = {
174 	BD_OPS_VERSION_0,
175 	vioblk_driveinfo,
176 	vioblk_mediainfo,
177 	vioblk_devid_init,
178 	vioblk_flush,
179 	vioblk_read,
180 	vioblk_write,
181 };
182 
183 static int vioblk_quiesce(dev_info_t *);
184 static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
185 static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);
186 
187 static struct dev_ops vioblk_dev_ops = {
188 	DEVO_REV,
189 	0,
190 	ddi_no_info,
191 	nulldev,	/* identify */
192 	nulldev,	/* probe */
193 	vioblk_attach,	/* attach */
194 	vioblk_detach,	/* detach */
195 	nodev,		/* reset */
196 	NULL,		/* cb_ops */
197 	NULL,		/* bus_ops */
198 	NULL,		/* power */
199 	vioblk_quiesce	/* quiesce */
200 };
201 
202 
203 
204 /* Standard Module linkage initialization for a Streams driver */
205 extern struct mod_ops mod_driverops;
206 
207 static struct modldrv modldrv = {
208 	&mod_driverops,		/* Type of module.  This one is a driver */
209 	vioblk_ident,    /* short description */
210 	&vioblk_dev_ops	/* driver specific ops */
211 };
212 
213 static struct modlinkage modlinkage = {
214 	MODREV_1,
215 	{
216 		(void *)&modldrv,
217 		NULL,
218 	},
219 };
220 
221 ddi_device_acc_attr_t vioblk_attr = {
222 	DDI_DEVICE_ATTR_V0,
223 	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
224 	DDI_STORECACHING_OK_ACC,
225 	DDI_DEFAULT_ACC
226 };
227 
228 /* DMA attr for the header/status blocks. */
229 static ddi_dma_attr_t vioblk_req_dma_attr = {
230 	DMA_ATTR_V0,			/* dma_attr version	*/
231 	0,				/* dma_attr_addr_lo	*/
232 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
233 	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
234 	1,				/* dma_attr_align	*/
235 	1,				/* dma_attr_burstsizes	*/
236 	1,				/* dma_attr_minxfer	*/
237 	0xFFFFFFFFull,			/* dma_attr_maxxfer	*/
238 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
239 	1,				/* dma_attr_sgllen	*/
240 	1,				/* dma_attr_granular	*/
241 	0,				/* dma_attr_flags	*/
242 };
243 
244 /* DMA attr for the data blocks. */
245 static ddi_dma_attr_t vioblk_bd_dma_attr = {
246 	DMA_ATTR_V0,			/* dma_attr version	*/
247 	0,				/* dma_attr_addr_lo	*/
248 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
249 	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
250 	1,				/* dma_attr_align	*/
251 	1,				/* dma_attr_burstsizes	*/
252 	1,				/* dma_attr_minxfer	*/
253 	0,				/* dma_attr_maxxfer, set in attach */
254 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
255 	0,				/* dma_attr_sgllen, set in attach */
256 	1,				/* dma_attr_granular	*/
257 	0,				/* dma_attr_flags	*/
258 };
259 
260 static int
261 vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type,
262     uint32_t len)
263 {
264 	struct vioblk_req *req;
265 	struct vq_entry *ve_hdr;
266 	int total_cookies, write;
267 
268 	write = (type == VIRTIO_BLK_T_OUT ||
269 	    type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0;
270 	total_cookies = 2;
271 
272 	if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) {
273 		sc->ks_data->sts_rw_badoffset.value.ui64++;
274 		return (EINVAL);
275 	}
276 
277 	/* allocate top entry */
278 	ve_hdr = vq_alloc_entry(sc->sc_vq);
279 	if (!ve_hdr) {
280 		sc->ks_data->sts_rw_outofmemory.value.ui64++;
281 		return (ENOMEM);
282 	}
283 
284 	/* getting request */
285 	req = &sc->sc_reqs[ve_hdr->qe_index];
286 	req->hdr.type = type;
287 	req->hdr.ioprio = 0;
288 	req->hdr.sector = xfer->x_blkno;
289 	req->xfer = xfer;
290 
291 	/* Header */
292 	virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress,
293 	    sizeof (struct vioblk_req_hdr), B_TRUE);
294 
295 	/* Payload */
296 	if (len > 0) {
297 		virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac,
298 		    xfer->x_ndmac, write ? B_TRUE : B_FALSE);
299 		total_cookies += xfer->x_ndmac;
300 	}
301 
302 	/* Status */
303 	virtio_ve_add_indirect_buf(ve_hdr,
304 	    req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr),
305 	    sizeof (uint8_t), B_FALSE);
306 
307 	/* sending the whole chain to the device */
308 	virtio_push_chain(ve_hdr, B_TRUE);
309 
310 	if (sc->sc_stats.rw_cookiesmax < total_cookies)
311 		sc->sc_stats.rw_cookiesmax = total_cookies;
312 
313 	return (DDI_SUCCESS);
314 }
315 
316 /*
317  * Now in polling mode. Interrupts are off, so we
318  * 1) poll for the already queued requests to complete.
319  * 2) push our request.
320  * 3) wait for our request to complete.
321  */
322 static int
323 vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer,
324     int type, uint32_t len)
325 {
326 	clock_t tmout;
327 	int ret;
328 
329 	ASSERT(xfer->x_flags & BD_XFER_POLL);
330 
331 	/* Prevent a hard hang. */
332 	tmout = drv_usectohz(30000000);
333 
334 	/* Poll for an empty queue */
335 	while (vq_num_used(sc->sc_vq)) {
336 		/* Check if any pending requests completed. */
337 		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
338 		if (ret != DDI_INTR_CLAIMED) {
339 			drv_usecwait(10);
340 			tmout -= 10;
341 			return (ETIMEDOUT);
342 		}
343 	}
344 
345 	ret = vioblk_rw(sc, xfer, type, len);
346 	if (ret)
347 		return (ret);
348 
349 	tmout = drv_usectohz(30000000);
350 	/* Poll for an empty queue again. */
351 	while (vq_num_used(sc->sc_vq)) {
352 		/* Check if any pending requests completed. */
353 		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
354 		if (ret != DDI_INTR_CLAIMED) {
355 			drv_usecwait(10);
356 			tmout -= 10;
357 			return (ETIMEDOUT);
358 		}
359 	}
360 
361 	return (DDI_SUCCESS);
362 }
363 
364 static int
365 vioblk_read(void *arg, bd_xfer_t *xfer)
366 {
367 	int ret;
368 	struct vioblk_softc *sc = (void *)arg;
369 
370 	if (xfer->x_flags & BD_XFER_POLL) {
371 		if (!sc->sc_in_poll_mode) {
372 			virtio_stop_vq_intr(sc->sc_vq);
373 			sc->sc_in_poll_mode = 1;
374 		}
375 
376 		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN,
377 		    xfer->x_nblks * DEV_BSIZE);
378 	} else {
379 		if (sc->sc_in_poll_mode) {
380 			virtio_start_vq_intr(sc->sc_vq);
381 			sc->sc_in_poll_mode = 0;
382 		}
383 
384 		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN,
385 		    xfer->x_nblks * DEV_BSIZE);
386 	}
387 
388 	return (ret);
389 }
390 
391 static int
392 vioblk_write(void *arg, bd_xfer_t *xfer)
393 {
394 	int ret;
395 	struct vioblk_softc *sc = (void *)arg;
396 
397 	if (xfer->x_flags & BD_XFER_POLL) {
398 		if (!sc->sc_in_poll_mode) {
399 			virtio_stop_vq_intr(sc->sc_vq);
400 			sc->sc_in_poll_mode = 1;
401 		}
402 
403 		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT,
404 		    xfer->x_nblks * DEV_BSIZE);
405 	} else {
406 		if (sc->sc_in_poll_mode) {
407 			virtio_start_vq_intr(sc->sc_vq);
408 			sc->sc_in_poll_mode = 0;
409 		}
410 
411 		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT,
412 		    xfer->x_nblks * DEV_BSIZE);
413 	}
414 	return (ret);
415 }
416 
417 static int
418 vioblk_flush(void *arg, bd_xfer_t *xfer)
419 {
420 	int ret;
421 	struct vioblk_softc *sc = (void *)arg;
422 
423 	ASSERT((xfer->x_flags & BD_XFER_POLL) == 0);
424 
425 	ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT,
426 	    xfer->x_nblks * DEV_BSIZE);
427 
428 	if (!ret)
429 		sc->sc_stats.rw_cacheflush++;
430 
431 	return (ret);
432 }
433 
434 
435 static void
436 vioblk_driveinfo(void *arg, bd_drive_t *drive)
437 {
438 	struct vioblk_softc *sc = (void *)arg;
439 
440 	drive->d_qsize = sc->sc_vq->vq_num;
441 	drive->d_removable = B_FALSE;
442 	drive->d_hotpluggable = B_TRUE;
443 	drive->d_target = 0;
444 	drive->d_lun = 0;
445 
446 	drive->d_vendor = "Virtio";
447 	drive->d_vendor_len = strlen(drive->d_vendor);
448 
449 	drive->d_product = "Block Device";
450 	drive->d_product_len = strlen(drive->d_product);
451 
452 	(void) vioblk_get_id(sc);
453 	drive->d_serial = sc->devid;
454 	drive->d_serial_len = strlen(drive->d_serial);
455 
456 	drive->d_revision = "0000";
457 	drive->d_revision_len = strlen(drive->d_revision);
458 }
459 
460 static int
461 vioblk_mediainfo(void *arg, bd_media_t *media)
462 {
463 	struct vioblk_softc *sc = (void *)arg;
464 
465 	media->m_nblks = sc->sc_nblks;
466 	media->m_blksize = sc->sc_blk_size;
467 	media->m_readonly = sc->sc_readonly;
468 	media->m_pblksize = sc->sc_pblk_size;
469 	return (0);
470 }
471 
472 static int
473 vioblk_get_id(struct vioblk_softc *sc)
474 {
475 	clock_t deadline;
476 	int ret;
477 	bd_xfer_t xfer;
478 
479 	deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000);
480 	(void) memset(&xfer, 0, sizeof (bd_xfer_t));
481 	xfer.x_nblks = 1;
482 
483 	ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr,
484 	    DDI_DMA_SLEEP, NULL, &xfer.x_dmah);
485 	if (ret != DDI_SUCCESS)
486 		goto out_alloc;
487 
488 	ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid,
489 	    VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT,
490 	    DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac);
491 	if (ret != DDI_DMA_MAPPED) {
492 		ret = DDI_FAILURE;
493 		goto out_map;
494 	}
495 
496 	mutex_enter(&sc->lock_devid);
497 
498 	ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID,
499 	    VIRTIO_BLK_ID_BYTES);
500 	if (ret) {
501 		mutex_exit(&sc->lock_devid);
502 		goto out_rw;
503 	}
504 
505 	/* wait for reply */
506 	ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline);
507 	mutex_exit(&sc->lock_devid);
508 
509 	(void) ddi_dma_unbind_handle(xfer.x_dmah);
510 	ddi_dma_free_handle(&xfer.x_dmah);
511 
512 	/* timeout */
513 	if (ret < 0) {
514 		dev_err(sc->sc_dev, CE_WARN,
515 		    "Cannot get devid from the device");
516 		return (DDI_FAILURE);
517 	}
518 
519 	return (0);
520 
521 out_rw:
522 	(void) ddi_dma_unbind_handle(xfer.x_dmah);
523 out_map:
524 	ddi_dma_free_handle(&xfer.x_dmah);
525 out_alloc:
526 	return (ret);
527 }
528 
529 static int
530 vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
531 {
532 	struct vioblk_softc *sc = (void *)arg;
533 	int ret;
534 
535 	ret = vioblk_get_id(sc);
536 	if (ret != DDI_SUCCESS)
537 		return (ret);
538 
539 	ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL,
540 	    VIRTIO_BLK_ID_BYTES, sc->devid, devid);
541 	if (ret != DDI_SUCCESS) {
542 		dev_err(devinfo, CE_WARN, "Cannot build devid from the device");
543 		return (ret);
544 	}
545 
546 	dev_debug(sc->sc_dev, CE_NOTE,
547 	    "devid %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x",
548 	    sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3],
549 	    sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7],
550 	    sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11],
551 	    sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15],
552 	    sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]);
553 
554 	return (0);
555 }
556 
557 static void
558 vioblk_show_features(struct vioblk_softc *sc, const char *prefix,
559     uint32_t features)
560 {
561 	char buf[512];
562 	char *bufp = buf;
563 	char *bufend = buf + sizeof (buf);
564 
565 	/* LINTED E_PTRDIFF_OVERFLOW */
566 	bufp += snprintf(bufp, bufend - bufp, prefix);
567 
568 	/* LINTED E_PTRDIFF_OVERFLOW */
569 	bufp += virtio_show_features(features, bufp, bufend - bufp);
570 
571 
572 	/* LINTED E_PTRDIFF_OVERFLOW */
573 	bufp += snprintf(bufp, bufend - bufp, "Vioblk ( ");
574 
575 	if (features & VIRTIO_BLK_F_BARRIER)
576 		/* LINTED E_PTRDIFF_OVERFLOW */
577 		bufp += snprintf(bufp, bufend - bufp, "BARRIER ");
578 	if (features & VIRTIO_BLK_F_SIZE_MAX)
579 		/* LINTED E_PTRDIFF_OVERFLOW */
580 		bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX ");
581 	if (features & VIRTIO_BLK_F_SEG_MAX)
582 		/* LINTED E_PTRDIFF_OVERFLOW */
583 		bufp += snprintf(bufp, bufend - bufp, "SEG_MAX ");
584 	if (features & VIRTIO_BLK_F_GEOMETRY)
585 		/* LINTED E_PTRDIFF_OVERFLOW */
586 		bufp += snprintf(bufp, bufend - bufp, "GEOMETRY ");
587 	if (features & VIRTIO_BLK_F_RO)
588 		/* LINTED E_PTRDIFF_OVERFLOW */
589 		bufp += snprintf(bufp, bufend - bufp, "RO ");
590 	if (features & VIRTIO_BLK_F_BLK_SIZE)
591 		/* LINTED E_PTRDIFF_OVERFLOW */
592 		bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE ");
593 	if (features & VIRTIO_BLK_F_SCSI)
594 		/* LINTED E_PTRDIFF_OVERFLOW */
595 		bufp += snprintf(bufp, bufend - bufp, "SCSI ");
596 	if (features & VIRTIO_BLK_F_FLUSH)
597 		/* LINTED E_PTRDIFF_OVERFLOW */
598 		bufp += snprintf(bufp, bufend - bufp, "FLUSH ");
599 	if (features & VIRTIO_BLK_F_TOPOLOGY)
600 		/* LINTED E_PTRDIFF_OVERFLOW */
601 		bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY ");
602 
603 	/* LINTED E_PTRDIFF_OVERFLOW */
604 	bufp += snprintf(bufp, bufend - bufp, ")");
605 	*bufp = '\0';
606 
607 	dev_debug(sc->sc_dev, CE_NOTE, "%s", buf);
608 }
609 
610 static int
611 vioblk_dev_features(struct vioblk_softc *sc)
612 {
613 	uint32_t host_features;
614 
615 	host_features = virtio_negotiate_features(&sc->sc_virtio,
616 	    VIRTIO_BLK_F_RO |
617 	    VIRTIO_BLK_F_GEOMETRY |
618 	    VIRTIO_BLK_F_BLK_SIZE |
619 	    VIRTIO_BLK_F_FLUSH |
620 	    VIRTIO_BLK_F_TOPOLOGY |
621 	    VIRTIO_BLK_F_SEG_MAX |
622 	    VIRTIO_BLK_F_SIZE_MAX |
623 	    VIRTIO_F_RING_INDIRECT_DESC);
624 
625 	vioblk_show_features(sc, "Host features: ", host_features);
626 	vioblk_show_features(sc, "Negotiated features: ",
627 	    sc->sc_virtio.sc_features);
628 
629 	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
630 		dev_err(sc->sc_dev, CE_NOTE,
631 		    "Host does not support RING_INDIRECT_DESC, bye.");
632 		return (DDI_FAILURE);
633 	}
634 
635 	return (DDI_SUCCESS);
636 }
637 
638 /* ARGSUSED */
639 uint_t
640 vioblk_int_handler(caddr_t arg1, caddr_t arg2)
641 {
642 	struct virtio_softc *vsc = (void *)arg1;
643 	struct vioblk_softc *sc = container_of(vsc,
644 	    struct vioblk_softc, sc_virtio);
645 	struct vq_entry *ve;
646 	uint32_t len;
647 	int i = 0, error;
648 
649 	while ((ve = virtio_pull_chain(sc->sc_vq, &len))) {
650 		struct vioblk_req *req = &sc->sc_reqs[ve->qe_index];
651 		bd_xfer_t *xfer = req->xfer;
652 		uint8_t status = req->status;
653 		uint32_t type = req->hdr.type;
654 
655 		if (req->xfer == (void *)VIOBLK_POISON) {
656 			dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!");
657 			virtio_free_chain(ve);
658 			return (DDI_INTR_CLAIMED);
659 		}
660 
661 		req->xfer = (void *) VIOBLK_POISON;
662 
663 		/* Note: blkdev tears down the payload mapping for us. */
664 		virtio_free_chain(ve);
665 
666 		/* returning payload back to blkdev */
667 		switch (status) {
668 			case VIRTIO_BLK_S_OK:
669 				error = 0;
670 				break;
671 			case VIRTIO_BLK_S_IOERR:
672 				error = EIO;
673 				sc->sc_stats.io_errors++;
674 				break;
675 			case VIRTIO_BLK_S_UNSUPP:
676 				sc->sc_stats.unsupp_errors++;
677 				error = ENOTTY;
678 				break;
679 			default:
680 				sc->sc_stats.nxio_errors++;
681 				error = ENXIO;
682 				break;
683 		}
684 
685 		if (type == VIRTIO_BLK_T_GET_ID) {
686 			/* notify devid_init */
687 			mutex_enter(&sc->lock_devid);
688 			cv_broadcast(&sc->cv_devid);
689 			mutex_exit(&sc->lock_devid);
690 		} else
691 			bd_xfer_done(xfer, error);
692 
693 		i++;
694 	}
695 
696 	/* update stats */
697 	if (sc->sc_stats.intr_queuemax < i)
698 		sc->sc_stats.intr_queuemax = i;
699 	sc->sc_stats.intr_total++;
700 
701 	return (DDI_INTR_CLAIMED);
702 }
703 
704 /* ARGSUSED */
705 uint_t
706 vioblk_config_handler(caddr_t arg1, caddr_t arg2)
707 {
708 	return (DDI_INTR_CLAIMED);
709 }
710 
711 static int
712 vioblk_register_ints(struct vioblk_softc *sc)
713 {
714 	int ret;
715 
716 	struct virtio_int_handler vioblk_conf_h = {
717 		vioblk_config_handler
718 	};
719 
720 	struct virtio_int_handler vioblk_vq_h[] = {
721 		{ vioblk_int_handler },
722 		{ NULL },
723 	};
724 
725 	ret = virtio_register_ints(&sc->sc_virtio,
726 	    &vioblk_conf_h, vioblk_vq_h);
727 
728 	return (ret);
729 }
730 
731 static void
732 vioblk_free_reqs(struct vioblk_softc *sc)
733 {
734 	int i, qsize;
735 
736 	qsize = sc->sc_vq->vq_num;
737 
738 	for (i = 0; i < qsize; i++) {
739 		struct vioblk_req *req = &sc->sc_reqs[i];
740 
741 		if (req->ndmac)
742 			(void) ddi_dma_unbind_handle(req->dmah);
743 
744 		if (req->dmah)
745 			ddi_dma_free_handle(&req->dmah);
746 	}
747 
748 	kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize);
749 }
750 
751 static int
752 vioblk_alloc_reqs(struct vioblk_softc *sc)
753 {
754 	int i, qsize;
755 	int ret;
756 
757 	qsize = sc->sc_vq->vq_num;
758 
759 	sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP);
760 
761 	for (i = 0; i < qsize; i++) {
762 		struct vioblk_req *req = &sc->sc_reqs[i];
763 
764 		ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr,
765 		    DDI_DMA_SLEEP, NULL, &req->dmah);
766 		if (ret != DDI_SUCCESS) {
767 
768 			dev_err(sc->sc_dev, CE_WARN,
769 			    "Can't allocate dma handle for req "
770 			    "buffer %d", i);
771 			goto exit;
772 		}
773 
774 		ret = ddi_dma_addr_bind_handle(req->dmah, NULL,
775 		    (caddr_t)&req->hdr,
776 		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
777 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
778 		    NULL, &req->dmac, &req->ndmac);
779 		if (ret != DDI_DMA_MAPPED) {
780 			dev_err(sc->sc_dev, CE_WARN,
781 			    "Can't bind req buffer %d", i);
782 			goto exit;
783 		}
784 	}
785 
786 	return (0);
787 
788 exit:
789 	vioblk_free_reqs(sc);
790 	return (ENOMEM);
791 }
792 
793 
794 static int
795 vioblk_ksupdate(kstat_t *ksp, int rw)
796 {
797 	struct vioblk_softc *sc = ksp->ks_private;
798 
799 	if (rw == KSTAT_WRITE)
800 		return (EACCES);
801 
802 	sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax;
803 	sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax;
804 	sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors;
805 	sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors;
806 	sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors;
807 	sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush;
808 	sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total;
809 
810 
811 	return (0);
812 }
813 
814 static int
815 vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
816 {
817 	int ret = DDI_SUCCESS;
818 	int instance;
819 	struct vioblk_softc *sc;
820 	struct virtio_softc *vsc;
821 	struct vioblk_stats *ks_data;
822 
823 	instance = ddi_get_instance(devinfo);
824 
825 	switch (cmd) {
826 	case DDI_ATTACH:
827 		break;
828 
829 	case DDI_RESUME:
830 	case DDI_PM_RESUME:
831 		dev_err(devinfo, CE_WARN, "resume not supported yet");
832 		ret = DDI_FAILURE;
833 		goto exit;
834 
835 	default:
836 		dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd);
837 		ret = DDI_FAILURE;
838 		goto exit;
839 	}
840 
841 	sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP);
842 	ddi_set_driver_private(devinfo, sc);
843 
844 	vsc = &sc->sc_virtio;
845 
846 	/* Duplicate for faster access / less typing */
847 	sc->sc_dev = devinfo;
848 	vsc->sc_dev = devinfo;
849 
850 	cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL);
851 	mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL);
852 
853 	/*
854 	 * Initialize interrupt kstat.  This should not normally fail, since
855 	 * we don't use a persistent stat.  We do it this way to avoid having
856 	 * to test for it at run time on the hot path.
857 	 */
858 	sc->sc_intrstat = kstat_create("vioblk", instance,
859 	    "intrs", "controller", KSTAT_TYPE_NAMED,
860 	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
861 	    KSTAT_FLAG_PERSISTENT);
862 	if (sc->sc_intrstat == NULL) {
863 		dev_err(devinfo, CE_WARN, "kstat_create failed");
864 		goto exit_intrstat;
865 	}
866 	ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data;
867 	kstat_named_init(&ks_data->sts_rw_outofmemory,
868 	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
869 	kstat_named_init(&ks_data->sts_rw_badoffset,
870 	    "total_rw_badoffset", KSTAT_DATA_UINT64);
871 	kstat_named_init(&ks_data->sts_intr_total,
872 	    "total_intr", KSTAT_DATA_UINT64);
873 	kstat_named_init(&ks_data->sts_io_errors,
874 	    "total_io_errors", KSTAT_DATA_UINT32);
875 	kstat_named_init(&ks_data->sts_unsupp_errors,
876 	    "total_unsupp_errors", KSTAT_DATA_UINT32);
877 	kstat_named_init(&ks_data->sts_nxio_errors,
878 	    "total_nxio_errors", KSTAT_DATA_UINT32);
879 	kstat_named_init(&ks_data->sts_rw_cacheflush,
880 	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
881 	kstat_named_init(&ks_data->sts_rw_cookiesmax,
882 	    "max_rw_cookies", KSTAT_DATA_UINT32);
883 	kstat_named_init(&ks_data->sts_intr_queuemax,
884 	    "max_intr_queue", KSTAT_DATA_UINT32);
885 	sc->ks_data = ks_data;
886 	sc->sc_intrstat->ks_private = sc;
887 	sc->sc_intrstat->ks_update = vioblk_ksupdate;
888 	kstat_install(sc->sc_intrstat);
889 
890 	/* map BAR0 */
891 	ret = ddi_regs_map_setup(devinfo, 1,
892 	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
893 	    0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh);
894 	if (ret != DDI_SUCCESS) {
895 		dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret);
896 		goto exit_map;
897 	}
898 
899 	virtio_device_reset(&sc->sc_virtio);
900 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
901 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
902 
903 	if (vioblk_register_ints(sc)) {
904 		dev_err(devinfo, CE_WARN, "Unable to add interrupt");
905 		goto exit_int;
906 	}
907 
908 	ret = vioblk_dev_features(sc);
909 	if (ret)
910 		goto exit_features;
911 
912 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO)
913 		sc->sc_readonly = B_TRUE;
914 	else
915 		sc->sc_readonly = B_FALSE;
916 
917 	sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio,
918 	    VIRTIO_BLK_CONFIG_CAPACITY);
919 	sc->sc_nblks = sc->sc_capacity;
920 
921 	sc->sc_blk_size = DEV_BSIZE;
922 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) {
923 		sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio,
924 		    VIRTIO_BLK_CONFIG_BLK_SIZE);
925 	}
926 
927 	sc->sc_pblk_size = sc->sc_blk_size;
928 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_TOPOLOGY) {
929 		sc->sc_pblk_size <<= virtio_read_device_config_1(&sc->sc_virtio,
930 		    VIRTIO_BLK_CONFIG_TOPO_PBEXP);
931 	}
932 
933 	/* Flushing is not supported. */
934 	if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) {
935 		vioblk_ops.o_sync_cache = NULL;
936 	}
937 
938 	sc->sc_seg_max = DEF_MAXINDIRECT;
939 	/* The max number of segments (cookies) in a request */
940 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) {
941 		sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio,
942 		    VIRTIO_BLK_CONFIG_SEG_MAX);
943 
944 		/* That's what Linux does. */
945 		if (!sc->sc_seg_max)
946 			sc->sc_seg_max = 1;
947 
948 		/*
949 		 * SEG_MAX corresponds to the number of _data_
950 		 * blocks in a request
951 		 */
952 		sc->sc_seg_max += 2;
953 	}
954 	/* 2 descriptors taken for header/status */
955 	vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2;
956 
957 
958 	/* The maximum size for a cookie in a request. */
959 	sc->sc_seg_size_max = DEF_MAXSECTOR;
960 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) {
961 		sc->sc_seg_size_max = virtio_read_device_config_4(
962 		    &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX);
963 	}
964 
965 	/* The maximum request size */
966 	vioblk_bd_dma_attr.dma_attr_maxxfer =
967 	    vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max;
968 
969 	dev_debug(devinfo, CE_NOTE,
970 	    "nblks=%" PRIu64 " blksize=%d (%d) num_seg=%d, "
971 	    "seg_size=%d, maxxfer=%" PRIu64,
972 	    sc->sc_nblks, sc->sc_blk_size, sc->sc_pblk_size,
973 	    vioblk_bd_dma_attr.dma_attr_sgllen,
974 	    sc->sc_seg_size_max,
975 	    vioblk_bd_dma_attr.dma_attr_maxxfer);
976 
977 
978 	sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0,
979 	    sc->sc_seg_max, "I/O request");
980 	if (sc->sc_vq == NULL) {
981 		goto exit_alloc1;
982 	}
983 
984 	ret = vioblk_alloc_reqs(sc);
985 	if (ret) {
986 		goto exit_alloc2;
987 	}
988 
989 	sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr,
990 	    KM_SLEEP);
991 
992 
993 	virtio_set_status(&sc->sc_virtio,
994 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
995 	virtio_start_vq_intr(sc->sc_vq);
996 
997 	ret = virtio_enable_ints(&sc->sc_virtio);
998 	if (ret)
999 		goto exit_enable_ints;
1000 
1001 	ret = bd_attach_handle(devinfo, sc->bd_h);
1002 	if (ret != DDI_SUCCESS) {
1003 		dev_err(devinfo, CE_WARN, "Failed to attach blkdev");
1004 		goto exit_attach_bd;
1005 	}
1006 
1007 	return (DDI_SUCCESS);
1008 
1009 exit_attach_bd:
1010 	/*
1011 	 * There is no virtio_disable_ints(), it's done in virtio_release_ints.
1012 	 * If they ever get split, don't forget to add a call here.
1013 	 */
1014 exit_enable_ints:
1015 	virtio_stop_vq_intr(sc->sc_vq);
1016 	bd_free_handle(sc->bd_h);
1017 	vioblk_free_reqs(sc);
1018 exit_alloc2:
1019 	virtio_free_vq(sc->sc_vq);
1020 exit_alloc1:
1021 exit_features:
1022 	virtio_release_ints(&sc->sc_virtio);
1023 exit_int:
1024 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1025 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1026 exit_map:
1027 	kstat_delete(sc->sc_intrstat);
1028 exit_intrstat:
1029 	mutex_destroy(&sc->lock_devid);
1030 	cv_destroy(&sc->cv_devid);
1031 	kmem_free(sc, sizeof (struct vioblk_softc));
1032 exit:
1033 	return (ret);
1034 }
1035 
1036 static int
1037 vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1038 {
1039 	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
1040 
1041 	switch (cmd) {
1042 	case DDI_DETACH:
1043 		break;
1044 
1045 	case DDI_PM_SUSPEND:
1046 		cmn_err(CE_WARN, "suspend not supported yet");
1047 		return (DDI_FAILURE);
1048 
1049 	default:
1050 		cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
1051 		return (DDI_FAILURE);
1052 	}
1053 
1054 	(void) bd_detach_handle(sc->bd_h);
1055 	virtio_stop_vq_intr(sc->sc_vq);
1056 	virtio_release_ints(&sc->sc_virtio);
1057 	vioblk_free_reqs(sc);
1058 	virtio_free_vq(sc->sc_vq);
1059 	virtio_device_reset(&sc->sc_virtio);
1060 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1061 	kstat_delete(sc->sc_intrstat);
1062 	kmem_free(sc, sizeof (struct vioblk_softc));
1063 
1064 	return (DDI_SUCCESS);
1065 }
1066 
1067 static int
1068 vioblk_quiesce(dev_info_t *devinfo)
1069 {
1070 	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
1071 
1072 	virtio_stop_vq_intr(sc->sc_vq);
1073 	virtio_device_reset(&sc->sc_virtio);
1074 
1075 	return (DDI_SUCCESS);
1076 }
1077 
1078 int
1079 _init(void)
1080 {
1081 	int rv;
1082 
1083 	bd_mod_init(&vioblk_dev_ops);
1084 
1085 	if ((rv = mod_install(&modlinkage)) != 0) {
1086 		bd_mod_fini(&vioblk_dev_ops);
1087 	}
1088 
1089 	return (rv);
1090 }
1091 
1092 int
1093 _fini(void)
1094 {
1095 	int rv;
1096 
1097 	if ((rv = mod_remove(&modlinkage)) == 0) {
1098 		bd_mod_fini(&vioblk_dev_ops);
1099 	}
1100 
1101 	return (rv);
1102 }
1103 
1104 int
1105 _info(struct modinfo *modinfop)
1106 {
1107 	return (mod_info(&modlinkage, modinfop));
1108 }
1109