xref: /illumos-gate/usr/src/uts/common/io/vioblk/vioblk.c (revision f18d8787c0ba765f61b003e2aae78db90b48f833)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
25  * Copyright 2017, Joyent Inc.
26  */
27 
28 
29 #include <sys/modctl.h>
30 #include <sys/blkdev.h>
31 #include <sys/types.h>
32 #include <sys/errno.h>
33 #include <sys/param.h>
34 #include <sys/stropts.h>
35 #include <sys/stream.h>
36 #include <sys/strsubr.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/devops.h>
40 #include <sys/ksynch.h>
41 #include <sys/stat.h>
42 #include <sys/modctl.h>
43 #include <sys/debug.h>
44 #include <sys/pci.h>
45 #include <sys/containerof.h>
46 #include "virtiovar.h"
47 #include "virtioreg.h"
48 
/*
 * Feature bits.  vioblk_dev_features() offers RO, GEOMETRY, BLK_SIZE, FLUSH,
 * TOPOLOGY, SEG_MAX and SIZE_MAX to the host; BARRIER and SCSI are only
 * decoded by vioblk_show_features() for the debug listing.
 */
#define	VIRTIO_BLK_F_BARRIER	(1<<0)
#define	VIRTIO_BLK_F_SIZE_MAX	(1<<1)
#define	VIRTIO_BLK_F_SEG_MAX	(1<<2)
#define	VIRTIO_BLK_F_GEOMETRY	(1<<4)
#define	VIRTIO_BLK_F_RO		(1<<5)
#define	VIRTIO_BLK_F_BLK_SIZE	(1<<6)
#define	VIRTIO_BLK_F_SCSI	(1<<7)
#define	VIRTIO_BLK_F_FLUSH	(1<<9)
#define	VIRTIO_BLK_F_TOPOLOGY	(1<<10)

/*
 * Configuration registers: offsets handed to virtio_read_device_config_*().
 * The trailing comment on each line is the register width.
 */
#define	VIRTIO_BLK_CONFIG_CAPACITY	0 /* 64bit */
#define	VIRTIO_BLK_CONFIG_SIZE_MAX	8 /* 32bit */
#define	VIRTIO_BLK_CONFIG_SEG_MAX	12 /* 32bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_C	16 /* 16bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_H	18 /* 8bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_S	19 /* 8bit */
#define	VIRTIO_BLK_CONFIG_BLK_SIZE	20 /* 32bit */
#define	VIRTIO_BLK_CONFIG_TOPO_PBEXP	24 /* 8bit */
#define	VIRTIO_BLK_CONFIG_TOPO_ALIGN	25 /* 8bit */
#define	VIRTIO_BLK_CONFIG_TOPO_MIN_SZ	26 /* 16bit */
#define	VIRTIO_BLK_CONFIG_TOPO_OPT_SZ	28 /* 32bit */

/* Command: request types stored in vioblk_req_hdr.type by vioblk_rw(). */
#define	VIRTIO_BLK_T_IN			0
#define	VIRTIO_BLK_T_OUT		1
#define	VIRTIO_BLK_T_SCSI_CMD		2
#define	VIRTIO_BLK_T_SCSI_CMD_OUT	3
#define	VIRTIO_BLK_T_FLUSH		4
#define	VIRTIO_BLK_T_FLUSH_OUT		5
#define	VIRTIO_BLK_T_GET_ID		8
#define	VIRTIO_BLK_T_BARRIER		0x80000000

/* Number of bytes transferred by a VIRTIO_BLK_T_GET_ID request. */
#define	VIRTIO_BLK_ID_BYTES	20 /* devid */

/*
 * Statuses: completion codes read from vioblk_req.status and translated to
 * errno values by vioblk_int_handler().
 */
#define	VIRTIO_BLK_S_OK		0
#define	VIRTIO_BLK_S_IOERR	1
#define	VIRTIO_BLK_S_UNSUPP	2

/*
 * Defaults used by vioblk_attach() for sc_seg_max and sc_seg_size_max when
 * the SEG_MAX / SIZE_MAX features were not negotiated.
 */
#define	DEF_MAXINDIRECT		(128)
#define	DEF_MAXSECTOR		(4096)

/*
 * Written to vioblk_req.xfer once a request has completed, so that a second
 * completion of the same descriptor can be detected in the interrupt handler.
 */
#define	VIOBLK_POISON		0xdead0001dead0001
94 
/*
 * Static Variables.
 */
/* Short module description referenced from the modldrv structure. */
static char vioblk_ident[] = "VirtIO block driver";
99 
/*
 * Request header structure.  This is the first buffer of every descriptor
 * chain built by vioblk_rw().
 */
struct vioblk_req_hdr {
	uint32_t		type;   /* VIRTIO_BLK_T_* */
	uint32_t		ioprio;	/* always set to 0 by vioblk_rw() */
	uint64_t		sector;	/* taken from xfer->x_blkno */
};
106 
/*
 * Per-descriptor request state; sc_reqs[] holds one of these for every
 * virtqueue entry and is indexed by the entry's qe_index.
 *
 * vioblk_alloc_reqs() DMA-binds sizeof (hdr) + 1 bytes starting at hdr, so
 * status must stay immediately after hdr.
 */
struct vioblk_req {
	struct vioblk_req_hdr	hdr;
	uint8_t			status;	/* VIRTIO_BLK_S_*, written by device */
	uint8_t			unused[3];
	unsigned int		ndmac;	/* cookie count of the hdr binding */
	ddi_dma_handle_t	dmah;	/* handle for the hdr/status binding */
	ddi_dma_handle_t	bd_dmah; /* not referenced in this file */
	ddi_dma_cookie_t	dmac;	/* cookie for the hdr/status binding */
	bd_xfer_t		*xfer;	/* in-flight xfer, or VIOBLK_POISON */
};
117 
/*
 * Layout of the named kstat data created in vioblk_attach().  Most values are
 * refreshed from struct vioblk_lstats by vioblk_ksupdate(); the outofmemory
 * and badoffset counters are bumped directly by vioblk_rw().
 *
 * NOTE(review): sts_rw_queuemax is neither initialized with
 * kstat_named_init() nor updated anywhere in this file.
 */
struct vioblk_stats {
	struct kstat_named	sts_rw_outofmemory;
	struct kstat_named	sts_rw_badoffset;
	struct kstat_named	sts_rw_queuemax;
	struct kstat_named	sts_rw_cookiesmax;
	struct kstat_named	sts_rw_cacheflush;
	struct kstat_named	sts_intr_queuemax;
	struct kstat_named	sts_intr_total;
	struct kstat_named	sts_io_errors;
	struct kstat_named	sts_unsupp_errors;
	struct kstat_named	sts_nxio_errors;
};
130 
/*
 * Driver-local counters updated on the I/O and interrupt paths and copied
 * into the kstat by vioblk_ksupdate().
 */
struct vioblk_lstats {
	uint64_t		rw_cacheflush;	/* successful flush submissions */
	uint64_t		intr_total;	/* interrupt handler invocations */
	unsigned int		rw_cookiesmax;	/* largest chain submitted */
	unsigned int		intr_queuemax;	/* most completions in one pass */
	unsigned int		io_errors;	/* VIRTIO_BLK_S_IOERR results */
	unsigned int		unsupp_errors;	/* VIRTIO_BLK_S_UNSUPP results */
	unsigned int		nxio_errors;	/* unknown status values */
};
140 
/*
 * Per-instance driver state, allocated in vioblk_attach() and stored as the
 * devinfo node's driver-private data.
 */
struct vioblk_softc {
	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
	/*
	 * Generic virtio state.  vioblk_int_handler() receives a pointer to
	 * this member and recovers the softc with __containerof().
	 */
	struct virtio_softc	sc_virtio;
	struct virtqueue	*sc_vq;		/* the request virtqueue */
	bd_handle_t		bd_h;		/* blkdev handle */
	struct vioblk_req	*sc_reqs;	/* vq_num entries */
	struct vioblk_stats	*ks_data;	/* sc_intrstat->ks_data */
	kstat_t			*sc_intrstat;
	uint64_t		sc_capacity;	/* CAPACITY config register */
	uint64_t		sc_nblks;	/* copy of sc_capacity */
	struct vioblk_lstats	sc_stats;
	short			sc_blkflags;	/* not referenced in this file */
	boolean_t		sc_in_poll_mode; /* vq interrupts stopped */
	boolean_t		sc_readonly;	/* VIRTIO_BLK_F_RO negotiated */
	int			sc_blk_size;	/* logical block size */
	int			sc_pblk_size;	/* physical block size */
	int			sc_seg_max;	/* descriptors per request */
	int			sc_seg_size_max; /* max bytes per data cookie */
	/* lock_devid/cv_devid: GET_ID completion handshake with the handler */
	kmutex_t		lock_devid;
	kcondvar_t		cv_devid;
	/* GET_ID target; the extra byte stays zero as a NUL terminator */
	char			devid[VIRTIO_BLK_ID_BYTES + 1];
};
163 
/* Forward declarations for functions referenced before their definition. */
static int vioblk_get_id(struct vioblk_softc *sc);

/* blkdev entry points collected in vioblk_ops below. */
static int vioblk_read(void *arg, bd_xfer_t *xfer);
static int vioblk_write(void *arg, bd_xfer_t *xfer);
static int vioblk_flush(void *arg, bd_xfer_t *xfer);
static void vioblk_driveinfo(void *arg, bd_drive_t *drive);
static int vioblk_mediainfo(void *arg, bd_media_t *media);
static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *);
/* Also called directly by vioblk_rw_poll() when interrupts are off. */
uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2);
173 
/*
 * Operations vector passed to bd_alloc_handle().  The initializer is
 * positional.  vioblk_attach() clears the o_sync_cache member when the host
 * does not offer VIRTIO_BLK_F_FLUSH; note that this structure is shared by
 * all instances.
 */
static bd_ops_t vioblk_ops = {
	BD_OPS_VERSION_0,
	vioblk_driveinfo,
	vioblk_mediainfo,
	vioblk_devid_init,
	vioblk_flush,
	vioblk_read,
	vioblk_write,
};
183 
static int vioblk_quiesce(dev_info_t *);
static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);

/*
 * Device operations.  Only attach, detach and quiesce are driver-specific.
 * cb_ops is left NULL here; _init() passes this structure to bd_mod_init()
 * before installing the module.
 */
static struct dev_ops vioblk_dev_ops = {
	DEVO_REV,
	0,
	ddi_no_info,
	nulldev,	/* identify */
	nulldev,	/* probe */
	vioblk_attach,	/* attach */
	vioblk_detach,	/* detach */
	nodev,		/* reset */
	NULL,		/* cb_ops */
	NULL,		/* bus_ops */
	NULL,		/* power */
	vioblk_quiesce	/* quiesce */
};
202 
203 
204 
/* Standard module linkage initialization for a device driver */
extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	vioblk_ident,    /* short description */
	&vioblk_dev_ops	/* driver specific ops */
};
213 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info(); the
 * linkage list holds the single driver entry and is NULL-terminated.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	{
		(void *)&modldrv,
		NULL,
	},
};
221 
/* Register access attributes used by vioblk_attach() when mapping BAR0. */
ddi_device_acc_attr_t vioblk_attr = {
	DDI_DEVICE_ATTR_V0,
	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
	DDI_STORECACHING_OK_ACC,
	DDI_DEFAULT_ACC
};
228 
/*
 * DMA attr for the header/status blocks.  The scatter/gather length is 1, so
 * each binding made in vioblk_alloc_reqs() yields a single cookie; vioblk_rw()
 * relies on that by addressing both buffers through req->dmac alone.
 */
static ddi_dma_attr_t vioblk_req_dma_attr = {
	DMA_ATTR_V0,			/* dma_attr version	*/
	0,				/* dma_attr_addr_lo	*/
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
	1,				/* dma_attr_align	*/
	1,				/* dma_attr_burstsizes	*/
	1,				/* dma_attr_minxfer	*/
	0xFFFFFFFFull,			/* dma_attr_maxxfer	*/
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
	1,				/* dma_attr_sgllen	*/
	1,				/* dma_attr_granular	*/
	0,				/* dma_attr_flags	*/
};
244 
/*
 * DMA attr for the data blocks.  vioblk_attach() fills in dma_attr_maxxfer
 * and dma_attr_sgllen from the negotiated limits before handing this to
 * bd_alloc_handle(); vioblk_get_id() also uses it for the devid buffer.
 * Being a single global, it is shared by every instance.
 */
static ddi_dma_attr_t vioblk_bd_dma_attr = {
	DMA_ATTR_V0,			/* dma_attr version	*/
	0,				/* dma_attr_addr_lo	*/
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
	1,				/* dma_attr_align	*/
	1,				/* dma_attr_burstsizes	*/
	1,				/* dma_attr_minxfer	*/
	0,				/* dma_attr_maxxfer, set in attach */
	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
	0,				/* dma_attr_sgllen, set in attach */
	1,				/* dma_attr_granular	*/
	0,				/* dma_attr_flags	*/
};
260 
261 static int
262 vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type,
263     uint32_t len)
264 {
265 	struct vioblk_req *req;
266 	struct vq_entry *ve_hdr;
267 	int total_cookies, write;
268 
269 	write = (type == VIRTIO_BLK_T_OUT ||
270 	    type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0;
271 	total_cookies = 2;
272 
273 	if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) {
274 		sc->ks_data->sts_rw_badoffset.value.ui64++;
275 		return (EINVAL);
276 	}
277 
278 	/* allocate top entry */
279 	ve_hdr = vq_alloc_entry(sc->sc_vq);
280 	if (!ve_hdr) {
281 		sc->ks_data->sts_rw_outofmemory.value.ui64++;
282 		return (ENOMEM);
283 	}
284 
285 	/* getting request */
286 	req = &sc->sc_reqs[ve_hdr->qe_index];
287 	req->hdr.type = type;
288 	req->hdr.ioprio = 0;
289 	req->hdr.sector = xfer->x_blkno;
290 	req->xfer = xfer;
291 
292 	/* Header */
293 	virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress,
294 	    sizeof (struct vioblk_req_hdr), B_TRUE);
295 
296 	/* Payload */
297 	if (len > 0) {
298 		virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac,
299 		    xfer->x_ndmac, write ? B_TRUE : B_FALSE);
300 		total_cookies += xfer->x_ndmac;
301 	}
302 
303 	/* Status */
304 	virtio_ve_add_indirect_buf(ve_hdr,
305 	    req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr),
306 	    sizeof (uint8_t), B_FALSE);
307 
308 	/* sending the whole chain to the device */
309 	virtio_push_chain(ve_hdr, B_TRUE);
310 
311 	if (sc->sc_stats.rw_cookiesmax < total_cookies)
312 		sc->sc_stats.rw_cookiesmax = total_cookies;
313 
314 	return (DDI_SUCCESS);
315 }
316 
317 /*
318  * Now in polling mode. Interrupts are off, so we
319  * 1) poll for the already queued requests to complete.
320  * 2) push our request.
321  * 3) wait for our request to complete.
322  */
323 static int
324 vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer,
325     int type, uint32_t len)
326 {
327 	clock_t tmout;
328 	int ret;
329 
330 	ASSERT(xfer->x_flags & BD_XFER_POLL);
331 
332 	/* Prevent a hard hang. */
333 	tmout = drv_usectohz(30000000);
334 
335 	/* Poll for an empty queue */
336 	while (vq_num_used(sc->sc_vq)) {
337 		/* Check if any pending requests completed. */
338 		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
339 		if (ret != DDI_INTR_CLAIMED) {
340 			drv_usecwait(10);
341 			tmout -= 10;
342 			return (ETIMEDOUT);
343 		}
344 	}
345 
346 	ret = vioblk_rw(sc, xfer, type, len);
347 	if (ret)
348 		return (ret);
349 
350 	tmout = drv_usectohz(30000000);
351 	/* Poll for an empty queue again. */
352 	while (vq_num_used(sc->sc_vq)) {
353 		/* Check if any pending requests completed. */
354 		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
355 		if (ret != DDI_INTR_CLAIMED) {
356 			drv_usecwait(10);
357 			tmout -= 10;
358 			return (ETIMEDOUT);
359 		}
360 	}
361 
362 	return (DDI_SUCCESS);
363 }
364 
365 static int
366 vioblk_read(void *arg, bd_xfer_t *xfer)
367 {
368 	int ret;
369 	struct vioblk_softc *sc = (void *)arg;
370 
371 	if (xfer->x_flags & BD_XFER_POLL) {
372 		if (!sc->sc_in_poll_mode) {
373 			virtio_stop_vq_intr(sc->sc_vq);
374 			sc->sc_in_poll_mode = 1;
375 		}
376 
377 		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN,
378 		    xfer->x_nblks * DEV_BSIZE);
379 	} else {
380 		if (sc->sc_in_poll_mode) {
381 			virtio_start_vq_intr(sc->sc_vq);
382 			sc->sc_in_poll_mode = 0;
383 		}
384 
385 		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN,
386 		    xfer->x_nblks * DEV_BSIZE);
387 	}
388 
389 	return (ret);
390 }
391 
392 static int
393 vioblk_write(void *arg, bd_xfer_t *xfer)
394 {
395 	int ret;
396 	struct vioblk_softc *sc = (void *)arg;
397 
398 	if (xfer->x_flags & BD_XFER_POLL) {
399 		if (!sc->sc_in_poll_mode) {
400 			virtio_stop_vq_intr(sc->sc_vq);
401 			sc->sc_in_poll_mode = 1;
402 		}
403 
404 		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT,
405 		    xfer->x_nblks * DEV_BSIZE);
406 	} else {
407 		if (sc->sc_in_poll_mode) {
408 			virtio_start_vq_intr(sc->sc_vq);
409 			sc->sc_in_poll_mode = 0;
410 		}
411 
412 		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT,
413 		    xfer->x_nblks * DEV_BSIZE);
414 	}
415 	return (ret);
416 }
417 
418 static int
419 vioblk_flush(void *arg, bd_xfer_t *xfer)
420 {
421 	int ret;
422 	struct vioblk_softc *sc = (void *)arg;
423 
424 	ASSERT((xfer->x_flags & BD_XFER_POLL) == 0);
425 
426 	ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT,
427 	    xfer->x_nblks * DEV_BSIZE);
428 
429 	if (!ret)
430 		sc->sc_stats.rw_cacheflush++;
431 
432 	return (ret);
433 }
434 
435 
436 static void
437 vioblk_driveinfo(void *arg, bd_drive_t *drive)
438 {
439 	struct vioblk_softc *sc = (void *)arg;
440 
441 	drive->d_qsize = sc->sc_vq->vq_num;
442 	drive->d_removable = B_FALSE;
443 	drive->d_hotpluggable = B_TRUE;
444 	drive->d_target = 0;
445 	drive->d_lun = 0;
446 
447 	drive->d_vendor = "Virtio";
448 	drive->d_vendor_len = strlen(drive->d_vendor);
449 
450 	drive->d_product = "Block Device";
451 	drive->d_product_len = strlen(drive->d_product);
452 
453 	(void) vioblk_get_id(sc);
454 	drive->d_serial = sc->devid;
455 	drive->d_serial_len = strlen(drive->d_serial);
456 
457 	drive->d_revision = "0000";
458 	drive->d_revision_len = strlen(drive->d_revision);
459 }
460 
461 static int
462 vioblk_mediainfo(void *arg, bd_media_t *media)
463 {
464 	struct vioblk_softc *sc = (void *)arg;
465 
466 	media->m_nblks = sc->sc_nblks;
467 	media->m_blksize = sc->sc_blk_size;
468 	media->m_readonly = sc->sc_readonly;
469 	media->m_pblksize = sc->sc_pblk_size;
470 	return (0);
471 }
472 
473 static int
474 vioblk_get_id(struct vioblk_softc *sc)
475 {
476 	clock_t deadline;
477 	int ret;
478 	bd_xfer_t xfer;
479 
480 	deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000);
481 	(void) memset(&xfer, 0, sizeof (bd_xfer_t));
482 	xfer.x_nblks = 1;
483 
484 	ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr,
485 	    DDI_DMA_SLEEP, NULL, &xfer.x_dmah);
486 	if (ret != DDI_SUCCESS)
487 		goto out_alloc;
488 
489 	ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid,
490 	    VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT,
491 	    DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac);
492 	if (ret != DDI_DMA_MAPPED) {
493 		ret = DDI_FAILURE;
494 		goto out_map;
495 	}
496 
497 	mutex_enter(&sc->lock_devid);
498 
499 	ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID,
500 	    VIRTIO_BLK_ID_BYTES);
501 	if (ret) {
502 		mutex_exit(&sc->lock_devid);
503 		goto out_rw;
504 	}
505 
506 	/* wait for reply */
507 	ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline);
508 	mutex_exit(&sc->lock_devid);
509 
510 	(void) ddi_dma_unbind_handle(xfer.x_dmah);
511 	ddi_dma_free_handle(&xfer.x_dmah);
512 
513 	/* timeout */
514 	if (ret < 0) {
515 		dev_err(sc->sc_dev, CE_WARN,
516 		    "Cannot get devid from the device");
517 		return (DDI_FAILURE);
518 	}
519 
520 	return (0);
521 
522 out_rw:
523 	(void) ddi_dma_unbind_handle(xfer.x_dmah);
524 out_map:
525 	ddi_dma_free_handle(&xfer.x_dmah);
526 out_alloc:
527 	return (ret);
528 }
529 
530 static int
531 vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
532 {
533 	struct vioblk_softc *sc = (void *)arg;
534 	int ret;
535 
536 	ret = vioblk_get_id(sc);
537 	if (ret != DDI_SUCCESS)
538 		return (ret);
539 
540 	ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL,
541 	    VIRTIO_BLK_ID_BYTES, sc->devid, devid);
542 	if (ret != DDI_SUCCESS) {
543 		dev_err(devinfo, CE_WARN, "Cannot build devid from the device");
544 		return (ret);
545 	}
546 
547 	dev_debug(sc->sc_dev, CE_NOTE,
548 	    "devid %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x",
549 	    sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3],
550 	    sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7],
551 	    sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11],
552 	    sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15],
553 	    sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]);
554 
555 	return (0);
556 }
557 
558 static void
559 vioblk_show_features(struct vioblk_softc *sc, const char *prefix,
560     uint32_t features)
561 {
562 	char buf[512];
563 	char *bufp = buf;
564 	char *bufend = buf + sizeof (buf);
565 
566 	/* LINTED E_PTRDIFF_OVERFLOW */
567 	bufp += snprintf(bufp, bufend - bufp, prefix);
568 
569 	/* LINTED E_PTRDIFF_OVERFLOW */
570 	bufp += virtio_show_features(features, bufp, bufend - bufp);
571 
572 
573 	/* LINTED E_PTRDIFF_OVERFLOW */
574 	bufp += snprintf(bufp, bufend - bufp, "Vioblk ( ");
575 
576 	if (features & VIRTIO_BLK_F_BARRIER)
577 		/* LINTED E_PTRDIFF_OVERFLOW */
578 		bufp += snprintf(bufp, bufend - bufp, "BARRIER ");
579 	if (features & VIRTIO_BLK_F_SIZE_MAX)
580 		/* LINTED E_PTRDIFF_OVERFLOW */
581 		bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX ");
582 	if (features & VIRTIO_BLK_F_SEG_MAX)
583 		/* LINTED E_PTRDIFF_OVERFLOW */
584 		bufp += snprintf(bufp, bufend - bufp, "SEG_MAX ");
585 	if (features & VIRTIO_BLK_F_GEOMETRY)
586 		/* LINTED E_PTRDIFF_OVERFLOW */
587 		bufp += snprintf(bufp, bufend - bufp, "GEOMETRY ");
588 	if (features & VIRTIO_BLK_F_RO)
589 		/* LINTED E_PTRDIFF_OVERFLOW */
590 		bufp += snprintf(bufp, bufend - bufp, "RO ");
591 	if (features & VIRTIO_BLK_F_BLK_SIZE)
592 		/* LINTED E_PTRDIFF_OVERFLOW */
593 		bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE ");
594 	if (features & VIRTIO_BLK_F_SCSI)
595 		/* LINTED E_PTRDIFF_OVERFLOW */
596 		bufp += snprintf(bufp, bufend - bufp, "SCSI ");
597 	if (features & VIRTIO_BLK_F_FLUSH)
598 		/* LINTED E_PTRDIFF_OVERFLOW */
599 		bufp += snprintf(bufp, bufend - bufp, "FLUSH ");
600 	if (features & VIRTIO_BLK_F_TOPOLOGY)
601 		/* LINTED E_PTRDIFF_OVERFLOW */
602 		bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY ");
603 
604 	/* LINTED E_PTRDIFF_OVERFLOW */
605 	bufp += snprintf(bufp, bufend - bufp, ")");
606 	*bufp = '\0';
607 
608 	dev_debug(sc->sc_dev, CE_NOTE, "%s", buf);
609 }
610 
611 static int
612 vioblk_dev_features(struct vioblk_softc *sc)
613 {
614 	uint32_t host_features;
615 
616 	host_features = virtio_negotiate_features(&sc->sc_virtio,
617 	    VIRTIO_BLK_F_RO |
618 	    VIRTIO_BLK_F_GEOMETRY |
619 	    VIRTIO_BLK_F_BLK_SIZE |
620 	    VIRTIO_BLK_F_FLUSH |
621 	    VIRTIO_BLK_F_TOPOLOGY |
622 	    VIRTIO_BLK_F_SEG_MAX |
623 	    VIRTIO_BLK_F_SIZE_MAX |
624 	    VIRTIO_F_RING_INDIRECT_DESC);
625 
626 	vioblk_show_features(sc, "Host features: ", host_features);
627 	vioblk_show_features(sc, "Negotiated features: ",
628 	    sc->sc_virtio.sc_features);
629 
630 	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
631 		dev_err(sc->sc_dev, CE_NOTE,
632 		    "Host does not support RING_INDIRECT_DESC, bye.");
633 		return (DDI_FAILURE);
634 	}
635 
636 	return (DDI_SUCCESS);
637 }
638 
639 /* ARGSUSED */
640 uint_t
641 vioblk_int_handler(caddr_t arg1, caddr_t arg2)
642 {
643 	struct virtio_softc *vsc = (void *)arg1;
644 	struct vioblk_softc *sc = __containerof(vsc,
645 	    struct vioblk_softc, sc_virtio);
646 	struct vq_entry *ve;
647 	uint32_t len;
648 	int i = 0, error;
649 
650 	while ((ve = virtio_pull_chain(sc->sc_vq, &len))) {
651 		struct vioblk_req *req = &sc->sc_reqs[ve->qe_index];
652 		bd_xfer_t *xfer = req->xfer;
653 		uint8_t status = req->status;
654 		uint32_t type = req->hdr.type;
655 
656 		if (req->xfer == (void *)VIOBLK_POISON) {
657 			dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!");
658 			virtio_free_chain(ve);
659 			return (DDI_INTR_CLAIMED);
660 		}
661 
662 		req->xfer = (void *) VIOBLK_POISON;
663 
664 		/* Note: blkdev tears down the payload mapping for us. */
665 		virtio_free_chain(ve);
666 
667 		/* returning payload back to blkdev */
668 		switch (status) {
669 			case VIRTIO_BLK_S_OK:
670 				error = 0;
671 				break;
672 			case VIRTIO_BLK_S_IOERR:
673 				error = EIO;
674 				sc->sc_stats.io_errors++;
675 				break;
676 			case VIRTIO_BLK_S_UNSUPP:
677 				sc->sc_stats.unsupp_errors++;
678 				error = ENOTTY;
679 				break;
680 			default:
681 				sc->sc_stats.nxio_errors++;
682 				error = ENXIO;
683 				break;
684 		}
685 
686 		if (type == VIRTIO_BLK_T_GET_ID) {
687 			/* notify devid_init */
688 			mutex_enter(&sc->lock_devid);
689 			cv_broadcast(&sc->cv_devid);
690 			mutex_exit(&sc->lock_devid);
691 		} else
692 			bd_xfer_done(xfer, error);
693 
694 		i++;
695 	}
696 
697 	/* update stats */
698 	if (sc->sc_stats.intr_queuemax < i)
699 		sc->sc_stats.intr_queuemax = i;
700 	sc->sc_stats.intr_total++;
701 
702 	return (DDI_INTR_CLAIMED);
703 }
704 
705 /* ARGSUSED */
706 uint_t
707 vioblk_config_handler(caddr_t arg1, caddr_t arg2)
708 {
709 	return (DDI_INTR_CLAIMED);
710 }
711 
712 static int
713 vioblk_register_ints(struct vioblk_softc *sc)
714 {
715 	int ret;
716 
717 	struct virtio_int_handler vioblk_conf_h = {
718 		vioblk_config_handler
719 	};
720 
721 	struct virtio_int_handler vioblk_vq_h[] = {
722 		{ vioblk_int_handler },
723 		{ NULL },
724 	};
725 
726 	ret = virtio_register_ints(&sc->sc_virtio,
727 	    &vioblk_conf_h, vioblk_vq_h);
728 
729 	return (ret);
730 }
731 
732 static void
733 vioblk_free_reqs(struct vioblk_softc *sc)
734 {
735 	int i, qsize;
736 
737 	qsize = sc->sc_vq->vq_num;
738 
739 	for (i = 0; i < qsize; i++) {
740 		struct vioblk_req *req = &sc->sc_reqs[i];
741 
742 		if (req->ndmac)
743 			(void) ddi_dma_unbind_handle(req->dmah);
744 
745 		if (req->dmah)
746 			ddi_dma_free_handle(&req->dmah);
747 	}
748 
749 	kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize);
750 }
751 
752 static int
753 vioblk_alloc_reqs(struct vioblk_softc *sc)
754 {
755 	int i, qsize;
756 	int ret;
757 
758 	qsize = sc->sc_vq->vq_num;
759 
760 	sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP);
761 
762 	for (i = 0; i < qsize; i++) {
763 		struct vioblk_req *req = &sc->sc_reqs[i];
764 
765 		ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr,
766 		    DDI_DMA_SLEEP, NULL, &req->dmah);
767 		if (ret != DDI_SUCCESS) {
768 
769 			dev_err(sc->sc_dev, CE_WARN,
770 			    "Can't allocate dma handle for req "
771 			    "buffer %d", i);
772 			goto exit;
773 		}
774 
775 		ret = ddi_dma_addr_bind_handle(req->dmah, NULL,
776 		    (caddr_t)&req->hdr,
777 		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
778 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
779 		    NULL, &req->dmac, &req->ndmac);
780 		if (ret != DDI_DMA_MAPPED) {
781 			dev_err(sc->sc_dev, CE_WARN,
782 			    "Can't bind req buffer %d", i);
783 			goto exit;
784 		}
785 	}
786 
787 	return (0);
788 
789 exit:
790 	vioblk_free_reqs(sc);
791 	return (ENOMEM);
792 }
793 
794 
795 static int
796 vioblk_ksupdate(kstat_t *ksp, int rw)
797 {
798 	struct vioblk_softc *sc = ksp->ks_private;
799 
800 	if (rw == KSTAT_WRITE)
801 		return (EACCES);
802 
803 	sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax;
804 	sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax;
805 	sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors;
806 	sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors;
807 	sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors;
808 	sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush;
809 	sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total;
810 
811 
812 	return (0);
813 }
814 
815 static int
816 vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
817 {
818 	int ret = DDI_SUCCESS;
819 	int instance;
820 	struct vioblk_softc *sc;
821 	struct virtio_softc *vsc;
822 	struct vioblk_stats *ks_data;
823 
824 	instance = ddi_get_instance(devinfo);
825 
826 	switch (cmd) {
827 	case DDI_ATTACH:
828 		break;
829 
830 	case DDI_RESUME:
831 	case DDI_PM_RESUME:
832 		dev_err(devinfo, CE_WARN, "resume not supported yet");
833 		return (DDI_FAILURE);
834 
835 	default:
836 		dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd);
837 		return (DDI_FAILURE);
838 	}
839 
840 	sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP);
841 	ddi_set_driver_private(devinfo, sc);
842 
843 	vsc = &sc->sc_virtio;
844 
845 	/* Duplicate for faster access / less typing */
846 	sc->sc_dev = devinfo;
847 	vsc->sc_dev = devinfo;
848 
849 	cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL);
850 	mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL);
851 
852 	/*
853 	 * Initialize interrupt kstat.  This should not normally fail, since
854 	 * we don't use a persistent stat.  We do it this way to avoid having
855 	 * to test for it at run time on the hot path.
856 	 */
857 	sc->sc_intrstat = kstat_create("vioblk", instance,
858 	    "intrs", "controller", KSTAT_TYPE_NAMED,
859 	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
860 	    KSTAT_FLAG_PERSISTENT);
861 	if (sc->sc_intrstat == NULL) {
862 		dev_err(devinfo, CE_WARN, "kstat_create failed");
863 		goto exit_intrstat;
864 	}
865 	ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data;
866 	kstat_named_init(&ks_data->sts_rw_outofmemory,
867 	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
868 	kstat_named_init(&ks_data->sts_rw_badoffset,
869 	    "total_rw_badoffset", KSTAT_DATA_UINT64);
870 	kstat_named_init(&ks_data->sts_intr_total,
871 	    "total_intr", KSTAT_DATA_UINT64);
872 	kstat_named_init(&ks_data->sts_io_errors,
873 	    "total_io_errors", KSTAT_DATA_UINT32);
874 	kstat_named_init(&ks_data->sts_unsupp_errors,
875 	    "total_unsupp_errors", KSTAT_DATA_UINT32);
876 	kstat_named_init(&ks_data->sts_nxio_errors,
877 	    "total_nxio_errors", KSTAT_DATA_UINT32);
878 	kstat_named_init(&ks_data->sts_rw_cacheflush,
879 	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
880 	kstat_named_init(&ks_data->sts_rw_cookiesmax,
881 	    "max_rw_cookies", KSTAT_DATA_UINT32);
882 	kstat_named_init(&ks_data->sts_intr_queuemax,
883 	    "max_intr_queue", KSTAT_DATA_UINT32);
884 	sc->ks_data = ks_data;
885 	sc->sc_intrstat->ks_private = sc;
886 	sc->sc_intrstat->ks_update = vioblk_ksupdate;
887 	kstat_install(sc->sc_intrstat);
888 
889 	/* map BAR0 */
890 	ret = ddi_regs_map_setup(devinfo, 1,
891 	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
892 	    0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh);
893 	if (ret != DDI_SUCCESS) {
894 		dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret);
895 		goto exit_map;
896 	}
897 
898 	virtio_device_reset(&sc->sc_virtio);
899 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
900 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
901 
902 	if (vioblk_register_ints(sc)) {
903 		dev_err(devinfo, CE_WARN, "Unable to add interrupt");
904 		goto exit_int;
905 	}
906 
907 	ret = vioblk_dev_features(sc);
908 	if (ret)
909 		goto exit_features;
910 
911 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO)
912 		sc->sc_readonly = B_TRUE;
913 	else
914 		sc->sc_readonly = B_FALSE;
915 
916 	sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio,
917 	    VIRTIO_BLK_CONFIG_CAPACITY);
918 	sc->sc_nblks = sc->sc_capacity;
919 
920 	sc->sc_blk_size = DEV_BSIZE;
921 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) {
922 		sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio,
923 		    VIRTIO_BLK_CONFIG_BLK_SIZE);
924 	}
925 
926 	sc->sc_pblk_size = sc->sc_blk_size;
927 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_TOPOLOGY) {
928 		sc->sc_pblk_size <<= virtio_read_device_config_1(&sc->sc_virtio,
929 		    VIRTIO_BLK_CONFIG_TOPO_PBEXP);
930 	}
931 
932 	/* Flushing is not supported. */
933 	if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) {
934 		vioblk_ops.o_sync_cache = NULL;
935 	}
936 
937 	sc->sc_seg_max = DEF_MAXINDIRECT;
938 	/* The max number of segments (cookies) in a request */
939 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) {
940 		sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio,
941 		    VIRTIO_BLK_CONFIG_SEG_MAX);
942 
943 		/* That's what Linux does. */
944 		if (!sc->sc_seg_max)
945 			sc->sc_seg_max = 1;
946 
947 		/*
948 		 * SEG_MAX corresponds to the number of _data_
949 		 * blocks in a request
950 		 */
951 		sc->sc_seg_max += 2;
952 	}
953 	/* 2 descriptors taken for header/status */
954 	vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2;
955 
956 
957 	/* The maximum size for a cookie in a request. */
958 	sc->sc_seg_size_max = DEF_MAXSECTOR;
959 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) {
960 		sc->sc_seg_size_max = virtio_read_device_config_4(
961 		    &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX);
962 	}
963 
964 	/* The maximum request size */
965 	vioblk_bd_dma_attr.dma_attr_maxxfer =
966 	    vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max;
967 
968 	dev_debug(devinfo, CE_NOTE,
969 	    "nblks=%" PRIu64 " blksize=%d (%d) num_seg=%d, "
970 	    "seg_size=%d, maxxfer=%" PRIu64,
971 	    sc->sc_nblks, sc->sc_blk_size, sc->sc_pblk_size,
972 	    vioblk_bd_dma_attr.dma_attr_sgllen,
973 	    sc->sc_seg_size_max,
974 	    vioblk_bd_dma_attr.dma_attr_maxxfer);
975 
976 
977 	sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0,
978 	    sc->sc_seg_max, "I/O request");
979 	if (sc->sc_vq == NULL) {
980 		goto exit_alloc1;
981 	}
982 
983 	ret = vioblk_alloc_reqs(sc);
984 	if (ret) {
985 		goto exit_alloc2;
986 	}
987 
988 	sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr,
989 	    KM_SLEEP);
990 
991 
992 	virtio_set_status(&sc->sc_virtio,
993 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
994 	virtio_start_vq_intr(sc->sc_vq);
995 
996 	ret = virtio_enable_ints(&sc->sc_virtio);
997 	if (ret)
998 		goto exit_enable_ints;
999 
1000 	ret = bd_attach_handle(devinfo, sc->bd_h);
1001 	if (ret != DDI_SUCCESS) {
1002 		dev_err(devinfo, CE_WARN, "Failed to attach blkdev");
1003 		goto exit_attach_bd;
1004 	}
1005 
1006 	return (DDI_SUCCESS);
1007 
1008 exit_attach_bd:
1009 	/*
1010 	 * There is no virtio_disable_ints(), it's done in virtio_release_ints.
1011 	 * If they ever get split, don't forget to add a call here.
1012 	 */
1013 exit_enable_ints:
1014 	virtio_stop_vq_intr(sc->sc_vq);
1015 	bd_free_handle(sc->bd_h);
1016 	vioblk_free_reqs(sc);
1017 exit_alloc2:
1018 	virtio_free_vq(sc->sc_vq);
1019 exit_alloc1:
1020 exit_features:
1021 	virtio_release_ints(&sc->sc_virtio);
1022 exit_int:
1023 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1024 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1025 exit_map:
1026 	kstat_delete(sc->sc_intrstat);
1027 exit_intrstat:
1028 	mutex_destroy(&sc->lock_devid);
1029 	cv_destroy(&sc->cv_devid);
1030 	kmem_free(sc, sizeof (struct vioblk_softc));
1031 	return (DDI_FAILURE);
1032 }
1033 
1034 static int
1035 vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1036 {
1037 	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
1038 
1039 	switch (cmd) {
1040 	case DDI_DETACH:
1041 		break;
1042 
1043 	case DDI_PM_SUSPEND:
1044 		cmn_err(CE_WARN, "suspend not supported yet");
1045 		return (DDI_FAILURE);
1046 
1047 	default:
1048 		cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
1049 		return (DDI_FAILURE);
1050 	}
1051 
1052 	(void) bd_detach_handle(sc->bd_h);
1053 	virtio_stop_vq_intr(sc->sc_vq);
1054 	virtio_release_ints(&sc->sc_virtio);
1055 	vioblk_free_reqs(sc);
1056 	virtio_free_vq(sc->sc_vq);
1057 	virtio_device_reset(&sc->sc_virtio);
1058 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1059 	kstat_delete(sc->sc_intrstat);
1060 	kmem_free(sc, sizeof (struct vioblk_softc));
1061 
1062 	return (DDI_SUCCESS);
1063 }
1064 
1065 static int
1066 vioblk_quiesce(dev_info_t *devinfo)
1067 {
1068 	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
1069 
1070 	virtio_stop_vq_intr(sc->sc_vq);
1071 	virtio_device_reset(&sc->sc_virtio);
1072 
1073 	return (DDI_SUCCESS);
1074 }
1075 
1076 int
1077 _init(void)
1078 {
1079 	int rv;
1080 
1081 	bd_mod_init(&vioblk_dev_ops);
1082 
1083 	if ((rv = mod_install(&modlinkage)) != 0) {
1084 		bd_mod_fini(&vioblk_dev_ops);
1085 	}
1086 
1087 	return (rv);
1088 }
1089 
1090 int
1091 _fini(void)
1092 {
1093 	int rv;
1094 
1095 	if ((rv = mod_remove(&modlinkage)) == 0) {
1096 		bd_mod_fini(&vioblk_dev_ops);
1097 	}
1098 
1099 	return (rv);
1100 }
1101 
1102 int
1103 _info(struct modinfo *modinfop)
1104 {
1105 	return (mod_info(&modlinkage, modinfop));
1106 }
1107