xref: /titanic_52/usr/src/uts/common/io/vioblk/vioblk.c (revision 694c35faa87b858ecdadfe4fc592615f4eefbb07)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2012, Nexenta Systems, Inc. All rights reserved.
24  * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
25  */
26 
27 
28 #include <sys/modctl.h>
29 #include <sys/blkdev.h>
30 #include <sys/types.h>
31 #include <sys/errno.h>
32 #include <sys/param.h>
33 #include <sys/stropts.h>
34 #include <sys/stream.h>
35 #include <sys/strsubr.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/devops.h>
39 #include <sys/ksynch.h>
40 #include <sys/stat.h>
41 #include <sys/modctl.h>
42 #include <sys/debug.h>
43 #include <sys/pci.h>
44 #include <sys/sysmacros.h>
45 #include "virtiovar.h"
46 #include "virtioreg.h"
47 
/* Feature bits negotiated with the host */
#define	VIRTIO_BLK_F_BARRIER	(1 << 0)
#define	VIRTIO_BLK_F_SIZE_MAX	(1 << 1)
#define	VIRTIO_BLK_F_SEG_MAX	(1 << 2)
#define	VIRTIO_BLK_F_GEOMETRY	(1 << 4)
#define	VIRTIO_BLK_F_RO		(1 << 5)
#define	VIRTIO_BLK_F_BLK_SIZE	(1 << 6)
#define	VIRTIO_BLK_F_SCSI	(1 << 7)
#define	VIRTIO_BLK_F_FLUSH	(1 << 9)
#define	VIRTIO_BLK_F_TOPOLOGY	(1 << 10)

/* Offsets of the device configuration registers (width in the comment) */
#define	VIRTIO_BLK_CONFIG_CAPACITY	0	/* 64bit */
#define	VIRTIO_BLK_CONFIG_SIZE_MAX	8	/* 32bit */
#define	VIRTIO_BLK_CONFIG_SEG_MAX	12	/* 32bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_C	16	/* 16bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_H	18	/* 8bit */
#define	VIRTIO_BLK_CONFIG_GEOMETRY_S	19	/* 8bit */
#define	VIRTIO_BLK_CONFIG_BLK_SIZE	20	/* 32bit */
#define	VIRTIO_BLK_CONFIG_TOPOLOGY	24	/* 32bit */

/* Request types placed in vioblk_req_hdr.type */
#define	VIRTIO_BLK_T_IN			0
#define	VIRTIO_BLK_T_OUT		1
#define	VIRTIO_BLK_T_SCSI_CMD		2
#define	VIRTIO_BLK_T_SCSI_CMD_OUT	3
#define	VIRTIO_BLK_T_FLUSH		4
#define	VIRTIO_BLK_T_FLUSH_OUT		5
#define	VIRTIO_BLK_T_GET_ID		8
#define	VIRTIO_BLK_T_BARRIER		0x80000000

/* Number of bytes in the device id returned for VIRTIO_BLK_T_GET_ID */
#define	VIRTIO_BLK_ID_BYTES	20

/* Completion status values written by the device */
#define	VIRTIO_BLK_S_OK		0
#define	VIRTIO_BLK_S_IOERR	1
#define	VIRTIO_BLK_S_UNSUPP	2

/* Defaults used when the host does not advertise SEG_MAX / SIZE_MAX */
#define	DEF_MAXINDIRECT		(128)
#define	DEF_MAXSECTOR		(4096)

/* Marker stored in vioblk_req.xfer once a request has been completed */
#define	VIOBLK_POISON		0xdead0001dead0001
90 
/*
 * Short driver description; referenced by the modldrv linkage structure.
 */
static char vioblk_ident[] = "VirtIO block driver";
95 
/*
 * Header that leads every request handed to the device.  vioblk_rw() fills
 * it in and exposes exactly sizeof (struct vioblk_req_hdr) bytes of it as
 * the first buffer of the descriptor chain.
 */
struct vioblk_req_hdr {
	uint32_t		type;	/* one of VIRTIO_BLK_T_* */
	uint32_t		ioprio;	/* always written as 0 by this driver */
	uint64_t		sector;	/* starting block of the transfer */
};
102 
103 struct vioblk_req {
104 	struct vioblk_req_hdr	hdr;
105 	uint8_t			status;
106 	uint8_t			unused[3];
107 	unsigned int		ndmac;
108 	ddi_dma_handle_t	dmah;
109 	ddi_dma_handle_t	bd_dmah;
110 	ddi_dma_cookie_t	dmac;
111 	bd_xfer_t		*xfer;
112 };
113 
114 struct vioblk_stats {
115 	struct kstat_named	sts_rw_outofmemory;
116 	struct kstat_named	sts_rw_badoffset;
117 	struct kstat_named	sts_rw_queuemax;
118 	struct kstat_named	sts_rw_cookiesmax;
119 	struct kstat_named	sts_rw_cacheflush;
120 	struct kstat_named	sts_intr_queuemax;
121 	struct kstat_named	sts_intr_total;
122 	struct kstat_named	sts_io_errors;
123 	struct kstat_named	sts_unsupp_errors;
124 	struct kstat_named	sts_nxio_errors;
125 };
126 
/*
 * Counters updated directly on the I/O paths; vioblk_ksupdate() copies them
 * into the exported kstat when it is read.
 */
struct vioblk_lstats {
	uint64_t		rw_cacheflush;	/* flush requests submitted */
	uint64_t		intr_total;	/* interrupt handler runs */
	unsigned int		rw_cookiesmax;	/* largest chain pushed */
	unsigned int		intr_queuemax;	/* most chains in one run */
	unsigned int		io_errors;	/* VIRTIO_BLK_S_IOERR seen */
	unsigned int		unsupp_errors;	/* VIRTIO_BLK_S_UNSUPP seen */
	unsigned int		nxio_errors;	/* unknown status seen */
};
136 
137 struct vioblk_softc {
138 	dev_info_t		*sc_dev; /* mirrors virtio_softc->sc_dev */
139 	struct virtio_softc	sc_virtio;
140 	struct virtqueue	*sc_vq;
141 	bd_handle_t		bd_h;
142 	struct vioblk_req	*sc_reqs;
143 	struct vioblk_stats	*ks_data;
144 	kstat_t			*sc_intrstat;
145 	uint64_t		sc_capacity;
146 	uint64_t		sc_nblks;
147 	struct vioblk_lstats	sc_stats;
148 	short			sc_blkflags;
149 	boolean_t		sc_in_poll_mode;
150 	boolean_t		sc_readonly;
151 	int			sc_blk_size;
152 	int			sc_seg_max;
153 	int			sc_seg_size_max;
154 	kmutex_t		lock_devid;
155 	kcondvar_t		cv_devid;
156 	char			devid[VIRTIO_BLK_ID_BYTES + 1];
157 };
158 
159 static int vioblk_read(void *arg, bd_xfer_t *xfer);
160 static int vioblk_write(void *arg, bd_xfer_t *xfer);
161 static int vioblk_flush(void *arg, bd_xfer_t *xfer);
162 static void vioblk_driveinfo(void *arg, bd_drive_t *drive);
163 static int vioblk_mediainfo(void *arg, bd_media_t *media);
164 static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *);
165 uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2);
166 
167 static bd_ops_t vioblk_ops = {
168 	BD_OPS_VERSION_0,
169 	vioblk_driveinfo,
170 	vioblk_mediainfo,
171 	vioblk_devid_init,
172 	vioblk_flush,
173 	vioblk_read,
174 	vioblk_write,
175 };
176 
177 static int vioblk_quiesce(dev_info_t *);
178 static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
179 static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);
180 
181 static struct dev_ops vioblk_dev_ops = {
182 	DEVO_REV,
183 	0,
184 	ddi_no_info,
185 	nulldev,	/* identify */
186 	nulldev,	/* probe */
187 	vioblk_attach,	/* attach */
188 	vioblk_detach,	/* detach */
189 	nodev,		/* reset */
190 	NULL,		/* cb_ops */
191 	NULL,		/* bus_ops */
192 	NULL,		/* power */
193 	vioblk_quiesce	/* quiesce */
194 };
195 
196 
197 
198 /* Standard Module linkage initialization for a Streams driver */
199 extern struct mod_ops mod_driverops;
200 
201 static struct modldrv modldrv = {
202 	&mod_driverops,		/* Type of module.  This one is a driver */
203 	vioblk_ident,    /* short description */
204 	&vioblk_dev_ops	/* driver specific ops */
205 };
206 
207 static struct modlinkage modlinkage = {
208 	MODREV_1,
209 	{
210 		(void *)&modldrv,
211 		NULL,
212 	},
213 };
214 
215 ddi_device_acc_attr_t vioblk_attr = {
216 	DDI_DEVICE_ATTR_V0,
217 	DDI_NEVERSWAP_ACC,	/* virtio is always native byte order */
218 	DDI_STORECACHING_OK_ACC,
219 	DDI_DEFAULT_ACC
220 };
221 
222 /* DMA attr for the header/status blocks. */
223 static ddi_dma_attr_t vioblk_req_dma_attr = {
224 	DMA_ATTR_V0,			/* dma_attr version	*/
225 	0,				/* dma_attr_addr_lo	*/
226 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
227 	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
228 	1,				/* dma_attr_align	*/
229 	1,				/* dma_attr_burstsizes	*/
230 	1,				/* dma_attr_minxfer	*/
231 	0xFFFFFFFFull,			/* dma_attr_maxxfer	*/
232 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
233 	1,				/* dma_attr_sgllen	*/
234 	1,				/* dma_attr_granular	*/
235 	0,				/* dma_attr_flags	*/
236 };
237 
238 /* DMA attr for the data blocks. */
239 static ddi_dma_attr_t vioblk_bd_dma_attr = {
240 	DMA_ATTR_V0,			/* dma_attr version	*/
241 	0,				/* dma_attr_addr_lo	*/
242 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_addr_hi	*/
243 	0x00000000FFFFFFFFull,		/* dma_attr_count_max	*/
244 	1,				/* dma_attr_align	*/
245 	1,				/* dma_attr_burstsizes	*/
246 	1,				/* dma_attr_minxfer	*/
247 	0,				/* dma_attr_maxxfer, set in attach */
248 	0xFFFFFFFFFFFFFFFFull,		/* dma_attr_seg		*/
249 	0,				/* dma_attr_sgllen, set in attach */
250 	1,				/* dma_attr_granular	*/
251 	0,				/* dma_attr_flags	*/
252 };
253 
254 static int
255 vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type,
256     uint32_t len)
257 {
258 	struct vioblk_req *req;
259 	struct vq_entry *ve_hdr;
260 	int total_cookies, write;
261 
262 	write = (type == VIRTIO_BLK_T_OUT ||
263 	    type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0;
264 	total_cookies = 2;
265 
266 	if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) {
267 		sc->ks_data->sts_rw_badoffset.value.ui64++;
268 		return (EINVAL);
269 	}
270 
271 	/* allocate top entry */
272 	ve_hdr = vq_alloc_entry(sc->sc_vq);
273 	if (!ve_hdr) {
274 		sc->ks_data->sts_rw_outofmemory.value.ui64++;
275 		return (ENOMEM);
276 	}
277 
278 	/* getting request */
279 	req = &sc->sc_reqs[ve_hdr->qe_index];
280 	req->hdr.type = type;
281 	req->hdr.ioprio = 0;
282 	req->hdr.sector = xfer->x_blkno;
283 	req->xfer = xfer;
284 
285 	/* Header */
286 	virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress,
287 	    sizeof (struct vioblk_req_hdr), B_TRUE);
288 
289 	/* Payload */
290 	if (len > 0) {
291 		virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac,
292 		    xfer->x_ndmac, write ? B_TRUE : B_FALSE);
293 		total_cookies += xfer->x_ndmac;
294 	}
295 
296 	/* Status */
297 	virtio_ve_add_indirect_buf(ve_hdr,
298 	    req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr),
299 	    sizeof (uint8_t), B_FALSE);
300 
301 	/* sending the whole chain to the device */
302 	virtio_push_chain(ve_hdr, B_TRUE);
303 
304 	if (sc->sc_stats.rw_cookiesmax < total_cookies)
305 		sc->sc_stats.rw_cookiesmax = total_cookies;
306 
307 	return (DDI_SUCCESS);
308 }
309 
310 /*
311  * Now in polling mode. Interrupts are off, so we
312  * 1) poll for the already queued requests to complete.
313  * 2) push our request.
314  * 3) wait for our request to complete.
315  */
316 static int
317 vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer,
318     int type, uint32_t len)
319 {
320 	clock_t tmout;
321 	int ret;
322 
323 	ASSERT(xfer->x_flags & BD_XFER_POLL);
324 
325 	/* Prevent a hard hang. */
326 	tmout = drv_usectohz(30000000);
327 
328 	/* Poll for an empty queue */
329 	while (vq_num_used(sc->sc_vq)) {
330 		/* Check if any pending requests completed. */
331 		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
332 		if (ret != DDI_INTR_CLAIMED) {
333 			drv_usecwait(10);
334 			tmout -= 10;
335 			return (ETIMEDOUT);
336 		}
337 	}
338 
339 	ret = vioblk_rw(sc, xfer, type, len);
340 	if (ret)
341 		return (ret);
342 
343 	tmout = drv_usectohz(30000000);
344 	/* Poll for an empty queue again. */
345 	while (vq_num_used(sc->sc_vq)) {
346 		/* Check if any pending requests completed. */
347 		ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
348 		if (ret != DDI_INTR_CLAIMED) {
349 			drv_usecwait(10);
350 			tmout -= 10;
351 			return (ETIMEDOUT);
352 		}
353 	}
354 
355 	return (DDI_SUCCESS);
356 }
357 
358 static int
359 vioblk_read(void *arg, bd_xfer_t *xfer)
360 {
361 	int ret;
362 	struct vioblk_softc *sc = (void *)arg;
363 
364 	if (xfer->x_flags & BD_XFER_POLL) {
365 		if (!sc->sc_in_poll_mode) {
366 			virtio_stop_vq_intr(sc->sc_vq);
367 			sc->sc_in_poll_mode = 1;
368 		}
369 
370 		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN,
371 		    xfer->x_nblks * DEV_BSIZE);
372 	} else {
373 		if (sc->sc_in_poll_mode) {
374 			virtio_start_vq_intr(sc->sc_vq);
375 			sc->sc_in_poll_mode = 0;
376 		}
377 
378 		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN,
379 		    xfer->x_nblks * DEV_BSIZE);
380 	}
381 
382 	return (ret);
383 }
384 
385 static int
386 vioblk_write(void *arg, bd_xfer_t *xfer)
387 {
388 	int ret;
389 	struct vioblk_softc *sc = (void *)arg;
390 
391 	if (xfer->x_flags & BD_XFER_POLL) {
392 		if (!sc->sc_in_poll_mode) {
393 			virtio_stop_vq_intr(sc->sc_vq);
394 			sc->sc_in_poll_mode = 1;
395 		}
396 
397 		ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT,
398 		    xfer->x_nblks * DEV_BSIZE);
399 	} else {
400 		if (sc->sc_in_poll_mode) {
401 			virtio_start_vq_intr(sc->sc_vq);
402 			sc->sc_in_poll_mode = 0;
403 		}
404 
405 		ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT,
406 		    xfer->x_nblks * DEV_BSIZE);
407 	}
408 	return (ret);
409 }
410 
411 static int
412 vioblk_flush(void *arg, bd_xfer_t *xfer)
413 {
414 	int ret;
415 	struct vioblk_softc *sc = (void *)arg;
416 
417 	ASSERT((xfer->x_flags & BD_XFER_POLL) == 0);
418 
419 	ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT,
420 	    xfer->x_nblks * DEV_BSIZE);
421 
422 	if (!ret)
423 		sc->sc_stats.rw_cacheflush++;
424 
425 	return (ret);
426 }
427 
428 
429 static void
430 vioblk_driveinfo(void *arg, bd_drive_t *drive)
431 {
432 	struct vioblk_softc *sc = (void *)arg;
433 
434 	drive->d_qsize = sc->sc_vq->vq_num;
435 	drive->d_removable = B_FALSE;
436 	drive->d_hotpluggable = B_TRUE;
437 	drive->d_target = 0;
438 	drive->d_lun = 0;
439 }
440 
441 static int
442 vioblk_mediainfo(void *arg, bd_media_t *media)
443 {
444 	struct vioblk_softc *sc = (void *)arg;
445 
446 	media->m_nblks = sc->sc_nblks;
447 	media->m_blksize = DEV_BSIZE;
448 	media->m_readonly = sc->sc_readonly;
449 	return (0);
450 }
451 
452 static int
453 vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
454 {
455 	struct vioblk_softc *sc = (void *)arg;
456 	clock_t deadline;
457 	int ret;
458 	bd_xfer_t xfer;
459 
460 	deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000);
461 	(void) memset(&xfer, 0, sizeof (bd_xfer_t));
462 	xfer.x_nblks = 1;
463 
464 	ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr,
465 	    DDI_DMA_SLEEP, NULL, &xfer.x_dmah);
466 	if (ret != DDI_SUCCESS)
467 		goto out_alloc;
468 
469 	ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid,
470 	    VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT,
471 	    DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac);
472 	if (ret != DDI_DMA_MAPPED) {
473 		ret = DDI_FAILURE;
474 		goto out_map;
475 	}
476 
477 	mutex_enter(&sc->lock_devid);
478 
479 	ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID,
480 	    VIRTIO_BLK_ID_BYTES);
481 	if (ret) {
482 		mutex_exit(&sc->lock_devid);
483 		goto out_rw;
484 	}
485 
486 	/* wait for reply */
487 	ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline);
488 	mutex_exit(&sc->lock_devid);
489 
490 	(void) ddi_dma_unbind_handle(xfer.x_dmah);
491 	ddi_dma_free_handle(&xfer.x_dmah);
492 
493 	/* timeout */
494 	if (ret < 0) {
495 		dev_err(devinfo, CE_WARN, "Cannot get devid from the device");
496 		return (DDI_FAILURE);
497 	}
498 
499 	ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL,
500 	    VIRTIO_BLK_ID_BYTES, sc->devid, devid);
501 	if (ret != DDI_SUCCESS) {
502 		dev_err(devinfo, CE_WARN, "Cannot build devid from the device");
503 		return (ret);
504 	}
505 
506 	dev_debug(sc->sc_dev, CE_NOTE,
507 	    "devid %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x",
508 	    sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3],
509 	    sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7],
510 	    sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11],
511 	    sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15],
512 	    sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]);
513 
514 	return (0);
515 
516 out_rw:
517 	(void) ddi_dma_unbind_handle(xfer.x_dmah);
518 out_map:
519 	ddi_dma_free_handle(&xfer.x_dmah);
520 out_alloc:
521 	return (ret);
522 }
523 
524 static void
525 vioblk_show_features(struct vioblk_softc *sc, const char *prefix,
526     uint32_t features)
527 {
528 	char buf[512];
529 	char *bufp = buf;
530 	char *bufend = buf + sizeof (buf);
531 
532 	/* LINTED E_PTRDIFF_OVERFLOW */
533 	bufp += snprintf(bufp, bufend - bufp, prefix);
534 
535 	/* LINTED E_PTRDIFF_OVERFLOW */
536 	bufp += virtio_show_features(features, bufp, bufend - bufp);
537 
538 
539 	/* LINTED E_PTRDIFF_OVERFLOW */
540 	bufp += snprintf(bufp, bufend - bufp, "Vioblk ( ");
541 
542 	if (features & VIRTIO_BLK_F_BARRIER)
543 		/* LINTED E_PTRDIFF_OVERFLOW */
544 		bufp += snprintf(bufp, bufend - bufp, "BARRIER ");
545 	if (features & VIRTIO_BLK_F_SIZE_MAX)
546 		/* LINTED E_PTRDIFF_OVERFLOW */
547 		bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX ");
548 	if (features & VIRTIO_BLK_F_SEG_MAX)
549 		/* LINTED E_PTRDIFF_OVERFLOW */
550 		bufp += snprintf(bufp, bufend - bufp, "SEG_MAX ");
551 	if (features & VIRTIO_BLK_F_GEOMETRY)
552 		/* LINTED E_PTRDIFF_OVERFLOW */
553 		bufp += snprintf(bufp, bufend - bufp, "GEOMETRY ");
554 	if (features & VIRTIO_BLK_F_RO)
555 		/* LINTED E_PTRDIFF_OVERFLOW */
556 		bufp += snprintf(bufp, bufend - bufp, "RO ");
557 	if (features & VIRTIO_BLK_F_BLK_SIZE)
558 		/* LINTED E_PTRDIFF_OVERFLOW */
559 		bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE ");
560 	if (features & VIRTIO_BLK_F_SCSI)
561 		/* LINTED E_PTRDIFF_OVERFLOW */
562 		bufp += snprintf(bufp, bufend - bufp, "SCSI ");
563 	if (features & VIRTIO_BLK_F_FLUSH)
564 		/* LINTED E_PTRDIFF_OVERFLOW */
565 		bufp += snprintf(bufp, bufend - bufp, "FLUSH ");
566 	if (features & VIRTIO_BLK_F_TOPOLOGY)
567 		/* LINTED E_PTRDIFF_OVERFLOW */
568 		bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY ");
569 
570 	/* LINTED E_PTRDIFF_OVERFLOW */
571 	bufp += snprintf(bufp, bufend - bufp, ")");
572 	*bufp = '\0';
573 
574 	dev_debug(sc->sc_dev, CE_NOTE, "%s", buf);
575 }
576 
577 static int
578 vioblk_dev_features(struct vioblk_softc *sc)
579 {
580 	uint32_t host_features;
581 
582 	host_features = virtio_negotiate_features(&sc->sc_virtio,
583 	    VIRTIO_BLK_F_RO |
584 	    VIRTIO_BLK_F_GEOMETRY |
585 	    VIRTIO_BLK_F_BLK_SIZE |
586 	    VIRTIO_BLK_F_FLUSH |
587 	    VIRTIO_BLK_F_SEG_MAX |
588 	    VIRTIO_BLK_F_SIZE_MAX |
589 	    VIRTIO_F_RING_INDIRECT_DESC);
590 
591 	vioblk_show_features(sc, "Host features: ", host_features);
592 	vioblk_show_features(sc, "Negotiated features: ",
593 	    sc->sc_virtio.sc_features);
594 
595 	if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
596 		dev_err(sc->sc_dev, CE_NOTE,
597 		    "Host does not support RING_INDIRECT_DESC, bye.");
598 		return (DDI_FAILURE);
599 	}
600 
601 	return (DDI_SUCCESS);
602 }
603 
604 /* ARGSUSED */
605 uint_t
606 vioblk_int_handler(caddr_t arg1, caddr_t arg2)
607 {
608 	struct virtio_softc *vsc = (void *)arg1;
609 	struct vioblk_softc *sc = container_of(vsc,
610 	    struct vioblk_softc, sc_virtio);
611 	struct vq_entry *ve;
612 	uint32_t len;
613 	int i = 0, error;
614 
615 	while ((ve = virtio_pull_chain(sc->sc_vq, &len))) {
616 		struct vioblk_req *req = &sc->sc_reqs[ve->qe_index];
617 		bd_xfer_t *xfer = req->xfer;
618 		uint8_t status = req->status;
619 		uint32_t type = req->hdr.type;
620 
621 		if (req->xfer == (void *)VIOBLK_POISON) {
622 			dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!");
623 			virtio_free_chain(ve);
624 			return (DDI_INTR_CLAIMED);
625 		}
626 
627 		req->xfer = (void *) VIOBLK_POISON;
628 
629 		/* Note: blkdev tears down the payload mapping for us. */
630 		virtio_free_chain(ve);
631 
632 		/* returning payload back to blkdev */
633 		switch (status) {
634 			case VIRTIO_BLK_S_OK:
635 				error = 0;
636 				break;
637 			case VIRTIO_BLK_S_IOERR:
638 				error = EIO;
639 				sc->sc_stats.io_errors++;
640 				break;
641 			case VIRTIO_BLK_S_UNSUPP:
642 				sc->sc_stats.unsupp_errors++;
643 				error = ENOTTY;
644 				break;
645 			default:
646 				sc->sc_stats.nxio_errors++;
647 				error = ENXIO;
648 				break;
649 		}
650 
651 		if (type == VIRTIO_BLK_T_GET_ID) {
652 			/* notify devid_init */
653 			mutex_enter(&sc->lock_devid);
654 			cv_broadcast(&sc->cv_devid);
655 			mutex_exit(&sc->lock_devid);
656 		} else
657 			bd_xfer_done(xfer, error);
658 
659 		i++;
660 	}
661 
662 	/* update stats */
663 	if (sc->sc_stats.intr_queuemax < i)
664 		sc->sc_stats.intr_queuemax = i;
665 	sc->sc_stats.intr_total++;
666 
667 	return (DDI_INTR_CLAIMED);
668 }
669 
670 /* ARGSUSED */
671 uint_t
672 vioblk_config_handler(caddr_t arg1, caddr_t arg2)
673 {
674 	return (DDI_INTR_CLAIMED);
675 }
676 
677 static int
678 vioblk_register_ints(struct vioblk_softc *sc)
679 {
680 	int ret;
681 
682 	struct virtio_int_handler vioblk_conf_h = {
683 		vioblk_config_handler
684 	};
685 
686 	struct virtio_int_handler vioblk_vq_h[] = {
687 		{ vioblk_int_handler },
688 		{ NULL },
689 	};
690 
691 	ret = virtio_register_ints(&sc->sc_virtio,
692 	    &vioblk_conf_h, vioblk_vq_h);
693 
694 	return (ret);
695 }
696 
697 static void
698 vioblk_free_reqs(struct vioblk_softc *sc)
699 {
700 	int i, qsize;
701 
702 	qsize = sc->sc_vq->vq_num;
703 
704 	for (i = 0; i < qsize; i++) {
705 		struct vioblk_req *req = &sc->sc_reqs[i];
706 
707 		if (req->ndmac)
708 			(void) ddi_dma_unbind_handle(req->dmah);
709 
710 		if (req->dmah)
711 			ddi_dma_free_handle(&req->dmah);
712 	}
713 
714 	kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize);
715 }
716 
717 static int
718 vioblk_alloc_reqs(struct vioblk_softc *sc)
719 {
720 	int i, qsize;
721 	int ret;
722 
723 	qsize = sc->sc_vq->vq_num;
724 
725 	sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP);
726 
727 	for (i = 0; i < qsize; i++) {
728 		struct vioblk_req *req = &sc->sc_reqs[i];
729 
730 		ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr,
731 		    DDI_DMA_SLEEP, NULL, &req->dmah);
732 		if (ret != DDI_SUCCESS) {
733 
734 			dev_err(sc->sc_dev, CE_WARN,
735 			    "Can't allocate dma handle for req "
736 			    "buffer %d", i);
737 			goto exit;
738 		}
739 
740 		ret = ddi_dma_addr_bind_handle(req->dmah, NULL,
741 		    (caddr_t)&req->hdr,
742 		    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
743 		    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
744 		    NULL, &req->dmac, &req->ndmac);
745 		if (ret != DDI_DMA_MAPPED) {
746 			dev_err(sc->sc_dev, CE_WARN,
747 			    "Can't bind req buffer %d", i);
748 			goto exit;
749 		}
750 	}
751 
752 	return (0);
753 
754 exit:
755 	vioblk_free_reqs(sc);
756 	return (ENOMEM);
757 }
758 
759 
760 static int
761 vioblk_ksupdate(kstat_t *ksp, int rw)
762 {
763 	struct vioblk_softc *sc = ksp->ks_private;
764 
765 	if (rw == KSTAT_WRITE)
766 		return (EACCES);
767 
768 	sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax;
769 	sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax;
770 	sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors;
771 	sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors;
772 	sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors;
773 	sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush;
774 	sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total;
775 
776 
777 	return (0);
778 }
779 
780 static int
781 vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
782 {
783 	int ret = DDI_SUCCESS;
784 	int instance;
785 	struct vioblk_softc *sc;
786 	struct virtio_softc *vsc;
787 	struct vioblk_stats *ks_data;
788 
789 	instance = ddi_get_instance(devinfo);
790 
791 	switch (cmd) {
792 	case DDI_ATTACH:
793 		break;
794 
795 	case DDI_RESUME:
796 	case DDI_PM_RESUME:
797 		dev_err(devinfo, CE_WARN, "resume not supported yet");
798 		ret = DDI_FAILURE;
799 		goto exit;
800 
801 	default:
802 		dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd);
803 		ret = DDI_FAILURE;
804 		goto exit;
805 	}
806 
807 	sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP);
808 	ddi_set_driver_private(devinfo, sc);
809 
810 	vsc = &sc->sc_virtio;
811 
812 	/* Duplicate for faster access / less typing */
813 	sc->sc_dev = devinfo;
814 	vsc->sc_dev = devinfo;
815 
816 	cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL);
817 	mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL);
818 
819 	/*
820 	 * Initialize interrupt kstat.  This should not normally fail, since
821 	 * we don't use a persistent stat.  We do it this way to avoid having
822 	 * to test for it at run time on the hot path.
823 	 */
824 	sc->sc_intrstat = kstat_create("vioblk", instance,
825 	    "intrs", "controller", KSTAT_TYPE_NAMED,
826 	    sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
827 	    KSTAT_FLAG_PERSISTENT);
828 	if (sc->sc_intrstat == NULL) {
829 		dev_err(devinfo, CE_WARN, "kstat_create failed");
830 		goto exit_intrstat;
831 	}
832 	ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data;
833 	kstat_named_init(&ks_data->sts_rw_outofmemory,
834 	    "total_rw_outofmemory", KSTAT_DATA_UINT64);
835 	kstat_named_init(&ks_data->sts_rw_badoffset,
836 	    "total_rw_badoffset", KSTAT_DATA_UINT64);
837 	kstat_named_init(&ks_data->sts_intr_total,
838 	    "total_intr", KSTAT_DATA_UINT64);
839 	kstat_named_init(&ks_data->sts_io_errors,
840 	    "total_io_errors", KSTAT_DATA_UINT32);
841 	kstat_named_init(&ks_data->sts_unsupp_errors,
842 	    "total_unsupp_errors", KSTAT_DATA_UINT32);
843 	kstat_named_init(&ks_data->sts_nxio_errors,
844 	    "total_nxio_errors", KSTAT_DATA_UINT32);
845 	kstat_named_init(&ks_data->sts_rw_cacheflush,
846 	    "total_rw_cacheflush", KSTAT_DATA_UINT64);
847 	kstat_named_init(&ks_data->sts_rw_cookiesmax,
848 	    "max_rw_cookies", KSTAT_DATA_UINT32);
849 	kstat_named_init(&ks_data->sts_intr_queuemax,
850 	    "max_intr_queue", KSTAT_DATA_UINT32);
851 	sc->ks_data = ks_data;
852 	sc->sc_intrstat->ks_private = sc;
853 	sc->sc_intrstat->ks_update = vioblk_ksupdate;
854 	kstat_install(sc->sc_intrstat);
855 
856 	/* map BAR0 */
857 	ret = ddi_regs_map_setup(devinfo, 1,
858 	    (caddr_t *)&sc->sc_virtio.sc_io_addr,
859 	    0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh);
860 	if (ret != DDI_SUCCESS) {
861 		dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret);
862 		goto exit_map;
863 	}
864 
865 	virtio_device_reset(&sc->sc_virtio);
866 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
867 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
868 
869 	if (vioblk_register_ints(sc)) {
870 		dev_err(devinfo, CE_WARN, "Unable to add interrupt");
871 		goto exit_int;
872 	}
873 
874 	ret = vioblk_dev_features(sc);
875 	if (ret)
876 		goto exit_features;
877 
878 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO)
879 		sc->sc_readonly = B_TRUE;
880 	else
881 		sc->sc_readonly = B_FALSE;
882 
883 	sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio,
884 	    VIRTIO_BLK_CONFIG_CAPACITY);
885 	sc->sc_nblks = sc->sc_capacity;
886 
887 	/*
888 	 * BLK_SIZE is just a hint for the optimal logical block
889 	 * granularity. Ignored for now.
890 	 */
891 	sc->sc_blk_size = DEV_BSIZE;
892 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) {
893 		sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio,
894 		    VIRTIO_BLK_CONFIG_BLK_SIZE);
895 	}
896 
897 	/* Flushing is not supported. */
898 	if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) {
899 		vioblk_ops.o_sync_cache = NULL;
900 	}
901 
902 	sc->sc_seg_max = DEF_MAXINDIRECT;
903 	/* The max number of segments (cookies) in a request */
904 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) {
905 		sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio,
906 		    VIRTIO_BLK_CONFIG_SEG_MAX);
907 
908 		/* That's what Linux does. */
909 		if (!sc->sc_seg_max)
910 			sc->sc_seg_max = 1;
911 
912 		/*
913 		 * SEG_MAX corresponds to the number of _data_
914 		 * blocks in a request
915 		 */
916 		sc->sc_seg_max += 2;
917 	}
918 	/* 2 descriptors taken for header/status */
919 	vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2;
920 
921 
922 	/* The maximum size for a cookie in a request. */
923 	sc->sc_seg_size_max = DEF_MAXSECTOR;
924 	if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) {
925 		sc->sc_seg_size_max = virtio_read_device_config_4(
926 		    &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX);
927 	}
928 
929 	/* The maximum request size */
930 	vioblk_bd_dma_attr.dma_attr_maxxfer =
931 	    vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max;
932 
933 	dev_debug(devinfo, CE_NOTE,
934 	    "nblks=%" PRIu64 " blksize=%d  num_seg=%d, "
935 	    "seg_size=%d, maxxfer=%" PRIu64,
936 	    sc->sc_nblks, sc->sc_blk_size,
937 	    vioblk_bd_dma_attr.dma_attr_sgllen,
938 	    sc->sc_seg_size_max,
939 	    vioblk_bd_dma_attr.dma_attr_maxxfer);
940 
941 
942 	sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0,
943 	    sc->sc_seg_max, "I/O request");
944 	if (sc->sc_vq == NULL) {
945 		goto exit_alloc1;
946 	}
947 
948 	ret = vioblk_alloc_reqs(sc);
949 	if (ret) {
950 		goto exit_alloc2;
951 	}
952 
953 	sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr,
954 	    KM_SLEEP);
955 
956 
957 	virtio_set_status(&sc->sc_virtio,
958 	    VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
959 	virtio_start_vq_intr(sc->sc_vq);
960 
961 	ret = virtio_enable_ints(&sc->sc_virtio);
962 	if (ret)
963 		goto exit_enable_ints;
964 
965 	ret = bd_attach_handle(devinfo, sc->bd_h);
966 	if (ret != DDI_SUCCESS) {
967 		dev_err(devinfo, CE_WARN, "Failed to attach blkdev");
968 		goto exit_attach_bd;
969 	}
970 
971 	return (DDI_SUCCESS);
972 
973 exit_attach_bd:
974 	/*
975 	 * There is no virtio_disable_ints(), it's done in virtio_release_ints.
976 	 * If they ever get split, don't forget to add a call here.
977 	 */
978 exit_enable_ints:
979 	virtio_stop_vq_intr(sc->sc_vq);
980 	bd_free_handle(sc->bd_h);
981 	vioblk_free_reqs(sc);
982 exit_alloc2:
983 	virtio_free_vq(sc->sc_vq);
984 exit_alloc1:
985 exit_features:
986 	virtio_release_ints(&sc->sc_virtio);
987 exit_int:
988 	virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
989 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
990 exit_map:
991 	kstat_delete(sc->sc_intrstat);
992 exit_intrstat:
993 	mutex_destroy(&sc->lock_devid);
994 	cv_destroy(&sc->cv_devid);
995 	kmem_free(sc, sizeof (struct vioblk_softc));
996 exit:
997 	return (ret);
998 }
999 
1000 static int
1001 vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1002 {
1003 	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
1004 
1005 	switch (cmd) {
1006 	case DDI_DETACH:
1007 		break;
1008 
1009 	case DDI_PM_SUSPEND:
1010 		cmn_err(CE_WARN, "suspend not supported yet");
1011 		return (DDI_FAILURE);
1012 
1013 	default:
1014 		cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
1015 		return (DDI_FAILURE);
1016 	}
1017 
1018 	(void) bd_detach_handle(sc->bd_h);
1019 	virtio_stop_vq_intr(sc->sc_vq);
1020 	virtio_release_ints(&sc->sc_virtio);
1021 	vioblk_free_reqs(sc);
1022 	virtio_free_vq(sc->sc_vq);
1023 	virtio_device_reset(&sc->sc_virtio);
1024 	ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1025 	kstat_delete(sc->sc_intrstat);
1026 	kmem_free(sc, sizeof (struct vioblk_softc));
1027 
1028 	return (DDI_SUCCESS);
1029 }
1030 
1031 static int
1032 vioblk_quiesce(dev_info_t *devinfo)
1033 {
1034 	struct vioblk_softc *sc = ddi_get_driver_private(devinfo);
1035 
1036 	virtio_stop_vq_intr(sc->sc_vq);
1037 	virtio_device_reset(&sc->sc_virtio);
1038 
1039 	return (DDI_SUCCESS);
1040 }
1041 
1042 int
1043 _init(void)
1044 {
1045 	int rv;
1046 
1047 	bd_mod_init(&vioblk_dev_ops);
1048 
1049 	if ((rv = mod_install(&modlinkage)) != 0) {
1050 		bd_mod_fini(&vioblk_dev_ops);
1051 	}
1052 
1053 	return (rv);
1054 }
1055 
1056 int
1057 _fini(void)
1058 {
1059 	int rv;
1060 
1061 	if ((rv = mod_remove(&modlinkage)) == 0) {
1062 		bd_mod_fini(&vioblk_dev_ops);
1063 	}
1064 
1065 	return (rv);
1066 }
1067 
1068 int
1069 _info(struct modinfo *modinfop)
1070 {
1071 	return (mod_info(&modlinkage, modinfop));
1072 }
1073