xref: /titanic_50/usr/src/uts/common/xen/io/xdb.c (revision 9fae04d87fb57bd267ff2fb7d3fbf75f423a71c8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Note: This is the backend part of the split PV disk driver. This driver
29  * is not a nexus driver, nor is it a leaf driver(block/char/stream driver).
30  * Currently, it does not create any minor node. So, although, it runs in
31  * backend domain, it will not be used directly from within dom0.
32  * It simply gets block I/O requests issued by frontend from a shared page
33  * (blkif ring buffer - defined by Xen) between backend and frontend domain,
34  * generates a buf, and push it down to underlying disk target driver via
35  * ldi interface. When buf is done, this driver will generate a response
36  * and put it into ring buffer to inform frontend of the status of the I/O
37  * request issued by it. When a new virtual device entry is added in xenstore,
 * there will be a watch event sent from Xen to xvdi framework, who will,
39  * in turn, create the devinfo node and try to attach this driver
40  * (see xvdi_create_dev). When frontend peer changes its state to
41  * XenbusStateClose, an event will also be sent from Xen to xvdi framework,
42  * who will detach and remove this devinfo node (see i_xvdi_oestate_handler).
 * I/O requests obtained from the ring buffer and events coming from xenstore
 * cannot be trusted. We verify them in xdb_get_buf() and
 * xdb_check_state_transition().
45  *
46  * Virtual device configuration is read/written from/to the database via
 * xenbus_* interfaces. The driver also uses xvdi_* to interact with the
 * hypervisor.
48  * There is an on-going effort to make xvdi_* cover all xenbus_*.
49  */
50 
51 #include <sys/types.h>
52 #include <sys/conf.h>
53 #include <sys/ddi.h>
54 #include <sys/dditypes.h>
55 #include <sys/sunddi.h>
56 #include <sys/list.h>
57 #include <sys/dkio.h>
58 #include <sys/cmlb.h>
59 #include <sys/vtoc.h>
60 #include <sys/modctl.h>
61 #include <sys/bootconf.h>
62 #include <sys/promif.h>
63 #include <sys/sysmacros.h>
64 #include <public/io/xenbus.h>
65 #include <public/io/xs_wire.h>
66 #include <xen/sys/xenbus_impl.h>
67 #include <xen/sys/xendev.h>
68 #include <sys/gnttab.h>
69 #include <sys/scsi/generic/inquiry.h>
70 #include <vm/seg_kmem.h>
71 #include <vm/hat_i86.h>
72 #include <sys/gnttab.h>
73 #include <sys/lofi.h>
74 #include <io/xdf.h>
75 #include <xen/io/blkif_impl.h>
76 #include <io/xdb.h>
77 
/*
 * NOTE(review): presumably the anchor for per-instance xdb_t state; it is
 * not referenced in this part of the file -- confirm against attach/detach.
 */
static xdb_t *xdb_statep;
/*
 * Debug control, off by default.  NOTE(review): presumably consulted by
 * XDB_DBPRINT(); verify against io/xdb.h.
 */
static int xdb_debug = 0;

/*
 * Forward declarations.  Several of these routines are called earlier in
 * the file than they are defined (e.g. xdb_close() from
 * xdb_params_change(), xdb_biodone() from xdb_get_buf()).
 */
static void xdb_close(dev_info_t *);
static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
static int xdb_get_request(xdb_t *, blkif_request_t *);
static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
static int xdb_biodone(buf_t *);
87 
88 
89 #ifdef DEBUG
90 /*
91  * debug aid functions
92  */
93 
94 static void
95 logva(xdb_t *vdp, uint64_t va)
96 {
97 	uint64_t *page_addrs;
98 	int i;
99 
100 	page_addrs = vdp->page_addrs;
101 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
102 		if (page_addrs[i] == va)
103 			debug_enter("VA remapping found!");
104 	}
105 
106 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
107 		if (page_addrs[i] == 0) {
108 			page_addrs[i] = va;
109 			break;
110 		}
111 	}
112 	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
113 }
114 
115 static void
116 unlogva(xdb_t *vdp, uint64_t va)
117 {
118 	uint64_t *page_addrs;
119 	int i;
120 
121 	page_addrs = vdp->page_addrs;
122 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
123 		if (page_addrs[i] == va) {
124 			page_addrs[i] = 0;
125 			break;
126 		}
127 	}
128 	ASSERT(i < XDB_MAX_IO_PAGES(vdp));
129 }
130 
131 static void
132 xdb_dump_request_oe(blkif_request_t *req)
133 {
134 	int i;
135 
136 	/*
137 	 * Exploit the public interface definitions for BLKIF_OP_READ
138 	 * etc..
139 	 */
140 	char *op_name[] = { "read", "write", "barrier", "flush" };
141 
142 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation]));
143 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d",
144 	    req->nr_segments));
145 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle));
146 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu",
147 	    (unsigned long long)req->id));
148 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu",
149 	    (unsigned long long)req->sector_number));
150 	for (i = 0; i < req->nr_segments; i++) {
151 		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d,"
152 		    "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
153 		    req->seg[i].last_sect));
154 	}
155 }
156 #endif /* DEBUG */
157 
/*
 * Statistics.
 *
 * Names of the named kstats exported for each instance.  The order of
 * this table must match the order in which xdb_kstat_update() stores
 * the values.
 */
static char *xdb_stats[] = {
	"rd_reqs",	/* read requests */
	"wr_reqs",	/* write requests */
	"br_reqs",	/* write barrier requests */
	"fl_reqs",	/* cache flush requests */
	"oo_reqs",	/* always reported as 0 by xdb_kstat_update() */
};
168 
169 static int
170 xdb_kstat_update(kstat_t *ksp, int flag)
171 {
172 	xdb_t *vdp;
173 	kstat_named_t *knp;
174 
175 	if (flag != KSTAT_READ)
176 		return (EACCES);
177 
178 	vdp = ksp->ks_private;
179 	knp = ksp->ks_data;
180 
181 	/*
182 	 * Assignment order should match that of the names in
183 	 * xdb_stats.
184 	 */
185 	(knp++)->value.ui64 = vdp->xs_stat_req_reads;
186 	(knp++)->value.ui64 = vdp->xs_stat_req_writes;
187 	(knp++)->value.ui64 = vdp->xs_stat_req_barriers;
188 	(knp++)->value.ui64 = vdp->xs_stat_req_flushes;
189 	(knp++)->value.ui64 = 0; /* oo_req */
190 
191 	return (0);
192 }
193 
194 static boolean_t
195 xdb_kstat_init(xdb_t *vdp)
196 {
197 	int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]);
198 	char **cp = xdb_stats;
199 	kstat_named_t *knp;
200 
201 	if ((vdp->xs_kstats = kstat_create("xdb",
202 	    ddi_get_instance(vdp->xs_dip),
203 	    "req_statistics", "block", KSTAT_TYPE_NAMED,
204 	    nstat, 0)) == NULL)
205 		return (B_FALSE);
206 
207 	vdp->xs_kstats->ks_private = vdp;
208 	vdp->xs_kstats->ks_update = xdb_kstat_update;
209 
210 	knp = vdp->xs_kstats->ks_data;
211 	while (nstat > 0) {
212 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
213 		knp++;
214 		cp++;
215 		nstat--;
216 	}
217 
218 	kstat_install(vdp->xs_kstats);
219 
220 	return (B_TRUE);
221 }
222 
223 static char *
224 i_pathname(dev_info_t *dip)
225 {
226 	char *path, *rv;
227 
228 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
229 	(void) ddi_pathname(dip, path);
230 	rv = strdup(path);
231 	kmem_free(path, MAXPATHLEN);
232 
233 	return (rv);
234 }
235 
236 static buf_t *
237 xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
238 {
239 	buf_t *bp;
240 	uint8_t segs, curseg;
241 	int sectors;
242 	int i, err;
243 	gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
244 	ddi_acc_handle_t acchdl;
245 
246 	acchdl = vdp->xs_ring_hdl;
247 	bp = XDB_XREQ2BP(xreq);
248 	curseg = xreq->xr_curseg;
249 	/* init a new xdb request */
250 	if (req != NULL) {
251 		ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
252 		boolean_t pagemapok = B_TRUE;
253 		uint8_t op = ddi_get8(acchdl, &req->operation);
254 
255 		xreq->xr_vdp = vdp;
256 		xreq->xr_op = op;
257 		xreq->xr_id = ddi_get64(acchdl, &req->id);
258 		segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments);
259 		if (segs == 0) {
260 			if (op != BLKIF_OP_FLUSH_DISKCACHE)
261 				cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE"
262 				    " is seen from domain %d with zero "
263 				    "length data buffer!", vdp->xs_peer);
264 			bioinit(bp);
265 			bp->b_bcount = 0;
266 			bp->b_lblkno = 0;
267 			bp->b_un.b_addr = NULL;
268 			return (bp);
269 		} else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
270 			cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
271 			    " is seen from domain %d with non-zero "
272 			    "length data buffer!", vdp->xs_peer);
273 		}
274 
275 		/*
276 		 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
277 		 * according to the definition of blk interface by Xen
278 		 * we do sanity check here
279 		 */
280 		if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
281 			segs = xreq->xr_buf_pages =
282 			    BLKIF_MAX_SEGMENTS_PER_REQUEST;
283 
284 		for (i = 0; i < segs; i++) {
285 			uint8_t fs, ls;
286 
287 			mapops[i].host_addr =
288 			    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
289 			    vdp->xs_iopage_va, xreq->xr_idx, i);
290 			mapops[i].dom = vdp->xs_peer;
291 			mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
292 			mapops[i].flags = GNTMAP_host_map;
293 			if (op != BLKIF_OP_READ)
294 				mapops[i].flags |= GNTMAP_readonly;
295 
296 			fs = ddi_get8(acchdl, &req->seg[i].first_sect);
297 			ls = ddi_get8(acchdl, &req->seg[i].last_sect);
298 
299 			/*
300 			 * first_sect should be no bigger than last_sect and
301 			 * both of them should be no bigger than
302 			 * XB_LAST_SECTOR_IN_SEG according to definition
303 			 * of blk interface by Xen, so sanity check again
304 			 */
305 			if (fs > XB_LAST_SECTOR_IN_SEG)
306 				fs = XB_LAST_SECTOR_IN_SEG;
307 			if (ls > XB_LAST_SECTOR_IN_SEG)
308 				ls = XB_LAST_SECTOR_IN_SEG;
309 			if (fs > ls)
310 				fs = ls;
311 
312 			xreq->xr_segs[i].fs = fs;
313 			xreq->xr_segs[i].ls = ls;
314 		}
315 
316 		/* map in io pages */
317 		err = xen_map_gref(GNTTABOP_map_grant_ref, mapops, i, B_FALSE);
318 		if (err != 0)
319 			return (NULL);
320 		for (i = 0; i < segs; i++) {
321 			/*
322 			 * Although HYPERVISOR_grant_table_op() returned no
323 			 * error, mapping of each single page can fail. So,
324 			 * we have to do the check here and handle the error
325 			 * if needed
326 			 */
327 			if (mapops[i].status != GNTST_okay) {
328 				int j;
329 				for (j = 0; j < i; j++) {
330 #ifdef DEBUG
331 					unlogva(vdp, mapops[j].host_addr);
332 #endif
333 					xen_release_pfn(
334 					    xreq->xr_plist[j].p_pagenum);
335 				}
336 				pagemapok = B_FALSE;
337 				break;
338 			}
339 			/* record page mapping handle for unmapping later */
340 			xreq->xr_page_hdls[i] = mapops[i].handle;
341 #ifdef DEBUG
342 			logva(vdp, mapops[i].host_addr);
343 #endif
344 			/*
345 			 * Pass the MFNs down using the shadow list (xr_pplist)
346 			 *
347 			 * This is pretty ugly since we have implict knowledge
348 			 * of how the rootnex binds buffers.
349 			 * The GNTTABOP_map_grant_ref op makes us do some ugly
350 			 * stuff since we're not allowed to touch these PTEs
351 			 * from the VM.
352 			 *
353 			 * Obviously, these aren't real page_t's. The rootnex
354 			 * only needs p_pagenum.
355 			 * Also, don't use btop() here or 32 bit PAE breaks.
356 			 */
357 			xreq->xr_pplist[i] = &xreq->xr_plist[i];
358 			xreq->xr_plist[i].p_pagenum =
359 			    xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT);
360 		}
361 
362 		/*
363 		 * not all pages mapped in successfully, unmap those mapped-in
364 		 * page and return failure
365 		 */
366 		if (!pagemapok) {
367 			gnttab_unmap_grant_ref_t unmapop;
368 
369 			for (i = 0; i < segs; i++) {
370 				if (mapops[i].status != GNTST_okay)
371 					continue;
372 				unmapop.host_addr =
373 				    (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
374 				    vdp->xs_iopage_va, xreq->xr_idx, i);
375 				unmapop.dev_bus_addr = NULL;
376 				unmapop.handle = mapops[i].handle;
377 				(void) HYPERVISOR_grant_table_op(
378 				    GNTTABOP_unmap_grant_ref, &unmapop, 1);
379 			}
380 
381 			return (NULL);
382 		}
383 		bioinit(bp);
384 		bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
385 		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
386 		bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
387 		    BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
388 	} else {
389 		uint64_t blkst;
390 		int isread;
391 
392 		/* reuse this buf */
393 		blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
394 		isread = bp->b_flags & B_READ;
395 		bioreset(bp);
396 		bp->b_lblkno = blkst;
397 		bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
398 		bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
399 		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
400 		    xreq->xr_idx));
401 	}
402 
403 	/* form a buf */
404 	bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
405 	    curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
406 	bp->b_shadow = &xreq->xr_pplist[curseg];
407 	bp->b_iodone = xdb_biodone;
408 	sectors = 0;
409 
410 	/*
411 	 * Run through the segments. There are XB_NUM_SECTORS_PER_SEG sectors
412 	 * per segment. On some OSes (e.g. Linux), there may be empty gaps
413 	 * between segments. (i.e. the first segment may end on sector 6 and
414 	 * the second segment start on sector 4).
415 	 *
416 	 * if a segments first sector is not set to 0, and this is not the
417 	 * first segment in our buf, end this buf now.
418 	 *
419 	 * if a segments last sector is not set to XB_LAST_SECTOR_IN_SEG, and
420 	 * this is not the last segment in the request, add this segment into
421 	 * the buf, then end this buf (updating the pointer to point to the
422 	 * next segment next time around).
423 	 */
424 	for (i = curseg; i < xreq->xr_buf_pages; i++) {
425 		if ((xreq->xr_segs[i].fs != 0) && (i != curseg)) {
426 			break;
427 		}
428 		sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1);
429 		if ((xreq->xr_segs[i].ls != XB_LAST_SECTOR_IN_SEG) &&
430 		    (i != (xreq->xr_buf_pages - 1))) {
431 			i++;
432 			break;
433 		}
434 	}
435 	xreq->xr_curseg = i;
436 	bp->b_bcount = sectors * DEV_BSIZE;
437 	bp->b_bufsize = bp->b_bcount;
438 
439 	return (bp);
440 }
441 
442 static xdb_request_t *
443 xdb_get_req(xdb_t *vdp)
444 {
445 	xdb_request_t *req;
446 	int idx;
447 
448 	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
449 	ASSERT(vdp->xs_free_req != -1);
450 	req = &vdp->xs_req[vdp->xs_free_req];
451 	vdp->xs_free_req = req->xr_next;
452 	idx = req->xr_idx;
453 	bzero(req, sizeof (xdb_request_t));
454 	req->xr_idx = idx;
455 	return (req);
456 }
457 
458 static void
459 xdb_free_req(xdb_request_t *req)
460 {
461 	xdb_t *vdp = req->xr_vdp;
462 
463 	ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
464 	req->xr_next = vdp->xs_free_req;
465 	vdp->xs_free_req = req->xr_idx;
466 }
467 
468 static void
469 xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
470 {
471 	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
472 
473 	if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id),
474 	    ddi_get8(acchdl, &req->operation), ok))
475 		xvdi_notify_oe(vdp->xs_dip);
476 }
477 
478 static void
479 xdb_init_ioreqs(xdb_t *vdp)
480 {
481 	int i;
482 
483 	ASSERT(vdp->xs_nentry);
484 
485 	if (vdp->xs_req == NULL)
486 		vdp->xs_req = kmem_alloc(vdp->xs_nentry *
487 		    sizeof (xdb_request_t), KM_SLEEP);
488 #ifdef DEBUG
489 	if (vdp->page_addrs == NULL)
490 		vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) *
491 		    sizeof (uint64_t), KM_SLEEP);
492 #endif
493 	for (i = 0; i < vdp->xs_nentry; i++) {
494 		vdp->xs_req[i].xr_idx = i;
495 		vdp->xs_req[i].xr_next = i + 1;
496 	}
497 	vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1;
498 	vdp->xs_free_req = 0;
499 
500 	/* alloc va in host dom for io page mapping */
501 	vdp->xs_iopage_va = vmem_xalloc(heap_arena,
502 	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
503 	    VM_SLEEP);
504 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
505 		hat_prepare_mapping(kas.a_hat,
506 		    vdp->xs_iopage_va + i * PAGESIZE, NULL);
507 }
508 
509 static void
510 xdb_uninit_ioreqs(xdb_t *vdp)
511 {
512 	int i;
513 
514 	for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
515 		hat_release_mapping(kas.a_hat,
516 		    vdp->xs_iopage_va + i * PAGESIZE);
517 	vmem_xfree(heap_arena, vdp->xs_iopage_va,
518 	    XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
519 	if (vdp->xs_req != NULL) {
520 		kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t));
521 		vdp->xs_req = NULL;
522 	}
523 #ifdef DEBUG
524 	if (vdp->page_addrs != NULL) {
525 		kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
526 		    sizeof (uint64_t));
527 		vdp->page_addrs = NULL;
528 	}
529 #endif
530 }
531 
532 static uint_t
533 xdb_intr(caddr_t arg)
534 {
535 	xdb_t		*vdp = (xdb_t *)arg;
536 	dev_info_t	*dip = vdp->xs_dip;
537 	blkif_request_t	req, *reqp = &req;
538 	xdb_request_t	*xreq;
539 	buf_t		*bp;
540 	uint8_t		op;
541 	int		ret = DDI_INTR_UNCLAIMED;
542 
543 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
544 	    "xdb@%s: I/O request received from dom %d",
545 	    ddi_get_name_addr(dip), vdp->xs_peer));
546 
547 	mutex_enter(&vdp->xs_iomutex);
548 
549 	/* shouldn't touch ring buffer if not in connected state */
550 	if (!vdp->xs_if_connected) {
551 		mutex_exit(&vdp->xs_iomutex);
552 		return (DDI_INTR_UNCLAIMED);
553 	}
554 	ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
555 
556 	/*
557 	 * We'll loop till there is no more request in the ring
558 	 * We won't stuck in this loop for ever since the size of ring buffer
559 	 * is limited, and frontend will stop pushing requests into it when
560 	 * the ring buffer is full
561 	 */
562 
563 	/* req_event will be increased in xvdi_ring_get_request() */
564 	while (xdb_get_request(vdp, reqp)) {
565 		ret = DDI_INTR_CLAIMED;
566 
567 		op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
568 		if (op == BLKIF_OP_READ			||
569 		    op == BLKIF_OP_WRITE		||
570 		    op == BLKIF_OP_WRITE_BARRIER	||
571 		    op == BLKIF_OP_FLUSH_DISKCACHE) {
572 #ifdef DEBUG
573 			xdb_dump_request_oe(reqp);
574 #endif
575 			xreq = xdb_get_req(vdp);
576 			ASSERT(xreq);
577 			switch (op) {
578 			case BLKIF_OP_READ:
579 				vdp->xs_stat_req_reads++;
580 				break;
581 			case BLKIF_OP_WRITE_BARRIER:
582 				vdp->xs_stat_req_barriers++;
583 				/* FALLTHRU */
584 			case BLKIF_OP_WRITE:
585 				vdp->xs_stat_req_writes++;
586 				break;
587 			case BLKIF_OP_FLUSH_DISKCACHE:
588 				vdp->xs_stat_req_flushes++;
589 				break;
590 			}
591 
592 			xreq->xr_curseg = 0; /* start from first segment */
593 			bp = xdb_get_buf(vdp, reqp, xreq);
594 			if (bp == NULL) {
595 				/* failed to form a buf */
596 				xdb_free_req(xreq);
597 				xdb_response(vdp, reqp, B_FALSE);
598 				continue;
599 			}
600 			bp->av_forw = NULL;
601 
602 			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
603 			    " buf %p, blkno %lld, size %lu, addr %p",
604 			    (void *)bp, (longlong_t)bp->b_blkno,
605 			    (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));
606 
607 			/* send bp to underlying blk driver */
608 			if (vdp->xs_f_iobuf == NULL) {
609 				vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
610 			} else {
611 				vdp->xs_l_iobuf->av_forw = bp;
612 				vdp->xs_l_iobuf = bp;
613 			}
614 		} else {
615 			xdb_response(vdp, reqp, B_FALSE);
616 			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
617 			    "Unsupported cmd received from dom %d",
618 			    ddi_get_name_addr(dip), vdp->xs_peer));
619 		}
620 	}
621 	/* notify our taskq to push buf to underlying blk driver */
622 	if (ret == DDI_INTR_CLAIMED)
623 		cv_broadcast(&vdp->xs_iocv);
624 
625 	mutex_exit(&vdp->xs_iomutex);
626 
627 	return (ret);
628 }
629 
630 static int
631 xdb_biodone(buf_t *bp)
632 {
633 	int i, err, bioerr;
634 	uint8_t segs;
635 	gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
636 	xdb_request_t *xreq = XDB_BP2XREQ(bp);
637 	xdb_t *vdp = xreq->xr_vdp;
638 	buf_t *nbp;
639 
640 	bioerr = geterror(bp);
641 	if (bioerr)
642 		XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
643 		    ddi_get_name_addr(vdp->xs_dip), bioerr));
644 
645 	/* check if we are done w/ this I/O request */
646 	if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
647 		nbp = xdb_get_buf(vdp, NULL, xreq);
648 		if (nbp) {
649 			err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
650 			if (err == 0) {
651 				XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
652 				    "sent buf to backend ok"));
653 				return (DDI_SUCCESS);
654 			}
655 			bioerr = EIO;
656 			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
657 			    "sent buf to backend dev failed, err=%d",
658 			    ddi_get_name_addr(vdp->xs_dip), err));
659 		} else {
660 			bioerr = EIO;
661 		}
662 	}
663 
664 	/* unmap io pages */
665 	segs = xreq->xr_buf_pages;
666 	/*
667 	 * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
668 	 * according to the definition of blk interface by Xen
669 	 */
670 	ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
671 	for (i = 0; i < segs; i++) {
672 		unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
673 		    vdp->xs_iopage_va, xreq->xr_idx, i);
674 #ifdef DEBUG
675 		mutex_enter(&vdp->xs_iomutex);
676 		unlogva(vdp, unmapops[i].host_addr);
677 		mutex_exit(&vdp->xs_iomutex);
678 #endif
679 		unmapops[i].dev_bus_addr = NULL;
680 		unmapops[i].handle = xreq->xr_page_hdls[i];
681 	}
682 	err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
683 	    unmapops, segs);
684 	ASSERT(!err);
685 
686 	/*
687 	 * If we have reached a barrier write or a cache flush , then we must
688 	 * flush all our I/Os.
689 	 */
690 	if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
691 	    xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
692 		/*
693 		 * XXX At this point the write did succeed, so I don't
694 		 * believe we should report an error because the flush
695 		 * failed. However, this is a debatable point, so
696 		 * maybe we need to think more carefully about this.
697 		 * For now, just cast to void.
698 		 */
699 		(void) ldi_ioctl(vdp->xs_ldi_hdl,
700 		    DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL);
701 	}
702 
703 	mutex_enter(&vdp->xs_iomutex);
704 
705 	/* send response back to frontend */
706 	if (vdp->xs_if_connected) {
707 		ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
708 		if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
709 			xvdi_notify_oe(vdp->xs_dip);
710 		XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
711 		    "sent resp back to frontend, id=%llu",
712 		    (unsigned long long)xreq->xr_id));
713 	}
714 	/* free io resources */
715 	biofini(bp);
716 	xdb_free_req(xreq);
717 
718 	vdp->xs_ionum--;
719 	if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) {
720 		/* we're closing, someone is waiting for I/O clean-up */
721 		cv_signal(&vdp->xs_ionumcv);
722 	}
723 
724 	mutex_exit(&vdp->xs_iomutex);
725 
726 	return (DDI_SUCCESS);
727 }
728 
729 static int
730 xdb_bindto_frontend(xdb_t *vdp)
731 {
732 	int err;
733 	char *oename;
734 	grant_ref_t gref;
735 	evtchn_port_t evtchn;
736 	dev_info_t *dip = vdp->xs_dip;
737 	char protocol[64] = "";
738 
739 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
740 
741 	/*
742 	 * Switch to the XenbusStateInitialised state.  This let's the
743 	 * frontend know that we're about to negotiate a connection.
744 	 */
745 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);
746 
747 	/*
748 	 * Gather info from frontend
749 	 */
750 	oename = xvdi_get_oename(dip);
751 	if (oename == NULL)
752 		return (DDI_FAILURE);
753 
754 	err = xenbus_gather(XBT_NULL, oename,
755 	    XBP_RING_REF, "%lu", &gref,
756 	    XBP_EVENT_CHAN, "%u", &evtchn,
757 	    NULL);
758 	if (err != 0) {
759 		xvdi_dev_error(dip, err,
760 		    "Getting ring-ref and evtchn from frontend");
761 		return (DDI_FAILURE);
762 	}
763 
764 	vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE;
765 	vdp->xs_nentry = BLKIF_RING_SIZE;
766 	vdp->xs_entrysize = sizeof (union blkif_sring_entry);
767 
768 	err = xenbus_gather(XBT_NULL, oename,
769 	    XBP_PROTOCOL, "%63s", protocol, NULL);
770 	if (err)
771 		(void) strcpy(protocol, "unspecified, assuming native");
772 	else {
773 		/*
774 		 * We must check for NATIVE first, so that the fast path
775 		 * is taken for copying data from the guest to the host.
776 		 */
777 		if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) {
778 			if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
779 				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32;
780 				vdp->xs_nentry = BLKIF_X86_32_RING_SIZE;
781 				vdp->xs_entrysize =
782 				    sizeof (union blkif_x86_32_sring_entry);
783 			} else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) ==
784 			    0) {
785 				vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64;
786 				vdp->xs_nentry = BLKIF_X86_64_RING_SIZE;
787 				vdp->xs_entrysize =
788 				    sizeof (union blkif_x86_64_sring_entry);
789 			} else {
790 				xvdi_fatal_error(dip, err, "unknown protocol");
791 				return (DDI_FAILURE);
792 			}
793 		}
794 	}
795 #ifdef DEBUG
796 	cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ",
797 	    ddi_get_name_addr(dip), protocol);
798 #endif
799 
800 	/*
801 	 * Map and init ring.  The ring parameters must match those which
802 	 * have been allocated in the front end.
803 	 */
804 	if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
805 	    gref, &vdp->xs_ring) != DDI_SUCCESS)
806 		return (DDI_FAILURE);
807 
808 	/*
809 	 * This will be removed after we use shadow I/O ring request since
810 	 * we don't need to access the ring itself directly, thus the access
811 	 * handle is not needed
812 	 */
813 	vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;
814 
815 	/* bind event channel */
816 	err = xvdi_bind_evtchn(dip, evtchn);
817 	if (err != DDI_SUCCESS) {
818 		xvdi_unmap_ring(vdp->xs_ring);
819 		return (DDI_FAILURE);
820 	}
821 
822 	return (DDI_SUCCESS);
823 }
824 
825 static void
826 xdb_unbindfrom_frontend(xdb_t *vdp)
827 {
828 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
829 
830 	xvdi_free_evtchn(vdp->xs_dip);
831 	xvdi_unmap_ring(vdp->xs_ring);
832 }
833 
834 /*
835  * xdb_params_change() initiates a allows change to the underlying device/file
836  * that the backend is accessing.  It does this by disconnecting from the
837  * frontend, closing the old device, clearing a bunch of xenbus parameters,
838  * and switching back to the XenbusStateInitialising state.  The frontend
839  * should notice this transition to the XenbusStateInitialising state and
840  * should attempt to reconnect to us (the backend).
841  */
842 static void
843 xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs)
844 {
845 	xenbus_transaction_t	xbt;
846 	dev_info_t		*dip = vdp->xs_dip;
847 	char			*xsname;
848 	int			err;
849 
850 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
851 	ASSERT(vdp->xs_params_path != NULL);
852 
853 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
854 		return;
855 	if (strcmp(vdp->xs_params_path, params) == 0)
856 		return;
857 
858 	/*
859 	 * Close the device we're currently accessing and update the
860 	 * path which points to our backend device/file.
861 	 */
862 	xdb_close(dip);
863 	vdp->xs_fe_initialised = B_FALSE;
864 
865 trans_retry:
866 	if ((err = xenbus_transaction_start(&xbt)) != 0) {
867 		xvdi_dev_error(dip, err, "params change transaction init");
868 		goto errout;
869 	}
870 
871 	/*
872 	 * Delete all the xenbus properties that are connection dependant
873 	 * and go back to the initializing state so that the frontend
874 	 * driver can re-negotiate a connection.
875 	 */
876 	if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) ||
877 	    ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) ||
878 	    ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) ||
879 	    ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) ||
880 	    ((err = xenbus_rm(xbt, xsname, "instance")) != 0) ||
881 	    ((err = xenbus_rm(xbt, xsname, "node")) != 0) ||
882 	    (update_xs && ((err = xenbus_printf(xbt, xsname,
883 	    "params", "%s", params)) != 0)) ||
884 	    ((err = xvdi_switch_state(dip,
885 	    xbt, XenbusStateInitialising) > 0))) {
886 		(void) xenbus_transaction_end(xbt, 1);
887 		xvdi_dev_error(dip, err, "params change transaction setup");
888 		goto errout;
889 	}
890 
891 	if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
892 		if (err == EAGAIN) {
893 			/* transaction is ended, don't need to abort it */
894 			goto trans_retry;
895 		}
896 		xvdi_dev_error(dip, err, "params change transaction commit");
897 		goto errout;
898 	}
899 
900 	/* Change the device that we plan to access */
901 	strfree(vdp->xs_params_path);
902 	vdp->xs_params_path = strdup(params);
903 	return;
904 
905 errout:
906 	(void) xvdi_switch_state(dip, xbt, XenbusStateInitialising);
907 }
908 
909 /*
910  * xdb_watch_params_cb() - This callback is invoked whenever there
911  * is an update to the following xenbus parameter:
912  *     /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
913  *
914  * This normally happens during xm block-configure operations, which
915  * are used to change CD device images for HVM domUs.
916  */
917 /*ARGSUSED*/
918 static void
919 xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg)
920 {
921 	xdb_t			*vdp = (xdb_t *)ddi_get_driver_private(dip);
922 	char			*xsname, *oename, *str, *str2;
923 
924 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
925 	    ((oename = xvdi_get_oename(dip)) == NULL)) {
926 		return;
927 	}
928 
929 	mutex_enter(&vdp->xs_cbmutex);
930 
931 	if (xenbus_read_str(xsname, "params", &str) != 0) {
932 		mutex_exit(&vdp->xs_cbmutex);
933 		return;
934 	}
935 
936 	if (strcmp(vdp->xs_params_path, str) == 0) {
937 		/* Nothing todo */
938 		mutex_exit(&vdp->xs_cbmutex);
939 		strfree(str);
940 		return;
941 	}
942 
943 	/*
944 	 * If the frontend isn't a cd device, doesn't support media
945 	 * requests, or has locked the media, then we can't change
946 	 * the params value.  restore the current value.
947 	 */
948 	str2 = NULL;
949 	if (!XDB_IS_FE_CD(vdp) ||
950 	    (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) ||
951 	    (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) {
952 		if (str2 != NULL)
953 			strfree(str2);
954 		strfree(str);
955 
956 		str = i_pathname(dip);
957 		cmn_err(CE_NOTE,
958 		    "!%s: media locked, ignoring params update", str);
959 		strfree(str);
960 
961 		mutex_exit(&vdp->xs_cbmutex);
962 		return;
963 	}
964 
965 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
966 	    "block-configure params request: \"%s\"", str));
967 
968 	xdb_params_change(vdp, str, B_FALSE);
969 	mutex_exit(&vdp->xs_cbmutex);
970 	strfree(str);
971 }
972 
973 /*
974  * xdb_watch_media_req_cb() - This callback is invoked whenever there
975  * is an update to the following xenbus parameter:
976  *     /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
977  *
978  * Media requests are only supported on CD devices and are issued by
979  * the frontend.  Currently the only supported media request operaions
980  * are "lock" and "eject".  A "lock" prevents the backend from changing
981  * the backing device/file (via xm block-configure).  An "eject" requests
982  * tells the backend device that it should disconnect from the frontend
983  * and closing the backing device/file that is currently in use.
984  */
985 /*ARGSUSED*/
986 static void
987 xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg)
988 {
989 	xdb_t			*vdp = (xdb_t *)ddi_get_driver_private(dip);
990 	char			*oename, *str;
991 
992 	mutex_enter(&vdp->xs_cbmutex);
993 
994 	if ((oename = xvdi_get_oename(dip)) == NULL) {
995 		mutex_exit(&vdp->xs_cbmutex);
996 		return;
997 	}
998 
999 	if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) {
1000 		mutex_exit(&vdp->xs_cbmutex);
1001 		return;
1002 	}
1003 
1004 	if (!XDB_IS_FE_CD(vdp)) {
1005 		xvdi_dev_error(dip, EINVAL,
1006 		    "media-req only supported for cdrom devices");
1007 		mutex_exit(&vdp->xs_cbmutex);
1008 		return;
1009 	}
1010 
1011 	if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
1012 		mutex_exit(&vdp->xs_cbmutex);
1013 		strfree(str);
1014 		return;
1015 	}
1016 	strfree(str);
1017 
1018 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request"));
1019 
1020 	xdb_params_change(vdp, "", B_TRUE);
1021 	(void) xenbus_printf(XBT_NULL, oename,
1022 	    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE);
1023 	mutex_exit(&vdp->xs_cbmutex);
1024 }
1025 
1026 /*
1027  * If we're dealing with a cdrom device, let the frontend know that
1028  * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
1029  * to handle those frontend media request changes, which modify the
1030  * following xenstore parameter:
1031  *	/local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
1032  */
1033 static boolean_t
1034 xdb_media_req_init(xdb_t *vdp)
1035 {
1036 	dev_info_t		*dip = vdp->xs_dip;
1037 	char			*xsname, *oename;
1038 
1039 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1040 
1041 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1042 	    ((oename = xvdi_get_oename(dip)) == NULL))
1043 		return (B_FALSE);
1044 
1045 	if (!XDB_IS_FE_CD(vdp))
1046 		return (B_TRUE);
1047 
1048 	if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0)
1049 		return (B_FALSE);
1050 
1051 	if (xvdi_add_xb_watch_handler(dip, oename,
1052 	    XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) {
1053 		xvdi_dev_error(dip, EAGAIN,
1054 		    "Failed to register watch for cdrom media requests");
1055 		return (B_FALSE);
1056 	}
1057 
1058 	return (B_TRUE);
1059 }
1060 
1061 /*
1062  * Get our params value.  Also, if we're using "params" then setup a
1063  * watch to handle xm block-configure operations which modify the
1064  * following xenstore parameter:
1065  *	/local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
1066  */
1067 static boolean_t
1068 xdb_params_init(xdb_t *vdp)
1069 {
1070 	dev_info_t		*dip = vdp->xs_dip;
1071 	char			*str, *xsname;
1072 	int			err, watch_params = B_FALSE;
1073 
1074 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1075 	ASSERT(vdp->xs_params_path == NULL);
1076 
1077 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
1078 		return (B_FALSE);
1079 
1080 	if ((err = xenbus_read_str(xsname,
1081 	    "dynamic-device-path", &str)) == ENOENT) {
1082 		err = xenbus_read_str(xsname, "params", &str);
1083 		watch_params = B_TRUE;
1084 	}
1085 	if (err != 0)
1086 		return (B_FALSE);
1087 	vdp->xs_params_path = str;
1088 
1089 	/*
1090 	 * If we got our backing store path from "dynamic-device-path" then
1091 	 * there's no reason to watch "params"
1092 	 */
1093 	if (!watch_params)
1094 		return (B_TRUE);
1095 
1096 	if (xvdi_add_xb_watch_handler(dip, xsname, "params",
1097 	    xdb_watch_params_cb, NULL) != DDI_SUCCESS) {
1098 		strfree(vdp->xs_params_path);
1099 		vdp->xs_params_path = NULL;
1100 		return (B_FALSE);
1101 	}
1102 
1103 	return (B_TRUE);
1104 }
1105 
1106 #define	LOFI_CTRL_NODE	"/dev/lofictl"
1107 #define	LOFI_DEV_NODE	"/devices/pseudo/lofi@0:"
1108 #define	LOFI_MODE	(FREAD | FWRITE | FEXCL)
1109 
1110 static int
1111 xdb_setup_node(xdb_t *vdp, char *path)
1112 {
1113 	dev_info_t		*dip = vdp->xs_dip;
1114 	char			*xsname, *str;
1115 	ldi_handle_t		ldi_hdl;
1116 	struct lofi_ioctl	*li;
1117 	int			minor, err;
1118 
1119 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1120 
1121 	if ((xsname = xvdi_get_xsname(dip)) == NULL)
1122 		return (DDI_FAILURE);
1123 
1124 	if ((err = xenbus_read_str(xsname, "type", &str)) != 0) {
1125 		xvdi_dev_error(dip, err, "Getting type from backend device");
1126 		return (DDI_FAILURE);
1127 	}
1128 	if (strcmp(str, "file") == 0)
1129 		vdp->xs_type |= XDB_DEV_BE_LOFI;
1130 	strfree(str);
1131 
1132 	if (!XDB_IS_BE_LOFI(vdp)) {
1133 		(void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN);
1134 		ASSERT(vdp->xs_lofi_path == NULL);
1135 		return (DDI_SUCCESS);
1136 	}
1137 
1138 	do {
1139 		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
1140 		    &ldi_hdl, vdp->xs_ldi_li);
1141 	} while (err == EBUSY);
1142 	if (err != 0) {
1143 		return (DDI_FAILURE);
1144 	}
1145 
1146 	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
1147 	(void) strlcpy(li->li_filename, vdp->xs_params_path,
1148 	    sizeof (li->li_filename));
1149 	err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
1150 	    LOFI_MODE | FKIOCTL, kcred, &minor);
1151 	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
1152 	kmem_free(li, sizeof (*li));
1153 
1154 	if (err != 0) {
1155 		cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
1156 		    ddi_get_name_addr(dip), vdp->xs_params_path);
1157 		return (DDI_FAILURE);
1158 	}
1159 
1160 	/*
1161 	 * return '/devices/...' instead of '/dev/lofi/...' since the
1162 	 * former is available immediately after calling ldi_ioctl
1163 	 */
1164 	(void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor);
1165 	(void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path);
1166 
1167 	ASSERT(vdp->xs_lofi_path == NULL);
1168 	vdp->xs_lofi_path = strdup(path);
1169 
1170 	return (DDI_SUCCESS);
1171 }
1172 
1173 static void
1174 xdb_teardown_node(xdb_t *vdp)
1175 {
1176 	dev_info_t *dip = vdp->xs_dip;
1177 	ldi_handle_t ldi_hdl;
1178 	struct lofi_ioctl *li;
1179 	int err;
1180 
1181 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1182 
1183 	if (!XDB_IS_BE_LOFI(vdp))
1184 		return;
1185 
1186 	vdp->xs_type &= ~XDB_DEV_BE_LOFI;
1187 	ASSERT(vdp->xs_lofi_path != NULL);
1188 
1189 	li = kmem_zalloc(sizeof (*li), KM_SLEEP);
1190 	(void) strlcpy(li->li_filename, vdp->xs_params_path,
1191 	    sizeof (li->li_filename));
1192 
1193 	do {
1194 		err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
1195 		    &ldi_hdl, vdp->xs_ldi_li);
1196 	} while (err == EBUSY);
1197 
1198 	if (err != 0) {
1199 		kmem_free(li, sizeof (*li));
1200 		return;
1201 	}
1202 
1203 	if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li,
1204 	    LOFI_MODE | FKIOCTL, kcred, NULL) != 0) {
1205 		cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s",
1206 		    ddi_get_name_addr(dip), li->li_filename);
1207 	}
1208 
1209 	(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
1210 	kmem_free(li, sizeof (*li));
1211 
1212 	strfree(vdp->xs_lofi_path);
1213 	vdp->xs_lofi_path = NULL;
1214 }
1215 
1216 static int
1217 xdb_open_device(xdb_t *vdp)
1218 {
1219 	dev_info_t *dip = vdp->xs_dip;
1220 	uint64_t devsize;
1221 	int blksize;
1222 	char *nodepath;
1223 
1224 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1225 
1226 	if (strlen(vdp->xs_params_path) == 0) {
1227 		/*
1228 		 * it's possible to have no backing device when dealing
1229 		 * with a pv cdrom drive that has no virtual cd associated
1230 		 * with it.
1231 		 */
1232 		ASSERT(XDB_IS_FE_CD(vdp));
1233 		ASSERT(vdp->xs_sectors == 0);
1234 		ASSERT(vdp->xs_ldi_li == NULL);
1235 		ASSERT(vdp->xs_ldi_hdl == NULL);
1236 		return (DDI_SUCCESS);
1237 	}
1238 
1239 	if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
1240 		return (DDI_FAILURE);
1241 
1242 	nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1243 
1244 	/* try to open backend device */
1245 	if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) {
1246 		xvdi_dev_error(dip, ENXIO,
1247 		    "Getting device path of backend device");
1248 		ldi_ident_release(vdp->xs_ldi_li);
1249 		kmem_free(nodepath, MAXPATHLEN);
1250 		return (DDI_FAILURE);
1251 	}
1252 
1253 	if (ldi_open_by_name(nodepath,
1254 	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
1255 	    kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
1256 		xdb_teardown_node(vdp);
1257 		ldi_ident_release(vdp->xs_ldi_li);
1258 		cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
1259 		    ddi_get_name_addr(dip), nodepath);
1260 		kmem_free(nodepath, MAXPATHLEN);
1261 		return (DDI_FAILURE);
1262 	}
1263 
1264 	if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
1265 		(void) ldi_close(vdp->xs_ldi_hdl,
1266 		    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
1267 		xdb_teardown_node(vdp);
1268 		ldi_ident_release(vdp->xs_ldi_li);
1269 		kmem_free(nodepath, MAXPATHLEN);
1270 		return (DDI_FAILURE);
1271 	}
1272 
1273 	blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl,
1274 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
1275 	    "blksize", DEV_BSIZE);
1276 	if (blksize == DEV_BSIZE)
1277 		blksize = ldi_prop_get_int(vdp->xs_ldi_hdl,
1278 		    LDI_DEV_T_ANY | DDI_PROP_DONTPASS |
1279 		    DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE);
1280 
1281 	vdp->xs_sec_size = blksize;
1282 	vdp->xs_sectors = devsize / blksize;
1283 
1284 	/* check if the underlying device is a CD/DVD disc */
1285 	if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
1286 	    INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT)
1287 		vdp->xs_type |= XDB_DEV_BE_CD;
1288 
1289 	/* check if the underlying device is a removable disk */
1290 	if (ldi_prop_exists(vdp->xs_ldi_hdl,
1291 	    LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
1292 	    "removable-media"))
1293 		vdp->xs_type |= XDB_DEV_BE_RMB;
1294 
1295 	kmem_free(nodepath, MAXPATHLEN);
1296 	return (DDI_SUCCESS);
1297 }
1298 
1299 static void
1300 xdb_close_device(xdb_t *vdp)
1301 {
1302 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1303 
1304 	if (strlen(vdp->xs_params_path) == 0) {
1305 		ASSERT(XDB_IS_FE_CD(vdp));
1306 		ASSERT(vdp->xs_sectors == 0);
1307 		ASSERT(vdp->xs_ldi_li == NULL);
1308 		ASSERT(vdp->xs_ldi_hdl == NULL);
1309 		return;
1310 	}
1311 
1312 	(void) ldi_close(vdp->xs_ldi_hdl,
1313 	    FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
1314 	xdb_teardown_node(vdp);
1315 	ldi_ident_release(vdp->xs_ldi_li);
1316 	vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB);
1317 	vdp->xs_sectors = 0;
1318 	vdp->xs_ldi_li = NULL;
1319 	vdp->xs_ldi_hdl = NULL;
1320 }
1321 
1322 /*
1323  * Kick-off connect process
1324  * If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE
1325  * the xs_if_connected will be changed to B_TRUE on success,
1326  */
1327 static void
1328 xdb_start_connect(xdb_t *vdp)
1329 {
1330 	xenbus_transaction_t	xbt;
1331 	dev_info_t		*dip = vdp->xs_dip;
1332 	boolean_t		fb_exists;
1333 	int			err, instance = ddi_get_instance(dip);
1334 	uint64_t		sectors;
1335 	uint_t			dinfo, ssize;
1336 	char			*xsname;
1337 
1338 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1339 
1340 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1341 	    ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1))
1342 		return;
1343 
1344 	mutex_enter(&vdp->xs_iomutex);
1345 	/*
1346 	 * if the hotplug scripts haven't run or if the frontend is not
1347 	 * initialized, then we can't try to connect.
1348 	 */
1349 	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
1350 		ASSERT(!vdp->xs_if_connected);
1351 		mutex_exit(&vdp->xs_iomutex);
1352 		return;
1353 	}
1354 
1355 	/* If we're already connected then there's nothing todo */
1356 	if (vdp->xs_if_connected) {
1357 		mutex_exit(&vdp->xs_iomutex);
1358 		return;
1359 	}
1360 	mutex_exit(&vdp->xs_iomutex);
1361 
1362 	/*
1363 	 * Start connect to frontend only when backend device are ready
1364 	 * and frontend has moved to XenbusStateInitialised, which means
1365 	 * ready to connect.
1366 	 */
1367 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
1368 	    "xdb@%s: starting connection process", ddi_get_name_addr(dip)));
1369 
1370 	if (xdb_open_device(vdp) != DDI_SUCCESS)
1371 		return;
1372 
1373 	if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) {
1374 		xdb_close_device(vdp);
1375 		return;
1376 	}
1377 
1378 	/* init i/o requests */
1379 	xdb_init_ioreqs(vdp);
1380 
1381 	if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
1382 	    != DDI_SUCCESS) {
1383 		xdb_uninit_ioreqs(vdp);
1384 		xdb_unbindfrom_frontend(vdp);
1385 		xdb_close_device(vdp);
1386 		return;
1387 	}
1388 
1389 	dinfo = 0;
1390 	if (XDB_IS_RO(vdp))
1391 		dinfo |= VDISK_READONLY;
1392 	if (XDB_IS_BE_RMB(vdp))
1393 		dinfo |= VDISK_REMOVABLE;
1394 	if (XDB_IS_BE_CD(vdp))
1395 		dinfo |= VDISK_CDROM;
1396 	if (XDB_IS_FE_CD(vdp))
1397 		dinfo |= VDISK_REMOVABLE | VDISK_CDROM;
1398 
1399 	/*
1400 	 * we can recieve intr any time from now on
1401 	 * mark that we're ready to take intr
1402 	 */
1403 	mutex_enter(&vdp->xs_iomutex);
1404 	ASSERT(vdp->xs_fe_initialised);
1405 	vdp->xs_if_connected = B_TRUE;
1406 	mutex_exit(&vdp->xs_iomutex);
1407 
1408 trans_retry:
1409 	/* write into xenstore the info needed by frontend */
1410 	if ((err = xenbus_transaction_start(&xbt)) != 0) {
1411 		xvdi_dev_error(dip, err, "connect transaction init");
1412 		goto errout;
1413 	}
1414 
1415 	/* If feature-barrier isn't present in xenstore, add it.  */
1416 	fb_exists = xenbus_exists(xsname, XBP_FB);
1417 
1418 	ssize = (vdp->xs_sec_size == 0) ? DEV_BSIZE : vdp->xs_sec_size;
1419 	sectors = vdp->xs_sectors;
1420 	if (((!fb_exists &&
1421 	    (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) ||
1422 	    (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) ||
1423 	    (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) ||
1424 	    (err = xenbus_printf(xbt, xsname,
1425 	    XBP_SECTORS, "%"PRIu64, sectors)) ||
1426 	    (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) ||
1427 	    ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) {
1428 		(void) xenbus_transaction_end(xbt, 1);
1429 		xvdi_dev_error(dip, err, "connect transaction setup");
1430 		goto errout;
1431 	}
1432 
1433 	if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
1434 		if (err == EAGAIN) {
1435 			/* transaction is ended, don't need to abort it */
1436 			goto trans_retry;
1437 		}
1438 		xvdi_dev_error(dip, err, "connect transaction commit");
1439 		goto errout;
1440 	}
1441 
1442 	return;
1443 
1444 errout:
1445 	xdb_close(dip);
1446 }
1447 
1448 /*
1449  * Disconnect from frontend and close backend device
1450  */
1451 static void
1452 xdb_close(dev_info_t *dip)
1453 {
1454 	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
1455 
1456 	ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1457 	mutex_enter(&vdp->xs_iomutex);
1458 
1459 	/*
1460 	 * if the hotplug scripts haven't run or if the frontend is not
1461 	 * initialized, then we can't be connected, so there's no
1462 	 * connection to close.
1463 	 */
1464 	if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
1465 		ASSERT(!vdp->xs_if_connected);
1466 		mutex_exit(&vdp->xs_iomutex);
1467 		return;
1468 	}
1469 
1470 	/* if we're not connected, there's nothing to do */
1471 	if (!vdp->xs_if_connected) {
1472 		cv_broadcast(&vdp->xs_iocv);
1473 		mutex_exit(&vdp->xs_iomutex);
1474 		return;
1475 	}
1476 
1477 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected"));
1478 
1479 	vdp->xs_if_connected = B_FALSE;
1480 	cv_broadcast(&vdp->xs_iocv);
1481 
1482 	mutex_exit(&vdp->xs_iomutex);
1483 
1484 	/* stop accepting I/O request from frontend */
1485 	ddi_remove_intr(dip, 0, NULL);
1486 
1487 	/* clear all on-going I/Os, if any */
1488 	mutex_enter(&vdp->xs_iomutex);
1489 	while (vdp->xs_ionum > 0)
1490 		cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
1491 	mutex_exit(&vdp->xs_iomutex);
1492 
1493 	/* clean up resources and close this interface */
1494 	xdb_uninit_ioreqs(vdp);
1495 	xdb_unbindfrom_frontend(vdp);
1496 	xdb_close_device(vdp);
1497 	vdp->xs_peer = (domid_t)-1;
1498 }
1499 
1500 static void
1501 xdb_send_buf(void *arg)
1502 {
1503 	xdb_t	*vdp = (xdb_t *)arg;
1504 	buf_t	*bp;
1505 	int	err;
1506 
1507 	mutex_enter(&vdp->xs_iomutex);
1508 	while (vdp->xs_send_buf) {
1509 		if ((bp = vdp->xs_f_iobuf) == NULL) {
1510 			/* wait for some io to send */
1511 			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
1512 			    "send buf waiting for io"));
1513 			cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
1514 			continue;
1515 		}
1516 
1517 		vdp->xs_f_iobuf = bp->av_forw;
1518 		bp->av_forw = NULL;
1519 		vdp->xs_ionum++;
1520 
1521 		mutex_exit(&vdp->xs_iomutex);
1522 		if (bp->b_bcount == 0) {
1523 			/* no I/O needs to be done */
1524 			(void) xdb_biodone(bp);
1525 			mutex_enter(&vdp->xs_iomutex);
1526 			continue;
1527 		}
1528 
1529 		err = EIO;
1530 		if (vdp->xs_ldi_hdl != NULL)
1531 			err = ldi_strategy(vdp->xs_ldi_hdl, bp);
1532 		if (err != 0) {
1533 			bp->b_flags |= B_ERROR;
1534 			(void) xdb_biodone(bp);
1535 			XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
1536 			    "xdb@%s: sent buf to backend devfailed, err=%d",
1537 			    ddi_get_name_addr(vdp->xs_dip), err));
1538 		} else {
1539 			XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
1540 			    "sent buf to backend ok"));
1541 		}
1542 		mutex_enter(&vdp->xs_iomutex);
1543 	}
1544 	XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing"));
1545 	mutex_exit(&vdp->xs_iomutex);
1546 }
1547 
1548 /*ARGSUSED*/
1549 static void
1550 xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
1551     void *impl_data)
1552 {
1553 	xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
1554 	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
1555 
1556 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
1557 	    "hotplug status change to %d!", ddi_get_name_addr(dip), state));
1558 
1559 	if (state != Connected)
1560 		return;
1561 
1562 	mutex_enter(&vdp->xs_cbmutex);
1563 
1564 	/* If hotplug script have already run, there's nothing todo */
1565 	if (vdp->xs_hp_connected) {
1566 		mutex_exit(&vdp->xs_cbmutex);
1567 		return;
1568 	}
1569 
1570 	vdp->xs_hp_connected = B_TRUE;
1571 	xdb_start_connect(vdp);
1572 	mutex_exit(&vdp->xs_cbmutex);
1573 }
1574 
1575 /*ARGSUSED*/
1576 static void
1577 xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
1578     void *impl_data)
1579 {
1580 	XenbusState new_state = *(XenbusState *)impl_data;
1581 	xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
1582 
1583 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
1584 	    "otherend state change to %d!", ddi_get_name_addr(dip), new_state));
1585 
1586 	mutex_enter(&vdp->xs_cbmutex);
1587 
1588 	/*
1589 	 * Now it'd really be nice if there was a well defined state
1590 	 * transition model for xen frontend drivers, but unfortunatly
1591 	 * there isn't.  So we're stuck with assuming that all state
1592 	 * transitions are possible, and we'll just have to deal with
1593 	 * them regardless of what state we're in.
1594 	 */
1595 	switch (new_state) {
1596 	case XenbusStateUnknown:
1597 	case XenbusStateInitialising:
1598 	case XenbusStateInitWait:
1599 		/* tear down our connection to the frontend */
1600 		xdb_close(dip);
1601 		vdp->xs_fe_initialised = B_FALSE;
1602 		break;
1603 
1604 	case XenbusStateInitialised:
1605 		/*
1606 		 * If we were conected, then we need to drop the connection
1607 		 * and re-negotiate it.
1608 		 */
1609 		xdb_close(dip);
1610 		vdp->xs_fe_initialised = B_TRUE;
1611 		xdb_start_connect(vdp);
1612 		break;
1613 
1614 	case XenbusStateConnected:
1615 		/* nothing todo here other than congratulate the frontend */
1616 		break;
1617 
1618 	case XenbusStateClosing:
1619 		/* monkey see monkey do */
1620 		(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
1621 		break;
1622 
1623 	case XenbusStateClosed:
1624 		/* tear down our connection to the frontend */
1625 		xdb_close(dip);
1626 		vdp->xs_fe_initialised = B_FALSE;
1627 		(void) xvdi_switch_state(dip, XBT_NULL, new_state);
1628 		break;
1629 	}
1630 
1631 	mutex_exit(&vdp->xs_cbmutex);
1632 }
1633 
1634 static int
1635 xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1636 {
1637 	ddi_iblock_cookie_t	ibc;
1638 	xdb_t			*vdp;
1639 	int			instance = ddi_get_instance(dip);
1640 	char			*xsname, *oename;
1641 	char			*str;
1642 
1643 	switch (cmd) {
1644 	case DDI_RESUME:
1645 		return (DDI_FAILURE);
1646 	case DDI_ATTACH:
1647 		break;
1648 	default:
1649 		return (DDI_FAILURE);
1650 	}
1651 	/* DDI_ATTACH */
1652 
1653 	if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1654 	    ((oename = xvdi_get_oename(dip)) == NULL))
1655 		return (DDI_FAILURE);
1656 
1657 	/*
1658 	 * Disable auto-detach.  This is necessary so that we don't get
1659 	 * detached while we're disconnected from the front end.
1660 	 */
1661 	(void) ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1);
1662 
1663 	if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
1664 		return (DDI_FAILURE);
1665 
1666 	if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
1667 		return (DDI_FAILURE);
1668 
1669 	vdp = ddi_get_soft_state(xdb_statep, instance);
1670 	vdp->xs_dip = dip;
1671 	mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
1672 	mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
1673 	cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
1674 	cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);
1675 	ddi_set_driver_private(dip, vdp);
1676 
1677 	if (!xdb_kstat_init(vdp))
1678 		goto errout1;
1679 
1680 	/* Check if the frontend device is supposed to be a cdrom */
1681 	if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0)
1682 		return (DDI_FAILURE);
1683 	if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
1684 		vdp->xs_type |= XDB_DEV_FE_CD;
1685 	strfree(str);
1686 
1687 	/* Check if the frontend device is supposed to be read only */
1688 	if (xenbus_read_str(xsname, "mode", &str) != 0)
1689 		return (DDI_FAILURE);
1690 	if ((strcmp(str, "r") == NULL) || (strcmp(str, "ro") == NULL))
1691 		vdp->xs_type |= XDB_DEV_RO;
1692 	strfree(str);
1693 
1694 	mutex_enter(&vdp->xs_cbmutex);
1695 	if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) {
1696 		xvdi_remove_xb_watch_handlers(dip);
1697 		mutex_exit(&vdp->xs_cbmutex);
1698 		goto errout2;
1699 	}
1700 	mutex_exit(&vdp->xs_cbmutex);
1701 
1702 	vdp->xs_send_buf = B_TRUE;
1703 	vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
1704 	    TASKQ_DEFAULTPRI, 0);
1705 	(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
1706 	    DDI_SLEEP);
1707 
1708 	/* Watch frontend and hotplug state change */
1709 	if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
1710 	    NULL) != DDI_SUCCESS) ||
1711 	    (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
1712 	    NULL) != DDI_SUCCESS))
1713 		goto errout3;
1714 
1715 	/*
1716 	 * Kick-off hotplug script
1717 	 */
1718 	if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
1719 		cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
1720 		    ddi_get_name_addr(dip));
1721 		goto errout3;
1722 	}
1723 
1724 	/*
1725 	 * start waiting for hotplug event and otherend state event
1726 	 * mainly for debugging, frontend will not take any op seeing this
1727 	 */
1728 	(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
1729 
1730 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
1731 	    ddi_get_name_addr(dip)));
1732 	return (DDI_SUCCESS);
1733 
1734 errout3:
1735 	ASSERT(vdp->xs_hp_connected && vdp->xs_if_connected);
1736 
1737 	xvdi_remove_event_handler(dip, NULL);
1738 
1739 	/* Disconnect from the backend */
1740 	mutex_enter(&vdp->xs_cbmutex);
1741 	mutex_enter(&vdp->xs_iomutex);
1742 	vdp->xs_send_buf = B_FALSE;
1743 	cv_broadcast(&vdp->xs_iocv);
1744 	mutex_exit(&vdp->xs_iomutex);
1745 	mutex_exit(&vdp->xs_cbmutex);
1746 
1747 	/* wait for all io to dtrain and destroy io taskq */
1748 	ddi_taskq_destroy(vdp->xs_iotaskq);
1749 
1750 	/* tear down block-configure watch */
1751 	mutex_enter(&vdp->xs_cbmutex);
1752 	xvdi_remove_xb_watch_handlers(dip);
1753 	mutex_exit(&vdp->xs_cbmutex);
1754 
1755 errout2:
1756 	/* remove kstats */
1757 	kstat_delete(vdp->xs_kstats);
1758 
1759 errout1:
1760 	/* free up driver state */
1761 	ddi_set_driver_private(dip, NULL);
1762 	cv_destroy(&vdp->xs_iocv);
1763 	cv_destroy(&vdp->xs_ionumcv);
1764 	mutex_destroy(&vdp->xs_cbmutex);
1765 	mutex_destroy(&vdp->xs_iomutex);
1766 	ddi_soft_state_free(xdb_statep, instance);
1767 
1768 	return (DDI_FAILURE);
1769 }
1770 
1771 /*ARGSUSED*/
1772 static int
1773 xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1774 {
1775 	int instance = ddi_get_instance(dip);
1776 	xdb_t *vdp = XDB_INST2SOFTS(instance);
1777 
1778 	switch (cmd) {
1779 	case DDI_SUSPEND:
1780 		return (DDI_FAILURE);
1781 	case DDI_DETACH:
1782 		break;
1783 	default:
1784 		return (DDI_FAILURE);
1785 	}
1786 
1787 	/* DDI_DETACH handling */
1788 
1789 	/* refuse to detach if we're still in use by the frontend */
1790 	mutex_enter(&vdp->xs_iomutex);
1791 	if (vdp->xs_if_connected) {
1792 		mutex_exit(&vdp->xs_iomutex);
1793 		return (DDI_FAILURE);
1794 	}
1795 	vdp->xs_send_buf = B_FALSE;
1796 	cv_broadcast(&vdp->xs_iocv);
1797 	mutex_exit(&vdp->xs_iomutex);
1798 
1799 	xvdi_remove_event_handler(dip, NULL);
1800 	(void) xvdi_post_event(dip, XEN_HP_REMOVE);
1801 
1802 	ddi_taskq_destroy(vdp->xs_iotaskq);
1803 
1804 	mutex_enter(&vdp->xs_cbmutex);
1805 	xvdi_remove_xb_watch_handlers(dip);
1806 	mutex_exit(&vdp->xs_cbmutex);
1807 
1808 	cv_destroy(&vdp->xs_iocv);
1809 	cv_destroy(&vdp->xs_ionumcv);
1810 	mutex_destroy(&vdp->xs_cbmutex);
1811 	mutex_destroy(&vdp->xs_iomutex);
1812 	kstat_delete(vdp->xs_kstats);
1813 	ddi_set_driver_private(dip, NULL);
1814 	ddi_soft_state_free(xdb_statep, instance);
1815 
1816 	XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
1817 	    ddi_get_name_addr(dip)));
1818 	return (DDI_SUCCESS);
1819 }
1820 
1821 static struct dev_ops xdb_dev_ops = {
1822 	DEVO_REV,	/* devo_rev */
1823 	0,		/* devo_refcnt */
1824 	ddi_getinfo_1to1, /* devo_getinfo */
1825 	nulldev,	/* devo_identify */
1826 	nulldev,	/* devo_probe */
1827 	xdb_attach,	/* devo_attach */
1828 	xdb_detach,	/* devo_detach */
1829 	nodev,		/* devo_reset */
1830 	NULL,		/* devo_cb_ops */
1831 	NULL,		/* devo_bus_ops */
1832 	NULL,		/* power */
1833 	ddi_quiesce_not_needed, /* quiesce */
1834 };
1835 
1836 /*
1837  * Module linkage information for the kernel.
1838  */
1839 static struct modldrv modldrv = {
1840 	&mod_driverops,			/* Type of module. */
1841 	"vbd backend driver",		/* Name of the module */
1842 	&xdb_dev_ops			/* driver ops */
1843 };
1844 
1845 static struct modlinkage xdb_modlinkage = {
1846 	MODREV_1,
1847 	&modldrv,
1848 	NULL
1849 };
1850 
1851 int
1852 _init(void)
1853 {
1854 	int rv;
1855 
1856 	if ((rv = ddi_soft_state_init((void **)&xdb_statep,
1857 	    sizeof (xdb_t), 0)) == 0)
1858 		if ((rv = mod_install(&xdb_modlinkage)) != 0)
1859 			ddi_soft_state_fini((void **)&xdb_statep);
1860 	return (rv);
1861 }
1862 
1863 int
1864 _fini(void)
1865 {
1866 	int rv;
1867 
1868 	if ((rv = mod_remove(&xdb_modlinkage)) != 0)
1869 		return (rv);
1870 	ddi_soft_state_fini((void **)&xdb_statep);
1871 	return (rv);
1872 }
1873 
1874 int
1875 _info(struct modinfo *modinfop)
1876 {
1877 	return (mod_info(&xdb_modlinkage, modinfop));
1878 }
1879 
1880 static int
1881 xdb_get_request(xdb_t *vdp, blkif_request_t *req)
1882 {
1883 	void *src = xvdi_ring_get_request(vdp->xs_ring);
1884 
1885 	if (src == NULL)
1886 		return (0);
1887 
1888 	switch (vdp->xs_blk_protocol) {
1889 	case BLKIF_PROTOCOL_NATIVE:
1890 		(void) memcpy(req, src, sizeof (*req));
1891 		break;
1892 	case BLKIF_PROTOCOL_X86_32:
1893 		blkif_get_x86_32_req(req, src);
1894 		break;
1895 	case BLKIF_PROTOCOL_X86_64:
1896 		blkif_get_x86_64_req(req, src);
1897 		break;
1898 	default:
1899 		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
1900 		    ddi_get_name_addr(vdp->xs_dip),
1901 		    vdp->xs_blk_protocol);
1902 	}
1903 	return (1);
1904 }
1905 
1906 static int
1907 xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
1908 {
1909 	ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
1910 	blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring);
1911 	blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp;
1912 	blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp;
1913 
1914 	ASSERT(rsp);
1915 
1916 	switch (vdp->xs_blk_protocol) {
1917 	case BLKIF_PROTOCOL_NATIVE:
1918 		ddi_put64(acchdl, &rsp->id, id);
1919 		ddi_put8(acchdl, &rsp->operation, op);
1920 		ddi_put16(acchdl, (uint16_t *)&rsp->status,
1921 		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
1922 		break;
1923 	case BLKIF_PROTOCOL_X86_32:
1924 		ddi_put64(acchdl, &rsp_32->id, id);
1925 		ddi_put8(acchdl, &rsp_32->operation, op);
1926 		ddi_put16(acchdl, (uint16_t *)&rsp_32->status,
1927 		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
1928 		break;
1929 	case BLKIF_PROTOCOL_X86_64:
1930 		ddi_put64(acchdl, &rsp_64->id, id);
1931 		ddi_put8(acchdl, &rsp_64->operation, op);
1932 		ddi_put16(acchdl, (uint16_t *)&rsp_64->status,
1933 		    status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
1934 		break;
1935 	default:
1936 		cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
1937 		    ddi_get_name_addr(vdp->xs_dip),
1938 		    vdp->xs_blk_protocol);
1939 	}
1940 
1941 	return (xvdi_ring_push_response(vdp->xs_ring));
1942 }
1943 
1944 static void
1945 blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
1946 {
1947 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
1948 	dst->operation = src->operation;
1949 	dst->nr_segments = src->nr_segments;
1950 	dst->handle = src->handle;
1951 	dst->id = src->id;
1952 	dst->sector_number = src->sector_number;
1953 	if (n > src->nr_segments)
1954 		n = src->nr_segments;
1955 	for (i = 0; i < n; i++)
1956 		dst->seg[i] = src->seg[i];
1957 }
1958 
1959 static void
1960 blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
1961 {
1962 	int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
1963 	dst->operation = src->operation;
1964 	dst->nr_segments = src->nr_segments;
1965 	dst->handle = src->handle;
1966 	dst->id = src->id;
1967 	dst->sector_number = src->sector_number;
1968 	if (n > src->nr_segments)
1969 		n = src->nr_segments;
1970 	for (i = 0; i < n; i++)
1971 		dst->seg[i] = src->seg[i];
1972 }
1973