xref: /titanic_51/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c (revision a31148363f598def767ac48c5d82e1572e44b935)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <io/xdf_shell.h>
27 #include <sys/dkio.h>
28 #include <sys/scsi/scsi_types.h>
29 
30 /*
31  * General Notes
32  *
 * We don't support disks with bad block mappings.  We have this
 * limitation because the underlying xdf driver doesn't support
 * bad block remapping.  If there is a need to support this feature
 * it should be added directly to the xdf driver and we should just
 * pass requests straight on through and let it handle the remapping.
38  * Also, it's probably worth pointing out that most modern disks do bad
39  * block remapping internally in the hardware so there's actually less
40  * of a chance of us ever discovering bad blocks.  Also, in most cases
41  * this driver (and the xdf driver) will only be used with virtualized
42  * devices, so one might wonder why a virtual device would ever actually
43  * experience bad blocks.  To wrap this up, you might be wondering how
44  * these bad block mappings get created and how they are managed.  Well,
45  * there are two tools for managing bad block mappings, format(1M) and
46  * addbadsec(1M).  Format(1M) can be used to do a surface scan of a disk
47  * to attempt to find bad block and create mappings for them.  Format(1M)
48  * and addbadsec(1M) can also be used to edit existing mappings that may
49  * be saved on the disk.
50  *
 * The underlying PV driver that this driver passes on requests to is the
 * xdf driver.  Since in most cases the xdf driver doesn't deal with
 * physical disks it has its own algorithm for assigning a physical
 * geometry to a virtual disk (ie, cylinder count, head count, etc.)
 * The default values chosen by the xdf driver may not match those
 * assigned to a disk by a hardware disk emulator in an HVM environment.
 * This is a problem since these physical geometry attributes affect
 * things like the partition table, backup label location, etc.  So
 * to emulate disk devices correctly we need to know the physical geometry
 * that was assigned to a disk at the time of its initialization.
 * Normally in an HVM environment this information will be passed to
62  * the BIOS and operating system from the hardware emulator that is
63  * emulating the disk devices.  In the case of a solaris dom0+xvm
64  * this would be qemu.  So to work around this issue, this driver will
65  * query the emulated hardware to get the assigned physical geometry
66  * and then pass this geometry onto the xdf driver so that it can use it.
67  * But really, this information is essentially metadata about the disk
 * that should be kept with the disk image itself.  (Assuming of course
 * that a disk image is the actual backing store for this emulated device.)
70  * This metadata should also be made available to PV drivers via a common
71  * mechanism, probably the xenstore.  The fact that this metadata isn't
72  * available outside of HVM domains means that it's difficult to move
73  * disks between HVM and PV domains, since a fully PV domain will have no
74  * way of knowing what the correct geometry of the target device is.
75  * (Short of reading the disk, looking for things like partition tables
76  * and labels, and taking a best guess at what the geometry was when
 * the disk was initialized.  Unsurprisingly, qemu actually does this.)
78  *
79  * This driver has to map xdf shell device instances into their corresponding
80  * xdf device instances.  We have to do this to ensure that when a user
81  * accesses a emulated xdf shell device we map those accesses to the proper
82  * paravirtualized device.  Basically what we need to know is how multiple
83  * 'disk' entries in a domU configuration file get mapped to emulated
84  * xdf shell devices and to xdf devices.  The 'disk' entry to xdf instance
85  * mappings we know because those are done within the Solaris xvdi code
86  * and the xpvd nexus driver.  But the config to emulated devices mappings
87  * are handled entirely within the xen management tool chain and the
88  * hardware emulator.  Since all the tools that establish these mappings
89  * live in dom0, dom0 should really supply us with this information,
 * probably via the xenstore.  Unfortunately it doesn't, so since there's
91  * no good way to determine this mapping dynamically, this driver uses
92  * a hard coded set of static mappings.  These mappings are hardware
93  * emulator specific because each different hardware emulator could have
94  * a different device tree with different xdf shell device paths.  This
95  * means that if we want to continue to use this static mapping approach
96  * to allow Solaris to run on different hardware emulators we'll have
97  * to analyze each of those emulators to determine what paths they
98  * use and hard code those paths into this driver.  yech.  This metadata
99  * really needs to be supplied to us by dom0.
100  *
 * This driver accesses underlying xdf nodes.  Unfortunately, devices
 * must create minor nodes during attach, and for disk devices to create
 * minor nodes, they have to look at the label on the disk, so this means
 * that disk drivers must be able to access a disk's contents during
 * attach.  That means that this disk driver must be able to access
 * underlying xdf nodes during attach.  Unfortunately, due to device tree
 * locking restrictions, we cannot have an attach operation occurring on
 * this device and then attempt to access another device which may
 * cause another attach to occur in a different device tree branch
 * since this could result in deadlock.  Hence, this driver can only
 * access xdf device nodes that we know are attached, and it can't use
 * any ddi interfaces to access those nodes if those interfaces could
 * trigger an attach of the xdf device.  So this driver works around
 * these restrictions by talking directly to xdf devices via
 * xdf_hvm_hold().  This interface takes a pathname to an xdf device,
 * and if that device is already attached then it returns a held dip
 * pointer for that device node.  This prevents us from getting into
118  * deadlock situations, but now we need a mechanism to ensure that all
119  * the xdf device nodes this driver might access are attached before
120  * this driver tries to access them.  This is accomplished via the
121  * hvmboot_rootconf() callback which is invoked just before root is
 * mounted.  hvmboot_rootconf() will attach xpvd and tell it to configure
 * all xdf devices visible to the system.  All these xdf device nodes
 * will also be marked with the "ddi-no-autodetach" property so that
 * once they are configured, they will not be automatically unconfigured.
126  * The only way that they could be unconfigured is if the administrator
127  * explicitly attempts to unload required modules via rem_drv(1M)
128  * or modunload(1M).
129  */
130 
/*
 * Minor number decoding: 16 partitions + fdisk (see xdf.h)
 */
#define	XDFS_DEV2UNIT(dev)	XDF_INST((getminor((dev))))
#define	XDFS_DEV2PART(dev)	XDF_PART((getminor((dev))))

/*
 * True when otyp is OTYP_BLK, OTYP_CHR or OTYP_LYR.  The argument is
 * parenthesized at every use so that an arbitrary expression may be
 * passed without operator-precedence surprises.
 */
#define	OTYP_VALID(otyp)	(((otyp) == OTYP_BLK) ||		\
					((otyp) == OTYP_CHR) ||		\
					((otyp) == OTYP_LYR))

#define	XDFS_NODES		4

/*
 * Accessors for the xdfs_hvm_state_t that occupies the last
 * sizeof (xdfs_hvm_state_t) bytes of a soft state structure of
 * xdfs_ss_size bytes.  sp is the address of the soft state structure.
 */
#define	XDFS_HVM_MODE(sp)	(XDFS_HVM_STATE(sp)->xdfs_hs_mode)
#define	XDFS_HVM_DIP(sp)	(XDFS_HVM_STATE(sp)->xdfs_hs_dip)
#define	XDFS_HVM_PATH(sp)	(XDFS_HVM_STATE(sp)->xdfs_hs_path)
#define	XDFS_HVM_STATE(sp)						\
		((xdfs_hvm_state_t *)(&((char *)(sp))[XDFS_HVM_STATE_OFFSET]))
#define	XDFS_HVM_STATE_OFFSET	(xdfs_ss_size - sizeof (xdfs_hvm_state_t))

/*
 * Assert that sp describes a fully set up HVM mode instance.
 *
 * NOTE: this expands to several statements and already ends in a
 * semicolon, so it must not be used as the body of an unbraced
 * if/else/loop.
 */
#define	XDFS_HVM_SANE(sp)						\
		ASSERT(XDFS_HVM_MODE(sp));				\
		ASSERT(XDFS_HVM_DIP(sp) != NULL);			\
		ASSERT(XDFS_HVM_PATH(sp) != NULL);
153 
154 
155 typedef struct xdfs_hvm_state {
156 	boolean_t	xdfs_hs_mode;
157 	dev_info_t	*xdfs_hs_dip;
158 	char		*xdfs_hs_path;
159 } xdfs_hvm_state_t;
160 
161 /* local function and structure prototypes */
162 static int xdfs_iodone(struct buf *);
163 static boolean_t xdfs_isopen_part(xdfs_state_t *, int);
164 static boolean_t xdfs_isopen(xdfs_state_t *);
165 static cmlb_tg_ops_t xdfs_lb_ops;
166 
167 /*
168  * Globals
169  */
170 major_t			xdfs_major;
171 #define			xdfs_hvm_dev_ops (xdfs_c_hvm_dev_ops)
172 #define			xdfs_hvm_cb_ops (xdfs_hvm_dev_ops->devo_cb_ops)
173 
174 /*
175  * Private globals
176  */
177 volatile boolean_t	xdfs_pv_disable = B_FALSE;
178 static void		*xdfs_ssp;
179 static size_t		xdfs_ss_size;
180 
181 /*
182  * Private helper functions
183  */
184 static boolean_t
185 xdfs_tgt_hold(xdfs_state_t *xsp)
186 {
187 	mutex_enter(&xsp->xdfss_mutex);
188 	ASSERT(xsp->xdfss_tgt_holds >= 0);
189 	if (!xsp->xdfss_tgt_attached) {
190 		mutex_exit(&xsp->xdfss_mutex);
191 		return (B_FALSE);
192 	}
193 	xsp->xdfss_tgt_holds++;
194 	mutex_exit(&xsp->xdfss_mutex);
195 	return (B_TRUE);
196 }
197 
198 static void
199 xdfs_tgt_release(xdfs_state_t *xsp)
200 {
201 	mutex_enter(&xsp->xdfss_mutex);
202 	ASSERT(xsp->xdfss_tgt_attached);
203 	ASSERT(xsp->xdfss_tgt_holds > 0);
204 	if (--xsp->xdfss_tgt_holds == 0)
205 		cv_broadcast(&xsp->xdfss_cv);
206 	mutex_exit(&xsp->xdfss_mutex);
207 }
208 
209 /*ARGSUSED*/
210 static int
211 xdfs_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
212 {
213 	int		instance = ddi_get_instance(dip);
214 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
215 	int		rv;
216 
217 	if (xsp == NULL)
218 		return (ENXIO);
219 
220 	if (!xdfs_tgt_hold(xsp))
221 		return (ENXIO);
222 
223 	if (cmd == TG_GETVIRTGEOM) {
224 		cmlb_geom_t	pgeom, *vgeomp;
225 		diskaddr_t	capacity;
226 
227 		/*
228 		 * The native xdf driver doesn't support this ioctl.
229 		 * Intead of passing it on, emulate it here so that the
230 		 * results look the same as what we get for a real xdf
231 		 * shell device.
232 		 *
233 		 * Get the real size of the device
234 		 */
235 		if ((rv = xdf_lb_getinfo(xsp->xdfss_tgt_dip,
236 		    TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0)
237 			goto out;
238 		capacity = pgeom.g_capacity;
239 
240 		/*
241 		 * If the controller returned us something that doesn't
242 		 * really fit into an Int 13/function 8 geometry
243 		 * result, just fail the ioctl.  See PSARC 1998/313.
244 		 */
245 		if (capacity >= (63 * 254 * 1024)) {
246 			rv = EINVAL;
247 			goto out;
248 		}
249 
250 		vgeomp = (cmlb_geom_t *)arg;
251 		vgeomp->g_capacity	= capacity;
252 		vgeomp->g_nsect		= 63;
253 		vgeomp->g_nhead		= 254;
254 		vgeomp->g_ncyl		= capacity / (63 * 254);
255 		vgeomp->g_acyl		= 0;
256 		vgeomp->g_secsize	= 512;
257 		vgeomp->g_intrlv	= 1;
258 		vgeomp->g_rpm		= 3600;
259 		rv = 0;
260 		goto out;
261 	}
262 
263 	rv = xdf_lb_getinfo(xsp->xdfss_tgt_dip, cmd, arg, tg_cookie);
264 
265 out:
266 	xdfs_tgt_release(xsp);
267 	return (rv);
268 }
269 
270 static boolean_t
271 xdfs_isopen_part(xdfs_state_t *xsp, int part)
272 {
273 	int otyp;
274 
275 	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
276 	for (otyp = 0; (otyp < OTYPCNT); otyp++) {
277 		if (xsp->xdfss_otyp_count[otyp][part] != 0) {
278 			ASSERT(xsp->xdfss_tgt_attached);
279 			ASSERT(xsp->xdfss_tgt_holds >= 0);
280 			return (B_TRUE);
281 		}
282 	}
283 	return (B_FALSE);
284 }
285 
286 static boolean_t
287 xdfs_isopen(xdfs_state_t *xsp)
288 {
289 	int part;
290 
291 	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
292 	for (part = 0; part < XDF_PEXT; part++) {
293 		if (xdfs_isopen_part(xsp, part))
294 			return (B_TRUE);
295 	}
296 	return (B_FALSE);
297 }
298 
299 static int
300 xdfs_iodone(struct buf *bp)
301 {
302 	struct buf	*bp_orig = bp->b_chain;
303 
304 	/* Propegate back the io results */
305 	bp_orig->b_resid = bp->b_resid;
306 	bioerror(bp_orig, geterror(bp));
307 	biodone(bp_orig);
308 
309 	freerbuf(bp);
310 	return (0);
311 }
312 
313 static int
314 xdfs_cmlb_attach(xdfs_state_t *xsp)
315 {
316 	return (cmlb_attach(xsp->xdfss_dip, &xdfs_lb_ops,
317 	    xsp->xdfss_tgt_is_cd ? DTYPE_RODIRECT : DTYPE_DIRECT,
318 	    xdf_is_rm(xsp->xdfss_tgt_dip),
319 	    B_TRUE,
320 	    xdfs_c_cmlb_node_type(xsp),
321 	    xdfs_c_cmlb_alter_behavior(xsp),
322 	    xsp->xdfss_cmlbhandle, 0));
323 }
324 
325 static boolean_t
326 xdfs_tgt_probe(xdfs_state_t *xsp, dev_info_t *tgt_dip)
327 {
328 	cmlb_geom_t		pgeom;
329 	int			tgt_instance = ddi_get_instance(tgt_dip);
330 
331 	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
332 	ASSERT(!xdfs_isopen(xsp));
333 	ASSERT(!xsp->xdfss_tgt_attached);
334 
335 	xsp->xdfss_tgt_dip = tgt_dip;
336 	xsp->xdfss_tgt_holds = 0;
337 	xsp->xdfss_tgt_dev = makedevice(ddi_driver_major(tgt_dip),
338 	    XDF_MINOR(tgt_instance, 0));
339 	ASSERT((xsp->xdfss_tgt_dev & XDF_PMASK) == 0);
340 	xsp->xdfss_tgt_is_cd = xdf_is_cd(tgt_dip);
341 
342 	/*
343 	 * GROSS HACK ALERT!  GROSS HACK ALERT!
344 	 *
345 	 * Before we can initialize the cmlb layer, we have to tell the
346 	 * underlying xdf device what it's physical geometry should be.
347 	 * See the block comments at the top of this file for more info.
348 	 */
349 	if (!xsp->xdfss_tgt_is_cd &&
350 	    ((xdfs_c_getpgeom(xsp->xdfss_dip, &pgeom) != 0) ||
351 	    (xdf_hvm_setpgeom(xsp->xdfss_tgt_dip, &pgeom) != 0)))
352 		return (B_FALSE);
353 
354 	/*
355 	 * Force the xdf front end driver to connect to the backend.  From
356 	 * the solaris device tree perspective, the xdf driver devinfo node
357 	 * is already in the ATTACHED state.  (Otherwise xdf_hvm_hold()
358 	 * would not have returned a dip.)  But this doesn't mean that the
359 	 * xdf device has actually established a connection to it's back
360 	 * end driver.  For us to be able to access the xdf device it needs
361 	 * to be connected.
362 	 */
363 	if (!xdf_hvm_connect(xsp->xdfss_tgt_dip)) {
364 		cmn_err(CE_WARN, "pv driver failed to connect: %s",
365 		    xsp->xdfss_pv);
366 		return (B_FALSE);
367 	}
368 
369 	if (xsp->xdfss_tgt_is_cd && !xdf_media_req_supported(tgt_dip)) {
370 		/*
371 		 * Unfortunatly, the dom0 backend driver doesn't support
372 		 * important media request operations like eject, so fail
373 		 * the probe (this should cause us to fall back to emulated
374 		 * hvm device access, which does support things like eject).
375 		 */
376 		return (B_FALSE);
377 	}
378 
379 	/* create kstat for iostat(1M) */
380 	if (xdf_kstat_create(xsp->xdfss_tgt_dip, (char *)xdfs_c_name,
381 	    tgt_instance) != 0)
382 		return (B_FALSE);
383 
384 	/*
385 	 * Now we need to mark ourselves as attached and drop xdfss_mutex.
386 	 * We do this because the final steps in the attach process will
387 	 * need to access the underlying disk to read the label and
388 	 * possibly the devid.
389 	 */
390 	xsp->xdfss_tgt_attached = B_TRUE;
391 	mutex_exit(&xsp->xdfss_mutex);
392 
393 	if (!xsp->xdfss_tgt_is_cd && xdfs_c_bb_check(xsp)) {
394 		cmn_err(CE_WARN, "pv disks with bad blocks are unsupported: %s",
395 		    xsp->xdfss_hvm);
396 		mutex_enter(&xsp->xdfss_mutex);
397 		xdf_kstat_delete(xsp->xdfss_tgt_dip);
398 		xsp->xdfss_tgt_attached = B_FALSE;
399 		return (B_FALSE);
400 	}
401 
402 	/*
403 	 * Initalize cmlb.  Note that for partition information cmlb
404 	 * will access the underly xdf disk device directly via
405 	 * xdfs_lb_rdwr() and xdfs_lb_getinfo().  There are no
406 	 * layered driver handles associated with this access because
407 	 * it is a direct disk access that doesn't go through
408 	 * any of the device nodes exported by the xdf device (since
409 	 * all exported device nodes only reflect the portion of
410 	 * the device visible via the partition/slice that the node
411 	 * is associated with.)  So while not observable via the LDI,
412 	 * this direct disk access is ok since we're actually holding
413 	 * the target device.
414 	 */
415 	if (xdfs_cmlb_attach(xsp) != 0) {
416 		mutex_enter(&xsp->xdfss_mutex);
417 		xdf_kstat_delete(xsp->xdfss_tgt_dip);
418 		xsp->xdfss_tgt_attached = B_FALSE;
419 		return (B_FALSE);
420 	}
421 
422 	/* setup devid string */
423 	xsp->xdfss_tgt_devid = NULL;
424 	if (!xsp->xdfss_tgt_is_cd)
425 		xdfs_c_devid_setup(xsp);
426 
427 	(void) cmlb_validate(xsp->xdfss_cmlbhandle, 0, 0);
428 
429 	/* Have the system report any newly created device nodes */
430 	ddi_report_dev(xsp->xdfss_dip);
431 
432 	mutex_enter(&xsp->xdfss_mutex);
433 	return (B_TRUE);
434 }
435 
436 static boolean_t
437 xdfs_tgt_detach(xdfs_state_t *xsp)
438 {
439 	ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
440 	ASSERT(xsp->xdfss_tgt_attached);
441 	ASSERT(xsp->xdfss_tgt_holds >= 0);
442 
443 	if ((xdfs_isopen(xsp)) || (xsp->xdfss_tgt_holds != 0))
444 		return (B_FALSE);
445 
446 	ddi_devid_unregister(xsp->xdfss_dip);
447 	if (xsp->xdfss_tgt_devid != NULL)
448 		ddi_devid_free(xsp->xdfss_tgt_devid);
449 
450 	xdf_kstat_delete(xsp->xdfss_tgt_dip);
451 	xsp->xdfss_tgt_attached = B_FALSE;
452 	return (B_TRUE);
453 }
454 
455 /*
456  * Xdf_shell interfaces that may be called from outside this file.
457  */
/*
 * minphys routine of the shell driver: simply hands bp to xdfmin().
 */
void
xdfs_minphys(struct buf *bp)
{
	xdfmin(bp);
}
463 
464 /*
465  * Cmlb ops vector, allows the cmlb module to directly access the entire
466  * xdf disk device without going through any partitioning layers.
467  */
468 int
469 xdfs_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
470     diskaddr_t start, size_t count, void *tg_cookie)
471 {
472 	int		instance = ddi_get_instance(dip);
473 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
474 	int		rv;
475 
476 	if (xsp == NULL)
477 		return (ENXIO);
478 
479 	if (!xdfs_tgt_hold(xsp))
480 		return (ENXIO);
481 
482 	rv = xdf_lb_rdwr(xsp->xdfss_tgt_dip,
483 	    cmd, bufaddr, start, count, tg_cookie);
484 
485 	xdfs_tgt_release(xsp);
486 	return (rv);
487 }
488 
489 /*
490  * Driver PV and HVM cb_ops entry points
491  */
492 /*ARGSUSED*/
493 static int
494 xdfs_open(dev_t *dev_p, int flag, int otyp, cred_t *credp)
495 {
496 	ldi_ident_t	li;
497 	dev_t		dev = *dev_p;
498 	int		instance = XDFS_DEV2UNIT(dev);
499 	int		part = XDFS_DEV2PART(dev);
500 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
501 	dev_t		tgt_devt = xsp->xdfss_tgt_dev | part;
502 	int		err = 0;
503 
504 	if ((otyp < 0) || (otyp >= OTYPCNT))
505 		return (EINVAL);
506 
507 	if (XDFS_HVM_MODE(xsp)) {
508 		if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
509 			return (ENOTSUP);
510 		return (xdfs_hvm_cb_ops->cb_open(dev_p, flag, otyp, credp));
511 	}
512 
513 	/* allocate an ldi handle */
514 	VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);
515 
516 	mutex_enter(&xsp->xdfss_mutex);
517 
518 	/*
519 	 * We translate all device opens (chr, blk, and lyr) into
520 	 * block device opens.  Why?  Because for all the opens that
521 	 * come through this driver, we only keep around one LDI handle.
522 	 * So that handle can only be of one open type.  The reason
523 	 * that we choose the block interface for this is that to use
524 	 * the block interfaces for a device the system needs to allocate
525 	 * buf_ts, which are associated with system memory which can act
526 	 * as a cache for device data.  So normally when a block device
527 	 * is closed the system will ensure that all these pages get
528 	 * flushed out of memory.  But if we were to open the device
529 	 * as a character device, then when we went to close the underlying
530 	 * device (even if we had invoked the block interfaces) any data
531 	 * remaining in memory wouldn't necessairly be flushed out
532 	 * before the device was closed.
533 	 */
534 	if (xsp->xdfss_tgt_lh[part] == NULL) {
535 		ASSERT(!xdfs_isopen_part(xsp, part));
536 
537 		err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp,
538 		    &xsp->xdfss_tgt_lh[part], li);
539 
540 		if (err != 0) {
541 			mutex_exit(&xsp->xdfss_mutex);
542 			ldi_ident_release(li);
543 			return (err);
544 		}
545 
546 		/* Disk devices really shouldn't clone */
547 		ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part));
548 	} else {
549 		ldi_handle_t lh_tmp;
550 
551 		ASSERT(xdfs_isopen_part(xsp, part));
552 
553 		/* do ldi open/close to get flags and cred check */
554 		err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp,
555 		    &lh_tmp, li);
556 		if (err != 0) {
557 			mutex_exit(&xsp->xdfss_mutex);
558 			ldi_ident_release(li);
559 			return (err);
560 		}
561 
562 		/* Disk devices really shouldn't clone */
563 		ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part));
564 		(void) ldi_close(lh_tmp, flag, credp);
565 	}
566 	ldi_ident_release(li);
567 
568 	xsp->xdfss_otyp_count[otyp][part]++;
569 
570 	mutex_exit(&xsp->xdfss_mutex);
571 	return (0);
572 }
573 
574 /*ARGSUSED*/
575 static int
576 xdfs_close(dev_t dev, int flag, int otyp, cred_t *credp)
577 {
578 	int		instance = XDFS_DEV2UNIT(dev);
579 	int		part = XDFS_DEV2PART(dev);
580 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
581 	int		err = 0;
582 
583 	ASSERT((otyp >= 0) && otyp < OTYPCNT);
584 
585 	/* Sanity check the dev_t associated with this request. */
586 	ASSERT(getmajor(dev) == xdfs_major);
587 	if (getmajor(dev) != xdfs_major)
588 		return (ENXIO);
589 
590 	if (XDFS_HVM_MODE(xsp)) {
591 		if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
592 			return (ENOTSUP);
593 		return (xdfs_hvm_cb_ops->cb_close(dev, flag, otyp, credp));
594 	}
595 
596 	/*
597 	 * Sanity check that that the device is actually open.  On debug
598 	 * kernels we'll panic and on non-debug kernels we'll return failure.
599 	 */
600 	mutex_enter(&xsp->xdfss_mutex);
601 	ASSERT(xdfs_isopen_part(xsp, part));
602 	if (!xdfs_isopen_part(xsp, part)) {
603 		mutex_exit(&xsp->xdfss_mutex);
604 		return (ENXIO);
605 	}
606 
607 	ASSERT(xsp->xdfss_tgt_lh[part] != NULL);
608 	ASSERT(xsp->xdfss_otyp_count[otyp][part] > 0);
609 	if (otyp == OTYP_LYR) {
610 		xsp->xdfss_otyp_count[otyp][part]--;
611 	} else {
612 		xsp->xdfss_otyp_count[otyp][part] = 0;
613 	}
614 
615 	if (!xdfs_isopen_part(xsp, part)) {
616 		err = ldi_close(xsp->xdfss_tgt_lh[part], flag, credp);
617 		xsp->xdfss_tgt_lh[part] = NULL;
618 	}
619 
620 	mutex_exit(&xsp->xdfss_mutex);
621 
622 	return (err);
623 }
624 
625 int
626 xdfs_strategy(struct buf *bp)
627 {
628 	dev_t		dev = bp->b_edev;
629 	int		instance = XDFS_DEV2UNIT(dev);
630 	int		part = XDFS_DEV2PART(dev);
631 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
632 	dev_t		tgt_devt;
633 	struct buf	*bp_clone;
634 
635 	/* Sanity check the dev_t associated with this request. */
636 	ASSERT(getmajor(dev) == xdfs_major);
637 	if (getmajor(dev) != xdfs_major)
638 		goto err;
639 
640 	if (XDFS_HVM_MODE(xsp)) {
641 		if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
642 			return (ENOTSUP);
643 		return (xdfs_hvm_cb_ops->cb_strategy(bp));
644 	}
645 
646 	/*
647 	 * Sanity checks that the dev_t associated with the buf we were
648 	 * passed corresponds to an open partition.  On debug kernels we'll
649 	 * panic and on non-debug kernels we'll return failure.
650 	 */
651 	mutex_enter(&xsp->xdfss_mutex);
652 	ASSERT(xdfs_isopen_part(xsp, part));
653 	if (!xdfs_isopen_part(xsp, part)) {
654 		mutex_exit(&xsp->xdfss_mutex);
655 		goto err;
656 	}
657 	mutex_exit(&xsp->xdfss_mutex);
658 
659 	/* clone this buffer */
660 	tgt_devt = xsp->xdfss_tgt_dev | part;
661 	bp_clone = bioclone(bp, 0, bp->b_bcount, tgt_devt, bp->b_blkno,
662 	    xdfs_iodone, NULL, KM_SLEEP);
663 	bp_clone->b_chain = bp;
664 
665 	/*
666 	 * If we're being invoked on behalf of the physio() call in
667 	 * xdfs_dioctl_rwcmd() then b_private will be set to
668 	 * XB_SLICE_NONE and we need to propegate this flag into the
669 	 * cloned buffer so that the xdf driver will see it.
670 	 */
671 	if (bp->b_private == (void *)XB_SLICE_NONE)
672 		bp_clone->b_private = (void *)XB_SLICE_NONE;
673 
674 	/*
675 	 * Pass on the cloned buffer.  Note that we don't bother to check
676 	 * for failure because the xdf strategy routine will have to
677 	 * invoke biodone() if it wants to return an error, which means
678 	 * that the xdfs_iodone() callback will get invoked and it
679 	 * will propegate the error back up the stack and free the cloned
680 	 * buffer.
681 	 */
682 	ASSERT(xsp->xdfss_tgt_lh[part] != NULL);
683 	return (ldi_strategy(xsp->xdfss_tgt_lh[part], bp_clone));
684 
685 err:
686 	bioerror(bp, ENXIO);
687 	bp->b_resid = bp->b_bcount;
688 	biodone(bp);
689 	return (0);
690 }
691 
692 static int
693 xdfs_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
694 {
695 	int		instance = XDFS_DEV2UNIT(dev);
696 	int		part = XDFS_DEV2PART(dev);
697 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
698 
699 	if (!XDFS_HVM_MODE(xsp))
700 		return (ldi_dump(xsp->xdfss_tgt_lh[part], addr, blkno, nblk));
701 
702 	if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
703 		return (ENOTSUP);
704 	return (xdfs_hvm_cb_ops->cb_dump(dev, addr, blkno, nblk));
705 }
706 
707 /*ARGSUSED*/
708 static int
709 xdfs_read(dev_t dev, struct uio *uio, cred_t *credp)
710 {
711 	int		instance = XDFS_DEV2UNIT(dev);
712 	int		part = XDFS_DEV2PART(dev);
713 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
714 
715 	if (!XDFS_HVM_MODE(xsp))
716 		return (ldi_read(xsp->xdfss_tgt_lh[part], uio, credp));
717 
718 	if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
719 		return (ENOTSUP);
720 	return (xdfs_hvm_cb_ops->cb_read(dev, uio, credp));
721 }
722 
723 /*ARGSUSED*/
724 static int
725 xdfs_write(dev_t dev, struct uio *uio, cred_t *credp)
726 {
727 	int		instance = XDFS_DEV2UNIT(dev);
728 	int		part = XDFS_DEV2PART(dev);
729 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
730 
731 	if (!XDFS_HVM_MODE(xsp))
732 		return (ldi_write(xsp->xdfss_tgt_lh[part], uio, credp));
733 
734 	if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
735 		return (ENOTSUP);
736 	return (xdfs_hvm_cb_ops->cb_write(dev, uio, credp));
737 }
738 
739 /*ARGSUSED*/
740 static int
741 xdfs_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
742 {
743 	int		instance = XDFS_DEV2UNIT(dev);
744 	int		part = XDFS_DEV2PART(dev);
745 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
746 
747 	if (!XDFS_HVM_MODE(xsp))
748 		return (ldi_aread(xsp->xdfss_tgt_lh[part], aio, credp));
749 
750 	if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) ||
751 	    (xdfs_hvm_cb_ops->cb_strategy == NULL) ||
752 	    (xdfs_hvm_cb_ops->cb_strategy == nodev) ||
753 	    (xdfs_hvm_cb_ops->cb_aread == NULL))
754 		return (ENOTSUP);
755 	return (xdfs_hvm_cb_ops->cb_aread(dev, aio, credp));
756 }
757 
758 /*ARGSUSED*/
759 static int
760 xdfs_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
761 {
762 	int		instance = XDFS_DEV2UNIT(dev);
763 	int		part = XDFS_DEV2PART(dev);
764 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
765 
766 	if (!XDFS_HVM_MODE(xsp))
767 		return (ldi_awrite(xsp->xdfss_tgt_lh[part], aio, credp));
768 
769 	if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) ||
770 	    (xdfs_hvm_cb_ops->cb_strategy == NULL) ||
771 	    (xdfs_hvm_cb_ops->cb_strategy == nodev) ||
772 	    (xdfs_hvm_cb_ops->cb_awrite == NULL))
773 		return (ENOTSUP);
774 	return (xdfs_hvm_cb_ops->cb_awrite(dev, aio, credp));
775 }
776 
777 static int
778 xdfs_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
779     int *rvalp)
780 {
781 	int		instance = XDFS_DEV2UNIT(dev);
782 	int		part = XDFS_DEV2PART(dev);
783 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
784 	int		rv;
785 	boolean_t	done;
786 
787 	if (XDFS_HVM_MODE(xsp)) {
788 		if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
789 			return (ENOTSUP);
790 		return (xdfs_hvm_cb_ops->cb_ioctl(
791 		    dev, cmd, arg, flag, credp, rvalp));
792 	}
793 
794 	rv = xdfs_c_ioctl(xsp, dev, part, cmd, arg, flag, credp, rvalp, &done);
795 	if (done)
796 		return (rv);
797 	rv = ldi_ioctl(xsp->xdfss_tgt_lh[part], cmd, arg, flag, credp, rvalp);
798 	if (rv == 0) {
799 		/* Force Geometry Validation */
800 		(void) cmlb_invalidate(xsp->xdfss_cmlbhandle, 0);
801 		(void) cmlb_validate(xsp->xdfss_cmlbhandle, 0, 0);
802 	}
803 	return (rv);
804 }
805 
806 static int
807 xdfs_hvm_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
808     int flags, char *name, caddr_t valuep, int *lengthp)
809 {
810 	int		instance = ddi_get_instance(dip);
811 	void		*xsp = ddi_get_soft_state(xdfs_ssp, instance);
812 
813 	ASSERT(XDFS_HVM_MODE(xsp));
814 
815 	if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) ||
816 	    (xdfs_hvm_cb_ops->cb_prop_op == NULL) ||
817 	    (xdfs_hvm_cb_ops->cb_prop_op == nodev) ||
818 	    (xdfs_hvm_cb_ops->cb_prop_op == nulldev))
819 		return (DDI_PROP_NOT_FOUND);
820 
821 	return (xdfs_hvm_cb_ops->cb_prop_op(dev, dip, prop_op,
822 	    flags, name, valuep, lengthp));
823 }
824 
825 static int
826 xdfs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
827     int flags, char *name, caddr_t valuep, int *lengthp)
828 {
829 	int		instance = ddi_get_instance(dip);
830 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
831 	int		rv;
832 	dev_info_t	*tgt_dip;
833 	dev_t		tgt_devt;
834 
835 	/*
836 	 * Sanity check that if a dev_t or dip were specified that they
837 	 * correspond to this device driver.  On debug kernels we'll
838 	 * panic and on non-debug kernels we'll return failure.
839 	 */
840 	ASSERT(ddi_driver_major(dip) == xdfs_major);
841 	ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdfs_major));
842 	if ((ddi_driver_major(dip) != xdfs_major) ||
843 	    ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdfs_major)))
844 		return (DDI_PROP_NOT_FOUND);
845 
846 	/*
847 	 * This property lookup might be associated with a device node
848 	 * that is not yet attached, if so pass it onto ddi_prop_op().
849 	 */
850 	if (xsp == NULL)
851 		return (ddi_prop_op(dev, dip, prop_op, flags,
852 		    name, valuep, lengthp));
853 
854 	/* If we're accessing the device in hvm mode, pass this request on */
855 	if (XDFS_HVM_MODE(xsp))
856 		return (xdfs_hvm_prop_op(dev, dip, prop_op,
857 		    flags, name, valuep, lengthp));
858 
859 	/*
860 	 * Make sure we only lookup static properties.
861 	 *
862 	 * If there are static properties of the underlying xdf driver
863 	 * that we want to mirror, then we'll have to explicity look them
864 	 * up and define them during attach.  There are a few reasons
865 	 * for this.  Most importantly, most static properties are typed
866 	 * and all dynamic properties are untyped, ie, for dynamic
867 	 * properties the caller must know the type of the property and
868 	 * how to interpret the value of the property.  the prop_op drivedr
869 	 * entry point is only designed for returning dynamic/untyped
870 	 * properties, so if we were to attempt to lookup and pass back
871 	 * static properties of the underlying device here then we would
872 	 * be losing the type information for those properties.  Another
873 	 * reason we don't want to pass on static property requests is that
874 	 * static properties are enumerable in the device tree, where as
875 	 * dynamic ones are not.
876 	 */
877 	flags |= DDI_PROP_DYNAMIC;
878 
879 	/*
880 	 * We can't use the ldi here to access the underlying device because
881 	 * the ldi actually opens the device, and that open might fail if the
882 	 * device has already been opened with the FEXCL flag.  If we used
883 	 * the ldi here, it would also be possible for some other caller to
884 	 * try open the device with the FEXCL flag and get a failure back
885 	 * because we have it open to do a property query.  Instad we'll
886 	 * grab a hold on the target dip.
887 	 */
888 	if (!xdfs_tgt_hold(xsp))
889 		return (DDI_PROP_NOT_FOUND);
890 
891 	/* figure out dip the dev_t we're going to pass on down */
892 	tgt_dip = xsp->xdfss_tgt_dip;
893 	if (dev == DDI_DEV_T_ANY) {
894 		tgt_devt = DDI_DEV_T_ANY;
895 	} else {
896 		tgt_devt = xsp->xdfss_tgt_dev | XDFS_DEV2PART(dev);
897 	}
898 
899 	/*
900 	 * Cdev_prop_op() is not a public interface, and normally the caller
901 	 * is required to make sure that the target driver actually implements
902 	 * this interface before trying to invoke it.  In this case we know
903 	 * that we're always accessing the xdf driver and it does have this
904 	 * interface defined, so we can skip the check.
905 	 */
906 	rv = cdev_prop_op(tgt_devt, tgt_dip,
907 	    prop_op, flags, name, valuep, lengthp);
908 
909 	xdfs_tgt_release(xsp);
910 	return (rv);
911 }
912 
913 /*
914  * Driver PV and HVM dev_ops entry points
915  */
916 /*ARGSUSED*/
917 static int
918 xdfs_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
919     void **result)
920 {
921 	dev_t		dev = (dev_t)arg;
922 	int		instance = XDFS_DEV2UNIT(dev);
923 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
924 
925 	switch (infocmd) {
926 		case DDI_INFO_DEVT2DEVINFO:
927 			if (xsp == NULL)
928 				return (DDI_FAILURE);
929 			if (XDFS_HVM_MODE(xsp))
930 				*result = XDFS_HVM_DIP(xsp);
931 			else
932 				*result = (void *)xsp->xdfss_dip;
933 			break;
934 		case DDI_INFO_DEVT2INSTANCE:
935 			*result = (void *)(intptr_t)instance;
936 			break;
937 		default:
938 			return (DDI_FAILURE);
939 	}
940 	return (DDI_SUCCESS);
941 }
942 
943 static int
944 xdfs_hvm_probe(dev_info_t *dip, char *path)
945 {
946 	int		instance = ddi_get_instance(dip);
947 	int		rv = DDI_PROBE_SUCCESS;
948 	void		*xsp;
949 
950 	ASSERT(path != NULL);
951 	cmn_err(CE_WARN, "PV access to device disabled: %s", path);
952 
953 	(void) ddi_soft_state_zalloc(xdfs_ssp, instance);
954 	VERIFY((xsp = ddi_get_soft_state(xdfs_ssp, instance)) != NULL);
955 
956 	if ((xdfs_hvm_dev_ops == NULL) ||
957 	    (xdfs_hvm_dev_ops->devo_probe == NULL) ||
958 	    ((rv = xdfs_hvm_dev_ops->devo_probe(dip)) == DDI_PROBE_FAILURE)) {
959 		ddi_soft_state_free(xdfs_ssp, instance);
960 		cmn_err(CE_WARN, "HVM probe of device failed: %s", path);
961 		kmem_free(path, MAXPATHLEN);
962 		return (DDI_PROBE_FAILURE);
963 	}
964 
965 	XDFS_HVM_MODE(xsp) = B_TRUE;
966 	XDFS_HVM_DIP(xsp) = dip;
967 	XDFS_HVM_PATH(xsp) = path;
968 
969 	return (rv);
970 }
971 
972 static int
973 xdfs_probe(dev_info_t *dip)
974 {
975 	int		instance = ddi_get_instance(dip);
976 	xdfs_state_t	*xsp;
977 	dev_info_t	*tgt_dip;
978 	char		*path;
979 	int		i, pv_disable;
980 
981 	/* if we've already probed the device then there's nothing todo */
982 	if (ddi_get_soft_state(xdfs_ssp, instance))
983 		return (DDI_PROBE_PARTIAL);
984 
985 	/* Figure out our pathname */
986 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
987 	(void) ddi_pathname(dip, path);
988 
989 	/* see if we should disable pv access mode */
990 	pv_disable = ddi_prop_get_int(DDI_DEV_T_ANY,
991 	    dip, DDI_PROP_NOTPROM, "pv_disable", 0);
992 
993 	if (xdfs_pv_disable || pv_disable)
994 		return (xdfs_hvm_probe(dip, path));
995 
996 	/*
997 	 * This xdf shell device layers on top of an xdf device.  So the first
998 	 * thing we need to do is determine which xdf device instance this
999 	 * xdf shell instance should be layered on top of.
1000 	 */
1001 	for (i = 0; xdfs_c_h2p_map[i].xdfs_h2p_hvm != NULL; i++) {
1002 		if (strcmp(xdfs_c_h2p_map[i].xdfs_h2p_hvm, path) == 0)
1003 			break;
1004 	}
1005 
1006 	if ((xdfs_c_h2p_map[i].xdfs_h2p_hvm == NULL) ||
1007 	    ((tgt_dip = xdf_hvm_hold(xdfs_c_h2p_map[i].xdfs_h2p_pv)) == NULL)) {
1008 		/*
1009 		 * UhOh.  We either don't know what xdf instance this xdf
1010 		 * shell device should be mapped to or the xdf node assocaited
1011 		 * with this instance isnt' attached.  in either case fall
1012 		 * back to hvm access.
1013 		 */
1014 		return (xdfs_hvm_probe(dip, path));
1015 	}
1016 
1017 	/* allocate and initialize our state structure */
1018 	(void) ddi_soft_state_zalloc(xdfs_ssp, instance);
1019 	xsp = ddi_get_soft_state(xdfs_ssp, instance);
1020 	mutex_init(&xsp->xdfss_mutex, NULL, MUTEX_DRIVER, NULL);
1021 	cv_init(&xsp->xdfss_cv, NULL, CV_DEFAULT, NULL);
1022 	mutex_enter(&xsp->xdfss_mutex);
1023 
1024 	xsp->xdfss_dip = dip;
1025 	xsp->xdfss_pv = xdfs_c_h2p_map[i].xdfs_h2p_pv;
1026 	xsp->xdfss_hvm = xdfs_c_h2p_map[i].xdfs_h2p_hvm;
1027 	xsp->xdfss_tgt_attached = B_FALSE;
1028 	cmlb_alloc_handle((cmlb_handle_t *)&xsp->xdfss_cmlbhandle);
1029 
1030 	if (!xdfs_tgt_probe(xsp, tgt_dip)) {
1031 		mutex_exit(&xsp->xdfss_mutex);
1032 		cmlb_free_handle(&xsp->xdfss_cmlbhandle);
1033 		ddi_soft_state_free(xdfs_ssp, instance);
1034 		ddi_release_devi(tgt_dip);
1035 		return (xdfs_hvm_probe(dip, path));
1036 	}
1037 	mutex_exit(&xsp->xdfss_mutex);
1038 
1039 	/*
1040 	 * Add a zero-length attribute to tell the world we support
1041 	 * kernel ioctls (for layered drivers).
1042 	 */
1043 	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
1044 	    DDI_KERNEL_IOCTL, NULL, 0);
1045 
1046 	kmem_free(path, MAXPATHLEN);
1047 	return (DDI_PROBE_SUCCESS);
1048 }
1049 
1050 static int
1051 xdfs_hvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1052 {
1053 	int		instance = ddi_get_instance(dip);
1054 	void		*xsp = ddi_get_soft_state(xdfs_ssp, instance);
1055 	int		rv = DDI_FAILURE;
1056 
1057 	XDFS_HVM_SANE(xsp);
1058 
1059 	if ((xdfs_hvm_dev_ops == NULL) ||
1060 	    (xdfs_hvm_dev_ops->devo_attach == NULL) ||
1061 	    ((rv = xdfs_hvm_dev_ops->devo_attach(dip, cmd)) != DDI_SUCCESS)) {
1062 		cmn_err(CE_WARN, "HVM attach of device failed: %s",
1063 		    XDFS_HVM_PATH(xsp));
1064 		kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN);
1065 		ddi_soft_state_free(xdfs_ssp, instance);
1066 		return (rv);
1067 	}
1068 
1069 	return (DDI_SUCCESS);
1070 }
1071 
1072 /*
1073  * Autoconfiguration Routines
1074  */
1075 static int
1076 xdfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1077 {
1078 	int		instance = ddi_get_instance(dip);
1079 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
1080 
1081 	if (xsp == NULL)
1082 		return (DDI_FAILURE);
1083 	if (XDFS_HVM_MODE(xsp))
1084 		return (xdfs_hvm_attach(dip, cmd));
1085 	if (cmd != DDI_ATTACH)
1086 		return (DDI_FAILURE);
1087 
1088 	xdfs_c_attach(xsp);
1089 	return (DDI_SUCCESS);
1090 }
1091 
1092 static int
1093 xdfs_hvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1094 {
1095 	int		instance = ddi_get_instance(dip);
1096 	void		*xsp = ddi_get_soft_state(xdfs_ssp, instance);
1097 	int		rv;
1098 
1099 	XDFS_HVM_SANE(xsp);
1100 
1101 	if ((xdfs_hvm_dev_ops == NULL) ||
1102 	    (xdfs_hvm_dev_ops->devo_detach == NULL))
1103 		return (DDI_FAILURE);
1104 
1105 	if ((rv = xdfs_hvm_dev_ops->devo_detach(dip, cmd)) != DDI_SUCCESS)
1106 		return (rv);
1107 
1108 	kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN);
1109 	ddi_soft_state_free(xdfs_ssp, instance);
1110 	return (DDI_SUCCESS);
1111 }
1112 
1113 static int
1114 xdfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1115 {
1116 	int		instance = ddi_get_instance(dip);
1117 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
1118 
1119 	if (XDFS_HVM_MODE(xsp))
1120 		return (xdfs_hvm_detach(dip, cmd));
1121 	if (cmd != DDI_DETACH)
1122 		return (DDI_FAILURE);
1123 
1124 	mutex_enter(&xsp->xdfss_mutex);
1125 	if (!xdfs_tgt_detach(xsp)) {
1126 		mutex_exit(&xsp->xdfss_mutex);
1127 		return (DDI_FAILURE);
1128 	}
1129 	mutex_exit(&xsp->xdfss_mutex);
1130 
1131 	cmlb_detach(xsp->xdfss_cmlbhandle, 0);
1132 	cmlb_free_handle(&xsp->xdfss_cmlbhandle);
1133 	ddi_release_devi(xsp->xdfss_tgt_dip);
1134 	ddi_soft_state_free(xdfs_ssp, instance);
1135 	ddi_prop_remove_all(dip);
1136 	return (DDI_SUCCESS);
1137 }
1138 
1139 static int
1140 xdfs_hvm_power(dev_info_t *dip, int component, int level)
1141 {
1142 	int		instance = ddi_get_instance(dip);
1143 	void		*xsp = ddi_get_soft_state(xdfs_ssp, instance);
1144 
1145 	XDFS_HVM_SANE(xsp);
1146 
1147 	if ((xdfs_hvm_dev_ops == NULL) ||
1148 	    (xdfs_hvm_dev_ops->devo_power == NULL))
1149 		return (DDI_FAILURE);
1150 	return (xdfs_hvm_dev_ops->devo_power(dip, component, level));
1151 }
1152 
1153 static int
1154 xdfs_power(dev_info_t *dip, int component, int level)
1155 {
1156 	int		instance = ddi_get_instance(dip);
1157 	xdfs_state_t	*xsp = ddi_get_soft_state(xdfs_ssp, instance);
1158 
1159 	if (XDFS_HVM_MODE(xsp))
1160 		return (xdfs_hvm_power(dip, component, level));
1161 	return (nodev());
1162 }
1163 
1164 /*
1165  * Cmlb ops vector
1166  */
1167 static cmlb_tg_ops_t xdfs_lb_ops = {
1168 	TG_DK_OPS_VERSION_1,
1169 	xdfs_lb_rdwr,
1170 	xdfs_lb_getinfo
1171 };
1172 
1173 /*
1174  * Device driver ops vector
1175  */
1176 static struct cb_ops xdfs_cb_ops = {
1177 	xdfs_open,		/* open */
1178 	xdfs_close,		/* close */
1179 	xdfs_strategy,		/* strategy */
1180 	nodev,			/* print */
1181 	xdfs_dump,		/* dump */
1182 	xdfs_read,		/* read */
1183 	xdfs_write,		/* write */
1184 	xdfs_ioctl,		/* ioctl */
1185 	nodev,			/* devmap */
1186 	nodev,			/* mmap */
1187 	nodev,			/* segmap */
1188 	nochpoll,		/* poll */
1189 	xdfs_prop_op,		/* cb_prop_op */
1190 	0,			/* streamtab  */
1191 	D_64BIT | D_MP | D_NEW,	/* Driver comaptibility flag */
1192 	CB_REV,			/* cb_rev */
1193 	xdfs_aread,		/* async read */
1194 	xdfs_awrite		/* async write */
1195 };
1196 
1197 struct dev_ops xdfs_ops = {
1198 	DEVO_REV,		/* devo_rev, */
1199 	0,			/* refcnt  */
1200 	xdfs_getinfo,		/* info */
1201 	nulldev,		/* identify */
1202 	xdfs_probe,		/* probe */
1203 	xdfs_attach,		/* attach */
1204 	xdfs_detach,		/* detach */
1205 	nodev,			/* reset */
1206 	&xdfs_cb_ops,		/* driver operations */
1207 	NULL,			/* bus operations */
1208 	xdfs_power,		/* power */
1209 	ddi_quiesce_not_supported, /* devo_quiesce */
1210 };
1211 
1212 /*
1213  * Module linkage information for the kernel.
1214  */
1215 static struct modldrv modldrv = {
1216 	&mod_driverops,		/* Type of module.  This one is a driver. */
1217 	NULL,			/* Module description.  Set by _init() */
1218 	&xdfs_ops,		/* Driver ops. */
1219 };
1220 
1221 static struct modlinkage modlinkage = {
1222 	MODREV_1, (void *)&modldrv, NULL
1223 };
1224 
1225 int
1226 _init(void)
1227 {
1228 	int rval;
1229 
1230 	xdfs_major = ddi_name_to_major((char *)xdfs_c_name);
1231 	if (xdfs_major == (major_t)-1)
1232 		return (EINVAL);
1233 
1234 	/*
1235 	 * Determine the size of our soft state structure.  The base
1236 	 * size of the structure is the larger of the hvm clients state
1237 	 * structure, or our shell state structure.  Then we'll align
1238 	 * the end of the structure to a pointer boundry and append
1239 	 * a xdfs_hvm_state_t structure.  This way the xdfs_hvm_state_t
1240 	 * structure is always present and we can use it to determine the
1241 	 * current device access mode (hvm or shell).
1242 	 */
1243 	xdfs_ss_size = MAX(xdfs_c_hvm_ss_size, sizeof (xdfs_state_t));
1244 	xdfs_ss_size = P2ROUNDUP(xdfs_ss_size, sizeof (uintptr_t));
1245 	xdfs_ss_size += sizeof (xdfs_hvm_state_t);
1246 
1247 	/*
1248 	 * In general ide usually supports 4 disk devices, this same
1249 	 * limitation also applies to software emulating ide devices.
1250 	 * so by default we pre-allocate 4 xdf shell soft state structures.
1251 	 */
1252 	if ((rval = ddi_soft_state_init(&xdfs_ssp,
1253 	    xdfs_ss_size, XDFS_NODES)) != 0)
1254 		return (rval);
1255 	*xdfs_c_hvm_ss = xdfs_ssp;
1256 
1257 	/* Install our module */
1258 	if (modldrv.drv_linkinfo == NULL)
1259 		modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo;
1260 	if ((rval = mod_install(&modlinkage)) != 0) {
1261 		ddi_soft_state_fini(&xdfs_ssp);
1262 		return (rval);
1263 	}
1264 
1265 	return (0);
1266 }
1267 
1268 int
1269 _info(struct modinfo *modinfop)
1270 {
1271 	if (modldrv.drv_linkinfo == NULL)
1272 		modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo;
1273 	return (mod_info(&modlinkage, modinfop));
1274 }
1275 
1276 int
1277 _fini(void)
1278 {
1279 	int	rval;
1280 	if ((rval = mod_remove(&modlinkage)) != 0)
1281 		return (rval);
1282 	ddi_soft_state_fini(&xdfs_ssp);
1283 	return (0);
1284 }
1285