xref: /titanic_53/usr/src/uts/sun4v/io/vds.c (revision 87a7269eb068de448caa9e4404e38bebf4449531)
11ae08745Sheppo /*
21ae08745Sheppo  * CDDL HEADER START
31ae08745Sheppo  *
41ae08745Sheppo  * The contents of this file are subject to the terms of the
51ae08745Sheppo  * Common Development and Distribution License (the "License").
61ae08745Sheppo  * You may not use this file except in compliance with the License.
71ae08745Sheppo  *
81ae08745Sheppo  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91ae08745Sheppo  * or http://www.opensolaris.org/os/licensing.
101ae08745Sheppo  * See the License for the specific language governing permissions
111ae08745Sheppo  * and limitations under the License.
121ae08745Sheppo  *
131ae08745Sheppo  * When distributing Covered Code, include this CDDL HEADER in each
141ae08745Sheppo  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151ae08745Sheppo  * If applicable, add the following below this CDDL HEADER, with the
161ae08745Sheppo  * fields enclosed by brackets "[]" replaced with your own identifying
171ae08745Sheppo  * information: Portions Copyright [yyyy] [name of copyright owner]
181ae08745Sheppo  *
191ae08745Sheppo  * CDDL HEADER END
201ae08745Sheppo  */
211ae08745Sheppo 
221ae08745Sheppo /*
233c96341aSnarayan  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
241ae08745Sheppo  * Use is subject to license terms.
251ae08745Sheppo  */
261ae08745Sheppo 
271ae08745Sheppo #pragma ident	"%Z%%M%	%I%	%E% SMI"
281ae08745Sheppo 
291ae08745Sheppo /*
301ae08745Sheppo  * Virtual disk server
311ae08745Sheppo  */
321ae08745Sheppo 
331ae08745Sheppo 
341ae08745Sheppo #include <sys/types.h>
351ae08745Sheppo #include <sys/conf.h>
364bac2208Snarayan #include <sys/crc32.h>
371ae08745Sheppo #include <sys/ddi.h>
381ae08745Sheppo #include <sys/dkio.h>
391ae08745Sheppo #include <sys/file.h>
401ae08745Sheppo #include <sys/mdeg.h>
411ae08745Sheppo #include <sys/modhash.h>
421ae08745Sheppo #include <sys/note.h>
431ae08745Sheppo #include <sys/pathname.h>
441ae08745Sheppo #include <sys/sunddi.h>
451ae08745Sheppo #include <sys/sunldi.h>
461ae08745Sheppo #include <sys/sysmacros.h>
471ae08745Sheppo #include <sys/vio_common.h>
481ae08745Sheppo #include <sys/vdsk_mailbox.h>
491ae08745Sheppo #include <sys/vdsk_common.h>
501ae08745Sheppo #include <sys/vtoc.h>
513c96341aSnarayan #include <sys/vfs.h>
523c96341aSnarayan #include <sys/stat.h>
53*87a7269eSachartre #include <sys/scsi/impl/uscsi.h>
54690555a1Sachartre #include <vm/seg_map.h>
551ae08745Sheppo 
561ae08745Sheppo /* Virtual disk server initialization flags */
57d10e4ef2Snarayan #define	VDS_LDI			0x01
58d10e4ef2Snarayan #define	VDS_MDEG		0x02
591ae08745Sheppo 
601ae08745Sheppo /* Virtual disk server tunable parameters */
613c96341aSnarayan #define	VDS_RETRIES		5
623c96341aSnarayan #define	VDS_LDC_DELAY		1000 /* 1 msecs */
633c96341aSnarayan #define	VDS_DEV_DELAY		10000000 /* 10 secs */
641ae08745Sheppo #define	VDS_NCHAINS		32
651ae08745Sheppo 
661ae08745Sheppo /* Identification parameters for MD, synthetic dkio(7i) structures, etc. */
671ae08745Sheppo #define	VDS_NAME		"virtual-disk-server"
681ae08745Sheppo 
691ae08745Sheppo #define	VD_NAME			"vd"
701ae08745Sheppo #define	VD_VOLUME_NAME		"vdisk"
711ae08745Sheppo #define	VD_ASCIILABEL		"Virtual Disk"
721ae08745Sheppo 
731ae08745Sheppo #define	VD_CHANNEL_ENDPOINT	"channel-endpoint"
741ae08745Sheppo #define	VD_ID_PROP		"id"
751ae08745Sheppo #define	VD_BLOCK_DEVICE_PROP	"vds-block-device"
76445b4c2eSsb155480 #define	VD_REG_PROP		"reg"
771ae08745Sheppo 
781ae08745Sheppo /* Virtual disk initialization flags */
793c96341aSnarayan #define	VD_DISK_READY		0x01
803c96341aSnarayan #define	VD_LOCKING		0x02
813c96341aSnarayan #define	VD_LDC			0x04
823c96341aSnarayan #define	VD_DRING		0x08
833c96341aSnarayan #define	VD_SID			0x10
843c96341aSnarayan #define	VD_SEQ_NUM		0x20
851ae08745Sheppo 
861ae08745Sheppo /* Flags for opening/closing backing devices via LDI */
871ae08745Sheppo #define	VD_OPEN_FLAGS		(FEXCL | FREAD | FWRITE)
881ae08745Sheppo 
89eba0cb4eSachartre /* Flags for writing to a vdisk which is a file */
90eba0cb4eSachartre #define	VD_FILE_WRITE_FLAGS	SM_ASYNC
91eba0cb4eSachartre 
92*87a7269eSachartre /* Number of backup labels */
93*87a7269eSachartre #define	VD_FILE_NUM_BACKUP	5
94*87a7269eSachartre 
95*87a7269eSachartre /* Timeout for SCSI I/O */
96*87a7269eSachartre #define	VD_SCSI_RDWR_TIMEOUT	30	/* 30 secs */
97*87a7269eSachartre 
981ae08745Sheppo /*
991ae08745Sheppo  * By Solaris convention, slice/partition 2 represents the entire disk;
1001ae08745Sheppo  * unfortunately, this convention does not appear to be codified.
1011ae08745Sheppo  */
1021ae08745Sheppo #define	VD_ENTIRE_DISK_SLICE	2
1031ae08745Sheppo 
1041ae08745Sheppo /* Return a cpp token as a string */
1051ae08745Sheppo #define	STRINGIZE(token)	#token
1061ae08745Sheppo 
1071ae08745Sheppo /*
1081ae08745Sheppo  * Print a message prefixed with the current function name to the message log
1091ae08745Sheppo  * (and optionally to the console for verbose boots); these macros use cpp's
1101ae08745Sheppo  * concatenation of string literals and C99 variable-length-argument-list
1111ae08745Sheppo  * macros
1121ae08745Sheppo  */
1131ae08745Sheppo #define	PRN(...)	_PRN("?%s():  "__VA_ARGS__, "")
1141ae08745Sheppo #define	_PRN(format, ...)					\
1151ae08745Sheppo 	cmn_err(CE_CONT, format"%s", __func__, __VA_ARGS__)
1161ae08745Sheppo 
1171ae08745Sheppo /* Return a pointer to the "i"th vdisk dring element */
1181ae08745Sheppo #define	VD_DRING_ELEM(i)	((vd_dring_entry_t *)(void *)	\
1191ae08745Sheppo 	    (vd->dring + (i)*vd->descriptor_size))
1201ae08745Sheppo 
1211ae08745Sheppo /* Return the virtual disk client's type as a string (for use in messages) */
1221ae08745Sheppo #define	VD_CLIENT(vd)							\
1231ae08745Sheppo 	(((vd)->xfer_mode == VIO_DESC_MODE) ? "in-band client" :	\
1241ae08745Sheppo 	    (((vd)->xfer_mode == VIO_DRING_MODE) ? "dring client" :	\
1251ae08745Sheppo 		(((vd)->xfer_mode == 0) ? "null client" :		\
1261ae08745Sheppo 		    "unsupported client")))
1271ae08745Sheppo 
128690555a1Sachartre /* Read disk label from a disk on file */
129690555a1Sachartre #define	VD_FILE_LABEL_READ(vd, labelp) \
130*87a7269eSachartre 	vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)labelp, \
131690555a1Sachartre 	    0, sizeof (struct dk_label))
132690555a1Sachartre 
133690555a1Sachartre /* Write disk label to a disk on file */
134690555a1Sachartre #define	VD_FILE_LABEL_WRITE(vd, labelp)	\
135*87a7269eSachartre 	vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)labelp, \
136690555a1Sachartre 	    0, sizeof (struct dk_label))
137690555a1Sachartre 
138445b4c2eSsb155480 /*
139445b4c2eSsb155480  * Specification of an MD node passed to the MDEG to filter any
140445b4c2eSsb155480  * 'vport' nodes that do not belong to the specified node. This
141445b4c2eSsb155480  * template is copied for each vds instance and filled in with
142445b4c2eSsb155480  * the appropriate 'cfg-handle' value before being passed to the MDEG.
143445b4c2eSsb155480  */
144445b4c2eSsb155480 static mdeg_prop_spec_t	vds_prop_template[] = {
145445b4c2eSsb155480 	{ MDET_PROP_STR,	"name",		VDS_NAME },
146445b4c2eSsb155480 	{ MDET_PROP_VAL,	"cfg-handle",	NULL },
147445b4c2eSsb155480 	{ MDET_LIST_END,	NULL, 		NULL }
148445b4c2eSsb155480 };
149445b4c2eSsb155480 
/*
 * Store "val" in the ps_val field of the second entry (index 1) of an MDEG
 * property specification array.  In vds_prop_template that entry is the
 * 'cfg-handle' property.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so the macro is safe in unbraced if/else bodies; callers supply
 * the terminating ';' themselves.
 */
#define	VDS_SET_MDEG_PROP_INST(specp, val)	((specp)[1].ps_val = (val))
151445b4c2eSsb155480 
152445b4c2eSsb155480 /*
153445b4c2eSsb155480  * Matching criteria passed to the MDEG to register interest
154445b4c2eSsb155480  * in changes to 'virtual-device-port' nodes identified by their
155445b4c2eSsb155480  * 'id' property.
156445b4c2eSsb155480  */
157445b4c2eSsb155480 static md_prop_match_t	vd_prop_match[] = {
158445b4c2eSsb155480 	{ MDET_PROP_VAL,	VD_ID_PROP },
159445b4c2eSsb155480 	{ MDET_LIST_END,	NULL }
160445b4c2eSsb155480 };
161445b4c2eSsb155480 
162445b4c2eSsb155480 static mdeg_node_match_t vd_match = {"virtual-device-port",
163445b4c2eSsb155480 				    vd_prop_match};
164445b4c2eSsb155480 
1651ae08745Sheppo /* Debugging macros */
1661ae08745Sheppo #ifdef DEBUG
1673af08d82Slm66018 
1683af08d82Slm66018 static int	vd_msglevel = 0;
1693af08d82Slm66018 
1701ae08745Sheppo #define	PR0 if (vd_msglevel > 0)	PRN
1711ae08745Sheppo #define	PR1 if (vd_msglevel > 1)	PRN
1721ae08745Sheppo #define	PR2 if (vd_msglevel > 2)	PRN
1731ae08745Sheppo 
1741ae08745Sheppo #define	VD_DUMP_DRING_ELEM(elem)					\
1753c96341aSnarayan 	PR0("dst:%x op:%x st:%u nb:%lx addr:%lx ncook:%u\n",		\
1761ae08745Sheppo 	    elem->hdr.dstate,						\
1771ae08745Sheppo 	    elem->payload.operation,					\
1781ae08745Sheppo 	    elem->payload.status,					\
1791ae08745Sheppo 	    elem->payload.nbytes,					\
1801ae08745Sheppo 	    elem->payload.addr,						\
1811ae08745Sheppo 	    elem->payload.ncookies);
1821ae08745Sheppo 
1833af08d82Slm66018 char *
1843af08d82Slm66018 vd_decode_state(int state)
1853af08d82Slm66018 {
1863af08d82Slm66018 	char *str;
1873af08d82Slm66018 
1883af08d82Slm66018 #define	CASE_STATE(_s)	case _s: str = #_s; break;
1893af08d82Slm66018 
1903af08d82Slm66018 	switch (state) {
1913af08d82Slm66018 	CASE_STATE(VD_STATE_INIT)
1923af08d82Slm66018 	CASE_STATE(VD_STATE_VER)
1933af08d82Slm66018 	CASE_STATE(VD_STATE_ATTR)
1943af08d82Slm66018 	CASE_STATE(VD_STATE_DRING)
1953af08d82Slm66018 	CASE_STATE(VD_STATE_RDX)
1963af08d82Slm66018 	CASE_STATE(VD_STATE_DATA)
1973af08d82Slm66018 	default: str = "unknown"; break;
1983af08d82Slm66018 	}
1993af08d82Slm66018 
2003af08d82Slm66018 #undef CASE_STATE
2013af08d82Slm66018 
2023af08d82Slm66018 	return (str);
2033af08d82Slm66018 }
2043af08d82Slm66018 
2053af08d82Slm66018 void
2063af08d82Slm66018 vd_decode_tag(vio_msg_t *msg)
2073af08d82Slm66018 {
2083af08d82Slm66018 	char *tstr, *sstr, *estr;
2093af08d82Slm66018 
2103af08d82Slm66018 #define	CASE_TYPE(_s)	case _s: tstr = #_s; break;
2113af08d82Slm66018 
2123af08d82Slm66018 	switch (msg->tag.vio_msgtype) {
2133af08d82Slm66018 	CASE_TYPE(VIO_TYPE_CTRL)
2143af08d82Slm66018 	CASE_TYPE(VIO_TYPE_DATA)
2153af08d82Slm66018 	CASE_TYPE(VIO_TYPE_ERR)
2163af08d82Slm66018 	default: tstr = "unknown"; break;
2173af08d82Slm66018 	}
2183af08d82Slm66018 
2193af08d82Slm66018 #undef CASE_TYPE
2203af08d82Slm66018 
2213af08d82Slm66018 #define	CASE_SUBTYPE(_s) case _s: sstr = #_s; break;
2223af08d82Slm66018 
2233af08d82Slm66018 	switch (msg->tag.vio_subtype) {
2243af08d82Slm66018 	CASE_SUBTYPE(VIO_SUBTYPE_INFO)
2253af08d82Slm66018 	CASE_SUBTYPE(VIO_SUBTYPE_ACK)
2263af08d82Slm66018 	CASE_SUBTYPE(VIO_SUBTYPE_NACK)
2273af08d82Slm66018 	default: sstr = "unknown"; break;
2283af08d82Slm66018 	}
2293af08d82Slm66018 
2303af08d82Slm66018 #undef CASE_SUBTYPE
2313af08d82Slm66018 
2323af08d82Slm66018 #define	CASE_ENV(_s)	case _s: estr = #_s; break;
2333af08d82Slm66018 
2343af08d82Slm66018 	switch (msg->tag.vio_subtype_env) {
2353af08d82Slm66018 	CASE_ENV(VIO_VER_INFO)
2363af08d82Slm66018 	CASE_ENV(VIO_ATTR_INFO)
2373af08d82Slm66018 	CASE_ENV(VIO_DRING_REG)
2383af08d82Slm66018 	CASE_ENV(VIO_DRING_UNREG)
2393af08d82Slm66018 	CASE_ENV(VIO_RDX)
2403af08d82Slm66018 	CASE_ENV(VIO_PKT_DATA)
2413af08d82Slm66018 	CASE_ENV(VIO_DESC_DATA)
2423af08d82Slm66018 	CASE_ENV(VIO_DRING_DATA)
2433af08d82Slm66018 	default: estr = "unknown"; break;
2443af08d82Slm66018 	}
2453af08d82Slm66018 
2463af08d82Slm66018 #undef CASE_ENV
2473af08d82Slm66018 
2483af08d82Slm66018 	PR1("(%x/%x/%x) message : (%s/%s/%s)",
2493af08d82Slm66018 	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
2503af08d82Slm66018 	    msg->tag.vio_subtype_env, tstr, sstr, estr);
2513af08d82Slm66018 }
2523af08d82Slm66018 
2531ae08745Sheppo #else	/* !DEBUG */
2543af08d82Slm66018 
2551ae08745Sheppo #define	PR0(...)
2561ae08745Sheppo #define	PR1(...)
2571ae08745Sheppo #define	PR2(...)
2581ae08745Sheppo 
2591ae08745Sheppo #define	VD_DUMP_DRING_ELEM(elem)
2601ae08745Sheppo 
2613af08d82Slm66018 #define	vd_decode_state(_s)	(NULL)
2623af08d82Slm66018 #define	vd_decode_tag(_s)	(NULL)
2633af08d82Slm66018 
2641ae08745Sheppo #endif	/* DEBUG */
2651ae08745Sheppo 
2661ae08745Sheppo 
267d10e4ef2Snarayan /*
268d10e4ef2Snarayan  * Soft state structure for a vds instance
269d10e4ef2Snarayan  */
2701ae08745Sheppo typedef struct vds {
2711ae08745Sheppo 	uint_t		initialized;	/* driver inst initialization flags */
2721ae08745Sheppo 	dev_info_t	*dip;		/* driver inst devinfo pointer */
2731ae08745Sheppo 	ldi_ident_t	ldi_ident;	/* driver's identifier for LDI */
2741ae08745Sheppo 	mod_hash_t	*vd_table;	/* table of virtual disks served */
275445b4c2eSsb155480 	mdeg_node_spec_t *ispecp;	/* mdeg node specification */
2761ae08745Sheppo 	mdeg_handle_t	mdeg;		/* handle for MDEG operations  */
2771ae08745Sheppo } vds_t;
2781ae08745Sheppo 
279d10e4ef2Snarayan /*
280d10e4ef2Snarayan  * Types of descriptor-processing tasks
281d10e4ef2Snarayan  */
282d10e4ef2Snarayan typedef enum vd_task_type {
283d10e4ef2Snarayan 	VD_NONFINAL_RANGE_TASK,	/* task for intermediate descriptor in range */
284d10e4ef2Snarayan 	VD_FINAL_RANGE_TASK,	/* task for last in a range of descriptors */
285d10e4ef2Snarayan } vd_task_type_t;
286d10e4ef2Snarayan 
287d10e4ef2Snarayan /*
288d10e4ef2Snarayan  * Structure describing the task for processing a descriptor
289d10e4ef2Snarayan  */
290d10e4ef2Snarayan typedef struct vd_task {
291d10e4ef2Snarayan 	struct vd		*vd;		/* vd instance task is for */
292d10e4ef2Snarayan 	vd_task_type_t		type;		/* type of descriptor task */
293d10e4ef2Snarayan 	int			index;		/* dring elem index for task */
294d10e4ef2Snarayan 	vio_msg_t		*msg;		/* VIO message task is for */
295d10e4ef2Snarayan 	size_t			msglen;		/* length of message content */
296d10e4ef2Snarayan 	vd_dring_payload_t	*request;	/* request task will perform */
297d10e4ef2Snarayan 	struct buf		buf;		/* buf(9s) for I/O request */
2984bac2208Snarayan 	ldc_mem_handle_t	mhdl;		/* task memory handle */
299d10e4ef2Snarayan } vd_task_t;
300d10e4ef2Snarayan 
301d10e4ef2Snarayan /*
302d10e4ef2Snarayan  * Soft state structure for a virtual disk instance
303d10e4ef2Snarayan  */
3041ae08745Sheppo typedef struct vd {
3051ae08745Sheppo 	uint_t			initialized;	/* vdisk initialization flags */
3061ae08745Sheppo 	vds_t			*vds;		/* server for this vdisk */
307d10e4ef2Snarayan 	ddi_taskq_t		*startq;	/* queue for I/O start tasks */
308d10e4ef2Snarayan 	ddi_taskq_t		*completionq;	/* queue for completion tasks */
3091ae08745Sheppo 	ldi_handle_t		ldi_handle[V_NUMPAR];	/* LDI slice handles */
3103c96341aSnarayan 	char			device_path[MAXPATHLEN + 1]; /* vdisk device */
3111ae08745Sheppo 	dev_t			dev[V_NUMPAR];	/* dev numbers for slices */
312e1ebb9ecSlm66018 	uint_t			nslices;	/* number of slices */
3131ae08745Sheppo 	size_t			vdisk_size;	/* number of blocks in vdisk */
3141ae08745Sheppo 	vd_disk_type_t		vdisk_type;	/* slice or entire disk */
3154bac2208Snarayan 	vd_disk_label_t		vdisk_label;	/* EFI or VTOC label */
316e1ebb9ecSlm66018 	ushort_t		max_xfer_sz;	/* max xfer size in DEV_BSIZE */
3171ae08745Sheppo 	boolean_t		pseudo;		/* underlying pseudo dev */
3183c96341aSnarayan 	boolean_t		file;		/* underlying file */
3193c96341aSnarayan 	vnode_t			*file_vnode;	/* file vnode */
3203c96341aSnarayan 	size_t			file_size;	/* file size */
321*87a7269eSachartre 	ddi_devid_t		file_devid;	/* devid for disk image */
3224bac2208Snarayan 	struct dk_efi		dk_efi;		/* synthetic for slice type */
3231ae08745Sheppo 	struct dk_geom		dk_geom;	/* synthetic for slice type */
3241ae08745Sheppo 	struct vtoc		vtoc;		/* synthetic for slice type */
3251ae08745Sheppo 	ldc_status_t		ldc_state;	/* LDC connection state */
3261ae08745Sheppo 	ldc_handle_t		ldc_handle;	/* handle for LDC comm */
3271ae08745Sheppo 	size_t			max_msglen;	/* largest LDC message len */
3281ae08745Sheppo 	vd_state_t		state;		/* client handshake state */
3291ae08745Sheppo 	uint8_t			xfer_mode;	/* transfer mode with client */
3301ae08745Sheppo 	uint32_t		sid;		/* client's session ID */
3311ae08745Sheppo 	uint64_t		seq_num;	/* message sequence number */
3321ae08745Sheppo 	uint64_t		dring_ident;	/* identifier of dring */
3331ae08745Sheppo 	ldc_dring_handle_t	dring_handle;	/* handle for dring ops */
3341ae08745Sheppo 	uint32_t		descriptor_size;	/* num bytes in desc */
3351ae08745Sheppo 	uint32_t		dring_len;	/* number of dring elements */
3361ae08745Sheppo 	caddr_t			dring;		/* address of dring */
3373af08d82Slm66018 	caddr_t			vio_msgp;	/* vio msg staging buffer */
338d10e4ef2Snarayan 	vd_task_t		inband_task;	/* task for inband descriptor */
339d10e4ef2Snarayan 	vd_task_t		*dring_task;	/* tasks dring elements */
340d10e4ef2Snarayan 
341d10e4ef2Snarayan 	kmutex_t		lock;		/* protects variables below */
342d10e4ef2Snarayan 	boolean_t		enabled;	/* is vdisk enabled? */
343d10e4ef2Snarayan 	boolean_t		reset_state;	/* reset connection state? */
344d10e4ef2Snarayan 	boolean_t		reset_ldc;	/* reset LDC channel? */
3451ae08745Sheppo } vd_t;
3461ae08745Sheppo 
/*
 * Description of one virtual disk operation: a name, an operation code, and
 * the two handlers associated with it.
 *
 * NOTE(review): the vds_operation[] table built from this type is defined
 * outside this view; the handler semantics below are inferred from the
 * signatures only -- confirm against that table and its callers.
 */
3471ae08745Sheppo typedef struct vds_operation {
3483af08d82Slm66018 	char	*namep;		/* operation name string */
3491ae08745Sheppo 	uint8_t	operation;	/* operation code (presumably VD_OP_*) */
	/* handler invoked with the descriptor task; returns an int status */
350d10e4ef2Snarayan 	int	(*start)(vd_task_t *task);
	/* handler invoked with an opaque argument (presumably the task) */
351d10e4ef2Snarayan 	void	(*complete)(void *arg);
3521ae08745Sheppo } vds_operation_t;
3531ae08745Sheppo 
3540a55fbb7Slm66018 typedef struct vd_ioctl {
3550a55fbb7Slm66018 	uint8_t		operation;		/* vdisk operation */
3560a55fbb7Slm66018 	const char	*operation_name;	/* vdisk operation name */
3570a55fbb7Slm66018 	size_t		nbytes;			/* size of operation buffer */
3580a55fbb7Slm66018 	int		cmd;			/* corresponding ioctl cmd */
3590a55fbb7Slm66018 	const char	*cmd_name;		/* ioctl cmd name */
3600a55fbb7Slm66018 	void		*arg;			/* ioctl cmd argument */
3610a55fbb7Slm66018 	/* convert input vd_buf to output ioctl_arg */
3620a55fbb7Slm66018 	void		(*copyin)(void *vd_buf, void *ioctl_arg);
3630a55fbb7Slm66018 	/* convert input ioctl_arg to output vd_buf */
3640a55fbb7Slm66018 	void		(*copyout)(void *ioctl_arg, void *vd_buf);
3650a55fbb7Slm66018 } vd_ioctl_t;
3660a55fbb7Slm66018 
3670a55fbb7Slm66018 /* Define trivial copyin/copyout conversion function flag */
3680a55fbb7Slm66018 #define	VD_IDENTITY	((void (*)(void *, void *))-1)
3691ae08745Sheppo 
3701ae08745Sheppo 
3713c96341aSnarayan static int	vds_ldc_retries = VDS_RETRIES;
3723af08d82Slm66018 static int	vds_ldc_delay = VDS_LDC_DELAY;
3733c96341aSnarayan static int	vds_dev_retries = VDS_RETRIES;
3743c96341aSnarayan static int	vds_dev_delay = VDS_DEV_DELAY;
3751ae08745Sheppo static void	*vds_state;
3761ae08745Sheppo static uint64_t	vds_operations;	/* see vds_operation[] definition below */
3771ae08745Sheppo 
3781ae08745Sheppo static int	vd_open_flags = VD_OPEN_FLAGS;
3791ae08745Sheppo 
380eba0cb4eSachartre static uint_t	vd_file_write_flags = VD_FILE_WRITE_FLAGS;
381eba0cb4eSachartre 
382*87a7269eSachartre static short	vd_scsi_rdwr_timeout = VD_SCSI_RDWR_TIMEOUT;
383*87a7269eSachartre 
3840a55fbb7Slm66018 /*
3850a55fbb7Slm66018  * Supported protocol version pairs, from highest (newest) to lowest (oldest)
3860a55fbb7Slm66018  *
3870a55fbb7Slm66018  * Each supported major version should appear only once, paired with (and only
3880a55fbb7Slm66018  * with) its highest supported minor version number (as the protocol requires
3890a55fbb7Slm66018  * supporting all lower minor version numbers as well)
3900a55fbb7Slm66018  */
3910a55fbb7Slm66018 static const vio_ver_t	vds_version[] = {{1, 0}};
3920a55fbb7Slm66018 static const size_t	vds_num_versions =
3930a55fbb7Slm66018     sizeof (vds_version)/sizeof (vds_version[0]);
3940a55fbb7Slm66018 
3953af08d82Slm66018 static void vd_free_dring_task(vd_t *vdp);
3963c96341aSnarayan static int vd_setup_vd(vd_t *vd);
3973c96341aSnarayan static boolean_t vd_enabled(vd_t *vd);
3981ae08745Sheppo 
399690555a1Sachartre /*
400690555a1Sachartre  * Function:
401690555a1Sachartre  *	vd_file_rw
402690555a1Sachartre  *
403690555a1Sachartre  * Description:
404690555a1Sachartre  * 	Read or write to a disk on file.
405690555a1Sachartre  *
406690555a1Sachartre  * Parameters:
407690555a1Sachartre  *	vd		- disk on which the operation is performed.
408690555a1Sachartre  *	slice		- slice on which the operation is performed,
409*87a7269eSachartre  *			  VD_SLICE_NONE indicates that the operation
410*87a7269eSachartre  *			  is done using an absolute disk offset.
411690555a1Sachartre  *	operation	- operation to execute: read (VD_OP_BREAD) or
412690555a1Sachartre  *			  write (VD_OP_BWRITE).
413690555a1Sachartre  *	data		- buffer where data are read to or written from.
414690555a1Sachartre  *	blk		- starting block for the operation.
415690555a1Sachartre  *	len		- number of bytes to read or write.
416690555a1Sachartre  *
417690555a1Sachartre  * Return Code:
418690555a1Sachartre  *	n >= 0		- success, n indicates the number of bytes read
419690555a1Sachartre  *			  or written.
420690555a1Sachartre  *	-1		- error.
421690555a1Sachartre  */
422690555a1Sachartre static ssize_t
423690555a1Sachartre vd_file_rw(vd_t *vd, int slice, int operation, caddr_t data, size_t blk,
424690555a1Sachartre     size_t len)
425690555a1Sachartre {
426690555a1Sachartre 	caddr_t	maddr;
427690555a1Sachartre 	size_t offset, maxlen, moffset, mlen, n;
428690555a1Sachartre 	uint_t smflags;
429690555a1Sachartre 	enum seg_rw srw;
430690555a1Sachartre 
431690555a1Sachartre 	ASSERT(vd->file);
432690555a1Sachartre 	ASSERT(len > 0);
433690555a1Sachartre 
434*87a7269eSachartre 	if (slice == VD_SLICE_NONE) {
435690555a1Sachartre 		/* raw disk access */
436690555a1Sachartre 		offset = blk * DEV_BSIZE;
437690555a1Sachartre 	} else {
438690555a1Sachartre 		ASSERT(slice >= 0 && slice < V_NUMPAR);
439690555a1Sachartre 		if (blk >= vd->vtoc.v_part[slice].p_size) {
440690555a1Sachartre 			/* address past the end of the slice */
441690555a1Sachartre 			PR0("req_addr (0x%lx) > psize (0x%lx)",
442690555a1Sachartre 			    blk, vd->vtoc.v_part[slice].p_size);
443690555a1Sachartre 			return (0);
444690555a1Sachartre 		}
445690555a1Sachartre 
446690555a1Sachartre 		offset = (vd->vtoc.v_part[slice].p_start + blk) * DEV_BSIZE;
447690555a1Sachartre 
448690555a1Sachartre 		/*
449690555a1Sachartre 		 * If the requested size is greater than the size
450690555a1Sachartre 		 * of the partition, truncate the read/write.
451690555a1Sachartre 		 */
452690555a1Sachartre 		maxlen = (vd->vtoc.v_part[slice].p_size - blk) * DEV_BSIZE;
453690555a1Sachartre 
454690555a1Sachartre 		if (len > maxlen) {
455690555a1Sachartre 			PR0("I/O size truncated to %lu bytes from %lu bytes",
456690555a1Sachartre 			    maxlen, len);
457690555a1Sachartre 			len = maxlen;
458690555a1Sachartre 		}
459690555a1Sachartre 	}
460690555a1Sachartre 
461690555a1Sachartre 	/*
462690555a1Sachartre 	 * We have to ensure that we are reading/writing into the mmap
463690555a1Sachartre 	 * range. If we have a partial disk image (e.g. an image of
464690555a1Sachartre 	 * s0 instead s2) the system can try to access slices that
465690555a1Sachartre 	 * are not included into the disk image.
466690555a1Sachartre 	 */
467690555a1Sachartre 	if ((offset + len) >= vd->file_size) {
468690555a1Sachartre 		PR0("offset + nbytes (0x%lx + 0x%lx) >= "
469690555a1Sachartre 		    "file_size (0x%lx)", offset, len, vd->file_size);
470690555a1Sachartre 		return (-1);
471690555a1Sachartre 	}
472690555a1Sachartre 
473690555a1Sachartre 	srw = (operation == VD_OP_BREAD)? S_READ : S_WRITE;
474eba0cb4eSachartre 	smflags = (operation == VD_OP_BREAD)? 0 :
475eba0cb4eSachartre 	    (SM_WRITE | vd_file_write_flags);
476690555a1Sachartre 	n = len;
477690555a1Sachartre 
478690555a1Sachartre 	do {
479690555a1Sachartre 		/*
480690555a1Sachartre 		 * segmap_getmapflt() returns a MAXBSIZE chunk which is
481690555a1Sachartre 		 * MAXBSIZE aligned.
482690555a1Sachartre 		 */
483690555a1Sachartre 		moffset = offset & MAXBOFFSET;
484690555a1Sachartre 		mlen = MIN(MAXBSIZE - moffset, n);
485690555a1Sachartre 		maddr = segmap_getmapflt(segkmap, vd->file_vnode, offset,
486690555a1Sachartre 		    mlen, 1, srw);
487690555a1Sachartre 		/*
488690555a1Sachartre 		 * Fault in the pages so we can check for error and ensure
489690555a1Sachartre 		 * that we can safely used the mapped address.
490690555a1Sachartre 		 */
491690555a1Sachartre 		if (segmap_fault(kas.a_hat, segkmap, maddr, mlen,
492690555a1Sachartre 		    F_SOFTLOCK, srw) != 0) {
493690555a1Sachartre 			(void) segmap_release(segkmap, maddr, 0);
494690555a1Sachartre 			return (-1);
495690555a1Sachartre 		}
496690555a1Sachartre 
497690555a1Sachartre 		if (operation == VD_OP_BREAD)
498690555a1Sachartre 			bcopy(maddr + moffset, data, mlen);
499690555a1Sachartre 		else
500690555a1Sachartre 			bcopy(data, maddr + moffset, mlen);
501690555a1Sachartre 
502690555a1Sachartre 		if (segmap_fault(kas.a_hat, segkmap, maddr, mlen,
503690555a1Sachartre 		    F_SOFTUNLOCK, srw) != 0) {
504690555a1Sachartre 			(void) segmap_release(segkmap, maddr, 0);
505690555a1Sachartre 			return (-1);
506690555a1Sachartre 		}
507690555a1Sachartre 		if (segmap_release(segkmap, maddr, smflags) != 0)
508690555a1Sachartre 			return (-1);
509690555a1Sachartre 		n -= mlen;
510690555a1Sachartre 		offset += mlen;
511690555a1Sachartre 		data += mlen;
512690555a1Sachartre 
513690555a1Sachartre 	} while (n > 0);
514690555a1Sachartre 
515690555a1Sachartre 	return (len);
516690555a1Sachartre }
517690555a1Sachartre 
518*87a7269eSachartre /*
519*87a7269eSachartre  * Function:
520*87a7269eSachartre  *	vd_file_set_vtoc
521*87a7269eSachartre  *
522*87a7269eSachartre  * Description:
523*87a7269eSachartre  *	Set the vtoc of a disk image by writing the label and backup
524*87a7269eSachartre  *	labels into the disk image backend.
525*87a7269eSachartre  *
526*87a7269eSachartre  * Parameters:
527*87a7269eSachartre  *	vd		- disk on which the operation is performed.
528*87a7269eSachartre  *	label		- the data to be written.
529*87a7269eSachartre  *
530*87a7269eSachartre  * Return Code:
531*87a7269eSachartre  *	0		- success.
532*87a7269eSachartre  *	n > 0		- error, n indicates the errno code.
533*87a7269eSachartre  */
534*87a7269eSachartre static int
535*87a7269eSachartre vd_file_set_vtoc(vd_t *vd, struct dk_label *label)
536*87a7269eSachartre {
537*87a7269eSachartre 	int blk, sec, cyl, head, cnt;
538*87a7269eSachartre 
539*87a7269eSachartre 	ASSERT(vd->file);
540*87a7269eSachartre 
541*87a7269eSachartre 	if (VD_FILE_LABEL_WRITE(vd, label) < 0) {
542*87a7269eSachartre 		PR0("fail to write disk label");
543*87a7269eSachartre 		return (EIO);
544*87a7269eSachartre 	}
545*87a7269eSachartre 
546*87a7269eSachartre 	/*
547*87a7269eSachartre 	 * Backup labels are on the last alternate cylinder's
548*87a7269eSachartre 	 * first five odd sectors.
549*87a7269eSachartre 	 */
550*87a7269eSachartre 	if (label->dkl_acyl == 0) {
551*87a7269eSachartre 		PR0("no alternate cylinder, can not store backup labels");
552*87a7269eSachartre 		return (0);
553*87a7269eSachartre 	}
554*87a7269eSachartre 
555*87a7269eSachartre 	cyl = label->dkl_ncyl  + label->dkl_acyl - 1;
556*87a7269eSachartre 	head = label->dkl_nhead - 1;
557*87a7269eSachartre 
558*87a7269eSachartre 	blk = (cyl * ((label->dkl_nhead * label->dkl_nsect) - label->dkl_apc)) +
559*87a7269eSachartre 	    (head * label->dkl_nsect);
560*87a7269eSachartre 
561*87a7269eSachartre 	/*
562*87a7269eSachartre 	 * Write the backup labels. Make sure we don't try to write past
563*87a7269eSachartre 	 * the last cylinder.
564*87a7269eSachartre 	 */
565*87a7269eSachartre 	sec = 1;
566*87a7269eSachartre 
567*87a7269eSachartre 	for (cnt = 0; cnt < VD_FILE_NUM_BACKUP; cnt++) {
568*87a7269eSachartre 
569*87a7269eSachartre 		if (sec >= label->dkl_nsect) {
570*87a7269eSachartre 			PR0("not enough sector to store all backup labels");
571*87a7269eSachartre 			return (0);
572*87a7269eSachartre 		}
573*87a7269eSachartre 
574*87a7269eSachartre 		if (vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE, (caddr_t)label,
575*87a7269eSachartre 		    blk + sec, sizeof (struct dk_label)) < 0) {
576*87a7269eSachartre 			PR0("error writing backup label at block %d\n",
577*87a7269eSachartre 			    blk + sec);
578*87a7269eSachartre 			return (EIO);
579*87a7269eSachartre 		}
580*87a7269eSachartre 
581*87a7269eSachartre 		PR1("wrote backup label at block %d\n", blk + sec);
582*87a7269eSachartre 
583*87a7269eSachartre 		sec += 2;
584*87a7269eSachartre 	}
585*87a7269eSachartre 
586*87a7269eSachartre 	return (0);
587*87a7269eSachartre }
588*87a7269eSachartre 
589*87a7269eSachartre /*
590*87a7269eSachartre  * Function:
591*87a7269eSachartre  *	vd_file_get_devid_block
592*87a7269eSachartre  *
593*87a7269eSachartre  * Description:
594*87a7269eSachartre  *	Return the block number where the device id is stored.
595*87a7269eSachartre  *
596*87a7269eSachartre  * Parameters:
597*87a7269eSachartre  *	vd		- disk on which the operation is performed.
598*87a7269eSachartre  *	blkp		- pointer to the block number
599*87a7269eSachartre  *
600*87a7269eSachartre  * Return Code:
601*87a7269eSachartre  *	0		- success
602*87a7269eSachartre  *	ENOSPC		- disk has no space to store a device id
603*87a7269eSachartre  */
604*87a7269eSachartre static int
605*87a7269eSachartre vd_file_get_devid_block(vd_t *vd, size_t *blkp)
606*87a7269eSachartre {
607*87a7269eSachartre 	diskaddr_t spc, head, cyl;
608*87a7269eSachartre 
609*87a7269eSachartre 	ASSERT(vd->file);
610*87a7269eSachartre 	ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC);
611*87a7269eSachartre 
612*87a7269eSachartre 	/* this geometry doesn't allow us to have a devid */
613*87a7269eSachartre 	if (vd->dk_geom.dkg_acyl < 2) {
614*87a7269eSachartre 		PR0("not enough alternate cylinder available for devid "
615*87a7269eSachartre 		    "(acyl=%u)", vd->dk_geom.dkg_acyl);
616*87a7269eSachartre 		return (ENOSPC);
617*87a7269eSachartre 	}
618*87a7269eSachartre 
619*87a7269eSachartre 	/* the devid is in on the track next to the last cylinder */
620*87a7269eSachartre 	cyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl - 2;
621*87a7269eSachartre 	spc = vd->dk_geom.dkg_nhead * vd->dk_geom.dkg_nsect;
622*87a7269eSachartre 	head = vd->dk_geom.dkg_nhead - 1;
623*87a7269eSachartre 
624*87a7269eSachartre 	*blkp = (cyl * (spc - vd->dk_geom.dkg_apc)) +
625*87a7269eSachartre 	    (head * vd->dk_geom.dkg_nsect) + 1;
626*87a7269eSachartre 
627*87a7269eSachartre 	return (0);
628*87a7269eSachartre }
629*87a7269eSachartre 
630*87a7269eSachartre /*
631*87a7269eSachartre  * Return the checksum of a disk block containing an on-disk devid.
632*87a7269eSachartre  */
633*87a7269eSachartre static uint_t
634*87a7269eSachartre vd_dkdevid2cksum(struct dk_devid *dkdevid)
635*87a7269eSachartre {
636*87a7269eSachartre 	uint_t chksum, *ip;
637*87a7269eSachartre 	int i;
638*87a7269eSachartre 
639*87a7269eSachartre 	chksum = 0;
640*87a7269eSachartre 	ip = (uint_t *)dkdevid;
641*87a7269eSachartre 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++)
642*87a7269eSachartre 		chksum ^= ip[i];
643*87a7269eSachartre 
644*87a7269eSachartre 	return (chksum);
645*87a7269eSachartre }
646*87a7269eSachartre 
647*87a7269eSachartre /*
648*87a7269eSachartre  * Function:
649*87a7269eSachartre  *	vd_file_read_devid
650*87a7269eSachartre  *
651*87a7269eSachartre  * Description:
652*87a7269eSachartre  *	Read the device id stored on a disk image.
653*87a7269eSachartre  *
654*87a7269eSachartre  * Parameters:
655*87a7269eSachartre  *	vd		- disk on which the operation is performed.
656*87a7269eSachartre  *	devid		- the return address of the device ID.
657*87a7269eSachartre  *
658*87a7269eSachartre  * Return Code:
659*87a7269eSachartre  *	0		- success
660*87a7269eSachartre  *	EIO		- I/O error while trying to access the disk image
661*87a7269eSachartre  *	EINVAL		- no valid device id was found
662*87a7269eSachartre  *	ENOSPC		- disk has no space to store a device id
663*87a7269eSachartre  */
664*87a7269eSachartre static int
665*87a7269eSachartre vd_file_read_devid(vd_t *vd, ddi_devid_t *devid)
666*87a7269eSachartre {
667*87a7269eSachartre 	struct dk_devid *dkdevid;
668*87a7269eSachartre 	size_t blk;
669*87a7269eSachartre 	uint_t chksum;
670*87a7269eSachartre 	int status, sz;
671*87a7269eSachartre 
672*87a7269eSachartre 	if ((status = vd_file_get_devid_block(vd, &blk)) != 0)
673*87a7269eSachartre 		return (status);
674*87a7269eSachartre 
675*87a7269eSachartre 	dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP);
676*87a7269eSachartre 
677*87a7269eSachartre 	/* get the devid */
678*87a7269eSachartre 	if ((vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BREAD, (caddr_t)dkdevid, blk,
679*87a7269eSachartre 	    DEV_BSIZE)) < 0) {
680*87a7269eSachartre 		PR0("error reading devid block at %lu", blk);
681*87a7269eSachartre 		status = EIO;
682*87a7269eSachartre 		goto done;
683*87a7269eSachartre 	}
684*87a7269eSachartre 
685*87a7269eSachartre 	/* validate the revision */
686*87a7269eSachartre 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
687*87a7269eSachartre 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
688*87a7269eSachartre 		PR0("invalid devid found at block %lu (bad revision)", blk);
689*87a7269eSachartre 		status = EINVAL;
690*87a7269eSachartre 		goto done;
691*87a7269eSachartre 	}
692*87a7269eSachartre 
693*87a7269eSachartre 	/* compute checksum */
694*87a7269eSachartre 	chksum = vd_dkdevid2cksum(dkdevid);
695*87a7269eSachartre 
696*87a7269eSachartre 	/* compare the checksums */
697*87a7269eSachartre 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
698*87a7269eSachartre 		PR0("invalid devid found at block %lu (bad checksum)", blk);
699*87a7269eSachartre 		status = EINVAL;
700*87a7269eSachartre 		goto done;
701*87a7269eSachartre 	}
702*87a7269eSachartre 
703*87a7269eSachartre 	/* validate the device id */
704*87a7269eSachartre 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
705*87a7269eSachartre 		PR0("invalid devid found at block %lu", blk);
706*87a7269eSachartre 		status = EINVAL;
707*87a7269eSachartre 		goto done;
708*87a7269eSachartre 	}
709*87a7269eSachartre 
710*87a7269eSachartre 	PR1("devid read at block %lu", blk);
711*87a7269eSachartre 
712*87a7269eSachartre 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
713*87a7269eSachartre 	*devid = kmem_alloc(sz, KM_SLEEP);
714*87a7269eSachartre 	bcopy(&dkdevid->dkd_devid, *devid, sz);
715*87a7269eSachartre 
716*87a7269eSachartre done:
717*87a7269eSachartre 	kmem_free(dkdevid, DEV_BSIZE);
718*87a7269eSachartre 	return (status);
719*87a7269eSachartre 
720*87a7269eSachartre }
721*87a7269eSachartre 
722*87a7269eSachartre /*
723*87a7269eSachartre  * Function:
724*87a7269eSachartre  *	vd_file_write_devid
725*87a7269eSachartre  *
726*87a7269eSachartre  * Description:
727*87a7269eSachartre  *	Write a device id into disk image.
728*87a7269eSachartre  *
729*87a7269eSachartre  * Parameters:
730*87a7269eSachartre  *	vd		- disk on which the operation is performed.
731*87a7269eSachartre  *	devid		- the device ID to store.
732*87a7269eSachartre  *
733*87a7269eSachartre  * Return Code:
734*87a7269eSachartre  *	0		- success
735*87a7269eSachartre  *	EIO		- I/O error while trying to access the disk image
736*87a7269eSachartre  *	ENOSPC		- disk has no space to store a device id
737*87a7269eSachartre  */
738*87a7269eSachartre static int
739*87a7269eSachartre vd_file_write_devid(vd_t *vd, ddi_devid_t devid)
740*87a7269eSachartre {
741*87a7269eSachartre 	struct dk_devid *dkdevid;
742*87a7269eSachartre 	uint_t chksum;
743*87a7269eSachartre 	size_t blk;
744*87a7269eSachartre 	int status;
745*87a7269eSachartre 
746*87a7269eSachartre 	if ((status = vd_file_get_devid_block(vd, &blk)) != 0)
747*87a7269eSachartre 		return (status);
748*87a7269eSachartre 
749*87a7269eSachartre 	dkdevid = kmem_zalloc(DEV_BSIZE, KM_SLEEP);
750*87a7269eSachartre 
751*87a7269eSachartre 	/* set revision */
752*87a7269eSachartre 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
753*87a7269eSachartre 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
754*87a7269eSachartre 
755*87a7269eSachartre 	/* copy devid */
756*87a7269eSachartre 	bcopy(devid, &dkdevid->dkd_devid, ddi_devid_sizeof(devid));
757*87a7269eSachartre 
758*87a7269eSachartre 	/* compute checksum */
759*87a7269eSachartre 	chksum = vd_dkdevid2cksum(dkdevid);
760*87a7269eSachartre 
761*87a7269eSachartre 	/* set checksum */
762*87a7269eSachartre 	DKD_FORMCHKSUM(chksum, dkdevid);
763*87a7269eSachartre 
764*87a7269eSachartre 	/* store the devid */
765*87a7269eSachartre 	if ((status = vd_file_rw(vd, VD_SLICE_NONE, VD_OP_BWRITE,
766*87a7269eSachartre 	    (caddr_t)dkdevid, blk, DEV_BSIZE)) < 0) {
767*87a7269eSachartre 		PR0("Error writing devid block at %lu", blk);
768*87a7269eSachartre 		status = EIO;
769*87a7269eSachartre 	} else {
770*87a7269eSachartre 		PR1("devid written at block %lu", blk);
771*87a7269eSachartre 		status = 0;
772*87a7269eSachartre 	}
773*87a7269eSachartre 
774*87a7269eSachartre 	kmem_free(dkdevid, DEV_BSIZE);
775*87a7269eSachartre 	return (status);
776*87a7269eSachartre }
777*87a7269eSachartre 
778*87a7269eSachartre /*
779*87a7269eSachartre  * Function:
780*87a7269eSachartre  *	vd_scsi_rdwr
781*87a7269eSachartre  *
782*87a7269eSachartre  * Description:
783*87a7269eSachartre  * 	Read or write to a SCSI disk using an absolute disk offset.
784*87a7269eSachartre  *
785*87a7269eSachartre  * Parameters:
786*87a7269eSachartre  *	vd		- disk on which the operation is performed.
787*87a7269eSachartre  *	operation	- operation to execute: read (VD_OP_BREAD) or
788*87a7269eSachartre  *			  write (VD_OP_BWRITE).
789*87a7269eSachartre  *	data		- buffer where data are read to or written from.
790*87a7269eSachartre  *	blk		- starting block for the operation.
791*87a7269eSachartre  *	len		- number of bytes to read or write.
792*87a7269eSachartre  *
793*87a7269eSachartre  * Return Code:
794*87a7269eSachartre  *	0		- success
795*87a7269eSachartre  *	n != 0		- error.
796*87a7269eSachartre  */
797*87a7269eSachartre static int
798*87a7269eSachartre vd_scsi_rdwr(vd_t *vd, int operation, caddr_t data, size_t blk, size_t len)
799*87a7269eSachartre {
800*87a7269eSachartre 	struct uscsi_cmd ucmd;
801*87a7269eSachartre 	union scsi_cdb cdb;
802*87a7269eSachartre 	int nsectors, nblk;
803*87a7269eSachartre 	int max_sectors;
804*87a7269eSachartre 	int status, rval;
805*87a7269eSachartre 
806*87a7269eSachartre 	ASSERT(!vd->file);
807*87a7269eSachartre 
808*87a7269eSachartre 	max_sectors = vd->max_xfer_sz;
809*87a7269eSachartre 	nblk = (len / DEV_BSIZE);
810*87a7269eSachartre 
811*87a7269eSachartre 	if (len % DEV_BSIZE != 0)
812*87a7269eSachartre 		return (EINVAL);
813*87a7269eSachartre 
814*87a7269eSachartre 	/*
815*87a7269eSachartre 	 * Build and execute the uscsi ioctl.  We build a group0, group1
816*87a7269eSachartre 	 * or group4 command as necessary, since some targets
817*87a7269eSachartre 	 * do not support group1 commands.
818*87a7269eSachartre 	 */
819*87a7269eSachartre 	while (nblk) {
820*87a7269eSachartre 
821*87a7269eSachartre 		bzero(&ucmd, sizeof (ucmd));
822*87a7269eSachartre 		bzero(&cdb, sizeof (cdb));
823*87a7269eSachartre 
824*87a7269eSachartre 		nsectors = (max_sectors < nblk) ? max_sectors : nblk;
825*87a7269eSachartre 
826*87a7269eSachartre 		if (blk < (2 << 20) && nsectors <= 0xff) {
827*87a7269eSachartre 			FORMG0ADDR(&cdb, blk);
828*87a7269eSachartre 			FORMG0COUNT(&cdb, nsectors);
829*87a7269eSachartre 			ucmd.uscsi_cdblen = CDB_GROUP0;
830*87a7269eSachartre 		} else if (blk > 0xffffffff) {
831*87a7269eSachartre 			FORMG4LONGADDR(&cdb, blk);
832*87a7269eSachartre 			FORMG4COUNT(&cdb, nsectors);
833*87a7269eSachartre 			ucmd.uscsi_cdblen = CDB_GROUP4;
834*87a7269eSachartre 			cdb.scc_cmd |= SCMD_GROUP4;
835*87a7269eSachartre 		} else {
836*87a7269eSachartre 			FORMG1ADDR(&cdb, blk);
837*87a7269eSachartre 			FORMG1COUNT(&cdb, nsectors);
838*87a7269eSachartre 			ucmd.uscsi_cdblen = CDB_GROUP1;
839*87a7269eSachartre 			cdb.scc_cmd |= SCMD_GROUP1;
840*87a7269eSachartre 		}
841*87a7269eSachartre 
842*87a7269eSachartre 		ucmd.uscsi_cdb = (caddr_t)&cdb;
843*87a7269eSachartre 		ucmd.uscsi_bufaddr = data;
844*87a7269eSachartre 		ucmd.uscsi_buflen = nsectors * DEV_BSIZE;
845*87a7269eSachartre 		ucmd.uscsi_timeout = vd_scsi_rdwr_timeout;
846*87a7269eSachartre 		/*
847*87a7269eSachartre 		 * Set flags so that the command is isolated from normal
848*87a7269eSachartre 		 * commands and no error message is printed.
849*87a7269eSachartre 		 */
850*87a7269eSachartre 		ucmd.uscsi_flags = USCSI_ISOLATE | USCSI_SILENT;
851*87a7269eSachartre 
852*87a7269eSachartre 		if (operation == VD_OP_BREAD) {
853*87a7269eSachartre 			cdb.scc_cmd |= SCMD_READ;
854*87a7269eSachartre 			ucmd.uscsi_flags |= USCSI_READ;
855*87a7269eSachartre 		} else {
856*87a7269eSachartre 			cdb.scc_cmd |= SCMD_WRITE;
857*87a7269eSachartre 		}
858*87a7269eSachartre 
859*87a7269eSachartre 		status = ldi_ioctl(vd->ldi_handle[VD_ENTIRE_DISK_SLICE],
860*87a7269eSachartre 		    USCSICMD, (intptr_t)&ucmd, (vd_open_flags | FKIOCTL),
861*87a7269eSachartre 		    kcred, &rval);
862*87a7269eSachartre 
863*87a7269eSachartre 		if (status == 0)
864*87a7269eSachartre 			status = ucmd.uscsi_status;
865*87a7269eSachartre 
866*87a7269eSachartre 		if (status != 0)
867*87a7269eSachartre 			break;
868*87a7269eSachartre 
869*87a7269eSachartre 		/*
870*87a7269eSachartre 		 * Check if partial DMA breakup is required. If so, reduce
871*87a7269eSachartre 		 * the request size by half and retry the last request.
872*87a7269eSachartre 		 */
873*87a7269eSachartre 		if (ucmd.uscsi_resid == ucmd.uscsi_buflen) {
874*87a7269eSachartre 			max_sectors >>= 1;
875*87a7269eSachartre 			if (max_sectors <= 0) {
876*87a7269eSachartre 				status = EIO;
877*87a7269eSachartre 				break;
878*87a7269eSachartre 			}
879*87a7269eSachartre 			continue;
880*87a7269eSachartre 		}
881*87a7269eSachartre 
882*87a7269eSachartre 		if (ucmd.uscsi_resid != 0) {
883*87a7269eSachartre 			status = EIO;
884*87a7269eSachartre 			break;
885*87a7269eSachartre 		}
886*87a7269eSachartre 
887*87a7269eSachartre 		blk += nsectors;
888*87a7269eSachartre 		nblk -= nsectors;
889*87a7269eSachartre 		data += nsectors * DEV_BSIZE; /* SECSIZE */
890*87a7269eSachartre 	}
891*87a7269eSachartre 
892*87a7269eSachartre 	return (status);
893*87a7269eSachartre }
894*87a7269eSachartre 
8951ae08745Sheppo static int
896d10e4ef2Snarayan vd_start_bio(vd_task_t *task)
8971ae08745Sheppo {
8984bac2208Snarayan 	int			rv, status = 0;
899d10e4ef2Snarayan 	vd_t			*vd		= task->vd;
900d10e4ef2Snarayan 	vd_dring_payload_t	*request	= task->request;
901d10e4ef2Snarayan 	struct buf		*buf		= &task->buf;
9024bac2208Snarayan 	uint8_t			mtype;
9033c96341aSnarayan 	int 			slice;
904d10e4ef2Snarayan 
905d10e4ef2Snarayan 	ASSERT(vd != NULL);
906d10e4ef2Snarayan 	ASSERT(request != NULL);
9073c96341aSnarayan 
9083c96341aSnarayan 	slice = request->slice;
9093c96341aSnarayan 
910*87a7269eSachartre 	ASSERT(slice == VD_SLICE_NONE || slice < vd->nslices);
911d10e4ef2Snarayan 	ASSERT((request->operation == VD_OP_BREAD) ||
912d10e4ef2Snarayan 	    (request->operation == VD_OP_BWRITE));
913d10e4ef2Snarayan 
9141ae08745Sheppo 	if (request->nbytes == 0)
9151ae08745Sheppo 		return (EINVAL);	/* no service for trivial requests */
9161ae08745Sheppo 
917d10e4ef2Snarayan 	PR1("%s %lu bytes at block %lu",
918d10e4ef2Snarayan 	    (request->operation == VD_OP_BREAD) ? "Read" : "Write",
919d10e4ef2Snarayan 	    request->nbytes, request->addr);
9201ae08745Sheppo 
921d10e4ef2Snarayan 	bioinit(buf);
922d10e4ef2Snarayan 	buf->b_flags		= B_BUSY;
923d10e4ef2Snarayan 	buf->b_bcount		= request->nbytes;
924d10e4ef2Snarayan 	buf->b_lblkno		= request->addr;
925*87a7269eSachartre 	buf->b_edev = (slice == VD_SLICE_NONE)? NODEV : vd->dev[slice];
926d10e4ef2Snarayan 
9274bac2208Snarayan 	mtype = (&vd->inband_task == task) ? LDC_SHADOW_MAP : LDC_DIRECT_MAP;
9284bac2208Snarayan 
9294bac2208Snarayan 	/* Map memory exported by client */
9304bac2208Snarayan 	status = ldc_mem_map(task->mhdl, request->cookie, request->ncookies,
9314bac2208Snarayan 	    mtype, (request->operation == VD_OP_BREAD) ? LDC_MEM_W : LDC_MEM_R,
9324bac2208Snarayan 	    &(buf->b_un.b_addr), NULL);
9334bac2208Snarayan 	if (status != 0) {
9343af08d82Slm66018 		PR0("ldc_mem_map() returned err %d ", status);
9354bac2208Snarayan 		biofini(buf);
9364bac2208Snarayan 		return (status);
937d10e4ef2Snarayan 	}
938d10e4ef2Snarayan 
9394bac2208Snarayan 	status = ldc_mem_acquire(task->mhdl, 0, buf->b_bcount);
9404bac2208Snarayan 	if (status != 0) {
9414bac2208Snarayan 		(void) ldc_mem_unmap(task->mhdl);
9423af08d82Slm66018 		PR0("ldc_mem_acquire() returned err %d ", status);
9434bac2208Snarayan 		biofini(buf);
9444bac2208Snarayan 		return (status);
9454bac2208Snarayan 	}
9464bac2208Snarayan 
9474bac2208Snarayan 	buf->b_flags |= (request->operation == VD_OP_BREAD) ? B_READ : B_WRITE;
9484bac2208Snarayan 
949d10e4ef2Snarayan 	/* Start the block I/O */
9503c96341aSnarayan 	if (vd->file) {
951690555a1Sachartre 		rv = vd_file_rw(vd, slice, request->operation, buf->b_un.b_addr,
952690555a1Sachartre 		    request->addr, request->nbytes);
953690555a1Sachartre 		if (rv < 0) {
9543c96341aSnarayan 			request->nbytes = 0;
9553c96341aSnarayan 			status = EIO;
956690555a1Sachartre 		} else {
957690555a1Sachartre 			request->nbytes = rv;
958690555a1Sachartre 			status = 0;
9593c96341aSnarayan 		}
9603c96341aSnarayan 	} else {
961*87a7269eSachartre 		if (slice == VD_SLICE_NONE) {
962*87a7269eSachartre 			/*
963*87a7269eSachartre 			 * This is not a disk image so it is a real disk. We
964*87a7269eSachartre 			 * assume that the underlying device driver supports
965*87a7269eSachartre 			 * USCSICMD ioctls. This is the case of all SCSI devices
966*87a7269eSachartre 			 * (sd, ssd...).
967*87a7269eSachartre 			 *
968*87a7269eSachartre 			 * In the future if we have non-SCSI disks we would need
969*87a7269eSachartre 			 * to invoke the appropriate function to do I/O using an
970*87a7269eSachartre 			 * absolute disk offset (for example using DKIOCTL_RWCMD
971*87a7269eSachartre 			 * for IDE disks).
972*87a7269eSachartre 			 */
973*87a7269eSachartre 			rv = vd_scsi_rdwr(vd, request->operation,
974*87a7269eSachartre 			    buf->b_un.b_addr, request->addr, request->nbytes);
975*87a7269eSachartre 			if (rv != 0) {
976*87a7269eSachartre 				request->nbytes = 0;
977*87a7269eSachartre 				status = EIO;
978*87a7269eSachartre 			} else {
979*87a7269eSachartre 				status = 0;
980*87a7269eSachartre 			}
981*87a7269eSachartre 		} else {
9823c96341aSnarayan 			status = ldi_strategy(vd->ldi_handle[slice], buf);
9833c96341aSnarayan 			if (status == 0)
984*87a7269eSachartre 				/* will complete on completionq */
985*87a7269eSachartre 				return (EINPROGRESS);
986*87a7269eSachartre 		}
9873c96341aSnarayan 	}
9883c96341aSnarayan 
989d10e4ef2Snarayan 	/* Clean up after error */
9904bac2208Snarayan 	rv = ldc_mem_release(task->mhdl, 0, buf->b_bcount);
9914bac2208Snarayan 	if (rv) {
9923af08d82Slm66018 		PR0("ldc_mem_release() returned err %d ", rv);
9934bac2208Snarayan 	}
9944bac2208Snarayan 	rv = ldc_mem_unmap(task->mhdl);
9954bac2208Snarayan 	if (rv) {
9963af08d82Slm66018 		PR0("ldc_mem_unmap() returned err %d ", status);
9974bac2208Snarayan 	}
9984bac2208Snarayan 
999d10e4ef2Snarayan 	biofini(buf);
1000d10e4ef2Snarayan 	return (status);
1001d10e4ef2Snarayan }
1002d10e4ef2Snarayan 
1003d10e4ef2Snarayan static int
1004d10e4ef2Snarayan send_msg(ldc_handle_t ldc_handle, void *msg, size_t msglen)
1005d10e4ef2Snarayan {
10063af08d82Slm66018 	int	status;
1007d10e4ef2Snarayan 	size_t	nbytes;
1008d10e4ef2Snarayan 
10093af08d82Slm66018 	do {
1010d10e4ef2Snarayan 		nbytes = msglen;
1011d10e4ef2Snarayan 		status = ldc_write(ldc_handle, msg, &nbytes);
10123af08d82Slm66018 		if (status != EWOULDBLOCK)
10133af08d82Slm66018 			break;
10143af08d82Slm66018 		drv_usecwait(vds_ldc_delay);
10153af08d82Slm66018 	} while (status == EWOULDBLOCK);
1016d10e4ef2Snarayan 
1017d10e4ef2Snarayan 	if (status != 0) {
10183af08d82Slm66018 		if (status != ECONNRESET)
10193af08d82Slm66018 			PR0("ldc_write() returned errno %d", status);
1020d10e4ef2Snarayan 		return (status);
1021d10e4ef2Snarayan 	} else if (nbytes != msglen) {
10223af08d82Slm66018 		PR0("ldc_write() performed only partial write");
1023d10e4ef2Snarayan 		return (EIO);
1024d10e4ef2Snarayan 	}
1025d10e4ef2Snarayan 
1026d10e4ef2Snarayan 	PR1("SENT %lu bytes", msglen);
1027d10e4ef2Snarayan 	return (0);
1028d10e4ef2Snarayan }
1029d10e4ef2Snarayan 
1030d10e4ef2Snarayan static void
1031d10e4ef2Snarayan vd_need_reset(vd_t *vd, boolean_t reset_ldc)
1032d10e4ef2Snarayan {
1033d10e4ef2Snarayan 	mutex_enter(&vd->lock);
1034d10e4ef2Snarayan 	vd->reset_state	= B_TRUE;
1035d10e4ef2Snarayan 	vd->reset_ldc	= reset_ldc;
1036d10e4ef2Snarayan 	mutex_exit(&vd->lock);
1037d10e4ef2Snarayan }
1038d10e4ef2Snarayan 
1039d10e4ef2Snarayan /*
1040d10e4ef2Snarayan  * Reset the state of the connection with a client, if needed; reset the LDC
1041d10e4ef2Snarayan  * transport as well, if needed.  This function should only be called from the
10423af08d82Slm66018  * "vd_recv_msg", as it waits for tasks - otherwise a deadlock can occur.
1043d10e4ef2Snarayan  */
1044d10e4ef2Snarayan static void
1045d10e4ef2Snarayan vd_reset_if_needed(vd_t *vd)
1046d10e4ef2Snarayan {
1047d10e4ef2Snarayan 	int	status = 0;
1048d10e4ef2Snarayan 
1049d10e4ef2Snarayan 	mutex_enter(&vd->lock);
1050d10e4ef2Snarayan 	if (!vd->reset_state) {
1051d10e4ef2Snarayan 		ASSERT(!vd->reset_ldc);
1052d10e4ef2Snarayan 		mutex_exit(&vd->lock);
1053d10e4ef2Snarayan 		return;
1054d10e4ef2Snarayan 	}
1055d10e4ef2Snarayan 	mutex_exit(&vd->lock);
1056d10e4ef2Snarayan 
1057d10e4ef2Snarayan 	PR0("Resetting connection state with %s", VD_CLIENT(vd));
1058d10e4ef2Snarayan 
1059d10e4ef2Snarayan 	/*
1060d10e4ef2Snarayan 	 * Let any asynchronous I/O complete before possibly pulling the rug
1061d10e4ef2Snarayan 	 * out from under it; defer checking vd->reset_ldc, as one of the
1062d10e4ef2Snarayan 	 * asynchronous tasks might set it
1063d10e4ef2Snarayan 	 */
1064d10e4ef2Snarayan 	ddi_taskq_wait(vd->completionq);
1065d10e4ef2Snarayan 
10663c96341aSnarayan 	if (vd->file) {
10673c96341aSnarayan 		status = VOP_FSYNC(vd->file_vnode, FSYNC, kcred);
10683c96341aSnarayan 		if (status) {
10693c96341aSnarayan 			PR0("VOP_FSYNC returned errno %d", status);
10703c96341aSnarayan 		}
10713c96341aSnarayan 	}
10723c96341aSnarayan 
1073d10e4ef2Snarayan 	if ((vd->initialized & VD_DRING) &&
1074d10e4ef2Snarayan 	    ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0))
10753af08d82Slm66018 		PR0("ldc_mem_dring_unmap() returned errno %d", status);
1076d10e4ef2Snarayan 
10773af08d82Slm66018 	vd_free_dring_task(vd);
10783af08d82Slm66018 
10793af08d82Slm66018 	/* Free the staging buffer for msgs */
10803af08d82Slm66018 	if (vd->vio_msgp != NULL) {
10813af08d82Slm66018 		kmem_free(vd->vio_msgp, vd->max_msglen);
10823af08d82Slm66018 		vd->vio_msgp = NULL;
1083d10e4ef2Snarayan 	}
1084d10e4ef2Snarayan 
10853af08d82Slm66018 	/* Free the inband message buffer */
10863af08d82Slm66018 	if (vd->inband_task.msg != NULL) {
10873af08d82Slm66018 		kmem_free(vd->inband_task.msg, vd->max_msglen);
10883af08d82Slm66018 		vd->inband_task.msg = NULL;
10893af08d82Slm66018 	}
1090d10e4ef2Snarayan 
1091d10e4ef2Snarayan 	mutex_enter(&vd->lock);
10923af08d82Slm66018 
10933af08d82Slm66018 	if (vd->reset_ldc)
10943af08d82Slm66018 		PR0("taking down LDC channel");
1095e1ebb9ecSlm66018 	if (vd->reset_ldc && ((status = ldc_down(vd->ldc_handle)) != 0))
10963af08d82Slm66018 		PR0("ldc_down() returned errno %d", status);
1097d10e4ef2Snarayan 
1098d10e4ef2Snarayan 	vd->initialized	&= ~(VD_SID | VD_SEQ_NUM | VD_DRING);
1099d10e4ef2Snarayan 	vd->state	= VD_STATE_INIT;
1100d10e4ef2Snarayan 	vd->max_msglen	= sizeof (vio_msg_t);	/* baseline vio message size */
1101d10e4ef2Snarayan 
11023af08d82Slm66018 	/* Allocate the staging buffer */
11033af08d82Slm66018 	vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP);
11043af08d82Slm66018 
11053af08d82Slm66018 	PR0("calling ldc_up\n");
11063af08d82Slm66018 	(void) ldc_up(vd->ldc_handle);
11073af08d82Slm66018 
1108d10e4ef2Snarayan 	vd->reset_state	= B_FALSE;
1109d10e4ef2Snarayan 	vd->reset_ldc	= B_FALSE;
11103af08d82Slm66018 
1111d10e4ef2Snarayan 	mutex_exit(&vd->lock);
1112d10e4ef2Snarayan }
1113d10e4ef2Snarayan 
11143af08d82Slm66018 static void vd_recv_msg(void *arg);
11153af08d82Slm66018 
11163af08d82Slm66018 static void
11173af08d82Slm66018 vd_mark_in_reset(vd_t *vd)
11183af08d82Slm66018 {
11193af08d82Slm66018 	int status;
11203af08d82Slm66018 
11213af08d82Slm66018 	PR0("vd_mark_in_reset: marking vd in reset\n");
11223af08d82Slm66018 
11233af08d82Slm66018 	vd_need_reset(vd, B_FALSE);
11243af08d82Slm66018 	status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd, DDI_SLEEP);
11253af08d82Slm66018 	if (status == DDI_FAILURE) {
11263af08d82Slm66018 		PR0("cannot schedule task to recv msg\n");
11273af08d82Slm66018 		vd_need_reset(vd, B_TRUE);
11283af08d82Slm66018 		return;
11293af08d82Slm66018 	}
11303af08d82Slm66018 }
11313af08d82Slm66018 
1132d10e4ef2Snarayan static int
11333c96341aSnarayan vd_mark_elem_done(vd_t *vd, int idx, int elem_status, int elem_nbytes)
1134d10e4ef2Snarayan {
1135d10e4ef2Snarayan 	boolean_t		accepted;
1136d10e4ef2Snarayan 	int			status;
1137d10e4ef2Snarayan 	vd_dring_entry_t	*elem = VD_DRING_ELEM(idx);
1138d10e4ef2Snarayan 
11393af08d82Slm66018 	if (vd->reset_state)
11403af08d82Slm66018 		return (0);
1141d10e4ef2Snarayan 
1142d10e4ef2Snarayan 	/* Acquire the element */
11433af08d82Slm66018 	if (!vd->reset_state &&
11443af08d82Slm66018 	    (status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) {
11453af08d82Slm66018 		if (status == ECONNRESET) {
11463af08d82Slm66018 			vd_mark_in_reset(vd);
11473af08d82Slm66018 			return (0);
11483af08d82Slm66018 		} else {
11493af08d82Slm66018 			PR0("ldc_mem_dring_acquire() returned errno %d",
11503af08d82Slm66018 			    status);
1151d10e4ef2Snarayan 			return (status);
1152d10e4ef2Snarayan 		}
11533af08d82Slm66018 	}
1154d10e4ef2Snarayan 
1155d10e4ef2Snarayan 	/* Set the element's status and mark it done */
1156d10e4ef2Snarayan 	accepted = (elem->hdr.dstate == VIO_DESC_ACCEPTED);
1157d10e4ef2Snarayan 	if (accepted) {
11583c96341aSnarayan 		elem->payload.nbytes	= elem_nbytes;
1159d10e4ef2Snarayan 		elem->payload.status	= elem_status;
1160d10e4ef2Snarayan 		elem->hdr.dstate	= VIO_DESC_DONE;
1161d10e4ef2Snarayan 	} else {
1162d10e4ef2Snarayan 		/* Perhaps client timed out waiting for I/O... */
11633af08d82Slm66018 		PR0("element %u no longer \"accepted\"", idx);
1164d10e4ef2Snarayan 		VD_DUMP_DRING_ELEM(elem);
1165d10e4ef2Snarayan 	}
1166d10e4ef2Snarayan 	/* Release the element */
11673af08d82Slm66018 	if (!vd->reset_state &&
11683af08d82Slm66018 	    (status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) {
11693af08d82Slm66018 		if (status == ECONNRESET) {
11703af08d82Slm66018 			vd_mark_in_reset(vd);
11713af08d82Slm66018 			return (0);
11723af08d82Slm66018 		} else {
11733af08d82Slm66018 			PR0("ldc_mem_dring_release() returned errno %d",
11743af08d82Slm66018 			    status);
1175d10e4ef2Snarayan 			return (status);
1176d10e4ef2Snarayan 		}
11773af08d82Slm66018 	}
1178d10e4ef2Snarayan 
1179d10e4ef2Snarayan 	return (accepted ? 0 : EINVAL);
1180d10e4ef2Snarayan }
1181d10e4ef2Snarayan 
1182d10e4ef2Snarayan static void
1183d10e4ef2Snarayan vd_complete_bio(void *arg)
1184d10e4ef2Snarayan {
1185d10e4ef2Snarayan 	int			status		= 0;
1186d10e4ef2Snarayan 	vd_task_t		*task		= (vd_task_t *)arg;
1187d10e4ef2Snarayan 	vd_t			*vd		= task->vd;
1188d10e4ef2Snarayan 	vd_dring_payload_t	*request	= task->request;
1189d10e4ef2Snarayan 	struct buf		*buf		= &task->buf;
1190d10e4ef2Snarayan 
1191d10e4ef2Snarayan 
1192d10e4ef2Snarayan 	ASSERT(vd != NULL);
1193d10e4ef2Snarayan 	ASSERT(request != NULL);
1194d10e4ef2Snarayan 	ASSERT(task->msg != NULL);
1195d10e4ef2Snarayan 	ASSERT(task->msglen >= sizeof (*task->msg));
11963c96341aSnarayan 	ASSERT(!vd->file);
1197d10e4ef2Snarayan 
1198d10e4ef2Snarayan 	/* Wait for the I/O to complete */
1199d10e4ef2Snarayan 	request->status = biowait(buf);
1200d10e4ef2Snarayan 
12013c96341aSnarayan 	/* return back the number of bytes read/written */
12023c96341aSnarayan 	request->nbytes = buf->b_bcount - buf->b_resid;
12033c96341aSnarayan 
12044bac2208Snarayan 	/* Release the buffer */
12053af08d82Slm66018 	if (!vd->reset_state)
12064bac2208Snarayan 		status = ldc_mem_release(task->mhdl, 0, buf->b_bcount);
12074bac2208Snarayan 	if (status) {
12083af08d82Slm66018 		PR0("ldc_mem_release() returned errno %d copying to "
12093af08d82Slm66018 		    "client", status);
12103af08d82Slm66018 		if (status == ECONNRESET) {
12113af08d82Slm66018 			vd_mark_in_reset(vd);
12123af08d82Slm66018 		}
12131ae08745Sheppo 	}
12141ae08745Sheppo 
12153af08d82Slm66018 	/* Unmap the memory, even if in reset */
12164bac2208Snarayan 	status = ldc_mem_unmap(task->mhdl);
12174bac2208Snarayan 	if (status) {
12183af08d82Slm66018 		PR0("ldc_mem_unmap() returned errno %d copying to client",
12194bac2208Snarayan 		    status);
12203af08d82Slm66018 		if (status == ECONNRESET) {
12213af08d82Slm66018 			vd_mark_in_reset(vd);
12223af08d82Slm66018 		}
12234bac2208Snarayan 	}
12244bac2208Snarayan 
1225d10e4ef2Snarayan 	biofini(buf);
12261ae08745Sheppo 
1227d10e4ef2Snarayan 	/* Update the dring element for a dring client */
12283af08d82Slm66018 	if (!vd->reset_state && (status == 0) &&
12293af08d82Slm66018 	    (vd->xfer_mode == VIO_DRING_MODE)) {
12303c96341aSnarayan 		status = vd_mark_elem_done(vd, task->index,
12313c96341aSnarayan 		    request->status, request->nbytes);
12323af08d82Slm66018 		if (status == ECONNRESET)
12333af08d82Slm66018 			vd_mark_in_reset(vd);
12343af08d82Slm66018 	}
12351ae08745Sheppo 
1236d10e4ef2Snarayan 	/*
1237d10e4ef2Snarayan 	 * If a transport error occurred, arrange to "nack" the message when
1238d10e4ef2Snarayan 	 * the final task in the descriptor element range completes
1239d10e4ef2Snarayan 	 */
1240d10e4ef2Snarayan 	if (status != 0)
1241d10e4ef2Snarayan 		task->msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
12421ae08745Sheppo 
1243d10e4ef2Snarayan 	/*
1244d10e4ef2Snarayan 	 * Only the final task for a range of elements will respond to and
1245d10e4ef2Snarayan 	 * free the message
1246d10e4ef2Snarayan 	 */
12473af08d82Slm66018 	if (task->type == VD_NONFINAL_RANGE_TASK) {
1248d10e4ef2Snarayan 		return;
12493af08d82Slm66018 	}
12501ae08745Sheppo 
1251d10e4ef2Snarayan 	/*
1252d10e4ef2Snarayan 	 * Send the "ack" or "nack" back to the client; if sending the message
1253d10e4ef2Snarayan 	 * via LDC fails, arrange to reset both the connection state and LDC
1254d10e4ef2Snarayan 	 * itself
1255d10e4ef2Snarayan 	 */
1256d10e4ef2Snarayan 	PR1("Sending %s",
1257d10e4ef2Snarayan 	    (task->msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK");
12583af08d82Slm66018 	if (!vd->reset_state) {
12593af08d82Slm66018 		status = send_msg(vd->ldc_handle, task->msg, task->msglen);
12603af08d82Slm66018 		switch (status) {
12613af08d82Slm66018 		case 0:
12623af08d82Slm66018 			break;
12633af08d82Slm66018 		case ECONNRESET:
12643af08d82Slm66018 			vd_mark_in_reset(vd);
12653af08d82Slm66018 			break;
12663af08d82Slm66018 		default:
12673af08d82Slm66018 			PR0("initiating full reset");
1268d10e4ef2Snarayan 			vd_need_reset(vd, B_TRUE);
12693af08d82Slm66018 			break;
12703af08d82Slm66018 		}
12713af08d82Slm66018 	}
12721ae08745Sheppo }
12731ae08745Sheppo 
12740a55fbb7Slm66018 static void
12750a55fbb7Slm66018 vd_geom2dk_geom(void *vd_buf, void *ioctl_arg)
12760a55fbb7Slm66018 {
12770a55fbb7Slm66018 	VD_GEOM2DK_GEOM((vd_geom_t *)vd_buf, (struct dk_geom *)ioctl_arg);
12780a55fbb7Slm66018 }
12790a55fbb7Slm66018 
12800a55fbb7Slm66018 static void
12810a55fbb7Slm66018 vd_vtoc2vtoc(void *vd_buf, void *ioctl_arg)
12820a55fbb7Slm66018 {
12830a55fbb7Slm66018 	VD_VTOC2VTOC((vd_vtoc_t *)vd_buf, (struct vtoc *)ioctl_arg);
12840a55fbb7Slm66018 }
12850a55fbb7Slm66018 
12860a55fbb7Slm66018 static void
12870a55fbb7Slm66018 dk_geom2vd_geom(void *ioctl_arg, void *vd_buf)
12880a55fbb7Slm66018 {
12890a55fbb7Slm66018 	DK_GEOM2VD_GEOM((struct dk_geom *)ioctl_arg, (vd_geom_t *)vd_buf);
12900a55fbb7Slm66018 }
12910a55fbb7Slm66018 
12920a55fbb7Slm66018 static void
12930a55fbb7Slm66018 vtoc2vd_vtoc(void *ioctl_arg, void *vd_buf)
12940a55fbb7Slm66018 {
12950a55fbb7Slm66018 	VTOC2VD_VTOC((struct vtoc *)ioctl_arg, (vd_vtoc_t *)vd_buf);
12960a55fbb7Slm66018 }
12970a55fbb7Slm66018 
12984bac2208Snarayan static void
12994bac2208Snarayan vd_get_efi_in(void *vd_buf, void *ioctl_arg)
13004bac2208Snarayan {
13014bac2208Snarayan 	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
13024bac2208Snarayan 	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;
13034bac2208Snarayan 
13044bac2208Snarayan 	dk_efi->dki_lba = vd_efi->lba;
13054bac2208Snarayan 	dk_efi->dki_length = vd_efi->length;
13064bac2208Snarayan 	dk_efi->dki_data = kmem_zalloc(vd_efi->length, KM_SLEEP);
13074bac2208Snarayan }
13084bac2208Snarayan 
13094bac2208Snarayan static void
13104bac2208Snarayan vd_get_efi_out(void *ioctl_arg, void *vd_buf)
13114bac2208Snarayan {
13124bac2208Snarayan 	int len;
13134bac2208Snarayan 	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
13144bac2208Snarayan 	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;
13154bac2208Snarayan 
13164bac2208Snarayan 	len = vd_efi->length;
13174bac2208Snarayan 	DK_EFI2VD_EFI(dk_efi, vd_efi);
13184bac2208Snarayan 	kmem_free(dk_efi->dki_data, len);
13194bac2208Snarayan }
13204bac2208Snarayan 
13214bac2208Snarayan static void
13224bac2208Snarayan vd_set_efi_in(void *vd_buf, void *ioctl_arg)
13234bac2208Snarayan {
13244bac2208Snarayan 	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
13254bac2208Snarayan 	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;
13264bac2208Snarayan 
13274bac2208Snarayan 	dk_efi->dki_data = kmem_alloc(vd_efi->length, KM_SLEEP);
13284bac2208Snarayan 	VD_EFI2DK_EFI(vd_efi, dk_efi);
13294bac2208Snarayan }
13304bac2208Snarayan 
13314bac2208Snarayan static void
13324bac2208Snarayan vd_set_efi_out(void *ioctl_arg, void *vd_buf)
13334bac2208Snarayan {
13344bac2208Snarayan 	vd_efi_t *vd_efi = (vd_efi_t *)vd_buf;
13354bac2208Snarayan 	dk_efi_t *dk_efi = (dk_efi_t *)ioctl_arg;
13364bac2208Snarayan 
13374bac2208Snarayan 	kmem_free(dk_efi->dki_data, vd_efi->length);
13384bac2208Snarayan }
13394bac2208Snarayan 
13404bac2208Snarayan static int
13414bac2208Snarayan vd_read_vtoc(ldi_handle_t handle, struct vtoc *vtoc, vd_disk_label_t *label)
13424bac2208Snarayan {
13434bac2208Snarayan 	int status, rval;
13444bac2208Snarayan 	struct dk_gpt *efi;
13454bac2208Snarayan 	size_t efi_len;
13464bac2208Snarayan 
13474bac2208Snarayan 	*label = VD_DISK_LABEL_UNK;
13484bac2208Snarayan 
13494bac2208Snarayan 	status = ldi_ioctl(handle, DKIOCGVTOC, (intptr_t)vtoc,
13504bac2208Snarayan 	    (vd_open_flags | FKIOCTL), kcred, &rval);
13514bac2208Snarayan 
13524bac2208Snarayan 	if (status == 0) {
13534bac2208Snarayan 		*label = VD_DISK_LABEL_VTOC;
13544bac2208Snarayan 		return (0);
13554bac2208Snarayan 	} else if (status != ENOTSUP) {
13563af08d82Slm66018 		PR0("ldi_ioctl(DKIOCGVTOC) returned error %d", status);
13574bac2208Snarayan 		return (status);
13584bac2208Snarayan 	}
13594bac2208Snarayan 
13604bac2208Snarayan 	status = vds_efi_alloc_and_read(handle, &efi, &efi_len);
13614bac2208Snarayan 
13624bac2208Snarayan 	if (status) {
13633af08d82Slm66018 		PR0("vds_efi_alloc_and_read returned error %d", status);
13644bac2208Snarayan 		return (status);
13654bac2208Snarayan 	}
13664bac2208Snarayan 
13674bac2208Snarayan 	*label = VD_DISK_LABEL_EFI;
13684bac2208Snarayan 	vd_efi_to_vtoc(efi, vtoc);
13694bac2208Snarayan 	vd_efi_free(efi, efi_len);
13704bac2208Snarayan 
13714bac2208Snarayan 	return (0);
13724bac2208Snarayan }
13734bac2208Snarayan 
1374690555a1Sachartre static ushort_t
13753c96341aSnarayan vd_lbl2cksum(struct dk_label *label)
13763c96341aSnarayan {
13773c96341aSnarayan 	int	count;
1378690555a1Sachartre 	ushort_t sum, *sp;
13793c96341aSnarayan 
13803c96341aSnarayan 	count =	(sizeof (struct dk_label)) / (sizeof (short)) - 1;
1381690555a1Sachartre 	sp = (ushort_t *)label;
13823c96341aSnarayan 	sum = 0;
13833c96341aSnarayan 	while (count--) {
13843c96341aSnarayan 		sum ^= *sp++;
13853c96341aSnarayan 	}
13863c96341aSnarayan 
13873c96341aSnarayan 	return (sum);
13883c96341aSnarayan }
13893c96341aSnarayan 
1390*87a7269eSachartre /*
1391*87a7269eSachartre  * Handle ioctls to a disk slice.
1392*87a7269eSachartre  */
13931ae08745Sheppo static int
13940a55fbb7Slm66018 vd_do_slice_ioctl(vd_t *vd, int cmd, void *ioctl_arg)
13951ae08745Sheppo {
13964bac2208Snarayan 	dk_efi_t *dk_ioc;
13974bac2208Snarayan 
13984bac2208Snarayan 	switch (vd->vdisk_label) {
13994bac2208Snarayan 
1400*87a7269eSachartre 	/* ioctls for a slice from a disk with a VTOC label */
14014bac2208Snarayan 	case VD_DISK_LABEL_VTOC:
14024bac2208Snarayan 
14031ae08745Sheppo 		switch (cmd) {
14041ae08745Sheppo 		case DKIOCGGEOM:
14050a55fbb7Slm66018 			ASSERT(ioctl_arg != NULL);
14060a55fbb7Slm66018 			bcopy(&vd->dk_geom, ioctl_arg, sizeof (vd->dk_geom));
14071ae08745Sheppo 			return (0);
14081ae08745Sheppo 		case DKIOCGVTOC:
14090a55fbb7Slm66018 			ASSERT(ioctl_arg != NULL);
14100a55fbb7Slm66018 			bcopy(&vd->vtoc, ioctl_arg, sizeof (vd->vtoc));
14111ae08745Sheppo 			return (0);
1412*87a7269eSachartre 		default:
14133c96341aSnarayan 			return (ENOTSUP);
1414*87a7269eSachartre 		}
1415*87a7269eSachartre 
1416*87a7269eSachartre 	/* ioctls for a slice from a disk with an EFI label */
1417*87a7269eSachartre 	case VD_DISK_LABEL_EFI:
1418*87a7269eSachartre 
1419*87a7269eSachartre 		switch (cmd) {
1420*87a7269eSachartre 		case DKIOCGETEFI:
14213c96341aSnarayan 			ASSERT(ioctl_arg != NULL);
1422*87a7269eSachartre 			dk_ioc = (dk_efi_t *)ioctl_arg;
1423*87a7269eSachartre 			if (dk_ioc->dki_length < vd->dk_efi.dki_length)
1424*87a7269eSachartre 				return (EINVAL);
1425*87a7269eSachartre 			bcopy(vd->dk_efi.dki_data, dk_ioc->dki_data,
1426*87a7269eSachartre 			    vd->dk_efi.dki_length);
1427*87a7269eSachartre 			return (0);
1428*87a7269eSachartre 		default:
1429*87a7269eSachartre 			return (ENOTSUP);
1430*87a7269eSachartre 		}
1431*87a7269eSachartre 
1432*87a7269eSachartre 	default:
1433*87a7269eSachartre 		return (ENOTSUP);
1434*87a7269eSachartre 	}
1435*87a7269eSachartre }
1436*87a7269eSachartre 
1437*87a7269eSachartre /*
1438*87a7269eSachartre  * Handle ioctls to a disk image.
1439*87a7269eSachartre  */
1440*87a7269eSachartre static int
1441*87a7269eSachartre vd_do_file_ioctl(vd_t *vd, int cmd, void *ioctl_arg)
1442*87a7269eSachartre {
1443*87a7269eSachartre 	struct dk_label label;
1444*87a7269eSachartre 	struct dk_geom *geom;
1445*87a7269eSachartre 	struct vtoc *vtoc;
1446*87a7269eSachartre 	int i, rc;
1447*87a7269eSachartre 
1448*87a7269eSachartre 	ASSERT(vd->file);
1449*87a7269eSachartre 	ASSERT(vd->vdisk_label == VD_DISK_LABEL_VTOC);
1450*87a7269eSachartre 
1451*87a7269eSachartre 	switch (cmd) {
1452*87a7269eSachartre 
1453*87a7269eSachartre 	case DKIOCGGEOM:
1454*87a7269eSachartre 		ASSERT(ioctl_arg != NULL);
1455*87a7269eSachartre 		geom = (struct dk_geom *)ioctl_arg;
1456*87a7269eSachartre 
1457*87a7269eSachartre 		if (VD_FILE_LABEL_READ(vd, &label) < 0)
1458*87a7269eSachartre 			return (EIO);
1459*87a7269eSachartre 
1460*87a7269eSachartre 		if (label.dkl_magic != DKL_MAGIC ||
1461*87a7269eSachartre 		    label.dkl_cksum != vd_lbl2cksum(&label))
1462*87a7269eSachartre 			return (EINVAL);
1463*87a7269eSachartre 
1464*87a7269eSachartre 		bzero(geom, sizeof (struct dk_geom));
1465*87a7269eSachartre 		geom->dkg_ncyl = label.dkl_ncyl;
1466*87a7269eSachartre 		geom->dkg_acyl = label.dkl_acyl;
1467*87a7269eSachartre 		geom->dkg_nhead = label.dkl_nhead;
1468*87a7269eSachartre 		geom->dkg_nsect = label.dkl_nsect;
1469*87a7269eSachartre 		geom->dkg_intrlv = label.dkl_intrlv;
1470*87a7269eSachartre 		geom->dkg_apc = label.dkl_apc;
1471*87a7269eSachartre 		geom->dkg_rpm = label.dkl_rpm;
1472*87a7269eSachartre 		geom->dkg_pcyl = label.dkl_pcyl;
1473*87a7269eSachartre 		geom->dkg_write_reinstruct = label.dkl_write_reinstruct;
1474*87a7269eSachartre 		geom->dkg_read_reinstruct = label.dkl_read_reinstruct;
1475*87a7269eSachartre 
1476*87a7269eSachartre 		return (0);
1477*87a7269eSachartre 
1478*87a7269eSachartre 	case DKIOCGVTOC:
1479*87a7269eSachartre 		ASSERT(ioctl_arg != NULL);
1480*87a7269eSachartre 		vtoc = (struct vtoc *)ioctl_arg;
1481*87a7269eSachartre 
1482*87a7269eSachartre 		if (VD_FILE_LABEL_READ(vd, &label) < 0)
1483*87a7269eSachartre 			return (EIO);
1484*87a7269eSachartre 
1485*87a7269eSachartre 		if (label.dkl_magic != DKL_MAGIC ||
1486*87a7269eSachartre 		    label.dkl_cksum != vd_lbl2cksum(&label))
1487*87a7269eSachartre 			return (EINVAL);
1488*87a7269eSachartre 
1489*87a7269eSachartre 		bzero(vtoc, sizeof (struct vtoc));
1490*87a7269eSachartre 
1491*87a7269eSachartre 		vtoc->v_sanity = label.dkl_vtoc.v_sanity;
1492*87a7269eSachartre 		vtoc->v_version = label.dkl_vtoc.v_version;
1493*87a7269eSachartre 		vtoc->v_sectorsz = DEV_BSIZE;
1494*87a7269eSachartre 		vtoc->v_nparts = label.dkl_vtoc.v_nparts;
1495*87a7269eSachartre 
1496*87a7269eSachartre 		for (i = 0; i < vtoc->v_nparts; i++) {
1497*87a7269eSachartre 			vtoc->v_part[i].p_tag =
1498*87a7269eSachartre 			    label.dkl_vtoc.v_part[i].p_tag;
1499*87a7269eSachartre 			vtoc->v_part[i].p_flag =
1500*87a7269eSachartre 			    label.dkl_vtoc.v_part[i].p_flag;
1501*87a7269eSachartre 			vtoc->v_part[i].p_start =
1502*87a7269eSachartre 			    label.dkl_map[i].dkl_cylno *
1503*87a7269eSachartre 			    (label.dkl_nhead * label.dkl_nsect);
1504*87a7269eSachartre 			vtoc->v_part[i].p_size = label.dkl_map[i].dkl_nblk;
1505*87a7269eSachartre 			vtoc->timestamp[i] =
1506*87a7269eSachartre 			    label.dkl_vtoc.v_timestamp[i];
1507*87a7269eSachartre 		}
1508*87a7269eSachartre 		/*
1509*87a7269eSachartre 		 * The bootinfo array can not be copied with bcopy() because
1510*87a7269eSachartre 		 * elements are of type long in vtoc (so 64-bit) and of type
1511*87a7269eSachartre 		 * int in dk_vtoc (so 32-bit).
1512*87a7269eSachartre 		 */
1513*87a7269eSachartre 		vtoc->v_bootinfo[0] = label.dkl_vtoc.v_bootinfo[0];
1514*87a7269eSachartre 		vtoc->v_bootinfo[1] = label.dkl_vtoc.v_bootinfo[1];
1515*87a7269eSachartre 		vtoc->v_bootinfo[2] = label.dkl_vtoc.v_bootinfo[2];
1516*87a7269eSachartre 		bcopy(label.dkl_asciilabel, vtoc->v_asciilabel,
1517*87a7269eSachartre 		    LEN_DKL_ASCII);
1518*87a7269eSachartre 		bcopy(label.dkl_vtoc.v_volume, vtoc->v_volume,
1519*87a7269eSachartre 		    LEN_DKL_VVOL);
1520*87a7269eSachartre 
1521*87a7269eSachartre 		return (0);
1522*87a7269eSachartre 
1523*87a7269eSachartre 	case DKIOCSGEOM:
1524*87a7269eSachartre 		ASSERT(ioctl_arg != NULL);
1525*87a7269eSachartre 		geom = (struct dk_geom *)ioctl_arg;
1526*87a7269eSachartre 
1527*87a7269eSachartre 		if (geom->dkg_nhead == 0 || geom->dkg_nsect == 0)
1528*87a7269eSachartre 			return (EINVAL);
1529*87a7269eSachartre 
1530*87a7269eSachartre 		/*
1531*87a7269eSachartre 		 * The current device geometry is not updated, just the driver
1532*87a7269eSachartre 		 * "notion" of it. The device geometry will be effectively
1533*87a7269eSachartre 		 * updated when a label is written to the device during a next
1534*87a7269eSachartre 		 * DKIOCSVTOC.
1535*87a7269eSachartre 		 */
1536*87a7269eSachartre 		bcopy(ioctl_arg, &vd->dk_geom, sizeof (vd->dk_geom));
1537*87a7269eSachartre 		return (0);
1538*87a7269eSachartre 
1539*87a7269eSachartre 	case DKIOCSVTOC:
1540*87a7269eSachartre 		ASSERT(ioctl_arg != NULL);
1541*87a7269eSachartre 		ASSERT(vd->dk_geom.dkg_nhead != 0 &&
1542*87a7269eSachartre 		    vd->dk_geom.dkg_nsect != 0);
1543690555a1Sachartre 		vtoc = (struct vtoc *)ioctl_arg;
1544690555a1Sachartre 
1545690555a1Sachartre 		if (vtoc->v_sanity != VTOC_SANE ||
1546690555a1Sachartre 		    vtoc->v_sectorsz != DEV_BSIZE ||
1547690555a1Sachartre 		    vtoc->v_nparts != V_NUMPAR)
1548690555a1Sachartre 			return (EINVAL);
1549690555a1Sachartre 
1550690555a1Sachartre 		bzero(&label, sizeof (label));
1551690555a1Sachartre 		label.dkl_ncyl = vd->dk_geom.dkg_ncyl;
1552690555a1Sachartre 		label.dkl_acyl = vd->dk_geom.dkg_acyl;
1553690555a1Sachartre 		label.dkl_pcyl = vd->dk_geom.dkg_pcyl;
1554690555a1Sachartre 		label.dkl_nhead = vd->dk_geom.dkg_nhead;
1555690555a1Sachartre 		label.dkl_nsect = vd->dk_geom.dkg_nsect;
1556690555a1Sachartre 		label.dkl_intrlv = vd->dk_geom.dkg_intrlv;
1557690555a1Sachartre 		label.dkl_apc = vd->dk_geom.dkg_apc;
1558690555a1Sachartre 		label.dkl_rpm = vd->dk_geom.dkg_rpm;
1559*87a7269eSachartre 		label.dkl_write_reinstruct = vd->dk_geom.dkg_write_reinstruct;
1560*87a7269eSachartre 		label.dkl_read_reinstruct = vd->dk_geom.dkg_read_reinstruct;
1561690555a1Sachartre 
1562*87a7269eSachartre 		label.dkl_vtoc.v_nparts = V_NUMPAR;
1563*87a7269eSachartre 		label.dkl_vtoc.v_sanity = VTOC_SANE;
1564690555a1Sachartre 		label.dkl_vtoc.v_version = vtoc->v_version;
1565*87a7269eSachartre 		for (i = 0; i < V_NUMPAR; i++) {
1566690555a1Sachartre 			label.dkl_vtoc.v_timestamp[i] =
1567690555a1Sachartre 			    vtoc->timestamp[i];
1568690555a1Sachartre 			label.dkl_vtoc.v_part[i].p_tag =
1569690555a1Sachartre 			    vtoc->v_part[i].p_tag;
1570690555a1Sachartre 			label.dkl_vtoc.v_part[i].p_flag =
1571690555a1Sachartre 			    vtoc->v_part[i].p_flag;
1572690555a1Sachartre 			label.dkl_map[i].dkl_cylno =
1573690555a1Sachartre 			    vtoc->v_part[i].p_start /
1574690555a1Sachartre 			    (label.dkl_nhead * label.dkl_nsect);
1575690555a1Sachartre 			label.dkl_map[i].dkl_nblk =
1576690555a1Sachartre 			    vtoc->v_part[i].p_size;
15773c96341aSnarayan 		}
1578*87a7269eSachartre 		/*
1579*87a7269eSachartre 		 * The bootinfo array can not be copied with bcopy() because
1580*87a7269eSachartre 		 * elements are of type long in vtoc (so 64-bit) and of type
1581*87a7269eSachartre 		 * int in dk_vtoc (so 32-bit).
1582*87a7269eSachartre 		 */
1583*87a7269eSachartre 		label.dkl_vtoc.v_bootinfo[0] = vtoc->v_bootinfo[0];
1584*87a7269eSachartre 		label.dkl_vtoc.v_bootinfo[1] = vtoc->v_bootinfo[1];
1585*87a7269eSachartre 		label.dkl_vtoc.v_bootinfo[2] = vtoc->v_bootinfo[2];
1586690555a1Sachartre 		bcopy(vtoc->v_asciilabel, label.dkl_asciilabel,
1587690555a1Sachartre 		    LEN_DKL_ASCII);
1588690555a1Sachartre 		bcopy(vtoc->v_volume, label.dkl_vtoc.v_volume,
1589690555a1Sachartre 		    LEN_DKL_VVOL);
15903c96341aSnarayan 
15913c96341aSnarayan 		/* re-compute checksum */
1592690555a1Sachartre 		label.dkl_magic = DKL_MAGIC;
1593690555a1Sachartre 		label.dkl_cksum = vd_lbl2cksum(&label);
1594690555a1Sachartre 
1595*87a7269eSachartre 		/* write label to the disk image */
1596*87a7269eSachartre 		if ((rc = vd_file_set_vtoc(vd, &label)) != 0)
1597*87a7269eSachartre 			return (rc);
1598690555a1Sachartre 
1599690555a1Sachartre 		/* update the cached vdisk VTOC */
1600690555a1Sachartre 		bcopy(vtoc, &vd->vtoc, sizeof (vd->vtoc));
16013c96341aSnarayan 
1602*87a7269eSachartre 		/*
1603*87a7269eSachartre 		 * The disk geometry may have changed, so we need to write
1604*87a7269eSachartre 		 * the devid (if there is one) so that it is stored at the
1605*87a7269eSachartre 		 * right location.
1606*87a7269eSachartre 		 */
1607*87a7269eSachartre 		if (vd->file_devid != NULL &&
1608*87a7269eSachartre 		    vd_file_write_devid(vd, vd->file_devid) != 0) {
1609*87a7269eSachartre 			PR0("Fail to write devid");
16101ae08745Sheppo 		}
16114bac2208Snarayan 
16124bac2208Snarayan 		return (0);
16134bac2208Snarayan 
16144bac2208Snarayan 	default:
16154bac2208Snarayan 		return (ENOTSUP);
16164bac2208Snarayan 	}
16171ae08745Sheppo }
16181ae08745Sheppo 
16191ae08745Sheppo static int
16200a55fbb7Slm66018 vd_do_ioctl(vd_t *vd, vd_dring_payload_t *request, void* buf, vd_ioctl_t *ioctl)
16211ae08745Sheppo {
16221ae08745Sheppo 	int	rval = 0, status;
16231ae08745Sheppo 	size_t	nbytes = request->nbytes;	/* modifiable copy */
16241ae08745Sheppo 
16251ae08745Sheppo 
16261ae08745Sheppo 	ASSERT(request->slice < vd->nslices);
16271ae08745Sheppo 	PR0("Performing %s", ioctl->operation_name);
16281ae08745Sheppo 
16290a55fbb7Slm66018 	/* Get data from client and convert, if necessary */
16300a55fbb7Slm66018 	if (ioctl->copyin != NULL)  {
16311ae08745Sheppo 		ASSERT(nbytes != 0 && buf != NULL);
16321ae08745Sheppo 		PR1("Getting \"arg\" data from client");
16331ae08745Sheppo 		if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
16341ae08745Sheppo 		    request->cookie, request->ncookies,
16351ae08745Sheppo 		    LDC_COPY_IN)) != 0) {
16363af08d82Slm66018 			PR0("ldc_mem_copy() returned errno %d "
16371ae08745Sheppo 			    "copying from client", status);
16381ae08745Sheppo 			return (status);
16391ae08745Sheppo 		}
16400a55fbb7Slm66018 
16410a55fbb7Slm66018 		/* Convert client's data, if necessary */
16420a55fbb7Slm66018 		if (ioctl->copyin == VD_IDENTITY)	/* use client buffer */
16430a55fbb7Slm66018 			ioctl->arg = buf;
16440a55fbb7Slm66018 		else	/* convert client vdisk operation data to ioctl data */
16450a55fbb7Slm66018 			(ioctl->copyin)(buf, (void *)ioctl->arg);
16461ae08745Sheppo 	}
16471ae08745Sheppo 
16481ae08745Sheppo 	/*
16491ae08745Sheppo 	 * Handle single-slice block devices internally; otherwise, have the
16501ae08745Sheppo 	 * real driver perform the ioctl()
16511ae08745Sheppo 	 */
1652*87a7269eSachartre 	if (vd->file) {
1653*87a7269eSachartre 		if ((status = vd_do_file_ioctl(vd, ioctl->cmd,
1654*87a7269eSachartre 		    (void *)ioctl->arg)) != 0)
1655*87a7269eSachartre 			return (status);
1656*87a7269eSachartre 	} else if (vd->vdisk_type == VD_DISK_TYPE_SLICE && !vd->pseudo) {
16570a55fbb7Slm66018 		if ((status = vd_do_slice_ioctl(vd, ioctl->cmd,
16580a55fbb7Slm66018 		    (void *)ioctl->arg)) != 0)
16591ae08745Sheppo 			return (status);
16601ae08745Sheppo 	} else if ((status = ldi_ioctl(vd->ldi_handle[request->slice],
1661d10e4ef2Snarayan 	    ioctl->cmd, (intptr_t)ioctl->arg, (vd_open_flags | FKIOCTL),
1662d10e4ef2Snarayan 	    kcred, &rval)) != 0) {
16631ae08745Sheppo 		PR0("ldi_ioctl(%s) = errno %d", ioctl->cmd_name, status);
16641ae08745Sheppo 		return (status);
16651ae08745Sheppo 	}
16661ae08745Sheppo #ifdef DEBUG
16671ae08745Sheppo 	if (rval != 0) {
16683af08d82Slm66018 		PR0("%s set rval = %d, which is not being returned to client",
16691ae08745Sheppo 		    ioctl->cmd_name, rval);
16701ae08745Sheppo 	}
16711ae08745Sheppo #endif /* DEBUG */
16721ae08745Sheppo 
16730a55fbb7Slm66018 	/* Convert data and send to client, if necessary */
16740a55fbb7Slm66018 	if (ioctl->copyout != NULL)  {
16751ae08745Sheppo 		ASSERT(nbytes != 0 && buf != NULL);
16761ae08745Sheppo 		PR1("Sending \"arg\" data to client");
16770a55fbb7Slm66018 
16780a55fbb7Slm66018 		/* Convert ioctl data to vdisk operation data, if necessary */
16790a55fbb7Slm66018 		if (ioctl->copyout != VD_IDENTITY)
16800a55fbb7Slm66018 			(ioctl->copyout)((void *)ioctl->arg, buf);
16810a55fbb7Slm66018 
16821ae08745Sheppo 		if ((status = ldc_mem_copy(vd->ldc_handle, buf, 0, &nbytes,
16831ae08745Sheppo 		    request->cookie, request->ncookies,
16841ae08745Sheppo 		    LDC_COPY_OUT)) != 0) {
16853af08d82Slm66018 			PR0("ldc_mem_copy() returned errno %d "
16861ae08745Sheppo 			    "copying to client", status);
16871ae08745Sheppo 			return (status);
16881ae08745Sheppo 		}
16891ae08745Sheppo 	}
16901ae08745Sheppo 
16911ae08745Sheppo 	return (status);
16921ae08745Sheppo }
16931ae08745Sheppo 
16941ae08745Sheppo #define	RNDSIZE(expr) P2ROUNDUP(sizeof (expr), sizeof (uint64_t))
16951ae08745Sheppo static int
1696d10e4ef2Snarayan vd_ioctl(vd_task_t *task)
16971ae08745Sheppo {
1698*87a7269eSachartre 	int			i, status;
16991ae08745Sheppo 	void			*buf = NULL;
17000a55fbb7Slm66018 	struct dk_geom		dk_geom = {0};
17010a55fbb7Slm66018 	struct vtoc		vtoc = {0};
17024bac2208Snarayan 	struct dk_efi		dk_efi = {0};
1703d10e4ef2Snarayan 	vd_t			*vd		= task->vd;
1704d10e4ef2Snarayan 	vd_dring_payload_t	*request	= task->request;
17050a55fbb7Slm66018 	vd_ioctl_t		ioctl[] = {
17060a55fbb7Slm66018 		/* Command (no-copy) operations */
17070a55fbb7Slm66018 		{VD_OP_FLUSH, STRINGIZE(VD_OP_FLUSH), 0,
17080a55fbb7Slm66018 		    DKIOCFLUSHWRITECACHE, STRINGIZE(DKIOCFLUSHWRITECACHE),
17090a55fbb7Slm66018 		    NULL, NULL, NULL},
17100a55fbb7Slm66018 
17110a55fbb7Slm66018 		/* "Get" (copy-out) operations */
17120a55fbb7Slm66018 		{VD_OP_GET_WCE, STRINGIZE(VD_OP_GET_WCE), RNDSIZE(int),
17130a55fbb7Slm66018 		    DKIOCGETWCE, STRINGIZE(DKIOCGETWCE),
17144bac2208Snarayan 		    NULL, VD_IDENTITY, VD_IDENTITY},
17150a55fbb7Slm66018 		{VD_OP_GET_DISKGEOM, STRINGIZE(VD_OP_GET_DISKGEOM),
17160a55fbb7Slm66018 		    RNDSIZE(vd_geom_t),
17170a55fbb7Slm66018 		    DKIOCGGEOM, STRINGIZE(DKIOCGGEOM),
17180a55fbb7Slm66018 		    &dk_geom, NULL, dk_geom2vd_geom},
17190a55fbb7Slm66018 		{VD_OP_GET_VTOC, STRINGIZE(VD_OP_GET_VTOC), RNDSIZE(vd_vtoc_t),
17200a55fbb7Slm66018 		    DKIOCGVTOC, STRINGIZE(DKIOCGVTOC),
17210a55fbb7Slm66018 		    &vtoc, NULL, vtoc2vd_vtoc},
17224bac2208Snarayan 		{VD_OP_GET_EFI, STRINGIZE(VD_OP_GET_EFI), RNDSIZE(vd_efi_t),
17234bac2208Snarayan 		    DKIOCGETEFI, STRINGIZE(DKIOCGETEFI),
17244bac2208Snarayan 		    &dk_efi, vd_get_efi_in, vd_get_efi_out},
17250a55fbb7Slm66018 
17260a55fbb7Slm66018 		/* "Set" (copy-in) operations */
17270a55fbb7Slm66018 		{VD_OP_SET_WCE, STRINGIZE(VD_OP_SET_WCE), RNDSIZE(int),
17280a55fbb7Slm66018 		    DKIOCSETWCE, STRINGIZE(DKIOCSETWCE),
17294bac2208Snarayan 		    NULL, VD_IDENTITY, VD_IDENTITY},
17300a55fbb7Slm66018 		{VD_OP_SET_DISKGEOM, STRINGIZE(VD_OP_SET_DISKGEOM),
17310a55fbb7Slm66018 		    RNDSIZE(vd_geom_t),
17320a55fbb7Slm66018 		    DKIOCSGEOM, STRINGIZE(DKIOCSGEOM),
17330a55fbb7Slm66018 		    &dk_geom, vd_geom2dk_geom, NULL},
17340a55fbb7Slm66018 		{VD_OP_SET_VTOC, STRINGIZE(VD_OP_SET_VTOC), RNDSIZE(vd_vtoc_t),
17350a55fbb7Slm66018 		    DKIOCSVTOC, STRINGIZE(DKIOCSVTOC),
17360a55fbb7Slm66018 		    &vtoc, vd_vtoc2vtoc, NULL},
17374bac2208Snarayan 		{VD_OP_SET_EFI, STRINGIZE(VD_OP_SET_EFI), RNDSIZE(vd_efi_t),
17384bac2208Snarayan 		    DKIOCSETEFI, STRINGIZE(DKIOCSETEFI),
17394bac2208Snarayan 		    &dk_efi, vd_set_efi_in, vd_set_efi_out},
17400a55fbb7Slm66018 	};
17411ae08745Sheppo 	size_t		nioctls = (sizeof (ioctl))/(sizeof (ioctl[0]));
17421ae08745Sheppo 
17431ae08745Sheppo 
1744d10e4ef2Snarayan 	ASSERT(vd != NULL);
1745d10e4ef2Snarayan 	ASSERT(request != NULL);
17461ae08745Sheppo 	ASSERT(request->slice < vd->nslices);
17471ae08745Sheppo 
17481ae08745Sheppo 	/*
17491ae08745Sheppo 	 * Determine ioctl corresponding to caller's "operation" and
17501ae08745Sheppo 	 * validate caller's "nbytes"
17511ae08745Sheppo 	 */
17521ae08745Sheppo 	for (i = 0; i < nioctls; i++) {
17531ae08745Sheppo 		if (request->operation == ioctl[i].operation) {
17540a55fbb7Slm66018 			/* LDC memory operations require 8-byte multiples */
17550a55fbb7Slm66018 			ASSERT(ioctl[i].nbytes % sizeof (uint64_t) == 0);
17560a55fbb7Slm66018 
17574bac2208Snarayan 			if (request->operation == VD_OP_GET_EFI ||
17584bac2208Snarayan 			    request->operation == VD_OP_SET_EFI) {
17594bac2208Snarayan 				if (request->nbytes >= ioctl[i].nbytes)
17604bac2208Snarayan 					break;
17613af08d82Slm66018 				PR0("%s:  Expected at least nbytes = %lu, "
17624bac2208Snarayan 				    "got %lu", ioctl[i].operation_name,
17634bac2208Snarayan 				    ioctl[i].nbytes, request->nbytes);
17644bac2208Snarayan 				return (EINVAL);
17654bac2208Snarayan 			}
17664bac2208Snarayan 
17670a55fbb7Slm66018 			if (request->nbytes != ioctl[i].nbytes) {
17683af08d82Slm66018 				PR0("%s:  Expected nbytes = %lu, got %lu",
17690a55fbb7Slm66018 				    ioctl[i].operation_name, ioctl[i].nbytes,
17700a55fbb7Slm66018 				    request->nbytes);
17711ae08745Sheppo 				return (EINVAL);
17721ae08745Sheppo 			}
17731ae08745Sheppo 
17741ae08745Sheppo 			break;
17751ae08745Sheppo 		}
17761ae08745Sheppo 	}
17771ae08745Sheppo 	ASSERT(i < nioctls);	/* because "operation" already validated */
17781ae08745Sheppo 
17791ae08745Sheppo 	if (request->nbytes)
17801ae08745Sheppo 		buf = kmem_zalloc(request->nbytes, KM_SLEEP);
17811ae08745Sheppo 	status = vd_do_ioctl(vd, request, buf, &ioctl[i]);
17821ae08745Sheppo 	if (request->nbytes)
17831ae08745Sheppo 		kmem_free(buf, request->nbytes);
1784*87a7269eSachartre 
1785d10e4ef2Snarayan 	PR0("Returning %d", status);
17861ae08745Sheppo 	return (status);
17871ae08745Sheppo }
17881ae08745Sheppo 
17894bac2208Snarayan static int
17904bac2208Snarayan vd_get_devid(vd_task_t *task)
17914bac2208Snarayan {
17924bac2208Snarayan 	vd_t *vd = task->vd;
17934bac2208Snarayan 	vd_dring_payload_t *request = task->request;
17944bac2208Snarayan 	vd_devid_t *vd_devid;
17954bac2208Snarayan 	impl_devid_t *devid;
1796*87a7269eSachartre 	int status, bufid_len, devid_len, len, sz;
17973af08d82Slm66018 	int bufbytes;
17984bac2208Snarayan 
17993af08d82Slm66018 	PR1("Get Device ID, nbytes=%ld", request->nbytes);
18004bac2208Snarayan 
18013c96341aSnarayan 	if (vd->file) {
1802*87a7269eSachartre 		if (vd->file_devid == NULL) {
18033af08d82Slm66018 			PR2("No Device ID");
18044bac2208Snarayan 			return (ENOENT);
1805*87a7269eSachartre 		} else {
1806*87a7269eSachartre 			sz = ddi_devid_sizeof(vd->file_devid);
1807*87a7269eSachartre 			devid = kmem_alloc(sz, KM_SLEEP);
1808*87a7269eSachartre 			bcopy(vd->file_devid, devid, sz);
1809*87a7269eSachartre 		}
1810*87a7269eSachartre 	} else {
1811*87a7269eSachartre 		if (ddi_lyr_get_devid(vd->dev[request->slice],
1812*87a7269eSachartre 		    (ddi_devid_t *)&devid) != DDI_SUCCESS) {
1813*87a7269eSachartre 			PR2("No Device ID");
1814*87a7269eSachartre 			return (ENOENT);
1815*87a7269eSachartre 		}
18164bac2208Snarayan 	}
18174bac2208Snarayan 
18184bac2208Snarayan 	bufid_len = request->nbytes - sizeof (vd_devid_t) + 1;
18194bac2208Snarayan 	devid_len = DEVID_GETLEN(devid);
18204bac2208Snarayan 
18213af08d82Slm66018 	/*
18223af08d82Slm66018 	 * Save the buffer size here for use in deallocation.
18233af08d82Slm66018 	 * The actual number of bytes copied is returned in
18243af08d82Slm66018 	 * the 'nbytes' field of the request structure.
18253af08d82Slm66018 	 */
18263af08d82Slm66018 	bufbytes = request->nbytes;
18273af08d82Slm66018 
18283af08d82Slm66018 	vd_devid = kmem_zalloc(bufbytes, KM_SLEEP);
18294bac2208Snarayan 	vd_devid->length = devid_len;
18304bac2208Snarayan 	vd_devid->type = DEVID_GETTYPE(devid);
18314bac2208Snarayan 
18324bac2208Snarayan 	len = (devid_len > bufid_len)? bufid_len : devid_len;
18334bac2208Snarayan 
18344bac2208Snarayan 	bcopy(devid->did_id, vd_devid->id, len);
18354bac2208Snarayan 
18364bac2208Snarayan 	/* LDC memory operations require 8-byte multiples */
18374bac2208Snarayan 	ASSERT(request->nbytes % sizeof (uint64_t) == 0);
18384bac2208Snarayan 
18394bac2208Snarayan 	if ((status = ldc_mem_copy(vd->ldc_handle, (caddr_t)vd_devid, 0,
18404bac2208Snarayan 	    &request->nbytes, request->cookie, request->ncookies,
18414bac2208Snarayan 	    LDC_COPY_OUT)) != 0) {
18423af08d82Slm66018 		PR0("ldc_mem_copy() returned errno %d copying to client",
18434bac2208Snarayan 		    status);
18444bac2208Snarayan 	}
18453af08d82Slm66018 	PR1("post mem_copy: nbytes=%ld", request->nbytes);
18464bac2208Snarayan 
18473af08d82Slm66018 	kmem_free(vd_devid, bufbytes);
18484bac2208Snarayan 	ddi_devid_free((ddi_devid_t)devid);
18494bac2208Snarayan 
18504bac2208Snarayan 	return (status);
18514bac2208Snarayan }
18524bac2208Snarayan 
18531ae08745Sheppo /*
18541ae08745Sheppo  * Define the supported operations once the functions for performing them have
18551ae08745Sheppo  * been defined
18561ae08745Sheppo  */
18571ae08745Sheppo static const vds_operation_t	vds_operation[] = {
18583af08d82Slm66018 #define	X(_s)	#_s, _s
18593af08d82Slm66018 	{X(VD_OP_BREAD),	vd_start_bio,	vd_complete_bio},
18603af08d82Slm66018 	{X(VD_OP_BWRITE),	vd_start_bio,	vd_complete_bio},
18613af08d82Slm66018 	{X(VD_OP_FLUSH),	vd_ioctl,	NULL},
18623af08d82Slm66018 	{X(VD_OP_GET_WCE),	vd_ioctl,	NULL},
18633af08d82Slm66018 	{X(VD_OP_SET_WCE),	vd_ioctl,	NULL},
18643af08d82Slm66018 	{X(VD_OP_GET_VTOC),	vd_ioctl,	NULL},
18653af08d82Slm66018 	{X(VD_OP_SET_VTOC),	vd_ioctl,	NULL},
18663af08d82Slm66018 	{X(VD_OP_GET_DISKGEOM),	vd_ioctl,	NULL},
18673af08d82Slm66018 	{X(VD_OP_SET_DISKGEOM),	vd_ioctl,	NULL},
18683af08d82Slm66018 	{X(VD_OP_GET_EFI),	vd_ioctl,	NULL},
18693af08d82Slm66018 	{X(VD_OP_SET_EFI),	vd_ioctl,	NULL},
18703af08d82Slm66018 	{X(VD_OP_GET_DEVID),	vd_get_devid,	NULL},
18713af08d82Slm66018 #undef	X
18721ae08745Sheppo };
18731ae08745Sheppo 
18741ae08745Sheppo static const size_t	vds_noperations =
18751ae08745Sheppo 	(sizeof (vds_operation))/(sizeof (vds_operation[0]));
18761ae08745Sheppo 
18771ae08745Sheppo /*
1878d10e4ef2Snarayan  * Process a task specifying a client I/O request
18791ae08745Sheppo  */
18801ae08745Sheppo static int
1881d10e4ef2Snarayan vd_process_task(vd_task_t *task)
18821ae08745Sheppo {
1883d10e4ef2Snarayan 	int			i, status;
1884d10e4ef2Snarayan 	vd_t			*vd		= task->vd;
1885d10e4ef2Snarayan 	vd_dring_payload_t	*request	= task->request;
18861ae08745Sheppo 
18871ae08745Sheppo 
1888d10e4ef2Snarayan 	ASSERT(vd != NULL);
1889d10e4ef2Snarayan 	ASSERT(request != NULL);
18901ae08745Sheppo 
1891d10e4ef2Snarayan 	/* Find the requested operation */
18921ae08745Sheppo 	for (i = 0; i < vds_noperations; i++)
18931ae08745Sheppo 		if (request->operation == vds_operation[i].operation)
1894d10e4ef2Snarayan 			break;
1895d10e4ef2Snarayan 	if (i == vds_noperations) {
18963af08d82Slm66018 		PR0("Unsupported operation %u", request->operation);
18971ae08745Sheppo 		return (ENOTSUP);
18981ae08745Sheppo 	}
18991ae08745Sheppo 
19007636cb21Slm66018 	/* Range-check slice */
1901*87a7269eSachartre 	if (request->slice >= vd->nslices &&
1902*87a7269eSachartre 	    (vd->vdisk_type != VD_DISK_TYPE_DISK ||
1903*87a7269eSachartre 	    request->slice != VD_SLICE_NONE)) {
19043af08d82Slm66018 		PR0("Invalid \"slice\" %u (max %u) for virtual disk",
19057636cb21Slm66018 		    request->slice, (vd->nslices - 1));
19067636cb21Slm66018 		return (EINVAL);
19077636cb21Slm66018 	}
19087636cb21Slm66018 
19093af08d82Slm66018 	PR1("operation : %s", vds_operation[i].namep);
19103af08d82Slm66018 
1911d10e4ef2Snarayan 	/* Start the operation */
1912d10e4ef2Snarayan 	if ((status = vds_operation[i].start(task)) != EINPROGRESS) {
19133af08d82Slm66018 		PR0("operation : %s returned status %d",
19143af08d82Slm66018 		    vds_operation[i].namep, status);
1915d10e4ef2Snarayan 		request->status = status;	/* op succeeded or failed */
1916d10e4ef2Snarayan 		return (0);			/* but request completed */
19171ae08745Sheppo 	}
19181ae08745Sheppo 
1919d10e4ef2Snarayan 	ASSERT(vds_operation[i].complete != NULL);	/* debug case */
1920d10e4ef2Snarayan 	if (vds_operation[i].complete == NULL) {	/* non-debug case */
19213af08d82Slm66018 		PR0("Unexpected return of EINPROGRESS "
1922d10e4ef2Snarayan 		    "with no I/O completion handler");
1923d10e4ef2Snarayan 		request->status = EIO;	/* operation failed */
1924d10e4ef2Snarayan 		return (0);		/* but request completed */
19251ae08745Sheppo 	}
19261ae08745Sheppo 
19273af08d82Slm66018 	PR1("operation : kick off taskq entry for %s", vds_operation[i].namep);
19283af08d82Slm66018 
1929d10e4ef2Snarayan 	/* Queue a task to complete the operation */
1930d10e4ef2Snarayan 	status = ddi_taskq_dispatch(vd->completionq, vds_operation[i].complete,
1931d10e4ef2Snarayan 	    task, DDI_SLEEP);
1932d10e4ef2Snarayan 	/* ddi_taskq_dispatch(9f) guarantees success with DDI_SLEEP */
1933d10e4ef2Snarayan 	ASSERT(status == DDI_SUCCESS);
1934d10e4ef2Snarayan 
1935d10e4ef2Snarayan 	PR1("Operation in progress");
1936d10e4ef2Snarayan 	return (EINPROGRESS);	/* completion handler will finish request */
19371ae08745Sheppo }
19381ae08745Sheppo 
19391ae08745Sheppo /*
19400a55fbb7Slm66018  * Return true if the "type", "subtype", and "env" fields of the "tag" first
19410a55fbb7Slm66018  * argument match the corresponding remaining arguments; otherwise, return false
19421ae08745Sheppo  */
19430a55fbb7Slm66018 boolean_t
19441ae08745Sheppo vd_msgtype(vio_msg_tag_t *tag, int type, int subtype, int env)
19451ae08745Sheppo {
19461ae08745Sheppo 	return ((tag->vio_msgtype == type) &&
19471ae08745Sheppo 	    (tag->vio_subtype == subtype) &&
19480a55fbb7Slm66018 	    (tag->vio_subtype_env == env)) ? B_TRUE : B_FALSE;
19491ae08745Sheppo }
19501ae08745Sheppo 
19510a55fbb7Slm66018 /*
19520a55fbb7Slm66018  * Check whether the major/minor version specified in "ver_msg" is supported
19530a55fbb7Slm66018  * by this server.
19540a55fbb7Slm66018  */
19550a55fbb7Slm66018 static boolean_t
19560a55fbb7Slm66018 vds_supported_version(vio_ver_msg_t *ver_msg)
19570a55fbb7Slm66018 {
19580a55fbb7Slm66018 	for (int i = 0; i < vds_num_versions; i++) {
19590a55fbb7Slm66018 		ASSERT(vds_version[i].major > 0);
19600a55fbb7Slm66018 		ASSERT((i == 0) ||
19610a55fbb7Slm66018 		    (vds_version[i].major < vds_version[i-1].major));
19620a55fbb7Slm66018 
19630a55fbb7Slm66018 		/*
19640a55fbb7Slm66018 		 * If the major versions match, adjust the minor version, if
19650a55fbb7Slm66018 		 * necessary, down to the highest value supported by this
19660a55fbb7Slm66018 		 * server and return true so this message will get "ack"ed;
19670a55fbb7Slm66018 		 * the client should also support all minor versions lower
19680a55fbb7Slm66018 		 * than the value it sent
19690a55fbb7Slm66018 		 */
19700a55fbb7Slm66018 		if (ver_msg->ver_major == vds_version[i].major) {
19710a55fbb7Slm66018 			if (ver_msg->ver_minor > vds_version[i].minor) {
19720a55fbb7Slm66018 				PR0("Adjusting minor version from %u to %u",
19730a55fbb7Slm66018 				    ver_msg->ver_minor, vds_version[i].minor);
19740a55fbb7Slm66018 				ver_msg->ver_minor = vds_version[i].minor;
19750a55fbb7Slm66018 			}
19760a55fbb7Slm66018 			return (B_TRUE);
19770a55fbb7Slm66018 		}
19780a55fbb7Slm66018 
19790a55fbb7Slm66018 		/*
19800a55fbb7Slm66018 		 * If the message contains a higher major version number, set
19810a55fbb7Slm66018 		 * the message's major/minor versions to the current values
19820a55fbb7Slm66018 		 * and return false, so this message will get "nack"ed with
19830a55fbb7Slm66018 		 * these values, and the client will potentially try again
19840a55fbb7Slm66018 		 * with the same or a lower version
19850a55fbb7Slm66018 		 */
19860a55fbb7Slm66018 		if (ver_msg->ver_major > vds_version[i].major) {
19870a55fbb7Slm66018 			ver_msg->ver_major = vds_version[i].major;
19880a55fbb7Slm66018 			ver_msg->ver_minor = vds_version[i].minor;
19890a55fbb7Slm66018 			return (B_FALSE);
19900a55fbb7Slm66018 		}
19910a55fbb7Slm66018 
19920a55fbb7Slm66018 		/*
19930a55fbb7Slm66018 		 * Otherwise, the message's major version is less than the
19940a55fbb7Slm66018 		 * current major version, so continue the loop to the next
19950a55fbb7Slm66018 		 * (lower) supported version
19960a55fbb7Slm66018 		 */
19970a55fbb7Slm66018 	}
19980a55fbb7Slm66018 
19990a55fbb7Slm66018 	/*
20000a55fbb7Slm66018 	 * No common version was found; "ground" the version pair in the
20010a55fbb7Slm66018 	 * message to terminate negotiation
20020a55fbb7Slm66018 	 */
20030a55fbb7Slm66018 	ver_msg->ver_major = 0;
20040a55fbb7Slm66018 	ver_msg->ver_minor = 0;
20050a55fbb7Slm66018 	return (B_FALSE);
20060a55fbb7Slm66018 }
20070a55fbb7Slm66018 
20080a55fbb7Slm66018 /*
20090a55fbb7Slm66018  * Process a version message from a client.  vds expects to receive version
20100a55fbb7Slm66018  * messages from clients seeking service, but never issues version messages
20110a55fbb7Slm66018  * itself; therefore, vds can ACK or NACK client version messages, but does
20120a55fbb7Slm66018  * not expect to receive version-message ACKs or NACKs (and will treat such
20130a55fbb7Slm66018  * messages as invalid).
20140a55fbb7Slm66018  */
20151ae08745Sheppo static int
20160a55fbb7Slm66018 vd_process_ver_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
20171ae08745Sheppo {
20181ae08745Sheppo 	vio_ver_msg_t	*ver_msg = (vio_ver_msg_t *)msg;
20191ae08745Sheppo 
20201ae08745Sheppo 
20211ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
20221ae08745Sheppo 
20231ae08745Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
20241ae08745Sheppo 	    VIO_VER_INFO)) {
20251ae08745Sheppo 		return (ENOMSG);	/* not a version message */
20261ae08745Sheppo 	}
20271ae08745Sheppo 
20281ae08745Sheppo 	if (msglen != sizeof (*ver_msg)) {
20293af08d82Slm66018 		PR0("Expected %lu-byte version message; "
20301ae08745Sheppo 		    "received %lu bytes", sizeof (*ver_msg), msglen);
20311ae08745Sheppo 		return (EBADMSG);
20321ae08745Sheppo 	}
20331ae08745Sheppo 
20341ae08745Sheppo 	if (ver_msg->dev_class != VDEV_DISK) {
20353af08d82Slm66018 		PR0("Expected device class %u (disk); received %u",
20361ae08745Sheppo 		    VDEV_DISK, ver_msg->dev_class);
20371ae08745Sheppo 		return (EBADMSG);
20381ae08745Sheppo 	}
20391ae08745Sheppo 
20400a55fbb7Slm66018 	/*
20410a55fbb7Slm66018 	 * We're talking to the expected kind of client; set our device class
20420a55fbb7Slm66018 	 * for "ack/nack" back to the client
20430a55fbb7Slm66018 	 */
20441ae08745Sheppo 	ver_msg->dev_class = VDEV_DISK_SERVER;
20450a55fbb7Slm66018 
20460a55fbb7Slm66018 	/*
20470a55fbb7Slm66018 	 * Check whether the (valid) version message specifies a version
20480a55fbb7Slm66018 	 * supported by this server.  If the version is not supported, return
20490a55fbb7Slm66018 	 * EBADMSG so the message will get "nack"ed; vds_supported_version()
20500a55fbb7Slm66018 	 * will have updated the message with a supported version for the
20510a55fbb7Slm66018 	 * client to consider
20520a55fbb7Slm66018 	 */
20530a55fbb7Slm66018 	if (!vds_supported_version(ver_msg))
20540a55fbb7Slm66018 		return (EBADMSG);
20550a55fbb7Slm66018 
20560a55fbb7Slm66018 
20570a55fbb7Slm66018 	/*
20580a55fbb7Slm66018 	 * A version has been agreed upon; use the client's SID for
20590a55fbb7Slm66018 	 * communication on this channel now
20600a55fbb7Slm66018 	 */
20610a55fbb7Slm66018 	ASSERT(!(vd->initialized & VD_SID));
20620a55fbb7Slm66018 	vd->sid = ver_msg->tag.vio_sid;
20630a55fbb7Slm66018 	vd->initialized |= VD_SID;
20640a55fbb7Slm66018 
20650a55fbb7Slm66018 	/*
20660a55fbb7Slm66018 	 * When multiple versions are supported, this function should store
20670a55fbb7Slm66018 	 * the negotiated major and minor version values in the "vd" data
20680a55fbb7Slm66018 	 * structure to govern further communication; in particular, note that
20690a55fbb7Slm66018 	 * the client might have specified a lower minor version for the
20700a55fbb7Slm66018 	 * agreed major version than specifed in the vds_version[] array.  The
20710a55fbb7Slm66018 	 * following assertions should help remind future maintainers to make
20720a55fbb7Slm66018 	 * the appropriate changes to support multiple versions.
20730a55fbb7Slm66018 	 */
20740a55fbb7Slm66018 	ASSERT(vds_num_versions == 1);
20750a55fbb7Slm66018 	ASSERT(ver_msg->ver_major == vds_version[0].major);
20760a55fbb7Slm66018 	ASSERT(ver_msg->ver_minor == vds_version[0].minor);
20770a55fbb7Slm66018 
20780a55fbb7Slm66018 	PR0("Using major version %u, minor version %u",
20790a55fbb7Slm66018 	    ver_msg->ver_major, ver_msg->ver_minor);
20801ae08745Sheppo 	return (0);
20811ae08745Sheppo }
20821ae08745Sheppo 
20831ae08745Sheppo static int
20841ae08745Sheppo vd_process_attr_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
20851ae08745Sheppo {
20861ae08745Sheppo 	vd_attr_msg_t	*attr_msg = (vd_attr_msg_t *)msg;
20873c96341aSnarayan 	int		status, retry = 0;
20881ae08745Sheppo 
20891ae08745Sheppo 
20901ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
20911ae08745Sheppo 
20921ae08745Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
20931ae08745Sheppo 	    VIO_ATTR_INFO)) {
2094d10e4ef2Snarayan 		PR0("Message is not an attribute message");
2095d10e4ef2Snarayan 		return (ENOMSG);
20961ae08745Sheppo 	}
20971ae08745Sheppo 
20981ae08745Sheppo 	if (msglen != sizeof (*attr_msg)) {
20993af08d82Slm66018 		PR0("Expected %lu-byte attribute message; "
21001ae08745Sheppo 		    "received %lu bytes", sizeof (*attr_msg), msglen);
21011ae08745Sheppo 		return (EBADMSG);
21021ae08745Sheppo 	}
21031ae08745Sheppo 
21041ae08745Sheppo 	if (attr_msg->max_xfer_sz == 0) {
21053af08d82Slm66018 		PR0("Received maximum transfer size of 0 from client");
21061ae08745Sheppo 		return (EBADMSG);
21071ae08745Sheppo 	}
21081ae08745Sheppo 
21091ae08745Sheppo 	if ((attr_msg->xfer_mode != VIO_DESC_MODE) &&
21101ae08745Sheppo 	    (attr_msg->xfer_mode != VIO_DRING_MODE)) {
21113af08d82Slm66018 		PR0("Client requested unsupported transfer mode");
21121ae08745Sheppo 		return (EBADMSG);
21131ae08745Sheppo 	}
21141ae08745Sheppo 
21153c96341aSnarayan 	/*
21163c96341aSnarayan 	 * check if the underlying disk is ready, if not try accessing
21173c96341aSnarayan 	 * the device again. Open the vdisk device and extract info
21183c96341aSnarayan 	 * about it, as this is needed to respond to the attr info msg
21193c96341aSnarayan 	 */
21203c96341aSnarayan 	if ((vd->initialized & VD_DISK_READY) == 0) {
21213c96341aSnarayan 		PR0("Retry setting up disk (%s)", vd->device_path);
21223c96341aSnarayan 		do {
21233c96341aSnarayan 			status = vd_setup_vd(vd);
21243c96341aSnarayan 			if (status != EAGAIN || ++retry > vds_dev_retries)
21253c96341aSnarayan 				break;
21263c96341aSnarayan 
21273c96341aSnarayan 			/* incremental delay */
21283c96341aSnarayan 			delay(drv_usectohz(vds_dev_delay));
21293c96341aSnarayan 
21303c96341aSnarayan 			/* if vdisk is no longer enabled - return error */
21313c96341aSnarayan 			if (!vd_enabled(vd))
21323c96341aSnarayan 				return (ENXIO);
21333c96341aSnarayan 
21343c96341aSnarayan 		} while (status == EAGAIN);
21353c96341aSnarayan 
21363c96341aSnarayan 		if (status)
21373c96341aSnarayan 			return (ENXIO);
21383c96341aSnarayan 
21393c96341aSnarayan 		vd->initialized |= VD_DISK_READY;
21403c96341aSnarayan 		ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR);
21413c96341aSnarayan 		PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u",
21423c96341aSnarayan 		    ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"),
21433c96341aSnarayan 		    (vd->pseudo ? "yes" : "no"),
21443c96341aSnarayan 		    (vd->file ? "yes" : "no"),
21453c96341aSnarayan 		    vd->nslices);
21463c96341aSnarayan 	}
21473c96341aSnarayan 
21481ae08745Sheppo 	/* Success:  valid message and transfer mode */
21491ae08745Sheppo 	vd->xfer_mode = attr_msg->xfer_mode;
21503af08d82Slm66018 
21511ae08745Sheppo 	if (vd->xfer_mode == VIO_DESC_MODE) {
21523af08d82Slm66018 
21531ae08745Sheppo 		/*
21541ae08745Sheppo 		 * The vd_dring_inband_msg_t contains one cookie; need room
21551ae08745Sheppo 		 * for up to n-1 more cookies, where "n" is the number of full
21561ae08745Sheppo 		 * pages plus possibly one partial page required to cover
21571ae08745Sheppo 		 * "max_xfer_sz".  Add room for one more cookie if
21581ae08745Sheppo 		 * "max_xfer_sz" isn't an integral multiple of the page size.
21591ae08745Sheppo 		 * Must first get the maximum transfer size in bytes.
21601ae08745Sheppo 		 */
21611ae08745Sheppo 		size_t	max_xfer_bytes = attr_msg->vdisk_block_size ?
21621ae08745Sheppo 		    attr_msg->vdisk_block_size*attr_msg->max_xfer_sz :
21631ae08745Sheppo 		    attr_msg->max_xfer_sz;
21641ae08745Sheppo 		size_t	max_inband_msglen =
21651ae08745Sheppo 		    sizeof (vd_dring_inband_msg_t) +
21661ae08745Sheppo 		    ((max_xfer_bytes/PAGESIZE +
21671ae08745Sheppo 		    ((max_xfer_bytes % PAGESIZE) ? 1 : 0))*
21681ae08745Sheppo 		    (sizeof (ldc_mem_cookie_t)));
21691ae08745Sheppo 
21701ae08745Sheppo 		/*
21711ae08745Sheppo 		 * Set the maximum expected message length to
21721ae08745Sheppo 		 * accommodate in-band-descriptor messages with all
21731ae08745Sheppo 		 * their cookies
21741ae08745Sheppo 		 */
21751ae08745Sheppo 		vd->max_msglen = MAX(vd->max_msglen, max_inband_msglen);
2176d10e4ef2Snarayan 
2177d10e4ef2Snarayan 		/*
2178d10e4ef2Snarayan 		 * Initialize the data structure for processing in-band I/O
2179d10e4ef2Snarayan 		 * request descriptors
2180d10e4ef2Snarayan 		 */
2181d10e4ef2Snarayan 		vd->inband_task.vd	= vd;
21823af08d82Slm66018 		vd->inband_task.msg	= kmem_alloc(vd->max_msglen, KM_SLEEP);
2183d10e4ef2Snarayan 		vd->inband_task.index	= 0;
2184d10e4ef2Snarayan 		vd->inband_task.type	= VD_FINAL_RANGE_TASK;	/* range == 1 */
21851ae08745Sheppo 	}
21861ae08745Sheppo 
2187e1ebb9ecSlm66018 	/* Return the device's block size and max transfer size to the client */
2188e1ebb9ecSlm66018 	attr_msg->vdisk_block_size	= DEV_BSIZE;
2189e1ebb9ecSlm66018 	attr_msg->max_xfer_sz		= vd->max_xfer_sz;
2190e1ebb9ecSlm66018 
21911ae08745Sheppo 	attr_msg->vdisk_size = vd->vdisk_size;
21921ae08745Sheppo 	attr_msg->vdisk_type = vd->vdisk_type;
21931ae08745Sheppo 	attr_msg->operations = vds_operations;
21941ae08745Sheppo 	PR0("%s", VD_CLIENT(vd));
21953af08d82Slm66018 
21963af08d82Slm66018 	ASSERT(vd->dring_task == NULL);
21973af08d82Slm66018 
21981ae08745Sheppo 	return (0);
21991ae08745Sheppo }
22001ae08745Sheppo 
22011ae08745Sheppo static int
22021ae08745Sheppo vd_process_dring_reg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
22031ae08745Sheppo {
22041ae08745Sheppo 	int			status;
22051ae08745Sheppo 	size_t			expected;
22061ae08745Sheppo 	ldc_mem_info_t		dring_minfo;
22071ae08745Sheppo 	vio_dring_reg_msg_t	*reg_msg = (vio_dring_reg_msg_t *)msg;
22081ae08745Sheppo 
22091ae08745Sheppo 
22101ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
22111ae08745Sheppo 
22121ae08745Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
22131ae08745Sheppo 	    VIO_DRING_REG)) {
2214d10e4ef2Snarayan 		PR0("Message is not a register-dring message");
2215d10e4ef2Snarayan 		return (ENOMSG);
22161ae08745Sheppo 	}
22171ae08745Sheppo 
22181ae08745Sheppo 	if (msglen < sizeof (*reg_msg)) {
22193af08d82Slm66018 		PR0("Expected at least %lu-byte register-dring message; "
22201ae08745Sheppo 		    "received %lu bytes", sizeof (*reg_msg), msglen);
22211ae08745Sheppo 		return (EBADMSG);
22221ae08745Sheppo 	}
22231ae08745Sheppo 
22241ae08745Sheppo 	expected = sizeof (*reg_msg) +
22251ae08745Sheppo 	    (reg_msg->ncookies - 1)*(sizeof (reg_msg->cookie[0]));
22261ae08745Sheppo 	if (msglen != expected) {
22273af08d82Slm66018 		PR0("Expected %lu-byte register-dring message; "
22281ae08745Sheppo 		    "received %lu bytes", expected, msglen);
22291ae08745Sheppo 		return (EBADMSG);
22301ae08745Sheppo 	}
22311ae08745Sheppo 
22321ae08745Sheppo 	if (vd->initialized & VD_DRING) {
22333af08d82Slm66018 		PR0("A dring was previously registered; only support one");
22341ae08745Sheppo 		return (EBADMSG);
22351ae08745Sheppo 	}
22361ae08745Sheppo 
2237d10e4ef2Snarayan 	if (reg_msg->num_descriptors > INT32_MAX) {
22383af08d82Slm66018 		PR0("reg_msg->num_descriptors = %u; must be <= %u (%s)",
2239d10e4ef2Snarayan 		    reg_msg->ncookies, INT32_MAX, STRINGIZE(INT32_MAX));
2240d10e4ef2Snarayan 		return (EBADMSG);
2241d10e4ef2Snarayan 	}
2242d10e4ef2Snarayan 
22431ae08745Sheppo 	if (reg_msg->ncookies != 1) {
22441ae08745Sheppo 		/*
22451ae08745Sheppo 		 * In addition to fixing the assertion in the success case
22461ae08745Sheppo 		 * below, supporting drings which require more than one
22471ae08745Sheppo 		 * "cookie" requires increasing the value of vd->max_msglen
22481ae08745Sheppo 		 * somewhere in the code path prior to receiving the message
22491ae08745Sheppo 		 * which results in calling this function.  Note that without
22501ae08745Sheppo 		 * making this change, the larger message size required to
22511ae08745Sheppo 		 * accommodate multiple cookies cannot be successfully
22521ae08745Sheppo 		 * received, so this function will not even get called.
22531ae08745Sheppo 		 * Gracefully accommodating more dring cookies might
22541ae08745Sheppo 		 * reasonably demand exchanging an additional attribute or
22551ae08745Sheppo 		 * making a minor protocol adjustment
22561ae08745Sheppo 		 */
22573af08d82Slm66018 		PR0("reg_msg->ncookies = %u != 1", reg_msg->ncookies);
22581ae08745Sheppo 		return (EBADMSG);
22591ae08745Sheppo 	}
22601ae08745Sheppo 
22611ae08745Sheppo 	status = ldc_mem_dring_map(vd->ldc_handle, reg_msg->cookie,
22621ae08745Sheppo 	    reg_msg->ncookies, reg_msg->num_descriptors,
22634bac2208Snarayan 	    reg_msg->descriptor_size, LDC_DIRECT_MAP, &vd->dring_handle);
22641ae08745Sheppo 	if (status != 0) {
22653af08d82Slm66018 		PR0("ldc_mem_dring_map() returned errno %d", status);
22661ae08745Sheppo 		return (status);
22671ae08745Sheppo 	}
22681ae08745Sheppo 
22691ae08745Sheppo 	/*
22701ae08745Sheppo 	 * To remove the need for this assertion, must call
22711ae08745Sheppo 	 * ldc_mem_dring_nextcookie() successfully ncookies-1 times after a
22721ae08745Sheppo 	 * successful call to ldc_mem_dring_map()
22731ae08745Sheppo 	 */
22741ae08745Sheppo 	ASSERT(reg_msg->ncookies == 1);
22751ae08745Sheppo 
22761ae08745Sheppo 	if ((status =
22771ae08745Sheppo 	    ldc_mem_dring_info(vd->dring_handle, &dring_minfo)) != 0) {
22783af08d82Slm66018 		PR0("ldc_mem_dring_info() returned errno %d", status);
22791ae08745Sheppo 		if ((status = ldc_mem_dring_unmap(vd->dring_handle)) != 0)
22803af08d82Slm66018 			PR0("ldc_mem_dring_unmap() returned errno %d", status);
22811ae08745Sheppo 		return (status);
22821ae08745Sheppo 	}
22831ae08745Sheppo 
22841ae08745Sheppo 	if (dring_minfo.vaddr == NULL) {
22853af08d82Slm66018 		PR0("Descriptor ring virtual address is NULL");
22860a55fbb7Slm66018 		return (ENXIO);
22871ae08745Sheppo 	}
22881ae08745Sheppo 
22891ae08745Sheppo 
2290d10e4ef2Snarayan 	/* Initialize for valid message and mapped dring */
22911ae08745Sheppo 	PR1("descriptor size = %u, dring length = %u",
22921ae08745Sheppo 	    vd->descriptor_size, vd->dring_len);
22931ae08745Sheppo 	vd->initialized |= VD_DRING;
22941ae08745Sheppo 	vd->dring_ident = 1;	/* "There Can Be Only One" */
22951ae08745Sheppo 	vd->dring = dring_minfo.vaddr;
22961ae08745Sheppo 	vd->descriptor_size = reg_msg->descriptor_size;
22971ae08745Sheppo 	vd->dring_len = reg_msg->num_descriptors;
22981ae08745Sheppo 	reg_msg->dring_ident = vd->dring_ident;
2299d10e4ef2Snarayan 
2300d10e4ef2Snarayan 	/*
2301d10e4ef2Snarayan 	 * Allocate and initialize a "shadow" array of data structures for
2302d10e4ef2Snarayan 	 * tasks to process I/O requests in dring elements
2303d10e4ef2Snarayan 	 */
2304d10e4ef2Snarayan 	vd->dring_task =
2305d10e4ef2Snarayan 	    kmem_zalloc((sizeof (*vd->dring_task)) * vd->dring_len, KM_SLEEP);
2306d10e4ef2Snarayan 	for (int i = 0; i < vd->dring_len; i++) {
2307d10e4ef2Snarayan 		vd->dring_task[i].vd		= vd;
2308d10e4ef2Snarayan 		vd->dring_task[i].index		= i;
2309d10e4ef2Snarayan 		vd->dring_task[i].request	= &VD_DRING_ELEM(i)->payload;
23104bac2208Snarayan 
23114bac2208Snarayan 		status = ldc_mem_alloc_handle(vd->ldc_handle,
23124bac2208Snarayan 		    &(vd->dring_task[i].mhdl));
23134bac2208Snarayan 		if (status) {
23143af08d82Slm66018 			PR0("ldc_mem_alloc_handle() returned err %d ", status);
23154bac2208Snarayan 			return (ENXIO);
23164bac2208Snarayan 		}
23173af08d82Slm66018 
23183af08d82Slm66018 		vd->dring_task[i].msg = kmem_alloc(vd->max_msglen, KM_SLEEP);
2319d10e4ef2Snarayan 	}
2320d10e4ef2Snarayan 
23211ae08745Sheppo 	return (0);
23221ae08745Sheppo }
23231ae08745Sheppo 
23241ae08745Sheppo static int
23251ae08745Sheppo vd_process_dring_unreg_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
23261ae08745Sheppo {
23271ae08745Sheppo 	vio_dring_unreg_msg_t	*unreg_msg = (vio_dring_unreg_msg_t *)msg;
23281ae08745Sheppo 
23291ae08745Sheppo 
23301ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
23311ae08745Sheppo 
23321ae08745Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO,
23331ae08745Sheppo 	    VIO_DRING_UNREG)) {
2334d10e4ef2Snarayan 		PR0("Message is not an unregister-dring message");
2335d10e4ef2Snarayan 		return (ENOMSG);
23361ae08745Sheppo 	}
23371ae08745Sheppo 
23381ae08745Sheppo 	if (msglen != sizeof (*unreg_msg)) {
23393af08d82Slm66018 		PR0("Expected %lu-byte unregister-dring message; "
23401ae08745Sheppo 		    "received %lu bytes", sizeof (*unreg_msg), msglen);
23411ae08745Sheppo 		return (EBADMSG);
23421ae08745Sheppo 	}
23431ae08745Sheppo 
23441ae08745Sheppo 	if (unreg_msg->dring_ident != vd->dring_ident) {
23453af08d82Slm66018 		PR0("Expected dring ident %lu; received %lu",
23461ae08745Sheppo 		    vd->dring_ident, unreg_msg->dring_ident);
23471ae08745Sheppo 		return (EBADMSG);
23481ae08745Sheppo 	}
23491ae08745Sheppo 
23501ae08745Sheppo 	return (0);
23511ae08745Sheppo }
23521ae08745Sheppo 
23531ae08745Sheppo static int
23541ae08745Sheppo process_rdx_msg(vio_msg_t *msg, size_t msglen)
23551ae08745Sheppo {
23561ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
23571ae08745Sheppo 
2358d10e4ef2Snarayan 	if (!vd_msgtype(&msg->tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX)) {
2359d10e4ef2Snarayan 		PR0("Message is not an RDX message");
2360d10e4ef2Snarayan 		return (ENOMSG);
2361d10e4ef2Snarayan 	}
23621ae08745Sheppo 
23631ae08745Sheppo 	if (msglen != sizeof (vio_rdx_msg_t)) {
23643af08d82Slm66018 		PR0("Expected %lu-byte RDX message; received %lu bytes",
23651ae08745Sheppo 		    sizeof (vio_rdx_msg_t), msglen);
23661ae08745Sheppo 		return (EBADMSG);
23671ae08745Sheppo 	}
23681ae08745Sheppo 
2369d10e4ef2Snarayan 	PR0("Valid RDX message");
23701ae08745Sheppo 	return (0);
23711ae08745Sheppo }
23721ae08745Sheppo 
23731ae08745Sheppo static int
23741ae08745Sheppo vd_check_seq_num(vd_t *vd, uint64_t seq_num)
23751ae08745Sheppo {
23761ae08745Sheppo 	if ((vd->initialized & VD_SEQ_NUM) && (seq_num != vd->seq_num + 1)) {
23773af08d82Slm66018 		PR0("Received seq_num %lu; expected %lu",
23781ae08745Sheppo 		    seq_num, (vd->seq_num + 1));
23793af08d82Slm66018 		PR0("initiating soft reset");
2380d10e4ef2Snarayan 		vd_need_reset(vd, B_FALSE);
23811ae08745Sheppo 		return (1);
23821ae08745Sheppo 	}
23831ae08745Sheppo 
23841ae08745Sheppo 	vd->seq_num = seq_num;
23851ae08745Sheppo 	vd->initialized |= VD_SEQ_NUM;	/* superfluous after first time... */
23861ae08745Sheppo 	return (0);
23871ae08745Sheppo }
23881ae08745Sheppo 
23891ae08745Sheppo /*
23901ae08745Sheppo  * Return the expected size of an inband-descriptor message with all the
23911ae08745Sheppo  * cookies it claims to include
23921ae08745Sheppo  */
23931ae08745Sheppo static size_t
23941ae08745Sheppo expected_inband_size(vd_dring_inband_msg_t *msg)
23951ae08745Sheppo {
23961ae08745Sheppo 	return ((sizeof (*msg)) +
23971ae08745Sheppo 	    (msg->payload.ncookies - 1)*(sizeof (msg->payload.cookie[0])));
23981ae08745Sheppo }
23991ae08745Sheppo 
24001ae08745Sheppo /*
24011ae08745Sheppo  * Process an in-band descriptor message:  used with clients like OBP, with
24021ae08745Sheppo  * which vds exchanges descriptors within VIO message payloads, rather than
24031ae08745Sheppo  * operating on them within a descriptor ring
24041ae08745Sheppo  */
24051ae08745Sheppo static int
24063af08d82Slm66018 vd_process_desc_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
24071ae08745Sheppo {
24081ae08745Sheppo 	size_t			expected;
24091ae08745Sheppo 	vd_dring_inband_msg_t	*desc_msg = (vd_dring_inband_msg_t *)msg;
24101ae08745Sheppo 
24111ae08745Sheppo 
24121ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
24131ae08745Sheppo 
24141ae08745Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
2415d10e4ef2Snarayan 	    VIO_DESC_DATA)) {
2416d10e4ef2Snarayan 		PR1("Message is not an in-band-descriptor message");
2417d10e4ef2Snarayan 		return (ENOMSG);
2418d10e4ef2Snarayan 	}
24191ae08745Sheppo 
24201ae08745Sheppo 	if (msglen < sizeof (*desc_msg)) {
24213af08d82Slm66018 		PR0("Expected at least %lu-byte descriptor message; "
24221ae08745Sheppo 		    "received %lu bytes", sizeof (*desc_msg), msglen);
24231ae08745Sheppo 		return (EBADMSG);
24241ae08745Sheppo 	}
24251ae08745Sheppo 
24261ae08745Sheppo 	if (msglen != (expected = expected_inband_size(desc_msg))) {
24273af08d82Slm66018 		PR0("Expected %lu-byte descriptor message; "
24281ae08745Sheppo 		    "received %lu bytes", expected, msglen);
24291ae08745Sheppo 		return (EBADMSG);
24301ae08745Sheppo 	}
24311ae08745Sheppo 
2432d10e4ef2Snarayan 	if (vd_check_seq_num(vd, desc_msg->hdr.seq_num) != 0)
24331ae08745Sheppo 		return (EBADMSG);
24341ae08745Sheppo 
2435d10e4ef2Snarayan 	/*
2436d10e4ef2Snarayan 	 * Valid message:  Set up the in-band descriptor task and process the
2437d10e4ef2Snarayan 	 * request.  Arrange to acknowledge the client's message, unless an
2438d10e4ef2Snarayan 	 * error processing the descriptor task results in setting
2439d10e4ef2Snarayan 	 * VIO_SUBTYPE_NACK
2440d10e4ef2Snarayan 	 */
2441d10e4ef2Snarayan 	PR1("Valid in-band-descriptor message");
2442d10e4ef2Snarayan 	msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
24433af08d82Slm66018 
24443af08d82Slm66018 	ASSERT(vd->inband_task.msg != NULL);
24453af08d82Slm66018 
24463af08d82Slm66018 	bcopy(msg, vd->inband_task.msg, msglen);
2447d10e4ef2Snarayan 	vd->inband_task.msglen	= msglen;
24483af08d82Slm66018 
24493af08d82Slm66018 	/*
24503af08d82Slm66018 	 * The task request is now the payload of the message
24513af08d82Slm66018 	 * that was just copied into the body of the task.
24523af08d82Slm66018 	 */
24533af08d82Slm66018 	desc_msg = (vd_dring_inband_msg_t *)vd->inband_task.msg;
2454d10e4ef2Snarayan 	vd->inband_task.request	= &desc_msg->payload;
24553af08d82Slm66018 
2456d10e4ef2Snarayan 	return (vd_process_task(&vd->inband_task));
24571ae08745Sheppo }
24581ae08745Sheppo 
24591ae08745Sheppo static int
2460d10e4ef2Snarayan vd_process_element(vd_t *vd, vd_task_type_t type, uint32_t idx,
24613af08d82Slm66018     vio_msg_t *msg, size_t msglen)
24621ae08745Sheppo {
24631ae08745Sheppo 	int			status;
2464d10e4ef2Snarayan 	boolean_t		ready;
2465d10e4ef2Snarayan 	vd_dring_entry_t	*elem = VD_DRING_ELEM(idx);
24661ae08745Sheppo 
24671ae08745Sheppo 
2468d10e4ef2Snarayan 	/* Accept the updated dring element */
2469d10e4ef2Snarayan 	if ((status = ldc_mem_dring_acquire(vd->dring_handle, idx, idx)) != 0) {
24703af08d82Slm66018 		PR0("ldc_mem_dring_acquire() returned errno %d", status);
24711ae08745Sheppo 		return (status);
24721ae08745Sheppo 	}
2473d10e4ef2Snarayan 	ready = (elem->hdr.dstate == VIO_DESC_READY);
2474d10e4ef2Snarayan 	if (ready) {
2475d10e4ef2Snarayan 		elem->hdr.dstate = VIO_DESC_ACCEPTED;
2476d10e4ef2Snarayan 	} else {
24773af08d82Slm66018 		PR0("descriptor %u not ready", idx);
2478d10e4ef2Snarayan 		VD_DUMP_DRING_ELEM(elem);
2479d10e4ef2Snarayan 	}
2480d10e4ef2Snarayan 	if ((status = ldc_mem_dring_release(vd->dring_handle, idx, idx)) != 0) {
24813af08d82Slm66018 		PR0("ldc_mem_dring_release() returned errno %d", status);
24821ae08745Sheppo 		return (status);
24831ae08745Sheppo 	}
2484d10e4ef2Snarayan 	if (!ready)
2485d10e4ef2Snarayan 		return (EBUSY);
24861ae08745Sheppo 
24871ae08745Sheppo 
2488d10e4ef2Snarayan 	/* Initialize a task and process the accepted element */
2489d10e4ef2Snarayan 	PR1("Processing dring element %u", idx);
2490d10e4ef2Snarayan 	vd->dring_task[idx].type	= type;
24913af08d82Slm66018 
24923af08d82Slm66018 	/* duplicate msg buf for cookies etc. */
24933af08d82Slm66018 	bcopy(msg, vd->dring_task[idx].msg, msglen);
24943af08d82Slm66018 
2495d10e4ef2Snarayan 	vd->dring_task[idx].msglen	= msglen;
2496d10e4ef2Snarayan 	if ((status = vd_process_task(&vd->dring_task[idx])) != EINPROGRESS)
24973c96341aSnarayan 		status = vd_mark_elem_done(vd, idx,
24983c96341aSnarayan 		    vd->dring_task[idx].request->status,
24993c96341aSnarayan 		    vd->dring_task[idx].request->nbytes);
25001ae08745Sheppo 
25011ae08745Sheppo 	return (status);
25021ae08745Sheppo }
25031ae08745Sheppo 
25041ae08745Sheppo static int
2505d10e4ef2Snarayan vd_process_element_range(vd_t *vd, int start, int end,
25063af08d82Slm66018     vio_msg_t *msg, size_t msglen)
2507d10e4ef2Snarayan {
2508d10e4ef2Snarayan 	int		i, n, nelem, status = 0;
2509d10e4ef2Snarayan 	boolean_t	inprogress = B_FALSE;
2510d10e4ef2Snarayan 	vd_task_type_t	type;
2511d10e4ef2Snarayan 
2512d10e4ef2Snarayan 
2513d10e4ef2Snarayan 	ASSERT(start >= 0);
2514d10e4ef2Snarayan 	ASSERT(end >= 0);
2515d10e4ef2Snarayan 
2516d10e4ef2Snarayan 	/*
2517d10e4ef2Snarayan 	 * Arrange to acknowledge the client's message, unless an error
2518d10e4ef2Snarayan 	 * processing one of the dring elements results in setting
2519d10e4ef2Snarayan 	 * VIO_SUBTYPE_NACK
2520d10e4ef2Snarayan 	 */
2521d10e4ef2Snarayan 	msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
2522d10e4ef2Snarayan 
2523d10e4ef2Snarayan 	/*
2524d10e4ef2Snarayan 	 * Process the dring elements in the range
2525d10e4ef2Snarayan 	 */
2526d10e4ef2Snarayan 	nelem = ((end < start) ? end + vd->dring_len : end) - start + 1;
2527d10e4ef2Snarayan 	for (i = start, n = nelem; n > 0; i = (i + 1) % vd->dring_len, n--) {
2528d10e4ef2Snarayan 		((vio_dring_msg_t *)msg)->end_idx = i;
2529d10e4ef2Snarayan 		type = (n == 1) ? VD_FINAL_RANGE_TASK : VD_NONFINAL_RANGE_TASK;
25303af08d82Slm66018 		status = vd_process_element(vd, type, i, msg, msglen);
2531d10e4ef2Snarayan 		if (status == EINPROGRESS)
2532d10e4ef2Snarayan 			inprogress = B_TRUE;
2533d10e4ef2Snarayan 		else if (status != 0)
2534d10e4ef2Snarayan 			break;
2535d10e4ef2Snarayan 	}
2536d10e4ef2Snarayan 
2537d10e4ef2Snarayan 	/*
2538d10e4ef2Snarayan 	 * If some, but not all, operations of a multi-element range are in
2539d10e4ef2Snarayan 	 * progress, wait for other operations to complete before returning
2540d10e4ef2Snarayan 	 * (which will result in "ack" or "nack" of the message).  Note that
2541d10e4ef2Snarayan 	 * all outstanding operations will need to complete, not just the ones
2542d10e4ef2Snarayan 	 * corresponding to the current range of dring elements; howevever, as
2543d10e4ef2Snarayan 	 * this situation is an error case, performance is less critical.
2544d10e4ef2Snarayan 	 */
2545d10e4ef2Snarayan 	if ((nelem > 1) && (status != EINPROGRESS) && inprogress)
2546d10e4ef2Snarayan 		ddi_taskq_wait(vd->completionq);
2547d10e4ef2Snarayan 
2548d10e4ef2Snarayan 	return (status);
2549d10e4ef2Snarayan }
2550d10e4ef2Snarayan 
2551d10e4ef2Snarayan static int
25523af08d82Slm66018 vd_process_dring_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
25531ae08745Sheppo {
25541ae08745Sheppo 	vio_dring_msg_t	*dring_msg = (vio_dring_msg_t *)msg;
25551ae08745Sheppo 
25561ae08745Sheppo 
25571ae08745Sheppo 	ASSERT(msglen >= sizeof (msg->tag));
25581ae08745Sheppo 
25591ae08745Sheppo 	if (!vd_msgtype(&msg->tag, VIO_TYPE_DATA, VIO_SUBTYPE_INFO,
25601ae08745Sheppo 	    VIO_DRING_DATA)) {
2561d10e4ef2Snarayan 		PR1("Message is not a dring-data message");
2562d10e4ef2Snarayan 		return (ENOMSG);
25631ae08745Sheppo 	}
25641ae08745Sheppo 
25651ae08745Sheppo 	if (msglen != sizeof (*dring_msg)) {
25663af08d82Slm66018 		PR0("Expected %lu-byte dring message; received %lu bytes",
25671ae08745Sheppo 		    sizeof (*dring_msg), msglen);
25681ae08745Sheppo 		return (EBADMSG);
25691ae08745Sheppo 	}
25701ae08745Sheppo 
2571d10e4ef2Snarayan 	if (vd_check_seq_num(vd, dring_msg->seq_num) != 0)
25721ae08745Sheppo 		return (EBADMSG);
25731ae08745Sheppo 
25741ae08745Sheppo 	if (dring_msg->dring_ident != vd->dring_ident) {
25753af08d82Slm66018 		PR0("Expected dring ident %lu; received ident %lu",
25761ae08745Sheppo 		    vd->dring_ident, dring_msg->dring_ident);
25771ae08745Sheppo 		return (EBADMSG);
25781ae08745Sheppo 	}
25791ae08745Sheppo 
2580d10e4ef2Snarayan 	if (dring_msg->start_idx >= vd->dring_len) {
25813af08d82Slm66018 		PR0("\"start_idx\" = %u; must be less than %u",
2582d10e4ef2Snarayan 		    dring_msg->start_idx, vd->dring_len);
2583d10e4ef2Snarayan 		return (EBADMSG);
2584d10e4ef2Snarayan 	}
25851ae08745Sheppo 
2586d10e4ef2Snarayan 	if ((dring_msg->end_idx < 0) ||
2587d10e4ef2Snarayan 	    (dring_msg->end_idx >= vd->dring_len)) {
25883af08d82Slm66018 		PR0("\"end_idx\" = %u; must be >= 0 and less than %u",
2589d10e4ef2Snarayan 		    dring_msg->end_idx, vd->dring_len);
2590d10e4ef2Snarayan 		return (EBADMSG);
2591d10e4ef2Snarayan 	}
2592d10e4ef2Snarayan 
2593d10e4ef2Snarayan 	/* Valid message; process range of updated dring elements */
2594d10e4ef2Snarayan 	PR1("Processing descriptor range, start = %u, end = %u",
2595d10e4ef2Snarayan 	    dring_msg->start_idx, dring_msg->end_idx);
2596d10e4ef2Snarayan 	return (vd_process_element_range(vd, dring_msg->start_idx,
25973af08d82Slm66018 	    dring_msg->end_idx, msg, msglen));
25981ae08745Sheppo }
25991ae08745Sheppo 
26001ae08745Sheppo static int
26011ae08745Sheppo recv_msg(ldc_handle_t ldc_handle, void *msg, size_t *nbytes)
26021ae08745Sheppo {
26031ae08745Sheppo 	int	retry, status;
26041ae08745Sheppo 	size_t	size = *nbytes;
26051ae08745Sheppo 
26061ae08745Sheppo 
26071ae08745Sheppo 	for (retry = 0, status = ETIMEDOUT;
26081ae08745Sheppo 	    retry < vds_ldc_retries && status == ETIMEDOUT;
26091ae08745Sheppo 	    retry++) {
26101ae08745Sheppo 		PR1("ldc_read() attempt %d", (retry + 1));
26111ae08745Sheppo 		*nbytes = size;
26121ae08745Sheppo 		status = ldc_read(ldc_handle, msg, nbytes);
26131ae08745Sheppo 	}
26141ae08745Sheppo 
26153af08d82Slm66018 	if (status) {
26163af08d82Slm66018 		PR0("ldc_read() returned errno %d", status);
26173af08d82Slm66018 		if (status != ECONNRESET)
26183af08d82Slm66018 			return (ENOMSG);
26191ae08745Sheppo 		return (status);
26201ae08745Sheppo 	} else if (*nbytes == 0) {
26211ae08745Sheppo 		PR1("ldc_read() returned 0 and no message read");
26221ae08745Sheppo 		return (ENOMSG);
26231ae08745Sheppo 	}
26241ae08745Sheppo 
26251ae08745Sheppo 	PR1("RCVD %lu-byte message", *nbytes);
26261ae08745Sheppo 	return (0);
26271ae08745Sheppo }
26281ae08745Sheppo 
26291ae08745Sheppo static int
26303af08d82Slm66018 vd_do_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
26311ae08745Sheppo {
26321ae08745Sheppo 	int		status;
26331ae08745Sheppo 
26341ae08745Sheppo 
26351ae08745Sheppo 	PR1("Processing (%x/%x/%x) message", msg->tag.vio_msgtype,
26361ae08745Sheppo 	    msg->tag.vio_subtype, msg->tag.vio_subtype_env);
26373af08d82Slm66018 #ifdef	DEBUG
26383af08d82Slm66018 	vd_decode_tag(msg);
26393af08d82Slm66018 #endif
26401ae08745Sheppo 
26411ae08745Sheppo 	/*
26421ae08745Sheppo 	 * Validate session ID up front, since it applies to all messages
26431ae08745Sheppo 	 * once set
26441ae08745Sheppo 	 */
26451ae08745Sheppo 	if ((msg->tag.vio_sid != vd->sid) && (vd->initialized & VD_SID)) {
26463af08d82Slm66018 		PR0("Expected SID %u, received %u", vd->sid,
26471ae08745Sheppo 		    msg->tag.vio_sid);
26481ae08745Sheppo 		return (EBADMSG);
26491ae08745Sheppo 	}
26501ae08745Sheppo 
26513af08d82Slm66018 	PR1("\tWhile in state %d (%s)", vd->state, vd_decode_state(vd->state));
26521ae08745Sheppo 
26531ae08745Sheppo 	/*
26541ae08745Sheppo 	 * Process the received message based on connection state
26551ae08745Sheppo 	 */
26561ae08745Sheppo 	switch (vd->state) {
26571ae08745Sheppo 	case VD_STATE_INIT:	/* expect version message */
26580a55fbb7Slm66018 		if ((status = vd_process_ver_msg(vd, msg, msglen)) != 0)
26591ae08745Sheppo 			return (status);
26601ae08745Sheppo 
26611ae08745Sheppo 		/* Version negotiated, move to that state */
26621ae08745Sheppo 		vd->state = VD_STATE_VER;
26631ae08745Sheppo 		return (0);
26641ae08745Sheppo 
26651ae08745Sheppo 	case VD_STATE_VER:	/* expect attribute message */
26661ae08745Sheppo 		if ((status = vd_process_attr_msg(vd, msg, msglen)) != 0)
26671ae08745Sheppo 			return (status);
26681ae08745Sheppo 
26691ae08745Sheppo 		/* Attributes exchanged, move to that state */
26701ae08745Sheppo 		vd->state = VD_STATE_ATTR;
26711ae08745Sheppo 		return (0);
26721ae08745Sheppo 
26731ae08745Sheppo 	case VD_STATE_ATTR:
26741ae08745Sheppo 		switch (vd->xfer_mode) {
26751ae08745Sheppo 		case VIO_DESC_MODE:	/* expect RDX message */
26761ae08745Sheppo 			if ((status = process_rdx_msg(msg, msglen)) != 0)
26771ae08745Sheppo 				return (status);
26781ae08745Sheppo 
26791ae08745Sheppo 			/* Ready to receive in-band descriptors */
26801ae08745Sheppo 			vd->state = VD_STATE_DATA;
26811ae08745Sheppo 			return (0);
26821ae08745Sheppo 
26831ae08745Sheppo 		case VIO_DRING_MODE:	/* expect register-dring message */
26841ae08745Sheppo 			if ((status =
26851ae08745Sheppo 			    vd_process_dring_reg_msg(vd, msg, msglen)) != 0)
26861ae08745Sheppo 				return (status);
26871ae08745Sheppo 
26881ae08745Sheppo 			/* One dring negotiated, move to that state */
26891ae08745Sheppo 			vd->state = VD_STATE_DRING;
26901ae08745Sheppo 			return (0);
26911ae08745Sheppo 
26921ae08745Sheppo 		default:
26931ae08745Sheppo 			ASSERT("Unsupported transfer mode");
26943af08d82Slm66018 			PR0("Unsupported transfer mode");
26951ae08745Sheppo 			return (ENOTSUP);
26961ae08745Sheppo 		}
26971ae08745Sheppo 
26981ae08745Sheppo 	case VD_STATE_DRING:	/* expect RDX, register-dring, or unreg-dring */
26991ae08745Sheppo 		if ((status = process_rdx_msg(msg, msglen)) == 0) {
27001ae08745Sheppo 			/* Ready to receive data */
27011ae08745Sheppo 			vd->state = VD_STATE_DATA;
27021ae08745Sheppo 			return (0);
27031ae08745Sheppo 		} else if (status != ENOMSG) {
27041ae08745Sheppo 			return (status);
27051ae08745Sheppo 		}
27061ae08745Sheppo 
27071ae08745Sheppo 
27081ae08745Sheppo 		/*
27091ae08745Sheppo 		 * If another register-dring message is received, stay in
27101ae08745Sheppo 		 * dring state in case the client sends RDX; although the
27111ae08745Sheppo 		 * protocol allows multiple drings, this server does not
27121ae08745Sheppo 		 * support using more than one
27131ae08745Sheppo 		 */
27141ae08745Sheppo 		if ((status =
27151ae08745Sheppo 		    vd_process_dring_reg_msg(vd, msg, msglen)) != ENOMSG)
27161ae08745Sheppo 			return (status);
27171ae08745Sheppo 
27181ae08745Sheppo 		/*
27191ae08745Sheppo 		 * Acknowledge an unregister-dring message, but reset the
27201ae08745Sheppo 		 * connection anyway:  Although the protocol allows
27211ae08745Sheppo 		 * unregistering drings, this server cannot serve a vdisk
27221ae08745Sheppo 		 * without its only dring
27231ae08745Sheppo 		 */
27241ae08745Sheppo 		status = vd_process_dring_unreg_msg(vd, msg, msglen);
27251ae08745Sheppo 		return ((status == 0) ? ENOTSUP : status);
27261ae08745Sheppo 
27271ae08745Sheppo 	case VD_STATE_DATA:
27281ae08745Sheppo 		switch (vd->xfer_mode) {
27291ae08745Sheppo 		case VIO_DESC_MODE:	/* expect in-band-descriptor message */
27303af08d82Slm66018 			return (vd_process_desc_msg(vd, msg, msglen));
27311ae08745Sheppo 
27321ae08745Sheppo 		case VIO_DRING_MODE:	/* expect dring-data or unreg-dring */
27331ae08745Sheppo 			/*
27341ae08745Sheppo 			 * Typically expect dring-data messages, so handle
27351ae08745Sheppo 			 * them first
27361ae08745Sheppo 			 */
27371ae08745Sheppo 			if ((status = vd_process_dring_msg(vd, msg,
27383af08d82Slm66018 			    msglen)) != ENOMSG)
27391ae08745Sheppo 				return (status);
27401ae08745Sheppo 
27411ae08745Sheppo 			/*
27421ae08745Sheppo 			 * Acknowledge an unregister-dring message, but reset
27431ae08745Sheppo 			 * the connection anyway:  Although the protocol
27441ae08745Sheppo 			 * allows unregistering drings, this server cannot
27451ae08745Sheppo 			 * serve a vdisk without its only dring
27461ae08745Sheppo 			 */
27471ae08745Sheppo 			status = vd_process_dring_unreg_msg(vd, msg, msglen);
27481ae08745Sheppo 			return ((status == 0) ? ENOTSUP : status);
27491ae08745Sheppo 
27501ae08745Sheppo 		default:
27511ae08745Sheppo 			ASSERT("Unsupported transfer mode");
27523af08d82Slm66018 			PR0("Unsupported transfer mode");
27531ae08745Sheppo 			return (ENOTSUP);
27541ae08745Sheppo 		}
27551ae08745Sheppo 
27561ae08745Sheppo 	default:
27571ae08745Sheppo 		ASSERT("Invalid client connection state");
27583af08d82Slm66018 		PR0("Invalid client connection state");
27591ae08745Sheppo 		return (ENOTSUP);
27601ae08745Sheppo 	}
27611ae08745Sheppo }
27621ae08745Sheppo 
2763d10e4ef2Snarayan static int
27643af08d82Slm66018 vd_process_msg(vd_t *vd, vio_msg_t *msg, size_t msglen)
27651ae08745Sheppo {
27661ae08745Sheppo 	int		status;
27671ae08745Sheppo 	boolean_t	reset_ldc = B_FALSE;
27681ae08745Sheppo 
27691ae08745Sheppo 
27701ae08745Sheppo 	/*
27711ae08745Sheppo 	 * Check that the message is at least big enough for a "tag", so that
27721ae08745Sheppo 	 * message processing can proceed based on tag-specified message type
27731ae08745Sheppo 	 */
27741ae08745Sheppo 	if (msglen < sizeof (vio_msg_tag_t)) {
27753af08d82Slm66018 		PR0("Received short (%lu-byte) message", msglen);
27761ae08745Sheppo 		/* Can't "nack" short message, so drop the big hammer */
27773af08d82Slm66018 		PR0("initiating full reset");
2778d10e4ef2Snarayan 		vd_need_reset(vd, B_TRUE);
2779d10e4ef2Snarayan 		return (EBADMSG);
27801ae08745Sheppo 	}
27811ae08745Sheppo 
27821ae08745Sheppo 	/*
27831ae08745Sheppo 	 * Process the message
27841ae08745Sheppo 	 */
27853af08d82Slm66018 	switch (status = vd_do_process_msg(vd, msg, msglen)) {
27861ae08745Sheppo 	case 0:
27871ae08745Sheppo 		/* "ack" valid, successfully-processed messages */
27881ae08745Sheppo 		msg->tag.vio_subtype = VIO_SUBTYPE_ACK;
27891ae08745Sheppo 		break;
27901ae08745Sheppo 
2791d10e4ef2Snarayan 	case EINPROGRESS:
2792d10e4ef2Snarayan 		/* The completion handler will "ack" or "nack" the message */
2793d10e4ef2Snarayan 		return (EINPROGRESS);
27941ae08745Sheppo 	case ENOMSG:
27953af08d82Slm66018 		PR0("Received unexpected message");
27961ae08745Sheppo 		_NOTE(FALLTHROUGH);
27971ae08745Sheppo 	case EBADMSG:
27981ae08745Sheppo 	case ENOTSUP:
27991ae08745Sheppo 		/* "nack" invalid messages */
28001ae08745Sheppo 		msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
28011ae08745Sheppo 		break;
28021ae08745Sheppo 
28031ae08745Sheppo 	default:
28041ae08745Sheppo 		/* "nack" failed messages */
28051ae08745Sheppo 		msg->tag.vio_subtype = VIO_SUBTYPE_NACK;
28061ae08745Sheppo 		/* An LDC error probably occurred, so try resetting it */
28071ae08745Sheppo 		reset_ldc = B_TRUE;
28081ae08745Sheppo 		break;
28091ae08745Sheppo 	}
28101ae08745Sheppo 
28113af08d82Slm66018 	PR1("\tResulting in state %d (%s)", vd->state,
28123af08d82Slm66018 	    vd_decode_state(vd->state));
28133af08d82Slm66018 
2814d10e4ef2Snarayan 	/* Send the "ack" or "nack" to the client */
28151ae08745Sheppo 	PR1("Sending %s",
28161ae08745Sheppo 	    (msg->tag.vio_subtype == VIO_SUBTYPE_ACK) ? "ACK" : "NACK");
28171ae08745Sheppo 	if (send_msg(vd->ldc_handle, msg, msglen) != 0)
28181ae08745Sheppo 		reset_ldc = B_TRUE;
28191ae08745Sheppo 
2820d10e4ef2Snarayan 	/* Arrange to reset the connection for nack'ed or failed messages */
28213af08d82Slm66018 	if ((status != 0) || reset_ldc) {
28223af08d82Slm66018 		PR0("initiating %s reset",
28233af08d82Slm66018 		    (reset_ldc) ? "full" : "soft");
2824d10e4ef2Snarayan 		vd_need_reset(vd, reset_ldc);
28253af08d82Slm66018 	}
2826d10e4ef2Snarayan 
2827d10e4ef2Snarayan 	return (status);
2828d10e4ef2Snarayan }
2829d10e4ef2Snarayan 
2830d10e4ef2Snarayan static boolean_t
2831d10e4ef2Snarayan vd_enabled(vd_t *vd)
2832d10e4ef2Snarayan {
2833d10e4ef2Snarayan 	boolean_t	enabled;
2834d10e4ef2Snarayan 
2835d10e4ef2Snarayan 
2836d10e4ef2Snarayan 	mutex_enter(&vd->lock);
2837d10e4ef2Snarayan 	enabled = vd->enabled;
2838d10e4ef2Snarayan 	mutex_exit(&vd->lock);
2839d10e4ef2Snarayan 	return (enabled);
28401ae08745Sheppo }
28411ae08745Sheppo 
28421ae08745Sheppo static void
28430a55fbb7Slm66018 vd_recv_msg(void *arg)
28441ae08745Sheppo {
28451ae08745Sheppo 	vd_t	*vd = (vd_t *)arg;
28463af08d82Slm66018 	int	rv = 0, status = 0;
28471ae08745Sheppo 
28481ae08745Sheppo 	ASSERT(vd != NULL);
28493af08d82Slm66018 
2850d10e4ef2Snarayan 	PR2("New task to receive incoming message(s)");
28513af08d82Slm66018 
28523af08d82Slm66018 
2853d10e4ef2Snarayan 	while (vd_enabled(vd) && status == 0) {
2854d10e4ef2Snarayan 		size_t		msglen, msgsize;
28553af08d82Slm66018 		ldc_status_t	lstatus;
2856d10e4ef2Snarayan 
28570a55fbb7Slm66018 		/*
2858d10e4ef2Snarayan 		 * Receive and process a message
28590a55fbb7Slm66018 		 */
2860d10e4ef2Snarayan 		vd_reset_if_needed(vd);	/* can change vd->max_msglen */
28613af08d82Slm66018 
28623af08d82Slm66018 		/*
28633af08d82Slm66018 		 * check if channel is UP - else break out of loop
28643af08d82Slm66018 		 */
28653af08d82Slm66018 		status = ldc_status(vd->ldc_handle, &lstatus);
28663af08d82Slm66018 		if (lstatus != LDC_UP) {
28673af08d82Slm66018 			PR0("channel not up (status=%d), exiting recv loop\n",
28683af08d82Slm66018 			    lstatus);
28693af08d82Slm66018 			break;
28703af08d82Slm66018 		}
28713af08d82Slm66018 
28723af08d82Slm66018 		ASSERT(vd->max_msglen != 0);
28733af08d82Slm66018 
2874d10e4ef2Snarayan 		msgsize = vd->max_msglen; /* stable copy for alloc/free */
28753af08d82Slm66018 		msglen	= msgsize;	  /* actual len after recv_msg() */
28763af08d82Slm66018 
28773af08d82Slm66018 		status = recv_msg(vd->ldc_handle, vd->vio_msgp, &msglen);
28783af08d82Slm66018 		switch (status) {
28793af08d82Slm66018 		case 0:
28803af08d82Slm66018 			rv = vd_process_msg(vd, (vio_msg_t *)vd->vio_msgp,
28813af08d82Slm66018 			    msglen);
28823af08d82Slm66018 			/* check if max_msglen changed */
28833af08d82Slm66018 			if (msgsize != vd->max_msglen) {
28843af08d82Slm66018 				PR0("max_msglen changed 0x%lx to 0x%lx bytes\n",
28853af08d82Slm66018 				    msgsize, vd->max_msglen);
28863af08d82Slm66018 				kmem_free(vd->vio_msgp, msgsize);
28873af08d82Slm66018 				vd->vio_msgp =
28883af08d82Slm66018 				    kmem_alloc(vd->max_msglen, KM_SLEEP);
28893af08d82Slm66018 			}
28903af08d82Slm66018 			if (rv == EINPROGRESS)
28913af08d82Slm66018 				continue;
28923af08d82Slm66018 			break;
28933af08d82Slm66018 
28943af08d82Slm66018 		case ENOMSG:
28953af08d82Slm66018 			break;
28963af08d82Slm66018 
28973af08d82Slm66018 		case ECONNRESET:
28983af08d82Slm66018 			PR0("initiating soft reset (ECONNRESET)\n");
28993af08d82Slm66018 			vd_need_reset(vd, B_FALSE);
29003af08d82Slm66018 			status = 0;
29013af08d82Slm66018 			break;
29023af08d82Slm66018 
29033af08d82Slm66018 		default:
2904d10e4ef2Snarayan 			/* Probably an LDC failure; arrange to reset it */
29053af08d82Slm66018 			PR0("initiating full reset (status=0x%x)", status);
2906d10e4ef2Snarayan 			vd_need_reset(vd, B_TRUE);
29073af08d82Slm66018 			break;
29080a55fbb7Slm66018 		}
29091ae08745Sheppo 	}
29103af08d82Slm66018 
2911d10e4ef2Snarayan 	PR2("Task finished");
29120a55fbb7Slm66018 }
29130a55fbb7Slm66018 
29140a55fbb7Slm66018 static uint_t
29151ae08745Sheppo vd_handle_ldc_events(uint64_t event, caddr_t arg)
29161ae08745Sheppo {
29171ae08745Sheppo 	vd_t	*vd = (vd_t *)(void *)arg;
29183af08d82Slm66018 	int	status;
29191ae08745Sheppo 
29201ae08745Sheppo 	ASSERT(vd != NULL);
2921d10e4ef2Snarayan 
2922d10e4ef2Snarayan 	if (!vd_enabled(vd))
2923d10e4ef2Snarayan 		return (LDC_SUCCESS);
2924d10e4ef2Snarayan 
29253af08d82Slm66018 	if (event & LDC_EVT_DOWN) {
292634683adeSsg70180 		PR0("LDC_EVT_DOWN: LDC channel went down");
29273af08d82Slm66018 
29283af08d82Slm66018 		vd_need_reset(vd, B_TRUE);
29293af08d82Slm66018 		status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd,
29303af08d82Slm66018 		    DDI_SLEEP);
29313af08d82Slm66018 		if (status == DDI_FAILURE) {
29323af08d82Slm66018 			PR0("cannot schedule task to recv msg\n");
29333af08d82Slm66018 			vd_need_reset(vd, B_TRUE);
29343af08d82Slm66018 		}
29353af08d82Slm66018 	}
29363af08d82Slm66018 
2937d10e4ef2Snarayan 	if (event & LDC_EVT_RESET) {
29383af08d82Slm66018 		PR0("LDC_EVT_RESET: LDC channel was reset");
29393af08d82Slm66018 
29403af08d82Slm66018 		if (vd->state != VD_STATE_INIT) {
29413af08d82Slm66018 			PR0("scheduling full reset");
29423af08d82Slm66018 			vd_need_reset(vd, B_FALSE);
29433af08d82Slm66018 			status = ddi_taskq_dispatch(vd->startq, vd_recv_msg,
29443af08d82Slm66018 			    vd, DDI_SLEEP);
29453af08d82Slm66018 			if (status == DDI_FAILURE) {
29463af08d82Slm66018 				PR0("cannot schedule task to recv msg\n");
29473af08d82Slm66018 				vd_need_reset(vd, B_TRUE);
29483af08d82Slm66018 			}
29493af08d82Slm66018 
29503af08d82Slm66018 		} else {
29513af08d82Slm66018 			PR0("channel already reset, ignoring...\n");
29523af08d82Slm66018 			PR0("doing ldc up...\n");
29533af08d82Slm66018 			(void) ldc_up(vd->ldc_handle);
29543af08d82Slm66018 		}
29553af08d82Slm66018 
2956d10e4ef2Snarayan 		return (LDC_SUCCESS);
2957d10e4ef2Snarayan 	}
2958d10e4ef2Snarayan 
2959d10e4ef2Snarayan 	if (event & LDC_EVT_UP) {
29603af08d82Slm66018 		PR0("EVT_UP: LDC is up\nResetting client connection state");
29613af08d82Slm66018 		PR0("initiating soft reset");
2962d10e4ef2Snarayan 		vd_need_reset(vd, B_FALSE);
29633af08d82Slm66018 		status = ddi_taskq_dispatch(vd->startq, vd_recv_msg,
29643af08d82Slm66018 		    vd, DDI_SLEEP);
29653af08d82Slm66018 		if (status == DDI_FAILURE) {
29663af08d82Slm66018 			PR0("cannot schedule task to recv msg\n");
29673af08d82Slm66018 			vd_need_reset(vd, B_TRUE);
29683af08d82Slm66018 			return (LDC_SUCCESS);
29693af08d82Slm66018 		}
2970d10e4ef2Snarayan 	}
2971d10e4ef2Snarayan 
2972d10e4ef2Snarayan 	if (event & LDC_EVT_READ) {
2973d10e4ef2Snarayan 		int	status;
2974d10e4ef2Snarayan 
2975d10e4ef2Snarayan 		PR1("New data available");
2976d10e4ef2Snarayan 		/* Queue a task to receive the new data */
2977d10e4ef2Snarayan 		status = ddi_taskq_dispatch(vd->startq, vd_recv_msg, vd,
2978d10e4ef2Snarayan 		    DDI_SLEEP);
29793af08d82Slm66018 
29803af08d82Slm66018 		if (status == DDI_FAILURE) {
29813af08d82Slm66018 			PR0("cannot schedule task to recv msg\n");
29823af08d82Slm66018 			vd_need_reset(vd, B_TRUE);
29833af08d82Slm66018 		}
2984d10e4ef2Snarayan 	}
2985d10e4ef2Snarayan 
2986d10e4ef2Snarayan 	return (LDC_SUCCESS);
29871ae08745Sheppo }
29881ae08745Sheppo 
29891ae08745Sheppo static uint_t
29901ae08745Sheppo vds_check_for_vd(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
29911ae08745Sheppo {
29921ae08745Sheppo 	_NOTE(ARGUNUSED(key, val))
29931ae08745Sheppo 	(*((uint_t *)arg))++;
29941ae08745Sheppo 	return (MH_WALK_TERMINATE);
29951ae08745Sheppo }
29961ae08745Sheppo 
29971ae08745Sheppo 
29981ae08745Sheppo static int
29991ae08745Sheppo vds_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
30001ae08745Sheppo {
30011ae08745Sheppo 	uint_t	vd_present = 0;
30021ae08745Sheppo 	minor_t	instance;
30031ae08745Sheppo 	vds_t	*vds;
30041ae08745Sheppo 
30051ae08745Sheppo 
30061ae08745Sheppo 	switch (cmd) {
30071ae08745Sheppo 	case DDI_DETACH:
30081ae08745Sheppo 		/* the real work happens below */
30091ae08745Sheppo 		break;
30101ae08745Sheppo 	case DDI_SUSPEND:
3011d10e4ef2Snarayan 		PR0("No action required for DDI_SUSPEND");
30121ae08745Sheppo 		return (DDI_SUCCESS);
30131ae08745Sheppo 	default:
30143af08d82Slm66018 		PR0("Unrecognized \"cmd\"");
30151ae08745Sheppo 		return (DDI_FAILURE);
30161ae08745Sheppo 	}
30171ae08745Sheppo 
30181ae08745Sheppo 	ASSERT(cmd == DDI_DETACH);
30191ae08745Sheppo 	instance = ddi_get_instance(dip);
30201ae08745Sheppo 	if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
30213af08d82Slm66018 		PR0("Could not get state for instance %u", instance);
30221ae08745Sheppo 		ddi_soft_state_free(vds_state, instance);
30231ae08745Sheppo 		return (DDI_FAILURE);
30241ae08745Sheppo 	}
30251ae08745Sheppo 
30261ae08745Sheppo 	/* Do no detach when serving any vdisks */
30271ae08745Sheppo 	mod_hash_walk(vds->vd_table, vds_check_for_vd, &vd_present);
30281ae08745Sheppo 	if (vd_present) {
30291ae08745Sheppo 		PR0("Not detaching because serving vdisks");
30301ae08745Sheppo 		return (DDI_FAILURE);
30311ae08745Sheppo 	}
30321ae08745Sheppo 
30331ae08745Sheppo 	PR0("Detaching");
3034445b4c2eSsb155480 	if (vds->initialized & VDS_MDEG) {
30351ae08745Sheppo 		(void) mdeg_unregister(vds->mdeg);
3036445b4c2eSsb155480 		kmem_free(vds->ispecp->specp, sizeof (vds_prop_template));
3037445b4c2eSsb155480 		kmem_free(vds->ispecp, sizeof (mdeg_node_spec_t));
3038445b4c2eSsb155480 		vds->ispecp = NULL;
3039445b4c2eSsb155480 		vds->mdeg = NULL;
3040445b4c2eSsb155480 	}
3041445b4c2eSsb155480 
30421ae08745Sheppo 	if (vds->initialized & VDS_LDI)
30431ae08745Sheppo 		(void) ldi_ident_release(vds->ldi_ident);
30441ae08745Sheppo 	mod_hash_destroy_hash(vds->vd_table);
30451ae08745Sheppo 	ddi_soft_state_free(vds_state, instance);
30461ae08745Sheppo 	return (DDI_SUCCESS);
30471ae08745Sheppo }
30481ae08745Sheppo 
30491ae08745Sheppo static boolean_t
30501ae08745Sheppo is_pseudo_device(dev_info_t *dip)
30511ae08745Sheppo {
30521ae08745Sheppo 	dev_info_t	*parent, *root = ddi_root_node();
30531ae08745Sheppo 
30541ae08745Sheppo 
30551ae08745Sheppo 	for (parent = ddi_get_parent(dip); (parent != NULL) && (parent != root);
30561ae08745Sheppo 	    parent = ddi_get_parent(parent)) {
30571ae08745Sheppo 		if (strcmp(ddi_get_name(parent), DEVI_PSEUDO_NEXNAME) == 0)
30581ae08745Sheppo 			return (B_TRUE);
30591ae08745Sheppo 	}
30601ae08745Sheppo 
30611ae08745Sheppo 	return (B_FALSE);
30621ae08745Sheppo }
30631ae08745Sheppo 
30641ae08745Sheppo static int
30650a55fbb7Slm66018 vd_setup_full_disk(vd_t *vd)
30660a55fbb7Slm66018 {
30670a55fbb7Slm66018 	int		rval, status;
30680a55fbb7Slm66018 	major_t		major = getmajor(vd->dev[0]);
30690a55fbb7Slm66018 	minor_t		minor = getminor(vd->dev[0]) - VD_ENTIRE_DISK_SLICE;
30704bac2208Snarayan 	struct dk_minfo	dk_minfo;
30710a55fbb7Slm66018 
30724bac2208Snarayan 	/*
30734bac2208Snarayan 	 * At this point, vdisk_size is set to the size of partition 2 but
30744bac2208Snarayan 	 * this does not represent the size of the disk because partition 2
30754bac2208Snarayan 	 * may not cover the entire disk and its size does not include reserved
30764bac2208Snarayan 	 * blocks. So we update vdisk_size to be the size of the entire disk.
30774bac2208Snarayan 	 */
30784bac2208Snarayan 	if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCGMEDIAINFO,
30794bac2208Snarayan 	    (intptr_t)&dk_minfo, (vd_open_flags | FKIOCTL),
30804bac2208Snarayan 	    kcred, &rval)) != 0) {
3081690555a1Sachartre 		PRN("ldi_ioctl(DKIOCGMEDIAINFO) returned errno %d",
30824bac2208Snarayan 		    status);
30830a55fbb7Slm66018 		return (status);
30840a55fbb7Slm66018 	}
30854bac2208Snarayan 	vd->vdisk_size = dk_minfo.dki_capacity;
30860a55fbb7Slm66018 
30870a55fbb7Slm66018 	/* Set full-disk parameters */
30880a55fbb7Slm66018 	vd->vdisk_type	= VD_DISK_TYPE_DISK;
30890a55fbb7Slm66018 	vd->nslices	= (sizeof (vd->dev))/(sizeof (vd->dev[0]));
30900a55fbb7Slm66018 
30910a55fbb7Slm66018 	/* Move dev number and LDI handle to entire-disk-slice array elements */
30920a55fbb7Slm66018 	vd->dev[VD_ENTIRE_DISK_SLICE]		= vd->dev[0];
30930a55fbb7Slm66018 	vd->dev[0]				= 0;
30940a55fbb7Slm66018 	vd->ldi_handle[VD_ENTIRE_DISK_SLICE]	= vd->ldi_handle[0];
30950a55fbb7Slm66018 	vd->ldi_handle[0]			= NULL;
30960a55fbb7Slm66018 
30970a55fbb7Slm66018 	/* Initialize device numbers for remaining slices and open them */
30980a55fbb7Slm66018 	for (int slice = 0; slice < vd->nslices; slice++) {
30990a55fbb7Slm66018 		/*
31000a55fbb7Slm66018 		 * Skip the entire-disk slice, as it's already open and its
31010a55fbb7Slm66018 		 * device known
31020a55fbb7Slm66018 		 */
31030a55fbb7Slm66018 		if (slice == VD_ENTIRE_DISK_SLICE)
31040a55fbb7Slm66018 			continue;
31050a55fbb7Slm66018 		ASSERT(vd->dev[slice] == 0);
31060a55fbb7Slm66018 		ASSERT(vd->ldi_handle[slice] == NULL);
31070a55fbb7Slm66018 
31080a55fbb7Slm66018 		/*
31090a55fbb7Slm66018 		 * Construct the device number for the current slice
31100a55fbb7Slm66018 		 */
31110a55fbb7Slm66018 		vd->dev[slice] = makedevice(major, (minor + slice));
31120a55fbb7Slm66018 
31130a55fbb7Slm66018 		/*
311434683adeSsg70180 		 * Open all slices of the disk to serve them to the client.
311534683adeSsg70180 		 * Slices are opened exclusively to prevent other threads or
311634683adeSsg70180 		 * processes in the service domain from performing I/O to
311734683adeSsg70180 		 * slices being accessed by a client.  Failure to open a slice
311834683adeSsg70180 		 * results in vds not serving this disk, as the client could
311934683adeSsg70180 		 * attempt (and should be able) to access any slice immediately.
312034683adeSsg70180 		 * Any slices successfully opened before a failure will get
312134683adeSsg70180 		 * closed by vds_destroy_vd() as a result of the error returned
312234683adeSsg70180 		 * by this function.
312334683adeSsg70180 		 *
312434683adeSsg70180 		 * We need to do the open with FNDELAY so that opening an empty
312534683adeSsg70180 		 * slice does not fail.
31260a55fbb7Slm66018 		 */
31270a55fbb7Slm66018 		PR0("Opening device major %u, minor %u = slice %u",
31280a55fbb7Slm66018 		    major, minor, slice);
31290a55fbb7Slm66018 		if ((status = ldi_open_by_dev(&vd->dev[slice], OTYP_BLK,
313034683adeSsg70180 		    vd_open_flags | FNDELAY, kcred, &vd->ldi_handle[slice],
31310a55fbb7Slm66018 		    vd->vds->ldi_ident)) != 0) {
3132690555a1Sachartre 			PRN("ldi_open_by_dev() returned errno %d "
31330a55fbb7Slm66018 			    "for slice %u", status, slice);
31340a55fbb7Slm66018 			/* vds_destroy_vd() will close any open slices */
3135690555a1Sachartre 			vd->ldi_handle[slice] = NULL;
31360a55fbb7Slm66018 			return (status);
31370a55fbb7Slm66018 		}
31380a55fbb7Slm66018 	}
31390a55fbb7Slm66018 
31400a55fbb7Slm66018 	return (0);
31410a55fbb7Slm66018 }
31420a55fbb7Slm66018 
31430a55fbb7Slm66018 static int
31444bac2208Snarayan vd_setup_partition_efi(vd_t *vd)
31454bac2208Snarayan {
31464bac2208Snarayan 	efi_gpt_t *gpt;
31474bac2208Snarayan 	efi_gpe_t *gpe;
31484bac2208Snarayan 	struct uuid uuid = EFI_RESERVED;
31494bac2208Snarayan 	uint32_t crc;
31504bac2208Snarayan 	int length;
31514bac2208Snarayan 
31524bac2208Snarayan 	length = sizeof (efi_gpt_t) + sizeof (efi_gpe_t);
31534bac2208Snarayan 
31544bac2208Snarayan 	gpt = kmem_zalloc(length, KM_SLEEP);
31554bac2208Snarayan 	gpe = (efi_gpe_t *)(gpt + 1);
31564bac2208Snarayan 
31574bac2208Snarayan 	gpt->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
31584bac2208Snarayan 	gpt->efi_gpt_Revision = LE_32(EFI_VERSION_CURRENT);
31594bac2208Snarayan 	gpt->efi_gpt_HeaderSize = LE_32(sizeof (efi_gpt_t));
31604bac2208Snarayan 	gpt->efi_gpt_FirstUsableLBA = LE_64(0ULL);
31614bac2208Snarayan 	gpt->efi_gpt_LastUsableLBA = LE_64(vd->vdisk_size - 1);
31624bac2208Snarayan 	gpt->efi_gpt_NumberOfPartitionEntries = LE_32(1);
31634bac2208Snarayan 	gpt->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (efi_gpe_t));
31644bac2208Snarayan 
31654bac2208Snarayan 	UUID_LE_CONVERT(gpe->efi_gpe_PartitionTypeGUID, uuid);
31664bac2208Snarayan 	gpe->efi_gpe_StartingLBA = gpt->efi_gpt_FirstUsableLBA;
31674bac2208Snarayan 	gpe->efi_gpe_EndingLBA = gpt->efi_gpt_LastUsableLBA;
31684bac2208Snarayan 
31694bac2208Snarayan 	CRC32(crc, gpe, sizeof (efi_gpe_t), -1U, crc32_table);
31704bac2208Snarayan 	gpt->efi_gpt_PartitionEntryArrayCRC32 = LE_32(~crc);
31714bac2208Snarayan 
31724bac2208Snarayan 	CRC32(crc, gpt, sizeof (efi_gpt_t), -1U, crc32_table);
31734bac2208Snarayan 	gpt->efi_gpt_HeaderCRC32 = LE_32(~crc);
31744bac2208Snarayan 
31754bac2208Snarayan 	vd->dk_efi.dki_lba = 0;
31764bac2208Snarayan 	vd->dk_efi.dki_length = length;
31774bac2208Snarayan 	vd->dk_efi.dki_data = gpt;
31784bac2208Snarayan 
31794bac2208Snarayan 	return (0);
31804bac2208Snarayan }
31814bac2208Snarayan 
31824bac2208Snarayan static int
31833c96341aSnarayan vd_setup_file(vd_t *vd)
31843c96341aSnarayan {
31853c96341aSnarayan 	int 		i, rval, status;
3186690555a1Sachartre 	ushort_t	sum;
31873c96341aSnarayan 	vattr_t		vattr;
31883c96341aSnarayan 	dev_t		dev;
3189*87a7269eSachartre 	size_t		size;
31903c96341aSnarayan 	char		*file_path = vd->device_path;
31913c96341aSnarayan 	char		dev_path[MAXPATHLEN + 1];
3192*87a7269eSachartre 	char		prefix;
31933c96341aSnarayan 	ldi_handle_t	lhandle;
31943c96341aSnarayan 	struct dk_cinfo	dk_cinfo;
3195690555a1Sachartre 	struct dk_label label;
31963c96341aSnarayan 
31973c96341aSnarayan 	/* make sure the file is valid */
31983c96341aSnarayan 	if ((status = lookupname(file_path, UIO_SYSSPACE, FOLLOW,
31993c96341aSnarayan 	    NULLVPP, &vd->file_vnode)) != 0) {
3200690555a1Sachartre 		PRN("Cannot lookup file(%s) errno %d", file_path, status);
32013c96341aSnarayan 		return (status);
32023c96341aSnarayan 	}
32033c96341aSnarayan 
32043c96341aSnarayan 	if (vd->file_vnode->v_type != VREG) {
3205690555a1Sachartre 		PRN("Invalid file type (%s)\n", file_path);
32063c96341aSnarayan 		VN_RELE(vd->file_vnode);
32073c96341aSnarayan 		return (EBADF);
32083c96341aSnarayan 	}
32093c96341aSnarayan 	VN_RELE(vd->file_vnode);
32103c96341aSnarayan 
32113c96341aSnarayan 	if ((status = vn_open(file_path, UIO_SYSSPACE, vd_open_flags | FOFFMAX,
32123c96341aSnarayan 	    0, &vd->file_vnode, 0, 0)) != 0) {
3213690555a1Sachartre 		PRN("vn_open(%s) = errno %d", file_path, status);
32143c96341aSnarayan 		return (status);
32153c96341aSnarayan 	}
32163c96341aSnarayan 
3217690555a1Sachartre 	/*
3218690555a1Sachartre 	 * We set vd->file now so that vds_destroy_vd will take care of
3219690555a1Sachartre 	 * closing the file and releasing the vnode in case of an error.
3220690555a1Sachartre 	 */
3221690555a1Sachartre 	vd->file = B_TRUE;
3222690555a1Sachartre 	vd->pseudo = B_FALSE;
3223690555a1Sachartre 
32243c96341aSnarayan 	vattr.va_mask = AT_SIZE;
32253c96341aSnarayan 	if ((status = VOP_GETATTR(vd->file_vnode, &vattr, 0, kcred)) != 0) {
3226690555a1Sachartre 		PRN("VOP_GETATTR(%s) = errno %d", file_path, status);
32273c96341aSnarayan 		return (EIO);
32283c96341aSnarayan 	}
32293c96341aSnarayan 
32303c96341aSnarayan 	vd->file_size = vattr.va_size;
32313c96341aSnarayan 	/* size should be at least sizeof(dk_label) */
32323c96341aSnarayan 	if (vd->file_size < sizeof (struct dk_label)) {
32333c96341aSnarayan 		PRN("Size of file has to be at least %ld bytes",
32343c96341aSnarayan 		    sizeof (struct dk_label));
32353c96341aSnarayan 		return (EIO);
32363c96341aSnarayan 	}
32373c96341aSnarayan 
3238690555a1Sachartre 	if (vd->file_vnode->v_flag & VNOMAP) {
3239690555a1Sachartre 		PRN("File %s cannot be mapped", file_path);
32403c96341aSnarayan 		return (EIO);
32413c96341aSnarayan 	}
32423c96341aSnarayan 
3243690555a1Sachartre 	/* read label from file */
3244690555a1Sachartre 	if (VD_FILE_LABEL_READ(vd, &label) < 0) {
3245690555a1Sachartre 		PRN("Can't read label from %s", file_path);
3246690555a1Sachartre 		return (EIO);
3247690555a1Sachartre 	}
32483c96341aSnarayan 
32493c96341aSnarayan 	/* label checksum */
3250690555a1Sachartre 	sum = vd_lbl2cksum(&label);
32513c96341aSnarayan 
3252690555a1Sachartre 	if (label.dkl_magic != DKL_MAGIC || label.dkl_cksum != sum) {
32533c96341aSnarayan 		PR0("%s has an invalid disk label "
32543c96341aSnarayan 		    "(magic=%x cksum=%x (expect %x))",
3255690555a1Sachartre 		    file_path, label.dkl_magic, label.dkl_cksum, sum);
32563c96341aSnarayan 
32573c96341aSnarayan 		/* default label */
3258690555a1Sachartre 		bzero(&label, sizeof (struct dk_label));
32593c96341aSnarayan 
32603c96341aSnarayan 		/*
32613c96341aSnarayan 		 * We must have a resonable number of cylinders and sectors so
32623c96341aSnarayan 		 * that newfs can run using default values.
32633c96341aSnarayan 		 *
32643c96341aSnarayan 		 * if (disk_size < 2MB)
32653c96341aSnarayan 		 * 	phys_cylinders = disk_size / 100K
32663c96341aSnarayan 		 * else
32673c96341aSnarayan 		 * 	phys_cylinders = disk_size / 300K
32683c96341aSnarayan 		 *
32693c96341aSnarayan 		 * phys_cylinders = (phys_cylinders == 0) ? 1 : phys_cylinders
32703c96341aSnarayan 		 * alt_cylinders = (phys_cylinders > 2) ? 2 : 0;
32713c96341aSnarayan 		 * data_cylinders = phys_cylinders - alt_cylinders
32723c96341aSnarayan 		 *
32733c96341aSnarayan 		 * sectors = disk_size / (phys_cylinders * blk_size)
32743c96341aSnarayan 		 */
32753c96341aSnarayan 		if (vd->file_size < (2 * 1024 * 1024))
3276690555a1Sachartre 			label.dkl_pcyl = vd->file_size / (100 * 1024);
32773c96341aSnarayan 		else
3278690555a1Sachartre 			label.dkl_pcyl = vd->file_size / (300 * 1024);
32793c96341aSnarayan 
3280690555a1Sachartre 		if (label.dkl_pcyl == 0)
3281690555a1Sachartre 			label.dkl_pcyl = 1;
32823c96341aSnarayan 
3283690555a1Sachartre 		if (label.dkl_pcyl > 2)
3284690555a1Sachartre 			label.dkl_acyl = 2;
32853c96341aSnarayan 		else
3286690555a1Sachartre 			label.dkl_acyl = 0;
32873c96341aSnarayan 
3288690555a1Sachartre 		label.dkl_nsect = vd->file_size /
3289690555a1Sachartre 		    (DEV_BSIZE * label.dkl_pcyl);
3290690555a1Sachartre 		label.dkl_ncyl = label.dkl_pcyl - label.dkl_acyl;
3291690555a1Sachartre 		label.dkl_nhead = 1;
3292690555a1Sachartre 		label.dkl_write_reinstruct = 0;
3293690555a1Sachartre 		label.dkl_read_reinstruct = 0;
3294690555a1Sachartre 		label.dkl_rpm = 7200;
3295690555a1Sachartre 		label.dkl_apc = 0;
3296690555a1Sachartre 		label.dkl_intrlv = 0;
3297690555a1Sachartre 		label.dkl_magic = DKL_MAGIC;
32983c96341aSnarayan 
32993c96341aSnarayan 		PR0("requested disk size: %ld bytes\n", vd->file_size);
3300690555a1Sachartre 		PR0("setup: ncyl=%d nhead=%d nsec=%d\n", label.dkl_pcyl,
3301690555a1Sachartre 		    label.dkl_nhead, label.dkl_nsect);
33023c96341aSnarayan 		PR0("provided disk size: %ld bytes\n", (uint64_t)
3303690555a1Sachartre 		    (label.dkl_pcyl *
3304690555a1Sachartre 		    label.dkl_nhead * label.dkl_nsect * DEV_BSIZE));
33053c96341aSnarayan 
3306*87a7269eSachartre 		if (vd->file_size < (1ULL << 20)) {
3307*87a7269eSachartre 			size = vd->file_size >> 10;
3308*87a7269eSachartre 			prefix = 'K'; /* Kilobyte */
3309*87a7269eSachartre 		} else if (vd->file_size < (1ULL << 30)) {
3310*87a7269eSachartre 			size = vd->file_size >> 20;
3311*87a7269eSachartre 			prefix = 'M'; /* Megabyte */
3312*87a7269eSachartre 		} else if (vd->file_size < (1ULL << 40)) {
3313*87a7269eSachartre 			size = vd->file_size >> 30;
3314*87a7269eSachartre 			prefix = 'G'; /* Gigabyte */
3315*87a7269eSachartre 		} else {
3316*87a7269eSachartre 			size = vd->file_size >> 40;
3317*87a7269eSachartre 			prefix = 'T'; /* Terabyte */
3318*87a7269eSachartre 		}
3319*87a7269eSachartre 
33203c96341aSnarayan 		/*
33213c96341aSnarayan 		 * We must have a correct label name otherwise format(1m) will
33223c96341aSnarayan 		 * not recognized the disk as labeled.
33233c96341aSnarayan 		 */
3324690555a1Sachartre 		(void) snprintf(label.dkl_asciilabel, LEN_DKL_ASCII,
3325*87a7269eSachartre 		    "SUN-DiskImage-%ld%cB cyl %d alt %d hd %d sec %d",
3326*87a7269eSachartre 		    size, prefix,
3327690555a1Sachartre 		    label.dkl_ncyl, label.dkl_acyl, label.dkl_nhead,
3328690555a1Sachartre 		    label.dkl_nsect);
33293c96341aSnarayan 
33303c96341aSnarayan 		/* default VTOC */
3331690555a1Sachartre 		label.dkl_vtoc.v_version = V_VERSION;
3332690555a1Sachartre 		label.dkl_vtoc.v_nparts = V_NUMPAR;
3333690555a1Sachartre 		label.dkl_vtoc.v_sanity = VTOC_SANE;
3334690555a1Sachartre 		label.dkl_vtoc.v_part[2].p_tag = V_BACKUP;
3335690555a1Sachartre 		label.dkl_map[2].dkl_cylno = 0;
3336690555a1Sachartre 		label.dkl_map[2].dkl_nblk = label.dkl_ncyl *
3337690555a1Sachartre 		    label.dkl_nhead * label.dkl_nsect;
3338690555a1Sachartre 		label.dkl_map[0] = label.dkl_map[2];
3339690555a1Sachartre 		label.dkl_map[0] = label.dkl_map[2];
3340690555a1Sachartre 		label.dkl_cksum = vd_lbl2cksum(&label);
3341690555a1Sachartre 
3342690555a1Sachartre 		/* write default label to file */
3343*87a7269eSachartre 		if ((rval = vd_file_set_vtoc(vd, &label)) != 0) {
3344690555a1Sachartre 			PRN("Can't write label to %s", file_path);
3345*87a7269eSachartre 			return (rval);
3346690555a1Sachartre 		}
33473c96341aSnarayan 	}
33483c96341aSnarayan 
3349690555a1Sachartre 	vd->nslices = label.dkl_vtoc.v_nparts;
33503c96341aSnarayan 
33513c96341aSnarayan 	/* sector size = block size = DEV_BSIZE */
3352*87a7269eSachartre 	vd->vdisk_size = vd->file_size / DEV_BSIZE;
33533c96341aSnarayan 	vd->vdisk_type = VD_DISK_TYPE_DISK;
33543c96341aSnarayan 	vd->vdisk_label = VD_DISK_LABEL_VTOC;
33553c96341aSnarayan 	vd->max_xfer_sz = maxphys / DEV_BSIZE; /* default transfer size */
33563c96341aSnarayan 
33573c96341aSnarayan 	/* Get max_xfer_sz from the device where the file is */
33583c96341aSnarayan 	dev = vd->file_vnode->v_vfsp->vfs_dev;
33593c96341aSnarayan 	dev_path[0] = NULL;
33603c96341aSnarayan 	if (ddi_dev_pathname(dev, S_IFBLK, dev_path) == DDI_SUCCESS) {
33613c96341aSnarayan 		PR0("underlying device = %s\n", dev_path);
33623c96341aSnarayan 	}
33633c96341aSnarayan 
33643c96341aSnarayan 	if ((status = ldi_open_by_dev(&dev, OTYP_BLK, FREAD,
33653c96341aSnarayan 	    kcred, &lhandle, vd->vds->ldi_ident)) != 0) {
33663c96341aSnarayan 		PR0("ldi_open_by_dev() returned errno %d for device %s",
33673c96341aSnarayan 		    status, dev_path);
33683c96341aSnarayan 	} else {
33693c96341aSnarayan 		if ((status = ldi_ioctl(lhandle, DKIOCINFO,
33703c96341aSnarayan 		    (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred,
33713c96341aSnarayan 		    &rval)) != 0) {
33723c96341aSnarayan 			PR0("ldi_ioctl(DKIOCINFO) returned errno %d for %s",
33733c96341aSnarayan 			    status, dev_path);
33743c96341aSnarayan 		} else {
33753c96341aSnarayan 			/*
33763c96341aSnarayan 			 * Store the device's max transfer size for
33773c96341aSnarayan 			 * return to the client
33783c96341aSnarayan 			 */
33793c96341aSnarayan 			vd->max_xfer_sz = dk_cinfo.dki_maxtransfer;
33803c96341aSnarayan 		}
33813c96341aSnarayan 
33823c96341aSnarayan 		PR0("close the device %s", dev_path);
33833c96341aSnarayan 		(void) ldi_close(lhandle, FREAD, kcred);
33843c96341aSnarayan 	}
33853c96341aSnarayan 
33863c96341aSnarayan 	PR0("using for file %s, dev %s, max_xfer = %u blks",
33873c96341aSnarayan 	    file_path, dev_path, vd->max_xfer_sz);
33883c96341aSnarayan 
3389690555a1Sachartre 	vd->dk_geom.dkg_ncyl = label.dkl_ncyl;
3390690555a1Sachartre 	vd->dk_geom.dkg_acyl = label.dkl_acyl;
3391690555a1Sachartre 	vd->dk_geom.dkg_pcyl = label.dkl_pcyl;
3392690555a1Sachartre 	vd->dk_geom.dkg_nhead = label.dkl_nhead;
3393690555a1Sachartre 	vd->dk_geom.dkg_nsect = label.dkl_nsect;
3394690555a1Sachartre 	vd->dk_geom.dkg_intrlv = label.dkl_intrlv;
3395690555a1Sachartre 	vd->dk_geom.dkg_apc = label.dkl_apc;
3396690555a1Sachartre 	vd->dk_geom.dkg_rpm = label.dkl_rpm;
3397690555a1Sachartre 	vd->dk_geom.dkg_write_reinstruct = label.dkl_write_reinstruct;
3398690555a1Sachartre 	vd->dk_geom.dkg_read_reinstruct = label.dkl_read_reinstruct;
33993c96341aSnarayan 
3400690555a1Sachartre 	vd->vtoc.v_sanity = label.dkl_vtoc.v_sanity;
3401690555a1Sachartre 	vd->vtoc.v_version = label.dkl_vtoc.v_version;
34023c96341aSnarayan 	vd->vtoc.v_sectorsz = DEV_BSIZE;
3403690555a1Sachartre 	vd->vtoc.v_nparts = label.dkl_vtoc.v_nparts;
34043c96341aSnarayan 
3405690555a1Sachartre 	bcopy(label.dkl_vtoc.v_volume, vd->vtoc.v_volume,
34063c96341aSnarayan 	    LEN_DKL_VVOL);
3407690555a1Sachartre 	bcopy(label.dkl_asciilabel, vd->vtoc.v_asciilabel,
34083c96341aSnarayan 	    LEN_DKL_ASCII);
34093c96341aSnarayan 
34103c96341aSnarayan 	for (i = 0; i < vd->nslices; i++) {
3411690555a1Sachartre 		vd->vtoc.timestamp[i] = label.dkl_vtoc.v_timestamp[i];
3412690555a1Sachartre 		vd->vtoc.v_part[i].p_tag = label.dkl_vtoc.v_part[i].p_tag;
3413690555a1Sachartre 		vd->vtoc.v_part[i].p_flag = label.dkl_vtoc.v_part[i].p_flag;
3414690555a1Sachartre 		vd->vtoc.v_part[i].p_start = label.dkl_map[i].dkl_cylno *
3415690555a1Sachartre 		    label.dkl_nhead * label.dkl_nsect;
3416690555a1Sachartre 		vd->vtoc.v_part[i].p_size = label.dkl_map[i].dkl_nblk;
34173c96341aSnarayan 		vd->ldi_handle[i] = NULL;
34183c96341aSnarayan 		vd->dev[i] = NULL;
34193c96341aSnarayan 	}
34203c96341aSnarayan 
3421*87a7269eSachartre 	/* Setup devid for the disk image */
3422*87a7269eSachartre 
3423*87a7269eSachartre 	status = vd_file_read_devid(vd, &vd->file_devid);
3424*87a7269eSachartre 
3425*87a7269eSachartre 	if (status == 0) {
3426*87a7269eSachartre 		/* a valid devid was found */
3427*87a7269eSachartre 		return (0);
3428*87a7269eSachartre 	}
3429*87a7269eSachartre 
3430*87a7269eSachartre 	if (status != EINVAL) {
3431*87a7269eSachartre 		/*
3432*87a7269eSachartre 		 * There was an error while trying to read the devid. So this
3433*87a7269eSachartre 		 * disk image may have a devid but we are unable to read it.
3434*87a7269eSachartre 		 */
3435*87a7269eSachartre 		PR0("can not read devid for %s", file_path);
3436*87a7269eSachartre 		vd->file_devid = NULL;
3437*87a7269eSachartre 		return (0);
3438*87a7269eSachartre 	}
3439*87a7269eSachartre 
3440*87a7269eSachartre 	/*
3441*87a7269eSachartre 	 * No valid device id was found so we create one. Note that a failure
3442*87a7269eSachartre 	 * to create a device id is not fatal and does not prevent the disk
3443*87a7269eSachartre 	 * image from being attached.
3444*87a7269eSachartre 	 */
3445*87a7269eSachartre 	PR1("creating devid for %s", file_path);
3446*87a7269eSachartre 
3447*87a7269eSachartre 	if (ddi_devid_init(vd->vds->dip, DEVID_FAB, NULL, 0,
3448*87a7269eSachartre 	    &vd->file_devid) != DDI_SUCCESS) {
3449*87a7269eSachartre 		PR0("fail to create devid for %s", file_path);
3450*87a7269eSachartre 		vd->file_devid = NULL;
3451*87a7269eSachartre 		return (0);
3452*87a7269eSachartre 	}
3453*87a7269eSachartre 
3454*87a7269eSachartre 	/* write devid to the disk image */
3455*87a7269eSachartre 	if (vd_file_write_devid(vd, vd->file_devid) != 0) {
3456*87a7269eSachartre 		PR0("fail to write devid for %s", file_path);
3457*87a7269eSachartre 		ddi_devid_free(vd->file_devid);
3458*87a7269eSachartre 		vd->file_devid = NULL;
3459*87a7269eSachartre 	}
3460*87a7269eSachartre 
34613c96341aSnarayan 	return (0);
34623c96341aSnarayan }
34633c96341aSnarayan 
34643c96341aSnarayan static int
34653c96341aSnarayan vd_setup_vd(vd_t *vd)
34661ae08745Sheppo {
3467e1ebb9ecSlm66018 	int		rval, status;
34681ae08745Sheppo 	dev_info_t	*dip;
34691ae08745Sheppo 	struct dk_cinfo	dk_cinfo;
34703c96341aSnarayan 	char		*device_path = vd->device_path;
34711ae08745Sheppo 
34724bac2208Snarayan 	/*
34734bac2208Snarayan 	 * We need to open with FNDELAY so that opening an empty partition
34744bac2208Snarayan 	 * does not fail.
34754bac2208Snarayan 	 */
34764bac2208Snarayan 	if ((status = ldi_open_by_name(device_path, vd_open_flags | FNDELAY,
34774bac2208Snarayan 	    kcred, &vd->ldi_handle[0], vd->vds->ldi_ident)) != 0) {
34783c96341aSnarayan 		PR0("ldi_open_by_name(%s) = errno %d", device_path, status);
3479690555a1Sachartre 		vd->ldi_handle[0] = NULL;
34803c96341aSnarayan 
34813c96341aSnarayan 		/* this may not be a device try opening as a file */
34823c96341aSnarayan 		if (status == ENXIO || status == ENODEV)
34833c96341aSnarayan 			status = vd_setup_file(vd);
34843c96341aSnarayan 		if (status) {
3485690555a1Sachartre 			PRN("Cannot use device/file (%s), errno=%d\n",
34863c96341aSnarayan 			    device_path, status);
34873c96341aSnarayan 			if (status == ENXIO || status == ENODEV ||
34883c96341aSnarayan 			    status == ENOENT) {
34893c96341aSnarayan 				return (EAGAIN);
34903c96341aSnarayan 			}
34913c96341aSnarayan 		}
34920a55fbb7Slm66018 		return (status);
34930a55fbb7Slm66018 	}
34940a55fbb7Slm66018 
34954bac2208Snarayan 	/*
34964bac2208Snarayan 	 * nslices must be updated now so that vds_destroy_vd() will close
34974bac2208Snarayan 	 * the slice we have just opened in case of an error.
34984bac2208Snarayan 	 */
34994bac2208Snarayan 	vd->nslices = 1;
35003c96341aSnarayan 	vd->file = B_FALSE;
35014bac2208Snarayan 
3502e1ebb9ecSlm66018 	/* Get device number and size of backing device */
35030a55fbb7Slm66018 	if ((status = ldi_get_dev(vd->ldi_handle[0], &vd->dev[0])) != 0) {
35041ae08745Sheppo 		PRN("ldi_get_dev() returned errno %d for %s",
3505e1ebb9ecSlm66018 		    status, device_path);
35061ae08745Sheppo 		return (status);
35071ae08745Sheppo 	}
35080a55fbb7Slm66018 	if (ldi_get_size(vd->ldi_handle[0], &vd->vdisk_size) != DDI_SUCCESS) {
3509e1ebb9ecSlm66018 		PRN("ldi_get_size() failed for %s", device_path);
35101ae08745Sheppo 		return (EIO);
35111ae08745Sheppo 	}
3512e1ebb9ecSlm66018 	vd->vdisk_size = lbtodb(vd->vdisk_size);	/* convert to blocks */
35131ae08745Sheppo 
3514e1ebb9ecSlm66018 	/* Verify backing device supports dk_cinfo, dk_geom, and vtoc */
3515e1ebb9ecSlm66018 	if ((status = ldi_ioctl(vd->ldi_handle[0], DKIOCINFO,
3516e1ebb9ecSlm66018 	    (intptr_t)&dk_cinfo, (vd_open_flags | FKIOCTL), kcred,
3517e1ebb9ecSlm66018 	    &rval)) != 0) {
3518e1ebb9ecSlm66018 		PRN("ldi_ioctl(DKIOCINFO) returned errno %d for %s",
3519e1ebb9ecSlm66018 		    status, device_path);
3520e1ebb9ecSlm66018 		return (status);
3521e1ebb9ecSlm66018 	}
3522e1ebb9ecSlm66018 	if (dk_cinfo.dki_partition >= V_NUMPAR) {
3523e1ebb9ecSlm66018 		PRN("slice %u >= maximum slice %u for %s",
3524e1ebb9ecSlm66018 		    dk_cinfo.dki_partition, V_NUMPAR, device_path);
3525e1ebb9ecSlm66018 		return (EIO);
3526e1ebb9ecSlm66018 	}
35274bac2208Snarayan 
35284bac2208Snarayan 	status = vd_read_vtoc(vd->ldi_handle[0], &vd->vtoc, &vd->vdisk_label);
35294bac2208Snarayan 
35304bac2208Snarayan 	if (status != 0) {
35314bac2208Snarayan 		PRN("vd_read_vtoc returned errno %d for %s",
3532e1ebb9ecSlm66018 		    status, device_path);
3533e1ebb9ecSlm66018 		return (status);
3534e1ebb9ecSlm66018 	}
35354bac2208Snarayan 
35364bac2208Snarayan 	if (vd->vdisk_label == VD_DISK_LABEL_VTOC &&
35374bac2208Snarayan 	    (status = ldi_ioctl(vd->ldi_handle[0], DKIOCGGEOM,
35384bac2208Snarayan 	    (intptr_t)&vd->dk_geom, (vd_open_flags | FKIOCTL),
35394bac2208Snarayan 	    kcred, &rval)) != 0) {
35404bac2208Snarayan 		PRN("ldi_ioctl(DKIOCGEOM) returned errno %d for %s",
3541e1ebb9ecSlm66018 		    status, device_path);
3542e1ebb9ecSlm66018 		return (status);
3543e1ebb9ecSlm66018 	}
3544e1ebb9ecSlm66018 
3545e1ebb9ecSlm66018 	/* Store the device's max transfer size for return to the client */
3546e1ebb9ecSlm66018 	vd->max_xfer_sz = dk_cinfo.dki_maxtransfer;
3547e1ebb9ecSlm66018 
3548e1ebb9ecSlm66018 	/* Determine if backing device is a pseudo device */
35491ae08745Sheppo 	if ((dip = ddi_hold_devi_by_instance(getmajor(vd->dev[0]),
35501ae08745Sheppo 	    dev_to_instance(vd->dev[0]), 0))  == NULL) {
3551e1ebb9ecSlm66018 		PRN("%s is no longer accessible", device_path);
35521ae08745Sheppo 		return (EIO);
35531ae08745Sheppo 	}
35541ae08745Sheppo 	vd->pseudo = is_pseudo_device(dip);
35551ae08745Sheppo 	ddi_release_devi(dip);
35561ae08745Sheppo 	if (vd->pseudo) {
35571ae08745Sheppo 		vd->vdisk_type	= VD_DISK_TYPE_SLICE;
35581ae08745Sheppo 		vd->nslices	= 1;
35591ae08745Sheppo 		return (0);	/* ...and we're done */
35601ae08745Sheppo 	}
35611ae08745Sheppo 
35620a55fbb7Slm66018 	/* If slice is entire-disk slice, initialize for full disk */
35630a55fbb7Slm66018 	if (dk_cinfo.dki_partition == VD_ENTIRE_DISK_SLICE)
35640a55fbb7Slm66018 		return (vd_setup_full_disk(vd));
35651ae08745Sheppo 
35660a55fbb7Slm66018 
3567e1ebb9ecSlm66018 	/* Otherwise, we have a non-entire slice of a device */
35681ae08745Sheppo 	vd->vdisk_type	= VD_DISK_TYPE_SLICE;
35691ae08745Sheppo 	vd->nslices	= 1;
35701ae08745Sheppo 
35714bac2208Snarayan 	if (vd->vdisk_label == VD_DISK_LABEL_EFI) {
35724bac2208Snarayan 		status = vd_setup_partition_efi(vd);
35734bac2208Snarayan 		return (status);
35744bac2208Snarayan 	}
35751ae08745Sheppo 
3576e1ebb9ecSlm66018 	/* Initialize dk_geom structure for single-slice device */
35771ae08745Sheppo 	if (vd->dk_geom.dkg_nsect == 0) {
3578690555a1Sachartre 		PRN("%s geometry claims 0 sectors per track", device_path);
35791ae08745Sheppo 		return (EIO);
35801ae08745Sheppo 	}
35811ae08745Sheppo 	if (vd->dk_geom.dkg_nhead == 0) {
3582690555a1Sachartre 		PRN("%s geometry claims 0 heads", device_path);
35831ae08745Sheppo 		return (EIO);
35841ae08745Sheppo 	}
35851ae08745Sheppo 	vd->dk_geom.dkg_ncyl =
3586e1ebb9ecSlm66018 	    vd->vdisk_size/vd->dk_geom.dkg_nsect/vd->dk_geom.dkg_nhead;
35871ae08745Sheppo 	vd->dk_geom.dkg_acyl = 0;
35881ae08745Sheppo 	vd->dk_geom.dkg_pcyl = vd->dk_geom.dkg_ncyl + vd->dk_geom.dkg_acyl;
35891ae08745Sheppo 
35901ae08745Sheppo 
3591e1ebb9ecSlm66018 	/* Initialize vtoc structure for single-slice device */
35921ae08745Sheppo 	bcopy(VD_VOLUME_NAME, vd->vtoc.v_volume,
35931ae08745Sheppo 	    MIN(sizeof (VD_VOLUME_NAME), sizeof (vd->vtoc.v_volume)));
35941ae08745Sheppo 	bzero(vd->vtoc.v_part, sizeof (vd->vtoc.v_part));
35951ae08745Sheppo 	vd->vtoc.v_nparts = 1;
35961ae08745Sheppo 	vd->vtoc.v_part[0].p_tag = V_UNASSIGNED;
35971ae08745Sheppo 	vd->vtoc.v_part[0].p_flag = 0;
35981ae08745Sheppo 	vd->vtoc.v_part[0].p_start = 0;
3599e1ebb9ecSlm66018 	vd->vtoc.v_part[0].p_size = vd->vdisk_size;
36001ae08745Sheppo 	bcopy(VD_ASCIILABEL, vd->vtoc.v_asciilabel,
36011ae08745Sheppo 	    MIN(sizeof (VD_ASCIILABEL), sizeof (vd->vtoc.v_asciilabel)));
36021ae08745Sheppo 
36031ae08745Sheppo 
36041ae08745Sheppo 	return (0);
36051ae08745Sheppo }
36061ae08745Sheppo 
36071ae08745Sheppo static int
3608e1ebb9ecSlm66018 vds_do_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id,
36091ae08745Sheppo     vd_t **vdp)
36101ae08745Sheppo {
36111ae08745Sheppo 	char			tq_name[TASKQ_NAMELEN];
36120a55fbb7Slm66018 	int			status;
36131ae08745Sheppo 	ddi_iblock_cookie_t	iblock = NULL;
36141ae08745Sheppo 	ldc_attr_t		ldc_attr;
36151ae08745Sheppo 	vd_t			*vd;
36161ae08745Sheppo 
36171ae08745Sheppo 
36181ae08745Sheppo 	ASSERT(vds != NULL);
3619e1ebb9ecSlm66018 	ASSERT(device_path != NULL);
36201ae08745Sheppo 	ASSERT(vdp != NULL);
3621e1ebb9ecSlm66018 	PR0("Adding vdisk for %s", device_path);
36221ae08745Sheppo 
36231ae08745Sheppo 	if ((vd = kmem_zalloc(sizeof (*vd), KM_NOSLEEP)) == NULL) {
36241ae08745Sheppo 		PRN("No memory for virtual disk");
36251ae08745Sheppo 		return (EAGAIN);
36261ae08745Sheppo 	}
36271ae08745Sheppo 	*vdp = vd;	/* assign here so vds_destroy_vd() can cleanup later */
36281ae08745Sheppo 	vd->vds = vds;
36293c96341aSnarayan 	(void) strncpy(vd->device_path, device_path, MAXPATHLEN);
36301ae08745Sheppo 
36310a55fbb7Slm66018 	/* Open vdisk and initialize parameters */
36323c96341aSnarayan 	if ((status = vd_setup_vd(vd)) == 0) {
36333c96341aSnarayan 		vd->initialized |= VD_DISK_READY;
36341ae08745Sheppo 
36353c96341aSnarayan 		ASSERT(vd->nslices > 0 && vd->nslices <= V_NUMPAR);
36363c96341aSnarayan 		PR0("vdisk_type = %s, pseudo = %s, file = %s, nslices = %u",
36373c96341aSnarayan 		    ((vd->vdisk_type == VD_DISK_TYPE_DISK) ? "disk" : "slice"),
36383c96341aSnarayan 		    (vd->pseudo ? "yes" : "no"), (vd->file ? "yes" : "no"),
36393c96341aSnarayan 		    vd->nslices);
36403c96341aSnarayan 	} else {
36413c96341aSnarayan 		if (status != EAGAIN)
36423c96341aSnarayan 			return (status);
36433c96341aSnarayan 	}
36441ae08745Sheppo 
36451ae08745Sheppo 	/* Initialize locking */
36461ae08745Sheppo 	if (ddi_get_soft_iblock_cookie(vds->dip, DDI_SOFTINT_MED,
36471ae08745Sheppo 	    &iblock) != DDI_SUCCESS) {
36481ae08745Sheppo 		PRN("Could not get iblock cookie.");
36491ae08745Sheppo 		return (EIO);
36501ae08745Sheppo 	}
36511ae08745Sheppo 
36521ae08745Sheppo 	mutex_init(&vd->lock, NULL, MUTEX_DRIVER, iblock);
36531ae08745Sheppo 	vd->initialized |= VD_LOCKING;
36541ae08745Sheppo 
36551ae08745Sheppo 
3656d10e4ef2Snarayan 	/* Create start and completion task queues for the vdisk */
3657d10e4ef2Snarayan 	(void) snprintf(tq_name, sizeof (tq_name), "vd_startq%lu", id);
36581ae08745Sheppo 	PR1("tq_name = %s", tq_name);
3659d10e4ef2Snarayan 	if ((vd->startq = ddi_taskq_create(vds->dip, tq_name, 1,
36601ae08745Sheppo 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
36611ae08745Sheppo 		PRN("Could not create task queue");
36621ae08745Sheppo 		return (EIO);
36631ae08745Sheppo 	}
3664d10e4ef2Snarayan 	(void) snprintf(tq_name, sizeof (tq_name), "vd_completionq%lu", id);
3665d10e4ef2Snarayan 	PR1("tq_name = %s", tq_name);
3666d10e4ef2Snarayan 	if ((vd->completionq = ddi_taskq_create(vds->dip, tq_name, 1,
3667d10e4ef2Snarayan 	    TASKQ_DEFAULTPRI, 0)) == NULL) {
3668d10e4ef2Snarayan 		PRN("Could not create task queue");
3669d10e4ef2Snarayan 		return (EIO);
3670d10e4ef2Snarayan 	}
3671d10e4ef2Snarayan 	vd->enabled = 1;	/* before callback can dispatch to startq */
36721ae08745Sheppo 
36731ae08745Sheppo 
36741ae08745Sheppo 	/* Bring up LDC */
36751ae08745Sheppo 	ldc_attr.devclass	= LDC_DEV_BLK_SVC;
36761ae08745Sheppo 	ldc_attr.instance	= ddi_get_instance(vds->dip);
36771ae08745Sheppo 	ldc_attr.mode		= LDC_MODE_UNRELIABLE;
3678e1ebb9ecSlm66018 	ldc_attr.mtu		= VD_LDC_MTU;
36791ae08745Sheppo 	if ((status = ldc_init(ldc_id, &ldc_attr, &vd->ldc_handle)) != 0) {
3680690555a1Sachartre 		PRN("Could not initialize LDC channel %lu, "
3681690555a1Sachartre 		    "init failed with error %d", ldc_id, status);
36821ae08745Sheppo 		return (status);
36831ae08745Sheppo 	}
36841ae08745Sheppo 	vd->initialized |= VD_LDC;
36851ae08745Sheppo 
36861ae08745Sheppo 	if ((status = ldc_reg_callback(vd->ldc_handle, vd_handle_ldc_events,
36871ae08745Sheppo 	    (caddr_t)vd)) != 0) {
3688690555a1Sachartre 		PRN("Could not initialize LDC channel %lu,"
3689690555a1Sachartre 		    "reg_callback failed with error %d", ldc_id, status);
36901ae08745Sheppo 		return (status);
36911ae08745Sheppo 	}
36921ae08745Sheppo 
36931ae08745Sheppo 	if ((status = ldc_open(vd->ldc_handle)) != 0) {
3694690555a1Sachartre 		PRN("Could not initialize LDC channel %lu,"
3695690555a1Sachartre 		    "open failed with error %d", ldc_id, status);
36961ae08745Sheppo 		return (status);
36971ae08745Sheppo 	}
36981ae08745Sheppo 
36993af08d82Slm66018 	if ((status = ldc_up(vd->ldc_handle)) != 0) {
370034683adeSsg70180 		PR0("ldc_up() returned errno %d", status);
37013af08d82Slm66018 	}
37023af08d82Slm66018 
37034bac2208Snarayan 	/* Allocate the inband task memory handle */
37044bac2208Snarayan 	status = ldc_mem_alloc_handle(vd->ldc_handle, &(vd->inband_task.mhdl));
37054bac2208Snarayan 	if (status) {
3706690555a1Sachartre 		PRN("Could not initialize LDC channel %lu,"
3707690555a1Sachartre 		    "alloc_handle failed with error %d", ldc_id, status);
37084bac2208Snarayan 		return (ENXIO);
37094bac2208Snarayan 	}
37101ae08745Sheppo 
37111ae08745Sheppo 	/* Add the successfully-initialized vdisk to the server's table */
37121ae08745Sheppo 	if (mod_hash_insert(vds->vd_table, (mod_hash_key_t)id, vd) != 0) {
37131ae08745Sheppo 		PRN("Error adding vdisk ID %lu to table", id);
37141ae08745Sheppo 		return (EIO);
37151ae08745Sheppo 	}
37161ae08745Sheppo 
37173af08d82Slm66018 	/* Allocate the staging buffer */
37183af08d82Slm66018 	vd->max_msglen	= sizeof (vio_msg_t);	/* baseline vio message size */
37193af08d82Slm66018 	vd->vio_msgp = kmem_alloc(vd->max_msglen, KM_SLEEP);
37203af08d82Slm66018 
37213af08d82Slm66018 	/* store initial state */
37223af08d82Slm66018 	vd->state = VD_STATE_INIT;
37233af08d82Slm66018 
37241ae08745Sheppo 	return (0);
37251ae08745Sheppo }
37261ae08745Sheppo 
37273af08d82Slm66018 static void
37283af08d82Slm66018 vd_free_dring_task(vd_t *vdp)
37293af08d82Slm66018 {
37303af08d82Slm66018 	if (vdp->dring_task != NULL) {
37313af08d82Slm66018 		ASSERT(vdp->dring_len != 0);
37323af08d82Slm66018 		/* Free all dring_task memory handles */
37333af08d82Slm66018 		for (int i = 0; i < vdp->dring_len; i++) {
37343af08d82Slm66018 			(void) ldc_mem_free_handle(vdp->dring_task[i].mhdl);
37353af08d82Slm66018 			kmem_free(vdp->dring_task[i].msg, vdp->max_msglen);
37363af08d82Slm66018 			vdp->dring_task[i].msg = NULL;
37373af08d82Slm66018 		}
37383af08d82Slm66018 		kmem_free(vdp->dring_task,
37393af08d82Slm66018 		    (sizeof (*vdp->dring_task)) * vdp->dring_len);
37403af08d82Slm66018 		vdp->dring_task = NULL;
37413af08d82Slm66018 	}
37423af08d82Slm66018 }
37433af08d82Slm66018 
37441ae08745Sheppo /*
37451ae08745Sheppo  * Destroy the state associated with a virtual disk
37461ae08745Sheppo  */
37471ae08745Sheppo static void
37481ae08745Sheppo vds_destroy_vd(void *arg)
37491ae08745Sheppo {
37501ae08745Sheppo 	vd_t	*vd = (vd_t *)arg;
375134683adeSsg70180 	int	retry = 0, rv;
37521ae08745Sheppo 
37531ae08745Sheppo 	if (vd == NULL)
37541ae08745Sheppo 		return;
37551ae08745Sheppo 
3756d10e4ef2Snarayan 	PR0("Destroying vdisk state");
3757d10e4ef2Snarayan 
37584bac2208Snarayan 	if (vd->dk_efi.dki_data != NULL)
37594bac2208Snarayan 		kmem_free(vd->dk_efi.dki_data, vd->dk_efi.dki_length);
37604bac2208Snarayan 
37611ae08745Sheppo 	/* Disable queuing requests for the vdisk */
37621ae08745Sheppo 	if (vd->initialized & VD_LOCKING) {
37631ae08745Sheppo 		mutex_enter(&vd->lock);
37641ae08745Sheppo 		vd->enabled = 0;
37651ae08745Sheppo 		mutex_exit(&vd->lock);
37661ae08745Sheppo 	}
37671ae08745Sheppo 
3768d10e4ef2Snarayan 	/* Drain and destroy start queue (*before* destroying completionq) */
3769d10e4ef2Snarayan 	if (vd->startq != NULL)
3770d10e4ef2Snarayan 		ddi_taskq_destroy(vd->startq);	/* waits for queued tasks */
3771d10e4ef2Snarayan 
3772d10e4ef2Snarayan 	/* Drain and destroy completion queue (*before* shutting down LDC) */
3773d10e4ef2Snarayan 	if (vd->completionq != NULL)
3774d10e4ef2Snarayan 		ddi_taskq_destroy(vd->completionq);	/* waits for tasks */
3775d10e4ef2Snarayan 
37763af08d82Slm66018 	vd_free_dring_task(vd);
37773af08d82Slm66018 
377834683adeSsg70180 	/* Free the inband task memory handle */
377934683adeSsg70180 	(void) ldc_mem_free_handle(vd->inband_task.mhdl);
378034683adeSsg70180 
378134683adeSsg70180 	/* Shut down LDC */
378234683adeSsg70180 	if (vd->initialized & VD_LDC) {
378334683adeSsg70180 		/* unmap the dring */
378434683adeSsg70180 		if (vd->initialized & VD_DRING)
378534683adeSsg70180 			(void) ldc_mem_dring_unmap(vd->dring_handle);
378634683adeSsg70180 
378734683adeSsg70180 		/* close LDC channel - retry on EAGAIN */
378834683adeSsg70180 		while ((rv = ldc_close(vd->ldc_handle)) == EAGAIN) {
378934683adeSsg70180 			if (++retry > vds_ldc_retries) {
379034683adeSsg70180 				PR0("Timed out closing channel");
379134683adeSsg70180 				break;
379234683adeSsg70180 			}
379334683adeSsg70180 			drv_usecwait(vds_ldc_delay);
379434683adeSsg70180 		}
379534683adeSsg70180 		if (rv == 0) {
379634683adeSsg70180 			(void) ldc_unreg_callback(vd->ldc_handle);
379734683adeSsg70180 			(void) ldc_fini(vd->ldc_handle);
379834683adeSsg70180 		} else {
379934683adeSsg70180 			/*
380034683adeSsg70180 			 * Closing the LDC channel has failed. Ideally we should
380134683adeSsg70180 			 * fail here but there is no Zeus level infrastructure
380234683adeSsg70180 			 * to handle this. The MD has already been changed and
380334683adeSsg70180 			 * we have to do the close. So we try to do as much
380434683adeSsg70180 			 * clean up as we can.
380534683adeSsg70180 			 */
380634683adeSsg70180 			(void) ldc_set_cb_mode(vd->ldc_handle, LDC_CB_DISABLE);
380734683adeSsg70180 			while (ldc_unreg_callback(vd->ldc_handle) == EAGAIN)
380834683adeSsg70180 				drv_usecwait(vds_ldc_delay);
380934683adeSsg70180 		}
381034683adeSsg70180 	}
381134683adeSsg70180 
38123af08d82Slm66018 	/* Free the staging buffer for msgs */
38133af08d82Slm66018 	if (vd->vio_msgp != NULL) {
38143af08d82Slm66018 		kmem_free(vd->vio_msgp, vd->max_msglen);
38153af08d82Slm66018 		vd->vio_msgp = NULL;
38163af08d82Slm66018 	}
38173af08d82Slm66018 
38183af08d82Slm66018 	/* Free the inband message buffer */
38193af08d82Slm66018 	if (vd->inband_task.msg != NULL) {
38203af08d82Slm66018 		kmem_free(vd->inband_task.msg, vd->max_msglen);
38213af08d82Slm66018 		vd->inband_task.msg = NULL;
3822d10e4ef2Snarayan 	}
38233c96341aSnarayan 	if (vd->file) {
3824690555a1Sachartre 		/* Close file */
38253c96341aSnarayan 		(void) VOP_CLOSE(vd->file_vnode, vd_open_flags, 1,
38263c96341aSnarayan 		    0, kcred);
38273c96341aSnarayan 		VN_RELE(vd->file_vnode);
3828*87a7269eSachartre 		if (vd->file_devid != NULL)
3829*87a7269eSachartre 			ddi_devid_free(vd->file_devid);
38303c96341aSnarayan 	} else {
38311ae08745Sheppo 		/* Close any open backing-device slices */
38321ae08745Sheppo 		for (uint_t slice = 0; slice < vd->nslices; slice++) {
38331ae08745Sheppo 			if (vd->ldi_handle[slice] != NULL) {
38341ae08745Sheppo 				PR0("Closing slice %u", slice);
38351ae08745Sheppo 				(void) ldi_close(vd->ldi_handle[slice],
38364bac2208Snarayan 				    vd_open_flags | FNDELAY, kcred);
38371ae08745Sheppo 			}
38381ae08745Sheppo 		}
38393c96341aSnarayan 	}
38401ae08745Sheppo 
38411ae08745Sheppo 	/* Free lock */
38421ae08745Sheppo 	if (vd->initialized & VD_LOCKING)
38431ae08745Sheppo 		mutex_destroy(&vd->lock);
38441ae08745Sheppo 
38451ae08745Sheppo 	/* Finally, free the vdisk structure itself */
38461ae08745Sheppo 	kmem_free(vd, sizeof (*vd));
38471ae08745Sheppo }
38481ae08745Sheppo 
38491ae08745Sheppo static int
3850e1ebb9ecSlm66018 vds_init_vd(vds_t *vds, uint64_t id, char *device_path, uint64_t ldc_id)
38511ae08745Sheppo {
38521ae08745Sheppo 	int	status;
38531ae08745Sheppo 	vd_t	*vd = NULL;
38541ae08745Sheppo 
38551ae08745Sheppo 
3856e1ebb9ecSlm66018 	if ((status = vds_do_init_vd(vds, id, device_path, ldc_id, &vd)) != 0)
38571ae08745Sheppo 		vds_destroy_vd(vd);
38581ae08745Sheppo 
38591ae08745Sheppo 	return (status);
38601ae08745Sheppo }
38611ae08745Sheppo 
38621ae08745Sheppo static int
38631ae08745Sheppo vds_do_get_ldc_id(md_t *md, mde_cookie_t vd_node, mde_cookie_t *channel,
38641ae08745Sheppo     uint64_t *ldc_id)
38651ae08745Sheppo {
38661ae08745Sheppo 	int	num_channels;
38671ae08745Sheppo 
38681ae08745Sheppo 
38691ae08745Sheppo 	/* Look for channel endpoint child(ren) of the vdisk MD node */
38701ae08745Sheppo 	if ((num_channels = md_scan_dag(md, vd_node,
38711ae08745Sheppo 	    md_find_name(md, VD_CHANNEL_ENDPOINT),
38721ae08745Sheppo 	    md_find_name(md, "fwd"), channel)) <= 0) {
38731ae08745Sheppo 		PRN("No \"%s\" found for virtual disk", VD_CHANNEL_ENDPOINT);
38741ae08745Sheppo 		return (-1);
38751ae08745Sheppo 	}
38761ae08745Sheppo 
38771ae08745Sheppo 	/* Get the "id" value for the first channel endpoint node */
38781ae08745Sheppo 	if (md_get_prop_val(md, channel[0], VD_ID_PROP, ldc_id) != 0) {
38791ae08745Sheppo 		PRN("No \"%s\" property found for \"%s\" of vdisk",
38801ae08745Sheppo 		    VD_ID_PROP, VD_CHANNEL_ENDPOINT);
38811ae08745Sheppo 		return (-1);
38821ae08745Sheppo 	}
38831ae08745Sheppo 
38841ae08745Sheppo 	if (num_channels > 1) {
38851ae08745Sheppo 		PRN("Using ID of first of multiple channels for this vdisk");
38861ae08745Sheppo 	}
38871ae08745Sheppo 
38881ae08745Sheppo 	return (0);
38891ae08745Sheppo }
38901ae08745Sheppo 
38911ae08745Sheppo static int
38921ae08745Sheppo vds_get_ldc_id(md_t *md, mde_cookie_t vd_node, uint64_t *ldc_id)
38931ae08745Sheppo {
38941ae08745Sheppo 	int		num_nodes, status;
38951ae08745Sheppo 	size_t		size;
38961ae08745Sheppo 	mde_cookie_t	*channel;
38971ae08745Sheppo 
38981ae08745Sheppo 
38991ae08745Sheppo 	if ((num_nodes = md_node_count(md)) <= 0) {
39001ae08745Sheppo 		PRN("Invalid node count in Machine Description subtree");
39011ae08745Sheppo 		return (-1);
39021ae08745Sheppo 	}
39031ae08745Sheppo 	size = num_nodes*(sizeof (*channel));
39041ae08745Sheppo 	channel = kmem_zalloc(size, KM_SLEEP);
39051ae08745Sheppo 	status = vds_do_get_ldc_id(md, vd_node, channel, ldc_id);
39061ae08745Sheppo 	kmem_free(channel, size);
39071ae08745Sheppo 
39081ae08745Sheppo 	return (status);
39091ae08745Sheppo }
39101ae08745Sheppo 
39111ae08745Sheppo static void
39121ae08745Sheppo vds_add_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
39131ae08745Sheppo {
3914e1ebb9ecSlm66018 	char		*device_path = NULL;
39151ae08745Sheppo 	uint64_t	id = 0, ldc_id = 0;
39161ae08745Sheppo 
39171ae08745Sheppo 
39181ae08745Sheppo 	if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
39191ae08745Sheppo 		PRN("Error getting vdisk \"%s\"", VD_ID_PROP);
39201ae08745Sheppo 		return;
39211ae08745Sheppo 	}
39221ae08745Sheppo 	PR0("Adding vdisk ID %lu", id);
39231ae08745Sheppo 	if (md_get_prop_str(md, vd_node, VD_BLOCK_DEVICE_PROP,
3924e1ebb9ecSlm66018 	    &device_path) != 0) {
39251ae08745Sheppo 		PRN("Error getting vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
39261ae08745Sheppo 		return;
39271ae08745Sheppo 	}
39281ae08745Sheppo 
39291ae08745Sheppo 	if (vds_get_ldc_id(md, vd_node, &ldc_id) != 0) {
39301ae08745Sheppo 		PRN("Error getting LDC ID for vdisk %lu", id);
39311ae08745Sheppo 		return;
39321ae08745Sheppo 	}
39331ae08745Sheppo 
3934e1ebb9ecSlm66018 	if (vds_init_vd(vds, id, device_path, ldc_id) != 0) {
39351ae08745Sheppo 		PRN("Failed to add vdisk ID %lu", id);
39361ae08745Sheppo 		return;
39371ae08745Sheppo 	}
39381ae08745Sheppo }
39391ae08745Sheppo 
39401ae08745Sheppo static void
39411ae08745Sheppo vds_remove_vd(vds_t *vds, md_t *md, mde_cookie_t vd_node)
39421ae08745Sheppo {
39431ae08745Sheppo 	uint64_t	id = 0;
39441ae08745Sheppo 
39451ae08745Sheppo 
39461ae08745Sheppo 	if (md_get_prop_val(md, vd_node, VD_ID_PROP, &id) != 0) {
39471ae08745Sheppo 		PRN("Unable to get \"%s\" property from vdisk's MD node",
39481ae08745Sheppo 		    VD_ID_PROP);
39491ae08745Sheppo 		return;
39501ae08745Sheppo 	}
39511ae08745Sheppo 	PR0("Removing vdisk ID %lu", id);
39521ae08745Sheppo 	if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)id) != 0)
39531ae08745Sheppo 		PRN("No vdisk entry found for vdisk ID %lu", id);
39541ae08745Sheppo }
39551ae08745Sheppo 
39561ae08745Sheppo static void
39571ae08745Sheppo vds_change_vd(vds_t *vds, md_t *prev_md, mde_cookie_t prev_vd_node,
39581ae08745Sheppo     md_t *curr_md, mde_cookie_t curr_vd_node)
39591ae08745Sheppo {
39601ae08745Sheppo 	char		*curr_dev, *prev_dev;
39611ae08745Sheppo 	uint64_t	curr_id = 0, curr_ldc_id = 0;
39621ae08745Sheppo 	uint64_t	prev_id = 0, prev_ldc_id = 0;
39631ae08745Sheppo 	size_t		len;
39641ae08745Sheppo 
39651ae08745Sheppo 
39661ae08745Sheppo 	/* Validate that vdisk ID has not changed */
39671ae08745Sheppo 	if (md_get_prop_val(prev_md, prev_vd_node, VD_ID_PROP, &prev_id) != 0) {
39681ae08745Sheppo 		PRN("Error getting previous vdisk \"%s\" property",
39691ae08745Sheppo 		    VD_ID_PROP);
39701ae08745Sheppo 		return;
39711ae08745Sheppo 	}
39721ae08745Sheppo 	if (md_get_prop_val(curr_md, curr_vd_node, VD_ID_PROP, &curr_id) != 0) {
39731ae08745Sheppo 		PRN("Error getting current vdisk \"%s\" property", VD_ID_PROP);
39741ae08745Sheppo 		return;
39751ae08745Sheppo 	}
39761ae08745Sheppo 	if (curr_id != prev_id) {
39771ae08745Sheppo 		PRN("Not changing vdisk:  ID changed from %lu to %lu",
39781ae08745Sheppo 		    prev_id, curr_id);
39791ae08745Sheppo 		return;
39801ae08745Sheppo 	}
39811ae08745Sheppo 
39821ae08745Sheppo 	/* Validate that LDC ID has not changed */
39831ae08745Sheppo 	if (vds_get_ldc_id(prev_md, prev_vd_node, &prev_ldc_id) != 0) {
39841ae08745Sheppo 		PRN("Error getting LDC ID for vdisk %lu", prev_id);
39851ae08745Sheppo 		return;
39861ae08745Sheppo 	}
39871ae08745Sheppo 
39881ae08745Sheppo 	if (vds_get_ldc_id(curr_md, curr_vd_node, &curr_ldc_id) != 0) {
39891ae08745Sheppo 		PRN("Error getting LDC ID for vdisk %lu", curr_id);
39901ae08745Sheppo 		return;
39911ae08745Sheppo 	}
39921ae08745Sheppo 	if (curr_ldc_id != prev_ldc_id) {
39930a55fbb7Slm66018 		_NOTE(NOTREACHED);	/* lint is confused */
39941ae08745Sheppo 		PRN("Not changing vdisk:  "
39951ae08745Sheppo 		    "LDC ID changed from %lu to %lu", prev_ldc_id, curr_ldc_id);
39961ae08745Sheppo 		return;
39971ae08745Sheppo 	}
39981ae08745Sheppo 
39991ae08745Sheppo 	/* Determine whether device path has changed */
40001ae08745Sheppo 	if (md_get_prop_str(prev_md, prev_vd_node, VD_BLOCK_DEVICE_PROP,
40011ae08745Sheppo 	    &prev_dev) != 0) {
40021ae08745Sheppo 		PRN("Error getting previous vdisk \"%s\"",
40031ae08745Sheppo 		    VD_BLOCK_DEVICE_PROP);
40041ae08745Sheppo 		return;
40051ae08745Sheppo 	}
40061ae08745Sheppo 	if (md_get_prop_str(curr_md, curr_vd_node, VD_BLOCK_DEVICE_PROP,
40071ae08745Sheppo 	    &curr_dev) != 0) {
40081ae08745Sheppo 		PRN("Error getting current vdisk \"%s\"", VD_BLOCK_DEVICE_PROP);
40091ae08745Sheppo 		return;
40101ae08745Sheppo 	}
40111ae08745Sheppo 	if (((len = strlen(curr_dev)) == strlen(prev_dev)) &&
40121ae08745Sheppo 	    (strncmp(curr_dev, prev_dev, len) == 0))
40131ae08745Sheppo 		return;	/* no relevant (supported) change */
40141ae08745Sheppo 
40151ae08745Sheppo 	PR0("Changing vdisk ID %lu", prev_id);
40163af08d82Slm66018 
40171ae08745Sheppo 	/* Remove old state, which will close vdisk and reset */
40181ae08745Sheppo 	if (mod_hash_destroy(vds->vd_table, (mod_hash_key_t)prev_id) != 0)
40191ae08745Sheppo 		PRN("No entry found for vdisk ID %lu", prev_id);
40203af08d82Slm66018 
40211ae08745Sheppo 	/* Re-initialize vdisk with new state */
40221ae08745Sheppo 	if (vds_init_vd(vds, curr_id, curr_dev, curr_ldc_id) != 0) {
40231ae08745Sheppo 		PRN("Failed to change vdisk ID %lu", curr_id);
40241ae08745Sheppo 		return;
40251ae08745Sheppo 	}
40261ae08745Sheppo }
40271ae08745Sheppo 
40281ae08745Sheppo static int
40291ae08745Sheppo vds_process_md(void *arg, mdeg_result_t *md)
40301ae08745Sheppo {
40311ae08745Sheppo 	int	i;
40321ae08745Sheppo 	vds_t	*vds = arg;
40331ae08745Sheppo 
40341ae08745Sheppo 
40351ae08745Sheppo 	if (md == NULL)
40361ae08745Sheppo 		return (MDEG_FAILURE);
40371ae08745Sheppo 	ASSERT(vds != NULL);
40381ae08745Sheppo 
40391ae08745Sheppo 	for (i = 0; i < md->removed.nelem; i++)
40401ae08745Sheppo 		vds_remove_vd(vds, md->removed.mdp, md->removed.mdep[i]);
40411ae08745Sheppo 	for (i = 0; i < md->match_curr.nelem; i++)
40421ae08745Sheppo 		vds_change_vd(vds, md->match_prev.mdp, md->match_prev.mdep[i],
40431ae08745Sheppo 		    md->match_curr.mdp, md->match_curr.mdep[i]);
40441ae08745Sheppo 	for (i = 0; i < md->added.nelem; i++)
40451ae08745Sheppo 		vds_add_vd(vds, md->added.mdp, md->added.mdep[i]);
40461ae08745Sheppo 
40471ae08745Sheppo 	return (MDEG_SUCCESS);
40481ae08745Sheppo }
40491ae08745Sheppo 
40503c96341aSnarayan 
40511ae08745Sheppo static int
40521ae08745Sheppo vds_do_attach(dev_info_t *dip)
40531ae08745Sheppo {
4054445b4c2eSsb155480 	int			status, sz;
4055445b4c2eSsb155480 	int			cfg_handle;
40561ae08745Sheppo 	minor_t			instance = ddi_get_instance(dip);
40571ae08745Sheppo 	vds_t			*vds;
4058445b4c2eSsb155480 	mdeg_prop_spec_t	*pspecp;
4059445b4c2eSsb155480 	mdeg_node_spec_t	*ispecp;
40601ae08745Sheppo 
40611ae08745Sheppo 	/*
40621ae08745Sheppo 	 * The "cfg-handle" property of a vds node in an MD contains the MD's
40631ae08745Sheppo 	 * notion of "instance", or unique identifier, for that node; OBP
40641ae08745Sheppo 	 * stores the value of the "cfg-handle" MD property as the value of
40651ae08745Sheppo 	 * the "reg" property on the node in the device tree it builds from
40661ae08745Sheppo 	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
40671ae08745Sheppo 	 * "reg" property value to uniquely identify this device instance when
40681ae08745Sheppo 	 * registering with the MD event-generation framework.  If the "reg"
40691ae08745Sheppo 	 * property cannot be found, the device tree state is presumably so
40701ae08745Sheppo 	 * broken that there is no point in continuing.
40711ae08745Sheppo 	 */
4072445b4c2eSsb155480 	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4073445b4c2eSsb155480 	    VD_REG_PROP)) {
4074445b4c2eSsb155480 		PRN("vds \"%s\" property does not exist", VD_REG_PROP);
40751ae08745Sheppo 		return (DDI_FAILURE);
40761ae08745Sheppo 	}
40771ae08745Sheppo 
40781ae08745Sheppo 	/* Get the MD instance for later MDEG registration */
40791ae08745Sheppo 	cfg_handle = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
4080445b4c2eSsb155480 	    VD_REG_PROP, -1);
40811ae08745Sheppo 
40821ae08745Sheppo 	if (ddi_soft_state_zalloc(vds_state, instance) != DDI_SUCCESS) {
40831ae08745Sheppo 		PRN("Could not allocate state for instance %u", instance);
40841ae08745Sheppo 		return (DDI_FAILURE);
40851ae08745Sheppo 	}
40861ae08745Sheppo 
40871ae08745Sheppo 	if ((vds = ddi_get_soft_state(vds_state, instance)) == NULL) {
40881ae08745Sheppo 		PRN("Could not get state for instance %u", instance);
40891ae08745Sheppo 		ddi_soft_state_free(vds_state, instance);
40901ae08745Sheppo 		return (DDI_FAILURE);
40911ae08745Sheppo 	}
40921ae08745Sheppo 
40931ae08745Sheppo 	vds->dip	= dip;
40941ae08745Sheppo 	vds->vd_table	= mod_hash_create_ptrhash("vds_vd_table", VDS_NCHAINS,
4095*87a7269eSachartre 	    vds_destroy_vd, sizeof (void *));
4096*87a7269eSachartre 
40971ae08745Sheppo 	ASSERT(vds->vd_table != NULL);
40981ae08745Sheppo 
40991ae08745Sheppo 	if ((status = ldi_ident_from_dip(dip, &vds->ldi_ident)) != 0) {
41001ae08745Sheppo 		PRN("ldi_ident_from_dip() returned errno %d", status);
41011ae08745Sheppo 		return (DDI_FAILURE);
41021ae08745Sheppo 	}
41031ae08745Sheppo 	vds->initialized |= VDS_LDI;
41041ae08745Sheppo 
41051ae08745Sheppo 	/* Register for MD updates */
4106445b4c2eSsb155480 	sz = sizeof (vds_prop_template);
4107445b4c2eSsb155480 	pspecp = kmem_alloc(sz, KM_SLEEP);
4108445b4c2eSsb155480 	bcopy(vds_prop_template, pspecp, sz);
4109445b4c2eSsb155480 
4110445b4c2eSsb155480 	VDS_SET_MDEG_PROP_INST(pspecp, cfg_handle);
4111445b4c2eSsb155480 
4112445b4c2eSsb155480 	/* initialize the complete prop spec structure */
4113445b4c2eSsb155480 	ispecp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
4114445b4c2eSsb155480 	ispecp->namep = "virtual-device";
4115445b4c2eSsb155480 	ispecp->specp = pspecp;
4116445b4c2eSsb155480 
4117445b4c2eSsb155480 	if (mdeg_register(ispecp, &vd_match, vds_process_md, vds,
41181ae08745Sheppo 	    &vds->mdeg) != MDEG_SUCCESS) {
41191ae08745Sheppo 		PRN("Unable to register for MD updates");
4120445b4c2eSsb155480 		kmem_free(ispecp, sizeof (mdeg_node_spec_t));
4121445b4c2eSsb155480 		kmem_free(pspecp, sz);
41221ae08745Sheppo 		return (DDI_FAILURE);
41231ae08745Sheppo 	}
4124445b4c2eSsb155480 
4125445b4c2eSsb155480 	vds->ispecp = ispecp;
41261ae08745Sheppo 	vds->initialized |= VDS_MDEG;
41271ae08745Sheppo 
41280a55fbb7Slm66018 	/* Prevent auto-detaching so driver is available whenever MD changes */
41290a55fbb7Slm66018 	if (ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1) !=
41300a55fbb7Slm66018 	    DDI_PROP_SUCCESS) {
41310a55fbb7Slm66018 		PRN("failed to set \"%s\" property for instance %u",
41320a55fbb7Slm66018 		    DDI_NO_AUTODETACH, instance);
41330a55fbb7Slm66018 	}
41340a55fbb7Slm66018 
41351ae08745Sheppo 	ddi_report_dev(dip);
41361ae08745Sheppo 	return (DDI_SUCCESS);
41371ae08745Sheppo }
41381ae08745Sheppo 
41391ae08745Sheppo static int
41401ae08745Sheppo vds_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
41411ae08745Sheppo {
41421ae08745Sheppo 	int	status;
41431ae08745Sheppo 
41441ae08745Sheppo 	switch (cmd) {
41451ae08745Sheppo 	case DDI_ATTACH:
4146d10e4ef2Snarayan 		PR0("Attaching");
41471ae08745Sheppo 		if ((status = vds_do_attach(dip)) != DDI_SUCCESS)
41481ae08745Sheppo 			(void) vds_detach(dip, DDI_DETACH);
41491ae08745Sheppo 		return (status);
41501ae08745Sheppo 	case DDI_RESUME:
4151d10e4ef2Snarayan 		PR0("No action required for DDI_RESUME");
41521ae08745Sheppo 		return (DDI_SUCCESS);
41531ae08745Sheppo 	default:
41541ae08745Sheppo 		return (DDI_FAILURE);
41551ae08745Sheppo 	}
41561ae08745Sheppo }
41571ae08745Sheppo 
41581ae08745Sheppo static struct dev_ops vds_ops = {
41591ae08745Sheppo 	DEVO_REV,	/* devo_rev */
41601ae08745Sheppo 	0,		/* devo_refcnt */
41611ae08745Sheppo 	ddi_no_info,	/* devo_getinfo */
41621ae08745Sheppo 	nulldev,	/* devo_identify */
41631ae08745Sheppo 	nulldev,	/* devo_probe */
41641ae08745Sheppo 	vds_attach,	/* devo_attach */
41651ae08745Sheppo 	vds_detach,	/* devo_detach */
41661ae08745Sheppo 	nodev,		/* devo_reset */
41671ae08745Sheppo 	NULL,		/* devo_cb_ops */
41681ae08745Sheppo 	NULL,		/* devo_bus_ops */
41691ae08745Sheppo 	nulldev		/* devo_power */
41701ae08745Sheppo };
41711ae08745Sheppo 
41721ae08745Sheppo static struct modldrv modldrv = {
41731ae08745Sheppo 	&mod_driverops,
41741ae08745Sheppo 	"virtual disk server v%I%",
41751ae08745Sheppo 	&vds_ops,
41761ae08745Sheppo };
41771ae08745Sheppo 
41781ae08745Sheppo static struct modlinkage modlinkage = {
41791ae08745Sheppo 	MODREV_1,
41801ae08745Sheppo 	&modldrv,
41811ae08745Sheppo 	NULL
41821ae08745Sheppo };
41831ae08745Sheppo 
41841ae08745Sheppo 
41851ae08745Sheppo int
41861ae08745Sheppo _init(void)
41871ae08745Sheppo {
41881ae08745Sheppo 	int		i, status;
41891ae08745Sheppo 
4190d10e4ef2Snarayan 
41911ae08745Sheppo 	if ((status = ddi_soft_state_init(&vds_state, sizeof (vds_t), 1)) != 0)
41921ae08745Sheppo 		return (status);
41931ae08745Sheppo 	if ((status = mod_install(&modlinkage)) != 0) {
41941ae08745Sheppo 		ddi_soft_state_fini(&vds_state);
41951ae08745Sheppo 		return (status);
41961ae08745Sheppo 	}
41971ae08745Sheppo 
41981ae08745Sheppo 	/* Fill in the bit-mask of server-supported operations */
41991ae08745Sheppo 	for (i = 0; i < vds_noperations; i++)
42001ae08745Sheppo 		vds_operations |= 1 << (vds_operation[i].operation - 1);
42011ae08745Sheppo 
42021ae08745Sheppo 	return (0);
42031ae08745Sheppo }
42041ae08745Sheppo 
42051ae08745Sheppo int
42061ae08745Sheppo _info(struct modinfo *modinfop)
42071ae08745Sheppo {
42081ae08745Sheppo 	return (mod_info(&modlinkage, modinfop));
42091ae08745Sheppo }
42101ae08745Sheppo 
42111ae08745Sheppo int
42121ae08745Sheppo _fini(void)
42131ae08745Sheppo {
42141ae08745Sheppo 	int	status;
42151ae08745Sheppo 
4216d10e4ef2Snarayan 
42171ae08745Sheppo 	if ((status = mod_remove(&modlinkage)) != 0)
42181ae08745Sheppo 		return (status);
42191ae08745Sheppo 	ddi_soft_state_fini(&vds_state);
42201ae08745Sheppo 	return (0);
42211ae08745Sheppo }
4222