xref: /titanic_44/usr/src/uts/sun4v/io/vdc.c (revision 39c23413b8df94a95f67b34cfd4a4dfc3fd0b48d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * LDoms virtual disk client (vdc) device driver
31  *
32  * This driver runs on a guest logical domain and communicates with the virtual
33  * disk server (vds) driver running on the service domain which is exporting
34  * virtualized "disks" to the guest logical domain.
35  *
36  * The driver can be divided into four sections:
37  *
38  * 1) generic device driver housekeeping
39  *	_init, _fini, attach, detach, ops structures, etc.
40  *
41  * 2) communication channel setup
42  *	Setup the communications link over the LDC channel that vdc uses to
43  *	talk to the vDisk server. Initialise the descriptor ring which
44  *	allows the LDC clients to transfer data via memory mappings.
45  *
46  * 3) Support exported to upper layers (filesystems, etc)
47  *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
48  *	ioctl calls. vdc will copy the data to be written to the descriptor
49  *	ring or maps the buffer to store the data read by the vDisk
50  *	server into the descriptor ring. It then sends a message to the
51  *	vDisk server requesting it to complete the operation.
52  *
53  * 4) Handling responses from vDisk server.
54  *	The vDisk server will ACK some or all of the messages vdc sends to it
55  *	(this is configured during the handshake). Upon receipt of an ACK
56  *	vdc will check the descriptor ring and signal to the upper layer
57  *	code waiting on the IO.
58  */
59 
60 #include <sys/atomic.h>
61 #include <sys/conf.h>
62 #include <sys/disp.h>
63 #include <sys/ddi.h>
64 #include <sys/dkio.h>
65 #include <sys/efi_partition.h>
66 #include <sys/fcntl.h>
67 #include <sys/file.h>
68 #include <sys/mach_descrip.h>
69 #include <sys/modctl.h>
70 #include <sys/mdeg.h>
71 #include <sys/note.h>
72 #include <sys/open.h>
73 #include <sys/sdt.h>
74 #include <sys/stat.h>
75 #include <sys/sunddi.h>
76 #include <sys/types.h>
77 #include <sys/promif.h>
78 #include <sys/vtoc.h>
79 #include <sys/archsystm.h>
80 #include <sys/sysmacros.h>
81 
82 #include <sys/cdio.h>
83 #include <sys/dktp/fdisk.h>
84 #include <sys/scsi/generic/sense.h>
85 #include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */
86 
87 #include <sys/ldoms.h>
88 #include <sys/ldc.h>
89 #include <sys/vio_common.h>
90 #include <sys/vio_mailbox.h>
91 #include <sys/vdsk_common.h>
92 #include <sys/vdsk_mailbox.h>
93 #include <sys/vdc.h>
94 
95 /*
96  * function prototypes
97  */
98 
99 /* standard driver functions */
100 static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
101 static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
102 static int	vdc_strategy(struct buf *buf);
103 static int	vdc_print(dev_t dev, char *str);
104 static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
105 static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
106 static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
107 static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
108 			cred_t *credp, int *rvalp);
109 static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
110 static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);
111 
112 static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
113 			void *arg, void **resultp);
114 static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
115 static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
116 
117 /* setup */
118 static void	vdc_min(struct buf *bufp);
119 static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
120 static int	vdc_do_ldc_init(vdc_t *vdc);
121 static int	vdc_start_ldc_connection(vdc_t *vdc);
122 static int	vdc_create_device_nodes(vdc_t *vdc);
123 static int	vdc_create_device_nodes_efi(vdc_t *vdc);
124 static int	vdc_create_device_nodes_vtoc(vdc_t *vdc);
125 static int	vdc_create_device_nodes_props(vdc_t *vdc);
126 static int	vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
127 static int	vdc_do_ldc_up(vdc_t *vdc);
128 static void	vdc_terminate_ldc(vdc_t *vdc);
129 static int	vdc_init_descriptor_ring(vdc_t *vdc);
130 static void	vdc_destroy_descriptor_ring(vdc_t *vdc);
131 static int	vdc_setup_devid(vdc_t *vdc);
132 static void	vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi);
133 
134 /* handshake with vds */
135 static int		vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
136 static int		vdc_ver_negotiation(vdc_t *vdcp);
137 static int		vdc_init_attr_negotiation(vdc_t *vdc);
138 static int		vdc_attr_negotiation(vdc_t *vdcp);
139 static int		vdc_init_dring_negotiate(vdc_t *vdc);
140 static int		vdc_dring_negotiation(vdc_t *vdcp);
141 static int		vdc_send_rdx(vdc_t *vdcp);
142 static int		vdc_rdx_exchange(vdc_t *vdcp);
143 static boolean_t	vdc_is_supported_version(vio_ver_msg_t *ver_msg);
144 
145 /* processing incoming messages from vDisk server */
146 static void	vdc_process_msg_thread(vdc_t *vdc);
147 static int	vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp);
148 
149 static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
150 static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg);
151 static int	vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
152 static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
153 static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
154 static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
155 static int 	vdc_send_request(vdc_t *vdcp, int operation,
156 		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
157 		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
158 static int	vdc_map_to_shared_dring(vdc_t *vdcp, int idx);
159 static int 	vdc_populate_descriptor(vdc_t *vdcp, int operation,
160 		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
161 		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
162 static int 	vdc_do_sync_op(vdc_t *vdcp, int operation,
163 		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
164 		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
165 
166 static int	vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp);
167 static int	vdc_drain_response(vdc_t *vdcp);
168 static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
169 static int	vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep);
170 static int	vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg);
171 
172 /* dkio */
173 static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
174 static int	vdc_create_fake_geometry(vdc_t *vdc);
175 static int	vdc_setup_disk_layout(vdc_t *vdc);
176 static int	vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
177 		    int mode, int dir);
178 static int	vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
179 		    int mode, int dir);
180 static int	vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
181 		    int mode, int dir);
182 static int	vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
183 		    int mode, int dir);
184 static int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
185 		    int mode, int dir);
186 static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
187 		    int mode, int dir);
188 static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
189 		    int mode, int dir);
190 static int	vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to,
191 		    int mode, int dir);
192 static int	vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
193 		    int mode, int dir);
194 static int	vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
195 		    int mode, int dir);
196 
197 /*
198  * Module variables
199  */
200 
201 /*
202  * Tunable variables to control how long vdc waits before timing out on
203  * various operations
204  */
205 static int	vdc_retries = 10;
206 
207 /* calculated from 'vdc_usec_timeout' during attach */
208 static uint64_t	vdc_hz_timeout;				/* units: Hz */
209 static uint64_t	vdc_usec_timeout = 30 * MICROSEC;	/* 30s units: ns */
210 
211 static uint64_t vdc_hz_min_ldc_delay;
212 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC;
213 static uint64_t vdc_hz_max_ldc_delay;
214 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC;
215 
216 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC;
217 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC;
218 
219 /* values for dumping - need to run in a tighter loop */
220 static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1s units: ns */
221 static int	vdc_dump_retries = 100;
222 
223 /* Count of the number of vdc instances attached */
224 static volatile uint32_t	vdc_instance_count = 0;
225 
226 /* Soft state pointer */
227 static void	*vdc_state;
228 
229 /*
230  * Controlling the verbosity of the error/debug messages
231  *
232  * vdc_msglevel - controls level of messages
233  * vdc_matchinst - 64-bit variable where each bit corresponds
234  *                 to the vdc instance the vdc_msglevel applies.
235  */
236 int		vdc_msglevel = 0x0;
237 uint64_t	vdc_matchinst = 0ull;
238 
239 /*
240  * Supported vDisk protocol version pairs.
241  *
242  * The first array entry is the latest and preferred version.
243  */
244 static const vio_ver_t	vdc_version[] = {{1, 0}};
245 
/*
 * Character/block entry points for this driver; D_MP marks it
 * multithread-safe and D_64BIT enables 64-bit offset support.
 */
static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};
266 
/* Device operations vector for attach/detach/getinfo. */
static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

/* Loadable-module linkage: this module is a single driver. */
static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client %I%",
	&vdc_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
292 
293 /* -------------------------------------------------------------------------- */
294 
295 /*
296  * Device Driver housekeeping and setup
297  */
298 
299 int
300 _init(void)
301 {
302 	int	status;
303 
304 	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
305 		return (status);
306 	if ((status = mod_install(&modlinkage)) != 0)
307 		ddi_soft_state_fini(&vdc_state);
308 	vdc_efi_init(vd_process_ioctl);
309 	return (status);
310 }
311 
/* _info(9E): report module information via the linkage structure. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
317 
318 int
319 _fini(void)
320 {
321 	int	status;
322 
323 	if ((status = mod_remove(&modlinkage)) != 0)
324 		return (status);
325 	vdc_efi_fini();
326 	ddi_soft_state_fini(&vdc_state);
327 	return (0);
328 }
329 
330 static int
331 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,  void *arg, void **resultp)
332 {
333 	_NOTE(ARGUNUSED(dip))
334 
335 	int	instance = VDCUNIT((dev_t)arg);
336 	vdc_t	*vdc = NULL;
337 
338 	switch (cmd) {
339 	case DDI_INFO_DEVT2DEVINFO:
340 		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
341 			*resultp = NULL;
342 			return (DDI_FAILURE);
343 		}
344 		*resultp = vdc->dip;
345 		return (DDI_SUCCESS);
346 	case DDI_INFO_DEVT2INSTANCE:
347 		*resultp = (void *)(uintptr_t)instance;
348 		return (DDI_SUCCESS);
349 	default:
350 		*resultp = NULL;
351 		return (DDI_FAILURE);
352 	}
353 }
354 
/*
 * Function:
 *	vdc_detach
 *
 * Description:
 *	detach(9E) entry point.  Dismantles an instance in roughly the
 *	reverse order of attach, consulting the 'initialized' bit-mask at
 *	each step so that an instance left half-built by a failed attach
 *	(vdc_attach calls us on failure) can be torn down safely.
 *
 * Parameters:
 *	dip	- dev info pointer for this instance
 *	cmd	- DDI_DETACH or DDI_SUSPEND
 *
 * Return Values
 *	DDI_SUCCESS or DDI_FAILURE
 */
static int
vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int	instance;
	int	rv;
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	DMSGX(1, "[%d] Entered\n", instance);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	/* refuse to detach while any consumer still has the device open */
	if (vdc->open_count) {
		DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance);
		return (DDI_FAILURE);
	}

	DMSG(vdc, 0, "[%d] proceeding...\n", instance);

	/* mark instance as detaching */
	vdc->lifecycle	= VDC_LC_DETACHING;

	/*
	 * try and disable callbacks to prevent another handshake
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv);

	if (vdc->initialized & VDC_THREAD) {
		/* kick the message thread out of any blocking LDC read */
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET)) {
			vdc->read_state = VDC_READ_RESET;
			cv_signal(&vdc->read_cv);
		}

		mutex_exit(&vdc->read_lock);

		/* wake up any thread waiting for connection to come online */
		mutex_enter(&vdc->lock);
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0,
			    "[%d] write reset - move to resetting state...\n",
			    instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}
		mutex_exit(&vdc->lock);

		/* now wait until state transitions to VDC_STATE_DETACH */
		thread_join(vdc->msg_proc_thr->t_did);
		ASSERT(vdc->state == VDC_STATE_DETACH);
		DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n",
		    vdc->instance);
	}

	mutex_enter(&vdc->lock);

	/* free the descriptor ring before tearing down the LDC channel */
	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	mutex_exit(&vdc->lock);

	/* remove /devices nodes and their properties */
	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->read_lock);
		cv_destroy(&vdc->initwait_cv);
		cv_destroy(&vdc->dring_free_cv);
		cv_destroy(&vdc->membind_cv);
		cv_destroy(&vdc->sync_pending_cv);
		cv_destroy(&vdc->sync_blocked_cv);
		cv_destroy(&vdc->read_cv);
		cv_destroy(&vdc->running_cv);
	}

	/* release disk-info structures cached during attach/handshake */
	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->label)
		kmem_free(vdc->label, DK_LABEL_SIZE);

	if (vdc->devid) {
		ddi_devid_unregister(dip);
		ddi_devid_free(vdc->devid);
	}

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	/*
	 * NOTE(review): when VDC_SOFT_STATE was set, 'vdc' was freed just
	 * above, yet DMSG takes it as its first argument -- if the DMSG
	 * macro dereferences its vdc argument this is a use-after-free.
	 * Confirm against the DMSG definition in vdc.h.
	 */
	DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc);

	return (DDI_SUCCESS);
}
477 
478 
/*
 * Function:
 *	vdc_do_attach
 *
 * Description:
 *	Performs the real work of attach(9E): allocates and initializes
 *	the per-instance soft state, locks and condition variables, brings
 *	up the LDC channel to the vDisk server, starts the message
 *	processing thread (which drives the handshake), then queries the
 *	disk layout and creates the /devices minor nodes and properties.
 *	On any failure the caller (vdc_attach) invokes vdc_detach, which
 *	uses the 'initialized' bit-mask to undo whatever was completed.
 *
 * Parameters:
 *	dip	- dev info pointer for this instance
 *
 * Return Values
 *	0 on success; a non-zero DDI or errno-style code on failure
 *	(any non-zero value causes vdc_attach to report DDI_FAILURE).
 */
static int
vdc_do_attach(dev_info_t *dip)
{
	int		instance;
	vdc_t		*vdc = NULL;
	int		status;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure",
		    instance);
		return (DDI_FAILURE);
	}

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	/*
	 * We assign the value to initialized in this case to zero out the
	 * variable and then set bits in it to indicate what has been done
	 */
	vdc->initialized = VDC_SOFT_STATE;

	/* convert the tunable timeouts (usec) into clock ticks */
	vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);

	vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc);
	vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc);

	vdc->dip	= dip;
	vdc->instance	= instance;
	vdc->open_count	= 0;
	vdc->vdisk_type	= VD_DISK_TYPE_UNK;
	vdc->vdisk_label = VD_DISK_LABEL_UNK;
	vdc->state	= VDC_STATE_INIT;
	vdc->lifecycle	= VDC_LC_ATTACHING;
	vdc->ldc_state	= 0;
	vdc->session_id = 0;
	vdc->block_size = DEV_BSIZE;
	vdc->max_xfer_sz = maxphys / DEV_BSIZE;

	/* filled in later from the attribute handshake / VTOC ioctls */
	vdc->vtoc = NULL;
	vdc->cinfo = NULL;
	vdc->minfo = NULL;

	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL);

	vdc->threads_pending = 0;
	vdc->sync_op_pending = B_FALSE;
	vdc->sync_op_blocked = B_FALSE;
	cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL);

	/* init blocking msg read functionality */
	mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL);
	vdc->read_state = VDC_READ_IDLE;

	vdc->initialized |= VDC_LOCKS;

	/* initialise LDC channel which will be used to communicate with vds */
	if ((status = vdc_do_ldc_init(vdc)) != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
		goto return_status;
	}

	/* initialize the thread responsible for managing state with server */
	vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread,
	    vdc, 0, &p0, TS_RUN, minclsyspri);
	if (vdc->msg_proc_thr == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
		    instance);
		return (DDI_FAILURE);
	}

	vdc->initialized |= VDC_THREAD;

	atomic_inc_32(&vdc_instance_count);

	/*
	 * Once the handshake is complete, we can use the DRing to send
	 * requests to the vDisk server to calculate the geometry and
	 * VTOC of the "disk"
	 */
	status = vdc_setup_disk_layout(vdc);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)",
			vdc->instance, status);
		goto return_status;
	}

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes",
				instance);
		goto return_status;
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes"
				" properties (%d)", instance, status);
		goto return_status;
	}

	/*
	 * Setup devid
	 */
	if (vdc_setup_devid(vdc)) {
		/* non-fatal: the disk simply has no persistent device id */
		DMSG(vdc, 0, "[%d] No device id available\n", instance);
	}

	ddi_report_dev(dip);
	vdc->lifecycle	= VDC_LC_ONLINE;
	DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance);

return_status:
	DMSG(vdc, 0, "[%d] Attach completed\n", instance);
	return (status);
}
609 
610 static int
611 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
612 {
613 	int	status;
614 
615 	switch (cmd) {
616 	case DDI_ATTACH:
617 		if ((status = vdc_do_attach(dip)) != 0)
618 			(void) vdc_detach(dip, DDI_DETACH);
619 		return (status);
620 	case DDI_RESUME:
621 		/* nothing to do for this non-device */
622 		return (DDI_SUCCESS);
623 	default:
624 		return (DDI_FAILURE);
625 	}
626 }
627 
/*
 * Function:
 *	vdc_do_ldc_init
 *
 * Description:
 *	Sets up the LDC channel to the vDisk server: reads the channel id
 *	from the MD properties, initializes the channel, registers the
 *	event callback and finally opens the channel.  Each completed step
 *	records a bit in vdc->initialized so vdc_terminate_ldc() can undo
 *	exactly what was done.
 *
 * Parameters:
 *	vdc	- soft state pointer
 *
 * Return Values
 *	0 on success, EIO if no channel id could be found, or the error
 *	returned by the failing ldc_*(9F) call.
 */
static int
vdc_do_ldc_init(vdc_t *vdc)
{
	int			status = 0;
	ldc_status_t		ldc_state;
	ldc_attr_t		ldc_attr;
	uint64_t		ldc_id = 0;
	dev_info_t		*dip = NULL;

	ASSERT(vdc != NULL);

	dip = vdc->dip;
	/*
	 * NOTE(review): VDC_LDC is set both here and again after callback
	 * registration below; the second assignment is redundant. Harmless,
	 * but confirm which point is meant to mark "LDC initialized".
	 */
	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) {
		DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property",
				vdc->instance);
		return (EIO);
	}
	vdc->ldc_id = ldc_id;

	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.mtu = VD_LDC_MTU;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d",
					vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]",
				vdc->instance, status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)",
					vdc->instance, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * At this stage we have initialised LDC, we will now try and open
	 * the connection.
	 */
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d",
					vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}
700 
701 static int
702 vdc_start_ldc_connection(vdc_t *vdc)
703 {
704 	int		status = 0;
705 
706 	ASSERT(vdc != NULL);
707 
708 	ASSERT(MUTEX_HELD(&vdc->lock));
709 
710 	status = vdc_do_ldc_up(vdc);
711 
712 	DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance);
713 
714 	return (status);
715 }
716 
717 static int
718 vdc_stop_ldc_connection(vdc_t *vdcp)
719 {
720 	int	status;
721 
722 	DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n",
723 		vdcp->state);
724 
725 	status = ldc_down(vdcp->ldc_handle);
726 	DMSG(vdcp, 0, "ldc_down() = %d\n", status);
727 
728 	vdcp->initialized &= ~VDC_HANDSHAKE;
729 	DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized);
730 
731 	return (status);
732 }
733 
/*
 * Create the minor nodes for an EFI-labelled disk: remove the VTOC
 * style 'h'/'h,raw' nodes (the label may have changed) and replace
 * them with 'wd'/'wd,raw' whole-disk nodes on slice VD_EFI_WD_SLICE.
 * Returns 0 on success, EIO if a node could not be created.
 */
static int
vdc_create_device_nodes_efi(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "h");
	ddi_remove_minor_node(vdc->dip, "h,raw");

	if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK,
		VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
		DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR,
		VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
		DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}
761 
/*
 * Create the minor nodes for a VTOC-labelled disk: remove the EFI
 * style 'wd'/'wd,raw' nodes and replace them with 'h'/'h,raw' nodes
 * for slice VD_EFI_WD_SLICE (slice 7, i.e. minor letter 'h').
 * Returns 0 on success, EIO if a node could not be created.
 */
static int
vdc_create_device_nodes_vtoc(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "wd");
	ddi_remove_minor_node(vdc->dip, "wd,raw");

	if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK,
		VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
		DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR,
		VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
		DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}
789 
790 /*
791  * Function:
792  *	vdc_create_device_nodes
793  *
794  * Description:
795  *	This function creates the block and character device nodes under
796  *	/devices along with the node properties. It is called as part of
797  *	the attach(9E) of the instance during the handshake with vds after
798  *	vds has sent the attributes to vdc.
799  *
800  *	If the device is of type VD_DISK_TYPE_SLICE then the minor node
801  *	of 2 is used in keeping with the Solaris convention that slice 2
802  *	refers to a whole disk. Slices start at 'a'
803  *
804  * Parameters:
805  *	vdc 		- soft state pointer
806  *
807  * Return Values
808  *	0		- Success
809  *	EIO		- Failed to create node
810  *	EINVAL		- Unknown type of disk exported
811  */
812 static int
813 vdc_create_device_nodes(vdc_t *vdc)
814 {
815 	char		name[sizeof ("s,raw")];
816 	dev_info_t	*dip = NULL;
817 	int		instance, status;
818 	int		num_slices = 1;
819 	int		i;
820 
821 	ASSERT(vdc != NULL);
822 
823 	instance = vdc->instance;
824 	dip = vdc->dip;
825 
826 	switch (vdc->vdisk_type) {
827 	case VD_DISK_TYPE_DISK:
828 		num_slices = V_NUMPAR;
829 		break;
830 	case VD_DISK_TYPE_SLICE:
831 		num_slices = 1;
832 		break;
833 	case VD_DISK_TYPE_UNK:
834 	default:
835 		return (EINVAL);
836 	}
837 
838 	/*
839 	 * Minor nodes are different for EFI disks: EFI disks do not have
840 	 * a minor node 'g' for the minor number corresponding to slice
841 	 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd'
842 	 * representing the whole disk.
843 	 */
844 	for (i = 0; i < num_slices; i++) {
845 
846 		if (i == VD_EFI_WD_SLICE) {
847 			if (vdc->vdisk_label == VD_DISK_LABEL_EFI)
848 				status = vdc_create_device_nodes_efi(vdc);
849 			else
850 				status = vdc_create_device_nodes_vtoc(vdc);
851 			if (status != 0)
852 				return (status);
853 			continue;
854 		}
855 
856 		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
857 		if (ddi_create_minor_node(dip, name, S_IFBLK,
858 		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
859 			cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'",
860 				instance, name);
861 			return (EIO);
862 		}
863 
864 		/* if any device node is created we set this flag */
865 		vdc->initialized |= VDC_MINOR;
866 
867 		(void) snprintf(name, sizeof (name), "%c%s",
868 			'a' + i, ",raw");
869 		if (ddi_create_minor_node(dip, name, S_IFCHR,
870 		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
871 			cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'",
872 				instance, name);
873 			return (EIO);
874 		}
875 	}
876 
877 	return (0);
878 }
879 
880 /*
881  * Function:
882  *	vdc_create_device_nodes_props
883  *
884  * Description:
885  *	This function creates the block and character device nodes under
886  *	/devices along with the node properties. It is called as part of
887  *	the attach(9E) of the instance during the handshake with vds after
888  *	vds has sent the attributes to vdc.
889  *
890  * Parameters:
891  *	vdc 		- soft state pointer
892  *
893  * Return Values
894  *	0		- Success
895  *	EIO		- Failed to create device node property
896  *	EINVAL		- Unknown type of disk exported
897  */
898 static int
899 vdc_create_device_nodes_props(vdc_t *vdc)
900 {
901 	dev_info_t	*dip = NULL;
902 	int		instance;
903 	int		num_slices = 1;
904 	int64_t		size = 0;
905 	dev_t		dev;
906 	int		rv;
907 	int		i;
908 
909 	ASSERT(vdc != NULL);
910 
911 	instance = vdc->instance;
912 	dip = vdc->dip;
913 
914 	if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) {
915 		DMSG(vdc, 0, "![%d] Could not create device node property."
916 				" No VTOC available", instance);
917 		return (ENXIO);
918 	}
919 
920 	switch (vdc->vdisk_type) {
921 	case VD_DISK_TYPE_DISK:
922 		num_slices = V_NUMPAR;
923 		break;
924 	case VD_DISK_TYPE_SLICE:
925 		num_slices = 1;
926 		break;
927 	case VD_DISK_TYPE_UNK:
928 	default:
929 		return (EINVAL);
930 	}
931 
932 	for (i = 0; i < num_slices; i++) {
933 		dev = makedevice(ddi_driver_major(dip),
934 			VD_MAKE_DEV(instance, i));
935 
936 		size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz;
937 		DMSG(vdc, 0, "[%d] sz %ld (%ld Mb)  p_size %lx\n",
938 				instance, size, size / (1024 * 1024),
939 				vdc->vtoc->v_part[i].p_size);
940 
941 		rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size);
942 		if (rv != DDI_PROP_SUCCESS) {
943 			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]",
944 				instance, VDC_SIZE_PROP_NAME, size);
945 			return (EIO);
946 		}
947 
948 		rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME,
949 			lbtodb(size));
950 		if (rv != DDI_PROP_SUCCESS) {
951 			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]",
952 				instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size));
953 			return (EIO);
954 		}
955 	}
956 
957 	return (0);
958 }
959 
960 static int
961 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
962 {
963 	_NOTE(ARGUNUSED(cred))
964 
965 	int		instance;
966 	vdc_t		*vdc;
967 
968 	ASSERT(dev != NULL);
969 	instance = VDCUNIT(*dev);
970 
971 	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
972 		return (EINVAL);
973 
974 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
975 		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
976 		return (ENXIO);
977 	}
978 
979 	DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n",
980 			getminor(*dev), flag, otyp);
981 
982 	mutex_enter(&vdc->lock);
983 	vdc->open_count++;
984 	mutex_exit(&vdc->lock);
985 
986 	return (0);
987 }
988 
/*
 * close(9E) entry point: validate the open type, then decrement the
 * open count under the instance lock.  Outstanding DKIOCFLUSHWRITECACHE
 * requests block the close with EBUSY.
 */
static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	vdc_t	*vdc;

	instance = VDCUNIT(dev);

	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp);
	/*
	 * NOTE(review): returning EBUSY here leaves open_count elevated
	 * (the decrement below is skipped), and the message text says
	 * "Cannot detach" inside close(9E) — confirm both are intended.
	 */
	if (vdc->dkio_flush_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding DKIO flushes\n",
		    instance, vdc->dkio_flush_pending);
		return (EBUSY);
	}

	/*
	 * Should not need the mutex here, since the framework should protect
	 * against more opens on this device, but just in case.
	 */
	mutex_enter(&vdc->lock);
	vdc->open_count--;
	mutex_exit(&vdc->lock);

	return (0);
}
1025 
/*
 * vdc_ioctl()
 *
 * ioctl(9E) entry point; all processing is delegated to vd_process_ioctl(),
 * which does not take the credential or return-value pointers.
 */
static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))
	_NOTE(ARGUNUSED(rvalp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
}
1034 
/*
 * vdc_print()
 *
 * print(9E) entry point: log a console message identifying this vdc unit.
 */
static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d:  %s", VDCUNIT(dev), str);
	return (0);
}
1041 
/*
 * vdc_dump()
 *
 * dump(9E) entry point, used to write crash dump data to the virtual disk.
 * The write is issued via vdc_send_request(); when called in panic context
 * the responses are drained here because the normal interrupt-driven
 * completion path is not running.
 */
static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;	/* dump blocks are DEV_BSIZE */
	int	instance = VDCUNIT(dev);
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
	    instance, nbytes, blkno, (void *)addr);
	rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
	    VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
	if (rv) {
		DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
		return (rv);
	}

	/* in panic context no interrupt handler runs; reap replies here */
	if (ddi_in_panic())
		(void) vdc_drain_response(vdc);

	DMSG(vdc, 0, "[%d] End\n", instance);

	return (0);
}
1071 
1072 /* -------------------------------------------------------------------------- */
1073 
1074 /*
1075  * Disk access routines
1076  *
1077  */
1078 
1079 /*
1080  * vdc_strategy()
1081  *
1082  * Return Value:
1083  *	0:	As per strategy(9E), the strategy() function must return 0
1084  *		[ bioerror(9f) sets b_flags to the proper error code ]
1085  */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(buf->b_edev);
	/* transfer direction is taken from the buf flags */
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);
	DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc);

	/* make sure the buffer is mapped into kernel address space */
	bp_mapin(buf);

	rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
	    buf->b_bcount, VDCPART(buf->b_edev), buf->b_lblkno,
	    CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
	    VIO_write_dir);

	/* vdc_send_request() is expected to fail only on bad arguments */
	ASSERT(rv == 0 || rv == EINVAL);

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done.
	 */
	if (rv) {
		DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}
1128 
1129 /*
1130  * Function:
1131  *	vdc_min
1132  *
1133  * Description:
1134  *	Routine to limit the size of a data transfer. Used in
1135  *	conjunction with physio(9F).
1136  *
1137  * Arguments:
1138  *	bp - pointer to the indicated buf(9S) struct.
1139  *
1140  */
1141 static void
1142 vdc_min(struct buf *bufp)
1143 {
1144 	vdc_t	*vdc = NULL;
1145 	int	instance = VDCUNIT(bufp->b_edev);
1146 
1147 	vdc = ddi_get_soft_state(vdc_state, instance);
1148 	VERIFY(vdc != NULL);
1149 
1150 	if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
1151 		bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
1152 	}
1153 }
1154 
/*
 * vdc_read()
 *
 * read(9E) entry point; uses physio(9F) with vdc_min() to bound each
 * transfer and vdc_strategy() to issue the actual I/O.
 */
static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio));
}
1163 
/*
 * vdc_write()
 *
 * write(9E) entry point; uses physio(9F) with vdc_min() to bound each
 * transfer and vdc_strategy() to issue the actual I/O.
 */
static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio));
}
1172 
/*
 * vdc_aread()
 *
 * aread(9E) entry point; async read via aphysio(9F). anocancel indicates
 * that in-flight requests cannot be cancelled.
 */
static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio));
}
1181 
/*
 * vdc_awrite()
 *
 * awrite(9E) entry point; async write via aphysio(9F). anocancel indicates
 * that in-flight requests cannot be cancelled.
 */
static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio));
}
1190 
1191 
1192 /* -------------------------------------------------------------------------- */
1193 
1194 /*
1195  * Handshake support
1196  */
1197 
1198 
1199 /*
1200  * Function:
1201  *	vdc_init_ver_negotiation()
1202  *
1203  * Description:
1204  *
1205  * Arguments:
1206  *	vdc	- soft state pointer for this instance of the device driver.
1207  *
1208  * Return Code:
1209  *	0	- Success
1210  */
1211 static int
1212 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
1213 {
1214 	vio_ver_msg_t	pkt;
1215 	size_t		msglen = sizeof (pkt);
1216 	int		status = -1;
1217 
1218 	ASSERT(vdc != NULL);
1219 	ASSERT(mutex_owned(&vdc->lock));
1220 
1221 	DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance);
1222 
1223 	/*
1224 	 * set the Session ID to a unique value
1225 	 * (the lower 32 bits of the clock tick)
1226 	 */
1227 	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
1228 	DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id);
1229 
1230 	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
1231 	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
1232 	pkt.tag.vio_subtype_env = VIO_VER_INFO;
1233 	pkt.tag.vio_sid = vdc->session_id;
1234 	pkt.dev_class = VDEV_DISK;
1235 	pkt.ver_major = ver.major;
1236 	pkt.ver_minor = ver.minor;
1237 
1238 	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
1239 	DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n",
1240 	    vdc->instance, status);
1241 	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
1242 		DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: "
1243 				"id(%lx) rv(%d) size(%ld)",
1244 				vdc->instance, vdc->ldc_handle,
1245 				status, msglen);
1246 		if (msglen != sizeof (vio_ver_msg_t))
1247 			status = ENOMSG;
1248 	}
1249 
1250 	return (status);
1251 }
1252 
1253 /*
1254  * Function:
1255  *	vdc_ver_negotiation()
1256  *
1257  * Description:
1258  *
1259  * Arguments:
1260  *	vdcp	- soft state pointer for this instance of the device driver.
1261  *
1262  * Return Code:
1263  *	0	- Success
1264  */
1265 static int
1266 vdc_ver_negotiation(vdc_t *vdcp)
1267 {
1268 	vio_msg_t vio_msg;
1269 	int status;
1270 
1271 	if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0]))
1272 		return (status);
1273 
1274 	/* release lock and wait for response */
1275 	mutex_exit(&vdcp->lock);
1276 	status = vdc_wait_for_response(vdcp, &vio_msg);
1277 	mutex_enter(&vdcp->lock);
1278 	if (status) {
1279 		DMSG(vdcp, 0,
1280 		    "[%d] Failed waiting for Ver negotiation response, rv(%d)",
1281 		    vdcp->instance, status);
1282 		return (status);
1283 	}
1284 
1285 	/* check type and sub_type ... */
1286 	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
1287 	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
1288 		DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n",
1289 				vdcp->instance);
1290 		return (EPROTO);
1291 	}
1292 
1293 	return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg));
1294 }
1295 
1296 /*
1297  * Function:
1298  *	vdc_init_attr_negotiation()
1299  *
1300  * Description:
1301  *
1302  * Arguments:
1303  *	vdc	- soft state pointer for this instance of the device driver.
1304  *
1305  * Return Code:
1306  *	0	- Success
1307  */
1308 static int
1309 vdc_init_attr_negotiation(vdc_t *vdc)
1310 {
1311 	vd_attr_msg_t	pkt;
1312 	size_t		msglen = sizeof (pkt);
1313 	int		status;
1314 
1315 	ASSERT(vdc != NULL);
1316 	ASSERT(mutex_owned(&vdc->lock));
1317 
1318 	DMSG(vdc, 0, "[%d] entered\n", vdc->instance);
1319 
1320 	/* fill in tag */
1321 	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
1322 	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
1323 	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
1324 	pkt.tag.vio_sid = vdc->session_id;
1325 	/* fill in payload */
1326 	pkt.max_xfer_sz = vdc->max_xfer_sz;
1327 	pkt.vdisk_block_size = vdc->block_size;
1328 	pkt.xfer_mode = VIO_DRING_MODE;
1329 	pkt.operations = 0;	/* server will set bits of valid operations */
1330 	pkt.vdisk_type = 0;	/* server will set to valid device type */
1331 	pkt.vdisk_size = 0;	/* server will set to valid size */
1332 
1333 	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
1334 	DMSG(vdc, 0, "Attr info sent (status = %d)\n", status);
1335 
1336 	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
1337 		DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: "
1338 				"id(%lx) rv(%d) size(%ld)",
1339 				vdc->instance, vdc->ldc_handle,
1340 				status, msglen);
1341 		if (msglen != sizeof (vio_ver_msg_t))
1342 			status = ENOMSG;
1343 	}
1344 
1345 	return (status);
1346 }
1347 
1348 /*
1349  * Function:
1350  *	vdc_attr_negotiation()
1351  *
1352  * Description:
1353  *
1354  * Arguments:
1355  *	vdc	- soft state pointer for this instance of the device driver.
1356  *
1357  * Return Code:
1358  *	0	- Success
1359  */
1360 static int
1361 vdc_attr_negotiation(vdc_t *vdcp)
1362 {
1363 	int status;
1364 	vio_msg_t vio_msg;
1365 
1366 	if (status = vdc_init_attr_negotiation(vdcp))
1367 		return (status);
1368 
1369 	/* release lock and wait for response */
1370 	mutex_exit(&vdcp->lock);
1371 	status = vdc_wait_for_response(vdcp, &vio_msg);
1372 	mutex_enter(&vdcp->lock);
1373 	if (status) {
1374 		DMSG(vdcp, 0,
1375 		    "[%d] Failed waiting for Attr negotiation response, rv(%d)",
1376 		    vdcp->instance, status);
1377 		return (status);
1378 	}
1379 
1380 	/* check type and sub_type ... */
1381 	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
1382 	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
1383 		DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n",
1384 				vdcp->instance);
1385 		return (EPROTO);
1386 	}
1387 
1388 	return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg));
1389 }
1390 
1391 
1392 /*
1393  * Function:
1394  *	vdc_init_dring_negotiate()
1395  *
1396  * Description:
1397  *
1398  * Arguments:
1399  *	vdc	- soft state pointer for this instance of the device driver.
1400  *
1401  * Return Code:
1402  *	0	- Success
1403  */
1404 static int
1405 vdc_init_dring_negotiate(vdc_t *vdc)
1406 {
1407 	vio_dring_reg_msg_t	pkt;
1408 	size_t			msglen = sizeof (pkt);
1409 	int			status = -1;
1410 	int			retry;
1411 	int			nretries = 10;
1412 
1413 	ASSERT(vdc != NULL);
1414 	ASSERT(mutex_owned(&vdc->lock));
1415 
1416 	for (retry = 0; retry < nretries; retry++) {
1417 		status = vdc_init_descriptor_ring(vdc);
1418 		if (status != EAGAIN)
1419 			break;
1420 		drv_usecwait(vdc_min_timeout_ldc);
1421 	}
1422 
1423 	if (status != 0) {
1424 		DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n",
1425 				vdc->instance, status);
1426 		return (status);
1427 	}
1428 
1429 	DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n",
1430 			vdc->instance, status);
1431 
1432 	/* fill in tag */
1433 	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
1434 	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
1435 	pkt.tag.vio_subtype_env = VIO_DRING_REG;
1436 	pkt.tag.vio_sid = vdc->session_id;
1437 	/* fill in payload */
1438 	pkt.dring_ident = 0;
1439 	pkt.num_descriptors = vdc->dring_len;
1440 	pkt.descriptor_size = vdc->dring_entry_size;
1441 	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
1442 	pkt.ncookies = vdc->dring_cookie_count;
1443 	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */
1444 
1445 	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
1446 	if (status != 0) {
1447 		DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)",
1448 				vdc->instance, status);
1449 	}
1450 
1451 	return (status);
1452 }
1453 
1454 
1455 /*
1456  * Function:
1457  *	vdc_dring_negotiation()
1458  *
1459  * Description:
1460  *
1461  * Arguments:
1462  *	vdc	- soft state pointer for this instance of the device driver.
1463  *
1464  * Return Code:
1465  *	0	- Success
1466  */
1467 static int
1468 vdc_dring_negotiation(vdc_t *vdcp)
1469 {
1470 	int status;
1471 	vio_msg_t vio_msg;
1472 
1473 	if (status = vdc_init_dring_negotiate(vdcp))
1474 		return (status);
1475 
1476 	/* release lock and wait for response */
1477 	mutex_exit(&vdcp->lock);
1478 	status = vdc_wait_for_response(vdcp, &vio_msg);
1479 	mutex_enter(&vdcp->lock);
1480 	if (status) {
1481 		DMSG(vdcp, 0,
1482 		    "[%d] Failed waiting for Dring negotiation response,"
1483 		    " rv(%d)", vdcp->instance, status);
1484 		return (status);
1485 	}
1486 
1487 	/* check type and sub_type ... */
1488 	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
1489 	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
1490 		DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n",
1491 				vdcp->instance);
1492 		return (EPROTO);
1493 	}
1494 
1495 	return (vdc_handle_dring_reg_msg(vdcp,
1496 		    (vio_dring_reg_msg_t *)&vio_msg));
1497 }
1498 
1499 
1500 /*
1501  * Function:
1502  *	vdc_send_rdx()
1503  *
1504  * Description:
1505  *
1506  * Arguments:
1507  *	vdc	- soft state pointer for this instance of the device driver.
1508  *
1509  * Return Code:
1510  *	0	- Success
1511  */
1512 static int
1513 vdc_send_rdx(vdc_t *vdcp)
1514 {
1515 	vio_msg_t	msg;
1516 	size_t		msglen = sizeof (vio_msg_t);
1517 	int		status;
1518 
1519 	/*
1520 	 * Send an RDX message to vds to indicate we are ready
1521 	 * to send data
1522 	 */
1523 	msg.tag.vio_msgtype = VIO_TYPE_CTRL;
1524 	msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
1525 	msg.tag.vio_subtype_env = VIO_RDX;
1526 	msg.tag.vio_sid = vdcp->session_id;
1527 	status = vdc_send(vdcp, (caddr_t)&msg, &msglen);
1528 	if (status != 0) {
1529 		DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)",
1530 		    vdcp->instance, status);
1531 	}
1532 
1533 	return (status);
1534 }
1535 
1536 /*
1537  * Function:
1538  *	vdc_handle_rdx()
1539  *
1540  * Description:
1541  *
1542  * Arguments:
1543  *	vdc	- soft state pointer for this instance of the device driver.
1544  *	msgp	- received msg
1545  *
1546  * Return Code:
1547  *	0	- Success
1548  */
1549 static int
1550 vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
1551 {
1552 	_NOTE(ARGUNUSED(vdcp))
1553 	_NOTE(ARGUNUSED(msgp))
1554 
1555 	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
1556 	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
1557 	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);
1558 
1559 	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);
1560 
1561 	return (0);
1562 }
1563 
1564 /*
1565  * Function:
1566  *	vdc_rdx_exchange()
1567  *
1568  * Description:
1569  *
1570  * Arguments:
1571  *	vdc	- soft state pointer for this instance of the device driver.
1572  *
1573  * Return Code:
1574  *	0	- Success
1575  */
1576 static int
1577 vdc_rdx_exchange(vdc_t *vdcp)
1578 {
1579 	int status;
1580 	vio_msg_t vio_msg;
1581 
1582 	if (status = vdc_send_rdx(vdcp))
1583 		return (status);
1584 
1585 	/* release lock and wait for response */
1586 	mutex_exit(&vdcp->lock);
1587 	status = vdc_wait_for_response(vdcp, &vio_msg);
1588 	mutex_enter(&vdcp->lock);
1589 	if (status) {
1590 		DMSG(vdcp, 0,
1591 		    "[%d] Failed waiting for RDX response,"
1592 		    " rv(%d)", vdcp->instance, status);
1593 		return (status);
1594 	}
1595 
1596 	/* check type and sub_type ... */
1597 	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
1598 	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
1599 		DMSG(vdcp, 0, "[%d] Invalid RDX response\n",
1600 				vdcp->instance);
1601 		return (EPROTO);
1602 	}
1603 
1604 	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
1605 }
1606 
1607 
1608 /* -------------------------------------------------------------------------- */
1609 
1610 /*
1611  * LDC helper routines
1612  */
1613 
/*
 * Function:
 *	vdc_recv()
 *
 * Description:
 *	Pull the next incoming message off the LDC channel. Blocks on
 *	read_cv until the LDC callback marks data as pending (or until
 *	the channel is reset), then polls ldc_read() with exponential
 *	backoff until the complete message has arrived.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msgp	- buffer the received message is read into.
 *	nbytesp	- in: size of *msgp; out: number of bytes actually read.
 *
 * Return Code:
 *	0		- a message was read; *nbytesp updated
 *	ECONNRESET	- the connection was reset while waiting
 *	xxx		- other error codes returned by ldc_read()/ldc_chkq()
 */
static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	int		delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		/* partial message: back off exponentially, capped at max */
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with "
				"no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
		 */
		status = ldc_chkq(vdc->ldc_handle, &q_has_pkts);
		if (status == 0 && !q_has_pkts)
			vdc->read_state = VDC_READ_IDLE;

		break;
	default:
		DMSG(vdc, 0, "ldc_read returned %d\n", status);
		break;
	}

done:
	mutex_exit(&vdc->read_lock);

	return (status);
}
1685 
1686 
1687 
#ifdef DEBUG
/*
 * vdc_decode_tag()
 *
 * Debug-only helper: pretty-print the type/subtype/subtype_env fields of
 * a VIO message tag at debug level 3. The Q() macro stringizes each known
 * constant; unknown values print as "unknown".
 */
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif
1730 
1731 /*
1732  * Function:
1733  *	vdc_send()
1734  *
1735  * Description:
1736  *	The function encapsulates the call to write a message using LDC.
1737  *	If LDC indicates that the call failed due to the queue being full,
1738  *	we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise
1739  *	we return the error returned by LDC.
1740  *
1741  * Arguments:
1742  *	ldc_handle	- LDC handle for the channel this instance of vdc uses
1743  *	pkt		- address of LDC message to be sent
1744  *	msglen		- the size of the message being sent. When the function
1745  *			  returns, this contains the number of bytes written.
1746  *
1747  * Return Code:
1748  *	0		- Success.
1749  *	EINVAL		- pkt or msglen were NULL
1750  *	ECONNRESET	- The connection was not up.
1751  *	EWOULDBLOCK	- LDC queue is full
1752  *	xxx		- other error codes returned by ldc_write
1753  */
1754 static int
1755 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
1756 {
1757 	size_t	size = 0;
1758 	int	status = 0;
1759 	clock_t delay_ticks;
1760 
1761 	ASSERT(vdc != NULL);
1762 	ASSERT(mutex_owned(&vdc->lock));
1763 	ASSERT(msglen != NULL);
1764 	ASSERT(*msglen != 0);
1765 
1766 #ifdef DEBUG
1767 	vdc_decode_tag(vdc, (vio_msg_t *)pkt);
1768 #endif
1769 	/*
1770 	 * Wait indefinitely to send if channel
1771 	 * is busy, but bail out if we succeed or
1772 	 * if the channel closes or is reset.
1773 	 */
1774 	delay_ticks = vdc_hz_min_ldc_delay;
1775 	do {
1776 		size = *msglen;
1777 		status = ldc_write(vdc->ldc_handle, pkt, &size);
1778 		if (status == EWOULDBLOCK) {
1779 			delay(delay_ticks);
1780 			/* geometric backoff */
1781 			delay_ticks *= 2;
1782 			if (delay_ticks > vdc_hz_max_ldc_delay)
1783 				delay_ticks = vdc_hz_max_ldc_delay;
1784 		}
1785 	} while (status == EWOULDBLOCK);
1786 
1787 	/* if LDC had serious issues --- reset vdc state */
1788 	if (status == EIO || status == ECONNRESET) {
1789 		/* LDC had serious issues --- reset vdc state */
1790 		mutex_enter(&vdc->read_lock);
1791 		if ((vdc->read_state == VDC_READ_WAITING) ||
1792 		    (vdc->read_state == VDC_READ_RESET))
1793 			cv_signal(&vdc->read_cv);
1794 		vdc->read_state = VDC_READ_RESET;
1795 		mutex_exit(&vdc->read_lock);
1796 
1797 		/* wake up any waiters in the reset thread */
1798 		if (vdc->state == VDC_STATE_INIT_WAITING) {
1799 			DMSG(vdc, 0, "[%d] write reset - "
1800 			    "vdc is resetting ..\n", vdc->instance);
1801 			vdc->state = VDC_STATE_RESETTING;
1802 			cv_signal(&vdc->initwait_cv);
1803 		}
1804 
1805 		return (ECONNRESET);
1806 	}
1807 
1808 	/* return the last size written */
1809 	*msglen = size;
1810 
1811 	return (status);
1812 }
1813 
1814 /*
1815  * Function:
1816  *	vdc_get_ldc_id()
1817  *
1818  * Description:
1819  *	This function gets the 'ldc-id' for this particular instance of vdc.
1820  *	The id returned is the guest domain channel endpoint LDC uses for
1821  *	communication with vds.
1822  *
1823  * Arguments:
1824  *	dip	- dev info pointer for this instance of the device driver.
1825  *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
1826  *
1827  * Return Code:
1828  *	0	- Success.
1829  *	ENOENT	- Expected node or property did not exist.
1830  *	ENXIO	- Unexpected error communicating with MD framework
1831  */
1832 static int
1833 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
1834 {
1835 	int		status = ENOENT;
1836 	char		*node_name = NULL;
1837 	md_t		*mdp = NULL;
1838 	int		num_nodes;
1839 	int		num_vdevs;
1840 	int		num_chans;
1841 	mde_cookie_t	rootnode;
1842 	mde_cookie_t	*listp = NULL;
1843 	mde_cookie_t	*chanp = NULL;
1844 	boolean_t	found_inst = B_FALSE;
1845 	int		listsz;
1846 	int		idx;
1847 	uint64_t	md_inst;
1848 	int		obp_inst;
1849 	int		instance = ddi_get_instance(dip);
1850 
1851 	ASSERT(ldc_id != NULL);
1852 	*ldc_id = 0;
1853 
1854 	/*
1855 	 * Get the OBP instance number for comparison with the MD instance
1856 	 *
1857 	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
1858 	 * notion of "instance", or unique identifier, for that node; OBP
1859 	 * stores the value of the "cfg-handle" MD property as the value of
1860 	 * the "reg" property on the node in the device tree it builds from
1861 	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
1862 	 * "reg" property value to uniquely identify this device instance.
1863 	 * If the "reg" property cannot be found, the device tree state is
1864 	 * presumably so broken that there is no point in continuing.
1865 	 */
1866 	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
1867 		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
1868 		return (ENOENT);
1869 	}
1870 	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1871 			OBP_REG, -1);
1872 	DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst);
1873 
1874 	/*
1875 	 * We now walk the MD nodes and if an instance of a vdc node matches
1876 	 * the instance got from OBP we get the ldc-id property.
1877 	 */
1878 	if ((mdp = md_get_handle()) == NULL) {
1879 		cmn_err(CE_WARN, "unable to init machine description");
1880 		return (ENXIO);
1881 	}
1882 
1883 	num_nodes = md_node_count(mdp);
1884 	ASSERT(num_nodes > 0);
1885 
1886 	listsz = num_nodes * sizeof (mde_cookie_t);
1887 
1888 	/* allocate memory for nodes */
1889 	listp = kmem_zalloc(listsz, KM_SLEEP);
1890 	chanp = kmem_zalloc(listsz, KM_SLEEP);
1891 
1892 	rootnode = md_root_node(mdp);
1893 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1894 
1895 	/*
1896 	 * Search for all the virtual devices, we will then check to see which
1897 	 * ones are disk nodes.
1898 	 */
1899 	num_vdevs = md_scan_dag(mdp, rootnode,
1900 			md_find_name(mdp, VDC_MD_VDEV_NAME),
1901 			md_find_name(mdp, "fwd"), listp);
1902 
1903 	if (num_vdevs <= 0) {
1904 		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
1905 		status = ENOENT;
1906 		goto done;
1907 	}
1908 
1909 	DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs);
1910 	for (idx = 0; idx < num_vdevs; idx++) {
1911 		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
1912 		if ((status != 0) || (node_name == NULL)) {
1913 			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
1914 					": err %d", VDC_MD_VDEV_NAME, status);
1915 			continue;
1916 		}
1917 
1918 		DMSGX(1, "[%d] Found node '%s'\n", instance, node_name);
1919 		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
1920 			status = md_get_prop_val(mdp, listp[idx],
1921 					VDC_MD_CFG_HDL, &md_inst);
1922 			DMSGX(1, "[%d] vdc inst in MD=%lx\n",
1923 			    instance, md_inst);
1924 			if ((status == 0) && (md_inst == obp_inst)) {
1925 				found_inst = B_TRUE;
1926 				break;
1927 			}
1928 		}
1929 	}
1930 
1931 	if (!found_inst) {
1932 		DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME);
1933 		status = ENOENT;
1934 		goto done;
1935 	}
1936 	DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst);
1937 
1938 	/* get the channels for this node */
1939 	num_chans = md_scan_dag(mdp, listp[idx],
1940 			md_find_name(mdp, VDC_MD_CHAN_NAME),
1941 			md_find_name(mdp, "fwd"), chanp);
1942 
1943 	/* expecting at least one channel */
1944 	if (num_chans <= 0) {
1945 		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
1946 				VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
1947 		status = ENOENT;
1948 		goto done;
1949 
1950 	} else if (num_chans != 1) {
1951 		DMSGX(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n",
1952 			instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
1953 			num_chans);
1954 	}
1955 
1956 	/*
1957 	 * We use the first channel found (index 0), irrespective of how
1958 	 * many are there in total.
1959 	 */
1960 	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
1961 		cmn_err(CE_NOTE, "Channel '%s' property not found",
1962 				VDC_ID_PROP);
1963 		status = ENOENT;
1964 	}
1965 
1966 	DMSGX(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id);
1967 
1968 done:
1969 	if (chanp)
1970 		kmem_free(chanp, listsz);
1971 	if (listp)
1972 		kmem_free(listp, listsz);
1973 
1974 	(void) md_fini_handle(mdp);
1975 
1976 	return (status);
1977 }
1978 
/*
 * Function:
 *	vdc_do_ldc_up()
 *
 * Description:
 *	Attempt to bring the LDC channel up via ldc_up(). ECONNREFUSED
 *	(listener not ready at the other end) is not treated as fatal.
 *	If the channel is already up, the sequence numbers used for
 *	subsequent transfers are reset.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success (including connection refused by the peer)
 *	EINVAL	- the driver is detaching
 *	xxx	- other error codes returned by ldc_up()
 */
static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int		status;
	ldc_status_t	ldc_state;

	DMSG(vdc, 0, "[%d] Bringing up channel %lx\n",
	    vdc->instance, vdc->ldc_id);

	/* don't bring the channel up while tearing down */
	if (vdc->lifecycle == VDC_LC_DETACHING)
		return (EINVAL);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n",
					vdc->instance, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			DMSG(vdc, 0, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d", vdc->instance, vdc->ldc_id,
			    status);
			break;
		}
	}

	if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) {
		vdc->ldc_state = ldc_state;
		if (ldc_state == LDC_UP) {
			DMSG(vdc, 0, "[%d] LDC channel already up\n",
			    vdc->instance);
			vdc->seq_num = 1;
			vdc->seq_num_reply = 0;
		}
	}

	return (status);
}
2018 
2019 /*
2020  * Function:
2021  *	vdc_terminate_ldc()
2022  *
2023  * Description:
2024  *
2025  * Arguments:
2026  *	vdc	- soft state pointer for this instance of the device driver.
2027  *
2028  * Return Code:
2029  *	None
2030  */
2031 static void
2032 vdc_terminate_ldc(vdc_t *vdc)
2033 {
2034 	int	instance = ddi_get_instance(vdc->dip);
2035 
2036 	ASSERT(vdc != NULL);
2037 	ASSERT(mutex_owned(&vdc->lock));
2038 
2039 	DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized);
2040 
2041 	if (vdc->initialized & VDC_LDC_OPEN) {
2042 		DMSG(vdc, 0, "[%d] ldc_close()\n", instance);
2043 		(void) ldc_close(vdc->ldc_handle);
2044 	}
2045 	if (vdc->initialized & VDC_LDC_CB) {
2046 		DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance);
2047 		(void) ldc_unreg_callback(vdc->ldc_handle);
2048 	}
2049 	if (vdc->initialized & VDC_LDC) {
2050 		DMSG(vdc, 0, "[%d] ldc_fini()\n", instance);
2051 		(void) ldc_fini(vdc->ldc_handle);
2052 		vdc->ldc_handle = NULL;
2053 	}
2054 
2055 	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
2056 }
2057 
2058 /* -------------------------------------------------------------------------- */
2059 
2060 /*
2061  * Descriptor Ring helper routines
2062  */
2063 
2064 /*
2065  * Function:
2066  *	vdc_init_descriptor_ring()
2067  *
2068  * Description:
2069  *
2070  * Arguments:
2071  *	vdc	- soft state pointer for this instance of the device driver.
2072  *
2073  * Return Code:
2074  *	0	- Success
2075  */
2076 static int
2077 vdc_init_descriptor_ring(vdc_t *vdc)
2078 {
2079 	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
2080 	int	status = 0;
2081 	int	i;
2082 
2083 	DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized);
2084 
2085 	ASSERT(vdc != NULL);
2086 	ASSERT(mutex_owned(&vdc->lock));
2087 	ASSERT(vdc->ldc_handle != NULL);
2088 
2089 	/* ensure we have enough room to store max sized block */
2090 	ASSERT(maxphys <= VD_MAX_BLOCK_SIZE);
2091 
2092 	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
2093 		DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance);
2094 		/*
2095 		 * Calculate the maximum block size we can transmit using one
2096 		 * Descriptor Ring entry from the attributes returned by the
2097 		 * vDisk server. This is subject to a minimum of 'maxphys'
2098 		 * as we do not have the capability to split requests over
2099 		 * multiple DRing entries.
2100 		 */
2101 		if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) {
2102 			DMSG(vdc, 0, "[%d] using minimum DRing size\n",
2103 					vdc->instance);
2104 			vdc->dring_max_cookies = maxphys / PAGESIZE;
2105 		} else {
2106 			vdc->dring_max_cookies =
2107 				(vdc->max_xfer_sz * vdc->block_size) / PAGESIZE;
2108 		}
2109 		vdc->dring_entry_size = (sizeof (vd_dring_entry_t) +
2110 				(sizeof (ldc_mem_cookie_t) *
2111 					(vdc->dring_max_cookies - 1)));
2112 		vdc->dring_len = VD_DRING_LEN;
2113 
2114 		status = ldc_mem_dring_create(vdc->dring_len,
2115 				vdc->dring_entry_size, &vdc->ldc_dring_hdl);
2116 		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
2117 			DMSG(vdc, 0, "[%d] Descriptor ring creation failed",
2118 					vdc->instance);
2119 			return (status);
2120 		}
2121 		vdc->initialized |= VDC_DRING_INIT;
2122 	}
2123 
2124 	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
2125 		DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance);
2126 		vdc->dring_cookie =
2127 			kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);
2128 
2129 		status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
2130 				LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW,
2131 				&vdc->dring_cookie[0],
2132 				&vdc->dring_cookie_count);
2133 		if (status != 0) {
2134 			DMSG(vdc, 0, "[%d] Failed to bind descriptor ring "
2135 				"(%lx) to channel (%lx) status=%d\n",
2136 				vdc->instance, vdc->ldc_dring_hdl,
2137 				vdc->ldc_handle, status);
2138 			return (status);
2139 		}
2140 		ASSERT(vdc->dring_cookie_count == 1);
2141 		vdc->initialized |= VDC_DRING_BOUND;
2142 	}
2143 
2144 	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
2145 	if (status != 0) {
2146 		DMSG(vdc, 0,
2147 		    "[%d] Failed to get info for descriptor ring (%lx)\n",
2148 		    vdc->instance, vdc->ldc_dring_hdl);
2149 		return (status);
2150 	}
2151 
2152 	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
2153 		DMSG(vdc, 0, "[%d] local dring\n", vdc->instance);
2154 
2155 		/* Allocate the local copy of this dring */
2156 		vdc->local_dring =
2157 			kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t),
2158 						KM_SLEEP);
2159 		vdc->initialized |= VDC_DRING_LOCAL;
2160 	}
2161 
2162 	/*
2163 	 * Mark all DRing entries as free and initialize the private
2164 	 * descriptor's memory handles. If any entry is initialized,
2165 	 * we need to free it later so we set the bit in 'initialized'
2166 	 * at the start.
2167 	 */
2168 	vdc->initialized |= VDC_DRING_ENTRY;
2169 	for (i = 0; i < vdc->dring_len; i++) {
2170 		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
2171 		dep->hdr.dstate = VIO_DESC_FREE;
2172 
2173 		status = ldc_mem_alloc_handle(vdc->ldc_handle,
2174 				&vdc->local_dring[i].desc_mhdl);
2175 		if (status != 0) {
2176 			DMSG(vdc, 0, "![%d] Failed to alloc mem handle for"
2177 					" descriptor %d", vdc->instance, i);
2178 			return (status);
2179 		}
2180 		vdc->local_dring[i].is_free = B_TRUE;
2181 		vdc->local_dring[i].dep = dep;
2182 	}
2183 
2184 	/* Initialize the starting index */
2185 	vdc->dring_curr_idx = 0;
2186 
2187 	return (status);
2188 }
2189 
/*
 * Function:
 *	vdc_destroy_descriptor_ring()
 *
 * Description:
 *	Tear down the descriptor ring in the reverse order of its setup:
 *	free the per-entry memory handles, free the local shadow ring,
 *	unbind the ring from the LDC channel and finally destroy the ring
 *	itself, clearing the matching 'initialized' flag at each stage.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_destroy_descriptor_ring(vdc_t *vdc)
{
	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
	ldc_mem_handle_t	mhdl = NULL;
	ldc_mem_info_t		minfo;
	int			status = -1;
	int			i;	/* loop */

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered\n", vdc->instance);

	/*
	 * Stage 1: release the per-entry LDC memory handles that were
	 * allocated when the ring entries were initialized.
	 */
	if (vdc->initialized & VDC_DRING_ENTRY) {
		DMSG(vdc, 0,
		    "[%d] Removing Local DRing entries\n", vdc->instance);
		for (i = 0; i < vdc->dring_len; i++) {
			ldep = &vdc->local_dring[i];
			mhdl = ldep->desc_mhdl;

			/* entry never got a handle - nothing to release */
			if (mhdl == NULL)
				continue;

			if ((status = ldc_mem_info(mhdl, &minfo)) != 0) {
				DMSG(vdc, 0,
				    "ldc_mem_info returned an error: %d\n",
				    status);

				/*
				 * This must mean that the mem handle
				 * is not valid. Clear it out so that
				 * no one tries to use it.
				 */
				ldep->desc_mhdl = NULL;
				continue;
			}

			/* unbind before freeing if the handle is still bound */
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_unbind_handle(mhdl);
			}

			(void) ldc_mem_free_handle(mhdl);

			ldep->desc_mhdl = NULL;
		}
		vdc->initialized &= ~VDC_DRING_ENTRY;
	}

	/* Stage 2: free the local shadow copy of the descriptor ring */
	if (vdc->initialized & VDC_DRING_LOCAL) {
		DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance);
		kmem_free(vdc->local_dring,
				vdc->dring_len * sizeof (vdc_local_desc_t));
		vdc->initialized &= ~VDC_DRING_LOCAL;
	}

	/*
	 * Stage 3: unbind the ring from the LDC channel and free the
	 * cookie that was allocated for the binding. The cookie is freed
	 * even if the unbind fails since it is never reused.
	 */
	if (vdc->initialized & VDC_DRING_BOUND) {
		DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance);
		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->initialized &= ~VDC_DRING_BOUND;
		} else {
			DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx",
				vdc->instance, status, vdc->ldc_dring_hdl);
		}
		kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t));
	}

	/* Stage 4: destroy the exported descriptor ring itself */
	if (vdc->initialized & VDC_DRING_INIT) {
		DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance);
		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
		if (status == 0) {
			vdc->ldc_dring_hdl = NULL;
			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
			vdc->initialized &= ~VDC_DRING_INIT;
		} else {
			DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)",
				vdc->instance, status, vdc->ldc_dring_hdl);
		}
	}
}
2283 
/*
 * Function:
 *	vdc_map_to_shared_dring()
 *
 * Description:
 *	Copy contents of the local descriptor to the shared
 *	memory descriptor.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *	idx	- descriptor ring index
 *
 * Return Code:
 *	0	- Success
 *	!= 0	- error returned by vdc_populate_mem_hdl()
 */
2299 static int
2300 vdc_map_to_shared_dring(vdc_t *vdcp, int idx)
2301 {
2302 	vdc_local_desc_t	*ldep;
2303 	vd_dring_entry_t	*dep;
2304 	int			rv;
2305 
2306 	ldep = &(vdcp->local_dring[idx]);
2307 
2308 	/* for now leave in the old pop_mem_hdl stuff */
2309 	if (ldep->nbytes > 0) {
2310 		rv = vdc_populate_mem_hdl(vdcp, ldep);
2311 		if (rv) {
2312 			DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n",
2313 			    vdcp->instance);
2314 			return (rv);
2315 		}
2316 	}
2317 
2318 	/*
2319 	 * fill in the data details into the DRing
2320 	 */
2321 	dep = ldep->dep;
2322 	ASSERT(dep != NULL);
2323 
2324 	dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp);
2325 	dep->payload.operation = ldep->operation;
2326 	dep->payload.addr = ldep->offset;
2327 	dep->payload.nbytes = ldep->nbytes;
2328 	dep->payload.status = (uint32_t)-1;	/* vds will set valid value */
2329 	dep->payload.slice = ldep->slice;
2330 	dep->hdr.dstate = VIO_DESC_READY;
2331 	dep->hdr.ack = 1;		/* request an ACK for every message */
2332 
2333 	return (0);
2334 }
2335 
2336 /*
2337  * Function:
2338  *	vdc_send_request
2339  *
2340  * Description:
2341  *	This routine writes the data to be transmitted to vds into the
2342  *	descriptor, notifies vds that the ring has been updated and
2343  *	then waits for the request to be processed.
2344  *
2345  * Arguments:
2346  *	vdcp	  - the soft state pointer
2347  *	operation - operation we want vds to perform (VD_OP_XXX)
2348  *	addr	  - address of data buf to be read/written.
2349  *	nbytes	  - number of bytes to read/write
2350  *	slice	  - the disk slice this request is for
2351  *	offset	  - relative disk offset
2352  *	cb_type   - type of call - STRATEGY or SYNC
2353  *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
2354  *			. mode for ioctl(9e)
2355  *			. LP64 diskaddr_t (block I/O)
2356  *	dir	  - direction of operation (READ/WRITE/BOTH)
2357  *
2358  * Return Codes:
2359  *	0
2360  *	EAGAIN
2361  *		EFAULT
2362  *		ENXIO
2363  *		EIO
2364  */
2365 static int
2366 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
2367     size_t nbytes, int slice, diskaddr_t offset, int cb_type,
2368     void *cb_arg, vio_desc_direction_t dir)
2369 {
2370 	ASSERT(vdcp != NULL);
2371 	ASSERT(slice < V_NUMPAR);
2372 
2373 	mutex_enter(&vdcp->lock);
2374 
2375 	do {
2376 		while (vdcp->state != VDC_STATE_RUNNING)
2377 			cv_wait(&vdcp->running_cv, &vdcp->lock);
2378 
2379 	} while (vdc_populate_descriptor(vdcp, operation, addr,
2380 	    nbytes, slice, offset, cb_type, cb_arg, dir));
2381 
2382 	mutex_exit(&vdcp->lock);
2383 	return (0);
2384 }
2385 
2386 
2387 /*
2388  * Function:
2389  *	vdc_populate_descriptor
2390  *
2391  * Description:
2392  *	This routine writes the data to be transmitted to vds into the
2393  *	descriptor, notifies vds that the ring has been updated and
2394  *	then waits for the request to be processed.
2395  *
2396  * Arguments:
2397  *	vdcp	  - the soft state pointer
2398  *	operation - operation we want vds to perform (VD_OP_XXX)
2399  *	addr	  - address of data buf to be read/written.
2400  *	nbytes	  - number of bytes to read/write
2401  *	slice	  - the disk slice this request is for
2402  *	offset	  - relative disk offset
2403  *	cb_type   - type of call - STRATEGY or SYNC
2404  *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
2405  *			. mode for ioctl(9e)
2406  *			. LP64 diskaddr_t (block I/O)
2407  *	dir	  - direction of operation (READ/WRITE/BOTH)
2408  *
2409  * Return Codes:
2410  *	0
2411  *	EAGAIN
2412  *		EFAULT
2413  *		ENXIO
2414  *		EIO
2415  */
2416 static int
2417 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr,
2418     size_t nbytes, int slice, diskaddr_t offset, int cb_type,
2419     void *cb_arg, vio_desc_direction_t dir)
2420 {
2421 	vdc_local_desc_t	*local_dep = NULL; /* Local Dring Pointer */
2422 	int			idx;		/* Index of DRing entry used */
2423 	int			next_idx;
2424 	vio_dring_msg_t		dmsg;
2425 	size_t			msglen;
2426 	int			rv;
2427 
2428 	ASSERT(MUTEX_HELD(&vdcp->lock));
2429 	vdcp->threads_pending++;
2430 loop:
2431 	DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx);
2432 
2433 	/* Get next available D-Ring entry */
2434 	idx = vdcp->dring_curr_idx;
2435 	local_dep = &(vdcp->local_dring[idx]);
2436 
2437 	if (!local_dep->is_free) {
2438 		DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n",
2439 		    vdcp->instance);
2440 		cv_wait(&vdcp->dring_free_cv, &vdcp->lock);
2441 		if (vdcp->state == VDC_STATE_RUNNING ||
2442 		    vdcp->state == VDC_STATE_HANDLE_PENDING) {
2443 			goto loop;
2444 		}
2445 		vdcp->threads_pending--;
2446 		return (ECONNRESET);
2447 	}
2448 
2449 	next_idx = idx + 1;
2450 	if (next_idx >= vdcp->dring_len)
2451 		next_idx = 0;
2452 	vdcp->dring_curr_idx = next_idx;
2453 
2454 	ASSERT(local_dep->is_free);
2455 
2456 	local_dep->operation = operation;
2457 	local_dep->addr = addr;
2458 	local_dep->nbytes = nbytes;
2459 	local_dep->slice = slice;
2460 	local_dep->offset = offset;
2461 	local_dep->cb_type = cb_type;
2462 	local_dep->cb_arg = cb_arg;
2463 	local_dep->dir = dir;
2464 
2465 	local_dep->is_free = B_FALSE;
2466 
2467 	rv = vdc_map_to_shared_dring(vdcp, idx);
2468 	if (rv) {
2469 		DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n",
2470 		    vdcp->instance);
2471 		/* free the descriptor */
2472 		local_dep->is_free = B_TRUE;
2473 		vdcp->dring_curr_idx = idx;
2474 		cv_wait(&vdcp->membind_cv, &vdcp->lock);
2475 		if (vdcp->state == VDC_STATE_RUNNING ||
2476 		    vdcp->state == VDC_STATE_HANDLE_PENDING) {
2477 			goto loop;
2478 		}
2479 		vdcp->threads_pending--;
2480 		return (ECONNRESET);
2481 	}
2482 
2483 	/*
2484 	 * Send a msg with the DRing details to vds
2485 	 */
2486 	VIO_INIT_DRING_DATA_TAG(dmsg);
2487 	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp);
2488 	dmsg.dring_ident = vdcp->dring_ident;
2489 	dmsg.start_idx = idx;
2490 	dmsg.end_idx = idx;
2491 	vdcp->seq_num++;
2492 
2493 	DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp);
2494 
2495 	DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n",
2496 	    vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num);
2497 
2498 	/*
2499 	 * note we're still holding the lock here to
2500 	 * make sure the message goes out in order !!!...
2501 	 */
2502 	msglen = sizeof (dmsg);
2503 	rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen);
2504 	switch (rv) {
2505 	case ECONNRESET:
2506 		/*
2507 		 * vdc_send initiates the reset on failure.
2508 		 * Since the transaction has already been put
2509 		 * on the local dring, it will automatically get
2510 		 * retried when the channel is reset. Given that,
2511 		 * it is ok to just return success even though the
2512 		 * send failed.
2513 		 */
2514 		rv = 0;
2515 		break;
2516 
2517 	case 0: /* EOK */
2518 		DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv);
2519 		break;
2520 
2521 	default:
2522 		goto cleanup_and_exit;
2523 	}
2524 
2525 	vdcp->threads_pending--;
2526 	return (rv);
2527 
2528 cleanup_and_exit:
2529 	DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv);
2530 	return (ENXIO);
2531 }
2532 
2533 /*
2534  * Function:
2535  *	vdc_do_sync_op
2536  *
2537  * Description:
 * 	Wrapper around vdc_send_request that blocks until the
 * 	response to the message is available.
2540  *
2541  * Arguments:
2542  *	vdcp	  - the soft state pointer
2543  *	operation - operation we want vds to perform (VD_OP_XXX)
2544  *	addr	  - address of data buf to be read/written.
2545  *	nbytes	  - number of bytes to read/write
2546  *	slice	  - the disk slice this request is for
2547  *	offset	  - relative disk offset
2548  *	cb_type   - type of call - STRATEGY or SYNC
2549  *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
2550  *			. mode for ioctl(9e)
2551  *			. LP64 diskaddr_t (block I/O)
2552  *	dir	  - direction of operation (READ/WRITE/BOTH)
2553  *
2554  * Return Codes:
2555  *	0
2556  *	EAGAIN
2557  *		EFAULT
2558  *		ENXIO
2559  *		EIO
2560  */
static int
vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes,
    int slice, diskaddr_t offset, int cb_type, void *cb_arg,
    vio_desc_direction_t dir)
{
	int status;

	/* only synchronous callers may use this path */
	ASSERT(cb_type == CB_SYNC);

	/*
	 * Grab the lock, if blocked wait until the server
	 * response causes us to wake up again.
	 */
	mutex_enter(&vdcp->lock);
	vdcp->sync_op_cnt++;
	while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH)
		cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock);

	if (vdcp->state == VDC_STATE_DETACH) {
		/* driver is going away - pass the wakeup on and bail out */
		cv_broadcast(&vdcp->sync_blocked_cv);
		vdcp->sync_op_cnt--;
		mutex_exit(&vdcp->lock);
		return (ENXIO);
	}

	/* now block anyone other thread entering after us */
	vdcp->sync_op_blocked = B_TRUE;
	vdcp->sync_op_pending = B_TRUE;
	mutex_exit(&vdcp->lock);

	/*
	 * No need to check return value - will return error only
	 * in the DETACH case and we can fall through
	 */
	(void) vdc_send_request(vdcp, operation, addr,
	    nbytes, slice, offset, cb_type, cb_arg, dir);

	/*
	 * block until our transaction completes.
	 * Also anyone else waiting also gets to go next.
	 */
	mutex_enter(&vdcp->lock);
	while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH)
		cv_wait(&vdcp->sync_pending_cv, &vdcp->lock);

	DMSG(vdcp, 2, ": operation returned %d\n", vdcp->sync_op_status);
	/* a detach overrides whatever status the operation produced */
	if (vdcp->state == VDC_STATE_DETACH)
		status = ENXIO;
	else
		status = vdcp->sync_op_status;
	/* reset the sync-op state for the next caller */
	vdcp->sync_op_status = 0;
	vdcp->sync_op_blocked = B_FALSE;
	vdcp->sync_op_cnt--;

	/* signal the next waiting thread */
	cv_signal(&vdcp->sync_blocked_cv);
	mutex_exit(&vdcp->lock);

	return (status);
}
2621 
2622 
2623 /*
2624  * Function:
2625  *	vdc_drain_response()
2626  *
2627  * Description:
2628  * 	When a guest is panicking, the completion of requests needs to be
2629  * 	handled differently because interrupts are disabled and vdc
2630  * 	will not get messages. We have to poll for the messages instead.
2631  *
2632  * Arguments:
2633  *	vdc	- soft state pointer for this instance of the device driver.
2634  *
2635  * Return Code:
2636  *	0	- Success
2637  */
2638 static int
2639 vdc_drain_response(vdc_t *vdc)
2640 {
2641 	int 			rv, idx, retries;
2642 	size_t			msglen;
2643 	vdc_local_desc_t 	*ldep = NULL;	/* Local Dring Entry Pointer */
2644 	vio_dring_msg_t		dmsg;
2645 
2646 	mutex_enter(&vdc->lock);
2647 
2648 	retries = 0;
2649 	for (;;) {
2650 		msglen = sizeof (dmsg);
2651 		rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen);
2652 		if (rv) {
2653 			rv = EINVAL;
2654 			break;
2655 		}
2656 
2657 		/*
2658 		 * if there are no packets wait and check again
2659 		 */
2660 		if ((rv == 0) && (msglen == 0)) {
2661 			if (retries++ > vdc_dump_retries) {
2662 				rv = EAGAIN;
2663 				break;
2664 			}
2665 
2666 			drv_usecwait(vdc_usec_timeout_dump);
2667 			continue;
2668 		}
2669 
2670 		/*
2671 		 * Ignore all messages that are not ACKs/NACKs to
2672 		 * DRing requests.
2673 		 */
2674 		if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) ||
2675 		    (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) {
2676 			DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n",
2677 			    dmsg.tag.vio_msgtype,
2678 			    dmsg.tag.vio_subtype,
2679 			    dmsg.tag.vio_subtype_env);
2680 			continue;
2681 		}
2682 
2683 		/*
2684 		 * set the appropriate return value for the current request.
2685 		 */
2686 		switch (dmsg.tag.vio_subtype) {
2687 		case VIO_SUBTYPE_ACK:
2688 			rv = 0;
2689 			break;
2690 		case VIO_SUBTYPE_NACK:
2691 			rv = EAGAIN;
2692 			break;
2693 		default:
2694 			continue;
2695 		}
2696 
2697 		idx = dmsg.start_idx;
2698 		if (idx >= vdc->dring_len) {
2699 			DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n",
2700 			    vdc->instance, idx);
2701 			continue;
2702 		}
2703 		ldep = &vdc->local_dring[idx];
2704 		if (ldep->dep->hdr.dstate != VIO_DESC_DONE) {
2705 			DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n",
2706 			    vdc->instance, idx, ldep->dep->hdr.dstate);
2707 			continue;
2708 		}
2709 
2710 		DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n",
2711 		    vdc->instance, idx, ldep->dep->hdr.dstate);
2712 		rv = vdc_depopulate_descriptor(vdc, idx);
2713 		if (rv) {
2714 			DMSG(vdc, 0,
2715 			    "[%d] Entry @ %d - depopulate failed ..\n",
2716 			    vdc->instance, idx);
2717 		}
2718 
2719 		/* if this is the last descriptor - break out of loop */
2720 		if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx)
2721 			break;
2722 	}
2723 
2724 	mutex_exit(&vdc->lock);
2725 	DMSG(vdc, 0, "End idx=%d\n", idx);
2726 
2727 	return (rv);
2728 }
2729 
2730 
/*
 * Function:
 *	vdc_depopulate_descriptor()
 *
 * Description:
 *	Release the resources of a completed descriptor ring entry:
 *	mark the entry free, copy bounce-buffer data back to the
 *	caller's buffer if one was used, unbind the LDC memory handle
 *	and wake up threads waiting for a free entry.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	idx	- Index of the Descriptor Ring entry being modified
 *
 * Return Code:
 *	0	- Success
 *	!= 0	- the status reported by the vDisk server, or EINVAL if
 *		  the memory handle could not be unbound
 */
static int
vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
{
	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
	vdc_local_desc_t *ldep = NULL;		/* Local Dring Entry Pointer */
	int		status = ENXIO;
	int		operation;
	int		rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(idx < vdc->dring_len);
	ldep = &vdc->local_dring[idx];
	ASSERT(ldep != NULL);
	ASSERT(MUTEX_HELD(&vdc->lock));

	DMSG(vdc, 2, ": idx = %d\n", idx);
	dep = ldep->dep;
	ASSERT(dep != NULL);
	/* entry must be completed by vds, or cancelled by a reset */
	ASSERT((dep->hdr.dstate == VIO_DESC_DONE) ||
			(dep->payload.status == ECANCELED));

	VDC_MARK_DRING_ENTRY_FREE(vdc, idx);

	/* capture the result before the entry can be reused */
	ldep->is_free = B_TRUE;
	DMSG(vdc, 2, ": is_free = %d\n", ldep->is_free);
	status = dep->payload.status;
	operation = dep->payload.operation;

	/* the DKIO FLUSH operation never bind handles so we can return now */
	if (operation == VD_OP_FLUSH)
		return (status);

	/*
	 * If the upper layer passed in a misaligned address we copied the
	 * data into an aligned buffer before sending it to LDC - we now
	 * copy it back to the original buffer.
	 */
	if (ldep->align_addr) {
		ASSERT(ldep->addr != NULL);
		ASSERT(dep->payload.nbytes > 0);

		bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes);
		/*
		 * NOTE(review): this size expression must stay identical to
		 * the kmem_alloc() in vdc_populate_mem_hdl() - the
		 * 'sizeof (caddr_t) *' factor looks larger than needed but
		 * matches the allocation, so it must not be changed here
		 * independently.
		 */
		kmem_free(ldep->align_addr,
			sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8));
		ldep->align_addr = NULL;
	}

	rv = ldc_mem_unbind_handle(ldep->desc_mhdl);
	if (rv != 0) {
		DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)",
				vdc->instance, ldep->desc_mhdl, idx, rv);
		/*
		 * The error returned by the vDisk server is more informative
		 * and thus has a higher priority but if it isn't set we ensure
		 * that this function returns an error.
		 */
		if (status == 0)
			status = EINVAL;
	}

	/* wake threads waiting for a memory binding or a free ring entry */
	cv_signal(&vdc->membind_cv);
	cv_signal(&vdc->dring_free_cv);

	return (status);
}
2809 
/*
 * Function:
 *	vdc_populate_mem_hdl()
 *
 * Description:
 *	Bind the data buffer of a local descriptor entry to its LDC
 *	memory handle, copying a misaligned buffer into an aligned
 *	bounce buffer first, and store the exported memory cookies
 *	in the shared descriptor.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *	ldep	- pointer to the local descriptor entry to populate
 *
 * Return Code:
 *	0	- Success
 *	EAGAIN	- memory could not be bound or a cookie could not be fetched
 */
2826 static int
2827 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep)
2828 {
2829 	vd_dring_entry_t	*dep = NULL;
2830 	ldc_mem_handle_t	mhdl;
2831 	caddr_t			vaddr;
2832 	size_t			nbytes;
2833 	uint8_t			perm = LDC_MEM_RW;
2834 	uint8_t			maptype;
2835 	int			rv = 0;
2836 	int			i;
2837 
2838 	ASSERT(vdcp != NULL);
2839 
2840 	dep = ldep->dep;
2841 	mhdl = ldep->desc_mhdl;
2842 
2843 	switch (ldep->dir) {
2844 	case VIO_read_dir:
2845 		perm = LDC_MEM_W;
2846 		break;
2847 
2848 	case VIO_write_dir:
2849 		perm = LDC_MEM_R;
2850 		break;
2851 
2852 	case VIO_both_dir:
2853 		perm = LDC_MEM_RW;
2854 		break;
2855 
2856 	default:
2857 		ASSERT(0);	/* catch bad programming in vdc */
2858 	}
2859 
2860 	/*
2861 	 * LDC expects any addresses passed in to be 8-byte aligned. We need
2862 	 * to copy the contents of any misaligned buffers to a newly allocated
2863 	 * buffer and bind it instead (and copy the the contents back to the
2864 	 * original buffer passed in when depopulating the descriptor)
2865 	 */
2866 	vaddr = ldep->addr;
2867 	nbytes = ldep->nbytes;
2868 	if (((uint64_t)vaddr & 0x7) != 0) {
2869 		ASSERT(ldep->align_addr == NULL);
2870 		ldep->align_addr =
2871 			kmem_alloc(sizeof (caddr_t) *
2872 				P2ROUNDUP(nbytes, 8), KM_SLEEP);
2873 		DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating "
2874 		    "(buf=%p nb=%ld op=%d)\n",
2875 		    vdcp->instance, (void *)vaddr, (void *)ldep->align_addr,
2876 		    nbytes, ldep->operation);
2877 		if (perm != LDC_MEM_W)
2878 			bcopy(vaddr, ldep->align_addr, nbytes);
2879 		vaddr = ldep->align_addr;
2880 	}
2881 
2882 	maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP;
2883 	rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8),
2884 		maptype, perm, &dep->payload.cookie[0],
2885 		&dep->payload.ncookies);
2886 	DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n",
2887 			vdcp->instance, dep->payload.ncookies);
2888 	if (rv != 0) {
2889 		DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle "
2890 		    "(mhdl=%p, buf=%p, err=%d)\n",
2891 		    vdcp->instance, (void *)mhdl, (void *)vaddr, rv);
2892 		if (ldep->align_addr) {
2893 			kmem_free(ldep->align_addr,
2894 				sizeof (caddr_t) * P2ROUNDUP(nbytes, 8));
2895 			ldep->align_addr = NULL;
2896 		}
2897 		return (EAGAIN);
2898 	}
2899 
2900 	/*
2901 	 * Get the other cookies (if any).
2902 	 */
2903 	for (i = 1; i < dep->payload.ncookies; i++) {
2904 		rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]);
2905 		if (rv != 0) {
2906 			(void) ldc_mem_unbind_handle(mhdl);
2907 			DMSG(vdcp, 0, "?[%d] Failed to get next cookie "
2908 					"(mhdl=%lx cnum=%d), err=%d",
2909 					vdcp->instance, mhdl, i, rv);
2910 			if (ldep->align_addr) {
2911 				kmem_free(ldep->align_addr,
2912 					sizeof (caddr_t) * dep->payload.nbytes);
2913 				ldep->align_addr = NULL;
2914 			}
2915 			return (EAGAIN);
2916 		}
2917 	}
2918 
2919 	return (rv);
2920 }
2921 
2922 /*
2923  * Interrupt handlers for messages from LDC
2924  */
2925 
2926 /*
2927  * Function:
2928  *	vdc_handle_cb()
2929  *
2930  * Description:
2931  *
2932  * Arguments:
2933  *	event	- Type of event (LDC_EVT_xxx) that triggered the callback
2934  *	arg	- soft state pointer for this instance of the device driver.
2935  *
2936  * Return Code:
2937  *	0	- Success
2938  */
2939 static uint_t
2940 vdc_handle_cb(uint64_t event, caddr_t arg)
2941 {
2942 	ldc_status_t	ldc_state;
2943 	int		rv = 0;
2944 
2945 	vdc_t	*vdc = (vdc_t *)(void *)arg;
2946 
2947 	ASSERT(vdc != NULL);
2948 
2949 	DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num);
2950 
2951 	/*
2952 	 * Depending on the type of event that triggered this callback,
2953 	 * we modify the handshake state or read the data.
2954 	 *
2955 	 * NOTE: not done as a switch() as event could be triggered by
2956 	 * a state change and a read request. Also the ordering	of the
2957 	 * check for the event types is deliberate.
2958 	 */
2959 	if (event & LDC_EVT_UP) {
2960 		DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance);
2961 
2962 		mutex_enter(&vdc->lock);
2963 
2964 		/* get LDC state */
2965 		rv = ldc_status(vdc->ldc_handle, &ldc_state);
2966 		if (rv != 0) {
2967 			DMSG(vdc, 0, "[%d] Couldn't get LDC status %d",
2968 			    vdc->instance, rv);
2969 			return (LDC_SUCCESS);
2970 		}
2971 		if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) {
2972 			/*
2973 			 * Reset the transaction sequence numbers when
2974 			 * LDC comes up. We then kick off the handshake
2975 			 * negotiation with the vDisk server.
2976 			 */
2977 			vdc->seq_num = 1;
2978 			vdc->seq_num_reply = 0;
2979 			vdc->ldc_state = ldc_state;
2980 			cv_signal(&vdc->initwait_cv);
2981 		}
2982 
2983 		mutex_exit(&vdc->lock);
2984 	}
2985 
2986 	if (event & LDC_EVT_READ) {
2987 		DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance);
2988 		mutex_enter(&vdc->read_lock);
2989 		cv_signal(&vdc->read_cv);
2990 		vdc->read_state = VDC_READ_PENDING;
2991 		mutex_exit(&vdc->read_lock);
2992 
2993 		/* that's all we have to do - no need to handle DOWN/RESET */
2994 		return (LDC_SUCCESS);
2995 	}
2996 
2997 	if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) {
2998 
2999 		DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance);
3000 
3001 		mutex_enter(&vdc->lock);
3002 		/*
3003 		 * Need to wake up any readers so they will
3004 		 * detect that a reset has occurred.
3005 		 */
3006 		mutex_enter(&vdc->read_lock);
3007 		if ((vdc->read_state == VDC_READ_WAITING) ||
3008 		    (vdc->read_state == VDC_READ_RESET))
3009 			cv_signal(&vdc->read_cv);
3010 		vdc->read_state = VDC_READ_RESET;
3011 		mutex_exit(&vdc->read_lock);
3012 
3013 		/* wake up any threads waiting for connection to come up */
3014 		if (vdc->state == VDC_STATE_INIT_WAITING) {
3015 			vdc->state = VDC_STATE_RESETTING;
3016 			cv_signal(&vdc->initwait_cv);
3017 		}
3018 
3019 		mutex_exit(&vdc->lock);
3020 	}
3021 
3022 	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ))
3023 		DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received",
3024 				vdc->instance, event);
3025 
3026 	return (LDC_SUCCESS);
3027 }
3028 
3029 /*
3030  * Function:
3031  *	vdc_wait_for_response()
3032  *
3033  * Description:
3034  *	Block waiting for a response from the server. If there is
3035  *	no data the thread block on the read_cv that is signalled
3036  *	by the callback when an EVT_READ occurs.
3037  *
3038  * Arguments:
3039  *	vdcp	- soft state pointer for this instance of the device driver.
3040  *
3041  * Return Code:
3042  *	0	- Success
3043  */
3044 static int
3045 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp)
3046 {
3047 	size_t		nbytes = sizeof (*msgp);
3048 	int		status;
3049 
3050 	ASSERT(vdcp != NULL);
3051 
3052 	DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance);
3053 
3054 	status = vdc_recv(vdcp, msgp, &nbytes);
3055 	DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n",
3056 		status, (int)nbytes);
3057 	if (status) {
3058 		DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n",
3059 				vdcp->instance, status);
3060 		return (status);
3061 	}
3062 
3063 	if (nbytes < sizeof (vio_msg_tag_t)) {
3064 		DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n",
3065 			vdcp->instance, sizeof (vio_msg_tag_t), nbytes);
3066 		return (ENOMSG);
3067 	}
3068 
3069 	DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance,
3070 	    msgp->tag.vio_msgtype,
3071 	    msgp->tag.vio_subtype,
3072 	    msgp->tag.vio_subtype_env);
3073 
3074 	/*
3075 	 * Verify the Session ID of the message
3076 	 *
3077 	 * Every message after the Version has been negotiated should
3078 	 * have the correct session ID set.
3079 	 */
3080 	if ((msgp->tag.vio_sid != vdcp->session_id) &&
3081 	    (msgp->tag.vio_subtype_env != VIO_VER_INFO)) {
3082 		DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, "
3083 				"expected 0x%lx [seq num %lx @ %d]",
3084 			vdcp->instance, msgp->tag.vio_sid,
3085 			vdcp->session_id,
3086 			((vio_dring_msg_t *)msgp)->seq_num,
3087 			((vio_dring_msg_t *)msgp)->start_idx);
3088 		return (ENOMSG);
3089 	}
3090 	return (0);
3091 }
3092 
3093 
3094 /*
3095  * Function:
3096  *	vdc_resubmit_backup_dring()
3097  *
3098  * Description:
3099  *	Resubmit each descriptor in the backed up dring to
3100  * 	vDisk server. The Dring was backed up during connection
3101  *	reset.
3102  *
3103  * Arguments:
3104  *	vdcp	- soft state pointer for this instance of the device driver.
3105  *
3106  * Return Code:
3107  *	0	- Success
3108  */
static int
vdc_resubmit_backup_dring(vdc_t *vdcp)
{
	int		count;
	int		b_idx;
	int		rv;
	int		dring_size;
	int		status;
	vio_msg_t	vio_msg;
	vdc_local_desc_t	*curr_ldep;

	/* called from the reset path, before the lock is taken */
	ASSERT(MUTEX_NOT_HELD(&vdcp->lock));
	ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING);

	DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n",
	    vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail);

	/*
	 * Walk the backup copy of the local descriptor ring and
	 * resubmit all the outstanding transactions.
	 */
	b_idx = vdcp->local_dring_backup_tail;
	for (count = 0; count < vdcp->local_dring_backup_len; count++) {

		curr_ldep = &(vdcp->local_dring_backup[b_idx]);

		/* only resubmit oustanding transactions */
		if (!curr_ldep->is_free) {

			DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx);
			/* vdc_populate_descriptor requires the lock held */
			mutex_enter(&vdcp->lock);
			rv = vdc_populate_descriptor(vdcp, curr_ldep->operation,
			    curr_ldep->addr, curr_ldep->nbytes,
			    curr_ldep->slice, curr_ldep->offset,
			    curr_ldep->cb_type, curr_ldep->cb_arg,
			    curr_ldep->dir);
			mutex_exit(&vdcp->lock);
			if (rv) {
				/* backup ring is kept so a later reset can retry */
				DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n",
				    vdcp->instance, b_idx);
				return (rv);
			}

			/* Wait for the response message. */
			DMSG(vdcp, 1, "waiting for response to idx=%x\n",
			    b_idx);
			status = vdc_wait_for_response(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] wait_for_response "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}

			DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx);
			status = vdc_process_data_msg(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] process_data_msg "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}
		}

		/* get the next element to submit */
		if (++b_idx >= vdcp->local_dring_backup_len)
			b_idx = 0;
	}

	/* all done - now clear up pending dring copy */
	dring_size = vdcp->local_dring_backup_len *
		sizeof (vdcp->local_dring_backup[0]);

	(void) kmem_free(vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup = NULL;

	return (0);
}
3188 
3189 /*
3190  * Function:
3191  *	vdc_backup_local_dring()
3192  *
3193  * Description:
3194  *	Backup the current dring in the event of a reset. The Dring
3195  *	transactions will be resubmitted to the server when the
3196  *	connection is restored.
3197  *
3198  * Arguments:
3199  *	vdcp	- soft state pointer for this instance of the device driver.
3200  *
3201  * Return Code:
3202  *	NONE
3203  */
3204 static void
3205 vdc_backup_local_dring(vdc_t *vdcp)
3206 {
3207 	int dring_size;
3208 
3209 	ASSERT(vdcp->state == VDC_STATE_RESETTING);
3210 
3211 	/*
3212 	 * If the backup dring is stil around, it means
3213 	 * that the last restore did not complete. However,
3214 	 * since we never got back into the running state,
3215 	 * the backup copy we have is still valid.
3216 	 */
3217 	if (vdcp->local_dring_backup != NULL) {
3218 		DMSG(vdcp, 1, "reusing local descriptor ring backup "
3219 		    "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len,
3220 		    vdcp->local_dring_backup_tail);
3221 		return;
3222 	}
3223 
3224 	DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, "
3225 	    "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx);
3226 
3227 	dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]);
3228 
3229 	vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP);
3230 	bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size);
3231 
3232 	vdcp->local_dring_backup_tail = vdcp->dring_curr_idx;
3233 	vdcp->local_dring_backup_len = vdcp->dring_len;
3234 }
3235 
3236 /* -------------------------------------------------------------------------- */
3237 
3238 /*
3239  * The following functions process the incoming messages from vds
3240  */
3241 
3242 /*
3243  * Function:
3244  *      vdc_process_msg_thread()
3245  *
3246  * Description:
3247  *
3248  *	Main VDC message processing thread. Each vDisk instance
3249  * 	consists of a copy of this thread. This thread triggers
3250  * 	all the handshakes and data exchange with the server. It
3251  * 	also handles all channel resets
3252  *
3253  * Arguments:
3254  *      vdc     - soft state pointer for this instance of the device driver.
3255  *
3256  * Return Code:
3257  *      None
3258  */
static void
vdc_process_msg_thread(vdc_t *vdcp)
{
	int	status;

	mutex_enter(&vdcp->lock);

	for (;;) {

		/* debug aid: map the current state to its symbolic name */
#define	Q(_s)	(vdcp->state == _s) ? #_s :
		DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state,
		Q(VDC_STATE_INIT)
		Q(VDC_STATE_INIT_WAITING)
		Q(VDC_STATE_NEGOTIATE)
		Q(VDC_STATE_HANDLE_PENDING)
		Q(VDC_STATE_RUNNING)
		Q(VDC_STATE_RESETTING)
		Q(VDC_STATE_DETACH)
		"UNKNOWN");

		switch (vdcp->state) {
		case VDC_STATE_INIT:

			/* Check if we have been re-initializing repeatedly */
			if (vdcp->hshake_cnt++ > VDC_RETRIES) {
				/* too many failed handshakes; give up */
				vdcp->state = VDC_STATE_DETACH;
				break;
			}

			/* Bring up connection with vds via LDC */
			status = vdc_start_ldc_connection(vdcp);
			switch (status) {
			case EINVAL:
				DMSG(vdcp, 0, "[%d] Could not start LDC",
				    vdcp->instance);
				vdcp->state = VDC_STATE_DETACH;
				break;
			case 0:
				vdcp->state = VDC_STATE_INIT_WAITING;
				break;
			default:
				/*
				 * Transient failure; wait in INIT_WAITING
				 * for the channel callback as well.
				 */
				vdcp->state = VDC_STATE_INIT_WAITING;
				break;
			}
			break;

		case VDC_STATE_INIT_WAITING:

			/*
			 * Let the callback event move us on
			 * when channel is open to server
			 */
			while (vdcp->ldc_state != LDC_UP) {
				cv_wait(&vdcp->initwait_cv, &vdcp->lock);
				if (vdcp->state != VDC_STATE_INIT_WAITING) {
					DMSG(vdcp, 0,
				"state moved to %d out from under us...\n",
					    vdcp->state);

					break;
				}
			}
			/* only advance if nobody changed the state on us */
			if (vdcp->state == VDC_STATE_INIT_WAITING &&
			    vdcp->ldc_state == LDC_UP) {
				vdcp->state = VDC_STATE_NEGOTIATE;
			}
			break;

		case VDC_STATE_NEGOTIATE:
			/*
			 * Run the handshake sequence: version, attributes,
			 * dring registration, then the final RDX exchange.
			 * A failure at any step resets the channel.
			 */
			switch (status = vdc_ver_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "ver negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_attr_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "attr negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_dring_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "dring negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_rdx_exchange(vdcp)) {
			case 0:
				vdcp->state = VDC_STATE_HANDLE_PENDING;
				goto done;
			default:
				DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n",
				    status);
				goto reset;
			}
reset:
			DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n",
			    status);
			vdcp->state = VDC_STATE_RESETTING;
done:
			DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n",
			    vdcp->state);
			break;

		case VDC_STATE_HANDLE_PENDING:

			/*
			 * Resubmit transactions backed up before the last
			 * reset; the lock is dropped because resubmission
			 * blocks waiting for server responses.
			 */
			mutex_exit(&vdcp->lock);
			status = vdc_resubmit_backup_dring(vdcp);
			mutex_enter(&vdcp->lock);

			if (status)
				vdcp->state = VDC_STATE_RESETTING;
			else
				vdcp->state = VDC_STATE_RUNNING;

			break;

		/* enter running state */
		case VDC_STATE_RUNNING:
			/*
			 * Signal anyone waiting for the connection
			 * to come on line.
			 */
			vdcp->hshake_cnt = 0;
			cv_broadcast(&vdcp->running_cv);
			mutex_exit(&vdcp->lock);

			/*
			 * Process incoming server messages until an error
			 * occurs; any error drops us into the reset path.
			 */
			for (;;) {
				vio_msg_t msg;
				status = vdc_wait_for_response(vdcp, &msg);
				if (status) break;

				DMSG(vdcp, 1, "[%d] new pkt(s) available\n",
					vdcp->instance);
				status = vdc_process_data_msg(vdcp, &msg);
				if (status) {
					DMSG(vdcp, 1, "[%d] process_data_msg "
					    "returned err=%d\n", vdcp->instance,
					    status);
					break;
				}

			}

			mutex_enter(&vdcp->lock);

			vdcp->state = VDC_STATE_RESETTING;
			break;

		case VDC_STATE_RESETTING:
			DMSG(vdcp, 0, "Initiating channel reset "
			    "(pending = %d)\n", (int)vdcp->threads_pending);

			if (vdcp->self_reset) {
				DMSG(vdcp, 0,
				    "[%d] calling stop_ldc_connection.\n",
				    vdcp->instance);
				status = vdc_stop_ldc_connection(vdcp);
				vdcp->self_reset = B_FALSE;
			}

			/*
			 * Wait for all threads currently waiting
			 * for a free dring entry to use.
			 */
			while (vdcp->threads_pending) {
				cv_broadcast(&vdcp->membind_cv);
				cv_broadcast(&vdcp->dring_free_cv);
				mutex_exit(&vdcp->lock);
				/* let them wake up */
				drv_usecwait(vdc_min_timeout_ldc);
				mutex_enter(&vdcp->lock);
			}

			ASSERT(vdcp->threads_pending == 0);

			/* Sanity check that no thread is receiving */
			ASSERT(vdcp->read_state != VDC_READ_WAITING);

			vdcp->read_state = VDC_READ_IDLE;

			/* preserve in-flight transactions for resubmission */
			vdc_backup_local_dring(vdcp);

			/* cleanup the old d-ring */
			vdc_destroy_descriptor_ring(vdcp);

			/* go and start again */
			vdcp->state = VDC_STATE_INIT;

			break;

		case VDC_STATE_DETACH:
			DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n",
			    vdcp->instance);

			/* drain any threads blocked on synchronous ops */
			while (vdcp->sync_op_pending) {
				cv_signal(&vdcp->sync_pending_cv);
				cv_signal(&vdcp->sync_blocked_cv);
				mutex_exit(&vdcp->lock);
				drv_usecwait(vdc_min_timeout_ldc);
				mutex_enter(&vdcp->lock);
			}

			cv_signal(&vdcp->running_cv);
			mutex_exit(&vdcp->lock);

			DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n",
				vdcp->instance);
			thread_exit();
			/* NOTREACHED */
			break;
		}
	}
}
3482 
3483 
3484 /*
3485  * Function:
3486  *	vdc_process_data_msg()
3487  *
3488  * Description:
3489  *	This function is called by the message processing thread each time
3490  *	a message with a msgtype of VIO_TYPE_DATA is received. It will either
3491  *	be an ACK or NACK from vds[1] which vdc handles as follows.
3492  *		ACK	- wake up the waiting thread
3493  *		NACK	- resend any messages necessary
3494  *
3495  *	[1] Although the message format allows it, vds should not send a
3496  *	    VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
3497  *	    some bizarre reason it does, vdc will reset the connection.
3498  *
3499  * Arguments:
3500  *	vdc	- soft state pointer for this instance of the device driver.
3501  *	msg	- the LDC message sent by vds
3502  *
3503  * Return Code:
3504  *	0	- Success.
3505  *	> 0	- error value returned by LDC
3506  */
static int
vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg)
{
	int			status = 0;
	vio_dring_msg_t		*dring_msg;
	vdc_local_desc_t	*ldep = NULL;
	int			start, end;
	int			idx;

	dring_msg = (vio_dring_msg_t *)msg;

	ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA);
	ASSERT(vdcp != NULL);

	mutex_enter(&vdcp->lock);

	/*
	 * Check to see if the message has bogus data. Note that 'end'
	 * may legally be -1 (the range check below admits it), which is
	 * why only 'start' is used as the descriptor index.
	 */
	idx = start = dring_msg->start_idx;
	end = dring_msg->end_idx;
	if ((start >= vdcp->dring_len) ||
	    (end >= vdcp->dring_len) || (end < -1)) {
		DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n",
			vdcp->instance, start, end);
		mutex_exit(&vdcp->lock);
		return (EINVAL);
	}

	/*
	 * Verify that the sequence number is what vdc expects.
	 */
	switch (vdc_verify_seq_num(vdcp, dring_msg)) {
	case VDC_SEQ_NUM_TODO:
		break;	/* keep processing this message */
	case VDC_SEQ_NUM_SKIP:
		/* already processed; silently drop the message */
		mutex_exit(&vdcp->lock);
		return (0);
	case VDC_SEQ_NUM_INVALID:
		mutex_exit(&vdcp->lock);
		DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance);
		return (ENXIO);
	}

	if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
		/* vds could not process the request; fail the I/O */
		DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance);
		VDC_DUMP_DRING_MSG(dring_msg);
		mutex_exit(&vdcp->lock);
		return (EIO);

	} else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) {
		/*
		 * vds should never ask vdc to process data (see the
		 * function header); EPROTO makes the caller reset the
		 * connection.
		 */
		mutex_exit(&vdcp->lock);
		return (EPROTO);
	}

	DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp);
	DMSG(vdcp, 1, ": start %d end %d\n", start, end);
	ASSERT(start == end);

	ldep = &vdcp->local_dring[idx];

	DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n",
		ldep->dep->hdr.dstate, ldep->cb_type);

	if (ldep->dep->hdr.dstate == VIO_DESC_DONE) {
		struct buf *bufp;

		switch (ldep->cb_type) {
		case CB_SYNC:
			/*
			 * Synchronous request: record the status and wake
			 * the thread blocked on sync_pending_cv.
			 */
			ASSERT(vdcp->sync_op_pending);

			status = vdc_depopulate_descriptor(vdcp, idx);
			vdcp->sync_op_status = status;
			vdcp->sync_op_pending = B_FALSE;
			cv_signal(&vdcp->sync_pending_cv);
			break;

		case CB_STRATEGY:
			/*
			 * Asynchronous strategy(9E) request: propagate the
			 * payload status into the buf and complete it.
			 */
			bufp = ldep->cb_arg;
			ASSERT(bufp != NULL);
			status = ldep->dep->payload.status; /* Future:ntoh */
			if (status != 0) {
				DMSG(vdcp, 1, "strategy status=%d\n", status);
				bioerror(bufp, status);
			}
			status = vdc_depopulate_descriptor(vdcp, idx);
			biodone(bufp);
			break;

		default:
			/* unknown callback type is a programming error */
			ASSERT(0);
		}
	}

	/* let the arrival signal propagate */
	mutex_exit(&vdcp->lock);

	/* probe gives the count of how many entries were processed */
	DTRACE_IO2(processed, int, 1, vdc_t *, vdcp);

	return (0);
}
3609 
3610 /*
3611  * Function:
3612  *	vdc_process_err_msg()
3613  *
3614  * NOTE: No error messages are used as part of the vDisk protocol
3615  */
static int
vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg)
{
	/*
	 * NOTE(review): the ARGUNUSED lint annotations presumably cover
	 * builds where ASSERT() and DMSG() compile away, leaving both
	 * arguments formally unused — confirm against the DMSG definition.
	 */
	_NOTE(ARGUNUSED(vdc))
	_NOTE(ARGUNUSED(msg))

	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR);
	DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance);

	/* error messages are not part of the vDisk protocol */
	return (ENOTSUP);
}
3627 
3628 /*
3629  * Function:
3630  *	vdc_handle_ver_msg()
3631  *
3632  * Description:
3633  *
3634  * Arguments:
3635  *	vdc	- soft state pointer for this instance of the device driver.
3636  *	ver_msg	- LDC message sent by vDisk server
3637  *
3638  * Return Code:
3639  *	0	- Success
3640  */
static int
vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg)
{
	int status = 0;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	/* only VER_INFO envelopes are valid during version negotiation */
	if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) {
		return (EPROTO);
	}

	/* the peer must identify itself as a vDisk server */
	if (ver_msg->dev_class != VDEV_DISK_SERVER) {
		return (EINVAL);
	}

	switch (ver_msg->tag.vio_subtype) {
	case VIO_SUBTYPE_ACK:
		/*
		 * We check to see if the version returned is indeed supported
		 * (The server may have also adjusted the minor number downwards
		 * and if so 'ver_msg' will contain the actual version agreed)
		 */
		if (vdc_is_supported_version(ver_msg)) {
			vdc->ver.major = ver_msg->ver_major;
			vdc->ver.minor = ver_msg->ver_minor;
			ASSERT(vdc->ver.major > 0);
		} else {
			status = EPROTO;
		}
		break;

	case VIO_SUBTYPE_NACK:
		/*
		 * call vdc_is_supported_version() which will return the next
		 * supported version (if any) in 'ver_msg'
		 */
		(void) vdc_is_supported_version(ver_msg);
		if (ver_msg->ver_major > 0) {
			size_t len = sizeof (*ver_msg);

			ASSERT(vdc->ver.major > 0);

			/* reset the necessary fields and resend */
			ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
			ver_msg->dev_class = VDEV_DISK;

			status = vdc_send(vdc, (caddr_t)ver_msg, &len);
			DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n",
					vdc->instance, status);
			/* a short send means the message was not delivered */
			if (len != sizeof (*ver_msg))
				status = EBADMSG;
		} else {
			/* no negotiable version left; give up */
			DMSG(vdc, 0, "[%d] No common version with "
					"vDisk server", vdc->instance);
			status = ENOTSUP;
		}

		break;
	case VIO_SUBTYPE_INFO:
		/*
		 * Handle the case where vds starts handshake
		 * (for now only vdc is the instigator)
		 */
		status = ENOTSUP;
		break;

	default:
		status = EINVAL;
		break;
	}

	return (status);
}
3715 
3716 /*
3717  * Function:
3718  *	vdc_handle_attr_msg()
3719  *
3720  * Description:
3721  *
3722  * Arguments:
3723  *	vdc	- soft state pointer for this instance of the device driver.
3724  *	attr_msg	- LDC message sent by vDisk server
3725  *
3726  * Return Code:
3727  *	0	- Success
3728  */
3729 static int
3730 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
3731 {
3732 	int status = 0;
3733 
3734 	ASSERT(vdc != NULL);
3735 	ASSERT(mutex_owned(&vdc->lock));
3736 
3737 	if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) {
3738 		return (EPROTO);
3739 	}
3740 
3741 	switch (attr_msg->tag.vio_subtype) {
3742 	case VIO_SUBTYPE_ACK:
3743 		/*
3744 		 * We now verify the attributes sent by vds.
3745 		 */
3746 		vdc->vdisk_size = attr_msg->vdisk_size;
3747 		vdc->vdisk_type = attr_msg->vdisk_type;
3748 
3749 		DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n",
3750 			vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz);
3751 		DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n",
3752 			vdc->instance, vdc->block_size,
3753 			attr_msg->vdisk_block_size);
3754 
3755 		/*
3756 		 * We don't know at compile time what the vDisk server will
3757 		 * think are good values but we apply an large (arbitrary)
3758 		 * upper bound to prevent memory exhaustion in vdc if it was
3759 		 * allocating a DRing based of huge values sent by the server.
3760 		 * We probably will never exceed this except if the message
3761 		 * was garbage.
3762 		 */
3763 		if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <=
3764 				(PAGESIZE * DEV_BSIZE)) {
3765 			vdc->max_xfer_sz = attr_msg->max_xfer_sz;
3766 			vdc->block_size = attr_msg->vdisk_block_size;
3767 		} else {
3768 			DMSG(vdc, 0, "[%d] vds block transfer size too big;"
3769 				" using max supported by vdc", vdc->instance);
3770 		}
3771 
3772 		if ((attr_msg->xfer_mode != VIO_DRING_MODE) ||
3773 		    (attr_msg->vdisk_size > INT64_MAX) ||
3774 		    (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
3775 			DMSG(vdc, 0, "[%d] Invalid attributes from vds",
3776 					vdc->instance);
3777 			status = EINVAL;
3778 			break;
3779 		}
3780 
3781 		break;
3782 
3783 	case VIO_SUBTYPE_NACK:
3784 		/*
3785 		 * vds could not handle the attributes we sent so we
3786 		 * stop negotiating.
3787 		 */
3788 		status = EPROTO;
3789 		break;
3790 
3791 	case VIO_SUBTYPE_INFO:
3792 		/*
3793 		 * Handle the case where vds starts the handshake
3794 		 * (for now; vdc is the only supported instigatior)
3795 		 */
3796 		status = ENOTSUP;
3797 		break;
3798 
3799 	default:
3800 		status = ENOTSUP;
3801 		break;
3802 	}
3803 
3804 	return (status);
3805 }
3806 
3807 /*
3808  * Function:
3809  *	vdc_handle_dring_reg_msg()
3810  *
3811  * Description:
3812  *
3813  * Arguments:
3814  *	vdc		- soft state pointer for this instance of the driver.
3815  *	dring_msg	- LDC message sent by vDisk server
3816  *
3817  * Return Code:
3818  *	0	- Success
3819  */
3820 static int
3821 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg)
3822 {
3823 	int		status = 0;
3824 
3825 	ASSERT(vdc != NULL);
3826 	ASSERT(mutex_owned(&vdc->lock));
3827 
3828 	if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) {
3829 		return (EPROTO);
3830 	}
3831 
3832 	switch (dring_msg->tag.vio_subtype) {
3833 	case VIO_SUBTYPE_ACK:
3834 		/* save the received dring_ident */
3835 		vdc->dring_ident = dring_msg->dring_ident;
3836 		DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n",
3837 			vdc->instance, vdc->dring_ident);
3838 		break;
3839 
3840 	case VIO_SUBTYPE_NACK:
3841 		/*
3842 		 * vds could not handle the DRing info we sent so we
3843 		 * stop negotiating.
3844 		 */
3845 		DMSG(vdc, 0, "[%d] server could not register DRing\n",
3846 		    vdc->instance);
3847 		status = EPROTO;
3848 		break;
3849 
3850 	case VIO_SUBTYPE_INFO:
3851 		/*
3852 		 * Handle the case where vds starts handshake
3853 		 * (for now only vdc is the instigatior)
3854 		 */
3855 		status = ENOTSUP;
3856 		break;
3857 	default:
3858 		status = ENOTSUP;
3859 	}
3860 
3861 	return (status);
3862 }
3863 
3864 /*
3865  * Function:
3866  *	vdc_verify_seq_num()
3867  *
3868  * Description:
3869  *	This functions verifies that the sequence number sent back by the vDisk
3870  *	server with the latest message is what is expected (i.e. it is greater
3871  *	than the last seq num sent by the vDisk server and less than or equal
3872  *	to the last seq num generated by vdc).
3873  *
3874  *	It then checks the request ID to see if any requests need processing
3875  *	in the DRing.
3876  *
3877  * Arguments:
3878  *	vdc		- soft state pointer for this instance of the driver.
3879  *	dring_msg	- pointer to the LDC message sent by vds
3880  *
3881  * Return Code:
3882  *	VDC_SEQ_NUM_TODO	- Message needs to be processed
3883  *	VDC_SEQ_NUM_SKIP	- Message has already been processed
3884  *	VDC_SEQ_NUM_INVALID	- The seq numbers are so out of sync,
3885  *				  vdc cannot deal with them
3886  */
3887 static int
3888 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg)
3889 {
3890 	ASSERT(vdc != NULL);
3891 	ASSERT(dring_msg != NULL);
3892 	ASSERT(mutex_owned(&vdc->lock));
3893 
3894 	/*
3895 	 * Check to see if the messages were responded to in the correct
3896 	 * order by vds.
3897 	 */
3898 	if ((dring_msg->seq_num <= vdc->seq_num_reply) ||
3899 	    (dring_msg->seq_num > vdc->seq_num)) {
3900 		DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: "
3901 			"%lu > expected <= %lu (last proc req %lu sent %lu)\n",
3902 				vdc->instance, dring_msg->seq_num,
3903 				vdc->seq_num_reply, vdc->seq_num,
3904 				vdc->req_id_proc, vdc->req_id);
3905 		return (VDC_SEQ_NUM_INVALID);
3906 	}
3907 	vdc->seq_num_reply = dring_msg->seq_num;
3908 
3909 	if (vdc->req_id_proc < vdc->req_id)
3910 		return (VDC_SEQ_NUM_TODO);
3911 	else
3912 		return (VDC_SEQ_NUM_SKIP);
3913 }
3914 
3915 
3916 /*
3917  * Function:
3918  *	vdc_is_supported_version()
3919  *
3920  * Description:
3921  *	This routine checks if the major/minor version numbers specified in
3922  *	'ver_msg' are supported. If not it finds the next version that is
3923  *	in the supported version list 'vdc_version[]' and sets the fields in
3924  *	'ver_msg' to those values
3925  *
3926  * Arguments:
3927  *	ver_msg	- LDC message sent by vDisk server
3928  *
3929  * Return Code:
3930  *	B_TRUE	- Success
3931  *	B_FALSE	- Version not supported
3932  */
3933 static boolean_t
3934 vdc_is_supported_version(vio_ver_msg_t *ver_msg)
3935 {
3936 	int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]);
3937 
3938 	for (int i = 0; i < vdc_num_versions; i++) {
3939 		ASSERT(vdc_version[i].major > 0);
3940 		ASSERT((i == 0) ||
3941 		    (vdc_version[i].major < vdc_version[i-1].major));
3942 
3943 		/*
3944 		 * If the major versions match, adjust the minor version, if
3945 		 * necessary, down to the highest value supported by this
3946 		 * client. The server should support all minor versions lower
3947 		 * than the value it sent
3948 		 */
3949 		if (ver_msg->ver_major == vdc_version[i].major) {
3950 			if (ver_msg->ver_minor > vdc_version[i].minor) {
3951 				DMSGX(0,
3952 				    "Adjusting minor version from %u to %u",
3953 				    ver_msg->ver_minor, vdc_version[i].minor);
3954 				ver_msg->ver_minor = vdc_version[i].minor;
3955 			}
3956 			return (B_TRUE);
3957 		}
3958 
3959 		/*
3960 		 * If the message contains a higher major version number, set
3961 		 * the message's major/minor versions to the current values
3962 		 * and return false, so this message will get resent with
3963 		 * these values, and the server will potentially try again
3964 		 * with the same or a lower version
3965 		 */
3966 		if (ver_msg->ver_major > vdc_version[i].major) {
3967 			ver_msg->ver_major = vdc_version[i].major;
3968 			ver_msg->ver_minor = vdc_version[i].minor;
3969 			DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n",
3970 				ver_msg->ver_major, ver_msg->ver_minor);
3971 
3972 			return (B_FALSE);
3973 		}
3974 
3975 		/*
3976 		 * Otherwise, the message's major version is less than the
3977 		 * current major version, so continue the loop to the next
3978 		 * (lower) supported version
3979 		 */
3980 	}
3981 
3982 	/*
3983 	 * No common version was found; "ground" the version pair in the
3984 	 * message to terminate negotiation
3985 	 */
3986 	ver_msg->ver_major = 0;
3987 	ver_msg->ver_minor = 0;
3988 
3989 	return (B_FALSE);
3990 }
3991 /* -------------------------------------------------------------------------- */
3992 
3993 /*
3994  * DKIO(7) support
3995  */
3996 
/*
 * Argument passed to the vdc_dkio_flush_cb() taskq callback used to
 * service an asynchronous DKIOCFLUSHWRITECACHE request.
 */
typedef struct vdc_dk_arg {
	struct dk_callback	dkc;	/* copy of the caller's callback info */
	int			mode;	/* ioctl mode (FKIOCTL, FMODELS, ...) */
	dev_t			dev;	/* device the ioctl was issued against */
	vdc_t			*vdc;	/* soft state of the issuing instance */
} vdc_dk_arg_t;
4003 
4004 /*
4005  * Function:
4006  * 	vdc_dkio_flush_cb()
4007  *
4008  * Description:
4009  *	This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
4010  *	by kernel code.
4011  *
4012  * Arguments:
4013  *	arg	- a pointer to a vdc_dk_arg_t structure.
4014  */
void
vdc_dkio_flush_cb(void *arg)
{
	struct vdc_dk_arg	*dk_arg = (struct vdc_dk_arg *)arg;
	struct dk_callback	*dkc = NULL;
	vdc_t			*vdc = NULL;
	int			rv;

	if (dk_arg == NULL) {
		cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n");
		return;
	}
	dkc = &dk_arg->dkc;
	vdc = dk_arg->vdc;
	ASSERT(vdc != NULL);

	/* issue the flush synchronously to the vDisk server */
	rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0,
	    VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir);
	if (rv != 0) {
		DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
			vdc->instance, rv,
			ddi_model_convert_from(dk_arg->mode & FMODELS));
	}

	/*
	 * Trigger the call back to notify the caller that the ioctl call
	 * has been completed.
	 */
	if ((dk_arg->mode & FKIOCTL) &&
	    (dkc != NULL) &&
	    (dkc->dkc_callback != NULL)) {
		ASSERT(dkc->dkc_cookie != NULL);
		(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
	}

	/* Indicate that one less DKIO write flush is outstanding */
	mutex_enter(&vdc->lock);
	vdc->dkio_flush_pending--;
	ASSERT(vdc->dkio_flush_pending >= 0);
	mutex_exit(&vdc->lock);

	/* free the mem that was allocated when the callback was dispatched */
	kmem_free(arg, sizeof (vdc_dk_arg_t));
}
4059 
4060 /*
4061  * This structure is used in the DKIO(7I) array below.
4062  */
4063 typedef struct vdc_dk_ioctl {
4064 	uint8_t		op;		/* VD_OP_XXX value */
4065 	int		cmd;		/* Solaris ioctl operation number */
4066 	size_t		nbytes;		/* size of structure to be copied */
4067 
4068 	/* function to convert between vDisk and Solaris structure formats */
4069 	int	(*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg,
4070 	    int mode, int dir);
4071 } vdc_dk_ioctl_t;
4072 
4073 /*
4074  * Subset of DKIO(7I) operations currently supported
4075  */
4076 static vdc_dk_ioctl_t	dk_ioctl[] = {
4077 	{VD_OP_FLUSH,		DKIOCFLUSHWRITECACHE,	sizeof (int),
4078 		vdc_null_copy_func},
4079 	{VD_OP_GET_WCE,		DKIOCGETWCE,		sizeof (int),
4080 		vdc_get_wce_convert},
4081 	{VD_OP_SET_WCE,		DKIOCSETWCE,		sizeof (int),
4082 		vdc_set_wce_convert},
4083 	{VD_OP_GET_VTOC,	DKIOCGVTOC,		sizeof (vd_vtoc_t),
4084 		vdc_get_vtoc_convert},
4085 	{VD_OP_SET_VTOC,	DKIOCSVTOC,		sizeof (vd_vtoc_t),
4086 		vdc_set_vtoc_convert},
4087 	{VD_OP_GET_DISKGEOM,	DKIOCGGEOM,		sizeof (vd_geom_t),
4088 		vdc_get_geom_convert},
4089 	{VD_OP_GET_DISKGEOM,	DKIOCG_PHYGEOM,		sizeof (vd_geom_t),
4090 		vdc_get_geom_convert},
4091 	{VD_OP_GET_DISKGEOM, 	DKIOCG_VIRTGEOM,	sizeof (vd_geom_t),
4092 		vdc_get_geom_convert},
4093 	{VD_OP_SET_DISKGEOM,	DKIOCSGEOM,		sizeof (vd_geom_t),
4094 		vdc_set_geom_convert},
4095 	{VD_OP_GET_EFI,		DKIOCGETEFI,		0,
4096 		vdc_get_efi_convert},
4097 	{VD_OP_SET_EFI,		DKIOCSETEFI,		0,
4098 		vdc_set_efi_convert},
4099 
4100 	/*
4101 	 * These particular ioctls are not sent to the server - vdc fakes up
4102 	 * the necessary info.
4103 	 */
4104 	{0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func},
4105 	{0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func},
4106 	{0, USCSICMD,	sizeof (struct uscsi_cmd), vdc_null_copy_func},
4107 	{0, DKIOCREMOVABLE, 0, vdc_null_copy_func},
4108 	{0, CDROMREADOFFSET, 0, vdc_null_copy_func}
4109 };
4110 
4111 /*
4112  * Function:
4113  *	vd_process_ioctl()
4114  *
4115  * Description:
4116  *	This routine processes disk specific ioctl calls
4117  *
4118  * Arguments:
4119  *	dev	- the device number
4120  *	cmd	- the operation [dkio(7I)] to be processed
4121  *	arg	- pointer to user provided structure
4122  *		  (contains data to be set or reference parameter for get)
4123  *	mode	- bit flag, indicating open settings, 32/64 bit type, etc
4124  *
4125  * Return Code:
4126  *	0
4127  *	EFAULT
4128  *	ENXIO
4129  *	EIO
4130  *	ENOTSUP
4131  */
4132 static int
4133 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode)
4134 {
4135 	int		instance = VDCUNIT(dev);
4136 	vdc_t		*vdc = NULL;
4137 	int		rv = -1;
4138 	int		idx = 0;		/* index into dk_ioctl[] */
4139 	size_t		len = 0;		/* #bytes to send to vds */
4140 	size_t		alloc_len = 0;		/* #bytes to allocate mem for */
4141 	caddr_t		mem_p = NULL;
4142 	size_t		nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0]));
4143 	struct vtoc	vtoc_saved;
4144 	vdc_dk_ioctl_t	*iop;
4145 
4146 	vdc = ddi_get_soft_state(vdc_state, instance);
4147 	if (vdc == NULL) {
4148 		cmn_err(CE_NOTE, "![%d] Could not get soft state structure",
4149 		    instance);
4150 		return (ENXIO);
4151 	}
4152 
4153 	DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n",
4154 		instance, cmd, dev, ddi_model_convert_from(mode & FMODELS));
4155 
4156 	/*
4157 	 * Validate the ioctl operation to be performed.
4158 	 *
4159 	 * If we have looped through the array without finding a match then we
4160 	 * don't support this ioctl.
4161 	 */
4162 	for (idx = 0; idx < nioctls; idx++) {
4163 		if (cmd == dk_ioctl[idx].cmd)
4164 			break;
4165 	}
4166 
4167 	if (idx >= nioctls) {
4168 		DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n",
4169 		    vdc->instance, cmd);
4170 		return (ENOTSUP);
4171 	}
4172 
4173 	iop = &(dk_ioctl[idx]);
4174 
4175 	if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) {
4176 		/* size is not fixed for EFI ioctls, it depends on ioctl arg */
4177 		dk_efi_t	dk_efi;
4178 
4179 		rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode);
4180 		if (rv != 0)
4181 			return (EFAULT);
4182 
4183 		len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length;
4184 	} else {
4185 		len = iop->nbytes;
4186 	}
4187 
4188 	/*
4189 	 * Deal with the ioctls which the server does not provide. vdc can
4190 	 * fake these up and return immediately
4191 	 */
4192 	switch (cmd) {
4193 	case CDROMREADOFFSET:
4194 	case DKIOCREMOVABLE:
4195 	case USCSICMD:
4196 		return (ENOTTY);
4197 
4198 	case DKIOCINFO:
4199 		{
4200 			struct dk_cinfo	cinfo;
4201 			if (vdc->cinfo == NULL)
4202 				return (ENXIO);
4203 
4204 			bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo));
4205 			cinfo.dki_partition = VDCPART(dev);
4206 
4207 			rv = ddi_copyout(&cinfo, (void *)arg,
4208 					sizeof (struct dk_cinfo), mode);
4209 			if (rv != 0)
4210 				return (EFAULT);
4211 
4212 			return (0);
4213 		}
4214 
4215 	case DKIOCGMEDIAINFO:
4216 		{
4217 			if (vdc->minfo == NULL)
4218 				return (ENXIO);
4219 
4220 			rv = ddi_copyout(vdc->minfo, (void *)arg,
4221 					sizeof (struct dk_minfo), mode);
4222 			if (rv != 0)
4223 				return (EFAULT);
4224 
4225 			return (0);
4226 		}
4227 
4228 	case DKIOCFLUSHWRITECACHE:
4229 		{
4230 			struct dk_callback *dkc = (struct dk_callback *)arg;
4231 			vdc_dk_arg_t	*dkarg = NULL;
4232 
4233 			DMSG(vdc, 1, "[%d] Flush W$: mode %x\n",
4234 			    instance, mode);
4235 
4236 			/*
4237 			 * If the backing device is not a 'real' disk then the
4238 			 * W$ operation request to the vDisk server will fail
4239 			 * so we might as well save the cycles and return now.
4240 			 */
4241 			if (vdc->vdisk_type != VD_DISK_TYPE_DISK)
4242 				return (ENOTTY);
4243 
4244 			/*
4245 			 * If arg is NULL, then there is no callback function
4246 			 * registered and the call operates synchronously; we
4247 			 * break and continue with the rest of the function and
4248 			 * wait for vds to return (i.e. after the request to
4249 			 * vds returns successfully, all writes completed prior
4250 			 * to the ioctl will have been flushed from the disk
4251 			 * write cache to persistent media.
4252 			 *
4253 			 * If a callback function is registered, we dispatch
4254 			 * the request on a task queue and return immediately.
4255 			 * The callback will deal with informing the calling
4256 			 * thread that the flush request is completed.
4257 			 */
4258 			if (dkc == NULL)
4259 				break;
4260 
4261 			dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP);
4262 
4263 			dkarg->mode = mode;
4264 			dkarg->dev = dev;
4265 			bcopy(dkc, &dkarg->dkc, sizeof (*dkc));
4266 
4267 			mutex_enter(&vdc->lock);
4268 			vdc->dkio_flush_pending++;
4269 			dkarg->vdc = vdc;
4270 			mutex_exit(&vdc->lock);
4271 
4272 			/* put the request on a task queue */
4273 			rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb,
4274 				(void *)dkarg, DDI_SLEEP);
4275 			if (rv == NULL) {
4276 				/* clean up if dispatch fails */
4277 				mutex_enter(&vdc->lock);
4278 				vdc->dkio_flush_pending--;
4279 				kmem_free(dkarg, sizeof (vdc_dk_arg_t));
4280 			}
4281 
4282 			return (rv == NULL ? ENOMEM : 0);
4283 		}
4284 	}
4285 
4286 	/* catch programming error in vdc - should be a VD_OP_XXX ioctl */
4287 	ASSERT(iop->op != 0);
4288 
4289 	/* LDC requires that the memory being mapped is 8-byte aligned */
4290 	alloc_len = P2ROUNDUP(len, sizeof (uint64_t));
4291 	DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n",
4292 	    instance, len, alloc_len);
4293 
4294 	ASSERT(alloc_len != 0);	/* sanity check */
4295 	mem_p = kmem_zalloc(alloc_len, KM_SLEEP);
4296 
4297 	if (cmd == DKIOCSVTOC) {
4298 		/*
4299 		 * Save a copy of the current VTOC so that we can roll back
4300 		 * if the setting of the new VTOC fails.
4301 		 */
4302 		bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc));
4303 	}
4304 
4305 	/*
	 * Call the conversion function for this ioctl which, if necessary,
4307 	 * converts from the Solaris format to the format ARC'ed
4308 	 * as part of the vDisk protocol (FWARC 2006/195)
4309 	 */
4310 	ASSERT(iop->convert != NULL);
4311 	rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN);
4312 	if (rv != 0) {
4313 		DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n",
4314 				instance, rv, cmd);
4315 		if (mem_p != NULL)
4316 			kmem_free(mem_p, alloc_len);
4317 		return (rv);
4318 	}
4319 
4320 	/*
4321 	 * send request to vds to service the ioctl.
4322 	 */
4323 	rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len,
4324 	    VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode,
4325 	    VIO_both_dir);
4326 
4327 	if (rv != 0) {
4328 		/*
4329 		 * This is not necessarily an error. The ioctl could
4330 		 * be returning a value such as ENOTTY to indicate
4331 		 * that the ioctl is not applicable.
4332 		 */
4333 		DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n",
4334 			instance, rv, cmd);
4335 		if (mem_p != NULL)
4336 			kmem_free(mem_p, alloc_len);
4337 
4338 		if (cmd == DKIOCSVTOC) {
4339 			/* update of the VTOC has failed, roll back */
4340 			bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc));
4341 		}
4342 
4343 		return (rv);
4344 	}
4345 
4346 	if (cmd == DKIOCSVTOC) {
4347 		/*
4348 		 * The VTOC has been changed. We need to update the device
4349 		 * nodes to handle the case where an EFI label has been
4350 		 * changed to a VTOC label. We also try and update the device
4351 		 * node properties. Failing to set the properties should
	 * not cause an error to be returned to the caller though.
4353 		 */
4354 		vdc->vdisk_label = VD_DISK_LABEL_VTOC;
4355 		(void) vdc_create_device_nodes_vtoc(vdc);
4356 
4357 		if (vdc_create_device_nodes_props(vdc)) {
4358 			DMSG(vdc, 0, "![%d] Failed to update device nodes"
4359 			    " properties", vdc->instance);
4360 		}
4361 
4362 	} else if (cmd == DKIOCSETEFI) {
4363 		/*
4364 		 * The EFI has been changed. We need to update the device
4365 		 * nodes to handle the case where a VTOC label has been
4366 		 * changed to an EFI label. We also try and update the device
4367 		 * node properties. Failing to set the properties should
	 * not cause an error to be returned to the caller though.
4369 		 */
4370 		struct dk_gpt *efi;
4371 		size_t efi_len;
4372 
4373 		vdc->vdisk_label = VD_DISK_LABEL_EFI;
4374 		(void) vdc_create_device_nodes_efi(vdc);
4375 
4376 		rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len);
4377 
4378 		if (rv == 0) {
4379 			vdc_store_efi(vdc, efi);
4380 			rv = vdc_create_device_nodes_props(vdc);
4381 			vd_efi_free(efi, efi_len);
4382 		}
4383 
4384 		if (rv) {
4385 			DMSG(vdc, 0, "![%d] Failed to update device nodes"
4386 			    " properties", vdc->instance);
4387 		}
4388 	}
4389 
4390 	/*
4391 	 * Call the conversion function (if it exists) for this ioctl
4392 	 * which converts from the format ARC'ed as part of the vDisk
4393 	 * protocol (FWARC 2006/195) back to a format understood by
4394 	 * the rest of Solaris.
4395 	 */
4396 	rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT);
4397 	if (rv != 0) {
4398 		DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n",
4399 				instance, rv, cmd);
4400 		if (mem_p != NULL)
4401 			kmem_free(mem_p, alloc_len);
4402 		return (rv);
4403 	}
4404 
4405 	if (mem_p != NULL)
4406 		kmem_free(mem_p, alloc_len);
4407 
4408 	return (rv);
4409 }
4410 
4411 /*
4412  * Function:
4413  *
4414  * Description:
4415  *	This is an empty conversion function used by ioctl calls which
4416  *	do not need to convert the data being passed in/out to userland
4417  */
4418 static int
4419 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir)
4420 {
4421 	_NOTE(ARGUNUSED(vdc))
4422 	_NOTE(ARGUNUSED(from))
4423 	_NOTE(ARGUNUSED(to))
4424 	_NOTE(ARGUNUSED(mode))
4425 	_NOTE(ARGUNUSED(dir))
4426 
4427 	return (0);
4428 }
4429 
4430 static int
4431 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
4432     int mode, int dir)
4433 {
4434 	_NOTE(ARGUNUSED(vdc))
4435 
4436 	if (dir == VD_COPYIN)
4437 		return (0);		/* nothing to do */
4438 
4439 	if (ddi_copyout(from, to, sizeof (int), mode) != 0)
4440 		return (EFAULT);
4441 
4442 	return (0);
4443 }
4444 
4445 static int
4446 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
4447     int mode, int dir)
4448 {
4449 	_NOTE(ARGUNUSED(vdc))
4450 
4451 	if (dir == VD_COPYOUT)
4452 		return (0);		/* nothing to do */
4453 
4454 	if (ddi_copyin(from, to, sizeof (int), mode) != 0)
4455 		return (EFAULT);
4456 
4457 	return (0);
4458 }
4459 
4460 /*
4461  * Function:
4462  *	vdc_get_vtoc_convert()
4463  *
4464  * Description:
4465  *	This routine performs the necessary convertions from the DKIOCGVTOC
4466  *	Solaris structure to the format defined in FWARC 2006/195.
4467  *
4468  *	In the struct vtoc definition, the timestamp field is marked as not
4469  *	supported so it is not part of vDisk protocol (FWARC 2006/195).
4470  *	However SVM uses that field to check it can write into the VTOC,
4471  *	so we fake up the info of that field.
4472  *
4473  * Arguments:
4474  *	vdc	- the vDisk client
4475  *	from	- the buffer containing the data to be copied from
4476  *	to	- the buffer to be copied to
4477  *	mode	- flags passed to ioctl() call
4478  *	dir	- the "direction" of the copy - VD_COPYIN or VD_COPYOUT
4479  *
4480  * Return Code:
4481  *	0	- Success
4482  *	ENXIO	- incorrect buffer passed in.
4483  *	EFAULT	- ddi_copyout routine encountered an error.
4484  */
4485 static int
4486 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
4487 {
4488 	int		i;
4489 	void		*tmp_mem = NULL;
4490 	void		*tmp_memp;
4491 	struct vtoc	vt;
4492 	struct vtoc32	vt32;
4493 	int		copy_len = 0;
4494 	int		rv = 0;
4495 
4496 	if (dir != VD_COPYOUT)
4497 		return (0);	/* nothing to do */
4498 
4499 	if ((from == NULL) || (to == NULL))
4500 		return (ENXIO);
4501 
4502 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
4503 		copy_len = sizeof (struct vtoc32);
4504 	else
4505 		copy_len = sizeof (struct vtoc);
4506 
4507 	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);
4508 
4509 	VD_VTOC2VTOC((vd_vtoc_t *)from, &vt);
4510 
4511 	/* fake the VTOC timestamp field */
4512 	for (i = 0; i < V_NUMPAR; i++) {
4513 		vt.timestamp[i] = vdc->vtoc->timestamp[i];
4514 	}
4515 
4516 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
4517 		vtoctovtoc32(vt, vt32);
4518 		tmp_memp = &vt32;
4519 	} else {
4520 		tmp_memp = &vt;
4521 	}
4522 	rv = ddi_copyout(tmp_memp, to, copy_len, mode);
4523 	if (rv != 0)
4524 		rv = EFAULT;
4525 
4526 	kmem_free(tmp_mem, copy_len);
4527 	return (rv);
4528 }
4529 
4530 /*
4531  * Function:
4532  *	vdc_set_vtoc_convert()
4533  *
4534  * Description:
4535  *	This routine performs the necessary convertions from the DKIOCSVTOC
4536  *	Solaris structure to the format defined in FWARC 2006/195.
4537  *
4538  * Arguments:
4539  *	vdc	- the vDisk client
4540  *	from	- Buffer with data
4541  *	to	- Buffer where data is to be copied to
4542  *	mode	- flags passed to ioctl
4543  *	dir	- direction of copy (in or out)
4544  *
4545  * Return Code:
4546  *	0	- Success
4547  *	ENXIO	- Invalid buffer passed in
4548  *	EFAULT	- ddi_copyin of data failed
4549  */
4550 static int
4551 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
4552 {
4553 	void		*tmp_mem = NULL;
4554 	struct vtoc	vt;
4555 	struct vtoc	*vtp = &vt;
4556 	vd_vtoc_t	vtvd;
4557 	int		copy_len = 0;
4558 	int		rv = 0;
4559 
4560 	if (dir != VD_COPYIN)
4561 		return (0);	/* nothing to do */
4562 
4563 	if ((from == NULL) || (to == NULL))
4564 		return (ENXIO);
4565 
4566 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
4567 		copy_len = sizeof (struct vtoc32);
4568 	else
4569 		copy_len = sizeof (struct vtoc);
4570 
4571 	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);
4572 
4573 	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
4574 	if (rv != 0) {
4575 		kmem_free(tmp_mem, copy_len);
4576 		return (EFAULT);
4577 	}
4578 
4579 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
4580 		vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt);
4581 	} else {
4582 		vtp = tmp_mem;
4583 	}
4584 
4585 	/*
4586 	 * The VTOC is being changed, then vdc needs to update the copy
4587 	 * it saved in the soft state structure.
4588 	 */
4589 	bcopy(vtp, vdc->vtoc, sizeof (struct vtoc));
4590 
4591 	VTOC2VD_VTOC(vtp, &vtvd);
4592 	bcopy(&vtvd, to, sizeof (vd_vtoc_t));
4593 	kmem_free(tmp_mem, copy_len);
4594 
4595 	return (0);
4596 }
4597 
4598 /*
4599  * Function:
4600  *	vdc_get_geom_convert()
4601  *
4602  * Description:
4603  *	This routine performs the necessary convertions from the DKIOCGGEOM,
4604  *	DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format
4605  *	defined in FWARC 2006/195
4606  *
4607  * Arguments:
4608  *	vdc	- the vDisk client
4609  *	from	- Buffer with data
4610  *	to	- Buffer where data is to be copied to
4611  *	mode	- flags passed to ioctl
4612  *	dir	- direction of copy (in or out)
4613  *
4614  * Return Code:
4615  *	0	- Success
4616  *	ENXIO	- Invalid buffer passed in
4617  *	EFAULT	- ddi_copyout of data failed
4618  */
4619 static int
4620 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
4621 {
4622 	_NOTE(ARGUNUSED(vdc))
4623 
4624 	struct dk_geom	geom;
4625 	int	copy_len = sizeof (struct dk_geom);
4626 	int	rv = 0;
4627 
4628 	if (dir != VD_COPYOUT)
4629 		return (0);	/* nothing to do */
4630 
4631 	if ((from == NULL) || (to == NULL))
4632 		return (ENXIO);
4633 
4634 	VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom);
4635 	rv = ddi_copyout(&geom, to, copy_len, mode);
4636 	if (rv != 0)
4637 		rv = EFAULT;
4638 
4639 	return (rv);
4640 }
4641 
4642 /*
4643  * Function:
4644  *	vdc_set_geom_convert()
4645  *
4646  * Description:
4647  *	This routine performs the necessary convertions from the DKIOCSGEOM
4648  *	Solaris structure to the format defined in FWARC 2006/195.
4649  *
4650  * Arguments:
4651  *	vdc	- the vDisk client
4652  *	from	- Buffer with data
4653  *	to	- Buffer where data is to be copied to
4654  *	mode	- flags passed to ioctl
4655  *	dir	- direction of copy (in or out)
4656  *
4657  * Return Code:
4658  *	0	- Success
4659  *	ENXIO	- Invalid buffer passed in
4660  *	EFAULT	- ddi_copyin of data failed
4661  */
4662 static int
4663 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
4664 {
4665 	_NOTE(ARGUNUSED(vdc))
4666 
4667 	vd_geom_t	vdgeom;
4668 	void		*tmp_mem = NULL;
4669 	int		copy_len = sizeof (struct dk_geom);
4670 	int		rv = 0;
4671 
4672 	if (dir != VD_COPYIN)
4673 		return (0);	/* nothing to do */
4674 
4675 	if ((from == NULL) || (to == NULL))
4676 		return (ENXIO);
4677 
4678 	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);
4679 
4680 	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
4681 	if (rv != 0) {
4682 		kmem_free(tmp_mem, copy_len);
4683 		return (EFAULT);
4684 	}
4685 	DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom);
4686 	bcopy(&vdgeom, to, sizeof (vdgeom));
4687 	kmem_free(tmp_mem, copy_len);
4688 
4689 	return (0);
4690 }
4691 
4692 static int
4693 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
4694 {
4695 	_NOTE(ARGUNUSED(vdc))
4696 
4697 	vd_efi_t	*vd_efi;
4698 	dk_efi_t	dk_efi;
4699 	int		rv = 0;
4700 	void		*uaddr;
4701 
4702 	if ((from == NULL) || (to == NULL))
4703 		return (ENXIO);
4704 
4705 	if (dir == VD_COPYIN) {
4706 
4707 		vd_efi = (vd_efi_t *)to;
4708 
4709 		rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode);
4710 		if (rv != 0)
4711 			return (EFAULT);
4712 
4713 		vd_efi->lba = dk_efi.dki_lba;
4714 		vd_efi->length = dk_efi.dki_length;
4715 		bzero(vd_efi->data, vd_efi->length);
4716 
4717 	} else {
4718 
4719 		rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode);
4720 		if (rv != 0)
4721 			return (EFAULT);
4722 
4723 		uaddr = dk_efi.dki_data;
4724 
4725 		dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP);
4726 
4727 		VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi);
4728 
4729 		rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length,
4730 		    mode);
4731 		if (rv != 0)
4732 			return (EFAULT);
4733 
4734 		kmem_free(dk_efi.dki_data, dk_efi.dki_length);
4735 	}
4736 
4737 	return (0);
4738 }
4739 
4740 static int
4741 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir)
4742 {
4743 	_NOTE(ARGUNUSED(vdc))
4744 
4745 	dk_efi_t	dk_efi;
4746 	void		*uaddr;
4747 
4748 	if (dir == VD_COPYOUT)
4749 		return (0);	/* nothing to do */
4750 
4751 	if ((from == NULL) || (to == NULL))
4752 		return (ENXIO);
4753 
4754 	if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0)
4755 		return (EFAULT);
4756 
4757 	uaddr = dk_efi.dki_data;
4758 
4759 	dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP);
4760 
4761 	if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0)
4762 		return (EFAULT);
4763 
4764 	DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to);
4765 
4766 	kmem_free(dk_efi.dki_data, dk_efi.dki_length);
4767 
4768 	return (0);
4769 }
4770 
4771 /*
4772  * Function:
4773  *	vdc_create_fake_geometry()
4774  *
4775  * Description:
4776  *	This routine fakes up the disk info needed for some DKIO ioctls.
4777  *		- DKIOCINFO
4778  *		- DKIOCGMEDIAINFO
4779  *
4780  *	[ just like lofi(7D) and ramdisk(7D) ]
4781  *
4782  * Arguments:
4783  *	vdc	- soft state pointer for this instance of the device driver.
4784  *
4785  * Return Code:
4786  *	0	- Success
4787  */
4788 static int
4789 vdc_create_fake_geometry(vdc_t *vdc)
4790 {
4791 	ASSERT(vdc != NULL);
4792 
4793 	/*
4794 	 * Check if max_xfer_sz and vdisk_size are valid
4795 	 */
4796 	if (vdc->vdisk_size == 0 || vdc->max_xfer_sz == 0)
4797 		return (EIO);
4798 
4799 	/*
4800 	 * DKIOCINFO support
4801 	 */
4802 	vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
4803 
4804 	(void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
4805 	(void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
4806 	/* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */
4807 	vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz;
4808 	vdc->cinfo->dki_ctype = DKC_SCSI_CCS;
4809 	vdc->cinfo->dki_flags = DKI_FMTVOL;
4810 	vdc->cinfo->dki_cnum = 0;
4811 	vdc->cinfo->dki_addr = 0;
4812 	vdc->cinfo->dki_space = 0;
4813 	vdc->cinfo->dki_prio = 0;
4814 	vdc->cinfo->dki_vec = 0;
4815 	vdc->cinfo->dki_unit = vdc->instance;
4816 	vdc->cinfo->dki_slave = 0;
4817 	/*
4818 	 * The partition number will be created on the fly depending on the
4819 	 * actual slice (i.e. minor node) that is used to request the data.
4820 	 */
4821 	vdc->cinfo->dki_partition = 0;
4822 
4823 	/*
4824 	 * DKIOCGMEDIAINFO support
4825 	 */
4826 	if (vdc->minfo == NULL)
4827 		vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);
4828 	vdc->minfo->dki_media_type = DK_FIXED_DISK;
4829 	vdc->minfo->dki_capacity = vdc->vdisk_size;
4830 	vdc->minfo->dki_lbsize = DEV_BSIZE;
4831 
4832 	return (0);
4833 }
4834 
4835 /*
4836  * Function:
4837  *	vdc_setup_disk_layout()
4838  *
4839  * Description:
4840  *	This routine discovers all the necessary details about the "disk"
4841  *	by requesting the data that is available from the vDisk server and by
4842  *	faking up the rest of the data.
4843  *
4844  * Arguments:
4845  *	vdc	- soft state pointer for this instance of the device driver.
4846  *
4847  * Return Code:
4848  *	0	- Success
4849  */
4850 static int
4851 vdc_setup_disk_layout(vdc_t *vdc)
4852 {
4853 	buf_t	*buf;	/* BREAD requests need to be in a buf_t structure */
4854 	dev_t	dev;
4855 	int	slice = 0;
4856 	int	rv, error;
4857 
4858 	ASSERT(vdc != NULL);
4859 
4860 	if (vdc->vtoc == NULL)
4861 		vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);
4862 
4863 	dev = makedevice(ddi_driver_major(vdc->dip),
4864 				VD_MAKE_DEV(vdc->instance, 0));
4865 	rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL);
4866 
4867 	if (rv && rv != ENOTSUP) {
4868 		DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)",
4869 				vdc->instance, rv);
4870 		return (rv);
4871 	}
4872 
4873 	/*
4874 	 * The process of attempting to read VTOC will initiate
4875 	 * the handshake and establish a connection. Following
4876 	 * handshake, go ahead and create geometry.
4877 	 */
4878 	error = vdc_create_fake_geometry(vdc);
4879 	if (error != 0) {
4880 		DMSG(vdc, 0, "[%d] Failed to create disk geometry (err%d)",
4881 		    vdc->instance, error);
4882 		return (error);
4883 	}
4884 
4885 	if (rv == ENOTSUP) {
4886 		/*
4887 		 * If the device does not support VTOC then we try
4888 		 * to read an EFI label.
4889 		 */
4890 		struct dk_gpt *efi;
4891 		size_t efi_len;
4892 
4893 		rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len);
4894 
4895 		if (rv) {
4896 			DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)",
4897 			    vdc->instance, rv);
4898 			return (rv);
4899 		}
4900 
4901 		vdc->vdisk_label = VD_DISK_LABEL_EFI;
4902 		vdc_store_efi(vdc, efi);
4903 		vd_efi_free(efi, efi_len);
4904 
4905 		return (0);
4906 	}
4907 
4908 	vdc->vdisk_label = VD_DISK_LABEL_VTOC;
4909 
4910 	/*
4911 	 * FUTURE: This could be default way for reading the VTOC
4912 	 * from the disk as supposed to sending the VD_OP_GET_VTOC
4913 	 * to the server. Currently this is a sanity check.
4914 	 *
4915 	 * find the slice that represents the entire "disk" and use that to
4916 	 * read the disk label. The convention in Solaris is that slice 2
4917 	 * represents the whole disk so we check that it is, otherwise we
4918 	 * default to slice 0
4919 	 */
4920 	if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) &&
4921 	    (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) {
4922 		slice = 2;
4923 	} else {
4924 		slice = 0;
4925 	}
4926 
4927 	/*
4928 	 * Read disk label from start of disk
4929 	 */
4930 	vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP);
4931 	buf = kmem_alloc(sizeof (buf_t), KM_SLEEP);
4932 	bioinit(buf);
4933 	buf->b_un.b_addr = (caddr_t)vdc->label;
4934 	buf->b_bcount = DK_LABEL_SIZE;
4935 	buf->b_flags = B_BUSY | B_READ;
4936 	buf->b_dev = dev;
4937 	rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label,
4938 	    DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir);
4939 	if (rv) {
4940 		DMSG(vdc, 1, "[%d] Failed to read disk block 0\n",
4941 		    vdc->instance);
4942 		kmem_free(buf, sizeof (buf_t));
4943 		return (rv);
4944 	}
4945 	rv = biowait(buf);
4946 	biofini(buf);
4947 	kmem_free(buf, sizeof (buf_t));
4948 
4949 	return (rv);
4950 }
4951 
4952 /*
4953  * Function:
4954  *	vdc_setup_devid()
4955  *
4956  * Description:
4957  *	This routine discovers the devid of a vDisk. It requests the devid of
4958  *	the underlying device from the vDisk server, builds an encapsulated
4959  *	devid based on the retrieved devid and registers that new devid to
4960  *	the vDisk.
4961  *
4962  * Arguments:
4963  *	vdc	- soft state pointer for this instance of the device driver.
4964  *
4965  * Return Code:
4966  *	0	- A devid was succesfully registered for the vDisk
4967  */
4968 static int
4969 vdc_setup_devid(vdc_t *vdc)
4970 {
4971 	int rv;
4972 	vd_devid_t *vd_devid;
4973 	size_t bufsize, bufid_len;
4974 
4975 	/*
4976 	 * At first sight, we don't know the size of the devid that the
4977 	 * server will return but this size will be encoded into the
4978 	 * reply. So we do a first request using a default size then we
4979 	 * check if this size was large enough. If not then we do a second
4980 	 * request with the correct size returned by the server. Note that
4981 	 * ldc requires size to be 8-byte aligned.
4982 	 */
4983 	bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN),
4984 	    sizeof (uint64_t));
4985 	vd_devid = kmem_zalloc(bufsize, KM_SLEEP);
4986 	bufid_len = bufsize - sizeof (vd_efi_t) - 1;
4987 
4988 	rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid,
4989 	    bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir);
4990 
4991 	DMSG(vdc, 2, "sync_op returned %d\n", rv);
4992 
4993 	if (rv) {
4994 		kmem_free(vd_devid, bufsize);
4995 		return (rv);
4996 	}
4997 
4998 	if (vd_devid->length > bufid_len) {
4999 		/*
5000 		 * The returned devid is larger than the buffer used. Try again
5001 		 * with a buffer with the right size.
5002 		 */
5003 		kmem_free(vd_devid, bufsize);
5004 		bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length),
5005 		    sizeof (uint64_t));
5006 		vd_devid = kmem_zalloc(bufsize, KM_SLEEP);
5007 		bufid_len = bufsize - sizeof (vd_efi_t) - 1;
5008 
5009 		rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID,
5010 		    (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0,
5011 		    VIO_both_dir);
5012 
5013 		if (rv) {
5014 			kmem_free(vd_devid, bufsize);
5015 			return (rv);
5016 		}
5017 	}
5018 
5019 	/*
5020 	 * The virtual disk should have the same device id as the one associated
5021 	 * with the physical disk it is mapped on, otherwise sharing a disk
5022 	 * between a LDom and a non-LDom may not work (for example for a shared
5023 	 * SVM disk set).
5024 	 *
5025 	 * The DDI framework does not allow creating a device id with any
5026 	 * type so we first create a device id of type DEVID_ENCAP and then
5027 	 * we restore the orignal type of the physical device.
5028 	 */
5029 
5030 	DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length);
5031 
5032 	/* build an encapsulated devid based on the returned devid */
5033 	if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length,
5034 		vd_devid->id, &vdc->devid) != DDI_SUCCESS) {
5035 		DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance);
5036 		kmem_free(vd_devid, bufsize);
5037 		return (1);
5038 	}
5039 
5040 	DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type);
5041 
5042 	ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS);
5043 
5044 	kmem_free(vd_devid, bufsize);
5045 
5046 	if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) {
5047 		DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance);
5048 		return (1);
5049 	}
5050 
5051 	return (0);
5052 }
5053 
5054 static void
5055 vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi)
5056 {
5057 	struct vtoc *vtoc = vdc->vtoc;
5058 
5059 	vd_efi_to_vtoc(efi, vtoc);
5060 	if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) {
5061 		/*
5062 		 * vd_efi_to_vtoc() will store information about the EFI Sun
5063 		 * reserved partition (representing the entire disk) into
5064 		 * partition 7. However single-slice device will only have
5065 		 * that single partition and the vdc driver expects to find
5066 		 * information about that partition in slice 0. So we need
5067 		 * to copy information from slice 7 to slice 0.
5068 		 */
5069 		vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag;
5070 		vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag;
5071 		vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start;
5072 		vtoc->v_part[0].p_size =  vtoc->v_part[VD_EFI_WD_SLICE].p_size;
5073 	}
5074 }
5075