xref: /illumos-gate/usr/src/uts/sun4v/io/vdc.c (revision 5d0bc3ededb82d77f7c33d8f58e517a837ba5140)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * LDoms virtual disk client (vdc) device driver
31  *
32  * This driver runs on a guest logical domain and communicates with the virtual
33  * disk server (vds) driver running on the service domain which is exporting
34  * virtualized "disks" to the guest logical domain.
35  *
36  * The driver can be divided into four sections:
37  *
38  * 1) generic device driver housekeeping
39  *	_init, _fini, attach, detach, ops structures, etc.
40  *
41  * 2) communication channel setup
42  *	Setup the communications link over the LDC channel that vdc uses to
43  *	talk to the vDisk server. Initialise the descriptor ring which
44  *	allows the LDC clients to transfer data via memory mappings.
45  *
46  * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc either copies the data to be written into the
 *	descriptor ring or maps the buffer that will receive the data read
 *	by the vDisk server into the descriptor ring. It then sends a
 *	message to the vDisk server requesting it to complete the operation.
52  *
53  * 4) Handling responses from vDisk server.
54  *	The vDisk server will ACK some or all of the messages vdc sends to it
55  *	(this is configured during the handshake). Upon receipt of an ACK
56  *	vdc will check the descriptor ring and signal to the upper layer
57  *	code waiting on the IO.
58  */
59 
60 #include <sys/conf.h>
61 #include <sys/disp.h>
62 #include <sys/ddi.h>
63 #include <sys/dkio.h>
64 #include <sys/efi_partition.h>
65 #include <sys/fcntl.h>
66 #include <sys/file.h>
67 #include <sys/mach_descrip.h>
68 #include <sys/modctl.h>
69 #include <sys/mdeg.h>
70 #include <sys/note.h>
71 #include <sys/open.h>
72 #include <sys/stat.h>
73 #include <sys/sunddi.h>
74 #include <sys/types.h>
75 #include <sys/promif.h>
76 #include <sys/vtoc.h>
77 #include <sys/archsystm.h>
78 #include <sys/sysmacros.h>
79 
80 #include <sys/cdio.h>
81 #include <sys/dktp/cm.h>
82 #include <sys/dktp/fdisk.h>
83 #include <sys/scsi/generic/sense.h>
84 #include <sys/scsi/impl/uscsi.h>	/* Needed for defn of USCSICMD ioctl */
85 #include <sys/scsi/targets/sddef.h>
86 
87 #include <sys/ldoms.h>
88 #include <sys/ldc.h>
89 #include <sys/vio_common.h>
90 #include <sys/vio_mailbox.h>
91 #include <sys/vdsk_common.h>
92 #include <sys/vdsk_mailbox.h>
93 #include <sys/vdc.h>
94 
95 /*
96  * function prototypes
97  */
98 
99 /* standard driver functions */
100 static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
101 static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
102 static int	vdc_strategy(struct buf *buf);
103 static int	vdc_print(dev_t dev, char *str);
104 static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
105 static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
106 static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
107 static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
108 			cred_t *credp, int *rvalp);
109 static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
110 static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);
111 
112 static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
113 			void *arg, void **resultp);
114 static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
115 static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
116 
117 /* setup */
118 static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
119 static int	vdc_do_ldc_init(vdc_t *vdc);
120 static int	vdc_start_ldc_connection(vdc_t *vdc);
121 static int	vdc_create_device_nodes(vdc_t *vdc);
122 static int	vdc_create_device_nodes_props(vdc_t *vdc);
123 static int	vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id);
124 static int	vdc_do_ldc_up(vdc_t *vdc);
125 static void	vdc_terminate_ldc(vdc_t *vdc);
126 static int	vdc_init_descriptor_ring(vdc_t *vdc);
127 static void	vdc_destroy_descriptor_ring(vdc_t *vdc);
128 
129 /* handshake with vds */
130 static void		vdc_init_handshake_negotiation(void *arg);
131 static int		vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
132 static int		vdc_init_attr_negotiation(vdc_t *vdc);
133 static int		vdc_init_dring_negotiate(vdc_t *vdc);
134 static void		vdc_reset_connection(vdc_t *vdc, boolean_t resetldc);
135 static boolean_t	vdc_is_able_to_tx_data(vdc_t *vdc, int flag);
136 static boolean_t	vdc_is_supported_version(vio_ver_msg_t *ver_msg);
137 
138 /* processing incoming messages from vDisk server */
139 static void	vdc_process_msg_thread(vdc_t *vdc);
140 static void	vdc_process_msg(void *arg);
141 static void	vdc_do_process_msg(vdc_t *vdc);
142 static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
143 static int	vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg);
144 static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg);
145 static int	vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg);
146 static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
147 static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
148 static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
149 static int	vdc_get_next_dring_entry_id(vdc_t *vdc, uint_t needed);
150 static int	vdc_populate_descriptor(vdc_t *vdc, caddr_t addr,
151 			size_t nbytes, int op, uint64_t arg, uint64_t slice);
152 static int	vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx,
153 			vio_dring_msg_t dmsg);
154 static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
155 static int	vdc_get_response(vdc_t *vdc, int start, int end);
156 static int	vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx,
157 			caddr_t addr, size_t nbytes, int operation);
158 static boolean_t vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int
159 			num_msgs);
160 
161 /* dkio */
162 static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode);
163 static int	vdc_create_fake_geometry(vdc_t *vdc);
164 static int	vdc_setup_disk_layout(vdc_t *vdc);
165 static int	vdc_null_copy_func(void *from, void *to, int mode, int dir);
166 static int	vdc_get_vtoc_convert(void *from, void *to, int mode, int dir);
167 static int	vdc_set_vtoc_convert(void *from, void *to, int mode, int dir);
168 static int	vdc_get_geom_convert(void *from, void *to, int mode, int dir);
169 static int	vdc_set_geom_convert(void *from, void *to, int mode, int dir);
170 static int	vdc_uscsicmd_convert(void *from, void *to, int mode, int dir);
171 
/*
 * Module variables
 */

/*
 * vdc_usec_timeout converted to clock ticks; recomputed with
 * drv_usectohz() each time an instance is attached.
 */
uint64_t	vdc_hz_timeout;
/* base timeout in microseconds from which vdc_hz_timeout is derived */
uint64_t	vdc_usec_timeout = VDC_USEC_TIMEOUT_MIN;
/* NOTE(review): presumably the timeout for the dump path - no user visible here */
uint64_t	vdc_dump_usec_timeout = VDC_USEC_TIMEOUT_MIN / 300;
/*
 * Retry bound used while waiting for the handshake during attach, while
 * waiting for the message thread to exit during detach, and when counting
 * handshake attempts.
 */
static int	vdc_retries = VDC_RETRIES;
/* NOTE(review): presumably the retry bound for the dump path - no user visible here */
static int	vdc_dump_retries = VDC_RETRIES * 10;

/*
 * Soft state pointer: anchor for the per-instance vdc_t structures,
 * initialised in _init() and released in _fini().
 */
static void	*vdc_state;

/* variable level controlling the verbosity of the error/debug messages */
int	vdc_msglevel = 0;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 0}};
193 
194 static void
195 vdc_msg(const char *format, ...)
196 {
197 	va_list	args;
198 
199 	va_start(args, format);
200 	vcmn_err(CE_CONT, format, args);
201 	va_end(args);
202 }
203 
/*
 * Character/block entry point table for the driver. Slots filled with
 * nodev, nochpoll or NULL are operations for which vdc supplies no
 * routine of its own.
 */
static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};
224 
/*
 * Device operations table: ties the autoconfiguration entry points
 * (getinfo, attach, detach) and the cb_ops table above to the driver.
 * There are no bus operations (devo_bus_ops is NULL).
 */
static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};
238 
/*
 * Loadable driver linkage: module type (driver ops), the descriptive
 * name string and the dev_ops table for this driver.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client %I%",
	&vdc_ops,
};
244 
/*
 * Module linkage handed to mod_install(), mod_info() and mod_remove();
 * it lists the single modldrv entry above, terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};
250 
251 /* -------------------------------------------------------------------------- */
252 
253 /*
254  * Device Driver housekeeping and setup
255  */
256 
257 int
258 _init(void)
259 {
260 	int	status;
261 
262 	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
263 		return (status);
264 	if ((status = mod_install(&modlinkage)) != 0)
265 		ddi_soft_state_fini(&vdc_state);
266 	return (status);
267 }
268 
269 int
270 _info(struct modinfo *modinfop)
271 {
272 	return (mod_info(&modlinkage, modinfop));
273 }
274 
275 int
276 _fini(void)
277 {
278 	int	status;
279 
280 	if ((status = mod_remove(&modlinkage)) != 0)
281 		return (status);
282 	ddi_soft_state_fini(&vdc_state);
283 	return (0);
284 }
285 
286 static int
287 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,  void *arg, void **resultp)
288 {
289 	_NOTE(ARGUNUSED(dip))
290 
291 	int	instance = SDUNIT(getminor((dev_t)arg));
292 	vdc_t	*vdc = NULL;
293 
294 	switch (cmd) {
295 	case DDI_INFO_DEVT2DEVINFO:
296 		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
297 			*resultp = NULL;
298 			return (DDI_FAILURE);
299 		}
300 		*resultp = vdc->dip;
301 		return (DDI_SUCCESS);
302 	case DDI_INFO_DEVT2INSTANCE:
303 		*resultp = (void *)(uintptr_t)instance;
304 		return (DDI_SUCCESS);
305 	default:
306 		*resultp = NULL;
307 		return (DDI_FAILURE);
308 	}
309 }
310 
311 static int
312 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
313 {
314 	int	instance;
315 	int	rv;
316 	uint_t	retries = 0;
317 	vdc_t	*vdc = NULL;
318 
319 	switch (cmd) {
320 	case DDI_DETACH:
321 		/* the real work happens below */
322 		break;
323 	case DDI_SUSPEND:
324 		/* nothing to do for this non-device */
325 		return (DDI_SUCCESS);
326 	default:
327 		return (DDI_FAILURE);
328 	}
329 
330 	ASSERT(cmd == DDI_DETACH);
331 	instance = ddi_get_instance(dip);
332 	PR1("%s[%d] Entered\n", __func__, instance);
333 
334 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
335 		vdc_msg("%s[%d]:  Could not get state structure.",
336 		    __func__, instance);
337 		return (DDI_FAILURE);
338 	}
339 
340 	if (vdc->open) {
341 		PR0("%s[%d]: Cannot detach: device is open",
342 				__func__, instance);
343 		return (DDI_FAILURE);
344 	}
345 
346 	PR0("%s[%d] proceeding...\n", __func__, instance);
347 
348 	/*
349 	 * try and disable callbacks to prevent another handshake
350 	 */
351 	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
352 	PR0("%s[%d] callback disabled (rv=%d)\n", __func__, instance, rv);
353 
354 	/*
355 	 * Prevent any more attempts to start a handshake with the vdisk
356 	 * server and tear down the existing connection.
357 	 */
358 	mutex_enter(&vdc->lock);
359 	vdc->initialized |= VDC_HANDSHAKE_STOP;
360 	vdc_reset_connection(vdc, B_TRUE);
361 	mutex_exit(&vdc->lock);
362 
363 	if (vdc->initialized & VDC_THREAD) {
364 		mutex_enter(&vdc->msg_proc_lock);
365 		vdc->msg_proc_thr_state = VDC_THR_STOP;
366 		vdc->msg_pending = B_TRUE;
367 		cv_signal(&vdc->msg_proc_cv);
368 
369 		while (vdc->msg_proc_thr_state != VDC_THR_DONE) {
370 			PR0("%s[%d]: Waiting for thread to exit\n",
371 				__func__, instance);
372 			rv = cv_timedwait(&vdc->msg_proc_cv,
373 				&vdc->msg_proc_lock, VD_GET_TIMEOUT_HZ(1));
374 			if ((rv == -1) && (retries++ > vdc_retries))
375 				break;
376 		}
377 		mutex_exit(&vdc->msg_proc_lock);
378 	}
379 
380 	mutex_enter(&vdc->lock);
381 
382 	if (vdc->initialized & VDC_DRING)
383 		vdc_destroy_descriptor_ring(vdc);
384 
385 	if (vdc->initialized & VDC_LDC)
386 		vdc_terminate_ldc(vdc);
387 
388 	mutex_exit(&vdc->lock);
389 
390 	if (vdc->initialized & VDC_MINOR) {
391 		ddi_prop_remove_all(dip);
392 		ddi_remove_minor_node(dip, NULL);
393 	}
394 
395 	if (vdc->initialized & VDC_LOCKS) {
396 		mutex_destroy(&vdc->lock);
397 		mutex_destroy(&vdc->attach_lock);
398 		mutex_destroy(&vdc->msg_proc_lock);
399 		mutex_destroy(&vdc->dring_lock);
400 		cv_destroy(&vdc->cv);
401 		cv_destroy(&vdc->attach_cv);
402 		cv_destroy(&vdc->msg_proc_cv);
403 	}
404 
405 	if (vdc->minfo)
406 		kmem_free(vdc->minfo, sizeof (struct dk_minfo));
407 
408 	if (vdc->cinfo)
409 		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));
410 
411 	if (vdc->vtoc)
412 		kmem_free(vdc->vtoc, sizeof (struct vtoc));
413 
414 	if (vdc->label)
415 		kmem_free(vdc->label, DK_LABEL_SIZE);
416 
417 	if (vdc->initialized & VDC_SOFT_STATE)
418 		ddi_soft_state_free(vdc_state, instance);
419 
420 	PR0("%s[%d] End %p\n", __func__, instance, vdc);
421 
422 	return (DDI_SUCCESS);
423 }
424 
425 
426 static int
427 vdc_do_attach(dev_info_t *dip)
428 {
429 	int		instance;
430 	vdc_t		*vdc = NULL;
431 	int		status;
432 	uint_t		retries = 0;
433 
434 	ASSERT(dip != NULL);
435 
436 	instance = ddi_get_instance(dip);
437 	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
438 		vdc_msg("%s:(%d): Couldn't alloc state structure",
439 		    __func__, instance);
440 		return (DDI_FAILURE);
441 	}
442 
443 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
444 		vdc_msg("%s:(%d): Could not get state structure.",
445 		    __func__, instance);
446 		return (DDI_FAILURE);
447 	}
448 
449 	/*
450 	 * We assign the value to initialized in this case to zero out the
451 	 * variable and then set bits in it to indicate what has been done
452 	 */
453 	vdc->initialized = VDC_SOFT_STATE;
454 
455 	vdc_hz_timeout = drv_usectohz(vdc_usec_timeout);
456 
457 	vdc->dip	= dip;
458 	vdc->instance	= instance;
459 	vdc->open	= 0;
460 	vdc->vdisk_type	= VD_DISK_TYPE_UNK;
461 	vdc->state	= VD_STATE_INIT;
462 	vdc->ldc_state	= 0;
463 	vdc->session_id = 0;
464 	vdc->block_size = DEV_BSIZE;
465 	vdc->max_xfer_sz = maxphys / DEV_BSIZE;
466 
467 	vdc->vtoc = NULL;
468 	vdc->cinfo = NULL;
469 	vdc->minfo = NULL;
470 
471 	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
472 	mutex_init(&vdc->attach_lock, NULL, MUTEX_DRIVER, NULL);
473 	mutex_init(&vdc->msg_proc_lock, NULL, MUTEX_DRIVER, NULL);
474 	mutex_init(&vdc->dring_lock, NULL, MUTEX_DRIVER, NULL);
475 	cv_init(&vdc->cv, NULL, CV_DRIVER, NULL);
476 	cv_init(&vdc->attach_cv, NULL, CV_DRIVER, NULL);
477 	cv_init(&vdc->msg_proc_cv, NULL, CV_DRIVER, NULL);
478 	vdc->initialized |= VDC_LOCKS;
479 
480 	vdc->msg_pending = B_FALSE;
481 	vdc->msg_proc_thr_id = thread_create(NULL, 0, vdc_process_msg_thread,
482 		vdc, 0, &p0, TS_RUN, minclsyspri);
483 	if (vdc->msg_proc_thr_id == NULL) {
484 		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
485 				instance);
486 		return (DDI_FAILURE);
487 	}
488 	vdc->initialized |= VDC_THREAD;
489 
490 	/* initialise LDC channel which will be used to communicate with vds */
491 	if (vdc_do_ldc_init(vdc) != 0) {
492 		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
493 		return (DDI_FAILURE);
494 	}
495 
496 	/* Bring up connection with vds via LDC */
497 	status = vdc_start_ldc_connection(vdc);
498 	if (status != 0) {
499 		vdc_msg("%s[%d]  Could not start LDC", __func__, instance);
500 		return (DDI_FAILURE);
501 	}
502 
503 	/*
504 	 * We need to wait until the handshake has completed before leaving
505 	 * the attach(). This is to allow the device node(s) to be created
506 	 * and the first usage of the filesystem to succeed.
507 	 */
508 	mutex_enter(&vdc->attach_lock);
509 	while ((vdc->ldc_state != LDC_UP) ||
510 		(vdc->state != VD_STATE_DATA)) {
511 
512 		PR0("%s[%d] handshake in progress [VD %d (LDC %d)]\n",
513 			__func__, instance, vdc->state, vdc->ldc_state);
514 
515 		status = cv_timedwait(&vdc->attach_cv, &vdc->attach_lock,
516 				VD_GET_TIMEOUT_HZ(1));
517 		if (status == -1) {
518 			if (retries >= vdc_retries) {
519 				PR0("%s[%d] Give up handshake wait.\n",
520 						__func__, instance);
521 				mutex_exit(&vdc->attach_lock);
522 				return (DDI_FAILURE);
523 			} else {
524 				PR0("%s[%d] Retry #%d for handshake.\n",
525 						__func__, instance, retries);
526 				vdc_init_handshake_negotiation(vdc);
527 				retries++;
528 			}
529 		}
530 	}
531 	mutex_exit(&vdc->attach_lock);
532 
533 	/*
534 	 * Once the handshake is complete, we can use the DRing to send
535 	 * requests to the vDisk server to calculate the geometry and
536 	 * VTOC of the "disk"
537 	 */
538 	status = vdc_setup_disk_layout(vdc);
539 	if (status != 0) {
540 		cmn_err(CE_NOTE, "[%d] Failed to discover disk layout (err%d)",
541 				vdc->instance, status);
542 	}
543 
544 	/*
545 	 * Now that we have the device info we can create the
546 	 * device nodes and properties
547 	 */
548 	status = vdc_create_device_nodes(vdc);
549 	if (status) {
550 		cmn_err(CE_NOTE, "[%d] Failed to create device nodes",
551 				instance);
552 		return (status);
553 	}
554 	status = vdc_create_device_nodes_props(vdc);
555 	if (status) {
556 		cmn_err(CE_NOTE, "[%d] Failed to create device nodes"
557 				" properties (%d)", instance, status);
558 		return (status);
559 	}
560 
561 	ddi_report_dev(dip);
562 
563 	PR0("%s[%d] Attach completed\n", __func__, instance);
564 	return (status);
565 }
566 
567 static int
568 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
569 {
570 	int	status;
571 
572 	switch (cmd) {
573 	case DDI_ATTACH:
574 		if ((status = vdc_do_attach(dip)) != 0)
575 			(void) vdc_detach(dip, DDI_DETACH);
576 		return (status);
577 	case DDI_RESUME:
578 		/* nothing to do for this non-device */
579 		return (DDI_SUCCESS);
580 	default:
581 		return (DDI_FAILURE);
582 	}
583 }
584 
585 static int
586 vdc_do_ldc_init(vdc_t *vdc)
587 {
588 	int			status = 0;
589 	ldc_status_t		ldc_state;
590 	ldc_attr_t		ldc_attr;
591 	uint64_t		ldc_id = 0;
592 	dev_info_t		*dip = NULL;
593 
594 	ASSERT(vdc != NULL);
595 
596 	dip = vdc->dip;
597 	vdc->initialized |= VDC_LDC;
598 
599 	if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) {
600 		vdc_msg("%s:  Failed to get <ldc_id> property\n", __func__);
601 		return (EIO);
602 	}
603 	vdc->ldc_id = ldc_id;
604 
605 	ldc_attr.devclass = LDC_DEV_BLK;
606 	ldc_attr.instance = vdc->instance;
607 	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
608 	ldc_attr.qlen = VD_LDC_QLEN;
609 
610 	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
611 		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
612 		if (status != 0) {
613 			cmn_err(CE_NOTE, "[%d] ldc_init(chan %ld) returned %d",
614 					vdc->instance, ldc_id, status);
615 			return (status);
616 		}
617 		vdc->initialized |= VDC_LDC_INIT;
618 	}
619 	status = ldc_status(vdc->ldc_handle, &ldc_state);
620 	if (status != 0) {
621 		vdc_msg("Cannot discover LDC status [err=%d].", status);
622 		return (status);
623 	}
624 	vdc->ldc_state = ldc_state;
625 
626 	if ((vdc->initialized & VDC_LDC_CB) == 0) {
627 		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
628 		    (caddr_t)vdc);
629 		if (status != 0) {
630 			vdc_msg("%s: ldc_reg_callback()=%d", __func__, status);
631 			return (status);
632 		}
633 		vdc->initialized |= VDC_LDC_CB;
634 	}
635 
636 	vdc->initialized |= VDC_LDC;
637 
638 	/*
639 	 * At this stage we have initialised LDC, we will now try and open
640 	 * the connection.
641 	 */
642 	if (vdc->ldc_state == LDC_INIT) {
643 		status = ldc_open(vdc->ldc_handle);
644 		if (status != 0) {
645 			cmn_err(CE_NOTE, "[%d] ldc_open(chan %ld) returned %d",
646 					vdc->instance, vdc->ldc_id, status);
647 			return (status);
648 		}
649 		vdc->initialized |= VDC_LDC_OPEN;
650 	}
651 
652 	return (status);
653 }
654 
655 static int
656 vdc_start_ldc_connection(vdc_t *vdc)
657 {
658 	int		status = 0;
659 
660 	ASSERT(vdc != NULL);
661 
662 	mutex_enter(&vdc->lock);
663 
664 	if (vdc->ldc_state == LDC_UP) {
665 		PR0("%s:  LDC is already UP ..\n", __func__);
666 		mutex_exit(&vdc->lock);
667 		return (0);
668 	}
669 
670 	status = vdc_do_ldc_up(vdc);
671 
672 	PR0("%s[%d] Finished bringing up LDC\n", __func__, vdc->instance);
673 
674 	mutex_exit(&vdc->lock);
675 
676 	return (status);
677 }
678 
679 
680 /*
681  * Function:
682  *	vdc_create_device_nodes
683  *
684  * Description:
685  *	This function creates the block and character device nodes under
686  *	/devices along with the node properties. It is called as part of
687  *	the attach(9E) of the instance during the handshake with vds after
688  *	vds has sent the attributes to vdc.
689  *
690  *	If the device is of type VD_DISK_TYPE_SLICE then the minor node
691  *	of 2 is used in keeping with the Solaris convention that slice 2
692  *	refers to a whole disk. Slices start at 'a'
693  *
694  * Parameters:
695  *	vdc 		- soft state pointer
696  *
697  * Return Values
698  *	0		- Success
699  *	EIO		- Failed to create node
700  *	EINVAL		- Unknown type of disk exported
701  */
702 static int
703 vdc_create_device_nodes(vdc_t *vdc)
704 {
705 	/* uses NNNN which is OK as long as # of disks <= 10000 */
706 	char		name[sizeof ("disk@NNNN:s,raw")];
707 	dev_info_t	*dip = NULL;
708 	int		instance;
709 	int		num_slices = 1;
710 	int		i;
711 
712 	ASSERT(vdc != NULL);
713 
714 	instance = vdc->instance;
715 	dip = vdc->dip;
716 
717 	switch (vdc->vdisk_type) {
718 	case VD_DISK_TYPE_DISK:
719 		num_slices = V_NUMPAR;
720 		break;
721 	case VD_DISK_TYPE_SLICE:
722 		num_slices = 1;
723 		break;
724 	case VD_DISK_TYPE_UNK:
725 	default:
726 		return (EINVAL);
727 	}
728 
729 	for (i = 0; i < num_slices; i++) {
730 		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
731 		if (ddi_create_minor_node(dip, name, S_IFBLK,
732 		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
733 			vdc_msg("%s[%d]: Couldn't add block node %s.",
734 				__func__, instance, name);
735 			return (EIO);
736 		}
737 
738 		/* if any device node is created we set this flag */
739 		vdc->initialized |= VDC_MINOR;
740 
741 		(void) snprintf(name, sizeof (name), "%c%s",
742 			'a' + i, ",raw");
743 		if (ddi_create_minor_node(dip, name, S_IFCHR,
744 		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
745 			vdc_msg("%s[%d]:  Could not add raw node %s.",
746 				__func__, instance, name);
747 			return (EIO);
748 		}
749 	}
750 
751 	return (0);
752 }
753 
754 /*
755  * Function:
756  *	vdc_create_device_nodes_props
757  *
758  * Description:
759  *	This function creates the block and character device nodes under
760  *	/devices along with the node properties. It is called as part of
761  *	the attach(9E) of the instance during the handshake with vds after
762  *	vds has sent the attributes to vdc.
763  *
764  * Parameters:
765  *	vdc 		- soft state pointer
766  *
767  * Return Values
768  *	0		- Success
769  *	EIO		- Failed to create device node property
770  *	EINVAL		- Unknown type of disk exported
771  */
772 static int
773 vdc_create_device_nodes_props(vdc_t *vdc)
774 {
775 	dev_info_t	*dip = NULL;
776 	int		instance;
777 	int		num_slices = 1;
778 	int64_t		size = 0;
779 	dev_t		dev;
780 	int		rv;
781 	int		i;
782 
783 	ASSERT(vdc != NULL);
784 
785 	instance = vdc->instance;
786 	dip = vdc->dip;
787 
788 	if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) {
789 		cmn_err(CE_NOTE, "![%d] Could not create device node property."
790 				" No VTOC available", instance);
791 		return (ENXIO);
792 	}
793 
794 	switch (vdc->vdisk_type) {
795 	case VD_DISK_TYPE_DISK:
796 		num_slices = V_NUMPAR;
797 		break;
798 	case VD_DISK_TYPE_SLICE:
799 		num_slices = 1;
800 		break;
801 	case VD_DISK_TYPE_UNK:
802 	default:
803 		return (EINVAL);
804 	}
805 
806 	for (i = 0; i < num_slices; i++) {
807 		dev = makedevice(ddi_driver_major(dip),
808 			VD_MAKE_DEV(instance, i));
809 
810 		size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz;
811 		PR0("%s[%d] sz %ld (%ld Mb)  p_size %lx\n",
812 				__func__, instance, size, size / (1024 * 1024),
813 				vdc->vtoc->v_part[i].p_size);
814 
815 		rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size);
816 		if (rv != DDI_PROP_SUCCESS) {
817 			vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n",
818 				__func__, instance, VDC_SIZE_PROP_NAME, size);
819 			return (EIO);
820 		}
821 
822 		rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME,
823 			lbtodb(size));
824 		if (rv != DDI_PROP_SUCCESS) {
825 			vdc_msg("%s:(%d): Couldn't add \"%s\" [%d]\n", __func__,
826 				instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size));
827 			return (EIO);
828 		}
829 	}
830 
831 	return (0);
832 }
833 
834 static int
835 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
836 {
837 	_NOTE(ARGUNUSED(cred))
838 
839 	int		instance;
840 	vdc_t		*vdc;
841 
842 	ASSERT(dev != NULL);
843 	instance = SDUNIT(getminor(*dev));
844 
845 	PR0("%s[%d] minor = %d flag = %x, otyp = %x\n", __func__, instance,
846 			getminor(*dev), flag, otyp);
847 
848 	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
849 		return (EINVAL);
850 
851 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
852 		vdc_msg("%s[%d] Could not get state.", __func__, instance);
853 		return (ENXIO);
854 	}
855 
856 	/*
857 	 * Check to see if we can communicate with vds
858 	 */
859 	if (!vdc_is_able_to_tx_data(vdc, flag)) {
860 		PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
861 		return (ENOLINK);
862 	}
863 
864 	mutex_enter(&vdc->lock);
865 	vdc->open++;
866 	mutex_exit(&vdc->lock);
867 
868 	return (0);
869 }
870 
871 static int
872 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
873 {
874 	_NOTE(ARGUNUSED(cred))
875 
876 	int	instance;
877 	vdc_t	*vdc;
878 
879 	instance = SDUNIT(getminor(dev));
880 
881 	PR0("%s[%d] flag = %x, otyp = %x\n", __func__, instance, flag, otyp);
882 
883 	if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK))
884 		return (EINVAL);
885 
886 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
887 		vdc_msg("%s[%d] Could not get state.", __func__, instance);
888 		return (ENXIO);
889 	}
890 
891 	/*
892 	 * Check to see if we can communicate with vds
893 	 */
894 	if (!vdc_is_able_to_tx_data(vdc, 0)) {
895 		PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
896 		return (ETIMEDOUT);
897 	}
898 
899 	if (vdc->dkio_flush_pending) {
900 		PR0("%s[%d]: Cannot detach: %d outstanding DKIO flushes",
901 			__func__, instance, vdc->dkio_flush_pending);
902 		return (EBUSY);
903 	}
904 
905 	/*
906 	 * Should not need the mutex here, since the framework should protect
907 	 * against more opens on this device, but just in case.
908 	 */
909 	mutex_enter(&vdc->lock);
910 	vdc->open--;
911 	mutex_exit(&vdc->lock);
912 
913 	return (0);
914 }
915 
916 static int
917 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
918 {
919 	_NOTE(ARGUNUSED(credp))
920 	_NOTE(ARGUNUSED(rvalp))
921 
922 	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode));
923 }
924 
925 static int
926 vdc_print(dev_t dev, char *str)
927 {
928 	cmn_err(CE_NOTE, "vdc%d:  %s", SDUNIT(getminor(dev)), str);
929 	return (0);
930 }
931 
932 static int
933 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
934 {
935 	int			rv = 0;
936 	size_t			nbytes = (nblk * DEV_BSIZE);
937 	int			instance = SDUNIT(getminor(dev));
938 	vdc_t			*vdc;
939 
940 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
941 		vdc_msg("%s (%d):  Could not get state.", __func__, instance);
942 		return (ENXIO);
943 	}
944 
945 	rv = vdc_populate_descriptor(vdc, addr, nbytes, VD_OP_BWRITE,
946 					blkno, SDPART(getminor(dev)));
947 
948 	PR1("%s: status=%d\n", __func__, rv);
949 
950 	return (rv);
951 }
952 
953 /* -------------------------------------------------------------------------- */
954 
955 /*
956  * Disk access routines
957  *
958  */
959 
960 /*
961  * vdc_strategy()
962  *
963  * Return Value:
964  *	0:	As per strategy(9E), the strategy() function must return 0
965  *		[ bioerror(9f) sets b_flags to the proper error code ]
966  */
967 static int
968 vdc_strategy(struct buf *buf)
969 {
970 	int		rv = -1;
971 	vdc_t		*vdc = NULL;
972 	int		instance = SDUNIT(getminor(buf->b_edev));
973 	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;
974 
975 	PR1("%s: %s %ld bytes at block %ld : b_addr=0x%p",
976 	    __func__, (buf->b_flags & B_READ) ? "Read" : "Write",
977 	    buf->b_bcount, buf->b_lblkno, buf->b_un.b_addr);
978 
979 	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
980 		vdc_msg("%s[%d]:  Could not get state.", __func__, instance);
981 		bioerror(buf, ENXIO);
982 		biodone(buf);
983 		return (0);
984 	}
985 
986 	ASSERT(buf->b_bcount <= (vdc->max_xfer_sz * vdc->block_size));
987 
988 	if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) {
989 		vdc_msg("%s: Not ready to transmit data", __func__);
990 		bioerror(buf, ENXIO);
991 		biodone(buf);
992 		return (0);
993 	}
994 	bp_mapin(buf);
995 
996 	rv = vdc_populate_descriptor(vdc, buf->b_un.b_addr, buf->b_bcount, op,
997 			buf->b_lblkno, SDPART(getminor(buf->b_edev)));
998 
999 	PR1("%s: status=%d", __func__, rv);
1000 	bioerror(buf, rv);
1001 	biodone(buf);
1002 	return (0);
1003 }
1004 
1005 
1006 static int
1007 vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
1008 {
1009 	_NOTE(ARGUNUSED(cred))
1010 
1011 	PR1("vdc_read():  Entered");
1012 	return (physio(vdc_strategy, NULL, dev, B_READ, minphys, uio));
1013 }
1014 
1015 static int
1016 vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
1017 {
1018 	_NOTE(ARGUNUSED(cred))
1019 
1020 	PR1("vdc_write():  Entered");
1021 	return (physio(vdc_strategy, NULL, dev, B_WRITE, minphys, uio));
1022 }
1023 
1024 static int
1025 vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
1026 {
1027 	_NOTE(ARGUNUSED(cred))
1028 
1029 	PR1("vdc_aread():  Entered");
1030 	return (aphysio(vdc_strategy, anocancel, dev, B_READ, minphys, aio));
1031 }
1032 
1033 static int
1034 vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
1035 {
1036 	_NOTE(ARGUNUSED(cred))
1037 
1038 	PR1("vdc_awrite():  Entered");
1039 	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, minphys, aio));
1040 }
1041 
1042 
1043 /* -------------------------------------------------------------------------- */
1044 
1045 /*
1046  * Handshake support
1047  */
1048 
1049 /*
1050  * vdc_init_handshake_negotiation
1051  *
1052  * Description:
1053  *	This function is called to trigger the handshake negotiations between
1054  *	the client (vdc) and the server (vds). It may be called multiple times.
1055  *
1056  * Parameters:
1057  *	vdc - soft state pointer
1058  */
1059 static void
1060 vdc_init_handshake_negotiation(void *arg)
1061 {
1062 	vdc_t		*vdc = (vdc_t *)(void *)arg;
1063 	ldc_status_t	ldc_state;
1064 	vd_state_t	state;
1065 	int		status;
1066 
1067 	ASSERT(vdc != NULL);
1068 
1069 	PR0("[%d] Initializing vdc<->vds handshake\n", vdc->instance);
1070 
1071 	/* get LDC state */
1072 	status = ldc_status(vdc->ldc_handle, &ldc_state);
1073 	if (status != 0) {
1074 		cmn_err(CE_NOTE, "[%d] Couldn't get LDC status: err=%d",
1075 				vdc->instance, status);
1076 		return;
1077 	}
1078 
1079 	/*
1080 	 * If the LDC connection is not UP we bring it up now and return.
1081 	 * The handshake will be started again when the callback is
1082 	 * triggered due to the UP event.
1083 	 */
1084 	if (ldc_state != LDC_UP) {
1085 		PR0("[%d] Triggering an LDC_UP and returning\n", vdc->instance);
1086 		(void) vdc_do_ldc_up(vdc);
1087 		return;
1088 	}
1089 
1090 	mutex_enter(&vdc->lock);
1091 	/*
1092 	 * Do not continue if another thread has triggered a handshake which
1093 	 * has not been reset or detach() has stopped further handshakes.
1094 	 */
1095 	if (vdc->initialized & (VDC_HANDSHAKE | VDC_HANDSHAKE_STOP)) {
1096 		PR0("%s[%d] Negotiation not triggered. [init=%x]\n",
1097 			__func__, vdc->instance, vdc->initialized);
1098 		mutex_exit(&vdc->lock);
1099 		return;
1100 	}
1101 
1102 	if (vdc->hshake_cnt++ > vdc_retries) {
1103 		cmn_err(CE_NOTE, "[%d] Failed repeatedly to complete handshake"
1104 				"with vDisk server", vdc->instance);
1105 		mutex_exit(&vdc->lock);
1106 		return;
1107 	}
1108 
1109 	vdc->initialized |= VDC_HANDSHAKE;
1110 	vdc->ldc_state = ldc_state;
1111 
1112 	state = vdc->state;
1113 
1114 	if (state == VD_STATE_INIT) {
1115 		/*
1116 		 * Set the desired version parameter to the first entry in the
1117 		 * version array. If this specific version is not supported,
1118 		 * the response handling code will step down the version number
1119 		 * to the next array entry and deal with it accordingly.
1120 		 */
1121 		(void) vdc_init_ver_negotiation(vdc, vdc_version[0]);
1122 	} else if (state == VD_STATE_VER) {
1123 		(void) vdc_init_attr_negotiation(vdc);
1124 	} else if (state == VD_STATE_ATTR) {
1125 		(void) vdc_init_dring_negotiate(vdc);
1126 	} else if (state == VD_STATE_DATA) {
1127 		/*
1128 		 * nothing to do - we have already completed the negotiation
1129 		 * and we can transmit data when ready.
1130 		 */
1131 		PR0("%s[%d] Negotiation triggered after handshake completed",
1132 			__func__, vdc->instance);
1133 	}
1134 
1135 	mutex_exit(&vdc->lock);
1136 }
1137 
1138 /*
1139  * Function:
1140  *	vdc_init_ver_negotiation()
1141  *
1142  * Description:
1143  *
1144  * Arguments:
1145  *	vdc	- soft state pointer for this instance of the device driver.
1146  *
1147  * Return Code:
1148  *	0	- Success
1149  */
1150 static int
1151 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
1152 {
1153 	vio_ver_msg_t	pkt;
1154 	size_t		msglen = sizeof (pkt);
1155 	int		status = -1;
1156 
1157 	PR0("%s: Entered.\n", __func__);
1158 
1159 	ASSERT(vdc != NULL);
1160 	ASSERT(mutex_owned(&vdc->lock));
1161 
1162 	/*
1163 	 * set the Session ID to a unique value
1164 	 * (the lower 32 bits of the clock tick)
1165 	 */
1166 	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
1167 
1168 	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
1169 	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
1170 	pkt.tag.vio_subtype_env = VIO_VER_INFO;
1171 	pkt.tag.vio_sid = vdc->session_id;
1172 	pkt.dev_class = VDEV_DISK;
1173 	pkt.ver_major = ver.major;
1174 	pkt.ver_minor = ver.minor;
1175 
1176 	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
1177 	PR0("%s: vdc_send(status = %d)\n", __func__, status);
1178 
1179 	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
1180 		PR0("%s[%d] vdc_send failed: id(%lx) rv(%d) size(%d)\n",
1181 				__func__, vdc->instance, vdc->ldc_handle,
1182 				status, msglen);
1183 		if (msglen != sizeof (vio_ver_msg_t))
1184 			status = ENOMSG;
1185 	}
1186 
1187 	return (status);
1188 }
1189 
1190 /*
1191  * Function:
1192  *	vdc_init_attr_negotiation()
1193  *
1194  * Description:
1195  *
1196  * Arguments:
1197  *	vdc	- soft state pointer for this instance of the device driver.
1198  *
1199  * Return Code:
1200  *	0	- Success
1201  */
1202 static int
1203 vdc_init_attr_negotiation(vdc_t *vdc)
1204 {
1205 	vd_attr_msg_t	pkt;
1206 	size_t		msglen = sizeof (pkt);
1207 	int		status;
1208 
1209 	ASSERT(vdc != NULL);
1210 	ASSERT(mutex_owned(&vdc->lock));
1211 
1212 	PR0("%s[%d] entered\n", __func__, vdc->instance);
1213 
1214 	/* fill in tag */
1215 	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
1216 	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
1217 	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
1218 	pkt.tag.vio_sid = vdc->session_id;
1219 	/* fill in payload */
1220 	pkt.max_xfer_sz = vdc->max_xfer_sz;
1221 	pkt.vdisk_block_size = vdc->block_size;
1222 	pkt.xfer_mode = VIO_DRING_MODE;
1223 	pkt.operations = 0;	/* server will set bits of valid operations */
1224 	pkt.vdisk_type = 0;	/* server will set to valid device type */
1225 	pkt.vdisk_size = 0;	/* server will set to valid size */
1226 
1227 	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
1228 	PR0("%s: vdc_send(status = %d)\n", __func__, status);
1229 
1230 	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
1231 		PR0("%s[%d] ldc_write failed: id(%lx) rv(%d) size (%d)\n",
1232 			__func__, vdc->instance, vdc->ldc_handle,
1233 			status, msglen);
1234 		if (msglen != sizeof (vio_ver_msg_t))
1235 			status = ENOMSG;
1236 	}
1237 
1238 	return (status);
1239 }
1240 
1241 /*
1242  * Function:
1243  *	vdc_init_dring_negotiate()
1244  *
1245  * Description:
1246  *
1247  * Arguments:
1248  *	vdc	- soft state pointer for this instance of the device driver.
1249  *
1250  * Return Code:
1251  *	0	- Success
1252  */
1253 static int
1254 vdc_init_dring_negotiate(vdc_t *vdc)
1255 {
1256 	vio_dring_reg_msg_t	pkt;
1257 	size_t			msglen = sizeof (pkt);
1258 	int			status = -1;
1259 
1260 	ASSERT(vdc != NULL);
1261 	ASSERT(mutex_owned(&vdc->lock));
1262 
1263 	status = vdc_init_descriptor_ring(vdc);
1264 	if (status != 0) {
1265 		cmn_err(CE_CONT, "[%d] Failed to init DRing (status = %d)\n",
1266 				vdc->instance, status);
1267 		vdc_destroy_descriptor_ring(vdc);
1268 		vdc_reset_connection(vdc, B_FALSE);
1269 		return (status);
1270 	}
1271 	PR0("%s[%d] Init of descriptor ring completed (status = %d)\n",
1272 			__func__, vdc->instance, status);
1273 
1274 	/* fill in tag */
1275 	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
1276 	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
1277 	pkt.tag.vio_subtype_env = VIO_DRING_REG;
1278 	pkt.tag.vio_sid = vdc->session_id;
1279 	/* fill in payload */
1280 	pkt.dring_ident = 0;
1281 	pkt.num_descriptors = VD_DRING_LEN;
1282 	pkt.descriptor_size = VD_DRING_ENTRY_SZ;
1283 	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
1284 	pkt.ncookies = vdc->dring_cookie_count;
1285 	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */
1286 
1287 	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
1288 	if (status != 0) {
1289 		PR0("%s[%d] Failed to register DRing (status = %d)\n",
1290 				__func__, vdc->instance, status);
1291 		vdc_reset_connection(vdc, B_FALSE);
1292 	}
1293 
1294 	return (status);
1295 }
1296 
1297 
1298 /* -------------------------------------------------------------------------- */
1299 
1300 /*
1301  * LDC helper routines
1302  */
1303 
1304 /*
1305  * Function:
1306  *	vdc_send()
1307  *
1308  * Description:
1309  *	The function encapsulates the call to write a message using LDC.
1310  *	If LDC indicates that the call failed due to the queue being full,
1311  *	we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise
1312  *	we return the error returned by LDC.
1313  *
1314  * Arguments:
1315  *	ldc_handle	- LDC handle for the channel this instance of vdc uses
1316  *	pkt		- address of LDC message to be sent
1317  *	msglen		- the size of the message being sent. When the function
1318  *			  returns, this contains the number of bytes written.
1319  *
1320  * Return Code:
1321  *	0		- Success.
1322  *	EINVAL		- pkt or msglen were NULL
1323  *	ECONNRESET	- The connection was not up.
1324  *	EWOULDBLOCK	- LDC queue is full
1325  *	xxx		- other error codes returned by ldc_write
1326  */
1327 static int
1328 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
1329 {
1330 	size_t	size = 0;
1331 	int	retries = 0;
1332 	int	status = 0;
1333 
1334 	ASSERT(vdc != NULL);
1335 	ASSERT(mutex_owned(&vdc->lock));
1336 	ASSERT(msglen != NULL);
1337 	ASSERT(*msglen != 0);
1338 
1339 	do {
1340 		size = *msglen;
1341 		status = ldc_write(vdc->ldc_handle, pkt, &size);
1342 	} while (status == EWOULDBLOCK && retries++ < vdc_retries);
1343 
1344 	/* if LDC had serious issues --- reset vdc state */
1345 	if (status == EIO || status == ECONNRESET) {
1346 		vdc_reset_connection(vdc, B_TRUE);
1347 	}
1348 
1349 	/* return the last size written */
1350 	*msglen = size;
1351 
1352 	return (status);
1353 }
1354 
1355 /*
1356  * Function:
1357  *	vdc_get_ldc_id()
1358  *
1359  * Description:
1360  *	This function gets the 'ldc-id' for this particular instance of vdc.
1361  *	The id returned is the guest domain channel endpoint LDC uses for
1362  *	communication with vds.
1363  *
1364  * Arguments:
1365  *	dip	- dev info pointer for this instance of the device driver.
1366  *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
1367  *
1368  * Return Code:
1369  *	0	- Success.
1370  *	ENOENT	- Expected node or property did not exist.
1371  *	ENXIO	- Unexpected error communicating with MD framework
1372  */
1373 static int
1374 vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
1375 {
1376 	int		status = ENOENT;
1377 	char		*node_name = NULL;
1378 	md_t		*mdp = NULL;
1379 	int		num_nodes;
1380 	int		num_vdevs;
1381 	int		num_chans;
1382 	mde_cookie_t	rootnode;
1383 	mde_cookie_t	*listp = NULL;
1384 	mde_cookie_t	*chanp = NULL;
1385 	boolean_t	found_inst = B_FALSE;
1386 	int		listsz;
1387 	int		idx;
1388 	uint64_t	md_inst;
1389 	int		obp_inst;
1390 	int		instance = ddi_get_instance(dip);
1391 
1392 	ASSERT(ldc_id != NULL);
1393 	*ldc_id = 0;
1394 
1395 	/*
1396 	 * Get the OBP instance number for comparison with the MD instance
1397 	 *
1398 	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
1399 	 * notion of "instance", or unique identifier, for that node; OBP
1400 	 * stores the value of the "cfg-handle" MD property as the value of
1401 	 * the "reg" property on the node in the device tree it builds from
1402 	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
1403 	 * "reg" property value to uniquely identify this device instance.
1404 	 * If the "reg" property cannot be found, the device tree state is
1405 	 * presumably so broken that there is no point in continuing.
1406 	 */
1407 	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
1408 		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
1409 		return (ENOENT);
1410 	}
1411 	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
1412 			OBP_REG, -1);
1413 	PR1("%s[%d]: OBP inst=%d\n", __func__, instance, obp_inst);
1414 
1415 	/*
1416 	 * We now walk the MD nodes and if an instance of a vdc node matches
1417 	 * the instance got from OBP we get the ldc-id property.
1418 	 */
1419 	if ((mdp = md_get_handle()) == NULL) {
1420 		cmn_err(CE_WARN, "unable to init machine description");
1421 		return (ENXIO);
1422 	}
1423 
1424 	num_nodes = md_node_count(mdp);
1425 	ASSERT(num_nodes > 0);
1426 
1427 	listsz = num_nodes * sizeof (mde_cookie_t);
1428 
1429 	/* allocate memory for nodes */
1430 	listp = kmem_zalloc(listsz, KM_SLEEP);
1431 	chanp = kmem_zalloc(listsz, KM_SLEEP);
1432 
1433 	rootnode = md_root_node(mdp);
1434 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
1435 
1436 	/*
1437 	 * Search for all the virtual devices, we will then check to see which
1438 	 * ones are disk nodes.
1439 	 */
1440 	num_vdevs = md_scan_dag(mdp, rootnode,
1441 			md_find_name(mdp, VDC_MD_VDEV_NAME),
1442 			md_find_name(mdp, "fwd"), listp);
1443 
1444 	if (num_vdevs <= 0) {
1445 		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
1446 		status = ENOENT;
1447 		goto done;
1448 	}
1449 
1450 	PR1("%s[%d] num_vdevs=%d\n", __func__, instance, num_vdevs);
1451 	for (idx = 0; idx < num_vdevs; idx++) {
1452 		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
1453 		if ((status != 0) || (node_name == NULL)) {
1454 			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
1455 					": err %d", VDC_MD_VDEV_NAME, status);
1456 			continue;
1457 		}
1458 
1459 		PR1("%s[%d] Found node %s\n", __func__, instance, node_name);
1460 		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
1461 			status = md_get_prop_val(mdp, listp[idx],
1462 					VDC_MD_CFG_HDL, &md_inst);
1463 			PR1("%s[%d] vdc inst# in MD=%d\n",
1464 					__func__, instance, md_inst);
1465 			if ((status == 0) && (md_inst == obp_inst)) {
1466 				found_inst = B_TRUE;
1467 				break;
1468 			}
1469 		}
1470 	}
1471 
1472 	if (!found_inst) {
1473 		cmn_err(CE_NOTE, "Unable to find correct '%s' node",
1474 				VDC_MD_DISK_NAME);
1475 		status = ENOENT;
1476 		goto done;
1477 	}
1478 	PR0("%s[%d] MD inst=%d\n", __func__, instance, md_inst);
1479 
1480 	/* get the channels for this node */
1481 	num_chans = md_scan_dag(mdp, listp[idx],
1482 			md_find_name(mdp, VDC_MD_CHAN_NAME),
1483 			md_find_name(mdp, "fwd"), chanp);
1484 
1485 	/* expecting at least one channel */
1486 	if (num_chans <= 0) {
1487 		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
1488 				VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
1489 		status = ENOENT;
1490 		goto done;
1491 
1492 	} else if (num_chans != 1) {
1493 		PR0("%s[%d] Expected 1 '%s' node for '%s' port, found %d\n",
1494 			__func__, instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
1495 			num_chans);
1496 	}
1497 
1498 	/*
1499 	 * We use the first channel found (index 0), irrespective of how
1500 	 * many are there in total.
1501 	 */
1502 	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
1503 		cmn_err(CE_NOTE, "Channel '%s' property not found",
1504 				VDC_ID_PROP);
1505 		status = ENOENT;
1506 	}
1507 
1508 	PR0("%s[%d] LDC id is 0x%lx\n", __func__, instance, *ldc_id);
1509 
1510 done:
1511 	if (chanp)
1512 		kmem_free(chanp, listsz);
1513 	if (listp)
1514 		kmem_free(listp, listsz);
1515 
1516 	(void) md_fini_handle(mdp);
1517 
1518 	return (status);
1519 }
1520 
1521 static int
1522 vdc_do_ldc_up(vdc_t *vdc)
1523 {
1524 	int	status;
1525 
1526 	PR0("[%d] Bringing up channel %x\n", vdc->instance, vdc->ldc_id);
1527 
1528 	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
1529 		switch (status) {
1530 		case ECONNREFUSED:	/* listener not ready at other end */
1531 			PR0("%s: ldc_up(%d,...) return %d\n",
1532 					__func__, vdc->ldc_id, status);
1533 			status = 0;
1534 			break;
1535 		default:
1536 			cmn_err(CE_NOTE, "[%d] Failed to bring up LDC: "
1537 					"channel=%ld, err=%d",
1538 					vdc->instance, vdc->ldc_id, status);
1539 		}
1540 	}
1541 
1542 	return (status);
1543 }
1544 
1545 
1546 /*
1547  * vdc_is_able_to_tx_data()
1548  *
1549  * Description:
1550  *	This function checks if we are able to send data to the
1551  *	vDisk server (vds). The LDC connection needs to be up and
1552  *	vdc & vds need to have completed the handshake negotiation.
1553  *
1554  * Parameters:
1555  *	vdc 		- soft state pointer
1556  *	flag		- flag to indicate if we can block or not
1557  *			  [ If O_NONBLOCK or O_NDELAY (which are defined in
1558  *			    open(2)) are set then do not block)
1559  *
1560  * Return Values
1561  *	B_TRUE		- can talk to vds
1562  *	B_FALSE		- unable to talk to vds
1563  */
1564 static boolean_t
1565 vdc_is_able_to_tx_data(vdc_t *vdc, int flag)
1566 {
1567 	vd_state_t	state;
1568 	uint32_t	ldc_state;
1569 	uint_t		retries = 0;
1570 	int		rv = -1;
1571 
1572 	ASSERT(vdc != NULL);
1573 
1574 	mutex_enter(&vdc->lock);
1575 	state = vdc->state;
1576 	ldc_state = vdc->ldc_state;
1577 	mutex_exit(&vdc->lock);
1578 
1579 	if ((state == VD_STATE_DATA) && (ldc_state == LDC_UP))
1580 		return (B_TRUE);
1581 
1582 	if ((flag & O_NONBLOCK) || (flag & O_NDELAY)) {
1583 		PR0("%s[%d] Not ready to tx - state %d LDC state %d\n",
1584 			__func__, vdc->instance, state, ldc_state);
1585 		return (B_FALSE);
1586 	}
1587 
1588 	/*
1589 	 * We want to check and see if any negotiations triggered earlier
1590 	 * have succeeded. We are prepared to wait a little while in case
1591 	 * they are still in progress.
1592 	 */
1593 	mutex_enter(&vdc->lock);
1594 	while ((vdc->ldc_state != LDC_UP) || (vdc->state != VD_STATE_DATA)) {
1595 		PR0("%s: Waiting for connection at state %d (LDC state %d)\n",
1596 			__func__, vdc->state, vdc->ldc_state);
1597 
1598 		rv = cv_timedwait(&vdc->cv, &vdc->lock,
1599 			VD_GET_TIMEOUT_HZ(retries));
1600 
1601 		/*
1602 		 * An rv of -1 indicates that we timed out without the LDC
1603 		 * state changing so it looks like the other side (vdc) is
1604 		 * not yet ready/responding.
1605 		 *
1606 		 * Any other value of rv indicates that the LDC triggered an
1607 		 * interrupt so we just loop again, check the handshake state
1608 		 * and keep waiting if necessary.
1609 		 */
1610 		if (rv == -1) {
1611 			if (retries >= vdc_retries) {
1612 				PR0("%s[%d] handshake wait timed out.\n",
1613 						__func__, vdc->instance);
1614 				mutex_exit(&vdc->lock);
1615 				return (B_FALSE);
1616 			} else {
1617 				PR1("%s[%d] Retry #%d for handshake timedout\n",
1618 					__func__, vdc->instance, retries);
1619 				retries++;
1620 			}
1621 		}
1622 	}
1623 
1624 	ASSERT(vdc->ldc_state == LDC_UP);
1625 	ASSERT(vdc->state == VD_STATE_DATA);
1626 
1627 	mutex_exit(&vdc->lock);
1628 
1629 	return (B_TRUE);
1630 }
1631 
1632 
1633 /*
1634  * Function:
1635  *	vdc_terminate_ldc()
1636  *
1637  * Description:
1638  *
1639  * Arguments:
1640  *	vdc	- soft state pointer for this instance of the device driver.
1641  *
1642  * Return Code:
1643  *	None
1644  */
1645 static void
1646 vdc_terminate_ldc(vdc_t *vdc)
1647 {
1648 	int	instance = ddi_get_instance(vdc->dip);
1649 
1650 	ASSERT(vdc != NULL);
1651 	ASSERT(mutex_owned(&vdc->lock));
1652 
1653 	PR0("%s[%d] initialized=%x\n", __func__, instance, vdc->initialized);
1654 
1655 	if (vdc->initialized & VDC_LDC_OPEN) {
1656 		PR0("%s[%d]: ldc_close()\n", __func__, instance);
1657 		(void) ldc_close(vdc->ldc_handle);
1658 	}
1659 	if (vdc->initialized & VDC_LDC_CB) {
1660 		PR0("%s[%d]: ldc_unreg_callback()\n", __func__, instance);
1661 		(void) ldc_unreg_callback(vdc->ldc_handle);
1662 	}
1663 	if (vdc->initialized & VDC_LDC) {
1664 		PR0("%s[%d]: ldc_fini()\n", __func__, instance);
1665 		(void) ldc_fini(vdc->ldc_handle);
1666 		vdc->ldc_handle = NULL;
1667 	}
1668 
1669 	vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN);
1670 }
1671 
1672 /*
1673  * Function:
1674  *	vdc_reset_connection()
1675  *
1676  * Description:
1677  *
1678  * Arguments:
1679  *	vdc	- soft state pointer for this instance of the device driver.
1680  *	reset_ldc - Flag whether or not to reset the LDC connection also.
1681  *
1682  * Return Code:
1683  *	None
1684  */
1685 static void
1686 vdc_reset_connection(vdc_t *vdc, boolean_t reset_ldc)
1687 {
1688 	int	status;
1689 
1690 	ASSERT(vdc != NULL);
1691 	ASSERT(mutex_owned(&vdc->lock));
1692 
1693 	PR0("%s[%d] Entered\n", __func__, vdc->instance);
1694 
1695 	vdc->state = VD_STATE_INIT;
1696 
1697 	if (reset_ldc) {
1698 		status = ldc_reset(vdc->ldc_handle);
1699 		PR0("%s[%d]  ldc_reset() = %d\n",
1700 				__func__, vdc->instance, status);
1701 	}
1702 
1703 	vdc->initialized &= ~VDC_HANDSHAKE;
1704 	PR0("%s[%d] init=%x\n", __func__, vdc->instance, vdc->initialized);
1705 }
1706 
1707 /* -------------------------------------------------------------------------- */
1708 
1709 /*
1710  * Descriptor Ring helper routines
1711  */
1712 
1713 /*
1714  * Function:
1715  *	vdc_init_descriptor_ring()
1716  *
1717  * Description:
1718  *
1719  * Arguments:
1720  *	vdc	- soft state pointer for this instance of the device driver.
1721  *
1722  * Return Code:
1723  *	0	- Success
1724  */
1725 static int
1726 vdc_init_descriptor_ring(vdc_t *vdc)
1727 {
1728 	vd_dring_entry_t	*dep = NULL;	/* DRing Entry pointer */
1729 	int	status = 0;
1730 	int	i;
1731 
1732 	PR0("%s[%d] initialized=%x\n",
1733 			__func__, vdc->instance, vdc->initialized);
1734 
1735 	ASSERT(vdc != NULL);
1736 	ASSERT(mutex_owned(&vdc->lock));
1737 	ASSERT(vdc->ldc_handle != NULL);
1738 
1739 	if ((vdc->initialized & VDC_DRING_INIT) == 0) {
1740 		PR0("%s[%d] ldc_mem_dring_create\n", __func__, vdc->instance);
1741 		status = ldc_mem_dring_create(VD_DRING_LEN, VD_DRING_ENTRY_SZ,
1742 				&vdc->ldc_dring_hdl);
1743 		if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) {
1744 			PR0("%s: Failed to create a descriptor ring", __func__);
1745 			return (status);
1746 		}
1747 		vdc->dring_entry_size = VD_DRING_ENTRY_SZ;
1748 		vdc->dring_len = VD_DRING_LEN;
1749 		vdc->initialized |= VDC_DRING_INIT;
1750 	}
1751 
1752 	if ((vdc->initialized & VDC_DRING_BOUND) == 0) {
1753 		PR0("%s[%d] ldc_mem_dring_bind\n", __func__, vdc->instance);
1754 		vdc->dring_cookie =
1755 			kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP);
1756 
1757 		status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl,
1758 				LDC_SHADOW_MAP, LDC_MEM_RW,
1759 				&vdc->dring_cookie[0],
1760 				&vdc->dring_cookie_count);
1761 		if (status != 0) {
1762 			PR0("%s: Failed to bind descriptor ring (%p) "
1763 				"to channel (%p)\n",
1764 				__func__, vdc->ldc_dring_hdl, vdc->ldc_handle);
1765 			return (status);
1766 		}
1767 		ASSERT(vdc->dring_cookie_count == 1);
1768 		vdc->initialized |= VDC_DRING_BOUND;
1769 	}
1770 
1771 	status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info);
1772 	if (status != 0) {
1773 		PR0("%s: Failed to get info for descriptor ring (%p)\n",
1774 			__func__, vdc->ldc_dring_hdl);
1775 		return (status);
1776 	}
1777 
1778 	if ((vdc->initialized & VDC_DRING_LOCAL) == 0) {
1779 		PR0("%s[%d] local dring\n", __func__, vdc->instance);
1780 
1781 		/* Allocate the local copy of this dring */
1782 		vdc->local_dring =
1783 			kmem_zalloc(VD_DRING_LEN * sizeof (vdc_local_desc_t),
1784 						KM_SLEEP);
1785 		vdc->initialized |= VDC_DRING_LOCAL;
1786 	}
1787 
1788 	/*
1789 	 * Mark all DRing entries as free and initialize the private
1790 	 * descriptor's memory handles. If any entry is initialized,
1791 	 * we need to free it later so we set the bit in 'initialized'
1792 	 * at the start.
1793 	 */
1794 	vdc->initialized |= VDC_DRING_ENTRY;
1795 	for (i = 0; i < VD_DRING_LEN; i++) {
1796 		dep = VDC_GET_DRING_ENTRY_PTR(vdc, i);
1797 		dep->hdr.dstate = VIO_DESC_FREE;
1798 
1799 		status = ldc_mem_alloc_handle(vdc->ldc_handle,
1800 				&vdc->local_dring[i].desc_mhdl);
1801 		if (status != 0) {
1802 			cmn_err(CE_NOTE, "![%d] Failed to alloc mem handle for"
1803 					" descriptor %d", vdc->instance, i);
1804 			return (status);
1805 		}
1806 		vdc->local_dring[i].flags = VIO_DESC_FREE;
1807 		vdc->local_dring[i].dep = dep;
1808 
1809 		mutex_init(&vdc->local_dring[i].lock, NULL, MUTEX_DRIVER, NULL);
1810 		cv_init(&vdc->local_dring[i].cv, NULL, CV_DRIVER, NULL);
1811 	}
1812 
1813 	/*
1814 	 * We init the index of the last DRing entry used. Since the code to
1815 	 * get the next available entry increments it before selecting one,
1816 	 * we set it to the last DRing entry so that it wraps around to zero
1817 	 * for the 1st entry to be used.
1818 	 */
1819 	vdc->dring_curr_idx = VD_DRING_LEN - 1;
1820 
1821 	return (status);
1822 }
1823 
1824 /*
1825  * Function:
1826  *	vdc_destroy_descriptor_ring()
1827  *
1828  * Description:
1829  *
1830  * Arguments:
1831  *	vdc	- soft state pointer for this instance of the device driver.
1832  *
1833  * Return Code:
1834  *	None
1835  */
1836 static void
1837 vdc_destroy_descriptor_ring(vdc_t *vdc)
1838 {
1839 	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
1840 	ldc_mem_handle_t	mhdl = NULL;
1841 	int			status = -1;
1842 	int			i;	/* loop */
1843 
1844 	ASSERT(vdc != NULL);
1845 	ASSERT(mutex_owned(&vdc->lock));
1846 	ASSERT(vdc->state == VD_STATE_INIT);
1847 
1848 	PR0("%s: Entered\n", __func__);
1849 
1850 	if (vdc->initialized & VDC_DRING_ENTRY) {
1851 		PR0("[%d] Removing Local DRing entries\n", vdc->instance);
1852 		for (i = 0; i < VD_DRING_LEN; i++) {
1853 			ldep = &vdc->local_dring[i];
1854 			mhdl = ldep->desc_mhdl;
1855 
1856 			if (mhdl == NULL)
1857 				continue;
1858 
1859 			(void) ldc_mem_free_handle(mhdl);
1860 			mutex_destroy(&ldep->lock);
1861 			cv_destroy(&ldep->cv);
1862 		}
1863 		vdc->initialized &= ~VDC_DRING_ENTRY;
1864 	}
1865 
1866 	if (vdc->initialized & VDC_DRING_LOCAL) {
1867 		PR0("[%d] Freeing Local DRing\n", vdc->instance);
1868 		kmem_free(vdc->local_dring,
1869 				VD_DRING_LEN * sizeof (vdc_local_desc_t));
1870 		vdc->initialized &= ~VDC_DRING_LOCAL;
1871 	}
1872 
1873 	if (vdc->initialized & VDC_DRING_BOUND) {
1874 		PR0("[%d] Unbinding DRing\n", vdc->instance);
1875 		status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl);
1876 		if (status == 0) {
1877 			vdc->initialized &= ~VDC_DRING_BOUND;
1878 		} else {
1879 			vdc_msg("%s: Failed to unbind Descriptor Ring (%lx)\n",
1880 				vdc->ldc_dring_hdl);
1881 		}
1882 	}
1883 
1884 	if (vdc->initialized & VDC_DRING_INIT) {
1885 		PR0("[%d] Destroying DRing\n", vdc->instance);
1886 		status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl);
1887 		if (status == 0) {
1888 			vdc->ldc_dring_hdl = NULL;
1889 			bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t));
1890 			vdc->initialized &= ~VDC_DRING_INIT;
1891 		} else {
1892 			vdc_msg("%s: Failed to destroy Descriptor Ring (%lx)\n",
1893 					vdc->ldc_dring_hdl);
1894 		}
1895 	}
1896 }
1897 
1898 /*
1899  * vdc_get_next_dring_entry_idx()
1900  *
1901  * Description:
1902  *	This function gets the index of the next Descriptor Ring entry available
1903  *
1904  * Return Value:
1905  *	0 <= rv < VD_DRING_LEN		Next available slot
1906  *	-1 				DRing is full
1907  */
1908 static int
1909 vdc_get_next_dring_entry_idx(vdc_t *vdc, uint_t num_slots_needed)
1910 {
1911 	_NOTE(ARGUNUSED(num_slots_needed))
1912 
1913 	vd_dring_entry_t	*dep = NULL;	/* Dring Entry Pointer */
1914 	int			idx = -1;
1915 	int			start_idx = 0;
1916 
1917 	ASSERT(vdc != NULL);
1918 	ASSERT(vdc->dring_len == VD_DRING_LEN);
1919 	ASSERT(vdc->dring_curr_idx >= 0);
1920 	ASSERT(vdc->dring_curr_idx < VD_DRING_LEN);
1921 	ASSERT(mutex_owned(&vdc->dring_lock));
1922 
1923 	/* Start at the last entry used */
1924 	idx = start_idx = vdc->dring_curr_idx;
1925 
1926 	/*
1927 	 * Loop through Descriptor Ring checking for a free entry until we reach
1928 	 * the entry we started at. We should never come close to filling the
1929 	 * Ring at any stage, instead this is just to prevent an entry which
1930 	 * gets into an inconsistent state (e.g. due to a request timing out)
1931 	 * from blocking progress.
1932 	 */
1933 	do {
1934 		/* Get the next entry after the last known index tried */
1935 		idx = (idx + 1) % VD_DRING_LEN;
1936 
1937 		dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
1938 		ASSERT(dep != NULL);
1939 
1940 		if (dep->hdr.dstate == VIO_DESC_FREE) {
1941 			ASSERT(idx >= 0);
1942 			ASSERT(idx < VD_DRING_LEN);
1943 			vdc->dring_curr_idx = idx;
1944 			return (idx);
1945 
1946 		} else if (dep->hdr.dstate == VIO_DESC_READY) {
1947 			PR0("%s: Entry %d waiting to be accepted\n",
1948 					__func__, idx);
1949 			continue;
1950 
1951 		} else if (dep->hdr.dstate == VIO_DESC_ACCEPTED) {
1952 			PR0("%s: Entry %d waiting to be processed\n",
1953 					__func__, idx);
1954 			continue;
1955 
1956 		} else if (dep->hdr.dstate == VIO_DESC_DONE) {
1957 			PR0("%s: Entry %d done but not marked free\n",
1958 					__func__, idx);
1959 
1960 			/*
1961 			 * If we are currently panicking, interrupts are
1962 			 * disabled and we will not be getting ACKs from the
1963 			 * vDisk server so we mark the descriptor ring entries
1964 			 * as FREE here instead of in the ACK handler.
1965 			 */
1966 			if (panicstr) {
1967 				(void) vdc_depopulate_descriptor(vdc, idx);
1968 				dep->hdr.dstate = VIO_DESC_FREE;
1969 				vdc->local_dring[idx].flags = VIO_DESC_FREE;
1970 			}
1971 			continue;
1972 
1973 		} else {
1974 			vdc_msg("Public Descriptor Ring entry corrupted");
1975 			mutex_enter(&vdc->lock);
1976 			vdc_reset_connection(vdc, B_FALSE);
1977 			mutex_exit(&vdc->lock);
1978 			return (-1);
1979 		}
1980 
1981 	} while (idx != start_idx);
1982 
1983 	return (-1);
1984 }
1985 
1986 /*
1987  * Function:
1988  *	vdc_populate_descriptor
1989  *
1990  * Description:
1991  *	This routine writes the data to be transmitted to vds into the
1992  *	descriptor, notifies vds that the ring has been updated and
1993  *	then waits for the request to be processed.
1994  *
1995  * Arguments:
1996  *	vdc	- the soft state pointer
1997  *	addr	- start address of memory region.
1998  *	nbytes	- number of bytes to read/write
1999  *	operation - operation we want vds to perform (VD_OP_XXX)
2000  *	arg	- parameter to be sent to server (depends on VD_OP_XXX type)
2001  *			. mode for ioctl(9e)
2002  *			. LP64 diskaddr_t (block I/O)
2003  *	slice	- the disk slice this request is for
2004  *
2005  * Return Codes:
2006  *	0
2007  *	EAGAIN
2008  *		EFAULT
2009  *		ENXIO
2010  *		EIO
2011  */
2012 static int
2013 vdc_populate_descriptor(vdc_t *vdc, caddr_t addr, size_t nbytes, int operation,
2014 				uint64_t arg, uint64_t slice)
2015 {
2016 	vdc_local_desc_t *local_dep = NULL;	/* Local Dring Entry Pointer */
2017 	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
2018 	int			idx = 0;	/* Index of DRing entry used */
2019 	vio_dring_msg_t		dmsg;
2020 	size_t			msglen = sizeof (dmsg);
2021 	int			retries = 0;
2022 	int			rv;
2023 
2024 	ASSERT(vdc != NULL);
2025 	ASSERT(slice < V_NUMPAR);
2026 
2027 	/*
2028 	 * Get next available DRing entry.
2029 	 */
2030 	mutex_enter(&vdc->dring_lock);
2031 	idx = vdc_get_next_dring_entry_idx(vdc, 1);
2032 	if (idx == -1) {
2033 		mutex_exit(&vdc->dring_lock);
2034 		vdc_msg("%s[%d]: no descriptor ring entry avail, seq=%d\n",
2035 				__func__, vdc->instance, vdc->seq_num);
2036 
2037 		/*
2038 		 * Since strategy should not block we don't wait for the DRing
2039 		 * to empty and instead return
2040 		 */
2041 		return (EAGAIN);
2042 	}
2043 
2044 	ASSERT(idx < VD_DRING_LEN);
2045 	local_dep = &vdc->local_dring[idx];
2046 	dep = local_dep->dep;
2047 	ASSERT(dep != NULL);
2048 
2049 	/*
2050 	 * Wait for anybody still using the DRing entry to finish.
2051 	 * (e.g. still waiting for vds to respond to a request)
2052 	 */
2053 	mutex_enter(&local_dep->lock);
2054 
2055 	switch (operation) {
2056 	case VD_OP_BREAD:
2057 	case VD_OP_BWRITE:
2058 		PR1("buf=%p, block=%lx, nbytes=%lx\n", addr, arg, nbytes);
2059 		dep->payload.addr = (diskaddr_t)arg;
2060 		rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes, operation);
2061 		break;
2062 
2063 	case VD_OP_GET_VTOC:
2064 	case VD_OP_SET_VTOC:
2065 	case VD_OP_GET_DISKGEOM:
2066 	case VD_OP_SET_DISKGEOM:
2067 	case VD_OP_SCSICMD:
2068 		if (nbytes > 0) {
2069 			rv = vdc_populate_mem_hdl(vdc, idx, addr, nbytes,
2070 							operation);
2071 		}
2072 		break;
2073 
2074 	case VD_OP_FLUSH:
2075 	case VD_OP_GET_WCE:
2076 	case VD_OP_SET_WCE:
2077 		rv = 0;		/* nothing to bind */
2078 		break;
2079 
2080 	default:
2081 		cmn_err(CE_NOTE, "[%d] Unsupported vDisk operation [%d]\n",
2082 				vdc->instance, operation);
2083 		rv = EINVAL;
2084 	}
2085 
2086 	if (rv != 0) {
2087 		mutex_exit(&local_dep->lock);
2088 		mutex_exit(&vdc->dring_lock);
2089 		return (rv);
2090 	}
2091 
2092 	/*
2093 	 * fill in the data details into the DRing
2094 	 */
2095 	dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdc);
2096 	dep->payload.operation = operation;
2097 	dep->payload.nbytes = nbytes;
2098 	dep->payload.status = EINPROGRESS;	/* vds will set valid value */
2099 	dep->payload.slice = slice;
2100 	dep->hdr.dstate = VIO_DESC_READY;
2101 	dep->hdr.ack = 1;		/* request an ACK for every message */
2102 
2103 	local_dep->flags = VIO_DESC_READY;
2104 	local_dep->addr = addr;
2105 
2106 	/*
2107 	 * Send a msg with the DRing details to vds
2108 	 */
2109 	VIO_INIT_DRING_DATA_TAG(dmsg);
2110 	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc);
2111 	dmsg.dring_ident = vdc->dring_ident;
2112 	dmsg.start_idx = idx;
2113 	dmsg.end_idx = idx;
2114 
2115 	PR1("ident=0x%llx, st=%d, end=%d, seq=%d req=%d dep=%p\n",
2116 			vdc->dring_ident, dmsg.start_idx, dmsg.end_idx,
2117 			dmsg.seq_num, dep->payload.req_id, dep);
2118 
2119 	mutex_enter(&vdc->lock);
2120 	rv = vdc_send(vdc, (caddr_t)&dmsg, &msglen);
2121 	mutex_exit(&vdc->lock);
2122 	PR1("%s[%d]: ldc_write() rv=%d\n", __func__, vdc->instance, rv);
2123 	if (rv != 0) {
2124 		mutex_exit(&local_dep->lock);
2125 		mutex_exit(&vdc->dring_lock);
2126 		vdc_msg("%s: ldc_write(%d)\n", __func__, rv);
2127 		return (EAGAIN);
2128 	}
2129 
2130 	/*
2131 	 * If the message was successfully sent, we increment the sequence
2132 	 * number to be used by the next message
2133 	 */
2134 	vdc->seq_num++;
2135 
2136 	/*
2137 	 * XXX - potential performance enhancement (Investigate at a later date)
2138 	 *
2139 	 * for calls from strategy(9E), instead of waiting for a response from
2140 	 * vds, we could return at this stage and let the ACK handling code
2141 	 * trigger the biodone(9F)
2142 	 */
2143 
2144 	/*
2145 	 * When a guest is panicking, the completion of requests needs to be
2146 	 * handled differently because interrupts are disabled and vdc
2147 	 * will not get messages. We have to poll for the messages instead.
2148 	 */
2149 	if (ddi_in_panic()) {
2150 		int start = 0;
2151 		retries = 0;
2152 		for (;;) {
2153 			msglen = sizeof (dmsg);
2154 			rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg,
2155 					&msglen);
2156 			if (rv) {
2157 				rv = EINVAL;
2158 				break;
2159 			}
2160 
2161 			/*
2162 			 * if there are no packets wait and check again
2163 			 */
2164 			if ((rv == 0) && (msglen == 0)) {
2165 				if (retries++ > vdc_dump_retries) {
2166 					PR0("[%d] Giving up waiting, idx %d\n",
2167 							vdc->instance, idx);
2168 					rv = EAGAIN;
2169 					break;
2170 				}
2171 
2172 				PR1("Waiting for next packet @ %d\n", idx);
2173 				delay(drv_usectohz(vdc_dump_usec_timeout));
2174 				continue;
2175 			}
2176 
2177 			/*
2178 			 * Ignore all messages that are not ACKs/NACKs to
2179 			 * DRing requests.
2180 			 */
2181 			if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) ||
2182 			    (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) {
2183 				PR0("discarding pkt: type=%d sub=%d env=%d\n",
2184 					dmsg.tag.vio_msgtype,
2185 					dmsg.tag.vio_subtype,
2186 					dmsg.tag.vio_subtype_env);
2187 				continue;
2188 			}
2189 
2190 			/*
2191 			 * set the appropriate return value for the
2192 			 * current request.
2193 			 */
2194 			switch (dmsg.tag.vio_subtype) {
2195 			case VIO_SUBTYPE_ACK:
2196 				rv = 0;
2197 				break;
2198 			case VIO_SUBTYPE_NACK:
2199 				rv = EAGAIN;
2200 				break;
2201 			default:
2202 				continue;
2203 			}
2204 
2205 			start = dmsg.start_idx;
2206 			if (start >= VD_DRING_LEN) {
2207 				PR0("[%d] Bogus ack data : start %d\n",
2208 					vdc->instance, start);
2209 				continue;
2210 			}
2211 
2212 			dep = VDC_GET_DRING_ENTRY_PTR(vdc, start);
2213 
2214 			PR1("[%d] Dumping start=%d idx=%d state=%d\n",
2215 				vdc->instance, start, idx, dep->hdr.dstate);
2216 
2217 			if (dep->hdr.dstate != VIO_DESC_DONE) {
2218 				PR0("[%d] Entry @ %d - state !DONE %d\n",
2219 					vdc->instance, start, dep->hdr.dstate);
2220 				continue;
2221 			}
2222 
2223 			(void) vdc_depopulate_descriptor(vdc, start);
2224 
2225 			/*
2226 			 * We want to process all Dring entries up to
2227 			 * the current one so that we can return an
2228 			 * error with the correct request.
2229 			 */
2230 			if (idx > start) {
2231 				PR0("[%d] Looping: start %d, idx %d\n",
2232 						vdc->instance, idx, start);
2233 				continue;
2234 			}
2235 
2236 			/* exit - all outstanding requests are completed */
2237 			break;
2238 		}
2239 
2240 		mutex_exit(&local_dep->lock);
2241 		mutex_exit(&vdc->dring_lock);
2242 
2243 		return (rv);
2244 	}
2245 
2246 	/*
2247 	 * Now watch the DRing entries we modified to get the response
2248 	 * from vds.
2249 	 */
2250 	rv = vdc_wait_for_descriptor_update(vdc, idx, dmsg);
2251 	if (rv == ETIMEDOUT) {
2252 		/* debug info when dumping state on vds side */
2253 		dep->payload.status = ECANCELED;
2254 	}
2255 
2256 	rv = vdc_depopulate_descriptor(vdc, idx);
2257 	PR1("%s[%d] Status=%d\n", __func__, vdc->instance, rv);
2258 
2259 	mutex_exit(&local_dep->lock);
2260 	mutex_exit(&vdc->dring_lock);
2261 
2262 	return (rv);
2263 }
2264 
2265 /*
2266  * Function:
2267  *	vdc_wait_for_descriptor_update()
2268  *
2269  * Description:
2270  *
2271  * Arguments:
2272  *	vdc	- soft state pointer for this instance of the device driver.
2273  *	idx	- Index of the Descriptor Ring entry being modified
2274  *	dmsg	- LDC message sent by vDisk server
2275  *
2276  * Return Code:
2277  *	0	- Success
2278  */
2279 static int
2280 vdc_wait_for_descriptor_update(vdc_t *vdc, uint_t idx, vio_dring_msg_t dmsg)
2281 {
2282 	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
2283 	vdc_local_desc_t *local_dep = NULL;	/* Local Dring Entry Pointer */
2284 	size_t	msglen = sizeof (dmsg);
2285 	int	retries = 0;
2286 	int	status = 0;
2287 	int	rv = 0;
2288 
2289 	ASSERT(vdc != NULL);
2290 	ASSERT(mutex_owned(&vdc->dring_lock));
2291 	ASSERT(idx < VD_DRING_LEN);
2292 	local_dep = &vdc->local_dring[idx];
2293 	ASSERT(local_dep != NULL);
2294 	dep = local_dep->dep;
2295 	ASSERT(dep != NULL);
2296 
2297 	while (dep->hdr.dstate != VIO_DESC_DONE) {
2298 		rv = cv_timedwait(&local_dep->cv, &local_dep->lock,
2299 			VD_GET_TIMEOUT_HZ(retries));
2300 		if (rv == -1) {
2301 			/*
2302 			 * If they persist in ignoring us we'll storm off in a
2303 			 * huff and return ETIMEDOUT to the upper layers.
2304 			 */
2305 			if (retries >= vdc_retries) {
2306 				PR0("%s: Finished waiting on entry %d\n",
2307 					__func__, idx);
2308 				status = ETIMEDOUT;
2309 				break;
2310 			} else {
2311 				retries++;
2312 				PR0("%s[%d]: Timeout #%d on entry %d "
2313 				    "[seq %d][req %d]\n", __func__,
2314 				    vdc->instance,
2315 				    retries, idx, dmsg.seq_num,
2316 				    dep->payload.req_id);
2317 			}
2318 
2319 			if (dep->hdr.dstate & VIO_DESC_ACCEPTED) {
2320 				PR0("%s[%d]: vds has accessed entry %d [seq %d]"
2321 				    "[req %d] but not ack'ed it yet\n",
2322 				    __func__, vdc->instance, idx, dmsg.seq_num,
2323 				    dep->payload.req_id);
2324 				continue;
2325 			}
2326 
2327 			/*
2328 			 * we resend the message as it may have been dropped
2329 			 * and have never made it to the other side (vds).
2330 			 * (We reuse the original message but update seq ID)
2331 			 */
2332 			VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc);
2333 			retries = 0;
2334 			mutex_enter(&vdc->lock);
2335 			status = vdc_send(vdc, (caddr_t)&dmsg, &msglen);
2336 			mutex_exit(&vdc->lock);
2337 			if (status != 0) {
2338 				vdc_msg("%s: Error (%d) while resending after "
2339 					"timeout\n", __func__, status);
2340 				status = ETIMEDOUT;
2341 				break;
2342 			}
2343 			/*
2344 			 * If the message was successfully sent, we increment
2345 			 * the sequence number to be used by the next message.
2346 			 */
2347 			vdc->seq_num++;
2348 		}
2349 	}
2350 
2351 	return (status);
2352 }
2353 
2354 static int
2355 vdc_get_response(vdc_t *vdc, int start, int end)
2356 {
2357 	vdc_local_desc_t	*ldep = NULL;	/* Local Dring Entry Pointer */
2358 	vd_dring_entry_t	*dep = NULL;	/* Dring Entry Pointer */
2359 	int			status = ENXIO;
2360 	int			idx = -1;
2361 
2362 	ASSERT(vdc != NULL);
2363 	ASSERT(start >= 0);
2364 	ASSERT(start <= VD_DRING_LEN);
2365 	ASSERT(start >= -1);
2366 	ASSERT(start <= VD_DRING_LEN);
2367 
2368 	idx = start;
2369 	ldep = &vdc->local_dring[idx];
2370 	ASSERT(ldep != NULL);
2371 	dep = ldep->dep;
2372 	ASSERT(dep != NULL);
2373 
2374 	PR0("%s[%d] DRING entry=%d status=%d\n", __func__, vdc->instance,
2375 			idx, VIO_GET_DESC_STATE(dep->hdr.dstate));
2376 	while (VIO_GET_DESC_STATE(dep->hdr.dstate) == VIO_DESC_DONE) {
2377 		if ((end != -1) && (idx > end))
2378 			return (0);
2379 
2380 		switch (ldep->operation) {
2381 		case VD_OP_BREAD:
2382 		case VD_OP_BWRITE:
2383 			/* call bioxxx */
2384 			break;
2385 		default:
2386 			/* signal waiter */
2387 			break;
2388 		}
2389 
2390 		/* Clear the DRing entry */
2391 		status = vdc_depopulate_descriptor(vdc, idx);
2392 		PR0("%s[%d] Status=%d\n", __func__, vdc->instance, status);
2393 
2394 		/* loop accounting to get next DRing entry */
2395 		idx++;
2396 		ldep = &vdc->local_dring[idx];
2397 		dep = ldep->dep;
2398 	}
2399 
2400 	return (status);
2401 }
2402 
2403 /*
2404  * Function:
2405  *	vdc_depopulate_descriptor()
2406  *
2407  * Description:
2408  *
2409  * Arguments:
2410  *	vdc	- soft state pointer for this instance of the device driver.
2411  *	idx	- Index of the Descriptor Ring entry being modified
2412  *
2413  * Return Code:
2414  *	0	- Success
2415  */
2416 static int
2417 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
2418 {
2419 	vd_dring_entry_t *dep = NULL;		/* Dring Entry Pointer */
2420 	vdc_local_desc_t *ldep = NULL;		/* Local Dring Entry Pointer */
2421 	int		status = ENXIO;
2422 	int		operation;
2423 	int		rv = 0;
2424 
2425 	ASSERT(vdc != NULL);
2426 	ASSERT(idx < VD_DRING_LEN);
2427 	ldep = &vdc->local_dring[idx];
2428 	ASSERT(ldep != NULL);
2429 	dep = ldep->dep;
2430 	ASSERT(dep != NULL);
2431 
2432 	status = dep->payload.status;
2433 	operation = dep->payload.operation;
2434 	VDC_MARK_DRING_ENTRY_FREE(vdc, idx);
2435 	ldep = &vdc->local_dring[idx];
2436 	VIO_SET_DESC_STATE(ldep->flags, VIO_DESC_FREE);
2437 
2438 	/* the DKIO W$ operations never bind handles so we can return now */
2439 	if ((operation == VD_OP_FLUSH) ||
2440 	    (operation == VD_OP_GET_WCE) ||
2441 	    (operation == VD_OP_SET_WCE))
2442 		return (status);
2443 
2444 	/*
2445 	 * If the upper layer passed in a misaligned address we copied the
2446 	 * data into an aligned buffer before sending it to LDC - we now
2447 	 * copy it back to the original buffer.
2448 	 */
2449 	if (ldep->align_addr) {
2450 		ASSERT(ldep->addr != NULL);
2451 		ASSERT(dep->payload.nbytes > 0);
2452 
2453 		bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes);
2454 		kmem_free(ldep->align_addr,
2455 				sizeof (caddr_t) * dep->payload.nbytes);
2456 		ldep->align_addr = NULL;
2457 	}
2458 
2459 	rv = ldc_mem_unbind_handle(ldep->desc_mhdl);
2460 	if (rv != 0) {
2461 		cmn_err(CE_NOTE, "[%d] unbind mem hdl 0x%lx @ idx %d failed:%d",
2462 				vdc->instance, ldep->desc_mhdl, idx, rv);
2463 		/*
2464 		 * The error returned by the vDisk server is more informative
2465 		 * and thus has a higher priority but if it isn't set we ensure
2466 		 * that this function returns an error.
2467 		 */
2468 		if (status == 0)
2469 			status = EINVAL;
2470 	}
2471 
2472 	return (status);
2473 }
2474 
2475 /*
2476  * Function:
2477  *	vdc_populate_mem_hdl()
2478  *
2479  * Description:
2480  *
2481  * Arguments:
2482  *	vdc	- soft state pointer for this instance of the device driver.
2483  *	idx	- Index of the Descriptor Ring entry being modified
2484  *	addr	- virtual address being mapped in
2485  *	nybtes	- number of bytes in 'addr'
2486  *	operation - the vDisk operation being performed (VD_OP_xxx)
2487  *
2488  * Return Code:
2489  *	0	- Success
2490  */
2491 static int
2492 vdc_populate_mem_hdl(vdc_t *vdc, uint_t idx, caddr_t addr, size_t nbytes,
2493 			int operation)
2494 {
2495 	vd_dring_entry_t	*dep = NULL;
2496 	vdc_local_desc_t	*ldep = NULL;
2497 	ldc_mem_handle_t	mhdl;
2498 	caddr_t			vaddr;
2499 	int			perm = LDC_MEM_RW;
2500 	int			rv = 0;
2501 	int			i;
2502 
2503 	ASSERT(vdc != NULL);
2504 	ASSERT(idx < VD_DRING_LEN);
2505 
2506 	dep = VDC_GET_DRING_ENTRY_PTR(vdc, idx);
2507 	ldep = &vdc->local_dring[idx];
2508 	mhdl = ldep->desc_mhdl;
2509 
2510 	switch (operation) {
2511 	case VD_OP_BREAD:
2512 		perm = LDC_MEM_W;
2513 		break;
2514 
2515 	case VD_OP_BWRITE:
2516 		perm = LDC_MEM_R;
2517 		break;
2518 
2519 	case VD_OP_GET_VTOC:
2520 	case VD_OP_SET_VTOC:
2521 	case VD_OP_GET_DISKGEOM:
2522 	case VD_OP_SET_DISKGEOM:
2523 	case VD_OP_SCSICMD:
2524 		perm = LDC_MEM_RW;
2525 		break;
2526 
2527 	default:
2528 		ASSERT(0);	/* catch bad programming in vdc */
2529 	}
2530 
2531 	/*
2532 	 * LDC expects any addresses passed in to be 8-byte aligned. We need
2533 	 * to copy the contents of any misaligned buffers to a newly allocated
2534 	 * buffer and bind it instead (and copy the the contents back to the
2535 	 * original buffer passed in when depopulating the descriptor)
2536 	 */
2537 	vaddr = addr;
2538 	if (((uint64_t)addr & 0x7) != 0) {
2539 		ldep->align_addr =
2540 			kmem_zalloc(sizeof (caddr_t) * nbytes, KM_SLEEP);
2541 		PR0("%s[%d] Misaligned address %lx reallocating "
2542 		    "(buf=%lx entry=%d)\n",
2543 		    __func__, vdc->instance, addr, ldep->align_addr, idx);
2544 		bcopy(addr, ldep->align_addr, nbytes);
2545 		vaddr = ldep->align_addr;
2546 	}
2547 
2548 	rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8),
2549 		vdc->dring_mem_info.mtype, perm, &dep->payload.cookie[0],
2550 		&dep->payload.ncookies);
2551 	PR1("%s[%d] bound mem handle; ncookies=%d\n",
2552 			__func__, vdc->instance, dep->payload.ncookies);
2553 	if (rv != 0) {
2554 		vdc_msg("%s[%d] failed to ldc_mem_bind_handle "
2555 		    "(mhdl=%lx, buf=%lx entry=%d err=%d)\n",
2556 		    __func__, vdc->instance, mhdl, addr, idx, rv);
2557 		if (ldep->align_addr) {
2558 			kmem_free(ldep->align_addr,
2559 					sizeof (caddr_t) * dep->payload.nbytes);
2560 			ldep->align_addr = NULL;
2561 		}
2562 		return (EAGAIN);
2563 	}
2564 
2565 	/*
2566 	 * Get the other cookies (if any).
2567 	 */
2568 	for (i = 1; i < dep->payload.ncookies; i++) {
2569 		rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]);
2570 		if (rv != 0) {
2571 			(void) ldc_mem_unbind_handle(mhdl);
2572 			vdc_msg("%s: failed to get next cookie(mhdl=%lx "
2573 				"cnum=%d), err=%d", __func__, mhdl, i, rv);
2574 			if (ldep->align_addr) {
2575 				kmem_free(ldep->align_addr,
2576 					sizeof (caddr_t) * dep->payload.nbytes);
2577 				ldep->align_addr = NULL;
2578 			}
2579 			return (EAGAIN);
2580 		}
2581 	}
2582 
2583 	return (rv);
2584 }
2585 
2586 /*
2587  * Interrupt handlers for messages from LDC
2588  */
2589 
2590 /*
2591  * Function:
2592  *	vdc_handle_cb()
2593  *
2594  * Description:
2595  *
2596  * Arguments:
2597  *	event	- Type of event (LDC_EVT_xxx) that triggered the callback
2598  *	arg	- soft state pointer for this instance of the device driver.
2599  *
2600  * Return Code:
2601  *	0	- Success
2602  */
2603 static uint_t
2604 vdc_handle_cb(uint64_t event, caddr_t arg)
2605 {
2606 	ldc_status_t	ldc_state;
2607 	int		rv = 0;
2608 
2609 	vdc_t	*vdc = (vdc_t *)(void *)arg;
2610 
2611 	ASSERT(vdc != NULL);
2612 
2613 	PR1("%s[%d] event=%x seqID=%d\n",
2614 			__func__, vdc->instance, event, vdc->seq_num);
2615 
2616 	/*
2617 	 * Depending on the type of event that triggered this callback,
2618 	 * we modify the handhske state or read the data.
2619 	 *
2620 	 * NOTE: not done as a switch() as event could be triggered by
2621 	 * a state change and a read request. Also the ordering	of the
2622 	 * check for the event types is deliberate.
2623 	 */
2624 	if (event & LDC_EVT_UP) {
2625 		PR0("%s[%d] Received LDC_EVT_UP\n", __func__, vdc->instance);
2626 
2627 		/* get LDC state */
2628 		rv = ldc_status(vdc->ldc_handle, &ldc_state);
2629 		if (rv != 0) {
2630 			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
2631 					vdc->instance, rv);
2632 			mutex_enter(&vdc->lock);
2633 			vdc_reset_connection(vdc, B_TRUE);
2634 			mutex_exit(&vdc->lock);
2635 			return (LDC_SUCCESS);
2636 		}
2637 
2638 		/*
2639 		 * Reset the transaction sequence numbers when LDC comes up.
2640 		 * We then kick off the handshake negotiation with the vDisk
2641 		 * server.
2642 		 */
2643 		mutex_enter(&vdc->lock);
2644 		vdc->seq_num = 1;
2645 		vdc->seq_num_reply = 0;
2646 		vdc->ldc_state = ldc_state;
2647 		ASSERT(ldc_state == LDC_UP);
2648 		mutex_exit(&vdc->lock);
2649 
2650 		vdc_init_handshake_negotiation(vdc);
2651 
2652 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2653 	}
2654 
2655 	if (event & LDC_EVT_READ) {
2656 		/*
2657 		 * Wake up the worker thread to process the message
2658 		 */
2659 		mutex_enter(&vdc->msg_proc_lock);
2660 		vdc->msg_pending = B_TRUE;
2661 		cv_signal(&vdc->msg_proc_cv);
2662 		mutex_exit(&vdc->msg_proc_lock);
2663 
2664 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
2665 
2666 		/* that's all we have to do - no need to handle DOWN/RESET */
2667 		return (LDC_SUCCESS);
2668 	}
2669 
2670 	if (event & LDC_EVT_RESET) {
2671 		PR0("%s[%d] Recvd LDC RESET event\n", __func__, vdc->instance);
2672 
2673 		/* get LDC state */
2674 		rv = ldc_status(vdc->ldc_handle, &ldc_state);
2675 		if (rv != 0) {
2676 			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
2677 					vdc->instance, rv);
2678 			ldc_state = LDC_OPEN;
2679 		}
2680 		mutex_enter(&vdc->lock);
2681 		vdc->ldc_state = ldc_state;
2682 		vdc_reset_connection(vdc, B_FALSE);
2683 		mutex_exit(&vdc->lock);
2684 
2685 		vdc_init_handshake_negotiation(vdc);
2686 	}
2687 
2688 	if (event & LDC_EVT_DOWN) {
2689 		PR0("%s[%d] Recvd LDC DOWN event\n", __func__, vdc->instance);
2690 
2691 		/* get LDC state */
2692 		rv = ldc_status(vdc->ldc_handle, &ldc_state);
2693 		if (rv != 0) {
2694 			cmn_err(CE_NOTE, "[%d] Couldn't get LDC status %d",
2695 					vdc->instance, rv);
2696 			ldc_state = LDC_OPEN;
2697 		}
2698 		mutex_enter(&vdc->lock);
2699 		vdc->ldc_state = ldc_state;
2700 		vdc_reset_connection(vdc, B_TRUE);
2701 		mutex_exit(&vdc->lock);
2702 	}
2703 
2704 	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ))
2705 		cmn_err(CE_NOTE, "![%d] Unexpected LDC event (%lx) received",
2706 				vdc->instance, event);
2707 
2708 	return (LDC_SUCCESS);
2709 }
2710 
2711 /* -------------------------------------------------------------------------- */
2712 
2713 /*
2714  * The following functions process the incoming messages from vds
2715  */
2716 
2717 
2718 /*
2719  * Function:
2720  *	vdc_process_msg_thread()
2721  *
2722  * Description:
2723  *
2724  * Arguments:
2725  *	vdc	- soft state pointer for this instance of the device driver.
2726  *
2727  * Return Code:
2728  *	None
2729  */
2730 static void
2731 vdc_process_msg_thread(vdc_t *vdc)
2732 {
2733 	int		status = 0;
2734 	boolean_t	q_is_empty = B_TRUE;
2735 
2736 	ASSERT(vdc != NULL);
2737 
2738 	mutex_enter(&vdc->msg_proc_lock);
2739 	PR0("%s[%d]: Starting\n", __func__, vdc->instance);
2740 
2741 	vdc->msg_proc_thr_state = VDC_THR_RUNNING;
2742 
2743 	while (vdc->msg_proc_thr_state == VDC_THR_RUNNING) {
2744 
2745 		PR1("%s[%d] Waiting\n", __func__, vdc->instance);
2746 		while (!vdc->msg_pending)
2747 			cv_wait(&vdc->msg_proc_cv, &vdc->msg_proc_lock);
2748 
2749 		PR1("%s[%d] Message Received\n", __func__, vdc->instance);
2750 
2751 		/* check if there is data */
2752 		status = ldc_chkq(vdc->ldc_handle, &q_is_empty);
2753 		if ((status != 0) &&
2754 		    (vdc->msg_proc_thr_state == VDC_THR_RUNNING)) {
2755 			cmn_err(CE_NOTE, "[%d] Unable to communicate with vDisk"
2756 					" server. Cannot check LDC queue: %d",
2757 					vdc->instance, status);
2758 			mutex_enter(&vdc->lock);
2759 			vdc_reset_connection(vdc, B_FALSE);
2760 			mutex_exit(&vdc->lock);
2761 			vdc->msg_proc_thr_state = VDC_THR_STOP;
2762 			continue;
2763 		}
2764 
2765 		if (!q_is_empty) {
2766 			PR1("%s: new pkt(s) available\n", __func__);
2767 			vdc_process_msg(vdc);
2768 		}
2769 
2770 		vdc->msg_pending = B_FALSE;
2771 	}
2772 
2773 	PR0("Message processing thread stopped\n");
2774 	vdc->msg_pending = B_FALSE;
2775 	vdc->msg_proc_thr_state = VDC_THR_DONE;
2776 	cv_signal(&vdc->msg_proc_cv);
2777 	mutex_exit(&vdc->msg_proc_lock);
2778 	thread_exit();
2779 }
2780 
2781 
2782 /*
2783  * Function:
2784  *	vdc_process_msg()
2785  *
2786  * Description:
2787  *	This function is called by the message processing thread each time it
2788  *	is triggered when LDC sends an interrupt to indicate that there are
2789  *	more packets on the queue. When it is called it will continue to loop
2790  *	and read the messages until there are no more left of the queue. If it
2791  *	encounters an invalid sized message it will drop it and check the next
2792  *	message.
2793  *
2794  * Arguments:
2795  *	arg	- soft state pointer for this instance of the device driver.
2796  *
2797  * Return Code:
2798  *	None.
2799  */
2800 static void
2801 vdc_process_msg(void *arg)
2802 {
2803 	vdc_t		*vdc = (vdc_t *)(void *)arg;
2804 	vio_msg_t	vio_msg;
2805 	size_t		nbytes = sizeof (vio_msg);
2806 	int		status;
2807 
2808 	ASSERT(vdc != NULL);
2809 
2810 	mutex_enter(&vdc->lock);
2811 
2812 	PR1("%s\n", __func__);
2813 
2814 	for (;;) {
2815 
2816 		/* read all messages - until no more left */
2817 		status = ldc_read(vdc->ldc_handle, (caddr_t)&vio_msg, &nbytes);
2818 
2819 		if (status) {
2820 			vdc_msg("%s: ldc_read() failed = %d", __func__, status);
2821 
2822 			/* if status is ECONNRESET --- reset vdc state */
2823 			if (status == EIO || status == ECONNRESET) {
2824 				vdc_reset_connection(vdc, B_TRUE);
2825 			}
2826 
2827 			mutex_exit(&vdc->lock);
2828 			return;
2829 		}
2830 
2831 		if ((nbytes > 0) && (nbytes < sizeof (vio_msg_tag_t))) {
2832 			cmn_err(CE_CONT, "![%d] Expect %lu bytes; recv'd %lu\n",
2833 				vdc->instance, sizeof (vio_msg_tag_t), nbytes);
2834 			mutex_exit(&vdc->lock);
2835 			return;
2836 		}
2837 
2838 		if (nbytes == 0) {
2839 			PR2("%s[%d]: ldc_read() done..\n",
2840 					__func__, vdc->instance);
2841 			mutex_exit(&vdc->lock);
2842 			return;
2843 		}
2844 
2845 		PR1("%s[%d] (%x/%x/%x)\n", __func__, vdc->instance,
2846 		    vio_msg.tag.vio_msgtype,
2847 		    vio_msg.tag.vio_subtype,
2848 		    vio_msg.tag.vio_subtype_env);
2849 
2850 		/*
2851 		 * Verify the Session ID of the message
2852 		 *
2853 		 * Every message after the Version has been negotiated should
2854 		 * have the correct session ID set.
2855 		 */
2856 		if ((vio_msg.tag.vio_sid != vdc->session_id) &&
2857 		    (vio_msg.tag.vio_subtype_env != VIO_VER_INFO)) {
2858 			cmn_err(CE_NOTE, "[%d] Invalid SID 0x%x, expect 0x%lx",
2859 				vdc->instance, vio_msg.tag.vio_sid,
2860 				vdc->session_id);
2861 			vdc_reset_connection(vdc, B_FALSE);
2862 			mutex_exit(&vdc->lock);
2863 			return;
2864 		}
2865 
2866 		switch (vio_msg.tag.vio_msgtype) {
2867 		case VIO_TYPE_CTRL:
2868 			status = vdc_process_ctrl_msg(vdc, vio_msg);
2869 			break;
2870 		case VIO_TYPE_DATA:
2871 			status = vdc_process_data_msg(vdc, vio_msg);
2872 			break;
2873 		case VIO_TYPE_ERR:
2874 			status = vdc_process_err_msg(vdc, vio_msg);
2875 			break;
2876 		default:
2877 			PR1("%s", __func__);
2878 			status = EINVAL;
2879 			break;
2880 		}
2881 
2882 		if (status != 0) {
2883 			PR0("%s[%d] Error (%d) occcurred processing msg\n",
2884 					__func__, vdc->instance, status);
2885 			vdc_reset_connection(vdc, B_FALSE);
2886 		}
2887 	}
2888 	_NOTE(NOTREACHED)
2889 }
2890 
2891 /*
2892  * Function:
2893  *	vdc_process_ctrl_msg()
2894  *
2895  * Description:
2896  *	This function is called by the message processing thread each time
2897  *	an LDC message with a msgtype of VIO_TYPE_CTRL is received.
2898  *
2899  * Arguments:
2900  *	vdc	- soft state pointer for this instance of the device driver.
2901  *	msg	- the LDC message sent by vds
2902  *
2903  * Return Codes:
2904  *	0	- Success.
2905  *	EPROTO	- A message was received which shouldn't have happened according
2906  *		  to the protocol
2907  *	ENOTSUP	- An action which is allowed according to the protocol but which
2908  *		  isn't (or doesn't need to be) implemented yet.
2909  *	EINVAL	- An invalid value was returned as part of a message.
2910  */
2911 static int
2912 vdc_process_ctrl_msg(vdc_t *vdc, vio_msg_t msg)
2913 {
2914 	int			status = -1;
2915 
2916 	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_CTRL);
2917 	ASSERT(vdc != NULL);
2918 	ASSERT(mutex_owned(&vdc->lock));
2919 
2920 	/* Depending on which state we are in; process the message */
2921 	switch (vdc->state) {
2922 	case VD_STATE_INIT:
2923 		status = vdc_handle_ver_msg(vdc, (vio_ver_msg_t *)&msg);
2924 		break;
2925 
2926 	case VD_STATE_VER:
2927 		status = vdc_handle_attr_msg(vdc, (vd_attr_msg_t *)&msg);
2928 		break;
2929 
2930 	case VD_STATE_ATTR:
2931 		status = vdc_handle_dring_reg_msg(vdc,
2932 				(vio_dring_reg_msg_t *)&msg);
2933 		break;
2934 
2935 	case VD_STATE_RDX:
2936 		if (msg.tag.vio_subtype_env != VIO_RDX) {
2937 			status = EPROTO;
2938 			break;
2939 		}
2940 
2941 		PR0("%s: Received RDX - handshake successful\n", __func__);
2942 
2943 		vdc->hshake_cnt = 0;	/* reset failed handshake count */
2944 		status = 0;
2945 		vdc->state = VD_STATE_DATA;
2946 
2947 		cv_broadcast(&vdc->attach_cv);
2948 		break;
2949 
2950 	case VD_STATE_DATA:
2951 	default:
2952 		cmn_err(CE_NOTE, "[%d] Unexpected handshake state %d",
2953 				vdc->instance, vdc->state);
2954 		status = EPROTO;
2955 		break;
2956 	}
2957 
2958 	return (status);
2959 }
2960 
2961 
2962 /*
2963  * Function:
2964  *	vdc_process_data_msg()
2965  *
2966  * Description:
2967  *	This function is called by the message processing thread each time
2968  *	a message with a msgtype of VIO_TYPE_DATA is received. It will either
2969  *	be an ACK or NACK from vds[1] which vdc handles as follows.
2970  *		ACK	- wake up the waiting thread
2971  *		NACK	- resend any messages necessary
2972  *
2973  *	[1] Although the message format allows it, vds should not send a
2974  *	    VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
2975  *	    some bizarre reason it does, vdc will reset the connection.
2976  *
2977  * Arguments:
2978  *	vdc	- soft state pointer for this instance of the device driver.
2979  *	msg	- the LDC message sent by vds
2980  *
2981  * Return Code:
2982  *	0	- Success.
2983  *	> 0	- error value returned by LDC
2984  */
2985 static int
2986 vdc_process_data_msg(vdc_t *vdc, vio_msg_t msg)
2987 {
2988 	int			status = 0;
2989 	vdc_local_desc_t	*local_dep = NULL;
2990 	vio_dring_msg_t		*dring_msg = NULL;
2991 	uint_t			num_msgs;
2992 	uint_t			start;
2993 	uint_t			end;
2994 	uint_t			i;
2995 
2996 	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_DATA);
2997 	ASSERT(vdc != NULL);
2998 	ASSERT(mutex_owned(&vdc->lock));
2999 
3000 	dring_msg = (vio_dring_msg_t *)&msg;
3001 
3002 	/*
3003 	 * Check to see if the message has bogus data
3004 	 */
3005 	start = dring_msg->start_idx;
3006 	end = dring_msg->end_idx;
3007 	if ((start >= VD_DRING_LEN) || (end >= VD_DRING_LEN)) {
3008 		vdc_msg("%s: Bogus ACK data : start %d, end %d\n",
3009 			__func__, start, end);
3010 		return (EPROTO);
3011 	}
3012 
3013 	/*
3014 	 * calculate the number of messages that vds ACK'ed
3015 	 *
3016 	 * Assumes, (like the rest of vdc) that there is a 1:1 mapping
3017 	 * between requests and Dring entries.
3018 	 */
3019 	num_msgs = (end >= start) ?
3020 			(end - start + 1) :
3021 			(VD_DRING_LEN - start + end + 1);
3022 
3023 	/*
3024 	 * Verify that the sequence number is what vdc expects.
3025 	 */
3026 	if (!vdc_verify_seq_num(vdc, dring_msg, num_msgs)) {
3027 		return (ENXIO);
3028 	}
3029 
3030 	/*
3031 	 * Wake the thread waiting for each DRing entry ACK'ed
3032 	 */
3033 	for (i = 0; i < num_msgs; i++) {
3034 		int idx = (start + i) % VD_DRING_LEN;
3035 
3036 		local_dep = &vdc->local_dring[idx];
3037 		mutex_enter(&local_dep->lock);
3038 		cv_signal(&local_dep->cv);
3039 		mutex_exit(&local_dep->lock);
3040 	}
3041 
3042 	if (msg.tag.vio_subtype == VIO_SUBTYPE_NACK) {
3043 		PR0("%s: DATA NACK\n", __func__);
3044 		VDC_DUMP_DRING_MSG(dring_msg);
3045 		vdc_reset_connection(vdc, B_FALSE);
3046 
3047 		/* we need to drop the lock to trigger the handshake */
3048 		mutex_exit(&vdc->lock);
3049 		vdc_init_handshake_negotiation(vdc);
3050 		mutex_enter(&vdc->lock);
3051 	} else if (msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
3052 		status = EPROTO;
3053 	}
3054 
3055 	return (status);
3056 }
3057 
3058 /*
3059  * Function:
3060  *	vdc_process_err_msg()
3061  *
3062  * NOTE: No error messages are used as part of the vDisk protocol
3063  */
3064 static int
3065 vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg)
3066 {
3067 	_NOTE(ARGUNUSED(vdc))
3068 	_NOTE(ARGUNUSED(msg))
3069 
3070 	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR);
3071 	cmn_err(CE_NOTE, "[%d] Got an ERR msg", vdc->instance);
3072 
3073 	return (ENOTSUP);
3074 }
3075 
3076 /*
3077  * Function:
3078  *	vdc_handle_ver_msg()
3079  *
3080  * Description:
3081  *
3082  * Arguments:
3083  *	vdc	- soft state pointer for this instance of the device driver.
3084  *	ver_msg	- LDC message sent by vDisk server
3085  *
3086  * Return Code:
3087  *	0	- Success
3088  */
3089 static int
3090 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg)
3091 {
3092 	int status = 0;
3093 
3094 	ASSERT(vdc != NULL);
3095 	ASSERT(mutex_owned(&vdc->lock));
3096 
3097 	if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) {
3098 		return (EPROTO);
3099 	}
3100 
3101 	if (ver_msg->dev_class != VDEV_DISK_SERVER) {
3102 		return (EINVAL);
3103 	}
3104 
3105 	switch (ver_msg->tag.vio_subtype) {
3106 	case VIO_SUBTYPE_ACK:
3107 		/*
3108 		 * We check to see if the version returned is indeed supported
3109 		 * (The server may have also adjusted the minor number downwards
3110 		 * and if so 'ver_msg' will contain the actual version agreed)
3111 		 */
3112 		if (vdc_is_supported_version(ver_msg)) {
3113 			vdc->ver.major = ver_msg->ver_major;
3114 			vdc->ver.minor = ver_msg->ver_minor;
3115 			ASSERT(vdc->ver.major > 0);
3116 
3117 			vdc->state = VD_STATE_VER;
3118 			status = vdc_init_attr_negotiation(vdc);
3119 		} else {
3120 			status = EPROTO;
3121 		}
3122 		break;
3123 
3124 	case VIO_SUBTYPE_NACK:
3125 		/*
3126 		 * call vdc_is_supported_version() which will return the next
3127 		 * supported version (if any) in 'ver_msg'
3128 		 */
3129 		(void) vdc_is_supported_version(ver_msg);
3130 		if (ver_msg->ver_major > 0) {
3131 			size_t len = sizeof (*ver_msg);
3132 
3133 			ASSERT(vdc->ver.major > 0);
3134 
3135 			/* reset the necessary fields and resend */
3136 			ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
3137 			ver_msg->dev_class = VDEV_DISK;
3138 
3139 			status = vdc_send(vdc, (caddr_t)ver_msg, &len);
3140 			PR0("[%d] Resend VER info (LDC status = %d)\n",
3141 					vdc->instance, status);
3142 			if (len != sizeof (*ver_msg))
3143 				status = EBADMSG;
3144 		} else {
3145 			cmn_err(CE_NOTE, "[%d] No common version with "
3146 					"vDisk server", vdc->instance);
3147 			status = ENOTSUP;
3148 		}
3149 
3150 		break;
3151 	case VIO_SUBTYPE_INFO:
3152 		/*
3153 		 * Handle the case where vds starts handshake
3154 		 * (for now only vdc is the instigatior)
3155 		 */
3156 		status = ENOTSUP;
3157 		break;
3158 
3159 	default:
3160 		status = EINVAL;
3161 		break;
3162 	}
3163 
3164 	return (status);
3165 }
3166 
3167 /*
3168  * Function:
3169  *	vdc_handle_attr_msg()
3170  *
3171  * Description:
3172  *
3173  * Arguments:
3174  *	vdc	- soft state pointer for this instance of the device driver.
3175  *	attr_msg	- LDC message sent by vDisk server
3176  *
3177  * Return Code:
3178  *	0	- Success
3179  */
3180 static int
3181 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
3182 {
3183 	int status = 0;
3184 
3185 	ASSERT(vdc != NULL);
3186 	ASSERT(mutex_owned(&vdc->lock));
3187 
3188 	if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) {
3189 		return (EPROTO);
3190 	}
3191 
3192 	switch (attr_msg->tag.vio_subtype) {
3193 	case VIO_SUBTYPE_ACK:
3194 		/*
3195 		 * We now verify the attributes sent by vds.
3196 		 */
3197 		vdc->vdisk_size = attr_msg->vdisk_size;
3198 		vdc->vdisk_type = attr_msg->vdisk_type;
3199 
3200 		if ((attr_msg->max_xfer_sz != vdc->max_xfer_sz) ||
3201 		    (attr_msg->vdisk_block_size != vdc->block_size)) {
3202 			/*
3203 			 * Future support: step down to the block size
3204 			 * and max transfer size suggested by the
3205 			 * server. (If this value is less than 128K
3206 			 * then multiple Dring entries per request
3207 			 * would need to be implemented)
3208 			 */
3209 			cmn_err(CE_NOTE, "[%d] Couldn't process block "
3210 				"attributes from vds", vdc->instance);
3211 			status = EINVAL;
3212 			break;
3213 		}
3214 
3215 		if ((attr_msg->xfer_mode != VIO_DRING_MODE) ||
3216 		    (attr_msg->vdisk_size > INT64_MAX) ||
3217 		    (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
3218 			vdc_msg("%s[%d] Couldn't process attrs "
3219 			    "from vds", __func__, vdc->instance);
3220 			status = EINVAL;
3221 			break;
3222 		}
3223 
3224 		vdc->state = VD_STATE_ATTR;
3225 		status = vdc_init_dring_negotiate(vdc);
3226 		break;
3227 
3228 	case VIO_SUBTYPE_NACK:
3229 		/*
3230 		 * vds could not handle the attributes we sent so we
3231 		 * stop negotiating.
3232 		 */
3233 		status = EPROTO;
3234 		break;
3235 
3236 	case VIO_SUBTYPE_INFO:
3237 		/*
3238 		 * Handle the case where vds starts the handshake
3239 		 * (for now; vdc is the only supported instigatior)
3240 		 */
3241 		status = ENOTSUP;
3242 		break;
3243 
3244 	default:
3245 		status = ENOTSUP;
3246 		break;
3247 	}
3248 
3249 	return (status);
3250 }
3251 
3252 /*
3253  * Function:
3254  *	vdc_handle_dring_reg_msg()
3255  *
3256  * Description:
3257  *
3258  * Arguments:
3259  *	vdc		- soft state pointer for this instance of the driver.
3260  *	dring_msg	- LDC message sent by vDisk server
3261  *
3262  * Return Code:
3263  *	0	- Success
3264  */
3265 static int
3266 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg)
3267 {
3268 	int		status = 0;
3269 	vio_rdx_msg_t	msg = {0};
3270 	size_t		msglen = sizeof (msg);
3271 
3272 	ASSERT(vdc != NULL);
3273 	ASSERT(mutex_owned(&vdc->lock));
3274 
3275 	if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) {
3276 		return (EPROTO);
3277 	}
3278 
3279 	switch (dring_msg->tag.vio_subtype) {
3280 	case VIO_SUBTYPE_ACK:
3281 		/* save the received dring_ident */
3282 		vdc->dring_ident = dring_msg->dring_ident;
3283 		PR0("%s[%d] Received dring ident=0x%lx\n",
3284 			__func__, vdc->instance, vdc->dring_ident);
3285 
3286 		/*
3287 		 * Send an RDX message to vds to indicate we are ready
3288 		 * to send data
3289 		 */
3290 		msg.tag.vio_msgtype = VIO_TYPE_CTRL;
3291 		msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
3292 		msg.tag.vio_subtype_env = VIO_RDX;
3293 		msg.tag.vio_sid = vdc->session_id;
3294 		status = vdc_send(vdc, (caddr_t)&msg, &msglen);
3295 		if (status != 0) {
3296 			cmn_err(CE_NOTE, "[%d] Failed to send RDX"
3297 				" message (%d)", vdc->instance, status);
3298 			break;
3299 		}
3300 
3301 		vdc->state = VD_STATE_RDX;
3302 		break;
3303 
3304 	case VIO_SUBTYPE_NACK:
3305 		/*
3306 		 * vds could not handle the DRing info we sent so we
3307 		 * stop negotiating.
3308 		 */
3309 		cmn_err(CE_CONT, "server could not register DRing\n");
3310 		vdc_reset_connection(vdc, B_FALSE);
3311 		vdc_destroy_descriptor_ring(vdc);
3312 		status = EPROTO;
3313 		break;
3314 
3315 	case VIO_SUBTYPE_INFO:
3316 		/*
3317 		 * Handle the case where vds starts handshake
3318 		 * (for now only vdc is the instigatior)
3319 		 */
3320 		status = ENOTSUP;
3321 		break;
3322 	default:
3323 		status = ENOTSUP;
3324 	}
3325 
3326 	return (status);
3327 }
3328 
3329 /*
3330  * Function:
3331  *	vdc_verify_seq_num()
3332  *
3333  * Description:
3334  *	This functions verifies that the sequence number sent back by vds with
3335  *	the latest message correctly follows the last request processed.
3336  *
3337  * Arguments:
3338  *	vdc		- soft state pointer for this instance of the driver.
3339  *	dring_msg	- pointer to the LDC message sent by vds
3340  *	num_msgs	- the number of requests being acknowledged
3341  *
3342  * Return Code:
3343  *	B_TRUE	- Success.
3344  *	B_FALSE	- The seq numbers are so out of sync, vdc cannot deal with them
3345  */
3346 static boolean_t
3347 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg, int num_msgs)
3348 {
3349 	ASSERT(vdc != NULL);
3350 	ASSERT(dring_msg != NULL);
3351 
3352 	/*
3353 	 * Check to see if the messages were responded to in the correct
3354 	 * order by vds. There are 3 possible scenarios:
3355 	 *	- the seq_num we expected is returned (everything is OK)
3356 	 *	- a seq_num earlier than the last one acknowledged is returned,
3357 	 *	  if so something is seriously wrong so we reset the connection
3358 	 *	- a seq_num greater than what we expected is returned.
3359 	 */
3360 	if (dring_msg->seq_num != (vdc->seq_num_reply + num_msgs)) {
3361 		vdc_msg("%s[%d]: Bogus seq_num %d, expected %d\n",
3362 			__func__, vdc->instance, dring_msg->seq_num,
3363 			vdc->seq_num_reply + num_msgs);
3364 		if (dring_msg->seq_num < (vdc->seq_num_reply + num_msgs)) {
3365 			return (B_FALSE);
3366 		} else {
3367 			/*
3368 			 * vds has responded with a seq_num greater than what we
3369 			 * expected
3370 			 */
3371 			return (B_FALSE);
3372 		}
3373 	}
3374 	vdc->seq_num_reply += num_msgs;
3375 
3376 	return (B_TRUE);
3377 }
3378 
3379 
3380 /*
3381  * Function:
3382  *	vdc_is_supported_version()
3383  *
3384  * Description:
3385  *	This routine checks if the major/minor version numbers specified in
3386  *	'ver_msg' are supported. If not it finds the next version that is
3387  *	in the supported version list 'vdc_version[]' and sets the fields in
3388  *	'ver_msg' to those values
3389  *
3390  * Arguments:
3391  *	ver_msg	- LDC message sent by vDisk server
3392  *
3393  * Return Code:
3394  *	B_TRUE	- Success
3395  *	B_FALSE	- Version not supported
3396  */
3397 static boolean_t
3398 vdc_is_supported_version(vio_ver_msg_t *ver_msg)
3399 {
3400 	int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]);
3401 
3402 	for (int i = 0; i < vdc_num_versions; i++) {
3403 		ASSERT(vdc_version[i].major > 0);
3404 		ASSERT((i == 0) ||
3405 		    (vdc_version[i].major < vdc_version[i-1].major));
3406 
3407 		/*
3408 		 * If the major versions match, adjust the minor version, if
3409 		 * necessary, down to the highest value supported by this
3410 		 * client. The server should support all minor versions lower
3411 		 * than the value it sent
3412 		 */
3413 		if (ver_msg->ver_major == vdc_version[i].major) {
3414 			if (ver_msg->ver_minor > vdc_version[i].minor) {
3415 				PR0("Adjusting minor version from %u to %u",
3416 				    ver_msg->ver_minor, vdc_version[i].minor);
3417 				ver_msg->ver_minor = vdc_version[i].minor;
3418 			}
3419 			return (B_TRUE);
3420 		}
3421 
3422 		/*
3423 		 * If the message contains a higher major version number, set
3424 		 * the message's major/minor versions to the current values
3425 		 * and return false, so this message will get resent with
3426 		 * these values, and the server will potentially try again
3427 		 * with the same or a lower version
3428 		 */
3429 		if (ver_msg->ver_major > vdc_version[i].major) {
3430 			ver_msg->ver_major = vdc_version[i].major;
3431 			ver_msg->ver_minor = vdc_version[i].minor;
3432 			PR0("Suggesting major/minor (0x%x/0x%x)\n",
3433 				ver_msg->ver_major, ver_msg->ver_minor);
3434 
3435 			return (B_FALSE);
3436 		}
3437 
3438 		/*
3439 		 * Otherwise, the message's major version is less than the
3440 		 * current major version, so continue the loop to the next
3441 		 * (lower) supported version
3442 		 */
3443 	}
3444 
3445 	/*
3446 	 * No common version was found; "ground" the version pair in the
3447 	 * message to terminate negotiation
3448 	 */
3449 	ver_msg->ver_major = 0;
3450 	ver_msg->ver_minor = 0;
3451 
3452 	return (B_FALSE);
3453 }
3454 /* -------------------------------------------------------------------------- */
3455 
3456 /*
3457  * DKIO(7) support
3458  */
3459 
3460 typedef struct vdc_dk_arg {
3461 	struct dk_callback	dkc;
3462 	int			mode;
3463 	dev_t			dev;
3464 	vdc_t			*vdc;
3465 } vdc_dk_arg_t;
3466 
3467 /*
3468  * Function:
3469  * 	vdc_dkio_flush_cb()
3470  *
3471  * Description:
3472  *	This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
3473  *	by kernel code.
3474  *
3475  * Arguments:
3476  *	arg	- a pointer to a vdc_dk_arg_t structure.
3477  */
3478 void
3479 vdc_dkio_flush_cb(void *arg)
3480 {
3481 	struct vdc_dk_arg	*dk_arg = (struct vdc_dk_arg *)arg;
3482 	struct dk_callback	*dkc = NULL;
3483 	vdc_t			*vdc = NULL;
3484 	int			rv;
3485 
3486 	if (dk_arg == NULL) {
3487 		vdc_msg("%s[?] DKIOCFLUSHWRITECACHE arg is NULL\n", __func__);
3488 		return;
3489 	}
3490 	dkc = &dk_arg->dkc;
3491 	vdc = dk_arg->vdc;
3492 	ASSERT(vdc != NULL);
3493 
3494 	rv = vdc_populate_descriptor(vdc, NULL, 0, VD_OP_FLUSH,
3495 		dk_arg->mode, SDPART(getminor(dk_arg->dev)));
3496 	if (rv != 0) {
3497 		PR0("%s[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
3498 			__func__, vdc->instance, rv,
3499 			ddi_model_convert_from(dk_arg->mode & FMODELS));
3500 	}
3501 
3502 	/*
3503 	 * Trigger the call back to notify the caller the the ioctl call has
3504 	 * been completed.
3505 	 */
3506 	if ((dk_arg->mode & FKIOCTL) &&
3507 	    (dkc != NULL) &&
3508 	    (dkc->dkc_callback != NULL)) {
3509 		ASSERT(dkc->dkc_cookie != NULL);
3510 		(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
3511 	}
3512 
3513 	/* Indicate that one less DKIO write flush is outstanding */
3514 	mutex_enter(&vdc->lock);
3515 	vdc->dkio_flush_pending--;
3516 	ASSERT(vdc->dkio_flush_pending >= 0);
3517 	mutex_exit(&vdc->lock);
3518 
3519 	/* free the mem that was allocated when the callback was dispatched */
3520 	kmem_free(arg, sizeof (vdc_dk_arg_t));
3521 }
3522 
3523 /*
3524  * This structure is used in the DKIO(7I) array below.
3525  */
3526 typedef struct vdc_dk_ioctl {
3527 	uint8_t		op;		/* VD_OP_XXX value */
3528 	int		cmd;		/* Solaris ioctl operation number */
3529 	size_t		nbytes;		/* size of structure to be copied */
3530 
3531 	/* function to convert between vDisk and Solaris structure formats */
3532 	int	(*convert)(void *vd_buf, void *ioctl_arg, int mode, int dir);
3533 } vdc_dk_ioctl_t;
3534 
3535 /*
3536  * Subset of DKIO(7I) operations currently supported
3537  */
3538 static vdc_dk_ioctl_t	dk_ioctl[] = {
3539 	{VD_OP_FLUSH,		DKIOCFLUSHWRITECACHE,	sizeof (int),
3540 		vdc_null_copy_func},
3541 	{VD_OP_GET_WCE,		DKIOCGETWCE,		sizeof (int),
3542 		vdc_null_copy_func},
3543 	{VD_OP_SET_WCE,		DKIOCSETWCE,		sizeof (int),
3544 		vdc_null_copy_func},
3545 	{VD_OP_GET_VTOC,	DKIOCGVTOC,		sizeof (vd_vtoc_t),
3546 		vdc_get_vtoc_convert},
3547 	{VD_OP_SET_VTOC,	DKIOCSVTOC,		sizeof (vd_vtoc_t),
3548 		vdc_set_vtoc_convert},
3549 	{VD_OP_SET_DISKGEOM,	DKIOCSGEOM,		sizeof (vd_geom_t),
3550 		vdc_get_geom_convert},
3551 	{VD_OP_GET_DISKGEOM,	DKIOCGGEOM,		sizeof (vd_geom_t),
3552 		vdc_get_geom_convert},
3553 	{VD_OP_GET_DISKGEOM,	DKIOCG_PHYGEOM,		sizeof (vd_geom_t),
3554 		vdc_get_geom_convert},
3555 	{VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM,		sizeof (vd_geom_t),
3556 		vdc_get_geom_convert},
3557 	{VD_OP_SET_DISKGEOM, DKIOCSGEOM,		sizeof (vd_geom_t),
3558 		vdc_set_geom_convert},
3559 
3560 	/*
3561 	 * These particular ioctls are not sent to the server - vdc fakes up
3562 	 * the necessary info.
3563 	 */
3564 	{0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func},
3565 	{0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func},
3566 	{0, USCSICMD,	sizeof (struct uscsi_cmd), vdc_null_copy_func},
3567 	{0, DKIOCREMOVABLE, 0, vdc_null_copy_func},
3568 	{0, CDROMREADOFFSET, 0, vdc_null_copy_func}
3569 };
3570 
3571 /*
3572  * Function:
3573  *	vd_process_ioctl()
3574  *
3575  * Description:
3576  *	This routine processes disk specific ioctl calls
3577  *
3578  * Arguments:
3579  *	dev	- the device number
3580  *	cmd	- the operation [dkio(7I)] to be processed
3581  *	arg	- pointer to user provided structure
3582  *		  (contains data to be set or reference parameter for get)
3583  *	mode	- bit flag, indicating open settings, 32/64 bit type, etc
3584  *
3585  * Return Code:
3586  *	0
3587  *	EFAULT
3588  *	ENXIO
3589  *	EIO
3590  *	ENOTSUP
3591  */
3592 static int
3593 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode)
3594 {
3595 	int		instance = SDUNIT(getminor(dev));
3596 	vdc_t		*vdc = NULL;
3597 	int		rv = -1;
3598 	int		idx = 0;		/* index into dk_ioctl[] */
3599 	size_t		len = 0;		/* #bytes to send to vds */
3600 	size_t		alloc_len = 0;		/* #bytes to allocate mem for */
3601 	caddr_t		mem_p = NULL;
3602 	size_t		nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0]));
3603 
3604 	PR0("%s: Processing ioctl(%x) for dev %x : model %x\n",
3605 		__func__, cmd, dev, ddi_model_convert_from(mode & FMODELS));
3606 
3607 	vdc = ddi_get_soft_state(vdc_state, instance);
3608 	if (vdc == NULL) {
3609 		cmn_err(CE_NOTE, "![%d] Could not get soft state structure",
3610 		    instance);
3611 		return (ENXIO);
3612 	}
3613 
3614 	/*
3615 	 * Check to see if we can communicate with the vDisk server
3616 	 */
3617 	if (!vdc_is_able_to_tx_data(vdc, O_NONBLOCK)) {
3618 		PR0("%s[%d] Not ready to transmit data\n", __func__, instance);
3619 		return (ENOLINK);
3620 	}
3621 
3622 	/*
3623 	 * Validate the ioctl operation to be performed.
3624 	 *
3625 	 * If we have looped through the array without finding a match then we
3626 	 * don't support this ioctl.
3627 	 */
3628 	for (idx = 0; idx < nioctls; idx++) {
3629 		if (cmd == dk_ioctl[idx].cmd)
3630 			break;
3631 	}
3632 
3633 	if (idx >= nioctls) {
3634 		PR0("%s[%d] Unsupported ioctl(%x)\n",
3635 				__func__, vdc->instance, cmd);
3636 		return (ENOTSUP);
3637 	}
3638 
3639 	len = dk_ioctl[idx].nbytes;
3640 
3641 	/*
3642 	 * Deal with the ioctls which the server does not provide. vdc can
3643 	 * fake these up and return immediately
3644 	 */
3645 	switch (cmd) {
3646 	case CDROMREADOFFSET:
3647 	case DKIOCREMOVABLE:
3648 	case USCSICMD:
3649 		return (ENOTTY);
3650 
3651 	case DKIOCINFO:
3652 		{
3653 			struct dk_cinfo	cinfo;
3654 			if (vdc->cinfo == NULL)
3655 				return (ENXIO);
3656 
3657 			bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo));
3658 			cinfo.dki_partition = SDPART(getminor(dev));
3659 
3660 			rv = ddi_copyout(&cinfo, (void *)arg,
3661 					sizeof (struct dk_cinfo), mode);
3662 			if (rv != 0)
3663 				return (EFAULT);
3664 
3665 			return (0);
3666 		}
3667 
3668 	case DKIOCGMEDIAINFO:
3669 		{
3670 			if (vdc->minfo == NULL)
3671 				return (ENXIO);
3672 
3673 			rv = ddi_copyout(vdc->minfo, (void *)arg,
3674 					sizeof (struct dk_minfo), mode);
3675 			if (rv != 0)
3676 				return (EFAULT);
3677 
3678 			return (0);
3679 		}
3680 
3681 	case DKIOCFLUSHWRITECACHE:
3682 		{
3683 			struct dk_callback *dkc = (struct dk_callback *)arg;
3684 			vdc_dk_arg_t	*dkarg = NULL;
3685 
3686 			PR1("[%d] Flush W$: mode %x\n", instance, mode);
3687 
3688 			/*
3689 			 * If the backing device is not a 'real' disk then the
3690 			 * W$ operation request to the vDisk server will fail
3691 			 * so we might as well save the cycles and return now.
3692 			 */
3693 			if (vdc->vdisk_type != VD_DISK_TYPE_DISK)
3694 				return (ENOTTY);
3695 
3696 			/*
3697 			 * If arg is NULL, then there is no callback function
3698 			 * registered and the call operates synchronously; we
3699 			 * break and continue with the rest of the function and
3700 			 * wait for vds to return (i.e. after the request to
3701 			 * vds returns successfully, all writes completed prior
3702 			 * to the ioctl will have been flushed from the disk
3703 			 * write cache to persistent media.
3704 			 *
3705 			 * If a callback function is registered, we dispatch
3706 			 * the request on a task queue and return immediately.
3707 			 * The callback will deal with informing the calling
3708 			 * thread that the flush request is completed.
3709 			 */
3710 			if (dkc == NULL)
3711 				break;
3712 
3713 			dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP);
3714 
3715 			dkarg->mode = mode;
3716 			dkarg->dev = dev;
3717 			bcopy(dkc, &dkarg->dkc, sizeof (*dkc));
3718 
3719 			mutex_enter(&vdc->lock);
3720 			vdc->dkio_flush_pending++;
3721 			dkarg->vdc = vdc;
3722 			mutex_exit(&vdc->lock);
3723 
3724 			/* put the request on a task queue */
3725 			rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb,
3726 				(void *)dkarg, DDI_SLEEP);
3727 
3728 			return (rv == NULL ? ENOMEM : 0);
3729 		}
3730 	}
3731 
3732 	/* catch programming error in vdc - should be a VD_OP_XXX ioctl */
3733 	ASSERT(dk_ioctl[idx].op != 0);
3734 
3735 	/* LDC requires that the memory being mapped is 8-byte aligned */
3736 	alloc_len = P2ROUNDUP(len, sizeof (uint64_t));
3737 	PR1("%s[%d]: struct size %d alloc %d\n",
3738 			__func__, instance, len, alloc_len);
3739 
3740 	ASSERT(alloc_len != 0);	/* sanity check */
3741 	mem_p = kmem_zalloc(alloc_len, KM_SLEEP);
3742 
3743 	/*
3744 	 * Call the conversion function for this ioctl whhich if necessary
3745 	 * converts from the Solaris format to the format ARC'ed
3746 	 * as part of the vDisk protocol (FWARC 2006/195)
3747 	 */
3748 	ASSERT(dk_ioctl[idx].convert != NULL);
3749 	rv = (dk_ioctl[idx].convert)(arg, mem_p, mode, VD_COPYIN);
3750 	if (rv != 0) {
3751 		PR0("%s[%d]: convert returned %d for ioctl 0x%x\n",
3752 				__func__, instance, rv, cmd);
3753 		if (mem_p != NULL)
3754 			kmem_free(mem_p, alloc_len);
3755 		return (rv);
3756 	}
3757 
3758 	/*
3759 	 * send request to vds to service the ioctl.
3760 	 */
3761 	rv = vdc_populate_descriptor(vdc, mem_p, alloc_len, dk_ioctl[idx].op,
3762 			mode, SDPART((getminor(dev))));
3763 	if (rv != 0) {
3764 		/*
3765 		 * This is not necessarily an error. The ioctl could
3766 		 * be returning a value such as ENOTTY to indicate
3767 		 * that the ioctl is not applicable.
3768 		 */
3769 		PR0("%s[%d]: vds returned %d for ioctl 0x%x\n",
3770 			__func__, instance, rv, cmd);
3771 		if (mem_p != NULL)
3772 			kmem_free(mem_p, alloc_len);
3773 		return (rv);
3774 	}
3775 
3776 	/*
3777 	 * If the VTOC has been changed, then vdc needs to update the copy
3778 	 * it saved in the soft state structure and try and update the device
3779 	 * node properties. Failing to set the properties should not cause
3780 	 * an error to be return the caller though.
3781 	 */
3782 	if (cmd == DKIOCSVTOC) {
3783 		bcopy(mem_p, vdc->vtoc, sizeof (struct vtoc));
3784 		if (vdc_create_device_nodes_props(vdc)) {
3785 			cmn_err(CE_NOTE, "![%d] Failed to update device nodes"
3786 				" properties", instance);
3787 		}
3788 	}
3789 
3790 	/*
3791 	 * Call the conversion function (if it exists) for this ioctl
3792 	 * which converts from the format ARC'ed as part of the vDisk
3793 	 * protocol (FWARC 2006/195) back to a format understood by
3794 	 * the rest of Solaris.
3795 	 */
3796 	rv = (dk_ioctl[idx].convert)(mem_p, arg, mode, VD_COPYOUT);
3797 	if (rv != 0) {
3798 		PR0("%s[%d]: convert returned %d for ioctl 0x%x\n",
3799 				__func__, instance, rv, cmd);
3800 		if (mem_p != NULL)
3801 			kmem_free(mem_p, alloc_len);
3802 		return (rv);
3803 	}
3804 
3805 	if (mem_p != NULL)
3806 		kmem_free(mem_p, alloc_len);
3807 
3808 	return (rv);
3809 }
3810 
3811 /*
3812  * Function:
3813  *
3814  * Description:
3815  *	This is an empty conversion function used by ioctl calls which
3816  *	do not need to convert the data being passed in/out to userland
3817  */
3818 static int
3819 vdc_null_copy_func(void *from, void *to, int mode, int dir)
3820 {
3821 	_NOTE(ARGUNUSED(from))
3822 	_NOTE(ARGUNUSED(to))
3823 	_NOTE(ARGUNUSED(mode))
3824 	_NOTE(ARGUNUSED(dir))
3825 
3826 	return (0);
3827 }
3828 
3829 /*
3830  * Function:
3831  *	vdc_get_vtoc_convert()
3832  *
3833  * Description:
3834  *	This routine fakes up the disk info needed for some DKIO ioctls.
3835  *
3836  * Arguments:
3837  *	from	- the buffer containing the data to be copied from
3838  *	to	- the buffer to be copied to
3839  *	mode	- flags passed to ioctl() call
3840  *	dir	- the "direction" of the copy - VD_COPYIN or VD_COPYOUT
3841  *
3842  * Return Code:
3843  *	0	- Success
3844  *	ENXIO	- incorrect buffer passed in.
3845  *	EFAULT	- ddi_copyxxx routine encountered an error.
3846  */
3847 static int
3848 vdc_get_vtoc_convert(void *from, void *to, int mode, int dir)
3849 {
3850 	void		*tmp_mem = NULL;
3851 	void		*tmp_memp;
3852 	struct vtoc	vt;
3853 	struct vtoc32	vt32;
3854 	int		copy_len = 0;
3855 	int		rv = 0;
3856 
3857 	if (dir != VD_COPYOUT)
3858 		return (0);	/* nothing to do */
3859 
3860 	if ((from == NULL) || (to == NULL))
3861 		return (ENXIO);
3862 
3863 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
3864 		copy_len = sizeof (struct vtoc32);
3865 	else
3866 		copy_len = sizeof (struct vtoc);
3867 
3868 	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);
3869 
3870 	VD_VTOC2VTOC((vd_vtoc_t *)from, &vt);
3871 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
3872 		vtoctovtoc32(vt, vt32);
3873 		tmp_memp = &vt32;
3874 	} else {
3875 		tmp_memp = &vt;
3876 	}
3877 	rv = ddi_copyout(tmp_memp, to, copy_len, mode);
3878 	if (rv != 0)
3879 		rv = EFAULT;
3880 
3881 	kmem_free(tmp_mem, copy_len);
3882 	return (rv);
3883 }
3884 
3885 /*
3886  * Function:
3887  *	vdc_set_vtoc_convert()
3888  *
3889  * Description:
3890  *
3891  * Arguments:
3892  *	from	- Buffer with data
3893  *	to	- Buffer where data is to be copied to
3894  *	mode	- flags passed to ioctl
3895  *	dir	- direction of copy (in or out)
3896  *
3897  * Return Code:
3898  *	0	- Success
3899  *	ENXIO	- Invalid buffer passed in
3900  *	EFAULT	- ddi_copyin of data failed
3901  */
3902 static int
3903 vdc_set_vtoc_convert(void *from, void *to, int mode, int dir)
3904 {
3905 	void		*tmp_mem = NULL;
3906 	struct vtoc	vt;
3907 	struct vtoc	*vtp = &vt;
3908 	vd_vtoc_t	vtvd;
3909 	int		copy_len = 0;
3910 	int		rv = 0;
3911 
3912 	if (dir != VD_COPYIN)
3913 		return (0);	/* nothing to do */
3914 
3915 	if ((from == NULL) || (to == NULL))
3916 		return (ENXIO);
3917 
3918 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32)
3919 		copy_len = sizeof (struct vtoc32);
3920 	else
3921 		copy_len = sizeof (struct vtoc);
3922 
3923 	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);
3924 
3925 	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
3926 	if (rv != 0) {
3927 		kmem_free(tmp_mem, copy_len);
3928 		return (EFAULT);
3929 	}
3930 
3931 	if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
3932 		vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt);
3933 	} else {
3934 		vtp = tmp_mem;
3935 	}
3936 
3937 	VTOC2VD_VTOC(vtp, &vtvd);
3938 	bcopy(&vtvd, to, sizeof (vd_vtoc_t));
3939 	kmem_free(tmp_mem, copy_len);
3940 
3941 	return (0);
3942 }
3943 
3944 /*
3945  * Function:
3946  *	vdc_get_geom_convert()
3947  *
3948  * Description:
3949  *
3950  * Arguments:
3951  *	from	- Buffer with data
3952  *	to	- Buffer where data is to be copied to
3953  *	mode	- flags passed to ioctl
3954  *	dir	- direction of copy (in or out)
3955  *
3956  * Return Code:
3957  *	0	- Success
3958  *	ENXIO	- Invalid buffer passed in
3959  *	EFAULT	- ddi_copyin of data failed
3960  */
3961 static int
3962 vdc_get_geom_convert(void *from, void *to, int mode, int dir)
3963 {
3964 	struct dk_geom	geom;
3965 	int	copy_len = sizeof (struct dk_geom);
3966 	int	rv = 0;
3967 
3968 	if (dir != VD_COPYOUT)
3969 		return (0);	/* nothing to do */
3970 
3971 	if ((from == NULL) || (to == NULL))
3972 		return (ENXIO);
3973 
3974 	VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom);
3975 	rv = ddi_copyout(&geom, to, copy_len, mode);
3976 	if (rv != 0)
3977 		rv = EFAULT;
3978 
3979 	return (rv);
3980 }
3981 
3982 /*
3983  * Function:
3984  *	vdc_set_geom_convert()
3985  *
3986  * Description:
3987  *	This routine performs the necessary convertions from the DKIOCSVTOC
3988  *	Solaris structure to the format defined in FWARC 2006/195
3989  *
3990  * Arguments:
3991  *	from	- Buffer with data
3992  *	to	- Buffer where data is to be copied to
3993  *	mode	- flags passed to ioctl
3994  *	dir	- direction of copy (in or out)
3995  *
3996  * Return Code:
3997  *	0	- Success
3998  *	ENXIO	- Invalid buffer passed in
3999  *	EFAULT	- ddi_copyin of data failed
4000  */
4001 static int
4002 vdc_set_geom_convert(void *from, void *to, int mode, int dir)
4003 {
4004 	vd_geom_t	vdgeom;
4005 	void		*tmp_mem = NULL;
4006 	int		copy_len = sizeof (struct dk_geom);
4007 	int		rv = 0;
4008 
4009 	if (dir != VD_COPYIN)
4010 		return (0);	/* nothing to do */
4011 
4012 	if ((from == NULL) || (to == NULL))
4013 		return (ENXIO);
4014 
4015 	tmp_mem = kmem_alloc(copy_len, KM_SLEEP);
4016 
4017 	rv = ddi_copyin(from, tmp_mem, copy_len, mode);
4018 	if (rv != 0) {
4019 		kmem_free(tmp_mem, copy_len);
4020 		return (EFAULT);
4021 	}
4022 	DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom);
4023 	bcopy(&vdgeom, to, sizeof (vdgeom));
4024 	kmem_free(tmp_mem, copy_len);
4025 
4026 	return (0);
4027 }
4028 
4029 /*
4030  * Function:
4031  *	vdc_create_fake_geometry()
4032  *
4033  * Description:
4034  *	This routine fakes up the disk info needed for some DKIO ioctls.
4035  *		- DKIOCINFO
4036  *		- DKIOCGMEDIAINFO
4037  *
4038  *	[ just like lofi(7D) and ramdisk(7D) ]
4039  *
4040  * Arguments:
4041  *	vdc	- soft state pointer for this instance of the device driver.
4042  *
4043  * Return Code:
4044  *	0	- Success
4045  */
4046 static int
4047 vdc_create_fake_geometry(vdc_t *vdc)
4048 {
4049 	int	rv = 0;
4050 
4051 	ASSERT(vdc != NULL);
4052 
4053 	/*
4054 	 * DKIOCINFO support
4055 	 */
4056 	vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
4057 
4058 	(void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME);
4059 	(void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME);
4060 	/* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */
4061 	vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz;
4062 	vdc->cinfo->dki_ctype = DKC_SCSI_CCS;
4063 	vdc->cinfo->dki_flags = DKI_FMTVOL;
4064 	vdc->cinfo->dki_cnum = 0;
4065 	vdc->cinfo->dki_addr = 0;
4066 	vdc->cinfo->dki_space = 0;
4067 	vdc->cinfo->dki_prio = 0;
4068 	vdc->cinfo->dki_vec = 0;
4069 	vdc->cinfo->dki_unit = vdc->instance;
4070 	vdc->cinfo->dki_slave = 0;
4071 	/*
4072 	 * The partition number will be created on the fly depending on the
4073 	 * actual slice (i.e. minor node) that is used to request the data.
4074 	 */
4075 	vdc->cinfo->dki_partition = 0;
4076 
4077 	/*
4078 	 * DKIOCGMEDIAINFO support
4079 	 */
4080 	if (vdc->minfo == NULL)
4081 		vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);
4082 	vdc->minfo->dki_media_type = DK_FIXED_DISK;
4083 	vdc->minfo->dki_capacity = 1;
4084 	vdc->minfo->dki_lbsize = DEV_BSIZE;
4085 
4086 	return (rv);
4087 }
4088 
4089 /*
4090  * Function:
4091  *	vdc_setup_disk_layout()
4092  *
4093  * Description:
4094  *	This routine discovers all the necessary details about the "disk"
4095  *	by requesting the data that is available from the vDisk server and by
4096  *	faking up the rest of the data.
4097  *
4098  * Arguments:
4099  *	vdc	- soft state pointer for this instance of the device driver.
4100  *
4101  * Return Code:
4102  *	0	- Success
4103  */
4104 static int
4105 vdc_setup_disk_layout(vdc_t *vdc)
4106 {
4107 	dev_t	dev;
4108 	int	slice = 0;
4109 	int	rv;
4110 
4111 	ASSERT(vdc != NULL);
4112 
4113 	rv = vdc_create_fake_geometry(vdc);
4114 	if (rv != 0) {
4115 		cmn_err(CE_NOTE, "[%d] Failed to create disk geometry (err%d)",
4116 				vdc->instance, rv);
4117 	}
4118 
4119 	if (vdc->vtoc == NULL)
4120 		vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);
4121 
4122 	dev = makedevice(ddi_driver_major(vdc->dip),
4123 				VD_MAKE_DEV(vdc->instance, 0));
4124 	rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL);
4125 	if (rv) {
4126 		cmn_err(CE_NOTE, "[%d] Failed to get VTOC (err=%d)",
4127 				vdc->instance, rv);
4128 		return (rv);
4129 	}
4130 
4131 	/*
4132 	 * Read disk label from start of disk
4133 	 */
4134 	vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP);
4135 
4136 	/*
4137 	 * find the slice that represents the entire "disk" and use that to
4138 	 * read the disk label. The convention in Solaris is that slice 2
4139 	 * represents the whole disk so we check that it is otherwise we
4140 	 * default to slice 0
4141 	 */
4142 	if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) &&
4143 	    (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) {
4144 		slice = 2;
4145 	} else {
4146 		slice = 0;
4147 	}
4148 	rv = vdc_populate_descriptor(vdc, (caddr_t)vdc->label, DK_LABEL_SIZE,
4149 			VD_OP_BREAD, 0, slice);
4150 
4151 	return (rv);
4152 }
4153