xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 8668df41d90e075636bc3817b28ad77cbd470959)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
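
/*
 * Typical consumer usage (sketch only; the exact ldc_attr_t fields are
 * defined in ldc.h, and channel_id, my_callback and my_arg below are
 * placeholders; error handling omitted):
 *
 *	ldc_attr_t	attr;
 *	ldc_handle_t	handle;
 *
 *	attr.devclass = LDC_DEV_GENERIC;
 *	attr.instance = 0;
 *	attr.mode = LDC_MODE_UNRELIABLE;
 *	attr.mtu = 4096;
 *
 *	(void) ldc_init(channel_id, &attr, &handle);
 *	(void) ldc_reg_callback(handle, my_callback, my_arg);
 *	(void) ldc_open(handle);
 */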
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
71 /* Core internal functions */
72 static int i_ldc_h2v_error(int h_error);
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
76 static void i_ldc_reset_state(ldc_chan_t *ldcp);
77 static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
78 
79 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
80 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
81 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
82 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
83     uint8_t ctrlmsg);
84 
85 /* Interrupt handling functions */
86 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
87 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
88 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
89 
90 /* Read method functions */
91 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
92 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
93 	size_t *sizep);
94 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
95 	size_t *sizep);
96 
97 /* Write method functions */
98 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
99 	size_t *sizep);
100 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
101 	size_t *sizep);
102 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
103 	size_t *sizep);
104 
105 /* Pkt processing internal functions */
106 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
107 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
108 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
109 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
110 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
111 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
112 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
113 
114 /* Memory synchronization internal functions */
115 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
116     uint8_t direction, uint64_t offset, size_t size);
117 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
118     uint8_t direction, uint64_t start, uint64_t end);
119 
120 /* LDC Version */
121 static ldc_ver_t ldc_versions[] = { {1, 0} };
122 
123 /* number of supported versions */
124 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
125 
126 /* Module State Pointer */
127 static ldc_soft_state_t *ldcssp;
128 
129 static struct modldrv md = {
130 	&mod_miscops,			/* This is a misc module */
131 	"sun4v LDC module v%I%",	/* Name of the module */
132 };
133 
134 static struct modlinkage ml = {
135 	MODREV_1,
136 	&md,
137 	NULL
138 };
139 
140 static uint64_t ldc_sup_minor;		/* Supported minor number */
141 static hsvc_info_t ldc_hsvc = {
142 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
143 };
144 
145 static uint64_t intr_sup_minor;		/* Supported minor number */
146 static hsvc_info_t intr_hsvc = {
147 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
148 };
149 
150 /*
151  * LDC framework supports mapping remote domain's memory
152  * either directly or via shadow memory pages. Default
153  * support is currently implemented via shadow copy.
154  * Direct map can be enabled by setting 'ldc_shmem_enabled'
155  */
156 int ldc_shmem_enabled = 0;
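
/*
 * For example (sketch), direct map could be enabled by adding
 *	set ldc:ldc_shmem_enabled = 1
 * to /etc/system and rebooting.
 */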
157 
158 /*
159  * The number of MTU-sized messages that can be stored in
160  * the LDC Tx queue. The number of Tx queue entries is
161  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
162  */
163 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
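
/*
 * Worked example (illustrative values only): with an mtu of 4096
 * bytes, ldc_mtu_msgs of 4 and 64-byte queue entries, the Tx queue
 * would hold (4096 * 4) / 64 = 256 entries.
 */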
164 
165 /*
166  * The minimum queue length. This is the size of the smallest
167  * LDC queue. If the computed value is less than this default,
168  * the queue length is rounded up to 'ldc_queue_entries'.
169  */
170 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
171 
172 /*
173  * Pages exported for remote access over each channel are
174  * maintained in a table registered with the Hypervisor.
175  * The default number of entries in the table is set by
176  * 'ldc_maptable_entries'.
177  */
178 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
179 
180 /*
181  * LDC retry count and delay - when the HV returns EWOULDBLOCK
182  * the operation is retried 'ldc_max_retries' times with a
183  * wait of 'ldc_delay' usecs between each retry.
184  */
185 int ldc_max_retries = LDC_MAX_RETRIES;
186 clock_t ldc_delay = LDC_DELAY;
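
/*
 * Worst case, a blocked HV call thus waits approximately
 * ldc_max_retries * ldc_delay usecs (plus the time spent in the
 * retried calls themselves) before the operation fails.
 */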
187 
188 /*
189  * delay between each retry of channel unregistration in
190  * ldc_close(), to wait for pending interrupts to complete.
191  */
192 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
193 
194 #ifdef DEBUG
195 
196 /*
197  * Print debug messages
198  *
199  * set ldcdbg to 0x7 to enable all msgs
200  * 0x4 - Warnings
201  * 0x2 - All debug messages
202  * 0x1 - Minimal debug messages
203  *
204  * set ldcdbgchan to the channel number you want to debug
205  * setting it to -1 prints debug messages for all channels
206  * NOTE: ldcdbgchan has no effect on error messages
207  */
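
/*
 * For example (sketch), to get warnings for channel 5 only on a
 * DEBUG kernel, one could add
 *	set ldc:ldcdbg = 0x4
 *	set ldc:ldcdbgchan = 5
 * to /etc/system, or patch the two variables with mdb at runtime.
 */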
208 
209 #define	DBG_ALL_LDCS -1
210 
211 int ldcdbg = 0x0;
212 int64_t ldcdbgchan = DBG_ALL_LDCS;
213 boolean_t ldc_inject_reset_flag = B_FALSE;
214 
215 static void
216 ldcdebug(int64_t id, const char *fmt, ...)
217 {
218 	char buf[512];
219 	va_list ap;
220 
221 	/*
222 	 * Print the message if any of the following is true:
223 	 * - the caller wants it printed regardless (id == DBG_ALL_LDCS)
224 	 * - debugging is enabled for all channels (ldcdbgchan == DBG_ALL_LDCS)
225 	 * - the debug channel matches the caller-specified channel
226 	 */
227 	if ((id != DBG_ALL_LDCS) &&
228 	    (ldcdbgchan != DBG_ALL_LDCS) &&
229 	    (ldcdbgchan != id)) {
230 		return;
231 	}
232 
233 	va_start(ap, fmt);
234 	(void) vsnprintf(buf, sizeof (buf), fmt, ap);
235 	va_end(ap);
236 
237 	cmn_err(CE_CONT, "?%s", buf);
238 }
239 
240 static boolean_t
241 ldc_inject_reset(ldc_chan_t *ldcp)
242 {
243 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
244 		return (B_FALSE);
245 
246 	if (!ldc_inject_reset_flag)
247 		return (B_FALSE);
248 
249 	/* clear the injection state */
250 	ldc_inject_reset_flag = B_FALSE;
251 
252 	return (B_TRUE);
253 }
254 
255 #define	D1		\
256 if (ldcdbg & 0x01)	\
257 	ldcdebug
258 
259 #define	D2		\
260 if (ldcdbg & 0x02)	\
261 	ldcdebug
262 
263 #define	DWARN		\
264 if (ldcdbg & 0x04)	\
265 	ldcdebug
266 
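/*
 * The macros above are used like printf-style calls, e.g. (sketch,
 * ldc_foo is a placeholder):
 *	D1(ldcp->id, "ldc_foo: (0x%llx) done\n", ldcp->id);
 * On DEBUG kernels this expands to a conditional ldcdebug() call,
 * gated by the corresponding ldcdbg bit.
 */
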
267 #define	DUMP_PAYLOAD(id, addr)						\
268 {									\
269 	char buf[65*3];							\
270 	int i;								\
271 	uint8_t *src = (uint8_t *)addr;					\
272 	for (i = 0; i < 64; i++, src++)					\
273 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
274 	(void) sprintf(&buf[i * 3], "|\n");				\
275 	D2((id), "payload: %s", buf);					\
276 }
277 
278 #define	DUMP_LDC_PKT(c, s, addr)					\
279 {									\
280 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
281 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
282 	if (msg->type == LDC_DATA) {                                    \
283 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
284 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
285 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
286 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
287 	    (msg->env & LDC_LEN_MASK));					\
288 	} else { 							\
289 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
290 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
291 	} 								\
292 }
293 
294 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_reset(_ldcp)
295 
296 #else
297 
298 #define	DBG_ALL_LDCS -1
299 
300 #define	D1
301 #define	D2
302 #define	DWARN
303 
304 #define	DUMP_PAYLOAD(id, addr)
305 #define	DUMP_LDC_PKT(c, s, addr)
306 
307 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
308 
309 #endif
310 
311 #define	ZERO_PKT(p)			\
312 	bzero((p), sizeof (ldc_msg_t))
313 
314 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
315 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
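
/*
 * Cookie layout sketch: IDX2COOKIE encodes the page-size code in the
 * bits at and above LDC_COOKIE_PGSZC_SHIFT, with the map-table index
 * shifted left by the page shift so that the low bits remain free for
 * the offset within the page; e.g. for an 8K page (pg_shift of 13)
 * and table index 5, the cookie is
 * (pg_szc << LDC_COOKIE_PGSZC_SHIFT) | (5 << 13).
 */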
316 
317 
318 int
319 _init(void)
320 {
321 	int status;
322 
323 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
324 	if (status != 0) {
325 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
326 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
327 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
328 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
329 		return (-1);
330 	}
331 
332 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
333 	if (status != 0) {
334 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
335 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
336 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
337 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
338 		(void) hsvc_unregister(&ldc_hsvc);
339 		return (-1);
340 	}
341 
342 	/* allocate soft state structure */
343 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
344 
345 	/* Link the module into the system */
346 	status = mod_install(&ml);
347 	if (status != 0) {
348 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
349 		return (status);
350 	}
351 
352 	/* Initialize the LDC state structure */
353 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
354 
355 	mutex_enter(&ldcssp->lock);
356 
357 	/* Create a cache for memory handles */
358 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
359 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
360 	if (ldcssp->memhdl_cache == NULL) {
361 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
362 		mutex_exit(&ldcssp->lock);
363 		return (-1);
364 	}
365 
366 	/* Create cache for memory segment structures */
367 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
368 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
369 	if (ldcssp->memseg_cache == NULL) {
370 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
371 		mutex_exit(&ldcssp->lock);
372 		return (-1);
373 	}
374 
375 
376 	ldcssp->channel_count = 0;
377 	ldcssp->channels_open = 0;
378 	ldcssp->chan_list = NULL;
379 	ldcssp->dring_list = NULL;
380 
381 	mutex_exit(&ldcssp->lock);
382 
383 	return (0);
384 }
385 
386 int
387 _info(struct modinfo *modinfop)
388 {
389 	/* Report status of the dynamically loadable driver module */
390 	return (mod_info(&ml, modinfop));
391 }
392 
393 int
394 _fini(void)
395 {
396 	int 		rv, status;
397 	ldc_chan_t 	*ldcp, *tmp_ldcp;
398 	ldc_dring_t 	*dringp, *tmp_dringp;
399 	ldc_mem_info_t 	minfo;
400 
401 	/* Unlink the driver module from the system */
402 	status = mod_remove(&ml);
403 	if (status) {
404 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
405 		return (EIO);
406 	}
407 
408 	/* close and finalize channels */
409 	ldcp = ldcssp->chan_list;
410 	while (ldcp != NULL) {
411 		tmp_ldcp = ldcp->next;	/* ldc_fini() frees ldcp */
412 		(void) ldc_close((ldc_handle_t)ldcp);
413 		(void) ldc_fini((ldc_handle_t)ldcp);
414 		ldcp = tmp_ldcp;
415 	}
416 
417 	/* Free descriptor rings */
418 	dringp = ldcssp->dring_list;
419 	while (dringp != NULL) {
420 		tmp_dringp = dringp->next;	/* save; dringp is destroyed */
421 
422 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
423 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
424 			if (minfo.status == LDC_BOUND) {
425 				(void) ldc_mem_dring_unbind(
426 						(ldc_dring_handle_t)dringp);
427 			}
428 			if (minfo.status == LDC_MAPPED) {
429 				(void) ldc_mem_dring_unmap(
430 						(ldc_dring_handle_t)dringp);
431 			}
432 		}
433 
434 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
		dringp = tmp_dringp;
435 	}
436 	ldcssp->dring_list = NULL;
437 
438 	/* Destroy kmem caches */
439 	kmem_cache_destroy(ldcssp->memhdl_cache);
440 	kmem_cache_destroy(ldcssp->memseg_cache);
441 
442 	/*
443 	 * We have successfully "removed" the driver;
444 	 * destroy the soft state.
445 	 */
446 	mutex_destroy(&ldcssp->lock);
447 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
448 
449 	(void) hsvc_unregister(&ldc_hsvc);
450 	(void) hsvc_unregister(&intr_hsvc);
451 
452 	return (status);
453 }
454 
455 /* -------------------------------------------------------------------------- */
456 
457 /*
458  * LDC Link Layer Internal Functions
459  */
460 
461 /*
462  * Translate HV Errors to sun4v error codes
463  */
464 static int
465 i_ldc_h2v_error(int h_error)
466 {
467 	switch (h_error) {
468 
469 	case	H_EOK:
470 		return (0);
471 
472 	case	H_ENORADDR:
473 		return (EFAULT);
474 
475 	case	H_EBADPGSZ:
476 	case	H_EINVAL:
477 		return (EINVAL);
478 
479 	case	H_EWOULDBLOCK:
480 		return (EWOULDBLOCK);
481 
482 	case	H_ENOACCESS:
483 	case	H_ENOMAP:
484 		return (EACCES);
485 
486 	case	H_EIO:
487 	case	H_ECPUERROR:
488 		return (EIO);
489 
490 	case	H_ENOTSUPPORTED:
491 		return (ENOTSUP);
492 
493 	case 	H_ETOOMANY:
494 		return (ENOSPC);
495 
496 	case	H_ECHANNEL:
497 		return (ECHRNG);
498 	default:
499 		break;
500 	}
501 
502 	return (EIO);
503 }
504 
505 /*
506  * Reconfigure the transmit queue
507  */
508 static int
509 i_ldc_txq_reconf(ldc_chan_t *ldcp)
510 {
511 	int rv;
512 
513 	ASSERT(MUTEX_HELD(&ldcp->lock));
514 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
515 
516 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
517 	if (rv) {
518 		cmn_err(CE_WARN,
519 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
520 		return (EIO);
521 	}
522 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
523 	    &(ldcp->tx_tail), &(ldcp->link_state));
524 	if (rv) {
525 		cmn_err(CE_WARN,
526 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
527 		return (EIO);
528 	}
529 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
530 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
531 	    ldcp->link_state);
532 
533 	return (0);
534 }
535 
536 /*
537  * Reconfigure the receive queue
538  */
539 static int
540 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
541 {
542 	int rv;
543 	uint64_t rx_head, rx_tail;
544 
545 	ASSERT(MUTEX_HELD(&ldcp->lock));
546 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
547 	    &(ldcp->link_state));
548 	if (rv) {
549 		cmn_err(CE_WARN,
550 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
551 		    ldcp->id);
552 		return (EIO);
553 	}
554 
555 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
556 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
557 			ldcp->rx_q_entries);
558 		if (rv) {
559 			cmn_err(CE_WARN,
560 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
561 			    ldcp->id);
562 			return (EIO);
563 		}
564 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
565 		    ldcp->id);
566 	}
567 
568 	return (0);
569 }
570 
571 
572 /*
573  * Drain the contents of the receive queue
574  */
575 static int
576 i_ldc_rxq_drain(ldc_chan_t *ldcp)
577 {
578 	int rv;
579 	uint64_t rx_head, rx_tail;
580 
581 	ASSERT(MUTEX_HELD(&ldcp->lock));
582 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
583 	    &(ldcp->link_state));
584 	if (rv) {
585 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
586 		    ldcp->id);
587 		return (EIO);
588 	}
589 
590 	/* flush contents by setting the head = tail */
591 	return (i_ldc_set_rx_head(ldcp, rx_tail));
592 }
593 
594 
595 /*
596  * Reset LDC state structure and its contents
597  */
598 static void
599 i_ldc_reset_state(ldc_chan_t *ldcp)
600 {
601 	ASSERT(MUTEX_HELD(&ldcp->lock));
602 	ldcp->last_msg_snt = LDC_INIT_SEQID;
603 	ldcp->last_ack_rcd = 0;
604 	ldcp->last_msg_rcd = 0;
605 	ldcp->tx_ackd_head = ldcp->tx_head;
606 	ldcp->next_vidx = 0;
607 	ldcp->hstate = 0;
608 	ldcp->tstate = TS_OPEN;
609 	ldcp->status = LDC_OPEN;
610 
611 	if (ldcp->link_state == LDC_CHANNEL_UP ||
612 	    ldcp->link_state == LDC_CHANNEL_RESET) {
613 
614 		if (ldcp->mode == LDC_MODE_RAW) {
615 			ldcp->status = LDC_UP;
616 			ldcp->tstate = TS_UP;
617 		} else {
618 			ldcp->status = LDC_READY;
619 			ldcp->tstate |= TS_LINK_READY;
620 		}
621 	}
622 }
623 
624 /*
625  * Reset an LDC channel
626  */
627 static void
628 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
629 {
630 	D1(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
631 
632 	ASSERT(MUTEX_HELD(&ldcp->lock));
633 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
634 
635 	/* reconfig Tx and Rx queues */
636 	(void) i_ldc_txq_reconf(ldcp);
637 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
638 
639 	/* Clear Tx and Rx interrupts */
640 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
641 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
642 
643 	/* Reset channel state */
644 	i_ldc_reset_state(ldcp);
645 
646 	/* Mark channel in reset */
647 	ldcp->tstate |= TS_IN_RESET;
648 }
649 
650 
651 /*
652  * Clear pending interrupts
653  */
654 static void
655 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
656 {
657 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
658 
659 	ASSERT(MUTEX_HELD(&ldcp->lock));
660 	ASSERT(cinfo->dip != NULL);
661 
662 	switch (itype) {
663 	case CNEX_TX_INTR:
664 		/* check Tx interrupt */
665 		if (ldcp->tx_intr_state)
666 			ldcp->tx_intr_state = LDC_INTR_NONE;
667 		else
668 			return;
669 		break;
670 
671 	case CNEX_RX_INTR:
672 		/* check Rx interrupt */
673 		if (ldcp->rx_intr_state)
674 			ldcp->rx_intr_state = LDC_INTR_NONE;
675 		else
676 			return;
677 		break;
678 	}
679 
680 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
681 	D2(ldcp->id,
682 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
683 	    ldcp->id, itype);
684 }
685 
686 /*
687  * Set the receive queue head
688  * Resets connection and returns an error if it fails.
689  */
690 static int
691 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
692 {
693 	int 	rv;
694 	int 	retries;
695 
696 	ASSERT(MUTEX_HELD(&ldcp->lock));
697 	for (retries = 0; retries < ldc_max_retries; retries++) {
698 
699 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
700 			return (0);
701 
702 		if (rv != H_EWOULDBLOCK)
703 			break;
704 
705 		/* wait for ldc_delay usecs */
706 		drv_usecwait(ldc_delay);
707 	}
708 
709 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
710 		ldcp->id, head);
711 	mutex_enter(&ldcp->tx_lock);
712 	i_ldc_reset(ldcp, B_TRUE);
713 	mutex_exit(&ldcp->tx_lock);
714 
715 	return (ECONNRESET);
716 }
717 
718 
719 /*
720  * Returns the tx_tail to be used for transfer.
721  * Reads the TX queue ptrs from the HV; returns ECONNRESET if the
722  * link is down and EWOULDBLOCK if the queue is full.
723  */
724 static int
725 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
726 {
727 	int 		rv;
728 	uint64_t 	current_head, new_tail;
729 
730 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
731 	/* Read the head and tail ptrs from HV */
732 	rv = hv_ldc_tx_get_state(ldcp->id,
733 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
734 	if (rv) {
735 		cmn_err(CE_WARN,
736 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
737 		    ldcp->id);
738 		return (EIO);
739 	}
740 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
741 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
742 		    ldcp->id);
743 		return (ECONNRESET);
744 	}
745 
746 	/* In reliable mode, check against last ACKd msg */
747 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
748 		ldcp->mode == LDC_MODE_STREAM)
749 		? ldcp->tx_ackd_head : ldcp->tx_head;
750 
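	/*
	 * Queue-offset arithmetic (sketch): the Tx queue is a ring of
	 * tx_q_entries packets of LDC_PACKET_SIZE bytes each, so all
	 * head/tail offsets wrap modulo
	 * (tx_q_entries << LDC_PACKET_SHIFT) bytes.
	 */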
751 	/* increment the tail */
752 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
753 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
754 
755 	if (new_tail == current_head) {
756 		DWARN(ldcp->id,
757 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
758 		    ldcp->id);
759 		return (EWOULDBLOCK);
760 	}
761 
762 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
763 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
764 
765 	*tail = ldcp->tx_tail;
766 	return (0);
767 }
768 
769 /*
770  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
771  * and retry ldc_max_retries times before returning an error.
772  * Returns 0, EWOULDBLOCK or EIO
773  */
774 static int
775 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
776 {
777 	int		rv, retval = EWOULDBLOCK;
778 	int 		retries;
779 
780 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
781 	for (retries = 0; retries < ldc_max_retries; retries++) {
782 
783 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
784 			retval = 0;
785 			break;
786 		}
787 		if (rv != H_EWOULDBLOCK) {
788 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
789 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
790 			retval = EIO;
791 			break;
792 		}
793 
794 		/* wait for ldc_delay usecs */
795 		drv_usecwait(ldc_delay);
796 	}
797 	return (retval);
798 }
799 
800 /*
801  * Send an LDC message
802  */
803 static int
804 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
805     uint8_t ctrlmsg)
806 {
807 	int		rv;
808 	ldc_msg_t 	*pkt;
809 	uint64_t	tx_tail;
810 	uint32_t	curr_seqid = ldcp->last_msg_snt;
811 
812 	/* Obtain Tx lock */
813 	mutex_enter(&ldcp->tx_lock);
814 
815 	/* get the current tail for the message */
816 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
817 	if (rv) {
818 		DWARN(ldcp->id,
819 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
820 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
821 		    ldcp->id, pkttype, subtype, ctrlmsg);
822 		mutex_exit(&ldcp->tx_lock);
823 		return (rv);
824 	}
825 
826 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
827 	ZERO_PKT(pkt);
828 
829 	/* Initialize the packet */
830 	pkt->type = pkttype;
831 	pkt->stype = subtype;
832 	pkt->ctrl = ctrlmsg;
833 
834 	/* Store seqid/ackid in all non-RAW modes, except for RTS/RTR messages */
835 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
836 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
837 		curr_seqid++;
838 		if (ldcp->mode != LDC_MODE_RAW) {
839 			pkt->seqid = curr_seqid;
840 			pkt->ackid = ldcp->last_msg_rcd;
841 		}
842 	}
843 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
844 
845 	/* initiate the send by calling into HV and set the new tail */
846 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
847 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
848 
849 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
850 	if (rv) {
851 		DWARN(ldcp->id,
852 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
853 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
854 		    ldcp->id, pkttype, subtype, ctrlmsg);
855 		mutex_exit(&ldcp->tx_lock);
856 		return (EIO);
857 	}
858 
859 	ldcp->last_msg_snt = curr_seqid;
860 	ldcp->tx_tail = tx_tail;
861 
862 	mutex_exit(&ldcp->tx_lock);
863 	return (0);
864 }
865 
866 /*
867  * Checks whether a packet was received in the right order
868  * (sequence IDs are used in all modes except RAW).
869  * Returns 0 if in order, else EIO
870  */
871 static int
872 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
873 {
874 	/* No seqid checking for RAW mode */
875 	if (ldcp->mode == LDC_MODE_RAW)
876 		return (0);
877 
878 	/* No seqid checking for version, RTS, RTR message */
879 	if (msg->ctrl == LDC_VER ||
880 	    msg->ctrl == LDC_RTS ||
881 	    msg->ctrl == LDC_RTR)
882 		return (0);
883 
884 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
885 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
886 		DWARN(ldcp->id,
887 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
888 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
889 		    (ldcp->last_msg_rcd + 1));
890 		return (EIO);
891 	}
892 
893 	return (0);
894 }
895 
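/*
 * Handshake overview (as implemented by the routines below): once the
 * link is up, the endpoints negotiate a version via VER INFO/ACK/NACK
 * exchanges, then perform the RTS -> RTR -> RDX exchange; when the RDX
 * is processed, TS_HSHAKE_DONE is set and the channel becomes LDC_UP.
 */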
896 
897 /*
898  * Process an incoming version ctrl message
899  */
900 static int
901 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
902 {
903 	int 		rv = 0, idx = ldcp->next_vidx;
904 	ldc_msg_t 	*pkt;
905 	uint64_t	tx_tail;
906 	ldc_ver_t	*rcvd_ver;
907 
908 	/* get the received version */
909 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
910 
911 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
912 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
913 
914 	/* Obtain Tx lock */
915 	mutex_enter(&ldcp->tx_lock);
916 
917 	switch (msg->stype) {
918 	case LDC_INFO:
919 
920 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
921 			(void) i_ldc_txq_reconf(ldcp);
922 			i_ldc_reset_state(ldcp);
923 			mutex_exit(&ldcp->tx_lock);
924 			return (EAGAIN);
925 		}
926 
927 		/* get the current tail and pkt for the response */
928 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
929 		if (rv != 0) {
930 			DWARN(ldcp->id,
931 			    "i_ldc_process_VER: (0x%llx) err sending "
932 			    "version ACK/NACK\n", ldcp->id);
933 			i_ldc_reset(ldcp, B_TRUE);
934 			mutex_exit(&ldcp->tx_lock);
935 			return (ECONNRESET);
936 		}
937 
938 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
939 		ZERO_PKT(pkt);
940 
941 		/* initialize the packet */
942 		pkt->type = LDC_CTRL;
943 		pkt->ctrl = LDC_VER;
944 
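		/*
		 * Walk our (descending) ldc_versions[] list: ACK on a
		 * major match (clamping the minor to what we support),
		 * NACK with our next lower version if the peer's major
		 * is higher, and NACK with a zeroed version once the
		 * list is exhausted.
		 */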
945 		for (;;) {
946 
947 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
948 			    rcvd_ver->major, rcvd_ver->minor,
949 			    ldc_versions[idx].major, ldc_versions[idx].minor);
950 
951 			if (rcvd_ver->major == ldc_versions[idx].major) {
952 				/* major version match - ACK version */
953 				pkt->stype = LDC_ACK;
954 
955 				/*
956 				 * lower minor version to the one this endpt
957 				 * supports, if necessary
958 				 */
959 				if (rcvd_ver->minor > ldc_versions[idx].minor)
960 					rcvd_ver->minor =
961 						ldc_versions[idx].minor;
962 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
963 
964 				break;
965 			}
966 
967 			if (rcvd_ver->major > ldc_versions[idx].major) {
968 
969 				D1(ldcp->id, "i_ldc_process_VER: using next"
970 				    " lower idx=%d, v%u.%u\n", idx,
971 				    ldc_versions[idx].major,
972 				    ldc_versions[idx].minor);
973 
974 				/* nack with next lower version */
975 				pkt->stype = LDC_NACK;
976 				bcopy(&ldc_versions[idx], pkt->udata,
977 				    sizeof (ldc_versions[idx]));
978 				ldcp->next_vidx = idx;
979 				break;
980 			}
981 
982 			/* next major version */
983 			idx++;
984 
985 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
986 
987 			if (idx == LDC_NUM_VERS) {
988 				/* no version match - send NACK */
989 				pkt->stype = LDC_NACK;
990 				bzero(pkt->udata, sizeof (ldc_ver_t));
991 				ldcp->next_vidx = 0;
992 				break;
993 			}
994 		}
995 
996 		/* initiate the send by calling into HV and set the new tail */
997 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
998 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
999 
1000 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1001 		if (rv == 0) {
1002 			ldcp->tx_tail = tx_tail;
1003 			if (pkt->stype == LDC_ACK) {
1004 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
1005 				    " version ACK\n", ldcp->id);
1006 				/* Save the ACK'd version */
1007 				ldcp->version.major = rcvd_ver->major;
1008 				ldcp->version.minor = rcvd_ver->minor;
1009 				ldcp->hstate |= TS_RCVD_VER;
1010 				ldcp->tstate |= TS_VER_DONE;
1011 				DWARN(DBG_ALL_LDCS,
1012 				    "(0x%llx) Sent ACK, "
1013 				    "Agreed on version v%u.%u\n",
1014 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1015 			}
1016 		} else {
1017 			DWARN(ldcp->id,
1018 			    "i_ldc_process_VER: (0x%llx) error sending "
1019 			    "ACK/NACK\n", ldcp->id);
1020 			i_ldc_reset(ldcp, B_TRUE);
1021 			mutex_exit(&ldcp->tx_lock);
1022 			return (ECONNRESET);
1023 		}
1024 
1025 		break;
1026 
1027 	case LDC_ACK:
1028 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
1029 			if (ldcp->version.major != rcvd_ver->major ||
1030 				ldcp->version.minor != rcvd_ver->minor) {
1031 
1032 				/* mismatched version - reset connection */
1033 				DWARN(ldcp->id,
1034 					"i_ldc_process_VER: (0x%llx) recvd"
1035 					" ACK ver != sent ACK ver\n", ldcp->id);
1036 				i_ldc_reset(ldcp, B_TRUE);
1037 				mutex_exit(&ldcp->tx_lock);
1038 				return (ECONNRESET);
1039 			}
1040 		} else {
1041 			/* SUCCESS - we have agreed on a version */
1042 			ldcp->version.major = rcvd_ver->major;
1043 			ldcp->version.minor = rcvd_ver->minor;
1044 			ldcp->tstate |= TS_VER_DONE;
1045 		}
1046 
1047 		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1048 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1049 
1050 		/* initiate RTS-RTR-RDX handshake */
1051 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1052 		if (rv) {
1053 			DWARN(ldcp->id,
1054 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1055 			    ldcp->id);
1056 			i_ldc_reset(ldcp, B_TRUE);
1057 			mutex_exit(&ldcp->tx_lock);
1058 			return (ECONNRESET);
1059 		}
1060 
1061 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1062 		ZERO_PKT(pkt);
1063 
1064 		pkt->type = LDC_CTRL;
1065 		pkt->stype = LDC_INFO;
1066 		pkt->ctrl = LDC_RTS;
1067 		pkt->env = ldcp->mode;
1068 		if (ldcp->mode != LDC_MODE_RAW)
1069 			pkt->seqid = LDC_INIT_SEQID;
1070 
1071 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1072 
1073 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1074 
1075 		/* initiate the send by calling into HV and set the new tail */
1076 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1077 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1078 
1079 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1080 		if (rv) {
1081 			D2(ldcp->id,
1082 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1083 			    ldcp->id);
1084 			i_ldc_reset(ldcp, B_TRUE);
1085 			mutex_exit(&ldcp->tx_lock);
1086 			return (ECONNRESET);
1087 		}
1088 
1089 		ldcp->tx_tail = tx_tail;
1090 		ldcp->hstate |= TS_SENT_RTS;
1091 
1092 		break;
1093 
1094 	case LDC_NACK:
1095 		/* check if version in NACK is zero */
1096 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1097 			/* version handshake failure */
1098 			DWARN(DBG_ALL_LDCS,
1099 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1100 			    ldcp->id);
1101 			i_ldc_reset(ldcp, B_TRUE);
1102 			mutex_exit(&ldcp->tx_lock);
1103 			return (ECONNRESET);
1104 		}
1105 
1106 		/* get the current tail and pkt for the response */
1107 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1108 		if (rv != 0) {
1109 			cmn_err(CE_NOTE,
1110 			    "i_ldc_process_VER: (0x%lx) err sending "
1111 			    "version ACK/NACK\n", ldcp->id);
1112 			i_ldc_reset(ldcp, B_TRUE);
1113 			mutex_exit(&ldcp->tx_lock);
1114 			return (ECONNRESET);
1115 		}
1116 
1117 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1118 		ZERO_PKT(pkt);
1119 
1120 		/* initialize the packet */
1121 		pkt->type = LDC_CTRL;
1122 		pkt->ctrl = LDC_VER;
1123 		pkt->stype = LDC_INFO;
1124 
1125 		/* check whether the ver in the NACK msg has a match */
1126 		for (;;) {
1127 			if (rcvd_ver->major == ldc_versions[idx].major) {
1128 				/*
1129 				 * major version match - resubmit the request,
1130 				 * lowering the minor version to the one this
1131 				 * endpt supports, if necessary
1132 				 */
1133 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1134 					rcvd_ver->minor =
1135 						ldc_versions[idx].minor;
1136 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1137 				break;
1138 
1139 			}
1140 
1141 			if (rcvd_ver->major > ldc_versions[idx].major) {
1142 
1143 				D1(ldcp->id, "i_ldc_process_VER: using next"
1144 				    " lower idx=%d, v%u.%u\n", idx,
1145 				    ldc_versions[idx].major,
1146 				    ldc_versions[idx].minor);
1147 
1148 				/* send next lower version */
1149 				bcopy(&ldc_versions[idx], pkt->udata,
1150 				    sizeof (ldc_versions[idx]));
1151 				ldcp->next_vidx = idx;
1152 				break;
1153 			}
1154 
1155 			/* next version */
1156 			idx++;
1157 
1158 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1159 
1160 			if (idx == LDC_NUM_VERS) {
1161 				/* no version match - terminate */
1162 				ldcp->next_vidx = 0;
1163 				mutex_exit(&ldcp->tx_lock);
1164 				return (ECONNRESET);
1165 			}
1166 		}
1167 
1168 		/* initiate the send by calling into HV and set the new tail */
1169 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1170 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1171 
1172 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1173 		if (rv == 0) {
1174 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1175 			    " INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1176 			    ldc_versions[idx].minor);
1177 			ldcp->tx_tail = tx_tail;
1178 		} else {
1179 			cmn_err(CE_NOTE,
1180 			    "i_ldc_process_VER: (0x%lx) error sending version"
1181 			    " INFO\n", ldcp->id);
1182 			i_ldc_reset(ldcp, B_TRUE);
1183 			mutex_exit(&ldcp->tx_lock);
1184 			return (ECONNRESET);
1185 		}
1186 
1187 		break;
1188 	}
1189 
1190 	mutex_exit(&ldcp->tx_lock);
1191 	return (rv);
1192 }
1193 
1194 
1195 /*
1196  * Process an incoming RTS ctrl message
1197  */
1198 static int
1199 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1200 {
1201 	int 		rv = 0;
1202 	ldc_msg_t 	*pkt;
1203 	uint64_t	tx_tail;
1204 	boolean_t	sent_NACK = B_FALSE;
1205 
1206 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1207 
1208 	switch (msg->stype) {
1209 	case LDC_NACK:
1210 		DWARN(ldcp->id,
1211 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1212 		    ldcp->id);
1213 
1214 		/* Reset the channel -- as we cannot continue */
1215 		mutex_enter(&ldcp->tx_lock);
1216 		i_ldc_reset(ldcp, B_TRUE);
1217 		mutex_exit(&ldcp->tx_lock);
1218 		rv = ECONNRESET;
1219 		break;
1220 
1221 	case LDC_INFO:
1222 
1223 		/* check mode */
1224 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1225 			cmn_err(CE_NOTE,
1226 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1227 			    ldcp->id);
1228 			/*
1229 			 * send NACK in response to MODE message
1230 			 * get the current tail for the response
1231 			 */
1232 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1233 			if (rv) {
1234 				/* if cannot send NACK - reset channel */
1235 				mutex_enter(&ldcp->tx_lock);
1236 				i_ldc_reset(ldcp, B_TRUE);
1237 				mutex_exit(&ldcp->tx_lock);
1238 				rv = ECONNRESET;
1239 				break;
1240 			}
1241 			sent_NACK = B_TRUE;
1242 		}
1243 		break;
1244 	default:
1245 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1246 		    ldcp->id);
1247 		mutex_enter(&ldcp->tx_lock);
1248 		i_ldc_reset(ldcp, B_TRUE);
1249 		mutex_exit(&ldcp->tx_lock);
1250 		rv = ECONNRESET;
1251 		break;
1252 	}
1253 
1254 	/*
1255 	 * If either the connection was reset (when rv != 0) or
1256 	 * a NACK was sent, we return. In the case of a NACK
1257 	 * the packet that came in is still consumed, but we do
1258 	 * not record that we received the RTS.
1259 	 */
1260 	if (rv || sent_NACK)
1261 		return (rv);
1262 
1263 	/* record RTS received */
1264 	ldcp->hstate |= TS_RCVD_RTS;
1265 
1266 	/* store initial SEQID info */
1267 	ldcp->last_msg_snt = msg->seqid;
1268 
1269 	/* Obtain Tx lock */
1270 	mutex_enter(&ldcp->tx_lock);
1271 
1272 	/* get the current tail for the response */
1273 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1274 	if (rv != 0) {
1275 		cmn_err(CE_NOTE,
1276 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1277 		    ldcp->id);
1278 		i_ldc_reset(ldcp, B_TRUE);
1279 		mutex_exit(&ldcp->tx_lock);
1280 		return (ECONNRESET);
1281 	}
1282 
1283 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1284 	ZERO_PKT(pkt);
1285 
1286 	/* initialize the packet */
1287 	pkt->type = LDC_CTRL;
1288 	pkt->stype = LDC_INFO;
1289 	pkt->ctrl = LDC_RTR;
1290 	pkt->env = ldcp->mode;
1291 	if (ldcp->mode != LDC_MODE_RAW)
1292 		pkt->seqid = LDC_INIT_SEQID;
1293 
1294 	ldcp->last_msg_rcd = msg->seqid;
1295 
1296 	/* initiate the send by calling into HV and set the new tail */
1297 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1298 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1299 
1300 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1301 	if (rv == 0) {
1302 		D2(ldcp->id,
1303 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1304 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1305 
1306 		ldcp->tx_tail = tx_tail;
1307 		ldcp->hstate |= TS_SENT_RTR;
1308 
1309 	} else {
1310 		cmn_err(CE_NOTE,
1311 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1312 		    ldcp->id);
1313 		i_ldc_reset(ldcp, B_TRUE);
1314 		mutex_exit(&ldcp->tx_lock);
1315 		return (ECONNRESET);
1316 	}
1317 
1318 	mutex_exit(&ldcp->tx_lock);
1319 	return (0);
1320 }
1321 
1322 /*
1323  * Process an incoming RTR ctrl message
1324  */
1325 static int
1326 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1327 {
1328 	int 		rv = 0;
1329 	boolean_t	sent_NACK = B_FALSE;
1330 
1331 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1332 
1333 	switch (msg->stype) {
1334 	case LDC_NACK:
1335 		/* RTR NACK received */
1336 		DWARN(ldcp->id,
1337 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1338 		    ldcp->id);
1339 
1340 		/* Reset the channel -- as we cannot continue */
1341 		mutex_enter(&ldcp->tx_lock);
1342 		i_ldc_reset(ldcp, B_TRUE);
1343 		mutex_exit(&ldcp->tx_lock);
1344 		rv = ECONNRESET;
1345 
1346 		break;
1347 
1348 	case LDC_INFO:
1349 
1350 		/* check mode */
1351 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1352 			DWARN(ldcp->id,
1353 			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
1354 			    "expecting 0x%x, got 0x%x\n",
1355 			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
1356 			/*
1357 			 * send NACK in response to MODE message
1358 			 * get the current tail for the response
1359 			 */
1360 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1361 			if (rv) {
1362 				/* if cannot send NACK - reset channel */
1363 				mutex_enter(&ldcp->tx_lock);
1364 				i_ldc_reset(ldcp, B_TRUE);
1365 				mutex_exit(&ldcp->tx_lock);
1366 				rv = ECONNRESET;
1367 				break;
1368 			}
1369 			sent_NACK = B_TRUE;
1370 		}
1371 		break;
1372 
1373 	default:
1374 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1375 		    ldcp->id);
1376 
1377 		/* Reset the channel -- as we cannot continue */
1378 		mutex_enter(&ldcp->tx_lock);
1379 		i_ldc_reset(ldcp, B_TRUE);
1380 		mutex_exit(&ldcp->tx_lock);
1381 		rv = ECONNRESET;
1382 		break;
1383 	}
1384 
1385 	/*
1386 	 * If either the connection was reset (when rv != 0) or
1387 	 * a NACK was sent, we return. In the case of a NACK
1388 	 * the packet that came in is still consumed, but we do
1389 	 * not record that we received the RTR.
1390 	 */
1391 	if (rv || sent_NACK)
1392 		return (rv);
1393 
1394 	ldcp->last_msg_snt = msg->seqid;
1395 	ldcp->hstate |= TS_RCVD_RTR;
1396 
1397 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1398 	if (rv) {
1399 		cmn_err(CE_NOTE,
1400 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1401 		    ldcp->id);
1402 		mutex_enter(&ldcp->tx_lock);
1403 		i_ldc_reset(ldcp, B_TRUE);
1404 		mutex_exit(&ldcp->tx_lock);
1405 		return (ECONNRESET);
1406 	}
1407 	D2(ldcp->id,
1408 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1409 
1410 	ldcp->hstate |= TS_SENT_RDX;
1411 	ldcp->tstate |= TS_HSHAKE_DONE;
1412 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1413 		ldcp->status = LDC_UP;
1414 
1415 	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);
1416 
1417 	return (0);
1418 }
1419 
1420 
1421 /*
1422  * Process an incoming RDX ctrl message
1423  */
1424 static int
1425 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1426 {
1427 	int	rv = 0;
1428 
1429 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1430 
1431 	switch (msg->stype) {
1432 	case LDC_NACK:
1433 		/* RDX NACK received */
1434 		DWARN(ldcp->id,
1435 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1436 		    ldcp->id);
1437 
1438 		/* Reset the channel -- as we cannot continue */
1439 		mutex_enter(&ldcp->tx_lock);
1440 		i_ldc_reset(ldcp, B_TRUE);
1441 		mutex_exit(&ldcp->tx_lock);
1442 		rv = ECONNRESET;
1443 
1444 		break;
1445 
1446 	case LDC_INFO:
1447 
1448 		/*
1449 		 * if the channel is UP and an RDX is received after data
1450 		 * transmission has commenced, it is an error
1451 		 */
1452 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1453 			DWARN(DBG_ALL_LDCS,
1454 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1455 			    " - LDC reset\n", ldcp->id);
1456 			mutex_enter(&ldcp->tx_lock);
1457 			i_ldc_reset(ldcp, B_TRUE);
1458 			mutex_exit(&ldcp->tx_lock);
1459 			return (ECONNRESET);
1460 		}
1461 
1462 		ldcp->hstate |= TS_RCVD_RDX;
1463 		ldcp->tstate |= TS_HSHAKE_DONE;
1464 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1465 			ldcp->status = LDC_UP;
1466 
1467 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1468 		break;
1469 
1470 	default:
1471 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1472 		    ldcp->id);
1473 
1474 		/* Reset the channel -- as we cannot continue */
1475 		mutex_enter(&ldcp->tx_lock);
1476 		i_ldc_reset(ldcp, B_TRUE);
1477 		mutex_exit(&ldcp->tx_lock);
1478 		rv = ECONNRESET;
1479 		break;
1480 	}
1481 
1482 	return (rv);
1483 }
1484 
1485 /*
1486  * Process an incoming ACK for a data packet
1487  */
1488 static int
1489 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1490 {
1491 	int		rv;
1492 	uint64_t 	tx_head;
1493 	ldc_msg_t	*pkt;
1494 
1495 	/* Obtain Tx lock */
1496 	mutex_enter(&ldcp->tx_lock);
1497 
1498 	/*
1499 	 * Read the current Tx head and tail
1500 	 */
1501 	rv = hv_ldc_tx_get_state(ldcp->id,
1502 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1503 	if (rv != 0) {
1504 		cmn_err(CE_WARN,
1505 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1506 		    ldcp->id);
1507 
1508 		/* Reset the channel -- as we cannot continue */
1509 		i_ldc_reset(ldcp, B_TRUE);
1510 		mutex_exit(&ldcp->tx_lock);
1511 		return (ECONNRESET);
1512 	}
1513 
1514 	/*
1515 	 * loop from where the previous ACK location was to the
1516 	 * current head location. This is how far the HV has
1517 	 * actually sent pkts. Pkts between head and tail are
1518 	 * yet to be sent by the HV.
1519 	 */
1520 	tx_head = ldcp->tx_ackd_head;
1521 	for (;;) {
1522 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1523 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1524 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1525 
1526 		if (pkt->seqid == msg->ackid) {
1527 			D2(ldcp->id,
1528 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1529 			    ldcp->id);
1530 			ldcp->last_ack_rcd = msg->ackid;
1531 			ldcp->tx_ackd_head = tx_head;
1532 			break;
1533 		}
1534 		if (tx_head == ldcp->tx_head) {
1535 			/* could not find packet */
1536 			DWARN(ldcp->id,
1537 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1538 			    ldcp->id);
1539 
1540 			/* Reset the channel -- as we cannot continue */
1541 			i_ldc_reset(ldcp, B_TRUE);
1542 			mutex_exit(&ldcp->tx_lock);
1543 			return (ECONNRESET);
1544 		}
1545 	}
1546 
1547 	mutex_exit(&ldcp->tx_lock);
1548 	return (0);
1549 }
1550 
1551 /*
1552  * Process incoming control message
1553  * Return 0 - session can continue
1554  *        EAGAIN - reprocess packet - state was changed
1555  *	  ECONNRESET - channel was reset
1556  */
1557 static int
1558 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1559 {
1560 	int 		rv = 0;
1561 
1562 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1563 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1564 
1565 	switch (ldcp->tstate & ~TS_IN_RESET) {
1566 
1567 	case TS_OPEN:
1568 	case TS_READY:
1569 
1570 		switch (msg->ctrl & LDC_CTRL_MASK) {
1571 		case LDC_VER:
1572 			/* process version message */
1573 			rv = i_ldc_process_VER(ldcp, msg);
1574 			break;
1575 		default:
1576 			DWARN(ldcp->id,
1577 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1578 			    "tstate=0x%x\n", ldcp->id,
1579 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1580 			break;
1581 		}
1582 
1583 		break;
1584 
1585 	case TS_VREADY:
1586 
1587 		switch (msg->ctrl & LDC_CTRL_MASK) {
1588 		case LDC_VER:
1589 			/* process version message */
1590 			rv = i_ldc_process_VER(ldcp, msg);
1591 			break;
1592 		case LDC_RTS:
1593 			/* process RTS message */
1594 			rv = i_ldc_process_RTS(ldcp, msg);
1595 			break;
1596 		case LDC_RTR:
1597 			/* process RTR message */
1598 			rv = i_ldc_process_RTR(ldcp, msg);
1599 			break;
1600 		case LDC_RDX:
1601 			/* process RDX message */
1602 			rv = i_ldc_process_RDX(ldcp, msg);
1603 			break;
1604 		default:
1605 			DWARN(ldcp->id,
1606 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1607 			    "tstate=0x%x\n", ldcp->id,
1608 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1609 			break;
1610 		}
1611 
1612 		break;
1613 
1614 	case TS_UP:
1615 
1616 		switch (msg->ctrl & LDC_CTRL_MASK) {
1617 		case LDC_VER:
1618 			DWARN(ldcp->id,
1619 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1620 			    "- LDC reset\n", ldcp->id);
1621 			/* peer is redoing version negotiation */
1622 			mutex_enter(&ldcp->tx_lock);
1623 			(void) i_ldc_txq_reconf(ldcp);
1624 			i_ldc_reset_state(ldcp);
1625 			mutex_exit(&ldcp->tx_lock);
1626 			rv = EAGAIN;
1627 			break;
1628 
1629 		case LDC_RDX:
1630 			/* process RDX message */
1631 			rv = i_ldc_process_RDX(ldcp, msg);
1632 			break;
1633 
1634 		default:
1635 			DWARN(ldcp->id,
1636 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1637 			    "tstate=0x%x\n", ldcp->id,
1638 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1639 			break;
1640 		}
1641 	}
1642 
1643 	return (rv);
1644 }
1645 
1646 /*
1647  * Register channel with the channel nexus
1648  */
1649 static int
1650 i_ldc_register_channel(ldc_chan_t *ldcp)
1651 {
1652 	int		rv = 0;
1653 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1654 
1655 	if (cinfo->dip == NULL) {
1656 		DWARN(ldcp->id,
1657 		    "i_ldc_register_channel: cnex has not registered\n");
1658 		return (EAGAIN);
1659 	}
1660 
1661 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1662 	if (rv) {
1663 		DWARN(ldcp->id,
1664 		    "i_ldc_register_channel: cannot register channel\n");
1665 		return (rv);
1666 	}
1667 
1668 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1669 	    i_ldc_tx_hdlr, ldcp, NULL);
1670 	if (rv) {
1671 		DWARN(ldcp->id,
1672 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1673 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1674 		return (rv);
1675 	}
1676 
1677 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1678 	    i_ldc_rx_hdlr, ldcp, NULL);
1679 	if (rv) {
1680 		DWARN(ldcp->id,
1681 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1682 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1683 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1684 		return (rv);
1685 	}
1686 
1687 	ldcp->tstate |= TS_CNEX_RDY;
1688 
1689 	return (0);
1690 }
1691 
1692 /*
1693  * Unregister a channel with the channel nexus
1694  */
1695 static int
1696 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1697 {
1698 	int		rv = 0;
1699 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1700 
1701 	if (cinfo->dip == NULL) {
1702 		DWARN(ldcp->id,
1703 		    "i_ldc_unregister_channel: cnex has not registered\n");
1704 		return (EAGAIN);
1705 	}
1706 
1707 	if (ldcp->tstate & TS_CNEX_RDY) {
1708 
1709 		/* Remove the Rx interrupt */
1710 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1711 		if (rv) {
1712 			if (rv != EAGAIN) {
1713 				DWARN(ldcp->id,
1714 				    "i_ldc_unregister_channel: err removing "
1715 				    "Rx intr\n");
1716 				return (rv);
1717 			}
1718 
1719 			/*
1720 			 * If interrupts are pending and handler has
1721 			 * finished running, clear interrupt and try
1722 			 * again
1723 			 */
1724 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1725 				return (rv);
1726 
1727 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1728 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1729 			    CNEX_RX_INTR);
1730 			if (rv) {
1731 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1732 				    "err removing Rx interrupt\n");
1733 				return (rv);
1734 			}
1735 		}
1736 
1737 		/* Remove the Tx interrupt */
1738 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1739 		if (rv) {
1740 			DWARN(ldcp->id,
1741 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1742 			return (rv);
1743 		}
1744 
1745 		/* Unregister the channel */
1746 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1747 		if (rv) {
1748 			DWARN(ldcp->id,
1749 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1750 			return (rv);
1751 		}
1752 
1753 		ldcp->tstate &= ~TS_CNEX_RDY;
1754 	}
1755 
1756 	return (0);
1757 }
1758 
1759 
1760 /*
1761  * LDC transmit interrupt handler
1762  *    triggered for channel up/down/reset events
1763  *    and Tx queue content changes
1764  */
1765 static uint_t
1766 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1767 {
1768 	_NOTE(ARGUNUSED(arg2))
1769 
1770 	int 		rv;
1771 	ldc_chan_t 	*ldcp;
1772 	boolean_t 	notify_client = B_FALSE;
1773 	uint64_t	notify_event = 0, link_state;
1774 
1775 	/* Get the channel for which interrupt was received */
1776 	ASSERT(arg1 != NULL);
1777 	ldcp = (ldc_chan_t *)arg1;
1778 
1779 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1780 	    ldcp->id, ldcp);
1781 
1782 	/* Lock channel */
1783 	mutex_enter(&ldcp->lock);
1784 
1785 	/* Obtain Tx lock */
1786 	mutex_enter(&ldcp->tx_lock);
1787 
1788 	/* mark interrupt as being processed */
1789 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
1790 
1791 	/* save current link state */
1792 	link_state = ldcp->link_state;
1793 
1794 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1795 	    &ldcp->link_state);
1796 	if (rv) {
1797 		cmn_err(CE_WARN,
1798 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=%d\n",
1799 		    ldcp->id, rv);
1800 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1801 		mutex_exit(&ldcp->tx_lock);
1802 		mutex_exit(&ldcp->lock);
1803 		return (DDI_INTR_CLAIMED);
1804 	}
1805 
1806 	/*
1807 	 * reset the channel state if the channel went down
1808 	 * (other side unconfigured queue) or channel was reset
1809 	 * (other side reconfigured its queue)
1810 	 */
1811 	if (link_state != ldcp->link_state &&
1812 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
1813 		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) link down\n", ldcp->id);
1814 		i_ldc_reset(ldcp, B_FALSE);
1815 		notify_client = B_TRUE;
1816 		notify_event = LDC_EVT_DOWN;
1817 	}
1818 
1819 	if (link_state != ldcp->link_state &&
1820 	    ldcp->link_state == LDC_CHANNEL_RESET) {
1821 		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) link reset\n", ldcp->id);
1822 		i_ldc_reset(ldcp, B_FALSE);
1823 		notify_client = B_TRUE;
1824 		notify_event = LDC_EVT_RESET;
1825 	}
1826 
1827 	if (link_state != ldcp->link_state &&
1828 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
1829 	    ldcp->link_state == LDC_CHANNEL_UP) {
1830 		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) link up\n", ldcp->id);
1831 		notify_client = B_TRUE;
1832 		notify_event = LDC_EVT_RESET;
1833 		ldcp->tstate |= TS_LINK_READY;
1834 		ldcp->status = LDC_READY;
1835 	}
1836 
1837 	/* if callbacks are disabled, do not notify */
1838 	if (!ldcp->cb_enabled)
1839 		notify_client = B_FALSE;
1840 
1841 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1842 
1843 	if (notify_client) {
1844 		ldcp->cb_inprogress = B_TRUE;
1845 		mutex_exit(&ldcp->tx_lock);
1846 		mutex_exit(&ldcp->lock);
1847 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1848 		if (rv) {
1849 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1850 			    "failure", ldcp->id);
1851 		}
1852 		mutex_enter(&ldcp->lock);
1853 		ldcp->cb_inprogress = B_FALSE;
1854 	}
1855 
1856 	mutex_exit(&ldcp->lock);
1857 
1858 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1859 
1860 	return (DDI_INTR_CLAIMED);
1861 }
1862 
1863 /*
1864  * LDC receive interrupt handler
1865  *    triggered for channel with data pending to read
1866  *    i.e. Rx queue content changes
1867  */
1868 static uint_t
1869 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1870 {
1871 	_NOTE(ARGUNUSED(arg2))
1872 
1873 	int		rv;
1874 	uint64_t 	rx_head, rx_tail;
1875 	ldc_msg_t 	*msg;
1876 	ldc_chan_t 	*ldcp;
1877 	boolean_t 	notify_client = B_FALSE;
1878 	uint64_t	notify_event = 0;
1879 	uint64_t	link_state, first_fragment = 0;
1880 
1881 
1882 	/* Get the channel for which interrupt was received */
1883 	if (arg1 == NULL) {
1884 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1885 		return (DDI_INTR_UNCLAIMED);
1886 	}
1887 
1888 	ldcp = (ldc_chan_t *)arg1;
1889 
1890 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1891 	    ldcp->id, ldcp);
1892 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1893 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1894 	    ldcp->link_state);
1895 
1896 	/* Lock channel */
1897 	mutex_enter(&ldcp->lock);
1898 
1899 	/* mark interrupt as being processed */
1900 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1901 
1902 	/*
1903 	 * Read packet(s) from the queue
1904 	 */
1905 	for (;;) {
1906 
1907 		link_state = ldcp->link_state;
1908 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1909 		    &ldcp->link_state);
1910 		if (rv) {
1911 			cmn_err(CE_WARN,
1912 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1913 			    "queue ptrs, rv=%d\n", ldcp->id, rv);
1914 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1915 			mutex_exit(&ldcp->lock);
1916 			return (DDI_INTR_CLAIMED);
1917 		}
1918 
1919 		/*
1920 		 * reset the channel state if the channel went down
1921 		 * (other side unconfigured queue) or channel was reset
1922 		 * (other side reconfigured its queue)
1923 		 */
1924 
1925 		if (link_state != ldcp->link_state) {
1926 
1927 			switch (ldcp->link_state) {
1928 			case LDC_CHANNEL_DOWN:
1929 				D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
1930 				    "link down\n", ldcp->id);
1931 				mutex_enter(&ldcp->tx_lock);
1932 				i_ldc_reset(ldcp, B_FALSE);
1933 				mutex_exit(&ldcp->tx_lock);
1934 				notify_client = B_TRUE;
1935 				notify_event = LDC_EVT_DOWN;
1936 				goto loop_exit;
1937 
1938 			case LDC_CHANNEL_UP:
1939 				D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) "
1940 				    "channel link up\n", ldcp->id);
1941 
1942 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
1943 					notify_client = B_TRUE;
1944 					notify_event = LDC_EVT_RESET;
1945 					ldcp->tstate |= TS_LINK_READY;
1946 					ldcp->status = LDC_READY;
1947 				}
1948 				break;
1949 
1950 			case LDC_CHANNEL_RESET:
1951 			default:
1952 #ifdef DEBUG
1953 force_reset:
1954 #endif
1955 				D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
1956 				    "link reset\n", ldcp->id);
1957 				mutex_enter(&ldcp->tx_lock);
1958 				i_ldc_reset(ldcp, B_FALSE);
1959 				mutex_exit(&ldcp->tx_lock);
1960 				notify_client = B_TRUE;
1961 				notify_event = LDC_EVT_RESET;
1962 				break;
1963 			}
1964 		}
1965 
1966 #ifdef DEBUG
1967 		if (LDC_INJECT_RESET(ldcp))
1968 			goto force_reset;
1969 #endif
1970 
1971 		if (rx_head == rx_tail) {
1972 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1973 			    ldcp->id);
1974 			break;
1975 		}
1976 
1977 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1978 		    rx_head, rx_tail);
1979 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1980 		    ldcp->rx_q_va + rx_head);
1981 
1982 		/* get the message */
1983 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1984 
1985 		/* if channel is in RAW mode, notify the client and stop reading */
1986 		if (ldcp->mode == LDC_MODE_RAW) {
1987 			notify_client = B_TRUE;
1988 			notify_event |= LDC_EVT_READ;
1989 			break;
1990 		}
1991 
1992 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1993 
1994 			/* discard packet if channel is not up */
1995 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
1996 
1997 				/* move the head one position */
1998 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1999 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2000 
2001 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
2002 					break;
2003 
2004 				continue;
2005 			} else {
2006 				if ((ldcp->tstate & TS_IN_RESET) == 0)
2007 					notify_client = B_TRUE;
2008 				notify_event |= LDC_EVT_READ;
2009 				break;
2010 			}
2011 		}
2012 
2013 		/* Check the sequence ID for the message received */
2014 		rv = i_ldc_check_seqid(ldcp, msg);
2015 		if (rv != 0) {
2016 
2017 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
2018 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
2019 
2020 			/* Reset last_msg_rcd to start of message */
2021 			if (first_fragment != 0) {
2022 				ldcp->last_msg_rcd = first_fragment - 1;
2023 				first_fragment = 0;
2024 			}
2025 
2026 			/*
2027 			 * Send a NACK due to seqid mismatch
2028 			 */
2029 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
2030 			    (msg->ctrl & LDC_CTRL_MASK));
2031 
2032 			if (rv) {
2033 				cmn_err(CE_NOTE,
2034 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
2035 				    "CTRL/NACK msg\n", ldcp->id);
2036 
2037 				/* if cannot send NACK - reset channel */
2038 				mutex_enter(&ldcp->tx_lock);
2039 				i_ldc_reset(ldcp, B_TRUE);
2040 				mutex_exit(&ldcp->tx_lock);
2041 				rv = ECONNRESET;
2042 				break;
2043 			}
2044 
2045 			/* purge receive queue */
2046 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2047 			break;
2048 		}
2049 
2050 		/* record the message ID */
2051 		ldcp->last_msg_rcd = msg->seqid;
2052 
2053 		/* process control messages */
2054 		if (msg->type & LDC_CTRL) {
2055 			/* save current internal state */
2056 			uint64_t tstate = ldcp->tstate;
2057 
2058 			rv = i_ldc_ctrlmsg(ldcp, msg);
2059 			if (rv == EAGAIN) {
2060 				/* re-process pkt - state was adjusted */
2061 				continue;
2062 			}
2063 			if (rv == ECONNRESET) {
2064 				notify_client = B_TRUE;
2065 				notify_event = LDC_EVT_RESET;
2066 				break;
2067 			}
2068 
2069 			/*
2070 			 * control message processing was successful
2071 			 * channel transitioned to ready for communication
2072 			 */
2073 			if (rv == 0 && ldcp->tstate == TS_UP &&
2074 			    (tstate & ~TS_IN_RESET) !=
2075 			    (ldcp->tstate & ~TS_IN_RESET)) {
2076 				notify_client = B_TRUE;
2077 				notify_event = LDC_EVT_UP;
2078 			}
2079 		}
2080 
2081 		/* process data ACKs */
2082 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2083 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2084 				notify_client = B_TRUE;
2085 				notify_event = LDC_EVT_RESET;
2086 				break;
2087 			}
2088 		}
2089 
2090 		/* move the head one position */
2091 		rx_head = (rx_head + LDC_PACKET_SIZE) %
2092 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2093 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2094 			notify_client = B_TRUE;
2095 			notify_event = LDC_EVT_RESET;
2096 			break;
2097 		}
2098 
2099 	} /* for */
2100 
2101 loop_exit:
2102 
2103 	/* if callbacks are disabled, do not notify */
2104 	if (!ldcp->cb_enabled)
2105 		notify_client = B_FALSE;
2106 
2107 	/*
2108 	 * If there are data packets in the queue, ldc_read will clear
2109 	 * the interrupt after draining the queue; otherwise clear it here
2110 	 */
2111 	if ((notify_event & LDC_EVT_READ) == 0) {
2112 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2113 	} else {
2114 		ldcp->rx_intr_state = LDC_INTR_PEND;
2115 	}
2116 
2117 	if (notify_client) {
2118 		ldcp->cb_inprogress = B_TRUE;
2119 		mutex_exit(&ldcp->lock);
2120 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2121 		if (rv) {
2122 			DWARN(ldcp->id,
2123 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
2124 			    ldcp->id);
2125 		}
2126 		mutex_enter(&ldcp->lock);
2127 		ldcp->cb_inprogress = B_FALSE;
2128 	}
2129 
2130 	mutex_exit(&ldcp->lock);
2131 
2132 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
2133 	return (DDI_INTR_CLAIMED);
2134 }
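
/*
 * To illustrate the flow above: if the peer tears down and reconfigures
 * its queue while this end is up, the handler sees link_state change to
 * LDC_CHANNEL_RESET, resets the channel under tx_lock and delivers
 * LDC_EVT_RESET to the client callback; if instead only data arrived,
 * it delivers LDC_EVT_READ and leaves the interrupt pending for
 * ldc_read() to clear once the queue has been drained.
 */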
2135 
2136 
2137 /* -------------------------------------------------------------------------- */
2138 
2139 /*
2140  * LDC API functions
2141  */
2142 
2143 /*
2144  * Initialize the channel. Allocate internal structure and memory for
2145  * TX/RX queues, and initialize locks.
2146  */
2147 int
2148 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2149 {
2150 	ldc_chan_t 	*ldcp;
2151 	int		rv, exit_val;
2152 	uint64_t	ra_base, nentries;
2153 	uint64_t	qlen;
2154 
2155 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2156 
2157 	if (attr == NULL) {
2158 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2159 		return (EINVAL);
2160 	}
2161 	if (handle == NULL) {
2162 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2163 		return (EINVAL);
2164 	}
2165 
2166 	/* check if channel is valid */
2167 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2168 	if (rv == H_ECHANNEL) {
2169 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2170 		return (EINVAL);
2171 	}
2172 
2173 	/* check if the channel has already been initialized */
2174 	mutex_enter(&ldcssp->lock);
2175 	ldcp = ldcssp->chan_list;
2176 	while (ldcp != NULL) {
2177 		if (ldcp->id == id) {
2178 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2179 			    id);
2180 			mutex_exit(&ldcssp->lock);
2181 			return (EADDRINUSE);
2182 		}
2183 		ldcp = ldcp->next;
2184 	}
2185 	mutex_exit(&ldcssp->lock);
2186 
2187 	ASSERT(ldcp == NULL);
2188 
2189 	*handle = 0;
2190 
2191 	/* Allocate an ldcp structure */
2192 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2193 
2194 	/*
2195 	 * Initialize the channel and Tx lock
2196 	 *
2197 	 * The channel 'lock' protects the entire channel and
2198 	 * should be acquired before initializing, resetting,
2199 	 * destroying or reading from a channel.
2200 	 *
2201 	 * The 'tx_lock' should be acquired prior to transmitting
2202 	 * data over the channel. The lock should also be acquired
2203 	 * prior to channel reconfiguration (in order to prevent
2204 	 * concurrent writes).
2205 	 *
2206 	 * ORDERING: When both locks are being acquired, to prevent
2207 	 * deadlocks, the channel lock should always be acquired prior
2208 	 * to the tx_lock.
2209 	 */
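	/*
	 * Illustrative only - the ordering rule above as followed by
	 * the reset paths in this file:
	 *
	 *	mutex_enter(&ldcp->lock);	channel lock first
	 *	mutex_enter(&ldcp->tx_lock);	then the Tx lock
	 *	i_ldc_reset(ldcp, B_FALSE);
	 *	mutex_exit(&ldcp->tx_lock);	release in reverse order
	 *	mutex_exit(&ldcp->lock);
	 */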
2210 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2211 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2212 
2213 	/* Initialize the channel */
2214 	ldcp->id = id;
2215 	ldcp->cb = NULL;
2216 	ldcp->cb_arg = NULL;
2217 	ldcp->cb_inprogress = B_FALSE;
2218 	ldcp->cb_enabled = B_FALSE;
2219 	ldcp->next = NULL;
2220 
2221 	/* Read attributes */
2222 	ldcp->mode = attr->mode;
2223 	ldcp->devclass = attr->devclass;
2224 	ldcp->devinst = attr->instance;
2225 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2226 
2227 	D1(ldcp->id,
2228 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2229 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2230 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2231 
2232 	ldcp->next_vidx = 0;
2233 	ldcp->tstate = TS_IN_RESET;
2234 	ldcp->hstate = 0;
2235 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2236 	ldcp->last_ack_rcd = 0;
2237 	ldcp->last_msg_rcd = 0;
2238 
2239 	ldcp->stream_bufferp = NULL;
2240 	ldcp->exp_dring_list = NULL;
2241 	ldcp->imp_dring_list = NULL;
2242 	ldcp->mhdl_list = NULL;
2243 
2244 	ldcp->tx_intr_state = LDC_INTR_NONE;
2245 	ldcp->rx_intr_state = LDC_INTR_NONE;
2246 
2247 	/* Initialize payload size depending on whether channel is reliable */
2248 	switch (ldcp->mode) {
2249 	case LDC_MODE_RAW:
2250 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2251 		ldcp->read_p = i_ldc_read_raw;
2252 		ldcp->write_p = i_ldc_write_raw;
2253 		break;
2254 	case LDC_MODE_UNRELIABLE:
2255 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2256 		ldcp->read_p = i_ldc_read_packet;
2257 		ldcp->write_p = i_ldc_write_packet;
2258 		break;
2259 	case LDC_MODE_RELIABLE:
2260 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2261 		ldcp->read_p = i_ldc_read_packet;
2262 		ldcp->write_p = i_ldc_write_packet;
2263 		break;
2264 	case LDC_MODE_STREAM:
2265 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2266 
2267 		ldcp->stream_remains = 0;
2268 		ldcp->stream_offset = 0;
2269 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2270 		ldcp->read_p = i_ldc_read_stream;
2271 		ldcp->write_p = i_ldc_write_stream;
2272 		break;
2273 	default:
2274 		exit_val = EINVAL;
2275 		goto cleanup_on_exit;
2276 	}
2277 
2278 	/*
2279 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this value
2280 	 * is smaller than the default ldc_queue_entries, the queue
2281 	 * length is set to ldc_queue_entries.
2282 	 */
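	/*
	 * Worked example with hypothetical values: mtu = 4096,
	 * ldc_mtu_msgs = 4 and pkt_payload = 48 give
	 * qlen = (4096 * 4) / 48 = 341; if ldc_queue_entries were,
	 * say, 512, the larger value (512) would be used instead.
	 */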
2283 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2284 	ldcp->rx_q_entries =
2285 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2286 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2287 
2288 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2289 
2290 	/* Create a transmit queue */
2291 	ldcp->tx_q_va = (uint64_t)
2292 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2293 	if (ldcp->tx_q_va == NULL) {
2294 		cmn_err(CE_WARN,
2295 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2296 		    ldcp->id);
2297 		exit_val = ENOMEM;
2298 		goto cleanup_on_exit;
2299 	}
2300 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2301 
2302 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2303 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2304 
2305 	ldcp->tstate |= TS_TXQ_RDY;
2306 
2307 	/* Create a receive queue */
2308 	ldcp->rx_q_va = (uint64_t)
2309 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2310 	if (ldcp->rx_q_va == NULL) {
2311 		cmn_err(CE_WARN,
2312 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2313 		    ldcp->id);
2314 		exit_val = ENOMEM;
2315 		goto cleanup_on_exit;
2316 	}
2317 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2318 
2319 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2320 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2321 
2322 	ldcp->tstate |= TS_RXQ_RDY;
2323 
2324 	/* Init descriptor ring and memory handle list lock */
2325 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2326 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2327 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2328 
2329 	/* mark status as INITialized */
2330 	ldcp->status = LDC_INIT;
2331 
2332 	/* Add to channel list */
2333 	mutex_enter(&ldcssp->lock);
2334 	ldcp->next = ldcssp->chan_list;
2335 	ldcssp->chan_list = ldcp;
2336 	ldcssp->channel_count++;
2337 	mutex_exit(&ldcssp->lock);
2338 
2339 	/* set the handle */
2340 	*handle = (ldc_handle_t)ldcp;
2341 
2342 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2343 
2344 	return (0);
2345 
2346 cleanup_on_exit:
2347 
2348 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2349 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2350 
2351 	if (ldcp->tstate & TS_TXQ_RDY)
2352 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2353 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2354 
2355 	if (ldcp->tstate & TS_RXQ_RDY)
2356 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2357 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2358 
2359 	mutex_destroy(&ldcp->tx_lock);
2360 	mutex_destroy(&ldcp->lock);
2361 
2362 	/* ldcp was allocated with KM_SLEEP and so cannot be NULL here */
2363 	kmem_free(ldcp, sizeof (ldc_chan_t));
2364 
2365 	return (exit_val);
2366 }
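
/*
 * Illustrative sketch (hypothetical client code, not part of this
 * driver): typical channel bring-up using the APIs in this file.
 * LDC_DEV_GENERIC is an assumed device-class constant and
 * LDC_EXAMPLE_CLIENT a hypothetical guard; error handling is
 * abbreviated.
 */
#ifdef LDC_EXAMPLE_CLIENT
static uint_t example_cb(uint64_t event, caddr_t arg);

static int
example_ldc_setup(uint64_t chan_id, ldc_handle_t *handlep)
{
	ldc_attr_t	attr;
	int		rv;

	attr.devclass = LDC_DEV_GENERIC;	/* assumed constant */
	attr.instance = 0;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = LDC_DEFAULT_MTU;

	if ((rv = ldc_init(chan_id, &attr, handlep)) != 0)
		return (rv);

	/* register the callback before opening so no events are missed */
	if ((rv = ldc_reg_callback(*handlep, example_cb,
	    (caddr_t)*handlep)) != 0) {
		(void) ldc_fini(*handlep);
		return (rv);
	}

	if ((rv = ldc_open(*handlep)) != 0) {
		(void) ldc_unreg_callback(*handlep);
		(void) ldc_fini(*handlep);
		return (rv);
	}

	/* asynchronous - completion is signalled via LDC_EVT_UP */
	return (ldc_up(*handlep));
}
#endif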
2367 
2368 /*
2369  * Finalize the LDC connection. Returns EBUSY if the
2370  * channel is open; an ldc_close() has to be done prior to
2371  * an ldc_fini operation. Frees the TX/RX queues associated
2372  * with the channel.
2373  */
2374 int
2375 ldc_fini(ldc_handle_t handle)
2376 {
2377 	ldc_chan_t 	*ldcp;
2378 	ldc_chan_t 	*tmp_ldcp;
2379 	uint64_t 	id;
2380 
2381 	if (handle == NULL) {
2382 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2383 		return (EINVAL);
2384 	}
2385 	ldcp = (ldc_chan_t *)handle;
2386 	id = ldcp->id;
2387 
2388 	mutex_enter(&ldcp->lock);
2389 
2390 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2391 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2392 		    ldcp->id);
2393 		mutex_exit(&ldcp->lock);
2394 		return (EBUSY);
2395 	}
2396 
2397 	/* Remove from the channel list */
2398 	mutex_enter(&ldcssp->lock);
2399 	tmp_ldcp = ldcssp->chan_list;
2400 	if (tmp_ldcp == ldcp) {
2401 		ldcssp->chan_list = ldcp->next;
2402 		ldcp->next = NULL;
2403 	} else {
2404 		while (tmp_ldcp != NULL) {
2405 			if (tmp_ldcp->next == ldcp) {
2406 				tmp_ldcp->next = ldcp->next;
2407 				ldcp->next = NULL;
2408 				break;
2409 			}
2410 			tmp_ldcp = tmp_ldcp->next;
2411 		}
2412 		if (tmp_ldcp == NULL) {
2413 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2414 			mutex_exit(&ldcssp->lock);
2415 			mutex_exit(&ldcp->lock);
2416 			return (EINVAL);
2417 		}
2418 	}
2419 
2420 	ldcssp->channel_count--;
2421 
2422 	mutex_exit(&ldcssp->lock);
2423 
2424 	/* Free the map table for this channel */
2425 	if (ldcp->mtbl) {
2426 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2427 		if (ldcp->mtbl->contigmem)
2428 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2429 		else
2430 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2431 		mutex_destroy(&ldcp->mtbl->lock);
2432 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2433 	}
2434 
2435 	/* Destroy descriptor ring and memory handle list lock */
2436 	mutex_destroy(&ldcp->exp_dlist_lock);
2437 	mutex_destroy(&ldcp->imp_dlist_lock);
2438 	mutex_destroy(&ldcp->mlist_lock);
2439 
2440 	/* Free the stream buffer for STREAM_MODE */
2441 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2442 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2443 
2444 	/* Free the RX queue */
2445 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2446 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2447 	ldcp->tstate &= ~TS_RXQ_RDY;
2448 
2449 	/* Free the TX queue */
2450 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2451 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2452 	ldcp->tstate &= ~TS_TXQ_RDY;
2453 
2454 	mutex_exit(&ldcp->lock);
2455 
2456 	/* Destroy mutex */
2457 	mutex_destroy(&ldcp->tx_lock);
2458 	mutex_destroy(&ldcp->lock);
2459 
2460 	/* free channel structure */
2461 	kmem_free(ldcp, sizeof (ldc_chan_t));
2462 
2463 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2464 
2465 	return (0);
2466 }
2467 
2468 /*
2469  * Open the LDC channel for use. It registers the TX/RX queues
2470  * with the Hypervisor. It also specifies the interrupt number
2471  * and target CPU for this channel
2472  */
2473 int
2474 ldc_open(ldc_handle_t handle)
2475 {
2476 	ldc_chan_t 	*ldcp;
2477 	int 		rv;
2478 
2479 	if (handle == NULL) {
2480 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2481 		return (EINVAL);
2482 	}
2483 
2484 	ldcp = (ldc_chan_t *)handle;
2485 
2486 	mutex_enter(&ldcp->lock);
2487 
2488 	if (ldcp->tstate < TS_INIT) {
2489 		DWARN(ldcp->id,
2490 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2491 		mutex_exit(&ldcp->lock);
2492 		return (EFAULT);
2493 	}
2494 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2495 		DWARN(ldcp->id,
2496 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2497 		mutex_exit(&ldcp->lock);
2498 		return (EFAULT);
2499 	}
2500 
2501 	/*
2502 	 * Unregister/Register the tx queue with the hypervisor
2503 	 */
2504 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2505 	if (rv) {
2506 		cmn_err(CE_WARN,
2507 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2508 		    ldcp->id);
2509 		mutex_exit(&ldcp->lock);
2510 		return (EIO);
2511 	}
2512 
2513 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2514 	if (rv) {
2515 		cmn_err(CE_WARN,
2516 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2517 		    ldcp->id);
2518 		mutex_exit(&ldcp->lock);
2519 		return (EIO);
2520 	}
2521 
2522 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2523 	    ldcp->id);
2524 
2525 	/*
2526 	 * Unregister/Register the rx queue with the hypervisor
2527 	 */
2528 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2529 	if (rv) {
2530 		cmn_err(CE_WARN,
2531 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2532 		    ldcp->id);
2533 		mutex_exit(&ldcp->lock);
2534 		return (EIO);
2535 	}
2536 
2537 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2538 	if (rv) {
2539 		cmn_err(CE_WARN,
2540 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2541 		    ldcp->id);
2542 		mutex_exit(&ldcp->lock);
2543 		return (EIO);
2544 	}
2545 
2546 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2547 	    ldcp->id);
2548 
2549 	ldcp->tstate |= TS_QCONF_RDY;
2550 
2551 	/* Register the channel with the channel nexus */
2552 	rv = i_ldc_register_channel(ldcp);
2553 	if (rv && rv != EAGAIN) {
2554 		cmn_err(CE_WARN,
2555 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2556 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2557 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2558 		mutex_exit(&ldcp->lock);
2559 		return (EIO);
2560 	}
2561 
2562 	/* mark channel in OPEN state */
2563 	ldcp->status = LDC_OPEN;
2564 
2565 	/* Read channel state */
2566 	rv = hv_ldc_tx_get_state(ldcp->id,
2567 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2568 	if (rv) {
2569 		cmn_err(CE_WARN,
2570 		    "ldc_open: (0x%lx) cannot read channel state\n",
2571 		    ldcp->id);
2572 		(void) i_ldc_unregister_channel(ldcp);
2573 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2574 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2575 		mutex_exit(&ldcp->lock);
2576 		return (EIO);
2577 	}
2578 
2579 	/*
2580 	 * set the ACKd head to current head location for reliable &
2581 	 * streaming mode
2582 	 */
2583 	ldcp->tx_ackd_head = ldcp->tx_head;
2584 
2585 	/* mark channel ready if HV reports link UP (peer alloc'd Rx queue) */
2586 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2587 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2588 		ldcp->tstate |= TS_LINK_READY;
2589 		ldcp->status = LDC_READY;
2590 	}
2591 
2592 	/*
2593 	 * if the channel is being opened in RAW mode - no handshake is
2594 	 * needed, so switch the channel to the READY and UP states
2595 	 */
2596 	if (ldcp->mode == LDC_MODE_RAW) {
2597 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2598 		ldcp->status = LDC_UP;
2599 	}
2600 
2601 	mutex_exit(&ldcp->lock);
2602 
2603 	/*
2604 	 * Increment number of open channels
2605 	 */
2606 	mutex_enter(&ldcssp->lock);
2607 	ldcssp->channels_open++;
2608 	mutex_exit(&ldcssp->lock);
2609 
2610 	D1(ldcp->id,
2611 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2612 	    "(tstate=0x%x, status=0x%x)\n",
2613 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2614 
2615 	return (0);
2616 }
2617 
2618 /*
2619  * Close the LDC connection. It will return EBUSY if there
2620  * are memory segments or descriptor rings either bound to or
2621  * mapped over the channel
2622  */
2623 int
2624 ldc_close(ldc_handle_t handle)
2625 {
2626 	ldc_chan_t 	*ldcp;
2627 	int		rv = 0, retries = 0;
2628 	boolean_t	chk_done = B_FALSE;
2629 
2630 	if (handle == NULL) {
2631 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2632 		return (EINVAL);
2633 	}
2634 	ldcp = (ldc_chan_t *)handle;
2635 
2636 	mutex_enter(&ldcp->lock);
2637 
2638 	/* return error if channel is not open */
2639 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
2640 		DWARN(ldcp->id,
2641 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2642 		mutex_exit(&ldcp->lock);
2643 		return (EFAULT);
2644 	}
2645 
2646 	/* cannot close if any memory handles or drings are bound or mapped */
2647 	if (ldcp->mhdl_list != NULL) {
2648 		DWARN(ldcp->id,
2649 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2650 		    ldcp->id);
2651 		mutex_exit(&ldcp->lock);
2652 		return (EBUSY);
2653 	}
2654 	if (ldcp->exp_dring_list != NULL) {
2655 		DWARN(ldcp->id,
2656 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2657 		    ldcp->id);
2658 		mutex_exit(&ldcp->lock);
2659 		return (EBUSY);
2660 	}
2661 	if (ldcp->imp_dring_list != NULL) {
2662 		DWARN(ldcp->id,
2663 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2664 		    ldcp->id);
2665 		mutex_exit(&ldcp->lock);
2666 		return (EBUSY);
2667 	}
2668 
2669 	if (ldcp->cb_inprogress) {
2670 		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
2671 		    ldcp->id);
2672 		mutex_exit(&ldcp->lock);
2673 		return (EWOULDBLOCK);
2674 	}
2675 
2676 	/* Obtain Tx lock */
2677 	mutex_enter(&ldcp->tx_lock);
2678 
2679 	/*
2680 	 * Wait for pending transmits to complete, i.e. for the Tx queue
2681 	 * to drain; if there are pending pkts - wait 1 ms and retry
2682 	 */
2683 	for (;;) {
2684 
2685 		rv = hv_ldc_tx_get_state(ldcp->id,
2686 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2687 		if (rv) {
2688 			cmn_err(CE_WARN,
2689 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2690 			mutex_exit(&ldcp->tx_lock);
2691 			mutex_exit(&ldcp->lock);
2692 			return (EIO);
2693 		}
2694 
2695 		if (ldcp->tx_head == ldcp->tx_tail ||
2696 		    ldcp->link_state != LDC_CHANNEL_UP) {
2697 			break;
2698 		}
2699 
2700 		if (chk_done) {
2701 			DWARN(ldcp->id,
2702 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2703 			    ldcp->id);
2704 			break;
2705 		}
2706 
2707 		/* wait for one ms and try again */
2708 		delay(drv_usectohz(1000));
2709 		chk_done = B_TRUE;
2710 	}
2711 
2712 	/*
2713 	 * Drain the Tx and Rx queues as we are closing the
2714 	 * channel. We don't care about any pending packets.
2715 	 * We have to also drain the queue prior to clearing
2716 	 * pending interrupts, otherwise the HV will trigger
2717 	 * an interrupt the moment the interrupt state is
2718 	 * cleared.
2719 	 */
2720 	(void) i_ldc_txq_reconf(ldcp);
2721 	(void) i_ldc_rxq_drain(ldcp);
2722 
2723 	/*
2724 	 * Unregister the channel with the nexus
2725 	 */
2726 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2727 
2728 		mutex_exit(&ldcp->tx_lock);
2729 		mutex_exit(&ldcp->lock);
2730 
2731 		/* if any error other than EAGAIN, return */
2732 		if (rv != EAGAIN || retries >= ldc_max_retries) {
2733 			cmn_err(CE_WARN,
2734 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2735 			    ldcp->id, rv);
2736 			return (rv);
2737 		}
2738 
2739 		/*
2740 		 * As there could be pending interrupts we need
2741 		 * to wait and try again
2742 		 */
2743 		drv_usecwait(ldc_close_delay);
2744 		mutex_enter(&ldcp->lock);
2745 		mutex_enter(&ldcp->tx_lock);
2746 		retries++;
2747 	}
2748 
2749 	/*
2750 	 * Unregister queues
2751 	 */
2752 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2753 	if (rv) {
2754 		cmn_err(CE_WARN,
2755 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2756 		    ldcp->id);
2757 		mutex_exit(&ldcp->tx_lock);
2758 		mutex_exit(&ldcp->lock);
2759 		return (EIO);
2760 	}
2761 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2762 	if (rv) {
2763 		cmn_err(CE_WARN,
2764 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2765 		    ldcp->id);
2766 		mutex_exit(&ldcp->tx_lock);
2767 		mutex_exit(&ldcp->lock);
2768 		return (EIO);
2769 	}
2770 
2771 	ldcp->tstate &= ~TS_QCONF_RDY;
2772 
2773 	/* Reset channel state information */
2774 	i_ldc_reset_state(ldcp);
2775 
2776 	/* Mark channel as down and in initialized state */
2777 	ldcp->tx_ackd_head = 0;
2778 	ldcp->tx_head = 0;
2779 	ldcp->tstate = TS_IN_RESET|TS_INIT;
2780 	ldcp->status = LDC_INIT;
2781 
2782 	mutex_exit(&ldcp->tx_lock);
2783 	mutex_exit(&ldcp->lock);
2784 
2785 	/* Decrement number of open channels */
2786 	mutex_enter(&ldcssp->lock);
2787 	ldcssp->channels_open--;
2788 	mutex_exit(&ldcssp->lock);
2789 
2790 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2791 
2792 	return (0);
2793 }
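
/*
 * Illustrative sketch (hypothetical client code): orderly teardown.
 * ldc_fini() returns EBUSY while the channel is still open, so the
 * close must come first.
 */
#ifdef LDC_EXAMPLE_CLIENT
static int
example_ldc_teardown(ldc_handle_t handle)
{
	int	rv;

	if ((rv = ldc_close(handle)) != 0)
		return (rv);	/* EBUSY if handles/drings still bound */
	(void) ldc_unreg_callback(handle);
	return (ldc_fini(handle));
}
#endif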
2794 
2795 /*
2796  * Register channel callback
2797  */
2798 int
2799 ldc_reg_callback(ldc_handle_t handle,
2800     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2801 {
2802 	ldc_chan_t *ldcp;
2803 
2804 	if (handle == NULL) {
2805 		DWARN(DBG_ALL_LDCS,
2806 		    "ldc_reg_callback: invalid channel handle\n");
2807 		return (EINVAL);
2808 	}
2809 	if (((uint64_t)cb) < KERNELBASE) {
2810 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2811 		return (EINVAL);
2812 	}
2813 	ldcp = (ldc_chan_t *)handle;
2814 
2815 	mutex_enter(&ldcp->lock);
2816 
2817 	if (ldcp->cb) {
2818 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2819 		    ldcp->id);
2820 		mutex_exit(&ldcp->lock);
2821 		return (EIO);
2822 	}
2823 	if (ldcp->cb_inprogress) {
2824 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2825 		    ldcp->id);
2826 		mutex_exit(&ldcp->lock);
2827 		return (EWOULDBLOCK);
2828 	}
2829 
2830 	ldcp->cb = cb;
2831 	ldcp->cb_arg = arg;
2832 	ldcp->cb_enabled = B_TRUE;
2833 
2834 	D1(ldcp->id,
2835 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2836 	    ldcp->id);
2837 
2838 	mutex_exit(&ldcp->lock);
2839 
2840 	return (0);
2841 }
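
/*
 * Illustrative sketch (hypothetical client code): a callback of the
 * shape ldc_reg_callback() expects. The event bits are the ones this
 * file delivers; the handling shown is only an outline.
 */
#ifdef LDC_EXAMPLE_CLIENT
static uint_t
example_cb(uint64_t event, caddr_t arg)
{
	ldc_handle_t	handle = (ldc_handle_t)arg;

	if (event & LDC_EVT_UP) {
		/* handshake done - channel ready for ldc_read/ldc_write */
	}
	if (event & LDC_EVT_READ) {
		/* data pending - drain the queue with ldc_read() */
	}
	if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) {
		/* channel reset or peer down - try to bring it back up */
		(void) ldc_up(handle);
	}
	return (0);	/* non-zero is logged as a callback failure */
}
#endif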
2842 
2843 /*
2844  * Unregister channel callback
2845  */
2846 int
2847 ldc_unreg_callback(ldc_handle_t handle)
2848 {
2849 	ldc_chan_t *ldcp;
2850 
2851 	if (handle == NULL) {
2852 		DWARN(DBG_ALL_LDCS,
2853 		    "ldc_unreg_callback: invalid channel handle\n");
2854 		return (EINVAL);
2855 	}
2856 	ldcp = (ldc_chan_t *)handle;
2857 
2858 	mutex_enter(&ldcp->lock);
2859 
2860 	if (ldcp->cb == NULL) {
2861 		DWARN(ldcp->id,
2862 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2863 		    ldcp->id);
2864 		mutex_exit(&ldcp->lock);
2865 		return (EIO);
2866 	}
2867 	if (ldcp->cb_inprogress) {
2868 		DWARN(ldcp->id,
2869 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2870 		    ldcp->id);
2871 		mutex_exit(&ldcp->lock);
2872 		return (EWOULDBLOCK);
2873 	}
2874 
2875 	ldcp->cb = NULL;
2876 	ldcp->cb_arg = NULL;
2877 	ldcp->cb_enabled = B_FALSE;
2878 
2879 	D1(ldcp->id,
2880 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2881 	    ldcp->id);
2882 
2883 	mutex_exit(&ldcp->lock);
2884 
2885 	return (0);
2886 }
2887 
2888 
2889 /*
2890  * Bring a channel up by initiating a handshake with the peer.
2891  * This call is asynchronous. It will complete at a later point
2892  * in time when the peer responds back with an RTR.
2893  */
2894 int
2895 ldc_up(ldc_handle_t handle)
2896 {
2897 	int 		rv;
2898 	ldc_chan_t 	*ldcp;
2899 	ldc_msg_t 	*ldcmsg;
2900 	uint64_t 	tx_tail, tstate;
2901 
2902 	if (handle == NULL) {
2903 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2904 		return (EINVAL);
2905 	}
2906 	ldcp = (ldc_chan_t *)handle;
2907 
2908 	mutex_enter(&ldcp->lock);
2909 
2910 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
2911 
2912 	/* clear the reset state */
2913 	tstate = ldcp->tstate;
2914 	ldcp->tstate &= ~TS_IN_RESET;
2915 
2916 	if (ldcp->tstate == TS_UP) {
2917 		DWARN(ldcp->id,
2918 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2919 		    ldcp->id);
2920 
2921 		/* mark channel as up */
2922 		ldcp->status = LDC_UP;
2923 
2924 		/*
2925 		 * if the channel was in reset state and there is
2926 		 * pending data, clear the interrupt state. This will
2927 		 * trigger an interrupt, causing the RX handler to
2928 		 * invoke the client's callback
2929 		 */
2930 		if ((tstate & TS_IN_RESET) &&
2931 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
2932 			D1(ldcp->id,
2933 			    "ldc_up: (0x%llx) channel has pending data, "
2934 			    "clearing interrupt\n", ldcp->id);
2935 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2936 		}
2937 
2938 		mutex_exit(&ldcp->lock);
2939 		return (0);
2940 	}
2941 
2942 	/* if the channel is in RAW mode - mark it as UP, if READY */
2943 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2944 		ldcp->tstate = TS_UP;
2945 		mutex_exit(&ldcp->lock);
2946 		return (0);
2947 	}
2948 
2949 	/* Don't start another handshake if there is one in progress */
2950 	if (ldcp->hstate) {
2951 		D1(ldcp->id,
2952 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2953 		    ldcp->id);
2954 		mutex_exit(&ldcp->lock);
2955 		return (0);
2956 	}
2957 
2958 	mutex_enter(&ldcp->tx_lock);
2959 
2960 	/* get the current tail for the LDC msg */
2961 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2962 	if (rv) {
2963 		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2964 		    ldcp->id);
2965 		mutex_exit(&ldcp->tx_lock);
2966 		mutex_exit(&ldcp->lock);
2967 		return (ECONNREFUSED);
2968 	}
2969 
2970 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2971 	ZERO_PKT(ldcmsg);
2972 
2973 	ldcmsg->type = LDC_CTRL;
2974 	ldcmsg->stype = LDC_INFO;
2975 	ldcmsg->ctrl = LDC_VER;
2976 	ldcp->next_vidx = 0;
2977 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2978 
2979 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2980 
2981 	/* initiate the send by calling into HV and set the new tail */
2982 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2983 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2984 
2985 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2986 	if (rv) {
2987 		DWARN(ldcp->id,
2988 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2989 		    ldcp->id, rv);
2990 		mutex_exit(&ldcp->tx_lock);
2991 		mutex_exit(&ldcp->lock);
2992 		return (rv);
2993 	}
2994 
2995 	ldcp->hstate |= TS_SENT_VER;
2996 	ldcp->tx_tail = tx_tail;
2997 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2998 
2999 	mutex_exit(&ldcp->tx_lock);
3000 	mutex_exit(&ldcp->lock);
3001 
3002 	return (rv);
3003 }
3004 
3005 
3006 /*
3007  * Bring a channel down by resetting its state and queues
3008  */
3009 int
3010 ldc_down(ldc_handle_t handle)
3011 {
3012 	ldc_chan_t 	*ldcp;
3013 
3014 	if (handle == NULL) {
3015 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3016 		return (EINVAL);
3017 	}
3018 	ldcp = (ldc_chan_t *)handle;
3019 	mutex_enter(&ldcp->lock);
3020 	mutex_enter(&ldcp->tx_lock);
3021 	i_ldc_reset(ldcp, B_TRUE);
3022 	mutex_exit(&ldcp->tx_lock);
3023 	mutex_exit(&ldcp->lock);
3024 
3025 	return (0);
3026 }
3027 
3028 /*
3029  * Get the current channel status
3030  */
3031 int
3032 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3033 {
3034 	ldc_chan_t *ldcp;
3035 
3036 	if (handle == NULL || status == NULL) {
3037 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3038 		return (EINVAL);
3039 	}
3040 	ldcp = (ldc_chan_t *)handle;
3041 
3042 	*status = ldcp->status;
3043 
3044 	D1(ldcp->id,
3045 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3046 	return (0);
3047 }
3048 
3049 
3050 /*
3051  * Set the channel's callback mode - enable/disable callbacks
3052  */
3053 int
3054 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3055 {
3056 	ldc_chan_t 	*ldcp;
3057 
3058 	if (handle == NULL) {
3059 		DWARN(DBG_ALL_LDCS,
3060 		    "ldc_set_cb_mode: invalid channel handle\n");
3061 		return (EINVAL);
3062 	}
3063 	ldcp = (ldc_chan_t *)handle;
3064 
3065 	/*
3066 	 * Record no callbacks should be invoked
3067 	 */
3068 	mutex_enter(&ldcp->lock);
3069 
3070 	switch (cmode) {
3071 	case LDC_CB_DISABLE:
3072 		if (!ldcp->cb_enabled) {
3073 			DWARN(ldcp->id,
3074 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3075 			    ldcp->id);
3076 			break;
3077 		}
3078 		ldcp->cb_enabled = B_FALSE;
3079 
3080 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3081 		    ldcp->id);
3082 		break;
3083 
3084 	case LDC_CB_ENABLE:
3085 		if (ldcp->cb_enabled) {
3086 			DWARN(ldcp->id,
3087 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3088 			    ldcp->id);
3089 			break;
3090 		}
3091 		ldcp->cb_enabled = B_TRUE;
3092 
3093 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3094 		    ldcp->id);
3095 		break;
3096 	}
3097 
3098 	mutex_exit(&ldcp->lock);
3099 
3100 	return (0);
3101 }
3102 
3103 /*
3104  * Check to see if there are packets on the incoming queue.
3105  * Returns hasdata = B_FALSE if there are no packets
3106  */
3107 int
3108 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3109 {
3110 	int 		rv;
3111 	uint64_t 	rx_head, rx_tail;
3112 	ldc_chan_t 	*ldcp;
3113 
3114 	if (handle == NULL) {
3115 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3116 		return (EINVAL);
3117 	}
3118 	ldcp = (ldc_chan_t *)handle;
3119 
3120 	*hasdata = B_FALSE;
3121 
3122 	mutex_enter(&ldcp->lock);
3123 
3124 	if (ldcp->tstate != TS_UP) {
3125 		D1(ldcp->id,
3126 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3127 		mutex_exit(&ldcp->lock);
3128 		return (ECONNRESET);
3129 	}
3130 
3131 	/* Read the receive queue head/tail pointers */
3132 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3133 	    &ldcp->link_state);
3134 	if (rv != 0) {
3135 		cmn_err(CE_WARN,
3136 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3137 		mutex_exit(&ldcp->lock);
3138 		return (EIO);
3139 	}
3140 	/* reset the channel state if the channel went down */
3141 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3142 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3143 		mutex_enter(&ldcp->tx_lock);
3144 		i_ldc_reset(ldcp, B_FALSE);
3145 		mutex_exit(&ldcp->tx_lock);
3146 		mutex_exit(&ldcp->lock);
3147 		return (ECONNRESET);
3148 	}
3149 
3150 	if ((rx_head != rx_tail) ||
3151 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
3152 		D1(ldcp->id,
3153 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
3154 		    ldcp->id);
3155 		*hasdata = B_TRUE;
3156 	}
3157 
3158 	mutex_exit(&ldcp->lock);
3159 
3160 	return (0);
3161 }
3162 
3163 
3164 /*
3165  * Read 'size' bytes or less. If the incoming message is
3166  * larger than 'size', ENOBUFS is returned.
3167  *
3168  * On return, size contains the number of bytes read.
3169  */
3170 int
3171 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3172 {
3173 	ldc_chan_t 	*ldcp;
3174 	uint64_t 	rx_head = 0, rx_tail = 0;
3175 	int		rv = 0, exit_val;
3176 
3177 	if (handle == NULL) {
3178 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3179 		return (EINVAL);
3180 	}
3181 
3182 	ldcp = (ldc_chan_t *)handle;
3183 
3184 	/* channel lock */
3185 	mutex_enter(&ldcp->lock);
3186 
3187 	if (ldcp->tstate != TS_UP) {
3188 		DWARN(ldcp->id,
3189 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3190 		    ldcp->id);
3191 		exit_val = ECONNRESET;
3192 	} else {
3193 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3194 	}
3195 
3196 	/*
3197 	 * if queue has been drained - clear interrupt
3198 	 */
3199 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3200 	    &ldcp->link_state);
3201 	if (rv != 0) {
3202 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3203 		    ldcp->id);
3204 		mutex_enter(&ldcp->tx_lock);
3205 		i_ldc_reset(ldcp, B_TRUE);
3206 		mutex_exit(&ldcp->tx_lock);
3207 		return (ECONNRESET);
3208 	}
3209 
3210 	if (exit_val == 0) {
3211 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3212 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3213 			mutex_enter(&ldcp->tx_lock);
3214 			i_ldc_reset(ldcp, B_FALSE);
3215 			exit_val = ECONNRESET;
3216 			mutex_exit(&ldcp->tx_lock);
3217 		}
3218 		if ((rv == 0) &&
3219 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3220 		    (rx_head == rx_tail)) {
3221 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3222 		}
3223 	}
3224 
3225 	mutex_exit(&ldcp->lock);
3226 	return (exit_val);
3227 }
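
/*
 * Illustrative sketch (hypothetical client code): draining pending
 * data, e.g. in response to LDC_EVT_READ. 'buf'/'buflen' are an
 * assumed client-owned buffer at least one MTU in size.
 */
#ifdef LDC_EXAMPLE_CLIENT
static void
example_drain(ldc_handle_t handle, caddr_t buf, size_t buflen)
{
	boolean_t	hasdata;
	size_t		len;

	while (ldc_chkq(handle, &hasdata) == 0 && hasdata) {
		len = buflen;
		if (ldc_read(handle, buf, &len) != 0)
			break;	/* EAGAIN, ECONNRESET, ENOBUFS, ... */
		if (len == 0)
			break;	/* queue drained */
		/* process 'len' bytes from buf here */
	}
}
#endif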
3228 
3229 /*
3230  * Basic raw mondo read -
3231  * no interpretation of mondo contents at all.
3232  *
3233  * Enter and exit with ldcp->lock held by caller
3234  */
3235 static int
3236 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3237 {
3238 	uint64_t 	q_size_mask;
3239 	ldc_msg_t 	*msgp;
3240 	uint8_t		*msgbufp;
3241 	int		rv = 0, space;
3242 	uint64_t 	rx_head, rx_tail;
3243 
3244 	space = *sizep;
3245 
3246 	if (space < LDC_PAYLOAD_SIZE_RAW)
3247 		return (ENOBUFS);
3248 
3249 	ASSERT(mutex_owned(&ldcp->lock));
3250 
3251 	/* compute mask for increment */
3252 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3253 
3254 	/*
3255 	 * Read packet(s) from the queue
3256 	 */
3257 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3258 	    &ldcp->link_state);
3259 	if (rv != 0) {
3260 		cmn_err(CE_WARN,
3261 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3262 		    ldcp->id);
3263 		return (EIO);
3264 	}
3265 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3266 		" rxt=0x%llx, st=0x%llx\n",
3267 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3268 
3269 	/* reset the channel state if the channel went down */
3270 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3271 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3272 		mutex_enter(&ldcp->tx_lock);
3273 		i_ldc_reset(ldcp, B_FALSE);
3274 		mutex_exit(&ldcp->tx_lock);
3275 		return (ECONNRESET);
3276 	}
3277 
3278 	/*
3279 	 * Check for empty queue
3280 	 */
3281 	if (rx_head == rx_tail) {
3282 		*sizep = 0;
3283 		return (0);
3284 	}
3285 
3286 	/* get the message */
3287 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3288 
3289 	/* if channel is in RAW mode, copy data and return */
3290 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3291 
3292 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3293 
3294 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3295 
3296 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3297 
3298 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3299 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3300 
3301 	return (rv);
3302 }
3303 
3304 /*
3305  * Process LDC mondos to build larger packets
3306  * with either unreliable or reliable delivery.
3307  *
3308  * Enter and exit with ldcp->lock held by caller
3309  */
3310 static int
3311 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3312 {
3313 	int		rv = 0;
3314 	uint64_t 	rx_head = 0, rx_tail = 0;
3315 	uint64_t 	curr_head = 0;
3316 	ldc_msg_t 	*msg;
3317 	caddr_t 	target;
3318 	size_t 		len = 0, bytes_read = 0;
3319 	int 		retries = 0;
3320 	uint64_t 	q_size_mask;
3321 	uint64_t	first_fragment = 0;
3322 
3323 	target = target_bufp;
3324 
3325 	ASSERT(mutex_owned(&ldcp->lock));
3326 
3327 	/* check if the buffer and size are valid */
3328 	if (target_bufp == NULL || *sizep == 0) {
3329 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3330 		    ldcp->id);
3331 		return (EINVAL);
3332 	}
3333 
3334 	/* compute mask for increment */
3335 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3336 
3337 	/*
3338 	 * Read packet(s) from the queue
3339 	 */
3340 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3341 	    &ldcp->link_state);
3342 	if (rv != 0) {
3343 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3344 		    ldcp->id);
3345 		mutex_enter(&ldcp->tx_lock);
3346 		i_ldc_reset(ldcp, B_TRUE);
3347 		mutex_exit(&ldcp->tx_lock);
3348 		return (ECONNRESET);
3349 	}
3350 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3351 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3352 
3353 	/* reset the channel state if the channel went down */
3354 	if (ldcp->link_state != LDC_CHANNEL_UP)
3355 		goto channel_is_reset;
3356 
3357 	for (;;) {
3358 
3359 		if (curr_head == rx_tail) {
3360 			rv = hv_ldc_rx_get_state(ldcp->id,
3361 			    &rx_head, &rx_tail, &ldcp->link_state);
3362 			if (rv != 0) {
3363 				cmn_err(CE_WARN,
3364 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3365 				    ldcp->id);
3366 				mutex_enter(&ldcp->tx_lock);
3367 				i_ldc_reset(ldcp, B_TRUE);
3368 				mutex_exit(&ldcp->tx_lock);
3369 				return (ECONNRESET);
3370 			}
3371 			if (ldcp->link_state != LDC_CHANNEL_UP)
3372 				goto channel_is_reset;
3373 
3374 			if (curr_head == rx_tail) {
3375 
3376 				/* If in the middle of a fragmented xfer */
3377 				if (first_fragment != 0) {
3378 
3379 					/* wait for ldc_delay usecs */
3380 					drv_usecwait(ldc_delay);
3381 
3382 					if (++retries < ldc_max_retries)
3383 						continue;
3384 
3385 					*sizep = 0;
3386 					ldcp->last_msg_rcd = first_fragment - 1;
3387 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3388 						"(0x%llx) read timeout",
3389 						ldcp->id);
3390 					return (EAGAIN);
3391 				}
3392 				*sizep = 0;
3393 				break;
3394 			}
3395 		}
3396 		retries = 0;
3397 
3398 		D2(ldcp->id,
3399 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3400 		    ldcp->id, curr_head, rx_head, rx_tail);
3401 
3402 		/* get the message */
3403 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3404 
3405 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3406 		    ldcp->rx_q_va + curr_head);
3407 
3408 		/* Check the message ID for the message received */
3409 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3410 
3411 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3412 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3413 
3414 			/* throw away data */
3415 			bytes_read = 0;
3416 
3417 			/* Reset last_msg_rcd to start of message */
3418 			if (first_fragment != 0) {
3419 				ldcp->last_msg_rcd = first_fragment - 1;
3420 				first_fragment = 0;
3421 			}
3422 			/*
3423 			 * Send a NACK -- invalid seqid
3424 			 * get the current tail for the response
3425 			 */
3426 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3427 			    (msg->ctrl & LDC_CTRL_MASK));
3428 			if (rv) {
3429 				cmn_err(CE_NOTE,
3430 				    "ldc_read: (0x%lx) err sending "
3431 				    "NACK msg\n", ldcp->id);
3432 
3433 				/* if cannot send NACK - reset channel */
3434 				mutex_enter(&ldcp->tx_lock);
3435 				i_ldc_reset(ldcp, B_FALSE);
3436 				mutex_exit(&ldcp->tx_lock);
3437 				rv = ECONNRESET;
3438 				break;
3439 			}
3440 
3441 			/* purge receive queue */
3442 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3443 
3444 			break;
3445 		}
3446 
3447 		/*
3448 		 * Process any CTRL messages
3449 		 * Future implementations should try to pass these
3450 		 * to LDC link by resetting the intr state.
3451 		 *
3452 		 * NOTE: not done as a switch() as type can be both ctrl+data
3453 		 */
3454 		if (msg->type & LDC_CTRL) {
3455 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3456 				if (rv == EAGAIN)
3457 					continue;
3458 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3459 				*sizep = 0;
3460 				bytes_read = 0;
3461 				break;
3462 			}
3463 		}
3464 
3465 		/* process data ACKs */
3466 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3467 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3468 				*sizep = 0;
3469 				bytes_read = 0;
3470 				break;
3471 			}
3472 		}
3473 
3474 		/* process data messages */
3475 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3476 
3477 			uint8_t *msgbuf = (uint8_t *)(
3478 				(ldcp->mode == LDC_MODE_RELIABLE ||
3479 				ldcp->mode == LDC_MODE_STREAM)
3480 				? msg->rdata : msg->udata);
3481 
3482 			D2(ldcp->id,
3483 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3484 
3485 			/* get the packet length */
3486 			len = (msg->env & LDC_LEN_MASK);
3487 
3488 			/*
3489 			 * FUTURE OPTIMIZATION:
3490 			 * we don't need to set the q head for every
3491 			 * packet we read; we just need to do this when
3492 			 * we are done or need to wait for more mondos
3493 			 * to make a full packet - this is currently
3494 			 * expensive.
3495 			 */
3496 
3497 			if (first_fragment == 0) {
3498 
3499 				/*
3500 				 * first packets should always have the start
3501 				 * bit set (even for a single packet). If not
3502 				 * throw away the packet
3503 				 */
3504 				if (!(msg->env & LDC_FRAG_START)) {
3505 
3506 					DWARN(DBG_ALL_LDCS,
3507 					    "ldc_read: (0x%llx) not start - "
3508 					    "frag=%x\n", ldcp->id,
3509 					    (msg->env) & LDC_FRAG_MASK);
3510 
3511 					/* toss pkt, inc head, cont reading */
3512 					bytes_read = 0;
3513 					target = target_bufp;
3514 					curr_head =
3515 						(curr_head + LDC_PACKET_SIZE)
3516 						& q_size_mask;
3517 					if (rv = i_ldc_set_rx_head(ldcp,
3518 					    curr_head))
3519 						break;
3520 
3521 					continue;
3522 				}
3523 
3524 				first_fragment = msg->seqid;
3525 			} else {
3526 				/* check to see if this is a pkt w/ START bit */
3527 				if (msg->env & LDC_FRAG_START) {
3528 					DWARN(DBG_ALL_LDCS,
3529 					    "ldc_read:(0x%llx) unexpected pkt"
3530 					    " env=0x%x discarding %d bytes,"
3531 					    " lastmsg=%d, currentmsg=%d\n",
3532 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3533 					    bytes_read, ldcp->last_msg_rcd,
3534 					    msg->seqid);
3535 
3536 					/* throw data we have read so far */
3537 					bytes_read = 0;
3538 					target = target_bufp;
3539 					first_fragment = msg->seqid;
3540 
3541 					if (rv = i_ldc_set_rx_head(ldcp,
3542 					    curr_head))
3543 						break;
3544 				}
3545 			}
3546 
3547 			/* copy (next) pkt into buffer */
3548 			if (len <= (*sizep - bytes_read)) {
3549 				bcopy(msgbuf, target, len);
3550 				target += len;
3551 				bytes_read += len;
3552 			} else {
3553 				/*
3554 				 * there is not enough space in the buffer to
3555 				 * read this pkt. throw message away & continue
3556 				 * reading data from queue
3557 				 */
3558 				DWARN(DBG_ALL_LDCS,
3559 				    "ldc_read: (0x%llx) buffer too small, "
3560 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3561 				    curr_head, *sizep, bytes_read+len);
3562 
3563 				first_fragment = 0;
3564 				target = target_bufp;
3565 				bytes_read = 0;
3566 
3567 				/* throw away everything received so far */
3568 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3569 					break;
3570 
3571 				/* continue reading remaining pkts */
3572 				continue;
3573 			}
3574 		}
3575 
3576 		/* set the message id */
3577 		ldcp->last_msg_rcd = msg->seqid;
3578 
3579 		/* move the head one position */
3580 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3581 
3582 		if (msg->env & LDC_FRAG_STOP) {
3583 
3584 			/*
3585 			 * All pkts that are part of this fragmented transfer
3586 			 * have been read or this was a single pkt read
3587 			 * or there was an error
3588 			 */
3589 
3590 			/* set the queue head */
3591 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3592 				bytes_read = 0;
3593 
3594 			*sizep = bytes_read;
3595 
3596 			break;
3597 		}
3598 
3599 		/* advance head if it is a DATA ACK */
3600 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3601 
3602 			/* set the queue head */
3603 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3604 				bytes_read = 0;
3605 				break;
3606 			}
3607 
3608 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3609 			    ldcp->id, curr_head);
3610 		}
3611 
3612 	} /* for (;;) */
3613 
3614 
3615 	/*
3616 	 * If useful data was read - send msg ACK
3617 	 * OPTIMIZE: do not send an ACK for every msg; ACK periodically
3618 	 */
3619 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3620 		ldcp->mode == LDC_MODE_STREAM)) {
3621 
3622 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3623 		if (rv && rv != EWOULDBLOCK) {
3624 			cmn_err(CE_NOTE,
3625 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3626 
3627 			/* if cannot send ACK - reset channel */
3628 			goto channel_is_reset;
3629 		}
3630 	}
3631 
3632 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3633 
3634 	return (rv);
3635 
3636 channel_is_reset:
3637 	mutex_enter(&ldcp->tx_lock);
3638 	i_ldc_reset(ldcp, B_FALSE);
3639 	mutex_exit(&ldcp->tx_lock);
3640 	return (ECONNRESET);
3641 }
3642 
3643 /*
3644  * Use underlying reliable packet mechanism to fetch
3645  * and buffer incoming packets so we can hand them back as
3646  * a basic byte stream.
3647  *
3648  * Enter and exit with ldcp->lock held by caller
3649  */
3650 static int
3651 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3652 {
3653 	int	rv;
3654 	size_t	size;
3655 
3656 	ASSERT(mutex_owned(&ldcp->lock));
3657 
3658 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3659 		ldcp->id, *sizep);
3660 
3661 	if (ldcp->stream_remains == 0) {
3662 		size = ldcp->mtu;
3663 		rv = i_ldc_read_packet(ldcp,
3664 			(caddr_t)ldcp->stream_bufferp, &size);
3665 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3666 			ldcp->id, size);
3667 
3668 		if (rv != 0)
3669 			return (rv);
3670 
3671 		ldcp->stream_remains = size;
3672 		ldcp->stream_offset = 0;
3673 	}
3674 
3675 	size = MIN(ldcp->stream_remains, *sizep);
3676 
3677 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3678 	ldcp->stream_offset += size;
3679 	ldcp->stream_remains -= size;
3680 
3681 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3682 		ldcp->id, size);
3683 
3684 	*sizep = size;
3685 	return (0);
3686 }
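
/*
 * Worked example of the buffering above (hypothetical sizes): with an
 * mtu of 4096, one underlying i_ldc_read_packet() call may deposit a
 * 3000-byte message into stream_bufferp; two subsequent 1500-byte
 * ldc_read() calls are then satisfied from the buffer alone, with
 * stream_remains going 3000 -> 1500 -> 0 before the queue is read
 * again.
 */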
3687 
3688 /*
3689  * Write the specified number of bytes to the channel
3690  * in multiple pkts of pkt_payload size. Each
3691  * packet is tagged with a unique packet ID in
3692  * the case of a reliable link.
3693  *
3694  * On return, size contains the number of bytes written.
3695  */
3696 int
3697 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3698 {
3699 	ldc_chan_t	*ldcp;
3700 	int		rv = 0;
3701 
3702 	if (handle == NULL) {
3703 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3704 		return (EINVAL);
3705 	}
3706 	ldcp = (ldc_chan_t *)handle;
3707 
3708 	/* check if writes can occur */
3709 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3710 		/*
3711 		 * Could not get the lock - channel could
3712 		 * be in the process of being unconfigured
3713 		 * or reader has encountered an error
3714 		 */
3715 		return (EAGAIN);
3716 	}
3717 
3718 	/* check if non-zero data to write */
3719 	if (buf == NULL || sizep == NULL) {
3720 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3721 		    ldcp->id);
3722 		mutex_exit(&ldcp->tx_lock);
3723 		return (EINVAL);
3724 	}
3725 
3726 	if (*sizep == 0) {
3727 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3728 		    ldcp->id);
3729 		mutex_exit(&ldcp->tx_lock);
3730 		return (0);
3731 	}
3732 
3733 	/* Check if channel is UP for data exchange */
3734 	if (ldcp->tstate != TS_UP) {
3735 		DWARN(ldcp->id,
3736 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3737 		    ldcp->id);
3738 		*sizep = 0;
3739 		rv = ECONNRESET;
3740 	} else {
3741 		rv = ldcp->write_p(ldcp, buf, sizep);
3742 	}
3743 
3744 	mutex_exit(&ldcp->tx_lock);
3745 
3746 	return (rv);
3747 }
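
/*
 * Illustrative sketch (hypothetical client code): a simple blocking
 * send built on ldc_write(), backing off while the Tx queue is full
 * (EWOULDBLOCK) or the Tx lock is contended (EAGAIN).
 */
#ifdef LDC_EXAMPLE_CLIENT
static int
example_send(ldc_handle_t handle, caddr_t buf, size_t len)
{
	size_t	nbytes;
	int	rv;

	for (;;) {
		nbytes = len;
		rv = ldc_write(handle, buf, &nbytes);
		if (rv != EWOULDBLOCK && rv != EAGAIN)
			return (rv);
		delay(drv_usectohz(1000));	/* wait 1 ms and retry */
	}
}
#endif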
3748 
3749 /*
3750  * Write a raw packet to the channel
3751  * On return, size contains the number of bytes written.
3752  */
3753 static int
3754 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3755 {
3756 	ldc_msg_t 	*ldcmsg;
3757 	uint64_t 	tx_head, tx_tail, new_tail;
3758 	int		rv = 0;
3759 	size_t		size;
3760 
3761 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3762 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3763 
3764 	size = *sizep;
3765 
3766 	/*
3767 	 * Check to see if the packet size is less than or
3768 	 * equal to the packet size supported in raw mode
3769 	 */
3770 	if (size > ldcp->pkt_payload) {
3771 		DWARN(ldcp->id,
3772 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3773 		    ldcp->id, *sizep);
3774 		*sizep = 0;
3775 		return (EMSGSIZE);
3776 	}
3777 
3778 	/* get the qptrs for the tx queue */
3779 	rv = hv_ldc_tx_get_state(ldcp->id,
3780 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3781 	if (rv != 0) {
3782 		cmn_err(CE_WARN,
3783 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3784 		*sizep = 0;
3785 		return (EIO);
3786 	}
3787 
3788 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3789 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3790 		DWARN(ldcp->id,
3791 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3792 
3793 		*sizep = 0;
3794 		if (mutex_tryenter(&ldcp->lock)) {
3795 			i_ldc_reset(ldcp, B_FALSE);
3796 			mutex_exit(&ldcp->lock);
3797 		} else {
3798 			/*
3799 			 * Release Tx lock, and then reacquire channel
3800 			 * and Tx lock in correct order
3801 			 */
3802 			mutex_exit(&ldcp->tx_lock);
3803 			mutex_enter(&ldcp->lock);
3804 			mutex_enter(&ldcp->tx_lock);
3805 			i_ldc_reset(ldcp, B_FALSE);
3806 			mutex_exit(&ldcp->lock);
3807 		}
3808 		return (ECONNRESET);
3809 	}
3810 
3811 	tx_tail = ldcp->tx_tail;
3812 	tx_head = ldcp->tx_head;
3813 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3814 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3815 
3816 	if (new_tail == tx_head) {
3817 		DWARN(DBG_ALL_LDCS,
3818 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3819 		*sizep = 0;
3820 		return (EWOULDBLOCK);
3821 	}
3822 
3823 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3824 	    ldcp->id, size);
3825 
3826 	/* Send the data now */
3827 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3828 
3829 	/* copy the data into pkt */
3830 	bcopy((uint8_t *)buf, ldcmsg, size);
3831 
3832 	/* increment tail */
3833 	tx_tail = new_tail;
3834 
3835 	/*
3836 	 * All packets have been copied into the TX queue
3837 	 * update the tail ptr in the HV
3838 	 */
3839 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3840 	if (rv) {
3841 		if (rv == EWOULDBLOCK) {
3842 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3843 			    ldcp->id);
3844 			*sizep = 0;
3845 			return (EWOULDBLOCK);
3846 		}
3847 
3848 		*sizep = 0;
3849 		if (mutex_tryenter(&ldcp->lock)) {
3850 			i_ldc_reset(ldcp, B_FALSE);
3851 			mutex_exit(&ldcp->lock);
3852 		} else {
3853 			/*
3854 			 * Release Tx lock, and then reacquire channel
3855 			 * and Tx lock in correct order
3856 			 */
3857 			mutex_exit(&ldcp->tx_lock);
3858 			mutex_enter(&ldcp->lock);
3859 			mutex_enter(&ldcp->tx_lock);
3860 			i_ldc_reset(ldcp, B_FALSE);
3861 			mutex_exit(&ldcp->lock);
3862 		}
3863 		return (ECONNRESET);
3864 	}
3865 
3866 	ldcp->tx_tail = tx_tail;
3867 	*sizep = size;
3868 
3869 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3870 
3871 	return (rv);
3872 }
3873 
3874 
3875 /*
3876  * Write the specified number of bytes to the channel
3877  * in multiple pkts of pkt_payload size. Each
3878  * packet is tagged with a unique packet ID in
3879  * the case of a reliable link.
3880  *
3881  * On return, size contains the number of bytes written.
3882  * This function needs to ensure that the write size is < MTU size
3883  */
3884 static int
3885 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3886 {
3887 	ldc_msg_t 	*ldcmsg;
3888 	uint64_t 	tx_head, tx_tail, new_tail, start;
3889 	uint64_t	txq_size_mask, numavail;
3890 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3891 	size_t 		len, bytes_written = 0, remaining;
3892 	int		rv;
3893 	uint32_t	curr_seqid;
3894 
3895 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3896 
3897 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3898 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3899 		ldcp->mode == LDC_MODE_STREAM);
3900 
3901 	/* compute mask for increment */
3902 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3903 
3904 	/* get the qptrs for the tx queue */
3905 	rv = hv_ldc_tx_get_state(ldcp->id,
3906 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3907 	if (rv != 0) {
3908 		cmn_err(CE_WARN,
3909 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3910 		*size = 0;
3911 		return (EIO);
3912 	}
3913 
3914 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3915 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3916 		DWARN(ldcp->id,
3917 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3918 		*size = 0;
3919 		if (mutex_tryenter(&ldcp->lock)) {
3920 			i_ldc_reset(ldcp, B_FALSE);
3921 			mutex_exit(&ldcp->lock);
3922 		} else {
3923 			/*
3924 			 * Release Tx lock, and then reacquire channel
3925 			 * and Tx lock in correct order
3926 			 */
3927 			mutex_exit(&ldcp->tx_lock);
3928 			mutex_enter(&ldcp->lock);
3929 			mutex_enter(&ldcp->tx_lock);
3930 			i_ldc_reset(ldcp, B_FALSE);
3931 			mutex_exit(&ldcp->lock);
3932 		}
3933 		return (ECONNRESET);
3934 	}
3935 
3936 	tx_tail = ldcp->tx_tail;
3937 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3938 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3939 
3940 	/*
3941 	 * Link mode determines whether we use HV Tx head or the
3942 	 * private protocol head (corresponding to last ACKd pkt) for
3943 	 * determining how much we can write
3944 	 */
3945 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3946 		ldcp->mode == LDC_MODE_STREAM)
3947 		? ldcp->tx_ackd_head : ldcp->tx_head;
3948 	if (new_tail == tx_head) {
3949 		DWARN(DBG_ALL_LDCS,
3950 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3951 		*size = 0;
3952 		return (EWOULDBLOCK);
3953 	}
3954 
3955 	/*
3956 	 * Make sure that the LDC Tx queue has enough space
3957 	 */
3958 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3959 		+ ldcp->tx_q_entries - 1;
3960 	numavail %= ldcp->tx_q_entries;
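
	/*
	 * Worked example (illustrative): with an 8-entry queue, head at
	 * slot 2 and tail at slot 5, the expression above yields
	 * (2 - 5 + 8 - 1) % 8 = 4 free slots; three entries are queued
	 * and one slot is always left empty so a full queue can be
	 * distinguished from an empty one.
	 */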
3961 
3962 	if (*size > (numavail * ldcp->pkt_payload)) {
3963 		DWARN(DBG_ALL_LDCS,
3964 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3965 		return (EWOULDBLOCK);
3966 	}
3967 
3968 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3969 	    ldcp->id, *size);
3970 
3971 	/* Send the data now */
3972 	bytes_written = 0;
3973 	curr_seqid = ldcp->last_msg_snt;
3974 	start = tx_tail;
3975 
3976 	while (*size > bytes_written) {
3977 
3978 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3979 
3980 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3981 			ldcp->mode == LDC_MODE_STREAM)
3982 			? ldcmsg->rdata : ldcmsg->udata);
3983 
3984 		ldcmsg->type = LDC_DATA;
3985 		ldcmsg->stype = LDC_INFO;
3986 		ldcmsg->ctrl = 0;
3987 
3988 		remaining = *size - bytes_written;
3989 		len = min(ldcp->pkt_payload, remaining);
3990 		ldcmsg->env = (uint8_t)len;
3991 
3992 		curr_seqid++;
3993 		ldcmsg->seqid = curr_seqid;
3994 
3995 		/* copy the data into pkt */
3996 		bcopy(source, msgbuf, len);
3997 
3998 		source += len;
3999 		bytes_written += len;
4000 
4001 		/* increment tail */
4002 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
4003 
4004 		ASSERT(tx_tail != tx_head);
4005 	}
4006 
4007 	/* Set the start and stop bits */
4008 	ldcmsg->env |= LDC_FRAG_STOP;
4009 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
4010 	ldcmsg->env |= LDC_FRAG_START;
4011 
4012 	/*
4013 	 * All packets have been copied into the TX queue
4014 	 * update the tail ptr in the HV
4015 	 */
4016 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
4017 	if (rv == 0) {
4018 		ldcp->tx_tail = tx_tail;
4019 		ldcp->last_msg_snt = curr_seqid;
4020 		*size = bytes_written;
4021 	} else {
4022 		int rv2;
4023 
4024 		if (rv != EWOULDBLOCK) {
4025 			*size = 0;
4026 			if (mutex_tryenter(&ldcp->lock)) {
4027 				i_ldc_reset(ldcp, B_FALSE);
4028 				mutex_exit(&ldcp->lock);
4029 			} else {
4030 				/*
4031 				 * Release Tx lock, and then reacquire channel
4032 				 * and Tx lock in correct order
4033 				 */
4034 				mutex_exit(&ldcp->tx_lock);
4035 				mutex_enter(&ldcp->lock);
4036 				mutex_enter(&ldcp->tx_lock);
4037 				i_ldc_reset(ldcp, B_FALSE);
4038 				mutex_exit(&ldcp->lock);
4039 			}
4040 			return (ECONNRESET);
4041 		}
4042 
4043 		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
4044 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
4045 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
4046 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
4047 
4048 		rv2 = hv_ldc_tx_get_state(ldcp->id,
4049 		    &tx_head, &tx_tail, &ldcp->link_state);
4050 
4051 		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
4052 			"(head 0x%x, tail 0x%x state 0x%x)\n",
4053 			rv2, tx_head, tx_tail, ldcp->link_state);
4054 
4055 		*size = 0;
4056 	}
4057 
4058 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4059 
4060 	return (rv);
4061 }
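
/*
 * Illustrative example (not part of the driver): a 150-byte write on a
 * channel with, say, a 56-byte pkt_payload is fragmented by the loop above
 * into three packets carrying 56, 56 and 38 payload bytes. The first packet
 * has LDC_FRAG_START set in its envelope, the last has LDC_FRAG_STOP, and
 * each carries a consecutive seqid so the receiver can reassemble the write
 * and detect lost packets.
 */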
4062 
4063 /*
4064  * Write the specified number of bytes to the channel
4065  * in stream mode. The write is truncated to the channel
4066  * MTU and then handed off to i_ldc_write_packet(), which
4067  * fragments it into pkt_payload sized packets, each tagged
4068  * with a unique packet ID.
4069  *
4070  * On return, size contains the number of bytes written.
4071  */
4072 static int
4073 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4074 {
4075 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4076 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4077 
4078 	/* Truncate packet to max of MTU size */
4079 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4080 	return (i_ldc_write_packet(ldcp, buf, sizep));
4081 }
4082 
4083 
4084 /*
4085  * Interfaces for channel nexus to register/unregister with LDC module
4086  * The nexus will register functions to be used to register individual
4087  * channels with the nexus and enable interrupts for the channels
4088  */
4089 int
4090 ldc_register(ldc_cnex_t *cinfo)
4091 {
4092 	ldc_chan_t	*ldcp;
4093 
4094 	if (cinfo == NULL || cinfo->dip == NULL ||
4095 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4096 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4097 	    cinfo->clr_intr == NULL) {
4098 
4099 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4100 		return (EINVAL);
4101 	}
4102 
4103 	mutex_enter(&ldcssp->lock);
4104 
4105 	/* nexus registration */
4106 	ldcssp->cinfo.dip = cinfo->dip;
4107 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4108 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4109 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4110 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4111 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4112 
4113 	/* register any channels that might have been previously initialized */
4114 	ldcp = ldcssp->chan_list;
4115 	while (ldcp) {
4116 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4117 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4118 			(void) i_ldc_register_channel(ldcp);
4119 
4120 		ldcp = ldcp->next;
4121 	}
4122 
4123 	mutex_exit(&ldcssp->lock);
4124 
4125 	return (0);
4126 }
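
/*
 * Hypothetical usage sketch: during attach, a channel nexus driver fills
 * in an ldc_cnex_t with its dev_info pointer and callbacks and registers
 * it. The callback names are assumptions of this sketch, not functions
 * defined in this file.
 *
 *	ldc_cnex_t cinfo;
 *
 *	cinfo.dip = dip;
 *	cinfo.reg_chan = mynex_reg_chan;
 *	cinfo.unreg_chan = mynex_unreg_chan;
 *	cinfo.add_intr = mynex_add_intr;
 *	cinfo.rem_intr = mynex_rem_intr;
 *	cinfo.clr_intr = mynex_clr_intr;
 *
 *	if (ldc_register(&cinfo) != 0)
 *		... fail the attach ...
 */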
4127 
4128 int
4129 ldc_unregister(ldc_cnex_t *cinfo)
4130 {
4131 	if (cinfo == NULL || cinfo->dip == NULL) {
4132 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4133 		return (EINVAL);
4134 	}
4135 
4136 	mutex_enter(&ldcssp->lock);
4137 
4138 	if (cinfo->dip != ldcssp->cinfo.dip) {
4139 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4140 		mutex_exit(&ldcssp->lock);
4141 		return (EINVAL);
4142 	}
4143 
4144 	/* nexus unregister */
4145 	ldcssp->cinfo.dip = NULL;
4146 	ldcssp->cinfo.reg_chan = NULL;
4147 	ldcssp->cinfo.unreg_chan = NULL;
4148 	ldcssp->cinfo.add_intr = NULL;
4149 	ldcssp->cinfo.rem_intr = NULL;
4150 	ldcssp->cinfo.clr_intr = NULL;
4151 
4152 	mutex_exit(&ldcssp->lock);
4153 
4154 	return (0);
4155 }
4156 
4157 
4158 /* ------------------------------------------------------------------------- */
4159 
4160 /*
4161  * Allocate a memory handle for the channel and link it into the
4162  * list; the channel's memory map table is allocated later, when
4163  * the first handle is bound via ldc_mem_bind_handle()
4164  */
4165 int
4166 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4167 {
4168 	ldc_chan_t 	*ldcp;
4169 	ldc_mhdl_t	*mhdl;
4170 
4171 	if (handle == NULL) {
4172 		DWARN(DBG_ALL_LDCS,
4173 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4174 		return (EINVAL);
4175 	}
4176 	ldcp = (ldc_chan_t *)handle;
4177 
4178 	mutex_enter(&ldcp->lock);
4179 
4180 	/* check to see if channel is initialized */
4181 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4182 		DWARN(ldcp->id,
4183 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4184 		    ldcp->id);
4185 		mutex_exit(&ldcp->lock);
4186 		return (EINVAL);
4187 	}
4188 
4189 	/* allocate handle for channel */
4190 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4191 
4192 	/* initialize the lock */
4193 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4194 
4195 	mhdl->myshadow = B_FALSE;
4196 	mhdl->memseg = NULL;
4197 	mhdl->ldcp = ldcp;
4198 	mhdl->status = LDC_UNBOUND;
4199 
4200 	/* insert memory handle (@ head) into list */
4201 	if (ldcp->mhdl_list == NULL) {
4202 		ldcp->mhdl_list = mhdl;
4203 		mhdl->next = NULL;
4204 	} else {
4205 		/* insert @ head */
4206 		mhdl->next = ldcp->mhdl_list;
4207 		ldcp->mhdl_list = mhdl;
4208 	}
4209 
4210 	/* return the handle */
4211 	*mhandle = (ldc_mem_handle_t)mhdl;
4212 
4213 	mutex_exit(&ldcp->lock);
4214 
4215 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4216 	    ldcp->id, mhdl);
4217 
4218 	return (0);
4219 }
4220 
4221 /*
4222  * Free memory handle for the channel and unlink it from the list
4223  */
4224 int
4225 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4226 {
4227 	ldc_mhdl_t 	*mhdl, *phdl;
4228 	ldc_chan_t 	*ldcp;
4229 
4230 	if (mhandle == NULL) {
4231 		DWARN(DBG_ALL_LDCS,
4232 		    "ldc_mem_free_handle: invalid memory handle\n");
4233 		return (EINVAL);
4234 	}
4235 	mhdl = (ldc_mhdl_t *)mhandle;
4236 
4237 	mutex_enter(&mhdl->lock);
4238 
4239 	ldcp = mhdl->ldcp;
4240 
4241 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4242 		DWARN(ldcp->id,
4243 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4244 		    mhdl);
4245 		mutex_exit(&mhdl->lock);
4246 		return (EINVAL);
4247 	}
4248 	mutex_exit(&mhdl->lock);
4249 
4250 	mutex_enter(&ldcp->mlist_lock);
4251 
4252 	phdl = ldcp->mhdl_list;
4253 
4254 	/* first handle */
4255 	if (phdl == mhdl) {
4256 		ldcp->mhdl_list = mhdl->next;
4257 		mutex_destroy(&mhdl->lock);
4258 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4259 
4260 		D1(ldcp->id,
4261 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4262 		    ldcp->id, mhdl);
4263 	} else {
4264 		/* walk the list - unlink and free */
4265 		while (phdl != NULL) {
4266 			if (phdl->next == mhdl) {
4267 				phdl->next = mhdl->next;
4268 				mutex_destroy(&mhdl->lock);
4269 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4270 				D1(ldcp->id,
4271 				    "ldc_mem_free_handle: (0x%llx) freed "
4272 				    "handle 0x%llx\n", ldcp->id, mhdl);
4273 				break;
4274 			}
4275 			phdl = phdl->next;
4276 		}
4277 	}
4278 
4279 	if (phdl == NULL) {
4280 		DWARN(ldcp->id,
4281 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4282 		mutex_exit(&ldcp->mlist_lock);
4283 		return (EINVAL);
4284 	}
4285 
4286 	mutex_exit(&ldcp->mlist_lock);
4287 
4288 	return (0);
4289 }
4290 
4291 /*
4292  * Bind a memory handle to a virtual address.
4293  * The virtual address is converted to the corresponding real addresses.
4294  * Returns pointer to the first ldc_mem_cookie and the total number
4295  * of cookies for this virtual address. Other cookies can be obtained
4296  * using the ldc_mem_nextcookie() call. If the pages are stored in
4297  * consecutive locations in the table, a single cookie corresponding to
4298  * the first location is returned. The cookie size spans all the entries.
4299  *
4300  * If the VA corresponds to a page that is already being exported, reuse
4301  * the page and do not export it again. Bump the page's use count.
4302  */
4303 int
4304 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4305     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4306 {
4307 	ldc_mhdl_t	*mhdl;
4308 	ldc_chan_t 	*ldcp;
4309 	ldc_mtbl_t	*mtbl;
4310 	ldc_memseg_t	*memseg;
4311 	ldc_mte_t	tmp_mte;
4312 	uint64_t	index, prev_index = 0;
4313 	int64_t		cookie_idx;
4314 	uintptr_t	raddr, ra_aligned;
4315 	uint64_t	psize, poffset, v_offset;
4316 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4317 	pgcnt_t		npages;
4318 	caddr_t		v_align, addr;
4319 	int 		i, rv;
4320 
4321 	if (mhandle == NULL) {
4322 		DWARN(DBG_ALL_LDCS,
4323 		    "ldc_mem_bind_handle: invalid memory handle\n");
4324 		return (EINVAL);
4325 	}
4326 	mhdl = (ldc_mhdl_t *)mhandle;
4327 	ldcp = mhdl->ldcp;
4328 
4329 	/* clear count */
4330 	*ccount = 0;
4331 
4332 	mutex_enter(&mhdl->lock);
4333 
4334 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4335 		DWARN(ldcp->id,
4336 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4337 		    mhandle);
4338 		mutex_exit(&mhdl->lock);
4339 		return (EINVAL);
4340 	}
4341 
4342 	/* Force address and size to be 8-byte aligned */
4343 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4344 		DWARN(ldcp->id,
4345 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4346 		mutex_exit(&mhdl->lock);
4347 		return (EINVAL);
4348 	}
4349 
4350 	/*
4351 	 * If this channel is binding a memory handle for the
4352 	 * first time, allocate a memory map table for it and initialize it
4353 	 */
4354 	if ((mtbl = ldcp->mtbl) == NULL) {
4355 
4356 		mutex_enter(&ldcp->lock);
4357 
4358 		/* Allocate and initialize the map table structure */
4359 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4360 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4361 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4362 		mtbl->next_entry = NULL;
4363 		mtbl->contigmem = B_TRUE;
4364 
4365 		/* Allocate the table itself */
4366 		mtbl->table = (ldc_mte_slot_t *)
4367 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4368 		if (mtbl->table == NULL) {
4369 
4370 			/* allocate a page of memory using kmem_alloc */
4371 			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
4372 			mtbl->size = MMU_PAGESIZE;
4373 			mtbl->contigmem = B_FALSE;
4374 			mtbl->num_entries = mtbl->num_avail =
4375 				mtbl->size / sizeof (ldc_mte_slot_t);
4376 			DWARN(ldcp->id,
4377 			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
4378 			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
4379 		}
4380 
4381 		/* zero out the memory */
4382 		bzero(mtbl->table, mtbl->size);
4383 
4384 		/* initialize the lock */
4385 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4386 
4387 		/* register table for this channel */
4388 		rv = hv_ldc_set_map_table(ldcp->id,
4389 		    va_to_pa(mtbl->table), mtbl->num_entries);
4390 		if (rv != 0) {
4391 			cmn_err(CE_WARN,
4392 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4393 			    ldcp->id, rv);
4394 			if (mtbl->contigmem)
4395 				contig_mem_free(mtbl->table, mtbl->size);
4396 			else
4397 				kmem_free(mtbl->table, mtbl->size);
4398 			mutex_destroy(&mtbl->lock);
4399 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4400 			mutex_exit(&ldcp->lock);
4401 			mutex_exit(&mhdl->lock);
4402 			return (EIO);
4403 		}
4404 
4405 		ldcp->mtbl = mtbl;
4406 		mutex_exit(&ldcp->lock);
4407 
4408 		D1(ldcp->id,
4409 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4410 		    ldcp->id, ldcp->mtbl->table);
4411 	}
4412 
4413 	/* FUTURE: get the page size, pgsz code, and shift */
4414 	pg_size = MMU_PAGESIZE;
4415 	pg_size_code = page_szc(pg_size);
4416 	pg_shift = page_get_shift(pg_size_code);
4417 	pg_mask = ~(pg_size - 1);
4418 
4419 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4420 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4421 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4422 
4423 	/* aligned VA and its offset */
4424 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4425 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4426 
4427 	npages = (len+v_offset)/pg_size;
4428 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
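
	/*
	 * Worked example (illustrative): with an 8K (0x2000) page size,
	 * len = 0x3000 and v_offset = 0x1800 give
	 * (0x3000 + 0x1800) / 0x2000 = 2 with a nonzero remainder,
	 * so npages rounds up to 3.
	 */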
4429 
4430 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4431 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4432 	    ldcp->id, vaddr, v_align, v_offset, npages);
4433 
4434 	/* lock the memory table - exclusive access to channel */
4435 	mutex_enter(&mtbl->lock);
4436 
4437 	if (npages > mtbl->num_avail) {
4438 		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4439 		    ldcp->id);
4440 		mutex_exit(&mtbl->lock);
4441 		mutex_exit(&mhdl->lock);
4442 		return (ENOMEM);
4443 	}
4444 
4445 	/* Allocate a memseg structure */
4446 	memseg = mhdl->memseg =
4447 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4448 
4449 	/* Allocate memory to store all pages and cookies */
4450 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4451 	memseg->cookies =
4452 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4453 
4454 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4455 	    ldcp->id, npages);
4456 
4457 	addr = v_align;
4458 
4459 	/*
4460 	 * Check if direct shared memory map is enabled; if not, change
4461 	 * the mapping type to LDC_SHADOW_MAP.
4462 	 */
4463 	if (ldc_shmem_enabled == 0)
4464 		mtype = LDC_SHADOW_MAP;
4465 
4466 	/*
4467 	 * Table slots are used in a round-robin manner. The algorithm permits
4468 	 * inserting duplicate entries. Slots allocated earlier will typically
4469 	 * get freed before we get back to reusing the slot. Inserting duplicate
4470 	 * entries should be OK as we only look up entries using the cookie addr
4471 	 * i.e. tbl index, during export, unexport and copy operations.
4472 	 *
4473 	 * One implementation that was tried was to search for a duplicate
4474 	 * page entry first and reuse it. The search overhead is very high and
4475 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4476 	 * So it does make sense to avoid searching for duplicates.
4477 	 *
4478 	 * But during the process of searching for a free slot, if we find a
4479 	 * duplicate entry we will go ahead and use it, and bump its use count.
4480 	 */
4481 
4482 	/* index to start searching from */
4483 	index = mtbl->next_entry;
4484 	cookie_idx = -1;
4485 
4486 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4487 
4488 	if (mtype & LDC_DIRECT_MAP) {
4489 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4490 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4491 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4492 	}
4493 
4494 	if (mtype & LDC_SHADOW_MAP) {
4495 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4496 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4497 	}
4498 
4499 	if (mtype & LDC_IO_MAP) {
4500 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4501 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4502 	}
4503 
4504 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4505 
4506 	tmp_mte.mte_pgszc = pg_size_code;
4507 
4508 	/* initialize each mem table entry */
4509 	for (i = 0; i < npages; i++) {
4510 
4511 		/* check if slot is available in the table */
4512 		while (mtbl->table[index].entry.ll != 0) {
4513 
4514 			index = (index + 1) % mtbl->num_entries;
4515 
4516 			if (index == mtbl->next_entry) {
4517 				/* we have looped around */
4518 				DWARN(DBG_ALL_LDCS,
4519 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4520 				    "entry\n", ldcp->id);
4521 				*ccount = 0;
4522 
4523 				/* NOTE: free memory, remove previous entries */
4524 				/* this shouldn't happen as num_avail was ok */
4525 
4526 				mutex_exit(&mtbl->lock);
4527 				mutex_exit(&mhdl->lock);
4528 				return (ENOMEM);
4529 			}
4530 		}
4531 
4532 		/* get the real address */
4533 		raddr = va_to_pa((void *)addr);
4534 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4535 
4536 		/* build the mte */
4537 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4538 
4539 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4540 
4541 		/* update entry in table */
4542 		mtbl->table[index].entry = tmp_mte;
4543 
4544 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4545 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4546 
4547 		/* calculate the size and offset for this export range */
4548 		if (i == 0) {
4549 			/* first page */
4550 			psize = min((pg_size - v_offset), len);
4551 			poffset = v_offset;
4552 
4553 		} else if (i == (npages - 1)) {
4554 			/* last page */
4555 			psize =	(((uintptr_t)(vaddr + len)) &
4556 				    ((uint64_t)(pg_size-1)));
4557 			if (psize == 0)
4558 				psize = pg_size;
4559 			poffset = 0;
4560 
4561 		} else {
4562 			/* middle pages */
4563 			psize = pg_size;
4564 			poffset = 0;
4565 		}
4566 
4567 		/* store entry for this page */
4568 		memseg->pages[i].index = index;
4569 		memseg->pages[i].raddr = raddr;
4570 		memseg->pages[i].offset = poffset;
4571 		memseg->pages[i].size = psize;
4572 		memseg->pages[i].mte = &(mtbl->table[index]);
4573 
4574 		/* create the cookie */
4575 		if (i == 0 || (index != prev_index + 1)) {
4576 			cookie_idx++;
4577 			memseg->cookies[cookie_idx].addr =
4578 				IDX2COOKIE(index, pg_size_code, pg_shift);
4579 			memseg->cookies[cookie_idx].addr |= poffset;
4580 			memseg->cookies[cookie_idx].size = psize;
4581 
4582 		} else {
4583 			memseg->cookies[cookie_idx].size += psize;
4584 		}
4585 
4586 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4587 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4588 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4589 		    ldcp->id, addr, index, raddr, psize, poffset);
4590 
4591 		/* decrement number of available entries */
4592 		mtbl->num_avail--;
4593 
4594 		/* increment va by page size */
4595 		addr += pg_size;
4596 
4597 		/* increment index */
4598 		prev_index = index;
4599 		index = (index + 1) % mtbl->num_entries;
4600 
4601 		/* save the next slot */
4602 		mtbl->next_entry = index;
4603 	}
4604 
4605 	mutex_exit(&mtbl->lock);
4606 
4607 	/* memory handle = bound */
4608 	mhdl->mtype = mtype;
4609 	mhdl->perm = perm;
4610 	mhdl->status = LDC_BOUND;
4611 
4612 	/* update memseg_t */
4613 	memseg->vaddr = vaddr;
4614 	memseg->raddr = memseg->pages[0].raddr;
4615 	memseg->size = len;
4616 	memseg->npages = npages;
4617 	memseg->ncookies = cookie_idx + 1;
4618 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4619 
4620 	/* return count and first cookie */
4621 	*ccount = memseg->ncookies;
4622 	cookie->addr = memseg->cookies[0].addr;
4623 	cookie->size = memseg->cookies[0].size;
4624 
4625 	D1(ldcp->id,
4626 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4627 	    "pgs=0x%llx cookies=0x%llx\n",
4628 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4629 
4630 	mutex_exit(&mhdl->lock);
4631 	return (0);
4632 }
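
/*
 * Hypothetical export sketch ('chan' is an open channel handle and 'buf'
 * is an 8-byte aligned kernel buffer; both are assumptions of this
 * sketch). A typical exporter allocates a handle, binds it, and sends the
 * resulting cookie(s) to its peer via its own protocol:
 *
 *	ldc_mem_handle_t mh;
 *	ldc_mem_cookie_t ck;
 *	uint32_t ccount;
 *
 *	(void) ldc_mem_alloc_handle(chan, &mh);
 *	if (ldc_mem_bind_handle(mh, buf, buflen, LDC_SHADOW_MAP,
 *	    LDC_MEM_RW, &ck, &ccount) == 0) {
 *		... transmit ck (and, if ccount > 1, the cookies
 *		... returned by ldc_mem_nextcookie()) to the peer
 *		(void) ldc_mem_unbind_handle(mh);
 *	}
 *	(void) ldc_mem_free_handle(mh);
 */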
4633 
4634 /*
4635  * Return the next cookie associated with the specified memory handle
4636  */
4637 int
4638 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4639 {
4640 	ldc_mhdl_t	*mhdl;
4641 	ldc_chan_t 	*ldcp;
4642 	ldc_memseg_t	*memseg;
4643 
4644 	if (mhandle == NULL) {
4645 		DWARN(DBG_ALL_LDCS,
4646 		    "ldc_mem_nextcookie: invalid memory handle\n");
4647 		return (EINVAL);
4648 	}
4649 	mhdl = (ldc_mhdl_t *)mhandle;
4650 
4651 	mutex_enter(&mhdl->lock);
4652 
4653 	ldcp = mhdl->ldcp;
4654 	memseg = mhdl->memseg;
4655 
4656 	if (cookie == NULL) {
4657 		DWARN(ldcp->id,
4658 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4659 		    ldcp->id);
4660 		mutex_exit(&mhdl->lock);
4661 		return (EINVAL);
4662 	}
4663 
4664 	if (memseg->next_cookie != 0) {
4665 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4666 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4667 		memseg->next_cookie++;
4668 		if (memseg->next_cookie == memseg->ncookies)
4669 			memseg->next_cookie = 0;
4670 
4671 	} else {
4672 		DWARN(ldcp->id,
4673 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4674 		cookie->addr = 0;
4675 		cookie->size = 0;
4676 		mutex_exit(&mhdl->lock);
4677 		return (EINVAL);
4678 	}
4679 
4680 	D1(ldcp->id,
4681 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4682 	    ldcp->id, cookie->addr, cookie->size);
4683 
4684 	mutex_exit(&mhdl->lock);
4685 	return (0);
4686 }
4687 
4688 /*
4689  * Unbind the virtual memory region associated with the specified
4690  * memory handle. All associated cookies are freed and the corresponding
4691  * RA space is no longer exported.
4692  */
4693 int
4694 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4695 {
4696 	ldc_mhdl_t	*mhdl;
4697 	ldc_chan_t 	*ldcp;
4698 	ldc_mtbl_t	*mtbl;
4699 	ldc_memseg_t	*memseg;
4700 	uint64_t	cookie_addr;
4701 	uint64_t	pg_shift, pg_size_code;
4702 	int		i, rv;
4703 
4704 	if (mhandle == NULL) {
4705 		DWARN(DBG_ALL_LDCS,
4706 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4707 		return (EINVAL);
4708 	}
4709 	mhdl = (ldc_mhdl_t *)mhandle;
4710 
4711 	mutex_enter(&mhdl->lock);
4712 
4713 	if (mhdl->status == LDC_UNBOUND) {
4714 		DWARN(DBG_ALL_LDCS,
4715 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4716 		    mhandle);
4717 		mutex_exit(&mhdl->lock);
4718 		return (EINVAL);
4719 	}
4720 
4721 	ldcp = mhdl->ldcp;
4722 	mtbl = ldcp->mtbl;
4723 
4724 	memseg = mhdl->memseg;
4725 
4726 	/* lock the memory table - exclusive access to channel */
4727 	mutex_enter(&mtbl->lock);
4728 
4729 	/* undo the pages exported */
4730 	for (i = 0; i < memseg->npages; i++) {
4731 
4732 		/* check for mapped pages, revocation cookie != 0 */
4733 		if (memseg->pages[i].mte->cookie) {
4734 
4735 			pg_size_code = page_szc(memseg->pages[i].size);
4736 			pg_shift = page_get_shift(memseg->pages[i].size);
4737 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4738 			    pg_size_code, pg_shift);
4739 
4740 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4741 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4742 			    cookie_addr, memseg->pages[i].mte->cookie);
4743 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4744 			    memseg->pages[i].mte->cookie);
4745 			if (rv) {
4746 				DWARN(ldcp->id,
4747 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4748 				    "revoke mapping, cookie %llx\n", ldcp->id,
4749 				    cookie_addr);
4750 			}
4751 		}
4752 
4753 		/* clear the entry from the table */
4754 		memseg->pages[i].mte->entry.ll = 0;
4755 		mtbl->num_avail++;
4756 	}
4757 	mutex_exit(&mtbl->lock);
4758 
4759 	/* free the allocated memseg and page structures */
4760 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4761 	kmem_free(memseg->cookies,
4762 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4763 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4764 
4765 	/* uninitialize the memory handle */
4766 	mhdl->memseg = NULL;
4767 	mhdl->status = LDC_UNBOUND;
4768 
4769 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4770 	    ldcp->id, mhdl);
4771 
4772 	mutex_exit(&mhdl->lock);
4773 	return (0);
4774 }
4775 
4776 /*
4777  * Get information about a memory handle: its status and, if bound
4778  * or mapped, the base addresses, mapping type and permissions.
4779  */
4780 int
4781 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4782 {
4783 	ldc_mhdl_t	*mhdl;
4784 
4785 	if (mhandle == NULL) {
4786 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4787 		return (EINVAL);
4788 	}
4789 	mhdl = (ldc_mhdl_t *)mhandle;
4790 
4791 	if (minfo == NULL) {
4792 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4793 		return (EINVAL);
4794 	}
4795 
4796 	mutex_enter(&mhdl->lock);
4797 
4798 	minfo->status = mhdl->status;
4799 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4800 		minfo->vaddr = mhdl->memseg->vaddr;
4801 		minfo->raddr = mhdl->memseg->raddr;
4802 		minfo->mtype = mhdl->mtype;
4803 		minfo->perm = mhdl->perm;
4804 	}
4805 	mutex_exit(&mhdl->lock);
4806 
4807 	return (0);
4808 }
4809 
4810 /*
4811  * Copy data either from or to the client specified virtual address
4812  * space to or from the exported memory associated with the cookies.
4813  * The direction argument determines whether the data is read from or
4814  * written to exported memory.
4815  */
4816 int
4817 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4818     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4819 {
4820 	ldc_chan_t 	*ldcp;
4821 	uint64_t	local_voff, local_valign;
4822 	uint64_t	cookie_addr, cookie_size;
4823 	uint64_t	pg_shift, pg_size, pg_size_code;
4824 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4825 	uint64_t	local_ra, local_poff, local_psize;
4826 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4827 	pgcnt_t		npages;
4828 	size_t		len = *size;
4829 	int 		i, rv = 0;
4830 
4831 	uint64_t	chid;
4832 
4833 	if (handle == NULL) {
4834 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4835 		return (EINVAL);
4836 	}
4837 	ldcp = (ldc_chan_t *)handle;
4838 	chid = ldcp->id;
4839 
4840 	/* check to see if channel is UP */
4841 	if (ldcp->tstate != TS_UP) {
4842 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4843 		    chid);
4844 		return (ECONNRESET);
4845 	}
4846 
4847 	/* Force address and size to be 8-byte aligned */
4848 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4849 		DWARN(chid,
4850 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4851 		return (EINVAL);
4852 	}
4853 
4854 	/* Find the size of the exported memory */
4855 	export_size = 0;
4856 	for (i = 0; i < ccount; i++)
4857 		export_size += cookies[i].size;
4858 
4859 	/* check to see if offset is valid */
4860 	if (off > export_size) {
4861 		DWARN(chid,
4862 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4863 		    chid);
4864 		return (EINVAL);
4865 	}
4866 
4867 	/*
4868 	 * Check to see if the export size is smaller than the size we
4869 	 * are requesting to copy - if so flag an error
4870 	 */
4871 	if ((export_size - off) < *size) {
4872 		DWARN(chid,
4873 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4874 		    chid);
4875 		return (EINVAL);
4876 	}
4877 
4878 	total_bal = min(export_size, *size);
4879 
4880 	/* FUTURE: get the page size, pgsz code, and shift */
4881 	pg_size = MMU_PAGESIZE;
4882 	pg_size_code = page_szc(pg_size);
4883 	pg_shift = page_get_shift(pg_size_code);
4884 
4885 	D1(chid, "ldc_mem_copy: copying data "
4886 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4887 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
4888 
4889 	/* aligned VA and its offset */
4890 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4891 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4892 
4893 	npages = (len+local_voff)/pg_size;
4894 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4895 
4896 	D1(chid,
4897 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4898 	    chid, vaddr, local_valign, local_voff, npages);
4899 
4900 	local_ra = va_to_pa((void *)local_valign);
4901 	local_poff = local_voff;
4902 	local_psize = min(len, (pg_size - local_voff));
4903 
4904 	len -= local_psize;
4905 
4906 	/*
4907 	 * find the first cookie in the list of cookies
4908 	 * if the offset passed in is not zero
4909 	 */
4910 	for (idx = 0; idx < ccount; idx++) {
4911 		cookie_size = cookies[idx].size;
4912 		if (off < cookie_size)
4913 			break;
4914 		off -= cookie_size;
4915 	}
4916 
4917 	cookie_addr = cookies[idx].addr + off;
4918 	cookie_size = cookies[idx].size - off;
4919 
4920 	export_caddr = cookie_addr & ~(pg_size - 1);
4921 	export_poff = cookie_addr & (pg_size - 1);
4922 	export_psize = min(cookie_size, (pg_size - export_poff));
4923 
4924 	for (;;) {
4925 
4926 		copy_size = min(export_psize, local_psize);
4927 
4928 		D1(chid,
4929 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4930 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4931 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4932 		    " total_bal=0x%llx\n",
4933 		    chid, direction, export_caddr, local_ra, export_poff,
4934 		    local_poff, export_psize, local_psize, copy_size,
4935 		    total_bal);
4936 
4937 		rv = hv_ldc_copy(chid, direction,
4938 		    (export_caddr + export_poff), (local_ra + local_poff),
4939 		    copy_size, &copied_len);
4940 
4941 		if (rv != 0) {
4942 			int 		error = EIO;
4943 			uint64_t	rx_hd, rx_tl;
4944 
4945 			DWARN(chid,
4946 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
4947 			    (unsigned long long)chid, rv);
4948 			DWARN(chid,
4949 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
4950 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
4951 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
4952 			    " copied_len=0x%lx, total_bal=0x%lx\n",
4953 			    chid, direction, export_caddr, local_ra,
4954 			    export_poff, local_poff, export_psize, local_psize,
4955 			    copy_size, copied_len, total_bal);
4956 
4957 			*size = *size - total_bal;
4958 
4959 			/*
4960 			 * check if reason for copy error was due to
4961 			 * a channel reset. we need to grab the lock
4962 			 * just in case we have to do a reset.
4963 			 */
4964 			mutex_enter(&ldcp->lock);
4965 			mutex_enter(&ldcp->tx_lock);
4966 
4967 			rv = hv_ldc_rx_get_state(ldcp->id,
4968 			    &rx_hd, &rx_tl, &(ldcp->link_state));
4969 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4970 			    ldcp->link_state == LDC_CHANNEL_RESET) {
4971 				i_ldc_reset(ldcp, B_FALSE);
4972 				error = ECONNRESET;
4973 			}
4974 
4975 			mutex_exit(&ldcp->tx_lock);
4976 			mutex_exit(&ldcp->lock);
4977 
4978 			return (error);
4979 		}
4980 
4981 		ASSERT(copied_len <= copy_size);
4982 
4983 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4984 		export_poff += copied_len;
4985 		local_poff += copied_len;
4986 		export_psize -= copied_len;
4987 		local_psize -= copied_len;
4988 		cookie_size -= copied_len;
4989 
4990 		total_bal -= copied_len;
4991 
4992 		if (copy_size != copied_len)
4993 			continue;
4994 
4995 		if (export_psize == 0 && total_bal != 0) {
4996 
4997 			if (cookie_size == 0) {
4998 				idx++;
4999 				cookie_addr = cookies[idx].addr;
5000 				cookie_size = cookies[idx].size;
5001 
5002 				export_caddr = cookie_addr & ~(pg_size - 1);
5003 				export_poff = cookie_addr & (pg_size - 1);
5004 				export_psize =
5005 					min(cookie_size, (pg_size-export_poff));
5006 			} else {
5007 				export_caddr += pg_size;
5008 				export_poff = 0;
5009 				export_psize = min(cookie_size, pg_size);
5010 			}
5011 		}
5012 
5013 		if (local_psize == 0 && total_bal != 0) {
5014 			local_valign += pg_size;
5015 			local_ra = va_to_pa((void *)local_valign);
5016 			local_poff = 0;
5017 			local_psize = min(pg_size, len);
5018 			len -= local_psize;
5019 		}
5020 
5021 		/* check if we are all done */
5022 		if (total_bal == 0)
5023 			break;
5024 	}
5025 
5026 
5027 	D1(chid,
5028 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5029 	    chid, *size);
5030 
5031 	return (0);
5032 }
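
/*
 * Hypothetical usage sketch: an importer that received 'ck'/'ccount' from
 * its peer can read the first 'len' bytes of the exported segment into a
 * local 8-byte aligned buffer ('chan', 'ck', 'ccount', 'lbuf' and 'len'
 * are assumptions of this sketch):
 *
 *	size_t sz = len;
 *
 *	rv = ldc_mem_copy(chan, lbuf, 0, &sz, ck, ccount, LDC_COPY_IN);
 *	... on success, sz holds the number of bytes actually copied
 */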
5033 
5034 /*
5035  * Copy data either from or to the client specified virtual address
5036  * space to or from HV physical memory.
5037  *
5038  * The direction argument determines whether the data is read from or
5039  * written to HV memory. Direction values are LDC_COPY_IN/OUT, similar
5040  * to the ldc_mem_copy interface.
5041  */
5042 int
5043 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
5044     caddr_t paddr, uint8_t direction)
5045 {
5046 	ldc_chan_t 	*ldcp;
5047 	uint64_t	local_voff, local_valign;
5048 	uint64_t	pg_shift, pg_size, pg_size_code;
5049 	uint64_t 	target_pa, target_poff, target_psize, target_size;
5050 	uint64_t	local_ra, local_poff, local_psize;
5051 	uint64_t	copy_size, copied_len = 0;
5052 	pgcnt_t		npages;
5053 	size_t		len = *size;
5054 	int 		rv = 0;
5055 
5056 	if (handle == NULL) {
5057 		DWARN(DBG_ALL_LDCS,
5058 		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
5059 		return (EINVAL);
5060 	}
5061 	ldcp = (ldc_chan_t *)handle;
5062 
5063 	mutex_enter(&ldcp->lock);
5064 
5065 	/* check to see if channel is UP */
5066 	if (ldcp->tstate != TS_UP) {
5067 		DWARN(ldcp->id,
5068 		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5069 		    ldcp->id);
5070 		mutex_exit(&ldcp->lock);
5071 		return (ECONNRESET);
5072 	}
5073 
5074 	/* Force address and size to be 8-byte aligned */
5075 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5076 		DWARN(ldcp->id,
5077 		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5078 		mutex_exit(&ldcp->lock);
5079 		return (EINVAL);
5080 	}
5081 
5082 	target_size = *size;
5083 
5084 	/* FUTURE: get the page size, pgsz code, and shift */
5085 	pg_size = MMU_PAGESIZE;
5086 	pg_size_code = page_szc(pg_size);
5087 	pg_shift = page_get_shift(pg_size_code);
5088 
5089 	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5090 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5091 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5092 
5093 	/* aligned VA and its offset */
5094 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5095 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5096 
5097 	npages = (len + local_voff) / pg_size;
5098 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
5099 
5100 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
5101 	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
5102 	    ldcp->id, vaddr, local_valign, local_voff, npages);
5103 
5104 	local_ra = va_to_pa((void *)local_valign);
5105 	local_poff = local_voff;
5106 	local_psize = min(len, (pg_size - local_voff));
5107 
5108 	len -= local_psize;
5109 
5110 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
5111 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
5112 	target_psize = pg_size - target_poff;
5113 
5114 	for (;;) {
5115 
5116 		copy_size = min(target_psize, local_psize);
5117 
5118 		D1(ldcp->id,
5119 		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
5120 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
5121 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5122 		    " total_bal=0x%llx\n",
5123 		    ldcp->id, direction, target_pa, local_ra, target_poff,
5124 		    local_poff, target_psize, local_psize, copy_size,
5125 		    target_size);
5126 
5127 		rv = hv_ldc_copy(ldcp->id, direction,
5128 		    (target_pa + target_poff), (local_ra + local_poff),
5129 		    copy_size, &copied_len);
5130 
5131 		if (rv != 0) {
5132 			DWARN(DBG_ALL_LDCS,
5133 			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
5134 			    ldcp->id, rv);
5135 			DWARN(DBG_ALL_LDCS,
5136 			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
5137 			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
5138 			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
5139 			    "copy_sz=0x%llx, total_bal=0x%llx\n",
5140 			    ldcp->id, direction, target_pa, local_ra,
5141 			    target_poff, local_poff, target_psize, local_psize,
5142 			    copy_size, target_size);
5143 
5144 			*size = *size - target_size;
5145 			mutex_exit(&ldcp->lock);
5146 			return (i_ldc_h2v_error(rv));
5147 		}
5148 
5149 		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
5150 		    copied_len);
5151 		target_poff += copied_len;
5152 		local_poff += copied_len;
5153 		target_psize -= copied_len;
5154 		local_psize -= copied_len;
5155 
5156 		target_size -= copied_len;
5157 
5158 		if (copy_size != copied_len)
5159 			continue;
5160 
5161 		if (target_psize == 0 && target_size != 0) {
5162 			target_pa += pg_size;
5163 			target_poff = 0;
5164 			target_psize = min(pg_size, target_size);
5165 		}
5166 
5167 		if (local_psize == 0 && target_size != 0) {
5168 			local_valign += pg_size;
5169 			local_ra = va_to_pa((void *)local_valign);
5170 			local_poff = 0;
5171 			local_psize = min(pg_size, len);
5172 			len -= local_psize;
5173 		}
5174 
5175 		/* check if we are all done */
5176 		if (target_size == 0)
5177 			break;
5178 	}
5179 
5180 	mutex_exit(&ldcp->lock);
5181 
5182 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
5183 	    ldcp->id, *size);
5184 
5185 	return (0);
5186 }
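
/*
 * Hypothetical usage sketch: unlike ldc_mem_copy(), the target here is a
 * raw HV physical address rather than a list of exported cookies ('chan',
 * 'lbuf', 'len' and 'pa' are assumptions of this sketch):
 *
 *	size_t sz = len;
 *
 *	rv = ldc_mem_rdwr_cookie(chan, lbuf, &sz, (caddr_t)pa,
 *	    LDC_COPY_OUT);
 */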
5187 
5188 /*
5189  * Map an exported memory segment into the local address space. If the
5190  * memory range was exported for direct map access, a HV call is made
5191  * to allocate a RA range. If the map is done via a shadow copy, local
5192  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5193  * the mapping is a direct map then the RA is returned in 'raddr'.
5194  */
5195 int
5196 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5197     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5198 {
5199 	int		i, j, idx, rv, retries;
5200 	ldc_chan_t 	*ldcp;
5201 	ldc_mhdl_t	*mhdl;
5202 	ldc_memseg_t	*memseg;
5203 	caddr_t		tmpaddr;
5204 	uint64_t	map_perm = perm;
5205 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5206 	uint64_t	exp_size = 0, base_off, map_size, npages;
5207 	uint64_t	cookie_addr, cookie_off, cookie_size;
5208 	tte_t		ldc_tte;
5209 
5210 	if (mhandle == NULL) {
5211 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5212 		return (EINVAL);
5213 	}
5214 	mhdl = (ldc_mhdl_t *)mhandle;
5215 
5216 	mutex_enter(&mhdl->lock);
5217 
5218 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5219 	    mhdl->memseg != NULL) {
5220 		DWARN(DBG_ALL_LDCS,
5221 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5222 		mutex_exit(&mhdl->lock);
5223 		return (EINVAL);
5224 	}
5225 
5226 	ldcp = mhdl->ldcp;
5227 
5228 	mutex_enter(&ldcp->lock);
5229 
5230 	if (ldcp->tstate != TS_UP) {
5231 		DWARN(ldcp->id,
5232 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5233 		    ldcp->id);
5234 		mutex_exit(&ldcp->lock);
5235 		mutex_exit(&mhdl->lock);
5236 		return (ECONNRESET);
5237 	}
5238 
5239 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5240 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5241 		mutex_exit(&ldcp->lock);
5242 		mutex_exit(&mhdl->lock);
5243 		return (EINVAL);
5244 	}
5245 
5246 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5247 	    ldcp->id, cookie->addr, cookie->size);
5248 
5249 	/* FUTURE: get the page size, pgsz code, and shift */
5250 	pg_size = MMU_PAGESIZE;
5251 	pg_size_code = page_szc(pg_size);
5252 	pg_shift = page_get_shift(pg_size_code);
5253 	pg_mask = ~(pg_size - 1);
5254 
5255 	/* calculate the number of pages in the exported cookie */
5256 	base_off = cookie[0].addr & (pg_size - 1);
5257 	for (idx = 0; idx < ccount; idx++)
5258 		exp_size += cookie[idx].size;
5259 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5260 	npages = (map_size >> pg_shift);
5261 
5262 	/* Allocate memseg structure */
5263 	memseg = mhdl->memseg =
5264 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5265 
5266 	/* Allocate memory to store all pages and cookies */
5267 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5268 	memseg->cookies =
5269 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5270 
5271 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5272 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5273 
5274 	/*
5275 	 * Check if direct map over shared memory is enabled, if not change
5276 	 * the mapping type to SHADOW_MAP.
5277 	 */
5278 	if (ldc_shmem_enabled == 0)
5279 		mtype = LDC_SHADOW_MAP;
5280 
5281 	/*
5282 	 * Check to see if the client is requesting direct or shadow map
5283 	 * If direct map is requested, try to map remote memory first,
5284 	 * and if that fails, revert to shadow map
5285 	 */
5286 	if (mtype == LDC_DIRECT_MAP) {
5287 
5288 		/* Allocate kernel virtual space for mapping */
5289 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5290 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5291 		if (memseg->vaddr == NULL) {
5292 			cmn_err(CE_WARN,
5293 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5294 			    ldcp->id);
5295 			kmem_free(memseg->cookies,
5296 			    (sizeof (ldc_mem_cookie_t) * ccount));
5297 			kmem_free(memseg->pages,
5298 			    (sizeof (ldc_page_t) * npages));
5299 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5300 
5301 			mutex_exit(&ldcp->lock);
5302 			mutex_exit(&mhdl->lock);
5303 			return (ENOMEM);
5304 		}
5305 
5306 		/* Unload previous mapping */
5307 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5308 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5309 
5310 		/* for each cookie passed in - map into address space */
5311 		idx = 0;
5312 		cookie_size = 0;
5313 		tmpaddr = memseg->vaddr;
5314 
5315 		for (i = 0; i < npages; i++) {
5316 
5317 			if (cookie_size == 0) {
5318 				ASSERT(idx < ccount);
5319 				cookie_addr = cookie[idx].addr & pg_mask;
5320 				cookie_off = cookie[idx].addr & (pg_size - 1);
5321 				cookie_size =
5322 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5323 					pg_size);
5324 				idx++;
5325 			}
5326 
5327 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5328 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5329 			    cookie_addr, cookie_size);
5330 
5331 			/* map the cookie into address space */
5332 			for (retries = 0; retries < ldc_max_retries;
5333 			    retries++) {
5334 
5335 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5336 				    &memseg->pages[i].raddr, &map_perm);
5337 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5338 					break;
5339 
5340 				drv_usecwait(ldc_delay);
5341 			}
5342 
5343 			if (rv || memseg->pages[i].raddr == 0) {
5344 				DWARN(ldcp->id,
5345 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5346 				    ldcp->id, rv);
5347 
5348 				/* remove previous mapins */
5349 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5350 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5351 				for (j = 0; j < i; j++) {
5352 					rv = hv_ldc_unmap(
5353 							memseg->pages[j].raddr);
5354 					if (rv) {
5355 						DWARN(ldcp->id,
5356 						    "ldc_mem_map: (0x%llx) "
5357 						    "cannot unmap ra=0x%llx\n",
5358 						    ldcp->id,
5359 						    memseg->pages[j].raddr);
5360 					}
5361 				}
5362 
5363 				/* free kernel virtual space */
5364 				vmem_free(heap_arena, (void *)memseg->vaddr,
5365 				    map_size);
5366 
5367 				/* direct map failed - revert to shadow map */
5368 				mtype = LDC_SHADOW_MAP;
5369 				break;
5370 
5371 			} else {
5372 
5373 				D1(ldcp->id,
5374 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5375 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5376 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5377 				    cookie_addr, perm);
5378 
5379 				/*
5380 				 * NOTE: Calling hat_devload directly causes it
5381 				 * to look for a page_t using the pfn. Since this
5382 				 * addr is greater than the memlist, it treats
5383 				 * it as non-memory.
5384 				 */
5385 				sfmmu_memtte(&ldc_tte,
5386 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5387 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5388 
5389 				D1(ldcp->id,
5390 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5391 				    "tte 0x%llx\n", ldcp->id,
5392 				    memseg->pages[i].raddr, ldc_tte);
5393 
5394 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5395 				    NULL, HAT_LOAD_LOCK);
5396 
5397 				cookie_size -= pg_size;
5398 				cookie_addr += pg_size;
5399 				tmpaddr += pg_size;
5400 			}
5401 		}
5402 	}
5403 
5404 	if (mtype == LDC_SHADOW_MAP) {
5405 		if (*vaddr == NULL) {
5406 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5407 			mhdl->myshadow = B_TRUE;
5408 
5409 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5410 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5411 		} else {
5412 			/*
5413 			 * Use client supplied memory for memseg->vaddr
5414 			 * WARNING: assuming that client mem is >= exp_size
5415 			 */
5416 			memseg->vaddr = *vaddr;
5417 		}
5418 
5419 		/* Save all page and cookie information */
5420 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5421 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5422 			memseg->pages[i].size = pg_size;
5423 			tmpaddr += pg_size;
5424 		}
5425 
5426 	}
5427 
5428 	/* save all cookies */
5429 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5430 
5431 	/* update memseg_t */
5432 	memseg->raddr = memseg->pages[0].raddr;
5433 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5434 	memseg->npages = npages;
5435 	memseg->ncookies = ccount;
5436 	memseg->next_cookie = 0;
5437 
5438 	/* memory handle = mapped */
5439 	mhdl->mtype = mtype;
5440 	mhdl->perm = perm;
5441 	mhdl->status = LDC_MAPPED;
5442 
5443 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5444 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5445 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5446 	    memseg->npages, memseg->ncookies);
5447 
5448 	if (mtype == LDC_SHADOW_MAP)
5449 		base_off = 0;
5450 	if (raddr)
5451 		*raddr = (caddr_t)(memseg->raddr | base_off);
5452 	if (vaddr)
5453 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5454 
5455 	mutex_exit(&ldcp->lock);
5456 	mutex_exit(&mhdl->lock);
5457 	return (0);
5458 }
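
/*
 * Hypothetical import sketch ('mh' was allocated with
 * ldc_mem_alloc_handle() on an UP channel; 'ck' and 'ccount' came from
 * the exporting peer):
 *
 *	caddr_t va = NULL, ra = NULL;
 *
 *	if (ldc_mem_map(mh, ck, ccount, LDC_DIRECT_MAP, LDC_MEM_RW,
 *	    &va, &ra) == 0) {
 *		... 'va' now addresses the peer's memory directly, or a
 *		... local shadow copy if the call fell back to shadow map
 *		(void) ldc_mem_unmap(mh);
 *	}
 */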
5459 
5460 /*
5461  * Unmap a memory segment. Free shadow memory (if any).
5462  */
5463 int
5464 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5465 {
5466 	int		i, rv;
5467 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5468 	ldc_chan_t 	*ldcp;
5469 	ldc_memseg_t	*memseg;
5470 
5471 	if (mhdl == NULL || mhdl->status != LDC_MAPPED) {
5472 		DWARN(DBG_ALL_LDCS,
5473 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5474 		    mhandle);
5475 		return (EINVAL);
5476 	}
5477 
5478 	mutex_enter(&mhdl->lock);
5479 
5480 	ldcp = mhdl->ldcp;
5481 	memseg = mhdl->memseg;
5482 
5483 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5484 	    ldcp->id, mhdl);
5485 
5486 	/* if we allocated shadow memory - free it */
5487 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5488 		kmem_free(memseg->vaddr, memseg->size);
5489 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5490 
5491 		/* unmap in the case of DIRECT_MAP */
5492 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5493 		    HAT_UNLOAD_UNLOCK);
5494 
5495 		for (i = 0; i < memseg->npages; i++) {
5496 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5497 			if (rv) {
5498 				cmn_err(CE_WARN,
5499 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5500 				    ldcp->id, rv);
5501 			}
5502 		}
5503 
5504 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5505 	}
5506 
5507 	/* free the allocated memseg and page structures */
5508 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5509 	kmem_free(memseg->cookies,
5510 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5511 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5512 
5513 	/* uninitialize the memory handle */
5514 	mhdl->memseg = NULL;
5515 	mhdl->status = LDC_UNBOUND;
5516 
5517 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5518 	    ldcp->id, mhdl);
5519 
5520 	mutex_exit(&mhdl->lock);
5521 	return (0);
5522 }
5523 
5524 /*
5525  * Internal entry point for LDC mapped memory entry consistency
5526  * semantics. Acquire copies the contents of the remote memory
5527  * into the local shadow copy. The release operation copies the local
5528  * contents into the remote memory. The offset and size specify the
5529  * bounds for the memory range being synchronized.
5530  */
5531 static int
5532 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5533     uint64_t offset, size_t size)
5534 {
5535 	int 		err;
5536 	ldc_mhdl_t	*mhdl;
5537 	ldc_chan_t	*ldcp;
5538 	ldc_memseg_t	*memseg;
5539 	caddr_t		local_vaddr;
5540 	size_t		copy_size;
5541 
5542 	if (mhandle == NULL) {
5543 		DWARN(DBG_ALL_LDCS,
5544 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5545 		return (EINVAL);
5546 	}
5547 	mhdl = (ldc_mhdl_t *)mhandle;
5548 
5549 	mutex_enter(&mhdl->lock);
5550 
5551 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5552 		DWARN(DBG_ALL_LDCS,
5553 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5554 		mutex_exit(&mhdl->lock);
5555 		return (EINVAL);
5556 	}
5557 
5558 	/* do nothing for direct map */
5559 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5560 		mutex_exit(&mhdl->lock);
5561 		return (0);
5562 	}
5563 
5564 	/* do nothing for COPY_IN without MEM_R, or COPY_OUT without MEM_W */
5565 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5566 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5567 		mutex_exit(&mhdl->lock);
5568 		return (0);
5569 	}
5570 
5571 	if (offset >= mhdl->memseg->size ||
5572 	    (offset + size) > mhdl->memseg->size) {
5573 		DWARN(DBG_ALL_LDCS,
5574 		    "i_ldc_mem_acquire_release: memory out of range\n");
5575 		mutex_exit(&mhdl->lock);
5576 		return (EINVAL);
5577 	}
5578 
5579 	/* get the channel handle and memory segment */
5580 	ldcp = mhdl->ldcp;
5581 	memseg = mhdl->memseg;
5582 
5583 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5584 
5585 		local_vaddr = memseg->vaddr + offset;
5586 		copy_size = size;
5587 
5588 		/* copy to/from remote from/to local memory */
5589 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5590 		    &copy_size, memseg->cookies, memseg->ncookies,
5591 		    direction);
5592 		if (err || copy_size != size) {
5593 			DWARN(ldcp->id,
5594 			    "i_ldc_mem_acquire_release: copy failed\n");
5595 			mutex_exit(&mhdl->lock);
5596 			return (err);
5597 		}
5598 	}
5599 
5600 	mutex_exit(&mhdl->lock);
5601 
5602 	return (0);
5603 }
5604 
5605 /*
5606  * Ensure that the contents of the local memory seg are consistent
5607  * with the contents of the remote segment
5608  */
5609 int
5610 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5611 {
5612 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5613 }
5614 
5615 
5616 /*
5617  * Ensure that the contents of the remote memory seg are consistent
5618  * with the contents of the local segment
5619  */
5620 int
5621 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5622 {
5623 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5624 }
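
/*
 * Hypothetical shadow-map consistency sketch: an importer that mapped
 * with LDC_SHADOW_MAP must bracket its accesses so the local shadow and
 * the exporter's memory stay in sync; both calls are no-ops for direct
 * maps. 'mh', 'off' and 'sz' are assumptions of this sketch:
 *
 *	(void) ldc_mem_acquire(mh, off, sz);	... remote -> shadow
 *	... read and/or modify the shadow copy ...
 *	(void) ldc_mem_release(mh, off, sz);	... shadow -> remote
 */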
5625 
5626 /*
5627  * Allocate a descriptor ring. The size of each descriptor must be
5628  * 8-byte aligned; the ring as a whole is rounded up to a multiple
5629  * of MMU_PAGESIZE.
5630  */
5631 int
5632 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5633 {
5634 	ldc_dring_t *dringp;
5635 	size_t size = (dsize * len);
5636 
5637 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5638 	    len, dsize);
5639 
5640 	if (dhandle == NULL) {
5641 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5642 		return (EINVAL);
5643 	}
5644 
5645 	if (len == 0) {
5646 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5647 		return (EINVAL);
5648 	}
5649 
5650 	/* descriptor size should be 8-byte aligned */
5651 	if (dsize == 0 || (dsize & 0x7)) {
5652 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5653 		return (EINVAL);
5654 	}
5655 
5656 	*dhandle = 0;
5657 
5658 	/* Allocate a desc ring structure */
5659 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5660 
5661 	/* Initialize dring */
5662 	dringp->length = len;
5663 	dringp->dsize = dsize;
5664 
5665 	/* round off to multiple of pagesize */
5666 	dringp->size = (size & MMU_PAGEMASK);
5667 	if (size & MMU_PAGEOFFSET)
5668 		dringp->size += MMU_PAGESIZE;
5669 
5670 	dringp->status = LDC_UNBOUND;
5671 
5672 	/* allocate descriptor ring memory */
5673 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5674 
5675 	/* initialize the desc ring lock */
5676 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5677 
5678 	/* Add descriptor ring to the head of global list */
5679 	mutex_enter(&ldcssp->lock);
5680 	dringp->next = ldcssp->dring_list;
5681 	ldcssp->dring_list = dringp;
5682 	mutex_exit(&ldcssp->lock);
5683 
5684 	*dhandle = (ldc_dring_handle_t)dringp;
5685 
5686 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5687 
5688 	return (0);
5689 }
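
/*
 * Worked example (illustrative): a ring of len = 128 descriptors of
 * dsize = 64 bytes needs 8192 bytes, which on an 8K MMU_PAGESIZE system
 * is exactly one page, so dringp->size is not rounded up. A 129-entry
 * ring would be rounded up to two pages by the code above.
 */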
5690 
5691 
5692 /*
5693  * Destroy a descriptor ring.
5694  */
5695 int
5696 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5697 {
5698 	ldc_dring_t *dringp;
5699 	ldc_dring_t *tmp_dringp;
5700 
5701 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5702 
5703 	if (dhandle == NULL) {
5704 		DWARN(DBG_ALL_LDCS,
5705 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5706 		return (EINVAL);
5707 	}
5708 	dringp = (ldc_dring_t *)dhandle;
5709 
5710 	if (dringp->status == LDC_BOUND) {
5711 		DWARN(DBG_ALL_LDCS,
5712 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5713 		return (EACCES);
5714 	}
5715 
5716 	mutex_enter(&dringp->lock);
5717 	mutex_enter(&ldcssp->lock);
5718 
5719 	/* remove from linked list - if not bound */
5720 	tmp_dringp = ldcssp->dring_list;
5721 	if (tmp_dringp == dringp) {
5722 		ldcssp->dring_list = dringp->next;
5723 		dringp->next = NULL;
5724 
5725 	} else {
5726 		while (tmp_dringp != NULL) {
5727 			if (tmp_dringp->next == dringp) {
5728 				tmp_dringp->next = dringp->next;
5729 				dringp->next = NULL;
5730 				break;
5731 			}
5732 			tmp_dringp = tmp_dringp->next;
5733 		}
5734 		if (tmp_dringp == NULL) {
5735 			DWARN(DBG_ALL_LDCS,
5736 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5737 			mutex_exit(&ldcssp->lock);
5738 			mutex_exit(&dringp->lock);
5739 			return (EINVAL);
5740 		}
5741 	}
5742 
5743 	mutex_exit(&ldcssp->lock);
5744 
5745 	/* free the descriptor ring */
5746 	kmem_free(dringp->base, dringp->size);
5747 
5748 	mutex_exit(&dringp->lock);
5749 
5750 	/* destroy dring lock */
5751 	mutex_destroy(&dringp->lock);
5752 
5753 	/* free desc ring object */
5754 	kmem_free(dringp, sizeof (ldc_dring_t));
5755 
5756 	return (0);
5757 }
5758 
5759 /*
5760  * Bind a previously allocated dring to a channel. The channel should
5761  * be OPEN in order to bind the ring to the channel. Returns back a
5762  * descriptor ring cookie. The descriptor ring is exported for remote
5763  * access by the client at the other end of the channel. An entry for
5764  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5765  */
5766 int
5767 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5768     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5769 {
5770 	int		err;
5771 	ldc_chan_t 	*ldcp;
5772 	ldc_dring_t	*dringp;
5773 	ldc_mem_handle_t mhandle;
5774 
5775 	/* check to see if channel is initialized */
5776 	if (handle == NULL) {
5777 		DWARN(DBG_ALL_LDCS,
5778 		    "ldc_mem_dring_bind: invalid channel handle\n");
5779 		return (EINVAL);
5780 	}
5781 	ldcp = (ldc_chan_t *)handle;
5782 
5783 	if (dhandle == NULL) {
5784 		DWARN(DBG_ALL_LDCS,
5785 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5786 		return (EINVAL);
5787 	}
5788 	dringp = (ldc_dring_t *)dhandle;
5789 
5790 	if (cookie == NULL) {
5791 		DWARN(ldcp->id,
5792 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5793 		return (EINVAL);
5794 	}
5795 
5796 	mutex_enter(&dringp->lock);
5797 
5798 	if (dringp->status == LDC_BOUND) {
5799 		DWARN(DBG_ALL_LDCS,
5800 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5801 		    ldcp->id);
5802 		mutex_exit(&dringp->lock);
5803 		return (EINVAL);
5804 	}
5805 
5806 	if ((perm & LDC_MEM_RW) == 0) {
5807 		DWARN(DBG_ALL_LDCS,
5808 		    "ldc_mem_dring_bind: invalid permissions\n");
5809 		mutex_exit(&dringp->lock);
5810 		return (EINVAL);
5811 	}
5812 
5813 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5814 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5815 		mutex_exit(&dringp->lock);
5816 		return (EINVAL);
5817 	}
5818 
5819 	dringp->ldcp = ldcp;
5820 
5821 	/* create a memory handle */
5822 	err = ldc_mem_alloc_handle(handle, &mhandle);
5823 	if (err || mhandle == NULL) {
5824 		DWARN(DBG_ALL_LDCS,
5825 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5826 		    ldcp->id);
5827 		mutex_exit(&dringp->lock);
5828 		return (err);
5829 	}
5830 	dringp->mhdl = mhandle;
5831 
5832 	/* bind the descriptor ring to channel */
5833 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5834 	    mtype, perm, cookie, ccount);
5835 	if (err) {
5836 		DWARN(ldcp->id,
5837 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5838 		    ldcp->id);
		/* don't leak the handle or leave stale state on failure */
		(void) ldc_mem_free_handle(mhandle);
		dringp->ldcp = NULL;
		dringp->mhdl = NULL;
5839 		mutex_exit(&dringp->lock);
5840 		return (err);
5841 	}
5842 
5843 	/*
5844 	 * For now, return an error if we get more than one cookie.
5845 	 * FUTURE: return multiple cookies ..
5846 	 */
5847 	if (*ccount > 1) {
5848 		(void) ldc_mem_unbind_handle(mhandle);
5849 		(void) ldc_mem_free_handle(mhandle);
5850 
5851 		dringp->ldcp = NULL;
5852 		dringp->mhdl = NULL;
5853 		*ccount = 0;
5854 
5855 		mutex_exit(&dringp->lock);
5856 		return (EAGAIN);
5857 	}
5858 
5859 	/* Add descriptor ring to channel's exported dring list */
5860 	mutex_enter(&ldcp->exp_dlist_lock);
5861 	dringp->ch_next = ldcp->exp_dring_list;
5862 	ldcp->exp_dring_list = dringp;
5863 	mutex_exit(&ldcp->exp_dlist_lock);
5864 
5865 	dringp->status = LDC_BOUND;
5866 
5867 	mutex_exit(&dringp->lock);
5868 
5869 	return (0);
5870 }
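
/*
 * Usage sketch (not compiled into the driver): exporting a previously
 * created ring over an OPEN channel. "chan" and "dhandle" are assumed
 * to come from earlier ldc_init()/ldc_open() and
 * ldc_mem_dring_create() calls. The cookie returned here would
 * typically be sent to the peer in a higher-level protocol message so
 * it can ldc_mem_dring_map() the ring.
 */
#if 0
static int
dring_export_sketch(ldc_handle_t chan, ldc_dring_handle_t dhandle)
{
	ldc_mem_cookie_t cookie;
	uint32_t ccount;
	int rv;

	rv = ldc_mem_dring_bind(chan, dhandle, LDC_SHADOW_MAP,
	    LDC_MEM_RW, &cookie, &ccount);
	if (rv != 0)
		return (rv);	/* EAGAIN if more than one cookie was needed */

	/* ... hand (cookie, ccount) to the peer, run the protocol ... */

	return (ldc_mem_dring_unbind(dhandle));
}
#endif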
5871 
5872 /*
5873  * Return the next cookie associated with the specified dring handle
5874  */
5875 int
5876 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5877 {
5878 	int		rv = 0;
5879 	ldc_dring_t 	*dringp;
5880 	ldc_chan_t	*ldcp;
5881 
5882 	if (dhandle == NULL) {
5883 		DWARN(DBG_ALL_LDCS,
5884 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5885 		return (EINVAL);
5886 	}
5887 	dringp = (ldc_dring_t *)dhandle;
5888 	mutex_enter(&dringp->lock);
5889 
5890 	if (dringp->status != LDC_BOUND) {
5891 		DWARN(DBG_ALL_LDCS,
5892 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5893 		    "is not bound\n", dringp);
5894 		mutex_exit(&dringp->lock);
5895 		return (EINVAL);
5896 	}
5897 
5898 	ldcp = dringp->ldcp;
5899 
5900 	if (cookie == NULL) {
5901 		DWARN(ldcp->id,
5902 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5903 		    ldcp->id);
5904 		mutex_exit(&dringp->lock);
5905 		return (EINVAL);
5906 	}
5907 
5908 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5909 	mutex_exit(&dringp->lock);
5910 
5911 	return (rv);
5912 }
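
/*
 * Usage sketch (not compiled into the driver): walking the remaining
 * cookies of a bound ring. Since ldc_mem_dring_bind() currently
 * rejects rings that need more than one cookie, this loop is
 * forward-looking; it mirrors how multiple cookies would be collected
 * once they are supported.
 */
#if 0
static int
dring_cookie_walk_sketch(ldc_dring_handle_t dhandle, uint32_t ccount,
    ldc_mem_cookie_t *cookies)
{
	uint32_t i;
	int rv;

	/* cookies[0] was returned by ldc_mem_dring_bind() itself */
	for (i = 1; i < ccount; i++) {
		rv = ldc_mem_dring_nextcookie(dhandle, &cookies[i]);
		if (rv != 0)
			return (rv);
	}
	return (0);
}
#endif
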
5913 /*
5914  * Unbind a previously bound dring from a channel.
5915  */
5916 int
5917 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5918 {
5919 	ldc_dring_t 	*dringp;
5920 	ldc_dring_t	*tmp_dringp;
5921 	ldc_chan_t	*ldcp;
5922 
5923 	if (dhandle == NULL) {
5924 		DWARN(DBG_ALL_LDCS,
5925 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5926 		return (EINVAL);
5927 	}
5928 	dringp = (ldc_dring_t *)dhandle;
5929 
5930 	mutex_enter(&dringp->lock);
5931 
5932 	if (dringp->status == LDC_UNBOUND) {
5933 		DWARN(DBG_ALL_LDCS,
5934 		    "ldc_mem_dring_unbind: descriptor ring 0x%llx is unbound\n",
5935 		    dringp);
5936 		mutex_exit(&dringp->lock);
5937 		return (EINVAL);
5938 	}
5939 	ldcp = dringp->ldcp;
5940 
5941 	mutex_enter(&ldcp->exp_dlist_lock);
5942 
5943 	tmp_dringp = ldcp->exp_dring_list;
5944 	if (tmp_dringp == dringp) {
5945 		ldcp->exp_dring_list = dringp->ch_next;
5946 		dringp->ch_next = NULL;
5947 
5948 	} else {
5949 		while (tmp_dringp != NULL) {
5950 			if (tmp_dringp->ch_next == dringp) {
5951 				tmp_dringp->ch_next = dringp->ch_next;
5952 				dringp->ch_next = NULL;
5953 				break;
5954 			}
5955 			tmp_dringp = tmp_dringp->ch_next;
5956 		}
5957 		if (tmp_dringp == NULL) {
5958 			DWARN(DBG_ALL_LDCS,
5959 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5960 			mutex_exit(&ldcp->exp_dlist_lock);
5961 			mutex_exit(&dringp->lock);
5962 			return (EINVAL);
5963 		}
5964 	}
5965 
5966 	mutex_exit(&ldcp->exp_dlist_lock);
5967 
5968 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5969 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5970 
5971 	dringp->ldcp = NULL;
5972 	dringp->mhdl = NULL;
5973 	dringp->status = LDC_UNBOUND;
5974 
5975 	mutex_exit(&dringp->lock);
5976 
5977 	return (0);
5978 }
5979 
5980 /*
5981  * Get information about the dring. The base address of the descriptor
5982  * ring along with the type and permission are returned back.
5983  */
5984 int
5985 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5986 {
5987 	ldc_dring_t	*dringp;
5988 	int		rv;
5989 
5990 	if (dhandle == NULL) {
5991 		DWARN(DBG_ALL_LDCS,
5992 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5993 		return (EINVAL);
5994 	}
5995 	dringp = (ldc_dring_t *)dhandle;
5996 
5997 	mutex_enter(&dringp->lock);
5998 
5999 	if (dringp->mhdl) {
6000 		rv = ldc_mem_info(dringp->mhdl, minfo);
6001 		if (rv) {
6002 			DWARN(DBG_ALL_LDCS,
6003 			    "ldc_mem_dring_info: error reading mem info\n");
6004 			mutex_exit(&dringp->lock);
6005 			return (rv);
6006 		}
6007 	} else {
6008 		minfo->vaddr = dringp->base;
6009 		minfo->raddr = NULL;
6010 		minfo->status = dringp->status;
6011 	}
6012 
6013 	mutex_exit(&dringp->lock);
6014 
6015 	return (0);
6016 }
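
/*
 * Usage sketch (not compiled into the driver): querying a ring's
 * mapping state. For a ring that was never bound or mapped, only
 * vaddr and status are meaningful (raddr is NULL, as seen above).
 */
#if 0
static void
dring_info_sketch(ldc_dring_handle_t dhandle)
{
	ldc_mem_info_t minfo;

	if (ldc_mem_dring_info(dhandle, &minfo) == 0) {
		cmn_err(CE_CONT, "dring va=%p status=%d\n",
		    (void *)minfo.vaddr, (int)minfo.status);
	}
}
#endif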
6017 
6018 /*
6019  * Map an exported descriptor ring into the local address space. If the
6020  * descriptor ring was exported for direct map access, an HV call is
6021  * made to allocate an RA (real address) range. If the map is done via
6022  * a shadow copy, local shadow memory is allocated.
6023  */
6024 int
6025 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6026     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6027     ldc_dring_handle_t *dhandle)
6028 {
6029 	int		err;
6030 	ldc_chan_t 	*ldcp;
6031 	ldc_mem_handle_t mhandle;
6032 	ldc_dring_t	*dringp;
6033 	size_t		dring_size;
6034 
6035 	if (dhandle == NULL) {
6036 		DWARN(DBG_ALL_LDCS,
6037 		    "ldc_mem_dring_map: invalid dhandle\n");
6038 		return (EINVAL);
6039 	}
6040 
6041 	/* check to see if channel is initialized */
6042 	if (handle == NULL) {
6043 		DWARN(DBG_ALL_LDCS,
6044 		    "ldc_mem_dring_map: invalid channel handle\n");
6045 		return (EINVAL);
6046 	}
6047 	ldcp = (ldc_chan_t *)handle;
6048 
6049 	if (cookie == NULL) {
6050 		DWARN(ldcp->id,
6051 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6052 		    ldcp->id);
6053 		return (EINVAL);
6054 	}
6055 
6056 	/* FUTURE: For now we support only one cookie per dring */
6057 	ASSERT(ccount == 1);
6058 
6059 	if (cookie->size < (dsize * len)) {
6060 		DWARN(ldcp->id,
6061 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6062 		    ldcp->id);
6063 		return (EINVAL);
6064 	}
6065 
6066 	*dhandle = 0;
6067 
6068 	/* Allocate a dring structure */
6069 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6070 
6071 	D1(ldcp->id,
6072 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6073 	    mtype, len, dsize, cookie->addr, cookie->size);
6074 
6075 	/* Initialize dring */
6076 	dringp->length = len;
6077 	dringp->dsize = dsize;
6078 
6079 	/* round up to a multiple of the page size */
6080 	dring_size = len * dsize;
6081 	dringp->size = (dring_size & MMU_PAGEMASK);
6082 	if (dring_size & MMU_PAGEOFFSET)
6083 		dringp->size += MMU_PAGESIZE;
6084 
6085 	dringp->ldcp = ldcp;
6086 
6087 	/* create a memory handle */
6088 	err = ldc_mem_alloc_handle(handle, &mhandle);
6089 	if (err || mhandle == NULL) {
6090 		DWARN(DBG_ALL_LDCS,
6091 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6092 		    err);
6093 		kmem_free(dringp, sizeof (ldc_dring_t));
6094 		return (ENOMEM);
6095 	}
6096 
6097 	dringp->mhdl = mhandle;
6098 	dringp->base = NULL;
6099 
6100 	/* map the dring into local memory */
6101 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6102 	    &(dringp->base), NULL);
6103 	if (err || dringp->base == NULL) {
6104 		cmn_err(CE_WARN,
6105 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6106 		(void) ldc_mem_free_handle(mhandle);
6107 		kmem_free(dringp, sizeof (ldc_dring_t));
6108 		return (ENOMEM);
6109 	}
6110 
6111 	/* initialize the desc ring lock */
6112 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6113 
6114 	/* Add descriptor ring to channel's imported dring list */
6115 	mutex_enter(&ldcp->imp_dlist_lock);
6116 	dringp->ch_next = ldcp->imp_dring_list;
6117 	ldcp->imp_dring_list = dringp;
6118 	mutex_exit(&ldcp->imp_dlist_lock);
6119 
6120 	dringp->status = LDC_MAPPED;
6121 
6122 	*dhandle = (ldc_dring_handle_t)dringp;
6123 
6124 	return (0);
6125 }
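
/*
 * Usage sketch (not compiled into the driver): the importer side. The
 * cookie, ring length and descriptor size are assumed to have arrived
 * from the exporter via a higher-level protocol message; the values
 * must agree with what the exporter passed to ldc_mem_dring_create()
 * and ldc_mem_dring_bind().
 */
#if 0
static int
dring_import_sketch(ldc_handle_t chan, ldc_mem_cookie_t *cookie)
{
	ldc_dring_handle_t dhandle;
	int rv;

	/* 128 descriptors of 64 bytes each, one cookie, shadow copy */
	rv = ldc_mem_dring_map(chan, cookie, 1, 128, 64,
	    LDC_SHADOW_MAP, &dhandle);
	if (rv != 0)
		return (rv);

	/* ... ldc_mem_dring_acquire()/release() while processing ... */

	return (ldc_mem_dring_unmap(dhandle));
}
#endif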
6126 
6127 /*
6128  * Unmap a descriptor ring. Free shadow memory (if any).
6129  */
6130 int
6131 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6132 {
6133 	ldc_dring_t 	*dringp;
6134 	ldc_dring_t	*tmp_dringp;
6135 	ldc_chan_t	*ldcp;
6136 
6137 	if (dhandle == NULL) {
6138 		DWARN(DBG_ALL_LDCS,
6139 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6140 		return (EINVAL);
6141 	}
6142 	dringp = (ldc_dring_t *)dhandle;
6143 
6144 	if (dringp->status != LDC_MAPPED) {
6145 		DWARN(DBG_ALL_LDCS,
6146 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6147 		return (EINVAL);
6148 	}
6149 
6150 	mutex_enter(&dringp->lock);
6151 
6152 	ldcp = dringp->ldcp;
6153 
6154 	mutex_enter(&ldcp->imp_dlist_lock);
6155 
6156 	/* find and unlink the desc ring from channel import list */
6157 	tmp_dringp = ldcp->imp_dring_list;
6158 	if (tmp_dringp == dringp) {
6159 		ldcp->imp_dring_list = dringp->ch_next;
6160 		dringp->ch_next = NULL;
6161 
6162 	} else {
6163 		while (tmp_dringp != NULL) {
6164 			if (tmp_dringp->ch_next == dringp) {
6165 				tmp_dringp->ch_next = dringp->ch_next;
6166 				dringp->ch_next = NULL;
6167 				break;
6168 			}
6169 			tmp_dringp = tmp_dringp->ch_next;
6170 		}
6171 		if (tmp_dringp == NULL) {
6172 			DWARN(DBG_ALL_LDCS,
6173 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6174 			mutex_exit(&ldcp->imp_dlist_lock);
6175 			mutex_exit(&dringp->lock);
6176 			return (EINVAL);
6177 		}
6178 	}
6179 
6180 	mutex_exit(&ldcp->imp_dlist_lock);
6181 
6182 	/* do a LDC memory handle unmap and free */
6183 	(void) ldc_mem_unmap(dringp->mhdl);
6184 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6185 
6186 	dringp->status = 0;
6187 	dringp->ldcp = NULL;
6188 
6189 	mutex_exit(&dringp->lock);
6190 
6191 	/* destroy dring lock */
6192 	mutex_destroy(&dringp->lock);
6193 
6194 	/* free desc ring object */
6195 	kmem_free(dringp, sizeof (ldc_dring_t));
6196 
6197 	return (0);
6198 }
6199 
6200 /*
6201  * Internal entry point for descriptor ring entry consistency
6202  * semantics. Acquire copies the contents of the remote descriptor ring
6203  * into the local shadow copy. The release operation copies the local
6204  * contents into the remote dring. The start and end locations specify
6205  * bounds for the entries being synchronized.
6206  */
6207 static int
6208 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6209     uint8_t direction, uint64_t start, uint64_t end)
6210 {
6211 	int 			err;
6212 	ldc_dring_t		*dringp;
6213 	ldc_chan_t		*ldcp;
6214 	uint64_t		soff;
6215 	size_t			copy_size;
6216 
6217 	if (dhandle == NULL) {
6218 		DWARN(DBG_ALL_LDCS,
6219 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6220 		return (EINVAL);
6221 	}
6222 	dringp = (ldc_dring_t *)dhandle;
6223 	mutex_enter(&dringp->lock);
6224 
6225 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6226 		DWARN(DBG_ALL_LDCS,
6227 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6228 		mutex_exit(&dringp->lock);
6229 		return (EINVAL);
6230 	}
6231 
6232 	if (start >= dringp->length || end >= dringp->length) {
6233 		DWARN(DBG_ALL_LDCS,
6234 		    "i_ldc_dring_acquire_release: index out of range\n");
6235 		mutex_exit(&dringp->lock);
6236 		return (EINVAL);
6237 	}
6238 
6239 	/* get the channel handle */
6240 	ldcp = dringp->ldcp;
6241 
6242 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6243 	    ((dringp->length - start) * dringp->dsize);
6244 
6245 	/* Calculate the relative offset for the first desc */
6246 	soff = (start * dringp->dsize);
6247 
6248 	/* copy to/from remote from/to local memory */
6249 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6250 	    soff, copy_size);
6251 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6252 	    direction, soff, copy_size);
6253 	if (err) {
6254 		DWARN(ldcp->id,
6255 		    "i_ldc_dring_acquire_release: copy failed\n");
6256 		mutex_exit(&dringp->lock);
6257 		return (err);
6258 	}
6259 
6260 	/* do the balance */
6261 	if (start > end) {
6262 		copy_size = ((end + 1) * dringp->dsize);
6263 		soff = 0;
6264 
6265 		/* copy to/from remote from/to local memory */
6266 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6267 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6268 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6269 		    direction, soff, copy_size);
6270 		if (err) {
6271 			DWARN(ldcp->id,
6272 			    "i_ldc_dring_acquire_release: copy failed\n");
6273 			mutex_exit(&dringp->lock);
6274 			return (err);
6275 		}
6276 	}
6277 
6278 	mutex_exit(&dringp->lock);
6279 
6280 	return (0);
6281 }
6282 
6283 /*
6284  * Ensure that the contents of the local dring are consistent
6285  * with the contents of the remote dring.
6286  */
6287 int
6288 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6289 {
6290 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6291 }
6292 
6293 /*
6294  * Ensure that the contents of the remote dring are consistent
6295  * with the contents of the local dring.
6296  */
6297 int
6298 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6299 {
6300 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6301 }
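
/*
 * Usage sketch (not compiled into the driver): the
 * acquire/process/release pattern an importer would use with a
 * shadow-mapped ring. "base" and the descriptor layout are
 * assumptions; (start > end) ranges are legal and wrap around the end
 * of the ring, as handled above.
 */
#if 0
static int
dring_sync_sketch(ldc_dring_handle_t dhandle, caddr_t base,
    uint64_t start, uint64_t end)
{
	int rv;

	/* pull the peer's view of entries [start..end] into the shadow */
	rv = ldc_mem_dring_acquire(dhandle, start, end);
	if (rv != 0)
		return (rv);

	/* ... examine/update descriptors in the local copy at base ... */

	/* push local updates of the same range back to the peer */
	return (ldc_mem_dring_release(dhandle, start, end));
}
#endif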
6302 
6303 
6304 /* ------------------------------------------------------------------------- */
6305