xref: /titanic_51/usr/src/uts/sun4v/io/ldc.c (revision a3c4695861e3f0a8d3706f77ccd53683cca48d67)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 #include <sys/sdt.h>
71 
72 /* Core internal functions */
73 static int i_ldc_h2v_error(int h_error);
74 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
75 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
76 static int i_ldc_rxq_drain(ldc_chan_t *ldcp);
77 static void i_ldc_reset_state(ldc_chan_t *ldcp);
78 static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
79 
80 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
81 static void i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head);
82 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
83 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
84 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
85     uint8_t ctrlmsg);
86 
87 static int  i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head);
88 static void i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head);
89 static uint64_t i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
90     uint64_t *tail, uint64_t *link_state);
91 static uint64_t i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head,
92     uint64_t *tail, uint64_t *link_state);
93 static int i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head,
94     uint64_t rx_tail);
95 static uint_t i_ldc_chkq(ldc_chan_t *ldcp);
96 
97 /* Interrupt handling functions */
98 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
99 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
100 static uint_t i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
101     uint64_t *notify_event);
102 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
103 
104 /* Read method functions */
105 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
106 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
107 	size_t *sizep);
108 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
109 	size_t *sizep);
110 
111 /* Write method functions */
112 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
113 	size_t *sizep);
114 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
115 	size_t *sizep);
116 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
117 	size_t *sizep);
118 
119 /* Pkt processing internal functions */
120 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
121 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
122 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
123 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
124 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
125 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
126 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
127 
128 /* Memory synchronization internal functions */
129 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
130     uint8_t direction, uint64_t offset, size_t size);
131 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
132     uint8_t direction, uint64_t start, uint64_t end);
133 
134 /* LDC Version */
135 static ldc_ver_t ldc_versions[] = { {1, 0} };
136 
137 /* number of supported versions */
138 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
139 
140 /* Invalid value for the ldc_chan_t rx_ack_head field */
141 #define	ACKPEEK_HEAD_INVALID	((uint64_t)-1)
142 
143 
144 /* Module State Pointer */
145 static ldc_soft_state_t *ldcssp;
146 
147 static struct modldrv md = {
148 	&mod_miscops,			/* This is a misc module */
149 	"sun4v LDC module v%I%",	/* Name of the module */
150 };
151 
152 static struct modlinkage ml = {
153 	MODREV_1,
154 	&md,
155 	NULL
156 };
157 
158 static uint64_t ldc_sup_minor;		/* Supported minor number */
159 static hsvc_info_t ldc_hsvc = {
160 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
161 };
162 
163 /*
164  * LDC framework supports mapping remote domain's memory
165  * either directly or via shadow memory pages. Default
166  * support is currently implemented via shadow copy.
167  * Direct map can be enabled by setting 'ldc_shmem_enabled'
168  */
169 int ldc_shmem_enabled = 0;
170 
171 /*
172  * The no. of MTU size messages that can be stored in
173  * the LDC Tx queue. The number of Tx queue entries is
174  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
175  */
176 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
177 
178 /*
179  * The minimum queue length. This is the size of the smallest
180  * LDC queue. If the computed value is less than this default,
181  * the queue length is rounded up to 'ldc_queue_entries'.
182  */
183 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
184 
185 /*
186  * The length of the reliable-mode data queue in terms of the LDC
187  * receive queue length. i.e., the number of times larger than the
188  * LDC receive queue that the data queue should be. The HV receive
189  * queue is required to be a power of 2 and this implementation
190  * assumes the data queue will also be a power of 2. By making the
191  * multiplier a power of 2, we ensure the data queue will be a
192  * power of 2. We use a multiplier because the receive queue is
193  * sized to be sane relative to the MTU and the same is needed for
194  * the data queue.
195  */
196 uint64_t ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
197 
198 /*
199  * Pages exported for remote access over each channel is
200  * maintained in a table registered with the Hypervisor.
201  * The default number of entries in the table is set to
 * 'ldc_maptable_entries'.
203  */
204 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
205 
206 /*
207  * LDC retry count and delay - when the HV returns EWOULDBLOCK
208  * the operation is retried 'ldc_max_retries' times with a
209  * wait of 'ldc_delay' usecs between each retry.
210  */
211 int ldc_max_retries = LDC_MAX_RETRIES;
212 clock_t ldc_delay = LDC_DELAY;
213 
214 /*
215  * delay between each retry of channel unregistration in
216  * ldc_close(), to wait for pending interrupts to complete.
217  */
218 clock_t ldc_close_delay = LDC_CLOSE_DELAY;
219 
220 #ifdef DEBUG
221 
222 /*
223  * Print debug messages
224  *
225  * set ldcdbg to 0x7 for enabling all msgs
226  * 0x4 - Warnings
227  * 0x2 - All debug messages
228  * 0x1 - Minimal debug messages
229  *
230  * set ldcdbgchan to the channel number you want to debug
231  * setting it to -1 prints debug messages for all channels
232  * NOTE: ldcdbgchan has no effect on error messages
233  */
234 
235 #define	DBG_ALL_LDCS -1
236 
237 int ldcdbg = 0x0;
238 int64_t ldcdbgchan = DBG_ALL_LDCS;
239 uint64_t ldc_inject_err_flag = 0;
240 
241 static void
242 ldcdebug(int64_t id, const char *fmt, ...)
243 {
244 	char buf[512];
245 	va_list ap;
246 
247 	/*
248 	 * Do not return if,
249 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
250 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
251 	 * debug channel = caller specified channel
252 	 */
253 	if ((id != DBG_ALL_LDCS) &&
254 	    (ldcdbgchan != DBG_ALL_LDCS) &&
255 	    (ldcdbgchan != id)) {
256 		return;
257 	}
258 
259 	va_start(ap, fmt);
260 	(void) vsprintf(buf, fmt, ap);
261 	va_end(ap);
262 
263 	cmn_err(CE_CONT, "?%s", buf);
264 }
265 
266 #define	LDC_ERR_RESET	0x1
267 #define	LDC_ERR_PKTLOSS	0x2
268 #define	LDC_ERR_DQFULL	0x4
269 
270 static boolean_t
271 ldc_inject_error(ldc_chan_t *ldcp, uint64_t error)
272 {
273 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
274 		return (B_FALSE);
275 
276 	if ((ldc_inject_err_flag & error) == 0)
277 		return (B_FALSE);
278 
279 	/* clear the injection state */
280 	ldc_inject_err_flag &= ~error;
281 
282 	return (B_TRUE);
283 }
284 
285 #define	D1		\
286 if (ldcdbg & 0x01)	\
287 	ldcdebug
288 
289 #define	D2		\
290 if (ldcdbg & 0x02)	\
291 	ldcdebug
292 
293 #define	DWARN		\
294 if (ldcdbg & 0x04)	\
295 	ldcdebug
296 
297 #define	DUMP_PAYLOAD(id, addr)						\
298 {									\
299 	char buf[65*3];							\
300 	int i;								\
301 	uint8_t *src = (uint8_t *)addr;					\
302 	for (i = 0; i < 64; i++, src++)					\
303 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
304 	(void) sprintf(&buf[i * 3], "|\n");				\
305 	D2((id), "payload: %s", buf);					\
306 }
307 
308 #define	DUMP_LDC_PKT(c, s, addr)					\
309 {									\
310 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
311 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
312 	if (msg->type == LDC_DATA) {                                    \
313 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
314 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
315 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
316 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
317 	    (msg->env & LDC_LEN_MASK));					\
318 	} else { 							\
319 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
320 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
321 	} 								\
322 }
323 
324 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_error(_ldcp, LDC_ERR_RESET)
325 #define	LDC_INJECT_PKTLOSS(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_PKTLOSS)
326 #define	LDC_INJECT_DQFULL(_ldcp) ldc_inject_error(_ldcp, LDC_ERR_DQFULL)
327 
328 #else
329 
330 #define	DBG_ALL_LDCS -1
331 
332 #define	D1
333 #define	D2
334 #define	DWARN
335 
336 #define	DUMP_PAYLOAD(id, addr)
337 #define	DUMP_LDC_PKT(c, s, addr)
338 
339 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
340 #define	LDC_INJECT_PKTLOSS(_ldcp) (B_FALSE)
341 #define	LDC_INJECT_DQFULL(_ldcp) (B_FALSE)
342 
343 #endif
344 
345 /*
346  * dtrace SDT probes to ease tracing of the rx data queue and HV queue
347  * lengths. Just pass the head, tail, and entries values so that the
348  * length can be calculated in a dtrace script when the probe is enabled.
349  */
350 #define	TRACE_RXDQ_LENGTH(ldcp)						\
351 	DTRACE_PROBE4(rxdq__size,					\
352 	uint64_t, ldcp->id,						\
353 	uint64_t, ldcp->rx_dq_head,					\
354 	uint64_t, ldcp->rx_dq_tail,					\
355 	uint64_t, ldcp->rx_dq_entries)
356 
357 #define	TRACE_RXHVQ_LENGTH(ldcp, head, tail)				\
358 	DTRACE_PROBE4(rxhvq__size,					\
359 	uint64_t, ldcp->id,						\
360 	uint64_t, head,							\
361 	uint64_t, tail,							\
362 	uint64_t, ldcp->rx_q_entries)
363 
364 /* A dtrace SDT probe to ease tracing of data queue copy operations */
365 #define	TRACE_RXDQ_COPY(ldcp, bytes)					\
366 	DTRACE_PROBE2(rxdq__copy, uint64_t, ldcp->id, uint64_t, bytes)	\
367 
368 /* The amount of contiguous space at the tail of the queue */
369 #define	Q_CONTIG_SPACE(head, tail, size)				\
370 	((head) <= (tail) ? ((size) - (tail)) :				\
371 	((head) - (tail) - LDC_PACKET_SIZE))
372 
373 #define	ZERO_PKT(p)			\
374 	bzero((p), sizeof (ldc_msg_t));
375 
376 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
377 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
378 
379 int
380 _init(void)
381 {
382 	int status;
383 
384 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
385 	if (status != 0) {
386 		cmn_err(CE_NOTE, "!%s: cannot negotiate hypervisor LDC services"
387 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
388 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
389 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
390 		return (-1);
391 	}
392 
393 	/* allocate soft state structure */
394 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
395 
396 	/* Link the module into the system */
397 	status = mod_install(&ml);
398 	if (status != 0) {
399 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
400 		return (status);
401 	}
402 
403 	/* Initialize the LDC state structure */
404 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
405 
406 	mutex_enter(&ldcssp->lock);
407 
408 	/* Create a cache for memory handles */
409 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
410 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
411 	if (ldcssp->memhdl_cache == NULL) {
412 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
413 		mutex_exit(&ldcssp->lock);
414 		return (-1);
415 	}
416 
417 	/* Create cache for memory segment structures */
418 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
419 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
420 	if (ldcssp->memseg_cache == NULL) {
421 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
422 		mutex_exit(&ldcssp->lock);
423 		return (-1);
424 	}
425 
426 
427 	ldcssp->channel_count = 0;
428 	ldcssp->channels_open = 0;
429 	ldcssp->chan_list = NULL;
430 	ldcssp->dring_list = NULL;
431 
432 	mutex_exit(&ldcssp->lock);
433 
434 	return (0);
435 }
436 
437 int
438 _info(struct modinfo *modinfop)
439 {
440 	/* Report status of the dynamically loadable driver module */
441 	return (mod_info(&ml, modinfop));
442 }
443 
444 int
445 _fini(void)
446 {
447 	int 		rv, status;
448 	ldc_chan_t 	*tmp_ldcp, *ldcp;
449 	ldc_dring_t 	*tmp_dringp, *dringp;
450 	ldc_mem_info_t 	minfo;
451 
452 	/* Unlink the driver module from the system */
453 	status = mod_remove(&ml);
454 	if (status) {
455 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
456 		return (EIO);
457 	}
458 
459 	/* Free descriptor rings */
460 	dringp = ldcssp->dring_list;
461 	while (dringp != NULL) {
462 		tmp_dringp = dringp->next;
463 
464 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
465 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
466 			if (minfo.status == LDC_BOUND) {
467 				(void) ldc_mem_dring_unbind(
468 				    (ldc_dring_handle_t)dringp);
469 			}
470 			if (minfo.status == LDC_MAPPED) {
471 				(void) ldc_mem_dring_unmap(
472 				    (ldc_dring_handle_t)dringp);
473 			}
474 		}
475 
476 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
477 		dringp = tmp_dringp;
478 	}
479 	ldcssp->dring_list = NULL;
480 
481 	/* close and finalize channels */
482 	ldcp = ldcssp->chan_list;
483 	while (ldcp != NULL) {
484 		tmp_ldcp = ldcp->next;
485 
486 		(void) ldc_close((ldc_handle_t)ldcp);
487 		(void) ldc_fini((ldc_handle_t)ldcp);
488 
489 		ldcp = tmp_ldcp;
490 	}
491 	ldcssp->chan_list = NULL;
492 
493 	/* Destroy kmem caches */
494 	kmem_cache_destroy(ldcssp->memhdl_cache);
495 	kmem_cache_destroy(ldcssp->memseg_cache);
496 
497 	/*
498 	 * We have successfully "removed" the driver.
499 	 * Destroying soft states
500 	 */
501 	mutex_destroy(&ldcssp->lock);
502 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
503 
504 	(void) hsvc_unregister(&ldc_hsvc);
505 
506 	return (status);
507 }
508 
509 /* -------------------------------------------------------------------------- */
510 
511 /*
512  * LDC Link Layer Internal Functions
513  */
514 
515 /*
516  * Translate HV Errors to sun4v error codes
517  */
518 static int
519 i_ldc_h2v_error(int h_error)
520 {
521 	switch (h_error) {
522 
523 	case	H_EOK:
524 		return (0);
525 
526 	case	H_ENORADDR:
527 		return (EFAULT);
528 
529 	case	H_EBADPGSZ:
530 	case	H_EINVAL:
531 		return (EINVAL);
532 
533 	case	H_EWOULDBLOCK:
534 		return (EWOULDBLOCK);
535 
536 	case	H_ENOACCESS:
537 	case	H_ENOMAP:
538 		return (EACCES);
539 
540 	case	H_EIO:
541 	case	H_ECPUERROR:
542 		return (EIO);
543 
544 	case	H_ENOTSUPPORTED:
545 		return (ENOTSUP);
546 
547 	case 	H_ETOOMANY:
548 		return (ENOSPC);
549 
550 	case	H_ECHANNEL:
551 		return (ECHRNG);
552 	default:
553 		break;
554 	}
555 
556 	return (EIO);
557 }
558 
559 /*
560  * Reconfigure the transmit queue
561  */
562 static int
563 i_ldc_txq_reconf(ldc_chan_t *ldcp)
564 {
565 	int rv;
566 
567 	ASSERT(MUTEX_HELD(&ldcp->lock));
568 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
569 
570 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
571 	if (rv) {
572 		cmn_err(CE_WARN,
573 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
574 		return (EIO);
575 	}
576 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
577 	    &(ldcp->tx_tail), &(ldcp->link_state));
578 	if (rv) {
579 		cmn_err(CE_WARN,
580 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
581 		return (EIO);
582 	}
583 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
584 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
585 	    ldcp->link_state);
586 
587 	return (0);
588 }
589 
590 /*
591  * Reconfigure the receive queue
592  */
593 static int
594 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
595 {
596 	int rv;
597 	uint64_t rx_head, rx_tail;
598 
599 	ASSERT(MUTEX_HELD(&ldcp->lock));
600 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
601 	    &(ldcp->link_state));
602 	if (rv) {
603 		cmn_err(CE_WARN,
604 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
605 		    ldcp->id);
606 		return (EIO);
607 	}
608 
609 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
610 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
611 		    ldcp->rx_q_entries);
612 		if (rv) {
613 			cmn_err(CE_WARN,
614 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
615 			    ldcp->id);
616 			return (EIO);
617 		}
618 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
619 		    ldcp->id);
620 	}
621 
622 	return (0);
623 }
624 
625 
626 /*
627  * Drain the contents of the receive queue
628  */
629 static int
630 i_ldc_rxq_drain(ldc_chan_t *ldcp)
631 {
632 	int rv;
633 	uint64_t rx_head, rx_tail;
634 
635 	ASSERT(MUTEX_HELD(&ldcp->lock));
636 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
637 	    &(ldcp->link_state));
638 	if (rv) {
639 		cmn_err(CE_WARN, "i_ldc_rxq_drain: (0x%lx) cannot get state",
640 		    ldcp->id);
641 		return (EIO);
642 	}
643 
644 	/* flush contents by setting the head = tail */
645 	return (i_ldc_set_rx_head(ldcp, rx_tail));
646 }
647 
648 
649 /*
650  * Reset LDC state structure and its contents
651  */
652 static void
653 i_ldc_reset_state(ldc_chan_t *ldcp)
654 {
655 	ASSERT(MUTEX_HELD(&ldcp->lock));
656 	ldcp->last_msg_snt = LDC_INIT_SEQID;
657 	ldcp->last_ack_rcd = 0;
658 	ldcp->last_msg_rcd = 0;
659 	ldcp->tx_ackd_head = ldcp->tx_head;
660 	ldcp->stream_remains = 0;
661 	ldcp->next_vidx = 0;
662 	ldcp->hstate = 0;
663 	ldcp->tstate = TS_OPEN;
664 	ldcp->status = LDC_OPEN;
665 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
666 	ldcp->rx_dq_head = 0;
667 	ldcp->rx_dq_tail = 0;
668 
669 	if (ldcp->link_state == LDC_CHANNEL_UP ||
670 	    ldcp->link_state == LDC_CHANNEL_RESET) {
671 
672 		if (ldcp->mode == LDC_MODE_RAW) {
673 			ldcp->status = LDC_UP;
674 			ldcp->tstate = TS_UP;
675 		} else {
676 			ldcp->status = LDC_READY;
677 			ldcp->tstate |= TS_LINK_READY;
678 		}
679 	}
680 }
681 
682 /*
683  * Reset a LDC channel
684  */
static void
i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
{
	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);

	/* both the channel lock and the Tx lock must be held for a reset */
	ASSERT(MUTEX_HELD(&ldcp->lock));
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* reconfig Tx and Rx queues (Rx reconf is conditional unless forced) */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_reconf(ldcp, force_reset);

	/* Clear any pending Tx and Rx interrupts */
	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);

	/* Reset channel state (seqids, queue pointers, tstate/status) */
	i_ldc_reset_state(ldcp);

	/* Mark channel in reset; cleared elsewhere once recovery completes */
	ldcp->tstate |= TS_IN_RESET;
}
707 
708 
709 /*
710  * Clear pending interrupts
711  */
712 static void
713 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
714 {
715 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
716 
717 	ASSERT(MUTEX_HELD(&ldcp->lock));
718 	ASSERT(cinfo->dip != NULL);
719 
720 	switch (itype) {
721 	case CNEX_TX_INTR:
722 		/* check Tx interrupt */
723 		if (ldcp->tx_intr_state)
724 			ldcp->tx_intr_state = LDC_INTR_NONE;
725 		else
726 			return;
727 		break;
728 
729 	case CNEX_RX_INTR:
730 		/* check Rx interrupt */
731 		if (ldcp->rx_intr_state)
732 			ldcp->rx_intr_state = LDC_INTR_NONE;
733 		else
734 			return;
735 		break;
736 	}
737 
738 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
739 	D2(ldcp->id,
740 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
741 	    ldcp->id, itype);
742 }
743 
744 /*
745  * Set the receive queue head
746  * Resets connection and returns an error if it fails.
747  */
748 static int
749 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
750 {
751 	int 	rv;
752 	int 	retries;
753 
754 	ASSERT(MUTEX_HELD(&ldcp->lock));
755 	for (retries = 0; retries < ldc_max_retries; retries++) {
756 
757 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
758 			return (0);
759 
760 		if (rv != H_EWOULDBLOCK)
761 			break;
762 
763 		/* wait for ldc_delay usecs */
764 		drv_usecwait(ldc_delay);
765 	}
766 
767 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
768 	    ldcp->id, head);
769 	mutex_enter(&ldcp->tx_lock);
770 	i_ldc_reset(ldcp, B_TRUE);
771 	mutex_exit(&ldcp->tx_lock);
772 
773 	return (ECONNRESET);
774 }
775 
776 /*
777  * Returns the tx_head to be used for transfer
778  */
static void
i_ldc_get_tx_head(ldc_chan_t *ldcp, uint64_t *head)
{
	ldc_msg_t 	*pkt;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	/* get current Tx head */
	*head = ldcp->tx_head;

	/*
	 * Reliable mode will use the ACKd head instead of the regular tx_head.
	 * Also in Reliable mode, advance ackd_head for all non DATA/INFO pkts,
	 * up to the current location of tx_head. This needs to be done
	 * as the peer will only ACK DATA/INFO pkts.
	 */
	if (ldcp->mode == LDC_MODE_RELIABLE || ldcp->mode == LDC_MODE_STREAM) {
		while (ldcp->tx_ackd_head != ldcp->tx_head) {
			pkt = (ldc_msg_t *)(ldcp->tx_q_va + ldcp->tx_ackd_head);
			/* stop at the first DATA/INFO pkt still awaiting ACK */
			if ((pkt->type & LDC_DATA) && (pkt->stype & LDC_INFO)) {
				break;
			}
			/* advance ACKd head; the Tx queue is circular */
			ldcp->tx_ackd_head =
			    (ldcp->tx_ackd_head + LDC_PACKET_SIZE) %
			    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
		}
		*head = ldcp->tx_ackd_head;
	}
}
809 
810 /*
811  * Returns the tx_tail to be used for transfer
812  * Re-reads the TX queue ptrs if and only if the
813  * the cached head and tail are equal (queue is full)
814  */
815 static int
816 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
817 {
818 	int 		rv;
819 	uint64_t 	current_head, new_tail;
820 
821 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
822 	/* Read the head and tail ptrs from HV */
823 	rv = hv_ldc_tx_get_state(ldcp->id,
824 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
825 	if (rv) {
826 		cmn_err(CE_WARN,
827 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
828 		    ldcp->id);
829 		return (EIO);
830 	}
831 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
832 		D1(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
833 		    ldcp->id);
834 		return (ECONNRESET);
835 	}
836 
837 	i_ldc_get_tx_head(ldcp, &current_head);
838 
839 	/* increment the tail */
840 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
841 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);
842 
843 	if (new_tail == current_head) {
844 		DWARN(ldcp->id,
845 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
846 		    ldcp->id);
847 		return (EWOULDBLOCK);
848 	}
849 
850 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
851 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
852 
853 	*tail = ldcp->tx_tail;
854 	return (0);
855 }
856 
857 /*
858  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
859  * and retry ldc_max_retries times before returning an error.
860  * Returns 0, EWOULDBLOCK or EIO
861  */
862 static int
863 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
864 {
865 	int		rv, retval = EWOULDBLOCK;
866 	int 		retries;
867 
868 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
869 	for (retries = 0; retries < ldc_max_retries; retries++) {
870 
871 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
872 			retval = 0;
873 			break;
874 		}
875 		if (rv != H_EWOULDBLOCK) {
876 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
877 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
878 			retval = EIO;
879 			break;
880 		}
881 
882 		/* wait for ldc_delay usecs */
883 		drv_usecwait(ldc_delay);
884 	}
885 	return (retval);
886 }
887 
888 /*
889  * Copy a data packet from the HV receive queue to the data queue.
890  * Caller must ensure that the data queue is not already full.
891  *
892  * The *head argument represents the current head pointer for the HV
893  * receive queue. After copying a packet from the HV receive queue,
894  * the *head pointer will be updated. This allows the caller to update
895  * the head pointer in HV using the returned *head value.
896  */
897 void
898 i_ldc_rxdq_copy(ldc_chan_t *ldcp, uint64_t *head)
899 {
900 	uint64_t	q_size, dq_size;
901 
902 	ASSERT(MUTEX_HELD(&ldcp->lock));
903 
904 	q_size  = ldcp->rx_q_entries << LDC_PACKET_SHIFT;
905 	dq_size = ldcp->rx_dq_entries << LDC_PACKET_SHIFT;
906 
907 	ASSERT(Q_CONTIG_SPACE(ldcp->rx_dq_head, ldcp->rx_dq_tail,
908 	    dq_size) >= LDC_PACKET_SIZE);
909 
910 	bcopy((void *)(ldcp->rx_q_va + *head),
911 	    (void *)(ldcp->rx_dq_va + ldcp->rx_dq_tail), LDC_PACKET_SIZE);
912 	TRACE_RXDQ_COPY(ldcp, LDC_PACKET_SIZE);
913 
914 	/* Update rx head */
915 	*head = (*head + LDC_PACKET_SIZE) % q_size;
916 
917 	/* Update dq tail */
918 	ldcp->rx_dq_tail = (ldcp->rx_dq_tail + LDC_PACKET_SIZE) % dq_size;
919 }
920 
921 /*
922  * Update the Rx data queue head pointer
923  */
static int
i_ldc_set_rxdq_head(ldc_chan_t *ldcp, uint64_t head)
{
	/*
	 * The rx data queue is managed entirely in software, so the new
	 * head is simply recorded; no hypervisor call is needed. Always
	 * returns 0 so its signature matches the HV set-head path.
	 */
	ldcp->rx_dq_head = head;
	return (0);
}
930 
931 /*
932  * Get the Rx data queue head and tail pointers
933  */
934 static uint64_t
935 i_ldc_dq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
936     uint64_t *link_state)
937 {
938 	_NOTE(ARGUNUSED(link_state))
939 	*head = ldcp->rx_dq_head;
940 	*tail = ldcp->rx_dq_tail;
941 	return (0);
942 }
943 
/*
 * Wrapper for the Rx HV queue get state function, giving the
 * data queue and HV queue get state functions the same type.
 */
948 static uint64_t
949 i_ldc_hvq_rx_get_state(ldc_chan_t *ldcp, uint64_t *head, uint64_t *tail,
950     uint64_t *link_state)
951 {
952 	return (i_ldc_h2v_error(hv_ldc_rx_get_state(ldcp->id, head, tail,
953 	    link_state)));
954 }
955 
956 /*
957  * LDC receive interrupt handler
958  *    triggered for channel with data pending to read
959  *    i.e. Rx queue content changes
960  */
static uint_t
i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	ldc_chan_t	*ldcp;
	boolean_t	notify;
	uint64_t	event;
	int		rv;

	/* Get the channel for which interrupt was received */
	if (arg1 == NULL) {
		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
		return (DDI_INTR_UNCLAIMED);
	}

	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);
	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
	    ldcp->link_state);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Mark the interrupt as being actively handled */
	ldcp->rx_intr_state = LDC_INTR_ACTIVE;

	/* process the HV receive queue; sets notify/event for the client */
	(void) i_ldc_rx_process_hvq(ldcp, &notify, &event);

	if (ldcp->mode != LDC_MODE_STREAM) {
		/*
		 * If there are no data packets on the queue, clear
		 * the interrupt. Otherwise, the ldc_read will clear
		 * interrupts after draining the queue. To indicate the
		 * interrupt has not yet been cleared, it is marked
		 * as pending.
		 */
		if ((event & LDC_EVT_READ) == 0) {
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		} else {
			ldcp->rx_intr_state = LDC_INTR_PEND;
		}
	}

	/* if callbacks are disabled, do not notify */
	if (notify && ldcp->cb_enabled) {
		ldcp->cb_inprogress = B_TRUE;
		/* drop the channel lock while the client callback runs */
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
			    ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	if (ldcp->mode == LDC_MODE_STREAM) {
		/*
		 * If we are using a secondary data queue, clear the
		 * interrupt. We should have processed all CTRL packets
		 * and copied all DATA packets to the secondary queue.
		 * Even if secondary queue filled up, clear the interrupts,
		 * this will trigger another interrupt and force the
		 * handler to copy more data.
		 */
		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
1040 
1041 /*
1042  * Wrapper for the Rx HV queue processing function to be used when
1043  * checking the Rx HV queue for data packets. Unlike the interrupt
1044  * handler code flow, the Rx interrupt is not cleared here and
1045  * callbacks are not made.
1046  */
1047 static uint_t
1048 i_ldc_chkq(ldc_chan_t *ldcp)
1049 {
1050 	boolean_t	notify;
1051 	uint64_t	event;
1052 
1053 	return (i_ldc_rx_process_hvq(ldcp, &notify, &event));
1054 }
1055 
1056 /*
1057  * Send a LDC message
1058  */
static int
i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
    uint8_t ctrlmsg)
{
	int		rv;
	ldc_msg_t 	*pkt;
	uint64_t	tx_tail;
	uint32_t	curr_seqid;

	/* Obtain Tx lock; held until the seqid and tail are committed */
	mutex_enter(&ldcp->tx_lock);

	curr_seqid = ldcp->last_msg_snt;

	/* get the current tail for the message */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (rv);
	}

	/* build the packet in place in the Tx queue */
	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* Initialize the packet */
	pkt->type = pkttype;
	pkt->stype = subtype;
	pkt->ctrl = ctrlmsg;

	/*
	 * Store ackid/seqid for all modes except RAW. RTS/RTR messages
	 * are excluded and do not consume a sequence number.
	 */
	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
		curr_seqid++;
		if (ldcp->mode != LDC_MODE_RAW) {
			pkt->seqid = curr_seqid;
			pkt->ackid = ldcp->last_msg_rcd;
		}
	}
	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
		    ldcp->id, pkttype, subtype, ctrlmsg);
		mutex_exit(&ldcp->tx_lock);
		return (EIO);
	}

	/* commit the new sequence number and cached tail only on success */
	ldcp->last_msg_snt = curr_seqid;
	ldcp->tx_tail = tx_tail;

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
1123 
1124 /*
1125  * Checks if packet was received in right order
1126  * in the case of a reliable link.
1127  * Returns 0 if in order, else EIO
1128  */
1129 static int
1130 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
1131 {
1132 	/* No seqid checking for RAW mode */
1133 	if (ldcp->mode == LDC_MODE_RAW)
1134 		return (0);
1135 
1136 	/* No seqid checking for version, RTS, RTR message */
1137 	if (msg->ctrl == LDC_VER ||
1138 	    msg->ctrl == LDC_RTS ||
1139 	    msg->ctrl == LDC_RTR)
1140 		return (0);
1141 
1142 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
1143 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
1144 		DWARN(ldcp->id,
1145 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
1146 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
1147 		    (ldcp->last_msg_rcd + 1));
1148 		return (EIO);
1149 	}
1150 
1151 #ifdef DEBUG
1152 	if (LDC_INJECT_PKTLOSS(ldcp)) {
1153 		DWARN(ldcp->id,
1154 		    "i_ldc_check_seqid: (0x%llx) inject pkt loss\n", ldcp->id);
1155 		return (EIO);
1156 	}
1157 #endif
1158 
1159 	return (0);
1160 }
1161 
1162 
/*
 * Process an incoming version ctrl message
 *
 * Drives version negotiation by comparing the peer's proposal against
 * the local ldc_versions[] table (scan resumes at ldcp->next_vidx):
 *
 *   LDC_INFO - peer proposed a version; reply with an ACK (possibly
 *              lowering the minor number to ours) or a NACK carrying
 *              the next lower version this endpoint supports.
 *   LDC_ACK  - peer accepted our proposal; record the agreed version
 *              and start the next handshake phase by sending RTS.
 *   LDC_NACK - peer rejected our proposal; resubmit with a version
 *              both sides may support, or give up.
 *
 * Returns 0 on success, EAGAIN if the channel state was reset and the
 * packet should be reprocessed, or ECONNRESET if the channel was
 * reset. Acquires and releases ldcp->tx_lock internally.
 */
static int
i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int 		rv = 0, idx = ldcp->next_vidx;
	ldc_msg_t 	*pkt;
	uint64_t	tx_tail;
	ldc_ver_t	*rcvd_ver;

	/* get the received version */
	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);

	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	switch (msg->stype) {
	case LDC_INFO:

		/* peer restarted negotiation on a live link - reset state */
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			return (EAGAIN);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;

		/* scan local version table for a usable match */
		for (;;) {

			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
			    rcvd_ver->major, rcvd_ver->minor,
			    ldc_versions[idx].major, ldc_versions[idx].minor);

			if (rcvd_ver->major == ldc_versions[idx].major) {
				/* major version match - ACK version */
				pkt->stype = LDC_ACK;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));

				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* nack with next lower version */
				pkt->stype = LDC_NACK;
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next major version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - send NACK */
				pkt->stype = LDC_NACK;
				bzero(pkt->udata, sizeof (ldc_ver_t));
				ldcp->next_vidx = 0;
				break;
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			ldcp->tx_tail = tx_tail;
			if (pkt->stype == LDC_ACK) {
				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
				    " version ACK\n", ldcp->id);
				/* Save the ACK'd version */
				ldcp->version.major = rcvd_ver->major;
				ldcp->version.minor = rcvd_ver->minor;
				ldcp->hstate |= TS_RCVD_VER;
				ldcp->tstate |= TS_VER_DONE;
				D1(DBG_ALL_LDCS,
				    "(0x%llx) Sent ACK, "
				    "Agreed on version v%u.%u\n",
				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
			}
		} else {
			DWARN(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) error sending "
			    "ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;

	case LDC_ACK:
		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
			/* ACK on an already-negotiated link must match */
			if (ldcp->version.major != rcvd_ver->major ||
			    ldcp->version.minor != rcvd_ver->minor) {

				/* mismatched version - reset connection */
				DWARN(ldcp->id,
				    "i_ldc_process_VER: (0x%llx) recvd"
				    " ACK ver != sent ACK ver\n", ldcp->id);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		} else {
			/* SUCCESS - we have agreed on a version */
			ldcp->version.major = rcvd_ver->major;
			ldcp->version.minor = rcvd_ver->minor;
			ldcp->tstate |= TS_VER_DONE;
		}

		D1(ldcp->id, "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);

		/* initiate RTS-RTR-RDX handshake */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv) {
			DWARN(ldcp->id,
		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		pkt->type = LDC_CTRL;
		pkt->stype = LDC_INFO;
		pkt->ctrl = LDC_RTS;
		pkt->env = ldcp->mode;
		if (ldcp->mode != LDC_MODE_RAW)
			pkt->seqid = LDC_INIT_SEQID;

		ldcp->last_msg_rcd = LDC_INIT_SEQID;

		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv) {
			D2(ldcp->id,
			    "i_ldc_process_VER: (0x%llx) no listener\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTS;

		break;

	case LDC_NACK:
		/* check if version in NACK is zero */
		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
			/* version handshake failure */
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_VER: (0x%llx) no version match\n",
			    ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* get the current tail and pkt for the response */
		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) err sending "
			    "version ACK/NACK\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
		ZERO_PKT(pkt);

		/* initialize the packet */
		pkt->type = LDC_CTRL;
		pkt->ctrl = LDC_VER;
		pkt->stype = LDC_INFO;

		/* check ver in NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit request
				 * if lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (rcvd_ver->minor > ldc_versions[idx].minor)
					rcvd_ver->minor =
					    ldc_versions[idx].minor;
				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
				break;
			}

			if (rcvd_ver->major > ldc_versions[idx].major) {

				D1(ldcp->id, "i_ldc_process_VER: using next"
				    " lower idx=%d, v%u.%u\n", idx,
				    ldc_versions[idx].major,
				    ldc_versions[idx].minor);

				/* send next lower version */
				bcopy(&ldc_versions[idx], pkt->udata,
				    sizeof (ldc_versions[idx]));
				ldcp->next_vidx = idx;
				break;
			}

			/* next version */
			idx++;

			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);

			if (idx == LDC_NUM_VERS) {
				/* no version match - terminate */
				ldcp->next_vidx = 0;
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* initiate the send by calling into HV and set the new tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
			    ldc_versions[idx].minor);
			ldcp->tx_tail = tx_tail;
		} else {
			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version"
			    "INFO\n", ldcp->id);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		break;
	}

	mutex_exit(&ldcp->tx_lock);
	return (rv);
}
1458 
1459 
/*
 * Process an incoming RTS ctrl message
 *
 * On LDC_INFO, validates that the peer's channel mode matches ours
 * (sending a NACK on mismatch), records the peer's initial sequence
 * id and responds with an RTR packet. An RTS NACK or an unexpected
 * subtype resets the channel.
 *
 * Returns 0 on success or ECONNRESET if the channel was reset.
 * Acquires and releases ldcp->tx_lock as needed.
 */
static int
i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int 		rv = 0;
	ldc_msg_t 	*pkt;
	uint64_t	tx_tail;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		DWARN(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			cmn_err(CE_NOTE,
			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
			    ldcp->id);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;
	default:
		/* only NACK/INFO are legal subtypes for an RTS */
		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we dont want to consume the packet that came in but
	 * not record that we received the RTS
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTS received */
	ldcp->hstate |= TS_RCVD_RTS;

	/* store initial SEQID info */
	ldcp->last_msg_snt = msg->seqid;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* get the current tail for the response */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(pkt);

	/* initialize the packet */
	pkt->type = LDC_CTRL;
	pkt->stype = LDC_INFO;
	pkt->ctrl = LDC_RTR;
	pkt->env = ldcp->mode;
	if (ldcp->mode != LDC_MODE_RAW)
		pkt->seqid = LDC_INIT_SEQID;

	/* remember the peer's starting seqid for in-order checks */
	ldcp->last_msg_rcd = msg->seqid;

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		D2(ldcp->id,
		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);

		ldcp->tx_tail = tx_tail;
		ldcp->hstate |= TS_SENT_RTR;

	} else {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
		    ldcp->id);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
1586 
/*
 * Process an incoming RTR ctrl message
 *
 * On LDC_INFO, validates that the peer's channel mode matches ours
 * (sending a NACK on mismatch), records the RTR and completes the
 * handshake by sending an RDX packet. An RTR NACK or an unexpected
 * subtype resets the channel.
 *
 * Returns 0 on success or ECONNRESET if the channel was reset.
 * Acquires and releases ldcp->tx_lock as needed.
 */
static int
i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int 		rv = 0;
	boolean_t	sent_NACK = B_FALSE;

	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RTR NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/* check mode */
		if (ldcp->mode != (ldc_mode_t)msg->env) {
			DWARN(ldcp->id,
			    "i_ldc_process_RTR: (0x%llx) mode mismatch, "
			    "expecting 0x%x, got 0x%x\n",
			    ldcp->id, ldcp->mode, (ldc_mode_t)msg->env);
			/*
			 * send NACK in response to MODE message
			 * get the current tail for the response
			 */
			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
			if (rv) {
				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				rv = ECONNRESET;
				break;
			}
			sent_NACK = B_TRUE;
		}
		break;

	default:
		/* only NACK/INFO are legal subtypes for an RTR */
		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	/*
	 * If either the connection was reset (when rv != 0) or
	 * a NACK was sent, we return. In the case of a NACK
	 * we dont want to consume the packet that came in but
	 * not record that we received the RTR
	 */
	if (rv || sent_NACK)
		return (rv);

	/* record RTR received and the peer's seqid */
	ldcp->last_msg_snt = msg->seqid;
	ldcp->hstate |= TS_RCVD_RTR;

	/* complete the handshake by sending RDX */
	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
	if (rv) {
		cmn_err(CE_NOTE,
		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D2(ldcp->id,
	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);

	/* handshake is done; mark channel up unless a reset is pending */
	ldcp->hstate |= TS_SENT_RDX;
	ldcp->tstate |= TS_HSHAKE_DONE;
	if ((ldcp->tstate & TS_IN_RESET) == 0)
		ldcp->status = LDC_UP;

	D1(ldcp->id, "(0x%llx) Handshake Complete\n", ldcp->id);

	return (0);
}
1684 
1685 
/*
 * Process an incoming RDX ctrl message
 *
 * On LDC_INFO, records the RDX and marks the handshake complete,
 * bringing the channel up unless a reset is pending. A duplicate RDX
 * on an already-UP channel, an RDX NACK, or an unexpected subtype
 * resets the channel.
 *
 * Returns 0 on success or ECONNRESET if the channel was reset.
 */
static int
i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int	rv = 0;

	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);

	switch (msg->stype) {
	case LDC_NACK:
		/* RDX NACK received */
		DWARN(ldcp->id,
		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;

		break;

	case LDC_INFO:

		/*
		 * if channel is UP and a RDX received after data transmission
		 * has commenced it is an error
		 */
		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
			DWARN(DBG_ALL_LDCS,
			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
			    " - LDC reset\n", ldcp->id);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}

		/* handshake is done; mark channel up unless reset pending */
		ldcp->hstate |= TS_RCVD_RDX;
		ldcp->tstate |= TS_HSHAKE_DONE;
		if ((ldcp->tstate & TS_IN_RESET) == 0)
			ldcp->status = LDC_UP;

		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
		break;

	default:
		/* only NACK/INFO are legal subtypes for an RDX */
		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		rv = ECONNRESET;
		break;
	}

	return (rv);
}
1749 
/*
 * Process an incoming ACK for a data packet
 *
 * Scans the Tx queue from the last acknowledged position up to the
 * current Tx head looking for the packet carrying the ACK'd sequence
 * id, and advances tx_ackd_head past it. If the ackid cannot be found
 * before reaching the head, the channel is reset.
 *
 * Returns 0 on success or ECONNRESET if the channel was reset.
 * Acquires and releases ldcp->tx_lock internally.
 */
static int
i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int		rv;
	uint64_t 	tx_head;
	ldc_msg_t	*pkt;

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Read the current Tx head and tail
	 */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
		    ldcp->id);

		/* Reset the channel -- as we cannot continue */
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts. Pkts between head and tail are
	 * yet to be sent by HV.
	 */
	tx_head = ldcp->tx_ackd_head;
	for (;;) {
		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
		tx_head = (tx_head + LDC_PACKET_SIZE) %
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

		if (pkt->seqid == msg->ackid) {
			D2(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
			    ldcp->id);
			ldcp->last_ack_rcd = msg->ackid;
			ldcp->tx_ackd_head = tx_head;
			break;
		}
		if (tx_head == ldcp->tx_head) {
			/* could not find packet */
			DWARN(ldcp->id,
			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
			    ldcp->id);

			/* Reset the channel -- as we cannot continue */
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			return (ECONNRESET);
		}
	}

	mutex_exit(&ldcp->tx_lock);
	return (0);
}
1815 
/*
 * Process incoming control message
 *
 * Dispatches the packet to the appropriate handshake handler based on
 * the channel's transport state (ignoring the TS_IN_RESET flag) and
 * the control type of the message.
 *
 * Return 0 - session can continue
 *        EAGAIN - reprocess packet - state was changed
 *	  ECONNRESET - channel was reset
 */
static int
i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
{
	int 		rv = 0;

	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
	    ldcp->id, ldcp->tstate, ldcp->hstate);

	switch (ldcp->tstate & ~TS_IN_RESET) {

	case TS_OPEN:
	case TS_READY:

		/* before version negotiation, only VER is meaningful */
		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_VREADY:

		/* version agreed - handshake (RTS/RTR/RDX) in progress */
		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			/* process version message */
			rv = i_ldc_process_VER(ldcp, msg);
			break;
		case LDC_RTS:
			/* process RTS message */
			rv = i_ldc_process_RTS(ldcp, msg);
			break;
		case LDC_RTR:
			/* process RTR message */
			rv = i_ldc_process_RTR(ldcp, msg);
			break;
		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;
		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}

		break;

	case TS_UP:

		switch (msg->ctrl & LDC_CTRL_MASK) {
		case LDC_VER:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
			    "- LDC reset\n", ldcp->id);
			/* peer is redoing version negotiation */
			mutex_enter(&ldcp->tx_lock);
			(void) i_ldc_txq_reconf(ldcp);
			i_ldc_reset_state(ldcp);
			mutex_exit(&ldcp->tx_lock);
			rv = EAGAIN;
			break;

		case LDC_RDX:
			/* process RDX message */
			rv = i_ldc_process_RDX(ldcp, msg);
			break;

		default:
			DWARN(ldcp->id,
			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
			    "tstate=0x%x\n", ldcp->id,
			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
			break;
		}
	/* any other tstate: packet is silently ignored, rv stays 0 */
	}

	return (rv);
}
1910 
1911 /*
1912  * Register channel with the channel nexus
1913  */
1914 static int
1915 i_ldc_register_channel(ldc_chan_t *ldcp)
1916 {
1917 	int		rv = 0;
1918 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1919 
1920 	if (cinfo->dip == NULL) {
1921 		DWARN(ldcp->id,
1922 		    "i_ldc_register_channel: cnex has not registered\n");
1923 		return (EAGAIN);
1924 	}
1925 
1926 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1927 	if (rv) {
1928 		DWARN(ldcp->id,
1929 		    "i_ldc_register_channel: cannot register channel\n");
1930 		return (rv);
1931 	}
1932 
1933 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1934 	    i_ldc_tx_hdlr, ldcp, NULL);
1935 	if (rv) {
1936 		DWARN(ldcp->id,
1937 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1938 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1939 		return (rv);
1940 	}
1941 
1942 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1943 	    i_ldc_rx_hdlr, ldcp, NULL);
1944 	if (rv) {
1945 		DWARN(ldcp->id,
1946 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1947 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1948 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1949 		return (rv);
1950 	}
1951 
1952 	ldcp->tstate |= TS_CNEX_RDY;
1953 
1954 	return (0);
1955 }
1956 
/*
 * Unregister a channel with the channel nexus
 *
 * Removes the Rx and Tx interrupts and unregisters the channel, but
 * only if it was previously registered (TS_CNEX_RDY set). Removing
 * the Rx interrupt may fail with EAGAIN while an interrupt is still
 * pending; in that case the pending interrupt is cleared and removal
 * is retried once.
 *
 * Returns 0 on success, EAGAIN if the cnex has not registered, or the
 * cnex operation's error code.
 */
static int
i_ldc_unregister_channel(ldc_chan_t *ldcp)
{
	int		rv = 0;
	ldc_cnex_t	*cinfo = &ldcssp->cinfo;

	if (cinfo->dip == NULL) {
		DWARN(ldcp->id,
		    "i_ldc_unregister_channel: cnex has not registered\n");
		return (EAGAIN);
	}

	if (ldcp->tstate & TS_CNEX_RDY) {

		/* Remove the Rx interrupt */
		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
		if (rv) {
			if (rv != EAGAIN) {
				DWARN(ldcp->id,
				    "i_ldc_unregister_channel: err removing "
				    "Rx intr\n");
				return (rv);
			}

			/*
			 * If interrupts are pending and handler has
			 * finished running, clear interrupt and try
			 * again
			 */
			if (ldcp->rx_intr_state != LDC_INTR_PEND)
				return (rv);

			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
			    CNEX_RX_INTR);
			if (rv) {
				DWARN(ldcp->id, "i_ldc_unregister_channel: "
				    "err removing Rx interrupt\n");
				return (rv);
			}
		}

		/* Remove the Tx interrupt */
		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_unregister_channel: err removing Tx intr\n");
			return (rv);
		}

		/* Unregister the channel */
		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
		if (rv) {
			DWARN(ldcp->id,
			    "i_ldc_unregister_channel: cannot unreg channel\n");
			return (rv);
		}

		ldcp->tstate &= ~TS_CNEX_RDY;
	}

	return (0);
}
2023 
2024 
/*
 * LDC transmit interrupt handler
 *    triggered for channel up/down/reset events
 *    and Tx queue content changes
 *
 * Reads the Tx queue state from the hypervisor, resets the channel on
 * a link down/reset transition, and (if callbacks are enabled) invokes
 * the client callback with the corresponding event. Note that a link
 * "up" transition is also reported to the client as LDC_EVT_RESET.
 * Always returns DDI_INTR_CLAIMED.
 */
static uint_t
i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
{
	_NOTE(ARGUNUSED(arg2))

	int 		rv;
	ldc_chan_t 	*ldcp;
	boolean_t 	notify_client = B_FALSE;
	uint64_t	notify_event = 0, link_state;

	/* Get the channel for which interrupt was received */
	ASSERT(arg1 != NULL);
	ldcp = (ldc_chan_t *)arg1;

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
	    ldcp->id, ldcp);

	/* Lock channel */
	mutex_enter(&ldcp->lock);

	/* Obtain Tx lock */
	mutex_enter(&ldcp->tx_lock);

	/* mark interrupt handler as active */
	ldcp->tx_intr_state = LDC_INTR_ACTIVE;

	/* save current link state */
	link_state = ldcp->link_state;

	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
	    &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
		    ldcp->id, rv);
		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (DDI_INTR_CLAIMED);
	}

	/*
	 * reset the channel state if the channel went down
	 * (other side unconfigured queue) or channel was reset
	 * (other side reconfigured its queue)
	 */
	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_DOWN;
	}

	if (link_state != ldcp->link_state &&
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
		i_ldc_reset(ldcp, B_FALSE);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
	}

	/* link came up while open: clients see this as a RESET event */
	if (link_state != ldcp->link_state &&
	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
	    ldcp->link_state == LDC_CHANNEL_UP) {
		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
		notify_client = B_TRUE;
		notify_event = LDC_EVT_RESET;
		ldcp->tstate |= TS_LINK_READY;
		ldcp->status = LDC_READY;
	}

	/* if callbacks are disabled, do not notify */
	if (!ldcp->cb_enabled)
		notify_client = B_FALSE;

	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
	mutex_exit(&ldcp->tx_lock);

	/* invoke the client callback without holding the channel lock */
	if (notify_client) {
		ldcp->cb_inprogress = B_TRUE;
		mutex_exit(&ldcp->lock);
		rv = ldcp->cb(notify_event, ldcp->cb_arg);
		if (rv) {
			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
			    "failure", ldcp->id);
		}
		mutex_enter(&ldcp->lock);
		ldcp->cb_inprogress = B_FALSE;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);

	return (DDI_INTR_CLAIMED);
}
2127 
2128 /*
2129  * Process the Rx HV queue.
2130  *
2131  * Returns 0 if data packets were found and no errors were encountered,
2132  * otherwise returns an error. In either case, the *notify argument is
2133  * set to indicate whether or not the client callback function should
2134  * be invoked. The *event argument is set to contain the callback event.
2135  *
2136  * Depending on the channel mode, packets are handled differently:
2137  *
2138  * RAW MODE
2139  * For raw mode channels, when a data packet is encountered,
2140  * processing stops and all packets are left on the queue to be removed
2141  * and processed by the ldc_read code path.
2142  *
2143  * UNRELIABLE MODE
2144  * For unreliable mode, when a data packet is encountered, processing
2145  * stops, and all packets are left on the queue to be removed and
2146  * processed by the ldc_read code path. Control packets are processed
2147  * inline if they are encountered before any data packets.
2148  *
 * STREAMING MODE
2150  * For streaming mode channels, all packets on the receive queue
2151  * are processed: data packets are copied to the data queue and
2152  * control packets are processed inline. Packets are only left on
2153  * the receive queue when the data queue is full.
2154  */
static uint_t
i_ldc_rx_process_hvq(ldc_chan_t *ldcp, boolean_t *notify_client,
    uint64_t *notify_event)
{
	int		rv;
	uint64_t 	rx_head, rx_tail;
	ldc_msg_t 	*msg;
	uint64_t	link_state, first_fragment = 0;
	boolean_t	trace_length = B_TRUE;

	/* Caller must hold the channel lock; tx_lock is taken only
	 * transiently around channel resets below. */
	ASSERT(MUTEX_HELD(&ldcp->lock));
	*notify_client = B_FALSE;
	*notify_event = 0;

	/*
	 * Read packet(s) from the queue
	 */
	for (;;) {

		/* snapshot previous link state so a change can be detected */
		link_state = ldcp->link_state;
		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
		    &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "i_ldc_rx_process_hvq: (0x%lx) cannot read "
			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
			return (EIO);
		}

		/*
		 * reset the channel state if the channel went down
		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
		 */

		if (link_state != ldcp->link_state) {

			switch (ldcp->link_state) {
			case LDC_CHANNEL_DOWN:
				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
				    "link down\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_DOWN;
				goto loop_exit;

			case LDC_CHANNEL_UP:
				D1(ldcp->id, "i_ldc_rx_process_hvq: "
				    "channel link up\n", ldcp->id);

				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
					*notify_client = B_TRUE;
					*notify_event = LDC_EVT_RESET;
					ldcp->tstate |= TS_LINK_READY;
					ldcp->status = LDC_READY;
				}
				break;

			case LDC_CHANNEL_RESET:
			default:
#ifdef DEBUG
force_reset:
#endif
				D1(ldcp->id, "i_ldc_rx_process_hvq: channel "
				    "link reset\n", ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}
		}

#ifdef DEBUG
		/* fault-injection hook: force the reset path in DEBUG kernels */
		if (LDC_INJECT_RESET(ldcp))
			goto force_reset;
#endif
		/* trace the queue depth only once per invocation */
		if (trace_length) {
			TRACE_RXHVQ_LENGTH(ldcp, rx_head, rx_tail);
			trace_length = B_FALSE;
		}

		if (rx_head == rx_tail) {
			D2(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
			    "No packets\n", ldcp->id);
			break;
		}

		D2(ldcp->id, "i_ldc_rx_process_hvq: head=0x%llx, "
		    "tail=0x%llx\n", rx_head, rx_tail);
		DUMP_LDC_PKT(ldcp, "i_ldc_rx_process_hvq rcd",
		    ldcp->rx_q_va + rx_head);

		/* get the message */
		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

		/* if channel is in RAW mode or data pkt, notify and return */
		if (ldcp->mode == LDC_MODE_RAW) {
			*notify_client = B_TRUE;
			*notify_event |= LDC_EVT_READ;
			break;
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* discard packet if channel is not up */
			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {

				/* move the head one position */
				rx_head = (rx_head + LDC_PACKET_SIZE) %
				    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);

				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
					break;

				continue;
			} else {
				uint64_t dq_head, dq_tail;

				/* process only STREAM mode data packets */
				if (ldcp->mode != LDC_MODE_STREAM) {
					if ((ldcp->tstate & TS_IN_RESET) == 0)
						*notify_client = B_TRUE;
					*notify_event |= LDC_EVT_READ;
					break;
				}

				/* don't process packet if queue full */
				(void) i_ldc_dq_rx_get_state(ldcp, &dq_head,
				    &dq_tail, NULL);
				dq_tail = (dq_tail + LDC_PACKET_SIZE) %
				    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT);
				if (dq_tail == dq_head ||
				    LDC_INJECT_DQFULL(ldcp)) {
					/* data queue full: leave pkt on HV
					 * queue; ackpeek runs below */
					rv = ENOSPC;
					break;
				}
			}
		}

		/* Check the sequence ID for the message received */
		rv = i_ldc_check_seqid(ldcp, msg);
		if (rv != 0) {

			DWARN(ldcp->id, "i_ldc_rx_process_hvq: (0x%llx) "
			    "seqid error, q_ptrs=0x%lx,0x%lx", ldcp->id,
			    rx_head, rx_tail);

			/* Reset last_msg_rcd to start of message */
			if (first_fragment != 0) {
				ldcp->last_msg_rcd = first_fragment - 1;
				first_fragment = 0;
			}

			/*
			 * Send a NACK due to seqid mismatch
			 */
			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
			    (msg->ctrl & LDC_CTRL_MASK));

			if (rv) {
				cmn_err(CE_NOTE, "i_ldc_rx_process_hvq: "
				    "(0x%lx) err sending CTRL/DATA NACK msg\n",
				    ldcp->id);

				/* if cannot send NACK - reset channel */
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);

				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}

			/* purge receive queue */
			(void) i_ldc_set_rx_head(ldcp, rx_tail);
			break;
		}

		/* record the message ID */
		ldcp->last_msg_rcd = msg->seqid;

		/* process control messages */
		if (msg->type & LDC_CTRL) {
			/* save current internal state */
			uint64_t tstate = ldcp->tstate;

			rv = i_ldc_ctrlmsg(ldcp, msg);
			if (rv == EAGAIN) {
				/* re-process pkt - state was adjusted */
				continue;
			}
			if (rv == ECONNRESET) {
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}

			/*
			 * control message processing was successful
			 * channel transitioned to ready for communication
			 */
			if (rv == 0 && ldcp->tstate == TS_UP &&
			    (tstate & ~TS_IN_RESET) !=
			    (ldcp->tstate & ~TS_IN_RESET)) {
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_UP;
			}
		}

		/* process data NACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
			DWARN(ldcp->id,
			    "i_ldc_rx_process_hvq: (0x%llx) received DATA/NACK",
			    ldcp->id);
			/* peer NACK'd our data: channel must be reset */
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_TRUE);
			mutex_exit(&ldcp->tx_lock);
			*notify_client = B_TRUE;
			*notify_event = LDC_EVT_RESET;
			break;
		}

		/* process data ACKs */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				break;
			}
		}

		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
			/* only STREAM mode reaches here; other modes broke
			 * out of the loop above when a data pkt was seen */
			ASSERT(ldcp->mode == LDC_MODE_STREAM);

			/*
			 * Copy the data packet to the data queue. Note
			 * that the copy routine updates the rx_head pointer.
			 */
			i_ldc_rxdq_copy(ldcp, &rx_head);

			if ((ldcp->tstate & TS_IN_RESET) == 0)
				*notify_client = B_TRUE;
			*notify_event |= LDC_EVT_READ;
		} else {
			rx_head = (rx_head + LDC_PACKET_SIZE) %
			    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
		}

		/* move the head one position */
		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
			*notify_client = B_TRUE;
			*notify_event = LDC_EVT_RESET;
			break;
		}

	} /* for */

loop_exit:

	if (ldcp->mode == LDC_MODE_STREAM) {
		/* ACK data packets */
		if ((*notify_event &
		    (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ) {
			int ack_rv;
			ack_rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
			if (ack_rv && ack_rv != EWOULDBLOCK) {
				cmn_err(CE_NOTE,
				    "i_ldc_rx_process_hvq: (0x%lx) cannot "
				    "send ACK\n", ldcp->id);

				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->tx_lock);

				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
				goto skip_ackpeek;
			}
		}

		/*
		 * If we have no more space on the data queue, make sure
		 * there are no ACKs on the rx queue waiting to be processed.
		 */
		if (rv == ENOSPC) {
			if (i_ldc_rx_ackpeek(ldcp, rx_head, rx_tail) != 0) {
				ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
				*notify_client = B_TRUE;
				*notify_event = LDC_EVT_RESET;
			}
		} else {
			/* queue drained normally; invalidate ackpeek cursor */
			ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
		}
	}

skip_ackpeek:

	/* Return, indicating whether or not data packets were found */
	if ((*notify_event & (LDC_EVT_READ | LDC_EVT_RESET)) == LDC_EVT_READ)
		return (0);

	return (ENOMSG);
}
2464 
2465 /*
2466  * Process any ACK packets on the HV receive queue.
2467  *
2468  * This function is only used by STREAMING mode channels when the
2469  * secondary data queue fills up and there are packets remaining on
2470  * the HV receive queue.
2471  */
2472 int
2473 i_ldc_rx_ackpeek(ldc_chan_t *ldcp, uint64_t rx_head, uint64_t rx_tail)
2474 {
2475 	int		rv = 0;
2476 	ldc_msg_t	*msg;
2477 
2478 	if (ldcp->rx_ack_head == ACKPEEK_HEAD_INVALID)
2479 		ldcp->rx_ack_head = rx_head;
2480 
2481 	while (ldcp->rx_ack_head != rx_tail) {
2482 		msg = (ldc_msg_t *)(ldcp->rx_q_va + ldcp->rx_ack_head);
2483 
2484 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2485 			if (rv = i_ldc_process_data_ACK(ldcp, msg))
2486 				break;
2487 			msg->stype &= ~LDC_ACK;
2488 		}
2489 
2490 		ldcp->rx_ack_head =
2491 		    (ldcp->rx_ack_head + LDC_PACKET_SIZE) %
2492 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2493 	}
2494 	return (rv);
2495 }
2496 
2497 /* -------------------------------------------------------------------------- */
2498 
2499 /*
2500  * LDC API functions
2501  */
2502 
2503 /*
2504  * Initialize the channel. Allocate internal structure and memory for
2505  * TX/RX queues, and initialize locks.
2506  */
2507 int
2508 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2509 {
2510 	ldc_chan_t 	*ldcp;
2511 	int		rv, exit_val;
2512 	uint64_t	ra_base, nentries;
2513 	uint64_t	qlen;
2514 
2515 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2516 
2517 	if (attr == NULL) {
2518 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2519 		return (EINVAL);
2520 	}
2521 	if (handle == NULL) {
2522 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2523 		return (EINVAL);
2524 	}
2525 
2526 	/* check if channel is valid */
2527 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2528 	if (rv == H_ECHANNEL) {
2529 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2530 		return (EINVAL);
2531 	}
2532 
2533 	/* check if the channel has already been initialized */
2534 	mutex_enter(&ldcssp->lock);
2535 	ldcp = ldcssp->chan_list;
2536 	while (ldcp != NULL) {
2537 		if (ldcp->id == id) {
2538 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2539 			    id);
2540 			mutex_exit(&ldcssp->lock);
2541 			return (EADDRINUSE);
2542 		}
2543 		ldcp = ldcp->next;
2544 	}
2545 	mutex_exit(&ldcssp->lock);
2546 
2547 	ASSERT(ldcp == NULL);
2548 
2549 	*handle = 0;
2550 
2551 	/* Allocate an ldcp structure */
2552 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2553 
2554 	/*
2555 	 * Initialize the channel and Tx lock
2556 	 *
2557 	 * The channel 'lock' protects the entire channel and
2558 	 * should be acquired before initializing, resetting,
2559 	 * destroying or reading from a channel.
2560 	 *
2561 	 * The 'tx_lock' should be acquired prior to transmitting
2562 	 * data over the channel. The lock should also be acquired
2563 	 * prior to channel reconfiguration (in order to prevent
2564 	 * concurrent writes).
2565 	 *
2566 	 * ORDERING: When both locks are being acquired, to prevent
2567 	 * deadlocks, the channel lock should be always acquired prior
2568 	 * to the tx_lock.
2569 	 */
2570 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2571 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2572 
2573 	/* Initialize the channel */
2574 	ldcp->id = id;
2575 	ldcp->cb = NULL;
2576 	ldcp->cb_arg = NULL;
2577 	ldcp->cb_inprogress = B_FALSE;
2578 	ldcp->cb_enabled = B_FALSE;
2579 	ldcp->next = NULL;
2580 
2581 	/* Read attributes */
2582 	ldcp->mode = attr->mode;
2583 	ldcp->devclass = attr->devclass;
2584 	ldcp->devinst = attr->instance;
2585 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2586 
2587 	D1(ldcp->id,
2588 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2589 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2590 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2591 
2592 	ldcp->next_vidx = 0;
2593 	ldcp->tstate = TS_IN_RESET;
2594 	ldcp->hstate = 0;
2595 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2596 	ldcp->last_ack_rcd = 0;
2597 	ldcp->last_msg_rcd = 0;
2598 	ldcp->rx_ack_head = ACKPEEK_HEAD_INVALID;
2599 
2600 	ldcp->stream_bufferp = NULL;
2601 	ldcp->exp_dring_list = NULL;
2602 	ldcp->imp_dring_list = NULL;
2603 	ldcp->mhdl_list = NULL;
2604 
2605 	ldcp->tx_intr_state = LDC_INTR_NONE;
2606 	ldcp->rx_intr_state = LDC_INTR_NONE;
2607 
2608 	/* Initialize payload size depending on whether channel is reliable */
2609 	switch (ldcp->mode) {
2610 	case LDC_MODE_RAW:
2611 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2612 		ldcp->read_p = i_ldc_read_raw;
2613 		ldcp->write_p = i_ldc_write_raw;
2614 		break;
2615 	case LDC_MODE_UNRELIABLE:
2616 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2617 		ldcp->read_p = i_ldc_read_packet;
2618 		ldcp->write_p = i_ldc_write_packet;
2619 		break;
2620 	case LDC_MODE_RELIABLE:
2621 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2622 		ldcp->read_p = i_ldc_read_packet;
2623 		ldcp->write_p = i_ldc_write_packet;
2624 		break;
2625 	case LDC_MODE_STREAM:
2626 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2627 
2628 		ldcp->stream_remains = 0;
2629 		ldcp->stream_offset = 0;
2630 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2631 		ldcp->read_p = i_ldc_read_stream;
2632 		ldcp->write_p = i_ldc_write_stream;
2633 		break;
2634 	default:
2635 		exit_val = EINVAL;
2636 		goto cleanup_on_exit;
2637 	}
2638 
2639 	/*
2640 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2641 	 * value is smaller than default length of ldc_queue_entries,
2642 	 * qlen is set to ldc_queue_entries. Ensure that computed
2643 	 * length is a power-of-two value.
2644 	 */
2645 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2646 	if (!ISP2(qlen)) {
2647 		uint64_t	tmp = 1;
2648 		while (qlen) {
2649 			qlen >>= 1; tmp <<= 1;
2650 		}
2651 		qlen = tmp;
2652 	}
2653 
2654 	ldcp->rx_q_entries =
2655 	    (qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2656 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2657 
2658 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", ldcp->rx_q_entries);
2659 
2660 	/* Create a transmit queue */
2661 	ldcp->tx_q_va = (uint64_t)
2662 	    contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2663 	if (ldcp->tx_q_va == NULL) {
2664 		cmn_err(CE_WARN,
2665 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2666 		    ldcp->id);
2667 		exit_val = ENOMEM;
2668 		goto cleanup_on_exit;
2669 	}
2670 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2671 
2672 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2673 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2674 
2675 	ldcp->tstate |= TS_TXQ_RDY;
2676 
2677 	/* Create a receive queue */
2678 	ldcp->rx_q_va = (uint64_t)
2679 	    contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2680 	if (ldcp->rx_q_va == NULL) {
2681 		cmn_err(CE_WARN,
2682 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2683 		    ldcp->id);
2684 		exit_val = ENOMEM;
2685 		goto cleanup_on_exit;
2686 	}
2687 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2688 
2689 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2690 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2691 
2692 	ldcp->tstate |= TS_RXQ_RDY;
2693 
2694 	/* Setup a separate read data queue */
2695 	if (ldcp->mode == LDC_MODE_STREAM) {
2696 		ldcp->readq_get_state = i_ldc_dq_rx_get_state;
2697 		ldcp->readq_set_head  = i_ldc_set_rxdq_head;
2698 
2699 		/* Make sure the data queue multiplier is a power of 2 */
2700 		if (!ISP2(ldc_rxdq_multiplier)) {
2701 			D1(ldcp->id, "ldc_init: (0x%llx) ldc_rxdq_multiplier "
2702 			    "not a power of 2, resetting", ldcp->id);
2703 			ldc_rxdq_multiplier = LDC_RXDQ_MULTIPLIER;
2704 		}
2705 
2706 		ldcp->rx_dq_entries = ldc_rxdq_multiplier * ldcp->rx_q_entries;
2707 		ldcp->rx_dq_va = (uint64_t)
2708 		    kmem_alloc(ldcp->rx_dq_entries << LDC_PACKET_SHIFT,
2709 		    KM_SLEEP);
2710 		if (ldcp->rx_dq_va == NULL) {
2711 			cmn_err(CE_WARN,
2712 			    "ldc_init: (0x%lx) RX data queue "
2713 			    "allocation failed\n", ldcp->id);
2714 			exit_val = ENOMEM;
2715 			goto cleanup_on_exit;
2716 		}
2717 
2718 		ldcp->rx_dq_head = ldcp->rx_dq_tail = 0;
2719 
2720 		D2(ldcp->id, "ldc_init: rx_dq_va=0x%llx, "
2721 		    "rx_dq_entries=0x%llx\n", ldcp->rx_dq_va,
2722 		    ldcp->rx_dq_entries);
2723 	} else {
2724 		ldcp->readq_get_state = i_ldc_hvq_rx_get_state;
2725 		ldcp->readq_set_head  = i_ldc_set_rx_head;
2726 	}
2727 
2728 	/* Init descriptor ring and memory handle list lock */
2729 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2730 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2731 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2732 
2733 	/* mark status as INITialized */
2734 	ldcp->status = LDC_INIT;
2735 
2736 	/* Add to channel list */
2737 	mutex_enter(&ldcssp->lock);
2738 	ldcp->next = ldcssp->chan_list;
2739 	ldcssp->chan_list = ldcp;
2740 	ldcssp->channel_count++;
2741 	mutex_exit(&ldcssp->lock);
2742 
2743 	/* set the handle */
2744 	*handle = (ldc_handle_t)ldcp;
2745 
2746 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2747 
2748 	return (0);
2749 
2750 cleanup_on_exit:
2751 
2752 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2753 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2754 
2755 	if (ldcp->tstate & TS_TXQ_RDY)
2756 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2757 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2758 
2759 	if (ldcp->tstate & TS_RXQ_RDY)
2760 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2761 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2762 
2763 	mutex_destroy(&ldcp->tx_lock);
2764 	mutex_destroy(&ldcp->lock);
2765 
2766 	if (ldcp)
2767 		kmem_free(ldcp, sizeof (ldc_chan_t));
2768 
2769 	return (exit_val);
2770 }
2771 
2772 /*
2773  * Finalizes the LDC connection. It will return EBUSY if the
2774  * channel is open. A ldc_close() has to be done prior to
2775  * a ldc_fini operation. It frees TX/RX queues, associated
2776  * with the channel
2777  */
int
ldc_fini(ldc_handle_t handle)
{
	ldc_chan_t 	*ldcp;
	ldc_chan_t 	*tmp_ldcp;
	uint64_t 	id;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;
	/* cache the id: ldcp is freed before the final D1() trace below */
	id = ldcp->id;

	mutex_enter(&ldcp->lock);

	/* any state beyond TS_INIT means the channel is still open */
	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}

	/* Remove from the channel list (singly-linked list unlink) */
	mutex_enter(&ldcssp->lock);
	tmp_ldcp = ldcssp->chan_list;
	if (tmp_ldcp == ldcp) {
		ldcssp->chan_list = ldcp->next;
		ldcp->next = NULL;
	} else {
		while (tmp_ldcp != NULL) {
			if (tmp_ldcp->next == ldcp) {
				tmp_ldcp->next = ldcp->next;
				ldcp->next = NULL;
				break;
			}
			tmp_ldcp = tmp_ldcp->next;
		}
		/* channel not on the list - handle is bogus */
		if (tmp_ldcp == NULL) {
			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
			mutex_exit(&ldcssp->lock);
			mutex_exit(&ldcp->lock);
			return (EINVAL);
		}
	}

	ldcssp->channel_count--;

	mutex_exit(&ldcssp->lock);

	/* Free the map table for this channel */
	if (ldcp->mtbl) {
		/* detach the table from the HV before freeing its memory */
		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
		if (ldcp->mtbl->contigmem)
			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
		else
			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
		mutex_destroy(&ldcp->mtbl->lock);
		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
	}

	/* Destroy descriptor ring and memory handle list lock */
	mutex_destroy(&ldcp->exp_dlist_lock);
	mutex_destroy(&ldcp->imp_dlist_lock);
	mutex_destroy(&ldcp->mlist_lock);

	/* Free the stream buffer for STREAM_MODE */
	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
		kmem_free(ldcp->stream_bufferp, ldcp->mtu);

	/* Free the RX queue */
	contig_mem_free((caddr_t)ldcp->rx_q_va,
	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
	ldcp->tstate &= ~TS_RXQ_RDY;

	/* Free the RX data queue (allocated only for STREAM mode) */
	if (ldcp->mode == LDC_MODE_STREAM) {
		kmem_free((caddr_t)ldcp->rx_dq_va,
		    (ldcp->rx_dq_entries << LDC_PACKET_SHIFT));
	}

	/* Free the TX queue */
	contig_mem_free((caddr_t)ldcp->tx_q_va,
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
	ldcp->tstate &= ~TS_TXQ_RDY;

	mutex_exit(&ldcp->lock);

	/* Destroy mutex (must be after the final mutex_exit above) */
	mutex_destroy(&ldcp->tx_lock);
	mutex_destroy(&ldcp->lock);

	/* free channel structure */
	kmem_free(ldcp, sizeof (ldc_chan_t));

	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);

	return (0);
}
2877 
2878 /*
2879  * Open the LDC channel for use. It registers the TX/RX queues
2880  * with the Hypervisor. It also specifies the interrupt number
2881  * and target CPU for this channel
2882  */
int
ldc_open(ldc_handle_t handle)
{
	ldc_chan_t 	*ldcp;
	int 		rv;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
		return (EINVAL);
	}

	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	if (ldcp->tstate < TS_INIT) {
		DWARN(ldcp->id,
		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EFAULT);
	}
	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
		DWARN(ldcp->id,
		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EFAULT);
	}

	/*
	 * Unregister/Register the tx queue with the hypervisor
	 *
	 * The unconf-then-conf sequence clears any stale queue
	 * configuration left over from a previous incarnation.
	 */
	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
	    ldcp->id);

	/*
	 * Unregister/Register the rx queue with the hypervisor
	 */
	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
	    ldcp->id);

	ldcp->tstate |= TS_QCONF_RDY;

	/* Register the channel with the channel nexus */
	rv = i_ldc_register_channel(ldcp);
	if (rv && rv != EAGAIN) {
		cmn_err(CE_WARN,
		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
		/* on failure, unconfigure both queues before returning */
		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	/* mark channel in OPEN state */
	ldcp->status = LDC_OPEN;

	/* Read channel state */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_open: (0x%lx) cannot read channel state\n",
		    ldcp->id);
		/* undo the nexus registration and queue configuration */
		(void) i_ldc_unregister_channel(ldcp);
		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	/*
	 * set the ACKd head to current head location for reliable &
	 * streaming mode
	 */
	ldcp->tx_ackd_head = ldcp->tx_head;

	/* mark channel ready if HV reports link is UP (peer alloc'd Rx queue) */
	if (ldcp->link_state == LDC_CHANNEL_UP ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		ldcp->tstate |= TS_LINK_READY;
		ldcp->status = LDC_READY;
	}

	/*
	 * if channel is being opened in RAW mode - no handshake is needed
	 * switch the channel READY and UP state
	 */
	if (ldcp->mode == LDC_MODE_RAW) {
		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
		ldcp->status = LDC_UP;
	}

	mutex_exit(&ldcp->lock);

	/*
	 * Increment number of open channels
	 */
	mutex_enter(&ldcssp->lock);
	ldcssp->channels_open++;
	mutex_exit(&ldcssp->lock);

	D1(ldcp->id,
	    "ldc_open: (0x%llx) channel (0x%p) open for use "
	    "(tstate=0x%x, status=0x%x)\n",
	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);

	return (0);
}
3027 
3028 /*
3029  * Close the LDC connection. It will return EBUSY if there
3030  * are memory segments or descriptor rings either bound to or
3031  * mapped over the channel
3032  */
int
ldc_close(ldc_handle_t handle)
{
	ldc_chan_t 	*ldcp;
	int		rv = 0, retries = 0;
	boolean_t	chk_done = B_FALSE;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	/* return error if channel is not open */
	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EFAULT);
	}

	/* if any memory handles, drings, are bound or mapped cannot close */
	if (ldcp->mhdl_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has bound memory handles\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}
	if (ldcp->exp_dring_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}
	if (ldcp->imp_dring_list != NULL) {
		DWARN(ldcp->id,
		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EBUSY);
	}

	/* cannot tear down the channel while a callback is being delivered */
	if (ldcp->cb_inprogress) {
		DWARN(ldcp->id, "ldc_close: (0x%llx) callback active\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EWOULDBLOCK);
	}

	/* Obtain Tx lock (channel lock already held - correct lock order) */
	mutex_enter(&ldcp->tx_lock);

	/*
	 * Wait for pending transmits to complete i.e Tx queue to drain
	 * if there are pending pkts - wait 1 ms and retry again
	 */
	for (;;) {

		rv = hv_ldc_tx_get_state(ldcp->id,
		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
		if (rv) {
			cmn_err(CE_WARN,
			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
			mutex_exit(&ldcp->tx_lock);
			mutex_exit(&ldcp->lock);
			return (EIO);
		}

		/* done if queue is empty or the link is no longer up */
		if (ldcp->tx_head == ldcp->tx_tail ||
		    ldcp->link_state != LDC_CHANNEL_UP) {
			break;
		}

		/* only one 1ms retry is attempted before giving up */
		if (chk_done) {
			DWARN(ldcp->id,
			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
			    ldcp->id);
			break;
		}

		/* wait for one ms and try again */
		delay(drv_usectohz(1000));
		chk_done = B_TRUE;
	}

	/*
	 * Drain the Tx and Rx queues as we are closing the
	 * channel. We dont care about any pending packets.
	 * We have to also drain the queue prior to clearing
	 * pending interrupts, otherwise the HV will trigger
	 * an interrupt the moment the interrupt state is
	 * cleared.
	 */
	(void) i_ldc_txq_reconf(ldcp);
	(void) i_ldc_rxq_drain(ldcp);

	/*
	 * Unregister the channel with the nexus
	 */
	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {

		/* drop both locks while waiting/returning */
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);

		/* if any error other than EAGAIN return back */
		if (rv != EAGAIN || retries >= ldc_max_retries) {
			cmn_err(CE_WARN,
			    "ldc_close: (0x%lx) unregister failed, %d\n",
			    ldcp->id, rv);
			return (rv);
		}

		/*
		 * As there could be pending interrupts we need
		 * to wait and try again
		 */
		drv_usecwait(ldc_close_delay);
		/* reacquire in the documented order: lock before tx_lock */
		mutex_enter(&ldcp->lock);
		mutex_enter(&ldcp->tx_lock);
		retries++;
	}

	/*
	 * Unregister queues
	 */
	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}
	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
	if (rv) {
		cmn_err(CE_WARN,
		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (EIO);
	}

	ldcp->tstate &= ~TS_QCONF_RDY;

	/* Reset channel state information */
	i_ldc_reset_state(ldcp);

	/* Mark channel as down and in initialized state */
	ldcp->tx_ackd_head = 0;
	ldcp->tx_head = 0;
	ldcp->tstate = TS_IN_RESET|TS_INIT;
	ldcp->status = LDC_INIT;

	mutex_exit(&ldcp->tx_lock);
	mutex_exit(&ldcp->lock);

	/* Decrement number of open channels */
	mutex_enter(&ldcssp->lock);
	ldcssp->channels_open--;
	mutex_exit(&ldcssp->lock);

	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);

	return (0);
}
3204 
3205 /*
3206  * Register channel callback
3207  */
3208 int
3209 ldc_reg_callback(ldc_handle_t handle,
3210     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
3211 {
3212 	ldc_chan_t *ldcp;
3213 
3214 	if (handle == NULL) {
3215 		DWARN(DBG_ALL_LDCS,
3216 		    "ldc_reg_callback: invalid channel handle\n");
3217 		return (EINVAL);
3218 	}
3219 	if (((uint64_t)cb) < KERNELBASE) {
3220 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
3221 		return (EINVAL);
3222 	}
3223 	ldcp = (ldc_chan_t *)handle;
3224 
3225 	mutex_enter(&ldcp->lock);
3226 
3227 	if (ldcp->cb) {
3228 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
3229 		    ldcp->id);
3230 		mutex_exit(&ldcp->lock);
3231 		return (EIO);
3232 	}
3233 	if (ldcp->cb_inprogress) {
3234 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
3235 		    ldcp->id);
3236 		mutex_exit(&ldcp->lock);
3237 		return (EWOULDBLOCK);
3238 	}
3239 
3240 	ldcp->cb = cb;
3241 	ldcp->cb_arg = arg;
3242 	ldcp->cb_enabled = B_TRUE;
3243 
3244 	D1(ldcp->id,
3245 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
3246 	    ldcp->id);
3247 
3248 	mutex_exit(&ldcp->lock);
3249 
3250 	return (0);
3251 }
3252 
3253 /*
3254  * Unregister channel callback
3255  */
3256 int
3257 ldc_unreg_callback(ldc_handle_t handle)
3258 {
3259 	ldc_chan_t *ldcp;
3260 
3261 	if (handle == NULL) {
3262 		DWARN(DBG_ALL_LDCS,
3263 		    "ldc_unreg_callback: invalid channel handle\n");
3264 		return (EINVAL);
3265 	}
3266 	ldcp = (ldc_chan_t *)handle;
3267 
3268 	mutex_enter(&ldcp->lock);
3269 
3270 	if (ldcp->cb == NULL) {
3271 		DWARN(ldcp->id,
3272 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
3273 		    ldcp->id);
3274 		mutex_exit(&ldcp->lock);
3275 		return (EIO);
3276 	}
3277 	if (ldcp->cb_inprogress) {
3278 		DWARN(ldcp->id,
3279 		    "ldc_unreg_callback: (0x%llx) callback active\n",
3280 		    ldcp->id);
3281 		mutex_exit(&ldcp->lock);
3282 		return (EWOULDBLOCK);
3283 	}
3284 
3285 	ldcp->cb = NULL;
3286 	ldcp->cb_arg = NULL;
3287 	ldcp->cb_enabled = B_FALSE;
3288 
3289 	D1(ldcp->id,
3290 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
3291 	    ldcp->id);
3292 
3293 	mutex_exit(&ldcp->lock);
3294 
3295 	return (0);
3296 }
3297 
3298 
3299 /*
3300  * Bring a channel up by initiating a handshake with the peer
3301  * This call is asynchronous. It will complete at a later point
3302  * in time when the peer responds back with an RTR.
3303  */
int
ldc_up(ldc_handle_t handle)
{
	int 		rv;
	ldc_chan_t 	*ldcp;
	ldc_msg_t 	*ldcmsg;
	uint64_t 	tx_tail, tstate, link_state;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);

	/* clear the reset state (remember the pre-clear tstate for below) */
	tstate = ldcp->tstate;
	ldcp->tstate &= ~TS_IN_RESET;

	if (ldcp->tstate == TS_UP) {
		DWARN(ldcp->id,
		    "ldc_up: (0x%llx) channel is already in UP state\n",
		    ldcp->id);

		/* mark channel as up */
		ldcp->status = LDC_UP;

		/*
		 * if channel was in reset state and there was
		 * pending data clear interrupt state. this will
		 * trigger an interrupt, causing the RX handler to
		 * invoke the client's callback
		 */
		if ((tstate & TS_IN_RESET) &&
		    ldcp->rx_intr_state == LDC_INTR_PEND) {
			D1(ldcp->id,
			    "ldc_up: (0x%llx) channel has pending data, "
			    "clearing interrupt\n", ldcp->id);
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		}

		mutex_exit(&ldcp->lock);
		return (0);
	}

	/* if the channel is in RAW mode - mark it as UP, if READY */
	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
		ldcp->tstate = TS_UP;
		mutex_exit(&ldcp->lock);
		return (0);
	}

	/* Don't start another handshake if there is one in progress */
	if (ldcp->hstate) {
		D1(ldcp->id,
		    "ldc_up: (0x%llx) channel handshake in progress\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (0);
	}

	/* Tx lock is acquired after the channel lock, per lock ordering */
	mutex_enter(&ldcp->tx_lock);

	/* save current link state */
	link_state = ldcp->link_state;

	/* get the current tail for the LDC msg */
	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
	if (rv) {
		D1(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
		    ldcp->id);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (ECONNREFUSED);
	}

	/*
	 * If i_ldc_get_tx_tail() changed link_state to either RESET or UP,
	 * from a previous state of DOWN, then mark the channel as
	 * being ready for handshake.
	 */
	if ((link_state == LDC_CHANNEL_DOWN) &&
	    (link_state != ldcp->link_state)) {

		ASSERT((ldcp->link_state == LDC_CHANNEL_RESET) ||
		    (ldcp->link_state == LDC_CHANNEL_UP));

		/* RAW mode has no handshake - the link coming up suffices */
		if (ldcp->mode == LDC_MODE_RAW) {
			ldcp->status = LDC_UP;
			ldcp->tstate = TS_UP;
			mutex_exit(&ldcp->tx_lock);
			mutex_exit(&ldcp->lock);
			return (0);
		} else {
			ldcp->status = LDC_READY;
			ldcp->tstate |= TS_LINK_READY;
		}

	}

	/* build a version-negotiation (CTRL/INFO/VER) packet in the Tx queue */
	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
	ZERO_PKT(ldcmsg);

	ldcmsg->type = LDC_CTRL;
	ldcmsg->stype = LDC_INFO;
	ldcmsg->ctrl = LDC_VER;
	ldcp->next_vidx = 0;
	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));

	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);

	/* initiate the send by calling into HV and set the new tail */
	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		DWARN(ldcp->id,
		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
		    ldcp->id, rv);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (rv);
	}

	/* handshake is now in flight; completion happens asynchronously */
	ldcp->hstate |= TS_SENT_VER;
	ldcp->tx_tail = tx_tail;
	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);

	mutex_exit(&ldcp->tx_lock);
	mutex_exit(&ldcp->lock);

	return (rv);
}
3441 
3442 
3443 /*
3444  * Bring a channel down by resetting its state and queues
3445  */
3446 int
3447 ldc_down(ldc_handle_t handle)
3448 {
3449 	ldc_chan_t 	*ldcp;
3450 
3451 	if (handle == NULL) {
3452 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
3453 		return (EINVAL);
3454 	}
3455 	ldcp = (ldc_chan_t *)handle;
3456 	mutex_enter(&ldcp->lock);
3457 	mutex_enter(&ldcp->tx_lock);
3458 	i_ldc_reset(ldcp, B_TRUE);
3459 	mutex_exit(&ldcp->tx_lock);
3460 	mutex_exit(&ldcp->lock);
3461 
3462 	return (0);
3463 }
3464 
3465 /*
3466  * Get the current channel status
3467  */
3468 int
3469 ldc_status(ldc_handle_t handle, ldc_status_t *status)
3470 {
3471 	ldc_chan_t *ldcp;
3472 
3473 	if (handle == NULL || status == NULL) {
3474 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
3475 		return (EINVAL);
3476 	}
3477 	ldcp = (ldc_chan_t *)handle;
3478 
3479 	*status = ((ldc_chan_t *)handle)->status;
3480 
3481 	D1(ldcp->id,
3482 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
3483 	return (0);
3484 }
3485 
3486 
3487 /*
3488  * Set the channel's callback mode - enable/disable callbacks
3489  */
3490 int
3491 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3492 {
3493 	ldc_chan_t 	*ldcp;
3494 
3495 	if (handle == NULL) {
3496 		DWARN(DBG_ALL_LDCS,
3497 		    "ldc_set_intr_mode: invalid channel handle\n");
3498 		return (EINVAL);
3499 	}
3500 	ldcp = (ldc_chan_t *)handle;
3501 
3502 	/*
3503 	 * Record no callbacks should be invoked
3504 	 */
3505 	mutex_enter(&ldcp->lock);
3506 
3507 	switch (cmode) {
3508 	case LDC_CB_DISABLE:
3509 		if (!ldcp->cb_enabled) {
3510 			DWARN(ldcp->id,
3511 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3512 			    ldcp->id);
3513 			break;
3514 		}
3515 		ldcp->cb_enabled = B_FALSE;
3516 
3517 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3518 		    ldcp->id);
3519 		break;
3520 
3521 	case LDC_CB_ENABLE:
3522 		if (ldcp->cb_enabled) {
3523 			DWARN(ldcp->id,
3524 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3525 			    ldcp->id);
3526 			break;
3527 		}
3528 		ldcp->cb_enabled = B_TRUE;
3529 
3530 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3531 		    ldcp->id);
3532 		break;
3533 	}
3534 
3535 	mutex_exit(&ldcp->lock);
3536 
3537 	return (0);
3538 }
3539 
3540 /*
3541  * Check to see if there are packets on the incoming queue
3542  * Will return hasdata = B_FALSE if there are no packets
3543  */
3544 int
3545 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3546 {
3547 	int 		rv;
3548 	uint64_t 	rx_head, rx_tail;
3549 	ldc_chan_t 	*ldcp;
3550 
3551 	if (handle == NULL) {
3552 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3553 		return (EINVAL);
3554 	}
3555 	ldcp = (ldc_chan_t *)handle;
3556 
3557 	*hasdata = B_FALSE;
3558 
3559 	mutex_enter(&ldcp->lock);
3560 
3561 	if (ldcp->tstate != TS_UP) {
3562 		D1(ldcp->id,
3563 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3564 		mutex_exit(&ldcp->lock);
3565 		return (ECONNRESET);
3566 	}
3567 
3568 	/* Read packet(s) from the queue */
3569 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3570 	    &ldcp->link_state);
3571 	if (rv != 0) {
3572 		cmn_err(CE_WARN,
3573 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3574 		mutex_exit(&ldcp->lock);
3575 		return (EIO);
3576 	}
3577 
3578 	/* reset the channel state if the channel went down */
3579 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3580 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3581 		mutex_enter(&ldcp->tx_lock);
3582 		i_ldc_reset(ldcp, B_FALSE);
3583 		mutex_exit(&ldcp->tx_lock);
3584 		mutex_exit(&ldcp->lock);
3585 		return (ECONNRESET);
3586 	}
3587 
3588 	switch (ldcp->mode) {
3589 	case LDC_MODE_RAW:
3590 		/*
3591 		 * In raw mode, there are no ctrl packets, so checking
3592 		 * if the queue is non-empty is sufficient.
3593 		 */
3594 		*hasdata = (rx_head != rx_tail);
3595 		break;
3596 
3597 	case LDC_MODE_UNRELIABLE:
3598 		/*
3599 		 * In unreliable mode, if the queue is non-empty, we need
3600 		 * to check if it actually contains unread data packets.
3601 		 * The queue may just contain ctrl packets.
3602 		 */
3603 		if (rx_head != rx_tail)
3604 			*hasdata = (i_ldc_chkq(ldcp) == 0);
3605 		break;
3606 
3607 	case LDC_MODE_STREAM:
3608 		/*
3609 		 * In stream mode, first check for 'stream_remains' > 0.
3610 		 * Otherwise, if the data queue head and tail pointers
3611 		 * differ, there must be data to read.
3612 		 */
3613 		if (ldcp->stream_remains > 0)
3614 			*hasdata = B_TRUE;
3615 		else
3616 			*hasdata = (ldcp->rx_dq_head != ldcp->rx_dq_tail);
3617 		break;
3618 
3619 	default:
3620 		cmn_err(CE_WARN, "ldc_chkq: (0x%lx) unexpected channel mode "
3621 		    "(0x%x)", ldcp->id, ldcp->mode);
3622 		mutex_exit(&ldcp->lock);
3623 		return (EIO);
3624 	}
3625 
3626 	mutex_exit(&ldcp->lock);
3627 
3628 	return (0);
3629 }
3630 
3631 
3632 /*
3633  * Read 'size' amount of bytes or less. If incoming buffer
3634  * is more than 'size', ENOBUFS is returned.
3635  *
3636  * On return, size contains the number of bytes read.
3637  */
int
ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
{
	ldc_chan_t 	*ldcp;
	uint64_t 	rx_head = 0, rx_tail = 0;
	int		rv = 0, exit_val;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
		return (EINVAL);
	}

	ldcp = (ldc_chan_t *)handle;

	/* channel lock */
	mutex_enter(&ldcp->lock);

	if (ldcp->tstate != TS_UP) {
		/* fall through so HV queue state is still sampled below */
		DWARN(ldcp->id,
		    "ldc_read: (0x%llx) channel is not in UP state\n",
		    ldcp->id);
		exit_val = ECONNRESET;
	} else if (ldcp->mode == LDC_MODE_STREAM) {
		/*
		 * STREAM mode reads from the in-kernel data queue, not the
		 * HV Rx queue, so the drain/interrupt handling below does
		 * not apply and we can return directly.
		 */
		TRACE_RXDQ_LENGTH(ldcp);
		exit_val = ldcp->read_p(ldcp, bufp, sizep);
		mutex_exit(&ldcp->lock);
		return (exit_val);
	} else {
		exit_val = ldcp->read_p(ldcp, bufp, sizep);
	}

	/*
	 * if queue has been drained - clear interrupt
	 */
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		mutex_exit(&ldcp->lock);
		return (ECONNRESET);
	}

	if (exit_val == 0) {
		/* reset the channel if the link went down or was reset */
		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
		    ldcp->link_state == LDC_CHANNEL_RESET) {
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_FALSE);
			exit_val = ECONNRESET;
			mutex_exit(&ldcp->tx_lock);
		}
		/* queue fully drained - retire the pending Rx interrupt */
		if ((rv == 0) &&
		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
		    (rx_head == rx_tail)) {
			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
		}
	}

	mutex_exit(&ldcp->lock);
	return (exit_val);
}
3702 
3703 /*
3704  * Basic raw mondo read -
3705  * no interpretation of mondo contents at all.
3706  *
3707  * Enter and exit with ldcp->lock held by caller
3708  */
static int
i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
{
	uint64_t 	q_size_mask;
	ldc_msg_t 	*msgp;
	uint8_t		*msgbufp;
	int		rv = 0, space;
	uint64_t 	rx_head, rx_tail;

	space = *sizep;

	/* raw reads always hand back a full raw payload */
	if (space < LDC_PAYLOAD_SIZE_RAW)
		return (ENOBUFS);

	ASSERT(mutex_owned(&ldcp->lock));

	/* compute mask for increment */
	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;

	/*
	 * Read packet(s) from the queue
	 */
	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
	    &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
		    ldcp->id);
		return (EIO);
	}
	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
	    " rxt=0x%llx, st=0x%llx\n",
	    ldcp->id, rx_head, rx_tail, ldcp->link_state);

	/* reset the channel state if the channel went down */
	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_FALSE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}

	/*
	 * Check for empty queue
	 */
	if (rx_head == rx_tail) {
		*sizep = 0;
		return (0);
	}

	/* get the message */
	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);

	/* if channel is in RAW mode, copy data and return */
	msgbufp = (uint8_t *)&(msgp->raw[0]);

	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);

	DUMP_PAYLOAD(ldcp->id, msgbufp);

	*sizep = LDC_PAYLOAD_SIZE_RAW;

	/* consume exactly one packet and return the slot to the HV */
	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
	rv = i_ldc_set_rx_head(ldcp, rx_head);

	return (rv);
}
3777 
3778 /*
3779  * Process LDC mondos to build larger packets
3780  * with either un-reliable or reliable delivery.
3781  *
3782  * Enter and exit with ldcp->lock held by caller
3783  */
static int
i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
{
	int		rv = 0;
	uint64_t 	rx_head = 0, rx_tail = 0;
	uint64_t 	curr_head = 0;
	ldc_msg_t 	*msg;
	caddr_t 	target;
	size_t 		len = 0, bytes_read = 0;
	int 		retries = 0;
	uint64_t 	q_va, q_size_mask;
	uint64_t	first_fragment = 0;

	target = target_bufp;

	ASSERT(mutex_owned(&ldcp->lock));

	/* check if the buffer and size are valid */
	if (target_bufp == NULL || *sizep == 0) {
		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
		    ldcp->id);
		return (EINVAL);
	}

	/* Set q_va and compute increment mask for the appropriate queue */
	if (ldcp->mode == LDC_MODE_STREAM) {
		q_va	    = ldcp->rx_dq_va;
		q_size_mask = (ldcp->rx_dq_entries-1)<<LDC_PACKET_SHIFT;
	} else {
		q_va	    = ldcp->rx_q_va;
		q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
	}

	/*
	 * Read packet(s) from the queue
	 */
	rv = ldcp->readq_get_state(ldcp, &curr_head, &rx_tail,
	    &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
		    ldcp->id);
		mutex_enter(&ldcp->tx_lock);
		i_ldc_reset(ldcp, B_TRUE);
		mutex_exit(&ldcp->tx_lock);
		return (ECONNRESET);
	}
	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
	    ldcp->id, curr_head, rx_tail, ldcp->link_state);

	/* reset the channel state if the channel went down */
	if (ldcp->link_state != LDC_CHANNEL_UP)
		goto channel_is_reset;

	/*
	 * Main reassembly loop: consume packets from the queue until a
	 * complete (possibly multi-fragment) message has been copied to
	 * the caller's buffer, the queue stays empty, or an error/reset
	 * occurs.  NOTE: several conditions below intentionally use
	 * assignment-in-if (cstyle-accepted in this file).
	 */
	for (;;) {

		if (curr_head == rx_tail) {
			/*
			 * If a data queue is being used, check the Rx HV
			 * queue. This will copy over any new data packets
			 * that have arrived.
			 */
			if (ldcp->mode == LDC_MODE_STREAM)
				(void) i_ldc_chkq(ldcp);

			rv = ldcp->readq_get_state(ldcp,
			    &rx_head, &rx_tail, &ldcp->link_state);
			if (rv != 0) {
				cmn_err(CE_WARN,
				    "ldc_read: (0x%lx) cannot read queue ptrs",
				    ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}

			if (ldcp->link_state != LDC_CHANNEL_UP)
				goto channel_is_reset;

			if (curr_head == rx_tail) {

				/* If in the middle of a fragmented xfer */
				if (first_fragment != 0) {

					/* wait for ldc_delay usecs */
					drv_usecwait(ldc_delay);

					if (++retries < ldc_max_retries)
						continue;

					*sizep = 0;
					if (ldcp->mode != LDC_MODE_STREAM)
						ldcp->last_msg_rcd =
						    first_fragment - 1;
					DWARN(DBG_ALL_LDCS, "ldc_read: "
					    "(0x%llx) read timeout", ldcp->id);
					return (EAGAIN);
				}
				*sizep = 0;
				break;
			}
		}
		retries = 0;

		D2(ldcp->id,
		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
		    ldcp->id, curr_head, rx_head, rx_tail);

		/* get the message */
		msg = (ldc_msg_t *)(q_va + curr_head);

		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
		    ldcp->rx_q_va + curr_head);

		/*
		 * Check the message ID for the message received.
		 * Seqid/ctrl/ACK/NACK handling is skipped in STREAM mode,
		 * where the data queue only holds already-validated data.
		 */
		if (ldcp->mode != LDC_MODE_STREAM) {
			if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {

				DWARN(ldcp->id, "ldc_read: (0x%llx) seqid "
				    "error, q_ptrs=0x%lx,0x%lx",
				    ldcp->id, rx_head, rx_tail);

				/* throw away data */
				bytes_read = 0;

				/* Reset last_msg_rcd to start of message */
				if (first_fragment != 0) {
					ldcp->last_msg_rcd = first_fragment - 1;
					first_fragment = 0;
				}
				/*
				 * Send a NACK -- invalid seqid
				 * get the current tail for the response
				 */
				rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
				    (msg->ctrl & LDC_CTRL_MASK));
				if (rv) {
					cmn_err(CE_NOTE,
					    "ldc_read: (0x%lx) err sending "
					    "NACK msg\n", ldcp->id);

					/* if cannot send NACK - reset chan */
					mutex_enter(&ldcp->tx_lock);
					i_ldc_reset(ldcp, B_FALSE);
					mutex_exit(&ldcp->tx_lock);
					rv = ECONNRESET;
					break;
				}

				/* purge receive queue */
				rv = i_ldc_set_rx_head(ldcp, rx_tail);

				break;
			}

			/*
			 * Process any messages of type CTRL messages
			 * Future implementations should try to pass these
			 * to LDC link by resetting the intr state.
			 *
			 * NOTE: not done as a switch() as type can be
			 * both ctrl+data
			 */
			if (msg->type & LDC_CTRL) {
				if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
					if (rv == EAGAIN)
						continue;
					rv = i_ldc_set_rx_head(ldcp, rx_tail);
					*sizep = 0;
					bytes_read = 0;
					break;
				}
			}

			/* process data ACKs */
			if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
				if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
					*sizep = 0;
					bytes_read = 0;
					break;
				}
			}

			/* process data NACKs */
			if ((msg->type & LDC_DATA) && (msg->stype & LDC_NACK)) {
				DWARN(ldcp->id,
				    "ldc_read: (0x%llx) received DATA/NACK",
				    ldcp->id);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_TRUE);
				mutex_exit(&ldcp->tx_lock);
				return (ECONNRESET);
			}
		}

		/* process data messages */
		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {

			/* reliable/stream payloads live in rdata, else udata */
			uint8_t *msgbuf = (uint8_t *)(
			    (ldcp->mode == LDC_MODE_RELIABLE ||
			    ldcp->mode == LDC_MODE_STREAM) ?
			    msg->rdata : msg->udata);

			D2(ldcp->id,
			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);

			/* get the packet length */
			len = (msg->env & LDC_LEN_MASK);

				/*
				 * FUTURE OPTIMIZATION:
				 * dont need to set q head for every
				 * packet we read just need to do this when
				 * we are done or need to wait for more
				 * mondos to make a full packet - this is
				 * currently expensive.
				 */

			if (first_fragment == 0) {

				/*
				 * first packets should always have the start
				 * bit set (even for a single packet). If not
				 * throw away the packet
				 */
				if (!(msg->env & LDC_FRAG_START)) {

					DWARN(DBG_ALL_LDCS,
					    "ldc_read: (0x%llx) not start - "
					    "frag=%x\n", ldcp->id,
					    (msg->env) & LDC_FRAG_MASK);

					/* toss pkt, inc head, cont reading */
					bytes_read = 0;
					target = target_bufp;
					curr_head =
					    (curr_head + LDC_PACKET_SIZE)
					    & q_size_mask;
					if (rv = ldcp->readq_set_head(ldcp,
					    curr_head))
						break;

					continue;
				}

				first_fragment = msg->seqid;
			} else {
				/* check to see if this is a pkt w/ START bit */
				if (msg->env & LDC_FRAG_START) {
					DWARN(DBG_ALL_LDCS,
					    "ldc_read:(0x%llx) unexpected pkt"
					    " env=0x%x discarding %d bytes,"
					    " lastmsg=%d, currentmsg=%d\n",
					    ldcp->id, msg->env&LDC_FRAG_MASK,
					    bytes_read, ldcp->last_msg_rcd,
					    msg->seqid);

					/* throw data we have read so far */
					bytes_read = 0;
					target = target_bufp;
					first_fragment = msg->seqid;

					if (rv = ldcp->readq_set_head(ldcp,
					    curr_head))
						break;
				}
			}

			/* copy (next) pkt into buffer */
			if (len <= (*sizep - bytes_read)) {
				bcopy(msgbuf, target, len);
				target += len;
				bytes_read += len;
			} else {
				/*
				 * there is not enough space in the buffer to
				 * read this pkt. throw message away & continue
				 * reading data from queue
				 */
				DWARN(DBG_ALL_LDCS,
				    "ldc_read: (0x%llx) buffer too small, "
				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
				    curr_head, *sizep, bytes_read+len);

				first_fragment = 0;
				target = target_bufp;
				bytes_read = 0;

				/* throw away everything received so far */
				if (rv = ldcp->readq_set_head(ldcp, curr_head))
					break;

				/* continue reading remaining pkts */
				continue;
			}
		}

		/* set the message id */
		if (ldcp->mode != LDC_MODE_STREAM)
			ldcp->last_msg_rcd = msg->seqid;

		/* move the head one position */
		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;

		if (msg->env & LDC_FRAG_STOP) {

			/*
			 * All pkts that are part of this fragmented transfer
			 * have been read or this was a single pkt read
			 * or there was an error
			 */

			/* set the queue head */
			if (rv = ldcp->readq_set_head(ldcp, curr_head))
				bytes_read = 0;

			*sizep = bytes_read;

			break;
		}

		/* advance head if it is a CTRL packet or a DATA ACK packet */
		if ((msg->type & LDC_CTRL) ||
		    ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK))) {

			/* set the queue head */
			if (rv = ldcp->readq_set_head(ldcp, curr_head)) {
				bytes_read = 0;
				break;
			}

			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
			    ldcp->id, curr_head);
		}

	} /* for (;;) */

	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);

	return (rv);

channel_is_reset:
	mutex_enter(&ldcp->tx_lock);
	i_ldc_reset(ldcp, B_FALSE);
	mutex_exit(&ldcp->tx_lock);
	return (ECONNRESET);
}
4131 
4132 /*
4133  * Use underlying reliable packet mechanism to fetch
4134  * and buffer incoming packets so we can hand them back as
4135  * a basic byte stream.
4136  *
4137  * Enter and exit with ldcp->lock held by caller
4138  */
4139 static int
4140 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
4141 {
4142 	int	rv;
4143 	size_t	size;
4144 
4145 	ASSERT(mutex_owned(&ldcp->lock));
4146 
4147 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
4148 	    ldcp->id, *sizep);
4149 
4150 	if (ldcp->stream_remains == 0) {
4151 		size = ldcp->mtu;
4152 		rv = i_ldc_read_packet(ldcp,
4153 		    (caddr_t)ldcp->stream_bufferp, &size);
4154 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
4155 		    ldcp->id, size);
4156 
4157 		if (rv != 0)
4158 			return (rv);
4159 
4160 		ldcp->stream_remains = size;
4161 		ldcp->stream_offset = 0;
4162 	}
4163 
4164 	size = MIN(ldcp->stream_remains, *sizep);
4165 
4166 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
4167 	ldcp->stream_offset += size;
4168 	ldcp->stream_remains -= size;
4169 
4170 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
4171 	    ldcp->id, size);
4172 
4173 	*sizep = size;
4174 	return (0);
4175 }
4176 
4177 /*
4178  * Write specified amount of bytes to the channel
4179  * in multiple pkts of pkt_payload size. Each
4180  * packet is tagged with an unique packet ID in
4181  * the case of a reliable link.
4182  *
4183  * On return, size contains the number of bytes written.
4184  */
4185 int
4186 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
4187 {
4188 	ldc_chan_t	*ldcp;
4189 	int		rv = 0;
4190 
4191 	if (handle == NULL) {
4192 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
4193 		return (EINVAL);
4194 	}
4195 	ldcp = (ldc_chan_t *)handle;
4196 
4197 	/* check if writes can occur */
4198 	if (!mutex_tryenter(&ldcp->tx_lock)) {
4199 		/*
4200 		 * Could not get the lock - channel could
4201 		 * be in the process of being unconfigured
4202 		 * or reader has encountered an error
4203 		 */
4204 		return (EAGAIN);
4205 	}
4206 
4207 	/* check if non-zero data to write */
4208 	if (buf == NULL || sizep == NULL) {
4209 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
4210 		    ldcp->id);
4211 		mutex_exit(&ldcp->tx_lock);
4212 		return (EINVAL);
4213 	}
4214 
4215 	if (*sizep == 0) {
4216 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
4217 		    ldcp->id);
4218 		mutex_exit(&ldcp->tx_lock);
4219 		return (0);
4220 	}
4221 
4222 	/* Check if channel is UP for data exchange */
4223 	if (ldcp->tstate != TS_UP) {
4224 		DWARN(ldcp->id,
4225 		    "ldc_write: (0x%llx) channel is not in UP state\n",
4226 		    ldcp->id);
4227 		*sizep = 0;
4228 		rv = ECONNRESET;
4229 	} else {
4230 		rv = ldcp->write_p(ldcp, buf, sizep);
4231 	}
4232 
4233 	mutex_exit(&ldcp->tx_lock);
4234 
4235 	return (rv);
4236 }
4237 
4238 /*
4239  * Write a raw packet to the channel
4240  * On return, size contains the number of bytes written.
4241  */
static int
i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
{
	ldc_msg_t 	*ldcmsg;
	uint64_t 	tx_head, tx_tail, new_tail;
	int		rv = 0;
	size_t		size;

	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
	ASSERT(ldcp->mode == LDC_MODE_RAW);

	size = *sizep;

	/*
	 * Check to see if the packet size is less than or
	 * equal to packet size support in raw mode
	 */
	if (size > ldcp->pkt_payload) {
		DWARN(ldcp->id,
		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
		    ldcp->id, *sizep);
		*sizep = 0;
		return (EMSGSIZE);
	}

	/* get the qptrs for the tx queue */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
		*sizep = 0;
		return (EIO);
	}

	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		DWARN(ldcp->id,
		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);

		*sizep = 0;
		/*
		 * i_ldc_reset() needs the channel lock; try to take it
		 * without violating lock order (lock before tx_lock).
		 */
		if (mutex_tryenter(&ldcp->lock)) {
			i_ldc_reset(ldcp, B_FALSE);
			mutex_exit(&ldcp->lock);
		} else {
			/*
			 * Release Tx lock, and then reacquire channel
			 * and Tx lock in correct order
			 */
			mutex_exit(&ldcp->tx_lock);
			mutex_enter(&ldcp->lock);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_FALSE);
			mutex_exit(&ldcp->lock);
		}
		return (ECONNRESET);
	}

	tx_tail = ldcp->tx_tail;
	tx_head = ldcp->tx_head;
	new_tail = (tx_tail + LDC_PACKET_SIZE) &
	    ((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);

	/* tail catching up to head means the Tx queue has no free slot */
	if (new_tail == tx_head) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
		*sizep = 0;
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
	    ldcp->id, size);

	/* Send the data now */
	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);

	/* copy the data into pkt */
	bcopy((uint8_t *)buf, ldcmsg, size);

	/* increment tail */
	tx_tail = new_tail;

	/*
	 * All packets have been copied into the TX queue
	 * update the tail ptr in the HV
	 */
	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv) {
		if (rv == EWOULDBLOCK) {
			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
			    ldcp->id);
			*sizep = 0;
			return (EWOULDBLOCK);
		}

		*sizep = 0;
		/* same lock-order dance as the down/reset path above */
		if (mutex_tryenter(&ldcp->lock)) {
			i_ldc_reset(ldcp, B_FALSE);
			mutex_exit(&ldcp->lock);
		} else {
			/*
			 * Release Tx lock, and then reacquire channel
			 * and Tx lock in correct order
			 */
			mutex_exit(&ldcp->tx_lock);
			mutex_enter(&ldcp->lock);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_FALSE);
			mutex_exit(&ldcp->lock);
		}
		return (ECONNRESET);
	}

	ldcp->tx_tail = tx_tail;
	*sizep = size;

	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);

	return (rv);
}
4362 
4363 
4364 /*
4365  * Write specified amount of bytes to the channel
4366  * in multiple pkts of pkt_payload size. Each
4367  * packet is tagged with an unique packet ID in
4368  * the case of a reliable link.
4369  *
4370  * On return, size contains the number of bytes written.
4371  * This function needs to ensure that the write size is < MTU size
4372  */
static int
i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
{
	ldc_msg_t 	*ldcmsg;
	uint64_t 	tx_head, tx_tail, new_tail, start;
	uint64_t	txq_size_mask, numavail;
	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
	size_t 		len, bytes_written = 0, remaining;
	int		rv;
	uint32_t	curr_seqid;

	/* caller (ldc_write/i_ldc_write_stream) must hold the Tx lock */
	ASSERT(MUTEX_HELD(&ldcp->tx_lock));

	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
	    ldcp->mode == LDC_MODE_UNRELIABLE ||
	    ldcp->mode == LDC_MODE_STREAM);

	/*
	 * compute mask for increment; tx_q_entries is assumed to be a
	 * power of two so the mask wraps a byte offset around the queue
	 */
	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;

	/* get the qptrs for the tx queue */
	rv = hv_ldc_tx_get_state(ldcp->id,
	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
	if (rv != 0) {
		cmn_err(CE_WARN,
		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
		*size = 0;
		return (EIO);
	}

	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
	    ldcp->link_state == LDC_CHANNEL_RESET) {
		DWARN(ldcp->id,
		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
		*size = 0;
		/*
		 * Reset the channel.  The channel lock must be taken
		 * before the Tx lock, so try it opportunistically first
		 * and fall back to dropping and reacquiring in order.
		 */
		if (mutex_tryenter(&ldcp->lock)) {
			i_ldc_reset(ldcp, B_FALSE);
			mutex_exit(&ldcp->lock);
		} else {
			/*
			 * Release Tx lock, and then reacquire channel
			 * and Tx lock in correct order
			 */
			mutex_exit(&ldcp->tx_lock);
			mutex_enter(&ldcp->lock);
			mutex_enter(&ldcp->tx_lock);
			i_ldc_reset(ldcp, B_FALSE);
			mutex_exit(&ldcp->lock);
		}
		return (ECONNRESET);
	}

	tx_tail = ldcp->tx_tail;
	new_tail = (tx_tail + LDC_PACKET_SIZE) %
	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT);

	/*
	 * Check to see if the queue is full. The check is done using
	 * the appropriate head based on the link mode.
	 */
	i_ldc_get_tx_head(ldcp, &tx_head);

	if (new_tail == tx_head) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
		*size = 0;
		return (EWOULDBLOCK);
	}

	/*
	 * Make sure that the LDC Tx queue has enough space
	 */
	/* ring arithmetic; one slot is kept empty to distinguish full/empty */
	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
	    + ldcp->tx_q_entries - 1;
	numavail %= ldcp->tx_q_entries;

	if (*size > (numavail * ldcp->pkt_payload)) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
		return (EWOULDBLOCK);
	}

	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
	    ldcp->id, *size);

	/* Send the data now */
	bytes_written = 0;
	curr_seqid = ldcp->last_msg_snt;
	start = tx_tail;

	/* fragment the buffer into pkt_payload-sized LDC_DATA packets */
	while (*size > bytes_written) {

		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);

		/* reliable/stream modes carry their payload in rdata */
		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
		    ldcp->mode == LDC_MODE_STREAM) ?
		    ldcmsg->rdata : ldcmsg->udata);

		ldcmsg->type = LDC_DATA;
		ldcmsg->stype = LDC_INFO;
		ldcmsg->ctrl = 0;

		remaining = *size - bytes_written;
		len = min(ldcp->pkt_payload, remaining);
		/* env carries the payload length of this fragment */
		ldcmsg->env = (uint8_t)len;

		curr_seqid++;
		ldcmsg->seqid = curr_seqid;

		/* copy the data into pkt */
		bcopy(source, msgbuf, len);

		source += len;
		bytes_written += len;

		/* increment tail */
		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;

		ASSERT(tx_tail != tx_head);
	}

	/* Set the start and stop bits */
	ldcmsg->env |= LDC_FRAG_STOP;
	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
	ldcmsg->env |= LDC_FRAG_START;

	/*
	 * All packets have been copied into the TX queue
	 * update the tail ptr in the HV
	 */
	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
	if (rv == 0) {
		/* success: commit tail and sequence id, report bytes sent */
		ldcp->tx_tail = tx_tail;
		ldcp->last_msg_snt = curr_seqid;
		*size = bytes_written;
	} else {
		int rv2;

		if (rv != EWOULDBLOCK) {
			*size = 0;
			/* same reset-with-lock-ordering dance as above */
			if (mutex_tryenter(&ldcp->lock)) {
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->lock);
			} else {
				/*
				 * Release Tx lock, and then reacquire channel
				 * and Tx lock in correct order
				 */
				mutex_exit(&ldcp->tx_lock);
				mutex_enter(&ldcp->lock);
				mutex_enter(&ldcp->tx_lock);
				i_ldc_reset(ldcp, B_FALSE);
				mutex_exit(&ldcp->lock);
			}
			return (ECONNRESET);
		}

		D1(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
		    "old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
		    rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));

		/* EWOULDBLOCK: re-read queue state for diagnostics only */
		rv2 = hv_ldc_tx_get_state(ldcp->id,
		    &tx_head, &tx_tail, &ldcp->link_state);

		D1(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
		    "(head 0x%x, tail 0x%x state 0x%x)\n",
		    rv2, tx_head, tx_tail, ldcp->link_state);

		*size = 0;
	}

	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);

	return (rv);
}
4549 
/*
 * Write specified amount of bytes to the channel in stream mode.
 * The requested size is first truncated to the channel MTU, and the
 * (possibly shortened) buffer is then handed to the packet-mode
 * write routine for fragmentation and transmission.
 *
 * On return, sizep contains the number of bytes written.
 */
4559 static int
4560 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4561 {
4562 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4563 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4564 
4565 	/* Truncate packet to max of MTU size */
4566 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4567 	return (i_ldc_write_packet(ldcp, buf, sizep));
4568 }
4569 
4570 
4571 /*
4572  * Interfaces for channel nexus to register/unregister with LDC module
4573  * The nexus will register functions to be used to register individual
4574  * channels with the nexus and enable interrupts for the channels
4575  */
4576 int
4577 ldc_register(ldc_cnex_t *cinfo)
4578 {
4579 	ldc_chan_t	*ldcp;
4580 
4581 	if (cinfo == NULL || cinfo->dip == NULL ||
4582 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4583 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4584 	    cinfo->clr_intr == NULL) {
4585 
4586 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4587 		return (EINVAL);
4588 	}
4589 
4590 	mutex_enter(&ldcssp->lock);
4591 
4592 	/* nexus registration */
4593 	ldcssp->cinfo.dip = cinfo->dip;
4594 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4595 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4596 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4597 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4598 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4599 
4600 	/* register any channels that might have been previously initialized */
4601 	ldcp = ldcssp->chan_list;
4602 	while (ldcp) {
4603 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4604 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4605 			(void) i_ldc_register_channel(ldcp);
4606 
4607 		ldcp = ldcp->next;
4608 	}
4609 
4610 	mutex_exit(&ldcssp->lock);
4611 
4612 	return (0);
4613 }
4614 
4615 int
4616 ldc_unregister(ldc_cnex_t *cinfo)
4617 {
4618 	if (cinfo == NULL || cinfo->dip == NULL) {
4619 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4620 		return (EINVAL);
4621 	}
4622 
4623 	mutex_enter(&ldcssp->lock);
4624 
4625 	if (cinfo->dip != ldcssp->cinfo.dip) {
4626 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4627 		mutex_exit(&ldcssp->lock);
4628 		return (EINVAL);
4629 	}
4630 
4631 	/* nexus unregister */
4632 	ldcssp->cinfo.dip = NULL;
4633 	ldcssp->cinfo.reg_chan = NULL;
4634 	ldcssp->cinfo.unreg_chan = NULL;
4635 	ldcssp->cinfo.add_intr = NULL;
4636 	ldcssp->cinfo.rem_intr = NULL;
4637 	ldcssp->cinfo.clr_intr = NULL;
4638 
4639 	mutex_exit(&ldcssp->lock);
4640 
4641 	return (0);
4642 }
4643 
4644 
4645 /* ------------------------------------------------------------------------- */
4646 
4647 /*
4648  * Allocate a memory handle for the channel and link it into the list
4649  * Also choose which memory table to use if this is the first handle
4650  * being assigned to this channel
4651  */
4652 int
4653 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4654 {
4655 	ldc_chan_t 	*ldcp;
4656 	ldc_mhdl_t	*mhdl;
4657 
4658 	if (handle == NULL) {
4659 		DWARN(DBG_ALL_LDCS,
4660 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4661 		return (EINVAL);
4662 	}
4663 	ldcp = (ldc_chan_t *)handle;
4664 
4665 	mutex_enter(&ldcp->lock);
4666 
4667 	/* check to see if channel is initalized */
4668 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4669 		DWARN(ldcp->id,
4670 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4671 		    ldcp->id);
4672 		mutex_exit(&ldcp->lock);
4673 		return (EINVAL);
4674 	}
4675 
4676 	/* allocate handle for channel */
4677 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4678 
4679 	/* initialize the lock */
4680 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4681 
4682 	mhdl->myshadow = B_FALSE;
4683 	mhdl->memseg = NULL;
4684 	mhdl->ldcp = ldcp;
4685 	mhdl->status = LDC_UNBOUND;
4686 
4687 	/* insert memory handle (@ head) into list */
4688 	if (ldcp->mhdl_list == NULL) {
4689 		ldcp->mhdl_list = mhdl;
4690 		mhdl->next = NULL;
4691 	} else {
4692 		/* insert @ head */
4693 		mhdl->next = ldcp->mhdl_list;
4694 		ldcp->mhdl_list = mhdl;
4695 	}
4696 
4697 	/* return the handle */
4698 	*mhandle = (ldc_mem_handle_t)mhdl;
4699 
4700 	mutex_exit(&ldcp->lock);
4701 
4702 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4703 	    ldcp->id, mhdl);
4704 
4705 	return (0);
4706 }
4707 
4708 /*
4709  * Free memory handle for the channel and unlink it from the list
4710  */
int
ldc_mem_free_handle(ldc_mem_handle_t mhandle)
{
	ldc_mhdl_t 	*mhdl, *phdl;
	ldc_chan_t 	*ldcp;

	if (mhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_free_handle: invalid memory handle\n");
		return (EINVAL);
	}
	mhdl = (ldc_mhdl_t *)mhandle;

	mutex_enter(&mhdl->lock);

	ldcp = mhdl->ldcp;

	/* a handle that is still bound or mapped cannot be freed */
	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
		DWARN(ldcp->id,
		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
		    mhdl);
		mutex_exit(&mhdl->lock);
		return (EINVAL);
	}
	/*
	 * NOTE(review): the handle lock is dropped before the handle is
	 * unlinked below; a bind racing in between appears possible --
	 * confirm callers serialize free against bind on the same handle.
	 */
	mutex_exit(&mhdl->lock);

	mutex_enter(&ldcp->mlist_lock);

	phdl = ldcp->mhdl_list;

	/* first handle */
	if (phdl == mhdl) {
		/* unlink from the head of the list, then destroy */
		ldcp->mhdl_list = mhdl->next;
		mutex_destroy(&mhdl->lock);
		kmem_cache_free(ldcssp->memhdl_cache, mhdl);

		D1(ldcp->id,
		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
		    ldcp->id, mhdl);
	} else {
		/* walk the list - unlink and free */
		while (phdl != NULL) {
			if (phdl->next == mhdl) {
				phdl->next = mhdl->next;
				mutex_destroy(&mhdl->lock);
				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
				D1(ldcp->id,
				    "ldc_mem_free_handle: (0x%llx) freed "
				    "handle 0x%llx\n", ldcp->id, mhdl);
				break;
			}
			phdl = phdl->next;
		}
	}

	/* phdl is NULL only if the walk above never found the handle */
	if (phdl == NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
		mutex_exit(&ldcp->mlist_lock);
		return (EINVAL);
	}

	mutex_exit(&ldcp->mlist_lock);

	return (0);
}
4777 
4778 /*
4779  * Bind a memory handle to a virtual address.
4780  * The virtual address is converted to the corresponding real addresses.
4781  * Returns pointer to the first ldc_mem_cookie and the total number
4782  * of cookies for this virtual address. Other cookies can be obtained
4783  * using the ldc_mem_nextcookie() call. If the pages are stored in
4784  * consecutive locations in the table, a single cookie corresponding to
4785  * the first location is returned. The cookie size spans all the entries.
4786  *
4787  * If the VA corresponds to a page that is already being exported, reuse
4788  * the page and do not export it again. Bump the page's use count.
4789  */
int
ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
    uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
{
	ldc_mhdl_t	*mhdl;
	ldc_chan_t 	*ldcp;
	ldc_mtbl_t	*mtbl;
	ldc_memseg_t	*memseg;
	ldc_mte_t	tmp_mte;
	uint64_t	index, prev_index = 0;
	int64_t		cookie_idx;
	uintptr_t	raddr, ra_aligned;
	uint64_t	psize, poffset, v_offset;
	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
	pgcnt_t		npages;
	caddr_t		v_align, addr;
	int 		i, rv;

	if (mhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_bind_handle: invalid memory handle\n");
		return (EINVAL);
	}
	mhdl = (ldc_mhdl_t *)mhandle;
	ldcp = mhdl->ldcp;

	/* clear count */
	*ccount = 0;

	mutex_enter(&mhdl->lock);

	/* a handle can only be bound to one memory region at a time */
	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
		DWARN(ldcp->id,
		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
		    mhandle);
		mutex_exit(&mhdl->lock);
		return (EINVAL);
	}

	/* Force address and size to be 8-byte aligned */
	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
		DWARN(ldcp->id,
		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
		mutex_exit(&mhdl->lock);
		return (EINVAL);
	}

	/*
	 * If this channel is binding a memory handle for the
	 * first time allocate it a memory map table and initialize it
	 */
	if ((mtbl = ldcp->mtbl) == NULL) {

		mutex_enter(&ldcp->lock);

		/* Allocate and initialize the map table structure */
		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
		mtbl->next_entry = NULL;
		mtbl->contigmem = B_TRUE;

		/* Allocate the table itself */
		mtbl->table = (ldc_mte_slot_t *)
		    contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
		if (mtbl->table == NULL) {

			/*
			 * Contiguous allocation failed; fall back to a
			 * single kmem page and shrink the entry count to fit.
			 */
			/* allocate a page of memory using kmem_alloc */
			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
			mtbl->size = MMU_PAGESIZE;
			mtbl->contigmem = B_FALSE;
			mtbl->num_entries = mtbl->num_avail =
			    mtbl->size / sizeof (ldc_mte_slot_t);
			DWARN(ldcp->id,
			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
		}

		/* zero out the memory */
		bzero(mtbl->table, mtbl->size);

		/* initialize the lock */
		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);

		/* register table for this channel */
		rv = hv_ldc_set_map_table(ldcp->id,
		    va_to_pa(mtbl->table), mtbl->num_entries);
		if (rv != 0) {
			/* unwind everything allocated above */
			cmn_err(CE_WARN,
			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
			    ldcp->id, rv);
			if (mtbl->contigmem)
				contig_mem_free(mtbl->table, mtbl->size);
			else
				kmem_free(mtbl->table, mtbl->size);
			mutex_destroy(&mtbl->lock);
			kmem_free(mtbl, sizeof (ldc_mtbl_t));
			mutex_exit(&ldcp->lock);
			mutex_exit(&mhdl->lock);
			return (EIO);
		}

		ldcp->mtbl = mtbl;
		mutex_exit(&ldcp->lock);

		D1(ldcp->id,
		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
		    ldcp->id, ldcp->mtbl->table);
	}

	/* FUTURE: get the page size, pgsz code, and shift */
	pg_size = MMU_PAGESIZE;
	pg_size_code = page_szc(pg_size);
	pg_shift = page_get_shift(pg_size_code);
	pg_mask = ~(pg_size - 1);

	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);

	/* aligned VA and its offset */
	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);

	/* number of pages spanned by the (possibly unaligned) range */
	npages = (len+v_offset)/pg_size;
	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;

	D1(ldcp->id, "ldc_mem_bind_handle: binding "
	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
	    ldcp->id, vaddr, v_align, v_offset, npages);

	/* lock the memory table - exclusive access to channel */
	mutex_enter(&mtbl->lock);

	if (npages > mtbl->num_avail) {
		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
		    ldcp->id);
		mutex_exit(&mtbl->lock);
		mutex_exit(&mhdl->lock);
		return (ENOMEM);
	}

	/* Allocate a memseg structure */
	memseg = mhdl->memseg =
	    kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);

	/* Allocate memory to store all pages and cookies */
	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
	memseg->cookies =
	    kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);

	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
	    ldcp->id, npages);

	addr = v_align;

	/*
	 * Check if direct shared memory map is enabled, if not change
	 * the mapping type to include SHADOW_MAP.
	 */
	if (ldc_shmem_enabled == 0)
		mtype = LDC_SHADOW_MAP;

	/*
	 * Table slots are used in a round-robin manner. The algorithm permits
	 * inserting duplicate entries. Slots allocated earlier will typically
	 * get freed before we get back to reusing the slot.Inserting duplicate
	 * entries should be OK as we only lookup entries using the cookie addr
	 * i.e. tbl index, during export, unexport and copy operation.
	 *
	 * One implementation what was tried was to search for a duplicate
	 * page entry first and reuse it. The search overhead is very high and
	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
	 * So it does make sense to avoid searching for duplicates.
	 *
	 * But during the process of searching for a free slot, if we find a
	 * duplicate entry we will go ahead and use it, and bump its use count.
	 */

	/* index to start searching from */
	index = mtbl->next_entry;
	cookie_idx = -1;

	tmp_mte.ll = 0;	/* initialise fields to 0 */

	/* set the permission bits that match the requested mapping type(s) */
	if (mtype & LDC_DIRECT_MAP) {
		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
	}

	if (mtype & LDC_SHADOW_MAP) {
		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
	}

	if (mtype & LDC_IO_MAP) {
		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
	}

	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);

	tmp_mte.mte_pgszc = pg_size_code;

	/* initialize each mem table entry */
	for (i = 0; i < npages; i++) {

		/* check if slot is available in the table */
		while (mtbl->table[index].entry.ll != 0) {

			index = (index + 1) % mtbl->num_entries;

			if (index == mtbl->next_entry) {
				/* we have looped around */
				DWARN(DBG_ALL_LDCS,
				    "ldc_mem_bind_handle: (0x%llx) cannot find "
				    "entry\n", ldcp->id);
				*ccount = 0;

				/* NOTE: free memory, remove previous entries */
				/* this shouldnt happen as num_avail was ok */

				mutex_exit(&mtbl->lock);
				mutex_exit(&mhdl->lock);
				return (ENOMEM);
			}
		}

		/* get the real address */
		raddr = va_to_pa((void *)addr);
		ra_aligned = ((uintptr_t)raddr & pg_mask);

		/* build the mte */
		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;

		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);

		/* update entry in table */
		mtbl->table[index].entry = tmp_mte;

		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);

		/* calculate the size and offset for this export range */
		if (i == 0) {
			/* first page */
			psize = min((pg_size - v_offset), len);
			poffset = v_offset;

		} else if (i == (npages - 1)) {
			/* last page */
			psize =	(((uintptr_t)(vaddr + len)) &
			    ((uint64_t)(pg_size-1)));
			if (psize == 0)
				psize = pg_size;
			poffset = 0;

		} else {
			/* middle pages */
			psize = pg_size;
			poffset = 0;
		}

		/* store entry for this page */
		memseg->pages[i].index = index;
		memseg->pages[i].raddr = raddr;
		memseg->pages[i].offset = poffset;
		memseg->pages[i].size = psize;
		memseg->pages[i].mte = &(mtbl->table[index]);

		/* create the cookie */
		/* consecutive table slots coalesce into one larger cookie */
		if (i == 0 || (index != prev_index + 1)) {
			cookie_idx++;
			memseg->cookies[cookie_idx].addr =
			    IDX2COOKIE(index, pg_size_code, pg_shift);
			memseg->cookies[cookie_idx].addr |= poffset;
			memseg->cookies[cookie_idx].size = psize;

		} else {
			memseg->cookies[cookie_idx].size += psize;
		}

		D1(ldcp->id, "ldc_mem_bind_handle: bound "
		    "(0x%llx) va=0x%llx, idx=0x%llx, "
		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
		    ldcp->id, addr, index, raddr, psize, poffset);

		/* decrement number of available entries */
		mtbl->num_avail--;

		/* increment va by page size */
		addr += pg_size;

		/* increment index */
		prev_index = index;
		index = (index + 1) % mtbl->num_entries;

		/* save the next slot */
		mtbl->next_entry = index;
	}

	mutex_exit(&mtbl->lock);

	/* memory handle = bound */
	mhdl->mtype = mtype;
	mhdl->perm = perm;
	mhdl->status = LDC_BOUND;

	/* update memseg_t */
	memseg->vaddr = vaddr;
	memseg->raddr = memseg->pages[0].raddr;
	memseg->size = len;
	memseg->npages = npages;
	memseg->ncookies = cookie_idx + 1;
	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;

	/* return count and first cookie */
	*ccount = memseg->ncookies;
	cookie->addr = memseg->cookies[0].addr;
	cookie->size = memseg->cookies[0].size;

	D1(ldcp->id,
	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
	    "pgs=0x%llx cookies=0x%llx\n",
	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);

	mutex_exit(&mhdl->lock);
	return (0);
}
5120 
5121 /*
5122  * Return the next cookie associated with the specified memory handle
5123  */
5124 int
5125 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
5126 {
5127 	ldc_mhdl_t	*mhdl;
5128 	ldc_chan_t 	*ldcp;
5129 	ldc_memseg_t	*memseg;
5130 
5131 	if (mhandle == NULL) {
5132 		DWARN(DBG_ALL_LDCS,
5133 		    "ldc_mem_nextcookie: invalid memory handle\n");
5134 		return (EINVAL);
5135 	}
5136 	mhdl = (ldc_mhdl_t *)mhandle;
5137 
5138 	mutex_enter(&mhdl->lock);
5139 
5140 	ldcp = mhdl->ldcp;
5141 	memseg = mhdl->memseg;
5142 
5143 	if (cookie == 0) {
5144 		DWARN(ldcp->id,
5145 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
5146 		    ldcp->id);
5147 		mutex_exit(&mhdl->lock);
5148 		return (EINVAL);
5149 	}
5150 
5151 	if (memseg->next_cookie != 0) {
5152 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
5153 		cookie->size = memseg->cookies[memseg->next_cookie].size;
5154 		memseg->next_cookie++;
5155 		if (memseg->next_cookie == memseg->ncookies)
5156 			memseg->next_cookie = 0;
5157 
5158 	} else {
5159 		DWARN(ldcp->id,
5160 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
5161 		cookie->addr = 0;
5162 		cookie->size = 0;
5163 		mutex_exit(&mhdl->lock);
5164 		return (EINVAL);
5165 	}
5166 
5167 	D1(ldcp->id,
5168 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
5169 	    ldcp->id, cookie->addr, cookie->size);
5170 
5171 	mutex_exit(&mhdl->lock);
5172 	return (0);
5173 }
5174 
/*
 * Unbind the virtual memory region associated with the specified
 * memory handle. All associated cookies are freed and the corresponding
 * RA space is no longer exported.
 */
5179  */
5180 int
5181 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
5182 {
5183 	ldc_mhdl_t	*mhdl;
5184 	ldc_chan_t 	*ldcp;
5185 	ldc_mtbl_t	*mtbl;
5186 	ldc_memseg_t	*memseg;
5187 	uint64_t	cookie_addr;
5188 	uint64_t	pg_shift, pg_size_code;
5189 	int		i, rv;
5190 
5191 	if (mhandle == NULL) {
5192 		DWARN(DBG_ALL_LDCS,
5193 		    "ldc_mem_unbind_handle: invalid memory handle\n");
5194 		return (EINVAL);
5195 	}
5196 	mhdl = (ldc_mhdl_t *)mhandle;
5197 
5198 	mutex_enter(&mhdl->lock);
5199 
5200 	if (mhdl->status == LDC_UNBOUND) {
5201 		DWARN(DBG_ALL_LDCS,
5202 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
5203 		    mhandle);
5204 		mutex_exit(&mhdl->lock);
5205 		return (EINVAL);
5206 	}
5207 
5208 	ldcp = mhdl->ldcp;
5209 	mtbl = ldcp->mtbl;
5210 
5211 	memseg = mhdl->memseg;
5212 
5213 	/* lock the memory table - exclusive access to channel */
5214 	mutex_enter(&mtbl->lock);
5215 
5216 	/* undo the pages exported */
5217 	for (i = 0; i < memseg->npages; i++) {
5218 
5219 		/* check for mapped pages, revocation cookie != 0 */
5220 		if (memseg->pages[i].mte->cookie) {
5221 
5222 			pg_size_code = page_szc(memseg->pages[i].size);
5223 			pg_shift = page_get_shift(memseg->pages[i].size);
5224 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
5225 			    pg_size_code, pg_shift);
5226 
5227 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
5228 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
5229 			    cookie_addr, memseg->pages[i].mte->cookie);
5230 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
5231 			    memseg->pages[i].mte->cookie);
5232 			if (rv) {
5233 				DWARN(ldcp->id,
5234 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
5235 				    "revoke mapping, cookie %llx\n", ldcp->id,
5236 				    cookie_addr);
5237 			}
5238 		}
5239 
5240 		/* clear the entry from the table */
5241 		memseg->pages[i].mte->entry.ll = 0;
5242 		mtbl->num_avail++;
5243 	}
5244 	mutex_exit(&mtbl->lock);
5245 
5246 	/* free the allocated memseg and page structures */
5247 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5248 	kmem_free(memseg->cookies,
5249 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
5250 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5251 
5252 	/* uninitialize the memory handle */
5253 	mhdl->memseg = NULL;
5254 	mhdl->status = LDC_UNBOUND;
5255 
5256 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
5257 	    ldcp->id, mhdl);
5258 
5259 	mutex_exit(&mhdl->lock);
5260 	return (0);
5261 }
5262 
/*
 * Get information about the memory handle. The base address of the
 * bound memory segment along with the type and permission are
 * returned back.
 */
5267 int
5268 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
5269 {
5270 	ldc_mhdl_t	*mhdl;
5271 
5272 	if (mhandle == NULL) {
5273 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
5274 		return (EINVAL);
5275 	}
5276 	mhdl = (ldc_mhdl_t *)mhandle;
5277 
5278 	if (minfo == NULL) {
5279 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
5280 		return (EINVAL);
5281 	}
5282 
5283 	mutex_enter(&mhdl->lock);
5284 
5285 	minfo->status = mhdl->status;
5286 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
5287 		minfo->vaddr = mhdl->memseg->vaddr;
5288 		minfo->raddr = mhdl->memseg->raddr;
5289 		minfo->mtype = mhdl->mtype;
5290 		minfo->perm = mhdl->perm;
5291 	}
5292 	mutex_exit(&mhdl->lock);
5293 
5294 	return (0);
5295 }
5296 
5297 /*
5298  * Copy data either from or to the client specified virtual address
5299  * space to or from the exported memory associated with the cookies.
5300  * The direction argument determines whether the data is read from or
5301  * written to exported memory.
5302  */
5303 int
5304 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
5305     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
5306 {
5307 	ldc_chan_t 	*ldcp;
5308 	uint64_t	local_voff, local_valign;
5309 	uint64_t	cookie_addr, cookie_size;
5310 	uint64_t	pg_shift, pg_size, pg_size_code;
5311 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
5312 	uint64_t	local_ra, local_poff, local_psize;
5313 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
5314 	pgcnt_t		npages;
5315 	size_t		len = *size;
5316 	int 		i, rv = 0;
5317 
5318 	uint64_t	chid;
5319 
5320 	if (handle == NULL) {
5321 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
5322 		return (EINVAL);
5323 	}
5324 	ldcp = (ldc_chan_t *)handle;
5325 	chid = ldcp->id;
5326 
5327 	/* check to see if channel is UP */
5328 	if (ldcp->tstate != TS_UP) {
5329 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
5330 		    chid);
5331 		return (ECONNRESET);
5332 	}
5333 
5334 	/* Force address and size to be 8-byte aligned */
5335 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5336 		DWARN(chid,
5337 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
5338 		return (EINVAL);
5339 	}
5340 
5341 	/* Find the size of the exported memory */
5342 	export_size = 0;
5343 	for (i = 0; i < ccount; i++)
5344 		export_size += cookies[i].size;
5345 
5346 	/* check to see if offset is valid */
5347 	if (off > export_size) {
5348 		DWARN(chid,
5349 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
5350 		    chid);
5351 		return (EINVAL);
5352 	}
5353 
5354 	/*
5355 	 * Check to see if the export size is smaller than the size we
5356 	 * are requesting to copy - if so flag an error
5357 	 */
5358 	if ((export_size - off) < *size) {
5359 		DWARN(chid,
5360 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
5361 		    chid);
5362 		return (EINVAL);
5363 	}
5364 
5365 	total_bal = min(export_size, *size);
5366 
5367 	/* FUTURE: get the page size, pgsz code, and shift */
5368 	pg_size = MMU_PAGESIZE;
5369 	pg_size_code = page_szc(pg_size);
5370 	pg_shift = page_get_shift(pg_size_code);
5371 
5372 	D1(chid, "ldc_mem_copy: copying data "
5373 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5374 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
5375 
5376 	/* aligned VA and its offset */
5377 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
5378 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5379 
5380 	npages = (len+local_voff)/pg_size;
5381 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
5382 
5383 	D1(chid,
5384 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
5385 	    chid, vaddr, local_valign, local_voff, npages);
5386 
5387 	local_ra = va_to_pa((void *)local_valign);
5388 	local_poff = local_voff;
5389 	local_psize = min(len, (pg_size - local_voff));
5390 
5391 	len -= local_psize;
5392 
5393 	/*
5394 	 * find the first cookie in the list of cookies
5395 	 * if the offset passed in is not zero
5396 	 */
5397 	for (idx = 0; idx < ccount; idx++) {
5398 		cookie_size = cookies[idx].size;
5399 		if (off < cookie_size)
5400 			break;
5401 		off -= cookie_size;
5402 	}
5403 
5404 	cookie_addr = cookies[idx].addr + off;
5405 	cookie_size = cookies[idx].size - off;
5406 
5407 	export_caddr = cookie_addr & ~(pg_size - 1);
5408 	export_poff = cookie_addr & (pg_size - 1);
5409 	export_psize = min(cookie_size, (pg_size - export_poff));
5410 
5411 	for (;;) {
5412 
5413 		copy_size = min(export_psize, local_psize);
5414 
5415 		D1(chid,
5416 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
5417 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
5418 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5419 		    " total_bal=0x%llx\n",
5420 		    chid, direction, export_caddr, local_ra, export_poff,
5421 		    local_poff, export_psize, local_psize, copy_size,
5422 		    total_bal);
5423 
5424 		rv = hv_ldc_copy(chid, direction,
5425 		    (export_caddr + export_poff), (local_ra + local_poff),
5426 		    copy_size, &copied_len);
5427 
5428 		if (rv != 0) {
5429 			int 		error = EIO;
5430 			uint64_t	rx_hd, rx_tl;
5431 
5432 			DWARN(chid,
5433 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
5434 			    (unsigned long long)chid, rv);
5435 			DWARN(chid,
5436 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
5437 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
5438 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
5439 			    " copied_len=0x%lx, total_bal=0x%lx\n",
5440 			    chid, direction, export_caddr, local_ra,
5441 			    export_poff, local_poff, export_psize, local_psize,
5442 			    copy_size, copied_len, total_bal);
5443 
5444 			*size = *size - total_bal;
5445 
5446 			/*
5447 			 * check if reason for copy error was due to
5448 			 * a channel reset. we need to grab the lock
5449 			 * just in case we have to do a reset.
5450 			 */
5451 			mutex_enter(&ldcp->lock);
5452 			mutex_enter(&ldcp->tx_lock);
5453 
5454 			rv = hv_ldc_rx_get_state(ldcp->id,
5455 			    &rx_hd, &rx_tl, &(ldcp->link_state));
5456 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
5457 			    ldcp->link_state == LDC_CHANNEL_RESET) {
5458 				i_ldc_reset(ldcp, B_FALSE);
5459 				error = ECONNRESET;
5460 			}
5461 
5462 			mutex_exit(&ldcp->tx_lock);
5463 			mutex_exit(&ldcp->lock);
5464 
5465 			return (error);
5466 		}
5467 
5468 		ASSERT(copied_len <= copy_size);
5469 
5470 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
5471 		export_poff += copied_len;
5472 		local_poff += copied_len;
5473 		export_psize -= copied_len;
5474 		local_psize -= copied_len;
5475 		cookie_size -= copied_len;
5476 
5477 		total_bal -= copied_len;
5478 
5479 		if (copy_size != copied_len)
5480 			continue;
5481 
5482 		if (export_psize == 0 && total_bal != 0) {
5483 
5484 			if (cookie_size == 0) {
5485 				idx++;
5486 				cookie_addr = cookies[idx].addr;
5487 				cookie_size = cookies[idx].size;
5488 
5489 				export_caddr = cookie_addr & ~(pg_size - 1);
5490 				export_poff = cookie_addr & (pg_size - 1);
5491 				export_psize =
5492 				    min(cookie_size, (pg_size-export_poff));
5493 			} else {
5494 				export_caddr += pg_size;
5495 				export_poff = 0;
5496 				export_psize = min(cookie_size, pg_size);
5497 			}
5498 		}
5499 
5500 		if (local_psize == 0 && total_bal != 0) {
5501 			local_valign += pg_size;
5502 			local_ra = va_to_pa((void *)local_valign);
5503 			local_poff = 0;
5504 			local_psize = min(pg_size, len);
5505 			len -= local_psize;
5506 		}
5507 
5508 		/* check if we are all done */
5509 		if (total_bal == 0)
5510 			break;
5511 	}
5512 
5513 
5514 	D1(chid,
5515 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
5516 	    chid, *size);
5517 
5518 	return (0);
5519 }
5520 
5521 /*
5522  * Copy data either from or to the client specified virtual address
5523  * space to or from HV physical memory.
5524  *
5525  * The direction argument determines whether the data is read from or
5526  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
5527  * to the ldc_mem_copy interface
5528  */
int
ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
    caddr_t paddr, uint8_t direction)
{
	ldc_chan_t 	*ldcp;
	uint64_t	local_voff, local_valign;
	uint64_t	pg_shift, pg_size, pg_size_code;
	uint64_t 	target_pa, target_poff, target_psize, target_size;
	uint64_t	local_ra, local_poff, local_psize;
	uint64_t	copy_size, copied_len = 0;
	pgcnt_t		npages;
	size_t		len = *size;
	int 		rv = 0;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	/* check to see if channel is UP */
	if (ldcp->tstate != TS_UP) {
		DWARN(ldcp->id,
		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (ECONNRESET);
	}

	/* Force address and size to be 8-byte aligned */
	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
		DWARN(ldcp->id,
		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/* total number of bytes that remain to be copied */
	target_size = *size;

	/* FUTURE: get the page size, pgsz code, and shift */
	pg_size = MMU_PAGESIZE;
	pg_size_code = page_szc(pg_size);
	pg_shift = page_get_shift(pg_size_code);

	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);

	/* aligned VA and its offset */
	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);

	/* number of pages the local buffer spans (npages used for debug) */
	npages = (len + local_voff) / pg_size;
	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;

	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
	    ldcp->id, vaddr, local_valign, local_voff, npages);

	/* RA of the first local page, plus offset and bytes within it */
	local_ra = va_to_pa((void *)local_valign);
	local_poff = local_voff;
	local_psize = min(len, (pg_size - local_voff));

	len -= local_psize;

	/* page-aligned target PA, offset into it, and bytes left in it */
	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
	target_psize = pg_size - target_poff;

	/*
	 * Copy loop: each iteration transfers the largest chunk that fits
	 * inside both the current target page and the current local page.
	 */
	for (;;) {

		copy_size = min(target_psize, local_psize);

		D1(ldcp->id,
		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
		    " total_bal=0x%llx\n",
		    ldcp->id, direction, target_pa, local_ra, target_poff,
		    local_poff, target_psize, local_psize, copy_size,
		    target_size);

		rv = hv_ldc_copy(ldcp->id, direction,
		    (target_pa + target_poff), (local_ra + local_poff),
		    copy_size, &copied_len);

		if (rv != 0) {
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
			    ldcp->id, rv);
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
			    "copy_sz=0x%llx, total_bal=0x%llx\n",
			    ldcp->id, direction, target_pa, local_ra,
			    target_poff, local_poff, target_psize, local_psize,
			    copy_size, target_size);

			/* report the number of bytes actually copied */
			*size = *size - target_size;
			mutex_exit(&ldcp->lock);
			return (i_ldc_h2v_error(rv));
		}

		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
		    copied_len);
		/* advance both cursors by what the HV actually moved */
		target_poff += copied_len;
		local_poff += copied_len;
		target_psize -= copied_len;
		local_psize -= copied_len;

		target_size -= copied_len;

		/* partial HV copy - retry the remainder of this chunk */
		if (copy_size != copied_len)
			continue;

		/* current target page exhausted - advance to the next one */
		if (target_psize == 0 && target_size != 0) {
			target_pa += pg_size;
			target_poff = 0;
			target_psize = min(pg_size, target_size);
		}

		/* current local page exhausted - advance and recompute RA */
		if (local_psize == 0 && target_size != 0) {
			local_valign += pg_size;
			local_ra = va_to_pa((void *)local_valign);
			local_poff = 0;
			local_psize = min(pg_size, len);
			len -= local_psize;
		}

		/* check if we are all done */
		if (target_size == 0)
			break;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
	    ldcp->id, *size);

	return (0);
}
5674 
5675 /*
5676  * Map an exported memory segment into the local address space. If the
5677  * memory range was exported for direct map access, a HV call is made
5678  * to allocate a RA range. If the map is done via a shadow copy, local
5679  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5680  * the mapping is a direct map then the RA is returned in 'raddr'.
5681  */
5682 int
5683 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5684     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5685 {
5686 	int		i, j, idx, rv, retries;
5687 	ldc_chan_t 	*ldcp;
5688 	ldc_mhdl_t	*mhdl;
5689 	ldc_memseg_t	*memseg;
5690 	caddr_t		tmpaddr;
5691 	uint64_t	map_perm = perm;
5692 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5693 	uint64_t	exp_size = 0, base_off, map_size, npages;
5694 	uint64_t	cookie_addr, cookie_off, cookie_size;
5695 	tte_t		ldc_tte;
5696 
5697 	if (mhandle == NULL) {
5698 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5699 		return (EINVAL);
5700 	}
5701 	mhdl = (ldc_mhdl_t *)mhandle;
5702 
5703 	mutex_enter(&mhdl->lock);
5704 
5705 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5706 	    mhdl->memseg != NULL) {
5707 		DWARN(DBG_ALL_LDCS,
5708 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5709 		mutex_exit(&mhdl->lock);
5710 		return (EINVAL);
5711 	}
5712 
5713 	ldcp = mhdl->ldcp;
5714 
5715 	mutex_enter(&ldcp->lock);
5716 
5717 	if (ldcp->tstate != TS_UP) {
5718 		DWARN(ldcp->id,
5719 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5720 		    ldcp->id);
5721 		mutex_exit(&ldcp->lock);
5722 		mutex_exit(&mhdl->lock);
5723 		return (ECONNRESET);
5724 	}
5725 
5726 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5727 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5728 		mutex_exit(&ldcp->lock);
5729 		mutex_exit(&mhdl->lock);
5730 		return (EINVAL);
5731 	}
5732 
5733 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5734 	    ldcp->id, cookie->addr, cookie->size);
5735 
5736 	/* FUTURE: get the page size, pgsz code, and shift */
5737 	pg_size = MMU_PAGESIZE;
5738 	pg_size_code = page_szc(pg_size);
5739 	pg_shift = page_get_shift(pg_size_code);
5740 	pg_mask = ~(pg_size - 1);
5741 
5742 	/* calculate the number of pages in the exported cookie */
5743 	base_off = cookie[0].addr & (pg_size - 1);
5744 	for (idx = 0; idx < ccount; idx++)
5745 		exp_size += cookie[idx].size;
5746 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5747 	npages = (map_size >> pg_shift);
5748 
5749 	/* Allocate memseg structure */
5750 	memseg = mhdl->memseg =
5751 	    kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5752 
5753 	/* Allocate memory to store all pages and cookies */
5754 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5755 	memseg->cookies =
5756 	    kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5757 
5758 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5759 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5760 
5761 	/*
5762 	 * Check if direct map over shared memory is enabled, if not change
5763 	 * the mapping type to SHADOW_MAP.
5764 	 */
5765 	if (ldc_shmem_enabled == 0)
5766 		mtype = LDC_SHADOW_MAP;
5767 
5768 	/*
5769 	 * Check to see if the client is requesting direct or shadow map
5770 	 * If direct map is requested, try to map remote memory first,
5771 	 * and if that fails, revert to shadow map
5772 	 */
5773 	if (mtype == LDC_DIRECT_MAP) {
5774 
5775 		/* Allocate kernel virtual space for mapping */
5776 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5777 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5778 		if (memseg->vaddr == NULL) {
5779 			cmn_err(CE_WARN,
5780 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5781 			    ldcp->id);
5782 			kmem_free(memseg->cookies,
5783 			    (sizeof (ldc_mem_cookie_t) * ccount));
5784 			kmem_free(memseg->pages,
5785 			    (sizeof (ldc_page_t) * npages));
5786 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5787 
5788 			mutex_exit(&ldcp->lock);
5789 			mutex_exit(&mhdl->lock);
5790 			return (ENOMEM);
5791 		}
5792 
5793 		/* Unload previous mapping */
5794 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5795 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5796 
5797 		/* for each cookie passed in - map into address space */
5798 		idx = 0;
5799 		cookie_size = 0;
5800 		tmpaddr = memseg->vaddr;
5801 
5802 		for (i = 0; i < npages; i++) {
5803 
5804 			if (cookie_size == 0) {
5805 				ASSERT(idx < ccount);
5806 				cookie_addr = cookie[idx].addr & pg_mask;
5807 				cookie_off = cookie[idx].addr & (pg_size - 1);
5808 				cookie_size =
5809 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5810 				    pg_size);
5811 				idx++;
5812 			}
5813 
5814 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5815 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5816 			    cookie_addr, cookie_size);
5817 
5818 			/* map the cookie into address space */
5819 			for (retries = 0; retries < ldc_max_retries;
5820 			    retries++) {
5821 
5822 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5823 				    &memseg->pages[i].raddr, &map_perm);
5824 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5825 					break;
5826 
5827 				drv_usecwait(ldc_delay);
5828 			}
5829 
5830 			if (rv || memseg->pages[i].raddr == 0) {
5831 				DWARN(ldcp->id,
5832 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5833 				    ldcp->id, rv);
5834 
5835 				/* remove previous mapins */
5836 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5837 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5838 				for (j = 0; j < i; j++) {
5839 					rv = hv_ldc_unmap(
5840 					    memseg->pages[j].raddr);
5841 					if (rv) {
5842 						DWARN(ldcp->id,
5843 						    "ldc_mem_map: (0x%llx) "
5844 						    "cannot unmap ra=0x%llx\n",
5845 						    ldcp->id,
5846 						    memseg->pages[j].raddr);
5847 					}
5848 				}
5849 
5850 				/* free kernel virtual space */
5851 				vmem_free(heap_arena, (void *)memseg->vaddr,
5852 				    map_size);
5853 
5854 				/* direct map failed - revert to shadow map */
5855 				mtype = LDC_SHADOW_MAP;
5856 				break;
5857 
5858 			} else {
5859 
5860 				D1(ldcp->id,
5861 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5862 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5863 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5864 				    cookie_addr, perm);
5865 
5866 				/*
5867 				 * NOTE: Calling hat_devload directly, causes it
5868 				 * to look for page_t using the pfn. Since this
5869 				 * addr is greater than the memlist, it treates
5870 				 * it as non-memory
5871 				 */
5872 				sfmmu_memtte(&ldc_tte,
5873 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5874 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5875 
5876 				D1(ldcp->id,
5877 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5878 				    "tte 0x%llx\n", ldcp->id,
5879 				    memseg->pages[i].raddr, ldc_tte);
5880 
5881 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5882 				    NULL, HAT_LOAD_LOCK);
5883 
5884 				cookie_size -= pg_size;
5885 				cookie_addr += pg_size;
5886 				tmpaddr += pg_size;
5887 			}
5888 		}
5889 	}
5890 
5891 	if (mtype == LDC_SHADOW_MAP) {
5892 		if (*vaddr == NULL) {
5893 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5894 			mhdl->myshadow = B_TRUE;
5895 
5896 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5897 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5898 		} else {
5899 			/*
5900 			 * Use client supplied memory for memseg->vaddr
5901 			 * WARNING: assuming that client mem is >= exp_size
5902 			 */
5903 			memseg->vaddr = *vaddr;
5904 		}
5905 
5906 		/* Save all page and cookie information */
5907 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5908 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5909 			memseg->pages[i].size = pg_size;
5910 			tmpaddr += pg_size;
5911 		}
5912 
5913 	}
5914 
5915 	/* save all cookies */
5916 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5917 
5918 	/* update memseg_t */
5919 	memseg->raddr = memseg->pages[0].raddr;
5920 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5921 	memseg->npages = npages;
5922 	memseg->ncookies = ccount;
5923 	memseg->next_cookie = 0;
5924 
5925 	/* memory handle = mapped */
5926 	mhdl->mtype = mtype;
5927 	mhdl->perm = perm;
5928 	mhdl->status = LDC_MAPPED;
5929 
5930 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5931 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5932 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5933 	    memseg->npages, memseg->ncookies);
5934 
5935 	if (mtype == LDC_SHADOW_MAP)
5936 		base_off = 0;
5937 	if (raddr)
5938 		*raddr = (caddr_t)(memseg->raddr | base_off);
5939 	if (vaddr)
5940 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5941 
5942 	mutex_exit(&ldcp->lock);
5943 	mutex_exit(&mhdl->lock);
5944 	return (0);
5945 }
5946 
5947 /*
5948  * Unmap a memory segment. Free shadow memory (if any).
5949  */
5950 int
5951 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5952 {
5953 	int		i, rv;
5954 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5955 	ldc_chan_t 	*ldcp;
5956 	ldc_memseg_t	*memseg;
5957 
5958 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5959 		DWARN(DBG_ALL_LDCS,
5960 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5961 		    mhandle);
5962 		return (EINVAL);
5963 	}
5964 
5965 	mutex_enter(&mhdl->lock);
5966 
5967 	ldcp = mhdl->ldcp;
5968 	memseg = mhdl->memseg;
5969 
5970 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5971 	    ldcp->id, mhdl);
5972 
5973 	/* if we allocated shadow memory - free it */
5974 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5975 		kmem_free(memseg->vaddr, memseg->size);
5976 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5977 
5978 		/* unmap in the case of DIRECT_MAP */
5979 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5980 		    HAT_UNLOAD_UNLOCK);
5981 
5982 		for (i = 0; i < memseg->npages; i++) {
5983 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5984 			if (rv) {
5985 				cmn_err(CE_WARN,
5986 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5987 				    ldcp->id, rv);
5988 			}
5989 		}
5990 
5991 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5992 	}
5993 
5994 	/* free the allocated memseg and page structures */
5995 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5996 	kmem_free(memseg->cookies,
5997 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5998 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5999 
6000 	/* uninitialize the memory handle */
6001 	mhdl->memseg = NULL;
6002 	mhdl->status = LDC_UNBOUND;
6003 
6004 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
6005 	    ldcp->id, mhdl);
6006 
6007 	mutex_exit(&mhdl->lock);
6008 	return (0);
6009 }
6010 
6011 /*
6012  * Internal entry point for LDC mapped memory entry consistency
6013  * semantics. Acquire copies the contents of the remote memory
6014  * into the local shadow copy. The release operation copies the local
6015  * contents into the remote memory. The offset and size specify the
6016  * bounds for the memory range being synchronized.
6017  */
6018 static int
6019 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
6020     uint64_t offset, size_t size)
6021 {
6022 	int 		err;
6023 	ldc_mhdl_t	*mhdl;
6024 	ldc_chan_t	*ldcp;
6025 	ldc_memseg_t	*memseg;
6026 	caddr_t		local_vaddr;
6027 	size_t		copy_size;
6028 
6029 	if (mhandle == NULL) {
6030 		DWARN(DBG_ALL_LDCS,
6031 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
6032 		return (EINVAL);
6033 	}
6034 	mhdl = (ldc_mhdl_t *)mhandle;
6035 
6036 	mutex_enter(&mhdl->lock);
6037 
6038 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
6039 		DWARN(DBG_ALL_LDCS,
6040 		    "i_ldc_mem_acquire_release: not mapped memory\n");
6041 		mutex_exit(&mhdl->lock);
6042 		return (EINVAL);
6043 	}
6044 
6045 	/* do nothing for direct map */
6046 	if (mhdl->mtype == LDC_DIRECT_MAP) {
6047 		mutex_exit(&mhdl->lock);
6048 		return (0);
6049 	}
6050 
6051 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
6052 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
6053 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
6054 		mutex_exit(&mhdl->lock);
6055 		return (0);
6056 	}
6057 
6058 	if (offset >= mhdl->memseg->size ||
6059 	    (offset + size) > mhdl->memseg->size) {
6060 		DWARN(DBG_ALL_LDCS,
6061 		    "i_ldc_mem_acquire_release: memory out of range\n");
6062 		mutex_exit(&mhdl->lock);
6063 		return (EINVAL);
6064 	}
6065 
6066 	/* get the channel handle and memory segment */
6067 	ldcp = mhdl->ldcp;
6068 	memseg = mhdl->memseg;
6069 
6070 	if (mhdl->mtype == LDC_SHADOW_MAP) {
6071 
6072 		local_vaddr = memseg->vaddr + offset;
6073 		copy_size = size;
6074 
6075 		/* copy to/from remote from/to local memory */
6076 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
6077 		    &copy_size, memseg->cookies, memseg->ncookies,
6078 		    direction);
6079 		if (err || copy_size != size) {
6080 			DWARN(ldcp->id,
6081 			    "i_ldc_mem_acquire_release: copy failed\n");
6082 			mutex_exit(&mhdl->lock);
6083 			return (err);
6084 		}
6085 	}
6086 
6087 	mutex_exit(&mhdl->lock);
6088 
6089 	return (0);
6090 }
6091 
6092 /*
 * Ensure that the contents of the local memory seg are consistent
 * with the contents of the remote segment
6095  */
6096 int
6097 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
6098 {
6099 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
6100 }
6101 
6102 
6103 /*
 * Ensure that the contents of the remote memory seg are consistent
 * with the contents of the local segment
6106  */
6107 int
6108 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
6109 {
6110 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
6111 }
6112 
6113 /*
 * Allocate a descriptor ring. The size of each descriptor
6115  * must be 8-byte aligned and the entire ring should be a multiple
6116  * of MMU_PAGESIZE.
6117  */
6118 int
6119 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
6120 {
6121 	ldc_dring_t *dringp;
6122 	size_t size = (dsize * len);
6123 
6124 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
6125 	    len, dsize);
6126 
6127 	if (dhandle == NULL) {
6128 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
6129 		return (EINVAL);
6130 	}
6131 
6132 	if (len == 0) {
6133 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
6134 		return (EINVAL);
6135 	}
6136 
6137 	/* descriptor size should be 8-byte aligned */
6138 	if (dsize == 0 || (dsize & 0x7)) {
6139 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
6140 		return (EINVAL);
6141 	}
6142 
6143 	*dhandle = 0;
6144 
6145 	/* Allocate a desc ring structure */
6146 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6147 
6148 	/* Initialize dring */
6149 	dringp->length = len;
6150 	dringp->dsize = dsize;
6151 
6152 	/* round off to multiple of pagesize */
6153 	dringp->size = (size & MMU_PAGEMASK);
6154 	if (size & MMU_PAGEOFFSET)
6155 		dringp->size += MMU_PAGESIZE;
6156 
6157 	dringp->status = LDC_UNBOUND;
6158 
6159 	/* allocate descriptor ring memory */
6160 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
6161 
6162 	/* initialize the desc ring lock */
6163 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6164 
6165 	/* Add descriptor ring to the head of global list */
6166 	mutex_enter(&ldcssp->lock);
6167 	dringp->next = ldcssp->dring_list;
6168 	ldcssp->dring_list = dringp;
6169 	mutex_exit(&ldcssp->lock);
6170 
6171 	*dhandle = (ldc_dring_handle_t)dringp;
6172 
6173 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
6174 
6175 	return (0);
6176 }
6177 
6178 
6179 /*
6180  * Destroy a descriptor ring.
6181  */
int
ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
{
	ldc_dring_t *dringp;
	ldc_dring_t *tmp_dringp;

	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");

	if (dhandle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
		return (EINVAL);
	}
	dringp = (ldc_dring_t *)dhandle;

	/* a bound ring must be unbound before it can be destroyed */
	if (dringp->status == LDC_BOUND) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_dring_destroy: desc ring is bound\n");
		return (EACCES);
	}

	/* lock order: dring lock first, then the global ssp lock */
	mutex_enter(&dringp->lock);
	mutex_enter(&ldcssp->lock);

	/* remove from linked list - if not bound */
	tmp_dringp = ldcssp->dring_list;
	if (tmp_dringp == dringp) {
		/* ring is at the head of the global list */
		ldcssp->dring_list = dringp->next;
		dringp->next = NULL;

	} else {
		/* walk the list looking for the ring's predecessor */
		while (tmp_dringp != NULL) {
			if (tmp_dringp->next == dringp) {
				tmp_dringp->next = dringp->next;
				dringp->next = NULL;
				break;
			}
			tmp_dringp = tmp_dringp->next;
		}
		/* ring was not on the global list - bad handle */
		if (tmp_dringp == NULL) {
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_dring_destroy: invalid descriptor\n");
			mutex_exit(&ldcssp->lock);
			mutex_exit(&dringp->lock);
			return (EINVAL);
		}
	}

	mutex_exit(&ldcssp->lock);

	/* free the descriptor ring */
	kmem_free(dringp->base, dringp->size);

	mutex_exit(&dringp->lock);

	/* destroy dring lock */
	mutex_destroy(&dringp->lock);

	/* free desc ring object */
	kmem_free(dringp, sizeof (ldc_dring_t));

	return (0);
}
6245 
6246 /*
6247  * Bind a previously allocated dring to a channel. The channel should
6248  * be OPEN in order to bind the ring to the channel. Returns back a
6249  * descriptor ring cookie. The descriptor ring is exported for remote
6250  * access by the client at the other end of the channel. An entry for
6251  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
6252  */
6253 int
6254 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
6255     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
6256 {
6257 	int		err;
6258 	ldc_chan_t 	*ldcp;
6259 	ldc_dring_t	*dringp;
6260 	ldc_mem_handle_t mhandle;
6261 
6262 	/* check to see if channel is initalized */
6263 	if (handle == NULL) {
6264 		DWARN(DBG_ALL_LDCS,
6265 		    "ldc_mem_dring_bind: invalid channel handle\n");
6266 		return (EINVAL);
6267 	}
6268 	ldcp = (ldc_chan_t *)handle;
6269 
6270 	if (dhandle == NULL) {
6271 		DWARN(DBG_ALL_LDCS,
6272 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
6273 		return (EINVAL);
6274 	}
6275 	dringp = (ldc_dring_t *)dhandle;
6276 
6277 	if (cookie == NULL) {
6278 		DWARN(ldcp->id,
6279 		    "ldc_mem_dring_bind: invalid cookie arg\n");
6280 		return (EINVAL);
6281 	}
6282 
6283 	mutex_enter(&dringp->lock);
6284 
6285 	if (dringp->status == LDC_BOUND) {
6286 		DWARN(DBG_ALL_LDCS,
6287 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
6288 		    ldcp->id);
6289 		mutex_exit(&dringp->lock);
6290 		return (EINVAL);
6291 	}
6292 
6293 	if ((perm & LDC_MEM_RW) == 0) {
6294 		DWARN(DBG_ALL_LDCS,
6295 		    "ldc_mem_dring_bind: invalid permissions\n");
6296 		mutex_exit(&dringp->lock);
6297 		return (EINVAL);
6298 	}
6299 
6300 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
6301 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
6302 		mutex_exit(&dringp->lock);
6303 		return (EINVAL);
6304 	}
6305 
6306 	dringp->ldcp = ldcp;
6307 
6308 	/* create an memory handle */
6309 	err = ldc_mem_alloc_handle(handle, &mhandle);
6310 	if (err || mhandle == NULL) {
6311 		DWARN(DBG_ALL_LDCS,
6312 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
6313 		    ldcp->id);
6314 		mutex_exit(&dringp->lock);
6315 		return (err);
6316 	}
6317 	dringp->mhdl = mhandle;
6318 
6319 	/* bind the descriptor ring to channel */
6320 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
6321 	    mtype, perm, cookie, ccount);
6322 	if (err) {
6323 		DWARN(ldcp->id,
6324 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
6325 		    ldcp->id);
6326 		mutex_exit(&dringp->lock);
6327 		return (err);
6328 	}
6329 
6330 	/*
6331 	 * For now return error if we get more than one cookie
6332 	 * FUTURE: Return multiple cookies ..
6333 	 */
6334 	if (*ccount > 1) {
6335 		(void) ldc_mem_unbind_handle(mhandle);
6336 		(void) ldc_mem_free_handle(mhandle);
6337 
6338 		dringp->ldcp = NULL;
6339 		dringp->mhdl = NULL;
6340 		*ccount = 0;
6341 
6342 		mutex_exit(&dringp->lock);
6343 		return (EAGAIN);
6344 	}
6345 
6346 	/* Add descriptor ring to channel's exported dring list */
6347 	mutex_enter(&ldcp->exp_dlist_lock);
6348 	dringp->ch_next = ldcp->exp_dring_list;
6349 	ldcp->exp_dring_list = dringp;
6350 	mutex_exit(&ldcp->exp_dlist_lock);
6351 
6352 	dringp->status = LDC_BOUND;
6353 
6354 	mutex_exit(&dringp->lock);
6355 
6356 	return (0);
6357 }
6358 
6359 /*
6360  * Return the next cookie associated with the specified dring handle
6361  */
6362 int
6363 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
6364 {
6365 	int		rv = 0;
6366 	ldc_dring_t 	*dringp;
6367 	ldc_chan_t	*ldcp;
6368 
6369 	if (dhandle == NULL) {
6370 		DWARN(DBG_ALL_LDCS,
6371 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
6372 		return (EINVAL);
6373 	}
6374 	dringp = (ldc_dring_t *)dhandle;
6375 	mutex_enter(&dringp->lock);
6376 
6377 	if (dringp->status != LDC_BOUND) {
6378 		DWARN(DBG_ALL_LDCS,
6379 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
6380 		    "is not bound\n", dringp);
6381 		mutex_exit(&dringp->lock);
6382 		return (EINVAL);
6383 	}
6384 
6385 	ldcp = dringp->ldcp;
6386 
6387 	if (cookie == NULL) {
6388 		DWARN(ldcp->id,
6389 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
6390 		    ldcp->id);
6391 		mutex_exit(&dringp->lock);
6392 		return (EINVAL);
6393 	}
6394 
6395 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
6396 	mutex_exit(&dringp->lock);
6397 
6398 	return (rv);
6399 }
6400 /*
6401  * Unbind a previously bound dring from a channel.
6402  */
6403 int
6404 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
6405 {
6406 	ldc_dring_t 	*dringp;
6407 	ldc_dring_t	*tmp_dringp;
6408 	ldc_chan_t	*ldcp;
6409 
6410 	if (dhandle == NULL) {
6411 		DWARN(DBG_ALL_LDCS,
6412 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
6413 		return (EINVAL);
6414 	}
6415 	dringp = (ldc_dring_t *)dhandle;
6416 
6417 	mutex_enter(&dringp->lock);
6418 
6419 	if (dringp->status == LDC_UNBOUND) {
6420 		DWARN(DBG_ALL_LDCS,
6421 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
6422 		    dringp);
6423 		mutex_exit(&dringp->lock);
6424 		return (EINVAL);
6425 	}
6426 	ldcp = dringp->ldcp;
6427 
6428 	mutex_enter(&ldcp->exp_dlist_lock);
6429 
6430 	tmp_dringp = ldcp->exp_dring_list;
6431 	if (tmp_dringp == dringp) {
6432 		ldcp->exp_dring_list = dringp->ch_next;
6433 		dringp->ch_next = NULL;
6434 
6435 	} else {
6436 		while (tmp_dringp != NULL) {
6437 			if (tmp_dringp->ch_next == dringp) {
6438 				tmp_dringp->ch_next = dringp->ch_next;
6439 				dringp->ch_next = NULL;
6440 				break;
6441 			}
6442 			tmp_dringp = tmp_dringp->ch_next;
6443 		}
6444 		if (tmp_dringp == NULL) {
6445 			DWARN(DBG_ALL_LDCS,
6446 			    "ldc_mem_dring_unbind: invalid descriptor\n");
6447 			mutex_exit(&ldcp->exp_dlist_lock);
6448 			mutex_exit(&dringp->lock);
6449 			return (EINVAL);
6450 		}
6451 	}
6452 
6453 	mutex_exit(&ldcp->exp_dlist_lock);
6454 
6455 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
6456 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6457 
6458 	dringp->ldcp = NULL;
6459 	dringp->mhdl = NULL;
6460 	dringp->status = LDC_UNBOUND;
6461 
6462 	mutex_exit(&dringp->lock);
6463 
6464 	return (0);
6465 }
6466 
6467 /*
6468  * Get information about the dring. The base address of the descriptor
6469  * ring along with the type and permission are returned back.
6470  */
6471 int
6472 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
6473 {
6474 	ldc_dring_t	*dringp;
6475 	int		rv;
6476 
6477 	if (dhandle == NULL) {
6478 		DWARN(DBG_ALL_LDCS,
6479 		    "ldc_mem_dring_info: invalid desc ring handle\n");
6480 		return (EINVAL);
6481 	}
6482 	dringp = (ldc_dring_t *)dhandle;
6483 
6484 	mutex_enter(&dringp->lock);
6485 
6486 	if (dringp->mhdl) {
6487 		rv = ldc_mem_info(dringp->mhdl, minfo);
6488 		if (rv) {
6489 			DWARN(DBG_ALL_LDCS,
6490 			    "ldc_mem_dring_info: error reading mem info\n");
6491 			mutex_exit(&dringp->lock);
6492 			return (rv);
6493 		}
6494 	} else {
6495 		minfo->vaddr = dringp->base;
6496 		minfo->raddr = NULL;
6497 		minfo->status = dringp->status;
6498 	}
6499 
6500 	mutex_exit(&dringp->lock);
6501 
6502 	return (0);
6503 }
6504 
6505 /*
6506  * Map an exported descriptor ring into the local address space. If the
6507  * descriptor ring was exported for direct map access, a HV call is made
6508  * to allocate a RA range. If the map is done via a shadow copy, local
6509  * shadow memory is allocated.
6510  */
6511 int
6512 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
6513     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
6514     ldc_dring_handle_t *dhandle)
6515 {
6516 	int		err;
6517 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
6518 	ldc_mem_handle_t mhandle;
6519 	ldc_dring_t	*dringp;
6520 	size_t		dring_size;
6521 
6522 	if (dhandle == NULL) {
6523 		DWARN(DBG_ALL_LDCS,
6524 		    "ldc_mem_dring_map: invalid dhandle\n");
6525 		return (EINVAL);
6526 	}
6527 
6528 	/* check to see if channel is initalized */
6529 	if (handle == NULL) {
6530 		DWARN(DBG_ALL_LDCS,
6531 		    "ldc_mem_dring_map: invalid channel handle\n");
6532 		return (EINVAL);
6533 	}
6534 	ldcp = (ldc_chan_t *)handle;
6535 
6536 	if (cookie == NULL) {
6537 		DWARN(ldcp->id,
6538 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
6539 		    ldcp->id);
6540 		return (EINVAL);
6541 	}
6542 
6543 	/* FUTURE: For now we support only one cookie per dring */
6544 	ASSERT(ccount == 1);
6545 
6546 	if (cookie->size < (dsize * len)) {
6547 		DWARN(ldcp->id,
6548 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6549 		    ldcp->id);
6550 		return (EINVAL);
6551 	}
6552 
6553 	*dhandle = 0;
6554 
6555 	/* Allocate an dring structure */
6556 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6557 
6558 	D1(ldcp->id,
6559 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6560 	    mtype, len, dsize, cookie->addr, cookie->size);
6561 
6562 	/* Initialize dring */
6563 	dringp->length = len;
6564 	dringp->dsize = dsize;
6565 
6566 	/* round of to multiple of page size */
6567 	dring_size = len * dsize;
6568 	dringp->size = (dring_size & MMU_PAGEMASK);
6569 	if (dring_size & MMU_PAGEOFFSET)
6570 		dringp->size += MMU_PAGESIZE;
6571 
6572 	dringp->ldcp = ldcp;
6573 
6574 	/* create an memory handle */
6575 	err = ldc_mem_alloc_handle(handle, &mhandle);
6576 	if (err || mhandle == NULL) {
6577 		DWARN(DBG_ALL_LDCS,
6578 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6579 		    err);
6580 		kmem_free(dringp, sizeof (ldc_dring_t));
6581 		return (ENOMEM);
6582 	}
6583 
6584 	dringp->mhdl = mhandle;
6585 	dringp->base = NULL;
6586 
6587 	/* map the dring into local memory */
6588 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6589 	    &(dringp->base), NULL);
6590 	if (err || dringp->base == NULL) {
6591 		cmn_err(CE_WARN,
6592 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6593 		(void) ldc_mem_free_handle(mhandle);
6594 		kmem_free(dringp, sizeof (ldc_dring_t));
6595 		return (ENOMEM);
6596 	}
6597 
6598 	/* initialize the desc ring lock */
6599 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6600 
6601 	/* Add descriptor ring to channel's imported dring list */
6602 	mutex_enter(&ldcp->imp_dlist_lock);
6603 	dringp->ch_next = ldcp->imp_dring_list;
6604 	ldcp->imp_dring_list = dringp;
6605 	mutex_exit(&ldcp->imp_dlist_lock);
6606 
6607 	dringp->status = LDC_MAPPED;
6608 
6609 	*dhandle = (ldc_dring_handle_t)dringp;
6610 
6611 	return (0);
6612 }
6613 
6614 /*
6615  * Unmap a descriptor ring. Free shadow memory (if any).
6616  */
6617 int
6618 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6619 {
6620 	ldc_dring_t 	*dringp;
6621 	ldc_dring_t	*tmp_dringp;
6622 	ldc_chan_t	*ldcp;
6623 
6624 	if (dhandle == NULL) {
6625 		DWARN(DBG_ALL_LDCS,
6626 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6627 		return (EINVAL);
6628 	}
6629 	dringp = (ldc_dring_t *)dhandle;
6630 
6631 	if (dringp->status != LDC_MAPPED) {
6632 		DWARN(DBG_ALL_LDCS,
6633 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6634 		return (EINVAL);
6635 	}
6636 
6637 	mutex_enter(&dringp->lock);
6638 
6639 	ldcp = dringp->ldcp;
6640 
6641 	mutex_enter(&ldcp->imp_dlist_lock);
6642 
6643 	/* find and unlink the desc ring from channel import list */
6644 	tmp_dringp = ldcp->imp_dring_list;
6645 	if (tmp_dringp == dringp) {
6646 		ldcp->imp_dring_list = dringp->ch_next;
6647 		dringp->ch_next = NULL;
6648 
6649 	} else {
6650 		while (tmp_dringp != NULL) {
6651 			if (tmp_dringp->ch_next == dringp) {
6652 				tmp_dringp->ch_next = dringp->ch_next;
6653 				dringp->ch_next = NULL;
6654 				break;
6655 			}
6656 			tmp_dringp = tmp_dringp->ch_next;
6657 		}
6658 		if (tmp_dringp == NULL) {
6659 			DWARN(DBG_ALL_LDCS,
6660 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6661 			mutex_exit(&ldcp->imp_dlist_lock);
6662 			mutex_exit(&dringp->lock);
6663 			return (EINVAL);
6664 		}
6665 	}
6666 
6667 	mutex_exit(&ldcp->imp_dlist_lock);
6668 
6669 	/* do a LDC memory handle unmap and free */
6670 	(void) ldc_mem_unmap(dringp->mhdl);
6671 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6672 
6673 	dringp->status = 0;
6674 	dringp->ldcp = NULL;
6675 
6676 	mutex_exit(&dringp->lock);
6677 
6678 	/* destroy dring lock */
6679 	mutex_destroy(&dringp->lock);
6680 
6681 	/* free desc ring object */
6682 	kmem_free(dringp, sizeof (ldc_dring_t));
6683 
6684 	return (0);
6685 }
6686 
6687 /*
6688  * Internal entry point for descriptor ring access entry consistency
6689  * semantics. Acquire copies the contents of the remote descriptor ring
6690  * into the local shadow copy. The release operation copies the local
6691  * contents into the remote dring. The start and end locations specify
6692  * bounds for the entries being synchronized.
6693  */
6694 static int
6695 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6696     uint8_t direction, uint64_t start, uint64_t end)
6697 {
6698 	int 			err;
6699 	ldc_dring_t		*dringp;
6700 	ldc_chan_t		*ldcp;
6701 	uint64_t		soff;
6702 	size_t			copy_size;
6703 
6704 	if (dhandle == NULL) {
6705 		DWARN(DBG_ALL_LDCS,
6706 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6707 		return (EINVAL);
6708 	}
6709 	dringp = (ldc_dring_t *)dhandle;
6710 	mutex_enter(&dringp->lock);
6711 
6712 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6713 		DWARN(DBG_ALL_LDCS,
6714 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6715 		mutex_exit(&dringp->lock);
6716 		return (EINVAL);
6717 	}
6718 
6719 	if (start >= dringp->length || end >= dringp->length) {
6720 		DWARN(DBG_ALL_LDCS,
6721 		    "i_ldc_dring_acquire_release: index out of range\n");
6722 		mutex_exit(&dringp->lock);
6723 		return (EINVAL);
6724 	}
6725 
6726 	/* get the channel handle */
6727 	ldcp = dringp->ldcp;
6728 
6729 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6730 	    ((dringp->length - start) * dringp->dsize);
6731 
6732 	/* Calculate the relative offset for the first desc */
6733 	soff = (start * dringp->dsize);
6734 
6735 	/* copy to/from remote from/to local memory */
6736 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6737 	    soff, copy_size);
6738 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6739 	    direction, soff, copy_size);
6740 	if (err) {
6741 		DWARN(ldcp->id,
6742 		    "i_ldc_dring_acquire_release: copy failed\n");
6743 		mutex_exit(&dringp->lock);
6744 		return (err);
6745 	}
6746 
6747 	/* do the balance */
6748 	if (start > end) {
6749 		copy_size = ((end + 1) * dringp->dsize);
6750 		soff = 0;
6751 
6752 		/* copy to/from remote from/to local memory */
6753 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6754 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6755 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6756 		    direction, soff, copy_size);
6757 		if (err) {
6758 			DWARN(ldcp->id,
6759 			    "i_ldc_dring_acquire_release: copy failed\n");
6760 			mutex_exit(&dringp->lock);
6761 			return (err);
6762 		}
6763 	}
6764 
6765 	mutex_exit(&dringp->lock);
6766 
6767 	return (0);
6768 }
6769 
6770 /*
6771  * Ensure that the contents in the local dring are consistent
6772  * with the contents if of remote dring
6773  */
int
ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
{
	/* copy entries [start..end] (may wrap) from the remote dring */
	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
}
6779 
6780 /*
6781  * Ensure that the contents in the remote dring are consistent
6782  * with the contents if of local dring
6783  */
int
ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
{
	/* copy entries [start..end] (may wrap) out to the remote dring */
	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
}
6789 
6790 
6791 /* ------------------------------------------------------------------------- */
6792