xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 32232bf4531ddb458488ba01c9df68685921ebf4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
71 /* Core internal functions */
72 static int i_ldc_h2v_error(int h_error);
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp);
75 static void i_ldc_reset_state(ldc_chan_t *ldcp);
76 static void i_ldc_reset(ldc_chan_t *ldcp);
77 
78 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
79 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
80 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
81 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
82     uint8_t ctrlmsg);
83 
84 /* Interrupt handling functions */
85 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
86 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
87 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
88 
89 /* Read method functions */
90 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
91 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
92 	size_t *sizep);
93 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 
96 /* Write method functions */
97 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
98 	size_t *sizep);
99 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
100 	size_t *sizep);
101 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
102 	size_t *sizep);
103 
104 /* Pkt processing internal functions */
105 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
106 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
107 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
108 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
109 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
110 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
111 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
112 
113 /* Memory synchronization internal functions */
114 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
115     uint8_t direction, uint64_t offset, size_t size);
116 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
117     uint8_t direction, uint64_t start, uint64_t end);
118 
119 /* LDC Version */
120 static ldc_ver_t ldc_versions[] = { {1, 0} };
121 
122 /* number of supported versions */
123 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
124 
125 /* Module State Pointer */
126 static ldc_soft_state_t *ldcssp;
127 
128 static struct modldrv md = {
129 	&mod_miscops,			/* This is a misc module */
130 	"sun4v LDC module v%I%",	/* Name of the module */
131 };
132 
133 static struct modlinkage ml = {
134 	MODREV_1,
135 	&md,
136 	NULL
137 };
138 
139 static uint64_t ldc_sup_minor;		/* Supported minor number */
140 static hsvc_info_t ldc_hsvc = {
141 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
142 };
143 
144 static uint64_t intr_sup_minor;		/* Supported minor number */
145 static hsvc_info_t intr_hsvc = {
146 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
147 };
148 
149 /*
150  * LDC framework supports mapping remote domain's memory
151  * either directly or via shadow memory pages. Default
152  * support is currently implemented via shadow copy.
153  * Direct map can be enabled by setting 'ldc_shmem_enabled'
154  */
155 int ldc_shmem_enabled = 0;
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * Pages exported for remote access over each channel are
173  * maintained in a table registered with the Hypervisor.
174  * The default number of entries in the table is set by
175  * 'ldc_maptable_entries'.
176  */
177 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
178 
179 /*
180  * LDC retry count and delay - when the HV returns EWOULDBLOCK
181  * the operation is retried 'ldc_max_retries' times with a
182  * wait of 'ldc_delay' usecs between each retry.
183  */
184 int ldc_max_retries = LDC_MAX_RETRIES;
185 clock_t ldc_delay = LDC_DELAY;
186 
187 #ifdef DEBUG
188 
189 /*
190  * Print debug messages
191  *
192  * set ldcdbg to 0x7 for enabling all msgs
193  * 0x4 - Warnings
194  * 0x2 - All debug messages
195  * 0x1 - Minimal debug messages
196  *
197  * set ldcdbgchan to the channel number you want to debug
198  * setting it to -1 prints debug messages for all channels
199  * NOTE: ldcdbgchan has no effect on error messages
200  */
201 
202 #define	DBG_ALL_LDCS -1
203 
204 int ldcdbg = 0x0;
205 int64_t ldcdbgchan = DBG_ALL_LDCS;
206 
207 static void
208 ldcdebug(int64_t id, const char *fmt, ...)
209 {
210 	char buf[512];
211 	va_list ap;
212 
213 	/*
214 	 * Do not return if,
215 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
216 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
217 	 * debug channel = caller specified channel
218 	 */
219 	if ((id != DBG_ALL_LDCS) &&
220 	    (ldcdbgchan != DBG_ALL_LDCS) &&
221 	    (ldcdbgchan != id)) {
222 		return;
223 	}
224 
225 	va_start(ap, fmt);
226 	(void) vsprintf(buf, fmt, ap);
227 	va_end(ap);
228 
229 	cmn_err(CE_CONT, "?%s\n", buf);
230 }
231 
232 #define	D1		\
233 if (ldcdbg & 0x01)	\
234 	ldcdebug
235 
236 #define	D2		\
237 if (ldcdbg & 0x02)	\
238 	ldcdebug
239 
240 #define	DWARN		\
241 if (ldcdbg & 0x04)	\
242 	ldcdebug
243 
244 #define	DUMP_PAYLOAD(id, addr)						\
245 {									\
246 	char buf[65*3];							\
247 	int i;								\
248 	uint8_t *src = (uint8_t *)addr;					\
249 	for (i = 0; i < 64; i++, src++)					\
250 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
251 	(void) sprintf(&buf[i * 3], "|\n");				\
252 	D2((id), "payload: %s", buf);					\
253 }
254 
255 #define	DUMP_LDC_PKT(c, s, addr)					\
256 {									\
257 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
258 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
259 	if (msg->type == LDC_DATA) {                                    \
260 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
261 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
262 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
263 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
264 	    (msg->env & LDC_LEN_MASK));					\
265 	} else { 							\
266 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
267 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
268 	} 								\
269 }
270 
271 #else
272 
273 #define	DBG_ALL_LDCS -1
274 
275 #define	D1
276 #define	D2
277 #define	DWARN
278 
279 #define	DUMP_PAYLOAD(id, addr)
280 #define	DUMP_LDC_PKT(c, s, addr)
281 
282 #endif
283 
284 #define	ZERO_PKT(p)			\
285 	bzero((p), sizeof (ldc_msg_t));
286 
287 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
288 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
289 
290 
291 int
292 _init(void)
293 {
294 	int status;
295 
296 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
297 	if (status != 0) {
298 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
299 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
300 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
301 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
302 		return (-1);
303 	}
304 
305 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
306 	if (status != 0) {
307 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
308 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
309 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
310 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
311 		(void) hsvc_unregister(&ldc_hsvc);
312 		return (-1);
313 	}
314 
315 	/* allocate soft state structure */
316 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
317 
318 	/* Link the module into the system */
319 	status = mod_install(&ml);
320 	if (status != 0) {
321 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
322 		return (status);
323 	}
324 
325 	/* Initialize the LDC state structure */
326 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
327 
328 	mutex_enter(&ldcssp->lock);
329 
330 	/* Create a cache for memory handles */
331 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
332 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
333 	if (ldcssp->memhdl_cache == NULL) {
334 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
335 		mutex_exit(&ldcssp->lock);
336 		return (-1);
337 	}
338 
339 	/* Create cache for memory segment structures */
340 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
341 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
342 	if (ldcssp->memseg_cache == NULL) {
343 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
344 		mutex_exit(&ldcssp->lock);
345 		return (-1);
346 	}
347 
348 
349 	ldcssp->channel_count = 0;
350 	ldcssp->channels_open = 0;
351 	ldcssp->chan_list = NULL;
352 	ldcssp->dring_list = NULL;
353 
354 	mutex_exit(&ldcssp->lock);
355 
356 	return (0);
357 }
358 
359 int
360 _info(struct modinfo *modinfop)
361 {
362 	/* Report status of the dynamically loadable driver module */
363 	return (mod_info(&ml, modinfop));
364 }
365 
366 int
367 _fini(void)
368 {
369 	int 		rv, status;
370 	ldc_chan_t 	*ldcp;
371 	ldc_dring_t 	*dringp;
372 	ldc_mem_info_t 	minfo;
373 
374 	/* Unlink the driver module from the system */
375 	status = mod_remove(&ml);
376 	if (status) {
377 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
378 		return (EIO);
379 	}
380 
381 	/* close and finalize channels */
382 	ldcp = ldcssp->chan_list;
383 	while (ldcp != NULL) {
384 		(void) ldc_close((ldc_handle_t)ldcp);
385 		(void) ldc_fini((ldc_handle_t)ldcp);
386 
387 		ldcp = ldcp->next;
388 	}
389 
390 	/* Free descriptor rings */
391 	dringp = ldcssp->dring_list;
392 	while (dringp != NULL) {
393 		dringp = dringp->next;
394 
395 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
396 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
397 			if (minfo.status == LDC_BOUND) {
398 				(void) ldc_mem_dring_unbind(
399 						(ldc_dring_handle_t)dringp);
400 			}
401 			if (minfo.status == LDC_MAPPED) {
402 				(void) ldc_mem_dring_unmap(
403 						(ldc_dring_handle_t)dringp);
404 			}
405 		}
406 
407 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
408 	}
409 	ldcssp->dring_list = NULL;
410 
411 	/* Destroy kmem caches */
412 	kmem_cache_destroy(ldcssp->memhdl_cache);
413 	kmem_cache_destroy(ldcssp->memseg_cache);
414 
415 	/*
416 	 * We have successfully "removed" the driver.
417 	 * Destroying soft states
418 	 */
419 	mutex_destroy(&ldcssp->lock);
420 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
421 
422 	(void) hsvc_unregister(&ldc_hsvc);
423 	(void) hsvc_unregister(&intr_hsvc);
424 
425 	return (status);
426 }
427 
428 /* -------------------------------------------------------------------------- */
429 
430 /*
431  * LDC Link Layer Internal Functions
432  */
433 
434 /*
435  * Translate HV Errors to sun4v error codes
436  */
437 static int
438 i_ldc_h2v_error(int h_error)
439 {
440 	switch (h_error) {
441 
442 	case	H_EOK:
443 		return (0);
444 
445 	case	H_ENORADDR:
446 		return (EFAULT);
447 
448 	case	H_EBADPGSZ:
449 	case	H_EINVAL:
450 		return (EINVAL);
451 
452 	case	H_EWOULDBLOCK:
453 		return (EWOULDBLOCK);
454 
455 	case	H_ENOACCESS:
456 	case	H_ENOMAP:
457 		return (EACCES);
458 
459 	case	H_EIO:
460 	case	H_ECPUERROR:
461 		return (EIO);
462 
463 	case	H_ENOTSUPPORTED:
464 		return (ENOTSUP);
465 
466 	case 	H_ETOOMANY:
467 		return (ENOSPC);
468 
469 	case	H_ECHANNEL:
470 		return (ECHRNG);
471 	default:
472 		break;
473 	}
474 
475 	return (EIO);
476 }
477 
478 /*
479  * Reconfigure the transmit queue
480  */
481 static int
482 i_ldc_txq_reconf(ldc_chan_t *ldcp)
483 {
484 	int rv;
485 
486 	ASSERT(MUTEX_HELD(&ldcp->lock));
487 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
488 
489 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
490 	if (rv) {
491 		cmn_err(CE_WARN,
492 		    "ldc_tx_qconf: (0x%lx) cannot set qconf", ldcp->id);
493 		return (EIO);
494 	}
495 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
496 	    &(ldcp->tx_tail), &(ldcp->link_state));
497 	if (rv) {
498 		cmn_err(CE_WARN,
499 		    "ldc_tx_get_state: (0x%lx) cannot get qptrs", ldcp->id);
500 		return (EIO);
501 	}
502 	D1(ldcp->id, "ldc_tx_get_state: (0x%llx) h=0x%llx,t=0x%llx,"
503 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
504 	    ldcp->link_state);
505 
506 	return (0);
507 }
508 
509 /*
510  * Reconfigure the receive queue
511  */
512 static int
513 i_ldc_rxq_reconf(ldc_chan_t *ldcp)
514 {
515 	int rv;
516 	uint64_t rx_head, rx_tail;
517 
518 	ASSERT(MUTEX_HELD(&ldcp->lock));
519 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
520 	    &(ldcp->link_state));
521 	if (rv) {
522 		cmn_err(CE_WARN,
523 		    "ldc_rx_getstate: (0x%lx) cannot get state",
524 		    ldcp->id);
525 		return (EIO);
526 	}
527 
528 	if (rx_head != rx_tail || ldcp->tstate > TS_READY) {
529 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
530 			ldcp->rx_q_entries);
531 		if (rv) {
532 			cmn_err(CE_WARN,
533 			    "ldc_rx_qconf: (0x%lx) cannot set qconf",
534 			    ldcp->id);
535 			return (EIO);
536 		}
537 		D1(ldcp->id, "ldc_rx_qconf: (0x%llx) completed qconf",
538 		    ldcp->id);
539 	}
540 
541 	return (0);
542 }
543 
544 /*
545  * Reset LDC state structure and its contents
546  */
547 static void
548 i_ldc_reset_state(ldc_chan_t *ldcp)
549 {
550 	ASSERT(MUTEX_HELD(&ldcp->lock));
551 	ldcp->last_msg_snt = LDC_INIT_SEQID;
552 	ldcp->last_ack_rcd = 0;
553 	ldcp->last_msg_rcd = 0;
554 	ldcp->tx_ackd_head = ldcp->tx_head;
555 	ldcp->next_vidx = 0;
556 	ldcp->hstate = 0;
557 	ldcp->tstate = TS_OPEN;
558 	ldcp->status = LDC_OPEN;
559 
560 	if (ldcp->link_state == LDC_CHANNEL_UP ||
561 	    ldcp->link_state == LDC_CHANNEL_RESET) {
562 
563 		if (ldcp->mode == LDC_MODE_RAW) {
564 			ldcp->status = LDC_UP;
565 			ldcp->tstate = TS_UP;
566 		} else {
567 			ldcp->status = LDC_READY;
568 			ldcp->tstate |= TS_LINK_READY;
569 		}
570 	}
571 }
572 
573 /*
574  * Reset a LDC channel
575  */
576 static void
577 i_ldc_reset(ldc_chan_t *ldcp)
578 {
579 	D2(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
580 
581 	ASSERT(MUTEX_HELD(&ldcp->lock));
582 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
583 
584 	(void) i_ldc_txq_reconf(ldcp);
585 	(void) i_ldc_rxq_reconf(ldcp);
586 	i_ldc_reset_state(ldcp);
587 }
588 
589 
590 /*
591  * Clear pending interrupts
592  */
593 static void
594 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
595 {
596 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
597 
598 	ASSERT(MUTEX_HELD(&ldcp->lock));
599 
600 	if (cinfo->dip) {
601 		/* check Tx interrupt */
602 		if (itype == CNEX_TX_INTR) {
603 			if (ldcp->tx_intr_pending)
604 				ldcp->tx_intr_pending = B_FALSE;
605 			else
606 				return;
607 		}
608 		/* check Rx interrupt */
609 		if (itype == CNEX_RX_INTR) {
610 			if (ldcp->rx_intr_pending)
611 				ldcp->rx_intr_pending = B_FALSE;
612 			else
613 				return;
614 		}
615 
616 		(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
617 		D2(ldcp->id,
618 		    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
619 		    ldcp->id, itype);
620 	}
621 }
622 
623 /*
624  * Set the receive queue head
625  * Resets connection and returns an error if it fails.
626  */
627 static int
628 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
629 {
630 	int 	rv;
631 	int 	retries;
632 
633 	ASSERT(MUTEX_HELD(&ldcp->lock));
634 	for (retries = 0; retries < ldc_max_retries; retries++) {
635 
636 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
637 			return (0);
638 
639 		if (rv != H_EWOULDBLOCK)
640 			break;
641 
642 		/* wait for ldc_delay usecs */
643 		drv_usecwait(ldc_delay);
644 	}
645 
646 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
647 		ldcp->id, head);
648 	mutex_enter(&ldcp->tx_lock);
649 	i_ldc_reset(ldcp);
650 	mutex_exit(&ldcp->tx_lock);
651 
652 	return (ECONNRESET);
653 }
654 
655 
656 /*
657  * Returns the tx_tail to be used for transfer
658  * Re-reads the TX queue ptrs if and only if the
659  * the cached head and tail are equal (queue is full)
660  */
661 static int
662 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
663 {
664 	int 		rv;
665 	uint64_t 	current_head, new_tail;
666 
667 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
668 	/* Read the head and tail ptrs from HV */
669 	rv = hv_ldc_tx_get_state(ldcp->id,
670 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
671 	if (rv) {
672 		cmn_err(CE_WARN,
673 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
674 		    ldcp->id);
675 		return (EIO);
676 	}
677 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
678 		DWARN(DBG_ALL_LDCS,
679 		    "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
680 		    ldcp->id);
681 		return (ECONNRESET);
682 	}
683 
684 	/* In reliable mode, check against last ACKd msg */
685 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
686 		ldcp->mode == LDC_MODE_STREAM)
687 		? ldcp->tx_ackd_head : ldcp->tx_head;
688 
689 	/* increment the tail */
690 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
691 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
692 
693 	if (new_tail == current_head) {
694 		DWARN(ldcp->id,
695 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
696 		    ldcp->id);
697 		return (EWOULDBLOCK);
698 	}
699 
700 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
701 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
702 
703 	*tail = ldcp->tx_tail;
704 	return (0);
705 }
706 
707 /*
708  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
709  * and retry ldc_max_retries times before returning an error.
710  * Returns 0, EWOULDBLOCK or EIO
711  */
712 static int
713 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
714 {
715 	int		rv, retval = EWOULDBLOCK;
716 	int 		retries;
717 
718 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
719 	for (retries = 0; retries < ldc_max_retries; retries++) {
720 
721 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
722 			retval = 0;
723 			break;
724 		}
725 		if (rv != H_EWOULDBLOCK) {
726 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
727 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
728 			retval = EIO;
729 			break;
730 		}
731 
732 		/* wait for ldc_delay usecs */
733 		drv_usecwait(ldc_delay);
734 	}
735 	return (retval);
736 }
737 
738 /*
739  * Send a LDC message
740  */
741 static int
742 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
743     uint8_t ctrlmsg)
744 {
745 	int		rv;
746 	ldc_msg_t 	*pkt;
747 	uint64_t	tx_tail;
748 	uint32_t	curr_seqid = ldcp->last_msg_snt;
749 
750 	/* Obtain Tx lock */
751 	mutex_enter(&ldcp->tx_lock);
752 
753 	/* get the current tail for the message */
754 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
755 	if (rv) {
756 		DWARN(ldcp->id,
757 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
758 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
759 		    ldcp->id, pkttype, subtype, ctrlmsg);
760 		mutex_exit(&ldcp->tx_lock);
761 		return (rv);
762 	}
763 
764 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
765 	ZERO_PKT(pkt);
766 
767 	/* Initialize the packet */
768 	pkt->type = pkttype;
769 	pkt->stype = subtype;
770 	pkt->ctrl = ctrlmsg;
771 
772 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
773 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
774 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
775 		curr_seqid++;
776 		if (ldcp->mode != LDC_MODE_RAW) {
777 			pkt->seqid = curr_seqid;
778 			pkt->ackid = ldcp->last_msg_rcd;
779 		}
780 	}
781 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
782 
783 	/* initiate the send by calling into HV and set the new tail */
784 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
785 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
786 
787 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
788 	if (rv) {
789 		DWARN(ldcp->id,
790 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
791 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
792 		    ldcp->id, pkttype, subtype, ctrlmsg);
793 		mutex_exit(&ldcp->tx_lock);
794 		return (EIO);
795 	}
796 
797 	ldcp->last_msg_snt = curr_seqid;
798 	ldcp->tx_tail = tx_tail;
799 
800 	mutex_exit(&ldcp->tx_lock);
801 	return (0);
802 }
803 
804 /*
805  * Checks if packet was received in right order
806  * in the case of a reliable link.
807  * Returns 0 if in order, else EIO
808  */
809 static int
810 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
811 {
812 	/* No seqid checking for RAW mode */
813 	if (ldcp->mode == LDC_MODE_RAW)
814 		return (0);
815 
816 	/* No seqid checking for version, RTS, RTR message */
817 	if (msg->ctrl == LDC_VER ||
818 	    msg->ctrl == LDC_RTS ||
819 	    msg->ctrl == LDC_RTR)
820 		return (0);
821 
822 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
823 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
824 		DWARN(ldcp->id,
825 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
826 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
827 		    (ldcp->last_msg_rcd + 1));
828 		return (EIO);
829 	}
830 
831 	return (0);
832 }
833 
834 
835 /*
836  * Process an incoming version ctrl message
837  */
838 static int
839 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
840 {
841 	int 		rv = 0, idx = ldcp->next_vidx;
842 	ldc_msg_t 	*pkt;
843 	uint64_t	tx_tail;
844 	ldc_ver_t	*rcvd_ver;
845 
846 	/* get the received version */
847 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
848 
849 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
850 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
851 
852 	/* Obtain Tx lock */
853 	mutex_enter(&ldcp->tx_lock);
854 
855 	switch (msg->stype) {
856 	case LDC_INFO:
857 
858 		/* get the current tail and pkt for the response */
859 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
860 		if (rv != 0) {
861 			DWARN(ldcp->id,
862 			    "i_ldc_process_VER: (0x%llx) err sending "
863 			    "version ACK/NACK\n", ldcp->id);
864 			i_ldc_reset(ldcp);
865 			mutex_exit(&ldcp->tx_lock);
866 			return (ECONNRESET);
867 		}
868 
869 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
870 		ZERO_PKT(pkt);
871 
872 		/* initialize the packet */
873 		pkt->type = LDC_CTRL;
874 		pkt->ctrl = LDC_VER;
875 
876 		for (;;) {
877 
878 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
879 			    rcvd_ver->major, rcvd_ver->minor,
880 			    ldc_versions[idx].major, ldc_versions[idx].minor);
881 
882 			if (rcvd_ver->major == ldc_versions[idx].major) {
883 				/* major version match - ACK version */
884 				pkt->stype = LDC_ACK;
885 
886 				/*
887 				 * lower minor version to the one this endpt
888 				 * supports, if necessary
889 				 */
890 				if (rcvd_ver->minor > ldc_versions[idx].minor)
891 					rcvd_ver->minor =
892 						ldc_versions[idx].minor;
893 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
894 
895 				break;
896 			}
897 
898 			if (rcvd_ver->major > ldc_versions[idx].major) {
899 
900 				D1(ldcp->id, "i_ldc_process_VER: using next"
901 				    " lower idx=%d, v%u.%u\n", idx,
902 				    ldc_versions[idx].major,
903 				    ldc_versions[idx].minor);
904 
905 				/* nack with next lower version */
906 				pkt->stype = LDC_NACK;
907 				bcopy(&ldc_versions[idx], pkt->udata,
908 				    sizeof (ldc_versions[idx]));
909 				ldcp->next_vidx = idx;
910 				break;
911 			}
912 
913 			/* next major version */
914 			idx++;
915 
916 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
917 
918 			if (idx == LDC_NUM_VERS) {
919 				/* no version match - send NACK */
920 				pkt->stype = LDC_NACK;
921 				bzero(pkt->udata, sizeof (ldc_ver_t));
922 				ldcp->next_vidx = 0;
923 				break;
924 			}
925 		}
926 
927 		/* initiate the send by calling into HV and set the new tail */
928 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
929 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
930 
931 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
932 		if (rv == 0) {
933 			ldcp->tx_tail = tx_tail;
934 			if (pkt->stype == LDC_ACK) {
935 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
936 				    " version ACK\n", ldcp->id);
937 				/* Save the ACK'd version */
938 				ldcp->version.major = rcvd_ver->major;
939 				ldcp->version.minor = rcvd_ver->minor;
940 				ldcp->hstate |= TS_RCVD_VER;
941 				ldcp->tstate |= TS_VER_DONE;
942 				DWARN(DBG_ALL_LDCS,
943 				    "(0x%llx) Agreed on version v%u.%u\n",
944 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
945 			}
946 		} else {
947 			DWARN(ldcp->id,
948 			    "i_ldc_process_VER: (0x%llx) error sending "
949 			    "ACK/NACK\n", ldcp->id);
950 			i_ldc_reset(ldcp);
951 			mutex_exit(&ldcp->tx_lock);
952 			return (ECONNRESET);
953 		}
954 
955 		break;
956 
957 	case LDC_ACK:
958 		/* SUCCESS - we have agreed on a version */
959 		ldcp->version.major = rcvd_ver->major;
960 		ldcp->version.minor = rcvd_ver->minor;
961 		ldcp->tstate |= TS_VER_DONE;
962 
963 		D1(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n",
964 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
965 
966 		/* initiate RTS-RTR-RDX handshake */
967 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
968 		if (rv) {
969 			DWARN(ldcp->id,
970 			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
971 			    ldcp->id);
972 			i_ldc_reset(ldcp);
973 			mutex_exit(&ldcp->tx_lock);
974 			return (ECONNRESET);
975 		}
976 
977 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
978 		ZERO_PKT(pkt);
979 
980 		pkt->type = LDC_CTRL;
981 		pkt->stype = LDC_INFO;
982 		pkt->ctrl = LDC_RTS;
983 		pkt->env = ldcp->mode;
984 		if (ldcp->mode != LDC_MODE_RAW)
985 			pkt->seqid = LDC_INIT_SEQID;
986 
987 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
988 
989 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
990 
991 		/* initiate the send by calling into HV and set the new tail */
992 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
993 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
994 
995 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
996 		if (rv) {
997 			D2(ldcp->id,
998 			    "i_ldc_process_VER: (0x%llx) no listener\n",
999 			    ldcp->id);
1000 			i_ldc_reset(ldcp);
1001 			mutex_exit(&ldcp->tx_lock);
1002 			return (ECONNRESET);
1003 		}
1004 
1005 		ldcp->tx_tail = tx_tail;
1006 		ldcp->hstate |= TS_SENT_RTS;
1007 
1008 		break;
1009 
1010 	case LDC_NACK:
1011 		/* check if version in NACK is zero */
1012 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1013 			/* version handshake failure */
1014 			DWARN(DBG_ALL_LDCS,
1015 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1016 			    ldcp->id);
1017 			i_ldc_reset(ldcp);
1018 			mutex_exit(&ldcp->tx_lock);
1019 			return (ECONNRESET);
1020 		}
1021 
1022 		/* get the current tail and pkt for the response */
1023 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1024 		if (rv != 0) {
1025 			cmn_err(CE_NOTE,
1026 			    "i_ldc_process_VER: (0x%lx) err sending "
1027 			    "version ACK/NACK\n", ldcp->id);
1028 			i_ldc_reset(ldcp);
1029 			mutex_exit(&ldcp->tx_lock);
1030 			return (ECONNRESET);
1031 		}
1032 
1033 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1034 		ZERO_PKT(pkt);
1035 
1036 		/* initialize the packet */
1037 		pkt->type = LDC_CTRL;
1038 		pkt->ctrl = LDC_VER;
1039 		pkt->stype = LDC_INFO;
1040 
1041 		/* check ver in NACK msg has a match */
1042 		for (;;) {
1043 			if (rcvd_ver->major == ldc_versions[idx].major) {
1044 				/*
1045 				 * major version match - resubmit request
1046 				 * if lower minor version to the one this endpt
1047 				 * supports, if necessary
1048 				 */
1049 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1050 					rcvd_ver->minor =
1051 						ldc_versions[idx].minor;
1052 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1053 				break;
1054 
1055 			}
1056 
1057 			if (rcvd_ver->major > ldc_versions[idx].major) {
1058 
1059 				D1(ldcp->id, "i_ldc_process_VER: using next"
1060 				    " lower idx=%d, v%u.%u\n", idx,
1061 				    ldc_versions[idx].major,
1062 				    ldc_versions[idx].minor);
1063 
1064 				/* send next lower version */
1065 				bcopy(&ldc_versions[idx], pkt->udata,
1066 				    sizeof (ldc_versions[idx]));
1067 				ldcp->next_vidx = idx;
1068 				break;
1069 			}
1070 
1071 			/* next version */
1072 			idx++;
1073 
1074 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1075 
1076 			if (idx == LDC_NUM_VERS) {
1077 				/* no version match - terminate */
1078 				ldcp->next_vidx = 0;
1079 				mutex_exit(&ldcp->tx_lock);
1080 				return (ECONNRESET);
1081 			}
1082 		}
1083 
1084 		/* initiate the send by calling into HV and set the new tail */
1085 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1086 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1087 
1088 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1089 		if (rv == 0) {
1090 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1091 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1092 			    ldc_versions[idx].minor);
1093 			ldcp->tx_tail = tx_tail;
1094 		} else {
1095 			cmn_err(CE_NOTE,
1096 			    "i_ldc_process_VER: (0x%lx) error sending version"
1097 			    "INFO\n", ldcp->id);
1098 			i_ldc_reset(ldcp);
1099 			mutex_exit(&ldcp->tx_lock);
1100 			return (ECONNRESET);
1101 		}
1102 
1103 		break;
1104 	}
1105 
1106 	mutex_exit(&ldcp->tx_lock);
1107 	return (rv);
1108 }
1109 
1110 
1111 /*
1112  * Process an incoming RTS ctrl message
1113  */
1114 static int
1115 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1116 {
1117 	int 		rv = 0;
1118 	ldc_msg_t 	*pkt;
1119 	uint64_t	tx_tail;
1120 	boolean_t	sent_NACK = B_FALSE;
1121 
1122 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1123 
1124 	switch (msg->stype) {
1125 	case LDC_NACK:
1126 		DWARN(ldcp->id,
1127 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1128 		    ldcp->id);
1129 
1130 		/* Reset the channel -- as we cannot continue */
1131 		mutex_enter(&ldcp->tx_lock);
1132 		i_ldc_reset(ldcp);
1133 		mutex_exit(&ldcp->tx_lock);
1134 		rv = ECONNRESET;
1135 		break;
1136 
1137 	case LDC_INFO:
1138 
1139 		/* check mode */
1140 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1141 			cmn_err(CE_NOTE,
1142 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1143 			    ldcp->id);
1144 			/*
1145 			 * send NACK in response to MODE message
1146 			 * get the current tail for the response
1147 			 */
1148 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1149 			if (rv) {
1150 				/* if cannot send NACK - reset channel */
1151 				mutex_enter(&ldcp->tx_lock);
1152 				i_ldc_reset(ldcp);
1153 				mutex_exit(&ldcp->tx_lock);
1154 				rv = ECONNRESET;
1155 				break;
1156 			}
1157 			sent_NACK = B_TRUE;
1158 		}
1159 		break;
1160 	default:
1161 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1162 		    ldcp->id);
1163 		mutex_enter(&ldcp->tx_lock);
1164 		i_ldc_reset(ldcp);
1165 		mutex_exit(&ldcp->tx_lock);
1166 		rv = ECONNRESET;
1167 		break;
1168 	}
1169 
1170 	/*
1171 	 * If either the connection was reset (when rv != 0) or
1172 	 * a NACK was sent, we return. In the case of a NACK
1173 	 * we dont want to consume the packet that came in but
1174 	 * not record that we received the RTS
1175 	 */
1176 	if (rv || sent_NACK)
1177 		return (rv);
1178 
1179 	/* record RTS received */
1180 	ldcp->hstate |= TS_RCVD_RTS;
1181 
1182 	/* store initial SEQID info */
1183 	ldcp->last_msg_snt = msg->seqid;
1184 
1185 	/* Obtain Tx lock */
1186 	mutex_enter(&ldcp->tx_lock);
1187 
1188 	/* get the current tail for the response */
1189 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1190 	if (rv != 0) {
1191 		cmn_err(CE_NOTE,
1192 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1193 		    ldcp->id);
1194 		i_ldc_reset(ldcp);
1195 		mutex_exit(&ldcp->tx_lock);
1196 		return (ECONNRESET);
1197 	}
1198 
1199 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1200 	ZERO_PKT(pkt);
1201 
1202 	/* initialize the packet */
1203 	pkt->type = LDC_CTRL;
1204 	pkt->stype = LDC_INFO;
1205 	pkt->ctrl = LDC_RTR;
1206 	pkt->env = ldcp->mode;
1207 	if (ldcp->mode != LDC_MODE_RAW)
1208 		pkt->seqid = LDC_INIT_SEQID;
1209 
1210 	ldcp->last_msg_rcd = msg->seqid;
1211 
1212 	/* initiate the send by calling into HV and set the new tail */
1213 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1214 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1215 
1216 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1217 	if (rv == 0) {
1218 		D2(ldcp->id,
1219 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1220 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1221 
1222 		ldcp->tx_tail = tx_tail;
1223 		ldcp->hstate |= TS_SENT_RTR;
1224 
1225 	} else {
1226 		cmn_err(CE_NOTE,
1227 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1228 		    ldcp->id);
1229 		i_ldc_reset(ldcp);
1230 		mutex_exit(&ldcp->tx_lock);
1231 		return (ECONNRESET);
1232 	}
1233 
1234 	mutex_exit(&ldcp->tx_lock);
1235 	return (0);
1236 }
1237 
1238 /*
1239  * Process an incoming RTR ctrl message
1240  */
1241 static int
1242 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1243 {
1244 	int 		rv = 0;
1245 	boolean_t	sent_NACK = B_FALSE;
1246 
1247 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1248 
1249 	switch (msg->stype) {
1250 	case LDC_NACK:
1251 		/* RTR NACK received */
1252 		DWARN(ldcp->id,
1253 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1254 		    ldcp->id);
1255 
1256 		/* Reset the channel -- as we cannot continue */
1257 		mutex_enter(&ldcp->tx_lock);
1258 		i_ldc_reset(ldcp);
1259 		mutex_exit(&ldcp->tx_lock);
1260 		rv = ECONNRESET;
1261 
1262 		break;
1263 
1264 	case LDC_INFO:
1265 
1266 		/* check mode */
1267 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1268 			DWARN(ldcp->id,
1269 			    "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
1270 			    ldcp->id);
1271 			/*
1272 			 * send NACK in response to MODE message
1273 			 * get the current tail for the response
1274 			 */
1275 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1276 			if (rv) {
1277 				/* if cannot send NACK - reset channel */
1278 				mutex_enter(&ldcp->tx_lock);
1279 				i_ldc_reset(ldcp);
1280 				mutex_exit(&ldcp->tx_lock);
1281 				rv = ECONNRESET;
1282 				break;
1283 			}
1284 			sent_NACK = B_TRUE;
1285 		}
1286 		break;
1287 
1288 	default:
1289 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1290 		    ldcp->id);
1291 
1292 		/* Reset the channel -- as we cannot continue */
1293 		mutex_enter(&ldcp->tx_lock);
1294 		i_ldc_reset(ldcp);
1295 		mutex_exit(&ldcp->tx_lock);
1296 		rv = ECONNRESET;
1297 		break;
1298 	}
1299 
1300 	/*
1301 	 * If either the connection was reset (when rv != 0) or
1302 	 * a NACK was sent, we return. In the case of a NACK
1303 	 * we dont want to consume the packet that came in but
1304 	 * not record that we received the RTR
1305 	 */
1306 	if (rv || sent_NACK)
1307 		return (rv);
1308 
1309 	ldcp->last_msg_snt = msg->seqid;
1310 	ldcp->hstate |= TS_RCVD_RTR;
1311 
1312 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1313 	if (rv) {
1314 		cmn_err(CE_NOTE,
1315 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1316 		    ldcp->id);
1317 		mutex_enter(&ldcp->tx_lock);
1318 		i_ldc_reset(ldcp);
1319 		mutex_exit(&ldcp->tx_lock);
1320 		return (ECONNRESET);
1321 	}
1322 	D2(ldcp->id,
1323 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1324 
1325 	ldcp->hstate |= TS_SENT_RDX;
1326 	ldcp->tstate |= TS_HSHAKE_DONE;
1327 	ldcp->status = LDC_UP;
1328 
1329 	DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1330 
1331 	return (0);
1332 }
1333 
1334 
1335 /*
1336  * Process an incoming RDX ctrl message
1337  */
1338 static int
1339 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1340 {
1341 	int	rv = 0;
1342 
1343 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1344 
1345 	switch (msg->stype) {
1346 	case LDC_NACK:
1347 		/* RDX NACK received */
1348 		DWARN(ldcp->id,
1349 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1350 		    ldcp->id);
1351 
1352 		/* Reset the channel -- as we cannot continue */
1353 		mutex_enter(&ldcp->tx_lock);
1354 		i_ldc_reset(ldcp);
1355 		mutex_exit(&ldcp->tx_lock);
1356 		rv = ECONNRESET;
1357 
1358 		break;
1359 
1360 	case LDC_INFO:
1361 
1362 		/*
1363 		 * if channel is UP and a RDX received after data transmission
1364 		 * has commenced it is an error
1365 		 */
1366 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1367 			DWARN(DBG_ALL_LDCS,
1368 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1369 			    " - LDC reset\n", ldcp->id);
1370 			mutex_enter(&ldcp->tx_lock);
1371 			i_ldc_reset(ldcp);
1372 			mutex_exit(&ldcp->tx_lock);
1373 			return (ECONNRESET);
1374 		}
1375 
1376 		ldcp->hstate |= TS_RCVD_RDX;
1377 		ldcp->tstate |= TS_HSHAKE_DONE;
1378 		ldcp->status = LDC_UP;
1379 
1380 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1381 		break;
1382 
1383 	default:
1384 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1385 		    ldcp->id);
1386 
1387 		/* Reset the channel -- as we cannot continue */
1388 		mutex_enter(&ldcp->tx_lock);
1389 		i_ldc_reset(ldcp);
1390 		mutex_exit(&ldcp->tx_lock);
1391 		rv = ECONNRESET;
1392 		break;
1393 	}
1394 
1395 	return (rv);
1396 }
1397 
1398 /*
1399  * Process an incoming ACK for a data packet
1400  */
1401 static int
1402 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1403 {
1404 	int		rv;
1405 	uint64_t 	tx_head;
1406 	ldc_msg_t	*pkt;
1407 
1408 	/* Obtain Tx lock */
1409 	mutex_enter(&ldcp->tx_lock);
1410 
1411 	/*
1412 	 * Read the current Tx head and tail
1413 	 */
1414 	rv = hv_ldc_tx_get_state(ldcp->id,
1415 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1416 	if (rv != 0) {
1417 		cmn_err(CE_WARN,
1418 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1419 		    ldcp->id);
1420 
1421 		/* Reset the channel -- as we cannot continue */
1422 		i_ldc_reset(ldcp);
1423 		mutex_exit(&ldcp->tx_lock);
1424 		return (ECONNRESET);
1425 	}
1426 
1427 	/*
1428 	 * loop from where the previous ACK location was to the
1429 	 * current head location. This is how far the HV has
1430 	 * actually send pkts. Pkts between head and tail are
1431 	 * yet to be sent by HV.
1432 	 */
1433 	tx_head = ldcp->tx_ackd_head;
1434 	for (;;) {
1435 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1436 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1437 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1438 
1439 		if (pkt->seqid == msg->ackid) {
1440 			D2(ldcp->id,
1441 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1442 			    ldcp->id);
1443 			ldcp->last_ack_rcd = msg->ackid;
1444 			ldcp->tx_ackd_head = tx_head;
1445 			break;
1446 		}
1447 		if (tx_head == ldcp->tx_head) {
1448 			/* could not find packet */
1449 			DWARN(ldcp->id,
1450 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1451 			    ldcp->id);
1452 
1453 			/* Reset the channel -- as we cannot continue */
1454 			i_ldc_reset(ldcp);
1455 			mutex_exit(&ldcp->tx_lock);
1456 			return (ECONNRESET);
1457 		}
1458 	}
1459 
1460 	mutex_exit(&ldcp->tx_lock);
1461 	return (0);
1462 }
1463 
1464 /*
1465  * Process incoming control message
1466  * Return 0 - session can continue
1467  *        EAGAIN - reprocess packet - state was changed
1468  *	  ECONNRESET - channel was reset
1469  */
1470 static int
1471 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1472 {
1473 	int 		rv = 0;
1474 
1475 	switch (ldcp->tstate) {
1476 
1477 	case TS_OPEN:
1478 	case TS_READY:
1479 
1480 		switch (msg->ctrl & LDC_CTRL_MASK) {
1481 		case LDC_VER:
1482 			/* process version message */
1483 			rv = i_ldc_process_VER(ldcp, msg);
1484 			break;
1485 		default:
1486 			DWARN(ldcp->id,
1487 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1488 			    "tstate=0x%x\n", ldcp->id,
1489 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1490 			break;
1491 		}
1492 
1493 		break;
1494 
1495 	case TS_VREADY:
1496 
1497 		switch (msg->ctrl & LDC_CTRL_MASK) {
1498 		case LDC_VER:
1499 			/* peer is redoing version negotiation */
1500 			mutex_enter(&ldcp->tx_lock);
1501 			(void) i_ldc_txq_reconf(ldcp);
1502 			i_ldc_reset_state(ldcp);
1503 			mutex_exit(&ldcp->tx_lock);
1504 			rv = EAGAIN;
1505 			break;
1506 		case LDC_RTS:
1507 			/* process RTS message */
1508 			rv = i_ldc_process_RTS(ldcp, msg);
1509 			break;
1510 		case LDC_RTR:
1511 			/* process RTR message */
1512 			rv = i_ldc_process_RTR(ldcp, msg);
1513 			break;
1514 		case LDC_RDX:
1515 			/* process RDX message */
1516 			rv = i_ldc_process_RDX(ldcp, msg);
1517 			break;
1518 		default:
1519 			DWARN(ldcp->id,
1520 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1521 			    "tstate=0x%x\n", ldcp->id,
1522 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1523 			break;
1524 		}
1525 
1526 		break;
1527 
1528 	case TS_UP:
1529 
1530 		switch (msg->ctrl & LDC_CTRL_MASK) {
1531 		case LDC_VER:
1532 			DWARN(ldcp->id,
1533 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1534 			    "- LDC reset\n", ldcp->id);
1535 			/* peer is redoing version negotiation */
1536 			mutex_enter(&ldcp->tx_lock);
1537 			(void) i_ldc_txq_reconf(ldcp);
1538 			i_ldc_reset_state(ldcp);
1539 			mutex_exit(&ldcp->tx_lock);
1540 			rv = EAGAIN;
1541 			break;
1542 
1543 		case LDC_RDX:
1544 			/* process RDX message */
1545 			rv = i_ldc_process_RDX(ldcp, msg);
1546 			break;
1547 
1548 		default:
1549 			DWARN(ldcp->id,
1550 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1551 			    "tstate=0x%x\n", ldcp->id,
1552 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1553 			break;
1554 		}
1555 	}
1556 
1557 	return (rv);
1558 }
1559 
1560 /*
1561  * Register channel with the channel nexus
1562  */
1563 static int
1564 i_ldc_register_channel(ldc_chan_t *ldcp)
1565 {
1566 	int		rv = 0;
1567 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1568 
1569 	if (cinfo->dip == NULL) {
1570 		DWARN(ldcp->id,
1571 		    "i_ldc_register_channel: cnex has not registered\n");
1572 		return (EAGAIN);
1573 	}
1574 
1575 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1576 	if (rv) {
1577 		DWARN(ldcp->id,
1578 		    "i_ldc_register_channel: cannot register channel\n");
1579 		return (rv);
1580 	}
1581 
1582 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1583 	    i_ldc_tx_hdlr, ldcp, NULL);
1584 	if (rv) {
1585 		DWARN(ldcp->id,
1586 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1587 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1588 		return (rv);
1589 	}
1590 
1591 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1592 	    i_ldc_rx_hdlr, ldcp, NULL);
1593 	if (rv) {
1594 		DWARN(ldcp->id,
1595 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1596 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1597 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1598 		return (rv);
1599 	}
1600 
1601 	ldcp->tstate |= TS_CNEX_RDY;
1602 
1603 	return (0);
1604 }
1605 
1606 /*
1607  * Unregister a channel with the channel nexus
1608  */
1609 static int
1610 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1611 {
1612 	int		rv = 0;
1613 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1614 
1615 	if (cinfo->dip == NULL) {
1616 		DWARN(ldcp->id,
1617 		    "i_ldc_unregister_channel: cnex has not registered\n");
1618 		return (EAGAIN);
1619 	}
1620 
1621 	if (ldcp->tstate & TS_CNEX_RDY) {
1622 
1623 		/* Remove the Rx interrupt */
1624 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1625 		if (rv) {
1626 			DWARN(ldcp->id,
1627 			    "i_ldc_unregister_channel: err removing Rx intr\n");
1628 			return (rv);
1629 		}
1630 
1631 		/* Remove the Tx interrupt */
1632 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1633 		if (rv) {
1634 			DWARN(ldcp->id,
1635 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1636 			return (rv);
1637 		}
1638 
1639 		/* Unregister the channel */
1640 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1641 		if (rv) {
1642 			DWARN(ldcp->id,
1643 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1644 			return (rv);
1645 		}
1646 
1647 		ldcp->tstate &= ~TS_CNEX_RDY;
1648 	}
1649 
1650 	return (0);
1651 }
1652 
1653 
1654 /*
1655  * LDC transmit interrupt handler
1656  *    triggered for chanel up/down/reset events
1657  *    and Tx queue content changes
1658  */
1659 static uint_t
1660 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1661 {
1662 	_NOTE(ARGUNUSED(arg2))
1663 
1664 	int 		rv;
1665 	ldc_chan_t 	*ldcp;
1666 	boolean_t 	notify_client = B_FALSE;
1667 	uint64_t	notify_event = 0;
1668 
1669 	/* Get the channel for which interrupt was received */
1670 	ASSERT(arg1 != NULL);
1671 	ldcp = (ldc_chan_t *)arg1;
1672 
1673 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1674 	    ldcp->id, ldcp);
1675 
1676 	/* Lock channel */
1677 	mutex_enter(&ldcp->lock);
1678 
1679 	/* Obtain Tx lock */
1680 	mutex_enter(&ldcp->tx_lock);
1681 
1682 	/* mark interrupt as pending */
1683 	ldcp->tx_intr_pending = B_TRUE;
1684 
1685 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1686 	    &ldcp->link_state);
1687 	if (rv) {
1688 		cmn_err(CE_WARN,
1689 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1690 		    ldcp->id, rv);
1691 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1692 		mutex_exit(&ldcp->tx_lock);
1693 		mutex_exit(&ldcp->lock);
1694 		return (DDI_INTR_CLAIMED);
1695 	}
1696 
1697 	/*
1698 	 * reset the channel state if the channel went down
1699 	 * (other side unconfigured queue) or channel was reset
1700 	 * (other side reconfigured its queue)
1701 	 */
1702 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1703 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1704 		i_ldc_reset(ldcp);
1705 		notify_client = B_TRUE;
1706 		notify_event = LDC_EVT_DOWN;
1707 	}
1708 
1709 	if (ldcp->link_state == LDC_CHANNEL_RESET) {
1710 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1711 		i_ldc_reset(ldcp);
1712 		notify_client = B_TRUE;
1713 		notify_event = LDC_EVT_RESET;
1714 	}
1715 
1716 	if (ldcp->tstate == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) {
1717 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1718 		notify_client = B_TRUE;
1719 		notify_event = LDC_EVT_RESET;
1720 		ldcp->tstate |= TS_LINK_READY;
1721 		ldcp->status = LDC_READY;
1722 	}
1723 
1724 	/* if callbacks are disabled, do not notify */
1725 	if (!ldcp->cb_enabled)
1726 		notify_client = B_FALSE;
1727 
1728 	if (notify_client)
1729 		ldcp->cb_inprogress = B_TRUE;
1730 
1731 	/* Unlock channel */
1732 	mutex_exit(&ldcp->tx_lock);
1733 	mutex_exit(&ldcp->lock);
1734 
1735 	if (notify_client) {
1736 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1737 		if (rv) {
1738 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1739 			    "failure", ldcp->id);
1740 		}
1741 		mutex_enter(&ldcp->lock);
1742 		ldcp->cb_inprogress = B_FALSE;
1743 		mutex_exit(&ldcp->lock);
1744 	}
1745 
1746 	mutex_enter(&ldcp->lock);
1747 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1748 	mutex_exit(&ldcp->lock);
1749 
1750 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1751 
1752 	return (DDI_INTR_CLAIMED);
1753 }
1754 
1755 /*
1756  * LDC receive interrupt handler
1757  *    triggered for channel with data pending to read
1758  *    i.e. Rx queue content changes
1759  */
1760 static uint_t
1761 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1762 {
1763 	_NOTE(ARGUNUSED(arg2))
1764 
1765 	int		rv;
1766 	uint64_t 	rx_head, rx_tail;
1767 	ldc_msg_t 	*msg;
1768 	ldc_chan_t 	*ldcp;
1769 	boolean_t 	notify_client = B_FALSE;
1770 	uint64_t	notify_event = 0;
1771 	uint64_t	first_fragment = 0;
1772 
1773 	/* Get the channel for which interrupt was received */
1774 	if (arg1 == NULL) {
1775 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1776 		return (DDI_INTR_UNCLAIMED);
1777 	}
1778 
1779 	ldcp = (ldc_chan_t *)arg1;
1780 
1781 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1782 	    ldcp->id, ldcp);
1783 
1784 	/* Lock channel */
1785 	mutex_enter(&ldcp->lock);
1786 
1787 	/* mark interrupt as pending */
1788 	ldcp->rx_intr_pending = B_TRUE;
1789 
1790 	/*
1791 	 * Read packet(s) from the queue
1792 	 */
1793 	for (;;) {
1794 
1795 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1796 		    &ldcp->link_state);
1797 		if (rv) {
1798 			cmn_err(CE_WARN,
1799 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1800 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1801 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1802 			mutex_exit(&ldcp->lock);
1803 			return (DDI_INTR_CLAIMED);
1804 		}
1805 
1806 		/*
1807 		 * reset the channel state if the channel went down
1808 		 * (other side unconfigured queue) or channel was reset
1809 		 * (other side reconfigured its queue
1810 		 */
1811 		if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1812 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link down\n",
1813 			    ldcp->id);
1814 			mutex_enter(&ldcp->tx_lock);
1815 			i_ldc_reset(ldcp);
1816 			mutex_exit(&ldcp->tx_lock);
1817 			notify_client = B_TRUE;
1818 			notify_event = LDC_EVT_DOWN;
1819 			break;
1820 		}
1821 		if (ldcp->link_state == LDC_CHANNEL_RESET) {
1822 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link reset\n",
1823 			    ldcp->id);
1824 			mutex_enter(&ldcp->tx_lock);
1825 			i_ldc_reset(ldcp);
1826 			mutex_exit(&ldcp->tx_lock);
1827 			notify_client = B_TRUE;
1828 			notify_event = LDC_EVT_RESET;
1829 		}
1830 
1831 		if (ldcp->tstate == TS_OPEN &&
1832 		    ldcp->link_state == LDC_CHANNEL_UP) {
1833 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link up\n",
1834 			    ldcp->id);
1835 			notify_client = B_TRUE;
1836 			notify_event = LDC_EVT_RESET;
1837 			ldcp->tstate |= TS_LINK_READY;
1838 			ldcp->status = LDC_READY;
1839 		}
1840 
1841 		if (rx_head == rx_tail) {
1842 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1843 			    ldcp->id);
1844 			break;
1845 		}
1846 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1847 		    rx_head, rx_tail);
1848 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1849 		    ldcp->rx_q_va + rx_head);
1850 
1851 		/* get the message */
1852 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1853 
1854 		/* if channel is in RAW mode or data pkt, notify and return */
1855 		if (ldcp->mode == LDC_MODE_RAW) {
1856 			notify_client = B_TRUE;
1857 			notify_event |= LDC_EVT_READ;
1858 			break;
1859 		}
1860 
1861 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1862 
1863 			/* discard packet if channel is not up */
1864 			if (ldcp->tstate != TS_UP) {
1865 
1866 				/* move the head one position */
1867 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1868 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1869 
1870 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
1871 					break;
1872 
1873 				continue;
1874 			} else {
1875 				notify_client = B_TRUE;
1876 				notify_event |= LDC_EVT_READ;
1877 				break;
1878 			}
1879 		}
1880 
1881 		/* Check the sequence ID for the message received */
1882 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
1883 
1884 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
1885 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
1886 
1887 			/* Reset last_msg_rcd to start of message */
1888 			if (first_fragment != 0) {
1889 				ldcp->last_msg_rcd = first_fragment - 1;
1890 				first_fragment = 0;
1891 			}
1892 
1893 			/*
1894 			 * Send a NACK due to seqid mismatch
1895 			 */
1896 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
1897 			    (msg->ctrl & LDC_CTRL_MASK));
1898 
1899 			if (rv) {
1900 				cmn_err(CE_NOTE,
1901 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
1902 				    "CTRL/NACK msg\n", ldcp->id);
1903 
1904 				/* if cannot send NACK - reset channel */
1905 				mutex_enter(&ldcp->tx_lock);
1906 				i_ldc_reset(ldcp);
1907 				mutex_exit(&ldcp->tx_lock);
1908 				rv = ECONNRESET;
1909 				break;
1910 			}
1911 
1912 			/* purge receive queue */
1913 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
1914 			break;
1915 		}
1916 
1917 		/* record the message ID */
1918 		ldcp->last_msg_rcd = msg->seqid;
1919 
1920 		/* process control messages */
1921 		if (msg->type & LDC_CTRL) {
1922 			/* save current internal state */
1923 			uint64_t tstate = ldcp->tstate;
1924 
1925 			rv = i_ldc_ctrlmsg(ldcp, msg);
1926 			if (rv == EAGAIN) {
1927 				/* re-process pkt - state was adjusted */
1928 				continue;
1929 			}
1930 			if (rv == ECONNRESET) {
1931 				notify_client = B_TRUE;
1932 				notify_event = LDC_EVT_RESET;
1933 				break;
1934 			}
1935 
1936 			/*
1937 			 * control message processing was successful
1938 			 * channel transitioned to ready for communication
1939 			 */
1940 			if (rv == 0 && ldcp->tstate == TS_UP &&
1941 			    tstate != ldcp->tstate) {
1942 				notify_client = B_TRUE;
1943 				notify_event = LDC_EVT_UP;
1944 			}
1945 		}
1946 
1947 		/* process data ACKs */
1948 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
1949 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
1950 				notify_client = B_TRUE;
1951 				notify_event = LDC_EVT_RESET;
1952 				break;
1953 			}
1954 		}
1955 
1956 		/* move the head one position */
1957 		rx_head = (rx_head + LDC_PACKET_SIZE) %
1958 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1959 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
1960 			notify_client = B_TRUE;
1961 			notify_event = LDC_EVT_RESET;
1962 			break;
1963 		}
1964 
1965 	} /* for */
1966 
1967 	/* if callbacks are disabled, do not notify */
1968 	if (!ldcp->cb_enabled)
1969 		notify_client = B_FALSE;
1970 
1971 	if (notify_client)
1972 		ldcp->cb_inprogress = B_TRUE;
1973 
1974 	/* Unlock channel */
1975 	mutex_exit(&ldcp->lock);
1976 
1977 	if (notify_client) {
1978 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1979 		if (rv) {
1980 			DWARN(ldcp->id,
1981 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1982 			    ldcp->id);
1983 		}
1984 		mutex_enter(&ldcp->lock);
1985 		ldcp->cb_inprogress = B_FALSE;
1986 		mutex_exit(&ldcp->lock);
1987 	}
1988 
1989 	mutex_enter(&ldcp->lock);
1990 
1991 	/*
1992 	 * If there are data packets in the queue, the ldc_read will
1993 	 * clear interrupts after draining the queue, else clear interrupts
1994 	 */
1995 	if ((notify_event & LDC_EVT_READ) == 0) {
1996 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1997 	}
1998 
1999 	mutex_exit(&ldcp->lock);
2000 
2001 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
2002 	return (DDI_INTR_CLAIMED);
2003 }
2004 
2005 
2006 /* -------------------------------------------------------------------------- */
2007 
2008 /*
2009  * LDC API functions
2010  */
2011 
2012 /*
2013  * Initialize the channel. Allocate internal structure and memory for
2014  * TX/RX queues, and initialize locks.
2015  */
2016 int
2017 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2018 {
2019 	ldc_chan_t 	*ldcp;
2020 	int		rv, exit_val;
2021 	uint64_t	ra_base, nentries;
2022 	uint64_t	qlen;
2023 
2024 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2025 
2026 	if (attr == NULL) {
2027 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2028 		return (EINVAL);
2029 	}
2030 	if (handle == NULL) {
2031 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2032 		return (EINVAL);
2033 	}
2034 
2035 	/* check if channel is valid */
2036 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2037 	if (rv == H_ECHANNEL) {
2038 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2039 		return (EINVAL);
2040 	}
2041 
2042 	/* check if the channel has already been initialized */
2043 	mutex_enter(&ldcssp->lock);
2044 	ldcp = ldcssp->chan_list;
2045 	while (ldcp != NULL) {
2046 		if (ldcp->id == id) {
2047 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2048 			    id);
2049 			mutex_exit(&ldcssp->lock);
2050 			return (EADDRINUSE);
2051 		}
2052 		ldcp = ldcp->next;
2053 	}
2054 	mutex_exit(&ldcssp->lock);
2055 
2056 	ASSERT(ldcp == NULL);
2057 
2058 	*handle = 0;
2059 
2060 	/* Allocate an ldcp structure */
2061 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2062 
2063 	/*
2064 	 * Initialize the channel and Tx lock
2065 	 *
2066 	 * The channel 'lock' protects the entire channel and
2067 	 * should be acquired before initializing, resetting,
2068 	 * destroying or reading from a channel.
2069 	 *
2070 	 * The 'tx_lock' should be acquired prior to transmitting
2071 	 * data over the channel. The lock should also be acquired
2072 	 * prior to channel reconfiguration (in order to prevent
2073 	 * concurrent writes).
2074 	 *
2075 	 * ORDERING: When both locks are being acquired, to prevent
2076 	 * deadlocks, the channel lock should be always acquired prior
2077 	 * to the tx_lock.
2078 	 */
2079 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2080 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2081 
2082 	/* Initialize the channel */
2083 	ldcp->id = id;
2084 	ldcp->cb = NULL;
2085 	ldcp->cb_arg = NULL;
2086 	ldcp->cb_inprogress = B_FALSE;
2087 	ldcp->cb_enabled = B_FALSE;
2088 	ldcp->next = NULL;
2089 
2090 	/* Read attributes */
2091 	ldcp->mode = attr->mode;
2092 	ldcp->devclass = attr->devclass;
2093 	ldcp->devinst = attr->instance;
2094 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2095 
2096 	D1(ldcp->id,
2097 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2098 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2099 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2100 
2101 	ldcp->next_vidx = 0;
2102 	ldcp->tstate = 0;
2103 	ldcp->hstate = 0;
2104 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2105 	ldcp->last_ack_rcd = 0;
2106 	ldcp->last_msg_rcd = 0;
2107 
2108 	ldcp->stream_bufferp = NULL;
2109 	ldcp->exp_dring_list = NULL;
2110 	ldcp->imp_dring_list = NULL;
2111 	ldcp->mhdl_list = NULL;
2112 
2113 	/* Initialize payload size depending on whether channel is reliable */
2114 	switch (ldcp->mode) {
2115 	case LDC_MODE_RAW:
2116 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2117 		ldcp->read_p = i_ldc_read_raw;
2118 		ldcp->write_p = i_ldc_write_raw;
2119 		break;
2120 	case LDC_MODE_UNRELIABLE:
2121 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2122 		ldcp->read_p = i_ldc_read_packet;
2123 		ldcp->write_p = i_ldc_write_packet;
2124 		break;
2125 	case LDC_MODE_RELIABLE:
2126 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2127 		ldcp->read_p = i_ldc_read_packet;
2128 		ldcp->write_p = i_ldc_write_packet;
2129 		break;
2130 	case LDC_MODE_STREAM:
2131 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2132 
2133 		ldcp->stream_remains = 0;
2134 		ldcp->stream_offset = 0;
2135 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2136 		ldcp->read_p = i_ldc_read_stream;
2137 		ldcp->write_p = i_ldc_write_stream;
2138 		break;
2139 	default:
2140 		exit_val = EINVAL;
2141 		goto cleanup_on_exit;
2142 	}
2143 
2144 	/*
2145 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2146 	 * value is smaller than default length of ldc_queue_entries,
2147 	 * qlen is set to ldc_queue_entries..
2148 	 */
2149 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2150 	ldcp->rx_q_entries =
2151 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2152 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2153 
2154 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2155 
2156 	/* Create a transmit queue */
2157 	ldcp->tx_q_va = (uint64_t)
2158 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2159 	if (ldcp->tx_q_va == NULL) {
2160 		cmn_err(CE_WARN,
2161 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2162 		    ldcp->id);
2163 		exit_val = ENOMEM;
2164 		goto cleanup_on_exit;
2165 	}
2166 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2167 
2168 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2169 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2170 
2171 	ldcp->tstate |= TS_TXQ_RDY;
2172 
2173 	/* Create a receive queue */
2174 	ldcp->rx_q_va = (uint64_t)
2175 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2176 	if (ldcp->rx_q_va == NULL) {
2177 		cmn_err(CE_WARN,
2178 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2179 		    ldcp->id);
2180 		exit_val = ENOMEM;
2181 		goto cleanup_on_exit;
2182 	}
2183 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2184 
2185 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2186 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2187 
2188 	ldcp->tstate |= TS_RXQ_RDY;
2189 
2190 	/* Init descriptor ring and memory handle list lock */
2191 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2192 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2193 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2194 
2195 	/* mark status as INITialized */
2196 	ldcp->status = LDC_INIT;
2197 
2198 	/* Add to channel list */
2199 	mutex_enter(&ldcssp->lock);
2200 	ldcp->next = ldcssp->chan_list;
2201 	ldcssp->chan_list = ldcp;
2202 	ldcssp->channel_count++;
2203 	mutex_exit(&ldcssp->lock);
2204 
2205 	/* set the handle */
2206 	*handle = (ldc_handle_t)ldcp;
2207 
2208 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2209 
2210 	return (0);
2211 
2212 cleanup_on_exit:
2213 
2214 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2215 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2216 
2217 	if (ldcp->tstate & TS_TXQ_RDY)
2218 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2219 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2220 
2221 	if (ldcp->tstate & TS_RXQ_RDY)
2222 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2223 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2224 
2225 	mutex_destroy(&ldcp->tx_lock);
2226 	mutex_destroy(&ldcp->lock);
2227 
2228 	if (ldcp)
2229 		kmem_free(ldcp, sizeof (ldc_chan_t));
2230 
2231 	return (exit_val);
2232 }
2233 
2234 /*
2235  * Finalizes the LDC connection. It will return EBUSY if the
2236  * channel is open. A ldc_close() has to be done prior to
2237  * a ldc_fini operation. It frees TX/RX queues, associated
2238  * with the channel
2239  */
2240 int
2241 ldc_fini(ldc_handle_t handle)
2242 {
2243 	ldc_chan_t 	*ldcp;
2244 	ldc_chan_t 	*tmp_ldcp;
2245 	uint64_t 	id;
2246 
2247 	if (handle == NULL) {
2248 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2249 		return (EINVAL);
2250 	}
2251 	ldcp = (ldc_chan_t *)handle;
2252 	id = ldcp->id;
2253 
2254 	mutex_enter(&ldcp->lock);
2255 
2256 	if (ldcp->tstate > TS_INIT) {
2257 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2258 		    ldcp->id);
2259 		mutex_exit(&ldcp->lock);
2260 		return (EBUSY);
2261 	}
2262 
2263 	/* Remove from the channel list */
2264 	mutex_enter(&ldcssp->lock);
2265 	tmp_ldcp = ldcssp->chan_list;
2266 	if (tmp_ldcp == ldcp) {
2267 		ldcssp->chan_list = ldcp->next;
2268 		ldcp->next = NULL;
2269 	} else {
2270 		while (tmp_ldcp != NULL) {
2271 			if (tmp_ldcp->next == ldcp) {
2272 				tmp_ldcp->next = ldcp->next;
2273 				ldcp->next = NULL;
2274 				break;
2275 			}
2276 			tmp_ldcp = tmp_ldcp->next;
2277 		}
2278 		if (tmp_ldcp == NULL) {
2279 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2280 			mutex_exit(&ldcssp->lock);
2281 			mutex_exit(&ldcp->lock);
2282 			return (EINVAL);
2283 		}
2284 	}
2285 
2286 	ldcssp->channel_count--;
2287 
2288 	mutex_exit(&ldcssp->lock);
2289 
2290 	/* Free the map table for this channel */
2291 	if (ldcp->mtbl) {
2292 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2293 		contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2294 		mutex_destroy(&ldcp->mtbl->lock);
2295 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2296 	}
2297 
2298 	/* Destroy descriptor ring and memory handle list lock */
2299 	mutex_destroy(&ldcp->exp_dlist_lock);
2300 	mutex_destroy(&ldcp->imp_dlist_lock);
2301 	mutex_destroy(&ldcp->mlist_lock);
2302 
2303 	/* Free the stream buffer for STREAM_MODE */
2304 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2305 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2306 
2307 	/* Free the RX queue */
2308 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2309 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2310 	ldcp->tstate &= ~TS_RXQ_RDY;
2311 
2312 	/* Free the TX queue */
2313 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2314 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2315 	ldcp->tstate &= ~TS_TXQ_RDY;
2316 
2317 
2318 	mutex_exit(&ldcp->lock);
2319 
2320 	/* Destroy mutex */
2321 	mutex_destroy(&ldcp->tx_lock);
2322 	mutex_destroy(&ldcp->lock);
2323 
2324 	/* free channel structure */
2325 	kmem_free(ldcp, sizeof (ldc_chan_t));
2326 
2327 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2328 
2329 	return (0);
2330 }
2331 
2332 /*
2333  * Open the LDC channel for use. It registers the TX/RX queues
2334  * with the Hypervisor. It also specifies the interrupt number
2335  * and target CPU for this channel
2336  */
2337 int
2338 ldc_open(ldc_handle_t handle)
2339 {
2340 	ldc_chan_t 	*ldcp;
2341 	int 		rv;
2342 
2343 	if (handle == NULL) {
2344 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2345 		return (EINVAL);
2346 	}
2347 
2348 	ldcp = (ldc_chan_t *)handle;
2349 
2350 	mutex_enter(&ldcp->lock);
2351 
2352 	if (ldcp->tstate < TS_INIT) {
2353 		DWARN(ldcp->id,
2354 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2355 		mutex_exit(&ldcp->lock);
2356 		return (EFAULT);
2357 	}
2358 	if (ldcp->tstate >= TS_OPEN) {
2359 		DWARN(ldcp->id,
2360 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2361 		mutex_exit(&ldcp->lock);
2362 		return (EFAULT);
2363 	}
2364 
2365 	/*
2366 	 * Unregister/Register the tx queue with the hypervisor
2367 	 */
2368 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2369 	if (rv) {
2370 		cmn_err(CE_WARN,
2371 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2372 		    ldcp->id);
2373 		mutex_exit(&ldcp->lock);
2374 		return (EIO);
2375 	}
2376 
2377 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2378 	if (rv) {
2379 		cmn_err(CE_WARN,
2380 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2381 		    ldcp->id);
2382 		mutex_exit(&ldcp->lock);
2383 		return (EIO);
2384 	}
2385 
2386 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2387 	    ldcp->id);
2388 
2389 	/*
2390 	 * Unregister/Register the rx queue with the hypervisor
2391 	 */
2392 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2393 	if (rv) {
2394 		cmn_err(CE_WARN,
2395 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2396 		    ldcp->id);
2397 		mutex_exit(&ldcp->lock);
2398 		return (EIO);
2399 	}
2400 
2401 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2402 	if (rv) {
2403 		cmn_err(CE_WARN,
2404 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2405 		    ldcp->id);
2406 		mutex_exit(&ldcp->lock);
2407 		return (EIO);
2408 	}
2409 
2410 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2411 	    ldcp->id);
2412 
2413 	ldcp->tstate |= TS_QCONF_RDY;
2414 
2415 	/* Register the channel with the channel nexus */
2416 	rv = i_ldc_register_channel(ldcp);
2417 	if (rv && rv != EAGAIN) {
2418 		cmn_err(CE_WARN,
2419 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2420 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2421 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2422 		mutex_exit(&ldcp->lock);
2423 		return (EIO);
2424 	}
2425 
2426 	/* mark channel in OPEN state */
2427 	ldcp->status = LDC_OPEN;
2428 
2429 	/* Read channel state */
2430 	rv = hv_ldc_tx_get_state(ldcp->id,
2431 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2432 	if (rv) {
2433 		cmn_err(CE_WARN,
2434 		    "ldc_open: (0x%lx) cannot read channel state\n",
2435 		    ldcp->id);
2436 		(void) i_ldc_unregister_channel(ldcp);
2437 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2438 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2439 		mutex_exit(&ldcp->lock);
2440 		return (EIO);
2441 	}
2442 
2443 	/*
2444 	 * set the ACKd head to current head location for reliable &
2445 	 * streaming mode
2446 	 */
2447 	ldcp->tx_ackd_head = ldcp->tx_head;
2448 
2449 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2450 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2451 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2452 		ldcp->tstate |= TS_LINK_READY;
2453 		ldcp->status = LDC_READY;
2454 	}
2455 
2456 	/*
2457 	 * if channel is being opened in RAW mode - no handshake is needed
2458 	 * switch the channel READY and UP state
2459 	 */
2460 	if (ldcp->mode == LDC_MODE_RAW) {
2461 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2462 		ldcp->status = LDC_UP;
2463 	}
2464 
2465 	mutex_exit(&ldcp->lock);
2466 
2467 	/*
2468 	 * Increment number of open channels
2469 	 */
2470 	mutex_enter(&ldcssp->lock);
2471 	ldcssp->channels_open++;
2472 	mutex_exit(&ldcssp->lock);
2473 
2474 	D1(ldcp->id,
2475 	    "ldc_open: (0x%llx) channel (0x%p) open for use (tstate=0x%x)\n",
2476 	    ldcp->id, ldcp, ldcp->tstate);
2477 
2478 	return (0);
2479 }
2480 
2481 /*
2482  * Close the LDC connection. It will return EBUSY if there
2483  * are memory segments or descriptor rings either bound to or
2484  * mapped over the channel
2485  */
2486 int
2487 ldc_close(ldc_handle_t handle)
2488 {
2489 	ldc_chan_t 	*ldcp;
2490 	int		rv = 0, retries = 0;
2491 	boolean_t	chk_done = B_FALSE;
2492 
2493 	if (handle == NULL) {
2494 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2495 		return (EINVAL);
2496 	}
2497 	ldcp = (ldc_chan_t *)handle;
2498 
2499 	mutex_enter(&ldcp->lock);
2500 
2501 	/* return error if channel is not open */
2502 	if (ldcp->tstate < TS_OPEN) {
2503 		DWARN(ldcp->id,
2504 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2505 		mutex_exit(&ldcp->lock);
2506 		return (EFAULT);
2507 	}
2508 
2509 	/* if any memory handles, drings, are bound or mapped cannot close */
2510 	if (ldcp->mhdl_list != NULL) {
2511 		DWARN(ldcp->id,
2512 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2513 		    ldcp->id);
2514 		mutex_exit(&ldcp->lock);
2515 		return (EBUSY);
2516 	}
2517 	if (ldcp->exp_dring_list != NULL) {
2518 		DWARN(ldcp->id,
2519 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2520 		    ldcp->id);
2521 		mutex_exit(&ldcp->lock);
2522 		return (EBUSY);
2523 	}
2524 	if (ldcp->imp_dring_list != NULL) {
2525 		DWARN(ldcp->id,
2526 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2527 		    ldcp->id);
2528 		mutex_exit(&ldcp->lock);
2529 		return (EBUSY);
2530 	}
2531 
2532 	/* Obtain Tx lock */
2533 	mutex_enter(&ldcp->tx_lock);
2534 
2535 	/*
2536 	 * Wait for pending transmits to complete i.e Tx queue to drain
2537 	 * if there are pending pkts - wait 1 ms and retry again
2538 	 */
2539 	for (;;) {
2540 
2541 		rv = hv_ldc_tx_get_state(ldcp->id,
2542 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2543 		if (rv) {
2544 			cmn_err(CE_WARN,
2545 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2546 			mutex_exit(&ldcp->tx_lock);
2547 			mutex_exit(&ldcp->lock);
2548 			return (EIO);
2549 		}
2550 
2551 		if (ldcp->tx_head == ldcp->tx_tail ||
2552 		    ldcp->link_state != LDC_CHANNEL_UP) {
2553 			break;
2554 		}
2555 
2556 		if (chk_done) {
2557 			DWARN(ldcp->id,
2558 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2559 			    ldcp->id);
2560 			break;
2561 		}
2562 
2563 		/* wait for one ms and try again */
2564 		delay(drv_usectohz(1000));
2565 		chk_done = B_TRUE;
2566 	}
2567 
2568 	/*
2569 	 * Unregister the channel with the nexus
2570 	 */
2571 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2572 
2573 		mutex_exit(&ldcp->tx_lock);
2574 		mutex_exit(&ldcp->lock);
2575 
2576 		/* if any error other than EAGAIN return back */
2577 		if (rv != EAGAIN || retries >= LDC_MAX_RETRIES) {
2578 			cmn_err(CE_WARN,
2579 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2580 			    ldcp->id, rv);
2581 			return (rv);
2582 		}
2583 
2584 		/*
2585 		 * As there could be pending interrupts we need
2586 		 * to wait and try again
2587 		 */
2588 		drv_usecwait(LDC_DELAY);
2589 		mutex_enter(&ldcp->lock);
2590 		mutex_enter(&ldcp->tx_lock);
2591 		retries++;
2592 	}
2593 
2594 	/*
2595 	 * Unregister queues
2596 	 */
2597 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2598 	if (rv) {
2599 		cmn_err(CE_WARN,
2600 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2601 		    ldcp->id);
2602 		mutex_exit(&ldcp->tx_lock);
2603 		mutex_exit(&ldcp->lock);
2604 		return (EIO);
2605 	}
2606 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2607 	if (rv) {
2608 		cmn_err(CE_WARN,
2609 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2610 		    ldcp->id);
2611 		mutex_exit(&ldcp->tx_lock);
2612 		mutex_exit(&ldcp->lock);
2613 		return (EIO);
2614 	}
2615 
2616 	ldcp->tstate &= ~TS_QCONF_RDY;
2617 
2618 	/* Reset channel state information */
2619 	i_ldc_reset_state(ldcp);
2620 
2621 	/* Mark channel as down and in initialized state */
2622 	ldcp->tx_ackd_head = 0;
2623 	ldcp->tx_head = 0;
2624 	ldcp->tstate = TS_INIT;
2625 	ldcp->status = LDC_INIT;
2626 
2627 	mutex_exit(&ldcp->tx_lock);
2628 	mutex_exit(&ldcp->lock);
2629 
2630 	/* Decrement number of open channels */
2631 	mutex_enter(&ldcssp->lock);
2632 	ldcssp->channels_open--;
2633 	mutex_exit(&ldcssp->lock);
2634 
2635 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2636 
2637 	return (0);
2638 }
2639 
2640 /*
2641  * Register channel callback
2642  */
2643 int
2644 ldc_reg_callback(ldc_handle_t handle,
2645     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2646 {
2647 	ldc_chan_t *ldcp;
2648 
2649 	if (handle == NULL) {
2650 		DWARN(DBG_ALL_LDCS,
2651 		    "ldc_reg_callback: invalid channel handle\n");
2652 		return (EINVAL);
2653 	}
2654 	if (((uint64_t)cb) < KERNELBASE) {
2655 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2656 		return (EINVAL);
2657 	}
2658 	ldcp = (ldc_chan_t *)handle;
2659 
2660 	mutex_enter(&ldcp->lock);
2661 
2662 	if (ldcp->cb) {
2663 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2664 		    ldcp->id);
2665 		mutex_exit(&ldcp->lock);
2666 		return (EIO);
2667 	}
2668 	if (ldcp->cb_inprogress) {
2669 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2670 		    ldcp->id);
2671 		mutex_exit(&ldcp->lock);
2672 		return (EWOULDBLOCK);
2673 	}
2674 
2675 	ldcp->cb = cb;
2676 	ldcp->cb_arg = arg;
2677 	ldcp->cb_enabled = B_TRUE;
2678 
2679 	D1(ldcp->id,
2680 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2681 	    ldcp->id);
2682 
2683 	mutex_exit(&ldcp->lock);
2684 
2685 	return (0);
2686 }
2687 
2688 /*
2689  * Unregister channel callback
2690  */
2691 int
2692 ldc_unreg_callback(ldc_handle_t handle)
2693 {
2694 	ldc_chan_t *ldcp;
2695 
2696 	if (handle == NULL) {
2697 		DWARN(DBG_ALL_LDCS,
2698 		    "ldc_unreg_callback: invalid channel handle\n");
2699 		return (EINVAL);
2700 	}
2701 	ldcp = (ldc_chan_t *)handle;
2702 
2703 	mutex_enter(&ldcp->lock);
2704 
2705 	if (ldcp->cb == NULL) {
2706 		DWARN(ldcp->id,
2707 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2708 		    ldcp->id);
2709 		mutex_exit(&ldcp->lock);
2710 		return (EIO);
2711 	}
2712 	if (ldcp->cb_inprogress) {
2713 		DWARN(ldcp->id,
2714 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2715 		    ldcp->id);
2716 		mutex_exit(&ldcp->lock);
2717 		return (EWOULDBLOCK);
2718 	}
2719 
2720 	ldcp->cb = NULL;
2721 	ldcp->cb_arg = NULL;
2722 	ldcp->cb_enabled = B_FALSE;
2723 
2724 	D1(ldcp->id,
2725 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2726 	    ldcp->id);
2727 
2728 	mutex_exit(&ldcp->lock);
2729 
2730 	return (0);
2731 }
2732 
2733 
2734 /*
2735  * Bring a channel up by initiating a handshake with the peer
2736  * This call is asynchronous. It will complete at a later point
2737  * in time when the peer responds back with an RTR.
2738  */
2739 int
2740 ldc_up(ldc_handle_t handle)
2741 {
2742 	int 		rv;
2743 	ldc_chan_t 	*ldcp;
2744 	ldc_msg_t 	*ldcmsg;
2745 	uint64_t 	tx_tail;
2746 
2747 	if (handle == NULL) {
2748 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2749 		return (EINVAL);
2750 	}
2751 	ldcp = (ldc_chan_t *)handle;
2752 
2753 	mutex_enter(&ldcp->lock);
2754 
2755 	if (ldcp->tstate == TS_UP) {
2756 		D2(ldcp->id,
2757 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2758 		    ldcp->id);
2759 		mutex_exit(&ldcp->lock);
2760 		return (0);
2761 	}
2762 
2763 	/* if the channel is in RAW mode - mark it as UP, if READY */
2764 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2765 		ldcp->tstate = TS_UP;
2766 		mutex_exit(&ldcp->lock);
2767 		return (0);
2768 	}
2769 
2770 	/* Don't start another handshake if there is one in progress */
2771 	if (ldcp->hstate) {
2772 		D2(ldcp->id,
2773 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2774 		    ldcp->id);
2775 		mutex_exit(&ldcp->lock);
2776 		return (0);
2777 	}
2778 
2779 	mutex_enter(&ldcp->tx_lock);
2780 
2781 	/* get the current tail for the LDC msg */
2782 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2783 	if (rv) {
2784 		DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2785 		    ldcp->id);
2786 		mutex_exit(&ldcp->tx_lock);
2787 		mutex_exit(&ldcp->lock);
2788 		return (ECONNREFUSED);
2789 	}
2790 
2791 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2792 	ZERO_PKT(ldcmsg);
2793 
2794 	ldcmsg->type = LDC_CTRL;
2795 	ldcmsg->stype = LDC_INFO;
2796 	ldcmsg->ctrl = LDC_VER;
2797 	ldcp->next_vidx = 0;
2798 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2799 
2800 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2801 
2802 	/* initiate the send by calling into HV and set the new tail */
2803 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2804 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2805 
2806 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2807 	if (rv) {
2808 		DWARN(ldcp->id,
2809 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2810 		    ldcp->id, rv);
2811 		mutex_exit(&ldcp->tx_lock);
2812 		mutex_exit(&ldcp->lock);
2813 		return (rv);
2814 	}
2815 
2816 	ldcp->hstate |= TS_SENT_VER;
2817 	ldcp->tx_tail = tx_tail;
2818 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2819 
2820 	mutex_exit(&ldcp->tx_lock);
2821 	mutex_exit(&ldcp->lock);
2822 
2823 	return (rv);
2824 }
2825 
2826 
2827 /*
2828  * Bring a channel down by resetting its state and queues
2829  */
2830 int
2831 ldc_down(ldc_handle_t handle)
2832 {
2833 	ldc_chan_t 	*ldcp;
2834 
2835 	if (handle == NULL) {
2836 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
2837 		return (EINVAL);
2838 	}
2839 	ldcp = (ldc_chan_t *)handle;
2840 
2841 	mutex_enter(&ldcp->lock);
2842 	mutex_enter(&ldcp->tx_lock);
2843 	i_ldc_reset(ldcp);
2844 	mutex_exit(&ldcp->tx_lock);
2845 	mutex_exit(&ldcp->lock);
2846 
2847 	return (0);
2848 }
2849 
2850 /*
2851  * Get the current channel status
2852  */
2853 int
2854 ldc_status(ldc_handle_t handle, ldc_status_t *status)
2855 {
2856 	ldc_chan_t *ldcp;
2857 
2858 	if (handle == NULL || status == NULL) {
2859 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
2860 		return (EINVAL);
2861 	}
2862 	ldcp = (ldc_chan_t *)handle;
2863 
2864 	*status = ((ldc_chan_t *)handle)->status;
2865 
2866 	D1(ldcp->id,
2867 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
2868 	return (0);
2869 }
2870 
2871 
2872 /*
2873  * Set the channel's callback mode - enable/disable callbacks
2874  */
2875 int
2876 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
2877 {
2878 	ldc_chan_t 	*ldcp;
2879 
2880 	if (handle == NULL) {
2881 		DWARN(DBG_ALL_LDCS,
2882 		    "ldc_set_intr_mode: invalid channel handle\n");
2883 		return (EINVAL);
2884 	}
2885 	ldcp = (ldc_chan_t *)handle;
2886 
2887 	/*
2888 	 * Record no callbacks should be invoked
2889 	 */
2890 	mutex_enter(&ldcp->lock);
2891 
2892 	switch (cmode) {
2893 	case LDC_CB_DISABLE:
2894 		if (!ldcp->cb_enabled) {
2895 			DWARN(ldcp->id,
2896 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
2897 			    ldcp->id);
2898 			break;
2899 		}
2900 		ldcp->cb_enabled = B_FALSE;
2901 
2902 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
2903 		    ldcp->id);
2904 		break;
2905 
2906 	case LDC_CB_ENABLE:
2907 		if (ldcp->cb_enabled) {
2908 			DWARN(ldcp->id,
2909 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
2910 			    ldcp->id);
2911 			break;
2912 		}
2913 		ldcp->cb_enabled = B_TRUE;
2914 
2915 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
2916 		    ldcp->id);
2917 		break;
2918 	}
2919 
2920 	mutex_exit(&ldcp->lock);
2921 
2922 	return (0);
2923 }
2924 
2925 /*
2926  * Check to see if there are packets on the incoming queue
2927  * Will return hasdata = B_FALSE if there are no packets
2928  */
2929 int
2930 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
2931 {
2932 	int 		rv;
2933 	uint64_t 	rx_head, rx_tail;
2934 	ldc_chan_t 	*ldcp;
2935 
2936 	if (handle == NULL) {
2937 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
2938 		return (EINVAL);
2939 	}
2940 	ldcp = (ldc_chan_t *)handle;
2941 
2942 	*hasdata = B_FALSE;
2943 
2944 	mutex_enter(&ldcp->lock);
2945 
2946 	if (ldcp->tstate != TS_UP) {
2947 		D1(ldcp->id,
2948 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
2949 		mutex_exit(&ldcp->lock);
2950 		return (ECONNRESET);
2951 	}
2952 
2953 	/* Read packet(s) from the queue */
2954 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2955 	    &ldcp->link_state);
2956 	if (rv != 0) {
2957 		cmn_err(CE_WARN,
2958 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
2959 		mutex_exit(&ldcp->lock);
2960 		return (EIO);
2961 	}
2962 	/* reset the channel state if the channel went down */
2963 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
2964 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2965 		mutex_enter(&ldcp->tx_lock);
2966 		i_ldc_reset(ldcp);
2967 		mutex_exit(&ldcp->tx_lock);
2968 		mutex_exit(&ldcp->lock);
2969 		return (ECONNRESET);
2970 	}
2971 
2972 	if ((rx_head != rx_tail) ||
2973 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
2974 		D1(ldcp->id,
2975 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
2976 		    ldcp->id);
2977 		*hasdata = B_TRUE;
2978 	}
2979 
2980 	mutex_exit(&ldcp->lock);
2981 
2982 	return (0);
2983 }
2984 
2985 
2986 /*
2987  * Read 'size' amount of bytes or less. If incoming buffer
2988  * is more than 'size', ENOBUFS is returned.
2989  *
2990  * On return, size contains the number of bytes read.
2991  */
2992 int
2993 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
2994 {
2995 	ldc_chan_t 	*ldcp;
2996 	uint64_t 	rx_head = 0, rx_tail = 0;
2997 	int		rv = 0, exit_val;
2998 
2999 	if (handle == NULL) {
3000 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3001 		return (EINVAL);
3002 	}
3003 
3004 	ldcp = (ldc_chan_t *)handle;
3005 
3006 	/* channel lock */
3007 	mutex_enter(&ldcp->lock);
3008 
3009 	if (ldcp->tstate != TS_UP) {
3010 		DWARN(ldcp->id,
3011 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3012 		    ldcp->id);
3013 		exit_val = ECONNRESET;
3014 	} else {
3015 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3016 	}
3017 
3018 	/*
3019 	 * if queue has been drained - clear interrupt
3020 	 */
3021 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3022 	    &ldcp->link_state);
3023 	if (exit_val == 0 && rv == 0 && rx_head == rx_tail) {
3024 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3025 	}
3026 
3027 	mutex_exit(&ldcp->lock);
3028 	return (exit_val);
3029 }
3030 
3031 /*
3032  * Basic raw mondo read -
3033  * no interpretation of mondo contents at all.
3034  *
3035  * Enter and exit with ldcp->lock held by caller
3036  */
3037 static int
3038 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3039 {
3040 	uint64_t 	q_size_mask;
3041 	ldc_msg_t 	*msgp;
3042 	uint8_t		*msgbufp;
3043 	int		rv = 0, space;
3044 	uint64_t 	rx_head, rx_tail;
3045 
3046 	space = *sizep;
3047 
3048 	if (space < LDC_PAYLOAD_SIZE_RAW)
3049 		return (ENOBUFS);
3050 
3051 	ASSERT(mutex_owned(&ldcp->lock));
3052 
3053 	/* compute mask for increment */
3054 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3055 
3056 	/*
3057 	 * Read packet(s) from the queue
3058 	 */
3059 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3060 	    &ldcp->link_state);
3061 	if (rv != 0) {
3062 		cmn_err(CE_WARN,
3063 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3064 		    ldcp->id);
3065 		return (EIO);
3066 	}
3067 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3068 		" rxt=0x%llx, st=0x%llx\n",
3069 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3070 
3071 	/* reset the channel state if the channel went down */
3072 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3073 		mutex_enter(&ldcp->tx_lock);
3074 		i_ldc_reset(ldcp);
3075 		mutex_exit(&ldcp->tx_lock);
3076 		return (ECONNRESET);
3077 	}
3078 
3079 	/*
3080 	 * Check for empty queue
3081 	 */
3082 	if (rx_head == rx_tail) {
3083 		*sizep = 0;
3084 		return (0);
3085 	}
3086 
3087 	/* get the message */
3088 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3089 
3090 	/* if channel is in RAW mode, copy data and return */
3091 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3092 
3093 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3094 
3095 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3096 
3097 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3098 
3099 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3100 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3101 
3102 	return (rv);
3103 }
3104 
3105 /*
3106  * Process LDC mondos to build larger packets
3107  * with either un-reliable or reliable delivery.
3108  *
3109  * Enter and exit with ldcp->lock held by caller
3110  */
3111 static int
3112 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3113 {
3114 	int		rv = 0;
3115 	uint64_t 	rx_head = 0, rx_tail = 0;
3116 	uint64_t 	curr_head = 0;
3117 	ldc_msg_t 	*msg;
3118 	caddr_t 	target;
3119 	size_t 		len = 0, bytes_read = 0;
3120 	int 		retries = 0;
3121 	uint64_t 	q_size_mask;
3122 	uint64_t	first_fragment = 0;
3123 
3124 	target = target_bufp;
3125 
3126 	ASSERT(mutex_owned(&ldcp->lock));
3127 
3128 	/* compute mask for increment */
3129 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3130 
3131 	/*
3132 	 * Read packet(s) from the queue
3133 	 */
3134 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3135 	    &ldcp->link_state);
3136 	if (rv != 0) {
3137 		cmn_err(CE_WARN,
3138 		    "ldc_read: (0x%lx) unable to read queue ptrs",
3139 		    ldcp->id);
3140 		return (EIO);
3141 	}
3142 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3143 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3144 
3145 	/* reset the channel state if the channel went down */
3146 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3147 		mutex_enter(&ldcp->tx_lock);
3148 		i_ldc_reset(ldcp);
3149 		mutex_exit(&ldcp->tx_lock);
3150 		return (ECONNRESET);
3151 	}
3152 
3153 	for (;;) {
3154 
3155 		if (curr_head == rx_tail) {
3156 			rv = hv_ldc_rx_get_state(ldcp->id,
3157 			    &rx_head, &rx_tail, &ldcp->link_state);
3158 			if (rv != 0) {
3159 				cmn_err(CE_WARN,
3160 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3161 				    ldcp->id);
3162 				return (EIO);
3163 			}
3164 			/* reset the channel state if the channel went down */
3165 			if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3166 				mutex_enter(&ldcp->tx_lock);
3167 				i_ldc_reset(ldcp);
3168 				mutex_exit(&ldcp->tx_lock);
3169 				return (ECONNRESET);
3170 			}
3171 		}
3172 
3173 		if (curr_head == rx_tail) {
3174 
3175 			/* If in the middle of a fragmented xfer */
3176 			if (first_fragment != 0) {
3177 
3178 				/* wait for ldc_delay usecs */
3179 				drv_usecwait(ldc_delay);
3180 
3181 				if (++retries < ldc_max_retries)
3182 					continue;
3183 
3184 				*sizep = 0;
3185 				ldcp->last_msg_rcd = first_fragment - 1;
3186 				DWARN(DBG_ALL_LDCS,
3187 					"ldc_read: (0x%llx) read timeout",
3188 					ldcp->id);
3189 				return (ETIMEDOUT);
3190 			}
3191 			*sizep = 0;
3192 			break;
3193 		}
3194 		retries = 0;
3195 
3196 		D2(ldcp->id,
3197 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3198 		    ldcp->id, curr_head, rx_head, rx_tail);
3199 
3200 		/* get the message */
3201 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3202 
3203 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3204 		    ldcp->rx_q_va + curr_head);
3205 
3206 		/* Check the message ID for the message received */
3207 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3208 
3209 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3210 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3211 
3212 			/* throw away data */
3213 			bytes_read = 0;
3214 
3215 			/* Reset last_msg_rcd to start of message */
3216 			if (first_fragment != 0) {
3217 				ldcp->last_msg_rcd = first_fragment - 1;
3218 				first_fragment = 0;
3219 			}
3220 			/*
3221 			 * Send a NACK -- invalid seqid
3222 			 * get the current tail for the response
3223 			 */
3224 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3225 			    (msg->ctrl & LDC_CTRL_MASK));
3226 			if (rv) {
3227 				cmn_err(CE_NOTE,
3228 				    "ldc_read: (0x%lx) err sending "
3229 				    "NACK msg\n", ldcp->id);
3230 
3231 				/* if cannot send NACK - reset channel */
3232 				mutex_enter(&ldcp->tx_lock);
3233 				i_ldc_reset(ldcp);
3234 				mutex_exit(&ldcp->tx_lock);
3235 				rv = ECONNRESET;
3236 				break;
3237 			}
3238 
3239 			/* purge receive queue */
3240 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3241 
3242 			break;
3243 		}
3244 
3245 		/*
3246 		 * Process any messages of type CTRL messages
3247 		 * Future implementations should try to pass these
3248 		 * to LDC link by resetting the intr state.
3249 		 *
3250 		 * NOTE: not done as a switch() as type can be both ctrl+data
3251 		 */
3252 		if (msg->type & LDC_CTRL) {
3253 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3254 				if (rv == EAGAIN)
3255 					continue;
3256 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3257 				*sizep = 0;
3258 				bytes_read = 0;
3259 				break;
3260 			}
3261 		}
3262 
3263 		/* process data ACKs */
3264 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3265 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3266 				*sizep = 0;
3267 				bytes_read = 0;
3268 				break;
3269 			}
3270 		}
3271 
3272 		/* process data messages */
3273 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3274 
3275 			uint8_t *msgbuf = (uint8_t *)(
3276 				(ldcp->mode == LDC_MODE_RELIABLE ||
3277 				ldcp->mode == LDC_MODE_STREAM)
3278 				? msg->rdata : msg->udata);
3279 
3280 			D2(ldcp->id,
3281 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3282 
3283 			/* get the packet length */
3284 			len = (msg->env & LDC_LEN_MASK);
3285 
3286 				/*
3287 				 * FUTURE OPTIMIZATION:
3288 				 * dont need to set q head for every
3289 				 * packet we read just need to do this when
3290 				 * we are done or need to wait for more
3291 				 * mondos to make a full packet - this is
3292 				 * currently expensive.
3293 				 */
3294 
3295 			if (first_fragment == 0) {
3296 
3297 				/*
3298 				 * first packets should always have the start
3299 				 * bit set (even for a single packet). If not
3300 				 * throw away the packet
3301 				 */
3302 				if (!(msg->env & LDC_FRAG_START)) {
3303 
3304 					DWARN(DBG_ALL_LDCS,
3305 					    "ldc_read: (0x%llx) not start - "
3306 					    "frag=%x\n", ldcp->id,
3307 					    (msg->env) & LDC_FRAG_MASK);
3308 
3309 					/* toss pkt, inc head, cont reading */
3310 					bytes_read = 0;
3311 					target = target_bufp;
3312 					curr_head =
3313 						(curr_head + LDC_PACKET_SIZE)
3314 						& q_size_mask;
3315 					if (rv = i_ldc_set_rx_head(ldcp,
3316 						curr_head))
3317 						break;
3318 
3319 					continue;
3320 				}
3321 
3322 				first_fragment = msg->seqid;
3323 			} else {
3324 				/* check to see if this is a pkt w/ START bit */
3325 				if (msg->env & LDC_FRAG_START) {
3326 					DWARN(DBG_ALL_LDCS,
3327 					    "ldc_read:(0x%llx) unexpected pkt"
3328 					    " env=0x%x discarding %d bytes,"
3329 					    " lastmsg=%d, currentmsg=%d\n",
3330 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3331 					    bytes_read, ldcp->last_msg_rcd,
3332 					    msg->seqid);
3333 
3334 					/* throw data we have read so far */
3335 					bytes_read = 0;
3336 					target = target_bufp;
3337 					first_fragment = msg->seqid;
3338 
3339 					if (rv = i_ldc_set_rx_head(ldcp,
3340 						curr_head))
3341 						break;
3342 				}
3343 			}
3344 
3345 			/* copy (next) pkt into buffer */
3346 			if (len <= (*sizep - bytes_read)) {
3347 				bcopy(msgbuf, target, len);
3348 				target += len;
3349 				bytes_read += len;
3350 			} else {
3351 				/*
3352 				 * there is not enough space in the buffer to
3353 				 * read this pkt. throw message away & continue
3354 				 * reading data from queue
3355 				 */
3356 				DWARN(DBG_ALL_LDCS,
3357 				    "ldc_read: (0x%llx) buffer too small, "
3358 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3359 				    curr_head, *sizep, bytes_read+len);
3360 
3361 				first_fragment = 0;
3362 				target = target_bufp;
3363 				bytes_read = 0;
3364 
3365 				/* throw away everything received so far */
3366 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3367 					break;
3368 
3369 				/* continue reading remaining pkts */
3370 				continue;
3371 			}
3372 		}
3373 
3374 		/* set the message id */
3375 		ldcp->last_msg_rcd = msg->seqid;
3376 
3377 		/* move the head one position */
3378 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3379 
3380 		if (msg->env & LDC_FRAG_STOP) {
3381 
3382 			/*
3383 			 * All pkts that are part of this fragmented transfer
3384 			 * have been read or this was a single pkt read
3385 			 * or there was an error
3386 			 */
3387 
3388 			/* set the queue head */
3389 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3390 				bytes_read = 0;
3391 
3392 			*sizep = bytes_read;
3393 
3394 			break;
3395 		}
3396 
3397 		/* advance head if it is a DATA ACK */
3398 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3399 
3400 			/* set the queue head */
3401 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3402 				bytes_read = 0;
3403 				break;
3404 			}
3405 
3406 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3407 			    ldcp->id, curr_head);
3408 		}
3409 
3410 	} /* for (;;) */
3411 
3412 
3413 	/*
3414 	 * If useful data was read - Send msg ACK
3415 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3416 	 */
3417 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3418 		ldcp->mode == LDC_MODE_STREAM)) {
3419 
3420 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3421 		if (rv) {
3422 			cmn_err(CE_NOTE,
3423 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3424 
3425 			/* if cannot send ACK - reset channel */
3426 			mutex_enter(&ldcp->tx_lock);
3427 			i_ldc_reset(ldcp);
3428 			mutex_exit(&ldcp->tx_lock);
3429 			rv = ECONNRESET;
3430 		}
3431 	}
3432 
3433 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3434 
3435 	return (rv);
3436 }
3437 
3438 /*
3439  * Use underlying reliable packet mechanism to fetch
3440  * and buffer incoming packets so we can hand them back as
3441  * a basic byte stream.
3442  *
3443  * Enter and exit with ldcp->lock held by caller
3444  */
3445 static int
3446 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3447 {
3448 	int	rv;
3449 	size_t	size;
3450 
3451 	ASSERT(mutex_owned(&ldcp->lock));
3452 
3453 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3454 		ldcp->id, *sizep);
3455 
3456 	if (ldcp->stream_remains == 0) {
3457 		size = ldcp->mtu;
3458 		rv = i_ldc_read_packet(ldcp,
3459 			(caddr_t)ldcp->stream_bufferp, &size);
3460 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3461 			ldcp->id, size);
3462 
3463 		if (rv != 0)
3464 			return (rv);
3465 
3466 		ldcp->stream_remains = size;
3467 		ldcp->stream_offset = 0;
3468 	}
3469 
3470 	size = MIN(ldcp->stream_remains, *sizep);
3471 
3472 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3473 	ldcp->stream_offset += size;
3474 	ldcp->stream_remains -= size;
3475 
3476 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3477 		ldcp->id, size);
3478 
3479 	*sizep = size;
3480 	return (0);
3481 }
3482 
3483 /*
3484  * Write specified amount of bytes to the channel
3485  * in multiple pkts of pkt_payload size. Each
3486  * packet is tagged with an unique packet ID in
3487  * the case of a reliable link.
3488  *
3489  * On return, size contains the number of bytes written.
3490  */
3491 int
3492 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3493 {
3494 	ldc_chan_t	*ldcp;
3495 	int		rv = 0;
3496 
3497 	if (handle == NULL) {
3498 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3499 		return (EINVAL);
3500 	}
3501 	ldcp = (ldc_chan_t *)handle;
3502 
3503 	/* check if writes can occur */
3504 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3505 		/*
3506 		 * Could not get the lock - channel could
3507 		 * be in the process of being unconfigured
3508 		 * or reader has encountered an error
3509 		 */
3510 		return (EAGAIN);
3511 	}
3512 
3513 	/* check if non-zero data to write */
3514 	if (buf == NULL || sizep == NULL) {
3515 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3516 		    ldcp->id);
3517 		mutex_exit(&ldcp->tx_lock);
3518 		return (EINVAL);
3519 	}
3520 
3521 	if (*sizep == 0) {
3522 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3523 		    ldcp->id);
3524 		mutex_exit(&ldcp->tx_lock);
3525 		return (0);
3526 	}
3527 
3528 	/* Check if channel is UP for data exchange */
3529 	if (ldcp->tstate != TS_UP) {
3530 		DWARN(ldcp->id,
3531 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3532 		    ldcp->id);
3533 		*sizep = 0;
3534 		rv = ECONNRESET;
3535 	} else {
3536 		rv = ldcp->write_p(ldcp, buf, sizep);
3537 	}
3538 
3539 	mutex_exit(&ldcp->tx_lock);
3540 
3541 	return (rv);
3542 }
3543 
3544 /*
3545  * Write a raw packet to the channel
3546  * On return, size contains the number of bytes written.
3547  */
3548 static int
3549 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3550 {
3551 	ldc_msg_t 	*ldcmsg;
3552 	uint64_t 	tx_head, tx_tail, new_tail;
3553 	int		rv = 0;
3554 	size_t		size;
3555 
3556 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3557 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3558 
3559 	size = *sizep;
3560 
3561 	/*
3562 	 * Check to see if the packet size is less than or
3563 	 * equal to packet size support in raw mode
3564 	 */
3565 	if (size > ldcp->pkt_payload) {
3566 		DWARN(ldcp->id,
3567 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3568 		    ldcp->id, *sizep);
3569 		*sizep = 0;
3570 		return (EMSGSIZE);
3571 	}
3572 
3573 	/* get the qptrs for the tx queue */
3574 	rv = hv_ldc_tx_get_state(ldcp->id,
3575 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3576 	if (rv != 0) {
3577 		cmn_err(CE_WARN,
3578 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3579 		*sizep = 0;
3580 		return (EIO);
3581 	}
3582 
3583 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3584 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3585 		DWARN(ldcp->id,
3586 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3587 
3588 		*sizep = 0;
3589 		if (mutex_tryenter(&ldcp->lock)) {
3590 			i_ldc_reset(ldcp);
3591 			mutex_exit(&ldcp->lock);
3592 		} else {
3593 			/*
3594 			 * Release Tx lock, and then reacquire channel
3595 			 * and Tx lock in correct order
3596 			 */
3597 			mutex_exit(&ldcp->tx_lock);
3598 			mutex_enter(&ldcp->lock);
3599 			mutex_enter(&ldcp->tx_lock);
3600 			i_ldc_reset(ldcp);
3601 			mutex_exit(&ldcp->lock);
3602 		}
3603 		return (ECONNRESET);
3604 	}
3605 
3606 	tx_tail = ldcp->tx_tail;
3607 	tx_head = ldcp->tx_head;
3608 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3609 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3610 
3611 	if (new_tail == tx_head) {
3612 		DWARN(DBG_ALL_LDCS,
3613 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3614 		*sizep = 0;
3615 		return (EWOULDBLOCK);
3616 	}
3617 
3618 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3619 	    ldcp->id, size);
3620 
3621 	/* Send the data now */
3622 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3623 
3624 	/* copy the data into pkt */
3625 	bcopy((uint8_t *)buf, ldcmsg, size);
3626 
3627 	/* increment tail */
3628 	tx_tail = new_tail;
3629 
3630 	/*
3631 	 * All packets have been copied into the TX queue
3632 	 * update the tail ptr in the HV
3633 	 */
3634 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3635 	if (rv) {
3636 		if (rv == EWOULDBLOCK) {
3637 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3638 			    ldcp->id);
3639 			*sizep = 0;
3640 			return (EWOULDBLOCK);
3641 		}
3642 
3643 		*sizep = 0;
3644 		if (mutex_tryenter(&ldcp->lock)) {
3645 			i_ldc_reset(ldcp);
3646 			mutex_exit(&ldcp->lock);
3647 		} else {
3648 			/*
3649 			 * Release Tx lock, and then reacquire channel
3650 			 * and Tx lock in correct order
3651 			 */
3652 			mutex_exit(&ldcp->tx_lock);
3653 			mutex_enter(&ldcp->lock);
3654 			mutex_enter(&ldcp->tx_lock);
3655 			i_ldc_reset(ldcp);
3656 			mutex_exit(&ldcp->lock);
3657 		}
3658 		return (ECONNRESET);
3659 	}
3660 
3661 	ldcp->tx_tail = tx_tail;
3662 	*sizep = size;
3663 
3664 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3665 
3666 	return (rv);
3667 }
3668 
3669 
3670 /*
3671  * Write specified amount of bytes to the channel
3672  * in multiple pkts of pkt_payload size. Each
3673  * packet is tagged with an unique packet ID in
3674  * the case of a reliable link.
3675  *
3676  * On return, size contains the number of bytes written.
3677  * This function needs to ensure that the write size is < MTU size
3678  */
3679 static int
3680 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3681 {
3682 	ldc_msg_t 	*ldcmsg;
3683 	uint64_t 	tx_head, tx_tail, new_tail, start;
3684 	uint64_t	txq_size_mask, numavail;
3685 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3686 	size_t 		len, bytes_written = 0, remaining;
3687 	int		rv;
3688 	uint32_t	curr_seqid;
3689 
3690 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3691 
3692 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3693 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3694 		ldcp->mode == LDC_MODE_STREAM);
3695 
3696 	/* compute mask for increment */
3697 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3698 
3699 	/* get the qptrs for the tx queue */
3700 	rv = hv_ldc_tx_get_state(ldcp->id,
3701 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3702 	if (rv != 0) {
3703 		cmn_err(CE_WARN,
3704 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3705 		*size = 0;
3706 		return (EIO);
3707 	}
3708 
3709 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3710 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3711 		DWARN(ldcp->id,
3712 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3713 		*size = 0;
3714 		if (mutex_tryenter(&ldcp->lock)) {
3715 			i_ldc_reset(ldcp);
3716 			mutex_exit(&ldcp->lock);
3717 		} else {
3718 			/*
3719 			 * Release Tx lock, and then reacquire channel
3720 			 * and Tx lock in correct order
3721 			 */
3722 			mutex_exit(&ldcp->tx_lock);
3723 			mutex_enter(&ldcp->lock);
3724 			mutex_enter(&ldcp->tx_lock);
3725 			i_ldc_reset(ldcp);
3726 			mutex_exit(&ldcp->lock);
3727 		}
3728 		return (ECONNRESET);
3729 	}
3730 
3731 	tx_tail = ldcp->tx_tail;
3732 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3733 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3734 
3735 	/*
3736 	 * Link mode determines whether we use HV Tx head or the
3737 	 * private protocol head (corresponding to last ACKd pkt) for
3738 	 * determining how much we can write
3739 	 */
3740 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3741 		ldcp->mode == LDC_MODE_STREAM)
3742 		? ldcp->tx_ackd_head : ldcp->tx_head;
3743 	if (new_tail == tx_head) {
3744 		DWARN(DBG_ALL_LDCS,
3745 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3746 		*size = 0;
3747 		return (EWOULDBLOCK);
3748 	}
3749 
3750 	/*
3751 	 * Make sure that the LDC Tx queue has enough space
3752 	 */
3753 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3754 		+ ldcp->tx_q_entries - 1;
3755 	numavail %= ldcp->tx_q_entries;
3756 
3757 	if (*size > (numavail * ldcp->pkt_payload)) {
3758 		DWARN(DBG_ALL_LDCS,
3759 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3760 		return (EWOULDBLOCK);
3761 	}
3762 
3763 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3764 	    ldcp->id, *size);
3765 
3766 	/* Send the data now */
3767 	bytes_written = 0;
3768 	curr_seqid = ldcp->last_msg_snt;
3769 	start = tx_tail;
3770 
3771 	while (*size > bytes_written) {
3772 
3773 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3774 
3775 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3776 			ldcp->mode == LDC_MODE_STREAM)
3777 			? ldcmsg->rdata : ldcmsg->udata);
3778 
3779 		ldcmsg->type = LDC_DATA;
3780 		ldcmsg->stype = LDC_INFO;
3781 		ldcmsg->ctrl = 0;
3782 
3783 		remaining = *size - bytes_written;
3784 		len = min(ldcp->pkt_payload, remaining);
3785 		ldcmsg->env = (uint8_t)len;
3786 
3787 		curr_seqid++;
3788 		ldcmsg->seqid = curr_seqid;
3789 
3790 		DUMP_LDC_PKT(ldcp, "ldc_write snd data", (uint64_t)ldcmsg);
3791 
3792 		/* copy the data into pkt */
3793 		bcopy(source, msgbuf, len);
3794 
3795 		source += len;
3796 		bytes_written += len;
3797 
3798 		/* increment tail */
3799 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3800 
3801 		ASSERT(tx_tail != tx_head);
3802 	}
3803 
3804 	/* Set the start and stop bits */
3805 	ldcmsg->env |= LDC_FRAG_STOP;
3806 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3807 	ldcmsg->env |= LDC_FRAG_START;
3808 
3809 	/*
3810 	 * All packets have been copied into the TX queue
3811 	 * update the tail ptr in the HV
3812 	 */
3813 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3814 	if (rv == 0) {
3815 		ldcp->tx_tail = tx_tail;
3816 		ldcp->last_msg_snt = curr_seqid;
3817 		*size = bytes_written;
3818 	} else {
3819 		int rv2;
3820 
3821 		if (rv != EWOULDBLOCK) {
3822 			*size = 0;
3823 			if (mutex_tryenter(&ldcp->lock)) {
3824 				i_ldc_reset(ldcp);
3825 				mutex_exit(&ldcp->lock);
3826 			} else {
3827 				/*
3828 				 * Release Tx lock, and then reacquire channel
3829 				 * and Tx lock in correct order
3830 				 */
3831 				mutex_exit(&ldcp->tx_lock);
3832 				mutex_enter(&ldcp->lock);
3833 				mutex_enter(&ldcp->tx_lock);
3834 				i_ldc_reset(ldcp);
3835 				mutex_exit(&ldcp->lock);
3836 			}
3837 			return (ECONNRESET);
3838 		}
3839 
3840 		DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
3841 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
3842 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
3843 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
3844 
3845 		rv2 = hv_ldc_tx_get_state(ldcp->id,
3846 		    &tx_head, &tx_tail, &ldcp->link_state);
3847 
3848 		DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
3849 			"(head 0x%x, tail 0x%x state 0x%x)\n",
3850 			rv2, tx_head, tx_tail, ldcp->link_state);
3851 
3852 		*size = 0;
3853 	}
3854 
3855 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
3856 
3857 	return (rv);
3858 }
3859 
3860 /*
3861  * Write specified amount of bytes to the channel
3862  * in multiple pkts of pkt_payload size. Each
3863  * packet is tagged with an unique packet ID in
3864  * the case of a reliable link.
3865  *
3866  * On return, size contains the number of bytes written.
3867  * This function needs to ensure that the write size is < MTU size
3868  */
3869 static int
3870 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3871 {
3872 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3873 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
3874 
3875 	/* Truncate packet to max of MTU size */
3876 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
3877 	return (i_ldc_write_packet(ldcp, buf, sizep));
3878 }
3879 
3880 
3881 /*
3882  * Interfaces for channel nexus to register/unregister with LDC module
3883  * The nexus will register functions to be used to register individual
3884  * channels with the nexus and enable interrupts for the channels
3885  */
3886 int
3887 ldc_register(ldc_cnex_t *cinfo)
3888 {
3889 	ldc_chan_t	*ldcp;
3890 
3891 	if (cinfo == NULL || cinfo->dip == NULL ||
3892 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
3893 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
3894 	    cinfo->clr_intr == NULL) {
3895 
3896 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
3897 		return (EINVAL);
3898 	}
3899 
3900 	mutex_enter(&ldcssp->lock);
3901 
3902 	/* nexus registration */
3903 	ldcssp->cinfo.dip = cinfo->dip;
3904 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
3905 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
3906 	ldcssp->cinfo.add_intr = cinfo->add_intr;
3907 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
3908 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
3909 
3910 	/* register any channels that might have been previously initialized */
3911 	ldcp = ldcssp->chan_list;
3912 	while (ldcp) {
3913 		if ((ldcp->tstate & TS_QCONF_RDY) &&
3914 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
3915 			(void) i_ldc_register_channel(ldcp);
3916 
3917 		ldcp = ldcp->next;
3918 	}
3919 
3920 	mutex_exit(&ldcssp->lock);
3921 
3922 	return (0);
3923 }
3924 
3925 int
3926 ldc_unregister(ldc_cnex_t *cinfo)
3927 {
3928 	if (cinfo == NULL || cinfo->dip == NULL) {
3929 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
3930 		return (EINVAL);
3931 	}
3932 
3933 	mutex_enter(&ldcssp->lock);
3934 
3935 	if (cinfo->dip != ldcssp->cinfo.dip) {
3936 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
3937 		mutex_exit(&ldcssp->lock);
3938 		return (EINVAL);
3939 	}
3940 
3941 	/* nexus unregister */
3942 	ldcssp->cinfo.dip = NULL;
3943 	ldcssp->cinfo.reg_chan = NULL;
3944 	ldcssp->cinfo.unreg_chan = NULL;
3945 	ldcssp->cinfo.add_intr = NULL;
3946 	ldcssp->cinfo.rem_intr = NULL;
3947 	ldcssp->cinfo.clr_intr = NULL;
3948 
3949 	mutex_exit(&ldcssp->lock);
3950 
3951 	return (0);
3952 }
3953 
3954 
3955 /* ------------------------------------------------------------------------- */
3956 
3957 /*
3958  * Allocate a memory handle for the channel and link it into the list
3959  * Also choose which memory table to use if this is the first handle
3960  * being assigned to this channel
3961  */
3962 int
3963 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
3964 {
3965 	ldc_chan_t 	*ldcp;
3966 	ldc_mhdl_t	*mhdl;
3967 
3968 	if (handle == NULL) {
3969 		DWARN(DBG_ALL_LDCS,
3970 		    "ldc_mem_alloc_handle: invalid channel handle\n");
3971 		return (EINVAL);
3972 	}
3973 	ldcp = (ldc_chan_t *)handle;
3974 
3975 	mutex_enter(&ldcp->lock);
3976 
3977 	/* check to see if channel is initalized */
3978 	if (ldcp->tstate < TS_INIT) {
3979 		DWARN(ldcp->id,
3980 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
3981 		    ldcp->id);
3982 		mutex_exit(&ldcp->lock);
3983 		return (EINVAL);
3984 	}
3985 
3986 	/* allocate handle for channel */
3987 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
3988 
3989 	/* initialize the lock */
3990 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
3991 
3992 	mhdl->myshadow = B_FALSE;
3993 	mhdl->memseg = NULL;
3994 	mhdl->ldcp = ldcp;
3995 	mhdl->status = LDC_UNBOUND;
3996 
3997 	/* insert memory handle (@ head) into list */
3998 	if (ldcp->mhdl_list == NULL) {
3999 		ldcp->mhdl_list = mhdl;
4000 		mhdl->next = NULL;
4001 	} else {
4002 		/* insert @ head */
4003 		mhdl->next = ldcp->mhdl_list;
4004 		ldcp->mhdl_list = mhdl;
4005 	}
4006 
4007 	/* return the handle */
4008 	*mhandle = (ldc_mem_handle_t)mhdl;
4009 
4010 	mutex_exit(&ldcp->lock);
4011 
4012 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4013 	    ldcp->id, mhdl);
4014 
4015 	return (0);
4016 }
4017 
4018 /*
4019  * Free memory handle for the channel and unlink it from the list
4020  */
4021 int
4022 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4023 {
4024 	ldc_mhdl_t 	*mhdl, *phdl;
4025 	ldc_chan_t 	*ldcp;
4026 
4027 	if (mhandle == NULL) {
4028 		DWARN(DBG_ALL_LDCS,
4029 		    "ldc_mem_free_handle: invalid memory handle\n");
4030 		return (EINVAL);
4031 	}
4032 	mhdl = (ldc_mhdl_t *)mhandle;
4033 
4034 	mutex_enter(&mhdl->lock);
4035 
4036 	ldcp = mhdl->ldcp;
4037 
4038 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4039 		DWARN(ldcp->id,
4040 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4041 		    mhdl);
4042 		mutex_exit(&mhdl->lock);
4043 		return (EINVAL);
4044 	}
4045 	mutex_exit(&mhdl->lock);
4046 
4047 	mutex_enter(&ldcp->mlist_lock);
4048 
4049 	phdl = ldcp->mhdl_list;
4050 
4051 	/* first handle */
4052 	if (phdl == mhdl) {
4053 		ldcp->mhdl_list = mhdl->next;
4054 		mutex_destroy(&mhdl->lock);
4055 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4056 
4057 		D1(ldcp->id,
4058 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4059 		    ldcp->id, mhdl);
4060 	} else {
4061 		/* walk the list - unlink and free */
4062 		while (phdl != NULL) {
4063 			if (phdl->next == mhdl) {
4064 				phdl->next = mhdl->next;
4065 				mutex_destroy(&mhdl->lock);
4066 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4067 				D1(ldcp->id,
4068 				    "ldc_mem_free_handle: (0x%llx) freed "
4069 				    "handle 0x%llx\n", ldcp->id, mhdl);
4070 				break;
4071 			}
4072 			phdl = phdl->next;
4073 		}
4074 	}
4075 
4076 	if (phdl == NULL) {
4077 		DWARN(ldcp->id,
4078 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4079 		mutex_exit(&ldcp->mlist_lock);
4080 		return (EINVAL);
4081 	}
4082 
4083 	mutex_exit(&ldcp->mlist_lock);
4084 
4085 	return (0);
4086 }
4087 
4088 /*
4089  * Bind a memory handle to a virtual address.
4090  * The virtual address is converted to the corresponding real addresses.
4091  * Returns pointer to the first ldc_mem_cookie and the total number
4092  * of cookies for this virtual address. Other cookies can be obtained
4093  * using the ldc_mem_nextcookie() call. If the pages are stored in
4094  * consecutive locations in the table, a single cookie corresponding to
4095  * the first location is returned. The cookie size spans all the entries.
4096  *
4097  * If the VA corresponds to a page that is already being exported, reuse
4098  * the page and do not export it again. Bump the page's use count.
4099  */
4100 int
4101 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4102     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4103 {
4104 	ldc_mhdl_t	*mhdl;
4105 	ldc_chan_t 	*ldcp;
4106 	ldc_mtbl_t	*mtbl;
4107 	ldc_memseg_t	*memseg;
4108 	ldc_mte_t	tmp_mte;
4109 	uint64_t	index, prev_index = 0;
4110 	int64_t		cookie_idx;
4111 	uintptr_t	raddr, ra_aligned;
4112 	uint64_t	psize, poffset, v_offset;
4113 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4114 	pgcnt_t		npages;
4115 	caddr_t		v_align, addr;
4116 	int 		i, rv;
4117 
4118 	if (mhandle == NULL) {
4119 		DWARN(DBG_ALL_LDCS,
4120 		    "ldc_mem_bind_handle: invalid memory handle\n");
4121 		return (EINVAL);
4122 	}
4123 	mhdl = (ldc_mhdl_t *)mhandle;
4124 	ldcp = mhdl->ldcp;
4125 
4126 	/* clear count */
4127 	*ccount = 0;
4128 
4129 	mutex_enter(&mhdl->lock);
4130 
4131 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4132 		DWARN(ldcp->id,
4133 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4134 		    mhandle);
4135 		mutex_exit(&mhdl->lock);
4136 		return (EINVAL);
4137 	}
4138 
4139 	/* Force address and size to be 8-byte aligned */
4140 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4141 		DWARN(ldcp->id,
4142 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4143 		mutex_exit(&mhdl->lock);
4144 		return (EINVAL);
4145 	}
4146 
4147 	/*
4148 	 * If this channel is binding a memory handle for the
4149 	 * first time allocate it a memory map table and initialize it
4150 	 */
4151 	if ((mtbl = ldcp->mtbl) == NULL) {
4152 
4153 		mutex_enter(&ldcp->lock);
4154 
4155 		/* Allocate and initialize the map table structure */
4156 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4157 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4158 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4159 		mtbl->next_entry = NULL;
4160 
4161 		/* Allocate the table itself */
4162 		mtbl->table = (ldc_mte_slot_t *)
4163 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4164 		if (mtbl->table == NULL) {
4165 			cmn_err(CE_WARN,
4166 			    "ldc_mem_bind_handle: (0x%lx) error allocating "
4167 			    "table memory", ldcp->id);
4168 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4169 			mutex_exit(&ldcp->lock);
4170 			mutex_exit(&mhdl->lock);
4171 			return (ENOMEM);
4172 		}
4173 
4174 		/* zero out the memory */
4175 		bzero(mtbl->table, mtbl->size);
4176 
4177 		/* initialize the lock */
4178 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4179 
4180 		/* register table for this channel */
4181 		rv = hv_ldc_set_map_table(ldcp->id,
4182 		    va_to_pa(mtbl->table), mtbl->num_entries);
4183 		if (rv != 0) {
4184 			cmn_err(CE_WARN,
4185 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4186 			    ldcp->id, rv);
4187 			contig_mem_free(mtbl->table, mtbl->size);
4188 			mutex_destroy(&mtbl->lock);
4189 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4190 			mutex_exit(&ldcp->lock);
4191 			mutex_exit(&mhdl->lock);
4192 			return (EIO);
4193 		}
4194 
4195 		ldcp->mtbl = mtbl;
4196 		mutex_exit(&ldcp->lock);
4197 
4198 		D1(ldcp->id,
4199 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4200 		    ldcp->id, ldcp->mtbl->table);
4201 	}
4202 
4203 	/* FUTURE: get the page size, pgsz code, and shift */
4204 	pg_size = MMU_PAGESIZE;
4205 	pg_size_code = page_szc(pg_size);
4206 	pg_shift = page_get_shift(pg_size_code);
4207 	pg_mask = ~(pg_size - 1);
4208 
4209 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4210 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4211 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4212 
4213 	/* aligned VA and its offset */
4214 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4215 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4216 
4217 	npages = (len+v_offset)/pg_size;
4218 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4219 
4220 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4221 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4222 	    ldcp->id, vaddr, v_align, v_offset, npages);
4223 
4224 	/* lock the memory table - exclusive access to channel */
4225 	mutex_enter(&mtbl->lock);
4226 
4227 	if (npages > mtbl->num_avail) {
4228 		DWARN(ldcp->id,
4229 		    "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4230 		    ldcp->id);
4231 		mutex_exit(&mtbl->lock);
4232 		mutex_exit(&mhdl->lock);
4233 		return (ENOMEM);
4234 	}
4235 
4236 	/* Allocate a memseg structure */
4237 	memseg = mhdl->memseg =
4238 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4239 
4240 	/* Allocate memory to store all pages and cookies */
4241 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4242 	memseg->cookies =
4243 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4244 
4245 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4246 	    ldcp->id, npages);
4247 
4248 	addr = v_align;
4249 
4250 	/*
4251 	 * Check if direct shared memory map is enabled, if not change
4252 	 * the mapping type to include SHADOW_MAP.
4253 	 */
4254 	if (ldc_shmem_enabled == 0)
4255 		mtype = LDC_SHADOW_MAP;
4256 
4257 	/*
4258 	 * Table slots are used in a round-robin manner. The algorithm permits
4259 	 * inserting duplicate entries. Slots allocated earlier will typically
4260 	 * get freed before we get back to reusing the slot.Inserting duplicate
4261 	 * entries should be OK as we only lookup entries using the cookie addr
4262 	 * i.e. tbl index, during export, unexport and copy operation.
4263 	 *
4264 	 * One implementation what was tried was to search for a duplicate
4265 	 * page entry first and reuse it. The search overhead is very high and
4266 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4267 	 * So it does make sense to avoid searching for duplicates.
4268 	 *
4269 	 * But during the process of searching for a free slot, if we find a
4270 	 * duplicate entry we will go ahead and use it, and bump its use count.
4271 	 */
4272 
4273 	/* index to start searching from */
4274 	index = mtbl->next_entry;
4275 	cookie_idx = -1;
4276 
4277 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4278 
4279 	if (mtype & LDC_DIRECT_MAP) {
4280 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4281 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4282 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4283 	}
4284 
4285 	if (mtype & LDC_SHADOW_MAP) {
4286 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4287 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4288 	}
4289 
4290 	if (mtype & LDC_IO_MAP) {
4291 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4292 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4293 	}
4294 
4295 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4296 
4297 	tmp_mte.mte_pgszc = pg_size_code;
4298 
4299 	/* initialize each mem table entry */
4300 	for (i = 0; i < npages; i++) {
4301 
4302 		/* check if slot is available in the table */
4303 		while (mtbl->table[index].entry.ll != 0) {
4304 
4305 			index = (index + 1) % mtbl->num_entries;
4306 
4307 			if (index == mtbl->next_entry) {
4308 				/* we have looped around */
4309 				DWARN(DBG_ALL_LDCS,
4310 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4311 				    "entry\n", ldcp->id);
4312 				*ccount = 0;
4313 
4314 				/* NOTE: free memory, remove previous entries */
4315 				/* this shouldnt happen as num_avail was ok */
4316 
4317 				mutex_exit(&mtbl->lock);
4318 				mutex_exit(&mhdl->lock);
4319 				return (ENOMEM);
4320 			}
4321 		}
4322 
4323 		/* get the real address */
4324 		raddr = va_to_pa((void *)addr);
4325 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4326 
4327 		/* build the mte */
4328 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4329 
4330 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4331 
4332 		/* update entry in table */
4333 		mtbl->table[index].entry = tmp_mte;
4334 
4335 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4336 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4337 
4338 		/* calculate the size and offset for this export range */
4339 		if (i == 0) {
4340 			/* first page */
4341 			psize = min((pg_size - v_offset), len);
4342 			poffset = v_offset;
4343 
4344 		} else if (i == (npages - 1)) {
4345 			/* last page */
4346 			psize =	(((uintptr_t)(vaddr + len)) &
4347 				    ((uint64_t)(pg_size-1)));
4348 			if (psize == 0)
4349 				psize = pg_size;
4350 			poffset = 0;
4351 
4352 		} else {
4353 			/* middle pages */
4354 			psize = pg_size;
4355 			poffset = 0;
4356 		}
4357 
4358 		/* store entry for this page */
4359 		memseg->pages[i].index = index;
4360 		memseg->pages[i].raddr = raddr;
4361 		memseg->pages[i].offset = poffset;
4362 		memseg->pages[i].size = psize;
4363 		memseg->pages[i].mte = &(mtbl->table[index]);
4364 
4365 		/* create the cookie */
4366 		if (i == 0 || (index != prev_index + 1)) {
4367 			cookie_idx++;
4368 			memseg->cookies[cookie_idx].addr =
4369 				IDX2COOKIE(index, pg_size_code, pg_shift);
4370 			memseg->cookies[cookie_idx].addr |= poffset;
4371 			memseg->cookies[cookie_idx].size = psize;
4372 
4373 		} else {
4374 			memseg->cookies[cookie_idx].size += psize;
4375 		}
4376 
4377 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4378 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4379 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4380 		    ldcp->id, addr, index, raddr, psize, poffset);
4381 
4382 		/* decrement number of available entries */
4383 		mtbl->num_avail--;
4384 
4385 		/* increment va by page size */
4386 		addr += pg_size;
4387 
4388 		/* increment index */
4389 		prev_index = index;
4390 		index = (index + 1) % mtbl->num_entries;
4391 
4392 		/* save the next slot */
4393 		mtbl->next_entry = index;
4394 	}
4395 
4396 	mutex_exit(&mtbl->lock);
4397 
4398 	/* memory handle = bound */
4399 	mhdl->mtype = mtype;
4400 	mhdl->perm = perm;
4401 	mhdl->status = LDC_BOUND;
4402 
4403 	/* update memseg_t */
4404 	memseg->vaddr = vaddr;
4405 	memseg->raddr = memseg->pages[0].raddr;
4406 	memseg->size = len;
4407 	memseg->npages = npages;
4408 	memseg->ncookies = cookie_idx + 1;
4409 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4410 
4411 	/* return count and first cookie */
4412 	*ccount = memseg->ncookies;
4413 	cookie->addr = memseg->cookies[0].addr;
4414 	cookie->size = memseg->cookies[0].size;
4415 
4416 	D1(ldcp->id,
4417 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4418 	    "pgs=0x%llx cookies=0x%llx\n",
4419 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4420 
4421 	mutex_exit(&mhdl->lock);
4422 	return (0);
4423 }
4424 
4425 /*
4426  * Return the next cookie associated with the specified memory handle
4427  */
4428 int
4429 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4430 {
4431 	ldc_mhdl_t	*mhdl;
4432 	ldc_chan_t 	*ldcp;
4433 	ldc_memseg_t	*memseg;
4434 
4435 	if (mhandle == NULL) {
4436 		DWARN(DBG_ALL_LDCS,
4437 		    "ldc_mem_nextcookie: invalid memory handle\n");
4438 		return (EINVAL);
4439 	}
4440 	mhdl = (ldc_mhdl_t *)mhandle;
4441 
4442 	mutex_enter(&mhdl->lock);
4443 
4444 	ldcp = mhdl->ldcp;
4445 	memseg = mhdl->memseg;
4446 
4447 	if (cookie == 0) {
4448 		DWARN(ldcp->id,
4449 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4450 		    ldcp->id);
4451 		mutex_exit(&mhdl->lock);
4452 		return (EINVAL);
4453 	}
4454 
4455 	if (memseg->next_cookie != 0) {
4456 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4457 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4458 		memseg->next_cookie++;
4459 		if (memseg->next_cookie == memseg->ncookies)
4460 			memseg->next_cookie = 0;
4461 
4462 	} else {
4463 		DWARN(ldcp->id,
4464 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4465 		cookie->addr = 0;
4466 		cookie->size = 0;
4467 		mutex_exit(&mhdl->lock);
4468 		return (EINVAL);
4469 	}
4470 
4471 	D1(ldcp->id,
4472 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4473 	    ldcp->id, cookie->addr, cookie->size);
4474 
4475 	mutex_exit(&mhdl->lock);
4476 	return (0);
4477 }
4478 
4479 /*
4480  * Unbind the virtual memory region associated with the specified
4481  * memory handle. Allassociated cookies are freed and the corresponding
4482  * RA space is no longer exported.
4483  */
4484 int
4485 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4486 {
4487 	ldc_mhdl_t	*mhdl;
4488 	ldc_chan_t 	*ldcp;
4489 	ldc_mtbl_t	*mtbl;
4490 	ldc_memseg_t	*memseg;
4491 	uint64_t	cookie_addr;
4492 	uint64_t	pg_shift, pg_size_code;
4493 	int		i, rv;
4494 
4495 	if (mhandle == NULL) {
4496 		DWARN(DBG_ALL_LDCS,
4497 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4498 		return (EINVAL);
4499 	}
4500 	mhdl = (ldc_mhdl_t *)mhandle;
4501 
4502 	mutex_enter(&mhdl->lock);
4503 
4504 	if (mhdl->status == LDC_UNBOUND) {
4505 		DWARN(DBG_ALL_LDCS,
4506 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4507 		    mhandle);
4508 		mutex_exit(&mhdl->lock);
4509 		return (EINVAL);
4510 	}
4511 
4512 	ldcp = mhdl->ldcp;
4513 	mtbl = ldcp->mtbl;
4514 
4515 	memseg = mhdl->memseg;
4516 
4517 	/* lock the memory table - exclusive access to channel */
4518 	mutex_enter(&mtbl->lock);
4519 
4520 	/* undo the pages exported */
4521 	for (i = 0; i < memseg->npages; i++) {
4522 
4523 		/* check for mapped pages, revocation cookie != 0 */
4524 		if (memseg->pages[i].mte->cookie) {
4525 
4526 			pg_size_code = page_szc(memseg->pages[i].size);
4527 			pg_shift = page_get_shift(memseg->pages[i].size);
4528 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4529 			    pg_size_code, pg_shift);
4530 
4531 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4532 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4533 			    cookie_addr, memseg->pages[i].mte->cookie);
4534 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4535 			    memseg->pages[i].mte->cookie);
4536 			if (rv) {
4537 				DWARN(ldcp->id,
4538 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4539 				    "revoke mapping, cookie %llx\n", ldcp->id,
4540 				    cookie_addr);
4541 			}
4542 		}
4543 
4544 		/* clear the entry from the table */
4545 		memseg->pages[i].mte->entry.ll = 0;
4546 		mtbl->num_avail++;
4547 	}
4548 	mutex_exit(&mtbl->lock);
4549 
4550 	/* free the allocated memseg and page structures */
4551 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4552 	kmem_free(memseg->cookies,
4553 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4554 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4555 
4556 	/* uninitialize the memory handle */
4557 	mhdl->memseg = NULL;
4558 	mhdl->status = LDC_UNBOUND;
4559 
4560 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4561 	    ldcp->id, mhdl);
4562 
4563 	mutex_exit(&mhdl->lock);
4564 	return (0);
4565 }
4566 
4567 /*
4568  * Get information about the dring. The base address of the descriptor
4569  * ring along with the type and permission are returned back.
4570  */
4571 int
4572 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4573 {
4574 	ldc_mhdl_t	*mhdl;
4575 
4576 	if (mhandle == NULL) {
4577 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4578 		return (EINVAL);
4579 	}
4580 	mhdl = (ldc_mhdl_t *)mhandle;
4581 
4582 	if (minfo == NULL) {
4583 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4584 		return (EINVAL);
4585 	}
4586 
4587 	mutex_enter(&mhdl->lock);
4588 
4589 	minfo->status = mhdl->status;
4590 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4591 		minfo->vaddr = mhdl->memseg->vaddr;
4592 		minfo->raddr = mhdl->memseg->raddr;
4593 		minfo->mtype = mhdl->mtype;
4594 		minfo->perm = mhdl->perm;
4595 	}
4596 	mutex_exit(&mhdl->lock);
4597 
4598 	return (0);
4599 }
4600 
/*
 * Copy data either from or to the client specified virtual address
 * space to or from the exported memory associated with the cookies.
 * The direction argument determines whether the data is read from or
 * written to exported memory.
 *
 * handle    - channel used for the copy; its tstate must be TS_UP
 * vaddr     - local buffer, must be 8-byte aligned
 * off       - byte offset into the exported memory described by the
 *             cookie list at which the copy starts
 * size      - in: number of bytes to copy, must be a multiple of 8;
 *             if hv_ldc_copy() fails it is rewritten to the number of
 *             bytes copied before the failure
 * cookies   - array of 'ccount' cookies describing the exported memory
 * ccount    - number of entries in 'cookies'
 * direction - handed to hv_ldc_copy() unchanged
 *
 * Returns 0 on success; EINVAL for a NULL handle, a channel that is not
 * UP, misaligned vaddr/size, or an offset/size that does not fit within
 * the exported memory; EIO if hv_ldc_copy() reports an error.
 *
 * The channel lock is held for the whole copy.
 */
int
ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
    ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
{
	ldc_chan_t 	*ldcp;
	uint64_t	local_voff, local_valign;
	uint64_t	cookie_addr, cookie_size;
	uint64_t	pg_shift, pg_size, pg_size_code;
	uint64_t 	export_caddr, export_poff, export_psize, export_size;
	uint64_t	local_ra, local_poff, local_psize;
	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
	pgcnt_t		npages;
	size_t		len = *size;
	int 		i, rv = 0;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	/* check to see if channel is UP */
	if (ldcp->tstate != TS_UP) {
		DWARN(ldcp->id, "ldc_mem_copy: (0x%llx) channel is not UP\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/* Force address and size to be 8-byte aligned */
	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
		DWARN(ldcp->id,
		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/* Find the size of the exported memory */
	export_size = 0;
	for (i = 0; i < ccount; i++)
		export_size += cookies[i].size;

	/*
	 * check to see if offset is valid
	 *
	 * NOTE(review): off == export_size is accepted here; together with
	 * *size == 0 the cookie search further down ends with idx == ccount
	 * and cookies[idx] is then read - confirm callers never do this.
	 */
	if (off > export_size) {
		DWARN(ldcp->id,
		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/*
	 * Check to see if the export size is smaller than the size we
	 * are requesting to copy - if so flag an error
	 */
	if ((export_size - off) < *size) {
		DWARN(ldcp->id,
		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/* total_bal counts down the bytes that still have to be copied */
	total_bal = min(export_size, *size);

	/* FUTURE: get the page size, pgsz code, and shift */
	pg_size = MMU_PAGESIZE;
	pg_size_code = page_szc(pg_size);
	pg_shift = page_get_shift(pg_size_code);

	D1(ldcp->id, "ldc_mem_copy: copying data "
	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);

	/* aligned VA and its offset */
	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);

	/* number of local pages touched; only used in the debug message */
	npages = (len+local_voff)/pg_size;
	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;

	D1(ldcp->id,
	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
	    ldcp->id, vaddr, local_valign, local_voff, npages);

	/* first local chunk: from vaddr up to the end of its page */
	local_ra = va_to_pa((void *)local_valign);
	local_poff = local_voff;
	local_psize = min(len, (pg_size - local_voff));

	/* len now holds the local bytes that lie beyond the current page */
	len -= local_psize;

	/*
	 * find the first cookie in the list of cookies
	 * if the offset passed in is not zero
	 */
	for (idx = 0; idx < ccount; idx++) {
		cookie_size = cookies[idx].size;
		if (off < cookie_size)
			break;
		off -= cookie_size;
	}

	/* 'off' is now relative to the start of cookies[idx] */
	cookie_addr = cookies[idx].addr + off;
	cookie_size = cookies[idx].size - off;

	/* first exported chunk: from cookie_addr to end of page or cookie */
	export_caddr = cookie_addr & ~(pg_size - 1);
	export_poff = cookie_addr & (pg_size - 1);
	export_psize = min(cookie_size, (pg_size - export_poff));

	/*
	 * Each pass copies at most the smaller of what is left in the
	 * current exported page chunk and the current local page chunk.
	 */
	for (;;) {

		copy_size = min(export_psize, local_psize);

		D1(ldcp->id,
		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
		    " total_bal=0x%llx\n",
		    ldcp->id, direction, export_caddr, local_ra, export_poff,
		    local_poff, export_psize, local_psize, copy_size,
		    total_bal);

		rv = hv_ldc_copy(ldcp->id, direction,
		    (export_caddr + export_poff), (local_ra + local_poff),
		    copy_size, &copied_len);

		if (rv != 0) {
			cmn_err(CE_WARN,
			    "ldc_mem_copy: (0x%lx) err %d during copy\n",
			    ldcp->id, rv);
			DWARN(ldcp->id,
			    "ldc_mem_copy: (0x%lx) dir=0x%x, caddr=0x%lx, "
			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
			    " copied_len=0x%lx, total_bal=0x%lx\n",
			    ldcp->id, direction, export_caddr, local_ra,
			    export_poff, local_poff, export_psize, local_psize,
			    copy_size, copied_len, total_bal);

			/* report back how many bytes were copied so far */
			*size = *size - total_bal;
			mutex_exit(&ldcp->lock);
			return (EIO);
		}

		ASSERT(copied_len <= copy_size);

		D2(ldcp->id, "ldc_mem_copy: copied=0x%llx\n", copied_len);
		export_poff += copied_len;
		local_poff += copied_len;
		export_psize -= copied_len;
		local_psize -= copied_len;
		cookie_size -= copied_len;

		total_bal -= copied_len;

		/* short copy - go around again for the rest of this chunk */
		if (copy_size != copied_len)
			continue;

		/* exported chunk used up - advance to the next one */
		if (export_psize == 0 && total_bal != 0) {

			if (cookie_size == 0) {
				/* current cookie exhausted - use the next */
				idx++;
				cookie_addr = cookies[idx].addr;
				cookie_size = cookies[idx].size;

				export_caddr = cookie_addr & ~(pg_size - 1);
				export_poff = cookie_addr & (pg_size - 1);
				export_psize =
					min(cookie_size, (pg_size-export_poff));
			} else {
				/* same cookie, next page */
				export_caddr += pg_size;
				export_poff = 0;
				export_psize = min(cookie_size, pg_size);
			}
		}

		/* local chunk used up - translate the next local page */
		if (local_psize == 0 && total_bal != 0) {
			local_valign += pg_size;
			local_ra = va_to_pa((void *)local_valign);
			local_poff = 0;
			local_psize = min(pg_size, len);
			len -= local_psize;
		}

		/* check if we are all done */
		if (total_bal == 0)
			break;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id,
	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
	    ldcp->id, *size);

	return (0);
}
4806 
/*
 * Copy data either from or to the client specified virtual address
 * space to or from HV physical memory.
 *
 * The direction argument determines whether the data is read from or
 * written to HV memory. direction values are LDC_COPY_IN/OUT similar
 * to the ldc_mem_copy interface
 *
 * handle    - channel used for the copy; its tstate must be TS_UP
 * vaddr     - local buffer, must be 8-byte aligned
 * size      - in: number of bytes to copy, must be a multiple of 8;
 *             if hv_ldc_copy() fails it is rewritten to the number of
 *             bytes copied before the failure
 * paddr     - target physical address
 * direction - handed to hv_ldc_copy() unchanged
 *
 * Returns 0 on success; EINVAL for a NULL handle, a channel that is not
 * UP, or misaligned vaddr/size; otherwise the hv_ldc_copy() error
 * translated by i_ldc_h2v_error().
 *
 * The channel lock is held for the whole copy.
 */
int
ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
    caddr_t paddr, uint8_t direction)
{
	ldc_chan_t 	*ldcp;
	uint64_t	local_voff, local_valign;
	uint64_t	pg_shift, pg_size, pg_size_code;
	uint64_t 	target_pa, target_poff, target_psize, target_size;
	uint64_t	local_ra, local_poff, local_psize;
	uint64_t	copy_size, copied_len = 0;
	pgcnt_t		npages;
	size_t		len = *size;
	int 		rv = 0;

	if (handle == NULL) {
		DWARN(DBG_ALL_LDCS,
		    "ldc_mem_rdwr_pa: invalid channel handle\n");
		return (EINVAL);
	}
	ldcp = (ldc_chan_t *)handle;

	mutex_enter(&ldcp->lock);

	/* check to see if channel is UP */
	if (ldcp->tstate != TS_UP) {
		DWARN(ldcp->id,
		    "ldc_mem_rdwr_pa: (0x%llx) channel is not UP\n",
		    ldcp->id);
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/* Force address and size to be 8-byte aligned */
	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
		DWARN(ldcp->id,
		    "ldc_mem_rdwr_pa: addr/size is not 8-byte aligned\n");
		mutex_exit(&ldcp->lock);
		return (EINVAL);
	}

	/* target_size counts down the bytes that still have to be copied */
	target_size = *size;

	/* FUTURE: get the page size, pgsz code, and shift */
	pg_size = MMU_PAGESIZE;
	pg_size_code = page_szc(pg_size);
	pg_shift = page_get_shift(pg_size_code);

	D1(ldcp->id, "ldc_mem_rdwr_pa: copying data "
	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);

	/* aligned VA and its offset */
	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);

	/* number of local pages touched; only used in the debug message */
	npages = (len + local_voff) / pg_size;
	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;

	D1(ldcp->id,
	    "ldc_mem_rdwr_pa: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
	    ldcp->id, vaddr, local_valign, local_voff, npages);

	/* first local chunk: from vaddr up to the end of its page */
	local_ra = va_to_pa((void *)local_valign);
	local_poff = local_voff;
	local_psize = min(len, (pg_size - local_voff));

	/* len now holds the local bytes that lie beyond the current page */
	len -= local_psize;

	/* first target chunk: from paddr up to the end of its page */
	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
	target_psize = pg_size - target_poff;

	/*
	 * Each pass copies at most the smaller of what is left in the
	 * current target page chunk and the current local page chunk.
	 */
	for (;;) {

		copy_size = min(target_psize, local_psize);

		D1(ldcp->id,
		    "ldc_mem_rdwr_pa: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
		    " total_bal=0x%llx\n",
		    ldcp->id, direction, target_pa, local_ra, target_poff,
		    local_poff, target_psize, local_psize, copy_size,
		    target_size);

		rv = hv_ldc_copy(ldcp->id, direction,
		    (target_pa + target_poff), (local_ra + local_poff),
		    copy_size, &copied_len);

		if (rv != 0) {
			cmn_err(CE_WARN,
			    "ldc_mem_rdwr_pa: (0x%lx) err %d during copy\n",
			    ldcp->id, rv);
			DWARN(DBG_ALL_LDCS,
			    "ldc_mem_rdwr_pa: (0x%llx) dir=%lld,tar_pa=0x%llx, "
			    "loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
			    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
			    " total_bal=0x%llx\n",
			    ldcp->id, direction, target_pa, local_ra,
			    target_poff, local_poff, target_psize, local_psize,
			    copy_size, target_size);

			/* report back how many bytes were copied so far */
			*size = *size - target_size;
			mutex_exit(&ldcp->lock);
			return (i_ldc_h2v_error(rv));
		}

		D2(ldcp->id, "ldc_mem_rdwr_pa: copied=0x%llx\n", copied_len);
		target_poff += copied_len;
		local_poff += copied_len;
		target_psize -= copied_len;
		local_psize -= copied_len;

		target_size -= copied_len;

		/* short copy - go around again for the rest of this chunk */
		if (copy_size != copied_len)
			continue;

		/* target chunk used up - move on to the next target page */
		if (target_psize == 0 && target_size != 0) {
			target_pa += pg_size;
			target_poff = 0;
			target_psize = min(pg_size, target_size);
		}

		/* local chunk used up - translate the next local page */
		if (local_psize == 0 && target_size != 0) {
			local_valign += pg_size;
			local_ra = va_to_pa((void *)local_valign);
			local_poff = 0;
			local_psize = min(pg_size, len);
			len -= local_psize;
		}

		/* check if we are all done */
		if (target_size == 0)
			break;
	}

	mutex_exit(&ldcp->lock);

	D1(ldcp->id, "ldc_mem_rdwr_pa: (0x%llx) done copying sz=0x%llx\n",
	    ldcp->id, *size);

	return (0);
}
4959 
4960 /*
4961  * Map an exported memory segment into the local address space. If the
4962  * memory range was exported for direct map access, a HV call is made
4963  * to allocate a RA range. If the map is done via a shadow copy, local
4964  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
4965  * the mapping is a direct map then the RA is returned in 'raddr'.
4966  */
4967 int
4968 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
4969     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
4970 {
4971 	int		i, j, idx, rv, retries;
4972 	ldc_chan_t 	*ldcp;
4973 	ldc_mhdl_t	*mhdl;
4974 	ldc_memseg_t	*memseg;
4975 	caddr_t		tmpaddr;
4976 	uint64_t	map_perm = perm;
4977 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
4978 	uint64_t	exp_size = 0, base_off, map_size, npages;
4979 	uint64_t	cookie_addr, cookie_off, cookie_size;
4980 	tte_t		ldc_tte;
4981 
4982 	if (mhandle == NULL) {
4983 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
4984 		return (EINVAL);
4985 	}
4986 	mhdl = (ldc_mhdl_t *)mhandle;
4987 
4988 	mutex_enter(&mhdl->lock);
4989 
4990 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
4991 	    mhdl->memseg != NULL) {
4992 		DWARN(DBG_ALL_LDCS,
4993 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
4994 		mutex_exit(&mhdl->lock);
4995 		return (EINVAL);
4996 	}
4997 
4998 	ldcp = mhdl->ldcp;
4999 
5000 	mutex_enter(&ldcp->lock);
5001 
5002 	if (ldcp->tstate != TS_UP) {
5003 		DWARN(ldcp->id,
5004 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5005 		    ldcp->id);
5006 		mutex_exit(&ldcp->lock);
5007 		mutex_exit(&mhdl->lock);
5008 		return (EINVAL);
5009 	}
5010 
5011 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5012 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5013 		mutex_exit(&ldcp->lock);
5014 		mutex_exit(&mhdl->lock);
5015 		return (EINVAL);
5016 	}
5017 
5018 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5019 	    ldcp->id, cookie->addr, cookie->size);
5020 
5021 	/* FUTURE: get the page size, pgsz code, and shift */
5022 	pg_size = MMU_PAGESIZE;
5023 	pg_size_code = page_szc(pg_size);
5024 	pg_shift = page_get_shift(pg_size_code);
5025 	pg_mask = ~(pg_size - 1);
5026 
5027 	/* calculate the number of pages in the exported cookie */
5028 	base_off = cookie[0].addr & (pg_size - 1);
5029 	for (idx = 0; idx < ccount; idx++)
5030 		exp_size += cookie[idx].size;
5031 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5032 	npages = (map_size >> pg_shift);
5033 
5034 	/* Allocate memseg structure */
5035 	memseg = mhdl->memseg =
5036 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5037 
5038 	/* Allocate memory to store all pages and cookies */
5039 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5040 	memseg->cookies =
5041 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5042 
5043 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5044 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5045 
5046 	/*
5047 	 * Check if direct map over shared memory is enabled, if not change
5048 	 * the mapping type to SHADOW_MAP.
5049 	 */
5050 	if (ldc_shmem_enabled == 0)
5051 		mtype = LDC_SHADOW_MAP;
5052 
5053 	/*
5054 	 * Check to see if the client is requesting direct or shadow map
5055 	 * If direct map is requested, try to map remote memory first,
5056 	 * and if that fails, revert to shadow map
5057 	 */
5058 	if (mtype == LDC_DIRECT_MAP) {
5059 
5060 		/* Allocate kernel virtual space for mapping */
5061 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5062 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5063 		if (memseg->vaddr == NULL) {
5064 			cmn_err(CE_WARN,
5065 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5066 			    ldcp->id);
5067 			kmem_free(memseg->cookies,
5068 			    (sizeof (ldc_mem_cookie_t) * ccount));
5069 			kmem_free(memseg->pages,
5070 			    (sizeof (ldc_page_t) * npages));
5071 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5072 
5073 			mutex_exit(&ldcp->lock);
5074 			mutex_exit(&mhdl->lock);
5075 			return (ENOMEM);
5076 		}
5077 
5078 		/* Unload previous mapping */
5079 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5080 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5081 
5082 		/* for each cookie passed in - map into address space */
5083 		idx = 0;
5084 		cookie_size = 0;
5085 		tmpaddr = memseg->vaddr;
5086 
5087 		for (i = 0; i < npages; i++) {
5088 
5089 			if (cookie_size == 0) {
5090 				ASSERT(idx < ccount);
5091 				cookie_addr = cookie[idx].addr & pg_mask;
5092 				cookie_off = cookie[idx].addr & (pg_size - 1);
5093 				cookie_size =
5094 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5095 					pg_size);
5096 				idx++;
5097 			}
5098 
5099 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5100 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5101 			    cookie_addr, cookie_size);
5102 
5103 			/* map the cookie into address space */
5104 			for (retries = 0; retries < ldc_max_retries;
5105 			    retries++) {
5106 
5107 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5108 				    &memseg->pages[i].raddr, &map_perm);
5109 
5110 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5111 					break;
5112 
5113 				drv_usecwait(ldc_delay);
5114 			}
5115 
5116 			if (rv || memseg->pages[i].raddr == 0) {
5117 				DWARN(ldcp->id,
5118 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5119 				    ldcp->id, rv);
5120 
5121 				/* remove previous mapins */
5122 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5123 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5124 				for (j = 0; j < i; j++) {
5125 					rv = hv_ldc_unmap(
5126 							memseg->pages[j].raddr);
5127 					if (rv) {
5128 						DWARN(ldcp->id,
5129 						    "ldc_mem_map: (0x%llx) "
5130 						    "cannot unmap ra=0x%llx\n",
5131 					    ldcp->id,
5132 						    memseg->pages[j].raddr);
5133 					}
5134 				}
5135 
5136 				/* free kernel virtual space */
5137 				vmem_free(heap_arena, (void *)memseg->vaddr,
5138 				    memseg->size);
5139 
5140 				/* direct map failed - revert to shadow map */
5141 				mtype = LDC_SHADOW_MAP;
5142 				break;
5143 
5144 			} else {
5145 
5146 				D1(ldcp->id,
5147 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5148 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5149 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5150 				    cookie_addr, perm);
5151 
5152 				/*
5153 				 * NOTE: Calling hat_devload directly, causes it
5154 				 * to look for page_t using the pfn. Since this
5155 				 * addr is greater than the memlist, it treates
5156 				 * it as non-memory
5157 				 */
5158 				sfmmu_memtte(&ldc_tte,
5159 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5160 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5161 
5162 				D1(ldcp->id,
5163 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5164 				    "tte 0x%llx\n", ldcp->id,
5165 				    memseg->pages[i].raddr, ldc_tte);
5166 
5167 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5168 				    NULL, HAT_LOAD_LOCK);
5169 
5170 				cookie_size -= pg_size;
5171 				cookie_addr += pg_size;
5172 				tmpaddr += pg_size;
5173 			}
5174 		}
5175 	}
5176 
5177 	if (mtype == LDC_SHADOW_MAP) {
5178 		if (*vaddr == NULL) {
5179 			memseg->vaddr =
5180 				contig_mem_alloc_align(exp_size, PAGESIZE);
5181 			if (memseg->vaddr == NULL) {
5182 				cmn_err(CE_WARN, "ldc_mem_map: shadow memory "
5183 				    "allocation failed\n");
5184 				kmem_free(memseg->cookies,
5185 				    (sizeof (ldc_mem_cookie_t) * ccount));
5186 				kmem_free(memseg->pages,
5187 				    (sizeof (ldc_page_t) * npages));
5188 				kmem_cache_free(ldcssp->memseg_cache, memseg);
5189 				mutex_exit(&ldcp->lock);
5190 				mutex_exit(&mhdl->lock);
5191 				return (ENOMEM);
5192 			}
5193 
5194 			bzero(memseg->vaddr, exp_size);
5195 			mhdl->myshadow = B_TRUE;
5196 
5197 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5198 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5199 		} else {
5200 			/*
5201 			 * Use client supplied memory for memseg->vaddr
5202 			 * WARNING: assuming that client mem is >= exp_size
5203 			 */
5204 			memseg->vaddr = *vaddr;
5205 		}
5206 
5207 		/* Save all page and cookie information */
5208 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5209 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5210 			memseg->pages[i].size = pg_size;
5211 			tmpaddr += pg_size;
5212 		}
5213 
5214 	}
5215 
5216 	/* save all cookies */
5217 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5218 
5219 	/* update memseg_t */
5220 	memseg->raddr = memseg->pages[0].raddr;
5221 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5222 	memseg->npages = npages;
5223 	memseg->ncookies = ccount;
5224 	memseg->next_cookie = 0;
5225 
5226 	/* memory handle = mapped */
5227 	mhdl->mtype = mtype;
5228 	mhdl->perm = perm;
5229 	mhdl->status = LDC_MAPPED;
5230 
5231 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5232 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5233 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5234 	    memseg->npages, memseg->ncookies);
5235 
5236 	if (mtype == LDC_SHADOW_MAP)
5237 		base_off = 0;
5238 	if (raddr)
5239 		*raddr = (caddr_t)(memseg->raddr | base_off);
5240 	if (vaddr)
5241 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5242 
5243 	mutex_exit(&ldcp->lock);
5244 	mutex_exit(&mhdl->lock);
5245 	return (0);
5246 }
5247 
5248 /*
5249  * Unmap a memory segment. Free shadow memory (if any).
5250  */
5251 int
5252 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5253 {
5254 	int		i, rv;
5255 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5256 	ldc_chan_t 	*ldcp;
5257 	ldc_memseg_t	*memseg;
5258 
5259 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5260 		DWARN(DBG_ALL_LDCS,
5261 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5262 		    mhandle);
5263 		return (EINVAL);
5264 	}
5265 
5266 	mutex_enter(&mhdl->lock);
5267 
5268 	ldcp = mhdl->ldcp;
5269 	memseg = mhdl->memseg;
5270 
5271 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5272 	    ldcp->id, mhdl);
5273 
5274 	/* if we allocated shadow memory - free it */
5275 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5276 		contig_mem_free(memseg->vaddr, memseg->size);
5277 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5278 
5279 		/* unmap in the case of DIRECT_MAP */
5280 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5281 		    HAT_UNLOAD_UNLOCK);
5282 
5283 		for (i = 0; i < memseg->npages; i++) {
5284 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5285 			if (rv) {
5286 				cmn_err(CE_WARN,
5287 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5288 				    ldcp->id, rv);
5289 			}
5290 		}
5291 
5292 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5293 	}
5294 
5295 	/* free the allocated memseg and page structures */
5296 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5297 	kmem_free(memseg->cookies,
5298 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5299 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5300 
5301 	/* uninitialize the memory handle */
5302 	mhdl->memseg = NULL;
5303 	mhdl->status = LDC_UNBOUND;
5304 
5305 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5306 	    ldcp->id, mhdl);
5307 
5308 	mutex_exit(&mhdl->lock);
5309 	return (0);
5310 }
5311 
5312 /*
5313  * Internal entry point for LDC mapped memory entry consistency
5314  * semantics. Acquire copies the contents of the remote memory
5315  * into the local shadow copy. The release operation copies the local
5316  * contents into the remote memory. The offset and size specify the
5317  * bounds for the memory range being synchronized.
5318  */
5319 static int
5320 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5321     uint64_t offset, size_t size)
5322 {
5323 	int 		err;
5324 	ldc_mhdl_t	*mhdl;
5325 	ldc_chan_t	*ldcp;
5326 	ldc_memseg_t	*memseg;
5327 	caddr_t		local_vaddr;
5328 	size_t		copy_size;
5329 
5330 	if (mhandle == NULL) {
5331 		DWARN(DBG_ALL_LDCS,
5332 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5333 		return (EINVAL);
5334 	}
5335 	mhdl = (ldc_mhdl_t *)mhandle;
5336 
5337 	mutex_enter(&mhdl->lock);
5338 
5339 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5340 		DWARN(DBG_ALL_LDCS,
5341 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5342 		mutex_exit(&mhdl->lock);
5343 		return (EINVAL);
5344 	}
5345 
5346 	/* do nothing for direct map */
5347 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5348 		mutex_exit(&mhdl->lock);
5349 		return (0);
5350 	}
5351 
5352 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5353 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5354 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5355 		mutex_exit(&mhdl->lock);
5356 		return (0);
5357 	}
5358 
5359 	if (offset >= mhdl->memseg->size ||
5360 	    (offset + size) > mhdl->memseg->size) {
5361 		DWARN(DBG_ALL_LDCS,
5362 		    "i_ldc_mem_acquire_release: memory out of range\n");
5363 		mutex_exit(&mhdl->lock);
5364 		return (EINVAL);
5365 	}
5366 
5367 	/* get the channel handle and memory segment */
5368 	ldcp = mhdl->ldcp;
5369 	memseg = mhdl->memseg;
5370 
5371 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5372 
5373 		local_vaddr = memseg->vaddr + offset;
5374 		copy_size = size;
5375 
5376 		/* copy to/from remote from/to local memory */
5377 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5378 		    &copy_size, memseg->cookies, memseg->ncookies,
5379 		    direction);
5380 		if (err || copy_size != size) {
5381 			cmn_err(CE_WARN,
5382 			    "i_ldc_mem_acquire_release: copy failed\n");
5383 			mutex_exit(&mhdl->lock);
5384 			return (err);
5385 		}
5386 	}
5387 
5388 	mutex_exit(&mhdl->lock);
5389 
5390 	return (0);
5391 }
5392 
5393 /*
5394  * Ensure that the contents in the remote memory seg are consistent
5395  * with the contents if of local segment
5396  */
5397 int
5398 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5399 {
5400 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5401 }
5402 
5403 
5404 /*
5405  * Ensure that the contents in the local memory seg are consistent
5406  * with the contents if of remote segment
5407  */
5408 int
5409 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5410 {
5411 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5412 }
5413 
5414 /*
5415  * Allocate a descriptor ring. The size of each each descriptor
5416  * must be 8-byte aligned and the entire ring should be a multiple
5417  * of MMU_PAGESIZE.
5418  */
5419 int
5420 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5421 {
5422 	ldc_dring_t *dringp;
5423 	size_t size = (dsize * len);
5424 
5425 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5426 	    len, dsize);
5427 
5428 	if (dhandle == NULL) {
5429 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5430 		return (EINVAL);
5431 	}
5432 
5433 	if (len == 0) {
5434 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5435 		return (EINVAL);
5436 	}
5437 
5438 	/* descriptor size should be 8-byte aligned */
5439 	if (dsize == 0 || (dsize & 0x7)) {
5440 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5441 		return (EINVAL);
5442 	}
5443 
5444 	*dhandle = 0;
5445 
5446 	/* Allocate a desc ring structure */
5447 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5448 
5449 	/* Initialize dring */
5450 	dringp->length = len;
5451 	dringp->dsize = dsize;
5452 
5453 	/* round off to multiple of pagesize */
5454 	dringp->size = (size & MMU_PAGEMASK);
5455 	if (size & MMU_PAGEOFFSET)
5456 		dringp->size += MMU_PAGESIZE;
5457 
5458 	dringp->status = LDC_UNBOUND;
5459 
5460 	/* allocate descriptor ring memory */
5461 	dringp->base = contig_mem_alloc_align(dringp->size, PAGESIZE);
5462 	if (dringp->base == NULL) {
5463 		cmn_err(CE_WARN,
5464 		    "ldc_mem_dring_create: unable to alloc desc\n");
5465 		kmem_free(dringp, sizeof (ldc_dring_t));
5466 		return (ENOMEM);
5467 	}
5468 
5469 	bzero(dringp->base, dringp->size);
5470 
5471 	/* initialize the desc ring lock */
5472 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5473 
5474 	/* Add descriptor ring to the head of global list */
5475 	mutex_enter(&ldcssp->lock);
5476 	dringp->next = ldcssp->dring_list;
5477 	ldcssp->dring_list = dringp;
5478 	mutex_exit(&ldcssp->lock);
5479 
5480 	*dhandle = (ldc_dring_handle_t)dringp;
5481 
5482 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5483 
5484 	return (0);
5485 }
5486 
5487 
5488 /*
5489  * Destroy a descriptor ring.
5490  */
5491 int
5492 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5493 {
5494 	ldc_dring_t *dringp;
5495 	ldc_dring_t *tmp_dringp;
5496 
5497 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5498 
5499 	if (dhandle == NULL) {
5500 		DWARN(DBG_ALL_LDCS,
5501 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5502 		return (EINVAL);
5503 	}
5504 	dringp = (ldc_dring_t *)dhandle;
5505 
5506 	if (dringp->status == LDC_BOUND) {
5507 		DWARN(DBG_ALL_LDCS,
5508 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5509 		return (EACCES);
5510 	}
5511 
5512 	mutex_enter(&dringp->lock);
5513 	mutex_enter(&ldcssp->lock);
5514 
5515 	/* remove from linked list - if not bound */
5516 	tmp_dringp = ldcssp->dring_list;
5517 	if (tmp_dringp == dringp) {
5518 		ldcssp->dring_list = dringp->next;
5519 		dringp->next = NULL;
5520 
5521 	} else {
5522 		while (tmp_dringp != NULL) {
5523 			if (tmp_dringp->next == dringp) {
5524 				tmp_dringp->next = dringp->next;
5525 				dringp->next = NULL;
5526 				break;
5527 			}
5528 			tmp_dringp = tmp_dringp->next;
5529 		}
5530 		if (tmp_dringp == NULL) {
5531 			DWARN(DBG_ALL_LDCS,
5532 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5533 			mutex_exit(&ldcssp->lock);
5534 			mutex_exit(&dringp->lock);
5535 			return (EINVAL);
5536 		}
5537 	}
5538 
5539 	mutex_exit(&ldcssp->lock);
5540 
5541 	/* free the descriptor ring */
5542 	contig_mem_free((caddr_t)dringp->base, dringp->size);
5543 
5544 	mutex_exit(&dringp->lock);
5545 
5546 	/* destroy dring lock */
5547 	mutex_destroy(&dringp->lock);
5548 
5549 	/* free desc ring object */
5550 	kmem_free(dringp, sizeof (ldc_dring_t));
5551 
5552 	return (0);
5553 }
5554 
5555 /*
5556  * Bind a previously allocated dring to a channel. The channel should
5557  * be OPEN in order to bind the ring to the channel. Returns back a
5558  * descriptor ring cookie. The descriptor ring is exported for remote
5559  * access by the client at the other end of the channel. An entry for
5560  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5561  */
5562 int
5563 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5564     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5565 {
5566 	int		err;
5567 	ldc_chan_t 	*ldcp;
5568 	ldc_dring_t	*dringp;
5569 	ldc_mem_handle_t mhandle;
5570 
5571 	/* check to see if channel is initalized */
5572 	if (handle == NULL) {
5573 		DWARN(DBG_ALL_LDCS,
5574 		    "ldc_mem_dring_bind: invalid channel handle\n");
5575 		return (EINVAL);
5576 	}
5577 	ldcp = (ldc_chan_t *)handle;
5578 
5579 	if (dhandle == NULL) {
5580 		DWARN(DBG_ALL_LDCS,
5581 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5582 		return (EINVAL);
5583 	}
5584 	dringp = (ldc_dring_t *)dhandle;
5585 
5586 	if (cookie == NULL) {
5587 		DWARN(ldcp->id,
5588 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5589 		return (EINVAL);
5590 	}
5591 
5592 	mutex_enter(&dringp->lock);
5593 
5594 	if (dringp->status == LDC_BOUND) {
5595 		DWARN(DBG_ALL_LDCS,
5596 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5597 		    ldcp->id);
5598 		mutex_exit(&dringp->lock);
5599 		return (EINVAL);
5600 	}
5601 
5602 	if ((perm & LDC_MEM_RW) == 0) {
5603 		DWARN(DBG_ALL_LDCS,
5604 		    "ldc_mem_dring_bind: invalid permissions\n");
5605 		mutex_exit(&dringp->lock);
5606 		return (EINVAL);
5607 	}
5608 
5609 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5610 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5611 		mutex_exit(&dringp->lock);
5612 		return (EINVAL);
5613 	}
5614 
5615 	dringp->ldcp = ldcp;
5616 
5617 	/* create an memory handle */
5618 	err = ldc_mem_alloc_handle(handle, &mhandle);
5619 	if (err || mhandle == NULL) {
5620 		DWARN(DBG_ALL_LDCS,
5621 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5622 		    ldcp->id);
5623 		mutex_exit(&dringp->lock);
5624 		return (err);
5625 	}
5626 	dringp->mhdl = mhandle;
5627 
5628 	/* bind the descriptor ring to channel */
5629 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5630 	    mtype, perm, cookie, ccount);
5631 	if (err) {
5632 		DWARN(ldcp->id,
5633 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5634 		    ldcp->id);
5635 		mutex_exit(&dringp->lock);
5636 		return (err);
5637 	}
5638 
5639 	/*
5640 	 * For now return error if we get more than one cookie
5641 	 * FUTURE: Return multiple cookies ..
5642 	 */
5643 	if (*ccount > 1) {
5644 		(void) ldc_mem_unbind_handle(mhandle);
5645 		(void) ldc_mem_free_handle(mhandle);
5646 
5647 		dringp->ldcp = NULL;
5648 		dringp->mhdl = NULL;
5649 		*ccount = 0;
5650 
5651 		mutex_exit(&dringp->lock);
5652 		return (EAGAIN);
5653 	}
5654 
5655 	/* Add descriptor ring to channel's exported dring list */
5656 	mutex_enter(&ldcp->exp_dlist_lock);
5657 	dringp->ch_next = ldcp->exp_dring_list;
5658 	ldcp->exp_dring_list = dringp;
5659 	mutex_exit(&ldcp->exp_dlist_lock);
5660 
5661 	dringp->status = LDC_BOUND;
5662 
5663 	mutex_exit(&dringp->lock);
5664 
5665 	return (0);
5666 }
5667 
5668 /*
5669  * Return the next cookie associated with the specified dring handle
5670  */
5671 int
5672 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5673 {
5674 	int		rv = 0;
5675 	ldc_dring_t 	*dringp;
5676 	ldc_chan_t	*ldcp;
5677 
5678 	if (dhandle == NULL) {
5679 		DWARN(DBG_ALL_LDCS,
5680 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5681 		return (EINVAL);
5682 	}
5683 	dringp = (ldc_dring_t *)dhandle;
5684 	mutex_enter(&dringp->lock);
5685 
5686 	if (dringp->status != LDC_BOUND) {
5687 		DWARN(DBG_ALL_LDCS,
5688 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5689 		    "is not bound\n", dringp);
5690 		mutex_exit(&dringp->lock);
5691 		return (EINVAL);
5692 	}
5693 
5694 	ldcp = dringp->ldcp;
5695 
5696 	if (cookie == NULL) {
5697 		DWARN(ldcp->id,
5698 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5699 		    ldcp->id);
5700 		mutex_exit(&dringp->lock);
5701 		return (EINVAL);
5702 	}
5703 
5704 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5705 	mutex_exit(&dringp->lock);
5706 
5707 	return (rv);
5708 }
5709 /*
5710  * Unbind a previously bound dring from a channel.
5711  */
5712 int
5713 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5714 {
5715 	ldc_dring_t 	*dringp;
5716 	ldc_dring_t	*tmp_dringp;
5717 	ldc_chan_t	*ldcp;
5718 
5719 	if (dhandle == NULL) {
5720 		DWARN(DBG_ALL_LDCS,
5721 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5722 		return (EINVAL);
5723 	}
5724 	dringp = (ldc_dring_t *)dhandle;
5725 
5726 	mutex_enter(&dringp->lock);
5727 
5728 	if (dringp->status == LDC_UNBOUND) {
5729 		DWARN(DBG_ALL_LDCS,
5730 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5731 		    dringp);
5732 		mutex_exit(&dringp->lock);
5733 		return (EINVAL);
5734 	}
5735 	ldcp = dringp->ldcp;
5736 
5737 	mutex_enter(&ldcp->exp_dlist_lock);
5738 
5739 	tmp_dringp = ldcp->exp_dring_list;
5740 	if (tmp_dringp == dringp) {
5741 		ldcp->exp_dring_list = dringp->ch_next;
5742 		dringp->ch_next = NULL;
5743 
5744 	} else {
5745 		while (tmp_dringp != NULL) {
5746 			if (tmp_dringp->ch_next == dringp) {
5747 				tmp_dringp->ch_next = dringp->ch_next;
5748 				dringp->ch_next = NULL;
5749 				break;
5750 			}
5751 			tmp_dringp = tmp_dringp->ch_next;
5752 		}
5753 		if (tmp_dringp == NULL) {
5754 			DWARN(DBG_ALL_LDCS,
5755 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5756 			mutex_exit(&ldcp->exp_dlist_lock);
5757 			mutex_exit(&dringp->lock);
5758 			return (EINVAL);
5759 		}
5760 	}
5761 
5762 	mutex_exit(&ldcp->exp_dlist_lock);
5763 
5764 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5765 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5766 
5767 	dringp->ldcp = NULL;
5768 	dringp->mhdl = NULL;
5769 	dringp->status = LDC_UNBOUND;
5770 
5771 	mutex_exit(&dringp->lock);
5772 
5773 	return (0);
5774 }
5775 
5776 /*
5777  * Get information about the dring. The base address of the descriptor
5778  * ring along with the type and permission are returned back.
5779  */
5780 int
5781 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5782 {
5783 	ldc_dring_t	*dringp;
5784 	int		rv;
5785 
5786 	if (dhandle == NULL) {
5787 		DWARN(DBG_ALL_LDCS,
5788 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5789 		return (EINVAL);
5790 	}
5791 	dringp = (ldc_dring_t *)dhandle;
5792 
5793 	mutex_enter(&dringp->lock);
5794 
5795 	if (dringp->mhdl) {
5796 		rv = ldc_mem_info(dringp->mhdl, minfo);
5797 		if (rv) {
5798 			DWARN(DBG_ALL_LDCS,
5799 			    "ldc_mem_dring_info: error reading mem info\n");
5800 			mutex_exit(&dringp->lock);
5801 			return (rv);
5802 		}
5803 	} else {
5804 		minfo->vaddr = dringp->base;
5805 		minfo->raddr = NULL;
5806 		minfo->status = dringp->status;
5807 	}
5808 
5809 	mutex_exit(&dringp->lock);
5810 
5811 	return (0);
5812 }
5813 
5814 /*
5815  * Map an exported descriptor ring into the local address space. If the
5816  * descriptor ring was exported for direct map access, a HV call is made
5817  * to allocate a RA range. If the map is done via a shadow copy, local
5818  * shadow memory is allocated.
5819  */
5820 int
5821 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
5822     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
5823     ldc_dring_handle_t *dhandle)
5824 {
5825 	int		err;
5826 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
5827 	ldc_mem_handle_t mhandle;
5828 	ldc_dring_t	*dringp;
5829 	size_t		dring_size;
5830 
5831 	if (dhandle == NULL) {
5832 		DWARN(DBG_ALL_LDCS,
5833 		    "ldc_mem_dring_map: invalid dhandle\n");
5834 		return (EINVAL);
5835 	}
5836 
5837 	/* check to see if channel is initalized */
5838 	if (handle == NULL) {
5839 		DWARN(DBG_ALL_LDCS,
5840 		    "ldc_mem_dring_map: invalid channel handle\n");
5841 		return (EINVAL);
5842 	}
5843 	ldcp = (ldc_chan_t *)handle;
5844 
5845 	if (cookie == NULL) {
5846 		DWARN(ldcp->id,
5847 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
5848 		    ldcp->id);
5849 		return (EINVAL);
5850 	}
5851 
5852 	/* FUTURE: For now we support only one cookie per dring */
5853 	ASSERT(ccount == 1);
5854 
5855 	if (cookie->size < (dsize * len)) {
5856 		DWARN(ldcp->id,
5857 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
5858 		    ldcp->id);
5859 		return (EINVAL);
5860 	}
5861 
5862 	*dhandle = 0;
5863 
5864 	/* Allocate an dring structure */
5865 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5866 
5867 	D1(ldcp->id,
5868 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
5869 	    mtype, len, dsize, cookie->addr, cookie->size);
5870 
5871 	/* Initialize dring */
5872 	dringp->length = len;
5873 	dringp->dsize = dsize;
5874 
5875 	/* round of to multiple of page size */
5876 	dring_size = len * dsize;
5877 	dringp->size = (dring_size & MMU_PAGEMASK);
5878 	if (dring_size & MMU_PAGEOFFSET)
5879 		dringp->size += MMU_PAGESIZE;
5880 
5881 	dringp->ldcp = ldcp;
5882 
5883 	/* create an memory handle */
5884 	err = ldc_mem_alloc_handle(handle, &mhandle);
5885 	if (err || mhandle == NULL) {
5886 		DWARN(DBG_ALL_LDCS,
5887 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
5888 		    err);
5889 		kmem_free(dringp, sizeof (ldc_dring_t));
5890 		return (ENOMEM);
5891 	}
5892 
5893 	dringp->mhdl = mhandle;
5894 	dringp->base = NULL;
5895 
5896 	/* map the dring into local memory */
5897 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
5898 	    &(dringp->base), NULL);
5899 	if (err || dringp->base == NULL) {
5900 		cmn_err(CE_WARN,
5901 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
5902 		(void) ldc_mem_free_handle(mhandle);
5903 		kmem_free(dringp, sizeof (ldc_dring_t));
5904 		return (ENOMEM);
5905 	}
5906 
5907 	/* initialize the desc ring lock */
5908 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5909 
5910 	/* Add descriptor ring to channel's imported dring list */
5911 	mutex_enter(&ldcp->imp_dlist_lock);
5912 	dringp->ch_next = ldcp->imp_dring_list;
5913 	ldcp->imp_dring_list = dringp;
5914 	mutex_exit(&ldcp->imp_dlist_lock);
5915 
5916 	dringp->status = LDC_MAPPED;
5917 
5918 	*dhandle = (ldc_dring_handle_t)dringp;
5919 
5920 	return (0);
5921 }
5922 
5923 /*
5924  * Unmap a descriptor ring. Free shadow memory (if any).
5925  */
5926 int
5927 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
5928 {
5929 	ldc_dring_t 	*dringp;
5930 	ldc_dring_t	*tmp_dringp;
5931 	ldc_chan_t	*ldcp;
5932 
5933 	if (dhandle == NULL) {
5934 		DWARN(DBG_ALL_LDCS,
5935 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
5936 		return (EINVAL);
5937 	}
5938 	dringp = (ldc_dring_t *)dhandle;
5939 
5940 	if (dringp->status != LDC_MAPPED) {
5941 		DWARN(DBG_ALL_LDCS,
5942 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
5943 		return (EINVAL);
5944 	}
5945 
5946 	mutex_enter(&dringp->lock);
5947 
5948 	ldcp = dringp->ldcp;
5949 
5950 	mutex_enter(&ldcp->imp_dlist_lock);
5951 
5952 	/* find and unlink the desc ring from channel import list */
5953 	tmp_dringp = ldcp->imp_dring_list;
5954 	if (tmp_dringp == dringp) {
5955 		ldcp->imp_dring_list = dringp->ch_next;
5956 		dringp->ch_next = NULL;
5957 
5958 	} else {
5959 		while (tmp_dringp != NULL) {
5960 			if (tmp_dringp->ch_next == dringp) {
5961 				tmp_dringp->ch_next = dringp->ch_next;
5962 				dringp->ch_next = NULL;
5963 				break;
5964 			}
5965 			tmp_dringp = tmp_dringp->ch_next;
5966 		}
5967 		if (tmp_dringp == NULL) {
5968 			DWARN(DBG_ALL_LDCS,
5969 			    "ldc_mem_dring_unmap: invalid descriptor\n");
5970 			mutex_exit(&ldcp->imp_dlist_lock);
5971 			mutex_exit(&dringp->lock);
5972 			return (EINVAL);
5973 		}
5974 	}
5975 
5976 	mutex_exit(&ldcp->imp_dlist_lock);
5977 
5978 	/* do a LDC memory handle unmap and free */
5979 	(void) ldc_mem_unmap(dringp->mhdl);
5980 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5981 
5982 	dringp->status = 0;
5983 	dringp->ldcp = NULL;
5984 
5985 	mutex_exit(&dringp->lock);
5986 
5987 	/* destroy dring lock */
5988 	mutex_destroy(&dringp->lock);
5989 
5990 	/* free desc ring object */
5991 	kmem_free(dringp, sizeof (ldc_dring_t));
5992 
5993 	return (0);
5994 }
5995 
5996 /*
5997  * Internal entry point for descriptor ring access entry consistency
5998  * semantics. Acquire copies the contents of the remote descriptor ring
5999  * into the local shadow copy. The release operation copies the local
6000  * contents into the remote dring. The start and end locations specify
6001  * bounds for the entries being synchronized.
6002  */
6003 static int
6004 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6005     uint8_t direction, uint64_t start, uint64_t end)
6006 {
6007 	int 			err;
6008 	ldc_dring_t		*dringp;
6009 	ldc_chan_t		*ldcp;
6010 	uint64_t		soff;
6011 	size_t			copy_size;
6012 
6013 	if (dhandle == NULL) {
6014 		DWARN(DBG_ALL_LDCS,
6015 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6016 		return (EINVAL);
6017 	}
6018 	dringp = (ldc_dring_t *)dhandle;
6019 	mutex_enter(&dringp->lock);
6020 
6021 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6022 		DWARN(DBG_ALL_LDCS,
6023 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6024 		mutex_exit(&dringp->lock);
6025 		return (EINVAL);
6026 	}
6027 
6028 	if (start >= dringp->length || end >= dringp->length) {
6029 		DWARN(DBG_ALL_LDCS,
6030 		    "i_ldc_dring_acquire_release: index out of range\n");
6031 		mutex_exit(&dringp->lock);
6032 		return (EINVAL);
6033 	}
6034 
6035 	/* get the channel handle */
6036 	ldcp = dringp->ldcp;
6037 
6038 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6039 		((dringp->length - start) * dringp->dsize);
6040 
6041 	/* Calculate the relative offset for the first desc */
6042 	soff = (start * dringp->dsize);
6043 
6044 	/* copy to/from remote from/to local memory */
6045 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6046 	    soff, copy_size);
6047 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6048 	    direction, soff, copy_size);
6049 	if (err) {
6050 		DWARN(ldcp->id,
6051 		    "i_ldc_dring_acquire_release: copy failed\n");
6052 		mutex_exit(&dringp->lock);
6053 		return (err);
6054 	}
6055 
6056 	/* do the balance */
6057 	if (start > end) {
6058 		copy_size = ((end + 1) * dringp->dsize);
6059 		soff = 0;
6060 
6061 		/* copy to/from remote from/to local memory */
6062 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6063 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6064 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6065 		    direction, soff, copy_size);
6066 		if (err) {
6067 			DWARN(ldcp->id,
6068 			    "i_ldc_dring_acquire_release: copy failed\n");
6069 			mutex_exit(&dringp->lock);
6070 			return (err);
6071 		}
6072 	}
6073 
6074 	mutex_exit(&dringp->lock);
6075 
6076 	return (0);
6077 }
6078 
6079 /*
6080  * Ensure that the contents in the local dring are consistent
6081  * with the contents if of remote dring
6082  */
6083 int
6084 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6085 {
6086 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6087 }
6088 
6089 /*
6090  * Ensure that the contents in the remote dring are consistent
6091  * with the contents if of local dring
6092  */
6093 int
6094 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6095 {
6096 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6097 }
6098 
6099 
6100 /* ------------------------------------------------------------------------- */
6101