xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 4d27faddb2c6fa5d33bb0b6c6847a8b7a526441d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/mmu.h>
56 #include <sys/pte.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/hat_sfmmu.h>
60 #include <sys/vm_machparam.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/seg_kpm.h>
63 #include <sys/note.h>
64 #include <sys/ivintr.h>
65 #include <sys/hypervisor_api.h>
66 #include <sys/ldc.h>
67 #include <sys/ldc_impl.h>
68 #include <sys/cnex.h>
69 #include <sys/hsvc.h>
70 
71 /* Core internal functions */
72 static int i_ldc_h2v_error(int h_error);
73 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
74 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset);
75 static void i_ldc_reset_state(ldc_chan_t *ldcp);
76 static void i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset);
77 
78 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
79 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
80 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
81 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
82     uint8_t ctrlmsg);
83 
84 /* Interrupt handling functions */
85 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
86 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
87 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
88 
89 /* Read method functions */
90 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
91 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
92 	size_t *sizep);
93 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 
96 /* Write method functions */
97 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
98 	size_t *sizep);
99 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
100 	size_t *sizep);
101 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
102 	size_t *sizep);
103 
104 /* Pkt processing internal functions */
105 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
106 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
107 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
108 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
109 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
110 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
111 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
112 
113 /* Memory synchronization internal functions */
114 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
115     uint8_t direction, uint64_t offset, size_t size);
116 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
117     uint8_t direction, uint64_t start, uint64_t end);
118 
119 /* LDC Version */
120 static ldc_ver_t ldc_versions[] = { {1, 0} };
121 
122 /* number of supported versions */
123 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
124 
125 /* Module State Pointer */
126 static ldc_soft_state_t *ldcssp;
127 
128 static struct modldrv md = {
129 	&mod_miscops,			/* This is a misc module */
130 	"sun4v LDC module v%I%",	/* Name of the module */
131 };
132 
133 static struct modlinkage ml = {
134 	MODREV_1,
135 	&md,
136 	NULL
137 };
138 
139 static uint64_t ldc_sup_minor;		/* Supported minor number */
140 static hsvc_info_t ldc_hsvc = {
141 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
142 };
143 
144 static uint64_t intr_sup_minor;		/* Supported minor number */
145 static hsvc_info_t intr_hsvc = {
146 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
147 };
148 
149 /*
150  * LDC framework supports mapping remote domain's memory
151  * either directly or via shadow memory pages. Default
152  * support is currently implemented via shadow copy.
153  * Direct map can be enabled by setting 'ldc_shmem_enabled'
154  */
155 int ldc_shmem_enabled = 0;
156 
157 /*
158  * The no. of MTU size messages that can be stored in
159  * the LDC Tx queue. The number of Tx queue entries is
160  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
161  */
162 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
163 
164 /*
165  * The minimum queue length. This is the size of the smallest
166  * LDC queue. If the computed value is less than this default,
167  * the queue length is rounded up to 'ldc_queue_entries'.
168  */
169 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
170 
171 /*
172  * Pages exported for remote access over each channel are
173  * maintained in a table registered with the Hypervisor.
174  * The default number of entries in the table is set by
175  * 'ldc_maptable_entries'.
176  */
177 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
178 
179 /*
180  * LDC retry count and delay - when the HV returns EWOULDBLOCK
181  * the operation is retried 'ldc_max_retries' times with a
182  * wait of 'ldc_delay' usecs between each retry.
183  */
184 int ldc_max_retries = LDC_MAX_RETRIES;
185 clock_t ldc_delay = LDC_DELAY;
186 
187 #ifdef DEBUG
188 
189 /*
190  * Print debug messages
191  *
192  * set ldcdbg to 0x7 for enabling all msgs
193  * 0x4 - Warnings
194  * 0x2 - All debug messages
195  * 0x1 - Minimal debug messages
196  *
197  * set ldcdbgchan to the channel number you want to debug
198  * setting it to -1 prints debug messages for all channels
199  * NOTE: ldcdbgchan has no effect on error messages
200  */
201 
202 #define	DBG_ALL_LDCS -1
203 
204 int ldcdbg = 0x0;
205 int64_t ldcdbgchan = DBG_ALL_LDCS;
206 boolean_t ldc_inject_reset_flag = B_FALSE;
207 
208 static void
209 ldcdebug(int64_t id, const char *fmt, ...)
210 {
211 	char buf[512];
212 	va_list ap;
213 
214 	/*
215 	 * Do not return if,
216 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
217 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
218 	 * debug channel = caller specified channel
219 	 */
220 	if ((id != DBG_ALL_LDCS) &&
221 	    (ldcdbgchan != DBG_ALL_LDCS) &&
222 	    (ldcdbgchan != id)) {
223 		return;
224 	}
225 
226 	va_start(ap, fmt);
227 	(void) vsprintf(buf, fmt, ap);
228 	va_end(ap);
229 
230 	cmn_err(CE_CONT, "?%s", buf);
231 }
232 
233 static boolean_t
234 ldc_inject_reset(ldc_chan_t *ldcp)
235 {
236 	if ((ldcdbgchan != DBG_ALL_LDCS) && (ldcdbgchan != ldcp->id))
237 		return (B_FALSE);
238 
239 	if (!ldc_inject_reset_flag)
240 		return (B_FALSE);
241 
242 	/* clear the injection state */
243 	ldc_inject_reset_flag = 0;
244 
245 	return (B_TRUE);
246 }
247 
/*
 * Conditional debug-print macros. Each one is written directly in front
 * of the ldcdebug() argument list, e.g. D1(id, "fmt", ...);
 *
 *   D1    - printed when bit 0x01 of ldcdbg is set
 *   D2    - printed when bit 0x02 of ldcdbg is set
 *   DWARN - printed when bit 0x04 of ldcdbg is set
 *
 * The test is inverted and the call sits in the else arm so that each
 * macro carries its own else. With a bare "if (cond) ldcdebug", a use such
 * as
 *	if (x)
 *		D1(...);
 *	else
 *		y();
 * would silently attach the caller's else to the macro's hidden if.
 */
#define	D1			\
if (!(ldcdbg & 0x01))		\
	;			\
else				\
	ldcdebug

#define	D2			\
if (!(ldcdbg & 0x02))		\
	;			\
else				\
	ldcdebug

#define	DWARN			\
if (!(ldcdbg & 0x04))		\
	;			\
else				\
	ldcdebug
259 
/*
 * Hex-dump the 64 bytes starting at 'addr' through D2, formatted as
 * "|xx|xx|...|". buf holds 64 three-character groups plus the closing
 * "|\n" and the terminating NUL (65 * 3 bytes in total).
 *
 * 'addr' is parenthesized before the cast so that an expression argument
 * such as (p + 1) is evaluated in its own pointer type first, rather than
 * having the cast bind to 'p' alone.
 */
#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char buf[65*3];							\
	int i;								\
	uint8_t *src = (uint8_t *)(addr);				\
	for (i = 0; i < 64; i++, src++)					\
		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
	(void) sprintf(&buf[i * 3], "|\n");				\
	D2((id), "payload: %s", buf);					\
}
270 
/*
 * Print a one-line summary of the LDC packet at 'addr' through D2.
 *
 * c    - channel; (c)->id selects the debug channel and (c)->mode decides
 *        whether the packet's seqid is shown (0 is shown in RAW mode)
 * s    - prefix string, normally the calling function's name
 * addr - address of the packet, cast to ldc_msg_t *
 *
 * Data packets show the envelope decoded into fragment start ('B') and
 * stop ('E') markers and the length; all other packets show the raw
 * envelope. The message id is a uint32_t and is therefore printed with %u.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
	if (msg->type == LDC_DATA) {					\
	    D2((c)->id, "%s: msg%u (/%x/%x/%x/,env[%c%c,sz=%d])",	\
	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',			\
	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',			\
	    (msg->env & LDC_LEN_MASK));					\
	} else {							\
	    D2((c)->id, "%s: msg%u (/%x/%x/%x/,env=%x)", (s),		\
	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
	}								\
}
286 
287 #define	LDC_INJECT_RESET(_ldcp)	ldc_inject_reset(_ldcp)
288 
289 #else
290 
291 #define	DBG_ALL_LDCS -1
292 
293 #define	D1
294 #define	D2
295 #define	DWARN
296 
297 #define	DUMP_PAYLOAD(id, addr)
298 #define	DUMP_LDC_PKT(c, s, addr)
299 
300 #define	LDC_INJECT_RESET(_ldcp)	(B_FALSE)
301 
302 #endif
303 
/*
 * Zero one LDC packet (sizeof (ldc_msg_t) bytes at p).
 *
 * The macro deliberately has no trailing semicolon; the caller supplies
 * it. A semicolon inside the expansion would turn "ZERO_PKT(p);" into two
 * statements and break an unbraced if/else around the call.
 */
#define	ZERO_PKT(p)			\
	bzero((p), sizeof (ldc_msg_t))
306 
/*
 * Build a cookie value from a table index: the index is shifted left by
 * pg_shift and OR-ed with the page size code shifted left by
 * LDC_COOKIE_PGSZC_SHIFT.
 */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((idx) << (pg_shift)) | ((pg_szc) << LDC_COOKIE_PGSZC_SHIFT))
309 
310 
311 int
312 _init(void)
313 {
314 	int status;
315 
316 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
317 	if (status != 0) {
318 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
319 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
320 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
321 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
322 		return (-1);
323 	}
324 
325 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
326 	if (status != 0) {
327 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
328 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
329 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
330 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
331 		(void) hsvc_unregister(&ldc_hsvc);
332 		return (-1);
333 	}
334 
335 	/* allocate soft state structure */
336 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
337 
338 	/* Link the module into the system */
339 	status = mod_install(&ml);
340 	if (status != 0) {
341 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
342 		return (status);
343 	}
344 
345 	/* Initialize the LDC state structure */
346 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
347 
348 	mutex_enter(&ldcssp->lock);
349 
350 	/* Create a cache for memory handles */
351 	ldcssp->memhdl_cache = kmem_cache_create("ldc_memhdl_cache",
352 	    sizeof (ldc_mhdl_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
353 	if (ldcssp->memhdl_cache == NULL) {
354 		DWARN(DBG_ALL_LDCS, "_init: ldc_memhdl cache create failed\n");
355 		mutex_exit(&ldcssp->lock);
356 		return (-1);
357 	}
358 
359 	/* Create cache for memory segment structures */
360 	ldcssp->memseg_cache = kmem_cache_create("ldc_memseg_cache",
361 	    sizeof (ldc_memseg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
362 	if (ldcssp->memseg_cache == NULL) {
363 		DWARN(DBG_ALL_LDCS, "_init: ldc_memseg cache create failed\n");
364 		mutex_exit(&ldcssp->lock);
365 		return (-1);
366 	}
367 
368 
369 	ldcssp->channel_count = 0;
370 	ldcssp->channels_open = 0;
371 	ldcssp->chan_list = NULL;
372 	ldcssp->dring_list = NULL;
373 
374 	mutex_exit(&ldcssp->lock);
375 
376 	return (0);
377 }
378 
379 int
380 _info(struct modinfo *modinfop)
381 {
382 	/* Report status of the dynamically loadable driver module */
383 	return (mod_info(&ml, modinfop));
384 }
385 
386 int
387 _fini(void)
388 {
389 	int 		rv, status;
390 	ldc_chan_t 	*ldcp;
391 	ldc_dring_t 	*dringp;
392 	ldc_mem_info_t 	minfo;
393 
394 	/* Unlink the driver module from the system */
395 	status = mod_remove(&ml);
396 	if (status) {
397 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
398 		return (EIO);
399 	}
400 
401 	/* close and finalize channels */
402 	ldcp = ldcssp->chan_list;
403 	while (ldcp != NULL) {
404 		(void) ldc_close((ldc_handle_t)ldcp);
405 		(void) ldc_fini((ldc_handle_t)ldcp);
406 
407 		ldcp = ldcp->next;
408 	}
409 
410 	/* Free descriptor rings */
411 	dringp = ldcssp->dring_list;
412 	while (dringp != NULL) {
413 		dringp = dringp->next;
414 
415 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
416 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
417 			if (minfo.status == LDC_BOUND) {
418 				(void) ldc_mem_dring_unbind(
419 						(ldc_dring_handle_t)dringp);
420 			}
421 			if (minfo.status == LDC_MAPPED) {
422 				(void) ldc_mem_dring_unmap(
423 						(ldc_dring_handle_t)dringp);
424 			}
425 		}
426 
427 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
428 	}
429 	ldcssp->dring_list = NULL;
430 
431 	/* Destroy kmem caches */
432 	kmem_cache_destroy(ldcssp->memhdl_cache);
433 	kmem_cache_destroy(ldcssp->memseg_cache);
434 
435 	/*
436 	 * We have successfully "removed" the driver.
437 	 * Destroying soft states
438 	 */
439 	mutex_destroy(&ldcssp->lock);
440 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
441 
442 	(void) hsvc_unregister(&ldc_hsvc);
443 	(void) hsvc_unregister(&intr_hsvc);
444 
445 	return (status);
446 }
447 
448 /* -------------------------------------------------------------------------- */
449 
450 /*
451  * LDC Link Layer Internal Functions
452  */
453 
454 /*
455  * Translate HV Errors to sun4v error codes
456  */
457 static int
458 i_ldc_h2v_error(int h_error)
459 {
460 	switch (h_error) {
461 
462 	case	H_EOK:
463 		return (0);
464 
465 	case	H_ENORADDR:
466 		return (EFAULT);
467 
468 	case	H_EBADPGSZ:
469 	case	H_EINVAL:
470 		return (EINVAL);
471 
472 	case	H_EWOULDBLOCK:
473 		return (EWOULDBLOCK);
474 
475 	case	H_ENOACCESS:
476 	case	H_ENOMAP:
477 		return (EACCES);
478 
479 	case	H_EIO:
480 	case	H_ECPUERROR:
481 		return (EIO);
482 
483 	case	H_ENOTSUPPORTED:
484 		return (ENOTSUP);
485 
486 	case 	H_ETOOMANY:
487 		return (ENOSPC);
488 
489 	case	H_ECHANNEL:
490 		return (ECHRNG);
491 	default:
492 		break;
493 	}
494 
495 	return (EIO);
496 }
497 
498 /*
499  * Reconfigure the transmit queue
500  */
501 static int
502 i_ldc_txq_reconf(ldc_chan_t *ldcp)
503 {
504 	int rv;
505 
506 	ASSERT(MUTEX_HELD(&ldcp->lock));
507 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
508 
509 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
510 	if (rv) {
511 		cmn_err(CE_WARN,
512 		    "i_ldc_txq_reconf: (0x%lx) cannot set qconf", ldcp->id);
513 		return (EIO);
514 	}
515 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
516 	    &(ldcp->tx_tail), &(ldcp->link_state));
517 	if (rv) {
518 		cmn_err(CE_WARN,
519 		    "i_ldc_txq_reconf: (0x%lx) cannot get qptrs", ldcp->id);
520 		return (EIO);
521 	}
522 	D1(ldcp->id, "i_ldc_txq_reconf: (0x%llx) h=0x%llx,t=0x%llx,"
523 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
524 	    ldcp->link_state);
525 
526 	return (0);
527 }
528 
529 /*
530  * Reconfigure the receive queue
531  */
532 static int
533 i_ldc_rxq_reconf(ldc_chan_t *ldcp, boolean_t force_reset)
534 {
535 	int rv;
536 	uint64_t rx_head, rx_tail;
537 
538 	ASSERT(MUTEX_HELD(&ldcp->lock));
539 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
540 	    &(ldcp->link_state));
541 	if (rv) {
542 		cmn_err(CE_WARN,
543 		    "i_ldc_rxq_reconf: (0x%lx) cannot get state",
544 		    ldcp->id);
545 		return (EIO);
546 	}
547 
548 	if (force_reset || (ldcp->tstate & ~TS_IN_RESET) == TS_UP) {
549 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
550 			ldcp->rx_q_entries);
551 		if (rv) {
552 			cmn_err(CE_WARN,
553 			    "i_ldc_rxq_reconf: (0x%lx) cannot set qconf",
554 			    ldcp->id);
555 			return (EIO);
556 		}
557 		D1(ldcp->id, "i_ldc_rxq_reconf: (0x%llx) completed q reconf",
558 		    ldcp->id);
559 	}
560 
561 	return (0);
562 }
563 
564 /*
565  * Reset LDC state structure and its contents
566  */
567 static void
568 i_ldc_reset_state(ldc_chan_t *ldcp)
569 {
570 	ASSERT(MUTEX_HELD(&ldcp->lock));
571 	ldcp->last_msg_snt = LDC_INIT_SEQID;
572 	ldcp->last_ack_rcd = 0;
573 	ldcp->last_msg_rcd = 0;
574 	ldcp->tx_ackd_head = ldcp->tx_head;
575 	ldcp->next_vidx = 0;
576 	ldcp->hstate = 0;
577 	ldcp->tstate = TS_OPEN;
578 	ldcp->status = LDC_OPEN;
579 
580 	if (ldcp->link_state == LDC_CHANNEL_UP ||
581 	    ldcp->link_state == LDC_CHANNEL_RESET) {
582 
583 		if (ldcp->mode == LDC_MODE_RAW) {
584 			ldcp->status = LDC_UP;
585 			ldcp->tstate = TS_UP;
586 		} else {
587 			ldcp->status = LDC_READY;
588 			ldcp->tstate |= TS_LINK_READY;
589 		}
590 	}
591 }
592 
593 /*
594  * Reset a LDC channel
595  */
596 static void
597 i_ldc_reset(ldc_chan_t *ldcp, boolean_t force_reset)
598 {
599 	DWARN(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
600 
601 	ASSERT(MUTEX_HELD(&ldcp->lock));
602 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
603 
604 	/* reconfig Tx and Rx queues */
605 	(void) i_ldc_txq_reconf(ldcp);
606 	(void) i_ldc_rxq_reconf(ldcp, force_reset);
607 
608 	/* Clear Tx and Rx interrupts */
609 	(void) i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
610 	(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
611 
612 	/* Reset channel state */
613 	i_ldc_reset_state(ldcp);
614 
615 	/* Mark channel in reset */
616 	ldcp->tstate |= TS_IN_RESET;
617 }
618 
619 
620 /*
621  * Clear pending interrupts
622  */
623 static void
624 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
625 {
626 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
627 
628 	ASSERT(MUTEX_HELD(&ldcp->lock));
629 	ASSERT(cinfo->dip != NULL);
630 
631 	switch (itype) {
632 	case CNEX_TX_INTR:
633 		/* check Tx interrupt */
634 		if (ldcp->tx_intr_state)
635 			ldcp->tx_intr_state = LDC_INTR_NONE;
636 		else
637 			return;
638 		break;
639 
640 	case CNEX_RX_INTR:
641 		/* check Rx interrupt */
642 		if (ldcp->rx_intr_state)
643 			ldcp->rx_intr_state = LDC_INTR_NONE;
644 		else
645 			return;
646 		break;
647 	}
648 
649 	(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
650 	D2(ldcp->id,
651 	    "i_ldc_clear_intr: (0x%llx) cleared 0x%x intr\n",
652 	    ldcp->id, itype);
653 }
654 
655 /*
656  * Set the receive queue head
657  * Resets connection and returns an error if it fails.
658  */
659 static int
660 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
661 {
662 	int 	rv;
663 	int 	retries;
664 
665 	ASSERT(MUTEX_HELD(&ldcp->lock));
666 	for (retries = 0; retries < ldc_max_retries; retries++) {
667 
668 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
669 			return (0);
670 
671 		if (rv != H_EWOULDBLOCK)
672 			break;
673 
674 		/* wait for ldc_delay usecs */
675 		drv_usecwait(ldc_delay);
676 	}
677 
678 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
679 		ldcp->id, head);
680 	mutex_enter(&ldcp->tx_lock);
681 	i_ldc_reset(ldcp, B_TRUE);
682 	mutex_exit(&ldcp->tx_lock);
683 
684 	return (ECONNRESET);
685 }
686 
687 
688 /*
689  * Returns the tx_tail to be used for transfer
690  * Re-reads the TX queue ptrs if and only if the
691  * the cached head and tail are equal (queue is full)
692  */
693 static int
694 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
695 {
696 	int 		rv;
697 	uint64_t 	current_head, new_tail;
698 
699 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
700 	/* Read the head and tail ptrs from HV */
701 	rv = hv_ldc_tx_get_state(ldcp->id,
702 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
703 	if (rv) {
704 		cmn_err(CE_WARN,
705 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
706 		    ldcp->id);
707 		return (EIO);
708 	}
709 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
710 		DWARN(DBG_ALL_LDCS,
711 		    "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
712 		    ldcp->id);
713 		return (ECONNRESET);
714 	}
715 
716 	/* In reliable mode, check against last ACKd msg */
717 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
718 		ldcp->mode == LDC_MODE_STREAM)
719 		? ldcp->tx_ackd_head : ldcp->tx_head;
720 
721 	/* increment the tail */
722 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
723 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
724 
725 	if (new_tail == current_head) {
726 		DWARN(ldcp->id,
727 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
728 		    ldcp->id);
729 		return (EWOULDBLOCK);
730 	}
731 
732 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
733 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
734 
735 	*tail = ldcp->tx_tail;
736 	return (0);
737 }
738 
739 /*
740  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
741  * and retry ldc_max_retries times before returning an error.
742  * Returns 0, EWOULDBLOCK or EIO
743  */
744 static int
745 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
746 {
747 	int		rv, retval = EWOULDBLOCK;
748 	int 		retries;
749 
750 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
751 	for (retries = 0; retries < ldc_max_retries; retries++) {
752 
753 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
754 			retval = 0;
755 			break;
756 		}
757 		if (rv != H_EWOULDBLOCK) {
758 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
759 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
760 			retval = EIO;
761 			break;
762 		}
763 
764 		/* wait for ldc_delay usecs */
765 		drv_usecwait(ldc_delay);
766 	}
767 	return (retval);
768 }
769 
770 /*
771  * Send a LDC message
772  */
773 static int
774 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
775     uint8_t ctrlmsg)
776 {
777 	int		rv;
778 	ldc_msg_t 	*pkt;
779 	uint64_t	tx_tail;
780 	uint32_t	curr_seqid = ldcp->last_msg_snt;
781 
782 	/* Obtain Tx lock */
783 	mutex_enter(&ldcp->tx_lock);
784 
785 	/* get the current tail for the message */
786 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
787 	if (rv) {
788 		DWARN(ldcp->id,
789 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
790 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
791 		    ldcp->id, pkttype, subtype, ctrlmsg);
792 		mutex_exit(&ldcp->tx_lock);
793 		return (rv);
794 	}
795 
796 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
797 	ZERO_PKT(pkt);
798 
799 	/* Initialize the packet */
800 	pkt->type = pkttype;
801 	pkt->stype = subtype;
802 	pkt->ctrl = ctrlmsg;
803 
804 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
805 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
806 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
807 		curr_seqid++;
808 		if (ldcp->mode != LDC_MODE_RAW) {
809 			pkt->seqid = curr_seqid;
810 			pkt->ackid = ldcp->last_msg_rcd;
811 		}
812 	}
813 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
814 
815 	/* initiate the send by calling into HV and set the new tail */
816 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
817 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
818 
819 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
820 	if (rv) {
821 		DWARN(ldcp->id,
822 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
823 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
824 		    ldcp->id, pkttype, subtype, ctrlmsg);
825 		mutex_exit(&ldcp->tx_lock);
826 		return (EIO);
827 	}
828 
829 	ldcp->last_msg_snt = curr_seqid;
830 	ldcp->tx_tail = tx_tail;
831 
832 	mutex_exit(&ldcp->tx_lock);
833 	return (0);
834 }
835 
836 /*
837  * Checks if packet was received in right order
838  * in the case of a reliable link.
839  * Returns 0 if in order, else EIO
840  */
841 static int
842 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
843 {
844 	/* No seqid checking for RAW mode */
845 	if (ldcp->mode == LDC_MODE_RAW)
846 		return (0);
847 
848 	/* No seqid checking for version, RTS, RTR message */
849 	if (msg->ctrl == LDC_VER ||
850 	    msg->ctrl == LDC_RTS ||
851 	    msg->ctrl == LDC_RTR)
852 		return (0);
853 
854 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
855 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
856 		DWARN(ldcp->id,
857 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
858 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
859 		    (ldcp->last_msg_rcd + 1));
860 		return (EIO);
861 	}
862 
863 	return (0);
864 }
865 
866 
867 /*
868  * Process an incoming version ctrl message
869  */
870 static int
871 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
872 {
873 	int 		rv = 0, idx = ldcp->next_vidx;
874 	ldc_msg_t 	*pkt;
875 	uint64_t	tx_tail;
876 	ldc_ver_t	*rcvd_ver;
877 
878 	/* get the received version */
879 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
880 
881 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
882 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
883 
884 	/* Obtain Tx lock */
885 	mutex_enter(&ldcp->tx_lock);
886 
887 	switch (msg->stype) {
888 	case LDC_INFO:
889 
890 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
891 			(void) i_ldc_txq_reconf(ldcp);
892 			i_ldc_reset_state(ldcp);
893 			mutex_exit(&ldcp->tx_lock);
894 			return (EAGAIN);
895 		}
896 
897 		/* get the current tail and pkt for the response */
898 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
899 		if (rv != 0) {
900 			DWARN(ldcp->id,
901 			    "i_ldc_process_VER: (0x%llx) err sending "
902 			    "version ACK/NACK\n", ldcp->id);
903 			i_ldc_reset(ldcp, B_TRUE);
904 			mutex_exit(&ldcp->tx_lock);
905 			return (ECONNRESET);
906 		}
907 
908 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
909 		ZERO_PKT(pkt);
910 
911 		/* initialize the packet */
912 		pkt->type = LDC_CTRL;
913 		pkt->ctrl = LDC_VER;
914 
915 		for (;;) {
916 
917 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
918 			    rcvd_ver->major, rcvd_ver->minor,
919 			    ldc_versions[idx].major, ldc_versions[idx].minor);
920 
921 			if (rcvd_ver->major == ldc_versions[idx].major) {
922 				/* major version match - ACK version */
923 				pkt->stype = LDC_ACK;
924 
925 				/*
926 				 * lower minor version to the one this endpt
927 				 * supports, if necessary
928 				 */
929 				if (rcvd_ver->minor > ldc_versions[idx].minor)
930 					rcvd_ver->minor =
931 						ldc_versions[idx].minor;
932 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
933 
934 				break;
935 			}
936 
937 			if (rcvd_ver->major > ldc_versions[idx].major) {
938 
939 				D1(ldcp->id, "i_ldc_process_VER: using next"
940 				    " lower idx=%d, v%u.%u\n", idx,
941 				    ldc_versions[idx].major,
942 				    ldc_versions[idx].minor);
943 
944 				/* nack with next lower version */
945 				pkt->stype = LDC_NACK;
946 				bcopy(&ldc_versions[idx], pkt->udata,
947 				    sizeof (ldc_versions[idx]));
948 				ldcp->next_vidx = idx;
949 				break;
950 			}
951 
952 			/* next major version */
953 			idx++;
954 
955 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
956 
957 			if (idx == LDC_NUM_VERS) {
958 				/* no version match - send NACK */
959 				pkt->stype = LDC_NACK;
960 				bzero(pkt->udata, sizeof (ldc_ver_t));
961 				ldcp->next_vidx = 0;
962 				break;
963 			}
964 		}
965 
966 		/* initiate the send by calling into HV and set the new tail */
967 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
968 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
969 
970 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
971 		if (rv == 0) {
972 			ldcp->tx_tail = tx_tail;
973 			if (pkt->stype == LDC_ACK) {
974 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
975 				    " version ACK\n", ldcp->id);
976 				/* Save the ACK'd version */
977 				ldcp->version.major = rcvd_ver->major;
978 				ldcp->version.minor = rcvd_ver->minor;
979 				ldcp->hstate |= TS_RCVD_VER;
980 				ldcp->tstate |= TS_VER_DONE;
981 				DWARN(DBG_ALL_LDCS,
982 				    "(0x%llx) Sent ACK, "
983 				    "Agreed on version v%u.%u\n",
984 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
985 			}
986 		} else {
987 			DWARN(ldcp->id,
988 			    "i_ldc_process_VER: (0x%llx) error sending "
989 			    "ACK/NACK\n", ldcp->id);
990 			i_ldc_reset(ldcp, B_TRUE);
991 			mutex_exit(&ldcp->tx_lock);
992 			return (ECONNRESET);
993 		}
994 
995 		break;
996 
997 	case LDC_ACK:
998 		if ((ldcp->tstate & ~TS_IN_RESET) == TS_VREADY) {
999 			if (ldcp->version.major != rcvd_ver->major ||
1000 				ldcp->version.minor != rcvd_ver->minor) {
1001 
1002 				/* mismatched version - reset connection */
1003 				DWARN(ldcp->id,
1004 					"i_ldc_process_VER: (0x%llx) recvd"
1005 					" ACK ver != sent ACK ver\n", ldcp->id);
1006 				i_ldc_reset(ldcp, B_TRUE);
1007 				mutex_exit(&ldcp->tx_lock);
1008 				return (ECONNRESET);
1009 			}
1010 		} else {
1011 			/* SUCCESS - we have agreed on a version */
1012 			ldcp->version.major = rcvd_ver->major;
1013 			ldcp->version.minor = rcvd_ver->minor;
1014 			ldcp->tstate |= TS_VER_DONE;
1015 		}
1016 
1017 		DWARN(DBG_ALL_LDCS,
1018 		    "(0x%llx) Got ACK, Agreed on version v%u.%u\n",
1019 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
1020 
1021 		/* initiate RTS-RTR-RDX handshake */
1022 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1023 		if (rv) {
1024 			DWARN(ldcp->id,
1025 		    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
1026 			    ldcp->id);
1027 			i_ldc_reset(ldcp, B_TRUE);
1028 			mutex_exit(&ldcp->tx_lock);
1029 			return (ECONNRESET);
1030 		}
1031 
1032 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1033 		ZERO_PKT(pkt);
1034 
1035 		pkt->type = LDC_CTRL;
1036 		pkt->stype = LDC_INFO;
1037 		pkt->ctrl = LDC_RTS;
1038 		pkt->env = ldcp->mode;
1039 		if (ldcp->mode != LDC_MODE_RAW)
1040 			pkt->seqid = LDC_INIT_SEQID;
1041 
1042 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
1043 
1044 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
1045 
1046 		/* initiate the send by calling into HV and set the new tail */
1047 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1048 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1049 
1050 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1051 		if (rv) {
1052 			D2(ldcp->id,
1053 			    "i_ldc_process_VER: (0x%llx) no listener\n",
1054 			    ldcp->id);
1055 			i_ldc_reset(ldcp, B_TRUE);
1056 			mutex_exit(&ldcp->tx_lock);
1057 			return (ECONNRESET);
1058 		}
1059 
1060 		ldcp->tx_tail = tx_tail;
1061 		ldcp->hstate |= TS_SENT_RTS;
1062 
1063 		break;
1064 
1065 	case LDC_NACK:
1066 		/* check if version in NACK is zero */
1067 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
1068 			/* version handshake failure */
1069 			DWARN(DBG_ALL_LDCS,
1070 			    "i_ldc_process_VER: (0x%llx) no version match\n",
1071 			    ldcp->id);
1072 			i_ldc_reset(ldcp, B_TRUE);
1073 			mutex_exit(&ldcp->tx_lock);
1074 			return (ECONNRESET);
1075 		}
1076 
1077 		/* get the current tail and pkt for the response */
1078 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1079 		if (rv != 0) {
1080 			cmn_err(CE_NOTE,
1081 			    "i_ldc_process_VER: (0x%lx) err sending "
1082 			    "version ACK/NACK\n", ldcp->id);
1083 			i_ldc_reset(ldcp, B_TRUE);
1084 			mutex_exit(&ldcp->tx_lock);
1085 			return (ECONNRESET);
1086 		}
1087 
1088 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1089 		ZERO_PKT(pkt);
1090 
1091 		/* initialize the packet */
1092 		pkt->type = LDC_CTRL;
1093 		pkt->ctrl = LDC_VER;
1094 		pkt->stype = LDC_INFO;
1095 
1096 		/* check ver in NACK msg has a match */
1097 		for (;;) {
1098 			if (rcvd_ver->major == ldc_versions[idx].major) {
1099 				/*
1100 				 * major version match - resubmit request
1101 				 * if lower minor version to the one this endpt
1102 				 * supports, if necessary
1103 				 */
1104 				if (rcvd_ver->minor > ldc_versions[idx].minor)
1105 					rcvd_ver->minor =
1106 						ldc_versions[idx].minor;
1107 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
1108 				break;
1109 
1110 			}
1111 
1112 			if (rcvd_ver->major > ldc_versions[idx].major) {
1113 
1114 				D1(ldcp->id, "i_ldc_process_VER: using next"
1115 				    " lower idx=%d, v%u.%u\n", idx,
1116 				    ldc_versions[idx].major,
1117 				    ldc_versions[idx].minor);
1118 
1119 				/* send next lower version */
1120 				bcopy(&ldc_versions[idx], pkt->udata,
1121 				    sizeof (ldc_versions[idx]));
1122 				ldcp->next_vidx = idx;
1123 				break;
1124 			}
1125 
1126 			/* next version */
1127 			idx++;
1128 
1129 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1130 
1131 			if (idx == LDC_NUM_VERS) {
1132 				/* no version match - terminate */
1133 				ldcp->next_vidx = 0;
1134 				mutex_exit(&ldcp->tx_lock);
1135 				return (ECONNRESET);
1136 			}
1137 		}
1138 
1139 		/* initiate the send by calling into HV and set the new tail */
1140 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1141 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1142 
1143 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1144 		if (rv == 0) {
1145 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1146 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1147 			    ldc_versions[idx].minor);
1148 			ldcp->tx_tail = tx_tail;
1149 		} else {
1150 			cmn_err(CE_NOTE,
1151 			    "i_ldc_process_VER: (0x%lx) error sending version"
1152 			    "INFO\n", ldcp->id);
1153 			i_ldc_reset(ldcp, B_TRUE);
1154 			mutex_exit(&ldcp->tx_lock);
1155 			return (ECONNRESET);
1156 		}
1157 
1158 		break;
1159 	}
1160 
1161 	mutex_exit(&ldcp->tx_lock);
1162 	return (rv);
1163 }
1164 
1165 
1166 /*
1167  * Process an incoming RTS ctrl message
1168  */
1169 static int
1170 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1171 {
1172 	int 		rv = 0;
1173 	ldc_msg_t 	*pkt;
1174 	uint64_t	tx_tail;
1175 	boolean_t	sent_NACK = B_FALSE;
1176 
1177 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1178 
1179 	switch (msg->stype) {
1180 	case LDC_NACK:
1181 		DWARN(ldcp->id,
1182 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1183 		    ldcp->id);
1184 
1185 		/* Reset the channel -- as we cannot continue */
1186 		mutex_enter(&ldcp->tx_lock);
1187 		i_ldc_reset(ldcp, B_TRUE);
1188 		mutex_exit(&ldcp->tx_lock);
1189 		rv = ECONNRESET;
1190 		break;
1191 
1192 	case LDC_INFO:
1193 
1194 		/* check mode */
1195 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1196 			cmn_err(CE_NOTE,
1197 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1198 			    ldcp->id);
1199 			/*
1200 			 * send NACK in response to MODE message
1201 			 * get the current tail for the response
1202 			 */
1203 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1204 			if (rv) {
1205 				/* if cannot send NACK - reset channel */
1206 				mutex_enter(&ldcp->tx_lock);
1207 				i_ldc_reset(ldcp, B_TRUE);
1208 				mutex_exit(&ldcp->tx_lock);
1209 				rv = ECONNRESET;
1210 				break;
1211 			}
1212 			sent_NACK = B_TRUE;
1213 		}
1214 		break;
1215 	default:
1216 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1217 		    ldcp->id);
1218 		mutex_enter(&ldcp->tx_lock);
1219 		i_ldc_reset(ldcp, B_TRUE);
1220 		mutex_exit(&ldcp->tx_lock);
1221 		rv = ECONNRESET;
1222 		break;
1223 	}
1224 
1225 	/*
1226 	 * If either the connection was reset (when rv != 0) or
1227 	 * a NACK was sent, we return. In the case of a NACK
1228 	 * we dont want to consume the packet that came in but
1229 	 * not record that we received the RTS
1230 	 */
1231 	if (rv || sent_NACK)
1232 		return (rv);
1233 
1234 	/* record RTS received */
1235 	ldcp->hstate |= TS_RCVD_RTS;
1236 
1237 	/* store initial SEQID info */
1238 	ldcp->last_msg_snt = msg->seqid;
1239 
1240 	/* Obtain Tx lock */
1241 	mutex_enter(&ldcp->tx_lock);
1242 
1243 	/* get the current tail for the response */
1244 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1245 	if (rv != 0) {
1246 		cmn_err(CE_NOTE,
1247 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1248 		    ldcp->id);
1249 		i_ldc_reset(ldcp, B_TRUE);
1250 		mutex_exit(&ldcp->tx_lock);
1251 		return (ECONNRESET);
1252 	}
1253 
1254 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1255 	ZERO_PKT(pkt);
1256 
1257 	/* initialize the packet */
1258 	pkt->type = LDC_CTRL;
1259 	pkt->stype = LDC_INFO;
1260 	pkt->ctrl = LDC_RTR;
1261 	pkt->env = ldcp->mode;
1262 	if (ldcp->mode != LDC_MODE_RAW)
1263 		pkt->seqid = LDC_INIT_SEQID;
1264 
1265 	ldcp->last_msg_rcd = msg->seqid;
1266 
1267 	/* initiate the send by calling into HV and set the new tail */
1268 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1269 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1270 
1271 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1272 	if (rv == 0) {
1273 		D2(ldcp->id,
1274 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1275 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1276 
1277 		ldcp->tx_tail = tx_tail;
1278 		ldcp->hstate |= TS_SENT_RTR;
1279 
1280 	} else {
1281 		cmn_err(CE_NOTE,
1282 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1283 		    ldcp->id);
1284 		i_ldc_reset(ldcp, B_TRUE);
1285 		mutex_exit(&ldcp->tx_lock);
1286 		return (ECONNRESET);
1287 	}
1288 
1289 	mutex_exit(&ldcp->tx_lock);
1290 	return (0);
1291 }
1292 
1293 /*
1294  * Process an incoming RTR ctrl message
1295  */
1296 static int
1297 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1298 {
1299 	int 		rv = 0;
1300 	boolean_t	sent_NACK = B_FALSE;
1301 
1302 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1303 
1304 	switch (msg->stype) {
1305 	case LDC_NACK:
1306 		/* RTR NACK received */
1307 		DWARN(ldcp->id,
1308 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1309 		    ldcp->id);
1310 
1311 		/* Reset the channel -- as we cannot continue */
1312 		mutex_enter(&ldcp->tx_lock);
1313 		i_ldc_reset(ldcp, B_TRUE);
1314 		mutex_exit(&ldcp->tx_lock);
1315 		rv = ECONNRESET;
1316 
1317 		break;
1318 
1319 	case LDC_INFO:
1320 
1321 		/* check mode */
1322 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1323 			DWARN(ldcp->id,
1324 			    "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
1325 			    ldcp->id);
1326 			/*
1327 			 * send NACK in response to MODE message
1328 			 * get the current tail for the response
1329 			 */
1330 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1331 			if (rv) {
1332 				/* if cannot send NACK - reset channel */
1333 				mutex_enter(&ldcp->tx_lock);
1334 				i_ldc_reset(ldcp, B_TRUE);
1335 				mutex_exit(&ldcp->tx_lock);
1336 				rv = ECONNRESET;
1337 				break;
1338 			}
1339 			sent_NACK = B_TRUE;
1340 		}
1341 		break;
1342 
1343 	default:
1344 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1345 		    ldcp->id);
1346 
1347 		/* Reset the channel -- as we cannot continue */
1348 		mutex_enter(&ldcp->tx_lock);
1349 		i_ldc_reset(ldcp, B_TRUE);
1350 		mutex_exit(&ldcp->tx_lock);
1351 		rv = ECONNRESET;
1352 		break;
1353 	}
1354 
1355 	/*
1356 	 * If either the connection was reset (when rv != 0) or
1357 	 * a NACK was sent, we return. In the case of a NACK
1358 	 * we dont want to consume the packet that came in but
1359 	 * not record that we received the RTR
1360 	 */
1361 	if (rv || sent_NACK)
1362 		return (rv);
1363 
1364 	ldcp->last_msg_snt = msg->seqid;
1365 	ldcp->hstate |= TS_RCVD_RTR;
1366 
1367 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1368 	if (rv) {
1369 		cmn_err(CE_NOTE,
1370 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1371 		    ldcp->id);
1372 		mutex_enter(&ldcp->tx_lock);
1373 		i_ldc_reset(ldcp, B_TRUE);
1374 		mutex_exit(&ldcp->tx_lock);
1375 		return (ECONNRESET);
1376 	}
1377 	D2(ldcp->id,
1378 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1379 
1380 	ldcp->hstate |= TS_SENT_RDX;
1381 	ldcp->tstate |= TS_HSHAKE_DONE;
1382 	if ((ldcp->tstate & TS_IN_RESET) == 0)
1383 		ldcp->status = LDC_UP;
1384 
1385 	DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1386 
1387 	return (0);
1388 }
1389 
1390 
1391 /*
1392  * Process an incoming RDX ctrl message
1393  */
1394 static int
1395 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1396 {
1397 	int	rv = 0;
1398 
1399 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1400 
1401 	switch (msg->stype) {
1402 	case LDC_NACK:
1403 		/* RDX NACK received */
1404 		DWARN(ldcp->id,
1405 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1406 		    ldcp->id);
1407 
1408 		/* Reset the channel -- as we cannot continue */
1409 		mutex_enter(&ldcp->tx_lock);
1410 		i_ldc_reset(ldcp, B_TRUE);
1411 		mutex_exit(&ldcp->tx_lock);
1412 		rv = ECONNRESET;
1413 
1414 		break;
1415 
1416 	case LDC_INFO:
1417 
1418 		/*
1419 		 * if channel is UP and a RDX received after data transmission
1420 		 * has commenced it is an error
1421 		 */
1422 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1423 			DWARN(DBG_ALL_LDCS,
1424 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1425 			    " - LDC reset\n", ldcp->id);
1426 			mutex_enter(&ldcp->tx_lock);
1427 			i_ldc_reset(ldcp, B_TRUE);
1428 			mutex_exit(&ldcp->tx_lock);
1429 			return (ECONNRESET);
1430 		}
1431 
1432 		ldcp->hstate |= TS_RCVD_RDX;
1433 		ldcp->tstate |= TS_HSHAKE_DONE;
1434 		if ((ldcp->tstate & TS_IN_RESET) == 0)
1435 			ldcp->status = LDC_UP;
1436 
1437 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1438 		break;
1439 
1440 	default:
1441 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1442 		    ldcp->id);
1443 
1444 		/* Reset the channel -- as we cannot continue */
1445 		mutex_enter(&ldcp->tx_lock);
1446 		i_ldc_reset(ldcp, B_TRUE);
1447 		mutex_exit(&ldcp->tx_lock);
1448 		rv = ECONNRESET;
1449 		break;
1450 	}
1451 
1452 	return (rv);
1453 }
1454 
1455 /*
1456  * Process an incoming ACK for a data packet
1457  */
1458 static int
1459 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1460 {
1461 	int		rv;
1462 	uint64_t 	tx_head;
1463 	ldc_msg_t	*pkt;
1464 
1465 	/* Obtain Tx lock */
1466 	mutex_enter(&ldcp->tx_lock);
1467 
1468 	/*
1469 	 * Read the current Tx head and tail
1470 	 */
1471 	rv = hv_ldc_tx_get_state(ldcp->id,
1472 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1473 	if (rv != 0) {
1474 		cmn_err(CE_WARN,
1475 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1476 		    ldcp->id);
1477 
1478 		/* Reset the channel -- as we cannot continue */
1479 		i_ldc_reset(ldcp, B_TRUE);
1480 		mutex_exit(&ldcp->tx_lock);
1481 		return (ECONNRESET);
1482 	}
1483 
1484 	/*
1485 	 * loop from where the previous ACK location was to the
1486 	 * current head location. This is how far the HV has
1487 	 * actually send pkts. Pkts between head and tail are
1488 	 * yet to be sent by HV.
1489 	 */
1490 	tx_head = ldcp->tx_ackd_head;
1491 	for (;;) {
1492 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1493 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1494 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1495 
1496 		if (pkt->seqid == msg->ackid) {
1497 			D2(ldcp->id,
1498 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1499 			    ldcp->id);
1500 			ldcp->last_ack_rcd = msg->ackid;
1501 			ldcp->tx_ackd_head = tx_head;
1502 			break;
1503 		}
1504 		if (tx_head == ldcp->tx_head) {
1505 			/* could not find packet */
1506 			DWARN(ldcp->id,
1507 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1508 			    ldcp->id);
1509 
1510 			/* Reset the channel -- as we cannot continue */
1511 			i_ldc_reset(ldcp, B_TRUE);
1512 			mutex_exit(&ldcp->tx_lock);
1513 			return (ECONNRESET);
1514 		}
1515 	}
1516 
1517 	mutex_exit(&ldcp->tx_lock);
1518 	return (0);
1519 }
1520 
1521 /*
1522  * Process incoming control message
1523  * Return 0 - session can continue
1524  *        EAGAIN - reprocess packet - state was changed
1525  *	  ECONNRESET - channel was reset
1526  */
1527 static int
1528 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1529 {
1530 	int 		rv = 0;
1531 
1532 	D1(ldcp->id, "i_ldc_ctrlmsg: (%llx) tstate = %lx, hstate = %lx\n",
1533 	    ldcp->id, ldcp->tstate, ldcp->hstate);
1534 
1535 	switch (ldcp->tstate & ~TS_IN_RESET) {
1536 
1537 	case TS_OPEN:
1538 	case TS_READY:
1539 
1540 		switch (msg->ctrl & LDC_CTRL_MASK) {
1541 		case LDC_VER:
1542 			/* process version message */
1543 			rv = i_ldc_process_VER(ldcp, msg);
1544 			break;
1545 		default:
1546 			DWARN(ldcp->id,
1547 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1548 			    "tstate=0x%x\n", ldcp->id,
1549 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1550 			break;
1551 		}
1552 
1553 		break;
1554 
1555 	case TS_VREADY:
1556 
1557 		switch (msg->ctrl & LDC_CTRL_MASK) {
1558 		case LDC_VER:
1559 			/* process version message */
1560 			rv = i_ldc_process_VER(ldcp, msg);
1561 			break;
1562 		case LDC_RTS:
1563 			/* process RTS message */
1564 			rv = i_ldc_process_RTS(ldcp, msg);
1565 			break;
1566 		case LDC_RTR:
1567 			/* process RTR message */
1568 			rv = i_ldc_process_RTR(ldcp, msg);
1569 			break;
1570 		case LDC_RDX:
1571 			/* process RDX message */
1572 			rv = i_ldc_process_RDX(ldcp, msg);
1573 			break;
1574 		default:
1575 			DWARN(ldcp->id,
1576 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1577 			    "tstate=0x%x\n", ldcp->id,
1578 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1579 			break;
1580 		}
1581 
1582 		break;
1583 
1584 	case TS_UP:
1585 
1586 		switch (msg->ctrl & LDC_CTRL_MASK) {
1587 		case LDC_VER:
1588 			DWARN(ldcp->id,
1589 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1590 			    "- LDC reset\n", ldcp->id);
1591 			/* peer is redoing version negotiation */
1592 			mutex_enter(&ldcp->tx_lock);
1593 			(void) i_ldc_txq_reconf(ldcp);
1594 			i_ldc_reset_state(ldcp);
1595 			mutex_exit(&ldcp->tx_lock);
1596 			rv = EAGAIN;
1597 			break;
1598 
1599 		case LDC_RDX:
1600 			/* process RDX message */
1601 			rv = i_ldc_process_RDX(ldcp, msg);
1602 			break;
1603 
1604 		default:
1605 			DWARN(ldcp->id,
1606 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1607 			    "tstate=0x%x\n", ldcp->id,
1608 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1609 			break;
1610 		}
1611 	}
1612 
1613 	return (rv);
1614 }
1615 
1616 /*
1617  * Register channel with the channel nexus
1618  */
1619 static int
1620 i_ldc_register_channel(ldc_chan_t *ldcp)
1621 {
1622 	int		rv = 0;
1623 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1624 
1625 	if (cinfo->dip == NULL) {
1626 		DWARN(ldcp->id,
1627 		    "i_ldc_register_channel: cnex has not registered\n");
1628 		return (EAGAIN);
1629 	}
1630 
1631 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1632 	if (rv) {
1633 		DWARN(ldcp->id,
1634 		    "i_ldc_register_channel: cannot register channel\n");
1635 		return (rv);
1636 	}
1637 
1638 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1639 	    i_ldc_tx_hdlr, ldcp, NULL);
1640 	if (rv) {
1641 		DWARN(ldcp->id,
1642 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1643 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1644 		return (rv);
1645 	}
1646 
1647 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1648 	    i_ldc_rx_hdlr, ldcp, NULL);
1649 	if (rv) {
1650 		DWARN(ldcp->id,
1651 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1652 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1653 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1654 		return (rv);
1655 	}
1656 
1657 	ldcp->tstate |= TS_CNEX_RDY;
1658 
1659 	return (0);
1660 }
1661 
1662 /*
1663  * Unregister a channel with the channel nexus
1664  */
1665 static int
1666 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1667 {
1668 	int		rv = 0;
1669 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1670 
1671 	if (cinfo->dip == NULL) {
1672 		DWARN(ldcp->id,
1673 		    "i_ldc_unregister_channel: cnex has not registered\n");
1674 		return (EAGAIN);
1675 	}
1676 
1677 	if (ldcp->tstate & TS_CNEX_RDY) {
1678 
1679 		/* Remove the Rx interrupt */
1680 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1681 		if (rv) {
1682 			if (rv != EAGAIN) {
1683 				DWARN(ldcp->id,
1684 				    "i_ldc_unregister_channel: err removing "
1685 				    "Rx intr\n");
1686 				return (rv);
1687 			}
1688 
1689 			/*
1690 			 * If interrupts are pending and handler has
1691 			 * finished running, clear interrupt and try
1692 			 * again
1693 			 */
1694 			if (ldcp->rx_intr_state != LDC_INTR_PEND)
1695 				return (rv);
1696 
1697 			(void) i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1698 			rv = cinfo->rem_intr(cinfo->dip, ldcp->id,
1699 			    CNEX_RX_INTR);
1700 			if (rv) {
1701 				DWARN(ldcp->id, "i_ldc_unregister_channel: "
1702 				    "err removing Rx interrupt\n");
1703 				return (rv);
1704 			}
1705 		}
1706 
1707 		/* Remove the Tx interrupt */
1708 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1709 		if (rv) {
1710 			DWARN(ldcp->id,
1711 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1712 			return (rv);
1713 		}
1714 
1715 		/* Unregister the channel */
1716 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1717 		if (rv) {
1718 			DWARN(ldcp->id,
1719 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1720 			return (rv);
1721 		}
1722 
1723 		ldcp->tstate &= ~TS_CNEX_RDY;
1724 	}
1725 
1726 	return (0);
1727 }
1728 
1729 
1730 /*
1731  * LDC transmit interrupt handler
1732  *    triggered for chanel up/down/reset events
1733  *    and Tx queue content changes
1734  */
1735 static uint_t
1736 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1737 {
1738 	_NOTE(ARGUNUSED(arg2))
1739 
1740 	int 		rv;
1741 	ldc_chan_t 	*ldcp;
1742 	boolean_t 	notify_client = B_FALSE;
1743 	uint64_t	notify_event = 0, link_state;
1744 
1745 	/* Get the channel for which interrupt was received */
1746 	ASSERT(arg1 != NULL);
1747 	ldcp = (ldc_chan_t *)arg1;
1748 
1749 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1750 	    ldcp->id, ldcp);
1751 
1752 	/* Lock channel */
1753 	mutex_enter(&ldcp->lock);
1754 
1755 	/* Obtain Tx lock */
1756 	mutex_enter(&ldcp->tx_lock);
1757 
1758 	/* mark interrupt as pending */
1759 	ldcp->tx_intr_state = LDC_INTR_ACTIVE;
1760 
1761 	/* save current link state */
1762 	link_state = ldcp->link_state;
1763 
1764 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1765 	    &ldcp->link_state);
1766 	if (rv) {
1767 		cmn_err(CE_WARN,
1768 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1769 		    ldcp->id, rv);
1770 		i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1771 		mutex_exit(&ldcp->tx_lock);
1772 		mutex_exit(&ldcp->lock);
1773 		return (DDI_INTR_CLAIMED);
1774 	}
1775 
1776 	/*
1777 	 * reset the channel state if the channel went down
1778 	 * (other side unconfigured queue) or channel was reset
1779 	 * (other side reconfigured its queue)
1780 	 */
1781 	if (link_state != ldcp->link_state &&
1782 	    ldcp->link_state == LDC_CHANNEL_DOWN) {
1783 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1784 		i_ldc_reset(ldcp, B_FALSE);
1785 		notify_client = B_TRUE;
1786 		notify_event = LDC_EVT_DOWN;
1787 	}
1788 
1789 	if (link_state != ldcp->link_state &&
1790 	    ldcp->link_state == LDC_CHANNEL_RESET) {
1791 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1792 		i_ldc_reset(ldcp, B_FALSE);
1793 		notify_client = B_TRUE;
1794 		notify_event = LDC_EVT_RESET;
1795 	}
1796 
1797 	if (link_state != ldcp->link_state &&
1798 	    (ldcp->tstate & ~TS_IN_RESET) == TS_OPEN &&
1799 	    ldcp->link_state == LDC_CHANNEL_UP) {
1800 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1801 		notify_client = B_TRUE;
1802 		notify_event = LDC_EVT_RESET;
1803 		ldcp->tstate |= TS_LINK_READY;
1804 		ldcp->status = LDC_READY;
1805 	}
1806 
1807 	/* if callbacks are disabled, do not notify */
1808 	if (!ldcp->cb_enabled)
1809 		notify_client = B_FALSE;
1810 
1811 	/* Unlock channel */
1812 
1813 	if (notify_client) {
1814 		ldcp->cb_inprogress = B_TRUE;
1815 		mutex_exit(&ldcp->tx_lock);
1816 		mutex_exit(&ldcp->lock);
1817 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1818 		if (rv) {
1819 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1820 			    "failure", ldcp->id);
1821 		}
1822 		mutex_enter(&ldcp->lock);
1823 		ldcp->cb_inprogress = B_FALSE;
1824 	}
1825 
1826 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1827 	mutex_exit(&ldcp->lock);
1828 
1829 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1830 
1831 	return (DDI_INTR_CLAIMED);
1832 }
1833 
1834 /*
1835  * LDC receive interrupt handler
1836  *    triggered for channel with data pending to read
1837  *    i.e. Rx queue content changes
1838  */
1839 static uint_t
1840 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1841 {
1842 	_NOTE(ARGUNUSED(arg2))
1843 
1844 	int		rv;
1845 	uint64_t 	rx_head, rx_tail;
1846 	ldc_msg_t 	*msg;
1847 	ldc_chan_t 	*ldcp;
1848 	boolean_t 	notify_client = B_FALSE;
1849 	uint64_t	notify_event = 0;
1850 	uint64_t	link_state, first_fragment = 0;
1851 
1852 
1853 	/* Get the channel for which interrupt was received */
1854 	if (arg1 == NULL) {
1855 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1856 		return (DDI_INTR_UNCLAIMED);
1857 	}
1858 
1859 	ldcp = (ldc_chan_t *)arg1;
1860 
1861 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1862 	    ldcp->id, ldcp);
1863 	D1(ldcp->id, "i_ldc_rx_hdlr: (%llx) USR%lx/TS%lx/HS%lx, LSTATE=%lx\n",
1864 	    ldcp->id, ldcp->status, ldcp->tstate, ldcp->hstate,
1865 	    ldcp->link_state);
1866 
1867 	/* Lock channel */
1868 	mutex_enter(&ldcp->lock);
1869 
1870 	/* mark interrupt as pending */
1871 	ldcp->rx_intr_state = LDC_INTR_ACTIVE;
1872 
1873 	/*
1874 	 * Read packet(s) from the queue
1875 	 */
1876 	for (;;) {
1877 
1878 		link_state = ldcp->link_state;
1879 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1880 		    &ldcp->link_state);
1881 		if (rv) {
1882 			cmn_err(CE_WARN,
1883 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1884 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1885 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1886 			mutex_exit(&ldcp->lock);
1887 			return (DDI_INTR_CLAIMED);
1888 		}
1889 
1890 		/*
1891 		 * reset the channel state if the channel went down
1892 		 * (other side unconfigured queue) or channel was reset
1893 		 * (other side reconfigured its queue)
1894 		 */
1895 
1896 		if (link_state != ldcp->link_state) {
1897 			switch (ldcp->link_state) {
1898 			case LDC_CHANNEL_DOWN:
1899 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1900 				    "link down\n", ldcp->id);
1901 				mutex_enter(&ldcp->tx_lock);
1902 				i_ldc_reset(ldcp, B_FALSE);
1903 				mutex_exit(&ldcp->tx_lock);
1904 				notify_client = B_TRUE;
1905 				notify_event = LDC_EVT_DOWN;
1906 				goto loop_exit;
1907 
1908 			case LDC_CHANNEL_UP:
1909 				D1(ldcp->id, "i_ldc_rx_hdlr: "
1910 				    "channel link up\n", ldcp->id);
1911 
1912 				if ((ldcp->tstate & ~TS_IN_RESET) == TS_OPEN) {
1913 					notify_client = B_TRUE;
1914 					notify_event = LDC_EVT_RESET;
1915 					ldcp->tstate |= TS_LINK_READY;
1916 					ldcp->status = LDC_READY;
1917 				}
1918 				break;
1919 
1920 			case LDC_CHANNEL_RESET:
1921 			default:
1922 #ifdef DEBUG
1923 force_reset:
1924 #endif
1925 				D1(ldcp->id, "i_ldc_rx_hdlr: channel "
1926 				    "link reset\n", ldcp->id);
1927 				mutex_enter(&ldcp->tx_lock);
1928 				i_ldc_reset(ldcp, B_FALSE);
1929 				mutex_exit(&ldcp->tx_lock);
1930 				notify_client = B_TRUE;
1931 				notify_event = LDC_EVT_RESET;
1932 				break;
1933 			}
1934 		}
1935 
1936 #ifdef DEBUG
1937 		if (LDC_INJECT_RESET(ldcp))
1938 			goto force_reset;
1939 #endif
1940 
1941 		if (rx_head == rx_tail) {
1942 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1943 			    ldcp->id);
1944 			break;
1945 		}
1946 
1947 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1948 		    rx_head, rx_tail);
1949 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1950 		    ldcp->rx_q_va + rx_head);
1951 
1952 		/* get the message */
1953 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1954 
1955 		/* if channel is in RAW mode or data pkt, notify and return */
1956 		if (ldcp->mode == LDC_MODE_RAW) {
1957 			notify_client = B_TRUE;
1958 			notify_event |= LDC_EVT_READ;
1959 			break;
1960 		}
1961 
1962 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1963 
1964 			/* discard packet if channel is not up */
1965 			if ((ldcp->tstate & ~TS_IN_RESET) != TS_UP) {
1966 
1967 				/* move the head one position */
1968 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1969 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1970 
1971 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
1972 					break;
1973 
1974 				continue;
1975 			} else {
1976 				if ((ldcp->tstate & TS_IN_RESET) == 0)
1977 					notify_client = B_TRUE;
1978 				notify_event |= LDC_EVT_READ;
1979 				break;
1980 			}
1981 		}
1982 
1983 		/* Check the sequence ID for the message received */
1984 		rv = i_ldc_check_seqid(ldcp, msg);
1985 		if (rv != 0) {
1986 
1987 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
1988 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
1989 
1990 			/* Reset last_msg_rcd to start of message */
1991 			if (first_fragment != 0) {
1992 				ldcp->last_msg_rcd = first_fragment - 1;
1993 				first_fragment = 0;
1994 			}
1995 
1996 			/*
1997 			 * Send a NACK due to seqid mismatch
1998 			 */
1999 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
2000 			    (msg->ctrl & LDC_CTRL_MASK));
2001 
2002 			if (rv) {
2003 				cmn_err(CE_NOTE,
2004 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
2005 				    "CTRL/NACK msg\n", ldcp->id);
2006 
2007 				/* if cannot send NACK - reset channel */
2008 				mutex_enter(&ldcp->tx_lock);
2009 				i_ldc_reset(ldcp, B_TRUE);
2010 				mutex_exit(&ldcp->tx_lock);
2011 				rv = ECONNRESET;
2012 				break;
2013 			}
2014 
2015 			/* purge receive queue */
2016 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2017 			break;
2018 		}
2019 
2020 		/* record the message ID */
2021 		ldcp->last_msg_rcd = msg->seqid;
2022 
2023 		/* process control messages */
2024 		if (msg->type & LDC_CTRL) {
2025 			/* save current internal state */
2026 			uint64_t tstate = ldcp->tstate;
2027 
2028 			rv = i_ldc_ctrlmsg(ldcp, msg);
2029 			if (rv == EAGAIN) {
2030 				/* re-process pkt - state was adjusted */
2031 				continue;
2032 			}
2033 			if (rv == ECONNRESET) {
2034 				notify_client = B_TRUE;
2035 				notify_event = LDC_EVT_RESET;
2036 				break;
2037 			}
2038 
2039 			/*
2040 			 * control message processing was successful
2041 			 * channel transitioned to ready for communication
2042 			 */
2043 			if (rv == 0 && ldcp->tstate == TS_UP &&
2044 			    (tstate & ~TS_IN_RESET) !=
2045 			    (ldcp->tstate & ~TS_IN_RESET)) {
2046 				notify_client = B_TRUE;
2047 				notify_event = LDC_EVT_UP;
2048 			}
2049 		}
2050 
2051 		/* process data ACKs */
2052 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
2053 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
2054 				notify_client = B_TRUE;
2055 				notify_event = LDC_EVT_RESET;
2056 				break;
2057 			}
2058 		}
2059 
2060 		/* move the head one position */
2061 		rx_head = (rx_head + LDC_PACKET_SIZE) %
2062 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2063 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
2064 			notify_client = B_TRUE;
2065 			notify_event = LDC_EVT_RESET;
2066 			break;
2067 		}
2068 
2069 	} /* for */
2070 
2071 loop_exit:
2072 
2073 	/* if callbacks are disabled, do not notify */
2074 	if (!ldcp->cb_enabled)
2075 		notify_client = B_FALSE;
2076 
2077 	/*
2078 	 * If there are data packets in the queue, the ldc_read will
2079 	 * clear interrupts after draining the queue, else clear interrupts
2080 	 */
2081 	if ((notify_event & LDC_EVT_READ) == 0) {
2082 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2083 	} else
2084 		ldcp->rx_intr_state = LDC_INTR_PEND;
2085 
2086 	mutex_exit(&ldcp->lock);
2087 
2088 	if (notify_client) {
2089 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
2090 		if (rv) {
2091 			DWARN(ldcp->id,
2092 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
2093 			    ldcp->id);
2094 		}
2095 	}
2096 
2097 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
2098 	return (DDI_INTR_CLAIMED);
2099 }
2100 
2101 
2102 /* -------------------------------------------------------------------------- */
2103 
2104 /*
2105  * LDC API functions
2106  */
2107 
2108 /*
2109  * Initialize the channel. Allocate internal structure and memory for
2110  * TX/RX queues, and initialize locks.
2111  */
2112 int
2113 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
2114 {
2115 	ldc_chan_t 	*ldcp;
2116 	int		rv, exit_val;
2117 	uint64_t	ra_base, nentries;
2118 	uint64_t	qlen;
2119 
2120 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
2121 
2122 	if (attr == NULL) {
2123 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
2124 		return (EINVAL);
2125 	}
2126 	if (handle == NULL) {
2127 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
2128 		return (EINVAL);
2129 	}
2130 
2131 	/* check if channel is valid */
2132 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
2133 	if (rv == H_ECHANNEL) {
2134 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
2135 		return (EINVAL);
2136 	}
2137 
2138 	/* check if the channel has already been initialized */
2139 	mutex_enter(&ldcssp->lock);
2140 	ldcp = ldcssp->chan_list;
2141 	while (ldcp != NULL) {
2142 		if (ldcp->id == id) {
2143 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
2144 			    id);
2145 			mutex_exit(&ldcssp->lock);
2146 			return (EADDRINUSE);
2147 		}
2148 		ldcp = ldcp->next;
2149 	}
2150 	mutex_exit(&ldcssp->lock);
2151 
2152 	ASSERT(ldcp == NULL);
2153 
2154 	*handle = 0;
2155 
2156 	/* Allocate an ldcp structure */
2157 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2158 
2159 	/*
2160 	 * Initialize the channel and Tx lock
2161 	 *
2162 	 * The channel 'lock' protects the entire channel and
2163 	 * should be acquired before initializing, resetting,
2164 	 * destroying or reading from a channel.
2165 	 *
2166 	 * The 'tx_lock' should be acquired prior to transmitting
2167 	 * data over the channel. The lock should also be acquired
2168 	 * prior to channel reconfiguration (in order to prevent
2169 	 * concurrent writes).
2170 	 *
2171 	 * ORDERING: When both locks are being acquired, to prevent
2172 	 * deadlocks, the channel lock should be always acquired prior
2173 	 * to the tx_lock.
2174 	 */
2175 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2176 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2177 
2178 	/* Initialize the channel */
2179 	ldcp->id = id;
2180 	ldcp->cb = NULL;
2181 	ldcp->cb_arg = NULL;
2182 	ldcp->cb_inprogress = B_FALSE;
2183 	ldcp->cb_enabled = B_FALSE;
2184 	ldcp->next = NULL;
2185 
2186 	/* Read attributes */
2187 	ldcp->mode = attr->mode;
2188 	ldcp->devclass = attr->devclass;
2189 	ldcp->devinst = attr->instance;
2190 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2191 
2192 	D1(ldcp->id,
2193 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2194 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2195 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2196 
2197 	ldcp->next_vidx = 0;
2198 	ldcp->tstate = TS_IN_RESET;
2199 	ldcp->hstate = 0;
2200 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2201 	ldcp->last_ack_rcd = 0;
2202 	ldcp->last_msg_rcd = 0;
2203 
2204 	ldcp->stream_bufferp = NULL;
2205 	ldcp->exp_dring_list = NULL;
2206 	ldcp->imp_dring_list = NULL;
2207 	ldcp->mhdl_list = NULL;
2208 
2209 	ldcp->tx_intr_state = LDC_INTR_NONE;
2210 	ldcp->rx_intr_state = LDC_INTR_NONE;
2211 
2212 	/* Initialize payload size depending on whether channel is reliable */
2213 	switch (ldcp->mode) {
2214 	case LDC_MODE_RAW:
2215 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2216 		ldcp->read_p = i_ldc_read_raw;
2217 		ldcp->write_p = i_ldc_write_raw;
2218 		break;
2219 	case LDC_MODE_UNRELIABLE:
2220 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2221 		ldcp->read_p = i_ldc_read_packet;
2222 		ldcp->write_p = i_ldc_write_packet;
2223 		break;
2224 	case LDC_MODE_RELIABLE:
2225 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2226 		ldcp->read_p = i_ldc_read_packet;
2227 		ldcp->write_p = i_ldc_write_packet;
2228 		break;
2229 	case LDC_MODE_STREAM:
2230 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2231 
2232 		ldcp->stream_remains = 0;
2233 		ldcp->stream_offset = 0;
2234 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2235 		ldcp->read_p = i_ldc_read_stream;
2236 		ldcp->write_p = i_ldc_write_stream;
2237 		break;
2238 	default:
2239 		exit_val = EINVAL;
2240 		goto cleanup_on_exit;
2241 	}
2242 
2243 	/*
2244 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2245 	 * value is smaller than default length of ldc_queue_entries,
2246 	 * qlen is set to ldc_queue_entries..
2247 	 */
2248 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2249 	ldcp->rx_q_entries =
2250 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2251 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2252 
2253 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2254 
2255 	/* Create a transmit queue */
2256 	ldcp->tx_q_va = (uint64_t)
2257 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2258 	if (ldcp->tx_q_va == NULL) {
2259 		cmn_err(CE_WARN,
2260 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2261 		    ldcp->id);
2262 		exit_val = ENOMEM;
2263 		goto cleanup_on_exit;
2264 	}
2265 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2266 
2267 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2268 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2269 
2270 	ldcp->tstate |= TS_TXQ_RDY;
2271 
2272 	/* Create a receive queue */
2273 	ldcp->rx_q_va = (uint64_t)
2274 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2275 	if (ldcp->rx_q_va == NULL) {
2276 		cmn_err(CE_WARN,
2277 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2278 		    ldcp->id);
2279 		exit_val = ENOMEM;
2280 		goto cleanup_on_exit;
2281 	}
2282 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2283 
2284 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2285 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2286 
2287 	ldcp->tstate |= TS_RXQ_RDY;
2288 
2289 	/* Init descriptor ring and memory handle list lock */
2290 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2291 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2292 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2293 
2294 	/* mark status as INITialized */
2295 	ldcp->status = LDC_INIT;
2296 
2297 	/* Add to channel list */
2298 	mutex_enter(&ldcssp->lock);
2299 	ldcp->next = ldcssp->chan_list;
2300 	ldcssp->chan_list = ldcp;
2301 	ldcssp->channel_count++;
2302 	mutex_exit(&ldcssp->lock);
2303 
2304 	/* set the handle */
2305 	*handle = (ldc_handle_t)ldcp;
2306 
2307 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2308 
2309 	return (0);
2310 
2311 cleanup_on_exit:
2312 
2313 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2314 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2315 
2316 	if (ldcp->tstate & TS_TXQ_RDY)
2317 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2318 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2319 
2320 	if (ldcp->tstate & TS_RXQ_RDY)
2321 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2322 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2323 
2324 	mutex_destroy(&ldcp->tx_lock);
2325 	mutex_destroy(&ldcp->lock);
2326 
2327 	if (ldcp)
2328 		kmem_free(ldcp, sizeof (ldc_chan_t));
2329 
2330 	return (exit_val);
2331 }
2332 
2333 /*
2334  * Finalizes the LDC connection. It will return EBUSY if the
2335  * channel is open. A ldc_close() has to be done prior to
2336  * a ldc_fini operation. It frees TX/RX queues, associated
2337  * with the channel
2338  */
2339 int
2340 ldc_fini(ldc_handle_t handle)
2341 {
2342 	ldc_chan_t 	*ldcp;
2343 	ldc_chan_t 	*tmp_ldcp;
2344 	uint64_t 	id;
2345 
2346 	if (handle == NULL) {
2347 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2348 		return (EINVAL);
2349 	}
2350 	ldcp = (ldc_chan_t *)handle;
2351 	id = ldcp->id;
2352 
2353 	mutex_enter(&ldcp->lock);
2354 
2355 	if ((ldcp->tstate & ~TS_IN_RESET) > TS_INIT) {
2356 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2357 		    ldcp->id);
2358 		mutex_exit(&ldcp->lock);
2359 		return (EBUSY);
2360 	}
2361 
2362 	/* Remove from the channel list */
2363 	mutex_enter(&ldcssp->lock);
2364 	tmp_ldcp = ldcssp->chan_list;
2365 	if (tmp_ldcp == ldcp) {
2366 		ldcssp->chan_list = ldcp->next;
2367 		ldcp->next = NULL;
2368 	} else {
2369 		while (tmp_ldcp != NULL) {
2370 			if (tmp_ldcp->next == ldcp) {
2371 				tmp_ldcp->next = ldcp->next;
2372 				ldcp->next = NULL;
2373 				break;
2374 			}
2375 			tmp_ldcp = tmp_ldcp->next;
2376 		}
2377 		if (tmp_ldcp == NULL) {
2378 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2379 			mutex_exit(&ldcssp->lock);
2380 			mutex_exit(&ldcp->lock);
2381 			return (EINVAL);
2382 		}
2383 	}
2384 
2385 	ldcssp->channel_count--;
2386 
2387 	mutex_exit(&ldcssp->lock);
2388 
2389 	/* Free the map table for this channel */
2390 	if (ldcp->mtbl) {
2391 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2392 		if (ldcp->mtbl->contigmem)
2393 			contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2394 		else
2395 			kmem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2396 		mutex_destroy(&ldcp->mtbl->lock);
2397 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2398 	}
2399 
2400 	/* Destroy descriptor ring and memory handle list lock */
2401 	mutex_destroy(&ldcp->exp_dlist_lock);
2402 	mutex_destroy(&ldcp->imp_dlist_lock);
2403 	mutex_destroy(&ldcp->mlist_lock);
2404 
2405 	/* Free the stream buffer for STREAM_MODE */
2406 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2407 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2408 
2409 	/* Free the RX queue */
2410 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2411 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2412 	ldcp->tstate &= ~TS_RXQ_RDY;
2413 
2414 	/* Free the TX queue */
2415 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2416 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2417 	ldcp->tstate &= ~TS_TXQ_RDY;
2418 
2419 	mutex_exit(&ldcp->lock);
2420 
2421 	/* Destroy mutex */
2422 	mutex_destroy(&ldcp->tx_lock);
2423 	mutex_destroy(&ldcp->lock);
2424 
2425 	/* free channel structure */
2426 	kmem_free(ldcp, sizeof (ldc_chan_t));
2427 
2428 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2429 
2430 	return (0);
2431 }
2432 
2433 /*
2434  * Open the LDC channel for use. It registers the TX/RX queues
2435  * with the Hypervisor. It also specifies the interrupt number
2436  * and target CPU for this channel
2437  */
2438 int
2439 ldc_open(ldc_handle_t handle)
2440 {
2441 	ldc_chan_t 	*ldcp;
2442 	int 		rv;
2443 
2444 	if (handle == NULL) {
2445 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2446 		return (EINVAL);
2447 	}
2448 
2449 	ldcp = (ldc_chan_t *)handle;
2450 
2451 	mutex_enter(&ldcp->lock);
2452 
2453 	if (ldcp->tstate < TS_INIT) {
2454 		DWARN(ldcp->id,
2455 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2456 		mutex_exit(&ldcp->lock);
2457 		return (EFAULT);
2458 	}
2459 	if ((ldcp->tstate & ~TS_IN_RESET) >= TS_OPEN) {
2460 		DWARN(ldcp->id,
2461 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2462 		mutex_exit(&ldcp->lock);
2463 		return (EFAULT);
2464 	}
2465 
2466 	/*
2467 	 * Unregister/Register the tx queue with the hypervisor
2468 	 */
2469 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2470 	if (rv) {
2471 		cmn_err(CE_WARN,
2472 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2473 		    ldcp->id);
2474 		mutex_exit(&ldcp->lock);
2475 		return (EIO);
2476 	}
2477 
2478 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2479 	if (rv) {
2480 		cmn_err(CE_WARN,
2481 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2482 		    ldcp->id);
2483 		mutex_exit(&ldcp->lock);
2484 		return (EIO);
2485 	}
2486 
2487 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2488 	    ldcp->id);
2489 
2490 	/*
2491 	 * Unregister/Register the rx queue with the hypervisor
2492 	 */
2493 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2494 	if (rv) {
2495 		cmn_err(CE_WARN,
2496 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2497 		    ldcp->id);
2498 		mutex_exit(&ldcp->lock);
2499 		return (EIO);
2500 	}
2501 
2502 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2503 	if (rv) {
2504 		cmn_err(CE_WARN,
2505 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2506 		    ldcp->id);
2507 		mutex_exit(&ldcp->lock);
2508 		return (EIO);
2509 	}
2510 
2511 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2512 	    ldcp->id);
2513 
2514 	ldcp->tstate |= TS_QCONF_RDY;
2515 
2516 	/* Register the channel with the channel nexus */
2517 	rv = i_ldc_register_channel(ldcp);
2518 	if (rv && rv != EAGAIN) {
2519 		cmn_err(CE_WARN,
2520 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2521 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2522 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2523 		mutex_exit(&ldcp->lock);
2524 		return (EIO);
2525 	}
2526 
2527 	/* mark channel in OPEN state */
2528 	ldcp->status = LDC_OPEN;
2529 
2530 	/* Read channel state */
2531 	rv = hv_ldc_tx_get_state(ldcp->id,
2532 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2533 	if (rv) {
2534 		cmn_err(CE_WARN,
2535 		    "ldc_open: (0x%lx) cannot read channel state\n",
2536 		    ldcp->id);
2537 		(void) i_ldc_unregister_channel(ldcp);
2538 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2539 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2540 		mutex_exit(&ldcp->lock);
2541 		return (EIO);
2542 	}
2543 
2544 	/*
2545 	 * set the ACKd head to current head location for reliable &
2546 	 * streaming mode
2547 	 */
2548 	ldcp->tx_ackd_head = ldcp->tx_head;
2549 
2550 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2551 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2552 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2553 		ldcp->tstate |= TS_LINK_READY;
2554 		ldcp->status = LDC_READY;
2555 	}
2556 
2557 	/*
2558 	 * if channel is being opened in RAW mode - no handshake is needed
2559 	 * switch the channel READY and UP state
2560 	 */
2561 	if (ldcp->mode == LDC_MODE_RAW) {
2562 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2563 		ldcp->status = LDC_UP;
2564 	}
2565 
2566 	mutex_exit(&ldcp->lock);
2567 
2568 	/*
2569 	 * Increment number of open channels
2570 	 */
2571 	mutex_enter(&ldcssp->lock);
2572 	ldcssp->channels_open++;
2573 	mutex_exit(&ldcssp->lock);
2574 
2575 	DWARN(ldcp->id,
2576 	    "ldc_open: (0x%llx) channel (0x%p) open for use "
2577 	    "(tstate=0x%x, status=0x%x)\n",
2578 	    ldcp->id, ldcp, ldcp->tstate, ldcp->status);
2579 
2580 	return (0);
2581 }
2582 
2583 /*
2584  * Close the LDC connection. It will return EBUSY if there
2585  * are memory segments or descriptor rings either bound to or
2586  * mapped over the channel
2587  */
2588 int
2589 ldc_close(ldc_handle_t handle)
2590 {
2591 	ldc_chan_t 	*ldcp;
2592 	int		rv = 0, retries = 0;
2593 	boolean_t	chk_done = B_FALSE;
2594 
2595 	if (handle == NULL) {
2596 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2597 		return (EINVAL);
2598 	}
2599 	ldcp = (ldc_chan_t *)handle;
2600 
2601 	mutex_enter(&ldcp->lock);
2602 
2603 	/* return error if channel is not open */
2604 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_OPEN) {
2605 		DWARN(ldcp->id,
2606 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2607 		mutex_exit(&ldcp->lock);
2608 		return (EFAULT);
2609 	}
2610 
2611 	/* if any memory handles, drings, are bound or mapped cannot close */
2612 	if (ldcp->mhdl_list != NULL) {
2613 		DWARN(ldcp->id,
2614 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2615 		    ldcp->id);
2616 		mutex_exit(&ldcp->lock);
2617 		return (EBUSY);
2618 	}
2619 	if (ldcp->exp_dring_list != NULL) {
2620 		DWARN(ldcp->id,
2621 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2622 		    ldcp->id);
2623 		mutex_exit(&ldcp->lock);
2624 		return (EBUSY);
2625 	}
2626 	if (ldcp->imp_dring_list != NULL) {
2627 		DWARN(ldcp->id,
2628 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2629 		    ldcp->id);
2630 		mutex_exit(&ldcp->lock);
2631 		return (EBUSY);
2632 	}
2633 
2634 	/* Obtain Tx lock */
2635 	mutex_enter(&ldcp->tx_lock);
2636 
2637 	/*
2638 	 * Wait for pending transmits to complete i.e Tx queue to drain
2639 	 * if there are pending pkts - wait 1 ms and retry again
2640 	 */
2641 	for (;;) {
2642 
2643 		rv = hv_ldc_tx_get_state(ldcp->id,
2644 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2645 		if (rv) {
2646 			cmn_err(CE_WARN,
2647 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2648 			mutex_exit(&ldcp->tx_lock);
2649 			mutex_exit(&ldcp->lock);
2650 			return (EIO);
2651 		}
2652 
2653 		if (ldcp->tx_head == ldcp->tx_tail ||
2654 		    ldcp->link_state != LDC_CHANNEL_UP) {
2655 			break;
2656 		}
2657 
2658 		if (chk_done) {
2659 			DWARN(ldcp->id,
2660 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2661 			    ldcp->id);
2662 			break;
2663 		}
2664 
2665 		/* wait for one ms and try again */
2666 		delay(drv_usectohz(1000));
2667 		chk_done = B_TRUE;
2668 	}
2669 
2670 	/*
2671 	 * Drain the Tx and Rx queues
2672 	 */
2673 	(void) i_ldc_txq_reconf(ldcp);
2674 	(void) i_ldc_rxq_reconf(ldcp, B_TRUE);
2675 
2676 	/*
2677 	 * Unregister the channel with the nexus
2678 	 */
2679 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2680 
2681 		mutex_exit(&ldcp->tx_lock);
2682 		mutex_exit(&ldcp->lock);
2683 
2684 		/* if any error other than EAGAIN return back */
2685 		if (rv != EAGAIN || retries >= LDC_MAX_RETRIES) {
2686 			cmn_err(CE_WARN,
2687 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2688 			    ldcp->id, rv);
2689 			return (rv);
2690 		}
2691 
2692 		/*
2693 		 * As there could be pending interrupts we need
2694 		 * to wait and try again
2695 		 */
2696 		drv_usecwait(LDC_DELAY);
2697 		mutex_enter(&ldcp->lock);
2698 		mutex_enter(&ldcp->tx_lock);
2699 		retries++;
2700 	}
2701 
2702 	/*
2703 	 * Unregister queues
2704 	 */
2705 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2706 	if (rv) {
2707 		cmn_err(CE_WARN,
2708 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2709 		    ldcp->id);
2710 		mutex_exit(&ldcp->tx_lock);
2711 		mutex_exit(&ldcp->lock);
2712 		return (EIO);
2713 	}
2714 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2715 	if (rv) {
2716 		cmn_err(CE_WARN,
2717 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2718 		    ldcp->id);
2719 		mutex_exit(&ldcp->tx_lock);
2720 		mutex_exit(&ldcp->lock);
2721 		return (EIO);
2722 	}
2723 
2724 	ldcp->tstate &= ~TS_QCONF_RDY;
2725 
2726 	/* Reset channel state information */
2727 	i_ldc_reset_state(ldcp);
2728 
2729 	/* Mark channel as down and in initialized state */
2730 	ldcp->tx_ackd_head = 0;
2731 	ldcp->tx_head = 0;
2732 	ldcp->tstate = TS_IN_RESET|TS_INIT;
2733 	ldcp->status = LDC_INIT;
2734 
2735 	mutex_exit(&ldcp->tx_lock);
2736 	mutex_exit(&ldcp->lock);
2737 
2738 	/* Decrement number of open channels */
2739 	mutex_enter(&ldcssp->lock);
2740 	ldcssp->channels_open--;
2741 	mutex_exit(&ldcssp->lock);
2742 
2743 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2744 
2745 	return (0);
2746 }
2747 
2748 /*
2749  * Register channel callback
2750  */
2751 int
2752 ldc_reg_callback(ldc_handle_t handle,
2753     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2754 {
2755 	ldc_chan_t *ldcp;
2756 
2757 	if (handle == NULL) {
2758 		DWARN(DBG_ALL_LDCS,
2759 		    "ldc_reg_callback: invalid channel handle\n");
2760 		return (EINVAL);
2761 	}
2762 	if (((uint64_t)cb) < KERNELBASE) {
2763 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2764 		return (EINVAL);
2765 	}
2766 	ldcp = (ldc_chan_t *)handle;
2767 
2768 	mutex_enter(&ldcp->lock);
2769 
2770 	if (ldcp->cb) {
2771 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2772 		    ldcp->id);
2773 		mutex_exit(&ldcp->lock);
2774 		return (EIO);
2775 	}
2776 	if (ldcp->cb_inprogress) {
2777 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2778 		    ldcp->id);
2779 		mutex_exit(&ldcp->lock);
2780 		return (EWOULDBLOCK);
2781 	}
2782 
2783 	ldcp->cb = cb;
2784 	ldcp->cb_arg = arg;
2785 	ldcp->cb_enabled = B_TRUE;
2786 
2787 	D1(ldcp->id,
2788 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2789 	    ldcp->id);
2790 
2791 	mutex_exit(&ldcp->lock);
2792 
2793 	return (0);
2794 }
2795 
2796 /*
2797  * Unregister channel callback
2798  */
2799 int
2800 ldc_unreg_callback(ldc_handle_t handle)
2801 {
2802 	ldc_chan_t *ldcp;
2803 
2804 	if (handle == NULL) {
2805 		DWARN(DBG_ALL_LDCS,
2806 		    "ldc_unreg_callback: invalid channel handle\n");
2807 		return (EINVAL);
2808 	}
2809 	ldcp = (ldc_chan_t *)handle;
2810 
2811 	mutex_enter(&ldcp->lock);
2812 
2813 	if (ldcp->cb == NULL) {
2814 		DWARN(ldcp->id,
2815 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2816 		    ldcp->id);
2817 		mutex_exit(&ldcp->lock);
2818 		return (EIO);
2819 	}
2820 	if (ldcp->cb_inprogress) {
2821 		DWARN(ldcp->id,
2822 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2823 		    ldcp->id);
2824 		mutex_exit(&ldcp->lock);
2825 		return (EWOULDBLOCK);
2826 	}
2827 
2828 	ldcp->cb = NULL;
2829 	ldcp->cb_arg = NULL;
2830 	ldcp->cb_enabled = B_FALSE;
2831 
2832 	D1(ldcp->id,
2833 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2834 	    ldcp->id);
2835 
2836 	mutex_exit(&ldcp->lock);
2837 
2838 	return (0);
2839 }
2840 
2841 
2842 /*
2843  * Bring a channel up by initiating a handshake with the peer
2844  * This call is asynchronous. It will complete at a later point
2845  * in time when the peer responds back with an RTR.
2846  */
2847 int
2848 ldc_up(ldc_handle_t handle)
2849 {
2850 	int 		rv;
2851 	ldc_chan_t 	*ldcp;
2852 	ldc_msg_t 	*ldcmsg;
2853 	uint64_t 	tx_tail, tstate;
2854 
2855 	if (handle == NULL) {
2856 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2857 		return (EINVAL);
2858 	}
2859 	ldcp = (ldc_chan_t *)handle;
2860 
2861 	mutex_enter(&ldcp->lock);
2862 
2863 	D1(ldcp->id, "ldc_up: (0x%llx) doing channel UP\n", ldcp->id);
2864 
2865 	/* clear the reset state */
2866 	tstate = ldcp->tstate;
2867 	ldcp->tstate &= ~TS_IN_RESET;
2868 
2869 	if (ldcp->tstate == TS_UP) {
2870 		DWARN(ldcp->id,
2871 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2872 		    ldcp->id);
2873 
2874 		/* mark channel as up */
2875 		ldcp->status = LDC_UP;
2876 
2877 		/*
2878 		 * if channel was in reset state and there was
2879 		 * pending data clear interrupt state. this will
2880 		 * trigger an interrupt, causing the RX handler to
2881 		 * to invoke the client's callback
2882 		 */
2883 		if ((tstate & TS_IN_RESET) &&
2884 		    ldcp->rx_intr_state == LDC_INTR_PEND) {
2885 			DWARN(ldcp->id,
2886 			    "ldc_up: (0x%llx) channel has pending data, "
2887 			    "clearing interrupt\n", ldcp->id);
2888 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2889 		}
2890 
2891 		mutex_exit(&ldcp->lock);
2892 		return (0);
2893 	}
2894 
2895 	/* if the channel is in RAW mode - mark it as UP, if READY */
2896 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2897 		ldcp->tstate = TS_UP;
2898 		mutex_exit(&ldcp->lock);
2899 		return (0);
2900 	}
2901 
2902 	/* Don't start another handshake if there is one in progress */
2903 	if (ldcp->hstate) {
2904 		D1(ldcp->id,
2905 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2906 		    ldcp->id);
2907 		mutex_exit(&ldcp->lock);
2908 		return (0);
2909 	}
2910 
2911 	mutex_enter(&ldcp->tx_lock);
2912 
2913 	/* get the current tail for the LDC msg */
2914 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2915 	if (rv) {
2916 		DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2917 		    ldcp->id);
2918 		mutex_exit(&ldcp->tx_lock);
2919 		mutex_exit(&ldcp->lock);
2920 		return (ECONNREFUSED);
2921 	}
2922 
2923 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2924 	ZERO_PKT(ldcmsg);
2925 
2926 	ldcmsg->type = LDC_CTRL;
2927 	ldcmsg->stype = LDC_INFO;
2928 	ldcmsg->ctrl = LDC_VER;
2929 	ldcp->next_vidx = 0;
2930 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2931 
2932 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2933 
2934 	/* initiate the send by calling into HV and set the new tail */
2935 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2936 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2937 
2938 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2939 	if (rv) {
2940 		DWARN(ldcp->id,
2941 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2942 		    ldcp->id, rv);
2943 		mutex_exit(&ldcp->tx_lock);
2944 		mutex_exit(&ldcp->lock);
2945 		return (rv);
2946 	}
2947 
2948 	ldcp->hstate |= TS_SENT_VER;
2949 	ldcp->tx_tail = tx_tail;
2950 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2951 
2952 	mutex_exit(&ldcp->tx_lock);
2953 	mutex_exit(&ldcp->lock);
2954 
2955 	return (rv);
2956 }
2957 
2958 
2959 /*
2960  * Bring a channel down by resetting its state and queues
2961  */
2962 int
2963 ldc_down(ldc_handle_t handle)
2964 {
2965 	ldc_chan_t 	*ldcp;
2966 
2967 	if (handle == NULL) {
2968 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
2969 		return (EINVAL);
2970 	}
2971 	ldcp = (ldc_chan_t *)handle;
2972 	mutex_enter(&ldcp->lock);
2973 	mutex_enter(&ldcp->tx_lock);
2974 	i_ldc_reset(ldcp, B_TRUE);
2975 	mutex_exit(&ldcp->tx_lock);
2976 	mutex_exit(&ldcp->lock);
2977 
2978 	return (0);
2979 }
2980 
2981 /*
2982  * Get the current channel status
2983  */
2984 int
2985 ldc_status(ldc_handle_t handle, ldc_status_t *status)
2986 {
2987 	ldc_chan_t *ldcp;
2988 
2989 	if (handle == NULL || status == NULL) {
2990 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
2991 		return (EINVAL);
2992 	}
2993 	ldcp = (ldc_chan_t *)handle;
2994 
2995 	*status = ((ldc_chan_t *)handle)->status;
2996 
2997 	DWARN(ldcp->id,
2998 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
2999 	return (0);
3000 }
3001 
3002 
3003 /*
3004  * Set the channel's callback mode - enable/disable callbacks
3005  */
3006 int
3007 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
3008 {
3009 	ldc_chan_t 	*ldcp;
3010 
3011 	if (handle == NULL) {
3012 		DWARN(DBG_ALL_LDCS,
3013 		    "ldc_set_intr_mode: invalid channel handle\n");
3014 		return (EINVAL);
3015 	}
3016 	ldcp = (ldc_chan_t *)handle;
3017 
3018 	/*
3019 	 * Record no callbacks should be invoked
3020 	 */
3021 	mutex_enter(&ldcp->lock);
3022 
3023 	switch (cmode) {
3024 	case LDC_CB_DISABLE:
3025 		if (!ldcp->cb_enabled) {
3026 			DWARN(ldcp->id,
3027 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
3028 			    ldcp->id);
3029 			break;
3030 		}
3031 		ldcp->cb_enabled = B_FALSE;
3032 
3033 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
3034 		    ldcp->id);
3035 		break;
3036 
3037 	case LDC_CB_ENABLE:
3038 		if (ldcp->cb_enabled) {
3039 			DWARN(ldcp->id,
3040 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
3041 			    ldcp->id);
3042 			break;
3043 		}
3044 		ldcp->cb_enabled = B_TRUE;
3045 
3046 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
3047 		    ldcp->id);
3048 		break;
3049 	}
3050 
3051 	mutex_exit(&ldcp->lock);
3052 
3053 	return (0);
3054 }
3055 
3056 /*
3057  * Check to see if there are packets on the incoming queue
3058  * Will return hasdata = B_FALSE if there are no packets
3059  */
3060 int
3061 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
3062 {
3063 	int 		rv;
3064 	uint64_t 	rx_head, rx_tail;
3065 	ldc_chan_t 	*ldcp;
3066 
3067 	if (handle == NULL) {
3068 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
3069 		return (EINVAL);
3070 	}
3071 	ldcp = (ldc_chan_t *)handle;
3072 
3073 	*hasdata = B_FALSE;
3074 
3075 	mutex_enter(&ldcp->lock);
3076 
3077 	if (ldcp->tstate != TS_UP) {
3078 		D1(ldcp->id,
3079 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
3080 		mutex_exit(&ldcp->lock);
3081 		return (ECONNRESET);
3082 	}
3083 
3084 	/* Read packet(s) from the queue */
3085 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3086 	    &ldcp->link_state);
3087 	if (rv != 0) {
3088 		cmn_err(CE_WARN,
3089 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
3090 		mutex_exit(&ldcp->lock);
3091 		return (EIO);
3092 	}
3093 	/* reset the channel state if the channel went down */
3094 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3095 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3096 		mutex_enter(&ldcp->tx_lock);
3097 		i_ldc_reset(ldcp, B_FALSE);
3098 		mutex_exit(&ldcp->tx_lock);
3099 		mutex_exit(&ldcp->lock);
3100 		return (ECONNRESET);
3101 	}
3102 
3103 	if ((rx_head != rx_tail) ||
3104 	    (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_remains > 0)) {
3105 		D1(ldcp->id,
3106 		    "ldc_chkq: (0x%llx) queue has pkt(s) or buffered data\n",
3107 		    ldcp->id);
3108 		*hasdata = B_TRUE;
3109 	}
3110 
3111 	mutex_exit(&ldcp->lock);
3112 
3113 	return (0);
3114 }
3115 
3116 
3117 /*
3118  * Read 'size' amount of bytes or less. If incoming buffer
3119  * is more than 'size', ENOBUFS is returned.
3120  *
3121  * On return, size contains the number of bytes read.
3122  */
3123 int
3124 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
3125 {
3126 	ldc_chan_t 	*ldcp;
3127 	uint64_t 	rx_head = 0, rx_tail = 0;
3128 	int		rv = 0, exit_val;
3129 
3130 	if (handle == NULL) {
3131 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
3132 		return (EINVAL);
3133 	}
3134 
3135 	ldcp = (ldc_chan_t *)handle;
3136 
3137 	/* channel lock */
3138 	mutex_enter(&ldcp->lock);
3139 
3140 	if (ldcp->tstate != TS_UP) {
3141 		DWARN(ldcp->id,
3142 		    "ldc_read: (0x%llx) channel is not in UP state\n",
3143 		    ldcp->id);
3144 		exit_val = ECONNRESET;
3145 	} else {
3146 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
3147 	}
3148 
3149 	/*
3150 	 * if queue has been drained - clear interrupt
3151 	 */
3152 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3153 	    &ldcp->link_state);
3154 
3155 	ASSERT(rv == 0);
3156 
3157 	if (exit_val == 0) {
3158 		if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3159 		    ldcp->link_state == LDC_CHANNEL_RESET) {
3160 			mutex_enter(&ldcp->tx_lock);
3161 			i_ldc_reset(ldcp, B_FALSE);
3162 			exit_val = ECONNRESET;
3163 			mutex_exit(&ldcp->tx_lock);
3164 		}
3165 		if ((rv == 0) &&
3166 		    (ldcp->rx_intr_state == LDC_INTR_PEND) &&
3167 		    (rx_head == rx_tail)) {
3168 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
3169 		}
3170 	}
3171 
3172 	mutex_exit(&ldcp->lock);
3173 	return (exit_val);
3174 }
3175 
3176 /*
3177  * Basic raw mondo read -
3178  * no interpretation of mondo contents at all.
3179  *
3180  * Enter and exit with ldcp->lock held by caller
3181  */
3182 static int
3183 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3184 {
3185 	uint64_t 	q_size_mask;
3186 	ldc_msg_t 	*msgp;
3187 	uint8_t		*msgbufp;
3188 	int		rv = 0, space;
3189 	uint64_t 	rx_head, rx_tail;
3190 
3191 	space = *sizep;
3192 
3193 	if (space < LDC_PAYLOAD_SIZE_RAW)
3194 		return (ENOBUFS);
3195 
3196 	ASSERT(mutex_owned(&ldcp->lock));
3197 
3198 	/* compute mask for increment */
3199 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3200 
3201 	/*
3202 	 * Read packet(s) from the queue
3203 	 */
3204 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
3205 	    &ldcp->link_state);
3206 	if (rv != 0) {
3207 		cmn_err(CE_WARN,
3208 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3209 		    ldcp->id);
3210 		return (EIO);
3211 	}
3212 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3213 		" rxt=0x%llx, st=0x%llx\n",
3214 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3215 
3216 	/* reset the channel state if the channel went down */
3217 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3218 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3219 		mutex_enter(&ldcp->tx_lock);
3220 		i_ldc_reset(ldcp, B_FALSE);
3221 		mutex_exit(&ldcp->tx_lock);
3222 		return (ECONNRESET);
3223 	}
3224 
3225 	/*
3226 	 * Check for empty queue
3227 	 */
3228 	if (rx_head == rx_tail) {
3229 		*sizep = 0;
3230 		return (0);
3231 	}
3232 
3233 	/* get the message */
3234 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3235 
3236 	/* if channel is in RAW mode, copy data and return */
3237 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3238 
3239 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3240 
3241 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3242 
3243 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3244 
3245 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3246 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3247 
3248 	return (rv);
3249 }
3250 
3251 /*
3252  * Process LDC mondos to build larger packets
3253  * with either un-reliable or reliable delivery.
3254  *
3255  * Enter and exit with ldcp->lock held by caller
3256  */
3257 static int
3258 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3259 {
3260 	int		rv = 0;
3261 	uint64_t 	rx_head = 0, rx_tail = 0;
3262 	uint64_t 	curr_head = 0;
3263 	ldc_msg_t 	*msg;
3264 	caddr_t 	target;
3265 	size_t 		len = 0, bytes_read = 0;
3266 	int 		retries = 0;
3267 	uint64_t 	q_size_mask;
3268 	uint64_t	first_fragment = 0;
3269 
3270 	target = target_bufp;
3271 
3272 	ASSERT(mutex_owned(&ldcp->lock));
3273 
3274 	/* check if the buffer and size are valid */
3275 	if (target_bufp == NULL || *sizep == 0) {
3276 		DWARN(ldcp->id, "ldc_read: (0x%llx) invalid buffer/size\n",
3277 		    ldcp->id);
3278 		return (EINVAL);
3279 	}
3280 
3281 	/* compute mask for increment */
3282 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3283 
3284 	/*
3285 	 * Read packet(s) from the queue
3286 	 */
3287 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3288 	    &ldcp->link_state);
3289 	if (rv != 0) {
3290 		cmn_err(CE_WARN, "ldc_read: (0x%lx) unable to read queue ptrs",
3291 		    ldcp->id);
3292 		mutex_enter(&ldcp->tx_lock);
3293 		i_ldc_reset(ldcp, B_TRUE);
3294 		mutex_exit(&ldcp->tx_lock);
3295 		return (ECONNRESET);
3296 	}
3297 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3298 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3299 
3300 	/* reset the channel state if the channel went down */
3301 	if (ldcp->link_state != LDC_CHANNEL_UP)
3302 		goto channel_is_reset;
3303 
3304 	for (;;) {
3305 
3306 		if (curr_head == rx_tail) {
3307 			rv = hv_ldc_rx_get_state(ldcp->id,
3308 			    &rx_head, &rx_tail, &ldcp->link_state);
3309 			if (rv != 0) {
3310 				cmn_err(CE_WARN,
3311 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3312 				    ldcp->id);
3313 				mutex_enter(&ldcp->tx_lock);
3314 				i_ldc_reset(ldcp, B_TRUE);
3315 				mutex_exit(&ldcp->tx_lock);
3316 				return (ECONNRESET);
3317 			}
3318 			if (ldcp->link_state != LDC_CHANNEL_UP)
3319 				goto channel_is_reset;
3320 
3321 			if (curr_head == rx_tail) {
3322 
3323 				/* If in the middle of a fragmented xfer */
3324 				if (first_fragment != 0) {
3325 
3326 					/* wait for ldc_delay usecs */
3327 					drv_usecwait(ldc_delay);
3328 
3329 					if (++retries < ldc_max_retries)
3330 						continue;
3331 
3332 					*sizep = 0;
3333 					ldcp->last_msg_rcd = first_fragment - 1;
3334 					DWARN(DBG_ALL_LDCS, "ldc_read: "
3335 						"(0x%llx) read timeout",
3336 						ldcp->id);
3337 					return (EAGAIN);
3338 				}
3339 				*sizep = 0;
3340 				break;
3341 			}
3342 		}
3343 		retries = 0;
3344 
3345 		D2(ldcp->id,
3346 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3347 		    ldcp->id, curr_head, rx_head, rx_tail);
3348 
3349 		/* get the message */
3350 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3351 
3352 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3353 		    ldcp->rx_q_va + curr_head);
3354 
3355 		/* Check the message ID for the message received */
3356 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3357 
3358 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3359 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3360 
3361 			/* throw away data */
3362 			bytes_read = 0;
3363 
3364 			/* Reset last_msg_rcd to start of message */
3365 			if (first_fragment != 0) {
3366 				ldcp->last_msg_rcd = first_fragment - 1;
3367 				first_fragment = 0;
3368 			}
3369 			/*
3370 			 * Send a NACK -- invalid seqid
3371 			 * get the current tail for the response
3372 			 */
3373 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3374 			    (msg->ctrl & LDC_CTRL_MASK));
3375 			if (rv) {
3376 				cmn_err(CE_NOTE,
3377 				    "ldc_read: (0x%lx) err sending "
3378 				    "NACK msg\n", ldcp->id);
3379 
3380 				/* if cannot send NACK - reset channel */
3381 				mutex_enter(&ldcp->tx_lock);
3382 				i_ldc_reset(ldcp, B_FALSE);
3383 				mutex_exit(&ldcp->tx_lock);
3384 				rv = ECONNRESET;
3385 				break;
3386 			}
3387 
3388 			/* purge receive queue */
3389 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3390 
3391 			break;
3392 		}
3393 
3394 		/*
3395 		 * Process any messages of type CTRL messages
3396 		 * Future implementations should try to pass these
3397 		 * to LDC link by resetting the intr state.
3398 		 *
3399 		 * NOTE: not done as a switch() as type can be both ctrl+data
3400 		 */
3401 		if (msg->type & LDC_CTRL) {
3402 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3403 				if (rv == EAGAIN)
3404 					continue;
3405 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3406 				*sizep = 0;
3407 				bytes_read = 0;
3408 				break;
3409 			}
3410 		}
3411 
3412 		/* process data ACKs */
3413 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3414 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3415 				*sizep = 0;
3416 				bytes_read = 0;
3417 				break;
3418 			}
3419 		}
3420 
3421 		/* process data messages */
3422 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3423 
3424 			uint8_t *msgbuf = (uint8_t *)(
3425 				(ldcp->mode == LDC_MODE_RELIABLE ||
3426 				ldcp->mode == LDC_MODE_STREAM)
3427 				? msg->rdata : msg->udata);
3428 
3429 			D2(ldcp->id,
3430 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3431 
3432 			/* get the packet length */
3433 			len = (msg->env & LDC_LEN_MASK);
3434 
3435 				/*
3436 				 * FUTURE OPTIMIZATION:
3437 				 * dont need to set q head for every
3438 				 * packet we read just need to do this when
3439 				 * we are done or need to wait for more
3440 				 * mondos to make a full packet - this is
3441 				 * currently expensive.
3442 				 */
3443 
3444 			if (first_fragment == 0) {
3445 
3446 				/*
3447 				 * first packets should always have the start
3448 				 * bit set (even for a single packet). If not
3449 				 * throw away the packet
3450 				 */
3451 				if (!(msg->env & LDC_FRAG_START)) {
3452 
3453 					DWARN(DBG_ALL_LDCS,
3454 					    "ldc_read: (0x%llx) not start - "
3455 					    "frag=%x\n", ldcp->id,
3456 					    (msg->env) & LDC_FRAG_MASK);
3457 
3458 					/* toss pkt, inc head, cont reading */
3459 					bytes_read = 0;
3460 					target = target_bufp;
3461 					curr_head =
3462 						(curr_head + LDC_PACKET_SIZE)
3463 						& q_size_mask;
3464 					if (rv = i_ldc_set_rx_head(ldcp,
3465 						curr_head))
3466 						break;
3467 
3468 					continue;
3469 				}
3470 
3471 				first_fragment = msg->seqid;
3472 			} else {
3473 				/* check to see if this is a pkt w/ START bit */
3474 				if (msg->env & LDC_FRAG_START) {
3475 					DWARN(DBG_ALL_LDCS,
3476 					    "ldc_read:(0x%llx) unexpected pkt"
3477 					    " env=0x%x discarding %d bytes,"
3478 					    " lastmsg=%d, currentmsg=%d\n",
3479 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3480 					    bytes_read, ldcp->last_msg_rcd,
3481 					    msg->seqid);
3482 
3483 					/* throw data we have read so far */
3484 					bytes_read = 0;
3485 					target = target_bufp;
3486 					first_fragment = msg->seqid;
3487 
3488 					if (rv = i_ldc_set_rx_head(ldcp,
3489 						curr_head))
3490 						break;
3491 				}
3492 			}
3493 
3494 			/* copy (next) pkt into buffer */
3495 			if (len <= (*sizep - bytes_read)) {
3496 				bcopy(msgbuf, target, len);
3497 				target += len;
3498 				bytes_read += len;
3499 			} else {
3500 				/*
3501 				 * there is not enough space in the buffer to
3502 				 * read this pkt. throw message away & continue
3503 				 * reading data from queue
3504 				 */
3505 				DWARN(DBG_ALL_LDCS,
3506 				    "ldc_read: (0x%llx) buffer too small, "
3507 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3508 				    curr_head, *sizep, bytes_read+len);
3509 
3510 				first_fragment = 0;
3511 				target = target_bufp;
3512 				bytes_read = 0;
3513 
3514 				/* throw away everything received so far */
3515 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3516 					break;
3517 
3518 				/* continue reading remaining pkts */
3519 				continue;
3520 			}
3521 		}
3522 
3523 		/* set the message id */
3524 		ldcp->last_msg_rcd = msg->seqid;
3525 
3526 		/* move the head one position */
3527 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3528 
3529 		if (msg->env & LDC_FRAG_STOP) {
3530 
3531 			/*
3532 			 * All pkts that are part of this fragmented transfer
3533 			 * have been read or this was a single pkt read
3534 			 * or there was an error
3535 			 */
3536 
3537 			/* set the queue head */
3538 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3539 				bytes_read = 0;
3540 
3541 			*sizep = bytes_read;
3542 
3543 			break;
3544 		}
3545 
3546 		/* advance head if it is a DATA ACK */
3547 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3548 
3549 			/* set the queue head */
3550 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3551 				bytes_read = 0;
3552 				break;
3553 			}
3554 
3555 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3556 			    ldcp->id, curr_head);
3557 		}
3558 
3559 	} /* for (;;) */
3560 
3561 
3562 	/*
3563 	 * If useful data was read - Send msg ACK
3564 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3565 	 */
3566 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3567 		ldcp->mode == LDC_MODE_STREAM)) {
3568 
3569 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3570 		if (rv && rv != EWOULDBLOCK) {
3571 			cmn_err(CE_NOTE,
3572 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3573 
3574 			/* if cannot send ACK - reset channel */
3575 			goto channel_is_reset;
3576 		}
3577 	}
3578 
3579 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3580 
3581 	return (rv);
3582 
3583 channel_is_reset:
3584 	mutex_enter(&ldcp->tx_lock);
3585 	i_ldc_reset(ldcp, B_FALSE);
3586 	mutex_exit(&ldcp->tx_lock);
3587 	return (ECONNRESET);
3588 }
3589 
3590 /*
3591  * Use underlying reliable packet mechanism to fetch
3592  * and buffer incoming packets so we can hand them back as
3593  * a basic byte stream.
3594  *
3595  * Enter and exit with ldcp->lock held by caller
3596  */
3597 static int
3598 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3599 {
3600 	int	rv;
3601 	size_t	size;
3602 
3603 	ASSERT(mutex_owned(&ldcp->lock));
3604 
3605 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3606 		ldcp->id, *sizep);
3607 
3608 	if (ldcp->stream_remains == 0) {
3609 		size = ldcp->mtu;
3610 		rv = i_ldc_read_packet(ldcp,
3611 			(caddr_t)ldcp->stream_bufferp, &size);
3612 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3613 			ldcp->id, size);
3614 
3615 		if (rv != 0)
3616 			return (rv);
3617 
3618 		ldcp->stream_remains = size;
3619 		ldcp->stream_offset = 0;
3620 	}
3621 
3622 	size = MIN(ldcp->stream_remains, *sizep);
3623 
3624 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3625 	ldcp->stream_offset += size;
3626 	ldcp->stream_remains -= size;
3627 
3628 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3629 		ldcp->id, size);
3630 
3631 	*sizep = size;
3632 	return (0);
3633 }
3634 
3635 /*
3636  * Write specified amount of bytes to the channel
3637  * in multiple pkts of pkt_payload size. Each
3638  * packet is tagged with an unique packet ID in
3639  * the case of a reliable link.
3640  *
3641  * On return, size contains the number of bytes written.
3642  */
3643 int
3644 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3645 {
3646 	ldc_chan_t	*ldcp;
3647 	int		rv = 0;
3648 
3649 	if (handle == NULL) {
3650 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3651 		return (EINVAL);
3652 	}
3653 	ldcp = (ldc_chan_t *)handle;
3654 
3655 	/* check if writes can occur */
3656 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3657 		/*
3658 		 * Could not get the lock - channel could
3659 		 * be in the process of being unconfigured
3660 		 * or reader has encountered an error
3661 		 */
3662 		return (EAGAIN);
3663 	}
3664 
3665 	/* check if non-zero data to write */
3666 	if (buf == NULL || sizep == NULL) {
3667 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3668 		    ldcp->id);
3669 		mutex_exit(&ldcp->tx_lock);
3670 		return (EINVAL);
3671 	}
3672 
3673 	if (*sizep == 0) {
3674 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3675 		    ldcp->id);
3676 		mutex_exit(&ldcp->tx_lock);
3677 		return (0);
3678 	}
3679 
3680 	/* Check if channel is UP for data exchange */
3681 	if (ldcp->tstate != TS_UP) {
3682 		DWARN(ldcp->id,
3683 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3684 		    ldcp->id);
3685 		*sizep = 0;
3686 		rv = ECONNRESET;
3687 	} else {
3688 		rv = ldcp->write_p(ldcp, buf, sizep);
3689 	}
3690 
3691 	mutex_exit(&ldcp->tx_lock);
3692 
3693 	return (rv);
3694 }
3695 
3696 /*
3697  * Write a raw packet to the channel
3698  * On return, size contains the number of bytes written.
3699  */
3700 static int
3701 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3702 {
3703 	ldc_msg_t 	*ldcmsg;
3704 	uint64_t 	tx_head, tx_tail, new_tail;
3705 	int		rv = 0;
3706 	size_t		size;
3707 
3708 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3709 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3710 
3711 	size = *sizep;
3712 
3713 	/*
3714 	 * Check to see if the packet size is less than or
3715 	 * equal to packet size support in raw mode
3716 	 */
3717 	if (size > ldcp->pkt_payload) {
3718 		DWARN(ldcp->id,
3719 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3720 		    ldcp->id, *sizep);
3721 		*sizep = 0;
3722 		return (EMSGSIZE);
3723 	}
3724 
3725 	/* get the qptrs for the tx queue */
3726 	rv = hv_ldc_tx_get_state(ldcp->id,
3727 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3728 	if (rv != 0) {
3729 		cmn_err(CE_WARN,
3730 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3731 		*sizep = 0;
3732 		return (EIO);
3733 	}
3734 
3735 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3736 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3737 		DWARN(ldcp->id,
3738 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3739 
3740 		*sizep = 0;
3741 		if (mutex_tryenter(&ldcp->lock)) {
3742 			i_ldc_reset(ldcp, B_FALSE);
3743 			mutex_exit(&ldcp->lock);
3744 		} else {
3745 			/*
3746 			 * Release Tx lock, and then reacquire channel
3747 			 * and Tx lock in correct order
3748 			 */
3749 			mutex_exit(&ldcp->tx_lock);
3750 			mutex_enter(&ldcp->lock);
3751 			mutex_enter(&ldcp->tx_lock);
3752 			i_ldc_reset(ldcp, B_FALSE);
3753 			mutex_exit(&ldcp->lock);
3754 		}
3755 		return (ECONNRESET);
3756 	}
3757 
3758 	tx_tail = ldcp->tx_tail;
3759 	tx_head = ldcp->tx_head;
3760 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3761 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3762 
3763 	if (new_tail == tx_head) {
3764 		DWARN(DBG_ALL_LDCS,
3765 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3766 		*sizep = 0;
3767 		return (EWOULDBLOCK);
3768 	}
3769 
3770 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3771 	    ldcp->id, size);
3772 
3773 	/* Send the data now */
3774 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3775 
3776 	/* copy the data into pkt */
3777 	bcopy((uint8_t *)buf, ldcmsg, size);
3778 
3779 	/* increment tail */
3780 	tx_tail = new_tail;
3781 
3782 	/*
3783 	 * All packets have been copied into the TX queue
3784 	 * update the tail ptr in the HV
3785 	 */
3786 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3787 	if (rv) {
3788 		if (rv == EWOULDBLOCK) {
3789 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3790 			    ldcp->id);
3791 			*sizep = 0;
3792 			return (EWOULDBLOCK);
3793 		}
3794 
3795 		*sizep = 0;
3796 		if (mutex_tryenter(&ldcp->lock)) {
3797 			i_ldc_reset(ldcp, B_FALSE);
3798 			mutex_exit(&ldcp->lock);
3799 		} else {
3800 			/*
3801 			 * Release Tx lock, and then reacquire channel
3802 			 * and Tx lock in correct order
3803 			 */
3804 			mutex_exit(&ldcp->tx_lock);
3805 			mutex_enter(&ldcp->lock);
3806 			mutex_enter(&ldcp->tx_lock);
3807 			i_ldc_reset(ldcp, B_FALSE);
3808 			mutex_exit(&ldcp->lock);
3809 		}
3810 		return (ECONNRESET);
3811 	}
3812 
3813 	ldcp->tx_tail = tx_tail;
3814 	*sizep = size;
3815 
3816 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3817 
3818 	return (rv);
3819 }
3820 
3821 
3822 /*
3823  * Write specified amount of bytes to the channel
3824  * in multiple pkts of pkt_payload size. Each
3825  * packet is tagged with an unique packet ID in
3826  * the case of a reliable link.
3827  *
3828  * On return, size contains the number of bytes written.
3829  * This function needs to ensure that the write size is < MTU size
3830  */
3831 static int
3832 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3833 {
3834 	ldc_msg_t 	*ldcmsg;
3835 	uint64_t 	tx_head, tx_tail, new_tail, start;
3836 	uint64_t	txq_size_mask, numavail;
3837 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3838 	size_t 		len, bytes_written = 0, remaining;
3839 	int		rv;
3840 	uint32_t	curr_seqid;
3841 
3842 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3843 
3844 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3845 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3846 		ldcp->mode == LDC_MODE_STREAM);
3847 
3848 	/* compute mask for increment */
3849 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3850 
3851 	/* get the qptrs for the tx queue */
3852 	rv = hv_ldc_tx_get_state(ldcp->id,
3853 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3854 	if (rv != 0) {
3855 		cmn_err(CE_WARN,
3856 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3857 		*size = 0;
3858 		return (EIO);
3859 	}
3860 
3861 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3862 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3863 		DWARN(ldcp->id,
3864 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3865 		*size = 0;
3866 		if (mutex_tryenter(&ldcp->lock)) {
3867 			i_ldc_reset(ldcp, B_FALSE);
3868 			mutex_exit(&ldcp->lock);
3869 		} else {
3870 			/*
3871 			 * Release Tx lock, and then reacquire channel
3872 			 * and Tx lock in correct order
3873 			 */
3874 			mutex_exit(&ldcp->tx_lock);
3875 			mutex_enter(&ldcp->lock);
3876 			mutex_enter(&ldcp->tx_lock);
3877 			i_ldc_reset(ldcp, B_FALSE);
3878 			mutex_exit(&ldcp->lock);
3879 		}
3880 		return (ECONNRESET);
3881 	}
3882 
3883 	tx_tail = ldcp->tx_tail;
3884 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3885 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3886 
3887 	/*
3888 	 * Link mode determines whether we use HV Tx head or the
3889 	 * private protocol head (corresponding to last ACKd pkt) for
3890 	 * determining how much we can write
3891 	 */
3892 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3893 		ldcp->mode == LDC_MODE_STREAM)
3894 		? ldcp->tx_ackd_head : ldcp->tx_head;
3895 	if (new_tail == tx_head) {
3896 		DWARN(DBG_ALL_LDCS,
3897 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3898 		*size = 0;
3899 		return (EWOULDBLOCK);
3900 	}
3901 
3902 	/*
3903 	 * Make sure that the LDC Tx queue has enough space
3904 	 */
3905 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3906 		+ ldcp->tx_q_entries - 1;
3907 	numavail %= ldcp->tx_q_entries;
3908 
3909 	if (*size > (numavail * ldcp->pkt_payload)) {
3910 		DWARN(DBG_ALL_LDCS,
3911 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3912 		return (EWOULDBLOCK);
3913 	}
3914 
3915 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3916 	    ldcp->id, *size);
3917 
3918 	/* Send the data now */
3919 	bytes_written = 0;
3920 	curr_seqid = ldcp->last_msg_snt;
3921 	start = tx_tail;
3922 
3923 	while (*size > bytes_written) {
3924 
3925 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3926 
3927 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3928 			ldcp->mode == LDC_MODE_STREAM)
3929 			? ldcmsg->rdata : ldcmsg->udata);
3930 
3931 		ldcmsg->type = LDC_DATA;
3932 		ldcmsg->stype = LDC_INFO;
3933 		ldcmsg->ctrl = 0;
3934 
3935 		remaining = *size - bytes_written;
3936 		len = min(ldcp->pkt_payload, remaining);
3937 		ldcmsg->env = (uint8_t)len;
3938 
3939 		curr_seqid++;
3940 		ldcmsg->seqid = curr_seqid;
3941 
3942 		/* copy the data into pkt */
3943 		bcopy(source, msgbuf, len);
3944 
3945 		source += len;
3946 		bytes_written += len;
3947 
3948 		/* increment tail */
3949 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3950 
3951 		ASSERT(tx_tail != tx_head);
3952 	}
3953 
3954 	/* Set the start and stop bits */
3955 	ldcmsg->env |= LDC_FRAG_STOP;
3956 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3957 	ldcmsg->env |= LDC_FRAG_START;
3958 
3959 	/*
3960 	 * All packets have been copied into the TX queue
3961 	 * update the tail ptr in the HV
3962 	 */
3963 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3964 	if (rv == 0) {
3965 		ldcp->tx_tail = tx_tail;
3966 		ldcp->last_msg_snt = curr_seqid;
3967 		*size = bytes_written;
3968 	} else {
3969 		int rv2;
3970 
3971 		if (rv != EWOULDBLOCK) {
3972 			*size = 0;
3973 			if (mutex_tryenter(&ldcp->lock)) {
3974 				i_ldc_reset(ldcp, B_FALSE);
3975 				mutex_exit(&ldcp->lock);
3976 			} else {
3977 				/*
3978 				 * Release Tx lock, and then reacquire channel
3979 				 * and Tx lock in correct order
3980 				 */
3981 				mutex_exit(&ldcp->tx_lock);
3982 				mutex_enter(&ldcp->lock);
3983 				mutex_enter(&ldcp->tx_lock);
3984 				i_ldc_reset(ldcp, B_FALSE);
3985 				mutex_exit(&ldcp->lock);
3986 			}
3987 			return (ECONNRESET);
3988 		}
3989 
3990 		DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
3991 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
3992 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
3993 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
3994 
3995 		rv2 = hv_ldc_tx_get_state(ldcp->id,
3996 		    &tx_head, &tx_tail, &ldcp->link_state);
3997 
3998 		DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
3999 			"(head 0x%x, tail 0x%x state 0x%x)\n",
4000 			rv2, tx_head, tx_tail, ldcp->link_state);
4001 
4002 		*size = 0;
4003 	}
4004 
4005 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
4006 
4007 	return (rv);
4008 }
4009 
4010 /*
4011  * Write specified amount of bytes to the channel
4012  * in multiple pkts of pkt_payload size. Each
4013  * packet is tagged with an unique packet ID in
4014  * the case of a reliable link.
4015  *
4016  * On return, size contains the number of bytes written.
4017  * This function needs to ensure that the write size is < MTU size
4018  */
4019 static int
4020 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
4021 {
4022 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
4023 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
4024 
4025 	/* Truncate packet to max of MTU size */
4026 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
4027 	return (i_ldc_write_packet(ldcp, buf, sizep));
4028 }
4029 
4030 
4031 /*
4032  * Interfaces for channel nexus to register/unregister with LDC module
4033  * The nexus will register functions to be used to register individual
4034  * channels with the nexus and enable interrupts for the channels
4035  */
4036 int
4037 ldc_register(ldc_cnex_t *cinfo)
4038 {
4039 	ldc_chan_t	*ldcp;
4040 
4041 	if (cinfo == NULL || cinfo->dip == NULL ||
4042 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
4043 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
4044 	    cinfo->clr_intr == NULL) {
4045 
4046 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
4047 		return (EINVAL);
4048 	}
4049 
4050 	mutex_enter(&ldcssp->lock);
4051 
4052 	/* nexus registration */
4053 	ldcssp->cinfo.dip = cinfo->dip;
4054 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
4055 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
4056 	ldcssp->cinfo.add_intr = cinfo->add_intr;
4057 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
4058 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
4059 
4060 	/* register any channels that might have been previously initialized */
4061 	ldcp = ldcssp->chan_list;
4062 	while (ldcp) {
4063 		if ((ldcp->tstate & TS_QCONF_RDY) &&
4064 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
4065 			(void) i_ldc_register_channel(ldcp);
4066 
4067 		ldcp = ldcp->next;
4068 	}
4069 
4070 	mutex_exit(&ldcssp->lock);
4071 
4072 	return (0);
4073 }
4074 
4075 int
4076 ldc_unregister(ldc_cnex_t *cinfo)
4077 {
4078 	if (cinfo == NULL || cinfo->dip == NULL) {
4079 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
4080 		return (EINVAL);
4081 	}
4082 
4083 	mutex_enter(&ldcssp->lock);
4084 
4085 	if (cinfo->dip != ldcssp->cinfo.dip) {
4086 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
4087 		mutex_exit(&ldcssp->lock);
4088 		return (EINVAL);
4089 	}
4090 
4091 	/* nexus unregister */
4092 	ldcssp->cinfo.dip = NULL;
4093 	ldcssp->cinfo.reg_chan = NULL;
4094 	ldcssp->cinfo.unreg_chan = NULL;
4095 	ldcssp->cinfo.add_intr = NULL;
4096 	ldcssp->cinfo.rem_intr = NULL;
4097 	ldcssp->cinfo.clr_intr = NULL;
4098 
4099 	mutex_exit(&ldcssp->lock);
4100 
4101 	return (0);
4102 }
4103 
4104 
4105 /* ------------------------------------------------------------------------- */
4106 
4107 /*
4108  * Allocate a memory handle for the channel and link it into the list
4109  * Also choose which memory table to use if this is the first handle
4110  * being assigned to this channel
4111  */
4112 int
4113 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
4114 {
4115 	ldc_chan_t 	*ldcp;
4116 	ldc_mhdl_t	*mhdl;
4117 
4118 	if (handle == NULL) {
4119 		DWARN(DBG_ALL_LDCS,
4120 		    "ldc_mem_alloc_handle: invalid channel handle\n");
4121 		return (EINVAL);
4122 	}
4123 	ldcp = (ldc_chan_t *)handle;
4124 
4125 	mutex_enter(&ldcp->lock);
4126 
4127 	/* check to see if channel is initalized */
4128 	if ((ldcp->tstate & ~TS_IN_RESET) < TS_INIT) {
4129 		DWARN(ldcp->id,
4130 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
4131 		    ldcp->id);
4132 		mutex_exit(&ldcp->lock);
4133 		return (EINVAL);
4134 	}
4135 
4136 	/* allocate handle for channel */
4137 	mhdl = kmem_cache_alloc(ldcssp->memhdl_cache, KM_SLEEP);
4138 
4139 	/* initialize the lock */
4140 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
4141 
4142 	mhdl->myshadow = B_FALSE;
4143 	mhdl->memseg = NULL;
4144 	mhdl->ldcp = ldcp;
4145 	mhdl->status = LDC_UNBOUND;
4146 
4147 	/* insert memory handle (@ head) into list */
4148 	if (ldcp->mhdl_list == NULL) {
4149 		ldcp->mhdl_list = mhdl;
4150 		mhdl->next = NULL;
4151 	} else {
4152 		/* insert @ head */
4153 		mhdl->next = ldcp->mhdl_list;
4154 		ldcp->mhdl_list = mhdl;
4155 	}
4156 
4157 	/* return the handle */
4158 	*mhandle = (ldc_mem_handle_t)mhdl;
4159 
4160 	mutex_exit(&ldcp->lock);
4161 
4162 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4163 	    ldcp->id, mhdl);
4164 
4165 	return (0);
4166 }
4167 
4168 /*
4169  * Free memory handle for the channel and unlink it from the list
4170  */
4171 int
4172 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4173 {
4174 	ldc_mhdl_t 	*mhdl, *phdl;
4175 	ldc_chan_t 	*ldcp;
4176 
4177 	if (mhandle == NULL) {
4178 		DWARN(DBG_ALL_LDCS,
4179 		    "ldc_mem_free_handle: invalid memory handle\n");
4180 		return (EINVAL);
4181 	}
4182 	mhdl = (ldc_mhdl_t *)mhandle;
4183 
4184 	mutex_enter(&mhdl->lock);
4185 
4186 	ldcp = mhdl->ldcp;
4187 
4188 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4189 		DWARN(ldcp->id,
4190 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4191 		    mhdl);
4192 		mutex_exit(&mhdl->lock);
4193 		return (EINVAL);
4194 	}
4195 	mutex_exit(&mhdl->lock);
4196 
4197 	mutex_enter(&ldcp->mlist_lock);
4198 
4199 	phdl = ldcp->mhdl_list;
4200 
4201 	/* first handle */
4202 	if (phdl == mhdl) {
4203 		ldcp->mhdl_list = mhdl->next;
4204 		mutex_destroy(&mhdl->lock);
4205 		kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4206 
4207 		D1(ldcp->id,
4208 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4209 		    ldcp->id, mhdl);
4210 	} else {
4211 		/* walk the list - unlink and free */
4212 		while (phdl != NULL) {
4213 			if (phdl->next == mhdl) {
4214 				phdl->next = mhdl->next;
4215 				mutex_destroy(&mhdl->lock);
4216 				kmem_cache_free(ldcssp->memhdl_cache, mhdl);
4217 				D1(ldcp->id,
4218 				    "ldc_mem_free_handle: (0x%llx) freed "
4219 				    "handle 0x%llx\n", ldcp->id, mhdl);
4220 				break;
4221 			}
4222 			phdl = phdl->next;
4223 		}
4224 	}
4225 
4226 	if (phdl == NULL) {
4227 		DWARN(ldcp->id,
4228 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4229 		mutex_exit(&ldcp->mlist_lock);
4230 		return (EINVAL);
4231 	}
4232 
4233 	mutex_exit(&ldcp->mlist_lock);
4234 
4235 	return (0);
4236 }
4237 
4238 /*
4239  * Bind a memory handle to a virtual address.
4240  * The virtual address is converted to the corresponding real addresses.
4241  * Returns pointer to the first ldc_mem_cookie and the total number
4242  * of cookies for this virtual address. Other cookies can be obtained
4243  * using the ldc_mem_nextcookie() call. If the pages are stored in
4244  * consecutive locations in the table, a single cookie corresponding to
4245  * the first location is returned. The cookie size spans all the entries.
4246  *
4247  * If the VA corresponds to a page that is already being exported, reuse
4248  * the page and do not export it again. Bump the page's use count.
4249  */
4250 int
4251 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4252     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4253 {
4254 	ldc_mhdl_t	*mhdl;
4255 	ldc_chan_t 	*ldcp;
4256 	ldc_mtbl_t	*mtbl;
4257 	ldc_memseg_t	*memseg;
4258 	ldc_mte_t	tmp_mte;
4259 	uint64_t	index, prev_index = 0;
4260 	int64_t		cookie_idx;
4261 	uintptr_t	raddr, ra_aligned;
4262 	uint64_t	psize, poffset, v_offset;
4263 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4264 	pgcnt_t		npages;
4265 	caddr_t		v_align, addr;
4266 	int 		i, rv;
4267 
4268 	if (mhandle == NULL) {
4269 		DWARN(DBG_ALL_LDCS,
4270 		    "ldc_mem_bind_handle: invalid memory handle\n");
4271 		return (EINVAL);
4272 	}
4273 	mhdl = (ldc_mhdl_t *)mhandle;
4274 	ldcp = mhdl->ldcp;
4275 
4276 	/* clear count */
4277 	*ccount = 0;
4278 
4279 	mutex_enter(&mhdl->lock);
4280 
4281 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4282 		DWARN(ldcp->id,
4283 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4284 		    mhandle);
4285 		mutex_exit(&mhdl->lock);
4286 		return (EINVAL);
4287 	}
4288 
4289 	/* Force address and size to be 8-byte aligned */
4290 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4291 		DWARN(ldcp->id,
4292 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4293 		mutex_exit(&mhdl->lock);
4294 		return (EINVAL);
4295 	}
4296 
4297 	/*
4298 	 * If this channel is binding a memory handle for the
4299 	 * first time allocate it a memory map table and initialize it
4300 	 */
4301 	if ((mtbl = ldcp->mtbl) == NULL) {
4302 
4303 		mutex_enter(&ldcp->lock);
4304 
4305 		/* Allocate and initialize the map table structure */
4306 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
4307 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
4308 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
4309 		mtbl->next_entry = NULL;
4310 		mtbl->contigmem = B_TRUE;
4311 
4312 		/* Allocate the table itself */
4313 		mtbl->table = (ldc_mte_slot_t *)
4314 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
4315 		if (mtbl->table == NULL) {
4316 
4317 			/* allocate a page of memory using kmem_alloc */
4318 			mtbl->table = kmem_alloc(MMU_PAGESIZE, KM_SLEEP);
4319 			mtbl->size = MMU_PAGESIZE;
4320 			mtbl->contigmem = B_FALSE;
4321 			mtbl->num_entries = mtbl->num_avail =
4322 				mtbl->size / sizeof (ldc_mte_slot_t);
4323 			DWARN(ldcp->id,
4324 			    "ldc_mem_bind_handle: (0x%llx) reduced tbl size "
4325 			    "to %lx entries\n", ldcp->id, mtbl->num_entries);
4326 		}
4327 
4328 		/* zero out the memory */
4329 		bzero(mtbl->table, mtbl->size);
4330 
4331 		/* initialize the lock */
4332 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
4333 
4334 		/* register table for this channel */
4335 		rv = hv_ldc_set_map_table(ldcp->id,
4336 		    va_to_pa(mtbl->table), mtbl->num_entries);
4337 		if (rv != 0) {
4338 			cmn_err(CE_WARN,
4339 			    "ldc_mem_bind_handle: (0x%lx) err %d mapping tbl",
4340 			    ldcp->id, rv);
4341 			if (mtbl->contigmem)
4342 				contig_mem_free(mtbl->table, mtbl->size);
4343 			else
4344 				kmem_free(mtbl->table, mtbl->size);
4345 			mutex_destroy(&mtbl->lock);
4346 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
4347 			mutex_exit(&ldcp->lock);
4348 			mutex_exit(&mhdl->lock);
4349 			return (EIO);
4350 		}
4351 
4352 		ldcp->mtbl = mtbl;
4353 		mutex_exit(&ldcp->lock);
4354 
4355 		D1(ldcp->id,
4356 		    "ldc_mem_bind_handle: (0x%llx) alloc'd map table 0x%llx\n",
4357 		    ldcp->id, ldcp->mtbl->table);
4358 	}
4359 
4360 	/* FUTURE: get the page size, pgsz code, and shift */
4361 	pg_size = MMU_PAGESIZE;
4362 	pg_size_code = page_szc(pg_size);
4363 	pg_shift = page_get_shift(pg_size_code);
4364 	pg_mask = ~(pg_size - 1);
4365 
4366 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4367 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4368 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4369 
4370 	/* aligned VA and its offset */
4371 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4372 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4373 
4374 	npages = (len+v_offset)/pg_size;
4375 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4376 
4377 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4378 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4379 	    ldcp->id, vaddr, v_align, v_offset, npages);
4380 
4381 	/* lock the memory table - exclusive access to channel */
4382 	mutex_enter(&mtbl->lock);
4383 
4384 	if (npages > mtbl->num_avail) {
4385 		D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4386 		    ldcp->id);
4387 		mutex_exit(&mtbl->lock);
4388 		mutex_exit(&mhdl->lock);
4389 		return (ENOMEM);
4390 	}
4391 
4392 	/* Allocate a memseg structure */
4393 	memseg = mhdl->memseg =
4394 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
4395 
4396 	/* Allocate memory to store all pages and cookies */
4397 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4398 	memseg->cookies =
4399 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4400 
4401 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4402 	    ldcp->id, npages);
4403 
4404 	addr = v_align;
4405 
4406 	/*
4407 	 * Check if direct shared memory map is enabled, if not change
4408 	 * the mapping type to include SHADOW_MAP.
4409 	 */
4410 	if (ldc_shmem_enabled == 0)
4411 		mtype = LDC_SHADOW_MAP;
4412 
	/*
	 * Table slots are used in a round-robin manner. The algorithm permits
	 * inserting duplicate entries. Slots allocated earlier will typically
	 * get freed before we get back to reusing the slot. Inserting duplicate
	 * entries should be OK as we only look up entries using the cookie
	 * addr, i.e. tbl index, during export, unexport and copy operations.
	 *
	 * One implementation that was tried was to search for a duplicate
	 * page entry first and reuse it. The search overhead is very high and
	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
	 * So it does make sense to avoid searching for duplicates.
	 *
	 * But during the process of searching for a free slot, if we find a
	 * duplicate entry we will go ahead and use it, and bump its use count.
	 */
4428 
4429 	/* index to start searching from */
4430 	index = mtbl->next_entry;
4431 	cookie_idx = -1;
4432 
4433 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4434 
4435 	if (mtype & LDC_DIRECT_MAP) {
4436 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4437 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4438 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4439 	}
4440 
4441 	if (mtype & LDC_SHADOW_MAP) {
4442 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4443 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4444 	}
4445 
4446 	if (mtype & LDC_IO_MAP) {
4447 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4448 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4449 	}
4450 
4451 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4452 
4453 	tmp_mte.mte_pgszc = pg_size_code;
4454 
4455 	/* initialize each mem table entry */
4456 	for (i = 0; i < npages; i++) {
4457 
4458 		/* check if slot is available in the table */
4459 		while (mtbl->table[index].entry.ll != 0) {
4460 
4461 			index = (index + 1) % mtbl->num_entries;
4462 
4463 			if (index == mtbl->next_entry) {
4464 				/* we have looped around */
4465 				DWARN(DBG_ALL_LDCS,
4466 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4467 				    "entry\n", ldcp->id);
4468 				*ccount = 0;
4469 
4470 				/* NOTE: free memory, remove previous entries */
4471 				/* this shouldnt happen as num_avail was ok */
4472 
4473 				mutex_exit(&mtbl->lock);
4474 				mutex_exit(&mhdl->lock);
4475 				return (ENOMEM);
4476 			}
4477 		}
4478 
4479 		/* get the real address */
4480 		raddr = va_to_pa((void *)addr);
4481 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4482 
4483 		/* build the mte */
4484 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4485 
4486 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4487 
4488 		/* update entry in table */
4489 		mtbl->table[index].entry = tmp_mte;
4490 
4491 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4492 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4493 
4494 		/* calculate the size and offset for this export range */
4495 		if (i == 0) {
4496 			/* first page */
4497 			psize = min((pg_size - v_offset), len);
4498 			poffset = v_offset;
4499 
4500 		} else if (i == (npages - 1)) {
4501 			/* last page */
4502 			psize =	(((uintptr_t)(vaddr + len)) &
4503 				    ((uint64_t)(pg_size-1)));
4504 			if (psize == 0)
4505 				psize = pg_size;
4506 			poffset = 0;
4507 
4508 		} else {
4509 			/* middle pages */
4510 			psize = pg_size;
4511 			poffset = 0;
4512 		}
4513 
4514 		/* store entry for this page */
4515 		memseg->pages[i].index = index;
4516 		memseg->pages[i].raddr = raddr;
4517 		memseg->pages[i].offset = poffset;
4518 		memseg->pages[i].size = psize;
4519 		memseg->pages[i].mte = &(mtbl->table[index]);
4520 
4521 		/* create the cookie */
4522 		if (i == 0 || (index != prev_index + 1)) {
4523 			cookie_idx++;
4524 			memseg->cookies[cookie_idx].addr =
4525 				IDX2COOKIE(index, pg_size_code, pg_shift);
4526 			memseg->cookies[cookie_idx].addr |= poffset;
4527 			memseg->cookies[cookie_idx].size = psize;
4528 
4529 		} else {
4530 			memseg->cookies[cookie_idx].size += psize;
4531 		}
4532 
4533 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4534 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4535 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4536 		    ldcp->id, addr, index, raddr, psize, poffset);
4537 
4538 		/* decrement number of available entries */
4539 		mtbl->num_avail--;
4540 
4541 		/* increment va by page size */
4542 		addr += pg_size;
4543 
4544 		/* increment index */
4545 		prev_index = index;
4546 		index = (index + 1) % mtbl->num_entries;
4547 
4548 		/* save the next slot */
4549 		mtbl->next_entry = index;
4550 	}
4551 
4552 	mutex_exit(&mtbl->lock);
4553 
4554 	/* memory handle = bound */
4555 	mhdl->mtype = mtype;
4556 	mhdl->perm = perm;
4557 	mhdl->status = LDC_BOUND;
4558 
4559 	/* update memseg_t */
4560 	memseg->vaddr = vaddr;
4561 	memseg->raddr = memseg->pages[0].raddr;
4562 	memseg->size = len;
4563 	memseg->npages = npages;
4564 	memseg->ncookies = cookie_idx + 1;
4565 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4566 
4567 	/* return count and first cookie */
4568 	*ccount = memseg->ncookies;
4569 	cookie->addr = memseg->cookies[0].addr;
4570 	cookie->size = memseg->cookies[0].size;
4571 
4572 	D1(ldcp->id,
4573 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4574 	    "pgs=0x%llx cookies=0x%llx\n",
4575 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4576 
4577 	mutex_exit(&mhdl->lock);
4578 	return (0);
4579 }
4580 
4581 /*
4582  * Return the next cookie associated with the specified memory handle
4583  */
4584 int
4585 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4586 {
4587 	ldc_mhdl_t	*mhdl;
4588 	ldc_chan_t 	*ldcp;
4589 	ldc_memseg_t	*memseg;
4590 
4591 	if (mhandle == NULL) {
4592 		DWARN(DBG_ALL_LDCS,
4593 		    "ldc_mem_nextcookie: invalid memory handle\n");
4594 		return (EINVAL);
4595 	}
4596 	mhdl = (ldc_mhdl_t *)mhandle;
4597 
4598 	mutex_enter(&mhdl->lock);
4599 
4600 	ldcp = mhdl->ldcp;
4601 	memseg = mhdl->memseg;
4602 
4603 	if (cookie == 0) {
4604 		DWARN(ldcp->id,
4605 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4606 		    ldcp->id);
4607 		mutex_exit(&mhdl->lock);
4608 		return (EINVAL);
4609 	}
4610 
4611 	if (memseg->next_cookie != 0) {
4612 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4613 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4614 		memseg->next_cookie++;
4615 		if (memseg->next_cookie == memseg->ncookies)
4616 			memseg->next_cookie = 0;
4617 
4618 	} else {
4619 		DWARN(ldcp->id,
4620 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4621 		cookie->addr = 0;
4622 		cookie->size = 0;
4623 		mutex_exit(&mhdl->lock);
4624 		return (EINVAL);
4625 	}
4626 
4627 	D1(ldcp->id,
4628 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4629 	    ldcp->id, cookie->addr, cookie->size);
4630 
4631 	mutex_exit(&mhdl->lock);
4632 	return (0);
4633 }
4634 
4635 /*
4636  * Unbind the virtual memory region associated with the specified
4637  * memory handle. Allassociated cookies are freed and the corresponding
4638  * RA space is no longer exported.
4639  */
4640 int
4641 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4642 {
4643 	ldc_mhdl_t	*mhdl;
4644 	ldc_chan_t 	*ldcp;
4645 	ldc_mtbl_t	*mtbl;
4646 	ldc_memseg_t	*memseg;
4647 	uint64_t	cookie_addr;
4648 	uint64_t	pg_shift, pg_size_code;
4649 	int		i, rv;
4650 
4651 	if (mhandle == NULL) {
4652 		DWARN(DBG_ALL_LDCS,
4653 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4654 		return (EINVAL);
4655 	}
4656 	mhdl = (ldc_mhdl_t *)mhandle;
4657 
4658 	mutex_enter(&mhdl->lock);
4659 
4660 	if (mhdl->status == LDC_UNBOUND) {
4661 		DWARN(DBG_ALL_LDCS,
4662 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4663 		    mhandle);
4664 		mutex_exit(&mhdl->lock);
4665 		return (EINVAL);
4666 	}
4667 
4668 	ldcp = mhdl->ldcp;
4669 	mtbl = ldcp->mtbl;
4670 
4671 	memseg = mhdl->memseg;
4672 
4673 	/* lock the memory table - exclusive access to channel */
4674 	mutex_enter(&mtbl->lock);
4675 
4676 	/* undo the pages exported */
4677 	for (i = 0; i < memseg->npages; i++) {
4678 
4679 		/* check for mapped pages, revocation cookie != 0 */
4680 		if (memseg->pages[i].mte->cookie) {
4681 
4682 			pg_size_code = page_szc(memseg->pages[i].size);
4683 			pg_shift = page_get_shift(memseg->pages[i].size);
4684 			cookie_addr = IDX2COOKIE(memseg->pages[i].index,
4685 			    pg_size_code, pg_shift);
4686 
4687 			D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) revoke "
4688 			    "cookie 0x%llx, rcookie 0x%llx\n", ldcp->id,
4689 			    cookie_addr, memseg->pages[i].mte->cookie);
4690 			rv = hv_ldc_revoke(ldcp->id, cookie_addr,
4691 			    memseg->pages[i].mte->cookie);
4692 			if (rv) {
4693 				DWARN(ldcp->id,
4694 				    "ldc_mem_unbind_handle: (0x%llx) cannot "
4695 				    "revoke mapping, cookie %llx\n", ldcp->id,
4696 				    cookie_addr);
4697 			}
4698 		}
4699 
4700 		/* clear the entry from the table */
4701 		memseg->pages[i].mte->entry.ll = 0;
4702 		mtbl->num_avail++;
4703 	}
4704 	mutex_exit(&mtbl->lock);
4705 
4706 	/* free the allocated memseg and page structures */
4707 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4708 	kmem_free(memseg->cookies,
4709 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4710 	kmem_cache_free(ldcssp->memseg_cache, memseg);
4711 
4712 	/* uninitialize the memory handle */
4713 	mhdl->memseg = NULL;
4714 	mhdl->status = LDC_UNBOUND;
4715 
4716 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4717 	    ldcp->id, mhdl);
4718 
4719 	mutex_exit(&mhdl->lock);
4720 	return (0);
4721 }
4722 
4723 /*
4724  * Get information about the dring. The base address of the descriptor
4725  * ring along with the type and permission are returned back.
4726  */
4727 int
4728 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4729 {
4730 	ldc_mhdl_t	*mhdl;
4731 
4732 	if (mhandle == NULL) {
4733 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4734 		return (EINVAL);
4735 	}
4736 	mhdl = (ldc_mhdl_t *)mhandle;
4737 
4738 	if (minfo == NULL) {
4739 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4740 		return (EINVAL);
4741 	}
4742 
4743 	mutex_enter(&mhdl->lock);
4744 
4745 	minfo->status = mhdl->status;
4746 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4747 		minfo->vaddr = mhdl->memseg->vaddr;
4748 		minfo->raddr = mhdl->memseg->raddr;
4749 		minfo->mtype = mhdl->mtype;
4750 		minfo->perm = mhdl->perm;
4751 	}
4752 	mutex_exit(&mhdl->lock);
4753 
4754 	return (0);
4755 }
4756 
4757 /*
4758  * Copy data either from or to the client specified virtual address
4759  * space to or from the exported memory associated with the cookies.
4760  * The direction argument determines whether the data is read from or
4761  * written to exported memory.
4762  */
4763 int
4764 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4765     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4766 {
4767 	ldc_chan_t 	*ldcp;
4768 	uint64_t	local_voff, local_valign;
4769 	uint64_t	cookie_addr, cookie_size;
4770 	uint64_t	pg_shift, pg_size, pg_size_code;
4771 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4772 	uint64_t	local_ra, local_poff, local_psize;
4773 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4774 	pgcnt_t		npages;
4775 	size_t		len = *size;
4776 	int 		i, rv = 0;
4777 
4778 	uint64_t	chid;
4779 
4780 	if (handle == NULL) {
4781 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4782 		return (EINVAL);
4783 	}
4784 	ldcp = (ldc_chan_t *)handle;
4785 	chid = ldcp->id;
4786 
4787 	/* check to see if channel is UP */
4788 	if (ldcp->tstate != TS_UP) {
4789 		DWARN(chid, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4790 		    chid);
4791 		return (ECONNRESET);
4792 	}
4793 
4794 	/* Force address and size to be 8-byte aligned */
4795 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4796 		DWARN(chid,
4797 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4798 		return (EINVAL);
4799 	}
4800 
4801 	/* Find the size of the exported memory */
4802 	export_size = 0;
4803 	for (i = 0; i < ccount; i++)
4804 		export_size += cookies[i].size;
4805 
4806 	/* check to see if offset is valid */
4807 	if (off > export_size) {
4808 		DWARN(chid,
4809 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4810 		    chid);
4811 		return (EINVAL);
4812 	}
4813 
4814 	/*
4815 	 * Check to see if the export size is smaller than the size we
4816 	 * are requesting to copy - if so flag an error
4817 	 */
4818 	if ((export_size - off) < *size) {
4819 		DWARN(chid,
4820 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4821 		    chid);
4822 		return (EINVAL);
4823 	}
4824 
4825 	total_bal = min(export_size, *size);
4826 
4827 	/* FUTURE: get the page size, pgsz code, and shift */
4828 	pg_size = MMU_PAGESIZE;
4829 	pg_size_code = page_szc(pg_size);
4830 	pg_shift = page_get_shift(pg_size_code);
4831 
4832 	D1(chid, "ldc_mem_copy: copying data "
4833 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4834 	    chid, vaddr, pg_size, pg_size_code, pg_shift);
4835 
4836 	/* aligned VA and its offset */
4837 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4838 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4839 
4840 	npages = (len+local_voff)/pg_size;
4841 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4842 
4843 	D1(chid,
4844 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4845 	    chid, vaddr, local_valign, local_voff, npages);
4846 
4847 	local_ra = va_to_pa((void *)local_valign);
4848 	local_poff = local_voff;
4849 	local_psize = min(len, (pg_size - local_voff));
4850 
4851 	len -= local_psize;
4852 
4853 	/*
4854 	 * find the first cookie in the list of cookies
4855 	 * if the offset passed in is not zero
4856 	 */
4857 	for (idx = 0; idx < ccount; idx++) {
4858 		cookie_size = cookies[idx].size;
4859 		if (off < cookie_size)
4860 			break;
4861 		off -= cookie_size;
4862 	}
4863 
4864 	cookie_addr = cookies[idx].addr + off;
4865 	cookie_size = cookies[idx].size - off;
4866 
4867 	export_caddr = cookie_addr & ~(pg_size - 1);
4868 	export_poff = cookie_addr & (pg_size - 1);
4869 	export_psize = min(cookie_size, (pg_size - export_poff));
4870 
4871 	for (;;) {
4872 
4873 		copy_size = min(export_psize, local_psize);
4874 
4875 		D1(chid,
4876 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4877 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4878 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4879 		    " total_bal=0x%llx\n",
4880 		    chid, direction, export_caddr, local_ra, export_poff,
4881 		    local_poff, export_psize, local_psize, copy_size,
4882 		    total_bal);
4883 
4884 		rv = hv_ldc_copy(chid, direction,
4885 		    (export_caddr + export_poff), (local_ra + local_poff),
4886 		    copy_size, &copied_len);
4887 
4888 		if (rv != 0) {
4889 			int 		error = EIO;
4890 			uint64_t	rx_hd, rx_tl;
4891 
4892 			DWARN(chid,
4893 			    "ldc_mem_copy: (0x%llx) err %d during copy\n",
4894 			    (unsigned long long)chid, rv);
4895 			DWARN(chid,
4896 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%lx, "
4897 			    "loc_ra=0x%lx, exp_poff=0x%lx, loc_poff=0x%lx,"
4898 			    " exp_psz=0x%lx, loc_psz=0x%lx, copy_sz=0x%lx,"
4899 			    " copied_len=0x%lx, total_bal=0x%lx\n",
4900 			    chid, direction, export_caddr, local_ra,
4901 			    export_poff, local_poff, export_psize, local_psize,
4902 			    copy_size, copied_len, total_bal);
4903 
4904 			*size = *size - total_bal;
4905 
4906 			/*
4907 			 * check if reason for copy error was due to
4908 			 * a channel reset. we need to grab the lock
4909 			 * just in case we have to do a reset.
4910 			 */
4911 			mutex_enter(&ldcp->lock);
4912 			mutex_enter(&ldcp->tx_lock);
4913 
4914 			rv = hv_ldc_rx_get_state(ldcp->id,
4915 			    &rx_hd, &rx_tl, &(ldcp->link_state));
4916 			if (ldcp->link_state == LDC_CHANNEL_DOWN ||
4917 			    ldcp->link_state == LDC_CHANNEL_RESET) {
4918 				i_ldc_reset(ldcp, B_FALSE);
4919 				error = ECONNRESET;
4920 			}
4921 
4922 			mutex_exit(&ldcp->tx_lock);
4923 			mutex_exit(&ldcp->lock);
4924 
4925 			return (error);
4926 		}
4927 
4928 		ASSERT(copied_len <= copy_size);
4929 
4930 		D2(chid, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4931 		export_poff += copied_len;
4932 		local_poff += copied_len;
4933 		export_psize -= copied_len;
4934 		local_psize -= copied_len;
4935 		cookie_size -= copied_len;
4936 
4937 		total_bal -= copied_len;
4938 
4939 		if (copy_size != copied_len)
4940 			continue;
4941 
4942 		if (export_psize == 0 && total_bal != 0) {
4943 
4944 			if (cookie_size == 0) {
4945 				idx++;
4946 				cookie_addr = cookies[idx].addr;
4947 				cookie_size = cookies[idx].size;
4948 
4949 				export_caddr = cookie_addr & ~(pg_size - 1);
4950 				export_poff = cookie_addr & (pg_size - 1);
4951 				export_psize =
4952 					min(cookie_size, (pg_size-export_poff));
4953 			} else {
4954 				export_caddr += pg_size;
4955 				export_poff = 0;
4956 				export_psize = min(cookie_size, pg_size);
4957 			}
4958 		}
4959 
4960 		if (local_psize == 0 && total_bal != 0) {
4961 			local_valign += pg_size;
4962 			local_ra = va_to_pa((void *)local_valign);
4963 			local_poff = 0;
4964 			local_psize = min(pg_size, len);
4965 			len -= local_psize;
4966 		}
4967 
4968 		/* check if we are all done */
4969 		if (total_bal == 0)
4970 			break;
4971 	}
4972 
4973 
4974 	D1(chid,
4975 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
4976 	    chid, *size);
4977 
4978 	return (0);
4979 }
4980 
4981 /*
4982  * Copy data either from or to the client specified virtual address
4983  * space to or from HV physical memory.
4984  *
4985  * The direction argument determines whether the data is read from or
4986  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
4987  * to the ldc_mem_copy interface
4988  */
4989 int
4990 ldc_mem_rdwr_cookie(ldc_handle_t handle, caddr_t vaddr, size_t *size,
4991     caddr_t paddr, uint8_t direction)
4992 {
4993 	ldc_chan_t 	*ldcp;
4994 	uint64_t	local_voff, local_valign;
4995 	uint64_t	pg_shift, pg_size, pg_size_code;
4996 	uint64_t 	target_pa, target_poff, target_psize, target_size;
4997 	uint64_t	local_ra, local_poff, local_psize;
4998 	uint64_t	copy_size, copied_len = 0;
4999 	pgcnt_t		npages;
5000 	size_t		len = *size;
5001 	int 		rv = 0;
5002 
5003 	if (handle == NULL) {
5004 		DWARN(DBG_ALL_LDCS,
5005 		    "ldc_mem_rdwr_cookie: invalid channel handle\n");
5006 		return (EINVAL);
5007 	}
5008 	ldcp = (ldc_chan_t *)handle;
5009 
5010 	mutex_enter(&ldcp->lock);
5011 
5012 	/* check to see if channel is UP */
5013 	if (ldcp->tstate != TS_UP) {
5014 		DWARN(ldcp->id,
5015 		    "ldc_mem_rdwr_cookie: (0x%llx) channel is not UP\n",
5016 		    ldcp->id);
5017 		mutex_exit(&ldcp->lock);
5018 		return (ECONNRESET);
5019 	}
5020 
5021 	/* Force address and size to be 8-byte aligned */
5022 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
5023 		DWARN(ldcp->id,
5024 		    "ldc_mem_rdwr_cookie: addr/size is not 8-byte aligned\n");
5025 		mutex_exit(&ldcp->lock);
5026 		return (EINVAL);
5027 	}
5028 
5029 	target_size = *size;
5030 
5031 	/* FUTURE: get the page size, pgsz code, and shift */
5032 	pg_size = MMU_PAGESIZE;
5033 	pg_size_code = page_szc(pg_size);
5034 	pg_shift = page_get_shift(pg_size_code);
5035 
5036 	D1(ldcp->id, "ldc_mem_rdwr_cookie: copying data "
5037 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
5038 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
5039 
5040 	/* aligned VA and its offset */
5041 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
5042 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
5043 
5044 	npages = (len + local_voff) / pg_size;
5045 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
5046 
5047 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) v=0x%llx, "
5048 	    "val=0x%llx,off=0x%x,pgs=0x%x\n",
5049 	    ldcp->id, vaddr, local_valign, local_voff, npages);
5050 
5051 	local_ra = va_to_pa((void *)local_valign);
5052 	local_poff = local_voff;
5053 	local_psize = min(len, (pg_size - local_voff));
5054 
5055 	len -= local_psize;
5056 
5057 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
5058 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
5059 	target_psize = pg_size - target_poff;
5060 
5061 	for (;;) {
5062 
5063 		copy_size = min(target_psize, local_psize);
5064 
5065 		D1(ldcp->id,
5066 		    "ldc_mem_rdwr_cookie: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
5067 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
5068 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
5069 		    " total_bal=0x%llx\n",
5070 		    ldcp->id, direction, target_pa, local_ra, target_poff,
5071 		    local_poff, target_psize, local_psize, copy_size,
5072 		    target_size);
5073 
5074 		rv = hv_ldc_copy(ldcp->id, direction,
5075 		    (target_pa + target_poff), (local_ra + local_poff),
5076 		    copy_size, &copied_len);
5077 
5078 		if (rv != 0) {
5079 			DWARN(DBG_ALL_LDCS,
5080 			    "ldc_mem_rdwr_cookie: (0x%lx) err %d during copy\n",
5081 			    ldcp->id, rv);
5082 			DWARN(DBG_ALL_LDCS,
5083 			    "ldc_mem_rdwr_cookie: (0x%llx) dir=%lld, "
5084 			    "tar_pa=0x%llx, loc_ra=0x%llx, tar_poff=0x%llx, "
5085 			    "loc_poff=0x%llx, tar_psz=0x%llx, loc_psz=0x%llx, "
5086 			    "copy_sz=0x%llx, total_bal=0x%llx\n",
5087 			    ldcp->id, direction, target_pa, local_ra,
5088 			    target_poff, local_poff, target_psize, local_psize,
5089 			    copy_size, target_size);
5090 
5091 			*size = *size - target_size;
5092 			mutex_exit(&ldcp->lock);
5093 			return (i_ldc_h2v_error(rv));
5094 		}
5095 
5096 		D2(ldcp->id, "ldc_mem_rdwr_cookie: copied=0x%llx\n",
5097 		    copied_len);
5098 		target_poff += copied_len;
5099 		local_poff += copied_len;
5100 		target_psize -= copied_len;
5101 		local_psize -= copied_len;
5102 
5103 		target_size -= copied_len;
5104 
5105 		if (copy_size != copied_len)
5106 			continue;
5107 
5108 		if (target_psize == 0 && target_size != 0) {
5109 			target_pa += pg_size;
5110 			target_poff = 0;
5111 			target_psize = min(pg_size, target_size);
5112 		}
5113 
5114 		if (local_psize == 0 && target_size != 0) {
5115 			local_valign += pg_size;
5116 			local_ra = va_to_pa((void *)local_valign);
5117 			local_poff = 0;
5118 			local_psize = min(pg_size, len);
5119 			len -= local_psize;
5120 		}
5121 
5122 		/* check if we are all done */
5123 		if (target_size == 0)
5124 			break;
5125 	}
5126 
5127 	mutex_exit(&ldcp->lock);
5128 
5129 	D1(ldcp->id, "ldc_mem_rdwr_cookie: (0x%llx) done copying sz=0x%llx\n",
5130 	    ldcp->id, *size);
5131 
5132 	return (0);
5133 }
5134 
5135 /*
5136  * Map an exported memory segment into the local address space. If the
5137  * memory range was exported for direct map access, a HV call is made
5138  * to allocate a RA range. If the map is done via a shadow copy, local
5139  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
5140  * the mapping is a direct map then the RA is returned in 'raddr'.
5141  */
5142 int
5143 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
5144     uint8_t mtype, uint8_t perm, caddr_t *vaddr, caddr_t *raddr)
5145 {
5146 	int		i, j, idx, rv, retries;
5147 	ldc_chan_t 	*ldcp;
5148 	ldc_mhdl_t	*mhdl;
5149 	ldc_memseg_t	*memseg;
5150 	caddr_t		tmpaddr;
5151 	uint64_t	map_perm = perm;
5152 	uint64_t	pg_size, pg_shift, pg_size_code, pg_mask;
5153 	uint64_t	exp_size = 0, base_off, map_size, npages;
5154 	uint64_t	cookie_addr, cookie_off, cookie_size;
5155 	tte_t		ldc_tte;
5156 
5157 	if (mhandle == NULL) {
5158 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
5159 		return (EINVAL);
5160 	}
5161 	mhdl = (ldc_mhdl_t *)mhandle;
5162 
5163 	mutex_enter(&mhdl->lock);
5164 
5165 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
5166 	    mhdl->memseg != NULL) {
5167 		DWARN(DBG_ALL_LDCS,
5168 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
5169 		mutex_exit(&mhdl->lock);
5170 		return (EINVAL);
5171 	}
5172 
5173 	ldcp = mhdl->ldcp;
5174 
5175 	mutex_enter(&ldcp->lock);
5176 
5177 	if (ldcp->tstate != TS_UP) {
5178 		DWARN(ldcp->id,
5179 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
5180 		    ldcp->id);
5181 		mutex_exit(&ldcp->lock);
5182 		mutex_exit(&mhdl->lock);
5183 		return (ECONNRESET);
5184 	}
5185 
5186 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5187 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
5188 		mutex_exit(&ldcp->lock);
5189 		mutex_exit(&mhdl->lock);
5190 		return (EINVAL);
5191 	}
5192 
5193 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
5194 	    ldcp->id, cookie->addr, cookie->size);
5195 
5196 	/* FUTURE: get the page size, pgsz code, and shift */
5197 	pg_size = MMU_PAGESIZE;
5198 	pg_size_code = page_szc(pg_size);
5199 	pg_shift = page_get_shift(pg_size_code);
5200 	pg_mask = ~(pg_size - 1);
5201 
5202 	/* calculate the number of pages in the exported cookie */
5203 	base_off = cookie[0].addr & (pg_size - 1);
5204 	for (idx = 0; idx < ccount; idx++)
5205 		exp_size += cookie[idx].size;
5206 	map_size = P2ROUNDUP((exp_size + base_off), pg_size);
5207 	npages = (map_size >> pg_shift);
5208 
5209 	/* Allocate memseg structure */
5210 	memseg = mhdl->memseg =
5211 		kmem_cache_alloc(ldcssp->memseg_cache, KM_SLEEP);
5212 
5213 	/* Allocate memory to store all pages and cookies */
5214 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
5215 	memseg->cookies =
5216 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
5217 
5218 	D2(ldcp->id, "ldc_mem_map: (0x%llx) exp_size=0x%llx, map_size=0x%llx,"
5219 	    "pages=0x%llx\n", ldcp->id, exp_size, map_size, npages);
5220 
5221 	/*
5222 	 * Check if direct map over shared memory is enabled, if not change
5223 	 * the mapping type to SHADOW_MAP.
5224 	 */
5225 	if (ldc_shmem_enabled == 0)
5226 		mtype = LDC_SHADOW_MAP;
5227 
5228 	/*
5229 	 * Check to see if the client is requesting direct or shadow map
5230 	 * If direct map is requested, try to map remote memory first,
5231 	 * and if that fails, revert to shadow map
5232 	 */
5233 	if (mtype == LDC_DIRECT_MAP) {
5234 
5235 		/* Allocate kernel virtual space for mapping */
5236 		memseg->vaddr = vmem_xalloc(heap_arena, map_size,
5237 		    pg_size, 0, 0, NULL, NULL, VM_NOSLEEP);
5238 		if (memseg->vaddr == NULL) {
5239 			cmn_err(CE_WARN,
5240 			    "ldc_mem_map: (0x%lx) memory map failed\n",
5241 			    ldcp->id);
5242 			kmem_free(memseg->cookies,
5243 			    (sizeof (ldc_mem_cookie_t) * ccount));
5244 			kmem_free(memseg->pages,
5245 			    (sizeof (ldc_page_t) * npages));
5246 			kmem_cache_free(ldcssp->memseg_cache, memseg);
5247 
5248 			mutex_exit(&ldcp->lock);
5249 			mutex_exit(&mhdl->lock);
5250 			return (ENOMEM);
5251 		}
5252 
5253 		/* Unload previous mapping */
5254 		hat_unload(kas.a_hat, memseg->vaddr, map_size,
5255 		    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5256 
5257 		/* for each cookie passed in - map into address space */
5258 		idx = 0;
5259 		cookie_size = 0;
5260 		tmpaddr = memseg->vaddr;
5261 
5262 		for (i = 0; i < npages; i++) {
5263 
5264 			if (cookie_size == 0) {
5265 				ASSERT(idx < ccount);
5266 				cookie_addr = cookie[idx].addr & pg_mask;
5267 				cookie_off = cookie[idx].addr & (pg_size - 1);
5268 				cookie_size =
5269 				    P2ROUNDUP((cookie_off + cookie[idx].size),
5270 					pg_size);
5271 				idx++;
5272 			}
5273 
5274 			D1(ldcp->id, "ldc_mem_map: (0x%llx) mapping "
5275 			    "cookie 0x%llx, bal=0x%llx\n", ldcp->id,
5276 			    cookie_addr, cookie_size);
5277 
5278 			/* map the cookie into address space */
5279 			for (retries = 0; retries < ldc_max_retries;
5280 			    retries++) {
5281 
5282 				rv = hv_ldc_mapin(ldcp->id, cookie_addr,
5283 				    &memseg->pages[i].raddr, &map_perm);
5284 				if (rv != H_EWOULDBLOCK && rv != H_ETOOMANY)
5285 					break;
5286 
5287 				drv_usecwait(ldc_delay);
5288 			}
5289 
5290 			if (rv || memseg->pages[i].raddr == 0) {
5291 				DWARN(ldcp->id,
5292 				    "ldc_mem_map: (0x%llx) hv mapin err %d\n",
5293 				    ldcp->id, rv);
5294 
5295 				/* remove previous mapins */
5296 				hat_unload(kas.a_hat, memseg->vaddr, map_size,
5297 				    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
5298 				for (j = 0; j < i; j++) {
5299 					rv = hv_ldc_unmap(
5300 							memseg->pages[j].raddr);
5301 					if (rv) {
5302 						DWARN(ldcp->id,
5303 						    "ldc_mem_map: (0x%llx) "
5304 						    "cannot unmap ra=0x%llx\n",
5305 					    ldcp->id,
5306 						    memseg->pages[j].raddr);
5307 					}
5308 				}
5309 
5310 				/* free kernel virtual space */
5311 				vmem_free(heap_arena, (void *)memseg->vaddr,
5312 				    map_size);
5313 
5314 				/* direct map failed - revert to shadow map */
5315 				mtype = LDC_SHADOW_MAP;
5316 				break;
5317 
5318 			} else {
5319 
5320 				D1(ldcp->id,
5321 				    "ldc_mem_map: (0x%llx) vtop map 0x%llx -> "
5322 				    "0x%llx, cookie=0x%llx, perm=0x%llx\n",
5323 				    ldcp->id, tmpaddr, memseg->pages[i].raddr,
5324 				    cookie_addr, perm);
5325 
5326 				/*
5327 				 * NOTE: Calling hat_devload directly, causes it
5328 				 * to look for page_t using the pfn. Since this
5329 				 * addr is greater than the memlist, it treates
5330 				 * it as non-memory
5331 				 */
5332 				sfmmu_memtte(&ldc_tte,
5333 				    (pfn_t)(memseg->pages[i].raddr >> pg_shift),
5334 				    PROT_READ | PROT_WRITE | HAT_NOSYNC, TTE8K);
5335 
5336 				D1(ldcp->id,
5337 				    "ldc_mem_map: (0x%llx) ra 0x%llx -> "
5338 				    "tte 0x%llx\n", ldcp->id,
5339 				    memseg->pages[i].raddr, ldc_tte);
5340 
5341 				sfmmu_tteload(kas.a_hat, &ldc_tte, tmpaddr,
5342 				    NULL, HAT_LOAD_LOCK);
5343 
5344 				cookie_size -= pg_size;
5345 				cookie_addr += pg_size;
5346 				tmpaddr += pg_size;
5347 			}
5348 		}
5349 	}
5350 
5351 	if (mtype == LDC_SHADOW_MAP) {
5352 		if (*vaddr == NULL) {
5353 			memseg->vaddr = kmem_zalloc(exp_size, KM_SLEEP);
5354 			mhdl->myshadow = B_TRUE;
5355 
5356 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
5357 			    "shadow page va=0x%llx\n", ldcp->id, memseg->vaddr);
5358 		} else {
5359 			/*
5360 			 * Use client supplied memory for memseg->vaddr
5361 			 * WARNING: assuming that client mem is >= exp_size
5362 			 */
5363 			memseg->vaddr = *vaddr;
5364 		}
5365 
5366 		/* Save all page and cookie information */
5367 		for (i = 0, tmpaddr = memseg->vaddr; i < npages; i++) {
5368 			memseg->pages[i].raddr = va_to_pa(tmpaddr);
5369 			memseg->pages[i].size = pg_size;
5370 			tmpaddr += pg_size;
5371 		}
5372 
5373 	}
5374 
5375 	/* save all cookies */
5376 	bcopy(cookie, memseg->cookies, ccount * sizeof (ldc_mem_cookie_t));
5377 
5378 	/* update memseg_t */
5379 	memseg->raddr = memseg->pages[0].raddr;
5380 	memseg->size = (mtype == LDC_SHADOW_MAP) ? exp_size : map_size;
5381 	memseg->npages = npages;
5382 	memseg->ncookies = ccount;
5383 	memseg->next_cookie = 0;
5384 
5385 	/* memory handle = mapped */
5386 	mhdl->mtype = mtype;
5387 	mhdl->perm = perm;
5388 	mhdl->status = LDC_MAPPED;
5389 
5390 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5391 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5392 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5393 	    memseg->npages, memseg->ncookies);
5394 
5395 	if (mtype == LDC_SHADOW_MAP)
5396 		base_off = 0;
5397 	if (raddr)
5398 		*raddr = (caddr_t)(memseg->raddr | base_off);
5399 	if (vaddr)
5400 		*vaddr = (caddr_t)((uintptr_t)memseg->vaddr | base_off);
5401 
5402 	mutex_exit(&ldcp->lock);
5403 	mutex_exit(&mhdl->lock);
5404 	return (0);
5405 }
5406 
5407 /*
5408  * Unmap a memory segment. Free shadow memory (if any).
5409  */
5410 int
5411 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5412 {
5413 	int		i, rv;
5414 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5415 	ldc_chan_t 	*ldcp;
5416 	ldc_memseg_t	*memseg;
5417 
5418 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5419 		DWARN(DBG_ALL_LDCS,
5420 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5421 		    mhandle);
5422 		return (EINVAL);
5423 	}
5424 
5425 	mutex_enter(&mhdl->lock);
5426 
5427 	ldcp = mhdl->ldcp;
5428 	memseg = mhdl->memseg;
5429 
5430 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5431 	    ldcp->id, mhdl);
5432 
5433 	/* if we allocated shadow memory - free it */
5434 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5435 		kmem_free(memseg->vaddr, memseg->size);
5436 	} else if (mhdl->mtype == LDC_DIRECT_MAP) {
5437 
5438 		/* unmap in the case of DIRECT_MAP */
5439 		hat_unload(kas.a_hat, memseg->vaddr, memseg->size,
5440 		    HAT_UNLOAD_UNLOCK);
5441 
5442 		for (i = 0; i < memseg->npages; i++) {
5443 			rv = hv_ldc_unmap(memseg->pages[i].raddr);
5444 			if (rv) {
5445 				cmn_err(CE_WARN,
5446 				    "ldc_mem_map: (0x%lx) hv unmap err %d\n",
5447 				    ldcp->id, rv);
5448 			}
5449 		}
5450 
5451 		vmem_free(heap_arena, (void *)memseg->vaddr, memseg->size);
5452 	}
5453 
5454 	/* free the allocated memseg and page structures */
5455 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5456 	kmem_free(memseg->cookies,
5457 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5458 	kmem_cache_free(ldcssp->memseg_cache, memseg);
5459 
5460 	/* uninitialize the memory handle */
5461 	mhdl->memseg = NULL;
5462 	mhdl->status = LDC_UNBOUND;
5463 
5464 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5465 	    ldcp->id, mhdl);
5466 
5467 	mutex_exit(&mhdl->lock);
5468 	return (0);
5469 }
5470 
5471 /*
5472  * Internal entry point for LDC mapped memory entry consistency
5473  * semantics. Acquire copies the contents of the remote memory
5474  * into the local shadow copy. The release operation copies the local
5475  * contents into the remote memory. The offset and size specify the
5476  * bounds for the memory range being synchronized.
5477  */
5478 static int
5479 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5480     uint64_t offset, size_t size)
5481 {
5482 	int 		err;
5483 	ldc_mhdl_t	*mhdl;
5484 	ldc_chan_t	*ldcp;
5485 	ldc_memseg_t	*memseg;
5486 	caddr_t		local_vaddr;
5487 	size_t		copy_size;
5488 
5489 	if (mhandle == NULL) {
5490 		DWARN(DBG_ALL_LDCS,
5491 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5492 		return (EINVAL);
5493 	}
5494 	mhdl = (ldc_mhdl_t *)mhandle;
5495 
5496 	mutex_enter(&mhdl->lock);
5497 
5498 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5499 		DWARN(DBG_ALL_LDCS,
5500 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5501 		mutex_exit(&mhdl->lock);
5502 		return (EINVAL);
5503 	}
5504 
5505 	/* do nothing for direct map */
5506 	if (mhdl->mtype == LDC_DIRECT_MAP) {
5507 		mutex_exit(&mhdl->lock);
5508 		return (0);
5509 	}
5510 
5511 	/* do nothing if COPY_IN+MEM_W and COPY_OUT+MEM_R */
5512 	if ((direction == LDC_COPY_IN && (mhdl->perm & LDC_MEM_R) == 0) ||
5513 	    (direction == LDC_COPY_OUT && (mhdl->perm & LDC_MEM_W) == 0)) {
5514 		mutex_exit(&mhdl->lock);
5515 		return (0);
5516 	}
5517 
5518 	if (offset >= mhdl->memseg->size ||
5519 	    (offset + size) > mhdl->memseg->size) {
5520 		DWARN(DBG_ALL_LDCS,
5521 		    "i_ldc_mem_acquire_release: memory out of range\n");
5522 		mutex_exit(&mhdl->lock);
5523 		return (EINVAL);
5524 	}
5525 
5526 	/* get the channel handle and memory segment */
5527 	ldcp = mhdl->ldcp;
5528 	memseg = mhdl->memseg;
5529 
5530 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5531 
5532 		local_vaddr = memseg->vaddr + offset;
5533 		copy_size = size;
5534 
5535 		/* copy to/from remote from/to local memory */
5536 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5537 		    &copy_size, memseg->cookies, memseg->ncookies,
5538 		    direction);
5539 		if (err || copy_size != size) {
5540 			cmn_err(CE_WARN,
5541 			    "i_ldc_mem_acquire_release: copy failed\n");
5542 			mutex_exit(&mhdl->lock);
5543 			return (err);
5544 		}
5545 	}
5546 
5547 	mutex_exit(&mhdl->lock);
5548 
5549 	return (0);
5550 }
5551 
5552 /*
5553  * Ensure that the contents in the remote memory seg are consistent
5554  * with the contents if of local segment
5555  */
5556 int
5557 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5558 {
5559 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5560 }
5561 
5562 
5563 /*
5564  * Ensure that the contents in the local memory seg are consistent
5565  * with the contents if of remote segment
5566  */
5567 int
5568 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5569 {
5570 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5571 }
5572 
5573 /*
5574  * Allocate a descriptor ring. The size of each each descriptor
5575  * must be 8-byte aligned and the entire ring should be a multiple
5576  * of MMU_PAGESIZE.
5577  */
5578 int
5579 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5580 {
5581 	ldc_dring_t *dringp;
5582 	size_t size = (dsize * len);
5583 
5584 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5585 	    len, dsize);
5586 
5587 	if (dhandle == NULL) {
5588 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5589 		return (EINVAL);
5590 	}
5591 
5592 	if (len == 0) {
5593 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5594 		return (EINVAL);
5595 	}
5596 
5597 	/* descriptor size should be 8-byte aligned */
5598 	if (dsize == 0 || (dsize & 0x7)) {
5599 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5600 		return (EINVAL);
5601 	}
5602 
5603 	*dhandle = 0;
5604 
5605 	/* Allocate a desc ring structure */
5606 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5607 
5608 	/* Initialize dring */
5609 	dringp->length = len;
5610 	dringp->dsize = dsize;
5611 
5612 	/* round off to multiple of pagesize */
5613 	dringp->size = (size & MMU_PAGEMASK);
5614 	if (size & MMU_PAGEOFFSET)
5615 		dringp->size += MMU_PAGESIZE;
5616 
5617 	dringp->status = LDC_UNBOUND;
5618 
5619 	/* allocate descriptor ring memory */
5620 	dringp->base = kmem_zalloc(dringp->size, KM_SLEEP);
5621 
5622 	/* initialize the desc ring lock */
5623 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5624 
5625 	/* Add descriptor ring to the head of global list */
5626 	mutex_enter(&ldcssp->lock);
5627 	dringp->next = ldcssp->dring_list;
5628 	ldcssp->dring_list = dringp;
5629 	mutex_exit(&ldcssp->lock);
5630 
5631 	*dhandle = (ldc_dring_handle_t)dringp;
5632 
5633 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5634 
5635 	return (0);
5636 }
5637 
5638 
5639 /*
5640  * Destroy a descriptor ring.
5641  */
5642 int
5643 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5644 {
5645 	ldc_dring_t *dringp;
5646 	ldc_dring_t *tmp_dringp;
5647 
5648 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5649 
5650 	if (dhandle == NULL) {
5651 		DWARN(DBG_ALL_LDCS,
5652 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5653 		return (EINVAL);
5654 	}
5655 	dringp = (ldc_dring_t *)dhandle;
5656 
5657 	if (dringp->status == LDC_BOUND) {
5658 		DWARN(DBG_ALL_LDCS,
5659 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5660 		return (EACCES);
5661 	}
5662 
5663 	mutex_enter(&dringp->lock);
5664 	mutex_enter(&ldcssp->lock);
5665 
5666 	/* remove from linked list - if not bound */
5667 	tmp_dringp = ldcssp->dring_list;
5668 	if (tmp_dringp == dringp) {
5669 		ldcssp->dring_list = dringp->next;
5670 		dringp->next = NULL;
5671 
5672 	} else {
5673 		while (tmp_dringp != NULL) {
5674 			if (tmp_dringp->next == dringp) {
5675 				tmp_dringp->next = dringp->next;
5676 				dringp->next = NULL;
5677 				break;
5678 			}
5679 			tmp_dringp = tmp_dringp->next;
5680 		}
5681 		if (tmp_dringp == NULL) {
5682 			DWARN(DBG_ALL_LDCS,
5683 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5684 			mutex_exit(&ldcssp->lock);
5685 			mutex_exit(&dringp->lock);
5686 			return (EINVAL);
5687 		}
5688 	}
5689 
5690 	mutex_exit(&ldcssp->lock);
5691 
5692 	/* free the descriptor ring */
5693 	kmem_free(dringp->base, dringp->size);
5694 
5695 	mutex_exit(&dringp->lock);
5696 
5697 	/* destroy dring lock */
5698 	mutex_destroy(&dringp->lock);
5699 
5700 	/* free desc ring object */
5701 	kmem_free(dringp, sizeof (ldc_dring_t));
5702 
5703 	return (0);
5704 }
5705 
5706 /*
5707  * Bind a previously allocated dring to a channel. The channel should
5708  * be OPEN in order to bind the ring to the channel. Returns back a
5709  * descriptor ring cookie. The descriptor ring is exported for remote
5710  * access by the client at the other end of the channel. An entry for
5711  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5712  */
5713 int
5714 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5715     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5716 {
5717 	int		err;
5718 	ldc_chan_t 	*ldcp;
5719 	ldc_dring_t	*dringp;
5720 	ldc_mem_handle_t mhandle;
5721 
5722 	/* check to see if channel is initalized */
5723 	if (handle == NULL) {
5724 		DWARN(DBG_ALL_LDCS,
5725 		    "ldc_mem_dring_bind: invalid channel handle\n");
5726 		return (EINVAL);
5727 	}
5728 	ldcp = (ldc_chan_t *)handle;
5729 
5730 	if (dhandle == NULL) {
5731 		DWARN(DBG_ALL_LDCS,
5732 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5733 		return (EINVAL);
5734 	}
5735 	dringp = (ldc_dring_t *)dhandle;
5736 
5737 	if (cookie == NULL) {
5738 		DWARN(ldcp->id,
5739 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5740 		return (EINVAL);
5741 	}
5742 
5743 	mutex_enter(&dringp->lock);
5744 
5745 	if (dringp->status == LDC_BOUND) {
5746 		DWARN(DBG_ALL_LDCS,
5747 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5748 		    ldcp->id);
5749 		mutex_exit(&dringp->lock);
5750 		return (EINVAL);
5751 	}
5752 
5753 	if ((perm & LDC_MEM_RW) == 0) {
5754 		DWARN(DBG_ALL_LDCS,
5755 		    "ldc_mem_dring_bind: invalid permissions\n");
5756 		mutex_exit(&dringp->lock);
5757 		return (EINVAL);
5758 	}
5759 
5760 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5761 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5762 		mutex_exit(&dringp->lock);
5763 		return (EINVAL);
5764 	}
5765 
5766 	dringp->ldcp = ldcp;
5767 
5768 	/* create an memory handle */
5769 	err = ldc_mem_alloc_handle(handle, &mhandle);
5770 	if (err || mhandle == NULL) {
5771 		DWARN(DBG_ALL_LDCS,
5772 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5773 		    ldcp->id);
5774 		mutex_exit(&dringp->lock);
5775 		return (err);
5776 	}
5777 	dringp->mhdl = mhandle;
5778 
5779 	/* bind the descriptor ring to channel */
5780 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5781 	    mtype, perm, cookie, ccount);
5782 	if (err) {
5783 		DWARN(ldcp->id,
5784 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5785 		    ldcp->id);
5786 		mutex_exit(&dringp->lock);
5787 		return (err);
5788 	}
5789 
5790 	/*
5791 	 * For now return error if we get more than one cookie
5792 	 * FUTURE: Return multiple cookies ..
5793 	 */
5794 	if (*ccount > 1) {
5795 		(void) ldc_mem_unbind_handle(mhandle);
5796 		(void) ldc_mem_free_handle(mhandle);
5797 
5798 		dringp->ldcp = NULL;
5799 		dringp->mhdl = NULL;
5800 		*ccount = 0;
5801 
5802 		mutex_exit(&dringp->lock);
5803 		return (EAGAIN);
5804 	}
5805 
5806 	/* Add descriptor ring to channel's exported dring list */
5807 	mutex_enter(&ldcp->exp_dlist_lock);
5808 	dringp->ch_next = ldcp->exp_dring_list;
5809 	ldcp->exp_dring_list = dringp;
5810 	mutex_exit(&ldcp->exp_dlist_lock);
5811 
5812 	dringp->status = LDC_BOUND;
5813 
5814 	mutex_exit(&dringp->lock);
5815 
5816 	return (0);
5817 }
5818 
5819 /*
5820  * Return the next cookie associated with the specified dring handle
5821  */
5822 int
5823 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5824 {
5825 	int		rv = 0;
5826 	ldc_dring_t 	*dringp;
5827 	ldc_chan_t	*ldcp;
5828 
5829 	if (dhandle == NULL) {
5830 		DWARN(DBG_ALL_LDCS,
5831 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5832 		return (EINVAL);
5833 	}
5834 	dringp = (ldc_dring_t *)dhandle;
5835 	mutex_enter(&dringp->lock);
5836 
5837 	if (dringp->status != LDC_BOUND) {
5838 		DWARN(DBG_ALL_LDCS,
5839 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5840 		    "is not bound\n", dringp);
5841 		mutex_exit(&dringp->lock);
5842 		return (EINVAL);
5843 	}
5844 
5845 	ldcp = dringp->ldcp;
5846 
5847 	if (cookie == NULL) {
5848 		DWARN(ldcp->id,
5849 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5850 		    ldcp->id);
5851 		mutex_exit(&dringp->lock);
5852 		return (EINVAL);
5853 	}
5854 
5855 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5856 	mutex_exit(&dringp->lock);
5857 
5858 	return (rv);
5859 }
5860 /*
5861  * Unbind a previously bound dring from a channel.
5862  */
5863 int
5864 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5865 {
5866 	ldc_dring_t 	*dringp;
5867 	ldc_dring_t	*tmp_dringp;
5868 	ldc_chan_t	*ldcp;
5869 
5870 	if (dhandle == NULL) {
5871 		DWARN(DBG_ALL_LDCS,
5872 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5873 		return (EINVAL);
5874 	}
5875 	dringp = (ldc_dring_t *)dhandle;
5876 
5877 	mutex_enter(&dringp->lock);
5878 
5879 	if (dringp->status == LDC_UNBOUND) {
5880 		DWARN(DBG_ALL_LDCS,
5881 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5882 		    dringp);
5883 		mutex_exit(&dringp->lock);
5884 		return (EINVAL);
5885 	}
5886 	ldcp = dringp->ldcp;
5887 
5888 	mutex_enter(&ldcp->exp_dlist_lock);
5889 
5890 	tmp_dringp = ldcp->exp_dring_list;
5891 	if (tmp_dringp == dringp) {
5892 		ldcp->exp_dring_list = dringp->ch_next;
5893 		dringp->ch_next = NULL;
5894 
5895 	} else {
5896 		while (tmp_dringp != NULL) {
5897 			if (tmp_dringp->ch_next == dringp) {
5898 				tmp_dringp->ch_next = dringp->ch_next;
5899 				dringp->ch_next = NULL;
5900 				break;
5901 			}
5902 			tmp_dringp = tmp_dringp->ch_next;
5903 		}
5904 		if (tmp_dringp == NULL) {
5905 			DWARN(DBG_ALL_LDCS,
5906 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5907 			mutex_exit(&ldcp->exp_dlist_lock);
5908 			mutex_exit(&dringp->lock);
5909 			return (EINVAL);
5910 		}
5911 	}
5912 
5913 	mutex_exit(&ldcp->exp_dlist_lock);
5914 
5915 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5916 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5917 
5918 	dringp->ldcp = NULL;
5919 	dringp->mhdl = NULL;
5920 	dringp->status = LDC_UNBOUND;
5921 
5922 	mutex_exit(&dringp->lock);
5923 
5924 	return (0);
5925 }
5926 
5927 /*
5928  * Get information about the dring. The base address of the descriptor
5929  * ring along with the type and permission are returned back.
5930  */
5931 int
5932 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5933 {
5934 	ldc_dring_t	*dringp;
5935 	int		rv;
5936 
5937 	if (dhandle == NULL) {
5938 		DWARN(DBG_ALL_LDCS,
5939 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5940 		return (EINVAL);
5941 	}
5942 	dringp = (ldc_dring_t *)dhandle;
5943 
5944 	mutex_enter(&dringp->lock);
5945 
5946 	if (dringp->mhdl) {
5947 		rv = ldc_mem_info(dringp->mhdl, minfo);
5948 		if (rv) {
5949 			DWARN(DBG_ALL_LDCS,
5950 			    "ldc_mem_dring_info: error reading mem info\n");
5951 			mutex_exit(&dringp->lock);
5952 			return (rv);
5953 		}
5954 	} else {
5955 		minfo->vaddr = dringp->base;
5956 		minfo->raddr = NULL;
5957 		minfo->status = dringp->status;
5958 	}
5959 
5960 	mutex_exit(&dringp->lock);
5961 
5962 	return (0);
5963 }
5964 
5965 /*
5966  * Map an exported descriptor ring into the local address space. If the
5967  * descriptor ring was exported for direct map access, a HV call is made
5968  * to allocate a RA range. If the map is done via a shadow copy, local
5969  * shadow memory is allocated.
5970  */
5971 int
5972 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
5973     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
5974     ldc_dring_handle_t *dhandle)
5975 {
5976 	int		err;
5977 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
5978 	ldc_mem_handle_t mhandle;
5979 	ldc_dring_t	*dringp;
5980 	size_t		dring_size;
5981 
5982 	if (dhandle == NULL) {
5983 		DWARN(DBG_ALL_LDCS,
5984 		    "ldc_mem_dring_map: invalid dhandle\n");
5985 		return (EINVAL);
5986 	}
5987 
5988 	/* check to see if channel is initalized */
5989 	if (handle == NULL) {
5990 		DWARN(DBG_ALL_LDCS,
5991 		    "ldc_mem_dring_map: invalid channel handle\n");
5992 		return (EINVAL);
5993 	}
5994 	ldcp = (ldc_chan_t *)handle;
5995 
5996 	if (cookie == NULL) {
5997 		DWARN(ldcp->id,
5998 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
5999 		    ldcp->id);
6000 		return (EINVAL);
6001 	}
6002 
6003 	/* FUTURE: For now we support only one cookie per dring */
6004 	ASSERT(ccount == 1);
6005 
6006 	if (cookie->size < (dsize * len)) {
6007 		DWARN(ldcp->id,
6008 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
6009 		    ldcp->id);
6010 		return (EINVAL);
6011 	}
6012 
6013 	*dhandle = 0;
6014 
6015 	/* Allocate an dring structure */
6016 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
6017 
6018 	D1(ldcp->id,
6019 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
6020 	    mtype, len, dsize, cookie->addr, cookie->size);
6021 
6022 	/* Initialize dring */
6023 	dringp->length = len;
6024 	dringp->dsize = dsize;
6025 
6026 	/* round of to multiple of page size */
6027 	dring_size = len * dsize;
6028 	dringp->size = (dring_size & MMU_PAGEMASK);
6029 	if (dring_size & MMU_PAGEOFFSET)
6030 		dringp->size += MMU_PAGESIZE;
6031 
6032 	dringp->ldcp = ldcp;
6033 
6034 	/* create an memory handle */
6035 	err = ldc_mem_alloc_handle(handle, &mhandle);
6036 	if (err || mhandle == NULL) {
6037 		DWARN(DBG_ALL_LDCS,
6038 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
6039 		    err);
6040 		kmem_free(dringp, sizeof (ldc_dring_t));
6041 		return (ENOMEM);
6042 	}
6043 
6044 	dringp->mhdl = mhandle;
6045 	dringp->base = NULL;
6046 
6047 	/* map the dring into local memory */
6048 	err = ldc_mem_map(mhandle, cookie, ccount, mtype, LDC_MEM_RW,
6049 	    &(dringp->base), NULL);
6050 	if (err || dringp->base == NULL) {
6051 		cmn_err(CE_WARN,
6052 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
6053 		(void) ldc_mem_free_handle(mhandle);
6054 		kmem_free(dringp, sizeof (ldc_dring_t));
6055 		return (ENOMEM);
6056 	}
6057 
6058 	/* initialize the desc ring lock */
6059 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
6060 
6061 	/* Add descriptor ring to channel's imported dring list */
6062 	mutex_enter(&ldcp->imp_dlist_lock);
6063 	dringp->ch_next = ldcp->imp_dring_list;
6064 	ldcp->imp_dring_list = dringp;
6065 	mutex_exit(&ldcp->imp_dlist_lock);
6066 
6067 	dringp->status = LDC_MAPPED;
6068 
6069 	*dhandle = (ldc_dring_handle_t)dringp;
6070 
6071 	return (0);
6072 }
6073 
6074 /*
6075  * Unmap a descriptor ring. Free shadow memory (if any).
6076  */
6077 int
6078 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
6079 {
6080 	ldc_dring_t 	*dringp;
6081 	ldc_dring_t	*tmp_dringp;
6082 	ldc_chan_t	*ldcp;
6083 
6084 	if (dhandle == NULL) {
6085 		DWARN(DBG_ALL_LDCS,
6086 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
6087 		return (EINVAL);
6088 	}
6089 	dringp = (ldc_dring_t *)dhandle;
6090 
6091 	if (dringp->status != LDC_MAPPED) {
6092 		DWARN(DBG_ALL_LDCS,
6093 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
6094 		return (EINVAL);
6095 	}
6096 
6097 	mutex_enter(&dringp->lock);
6098 
6099 	ldcp = dringp->ldcp;
6100 
6101 	mutex_enter(&ldcp->imp_dlist_lock);
6102 
6103 	/* find and unlink the desc ring from channel import list */
6104 	tmp_dringp = ldcp->imp_dring_list;
6105 	if (tmp_dringp == dringp) {
6106 		ldcp->imp_dring_list = dringp->ch_next;
6107 		dringp->ch_next = NULL;
6108 
6109 	} else {
6110 		while (tmp_dringp != NULL) {
6111 			if (tmp_dringp->ch_next == dringp) {
6112 				tmp_dringp->ch_next = dringp->ch_next;
6113 				dringp->ch_next = NULL;
6114 				break;
6115 			}
6116 			tmp_dringp = tmp_dringp->ch_next;
6117 		}
6118 		if (tmp_dringp == NULL) {
6119 			DWARN(DBG_ALL_LDCS,
6120 			    "ldc_mem_dring_unmap: invalid descriptor\n");
6121 			mutex_exit(&ldcp->imp_dlist_lock);
6122 			mutex_exit(&dringp->lock);
6123 			return (EINVAL);
6124 		}
6125 	}
6126 
6127 	mutex_exit(&ldcp->imp_dlist_lock);
6128 
6129 	/* do a LDC memory handle unmap and free */
6130 	(void) ldc_mem_unmap(dringp->mhdl);
6131 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
6132 
6133 	dringp->status = 0;
6134 	dringp->ldcp = NULL;
6135 
6136 	mutex_exit(&dringp->lock);
6137 
6138 	/* destroy dring lock */
6139 	mutex_destroy(&dringp->lock);
6140 
6141 	/* free desc ring object */
6142 	kmem_free(dringp, sizeof (ldc_dring_t));
6143 
6144 	return (0);
6145 }
6146 
6147 /*
6148  * Internal entry point for descriptor ring access entry consistency
6149  * semantics. Acquire copies the contents of the remote descriptor ring
6150  * into the local shadow copy. The release operation copies the local
6151  * contents into the remote dring. The start and end locations specify
6152  * bounds for the entries being synchronized.
6153  */
6154 static int
6155 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
6156     uint8_t direction, uint64_t start, uint64_t end)
6157 {
6158 	int 			err;
6159 	ldc_dring_t		*dringp;
6160 	ldc_chan_t		*ldcp;
6161 	uint64_t		soff;
6162 	size_t			copy_size;
6163 
6164 	if (dhandle == NULL) {
6165 		DWARN(DBG_ALL_LDCS,
6166 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
6167 		return (EINVAL);
6168 	}
6169 	dringp = (ldc_dring_t *)dhandle;
6170 	mutex_enter(&dringp->lock);
6171 
6172 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
6173 		DWARN(DBG_ALL_LDCS,
6174 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
6175 		mutex_exit(&dringp->lock);
6176 		return (EINVAL);
6177 	}
6178 
6179 	if (start >= dringp->length || end >= dringp->length) {
6180 		DWARN(DBG_ALL_LDCS,
6181 		    "i_ldc_dring_acquire_release: index out of range\n");
6182 		mutex_exit(&dringp->lock);
6183 		return (EINVAL);
6184 	}
6185 
6186 	/* get the channel handle */
6187 	ldcp = dringp->ldcp;
6188 
6189 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
6190 		((dringp->length - start) * dringp->dsize);
6191 
6192 	/* Calculate the relative offset for the first desc */
6193 	soff = (start * dringp->dsize);
6194 
6195 	/* copy to/from remote from/to local memory */
6196 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
6197 	    soff, copy_size);
6198 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6199 	    direction, soff, copy_size);
6200 	if (err) {
6201 		DWARN(ldcp->id,
6202 		    "i_ldc_dring_acquire_release: copy failed\n");
6203 		mutex_exit(&dringp->lock);
6204 		return (err);
6205 	}
6206 
6207 	/* do the balance */
6208 	if (start > end) {
6209 		copy_size = ((end + 1) * dringp->dsize);
6210 		soff = 0;
6211 
6212 		/* copy to/from remote from/to local memory */
6213 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
6214 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
6215 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
6216 		    direction, soff, copy_size);
6217 		if (err) {
6218 			DWARN(ldcp->id,
6219 			    "i_ldc_dring_acquire_release: copy failed\n");
6220 			mutex_exit(&dringp->lock);
6221 			return (err);
6222 		}
6223 	}
6224 
6225 	mutex_exit(&dringp->lock);
6226 
6227 	return (0);
6228 }
6229 
6230 /*
6231  * Ensure that the contents in the local dring are consistent
6232  * with the contents if of remote dring
6233  */
6234 int
6235 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6236 {
6237 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
6238 }
6239 
6240 /*
6241  * Ensure that the contents in the remote dring are consistent
6242  * with the contents if of local dring
6243  */
6244 int
6245 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
6246 {
6247 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
6248 }
6249 
6250 
6251 /* ------------------------------------------------------------------------- */
6252