xref: /titanic_41/usr/src/uts/sun4v/io/ldc.c (revision f29fbe76ce646ba8bcf62627d89087d5f02c62a8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Link Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/note.h>
56 #include <sys/ivintr.h>
57 #include <sys/hypervisor_api.h>
58 #include <sys/ldc.h>
59 #include <sys/ldc_impl.h>
60 #include <sys/cnex.h>
61 #include <sys/hsvc.h>
62 
63 /* Core internal functions */
64 static int i_ldc_h2v_error(int h_error);
65 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
66 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp);
67 static void i_ldc_reset_state(ldc_chan_t *ldcp);
68 static void i_ldc_reset(ldc_chan_t *ldcp);
69 
70 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
71 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
72 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
73 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
74     uint8_t ctrlmsg);
75 
76 /* Interrupt handling functions */
77 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
78 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
79 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
80 
81 /* Read method functions */
82 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
83 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
84 	size_t *sizep);
85 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
86 	size_t *sizep);
87 
88 /* Write method functions */
89 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
90 	size_t *sizep);
91 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
92 	size_t *sizep);
93 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 
96 /* Pkt processing internal functions */
97 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
98 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
99 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
100 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
101 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
102 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
103 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
104 
105 /* Memory synchronization internal functions */
106 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
107     uint8_t direction, uint64_t offset, size_t size);
108 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
109     uint8_t direction, uint64_t start, uint64_t end);
110 
111 /* LDC Version */
112 static ldc_ver_t ldc_versions[] = { {1, 0} };
113 
114 /* number of supported versions */
115 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
116 
117 /* Module State Pointer */
118 static ldc_soft_state_t *ldcssp;
119 
120 static struct modldrv md = {
121 	&mod_miscops,			/* This is a misc module */
122 	"sun4v LDC module v%I%",	/* Name of the module */
123 };
124 
125 static struct modlinkage ml = {
126 	MODREV_1,
127 	&md,
128 	NULL
129 };
130 
131 static uint64_t ldc_sup_minor;		/* Supported minor number */
132 static hsvc_info_t ldc_hsvc = {
133 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
134 };
135 
136 static uint64_t intr_sup_minor;		/* Supported minor number */
137 static hsvc_info_t intr_hsvc = {
138 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
139 };
140 
141 
142 /*
143  * The no. of MTU size messages that can be stored in
144  * the LDC Tx queue. The number of Tx queue entries is
145  * then computed as (mtu * mtu_msgs)/sizeof(queue_entry)
146  */
147 uint64_t ldc_mtu_msgs = LDC_MTU_MSGS;
148 
149 /*
150  * The minimum queue length. This is the size of the smallest
151  * LDC queue. If the computed value is less than this default,
152  * the queue length is rounded up to 'ldc_queue_entries'.
153  */
154 uint64_t ldc_queue_entries = LDC_QUEUE_ENTRIES;
155 
156 /*
157  * Pages exported for remote access over each channel is
158  * maintained in a table registered with the Hypervisor.
159  * The default number of entries in the table is set to
160  * 'ldc_mtbl_entries'.
161  */
162 uint64_t ldc_maptable_entries = LDC_MTBL_ENTRIES;
163 
164 /*
165  * LDC retry count and delay - when the HV returns EWOULDBLOCK
166  * the operation is retried 'ldc_max_retries' times with a
167  * wait of 'ldc_delay' usecs between each retry.
168  */
169 int ldc_max_retries = LDC_MAX_RETRIES;
170 clock_t ldc_delay = LDC_DELAY;
171 
172 #ifdef DEBUG
173 
174 /*
175  * Print debug messages
176  *
177  * set ldcdbg to 0x7 for enabling all msgs
178  * 0x4 - Warnings
179  * 0x2 - All debug messages
180  * 0x1 - Minimal debug messages
181  *
182  * set ldcdbgchan to the channel number you want to debug
183  * setting it to -1 prints debug messages for all channels
184  * NOTE: ldcdbgchan has no effect on error messages
185  */
186 
187 #define	DBG_ALL_LDCS -1
188 
189 int ldcdbg = 0x0;
190 int64_t ldcdbgchan = DBG_ALL_LDCS;
191 
192 static void
193 ldcdebug(int64_t id, const char *fmt, ...)
194 {
195 	char buf[512];
196 	va_list ap;
197 
198 	/*
199 	 * Do not return if,
200 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
201 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
202 	 * debug channel = caller specified channel
203 	 */
204 	if ((id != DBG_ALL_LDCS) &&
205 	    (ldcdbgchan != DBG_ALL_LDCS) &&
206 	    (ldcdbgchan != id)) {
207 		return;
208 	}
209 
210 	va_start(ap, fmt);
211 	(void) vsprintf(buf, fmt, ap);
212 	va_end(ap);
213 
214 	cmn_err(CE_CONT, "?%s\n", buf);
215 }
216 
/*
 * Conditional debug print macros. Each one expands to an unbraced
 * 'if (ldcdbg & <bit>) ldcdebug', so the caller's argument list completes
 * the call: D1(id, fmt, ...). Because the expansion is a bare 'if', do not
 * use these as the body of an if/else without braces.
 */
#define	D1	if (ldcdbg & 0x01) ldcdebug

#define	D2	if (ldcdbg & 0x02) ldcdebug

#define	DWARN	if (ldcdbg & 0x04) ldcdebug
228 
/*
 * Hex-dump 64 bytes starting at 'addr' through D2 for channel 'id'.
 *
 * Each byte is rendered as "|xx" (3 chars); 64 bytes take 192 chars and
 * the closing "|\n" plus the terminating NUL fill the 65*3 byte buffer.
 *
 * The argument is parenthesized before the cast so that an expression
 * argument is converted as a whole, and the macro's locals carry a
 * trailing underscore so they cannot capture an identifier used in the
 * caller's 'addr' expression.
 */
#define	DUMP_PAYLOAD(id, addr)						\
{									\
	char dp_buf_[65*3];						\
	int dp_i_;							\
	uint8_t *dp_src_ = (uint8_t *)(addr);				\
	for (dp_i_ = 0; dp_i_ < 64; dp_i_++, dp_src_++)		\
		(void) sprintf(&dp_buf_[dp_i_ * 3], "|%02x", *dp_src_);	\
	(void) sprintf(&dp_buf_[dp_i_ * 3], "|\n");			\
	D2((id), "payload: %s", dp_buf_);				\
}
239 
/*
 * Print a one-line summary of the LDC packet at 'addr' through D2 for
 * channel 'c', prefixed by the string 's'.
 *
 * The sequence id is shown as 0 in RAW mode. Data packets also show the
 * fragment start ('B') / stop ('E') flags and the length taken from the
 * envelope; all other packets show the raw envelope value.
 *
 * The macro's locals carry a trailing underscore so that a caller passing
 * a variable named 'msg' or 'mid' as 'addr' is not captured by the
 * macro's own declarations.
 */
#define	DUMP_LDC_PKT(c, s, addr)					\
{									\
	ldc_msg_t *lp_msg_ = (ldc_msg_t *)(addr);			\
	uint32_t lp_mid_ =						\
	    ((c)->mode != LDC_MODE_RAW) ? lp_msg_->seqid : 0;		\
	if (lp_msg_->type == LDC_DATA) {				\
	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
	    (s), lp_mid_, lp_msg_->type, lp_msg_->stype,		\
	    lp_msg_->ctrl,						\
	    (lp_msg_->env & LDC_FRAG_START) ? 'B' : ' ',		\
	    (lp_msg_->env & LDC_FRAG_STOP) ? 'E' : ' ',		\
	    (lp_msg_->env & LDC_LEN_MASK));				\
	} else {							\
	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
	    lp_mid_, lp_msg_->type, lp_msg_->stype,			\
	    lp_msg_->ctrl, lp_msg_->env);				\
	}								\
}
255 
256 #else
257 
/*
 * Non-DEBUG build: the debug macros expand to nothing, so the debug
 * helper is never called.
 */
#define	DBG_ALL_LDCS -1

/* print macros - with an empty expansion the argument list is left bare */
#define	D1
#define	D2
#define	DWARN

/* dump macros - expand to nothing */
#define	DUMP_PAYLOAD(id, addr)
#define	DUMP_LDC_PKT(c, s, addr)
266 
267 #endif
268 
/*
 * Zero one LDC packet. Note the expansion already ends with a ';'.
 */
#define	ZERO_PKT(p)	bzero((p), sizeof (ldc_msg_t));

/*
 * Build a cookie value from a table index: the page size code goes in the
 * bits at LDC_COOKIE_PGSZC_SHIFT and the index is shifted up by 'pg_shift'.
 */
#define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
274 
275 
276 int
277 _init(void)
278 {
279 	int status;
280 
281 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
282 	if (status != 0) {
283 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
284 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
285 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
286 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
287 		return (-1);
288 	}
289 
290 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
291 	if (status != 0) {
292 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
293 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
294 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
295 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
296 		(void) hsvc_unregister(&ldc_hsvc);
297 		return (-1);
298 	}
299 
300 	/* allocate soft state structure */
301 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
302 
303 	/* Link the module into the system */
304 	status = mod_install(&ml);
305 	if (status != 0) {
306 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
307 		return (status);
308 	}
309 
310 	/* Initialize the LDC state structure */
311 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
312 
313 	mutex_enter(&ldcssp->lock);
314 
315 	ldcssp->channel_count = 0;
316 	ldcssp->channels_open = 0;
317 	ldcssp->chan_list = NULL;
318 	ldcssp->dring_list = NULL;
319 
320 	mutex_exit(&ldcssp->lock);
321 
322 	return (0);
323 }
324 
325 int
326 _info(struct modinfo *modinfop)
327 {
328 	/* Report status of the dynamically loadable driver module */
329 	return (mod_info(&ml, modinfop));
330 }
331 
332 int
333 _fini(void)
334 {
335 	int 		rv, status;
336 	ldc_chan_t 	*ldcp;
337 	ldc_dring_t 	*dringp;
338 	ldc_mem_info_t 	minfo;
339 
340 	/* Unlink the driver module from the system */
341 	status = mod_remove(&ml);
342 	if (status) {
343 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
344 		return (EIO);
345 	}
346 
347 	/* close and finalize channels */
348 	ldcp = ldcssp->chan_list;
349 	while (ldcp != NULL) {
350 		(void) ldc_close((ldc_handle_t)ldcp);
351 		(void) ldc_fini((ldc_handle_t)ldcp);
352 
353 		ldcp = ldcp->next;
354 	}
355 
356 	/* Free descriptor rings */
357 	dringp = ldcssp->dring_list;
358 	while (dringp != NULL) {
359 		dringp = dringp->next;
360 
361 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
362 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
363 			if (minfo.status == LDC_BOUND) {
364 				(void) ldc_mem_dring_unbind(
365 						(ldc_dring_handle_t)dringp);
366 			}
367 			if (minfo.status == LDC_MAPPED) {
368 				(void) ldc_mem_dring_unmap(
369 						(ldc_dring_handle_t)dringp);
370 			}
371 		}
372 
373 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
374 	}
375 	ldcssp->dring_list = NULL;
376 
377 	/*
378 	 * We have successfully "removed" the driver.
379 	 * Destroying soft states
380 	 */
381 	mutex_destroy(&ldcssp->lock);
382 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
383 
384 	(void) hsvc_unregister(&ldc_hsvc);
385 	(void) hsvc_unregister(&intr_hsvc);
386 
387 	return (status);
388 }
389 
390 /* -------------------------------------------------------------------------- */
391 
392 /*
393  * LDC Link Layer Internal Functions
394  */
395 
396 /*
397  * Translate HV Errors to sun4v error codes
398  */
399 static int
400 i_ldc_h2v_error(int h_error)
401 {
402 	switch (h_error) {
403 
404 	case	H_EOK:
405 		return (0);
406 
407 	case	H_ENORADDR:
408 		return (EFAULT);
409 
410 	case	H_EBADPGSZ:
411 	case	H_EINVAL:
412 		return (EINVAL);
413 
414 	case	H_EWOULDBLOCK:
415 		return (EWOULDBLOCK);
416 
417 	case	H_ENOACCESS:
418 	case	H_ENOMAP:
419 		return (EACCES);
420 
421 	case	H_EIO:
422 	case	H_ECPUERROR:
423 		return (EIO);
424 
425 	case	H_ENOTSUPPORTED:
426 		return (ENOTSUP);
427 
428 	case 	H_ETOOMANY:
429 		return (ENOSPC);
430 
431 	case	H_ECHANNEL:
432 		return (ECHRNG);
433 	default:
434 		break;
435 	}
436 
437 	return (EIO);
438 }
439 
440 /*
441  * Reconfigure the transmit queue
442  */
443 static int
444 i_ldc_txq_reconf(ldc_chan_t *ldcp)
445 {
446 	int rv;
447 
448 	ASSERT(MUTEX_HELD(&ldcp->lock));
449 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
450 
451 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
452 	if (rv) {
453 		cmn_err(CE_WARN,
454 		    "ldc_tx_qconf: (0x%lx) cannot set qconf", ldcp->id);
455 		return (EIO);
456 	}
457 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
458 	    &(ldcp->tx_tail), &(ldcp->link_state));
459 	if (rv) {
460 		cmn_err(CE_WARN,
461 		    "ldc_tx_get_state: (0x%lx) cannot get qptrs", ldcp->id);
462 		return (EIO);
463 	}
464 	D1(ldcp->id, "ldc_tx_get_state: (0x%llx) h=0x%llx,t=0x%llx,"
465 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
466 	    ldcp->link_state);
467 
468 	return (0);
469 }
470 
471 /*
472  * Reconfigure the receive queue
473  */
474 static int
475 i_ldc_rxq_reconf(ldc_chan_t *ldcp)
476 {
477 	int rv;
478 	uint64_t rx_head, rx_tail;
479 
480 	ASSERT(MUTEX_HELD(&ldcp->lock));
481 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
482 	    &(ldcp->link_state));
483 	if (rv) {
484 		cmn_err(CE_WARN,
485 		    "ldc_rx_getstate: (0x%lx) cannot get state",
486 		    ldcp->id);
487 		return (EIO);
488 	}
489 
490 	if (rx_head != rx_tail || ldcp->tstate > TS_READY) {
491 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
492 			ldcp->rx_q_entries);
493 		if (rv) {
494 			cmn_err(CE_WARN,
495 			    "ldc_rx_qconf: (0x%lx) cannot set qconf",
496 			    ldcp->id);
497 			return (EIO);
498 		}
499 		D1(ldcp->id, "ldc_rx_qconf: (0x%llx) completed qconf",
500 		    ldcp->id);
501 	}
502 
503 	return (0);
504 }
505 
506 /*
507  * Reset LDC state structure and its contents
508  */
509 static void
510 i_ldc_reset_state(ldc_chan_t *ldcp)
511 {
512 	ASSERT(MUTEX_HELD(&ldcp->lock));
513 	ldcp->last_msg_snt = LDC_INIT_SEQID;
514 	ldcp->last_ack_rcd = 0;
515 	ldcp->last_msg_rcd = 0;
516 	ldcp->tx_ackd_head = ldcp->tx_head;
517 	ldcp->next_vidx = 0;
518 	ldcp->hstate = 0;
519 	ldcp->tstate = TS_OPEN;
520 	ldcp->status = LDC_OPEN;
521 
522 	if (ldcp->link_state == LDC_CHANNEL_UP ||
523 	    ldcp->link_state == LDC_CHANNEL_RESET) {
524 
525 		if (ldcp->mode == LDC_MODE_RAW) {
526 			ldcp->status = LDC_UP;
527 			ldcp->tstate = TS_UP;
528 		} else {
529 			ldcp->status = LDC_READY;
530 			ldcp->tstate |= TS_LINK_READY;
531 		}
532 	}
533 }
534 
535 /*
536  * Reset a LDC channel
537  */
538 static void
539 i_ldc_reset(ldc_chan_t *ldcp)
540 {
541 	D2(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
542 
543 	ASSERT(MUTEX_HELD(&ldcp->lock));
544 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
545 
546 	(void) i_ldc_txq_reconf(ldcp);
547 	(void) i_ldc_rxq_reconf(ldcp);
548 	i_ldc_reset_state(ldcp);
549 }
550 
551 /*
552  * Clear pending interrupts
553  */
554 static void
555 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
556 {
557 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
558 
559 	ASSERT(MUTEX_HELD(&ldcp->lock));
560 	if (cinfo->dip && ldcp->intr_pending) {
561 		ldcp->intr_pending = B_FALSE;
562 		(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
563 	}
564 }
565 
566 /*
567  * Set the receive queue head
568  * Resets connection and returns an error if it fails.
569  */
570 static int
571 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
572 {
573 	int 	rv;
574 	int 	retries;
575 
576 	ASSERT(MUTEX_HELD(&ldcp->lock));
577 	for (retries = 0; retries < ldc_max_retries; retries++) {
578 
579 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
580 			return (0);
581 
582 		if (rv != H_EWOULDBLOCK)
583 			break;
584 
585 		/* wait for ldc_delay usecs */
586 		drv_usecwait(ldc_delay);
587 	}
588 
589 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
590 		ldcp->id, head);
591 	mutex_enter(&ldcp->tx_lock);
592 	i_ldc_reset(ldcp);
593 	mutex_exit(&ldcp->tx_lock);
594 
595 	return (ECONNRESET);
596 }
597 
598 
599 /*
600  * Returns the tx_tail to be used for transfer
601  * Re-reads the TX queue ptrs if and only if the
602  * the cached head and tail are equal (queue is full)
603  */
604 static int
605 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
606 {
607 	int 		rv;
608 	uint64_t 	current_head, new_tail;
609 
610 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
611 	/* Read the head and tail ptrs from HV */
612 	rv = hv_ldc_tx_get_state(ldcp->id,
613 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
614 	if (rv) {
615 		cmn_err(CE_WARN,
616 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
617 		    ldcp->id);
618 		return (EIO);
619 	}
620 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
621 		DWARN(DBG_ALL_LDCS,
622 		    "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
623 		    ldcp->id);
624 		return (ECONNRESET);
625 	}
626 
627 	/* In reliable mode, check against last ACKd msg */
628 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
629 		ldcp->mode == LDC_MODE_STREAM)
630 		? ldcp->tx_ackd_head : ldcp->tx_head;
631 
632 	/* increment the tail */
633 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
634 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
635 
636 	if (new_tail == current_head) {
637 		DWARN(ldcp->id,
638 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
639 		    ldcp->id);
640 		return (EWOULDBLOCK);
641 	}
642 
643 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
644 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
645 
646 	*tail = ldcp->tx_tail;
647 	return (0);
648 }
649 
650 /*
651  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
652  * and retry ldc_max_retries times before returning an error.
653  * Returns 0, EWOULDBLOCK or EIO
654  */
655 static int
656 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
657 {
658 	int		rv, retval = EWOULDBLOCK;
659 	int 		retries;
660 
661 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
662 	for (retries = 0; retries < ldc_max_retries; retries++) {
663 
664 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
665 			retval = 0;
666 			break;
667 		}
668 		if (rv != H_EWOULDBLOCK) {
669 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
670 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
671 			retval = EIO;
672 			break;
673 		}
674 
675 		/* wait for ldc_delay usecs */
676 		drv_usecwait(ldc_delay);
677 	}
678 	return (retval);
679 }
680 
681 /*
682  * Send a LDC message
683  */
684 static int
685 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
686     uint8_t ctrlmsg)
687 {
688 	int		rv;
689 	ldc_msg_t 	*pkt;
690 	uint64_t	tx_tail;
691 	uint32_t	curr_seqid = ldcp->last_msg_snt;
692 
693 	/* Obtain Tx lock */
694 	mutex_enter(&ldcp->tx_lock);
695 
696 	/* get the current tail for the message */
697 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
698 	if (rv) {
699 		DWARN(ldcp->id,
700 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
701 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
702 		    ldcp->id, pkttype, subtype, ctrlmsg);
703 		mutex_exit(&ldcp->tx_lock);
704 		return (rv);
705 	}
706 
707 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
708 	ZERO_PKT(pkt);
709 
710 	/* Initialize the packet */
711 	pkt->type = pkttype;
712 	pkt->stype = subtype;
713 	pkt->ctrl = ctrlmsg;
714 
715 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
716 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
717 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
718 		curr_seqid++;
719 		if (ldcp->mode != LDC_MODE_RAW) {
720 			pkt->seqid = curr_seqid;
721 			pkt->ackid = ldcp->last_msg_rcd;
722 		}
723 	}
724 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
725 
726 	/* initiate the send by calling into HV and set the new tail */
727 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
728 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
729 
730 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
731 	if (rv) {
732 		DWARN(ldcp->id,
733 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
734 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
735 		    ldcp->id, pkttype, subtype, ctrlmsg);
736 		mutex_exit(&ldcp->tx_lock);
737 		return (EIO);
738 	}
739 
740 	ldcp->last_msg_snt = curr_seqid;
741 	ldcp->tx_tail = tx_tail;
742 
743 	mutex_exit(&ldcp->tx_lock);
744 	return (0);
745 }
746 
747 /*
748  * Checks if packet was received in right order
749  * in the case of a reliable link.
750  * Returns 0 if in order, else EIO
751  */
752 static int
753 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
754 {
755 	/* No seqid checking for RAW mode */
756 	if (ldcp->mode == LDC_MODE_RAW)
757 		return (0);
758 
759 	/* No seqid checking for version, RTS, RTR message */
760 	if (msg->ctrl == LDC_VER ||
761 	    msg->ctrl == LDC_RTS ||
762 	    msg->ctrl == LDC_RTR)
763 		return (0);
764 
765 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
766 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
767 		DWARN(ldcp->id,
768 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
769 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
770 		    (ldcp->last_msg_rcd + 1));
771 		return (EIO);
772 	}
773 
774 	return (0);
775 }
776 
777 
778 /*
779  * Process an incoming version ctrl message
780  */
781 static int
782 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
783 {
784 	int 		rv = 0, idx = ldcp->next_vidx;
785 	ldc_msg_t 	*pkt;
786 	uint64_t	tx_tail;
787 	ldc_ver_t	*rcvd_ver;
788 
789 	/* get the received version */
790 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
791 
792 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
793 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
794 
795 	/* Obtain Tx lock */
796 	mutex_enter(&ldcp->tx_lock);
797 
798 	switch (msg->stype) {
799 	case LDC_INFO:
800 
801 		/* get the current tail and pkt for the response */
802 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
803 		if (rv != 0) {
804 			DWARN(ldcp->id,
805 			    "i_ldc_process_VER: (0x%llx) err sending "
806 			    "version ACK/NACK\n", ldcp->id);
807 			i_ldc_reset(ldcp);
808 			mutex_exit(&ldcp->tx_lock);
809 			return (ECONNRESET);
810 		}
811 
812 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
813 		ZERO_PKT(pkt);
814 
815 		/* initialize the packet */
816 		pkt->type = LDC_CTRL;
817 		pkt->ctrl = LDC_VER;
818 
819 		for (;;) {
820 
821 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
822 			    rcvd_ver->major, rcvd_ver->minor,
823 			    ldc_versions[idx].major, ldc_versions[idx].minor);
824 
825 			if (rcvd_ver->major == ldc_versions[idx].major) {
826 				/* major version match - ACK version */
827 				pkt->stype = LDC_ACK;
828 
829 				/*
830 				 * lower minor version to the one this endpt
831 				 * supports, if necessary
832 				 */
833 				if (rcvd_ver->minor > ldc_versions[idx].minor)
834 					rcvd_ver->minor =
835 						ldc_versions[idx].minor;
836 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
837 
838 				break;
839 			}
840 
841 			if (rcvd_ver->major > ldc_versions[idx].major) {
842 
843 				D1(ldcp->id, "i_ldc_process_VER: using next"
844 				    " lower idx=%d, v%u.%u\n", idx,
845 				    ldc_versions[idx].major,
846 				    ldc_versions[idx].minor);
847 
848 				/* nack with next lower version */
849 				pkt->stype = LDC_NACK;
850 				bcopy(&ldc_versions[idx], pkt->udata,
851 				    sizeof (ldc_versions[idx]));
852 				ldcp->next_vidx = idx;
853 				break;
854 			}
855 
856 			/* next major version */
857 			idx++;
858 
859 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
860 
861 			if (idx == LDC_NUM_VERS) {
862 				/* no version match - send NACK */
863 				pkt->stype = LDC_NACK;
864 				bzero(pkt->udata, sizeof (ldc_ver_t));
865 				ldcp->next_vidx = 0;
866 				break;
867 			}
868 		}
869 
870 		/* initiate the send by calling into HV and set the new tail */
871 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
872 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
873 
874 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
875 		if (rv == 0) {
876 			ldcp->tx_tail = tx_tail;
877 			if (pkt->stype == LDC_ACK) {
878 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
879 				    " version ACK\n", ldcp->id);
880 				/* Save the ACK'd version */
881 				ldcp->version.major = rcvd_ver->major;
882 				ldcp->version.minor = rcvd_ver->minor;
883 				ldcp->hstate |= TS_RCVD_VER;
884 				ldcp->tstate |= TS_VER_DONE;
885 				DWARN(DBG_ALL_LDCS,
886 				    "(0x%llx) Agreed on version v%u.%u\n",
887 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
888 			}
889 		} else {
890 			DWARN(ldcp->id,
891 			    "i_ldc_process_VER: (0x%llx) error sending "
892 			    "ACK/NACK\n", ldcp->id);
893 			i_ldc_reset(ldcp);
894 			mutex_exit(&ldcp->tx_lock);
895 			return (ECONNRESET);
896 		}
897 
898 		break;
899 
900 	case LDC_ACK:
901 		/* SUCCESS - we have agreed on a version */
902 		ldcp->version.major = rcvd_ver->major;
903 		ldcp->version.minor = rcvd_ver->minor;
904 		ldcp->tstate |= TS_VER_DONE;
905 
906 		D1(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n",
907 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
908 
909 		/* initiate RTS-RTR-RDX handshake */
910 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
911 		if (rv) {
912 			DWARN(ldcp->id,
913 			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
914 			    ldcp->id);
915 			i_ldc_reset(ldcp);
916 			mutex_exit(&ldcp->tx_lock);
917 			return (ECONNRESET);
918 		}
919 
920 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
921 		ZERO_PKT(pkt);
922 
923 		pkt->type = LDC_CTRL;
924 		pkt->stype = LDC_INFO;
925 		pkt->ctrl = LDC_RTS;
926 		pkt->env = ldcp->mode;
927 		if (ldcp->mode != LDC_MODE_RAW)
928 			pkt->seqid = LDC_INIT_SEQID;
929 
930 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
931 
932 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
933 
934 		/* initiate the send by calling into HV and set the new tail */
935 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
936 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
937 
938 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
939 		if (rv) {
940 			D2(ldcp->id,
941 			    "i_ldc_process_VER: (0x%llx) no listener\n",
942 			    ldcp->id);
943 			i_ldc_reset(ldcp);
944 			mutex_exit(&ldcp->tx_lock);
945 			return (ECONNRESET);
946 		}
947 
948 		ldcp->tx_tail = tx_tail;
949 		ldcp->hstate |= TS_SENT_RTS;
950 
951 		break;
952 
953 	case LDC_NACK:
954 		/* check if version in NACK is zero */
955 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
956 			/* version handshake failure */
957 			DWARN(DBG_ALL_LDCS,
958 			    "i_ldc_process_VER: (0x%llx) no version match\n",
959 			    ldcp->id);
960 			i_ldc_reset(ldcp);
961 			mutex_exit(&ldcp->tx_lock);
962 			return (ECONNRESET);
963 		}
964 
965 		/* get the current tail and pkt for the response */
966 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
967 		if (rv != 0) {
968 			cmn_err(CE_NOTE,
969 			    "i_ldc_process_VER: (0x%lx) err sending "
970 			    "version ACK/NACK\n", ldcp->id);
971 			i_ldc_reset(ldcp);
972 			mutex_exit(&ldcp->tx_lock);
973 			return (ECONNRESET);
974 		}
975 
976 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
977 		ZERO_PKT(pkt);
978 
979 		/* initialize the packet */
980 		pkt->type = LDC_CTRL;
981 		pkt->ctrl = LDC_VER;
982 		pkt->stype = LDC_INFO;
983 
984 		/* check ver in NACK msg has a match */
985 		for (;;) {
986 			if (rcvd_ver->major == ldc_versions[idx].major) {
987 				/*
988 				 * major version match - resubmit request
989 				 * if lower minor version to the one this endpt
990 				 * supports, if necessary
991 				 */
992 				if (rcvd_ver->minor > ldc_versions[idx].minor)
993 					rcvd_ver->minor =
994 						ldc_versions[idx].minor;
995 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
996 				break;
997 
998 			}
999 
1000 			if (rcvd_ver->major > ldc_versions[idx].major) {
1001 
1002 				D1(ldcp->id, "i_ldc_process_VER: using next"
1003 				    " lower idx=%d, v%u.%u\n", idx,
1004 				    ldc_versions[idx].major,
1005 				    ldc_versions[idx].minor);
1006 
1007 				/* send next lower version */
1008 				bcopy(&ldc_versions[idx], pkt->udata,
1009 				    sizeof (ldc_versions[idx]));
1010 				ldcp->next_vidx = idx;
1011 				break;
1012 			}
1013 
1014 			/* next version */
1015 			idx++;
1016 
1017 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
1018 
1019 			if (idx == LDC_NUM_VERS) {
1020 				/* no version match - terminate */
1021 				ldcp->next_vidx = 0;
1022 				mutex_exit(&ldcp->tx_lock);
1023 				return (ECONNRESET);
1024 			}
1025 		}
1026 
1027 		/* initiate the send by calling into HV and set the new tail */
1028 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1029 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1030 
1031 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1032 		if (rv == 0) {
1033 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1034 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1035 			    ldc_versions[idx].minor);
1036 			ldcp->tx_tail = tx_tail;
1037 		} else {
1038 			cmn_err(CE_NOTE,
1039 			    "i_ldc_process_VER: (0x%lx) error sending version"
1040 			    "INFO\n", ldcp->id);
1041 			i_ldc_reset(ldcp);
1042 			mutex_exit(&ldcp->tx_lock);
1043 			return (ECONNRESET);
1044 		}
1045 
1046 		break;
1047 	}
1048 
1049 	mutex_exit(&ldcp->tx_lock);
1050 	return (rv);
1051 }
1052 
1053 
1054 /*
1055  * Process an incoming RTS ctrl message
1056  */
1057 static int
1058 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1059 {
1060 	int 		rv = 0;
1061 	ldc_msg_t 	*pkt;
1062 	uint64_t	tx_tail;
1063 	boolean_t	sent_NACK = B_FALSE;
1064 
1065 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1066 
1067 	switch (msg->stype) {
1068 	case LDC_NACK:
1069 		DWARN(ldcp->id,
1070 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1071 		    ldcp->id);
1072 
1073 		/* Reset the channel -- as we cannot continue */
1074 		mutex_enter(&ldcp->tx_lock);
1075 		i_ldc_reset(ldcp);
1076 		mutex_exit(&ldcp->tx_lock);
1077 		rv = ECONNRESET;
1078 		break;
1079 
1080 	case LDC_INFO:
1081 
1082 		/* check mode */
1083 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1084 			cmn_err(CE_NOTE,
1085 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1086 			    ldcp->id);
1087 			/*
1088 			 * send NACK in response to MODE message
1089 			 * get the current tail for the response
1090 			 */
1091 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1092 			if (rv) {
1093 				/* if cannot send NACK - reset channel */
1094 				mutex_enter(&ldcp->tx_lock);
1095 				i_ldc_reset(ldcp);
1096 				mutex_exit(&ldcp->tx_lock);
1097 				rv = ECONNRESET;
1098 				break;
1099 			}
1100 			sent_NACK = B_TRUE;
1101 		}
1102 		break;
1103 	default:
1104 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1105 		    ldcp->id);
1106 		mutex_enter(&ldcp->tx_lock);
1107 		i_ldc_reset(ldcp);
1108 		mutex_exit(&ldcp->tx_lock);
1109 		rv = ECONNRESET;
1110 		break;
1111 	}
1112 
1113 	/*
1114 	 * If either the connection was reset (when rv != 0) or
1115 	 * a NACK was sent, we return. In the case of a NACK
1116 	 * we dont want to consume the packet that came in but
1117 	 * not record that we received the RTS
1118 	 */
1119 	if (rv || sent_NACK)
1120 		return (rv);
1121 
1122 	/* record RTS received */
1123 	ldcp->hstate |= TS_RCVD_RTS;
1124 
1125 	/* store initial SEQID info */
1126 	ldcp->last_msg_snt = msg->seqid;
1127 
1128 	/* Obtain Tx lock */
1129 	mutex_enter(&ldcp->tx_lock);
1130 
1131 	/* get the current tail for the response */
1132 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1133 	if (rv != 0) {
1134 		cmn_err(CE_NOTE,
1135 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1136 		    ldcp->id);
1137 		i_ldc_reset(ldcp);
1138 		mutex_exit(&ldcp->tx_lock);
1139 		return (ECONNRESET);
1140 	}
1141 
1142 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1143 	ZERO_PKT(pkt);
1144 
1145 	/* initialize the packet */
1146 	pkt->type = LDC_CTRL;
1147 	pkt->stype = LDC_INFO;
1148 	pkt->ctrl = LDC_RTR;
1149 	pkt->env = ldcp->mode;
1150 	if (ldcp->mode != LDC_MODE_RAW)
1151 		pkt->seqid = LDC_INIT_SEQID;
1152 
1153 	ldcp->last_msg_rcd = msg->seqid;
1154 
1155 	/* initiate the send by calling into HV and set the new tail */
1156 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1157 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1158 
1159 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1160 	if (rv == 0) {
1161 		D2(ldcp->id,
1162 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1163 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1164 
1165 		ldcp->tx_tail = tx_tail;
1166 		ldcp->hstate |= TS_SENT_RTR;
1167 
1168 	} else {
1169 		cmn_err(CE_NOTE,
1170 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1171 		    ldcp->id);
1172 		i_ldc_reset(ldcp);
1173 		mutex_exit(&ldcp->tx_lock);
1174 		return (ECONNRESET);
1175 	}
1176 
1177 	mutex_exit(&ldcp->tx_lock);
1178 	return (0);
1179 }
1180 
1181 /*
1182  * Process an incoming RTR ctrl message
1183  */
1184 static int
1185 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1186 {
1187 	int 		rv = 0;
1188 	boolean_t	sent_NACK = B_FALSE;
1189 
1190 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1191 
1192 	switch (msg->stype) {
1193 	case LDC_NACK:
1194 		/* RTR NACK received */
1195 		DWARN(ldcp->id,
1196 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1197 		    ldcp->id);
1198 
1199 		/* Reset the channel -- as we cannot continue */
1200 		mutex_enter(&ldcp->tx_lock);
1201 		i_ldc_reset(ldcp);
1202 		mutex_exit(&ldcp->tx_lock);
1203 		rv = ECONNRESET;
1204 
1205 		break;
1206 
1207 	case LDC_INFO:
1208 
1209 		/* check mode */
1210 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1211 			DWARN(ldcp->id,
1212 			    "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
1213 			    ldcp->id);
1214 			/*
1215 			 * send NACK in response to MODE message
1216 			 * get the current tail for the response
1217 			 */
1218 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1219 			if (rv) {
1220 				/* if cannot send NACK - reset channel */
1221 				mutex_enter(&ldcp->tx_lock);
1222 				i_ldc_reset(ldcp);
1223 				mutex_exit(&ldcp->tx_lock);
1224 				rv = ECONNRESET;
1225 				break;
1226 			}
1227 			sent_NACK = B_TRUE;
1228 		}
1229 		break;
1230 
1231 	default:
1232 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1233 		    ldcp->id);
1234 
1235 		/* Reset the channel -- as we cannot continue */
1236 		mutex_enter(&ldcp->tx_lock);
1237 		i_ldc_reset(ldcp);
1238 		mutex_exit(&ldcp->tx_lock);
1239 		rv = ECONNRESET;
1240 		break;
1241 	}
1242 
1243 	/*
1244 	 * If either the connection was reset (when rv != 0) or
1245 	 * a NACK was sent, we return. In the case of a NACK
1246 	 * we dont want to consume the packet that came in but
1247 	 * not record that we received the RTR
1248 	 */
1249 	if (rv || sent_NACK)
1250 		return (rv);
1251 
1252 	ldcp->last_msg_snt = msg->seqid;
1253 	ldcp->hstate |= TS_RCVD_RTR;
1254 
1255 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1256 	if (rv) {
1257 		cmn_err(CE_NOTE,
1258 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1259 		    ldcp->id);
1260 		mutex_enter(&ldcp->tx_lock);
1261 		i_ldc_reset(ldcp);
1262 		mutex_exit(&ldcp->tx_lock);
1263 		return (ECONNRESET);
1264 	}
1265 	D2(ldcp->id,
1266 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1267 
1268 	ldcp->hstate |= TS_SENT_RDX;
1269 	ldcp->tstate |= TS_HSHAKE_DONE;
1270 	ldcp->status = LDC_UP;
1271 
1272 	DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1273 
1274 	return (0);
1275 }
1276 
1277 
1278 /*
1279  * Process an incoming RDX ctrl message
1280  */
1281 static int
1282 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1283 {
1284 	int	rv = 0;
1285 
1286 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1287 
1288 	switch (msg->stype) {
1289 	case LDC_NACK:
1290 		/* RDX NACK received */
1291 		DWARN(ldcp->id,
1292 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1293 		    ldcp->id);
1294 
1295 		/* Reset the channel -- as we cannot continue */
1296 		mutex_enter(&ldcp->tx_lock);
1297 		i_ldc_reset(ldcp);
1298 		mutex_exit(&ldcp->tx_lock);
1299 		rv = ECONNRESET;
1300 
1301 		break;
1302 
1303 	case LDC_INFO:
1304 
1305 		/*
1306 		 * if channel is UP and a RDX received after data transmission
1307 		 * has commenced it is an error
1308 		 */
1309 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1310 			DWARN(DBG_ALL_LDCS,
1311 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1312 			    " - LDC reset\n", ldcp->id);
1313 			mutex_enter(&ldcp->tx_lock);
1314 			i_ldc_reset(ldcp);
1315 			mutex_exit(&ldcp->tx_lock);
1316 			return (ECONNRESET);
1317 		}
1318 
1319 		ldcp->hstate |= TS_RCVD_RDX;
1320 		ldcp->tstate |= TS_HSHAKE_DONE;
1321 		ldcp->status = LDC_UP;
1322 
1323 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1324 		break;
1325 
1326 	default:
1327 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1328 		    ldcp->id);
1329 
1330 		/* Reset the channel -- as we cannot continue */
1331 		mutex_enter(&ldcp->tx_lock);
1332 		i_ldc_reset(ldcp);
1333 		mutex_exit(&ldcp->tx_lock);
1334 		rv = ECONNRESET;
1335 		break;
1336 	}
1337 
1338 	return (rv);
1339 }
1340 
1341 /*
1342  * Process an incoming ACK for a data packet
1343  */
1344 static int
1345 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1346 {
1347 	int		rv;
1348 	uint64_t 	tx_head;
1349 	ldc_msg_t	*pkt;
1350 
1351 	/* Obtain Tx lock */
1352 	mutex_enter(&ldcp->tx_lock);
1353 
1354 	/*
1355 	 * Read the current Tx head and tail
1356 	 */
1357 	rv = hv_ldc_tx_get_state(ldcp->id,
1358 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1359 	if (rv != 0) {
1360 		cmn_err(CE_WARN,
1361 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1362 		    ldcp->id);
1363 
1364 		/* Reset the channel -- as we cannot continue */
1365 		i_ldc_reset(ldcp);
1366 		mutex_exit(&ldcp->tx_lock);
1367 		return (ECONNRESET);
1368 	}
1369 
1370 	/*
1371 	 * loop from where the previous ACK location was to the
1372 	 * current head location. This is how far the HV has
1373 	 * actually send pkts. Pkts between head and tail are
1374 	 * yet to be sent by HV.
1375 	 */
1376 	tx_head = ldcp->tx_ackd_head;
1377 	for (;;) {
1378 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1379 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1380 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1381 
1382 		if (pkt->seqid == msg->ackid) {
1383 			D2(ldcp->id,
1384 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1385 			    ldcp->id);
1386 			ldcp->last_ack_rcd = msg->ackid;
1387 			ldcp->tx_ackd_head = tx_head;
1388 			break;
1389 		}
1390 		if (tx_head == ldcp->tx_head) {
1391 			/* could not find packet */
1392 			DWARN(ldcp->id,
1393 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1394 			    ldcp->id);
1395 
1396 			/* Reset the channel -- as we cannot continue */
1397 			i_ldc_reset(ldcp);
1398 			mutex_exit(&ldcp->tx_lock);
1399 			return (ECONNRESET);
1400 		}
1401 	}
1402 
1403 	mutex_exit(&ldcp->tx_lock);
1404 	return (0);
1405 }
1406 
1407 /*
1408  * Process incoming control message
1409  * Return 0 - session can continue
1410  *        EAGAIN - reprocess packet - state was changed
1411  *	  ECONNRESET - channel was reset
1412  */
1413 static int
1414 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1415 {
1416 	int 		rv = 0;
1417 
1418 	switch (ldcp->tstate) {
1419 
1420 	case TS_OPEN:
1421 	case TS_READY:
1422 
1423 		switch (msg->ctrl & LDC_CTRL_MASK) {
1424 		case LDC_VER:
1425 			/* process version message */
1426 			rv = i_ldc_process_VER(ldcp, msg);
1427 			break;
1428 		default:
1429 			DWARN(ldcp->id,
1430 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1431 			    "tstate=0x%x\n", ldcp->id,
1432 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1433 			break;
1434 		}
1435 
1436 		break;
1437 
1438 	case TS_VREADY:
1439 
1440 		switch (msg->ctrl & LDC_CTRL_MASK) {
1441 		case LDC_VER:
1442 			/* peer is redoing version negotiation */
1443 			mutex_enter(&ldcp->tx_lock);
1444 			(void) i_ldc_txq_reconf(ldcp);
1445 			i_ldc_reset_state(ldcp);
1446 			mutex_exit(&ldcp->tx_lock);
1447 			rv = EAGAIN;
1448 			break;
1449 		case LDC_RTS:
1450 			/* process RTS message */
1451 			rv = i_ldc_process_RTS(ldcp, msg);
1452 			break;
1453 		case LDC_RTR:
1454 			/* process RTR message */
1455 			rv = i_ldc_process_RTR(ldcp, msg);
1456 			break;
1457 		case LDC_RDX:
1458 			/* process RDX message */
1459 			rv = i_ldc_process_RDX(ldcp, msg);
1460 			break;
1461 		default:
1462 			DWARN(ldcp->id,
1463 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1464 			    "tstate=0x%x\n", ldcp->id,
1465 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1466 			break;
1467 		}
1468 
1469 		break;
1470 
1471 	case TS_UP:
1472 
1473 		switch (msg->ctrl & LDC_CTRL_MASK) {
1474 		case LDC_VER:
1475 			DWARN(ldcp->id,
1476 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1477 			    "- LDC reset\n", ldcp->id);
1478 			/* peer is redoing version negotiation */
1479 			mutex_enter(&ldcp->tx_lock);
1480 			(void) i_ldc_txq_reconf(ldcp);
1481 			i_ldc_reset_state(ldcp);
1482 			mutex_exit(&ldcp->tx_lock);
1483 			rv = EAGAIN;
1484 			break;
1485 
1486 		case LDC_RDX:
1487 			/* process RDX message */
1488 			rv = i_ldc_process_RDX(ldcp, msg);
1489 			break;
1490 
1491 		default:
1492 			DWARN(ldcp->id,
1493 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1494 			    "tstate=0x%x\n", ldcp->id,
1495 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1496 			break;
1497 		}
1498 	}
1499 
1500 	return (rv);
1501 }
1502 
1503 /*
1504  * Register channel with the channel nexus
1505  */
1506 static int
1507 i_ldc_register_channel(ldc_chan_t *ldcp)
1508 {
1509 	int		rv = 0;
1510 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1511 
1512 	if (cinfo->dip == NULL) {
1513 		DWARN(ldcp->id,
1514 		    "i_ldc_register_channel: cnex has not registered\n");
1515 		return (EAGAIN);
1516 	}
1517 
1518 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1519 	if (rv) {
1520 		DWARN(ldcp->id,
1521 		    "i_ldc_register_channel: cannot register channel\n");
1522 		return (rv);
1523 	}
1524 
1525 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1526 	    i_ldc_tx_hdlr, ldcp, NULL);
1527 	if (rv) {
1528 		DWARN(ldcp->id,
1529 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1530 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1531 		return (rv);
1532 	}
1533 
1534 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1535 	    i_ldc_rx_hdlr, ldcp, NULL);
1536 	if (rv) {
1537 		DWARN(ldcp->id,
1538 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1539 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1540 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1541 		return (rv);
1542 	}
1543 
1544 	ldcp->tstate |= TS_CNEX_RDY;
1545 
1546 	return (0);
1547 }
1548 
1549 /*
1550  * Unregister a channel with the channel nexus
1551  */
1552 static int
1553 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1554 {
1555 	int		rv = 0;
1556 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1557 
1558 	if (cinfo->dip == NULL) {
1559 		DWARN(ldcp->id,
1560 		    "i_ldc_unregister_channel: cnex has not registered\n");
1561 		return (EAGAIN);
1562 	}
1563 
1564 	if (ldcp->tstate & TS_CNEX_RDY) {
1565 
1566 		/* Remove the Rx interrupt */
1567 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1568 		if (rv) {
1569 			DWARN(ldcp->id,
1570 			    "i_ldc_unregister_channel: err removing Rx intr\n");
1571 			return (rv);
1572 		}
1573 
1574 		/* Remove the Tx interrupt */
1575 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1576 		if (rv) {
1577 			DWARN(ldcp->id,
1578 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1579 			return (rv);
1580 		}
1581 
1582 		/* Unregister the channel */
1583 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1584 		if (rv) {
1585 			DWARN(ldcp->id,
1586 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1587 			return (rv);
1588 		}
1589 
1590 		ldcp->tstate &= ~TS_CNEX_RDY;
1591 	}
1592 
1593 	return (0);
1594 }
1595 
1596 
1597 /*
1598  * LDC transmit interrupt handler
1599  *    triggered for chanel up/down/reset events
1600  *    and Tx queue content changes
1601  */
1602 static uint_t
1603 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1604 {
1605 	_NOTE(ARGUNUSED(arg2))
1606 
1607 	int 		rv;
1608 	ldc_chan_t 	*ldcp;
1609 	boolean_t 	notify_client = B_FALSE;
1610 	uint64_t	notify_event = 0;
1611 
1612 	/* Get the channel for which interrupt was received */
1613 	ASSERT(arg1 != NULL);
1614 	ldcp = (ldc_chan_t *)arg1;
1615 
1616 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1617 	    ldcp->id, ldcp);
1618 
1619 	/* Lock channel */
1620 	mutex_enter(&ldcp->lock);
1621 
1622 	/* Obtain Tx lock */
1623 	mutex_enter(&ldcp->tx_lock);
1624 
1625 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1626 	    &ldcp->link_state);
1627 	if (rv) {
1628 		cmn_err(CE_WARN,
1629 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1630 		    ldcp->id, rv);
1631 		mutex_exit(&ldcp->tx_lock);
1632 		mutex_exit(&ldcp->lock);
1633 		return (DDI_INTR_CLAIMED);
1634 	}
1635 
1636 	/*
1637 	 * reset the channel state if the channel went down
1638 	 * (other side unconfigured queue) or channel was reset
1639 	 * (other side reconfigured its queue)
1640 	 */
1641 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1642 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1643 		i_ldc_reset(ldcp);
1644 		notify_client = B_TRUE;
1645 		notify_event = LDC_EVT_DOWN;
1646 	}
1647 
1648 	if (ldcp->link_state == LDC_CHANNEL_RESET) {
1649 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1650 		i_ldc_reset(ldcp);
1651 		notify_client = B_TRUE;
1652 		notify_event = LDC_EVT_RESET;
1653 	}
1654 
1655 	if (ldcp->tstate == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) {
1656 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1657 		notify_client = B_TRUE;
1658 		notify_event = LDC_EVT_RESET;
1659 		ldcp->tstate |= TS_LINK_READY;
1660 		ldcp->status = LDC_READY;
1661 	}
1662 
1663 	/* if callbacks are disabled, do not notify */
1664 	if (!ldcp->cb_enabled)
1665 		notify_client = B_FALSE;
1666 
1667 	if (notify_client)
1668 		ldcp->cb_inprogress = B_TRUE;
1669 
1670 	/* Unlock channel */
1671 	mutex_exit(&ldcp->tx_lock);
1672 	mutex_exit(&ldcp->lock);
1673 
1674 	if (notify_client) {
1675 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1676 		if (rv) {
1677 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1678 			    "failure", ldcp->id);
1679 		}
1680 		mutex_enter(&ldcp->lock);
1681 		ldcp->cb_inprogress = B_FALSE;
1682 		mutex_exit(&ldcp->lock);
1683 	}
1684 
1685 	mutex_enter(&ldcp->lock);
1686 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1687 	mutex_exit(&ldcp->lock);
1688 
1689 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1690 
1691 	return (DDI_INTR_CLAIMED);
1692 }
1693 
1694 /*
1695  * LDC receive interrupt handler
1696  *    triggered for channel with data pending to read
1697  *    i.e. Rx queue content changes
1698  */
1699 static uint_t
1700 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1701 {
1702 	_NOTE(ARGUNUSED(arg2))
1703 
1704 	int		rv;
1705 	uint64_t 	rx_head, rx_tail;
1706 	ldc_msg_t 	*msg;
1707 	ldc_chan_t 	*ldcp;
1708 	boolean_t 	notify_client = B_FALSE;
1709 	uint64_t	notify_event = 0;
1710 	uint64_t	first_fragment = 0;
1711 
1712 	/* Get the channel for which interrupt was received */
1713 	if (arg1 == NULL) {
1714 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1715 		return (DDI_INTR_UNCLAIMED);
1716 	}
1717 
1718 	ldcp = (ldc_chan_t *)arg1;
1719 
1720 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1721 	    ldcp->id, ldcp);
1722 
1723 	/* Lock channel */
1724 	mutex_enter(&ldcp->lock);
1725 
1726 	/* mark interrupt as pending */
1727 	ldcp->intr_pending = B_TRUE;
1728 
1729 	/*
1730 	 * Read packet(s) from the queue
1731 	 */
1732 	for (;;) {
1733 
1734 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1735 		    &ldcp->link_state);
1736 		if (rv) {
1737 			cmn_err(CE_WARN,
1738 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1739 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1740 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1741 			mutex_exit(&ldcp->lock);
1742 			return (DDI_INTR_CLAIMED);
1743 		}
1744 
1745 		/*
1746 		 * reset the channel state if the channel went down
1747 		 * (other side unconfigured queue) or channel was reset
1748 		 * (other side reconfigured its queue
1749 		 */
1750 		if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1751 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link down\n",
1752 			    ldcp->id);
1753 			mutex_enter(&ldcp->tx_lock);
1754 			i_ldc_reset(ldcp);
1755 			mutex_exit(&ldcp->tx_lock);
1756 			notify_client = B_TRUE;
1757 			notify_event = LDC_EVT_DOWN;
1758 			break;
1759 		}
1760 		if (ldcp->link_state == LDC_CHANNEL_RESET) {
1761 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link reset\n",
1762 			    ldcp->id);
1763 			mutex_enter(&ldcp->tx_lock);
1764 			i_ldc_reset(ldcp);
1765 			mutex_exit(&ldcp->tx_lock);
1766 			notify_client = B_TRUE;
1767 			notify_event = LDC_EVT_RESET;
1768 		}
1769 
1770 		if (ldcp->tstate == TS_OPEN &&
1771 		    ldcp->link_state == LDC_CHANNEL_UP) {
1772 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link up\n",
1773 			    ldcp->id);
1774 			notify_client = B_TRUE;
1775 			notify_event = LDC_EVT_RESET;
1776 			ldcp->tstate |= TS_LINK_READY;
1777 			ldcp->status = LDC_READY;
1778 		}
1779 
1780 		if (rx_head == rx_tail) {
1781 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1782 			    ldcp->id);
1783 			break;
1784 		}
1785 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1786 		    rx_head, rx_tail);
1787 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1788 		    ldcp->rx_q_va + rx_head);
1789 
1790 		/* get the message */
1791 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1792 
1793 		/* if channel is in RAW mode or data pkt, notify and return */
1794 		if (ldcp->mode == LDC_MODE_RAW) {
1795 			notify_client = B_TRUE;
1796 			notify_event |= LDC_EVT_READ;
1797 			break;
1798 		}
1799 
1800 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1801 
1802 			/* discard packet if channel is not up */
1803 			if (ldcp->tstate != TS_UP) {
1804 
1805 				/* move the head one position */
1806 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1807 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1808 
1809 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
1810 					break;
1811 
1812 				continue;
1813 			} else {
1814 				notify_client = B_TRUE;
1815 				notify_event |= LDC_EVT_READ;
1816 				break;
1817 			}
1818 		}
1819 
1820 		/* Check the sequence ID for the message received */
1821 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
1822 
1823 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
1824 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
1825 
1826 			/* Reset last_msg_rcd to start of message */
1827 			if (first_fragment != 0) {
1828 				ldcp->last_msg_rcd = first_fragment - 1;
1829 				first_fragment = 0;
1830 			}
1831 
1832 			/*
1833 			 * Send a NACK due to seqid mismatch
1834 			 */
1835 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
1836 			    (msg->ctrl & LDC_CTRL_MASK));
1837 
1838 			if (rv) {
1839 				cmn_err(CE_NOTE,
1840 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
1841 				    "CTRL/NACK msg\n", ldcp->id);
1842 
1843 				/* if cannot send NACK - reset channel */
1844 				mutex_enter(&ldcp->tx_lock);
1845 				i_ldc_reset(ldcp);
1846 				mutex_exit(&ldcp->tx_lock);
1847 				rv = ECONNRESET;
1848 				break;
1849 			}
1850 
1851 			/* purge receive queue */
1852 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
1853 			break;
1854 		}
1855 
1856 		/* record the message ID */
1857 		ldcp->last_msg_rcd = msg->seqid;
1858 
1859 		/* process control messages */
1860 		if (msg->type & LDC_CTRL) {
1861 			/* save current internal state */
1862 			uint64_t tstate = ldcp->tstate;
1863 
1864 			rv = i_ldc_ctrlmsg(ldcp, msg);
1865 			if (rv == EAGAIN) {
1866 				/* re-process pkt - state was adjusted */
1867 				continue;
1868 			}
1869 			if (rv == ECONNRESET) {
1870 				notify_client = B_TRUE;
1871 				notify_event = LDC_EVT_RESET;
1872 				break;
1873 			}
1874 
1875 			/*
1876 			 * control message processing was successful
1877 			 * channel transitioned to ready for communication
1878 			 */
1879 			if (rv == 0 && ldcp->tstate == TS_UP &&
1880 			    tstate != ldcp->tstate) {
1881 				notify_client = B_TRUE;
1882 				notify_event = LDC_EVT_UP;
1883 			}
1884 		}
1885 
1886 		/* process data ACKs */
1887 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
1888 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
1889 				notify_client = B_TRUE;
1890 				notify_event = LDC_EVT_RESET;
1891 				break;
1892 			}
1893 		}
1894 
1895 		/* move the head one position */
1896 		rx_head = (rx_head + LDC_PACKET_SIZE) %
1897 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1898 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
1899 			notify_client = B_TRUE;
1900 			notify_event = LDC_EVT_RESET;
1901 			break;
1902 		}
1903 
1904 	} /* for */
1905 
1906 	/* if callbacks are disabled, do not notify */
1907 	if (!ldcp->cb_enabled)
1908 		notify_client = B_FALSE;
1909 
1910 	if (notify_client)
1911 		ldcp->cb_inprogress = B_TRUE;
1912 
1913 	/* Unlock channel */
1914 	mutex_exit(&ldcp->lock);
1915 
1916 	if (notify_client) {
1917 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1918 		if (rv) {
1919 			DWARN(ldcp->id,
1920 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1921 			    ldcp->id);
1922 		}
1923 		mutex_enter(&ldcp->lock);
1924 		ldcp->cb_inprogress = B_FALSE;
1925 		mutex_exit(&ldcp->lock);
1926 	}
1927 
1928 	mutex_enter(&ldcp->lock);
1929 
1930 	/*
1931 	 * If there are data packets in the queue, the ldc_read will
1932 	 * clear interrupts after draining the queue, else clear interrupts
1933 	 */
1934 	if ((notify_event & LDC_EVT_READ) == 0) {
1935 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1936 	}
1937 
1938 	mutex_exit(&ldcp->lock);
1939 
1940 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1941 	return (DDI_INTR_CLAIMED);
1942 }
1943 
1944 
1945 /* -------------------------------------------------------------------------- */
1946 
1947 /*
1948  * LDC API functions
1949  */
1950 
1951 /*
1952  * Initialize the channel. Allocate internal structure and memory for
1953  * TX/RX queues, and initialize locks.
1954  */
1955 int
1956 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
1957 {
1958 	ldc_chan_t 	*ldcp;
1959 	int		rv, exit_val;
1960 	uint64_t	ra_base, nentries;
1961 	uint64_t	qlen;
1962 
1963 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
1964 
1965 	if (attr == NULL) {
1966 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
1967 		return (EINVAL);
1968 	}
1969 	if (handle == NULL) {
1970 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
1971 		return (EINVAL);
1972 	}
1973 
1974 	/* check if channel is valid */
1975 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
1976 	if (rv == H_ECHANNEL) {
1977 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
1978 		return (EINVAL);
1979 	}
1980 
1981 	/* check if the channel has already been initialized */
1982 	mutex_enter(&ldcssp->lock);
1983 	ldcp = ldcssp->chan_list;
1984 	while (ldcp != NULL) {
1985 		if (ldcp->id == id) {
1986 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
1987 			    id);
1988 			mutex_exit(&ldcssp->lock);
1989 			return (EADDRINUSE);
1990 		}
1991 		ldcp = ldcp->next;
1992 	}
1993 	mutex_exit(&ldcssp->lock);
1994 
1995 	ASSERT(ldcp == NULL);
1996 
1997 	*handle = 0;
1998 
1999 	/* Allocate an ldcp structure */
2000 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
2001 
2002 	/*
2003 	 * Initialize the channel and Tx lock
2004 	 *
2005 	 * The channel 'lock' protects the entire channel and
2006 	 * should be acquired before initializing, resetting,
2007 	 * destroying or reading from a channel.
2008 	 *
2009 	 * The 'tx_lock' should be acquired prior to transmitting
2010 	 * data over the channel. The lock should also be acquired
2011 	 * prior to channel reconfiguration (in order to prevent
2012 	 * concurrent writes).
2013 	 *
2014 	 * ORDERING: When both locks are being acquired, to prevent
2015 	 * deadlocks, the channel lock should be always acquired prior
2016 	 * to the tx_lock.
2017 	 */
2018 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
2019 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
2020 
2021 	/* Initialize the channel */
2022 	ldcp->id = id;
2023 	ldcp->cb = NULL;
2024 	ldcp->cb_arg = NULL;
2025 	ldcp->cb_inprogress = B_FALSE;
2026 	ldcp->cb_enabled = B_FALSE;
2027 	ldcp->next = NULL;
2028 
2029 	/* Read attributes */
2030 	ldcp->mode = attr->mode;
2031 	ldcp->devclass = attr->devclass;
2032 	ldcp->devinst = attr->instance;
2033 	ldcp->mtu = (attr->mtu > 0) ? attr->mtu : LDC_DEFAULT_MTU;
2034 
2035 	D1(ldcp->id,
2036 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2037 	    "instance=0x%llx, mode=%d, mtu=%d\n",
2038 	    ldcp->id, ldcp->devclass, ldcp->devinst, ldcp->mode, ldcp->mtu);
2039 
2040 	ldcp->next_vidx = 0;
2041 	ldcp->tstate = 0;
2042 	ldcp->hstate = 0;
2043 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2044 	ldcp->last_ack_rcd = 0;
2045 	ldcp->last_msg_rcd = 0;
2046 
2047 	ldcp->stream_bufferp = NULL;
2048 	ldcp->exp_dring_list = NULL;
2049 	ldcp->imp_dring_list = NULL;
2050 	ldcp->mhdl_list = NULL;
2051 
2052 	/* Initialize payload size depending on whether channel is reliable */
2053 	switch (ldcp->mode) {
2054 	case LDC_MODE_RAW:
2055 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2056 		ldcp->read_p = i_ldc_read_raw;
2057 		ldcp->write_p = i_ldc_write_raw;
2058 		break;
2059 	case LDC_MODE_UNRELIABLE:
2060 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2061 		ldcp->read_p = i_ldc_read_packet;
2062 		ldcp->write_p = i_ldc_write_packet;
2063 		break;
2064 	case LDC_MODE_RELIABLE:
2065 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2066 		ldcp->read_p = i_ldc_read_packet;
2067 		ldcp->write_p = i_ldc_write_packet;
2068 		break;
2069 	case LDC_MODE_STREAM:
2070 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2071 
2072 		ldcp->stream_remains = 0;
2073 		ldcp->stream_offset = 0;
2074 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2075 		ldcp->read_p = i_ldc_read_stream;
2076 		ldcp->write_p = i_ldc_write_stream;
2077 		break;
2078 	default:
2079 		exit_val = EINVAL;
2080 		goto cleanup_on_exit;
2081 	}
2082 
2083 	/*
2084 	 * qlen is (mtu * ldc_mtu_msgs) / pkt_payload. If this
2085 	 * value is smaller than default length of ldc_queue_entries,
2086 	 * qlen is set to ldc_queue_entries..
2087 	 */
2088 	qlen = (ldcp->mtu * ldc_mtu_msgs) / ldcp->pkt_payload;
2089 	ldcp->rx_q_entries =
2090 		(qlen < ldc_queue_entries) ? ldc_queue_entries : qlen;
2091 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2092 
2093 	D1(ldcp->id, "ldc_init: queue length = 0x%llx\n", qlen);
2094 
2095 	/* Create a transmit queue */
2096 	ldcp->tx_q_va = (uint64_t)
2097 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2098 	if (ldcp->tx_q_va == NULL) {
2099 		cmn_err(CE_WARN,
2100 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2101 		    ldcp->id);
2102 		exit_val = ENOMEM;
2103 		goto cleanup_on_exit;
2104 	}
2105 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2106 
2107 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2108 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2109 
2110 	ldcp->tstate |= TS_TXQ_RDY;
2111 
2112 	/* Create a receive queue */
2113 	ldcp->rx_q_va = (uint64_t)
2114 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2115 	if (ldcp->rx_q_va == NULL) {
2116 		cmn_err(CE_WARN,
2117 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2118 		    ldcp->id);
2119 		exit_val = ENOMEM;
2120 		goto cleanup_on_exit;
2121 	}
2122 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2123 
2124 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2125 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2126 
2127 	ldcp->tstate |= TS_RXQ_RDY;
2128 
2129 	/* Init descriptor ring and memory handle list lock */
2130 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2131 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2132 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2133 
2134 	/* mark status as INITialized */
2135 	ldcp->status = LDC_INIT;
2136 
2137 	/* Add to channel list */
2138 	mutex_enter(&ldcssp->lock);
2139 	ldcp->next = ldcssp->chan_list;
2140 	ldcssp->chan_list = ldcp;
2141 	ldcssp->channel_count++;
2142 	mutex_exit(&ldcssp->lock);
2143 
2144 	/* set the handle */
2145 	*handle = (ldc_handle_t)ldcp;
2146 
2147 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2148 
2149 	return (0);
2150 
2151 cleanup_on_exit:
2152 
2153 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2154 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2155 
2156 	if (ldcp->tstate & TS_TXQ_RDY)
2157 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2158 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2159 
2160 	if (ldcp->tstate & TS_RXQ_RDY)
2161 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2162 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2163 
2164 	mutex_destroy(&ldcp->tx_lock);
2165 	mutex_destroy(&ldcp->lock);
2166 
2167 	if (ldcp)
2168 		kmem_free(ldcp, sizeof (ldc_chan_t));
2169 
2170 	return (exit_val);
2171 }
2172 
2173 /*
2174  * Finalizes the LDC connection. It will return EBUSY if the
2175  * channel is open. A ldc_close() has to be done prior to
2176  * a ldc_fini operation. It frees TX/RX queues, associated
2177  * with the channel
2178  */
2179 int
2180 ldc_fini(ldc_handle_t handle)
2181 {
2182 	ldc_chan_t 	*ldcp;
2183 	ldc_chan_t 	*tmp_ldcp;
2184 	uint64_t 	id;
2185 
2186 	if (handle == NULL) {
2187 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2188 		return (EINVAL);
2189 	}
2190 	ldcp = (ldc_chan_t *)handle;
2191 	id = ldcp->id;
2192 
2193 	mutex_enter(&ldcp->lock);
2194 
2195 	if (ldcp->tstate > TS_INIT) {
2196 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2197 		    ldcp->id);
2198 		mutex_exit(&ldcp->lock);
2199 		return (EBUSY);
2200 	}
2201 
2202 	/* Remove from the channel list */
2203 	mutex_enter(&ldcssp->lock);
2204 	tmp_ldcp = ldcssp->chan_list;
2205 	if (tmp_ldcp == ldcp) {
2206 		ldcssp->chan_list = ldcp->next;
2207 		ldcp->next = NULL;
2208 	} else {
2209 		while (tmp_ldcp != NULL) {
2210 			if (tmp_ldcp->next == ldcp) {
2211 				tmp_ldcp->next = ldcp->next;
2212 				ldcp->next = NULL;
2213 				break;
2214 			}
2215 			tmp_ldcp = tmp_ldcp->next;
2216 		}
2217 		if (tmp_ldcp == NULL) {
2218 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2219 			mutex_exit(&ldcssp->lock);
2220 			mutex_exit(&ldcp->lock);
2221 			return (EINVAL);
2222 		}
2223 	}
2224 
2225 	ldcssp->channel_count--;
2226 
2227 	mutex_exit(&ldcssp->lock);
2228 
2229 	/* Free the map table for this channel */
2230 	if (ldcp->mtbl) {
2231 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2232 		contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2233 		mutex_destroy(&ldcp->mtbl->lock);
2234 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2235 	}
2236 
2237 	/* Destroy descriptor ring and memory handle list lock */
2238 	mutex_destroy(&ldcp->exp_dlist_lock);
2239 	mutex_destroy(&ldcp->imp_dlist_lock);
2240 	mutex_destroy(&ldcp->mlist_lock);
2241 
2242 	/* Free the stream buffer for STREAM_MODE */
2243 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2244 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2245 
2246 	/* Free the RX queue */
2247 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2248 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2249 	ldcp->tstate &= ~TS_RXQ_RDY;
2250 
2251 	/* Free the TX queue */
2252 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2253 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2254 	ldcp->tstate &= ~TS_TXQ_RDY;
2255 
2256 
2257 	mutex_exit(&ldcp->lock);
2258 
2259 	/* Destroy mutex */
2260 	mutex_destroy(&ldcp->tx_lock);
2261 	mutex_destroy(&ldcp->lock);
2262 
2263 	/* free channel structure */
2264 	kmem_free(ldcp, sizeof (ldc_chan_t));
2265 
2266 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2267 
2268 	return (0);
2269 }
2270 
2271 /*
2272  * Open the LDC channel for use. It registers the TX/RX queues
2273  * with the Hypervisor. It also specifies the interrupt number
2274  * and target CPU for this channel
2275  */
2276 int
2277 ldc_open(ldc_handle_t handle)
2278 {
2279 	ldc_chan_t 	*ldcp;
2280 	int 		rv;
2281 
2282 	if (handle == NULL) {
2283 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2284 		return (EINVAL);
2285 	}
2286 
2287 	ldcp = (ldc_chan_t *)handle;
2288 
2289 	mutex_enter(&ldcp->lock);
2290 
2291 	if (ldcp->tstate < TS_INIT) {
2292 		DWARN(ldcp->id,
2293 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2294 		mutex_exit(&ldcp->lock);
2295 		return (EFAULT);
2296 	}
2297 	if (ldcp->tstate >= TS_OPEN) {
2298 		DWARN(ldcp->id,
2299 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2300 		mutex_exit(&ldcp->lock);
2301 		return (EFAULT);
2302 	}
2303 
2304 	/*
2305 	 * Unregister/Register the tx queue with the hypervisor
2306 	 */
2307 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2308 	if (rv) {
2309 		cmn_err(CE_WARN,
2310 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2311 		    ldcp->id);
2312 		mutex_exit(&ldcp->lock);
2313 		return (EIO);
2314 	}
2315 
2316 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2317 	if (rv) {
2318 		cmn_err(CE_WARN,
2319 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2320 		    ldcp->id);
2321 		mutex_exit(&ldcp->lock);
2322 		return (EIO);
2323 	}
2324 
2325 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2326 	    ldcp->id);
2327 
2328 	/*
2329 	 * Unregister/Register the rx queue with the hypervisor
2330 	 */
2331 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2332 	if (rv) {
2333 		cmn_err(CE_WARN,
2334 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2335 		    ldcp->id);
2336 		mutex_exit(&ldcp->lock);
2337 		return (EIO);
2338 	}
2339 
2340 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2341 	if (rv) {
2342 		cmn_err(CE_WARN,
2343 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2344 		    ldcp->id);
2345 		mutex_exit(&ldcp->lock);
2346 		return (EIO);
2347 	}
2348 
2349 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2350 	    ldcp->id);
2351 
2352 	ldcp->tstate |= TS_QCONF_RDY;
2353 
2354 	/* Register the channel with the channel nexus */
2355 	rv = i_ldc_register_channel(ldcp);
2356 	if (rv && rv != EAGAIN) {
2357 		cmn_err(CE_WARN,
2358 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2359 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2360 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2361 		mutex_exit(&ldcp->lock);
2362 		return (EIO);
2363 	}
2364 
2365 	/* mark channel in OPEN state */
2366 	ldcp->status = LDC_OPEN;
2367 
2368 	/* Read channel state */
2369 	rv = hv_ldc_tx_get_state(ldcp->id,
2370 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2371 	if (rv) {
2372 		cmn_err(CE_WARN,
2373 		    "ldc_open: (0x%lx) cannot read channel state\n",
2374 		    ldcp->id);
2375 		(void) i_ldc_unregister_channel(ldcp);
2376 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2377 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2378 		mutex_exit(&ldcp->lock);
2379 		return (EIO);
2380 	}
2381 
2382 	/*
2383 	 * set the ACKd head to current head location for reliable &
2384 	 * streaming mode
2385 	 */
2386 	ldcp->tx_ackd_head = ldcp->tx_head;
2387 
2388 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2389 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2390 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2391 		ldcp->tstate |= TS_LINK_READY;
2392 		ldcp->status = LDC_READY;
2393 	}
2394 
2395 	/*
2396 	 * if channel is being opened in RAW mode - no handshake is needed
2397 	 * switch the channel READY and UP state
2398 	 */
2399 	if (ldcp->mode == LDC_MODE_RAW) {
2400 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2401 		ldcp->status = LDC_UP;
2402 	}
2403 
2404 	mutex_exit(&ldcp->lock);
2405 
2406 	/*
2407 	 * Increment number of open channels
2408 	 */
2409 	mutex_enter(&ldcssp->lock);
2410 	ldcssp->channels_open++;
2411 	mutex_exit(&ldcssp->lock);
2412 
2413 	D1(ldcp->id,
2414 	    "ldc_open: (0x%llx) channel (0x%p) open for use (tstate=0x%x)\n",
2415 	    ldcp->id, ldcp, ldcp->tstate);
2416 
2417 	return (0);
2418 }
2419 
2420 /*
2421  * Close the LDC connection. It will return EBUSY if there
2422  * are memory segments or descriptor rings either bound to or
2423  * mapped over the channel
2424  */
2425 int
2426 ldc_close(ldc_handle_t handle)
2427 {
2428 	ldc_chan_t 	*ldcp;
2429 	int		rv = 0, retries = 0;
2430 	boolean_t	chk_done = B_FALSE;
2431 
2432 	if (handle == NULL) {
2433 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2434 		return (EINVAL);
2435 	}
2436 	ldcp = (ldc_chan_t *)handle;
2437 
2438 	mutex_enter(&ldcp->lock);
2439 
2440 	/* return error if channel is not open */
2441 	if (ldcp->tstate < TS_OPEN) {
2442 		DWARN(ldcp->id,
2443 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2444 		mutex_exit(&ldcp->lock);
2445 		return (EFAULT);
2446 	}
2447 
2448 	/* if any memory handles, drings, are bound or mapped cannot close */
2449 	if (ldcp->mhdl_list != NULL) {
2450 		DWARN(ldcp->id,
2451 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2452 		    ldcp->id);
2453 		mutex_exit(&ldcp->lock);
2454 		return (EBUSY);
2455 	}
2456 	if (ldcp->exp_dring_list != NULL) {
2457 		DWARN(ldcp->id,
2458 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2459 		    ldcp->id);
2460 		mutex_exit(&ldcp->lock);
2461 		return (EBUSY);
2462 	}
2463 	if (ldcp->imp_dring_list != NULL) {
2464 		DWARN(ldcp->id,
2465 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2466 		    ldcp->id);
2467 		mutex_exit(&ldcp->lock);
2468 		return (EBUSY);
2469 	}
2470 
2471 	/* Obtain Tx lock */
2472 	mutex_enter(&ldcp->tx_lock);
2473 
2474 	/*
2475 	 * Wait for pending transmits to complete i.e Tx queue to drain
2476 	 * if there are pending pkts - wait 1 ms and retry again
2477 	 */
2478 	for (;;) {
2479 
2480 		rv = hv_ldc_tx_get_state(ldcp->id,
2481 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2482 		if (rv) {
2483 			cmn_err(CE_WARN,
2484 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2485 			mutex_exit(&ldcp->tx_lock);
2486 			mutex_exit(&ldcp->lock);
2487 			return (EIO);
2488 		}
2489 
2490 		if (ldcp->tx_head == ldcp->tx_tail ||
2491 		    ldcp->link_state != LDC_CHANNEL_UP) {
2492 			break;
2493 		}
2494 
2495 		if (chk_done) {
2496 			DWARN(ldcp->id,
2497 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2498 			    ldcp->id);
2499 			break;
2500 		}
2501 
2502 		/* wait for one ms and try again */
2503 		delay(drv_usectohz(1000));
2504 		chk_done = B_TRUE;
2505 	}
2506 
2507 	/*
2508 	 * Unregister the channel with the nexus
2509 	 */
2510 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2511 
2512 		mutex_exit(&ldcp->tx_lock);
2513 		mutex_exit(&ldcp->lock);
2514 
2515 		/* if any error other than EAGAIN return back */
2516 		if (rv != EAGAIN || retries >= LDC_MAX_RETRIES) {
2517 			cmn_err(CE_WARN,
2518 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2519 			    ldcp->id, rv);
2520 			return (rv);
2521 		}
2522 
2523 		/*
2524 		 * As there could be pending interrupts we need
2525 		 * to wait and try again
2526 		 */
2527 		drv_usecwait(LDC_DELAY);
2528 		mutex_enter(&ldcp->lock);
2529 		mutex_enter(&ldcp->tx_lock);
2530 		retries++;
2531 	}
2532 
2533 	/*
2534 	 * Unregister queues
2535 	 */
2536 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2537 	if (rv) {
2538 		cmn_err(CE_WARN,
2539 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2540 		    ldcp->id);
2541 		mutex_exit(&ldcp->tx_lock);
2542 		mutex_exit(&ldcp->lock);
2543 		return (EIO);
2544 	}
2545 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2546 	if (rv) {
2547 		cmn_err(CE_WARN,
2548 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2549 		    ldcp->id);
2550 		mutex_exit(&ldcp->tx_lock);
2551 		mutex_exit(&ldcp->lock);
2552 		return (EIO);
2553 	}
2554 
2555 	ldcp->tstate &= ~TS_QCONF_RDY;
2556 
2557 	/* Reset channel state information */
2558 	i_ldc_reset_state(ldcp);
2559 
2560 	/* Mark channel as down and in initialized state */
2561 	ldcp->tx_ackd_head = 0;
2562 	ldcp->tx_head = 0;
2563 	ldcp->tstate = TS_INIT;
2564 	ldcp->status = LDC_INIT;
2565 
2566 	mutex_exit(&ldcp->tx_lock);
2567 	mutex_exit(&ldcp->lock);
2568 
2569 	/* Decrement number of open channels */
2570 	mutex_enter(&ldcssp->lock);
2571 	ldcssp->channels_open--;
2572 	mutex_exit(&ldcssp->lock);
2573 
2574 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2575 
2576 	return (0);
2577 }
2578 
2579 /*
2580  * Register channel callback
2581  */
2582 int
2583 ldc_reg_callback(ldc_handle_t handle,
2584     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2585 {
2586 	ldc_chan_t *ldcp;
2587 
2588 	if (handle == NULL) {
2589 		DWARN(DBG_ALL_LDCS,
2590 		    "ldc_reg_callback: invalid channel handle\n");
2591 		return (EINVAL);
2592 	}
2593 	if (((uint64_t)cb) < KERNELBASE) {
2594 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2595 		return (EINVAL);
2596 	}
2597 	ldcp = (ldc_chan_t *)handle;
2598 
2599 	mutex_enter(&ldcp->lock);
2600 
2601 	if (ldcp->cb) {
2602 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2603 		    ldcp->id);
2604 		mutex_exit(&ldcp->lock);
2605 		return (EIO);
2606 	}
2607 	if (ldcp->cb_inprogress) {
2608 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2609 		    ldcp->id);
2610 		mutex_exit(&ldcp->lock);
2611 		return (EWOULDBLOCK);
2612 	}
2613 
2614 	ldcp->cb = cb;
2615 	ldcp->cb_arg = arg;
2616 	ldcp->cb_enabled = B_TRUE;
2617 
2618 	D1(ldcp->id,
2619 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2620 	    ldcp->id);
2621 
2622 	mutex_exit(&ldcp->lock);
2623 
2624 	return (0);
2625 }
2626 
2627 /*
2628  * Unregister channel callback
2629  */
2630 int
2631 ldc_unreg_callback(ldc_handle_t handle)
2632 {
2633 	ldc_chan_t *ldcp;
2634 
2635 	if (handle == NULL) {
2636 		DWARN(DBG_ALL_LDCS,
2637 		    "ldc_unreg_callback: invalid channel handle\n");
2638 		return (EINVAL);
2639 	}
2640 	ldcp = (ldc_chan_t *)handle;
2641 
2642 	mutex_enter(&ldcp->lock);
2643 
2644 	if (ldcp->cb == NULL) {
2645 		DWARN(ldcp->id,
2646 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2647 		    ldcp->id);
2648 		mutex_exit(&ldcp->lock);
2649 		return (EIO);
2650 	}
2651 	if (ldcp->cb_inprogress) {
2652 		DWARN(ldcp->id,
2653 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2654 		    ldcp->id);
2655 		mutex_exit(&ldcp->lock);
2656 		return (EWOULDBLOCK);
2657 	}
2658 
2659 	ldcp->cb = NULL;
2660 	ldcp->cb_arg = NULL;
2661 	ldcp->cb_enabled = B_FALSE;
2662 
2663 	D1(ldcp->id,
2664 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2665 	    ldcp->id);
2666 
2667 	mutex_exit(&ldcp->lock);
2668 
2669 	return (0);
2670 }
2671 
2672 
2673 /*
2674  * Bring a channel up by initiating a handshake with the peer
2675  * This call is asynchronous. It will complete at a later point
2676  * in time when the peer responds back with an RTR.
2677  */
2678 int
2679 ldc_up(ldc_handle_t handle)
2680 {
2681 	int 		rv;
2682 	ldc_chan_t 	*ldcp;
2683 	ldc_msg_t 	*ldcmsg;
2684 	uint64_t 	tx_tail;
2685 
2686 	if (handle == NULL) {
2687 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2688 		return (EINVAL);
2689 	}
2690 	ldcp = (ldc_chan_t *)handle;
2691 
2692 	mutex_enter(&ldcp->lock);
2693 
2694 	if (ldcp->tstate == TS_UP) {
2695 		D2(ldcp->id,
2696 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2697 		    ldcp->id);
2698 		mutex_exit(&ldcp->lock);
2699 		return (0);
2700 	}
2701 
2702 	/* if the channel is in RAW mode - mark it as UP, if READY */
2703 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2704 		ldcp->tstate = TS_UP;
2705 		mutex_exit(&ldcp->lock);
2706 		return (0);
2707 	}
2708 
2709 	/* Don't start another handshake if there is one in progress */
2710 	if (ldcp->hstate) {
2711 		D2(ldcp->id,
2712 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2713 		    ldcp->id);
2714 		mutex_exit(&ldcp->lock);
2715 		return (0);
2716 	}
2717 
2718 	mutex_enter(&ldcp->tx_lock);
2719 
2720 	/* get the current tail for the LDC msg */
2721 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2722 	if (rv) {
2723 		DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2724 		    ldcp->id);
2725 		mutex_exit(&ldcp->tx_lock);
2726 		mutex_exit(&ldcp->lock);
2727 		return (ECONNREFUSED);
2728 	}
2729 
2730 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2731 	ZERO_PKT(ldcmsg);
2732 
2733 	ldcmsg->type = LDC_CTRL;
2734 	ldcmsg->stype = LDC_INFO;
2735 	ldcmsg->ctrl = LDC_VER;
2736 	ldcp->next_vidx = 0;
2737 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2738 
2739 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2740 
2741 	/* initiate the send by calling into HV and set the new tail */
2742 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2743 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2744 
2745 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2746 	if (rv) {
2747 		DWARN(ldcp->id,
2748 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2749 		    ldcp->id, rv);
2750 		mutex_exit(&ldcp->tx_lock);
2751 		mutex_exit(&ldcp->lock);
2752 		return (rv);
2753 	}
2754 
2755 	ldcp->hstate |= TS_SENT_VER;
2756 	ldcp->tx_tail = tx_tail;
2757 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2758 
2759 	mutex_exit(&ldcp->tx_lock);
2760 	mutex_exit(&ldcp->lock);
2761 
2762 	return (rv);
2763 }
2764 
2765 
2766 /*
2767  * Bring a channel down by resetting its state and queues
2768  */
2769 int
2770 ldc_down(ldc_handle_t handle)
2771 {
2772 	ldc_chan_t 	*ldcp;
2773 
2774 	if (handle == NULL) {
2775 		DWARN(DBG_ALL_LDCS, "ldc_down: invalid channel handle\n");
2776 		return (EINVAL);
2777 	}
2778 	ldcp = (ldc_chan_t *)handle;
2779 
2780 	mutex_enter(&ldcp->lock);
2781 	mutex_enter(&ldcp->tx_lock);
2782 	i_ldc_reset(ldcp);
2783 	mutex_exit(&ldcp->tx_lock);
2784 	mutex_exit(&ldcp->lock);
2785 
2786 	return (0);
2787 }
2788 
2789 /*
2790  * Get the current channel status
2791  */
2792 int
2793 ldc_status(ldc_handle_t handle, ldc_status_t *status)
2794 {
2795 	ldc_chan_t *ldcp;
2796 
2797 	if (handle == NULL || status == NULL) {
2798 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
2799 		return (EINVAL);
2800 	}
2801 	ldcp = (ldc_chan_t *)handle;
2802 
2803 	*status = ((ldc_chan_t *)handle)->status;
2804 
2805 	D1(ldcp->id,
2806 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
2807 	return (0);
2808 }
2809 
2810 
2811 /*
2812  * Set the channel's callback mode - enable/disable callbacks
2813  */
2814 int
2815 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
2816 {
2817 	ldc_chan_t 	*ldcp;
2818 
2819 	if (handle == NULL) {
2820 		DWARN(DBG_ALL_LDCS,
2821 		    "ldc_set_intr_mode: invalid channel handle\n");
2822 		return (EINVAL);
2823 	}
2824 	ldcp = (ldc_chan_t *)handle;
2825 
2826 	/*
2827 	 * Record no callbacks should be invoked
2828 	 */
2829 	mutex_enter(&ldcp->lock);
2830 
2831 	switch (cmode) {
2832 	case LDC_CB_DISABLE:
2833 		if (!ldcp->cb_enabled) {
2834 			DWARN(ldcp->id,
2835 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
2836 			    ldcp->id);
2837 			break;
2838 		}
2839 		ldcp->cb_enabled = B_FALSE;
2840 
2841 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
2842 		    ldcp->id);
2843 		break;
2844 
2845 	case LDC_CB_ENABLE:
2846 		if (ldcp->cb_enabled) {
2847 			DWARN(ldcp->id,
2848 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
2849 			    ldcp->id);
2850 			break;
2851 		}
2852 		ldcp->cb_enabled = B_TRUE;
2853 
2854 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
2855 		    ldcp->id);
2856 		break;
2857 	}
2858 
2859 	mutex_exit(&ldcp->lock);
2860 
2861 	return (0);
2862 }
2863 
2864 /*
2865  * Check to see if there are packets on the incoming queue
2866  * Will return hasdata = B_FALSE if there are no packets
2867  */
2868 int
2869 ldc_chkq(ldc_handle_t handle, boolean_t *hasdata)
2870 {
2871 	int 		rv;
2872 	uint64_t 	rx_head, rx_tail;
2873 	ldc_chan_t 	*ldcp;
2874 
2875 	if (handle == NULL) {
2876 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
2877 		return (EINVAL);
2878 	}
2879 	ldcp = (ldc_chan_t *)handle;
2880 
2881 	*hasdata = B_FALSE;
2882 
2883 	mutex_enter(&ldcp->lock);
2884 
2885 	if (ldcp->tstate != TS_UP) {
2886 		D1(ldcp->id,
2887 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
2888 		mutex_exit(&ldcp->lock);
2889 		return (ECONNRESET);
2890 	}
2891 
2892 	/* Read packet(s) from the queue */
2893 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2894 	    &ldcp->link_state);
2895 	if (rv != 0) {
2896 		cmn_err(CE_WARN,
2897 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
2898 		mutex_exit(&ldcp->lock);
2899 		return (EIO);
2900 	}
2901 	/* reset the channel state if the channel went down */
2902 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
2903 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2904 		mutex_enter(&ldcp->tx_lock);
2905 		i_ldc_reset(ldcp);
2906 		mutex_exit(&ldcp->tx_lock);
2907 		mutex_exit(&ldcp->lock);
2908 		return (ECONNRESET);
2909 	}
2910 
2911 	if (rx_head != rx_tail) {
2912 		D1(ldcp->id, "ldc_chkq: (0x%llx) queue has pkt(s)\n", ldcp->id);
2913 		*hasdata = B_TRUE;
2914 	}
2915 
2916 	mutex_exit(&ldcp->lock);
2917 
2918 	return (0);
2919 }
2920 
2921 
2922 /*
2923  * Read 'size' amount of bytes or less. If incoming buffer
2924  * is more than 'size', ENOBUFS is returned.
2925  *
2926  * On return, size contains the number of bytes read.
2927  */
2928 int
2929 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
2930 {
2931 	ldc_chan_t 	*ldcp;
2932 	uint64_t 	rx_head = 0, rx_tail = 0;
2933 	int		rv = 0, exit_val;
2934 
2935 	if (handle == NULL) {
2936 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
2937 		return (EINVAL);
2938 	}
2939 
2940 	ldcp = (ldc_chan_t *)handle;
2941 
2942 	/* channel lock */
2943 	mutex_enter(&ldcp->lock);
2944 
2945 	if (ldcp->tstate != TS_UP) {
2946 		DWARN(ldcp->id,
2947 		    "ldc_read: (0x%llx) channel is not in UP state\n",
2948 		    ldcp->id);
2949 		exit_val = ECONNRESET;
2950 	} else {
2951 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
2952 	}
2953 
2954 	/*
2955 	 * if queue has been drained - clear interrupt
2956 	 */
2957 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2958 	    &ldcp->link_state);
2959 	if (exit_val == 0 && rv == 0 && rx_head == rx_tail) {
2960 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2961 	}
2962 
2963 	mutex_exit(&ldcp->lock);
2964 	return (exit_val);
2965 }
2966 
2967 /*
2968  * Basic raw mondo read -
2969  * no interpretation of mondo contents at all.
2970  *
2971  * Enter and exit with ldcp->lock held by caller
2972  */
2973 static int
2974 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
2975 {
2976 	uint64_t 	q_size_mask;
2977 	ldc_msg_t 	*msgp;
2978 	uint8_t		*msgbufp;
2979 	int		rv = 0, space;
2980 	uint64_t 	rx_head, rx_tail;
2981 
2982 	space = *sizep;
2983 
2984 	if (space < LDC_PAYLOAD_SIZE_RAW)
2985 		return (ENOBUFS);
2986 
2987 	ASSERT(mutex_owned(&ldcp->lock));
2988 
2989 	/* compute mask for increment */
2990 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
2991 
2992 	/*
2993 	 * Read packet(s) from the queue
2994 	 */
2995 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2996 	    &ldcp->link_state);
2997 	if (rv != 0) {
2998 		cmn_err(CE_WARN,
2999 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
3000 		    ldcp->id);
3001 		return (EIO);
3002 	}
3003 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
3004 		" rxt=0x%llx, st=0x%llx\n",
3005 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
3006 
3007 	/* reset the channel state if the channel went down */
3008 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3009 		mutex_enter(&ldcp->tx_lock);
3010 		i_ldc_reset(ldcp);
3011 		mutex_exit(&ldcp->tx_lock);
3012 		return (ECONNRESET);
3013 	}
3014 
3015 	/*
3016 	 * Check for empty queue
3017 	 */
3018 	if (rx_head == rx_tail) {
3019 		*sizep = 0;
3020 		return (0);
3021 	}
3022 
3023 	/* get the message */
3024 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
3025 
3026 	/* if channel is in RAW mode, copy data and return */
3027 	msgbufp = (uint8_t *)&(msgp->raw[0]);
3028 
3029 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3030 
3031 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3032 
3033 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3034 
3035 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3036 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3037 
3038 	return (rv);
3039 }
3040 
3041 /*
3042  * Process LDC mondos to build larger packets
3043  * with either un-reliable or reliable delivery.
3044  *
3045  * Enter and exit with ldcp->lock held by caller
3046  */
3047 static int
3048 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3049 {
3050 	int		rv = 0;
3051 	uint64_t 	rx_head = 0, rx_tail = 0;
3052 	uint64_t 	curr_head = 0;
3053 	ldc_msg_t 	*msg;
3054 	caddr_t 	target;
3055 	size_t 		len = 0, bytes_read = 0;
3056 	int 		retries = 0;
3057 	uint64_t 	q_size_mask;
3058 	uint64_t	first_fragment = 0;
3059 
3060 	target = target_bufp;
3061 
3062 	ASSERT(mutex_owned(&ldcp->lock));
3063 
3064 	/* compute mask for increment */
3065 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3066 
3067 	/*
3068 	 * Read packet(s) from the queue
3069 	 */
3070 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3071 	    &ldcp->link_state);
3072 	if (rv != 0) {
3073 		cmn_err(CE_WARN,
3074 		    "ldc_read: (0x%lx) unable to read queue ptrs",
3075 		    ldcp->id);
3076 		return (EIO);
3077 	}
3078 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3079 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3080 
3081 	/* reset the channel state if the channel went down */
3082 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3083 		mutex_enter(&ldcp->tx_lock);
3084 		i_ldc_reset(ldcp);
3085 		mutex_exit(&ldcp->tx_lock);
3086 		return (ECONNRESET);
3087 	}
3088 
3089 	for (;;) {
3090 
3091 		if (curr_head == rx_tail) {
3092 			rv = hv_ldc_rx_get_state(ldcp->id,
3093 			    &rx_head, &rx_tail, &ldcp->link_state);
3094 			if (rv != 0) {
3095 				cmn_err(CE_WARN,
3096 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3097 				    ldcp->id);
3098 				return (EIO);
3099 			}
3100 			/* reset the channel state if the channel went down */
3101 			if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3102 				mutex_enter(&ldcp->tx_lock);
3103 				i_ldc_reset(ldcp);
3104 				mutex_exit(&ldcp->tx_lock);
3105 				return (ECONNRESET);
3106 			}
3107 		}
3108 
3109 		if (curr_head == rx_tail) {
3110 
3111 			/* If in the middle of a fragmented xfer */
3112 			if (first_fragment != 0) {
3113 
3114 				/* wait for ldc_delay usecs */
3115 				drv_usecwait(ldc_delay);
3116 
3117 				if (++retries < ldc_max_retries)
3118 					continue;
3119 
3120 				*sizep = 0;
3121 				ldcp->last_msg_rcd = first_fragment - 1;
3122 				DWARN(DBG_ALL_LDCS,
3123 					"ldc_read: (0x%llx) read timeout",
3124 					ldcp->id);
3125 				return (ETIMEDOUT);
3126 			}
3127 			*sizep = 0;
3128 			break;
3129 		}
3130 		retries = 0;
3131 
3132 		D2(ldcp->id,
3133 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3134 		    ldcp->id, curr_head, rx_head, rx_tail);
3135 
3136 		/* get the message */
3137 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3138 
3139 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3140 		    ldcp->rx_q_va + curr_head);
3141 
3142 		/* Check the message ID for the message received */
3143 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3144 
3145 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3146 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3147 
3148 			/* throw away data */
3149 			bytes_read = 0;
3150 
3151 			/* Reset last_msg_rcd to start of message */
3152 			if (first_fragment != 0) {
3153 				ldcp->last_msg_rcd = first_fragment - 1;
3154 				first_fragment = 0;
3155 			}
3156 			/*
3157 			 * Send a NACK -- invalid seqid
3158 			 * get the current tail for the response
3159 			 */
3160 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3161 			    (msg->ctrl & LDC_CTRL_MASK));
3162 			if (rv) {
3163 				cmn_err(CE_NOTE,
3164 				    "ldc_read: (0x%lx) err sending "
3165 				    "NACK msg\n", ldcp->id);
3166 
3167 				/* if cannot send NACK - reset channel */
3168 				mutex_enter(&ldcp->tx_lock);
3169 				i_ldc_reset(ldcp);
3170 				mutex_exit(&ldcp->tx_lock);
3171 				rv = ECONNRESET;
3172 				break;
3173 			}
3174 
3175 			/* purge receive queue */
3176 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3177 
3178 			break;
3179 		}
3180 
3181 		/*
3182 		 * Process any messages of type CTRL messages
3183 		 * Future implementations should try to pass these
3184 		 * to LDC link by resetting the intr state.
3185 		 *
3186 		 * NOTE: not done as a switch() as type can be both ctrl+data
3187 		 */
3188 		if (msg->type & LDC_CTRL) {
3189 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3190 				if (rv == EAGAIN)
3191 					continue;
3192 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3193 				*sizep = 0;
3194 				bytes_read = 0;
3195 				break;
3196 			}
3197 		}
3198 
3199 		/* process data ACKs */
3200 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3201 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3202 				*sizep = 0;
3203 				bytes_read = 0;
3204 				break;
3205 			}
3206 		}
3207 
3208 		/* process data messages */
3209 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3210 
3211 			uint8_t *msgbuf = (uint8_t *)(
3212 				(ldcp->mode == LDC_MODE_RELIABLE ||
3213 				ldcp->mode == LDC_MODE_STREAM)
3214 				? msg->rdata : msg->udata);
3215 
3216 			D2(ldcp->id,
3217 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3218 
3219 			/* get the packet length */
3220 			len = (msg->env & LDC_LEN_MASK);
3221 
3222 				/*
3223 				 * FUTURE OPTIMIZATION:
3224 				 * dont need to set q head for every
3225 				 * packet we read just need to do this when
3226 				 * we are done or need to wait for more
3227 				 * mondos to make a full packet - this is
3228 				 * currently expensive.
3229 				 */
3230 
3231 			if (first_fragment == 0) {
3232 
3233 				/*
3234 				 * first packets should always have the start
3235 				 * bit set (even for a single packet). If not
3236 				 * throw away the packet
3237 				 */
3238 				if (!(msg->env & LDC_FRAG_START)) {
3239 
3240 					DWARN(DBG_ALL_LDCS,
3241 					    "ldc_read: (0x%llx) not start - "
3242 					    "frag=%x\n", ldcp->id,
3243 					    (msg->env) & LDC_FRAG_MASK);
3244 
3245 					/* toss pkt, inc head, cont reading */
3246 					bytes_read = 0;
3247 					target = target_bufp;
3248 					curr_head =
3249 						(curr_head + LDC_PACKET_SIZE)
3250 						& q_size_mask;
3251 					if (rv = i_ldc_set_rx_head(ldcp,
3252 						curr_head))
3253 						break;
3254 
3255 					continue;
3256 				}
3257 
3258 				first_fragment = msg->seqid;
3259 			} else {
3260 				/* check to see if this is a pkt w/ START bit */
3261 				if (msg->env & LDC_FRAG_START) {
3262 					DWARN(DBG_ALL_LDCS,
3263 					    "ldc_read:(0x%llx) unexpected pkt"
3264 					    " env=0x%x discarding %d bytes,"
3265 					    " lastmsg=%d, currentmsg=%d\n",
3266 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3267 					    bytes_read, ldcp->last_msg_rcd,
3268 					    msg->seqid);
3269 
3270 					/* throw data we have read so far */
3271 					bytes_read = 0;
3272 					target = target_bufp;
3273 					first_fragment = msg->seqid;
3274 
3275 					if (rv = i_ldc_set_rx_head(ldcp,
3276 						curr_head))
3277 						break;
3278 				}
3279 			}
3280 
3281 			/* copy (next) pkt into buffer */
3282 			if (len <= (*sizep - bytes_read)) {
3283 				bcopy(msgbuf, target, len);
3284 				target += len;
3285 				bytes_read += len;
3286 			} else {
3287 				/*
3288 				 * there is not enough space in the buffer to
3289 				 * read this pkt. throw message away & continue
3290 				 * reading data from queue
3291 				 */
3292 				DWARN(DBG_ALL_LDCS,
3293 				    "ldc_read: (0x%llx) buffer too small, "
3294 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3295 				    curr_head, *sizep, bytes_read+len);
3296 
3297 				first_fragment = 0;
3298 				target = target_bufp;
3299 				bytes_read = 0;
3300 
3301 				/* throw away everything received so far */
3302 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3303 					break;
3304 
3305 				/* continue reading remaining pkts */
3306 				continue;
3307 			}
3308 		}
3309 
3310 		/* set the message id */
3311 		ldcp->last_msg_rcd = msg->seqid;
3312 
3313 		/* move the head one position */
3314 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3315 
3316 		if (msg->env & LDC_FRAG_STOP) {
3317 
3318 			/*
3319 			 * All pkts that are part of this fragmented transfer
3320 			 * have been read or this was a single pkt read
3321 			 * or there was an error
3322 			 */
3323 
3324 			/* set the queue head */
3325 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3326 				bytes_read = 0;
3327 
3328 			*sizep = bytes_read;
3329 
3330 			break;
3331 		}
3332 
3333 		/* advance head if it is a DATA ACK */
3334 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3335 
3336 			/* set the queue head */
3337 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3338 				bytes_read = 0;
3339 				break;
3340 			}
3341 
3342 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3343 			    ldcp->id, curr_head);
3344 		}
3345 
3346 	} /* for (;;) */
3347 
3348 
3349 	/*
3350 	 * If useful data was read - Send msg ACK
3351 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3352 	 */
3353 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3354 		ldcp->mode == LDC_MODE_STREAM)) {
3355 
3356 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3357 		if (rv) {
3358 			cmn_err(CE_NOTE,
3359 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3360 
3361 			/* if cannot send ACK - reset channel */
3362 			mutex_enter(&ldcp->tx_lock);
3363 			i_ldc_reset(ldcp);
3364 			mutex_exit(&ldcp->tx_lock);
3365 			rv = ECONNRESET;
3366 		}
3367 	}
3368 
3369 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3370 
3371 	return (rv);
3372 }
3373 
3374 /*
3375  * Use underlying reliable packet mechanism to fetch
3376  * and buffer incoming packets so we can hand them back as
3377  * a basic byte stream.
3378  *
3379  * Enter and exit with ldcp->lock held by caller
3380  */
3381 static int
3382 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3383 {
3384 	int	rv;
3385 	size_t	size;
3386 
3387 	ASSERT(mutex_owned(&ldcp->lock));
3388 
3389 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3390 		ldcp->id, *sizep);
3391 
3392 	if (ldcp->stream_remains == 0) {
3393 		size = ldcp->mtu;
3394 		rv = i_ldc_read_packet(ldcp,
3395 			(caddr_t)ldcp->stream_bufferp, &size);
3396 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3397 			ldcp->id, size);
3398 
3399 		if (rv != 0)
3400 			return (rv);
3401 
3402 		ldcp->stream_remains = size;
3403 		ldcp->stream_offset = 0;
3404 	}
3405 
3406 	size = MIN(ldcp->stream_remains, *sizep);
3407 
3408 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3409 	ldcp->stream_offset += size;
3410 	ldcp->stream_remains -= size;
3411 
3412 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3413 		ldcp->id, size);
3414 
3415 	*sizep = size;
3416 	return (0);
3417 }
3418 
3419 /*
3420  * Write specified amount of bytes to the channel
3421  * in multiple pkts of pkt_payload size. Each
3422  * packet is tagged with an unique packet ID in
3423  * the case of a reliable link.
3424  *
3425  * On return, size contains the number of bytes written.
3426  */
3427 int
3428 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3429 {
3430 	ldc_chan_t	*ldcp;
3431 	int		rv = 0;
3432 
3433 	if (handle == NULL) {
3434 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3435 		return (EINVAL);
3436 	}
3437 	ldcp = (ldc_chan_t *)handle;
3438 
3439 	/* check if writes can occur */
3440 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3441 		/*
3442 		 * Could not get the lock - channel could
3443 		 * be in the process of being unconfigured
3444 		 * or reader has encountered an error
3445 		 */
3446 		return (EAGAIN);
3447 	}
3448 
3449 	/* check if non-zero data to write */
3450 	if (buf == NULL || sizep == NULL) {
3451 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3452 		    ldcp->id);
3453 		mutex_exit(&ldcp->tx_lock);
3454 		return (EINVAL);
3455 	}
3456 
3457 	if (*sizep == 0) {
3458 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3459 		    ldcp->id);
3460 		mutex_exit(&ldcp->tx_lock);
3461 		return (0);
3462 	}
3463 
3464 	/* Check if channel is UP for data exchange */
3465 	if (ldcp->tstate != TS_UP) {
3466 		DWARN(ldcp->id,
3467 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3468 		    ldcp->id);
3469 		*sizep = 0;
3470 		rv = ECONNRESET;
3471 	} else {
3472 		rv = ldcp->write_p(ldcp, buf, sizep);
3473 	}
3474 
3475 	mutex_exit(&ldcp->tx_lock);
3476 
3477 	return (rv);
3478 }
3479 
3480 /*
3481  * Write a raw packet to the channel
3482  * On return, size contains the number of bytes written.
3483  */
3484 static int
3485 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3486 {
3487 	ldc_msg_t 	*ldcmsg;
3488 	uint64_t 	tx_head, tx_tail, new_tail;
3489 	int		rv = 0;
3490 	size_t		size;
3491 
3492 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3493 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3494 
3495 	size = *sizep;
3496 
3497 	/*
3498 	 * Check to see if the packet size is less than or
3499 	 * equal to packet size support in raw mode
3500 	 */
3501 	if (size > ldcp->pkt_payload) {
3502 		DWARN(ldcp->id,
3503 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3504 		    ldcp->id, *sizep);
3505 		*sizep = 0;
3506 		return (EMSGSIZE);
3507 	}
3508 
3509 	/* get the qptrs for the tx queue */
3510 	rv = hv_ldc_tx_get_state(ldcp->id,
3511 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3512 	if (rv != 0) {
3513 		cmn_err(CE_WARN,
3514 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3515 		*sizep = 0;
3516 		return (EIO);
3517 	}
3518 
3519 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3520 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3521 		DWARN(ldcp->id,
3522 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3523 
3524 		*sizep = 0;
3525 		if (mutex_tryenter(&ldcp->lock)) {
3526 			i_ldc_reset(ldcp);
3527 			mutex_exit(&ldcp->lock);
3528 		} else {
3529 			/*
3530 			 * Release Tx lock, and then reacquire channel
3531 			 * and Tx lock in correct order
3532 			 */
3533 			mutex_exit(&ldcp->tx_lock);
3534 			mutex_enter(&ldcp->lock);
3535 			mutex_enter(&ldcp->tx_lock);
3536 			i_ldc_reset(ldcp);
3537 			mutex_exit(&ldcp->lock);
3538 		}
3539 		return (ECONNRESET);
3540 	}
3541 
3542 	tx_tail = ldcp->tx_tail;
3543 	tx_head = ldcp->tx_head;
3544 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3545 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3546 
3547 	if (new_tail == tx_head) {
3548 		DWARN(DBG_ALL_LDCS,
3549 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3550 		*sizep = 0;
3551 		return (EWOULDBLOCK);
3552 	}
3553 
3554 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3555 	    ldcp->id, size);
3556 
3557 	/* Send the data now */
3558 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3559 
3560 	/* copy the data into pkt */
3561 	bcopy((uint8_t *)buf, ldcmsg, size);
3562 
3563 	/* increment tail */
3564 	tx_tail = new_tail;
3565 
3566 	/*
3567 	 * All packets have been copied into the TX queue
3568 	 * update the tail ptr in the HV
3569 	 */
3570 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3571 	if (rv) {
3572 		if (rv == EWOULDBLOCK) {
3573 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3574 			    ldcp->id);
3575 			*sizep = 0;
3576 			return (EWOULDBLOCK);
3577 		}
3578 
3579 		*sizep = 0;
3580 		if (mutex_tryenter(&ldcp->lock)) {
3581 			i_ldc_reset(ldcp);
3582 			mutex_exit(&ldcp->lock);
3583 		} else {
3584 			/*
3585 			 * Release Tx lock, and then reacquire channel
3586 			 * and Tx lock in correct order
3587 			 */
3588 			mutex_exit(&ldcp->tx_lock);
3589 			mutex_enter(&ldcp->lock);
3590 			mutex_enter(&ldcp->tx_lock);
3591 			i_ldc_reset(ldcp);
3592 			mutex_exit(&ldcp->lock);
3593 		}
3594 		return (ECONNRESET);
3595 	}
3596 
3597 	ldcp->tx_tail = tx_tail;
3598 	*sizep = size;
3599 
3600 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3601 
3602 	return (rv);
3603 }
3604 
3605 
3606 /*
3607  * Write specified amount of bytes to the channel
3608  * in multiple pkts of pkt_payload size. Each
3609  * packet is tagged with an unique packet ID in
3610  * the case of a reliable link.
3611  *
3612  * On return, size contains the number of bytes written.
3613  * This function needs to ensure that the write size is < MTU size
3614  */
3615 static int
3616 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3617 {
3618 	ldc_msg_t 	*ldcmsg;
3619 	uint64_t 	tx_head, tx_tail, new_tail, start;
3620 	uint64_t	txq_size_mask, numavail;
3621 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3622 	size_t 		len, bytes_written = 0, remaining;
3623 	int		rv;
3624 	uint32_t	curr_seqid;
3625 
3626 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3627 
3628 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3629 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3630 		ldcp->mode == LDC_MODE_STREAM);
3631 
3632 	/* compute mask for increment */
3633 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3634 
3635 	/* get the qptrs for the tx queue */
3636 	rv = hv_ldc_tx_get_state(ldcp->id,
3637 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3638 	if (rv != 0) {
3639 		cmn_err(CE_WARN,
3640 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3641 		*size = 0;
3642 		return (EIO);
3643 	}
3644 
3645 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3646 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3647 		DWARN(ldcp->id,
3648 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3649 		*size = 0;
3650 		if (mutex_tryenter(&ldcp->lock)) {
3651 			i_ldc_reset(ldcp);
3652 			mutex_exit(&ldcp->lock);
3653 		} else {
3654 			/*
3655 			 * Release Tx lock, and then reacquire channel
3656 			 * and Tx lock in correct order
3657 			 */
3658 			mutex_exit(&ldcp->tx_lock);
3659 			mutex_enter(&ldcp->lock);
3660 			mutex_enter(&ldcp->tx_lock);
3661 			i_ldc_reset(ldcp);
3662 			mutex_exit(&ldcp->lock);
3663 		}
3664 		return (ECONNRESET);
3665 	}
3666 
3667 	tx_tail = ldcp->tx_tail;
3668 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3669 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3670 
3671 	/*
3672 	 * Link mode determines whether we use HV Tx head or the
3673 	 * private protocol head (corresponding to last ACKd pkt) for
3674 	 * determining how much we can write
3675 	 */
3676 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3677 		ldcp->mode == LDC_MODE_STREAM)
3678 		? ldcp->tx_ackd_head : ldcp->tx_head;
3679 	if (new_tail == tx_head) {
3680 		DWARN(DBG_ALL_LDCS,
3681 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3682 		*size = 0;
3683 		return (EWOULDBLOCK);
3684 	}
3685 
3686 	/*
3687 	 * Make sure that the LDC Tx queue has enough space
3688 	 */
3689 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3690 		+ ldcp->tx_q_entries - 1;
3691 	numavail %= ldcp->tx_q_entries;
3692 
3693 	if (*size > (numavail * ldcp->pkt_payload)) {
3694 		DWARN(DBG_ALL_LDCS,
3695 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3696 		return (EWOULDBLOCK);
3697 	}
3698 
3699 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3700 	    ldcp->id, *size);
3701 
3702 	/* Send the data now */
3703 	bytes_written = 0;
3704 	curr_seqid = ldcp->last_msg_snt;
3705 	start = tx_tail;
3706 
3707 	while (*size > bytes_written) {
3708 
3709 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3710 
3711 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3712 			ldcp->mode == LDC_MODE_STREAM)
3713 			? ldcmsg->rdata : ldcmsg->udata);
3714 
3715 		ldcmsg->type = LDC_DATA;
3716 		ldcmsg->stype = LDC_INFO;
3717 		ldcmsg->ctrl = 0;
3718 
3719 		remaining = *size - bytes_written;
3720 		len = min(ldcp->pkt_payload, remaining);
3721 		ldcmsg->env = (uint8_t)len;
3722 
3723 		curr_seqid++;
3724 		ldcmsg->seqid = curr_seqid;
3725 
3726 		DUMP_LDC_PKT(ldcp, "ldc_write snd data", (uint64_t)ldcmsg);
3727 
3728 		/* copy the data into pkt */
3729 		bcopy(source, msgbuf, len);
3730 
3731 		source += len;
3732 		bytes_written += len;
3733 
3734 		/* increment tail */
3735 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3736 
3737 		ASSERT(tx_tail != tx_head);
3738 	}
3739 
3740 	/* Set the start and stop bits */
3741 	ldcmsg->env |= LDC_FRAG_STOP;
3742 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3743 	ldcmsg->env |= LDC_FRAG_START;
3744 
3745 	/*
3746 	 * All packets have been copied into the TX queue
3747 	 * update the tail ptr in the HV
3748 	 */
3749 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3750 	if (rv == 0) {
3751 		ldcp->tx_tail = tx_tail;
3752 		ldcp->last_msg_snt = curr_seqid;
3753 		*size = bytes_written;
3754 	} else {
3755 		int rv2;
3756 
3757 		if (rv != EWOULDBLOCK) {
3758 			*size = 0;
3759 			if (mutex_tryenter(&ldcp->lock)) {
3760 				i_ldc_reset(ldcp);
3761 				mutex_exit(&ldcp->lock);
3762 			} else {
3763 				/*
3764 				 * Release Tx lock, and then reacquire channel
3765 				 * and Tx lock in correct order
3766 				 */
3767 				mutex_exit(&ldcp->tx_lock);
3768 				mutex_enter(&ldcp->lock);
3769 				mutex_enter(&ldcp->tx_lock);
3770 				i_ldc_reset(ldcp);
3771 				mutex_exit(&ldcp->lock);
3772 			}
3773 			return (ECONNRESET);
3774 		}
3775 
3776 		DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
3777 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
3778 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
3779 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
3780 
3781 		rv2 = hv_ldc_tx_get_state(ldcp->id,
3782 		    &tx_head, &tx_tail, &ldcp->link_state);
3783 
3784 		DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
3785 			"(head 0x%x, tail 0x%x state 0x%x)\n",
3786 			rv2, tx_head, tx_tail, ldcp->link_state);
3787 
3788 		*size = 0;
3789 	}
3790 
3791 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
3792 
3793 	return (rv);
3794 }
3795 
3796 /*
3797  * Write specified amount of bytes to the channel
3798  * in multiple pkts of pkt_payload size. Each
3799  * packet is tagged with an unique packet ID in
3800  * the case of a reliable link.
3801  *
3802  * On return, size contains the number of bytes written.
3803  * This function needs to ensure that the write size is < MTU size
3804  */
3805 static int
3806 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3807 {
3808 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3809 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
3810 
3811 	/* Truncate packet to max of MTU size */
3812 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
3813 	return (i_ldc_write_packet(ldcp, buf, sizep));
3814 }
3815 
3816 
3817 /*
3818  * Interfaces for channel nexus to register/unregister with LDC module
3819  * The nexus will register functions to be used to register individual
3820  * channels with the nexus and enable interrupts for the channels
3821  */
3822 int
3823 ldc_register(ldc_cnex_t *cinfo)
3824 {
3825 	ldc_chan_t	*ldcp;
3826 
3827 	if (cinfo == NULL || cinfo->dip == NULL ||
3828 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
3829 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
3830 	    cinfo->clr_intr == NULL) {
3831 
3832 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
3833 		return (EINVAL);
3834 	}
3835 
3836 	mutex_enter(&ldcssp->lock);
3837 
3838 	/* nexus registration */
3839 	ldcssp->cinfo.dip = cinfo->dip;
3840 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
3841 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
3842 	ldcssp->cinfo.add_intr = cinfo->add_intr;
3843 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
3844 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
3845 
3846 	/* register any channels that might have been previously initialized */
3847 	ldcp = ldcssp->chan_list;
3848 	while (ldcp) {
3849 		if ((ldcp->tstate & TS_QCONF_RDY) &&
3850 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
3851 			(void) i_ldc_register_channel(ldcp);
3852 
3853 		ldcp = ldcp->next;
3854 	}
3855 
3856 	mutex_exit(&ldcssp->lock);
3857 
3858 	return (0);
3859 }
3860 
3861 int
3862 ldc_unregister(ldc_cnex_t *cinfo)
3863 {
3864 	if (cinfo == NULL || cinfo->dip == NULL) {
3865 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
3866 		return (EINVAL);
3867 	}
3868 
3869 	mutex_enter(&ldcssp->lock);
3870 
3871 	if (cinfo->dip != ldcssp->cinfo.dip) {
3872 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
3873 		mutex_exit(&ldcssp->lock);
3874 		return (EINVAL);
3875 	}
3876 
3877 	/* nexus unregister */
3878 	ldcssp->cinfo.dip = NULL;
3879 	ldcssp->cinfo.reg_chan = NULL;
3880 	ldcssp->cinfo.unreg_chan = NULL;
3881 	ldcssp->cinfo.add_intr = NULL;
3882 	ldcssp->cinfo.rem_intr = NULL;
3883 	ldcssp->cinfo.clr_intr = NULL;
3884 
3885 	mutex_exit(&ldcssp->lock);
3886 
3887 	return (0);
3888 }
3889 
3890 
3891 /* ------------------------------------------------------------------------- */
3892 
3893 /*
3894  * Allocate a memory handle for the channel and link it into the list
3895  * Also choose which memory table to use if this is the first handle
3896  * being assigned to this channel
3897  */
3898 int
3899 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
3900 {
3901 	ldc_chan_t 	*ldcp;
3902 	ldc_mhdl_t	*mhdl;
3903 	int 		rv;
3904 
3905 	if (handle == NULL) {
3906 		DWARN(DBG_ALL_LDCS,
3907 		    "ldc_mem_alloc_handle: invalid channel handle\n");
3908 		return (EINVAL);
3909 	}
3910 	ldcp = (ldc_chan_t *)handle;
3911 
3912 	mutex_enter(&ldcp->lock);
3913 
3914 	/* check to see if channel is initalized */
3915 	if (ldcp->tstate < TS_INIT) {
3916 		DWARN(ldcp->id,
3917 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
3918 		    ldcp->id);
3919 		mutex_exit(&ldcp->lock);
3920 		return (EINVAL);
3921 	}
3922 
3923 	/*
3924 	 * If this channel is allocating a mem handle for the
3925 	 * first time allocate it a memory map table and initialize it
3926 	 */
3927 	if (ldcp->mtbl == NULL) {
3928 
3929 		ldc_mtbl_t *mtbl;
3930 
3931 		/* Allocate and initialize the map table structure */
3932 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
3933 		mtbl->num_entries = mtbl->num_avail = ldc_maptable_entries;
3934 		mtbl->size = ldc_maptable_entries * sizeof (ldc_mte_slot_t);
3935 		mtbl->next_entry = NULL;
3936 
3937 		/* Allocate the table itself */
3938 		mtbl->table = (ldc_mte_slot_t *)
3939 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
3940 		if (mtbl->table == NULL) {
3941 			cmn_err(CE_WARN,
3942 			    "ldc_mem_alloc_handle: (0x%lx) error allocating "
3943 			    "table memory", ldcp->id);
3944 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3945 			mutex_exit(&ldcp->lock);
3946 			return (ENOMEM);
3947 		}
3948 
3949 		/* zero out the memory */
3950 		bzero(mtbl->table, mtbl->size);
3951 
3952 		/* initialize the lock */
3953 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
3954 
3955 		/* register table for this channel */
3956 		rv = hv_ldc_set_map_table(ldcp->id,
3957 		    va_to_pa(mtbl->table), mtbl->num_entries);
3958 		if (rv != 0) {
3959 			cmn_err(CE_WARN,
3960 			    "ldc_mem_alloc_handle: (0x%lx) err %d mapping tbl",
3961 			    ldcp->id, rv);
3962 			contig_mem_free(mtbl->table, mtbl->size);
3963 			mutex_destroy(&mtbl->lock);
3964 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3965 			mutex_exit(&ldcp->lock);
3966 			return (EIO);
3967 		}
3968 
3969 		ldcp->mtbl = mtbl;
3970 
3971 		D1(ldcp->id,
3972 		    "ldc_mem_alloc_handle: (0x%llx) alloc'd map table 0x%llx\n",
3973 		    ldcp->id, ldcp->mtbl->table);
3974 	}
3975 
3976 	/* allocate handle for channel */
3977 	mhdl = kmem_zalloc(sizeof (ldc_mhdl_t), KM_SLEEP);
3978 
3979 	/* initialize the lock */
3980 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
3981 
3982 	mhdl->status = LDC_UNBOUND;
3983 	mhdl->ldcp = ldcp;
3984 
3985 	/* insert memory handle (@ head) into list */
3986 	if (ldcp->mhdl_list == NULL) {
3987 		ldcp->mhdl_list = mhdl;
3988 		mhdl->next = NULL;
3989 	} else {
3990 		/* insert @ head */
3991 		mhdl->next = ldcp->mhdl_list;
3992 		ldcp->mhdl_list = mhdl;
3993 	}
3994 
3995 	/* return the handle */
3996 	*mhandle = (ldc_mem_handle_t)mhdl;
3997 
3998 	mutex_exit(&ldcp->lock);
3999 
4000 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
4001 	    ldcp->id, mhdl);
4002 
4003 	return (0);
4004 }
4005 
4006 /*
4007  * Free memory handle for the channel and unlink it from the list
4008  */
4009 int
4010 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
4011 {
4012 	ldc_mhdl_t 	*mhdl, *phdl;
4013 	ldc_chan_t 	*ldcp;
4014 
4015 	if (mhandle == NULL) {
4016 		DWARN(DBG_ALL_LDCS,
4017 		    "ldc_mem_free_handle: invalid memory handle\n");
4018 		return (EINVAL);
4019 	}
4020 	mhdl = (ldc_mhdl_t *)mhandle;
4021 
4022 	mutex_enter(&mhdl->lock);
4023 
4024 	ldcp = mhdl->ldcp;
4025 
4026 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4027 		DWARN(ldcp->id,
4028 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4029 		    mhdl);
4030 		mutex_exit(&mhdl->lock);
4031 		return (EINVAL);
4032 	}
4033 	mutex_exit(&mhdl->lock);
4034 
4035 	mutex_enter(&ldcp->mlist_lock);
4036 
4037 	phdl = ldcp->mhdl_list;
4038 
4039 	/* first handle */
4040 	if (phdl == mhdl) {
4041 		ldcp->mhdl_list = mhdl->next;
4042 		mutex_destroy(&mhdl->lock);
4043 		kmem_free(mhdl, sizeof (ldc_mhdl_t));
4044 		D1(ldcp->id,
4045 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4046 		    ldcp->id, mhdl);
4047 	} else {
4048 		/* walk the list - unlink and free */
4049 		while (phdl != NULL) {
4050 			if (phdl->next == mhdl) {
4051 				phdl->next = mhdl->next;
4052 				mutex_destroy(&mhdl->lock);
4053 				kmem_free(mhdl, sizeof (ldc_mhdl_t));
4054 				D1(ldcp->id,
4055 				    "ldc_mem_free_handle: (0x%llx) freed "
4056 				    "handle 0x%llx\n", ldcp->id, mhdl);
4057 				break;
4058 			}
4059 			phdl = phdl->next;
4060 		}
4061 	}
4062 
4063 	if (phdl == NULL) {
4064 		DWARN(ldcp->id,
4065 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4066 		mutex_exit(&ldcp->mlist_lock);
4067 		return (EINVAL);
4068 	}
4069 
4070 	mutex_exit(&ldcp->mlist_lock);
4071 
4072 	return (0);
4073 }
4074 
4075 /*
4076  * Bind a memory handle to a virtual address.
4077  * The virtual address is converted to the corresponding real addresses.
4078  * Returns pointer to the first ldc_mem_cookie and the total number
4079  * of cookies for this virtual address. Other cookies can be obtained
4080  * using the ldc_mem_nextcookie() call. If the pages are stored in
4081  * consecutive locations in the table, a single cookie corresponding to
4082  * the first location is returned. The cookie size spans all the entries.
4083  *
4084  * If the VA corresponds to a page that is already being exported, reuse
4085  * the page and do not export it again. Bump the page's use count.
4086  */
4087 int
4088 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4089     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4090 {
4091 	ldc_mhdl_t	*mhdl;
4092 	ldc_chan_t 	*ldcp;
4093 	ldc_mtbl_t	*mtbl;
4094 	ldc_memseg_t	*memseg;
4095 	ldc_mte_t	tmp_mte;
4096 	uint64_t	index, prev_index = 0;
4097 	int64_t		cookie_idx;
4098 	uintptr_t	raddr, ra_aligned;
4099 	uint64_t	psize, poffset, v_offset;
4100 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4101 	pgcnt_t		npages;
4102 	caddr_t		v_align, addr;
4103 	int 		i;
4104 
4105 	if (mhandle == NULL) {
4106 		DWARN(DBG_ALL_LDCS,
4107 		    "ldc_mem_bind_handle: invalid memory handle\n");
4108 		return (EINVAL);
4109 	}
4110 	mhdl = (ldc_mhdl_t *)mhandle;
4111 	ldcp = mhdl->ldcp;
4112 	mtbl = ldcp->mtbl;
4113 
4114 	/* clear count */
4115 	*ccount = 0;
4116 
4117 	mutex_enter(&mhdl->lock);
4118 
4119 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4120 		DWARN(ldcp->id,
4121 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4122 		    mhandle);
4123 		mutex_exit(&mhdl->lock);
4124 		return (EINVAL);
4125 	}
4126 
4127 	/* Force address and size to be 8-byte aligned */
4128 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4129 		DWARN(ldcp->id,
4130 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4131 		mutex_exit(&mhdl->lock);
4132 		return (EINVAL);
4133 	}
4134 
4135 	/* FUTURE: get the page size, pgsz code, and shift */
4136 	pg_size = MMU_PAGESIZE;
4137 	pg_size_code = page_szc(pg_size);
4138 	pg_shift = page_get_shift(pg_size_code);
4139 	pg_mask = ~(pg_size - 1);
4140 
4141 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4142 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4143 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4144 
4145 	/* aligned VA and its offset */
4146 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4147 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4148 
4149 	npages = (len+v_offset)/pg_size;
4150 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4151 
4152 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4153 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4154 	    ldcp->id, vaddr, v_align, v_offset, npages);
4155 
4156 	/* lock the memory table - exclusive access to channel */
4157 	mutex_enter(&mtbl->lock);
4158 
4159 	if (npages > mtbl->num_avail) {
4160 		DWARN(ldcp->id,
4161 		    "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4162 		    ldcp->id);
4163 		mutex_exit(&mtbl->lock);
4164 		mutex_exit(&mhdl->lock);
4165 		return (ENOMEM);
4166 	}
4167 
4168 	/* Allocate a memseg structure */
4169 	memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
4170 
4171 	/* Allocate memory to store all pages and cookies */
4172 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4173 	memseg->cookies =
4174 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4175 
4176 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4177 	    ldcp->id, npages);
4178 
4179 	addr = v_align;
4180 
4181 	/*
4182 	 * Table slots are used in a round-robin manner. The algorithm permits
4183 	 * inserting duplicate entries. Slots allocated earlier will typically
4184 	 * get freed before we get back to reusing the slot.Inserting duplicate
4185 	 * entries should be OK as we only lookup entries using the cookie addr
4186 	 * i.e. tbl index, during export, unexport and copy operation.
4187 	 *
4188 	 * One implementation what was tried was to search for a duplicate
4189 	 * page entry first and reuse it. The search overhead is very high and
4190 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4191 	 * So it does make sense to avoid searching for duplicates.
4192 	 *
4193 	 * But during the process of searching for a free slot, if we find a
4194 	 * duplicate entry we will go ahead and use it, and bump its use count.
4195 	 */
4196 
4197 	/* index to start searching from */
4198 	index = mtbl->next_entry;
4199 	cookie_idx = -1;
4200 
4201 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4202 
4203 	if (mtype & LDC_DIRECT_MAP) {
4204 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4205 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4206 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4207 	}
4208 
4209 	if (mtype & LDC_SHADOW_MAP) {
4210 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4211 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4212 	}
4213 
4214 	if (mtype & LDC_IO_MAP) {
4215 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4216 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4217 	}
4218 
4219 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4220 
4221 	tmp_mte.mte_pgszc = pg_size_code;
4222 
4223 	/* initialize each mem table entry */
4224 	for (i = 0; i < npages; i++) {
4225 
4226 		/* check if slot is available in the table */
4227 		while (mtbl->table[index].entry.ll != 0) {
4228 
4229 			index = (index + 1) % mtbl->num_entries;
4230 
4231 			if (index == mtbl->next_entry) {
4232 				/* we have looped around */
4233 				DWARN(DBG_ALL_LDCS,
4234 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4235 				    "entry\n", ldcp->id);
4236 				*ccount = 0;
4237 
4238 				/* NOTE: free memory, remove previous entries */
4239 				/* this shouldnt happen as num_avail was ok */
4240 
4241 				mutex_exit(&mtbl->lock);
4242 				mutex_exit(&mhdl->lock);
4243 				return (ENOMEM);
4244 			}
4245 		}
4246 
4247 		/* get the real address */
4248 		raddr = va_to_pa((void *)addr);
4249 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4250 
4251 		/* build the mte */
4252 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4253 
4254 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4255 
4256 		/* update entry in table */
4257 		mtbl->table[index].entry = tmp_mte;
4258 
4259 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4260 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4261 
4262 		/* calculate the size and offset for this export range */
4263 		if (i == 0) {
4264 			/* first page */
4265 			psize = min((pg_size - v_offset), len);
4266 			poffset = v_offset;
4267 
4268 		} else if (i == (npages - 1)) {
4269 			/* last page */
4270 			psize =	(((uintptr_t)(vaddr + len)) &
4271 				    ((uint64_t)(pg_size-1)));
4272 			if (psize == 0)
4273 				psize = pg_size;
4274 			poffset = 0;
4275 
4276 		} else {
4277 			/* middle pages */
4278 			psize = pg_size;
4279 			poffset = 0;
4280 		}
4281 
4282 		/* store entry for this page */
4283 		memseg->pages[i].index = index;
4284 		memseg->pages[i].raddr = raddr;
4285 		memseg->pages[i].offset = poffset;
4286 		memseg->pages[i].size = psize;
4287 		memseg->pages[i].mte = &(mtbl->table[index]);
4288 
4289 		/* create the cookie */
4290 		if (i == 0 || (index != prev_index + 1)) {
4291 			cookie_idx++;
4292 			memseg->cookies[cookie_idx].addr =
4293 				IDX2COOKIE(index, pg_size_code, pg_shift);
4294 			memseg->cookies[cookie_idx].addr |= poffset;
4295 			memseg->cookies[cookie_idx].size = psize;
4296 
4297 		} else {
4298 			memseg->cookies[cookie_idx].size += psize;
4299 		}
4300 
4301 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4302 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4303 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4304 		    ldcp->id, addr, index, raddr, psize, poffset);
4305 
4306 		/* decrement number of available entries */
4307 		mtbl->num_avail--;
4308 
4309 		/* increment va by page size */
4310 		addr += pg_size;
4311 
4312 		/* increment index */
4313 		prev_index = index;
4314 		index = (index + 1) % mtbl->num_entries;
4315 
4316 		/* save the next slot */
4317 		mtbl->next_entry = index;
4318 	}
4319 
4320 	mutex_exit(&mtbl->lock);
4321 
4322 	/* memory handle = bound */
4323 	mhdl->mtype = mtype;
4324 	mhdl->perm = perm;
4325 	mhdl->status = LDC_BOUND;
4326 
4327 	/* update memseg_t */
4328 	memseg->vaddr = vaddr;
4329 	memseg->raddr = memseg->pages[0].raddr;
4330 	memseg->size = len;
4331 	memseg->npages = npages;
4332 	memseg->ncookies = cookie_idx + 1;
4333 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4334 
4335 	/* return count and first cookie */
4336 	*ccount = memseg->ncookies;
4337 	cookie->addr = memseg->cookies[0].addr;
4338 	cookie->size = memseg->cookies[0].size;
4339 
4340 	D1(ldcp->id,
4341 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4342 	    "pgs=0x%llx cookies=0x%llx\n",
4343 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4344 
4345 	mutex_exit(&mhdl->lock);
4346 	return (0);
4347 }
4348 
4349 /*
4350  * Return the next cookie associated with the specified memory handle
4351  */
4352 int
4353 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4354 {
4355 	ldc_mhdl_t	*mhdl;
4356 	ldc_chan_t 	*ldcp;
4357 	ldc_memseg_t	*memseg;
4358 
4359 	if (mhandle == NULL) {
4360 		DWARN(DBG_ALL_LDCS,
4361 		    "ldc_mem_nextcookie: invalid memory handle\n");
4362 		return (EINVAL);
4363 	}
4364 	mhdl = (ldc_mhdl_t *)mhandle;
4365 
4366 	mutex_enter(&mhdl->lock);
4367 
4368 	ldcp = mhdl->ldcp;
4369 	memseg = mhdl->memseg;
4370 
4371 	if (cookie == 0) {
4372 		DWARN(ldcp->id,
4373 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4374 		    ldcp->id);
4375 		mutex_exit(&mhdl->lock);
4376 		return (EINVAL);
4377 	}
4378 
4379 	if (memseg->next_cookie != 0) {
4380 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4381 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4382 		memseg->next_cookie++;
4383 		if (memseg->next_cookie == memseg->ncookies)
4384 			memseg->next_cookie = 0;
4385 
4386 	} else {
4387 		DWARN(ldcp->id,
4388 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4389 		cookie->addr = 0;
4390 		cookie->size = 0;
4391 		mutex_exit(&mhdl->lock);
4392 		return (EINVAL);
4393 	}
4394 
4395 	D1(ldcp->id,
4396 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4397 	    ldcp->id, cookie->addr, cookie->size);
4398 
4399 	mutex_exit(&mhdl->lock);
4400 	return (0);
4401 }
4402 
4403 /*
4404  * Unbind the virtual memory region associated with the specified
4405  * memory handle. Allassociated cookies are freed and the corresponding
4406  * RA space is no longer exported.
4407  */
4408 int
4409 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4410 {
4411 	ldc_mhdl_t	*mhdl;
4412 	ldc_chan_t 	*ldcp;
4413 	ldc_mtbl_t	*mtbl;
4414 	ldc_memseg_t	*memseg;
4415 	int		i;
4416 
4417 	if (mhandle == NULL) {
4418 		DWARN(DBG_ALL_LDCS,
4419 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4420 		return (EINVAL);
4421 	}
4422 	mhdl = (ldc_mhdl_t *)mhandle;
4423 
4424 	mutex_enter(&mhdl->lock);
4425 
4426 	if (mhdl->status == LDC_UNBOUND) {
4427 		DWARN(DBG_ALL_LDCS,
4428 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4429 		    mhandle);
4430 		mutex_exit(&mhdl->lock);
4431 		return (EINVAL);
4432 	}
4433 
4434 	ldcp = mhdl->ldcp;
4435 	mtbl = ldcp->mtbl;
4436 
4437 	memseg = mhdl->memseg;
4438 
4439 	/* lock the memory table - exclusive access to channel */
4440 	mutex_enter(&mtbl->lock);
4441 
4442 	/* undo the pages exported */
4443 	for (i = 0; i < memseg->npages; i++) {
4444 
4445 		/* FUTURE: check for mapped pages */
4446 		if (memseg->pages[i].mte->cookie) {
4447 			_NOTE(EMPTY)
4448 		}
4449 
4450 		/* clear the entry from the table */
4451 		memseg->pages[i].mte->entry.ll = 0;
4452 		mtbl->num_avail++;
4453 	}
4454 	mutex_exit(&mtbl->lock);
4455 
4456 	/* free the allocated memseg and page structures */
4457 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4458 	kmem_free(memseg->cookies,
4459 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4460 	kmem_free(memseg, sizeof (ldc_memseg_t));
4461 
4462 	/* uninitialize the memory handle */
4463 	mhdl->memseg = NULL;
4464 	mhdl->status = LDC_UNBOUND;
4465 
4466 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4467 	    ldcp->id, mhdl);
4468 
4469 	mutex_exit(&mhdl->lock);
4470 	return (0);
4471 }
4472 
4473 /*
4474  * Get information about the dring. The base address of the descriptor
4475  * ring along with the type and permission are returned back.
4476  */
4477 int
4478 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4479 {
4480 	ldc_mhdl_t	*mhdl;
4481 
4482 	if (mhandle == NULL) {
4483 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4484 		return (EINVAL);
4485 	}
4486 	mhdl = (ldc_mhdl_t *)mhandle;
4487 
4488 	if (minfo == NULL) {
4489 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4490 		return (EINVAL);
4491 	}
4492 
4493 	mutex_enter(&mhdl->lock);
4494 
4495 	minfo->status = mhdl->status;
4496 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4497 		minfo->vaddr = mhdl->memseg->vaddr;
4498 		minfo->raddr = mhdl->memseg->raddr;
4499 		minfo->mtype = mhdl->mtype;
4500 		minfo->perm = mhdl->perm;
4501 	}
4502 	mutex_exit(&mhdl->lock);
4503 
4504 	return (0);
4505 }
4506 
4507 /*
4508  * Copy data either from or to the client specified virtual address
4509  * space to or from the exported memory associated with the cookies.
4510  * The direction argument determines whether the data is read from or
4511  * written to exported memory.
4512  */
4513 int
4514 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4515     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4516 {
4517 	ldc_chan_t 	*ldcp;
4518 	uint64_t	local_voff, local_valign;
4519 	uint64_t	cookie_addr, cookie_size;
4520 	uint64_t	pg_shift, pg_size, pg_size_code;
4521 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4522 	uint64_t	local_ra, local_poff, local_psize;
4523 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4524 	pgcnt_t		npages;
4525 	size_t		len = *size;
4526 	int 		i, rv = 0;
4527 
4528 	if (handle == NULL) {
4529 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4530 		return (EINVAL);
4531 	}
4532 	ldcp = (ldc_chan_t *)handle;
4533 
4534 	mutex_enter(&ldcp->lock);
4535 
4536 	/* check to see if channel is UP */
4537 	if (ldcp->tstate != TS_UP) {
4538 		DWARN(ldcp->id, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4539 		    ldcp->id);
4540 		mutex_exit(&ldcp->lock);
4541 		return (EINVAL);
4542 	}
4543 
4544 	/* Force address and size to be 8-byte aligned */
4545 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4546 		DWARN(ldcp->id,
4547 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4548 		mutex_exit(&ldcp->lock);
4549 		return (EINVAL);
4550 	}
4551 
4552 	/* Find the size of the exported memory */
4553 	export_size = 0;
4554 	for (i = 0; i < ccount; i++)
4555 		export_size += cookies[i].size;
4556 
4557 	/* check to see if offset is valid */
4558 	if (off > export_size) {
4559 		DWARN(ldcp->id,
4560 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4561 		    ldcp->id);
4562 		mutex_exit(&ldcp->lock);
4563 		return (EINVAL);
4564 	}
4565 
4566 	/*
4567 	 * Check to see if the export size is smaller than the size we
4568 	 * are requesting to copy - if so flag an error
4569 	 */
4570 	if ((export_size - off) < *size) {
4571 		DWARN(ldcp->id,
4572 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4573 		    ldcp->id);
4574 		mutex_exit(&ldcp->lock);
4575 		return (EINVAL);
4576 	}
4577 
4578 	total_bal = min(export_size, *size);
4579 
4580 	/* FUTURE: get the page size, pgsz code, and shift */
4581 	pg_size = MMU_PAGESIZE;
4582 	pg_size_code = page_szc(pg_size);
4583 	pg_shift = page_get_shift(pg_size_code);
4584 
4585 	D1(ldcp->id, "ldc_mem_copy: copying data "
4586 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4587 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4588 
4589 	/* aligned VA and its offset */
4590 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4591 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4592 
4593 	npages = (len+local_voff)/pg_size;
4594 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4595 
4596 	D1(ldcp->id,
4597 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4598 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4599 
4600 	local_ra = va_to_pa((void *)local_valign);
4601 	local_poff = local_voff;
4602 	local_psize = min(len, (pg_size - local_voff));
4603 
4604 	len -= local_psize;
4605 
4606 	/*
4607 	 * find the first cookie in the list of cookies
4608 	 * if the offset passed in is not zero
4609 	 */
4610 	for (idx = 0; idx < ccount; idx++) {
4611 		cookie_size = cookies[idx].size;
4612 		if (off < cookie_size)
4613 			break;
4614 		off -= cookie_size;
4615 	}
4616 
4617 	cookie_addr = cookies[idx].addr + off;
4618 	cookie_size = cookies[idx].size - off;
4619 
4620 	export_caddr = cookie_addr & ~(pg_size - 1);
4621 	export_poff = cookie_addr & (pg_size - 1);
4622 	export_psize = min(cookie_size, (pg_size - export_poff));
4623 
4624 	for (;;) {
4625 
4626 		copy_size = min(export_psize, local_psize);
4627 
4628 		D1(ldcp->id,
4629 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4630 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4631 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4632 		    " total_bal=0x%llx\n",
4633 		    ldcp->id, direction, export_caddr, local_ra, export_poff,
4634 		    local_poff, export_psize, local_psize, copy_size,
4635 		    total_bal);
4636 
4637 		rv = hv_ldc_copy(ldcp->id, direction,
4638 		    (export_caddr + export_poff), (local_ra + local_poff),
4639 		    copy_size, &copied_len);
4640 
4641 		if (rv != 0) {
4642 			cmn_err(CE_WARN,
4643 			    "ldc_mem_copy: (0x%lx) err %d during copy\n",
4644 			    ldcp->id, rv);
4645 			DWARN(DBG_ALL_LDCS,
4646 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%llx, "
4647 			    "loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4648 			    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4649 			    " copied_len=0x%llx, total_bal=0x%llx\n",
4650 			    ldcp->id, direction, export_caddr, local_ra,
4651 			    export_poff, local_poff, export_psize, local_psize,
4652 			    copy_size, copied_len, total_bal);
4653 
4654 			*size = *size - total_bal;
4655 			mutex_exit(&ldcp->lock);
4656 			return (EIO);
4657 		}
4658 
4659 		ASSERT(copied_len <= copy_size);
4660 
4661 		D2(ldcp->id, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4662 		export_poff += copied_len;
4663 		local_poff += copied_len;
4664 		export_psize -= copied_len;
4665 		local_psize -= copied_len;
4666 		cookie_size -= copied_len;
4667 
4668 		total_bal -= copied_len;
4669 
4670 		if (copy_size != copied_len)
4671 			continue;
4672 
4673 		if (export_psize == 0 && total_bal != 0) {
4674 
4675 			if (cookie_size == 0) {
4676 				idx++;
4677 				cookie_addr = cookies[idx].addr;
4678 				cookie_size = cookies[idx].size;
4679 
4680 				export_caddr = cookie_addr & ~(pg_size - 1);
4681 				export_poff = cookie_addr & (pg_size - 1);
4682 				export_psize =
4683 					min(cookie_size, (pg_size-export_poff));
4684 			} else {
4685 				export_caddr += pg_size;
4686 				export_poff = 0;
4687 				export_psize = min(cookie_size, pg_size);
4688 			}
4689 		}
4690 
4691 		if (local_psize == 0 && total_bal != 0) {
4692 			local_valign += pg_size;
4693 			local_ra = va_to_pa((void *)local_valign);
4694 			local_poff = 0;
4695 			local_psize = min(pg_size, len);
4696 			len -= local_psize;
4697 		}
4698 
4699 		/* check if we are all done */
4700 		if (total_bal == 0)
4701 			break;
4702 	}
4703 
4704 	mutex_exit(&ldcp->lock);
4705 
4706 	D1(ldcp->id,
4707 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
4708 	    ldcp->id, *size);
4709 
4710 	return (0);
4711 }
4712 
4713 /*
4714  * Copy data either from or to the client specified virtual address
4715  * space to or from HV physical memory.
4716  *
4717  * The direction argument determines whether the data is read from or
4718  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
4719  * to the ldc_mem_copy interface
4720  */
4721 int
4722 ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
4723     caddr_t paddr, uint8_t direction)
4724 {
4725 	ldc_chan_t 	*ldcp;
4726 	uint64_t	local_voff, local_valign;
4727 	uint64_t	pg_shift, pg_size, pg_size_code;
4728 	uint64_t 	target_pa, target_poff, target_psize, target_size;
4729 	uint64_t	local_ra, local_poff, local_psize;
4730 	uint64_t	copy_size, copied_len = 0;
4731 	pgcnt_t		npages;
4732 	size_t		len = *size;
4733 	int 		rv = 0;
4734 
4735 	if (handle == NULL) {
4736 		DWARN(DBG_ALL_LDCS,
4737 		    "ldc_mem_rdwr_pa: invalid channel handle\n");
4738 		return (EINVAL);
4739 	}
4740 	ldcp = (ldc_chan_t *)handle;
4741 
4742 	mutex_enter(&ldcp->lock);
4743 
4744 	/* check to see if channel is UP */
4745 	if (ldcp->tstate != TS_UP) {
4746 		DWARN(ldcp->id,
4747 		    "ldc_mem_rdwr_pa: (0x%llx) channel is not UP\n",
4748 		    ldcp->id);
4749 		mutex_exit(&ldcp->lock);
4750 		return (EINVAL);
4751 	}
4752 
4753 	/* Force address and size to be 8-byte aligned */
4754 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4755 		DWARN(ldcp->id,
4756 		    "ldc_mem_rdwr_pa: addr/size is not 8-byte aligned\n");
4757 		mutex_exit(&ldcp->lock);
4758 		return (EINVAL);
4759 	}
4760 
4761 	target_size = *size;
4762 
4763 	/* FUTURE: get the page size, pgsz code, and shift */
4764 	pg_size = MMU_PAGESIZE;
4765 	pg_size_code = page_szc(pg_size);
4766 	pg_shift = page_get_shift(pg_size_code);
4767 
4768 	D1(ldcp->id, "ldc_mem_rdwr_pa: copying data "
4769 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4770 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4771 
4772 	/* aligned VA and its offset */
4773 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
4774 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4775 
4776 	npages = (len + local_voff) / pg_size;
4777 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
4778 
4779 	D1(ldcp->id,
4780 	    "ldc_mem_rdwr_pa: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4781 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4782 
4783 	local_ra = va_to_pa((void *)local_valign);
4784 	local_poff = local_voff;
4785 	local_psize = min(len, (pg_size - local_voff));
4786 
4787 	len -= local_psize;
4788 
4789 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
4790 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
4791 	target_psize = pg_size - target_poff;
4792 
4793 	for (;;) {
4794 
4795 		copy_size = min(target_psize, local_psize);
4796 
4797 		D1(ldcp->id,
4798 		    "ldc_mem_rdwr_pa: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
4799 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4800 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4801 		    " total_bal=0x%llx\n",
4802 		    ldcp->id, direction, target_pa, local_ra, target_poff,
4803 		    local_poff, target_psize, local_psize, copy_size,
4804 		    target_size);
4805 
4806 		rv = hv_ldc_copy(ldcp->id, direction,
4807 		    (target_pa + target_poff), (local_ra + local_poff),
4808 		    copy_size, &copied_len);
4809 
4810 		if (rv != 0) {
4811 			cmn_err(CE_WARN,
4812 			    "ldc_mem_rdwr_pa: (0x%lx) err %d during copy\n",
4813 			    ldcp->id, rv);
4814 			DWARN(DBG_ALL_LDCS,
4815 			    "ldc_mem_rdwr_pa: (0x%llx) dir=%lld,tar_pa=0x%llx, "
4816 			    "loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4817 			    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4818 			    " total_bal=0x%llx\n",
4819 			    ldcp->id, direction, target_pa, local_ra,
4820 			    target_poff, local_poff, target_psize, local_psize,
4821 			    copy_size, target_size);
4822 
4823 			*size = *size - target_size;
4824 			mutex_exit(&ldcp->lock);
4825 			return (i_ldc_h2v_error(rv));
4826 		}
4827 
4828 		D2(ldcp->id, "ldc_mem_rdwr_pa: copied=0x%llx\n", copied_len);
4829 		target_poff += copied_len;
4830 		local_poff += copied_len;
4831 		target_psize -= copied_len;
4832 		local_psize -= copied_len;
4833 
4834 		target_size -= copied_len;
4835 
4836 		if (copy_size != copied_len)
4837 			continue;
4838 
4839 		if (target_psize == 0 && target_size != 0) {
4840 			target_pa += pg_size;
4841 			target_poff = 0;
4842 			target_psize = min(pg_size, target_size);
4843 		}
4844 
4845 		if (local_psize == 0 && target_size != 0) {
4846 			local_valign += pg_size;
4847 			local_ra = va_to_pa((void *)local_valign);
4848 			local_poff = 0;
4849 			local_psize = min(pg_size, len);
4850 			len -= local_psize;
4851 		}
4852 
4853 		/* check if we are all done */
4854 		if (target_size == 0)
4855 			break;
4856 	}
4857 
4858 	mutex_exit(&ldcp->lock);
4859 
4860 	D1(ldcp->id, "ldc_mem_rdwr_pa: (0x%llx) done copying sz=0x%llx\n",
4861 	    ldcp->id, *size);
4862 
4863 	return (0);
4864 }
4865 
4866 /*
4867  * Map an exported memory segment into the local address space. If the
4868  * memory range was exported for direct map access, a HV call is made
4869  * to allocate a RA range. If the map is done via a shadow copy, local
4870  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
4871  * the mapping is a direct map then the RA is returned in 'raddr'.
4872  */
4873 int
4874 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
4875     uint8_t mtype, caddr_t *vaddr, caddr_t *raddr)
4876 {
4877 	int		i, idx;
4878 	ldc_chan_t 	*ldcp;
4879 	ldc_mhdl_t	*mhdl;
4880 	ldc_memseg_t	*memseg;
4881 	caddr_t		shadow_base = NULL, tmpaddr;
4882 	uint64_t	pg_size, pg_shift, pg_size_code;
4883 	uint64_t	exp_size = 0, npages;
4884 
4885 	if (mhandle == NULL) {
4886 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
4887 		return (EINVAL);
4888 	}
4889 	mhdl = (ldc_mhdl_t *)mhandle;
4890 
4891 	mutex_enter(&mhdl->lock);
4892 
4893 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
4894 	    mhdl->memseg != NULL) {
4895 		DWARN(DBG_ALL_LDCS,
4896 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
4897 		mutex_exit(&mhdl->lock);
4898 		return (EINVAL);
4899 	}
4900 
4901 	ldcp = mhdl->ldcp;
4902 
4903 	mutex_enter(&ldcp->lock);
4904 
4905 	if (ldcp->tstate != TS_UP) {
4906 		DWARN(ldcp->id,
4907 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
4908 		    ldcp->id);
4909 		mutex_exit(&ldcp->lock);
4910 		mutex_exit(&mhdl->lock);
4911 		return (EINVAL);
4912 	}
4913 
4914 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
4915 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
4916 		mutex_exit(&ldcp->lock);
4917 		mutex_exit(&mhdl->lock);
4918 		return (EINVAL);
4919 	}
4920 
4921 	if (mtype == LDC_SHADOW_MAP && vaddr == NULL) {
4922 		DWARN(ldcp->id,
4923 		    "ldc_mem_map: invalid vaddr arg0x%llx\n", vaddr);
4924 		mutex_exit(&ldcp->lock);
4925 		mutex_exit(&mhdl->lock);
4926 		return (EINVAL);
4927 	}
4928 
4929 	if (mtype == LDC_SHADOW_MAP &&
4930 	    (vaddr) && ((uintptr_t)(*vaddr) & MMU_PAGEOFFSET)) {
4931 		DWARN(ldcp->id,
4932 		    "ldc_mem_map: vaddr not page aligned, 0x%llx\n", *vaddr);
4933 		mutex_exit(&ldcp->lock);
4934 		mutex_exit(&mhdl->lock);
4935 		return (EINVAL);
4936 	}
4937 
4938 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
4939 	    ldcp->id, cookie->addr, cookie->size);
4940 
4941 	/* FUTURE: get the page size, pgsz code, and shift */
4942 	pg_size = MMU_PAGESIZE;
4943 	pg_size_code = page_szc(pg_size);
4944 	pg_shift = page_get_shift(pg_size_code);
4945 
4946 	/* calculate the number of pages in the exported cookie */
4947 	for (idx = 0; idx < ccount; idx++) {
4948 		if (cookie[idx].addr & MMU_PAGEOFFSET ||
4949 			cookie[idx].size & MMU_PAGEOFFSET) {
4950 			DWARN(ldcp->id,
4951 			    "ldc_mem_map: cookie addr/size not page aligned, "
4952 			    "0x%llx\n", cookie[idx].addr);
4953 			mutex_exit(&ldcp->lock);
4954 			mutex_exit(&mhdl->lock);
4955 			return (EINVAL);
4956 		}
4957 		exp_size += cookie[idx].size;
4958 	}
4959 	npages = (exp_size >> pg_shift);
4960 
4961 	/* Allocate memseg structure */
4962 	memseg = mhdl->memseg =	kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
4963 
4964 	/* Allocate memory to store all pages and cookies */
4965 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4966 	memseg->cookies =
4967 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
4968 
4969 	D2(ldcp->id, "ldc_mem_map: (0x%llx) processing 0x%llx pages\n",
4970 	    ldcp->id, npages);
4971 
4972 	/* Check to see if the client is requesting direct or shadow map */
4973 	if (mtype == LDC_SHADOW_MAP) {
4974 		if (*vaddr == NULL) {
4975 			shadow_base =
4976 				contig_mem_alloc_align(exp_size, PAGESIZE);
4977 			if (shadow_base == NULL) {
4978 				cmn_err(CE_WARN, "ldc_mem_map: shadow memory "
4979 				    "allocation failed\n");
4980 				kmem_free(memseg->cookies,
4981 				    (sizeof (ldc_mem_cookie_t) * ccount));
4982 				kmem_free(memseg->pages,
4983 				    (sizeof (ldc_page_t) * npages));
4984 				kmem_free(memseg, sizeof (ldc_memseg_t));
4985 				mutex_exit(&ldcp->lock);
4986 				mutex_exit(&mhdl->lock);
4987 				return (ENOMEM);
4988 			}
4989 
4990 			bzero(shadow_base, exp_size);
4991 			mhdl->myshadow = B_TRUE;
4992 
4993 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
4994 			    "shadow page va=0x%llx\n", ldcp->id, shadow_base);
4995 		} else {
4996 			/*
4997 			 * Use client supplied memory for shadow_base
4998 			 * WARNING: assuming that client mem is >= exp_size
4999 			 */
5000 			shadow_base = *vaddr;
5001 		}
5002 	} else if (mtype == LDC_DIRECT_MAP) {
5003 		/* FUTURE: Do a direct map by calling into HV */
5004 		_NOTE(EMPTY)
5005 	}
5006 
5007 	/* Save all page and cookie information */
5008 	for (i = 0, tmpaddr = shadow_base; i < npages; i++) {
5009 		memseg->pages[i].raddr = va_to_pa(tmpaddr);
5010 		memseg->pages[i].size = pg_size;
5011 		memseg->pages[i].index = 0;
5012 		memseg->pages[i].offset = 0;
5013 		memseg->pages[i].mte = NULL;
5014 		tmpaddr += pg_size;
5015 	}
5016 	for (i = 0; i < ccount; i++) {
5017 		memseg->cookies[i].addr = cookie[i].addr;
5018 		memseg->cookies[i].size = cookie[i].size;
5019 	}
5020 
5021 	/* update memseg_t */
5022 	memseg->vaddr = shadow_base;
5023 	memseg->raddr = memseg->pages[0].raddr;
5024 	memseg->size = exp_size;
5025 	memseg->npages = npages;
5026 	memseg->ncookies = ccount;
5027 	memseg->next_cookie = 0;
5028 
5029 	/* memory handle = mapped */
5030 	mhdl->mtype = mtype;
5031 	mhdl->perm = 0;
5032 	mhdl->status = LDC_MAPPED;
5033 
5034 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5035 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5036 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5037 	    memseg->npages, memseg->ncookies);
5038 
5039 	if (raddr)
5040 		*raddr = (caddr_t)memseg->raddr;
5041 	if (vaddr)
5042 		*vaddr = memseg->vaddr;
5043 
5044 	mutex_exit(&ldcp->lock);
5045 	mutex_exit(&mhdl->lock);
5046 	return (0);
5047 }
5048 
5049 /*
5050  * Unmap a memory segment. Free shadow memory (if any).
5051  */
5052 int
5053 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5054 {
5055 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5056 	ldc_chan_t 	*ldcp;
5057 	ldc_memseg_t	*memseg;
5058 
5059 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5060 		DWARN(DBG_ALL_LDCS,
5061 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5062 		    mhandle);
5063 		return (EINVAL);
5064 	}
5065 
5066 	mutex_enter(&mhdl->lock);
5067 
5068 	ldcp = mhdl->ldcp;
5069 	memseg = mhdl->memseg;
5070 
5071 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5072 	    ldcp->id, mhdl);
5073 
5074 	/* if we allocated shadow memory - free it */
5075 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5076 		contig_mem_free(memseg->vaddr, memseg->size);
5077 	}
5078 
5079 	/* free the allocated memseg and page structures */
5080 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5081 	kmem_free(memseg->cookies,
5082 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5083 	kmem_free(memseg, sizeof (ldc_memseg_t));
5084 
5085 	/* uninitialize the memory handle */
5086 	mhdl->memseg = NULL;
5087 	mhdl->status = LDC_UNBOUND;
5088 
5089 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5090 	    ldcp->id, mhdl);
5091 
5092 	mutex_exit(&mhdl->lock);
5093 	return (0);
5094 }
5095 
5096 /*
5097  * Internal entry point for LDC mapped memory entry consistency
5098  * semantics. Acquire copies the contents of the remote memory
5099  * into the local shadow copy. The release operation copies the local
5100  * contents into the remote memory. The offset and size specify the
5101  * bounds for the memory range being synchronized.
5102  */
5103 static int
5104 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5105     uint64_t offset, size_t size)
5106 {
5107 	int 		err;
5108 	ldc_mhdl_t	*mhdl;
5109 	ldc_chan_t	*ldcp;
5110 	ldc_memseg_t	*memseg;
5111 	caddr_t		local_vaddr;
5112 	size_t		copy_size;
5113 
5114 	if (mhandle == NULL) {
5115 		DWARN(DBG_ALL_LDCS,
5116 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5117 		return (EINVAL);
5118 	}
5119 	mhdl = (ldc_mhdl_t *)mhandle;
5120 
5121 	mutex_enter(&mhdl->lock);
5122 
5123 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5124 		DWARN(DBG_ALL_LDCS,
5125 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5126 		mutex_exit(&mhdl->lock);
5127 		return (EINVAL);
5128 	}
5129 
5130 	if (offset >= mhdl->memseg->size ||
5131 	    (offset + size) > mhdl->memseg->size) {
5132 		DWARN(DBG_ALL_LDCS,
5133 		    "i_ldc_mem_acquire_release: memory out of range\n");
5134 		mutex_exit(&mhdl->lock);
5135 		return (EINVAL);
5136 	}
5137 
5138 	/* get the channel handle and memory segment */
5139 	ldcp = mhdl->ldcp;
5140 	memseg = mhdl->memseg;
5141 
5142 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5143 
5144 		local_vaddr = memseg->vaddr + offset;
5145 		copy_size = size;
5146 
5147 		/* copy to/from remote from/to local memory */
5148 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5149 		    &copy_size, memseg->cookies, memseg->ncookies,
5150 		    direction);
5151 		if (err || copy_size != size) {
5152 			cmn_err(CE_WARN,
5153 			    "i_ldc_mem_acquire_release: copy failed\n");
5154 			mutex_exit(&mhdl->lock);
5155 			return (err);
5156 		}
5157 	}
5158 
5159 	mutex_exit(&mhdl->lock);
5160 
5161 	return (0);
5162 }
5163 
5164 /*
5165  * Ensure that the contents in the remote memory seg are consistent
5166  * with the contents if of local segment
5167  */
5168 int
5169 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5170 {
5171 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5172 }
5173 
5174 
5175 /*
5176  * Ensure that the contents in the local memory seg are consistent
5177  * with the contents if of remote segment
5178  */
5179 int
5180 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5181 {
5182 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5183 }
5184 
5185 /*
5186  * Allocate a descriptor ring. The size of each each descriptor
5187  * must be 8-byte aligned and the entire ring should be a multiple
5188  * of MMU_PAGESIZE.
5189  */
5190 int
5191 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5192 {
5193 	ldc_dring_t *dringp;
5194 	size_t size = (dsize * len);
5195 
5196 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5197 	    len, dsize);
5198 
5199 	if (dhandle == NULL) {
5200 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5201 		return (EINVAL);
5202 	}
5203 
5204 	if (len == 0) {
5205 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5206 		return (EINVAL);
5207 	}
5208 
5209 	/* descriptor size should be 8-byte aligned */
5210 	if (dsize == 0 || (dsize & 0x7)) {
5211 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5212 		return (EINVAL);
5213 	}
5214 
5215 	*dhandle = 0;
5216 
5217 	/* Allocate a desc ring structure */
5218 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5219 
5220 	/* Initialize dring */
5221 	dringp->length = len;
5222 	dringp->dsize = dsize;
5223 
5224 	/* round off to multiple of pagesize */
5225 	dringp->size = (size & MMU_PAGEMASK);
5226 	if (size & MMU_PAGEOFFSET)
5227 		dringp->size += MMU_PAGESIZE;
5228 
5229 	dringp->status = LDC_UNBOUND;
5230 
5231 	/* allocate descriptor ring memory */
5232 	dringp->base = contig_mem_alloc_align(dringp->size, PAGESIZE);
5233 	if (dringp->base == NULL) {
5234 		cmn_err(CE_WARN,
5235 		    "ldc_mem_dring_create: unable to alloc desc\n");
5236 		kmem_free(dringp, sizeof (ldc_dring_t));
5237 		return (ENOMEM);
5238 	}
5239 
5240 	bzero(dringp->base, dringp->size);
5241 
5242 	/* initialize the desc ring lock */
5243 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5244 
5245 	/* Add descriptor ring to the head of global list */
5246 	mutex_enter(&ldcssp->lock);
5247 	dringp->next = ldcssp->dring_list;
5248 	ldcssp->dring_list = dringp;
5249 	mutex_exit(&ldcssp->lock);
5250 
5251 	*dhandle = (ldc_dring_handle_t)dringp;
5252 
5253 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5254 
5255 	return (0);
5256 }
5257 
5258 
5259 /*
5260  * Destroy a descriptor ring.
5261  */
5262 int
5263 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5264 {
5265 	ldc_dring_t *dringp;
5266 	ldc_dring_t *tmp_dringp;
5267 
5268 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5269 
5270 	if (dhandle == NULL) {
5271 		DWARN(DBG_ALL_LDCS,
5272 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5273 		return (EINVAL);
5274 	}
5275 	dringp = (ldc_dring_t *)dhandle;
5276 
5277 	if (dringp->status == LDC_BOUND) {
5278 		DWARN(DBG_ALL_LDCS,
5279 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5280 		return (EACCES);
5281 	}
5282 
5283 	mutex_enter(&dringp->lock);
5284 	mutex_enter(&ldcssp->lock);
5285 
5286 	/* remove from linked list - if not bound */
5287 	tmp_dringp = ldcssp->dring_list;
5288 	if (tmp_dringp == dringp) {
5289 		ldcssp->dring_list = dringp->next;
5290 		dringp->next = NULL;
5291 
5292 	} else {
5293 		while (tmp_dringp != NULL) {
5294 			if (tmp_dringp->next == dringp) {
5295 				tmp_dringp->next = dringp->next;
5296 				dringp->next = NULL;
5297 				break;
5298 			}
5299 			tmp_dringp = tmp_dringp->next;
5300 		}
5301 		if (tmp_dringp == NULL) {
5302 			DWARN(DBG_ALL_LDCS,
5303 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5304 			mutex_exit(&ldcssp->lock);
5305 			mutex_exit(&dringp->lock);
5306 			return (EINVAL);
5307 		}
5308 	}
5309 
5310 	mutex_exit(&ldcssp->lock);
5311 
5312 	/* free the descriptor ring */
5313 	contig_mem_free((caddr_t)dringp->base, dringp->size);
5314 
5315 	mutex_exit(&dringp->lock);
5316 
5317 	/* destroy dring lock */
5318 	mutex_destroy(&dringp->lock);
5319 
5320 	/* free desc ring object */
5321 	kmem_free(dringp, sizeof (ldc_dring_t));
5322 
5323 	return (0);
5324 }
5325 
5326 /*
5327  * Bind a previously allocated dring to a channel. The channel should
5328  * be OPEN in order to bind the ring to the channel. Returns back a
5329  * descriptor ring cookie. The descriptor ring is exported for remote
5330  * access by the client at the other end of the channel. An entry for
5331  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5332  */
5333 int
5334 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5335     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5336 {
5337 	int		err;
5338 	ldc_chan_t 	*ldcp;
5339 	ldc_dring_t	*dringp;
5340 	ldc_mem_handle_t mhandle;
5341 
5342 	/* check to see if channel is initalized */
5343 	if (handle == NULL) {
5344 		DWARN(DBG_ALL_LDCS,
5345 		    "ldc_mem_dring_bind: invalid channel handle\n");
5346 		return (EINVAL);
5347 	}
5348 	ldcp = (ldc_chan_t *)handle;
5349 
5350 	if (dhandle == NULL) {
5351 		DWARN(DBG_ALL_LDCS,
5352 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5353 		return (EINVAL);
5354 	}
5355 	dringp = (ldc_dring_t *)dhandle;
5356 
5357 	if (cookie == NULL) {
5358 		DWARN(ldcp->id,
5359 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5360 		return (EINVAL);
5361 	}
5362 
5363 	mutex_enter(&dringp->lock);
5364 
5365 	if (dringp->status == LDC_BOUND) {
5366 		DWARN(DBG_ALL_LDCS,
5367 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5368 		    ldcp->id);
5369 		mutex_exit(&dringp->lock);
5370 		return (EINVAL);
5371 	}
5372 
5373 	if ((perm & LDC_MEM_RW) == 0) {
5374 		DWARN(DBG_ALL_LDCS,
5375 		    "ldc_mem_dring_bind: invalid permissions\n");
5376 		mutex_exit(&dringp->lock);
5377 		return (EINVAL);
5378 	}
5379 
5380 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5381 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5382 		mutex_exit(&dringp->lock);
5383 		return (EINVAL);
5384 	}
5385 
5386 	dringp->ldcp = ldcp;
5387 
5388 	/* create an memory handle */
5389 	err = ldc_mem_alloc_handle(handle, &mhandle);
5390 	if (err || mhandle == NULL) {
5391 		DWARN(DBG_ALL_LDCS,
5392 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5393 		    ldcp->id);
5394 		mutex_exit(&dringp->lock);
5395 		return (err);
5396 	}
5397 	dringp->mhdl = mhandle;
5398 
5399 	/* bind the descriptor ring to channel */
5400 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5401 	    mtype, perm, cookie, ccount);
5402 	if (err) {
5403 		DWARN(ldcp->id,
5404 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5405 		    ldcp->id);
5406 		mutex_exit(&dringp->lock);
5407 		return (err);
5408 	}
5409 
5410 	/*
5411 	 * For now return error if we get more than one cookie
5412 	 * FUTURE: Return multiple cookies ..
5413 	 */
5414 	if (*ccount > 1) {
5415 		(void) ldc_mem_unbind_handle(mhandle);
5416 		(void) ldc_mem_free_handle(mhandle);
5417 
5418 		dringp->ldcp = NULL;
5419 		dringp->mhdl = NULL;
5420 		*ccount = 0;
5421 
5422 		mutex_exit(&dringp->lock);
5423 		return (EAGAIN);
5424 	}
5425 
5426 	/* Add descriptor ring to channel's exported dring list */
5427 	mutex_enter(&ldcp->exp_dlist_lock);
5428 	dringp->ch_next = ldcp->exp_dring_list;
5429 	ldcp->exp_dring_list = dringp;
5430 	mutex_exit(&ldcp->exp_dlist_lock);
5431 
5432 	dringp->status = LDC_BOUND;
5433 
5434 	mutex_exit(&dringp->lock);
5435 
5436 	return (0);
5437 }
5438 
5439 /*
5440  * Return the next cookie associated with the specified dring handle
5441  */
5442 int
5443 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5444 {
5445 	int		rv = 0;
5446 	ldc_dring_t 	*dringp;
5447 	ldc_chan_t	*ldcp;
5448 
5449 	if (dhandle == NULL) {
5450 		DWARN(DBG_ALL_LDCS,
5451 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5452 		return (EINVAL);
5453 	}
5454 	dringp = (ldc_dring_t *)dhandle;
5455 	mutex_enter(&dringp->lock);
5456 
5457 	if (dringp->status != LDC_BOUND) {
5458 		DWARN(DBG_ALL_LDCS,
5459 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5460 		    "is not bound\n", dringp);
5461 		mutex_exit(&dringp->lock);
5462 		return (EINVAL);
5463 	}
5464 
5465 	ldcp = dringp->ldcp;
5466 
5467 	if (cookie == NULL) {
5468 		DWARN(ldcp->id,
5469 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5470 		    ldcp->id);
5471 		mutex_exit(&dringp->lock);
5472 		return (EINVAL);
5473 	}
5474 
5475 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5476 	mutex_exit(&dringp->lock);
5477 
5478 	return (rv);
5479 }
5480 /*
5481  * Unbind a previously bound dring from a channel.
5482  */
5483 int
5484 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5485 {
5486 	ldc_dring_t 	*dringp;
5487 	ldc_dring_t	*tmp_dringp;
5488 	ldc_chan_t	*ldcp;
5489 
5490 	if (dhandle == NULL) {
5491 		DWARN(DBG_ALL_LDCS,
5492 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5493 		return (EINVAL);
5494 	}
5495 	dringp = (ldc_dring_t *)dhandle;
5496 
5497 	mutex_enter(&dringp->lock);
5498 
5499 	if (dringp->status == LDC_UNBOUND) {
5500 		DWARN(DBG_ALL_LDCS,
5501 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5502 		    dringp);
5503 		mutex_exit(&dringp->lock);
5504 		return (EINVAL);
5505 	}
5506 	ldcp = dringp->ldcp;
5507 
5508 	mutex_enter(&ldcp->exp_dlist_lock);
5509 
5510 	tmp_dringp = ldcp->exp_dring_list;
5511 	if (tmp_dringp == dringp) {
5512 		ldcp->exp_dring_list = dringp->ch_next;
5513 		dringp->ch_next = NULL;
5514 
5515 	} else {
5516 		while (tmp_dringp != NULL) {
5517 			if (tmp_dringp->ch_next == dringp) {
5518 				tmp_dringp->ch_next = dringp->ch_next;
5519 				dringp->ch_next = NULL;
5520 				break;
5521 			}
5522 			tmp_dringp = tmp_dringp->ch_next;
5523 		}
5524 		if (tmp_dringp == NULL) {
5525 			DWARN(DBG_ALL_LDCS,
5526 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5527 			mutex_exit(&ldcp->exp_dlist_lock);
5528 			mutex_exit(&dringp->lock);
5529 			return (EINVAL);
5530 		}
5531 	}
5532 
5533 	mutex_exit(&ldcp->exp_dlist_lock);
5534 
5535 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5536 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5537 
5538 	dringp->ldcp = NULL;
5539 	dringp->mhdl = NULL;
5540 	dringp->status = LDC_UNBOUND;
5541 
5542 	mutex_exit(&dringp->lock);
5543 
5544 	return (0);
5545 }
5546 
5547 /*
5548  * Get information about the dring. The base address of the descriptor
5549  * ring along with the type and permission are returned back.
5550  */
5551 int
5552 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5553 {
5554 	ldc_dring_t	*dringp;
5555 	int		rv;
5556 
5557 	if (dhandle == NULL) {
5558 		DWARN(DBG_ALL_LDCS,
5559 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5560 		return (EINVAL);
5561 	}
5562 	dringp = (ldc_dring_t *)dhandle;
5563 
5564 	mutex_enter(&dringp->lock);
5565 
5566 	if (dringp->mhdl) {
5567 		rv = ldc_mem_info(dringp->mhdl, minfo);
5568 		if (rv) {
5569 			DWARN(DBG_ALL_LDCS,
5570 			    "ldc_mem_dring_info: error reading mem info\n");
5571 			mutex_exit(&dringp->lock);
5572 			return (rv);
5573 		}
5574 	} else {
5575 		minfo->vaddr = dringp->base;
5576 		minfo->raddr = NULL;
5577 		minfo->status = dringp->status;
5578 	}
5579 
5580 	mutex_exit(&dringp->lock);
5581 
5582 	return (0);
5583 }
5584 
5585 /*
5586  * Map an exported descriptor ring into the local address space. If the
5587  * descriptor ring was exported for direct map access, a HV call is made
5588  * to allocate a RA range. If the map is done via a shadow copy, local
5589  * shadow memory is allocated.
5590  */
5591 int
5592 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
5593     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
5594     ldc_dring_handle_t *dhandle)
5595 {
5596 	int		err;
5597 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
5598 	ldc_mem_handle_t mhandle;
5599 	ldc_dring_t	*dringp;
5600 	size_t		dring_size;
5601 
5602 	if (dhandle == NULL) {
5603 		DWARN(DBG_ALL_LDCS,
5604 		    "ldc_mem_dring_map: invalid dhandle\n");
5605 		return (EINVAL);
5606 	}
5607 
5608 	/* check to see if channel is initalized */
5609 	if (handle == NULL) {
5610 		DWARN(DBG_ALL_LDCS,
5611 		    "ldc_mem_dring_map: invalid channel handle\n");
5612 		return (EINVAL);
5613 	}
5614 	ldcp = (ldc_chan_t *)handle;
5615 
5616 	if (cookie == NULL) {
5617 		DWARN(ldcp->id,
5618 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
5619 		    ldcp->id);
5620 		return (EINVAL);
5621 	}
5622 
5623 	/* FUTURE: For now we support only one cookie per dring */
5624 	ASSERT(ccount == 1);
5625 
5626 	if (cookie->size < (dsize * len)) {
5627 		DWARN(ldcp->id,
5628 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
5629 		    ldcp->id);
5630 		return (EINVAL);
5631 	}
5632 
5633 	*dhandle = 0;
5634 
5635 	/* Allocate an dring structure */
5636 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5637 
5638 	D1(ldcp->id,
5639 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
5640 	    mtype, len, dsize, cookie->addr, cookie->size);
5641 
5642 	/* Initialize dring */
5643 	dringp->length = len;
5644 	dringp->dsize = dsize;
5645 
5646 	/* round of to multiple of page size */
5647 	dring_size = len * dsize;
5648 	dringp->size = (dring_size & MMU_PAGEMASK);
5649 	if (dring_size & MMU_PAGEOFFSET)
5650 		dringp->size += MMU_PAGESIZE;
5651 
5652 	dringp->ldcp = ldcp;
5653 
5654 	/* create an memory handle */
5655 	err = ldc_mem_alloc_handle(handle, &mhandle);
5656 	if (err || mhandle == NULL) {
5657 		DWARN(DBG_ALL_LDCS,
5658 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
5659 		    err);
5660 		kmem_free(dringp, sizeof (ldc_dring_t));
5661 		return (ENOMEM);
5662 	}
5663 
5664 	dringp->mhdl = mhandle;
5665 	dringp->base = NULL;
5666 
5667 	/* map the dring into local memory */
5668 	err = ldc_mem_map(mhandle, cookie, ccount, mtype,
5669 	    &(dringp->base), NULL);
5670 	if (err || dringp->base == NULL) {
5671 		cmn_err(CE_WARN,
5672 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
5673 		(void) ldc_mem_free_handle(mhandle);
5674 		kmem_free(dringp, sizeof (ldc_dring_t));
5675 		return (ENOMEM);
5676 	}
5677 
5678 	/* initialize the desc ring lock */
5679 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5680 
5681 	/* Add descriptor ring to channel's imported dring list */
5682 	mutex_enter(&ldcp->imp_dlist_lock);
5683 	dringp->ch_next = ldcp->imp_dring_list;
5684 	ldcp->imp_dring_list = dringp;
5685 	mutex_exit(&ldcp->imp_dlist_lock);
5686 
5687 	dringp->status = LDC_MAPPED;
5688 
5689 	*dhandle = (ldc_dring_handle_t)dringp;
5690 
5691 	return (0);
5692 }
5693 
5694 /*
5695  * Unmap a descriptor ring. Free shadow memory (if any).
5696  */
5697 int
5698 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
5699 {
5700 	ldc_dring_t 	*dringp;
5701 	ldc_dring_t	*tmp_dringp;
5702 	ldc_chan_t	*ldcp;
5703 
5704 	if (dhandle == NULL) {
5705 		DWARN(DBG_ALL_LDCS,
5706 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
5707 		return (EINVAL);
5708 	}
5709 	dringp = (ldc_dring_t *)dhandle;
5710 
5711 	if (dringp->status != LDC_MAPPED) {
5712 		DWARN(DBG_ALL_LDCS,
5713 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
5714 		return (EINVAL);
5715 	}
5716 
5717 	mutex_enter(&dringp->lock);
5718 
5719 	ldcp = dringp->ldcp;
5720 
5721 	mutex_enter(&ldcp->imp_dlist_lock);
5722 
5723 	/* find and unlink the desc ring from channel import list */
5724 	tmp_dringp = ldcp->imp_dring_list;
5725 	if (tmp_dringp == dringp) {
5726 		ldcp->imp_dring_list = dringp->ch_next;
5727 		dringp->ch_next = NULL;
5728 
5729 	} else {
5730 		while (tmp_dringp != NULL) {
5731 			if (tmp_dringp->ch_next == dringp) {
5732 				tmp_dringp->ch_next = dringp->ch_next;
5733 				dringp->ch_next = NULL;
5734 				break;
5735 			}
5736 			tmp_dringp = tmp_dringp->ch_next;
5737 		}
5738 		if (tmp_dringp == NULL) {
5739 			DWARN(DBG_ALL_LDCS,
5740 			    "ldc_mem_dring_unmap: invalid descriptor\n");
5741 			mutex_exit(&ldcp->imp_dlist_lock);
5742 			mutex_exit(&dringp->lock);
5743 			return (EINVAL);
5744 		}
5745 	}
5746 
5747 	mutex_exit(&ldcp->imp_dlist_lock);
5748 
5749 	/* do a LDC memory handle unmap and free */
5750 	(void) ldc_mem_unmap(dringp->mhdl);
5751 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5752 
5753 	dringp->status = 0;
5754 	dringp->ldcp = NULL;
5755 
5756 	mutex_exit(&dringp->lock);
5757 
5758 	/* destroy dring lock */
5759 	mutex_destroy(&dringp->lock);
5760 
5761 	/* free desc ring object */
5762 	kmem_free(dringp, sizeof (ldc_dring_t));
5763 
5764 	return (0);
5765 }
5766 
5767 /*
5768  * Internal entry point for descriptor ring access entry consistency
5769  * semantics. Acquire copies the contents of the remote descriptor ring
5770  * into the local shadow copy. The release operation copies the local
5771  * contents into the remote dring. The start and end locations specify
5772  * bounds for the entries being synchronized.
5773  */
5774 static int
5775 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
5776     uint8_t direction, uint64_t start, uint64_t end)
5777 {
5778 	int 			err;
5779 	ldc_dring_t		*dringp;
5780 	ldc_chan_t		*ldcp;
5781 	uint64_t		soff;
5782 	size_t			copy_size;
5783 
5784 	if (dhandle == NULL) {
5785 		DWARN(DBG_ALL_LDCS,
5786 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
5787 		return (EINVAL);
5788 	}
5789 	dringp = (ldc_dring_t *)dhandle;
5790 	mutex_enter(&dringp->lock);
5791 
5792 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
5793 		DWARN(DBG_ALL_LDCS,
5794 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
5795 		mutex_exit(&dringp->lock);
5796 		return (EINVAL);
5797 	}
5798 
5799 	if (start >= dringp->length || end >= dringp->length) {
5800 		DWARN(DBG_ALL_LDCS,
5801 		    "i_ldc_dring_acquire_release: index out of range\n");
5802 		mutex_exit(&dringp->lock);
5803 		return (EINVAL);
5804 	}
5805 
5806 	/* get the channel handle */
5807 	ldcp = dringp->ldcp;
5808 
5809 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
5810 		((dringp->length - start) * dringp->dsize);
5811 
5812 	/* Calculate the relative offset for the first desc */
5813 	soff = (start * dringp->dsize);
5814 
5815 	/* copy to/from remote from/to local memory */
5816 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
5817 	    soff, copy_size);
5818 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5819 	    direction, soff, copy_size);
5820 	if (err) {
5821 		DWARN(ldcp->id,
5822 		    "i_ldc_dring_acquire_release: copy failed\n");
5823 		mutex_exit(&dringp->lock);
5824 		return (err);
5825 	}
5826 
5827 	/* do the balance */
5828 	if (start > end) {
5829 		copy_size = ((end + 1) * dringp->dsize);
5830 		soff = 0;
5831 
5832 		/* copy to/from remote from/to local memory */
5833 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
5834 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
5835 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5836 		    direction, soff, copy_size);
5837 		if (err) {
5838 			DWARN(ldcp->id,
5839 			    "i_ldc_dring_acquire_release: copy failed\n");
5840 			mutex_exit(&dringp->lock);
5841 			return (err);
5842 		}
5843 	}
5844 
5845 	mutex_exit(&dringp->lock);
5846 
5847 	return (0);
5848 }
5849 
5850 /*
5851  * Ensure that the contents in the local dring are consistent
5852  * with the contents if of remote dring
5853  */
5854 int
5855 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5856 {
5857 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
5858 }
5859 
5860 /*
5861  * Ensure that the contents in the remote dring are consistent
5862  * with the contents if of local dring
5863  */
5864 int
5865 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5866 {
5867 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
5868 }
5869 
5870 
5871 /* ------------------------------------------------------------------------- */
5872