xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision f7209cf2d2e114162cab4b5252c05118a6b7e54a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Transport Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
44 #include <sys/types.h>
45 #include <sys/cred.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/note.h>
56 #include <sys/ivintr.h>
57 #include <sys/hypervisor_api.h>
58 #include <sys/ldc.h>
59 #include <sys/ldc_impl.h>
60 #include <sys/cnex.h>
61 #include <sys/hsvc.h>
62 
63 /* Core internal functions */
64 static int i_ldc_h2v_error(int h_error);
65 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
66 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp);
67 static void i_ldc_reset_state(ldc_chan_t *ldcp);
68 static void i_ldc_reset(ldc_chan_t *ldcp);
69 
70 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
71 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
72 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
73 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
74     uint8_t ctrlmsg);
75 
76 /* Interrupt handling functions */
77 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
78 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
79 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
80 
81 /* Read method functions */
82 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
83 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
84 	size_t *sizep);
85 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
86 	size_t *sizep);
87 
88 /* Write method functions */
89 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
90 	size_t *sizep);
91 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
92 	size_t *sizep);
93 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 
96 /* Pkt processing internal functions */
97 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
98 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
99 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
100 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
101 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
102 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
103 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
104 
105 /* Memory synchronization internal functions */
106 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
107     uint8_t direction, uint64_t offset, size_t size);
108 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
109     uint8_t direction, uint64_t start, uint64_t end);
110 
111 /* LDC Version */
112 static ldc_ver_t ldc_versions[] = { {1, 0} };
113 
114 /* number of supported versions */
115 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
116 
117 /* Module State Pointer */
118 static ldc_soft_state_t *ldcssp;
119 
120 static struct modldrv md = {
121 	&mod_miscops,			/* This is a misc module */
122 	"sun4v LDC module v%I%",	/* Name of the module */
123 };
124 
125 static struct modlinkage ml = {
126 	MODREV_1,
127 	&md,
128 	NULL
129 };
130 
131 static uint64_t ldc_sup_minor;		/* Supported minor number */
132 static hsvc_info_t ldc_hsvc = {
133 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
134 };
135 
136 static uint64_t intr_sup_minor;		/* Supported minor number */
137 static hsvc_info_t intr_hsvc = {
138 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
139 };
140 
141 /*
142  * LDC retry count and delay
143  */
144 int ldc_max_retries = LDC_MAX_RETRIES;
145 clock_t ldc_delay = LDC_DELAY;
146 
147 #ifdef DEBUG
148 
149 /*
150  * Print debug messages
151  *
152  * set ldcdbg to 0x7 for enabling all msgs
153  * 0x4 - Warnings
154  * 0x2 - All debug messages
155  * 0x1 - Minimal debug messages
156  *
157  * set ldcdbgchan to the channel number you want to debug
158  * setting it to -1 prints debug messages for all channels
159  * NOTE: ldcdbgchan has no effect on error messages
160  */
161 
162 #define	DBG_ALL_LDCS -1
163 
164 int ldcdbg = 0x0;
165 int64_t ldcdbgchan = DBG_ALL_LDCS;
166 
167 static void
168 ldcdebug(int64_t id, const char *fmt, ...)
169 {
170 	char buf[512];
171 	va_list ap;
172 
173 	/*
174 	 * Do not return if,
175 	 * caller wants to print it anyway - (id == DBG_ALL_LDCS)
176 	 * debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
177 	 * debug channel = caller specified channel
178 	 */
179 	if ((id != DBG_ALL_LDCS) &&
180 	    (ldcdbgchan != DBG_ALL_LDCS) &&
181 	    (ldcdbgchan != id)) {
182 		return;
183 	}
184 
185 	va_start(ap, fmt);
186 	(void) vsprintf(buf, fmt, ap);
187 	va_end(ap);
188 
189 	cmn_err(CE_CONT, "?%s\n", buf);
190 }
191 
192 #define	D1		\
193 if (ldcdbg & 0x01)	\
194 	ldcdebug
195 
196 #define	D2		\
197 if (ldcdbg & 0x02)	\
198 	ldcdebug
199 
200 #define	DWARN		\
201 if (ldcdbg & 0x04)	\
202 	ldcdebug
203 
204 #define	DUMP_PAYLOAD(id, addr)						\
205 {									\
206 	char buf[65*3];							\
207 	int i;								\
208 	uint8_t *src = (uint8_t *)addr;					\
209 	for (i = 0; i < 64; i++, src++)					\
210 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
211 	(void) sprintf(&buf[i * 3], "|\n");				\
212 	D2((id), "payload: %s", buf);					\
213 }
214 
215 #define	DUMP_LDC_PKT(c, s, addr)					\
216 {									\
217 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
218 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
219 	if (msg->type == LDC_DATA) {                                    \
220 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
221 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
222 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
223 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
224 	    (msg->env & LDC_LEN_MASK));					\
225 	} else { 							\
226 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
227 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
228 	} 								\
229 }
230 
231 #else
232 
233 #define	DBG_ALL_LDCS -1
234 
235 #define	D1
236 #define	D2
237 #define	DWARN
238 
239 #define	DUMP_PAYLOAD(id, addr)
240 #define	DUMP_LDC_PKT(c, s, addr)
241 
242 #endif
243 
244 #define	ZERO_PKT(p)			\
245 	bzero((p), sizeof (ldc_msg_t));
246 
247 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
248 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
249 
250 
251 int
252 _init(void)
253 {
254 	int status;
255 
256 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
257 	if (status != 0) {
258 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
259 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
260 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
261 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
262 		return (-1);
263 	}
264 
265 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
266 	if (status != 0) {
267 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
268 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
269 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
270 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
271 		(void) hsvc_unregister(&ldc_hsvc);
272 		return (-1);
273 	}
274 
275 	/* allocate soft state structure */
276 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
277 
278 	/* Link the module into the system */
279 	status = mod_install(&ml);
280 	if (status != 0) {
281 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
282 		return (status);
283 	}
284 
285 	/* Initialize the LDC state structure */
286 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
287 
288 	mutex_enter(&ldcssp->lock);
289 
290 	ldcssp->channel_count = 0;
291 	ldcssp->channels_open = 0;
292 	ldcssp->chan_list = NULL;
293 	ldcssp->dring_list = NULL;
294 
295 	mutex_exit(&ldcssp->lock);
296 
297 	return (0);
298 }
299 
300 int
301 _info(struct modinfo *modinfop)
302 {
303 	/* Report status of the dynamically loadable driver module */
304 	return (mod_info(&ml, modinfop));
305 }
306 
307 int
308 _fini(void)
309 {
310 	int 		rv, status;
311 	ldc_chan_t 	*ldcp;
312 	ldc_dring_t 	*dringp;
313 	ldc_mem_info_t 	minfo;
314 
315 	/* Unlink the driver module from the system */
316 	status = mod_remove(&ml);
317 	if (status) {
318 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
319 		return (EIO);
320 	}
321 
322 	/* close and finalize channels */
323 	ldcp = ldcssp->chan_list;
324 	while (ldcp != NULL) {
325 		(void) ldc_close((ldc_handle_t)ldcp);
326 		(void) ldc_fini((ldc_handle_t)ldcp);
327 
328 		ldcp = ldcp->next;
329 	}
330 
331 	/* Free descriptor rings */
332 	dringp = ldcssp->dring_list;
333 	while (dringp != NULL) {
334 		dringp = dringp->next;
335 
336 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
337 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
338 			if (minfo.status == LDC_BOUND) {
339 				(void) ldc_mem_dring_unbind(
340 						(ldc_dring_handle_t)dringp);
341 			}
342 			if (minfo.status == LDC_MAPPED) {
343 				(void) ldc_mem_dring_unmap(
344 						(ldc_dring_handle_t)dringp);
345 			}
346 		}
347 
348 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
349 	}
350 	ldcssp->dring_list = NULL;
351 
352 	/*
353 	 * We have successfully "removed" the driver.
354 	 * Destroying soft states
355 	 */
356 	mutex_destroy(&ldcssp->lock);
357 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
358 
359 	(void) hsvc_unregister(&ldc_hsvc);
360 	(void) hsvc_unregister(&intr_hsvc);
361 
362 	return (status);
363 }
364 
365 /* -------------------------------------------------------------------------- */
366 
367 /*
368  * LDC Transport Internal Functions
369  */
370 
371 /*
372  * Translate HV Errors to sun4v error codes
373  */
374 static int
375 i_ldc_h2v_error(int h_error)
376 {
377 	switch (h_error) {
378 
379 	case	H_EOK:
380 		return (0);
381 
382 	case	H_ENORADDR:
383 		return (EFAULT);
384 
385 	case	H_EBADPGSZ:
386 	case	H_EINVAL:
387 		return (EINVAL);
388 
389 	case	H_EWOULDBLOCK:
390 		return (EWOULDBLOCK);
391 
392 	case	H_ENOACCESS:
393 	case	H_ENOMAP:
394 		return (EACCES);
395 
396 	case	H_EIO:
397 	case	H_ECPUERROR:
398 		return (EIO);
399 
400 	case	H_ENOTSUPPORTED:
401 		return (ENOTSUP);
402 
403 	case 	H_ETOOMANY:
404 		return (ENOSPC);
405 
406 	case	H_ECHANNEL:
407 		return (ECHRNG);
408 	default:
409 		break;
410 	}
411 
412 	return (EIO);
413 }
414 
415 /*
416  * Reconfigure the transmit queue
417  */
418 static int
419 i_ldc_txq_reconf(ldc_chan_t *ldcp)
420 {
421 	int rv;
422 
423 	ASSERT(MUTEX_HELD(&ldcp->lock));
424 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
425 
426 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
427 	if (rv) {
428 		cmn_err(CE_WARN,
429 		    "ldc_tx_qconf: (0x%lx) cannot set qconf", ldcp->id);
430 		return (EIO);
431 	}
432 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
433 	    &(ldcp->tx_tail), &(ldcp->link_state));
434 	if (rv) {
435 		cmn_err(CE_WARN,
436 		    "ldc_tx_get_state: (0x%lx) cannot get qptrs", ldcp->id);
437 		return (EIO);
438 	}
439 	D1(ldcp->id, "ldc_tx_get_state: (0x%llx) h=0x%llx,t=0x%llx,"
440 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
441 	    ldcp->link_state);
442 
443 	return (0);
444 }
445 
446 /*
447  * Reconfigure the receive queue
448  */
449 static int
450 i_ldc_rxq_reconf(ldc_chan_t *ldcp)
451 {
452 	int rv;
453 	uint64_t rx_head, rx_tail;
454 
455 	ASSERT(MUTEX_HELD(&ldcp->lock));
456 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
457 	    &(ldcp->link_state));
458 	if (rv) {
459 		cmn_err(CE_WARN,
460 		    "ldc_rx_getstate: (0x%lx) cannot get state",
461 		    ldcp->id);
462 		return (EIO);
463 	}
464 
465 	if (rx_head != rx_tail || ldcp->tstate > TS_READY) {
466 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
467 			ldcp->rx_q_entries);
468 		if (rv) {
469 			cmn_err(CE_WARN,
470 			    "ldc_rx_qconf: (0x%lx) cannot set qconf",
471 			    ldcp->id);
472 			return (EIO);
473 		}
474 		D1(ldcp->id, "ldc_rx_qconf: (0x%llx) completed qconf",
475 		    ldcp->id);
476 	}
477 
478 	return (0);
479 }
480 
481 /*
482  * Reset LDC state structure and its contents
483  */
484 static void
485 i_ldc_reset_state(ldc_chan_t *ldcp)
486 {
487 	ASSERT(MUTEX_HELD(&ldcp->lock));
488 	ldcp->last_msg_snt = LDC_INIT_SEQID;
489 	ldcp->last_ack_rcd = 0;
490 	ldcp->last_msg_rcd = 0;
491 	ldcp->tx_ackd_head = ldcp->tx_head;
492 	ldcp->next_vidx = 0;
493 	ldcp->hstate = 0;
494 	ldcp->tstate = TS_OPEN;
495 	ldcp->status = LDC_OPEN;
496 
497 	if (ldcp->link_state == LDC_CHANNEL_UP ||
498 	    ldcp->link_state == LDC_CHANNEL_RESET) {
499 
500 		if (ldcp->mode == LDC_MODE_RAW) {
501 			ldcp->status = LDC_UP;
502 			ldcp->tstate = TS_UP;
503 		} else {
504 			ldcp->status = LDC_READY;
505 			ldcp->tstate |= TS_LINK_READY;
506 		}
507 	}
508 }
509 
510 /*
511  * Reset a LDC channel
512  */
513 static void
514 i_ldc_reset(ldc_chan_t *ldcp)
515 {
516 	D2(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
517 
518 	ASSERT(MUTEX_HELD(&ldcp->lock));
519 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
520 
521 	(void) i_ldc_txq_reconf(ldcp);
522 	(void) i_ldc_rxq_reconf(ldcp);
523 	i_ldc_reset_state(ldcp);
524 }
525 
526 /*
527  * Clear pending interrupts
528  */
529 static void
530 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
531 {
532 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
533 
534 	ASSERT(MUTEX_HELD(&ldcp->lock));
535 	if (cinfo->dip && ldcp->intr_pending) {
536 		ldcp->intr_pending = B_FALSE;
537 		(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
538 	}
539 }
540 
541 /*
542  * Set the receive queue head
543  * Resets connection and returns an error if it fails.
544  */
545 static int
546 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
547 {
548 	int 	rv;
549 	int 	retries;
550 
551 	ASSERT(MUTEX_HELD(&ldcp->lock));
552 	for (retries = 0; retries < ldc_max_retries; retries++) {
553 
554 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
555 			return (0);
556 
557 		if (rv != H_EWOULDBLOCK)
558 			break;
559 
560 		/* wait for ldc_delay usecs */
561 		drv_usecwait(ldc_delay);
562 	}
563 
564 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
565 		ldcp->id, head);
566 	mutex_enter(&ldcp->tx_lock);
567 	i_ldc_reset(ldcp);
568 	mutex_exit(&ldcp->tx_lock);
569 
570 	return (ECONNRESET);
571 }
572 
573 
574 /*
575  * Returns the tx_tail to be used for transfer
576  * Re-reads the TX queue ptrs if and only if the
577  * the cached head and tail are equal (queue is full)
578  */
579 static int
580 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
581 {
582 	int 		rv;
583 	uint64_t 	current_head, new_tail;
584 
585 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
586 	/* Read the head and tail ptrs from HV */
587 	rv = hv_ldc_tx_get_state(ldcp->id,
588 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
589 	if (rv) {
590 		cmn_err(CE_WARN,
591 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
592 		    ldcp->id);
593 		return (EIO);
594 	}
595 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
596 		DWARN(DBG_ALL_LDCS,
597 		    "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
598 		    ldcp->id);
599 		return (ECONNRESET);
600 	}
601 
602 	/* In reliable mode, check against last ACKd msg */
603 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
604 		ldcp->mode == LDC_MODE_STREAM)
605 		? ldcp->tx_ackd_head : ldcp->tx_head;
606 
607 	/* increment the tail */
608 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
609 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
610 
611 	if (new_tail == current_head) {
612 		DWARN(ldcp->id,
613 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
614 		    ldcp->id);
615 		return (EWOULDBLOCK);
616 	}
617 
618 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
619 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
620 
621 	*tail = ldcp->tx_tail;
622 	return (0);
623 }
624 
625 /*
626  * Set the tail pointer. If HV returns EWOULDBLOCK, it will back off
627  * and retry ldc_max_retries times before returning an error.
628  * Returns 0, EWOULDBLOCK or EIO
629  */
630 static int
631 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
632 {
633 	int		rv, retval = EWOULDBLOCK;
634 	int 		retries;
635 
636 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
637 	for (retries = 0; retries < ldc_max_retries; retries++) {
638 
639 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
640 			retval = 0;
641 			break;
642 		}
643 		if (rv != H_EWOULDBLOCK) {
644 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
645 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
646 			retval = EIO;
647 			break;
648 		}
649 
650 		/* wait for ldc_delay usecs */
651 		drv_usecwait(ldc_delay);
652 	}
653 	return (retval);
654 }
655 
656 /*
657  * Send a LDC message
658  */
659 static int
660 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
661     uint8_t ctrlmsg)
662 {
663 	int		rv;
664 	ldc_msg_t 	*pkt;
665 	uint64_t	tx_tail;
666 	uint32_t	curr_seqid = ldcp->last_msg_snt;
667 
668 	/* Obtain Tx lock */
669 	mutex_enter(&ldcp->tx_lock);
670 
671 	/* get the current tail for the message */
672 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
673 	if (rv) {
674 		DWARN(ldcp->id,
675 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
676 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
677 		    ldcp->id, pkttype, subtype, ctrlmsg);
678 		mutex_exit(&ldcp->tx_lock);
679 		return (rv);
680 	}
681 
682 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
683 	ZERO_PKT(pkt);
684 
685 	/* Initialize the packet */
686 	pkt->type = pkttype;
687 	pkt->stype = subtype;
688 	pkt->ctrl = ctrlmsg;
689 
690 	/* Store ackid/seqid iff it is RELIABLE mode & not a RTS/RTR message */
691 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
692 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
693 		curr_seqid++;
694 		if (ldcp->mode != LDC_MODE_RAW) {
695 			pkt->seqid = curr_seqid;
696 			pkt->ackid = ldcp->last_msg_rcd;
697 		}
698 	}
699 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
700 
701 	/* initiate the send by calling into HV and set the new tail */
702 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
703 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
704 
705 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
706 	if (rv) {
707 		DWARN(ldcp->id,
708 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
709 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
710 		    ldcp->id, pkttype, subtype, ctrlmsg);
711 		mutex_exit(&ldcp->tx_lock);
712 		return (EIO);
713 	}
714 
715 	ldcp->last_msg_snt = curr_seqid;
716 	ldcp->tx_tail = tx_tail;
717 
718 	mutex_exit(&ldcp->tx_lock);
719 	return (0);
720 }
721 
722 /*
723  * Checks if packet was received in right order
724  * in the case of a reliable transport.
725  * Returns 0 if in order, else EIO
726  */
727 static int
728 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
729 {
730 	/* No seqid checking for RAW mode */
731 	if (ldcp->mode == LDC_MODE_RAW)
732 		return (0);
733 
734 	/* No seqid checking for version, RTS, RTR message */
735 	if (msg->ctrl == LDC_VER ||
736 	    msg->ctrl == LDC_RTS ||
737 	    msg->ctrl == LDC_RTR)
738 		return (0);
739 
740 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
741 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
742 		DWARN(ldcp->id,
743 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
744 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
745 		    (ldcp->last_msg_rcd + 1));
746 		return (EIO);
747 	}
748 
749 	return (0);
750 }
751 
752 
753 /*
754  * Process an incoming version ctrl message
755  */
756 static int
757 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
758 {
759 	int 		rv = 0, idx = ldcp->next_vidx;
760 	ldc_msg_t 	*pkt;
761 	uint64_t	tx_tail;
762 	ldc_ver_t	*rcvd_ver;
763 
764 	/* get the received version */
765 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
766 
767 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
768 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
769 
770 	/* Obtain Tx lock */
771 	mutex_enter(&ldcp->tx_lock);
772 
773 	switch (msg->stype) {
774 	case LDC_INFO:
775 
776 		/* get the current tail and pkt for the response */
777 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
778 		if (rv != 0) {
779 			DWARN(ldcp->id,
780 			    "i_ldc_process_VER: (0x%llx) err sending "
781 			    "version ACK/NACK\n", ldcp->id);
782 			i_ldc_reset(ldcp);
783 			mutex_exit(&ldcp->tx_lock);
784 			return (ECONNRESET);
785 		}
786 
787 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
788 		ZERO_PKT(pkt);
789 
790 		/* initialize the packet */
791 		pkt->type = LDC_CTRL;
792 		pkt->ctrl = LDC_VER;
793 
794 		for (;;) {
795 
796 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
797 			    rcvd_ver->major, rcvd_ver->minor,
798 			    ldc_versions[idx].major, ldc_versions[idx].minor);
799 
800 			if (rcvd_ver->major == ldc_versions[idx].major) {
801 				/* major version match - ACK version */
802 				pkt->stype = LDC_ACK;
803 
804 				/*
805 				 * lower minor version to the one this endpt
806 				 * supports, if necessary
807 				 */
808 				if (rcvd_ver->minor > ldc_versions[idx].minor)
809 					rcvd_ver->minor =
810 						ldc_versions[idx].minor;
811 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
812 
813 				break;
814 			}
815 
816 			if (rcvd_ver->major > ldc_versions[idx].major) {
817 
818 				D1(ldcp->id, "i_ldc_process_VER: using next"
819 				    " lower idx=%d, v%u.%u\n", idx,
820 				    ldc_versions[idx].major,
821 				    ldc_versions[idx].minor);
822 
823 				/* nack with next lower version */
824 				pkt->stype = LDC_NACK;
825 				bcopy(&ldc_versions[idx], pkt->udata,
826 				    sizeof (ldc_versions[idx]));
827 				ldcp->next_vidx = idx;
828 				break;
829 			}
830 
831 			/* next major version */
832 			idx++;
833 
834 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
835 
836 			if (idx == LDC_NUM_VERS) {
837 				/* no version match - send NACK */
838 				pkt->stype = LDC_NACK;
839 				bzero(pkt->udata, sizeof (ldc_ver_t));
840 				ldcp->next_vidx = 0;
841 				break;
842 			}
843 		}
844 
845 		/* initiate the send by calling into HV and set the new tail */
846 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
847 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
848 
849 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
850 		if (rv == 0) {
851 			ldcp->tx_tail = tx_tail;
852 			if (pkt->stype == LDC_ACK) {
853 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
854 				    " version ACK\n", ldcp->id);
855 				/* Save the ACK'd version */
856 				ldcp->version.major = rcvd_ver->major;
857 				ldcp->version.minor = rcvd_ver->minor;
858 				ldcp->hstate |= TS_RCVD_VER;
859 				ldcp->tstate |= TS_VER_DONE;
860 				DWARN(DBG_ALL_LDCS,
861 				    "(0x%llx) Agreed on version v%u.%u\n",
862 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
863 			}
864 		} else {
865 			DWARN(ldcp->id,
866 			    "i_ldc_process_VER: (0x%llx) error sending "
867 			    "ACK/NACK\n", ldcp->id);
868 			i_ldc_reset(ldcp);
869 			mutex_exit(&ldcp->tx_lock);
870 			return (ECONNRESET);
871 		}
872 
873 		break;
874 
875 	case LDC_ACK:
876 		/* SUCCESS - we have agreed on a version */
877 		ldcp->version.major = rcvd_ver->major;
878 		ldcp->version.minor = rcvd_ver->minor;
879 		ldcp->tstate |= TS_VER_DONE;
880 
881 		D1(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n",
882 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
883 
884 		/* initiate RTS-RTR-RDX handshake */
885 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
886 		if (rv) {
887 			DWARN(ldcp->id,
888 			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
889 			    ldcp->id);
890 			i_ldc_reset(ldcp);
891 			mutex_exit(&ldcp->tx_lock);
892 			return (ECONNRESET);
893 		}
894 
895 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
896 		ZERO_PKT(pkt);
897 
898 		pkt->type = LDC_CTRL;
899 		pkt->stype = LDC_INFO;
900 		pkt->ctrl = LDC_RTS;
901 		pkt->env = ldcp->mode;
902 		if (ldcp->mode != LDC_MODE_RAW)
903 			pkt->seqid = LDC_INIT_SEQID;
904 
905 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
906 
907 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
908 
909 		/* initiate the send by calling into HV and set the new tail */
910 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
911 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
912 
913 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
914 		if (rv) {
915 			D2(ldcp->id,
916 			    "i_ldc_process_VER: (0x%llx) no listener\n",
917 			    ldcp->id);
918 			i_ldc_reset(ldcp);
919 			mutex_exit(&ldcp->tx_lock);
920 			return (ECONNRESET);
921 		}
922 
923 		ldcp->tx_tail = tx_tail;
924 		ldcp->hstate |= TS_SENT_RTS;
925 
926 		break;
927 
928 	case LDC_NACK:
929 		/* check if version in NACK is zero */
930 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
931 			/* version handshake failure */
932 			DWARN(DBG_ALL_LDCS,
933 			    "i_ldc_process_VER: (0x%llx) no version match\n",
934 			    ldcp->id);
935 			i_ldc_reset(ldcp);
936 			mutex_exit(&ldcp->tx_lock);
937 			return (ECONNRESET);
938 		}
939 
940 		/* get the current tail and pkt for the response */
941 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
942 		if (rv != 0) {
943 			cmn_err(CE_NOTE,
944 			    "i_ldc_process_VER: (0x%lx) err sending "
945 			    "version ACK/NACK\n", ldcp->id);
946 			i_ldc_reset(ldcp);
947 			mutex_exit(&ldcp->tx_lock);
948 			return (ECONNRESET);
949 		}
950 
951 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
952 		ZERO_PKT(pkt);
953 
954 		/* initialize the packet */
955 		pkt->type = LDC_CTRL;
956 		pkt->ctrl = LDC_VER;
957 		pkt->stype = LDC_INFO;
958 
959 		/* check ver in NACK msg has a match */
960 		for (;;) {
961 			if (rcvd_ver->major == ldc_versions[idx].major) {
962 				/*
963 				 * major version match - resubmit request
964 				 * if lower minor version to the one this endpt
965 				 * supports, if necessary
966 				 */
967 				if (rcvd_ver->minor > ldc_versions[idx].minor)
968 					rcvd_ver->minor =
969 						ldc_versions[idx].minor;
970 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
971 				break;
972 
973 			}
974 
975 			if (rcvd_ver->major > ldc_versions[idx].major) {
976 
977 				D1(ldcp->id, "i_ldc_process_VER: using next"
978 				    " lower idx=%d, v%u.%u\n", idx,
979 				    ldc_versions[idx].major,
980 				    ldc_versions[idx].minor);
981 
982 				/* send next lower version */
983 				bcopy(&ldc_versions[idx], pkt->udata,
984 				    sizeof (ldc_versions[idx]));
985 				ldcp->next_vidx = idx;
986 				break;
987 			}
988 
989 			/* next version */
990 			idx++;
991 
992 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
993 
994 			if (idx == LDC_NUM_VERS) {
995 				/* no version match - terminate */
996 				ldcp->next_vidx = 0;
997 				mutex_exit(&ldcp->tx_lock);
998 				return (ECONNRESET);
999 			}
1000 		}
1001 
1002 		/* initiate the send by calling into HV and set the new tail */
1003 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1004 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1005 
1006 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1007 		if (rv == 0) {
1008 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
1009 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
1010 			    ldc_versions[idx].minor);
1011 			ldcp->tx_tail = tx_tail;
1012 		} else {
1013 			cmn_err(CE_NOTE,
1014 			    "i_ldc_process_VER: (0x%lx) error sending version"
1015 			    "INFO\n", ldcp->id);
1016 			i_ldc_reset(ldcp);
1017 			mutex_exit(&ldcp->tx_lock);
1018 			return (ECONNRESET);
1019 		}
1020 
1021 		break;
1022 	}
1023 
1024 	mutex_exit(&ldcp->tx_lock);
1025 	return (rv);
1026 }
1027 
1028 
1029 /*
1030  * Process an incoming RTS ctrl message
1031  */
1032 static int
1033 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1034 {
1035 	int 		rv = 0;
1036 	ldc_msg_t 	*pkt;
1037 	uint64_t	tx_tail;
1038 	boolean_t	sent_NACK = B_FALSE;
1039 
1040 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1041 
1042 	switch (msg->stype) {
1043 	case LDC_NACK:
1044 		DWARN(ldcp->id,
1045 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1046 		    ldcp->id);
1047 
1048 		/* Reset the channel -- as we cannot continue */
1049 		mutex_enter(&ldcp->tx_lock);
1050 		i_ldc_reset(ldcp);
1051 		mutex_exit(&ldcp->tx_lock);
1052 		rv = ECONNRESET;
1053 		break;
1054 
1055 	case LDC_INFO:
1056 
1057 		/* check mode */
1058 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1059 			cmn_err(CE_NOTE,
1060 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1061 			    ldcp->id);
1062 			/*
1063 			 * send NACK in response to MODE message
1064 			 * get the current tail for the response
1065 			 */
1066 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1067 			if (rv) {
1068 				/* if cannot send NACK - reset channel */
1069 				mutex_enter(&ldcp->tx_lock);
1070 				i_ldc_reset(ldcp);
1071 				mutex_exit(&ldcp->tx_lock);
1072 				rv = ECONNRESET;
1073 				break;
1074 			}
1075 			sent_NACK = B_TRUE;
1076 		}
1077 		break;
1078 	default:
1079 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1080 		    ldcp->id);
1081 		mutex_enter(&ldcp->tx_lock);
1082 		i_ldc_reset(ldcp);
1083 		mutex_exit(&ldcp->tx_lock);
1084 		rv = ECONNRESET;
1085 		break;
1086 	}
1087 
1088 	/*
1089 	 * If either the connection was reset (when rv != 0) or
1090 	 * a NACK was sent, we return. In the case of a NACK
1091 	 * we dont want to consume the packet that came in but
1092 	 * not record that we received the RTS
1093 	 */
1094 	if (rv || sent_NACK)
1095 		return (rv);
1096 
1097 	/* record RTS received */
1098 	ldcp->hstate |= TS_RCVD_RTS;
1099 
1100 	/* store initial SEQID info */
1101 	ldcp->last_msg_snt = msg->seqid;
1102 
1103 	/* Obtain Tx lock */
1104 	mutex_enter(&ldcp->tx_lock);
1105 
1106 	/* get the current tail for the response */
1107 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1108 	if (rv != 0) {
1109 		cmn_err(CE_NOTE,
1110 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1111 		    ldcp->id);
1112 		i_ldc_reset(ldcp);
1113 		mutex_exit(&ldcp->tx_lock);
1114 		return (ECONNRESET);
1115 	}
1116 
1117 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1118 	ZERO_PKT(pkt);
1119 
1120 	/* initialize the packet */
1121 	pkt->type = LDC_CTRL;
1122 	pkt->stype = LDC_INFO;
1123 	pkt->ctrl = LDC_RTR;
1124 	pkt->env = ldcp->mode;
1125 	if (ldcp->mode != LDC_MODE_RAW)
1126 		pkt->seqid = LDC_INIT_SEQID;
1127 
1128 	ldcp->last_msg_rcd = msg->seqid;
1129 
1130 	/* initiate the send by calling into HV and set the new tail */
1131 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1132 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1133 
1134 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1135 	if (rv == 0) {
1136 		D2(ldcp->id,
1137 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1138 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1139 
1140 		ldcp->tx_tail = tx_tail;
1141 		ldcp->hstate |= TS_SENT_RTR;
1142 
1143 	} else {
1144 		cmn_err(CE_NOTE,
1145 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1146 		    ldcp->id);
1147 		i_ldc_reset(ldcp);
1148 		mutex_exit(&ldcp->tx_lock);
1149 		return (ECONNRESET);
1150 	}
1151 
1152 	mutex_exit(&ldcp->tx_lock);
1153 	return (0);
1154 }
1155 
1156 /*
1157  * Process an incoming RTR ctrl message
1158  */
1159 static int
1160 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1161 {
1162 	int 		rv = 0;
1163 	boolean_t	sent_NACK = B_FALSE;
1164 
1165 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1166 
1167 	switch (msg->stype) {
1168 	case LDC_NACK:
1169 		/* RTR NACK received */
1170 		DWARN(ldcp->id,
1171 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1172 		    ldcp->id);
1173 
1174 		/* Reset the channel -- as we cannot continue */
1175 		mutex_enter(&ldcp->tx_lock);
1176 		i_ldc_reset(ldcp);
1177 		mutex_exit(&ldcp->tx_lock);
1178 		rv = ECONNRESET;
1179 
1180 		break;
1181 
1182 	case LDC_INFO:
1183 
1184 		/* check mode */
1185 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1186 			DWARN(ldcp->id,
1187 			    "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
1188 			    ldcp->id);
1189 			/*
1190 			 * send NACK in response to MODE message
1191 			 * get the current tail for the response
1192 			 */
1193 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1194 			if (rv) {
1195 				/* if cannot send NACK - reset channel */
1196 				mutex_enter(&ldcp->tx_lock);
1197 				i_ldc_reset(ldcp);
1198 				mutex_exit(&ldcp->tx_lock);
1199 				rv = ECONNRESET;
1200 				break;
1201 			}
1202 			sent_NACK = B_TRUE;
1203 		}
1204 		break;
1205 
1206 	default:
1207 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1208 		    ldcp->id);
1209 
1210 		/* Reset the channel -- as we cannot continue */
1211 		mutex_enter(&ldcp->tx_lock);
1212 		i_ldc_reset(ldcp);
1213 		mutex_exit(&ldcp->tx_lock);
1214 		rv = ECONNRESET;
1215 		break;
1216 	}
1217 
1218 	/*
1219 	 * If either the connection was reset (when rv != 0) or
1220 	 * a NACK was sent, we return. In the case of a NACK
1221 	 * we dont want to consume the packet that came in but
1222 	 * not record that we received the RTR
1223 	 */
1224 	if (rv || sent_NACK)
1225 		return (rv);
1226 
1227 	ldcp->last_msg_snt = msg->seqid;
1228 	ldcp->hstate |= TS_RCVD_RTR;
1229 
1230 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1231 	if (rv) {
1232 		cmn_err(CE_NOTE,
1233 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1234 		    ldcp->id);
1235 		mutex_enter(&ldcp->tx_lock);
1236 		i_ldc_reset(ldcp);
1237 		mutex_exit(&ldcp->tx_lock);
1238 		return (ECONNRESET);
1239 	}
1240 	D2(ldcp->id,
1241 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1242 
1243 	ldcp->hstate |= TS_SENT_RDX;
1244 	ldcp->tstate |= TS_HSHAKE_DONE;
1245 	ldcp->status = LDC_UP;
1246 
1247 	DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1248 
1249 	return (0);
1250 }
1251 
1252 
1253 /*
1254  * Process an incoming RDX ctrl message
1255  */
1256 static int
1257 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1258 {
1259 	int	rv = 0;
1260 
1261 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1262 
1263 	switch (msg->stype) {
1264 	case LDC_NACK:
1265 		/* RDX NACK received */
1266 		DWARN(ldcp->id,
1267 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1268 		    ldcp->id);
1269 
1270 		/* Reset the channel -- as we cannot continue */
1271 		mutex_enter(&ldcp->tx_lock);
1272 		i_ldc_reset(ldcp);
1273 		mutex_exit(&ldcp->tx_lock);
1274 		rv = ECONNRESET;
1275 
1276 		break;
1277 
1278 	case LDC_INFO:
1279 
1280 		/*
1281 		 * if channel is UP and a RDX received after data transmission
1282 		 * has commenced it is an error
1283 		 */
1284 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1285 			DWARN(DBG_ALL_LDCS,
1286 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1287 			    " - LDC reset\n", ldcp->id);
1288 			mutex_enter(&ldcp->tx_lock);
1289 			i_ldc_reset(ldcp);
1290 			mutex_exit(&ldcp->tx_lock);
1291 			return (ECONNRESET);
1292 		}
1293 
1294 		ldcp->hstate |= TS_RCVD_RDX;
1295 		ldcp->tstate |= TS_HSHAKE_DONE;
1296 		ldcp->status = LDC_UP;
1297 
1298 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1299 		break;
1300 
1301 	default:
1302 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1303 		    ldcp->id);
1304 
1305 		/* Reset the channel -- as we cannot continue */
1306 		mutex_enter(&ldcp->tx_lock);
1307 		i_ldc_reset(ldcp);
1308 		mutex_exit(&ldcp->tx_lock);
1309 		rv = ECONNRESET;
1310 		break;
1311 	}
1312 
1313 	return (rv);
1314 }
1315 
1316 /*
1317  * Process an incoming ACK for a data packet
1318  */
1319 static int
1320 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1321 {
1322 	int		rv;
1323 	uint64_t 	tx_head;
1324 	ldc_msg_t	*pkt;
1325 
1326 	/* Obtain Tx lock */
1327 	mutex_enter(&ldcp->tx_lock);
1328 
1329 	/*
1330 	 * Read the current Tx head and tail
1331 	 */
1332 	rv = hv_ldc_tx_get_state(ldcp->id,
1333 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1334 	if (rv != 0) {
1335 		cmn_err(CE_WARN,
1336 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1337 		    ldcp->id);
1338 
1339 		/* Reset the channel -- as we cannot continue */
1340 		i_ldc_reset(ldcp);
1341 		mutex_exit(&ldcp->tx_lock);
1342 		return (ECONNRESET);
1343 	}
1344 
1345 	/*
1346 	 * loop from where the previous ACK location was to the
1347 	 * current head location. This is how far the HV has
1348 	 * actually send pkts. Pkts between head and tail are
1349 	 * yet to be sent by HV.
1350 	 */
1351 	tx_head = ldcp->tx_ackd_head;
1352 	for (;;) {
1353 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1354 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1355 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1356 
1357 		if (pkt->seqid == msg->ackid) {
1358 			D2(ldcp->id,
1359 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1360 			    ldcp->id);
1361 			ldcp->last_ack_rcd = msg->ackid;
1362 			ldcp->tx_ackd_head = tx_head;
1363 			break;
1364 		}
1365 		if (tx_head == ldcp->tx_head) {
1366 			/* could not find packet */
1367 			DWARN(ldcp->id,
1368 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1369 			    ldcp->id);
1370 
1371 			/* Reset the channel -- as we cannot continue */
1372 			i_ldc_reset(ldcp);
1373 			mutex_exit(&ldcp->tx_lock);
1374 			return (ECONNRESET);
1375 		}
1376 	}
1377 
1378 	mutex_exit(&ldcp->tx_lock);
1379 	return (0);
1380 }
1381 
1382 /*
1383  * Process incoming control message
1384  * Return 0 - session can continue
1385  *        EAGAIN - reprocess packet - state was changed
1386  *	  ECONNRESET - channel was reset
1387  */
1388 static int
1389 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1390 {
1391 	int 		rv = 0;
1392 
1393 	switch (ldcp->tstate) {
1394 
1395 	case TS_OPEN:
1396 	case TS_READY:
1397 
1398 		switch (msg->ctrl & LDC_CTRL_MASK) {
1399 		case LDC_VER:
1400 			/* process version message */
1401 			rv = i_ldc_process_VER(ldcp, msg);
1402 			break;
1403 		default:
1404 			DWARN(ldcp->id,
1405 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1406 			    "tstate=0x%x\n", ldcp->id,
1407 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1408 			break;
1409 		}
1410 
1411 		break;
1412 
1413 	case TS_VREADY:
1414 
1415 		switch (msg->ctrl & LDC_CTRL_MASK) {
1416 		case LDC_VER:
1417 			/* peer is redoing version negotiation */
1418 			mutex_enter(&ldcp->tx_lock);
1419 			(void) i_ldc_txq_reconf(ldcp);
1420 			i_ldc_reset_state(ldcp);
1421 			mutex_exit(&ldcp->tx_lock);
1422 			rv = EAGAIN;
1423 			break;
1424 		case LDC_RTS:
1425 			/* process RTS message */
1426 			rv = i_ldc_process_RTS(ldcp, msg);
1427 			break;
1428 		case LDC_RTR:
1429 			/* process RTR message */
1430 			rv = i_ldc_process_RTR(ldcp, msg);
1431 			break;
1432 		case LDC_RDX:
1433 			/* process RDX message */
1434 			rv = i_ldc_process_RDX(ldcp, msg);
1435 			break;
1436 		default:
1437 			DWARN(ldcp->id,
1438 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1439 			    "tstate=0x%x\n", ldcp->id,
1440 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1441 			break;
1442 		}
1443 
1444 		break;
1445 
1446 	case TS_UP:
1447 
1448 		switch (msg->ctrl & LDC_CTRL_MASK) {
1449 		case LDC_VER:
1450 			DWARN(ldcp->id,
1451 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1452 			    "- LDC reset\n", ldcp->id);
1453 			/* peer is redoing version negotiation */
1454 			mutex_enter(&ldcp->tx_lock);
1455 			(void) i_ldc_txq_reconf(ldcp);
1456 			i_ldc_reset_state(ldcp);
1457 			mutex_exit(&ldcp->tx_lock);
1458 			rv = EAGAIN;
1459 			break;
1460 
1461 		case LDC_RDX:
1462 			/* process RDX message */
1463 			rv = i_ldc_process_RDX(ldcp, msg);
1464 			break;
1465 
1466 		default:
1467 			DWARN(ldcp->id,
1468 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1469 			    "tstate=0x%x\n", ldcp->id,
1470 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1471 			break;
1472 		}
1473 	}
1474 
1475 	return (rv);
1476 }
1477 
1478 /*
1479  * Register channel with the channel nexus
1480  */
1481 static int
1482 i_ldc_register_channel(ldc_chan_t *ldcp)
1483 {
1484 	int		rv = 0;
1485 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1486 
1487 	if (cinfo->dip == NULL) {
1488 		DWARN(ldcp->id,
1489 		    "i_ldc_register_channel: cnex has not registered\n");
1490 		return (EAGAIN);
1491 	}
1492 
1493 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1494 	if (rv) {
1495 		DWARN(ldcp->id,
1496 		    "i_ldc_register_channel: cannot register channel\n");
1497 		return (rv);
1498 	}
1499 
1500 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1501 	    i_ldc_tx_hdlr, ldcp, NULL);
1502 	if (rv) {
1503 		DWARN(ldcp->id,
1504 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1505 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1506 		return (rv);
1507 	}
1508 
1509 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1510 	    i_ldc_rx_hdlr, ldcp, NULL);
1511 	if (rv) {
1512 		DWARN(ldcp->id,
1513 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1514 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1515 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1516 		return (rv);
1517 	}
1518 
1519 	ldcp->tstate |= TS_CNEX_RDY;
1520 
1521 	return (0);
1522 }
1523 
1524 /*
1525  * Unregister a channel with the channel nexus
1526  */
1527 static int
1528 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1529 {
1530 	int		rv = 0;
1531 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1532 
1533 	if (cinfo->dip == NULL) {
1534 		DWARN(ldcp->id,
1535 		    "i_ldc_unregister_channel: cnex has not registered\n");
1536 		return (EAGAIN);
1537 	}
1538 
1539 	if (ldcp->tstate & TS_CNEX_RDY) {
1540 
1541 		/* Remove the Rx interrupt */
1542 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1543 		if (rv) {
1544 			DWARN(ldcp->id,
1545 			    "i_ldc_unregister_channel: err removing Rx intr\n");
1546 			return (rv);
1547 		}
1548 
1549 		/* Remove the Tx interrupt */
1550 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1551 		if (rv) {
1552 			DWARN(ldcp->id,
1553 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1554 			return (rv);
1555 		}
1556 
1557 		/* Unregister the channel */
1558 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1559 		if (rv) {
1560 			DWARN(ldcp->id,
1561 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1562 			return (rv);
1563 		}
1564 
1565 		ldcp->tstate &= ~TS_CNEX_RDY;
1566 	}
1567 
1568 	return (0);
1569 }
1570 
1571 
1572 /*
1573  * LDC transmit interrupt handler
1574  *    triggered for chanel up/down/reset events
1575  *    and Tx queue content changes
1576  */
1577 static uint_t
1578 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1579 {
1580 	_NOTE(ARGUNUSED(arg2))
1581 
1582 	int 		rv;
1583 	ldc_chan_t 	*ldcp;
1584 	boolean_t 	notify_client = B_FALSE;
1585 	uint64_t	notify_event = 0;
1586 
1587 	/* Get the channel for which interrupt was received */
1588 	ASSERT(arg1 != NULL);
1589 	ldcp = (ldc_chan_t *)arg1;
1590 
1591 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1592 	    ldcp->id, ldcp);
1593 
1594 	/* Lock channel */
1595 	mutex_enter(&ldcp->lock);
1596 
1597 	/* Obtain Tx lock */
1598 	mutex_enter(&ldcp->tx_lock);
1599 
1600 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1601 	    &ldcp->link_state);
1602 	if (rv) {
1603 		cmn_err(CE_WARN,
1604 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1605 		    ldcp->id, rv);
1606 		mutex_exit(&ldcp->tx_lock);
1607 		mutex_exit(&ldcp->lock);
1608 		return (DDI_INTR_CLAIMED);
1609 	}
1610 
1611 	/*
1612 	 * reset the channel state if the channel went down
1613 	 * (other side unconfigured queue) or channel was reset
1614 	 * (other side reconfigured its queue)
1615 	 */
1616 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1617 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link down\n", ldcp->id);
1618 		i_ldc_reset(ldcp);
1619 		notify_client = B_TRUE;
1620 		notify_event = LDC_EVT_DOWN;
1621 	}
1622 
1623 	if (ldcp->link_state == LDC_CHANNEL_RESET) {
1624 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link reset\n", ldcp->id);
1625 		i_ldc_reset(ldcp);
1626 		notify_client = B_TRUE;
1627 		notify_event = LDC_EVT_RESET;
1628 	}
1629 
1630 	if (ldcp->tstate == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) {
1631 		D1(ldcp->id, "i_ldc_tx_hdlr: channel link up\n", ldcp->id);
1632 		notify_client = B_TRUE;
1633 		notify_event = LDC_EVT_RESET;
1634 		ldcp->tstate |= TS_LINK_READY;
1635 		ldcp->status = LDC_READY;
1636 	}
1637 
1638 	/* if callbacks are disabled, do not notify */
1639 	if (!ldcp->cb_enabled)
1640 		notify_client = B_FALSE;
1641 
1642 	if (notify_client)
1643 		ldcp->cb_inprogress = B_TRUE;
1644 
1645 	/* Unlock channel */
1646 	mutex_exit(&ldcp->tx_lock);
1647 	mutex_exit(&ldcp->lock);
1648 
1649 	if (notify_client) {
1650 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1651 		if (rv) {
1652 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1653 			    "failure", ldcp->id);
1654 		}
1655 		mutex_enter(&ldcp->lock);
1656 		ldcp->cb_inprogress = B_FALSE;
1657 		mutex_exit(&ldcp->lock);
1658 	}
1659 
1660 	mutex_enter(&ldcp->lock);
1661 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1662 	mutex_exit(&ldcp->lock);
1663 
1664 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1665 
1666 	return (DDI_INTR_CLAIMED);
1667 }
1668 
1669 /*
1670  * LDC receive interrupt handler
1671  *    triggered for channel with data pending to read
1672  *    i.e. Rx queue content changes
1673  */
1674 static uint_t
1675 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1676 {
1677 	_NOTE(ARGUNUSED(arg2))
1678 
1679 	int		rv;
1680 	uint64_t 	rx_head, rx_tail;
1681 	ldc_msg_t 	*msg;
1682 	ldc_chan_t 	*ldcp;
1683 	boolean_t 	notify_client = B_FALSE;
1684 	uint64_t	notify_event = 0;
1685 	uint64_t	first_fragment = 0;
1686 
1687 	/* Get the channel for which interrupt was received */
1688 	if (arg1 == NULL) {
1689 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1690 		return (DDI_INTR_UNCLAIMED);
1691 	}
1692 
1693 	ldcp = (ldc_chan_t *)arg1;
1694 
1695 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1696 	    ldcp->id, ldcp);
1697 
1698 	/* Lock channel */
1699 	mutex_enter(&ldcp->lock);
1700 
1701 	/* mark interrupt as pending */
1702 	ldcp->intr_pending = B_TRUE;
1703 
1704 	/*
1705 	 * Read packet(s) from the queue
1706 	 */
1707 	for (;;) {
1708 
1709 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1710 		    &ldcp->link_state);
1711 		if (rv) {
1712 			cmn_err(CE_WARN,
1713 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1714 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1715 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1716 			mutex_exit(&ldcp->lock);
1717 			return (DDI_INTR_CLAIMED);
1718 		}
1719 
1720 		/*
1721 		 * reset the channel state if the channel went down
1722 		 * (other side unconfigured queue) or channel was reset
1723 		 * (other side reconfigured its queue
1724 		 */
1725 		if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1726 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link down\n",
1727 			    ldcp->id);
1728 			mutex_enter(&ldcp->tx_lock);
1729 			i_ldc_reset(ldcp);
1730 			mutex_exit(&ldcp->tx_lock);
1731 			notify_client = B_TRUE;
1732 			notify_event = LDC_EVT_DOWN;
1733 			break;
1734 		}
1735 		if (ldcp->link_state == LDC_CHANNEL_RESET) {
1736 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link reset\n",
1737 			    ldcp->id);
1738 			mutex_enter(&ldcp->tx_lock);
1739 			i_ldc_reset(ldcp);
1740 			mutex_exit(&ldcp->tx_lock);
1741 			notify_client = B_TRUE;
1742 			notify_event = LDC_EVT_RESET;
1743 		}
1744 
1745 		if (ldcp->tstate == TS_OPEN &&
1746 		    ldcp->link_state == LDC_CHANNEL_UP) {
1747 			D1(ldcp->id, "i_ldc_rx_hdlr: channel link up\n",
1748 			    ldcp->id);
1749 			notify_client = B_TRUE;
1750 			notify_event = LDC_EVT_RESET;
1751 			ldcp->tstate |= TS_LINK_READY;
1752 			ldcp->status = LDC_READY;
1753 		}
1754 
1755 		if (rx_head == rx_tail) {
1756 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1757 			    ldcp->id);
1758 			break;
1759 		}
1760 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1761 		    rx_head, rx_tail);
1762 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1763 		    ldcp->rx_q_va + rx_head);
1764 
1765 		/* get the message */
1766 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1767 
1768 		/* if channel is in RAW mode or data pkt, notify and return */
1769 		if (ldcp->mode == LDC_MODE_RAW) {
1770 			notify_client = B_TRUE;
1771 			notify_event |= LDC_EVT_READ;
1772 			break;
1773 		}
1774 
1775 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1776 
1777 			/* discard packet if channel is not up */
1778 			if (ldcp->tstate != TS_UP) {
1779 
1780 				/* move the head one position */
1781 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1782 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1783 
1784 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
1785 					break;
1786 
1787 				continue;
1788 			} else {
1789 				notify_client = B_TRUE;
1790 				notify_event |= LDC_EVT_READ;
1791 				break;
1792 			}
1793 		}
1794 
1795 		/* Check the sequence ID for the message received */
1796 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
1797 
1798 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
1799 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
1800 
1801 			/* Reset last_msg_rcd to start of message */
1802 			if (first_fragment != 0) {
1803 				ldcp->last_msg_rcd = first_fragment - 1;
1804 				first_fragment = 0;
1805 			}
1806 
1807 			/*
1808 			 * Send a NACK due to seqid mismatch
1809 			 */
1810 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
1811 			    (msg->ctrl & LDC_CTRL_MASK));
1812 
1813 			if (rv) {
1814 				cmn_err(CE_NOTE,
1815 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
1816 				    "CTRL/NACK msg\n", ldcp->id);
1817 
1818 				/* if cannot send NACK - reset channel */
1819 				mutex_enter(&ldcp->tx_lock);
1820 				i_ldc_reset(ldcp);
1821 				mutex_exit(&ldcp->tx_lock);
1822 				rv = ECONNRESET;
1823 				break;
1824 			}
1825 
1826 			/* purge receive queue */
1827 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
1828 			break;
1829 		}
1830 
1831 		/* record the message ID */
1832 		ldcp->last_msg_rcd = msg->seqid;
1833 
1834 		/* process control messages */
1835 		if (msg->type & LDC_CTRL) {
1836 			/* save current internal state */
1837 			uint64_t tstate = ldcp->tstate;
1838 
1839 			rv = i_ldc_ctrlmsg(ldcp, msg);
1840 			if (rv == EAGAIN) {
1841 				/* re-process pkt - state was adjusted */
1842 				continue;
1843 			}
1844 			if (rv == ECONNRESET) {
1845 				notify_client = B_TRUE;
1846 				notify_event = LDC_EVT_RESET;
1847 				break;
1848 			}
1849 
1850 			/*
1851 			 * control message processing was successful
1852 			 * channel transitioned to ready for communication
1853 			 */
1854 			if (rv == 0 && ldcp->tstate == TS_UP &&
1855 			    tstate != ldcp->tstate) {
1856 				notify_client = B_TRUE;
1857 				notify_event = LDC_EVT_UP;
1858 			}
1859 		}
1860 
1861 		/* process data ACKs */
1862 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
1863 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
1864 				notify_client = B_TRUE;
1865 				notify_event = LDC_EVT_RESET;
1866 				break;
1867 			}
1868 		}
1869 
1870 		/* move the head one position */
1871 		rx_head = (rx_head + LDC_PACKET_SIZE) %
1872 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1873 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
1874 			notify_client = B_TRUE;
1875 			notify_event = LDC_EVT_RESET;
1876 			break;
1877 		}
1878 
1879 	} /* for */
1880 
1881 	/* if callbacks are disabled, do not notify */
1882 	if (!ldcp->cb_enabled)
1883 		notify_client = B_FALSE;
1884 
1885 	if (notify_client)
1886 		ldcp->cb_inprogress = B_TRUE;
1887 
1888 	/* Unlock channel */
1889 	mutex_exit(&ldcp->lock);
1890 
1891 	if (notify_client) {
1892 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1893 		if (rv) {
1894 			DWARN(ldcp->id,
1895 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1896 			    ldcp->id);
1897 		}
1898 		mutex_enter(&ldcp->lock);
1899 		ldcp->cb_inprogress = B_FALSE;
1900 		mutex_exit(&ldcp->lock);
1901 	}
1902 
1903 	mutex_enter(&ldcp->lock);
1904 
1905 	/*
1906 	 * If there are data packets in the queue, the ldc_read will
1907 	 * clear interrupts after draining the queue, else clear interrupts
1908 	 */
1909 	if ((notify_event & LDC_EVT_READ) == 0) {
1910 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1911 	}
1912 
1913 	mutex_exit(&ldcp->lock);
1914 
1915 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1916 	return (DDI_INTR_CLAIMED);
1917 }
1918 
1919 
1920 /* -------------------------------------------------------------------------- */
1921 
1922 /*
1923  * LDC API functions
1924  */
1925 
1926 /*
1927  * Initialize the channel. Allocate internal structure and memory for
1928  * TX/RX queues, and initialize locks.
1929  */
1930 int
1931 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
1932 {
1933 	ldc_chan_t 	*ldcp;
1934 	int		rv, exit_val;
1935 	uint64_t	ra_base, nentries;
1936 
1937 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
1938 
1939 	if (attr == NULL) {
1940 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
1941 		return (EINVAL);
1942 	}
1943 	if (handle == NULL) {
1944 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
1945 		return (EINVAL);
1946 	}
1947 
1948 	/* check if channel is valid */
1949 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
1950 	if (rv == H_ECHANNEL) {
1951 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
1952 		return (EINVAL);
1953 	}
1954 
1955 	/* check if the channel has already been initialized */
1956 	mutex_enter(&ldcssp->lock);
1957 	ldcp = ldcssp->chan_list;
1958 	while (ldcp != NULL) {
1959 		if (ldcp->id == id) {
1960 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
1961 			    id);
1962 			mutex_exit(&ldcssp->lock);
1963 			return (EADDRINUSE);
1964 		}
1965 		ldcp = ldcp->next;
1966 	}
1967 	mutex_exit(&ldcssp->lock);
1968 
1969 	ASSERT(ldcp == NULL);
1970 
1971 	*handle = 0;
1972 
1973 	/* Allocate an ldcp structure */
1974 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
1975 
1976 	/*
1977 	 * Initialize the channel and Tx lock
1978 	 *
1979 	 * The channel 'lock' protects the entire channel and
1980 	 * should be acquired before initializing, resetting,
1981 	 * destroying or reading from a channel.
1982 	 *
1983 	 * The 'tx_lock' should be acquired prior to transmitting
1984 	 * data over the channel. The lock should also be acquired
1985 	 * prior to channel reconfiguration (in order to prevent
1986 	 * concurrent writes).
1987 	 *
1988 	 * ORDERING: When both locks are being acquired, to prevent
1989 	 * deadlocks, the channel lock should be always acquired prior
1990 	 * to the tx_lock.
1991 	 */
1992 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
1993 	mutex_init(&ldcp->tx_lock, NULL, MUTEX_DRIVER, NULL);
1994 
1995 	/* Initialize the channel */
1996 	ldcp->id = id;
1997 	ldcp->cb = NULL;
1998 	ldcp->cb_arg = NULL;
1999 	ldcp->cb_inprogress = B_FALSE;
2000 	ldcp->cb_enabled = B_FALSE;
2001 	ldcp->next = NULL;
2002 
2003 	/* Read attributes */
2004 	ldcp->mode = attr->mode;
2005 	ldcp->devclass = attr->devclass;
2006 	ldcp->devinst = attr->instance;
2007 
2008 	ldcp->rx_q_entries =
2009 		(attr->qlen > 0) ? attr->qlen : LDC_QUEUE_ENTRIES;
2010 	ldcp->tx_q_entries = ldcp->rx_q_entries;
2011 
2012 	D1(ldcp->id,
2013 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
2014 	    "instance=0x%llx,mode=%d, qlen=%d\n",
2015 	    ldcp->id, ldcp->devclass, ldcp->devinst,
2016 	    ldcp->mode, ldcp->rx_q_entries);
2017 
2018 	ldcp->next_vidx = 0;
2019 	ldcp->tstate = 0;
2020 	ldcp->hstate = 0;
2021 	ldcp->last_msg_snt = LDC_INIT_SEQID;
2022 	ldcp->last_ack_rcd = 0;
2023 	ldcp->last_msg_rcd = 0;
2024 
2025 	ldcp->stream_bufferp = NULL;
2026 	ldcp->exp_dring_list = NULL;
2027 	ldcp->imp_dring_list = NULL;
2028 	ldcp->mhdl_list = NULL;
2029 
2030 	/* Initialize payload size depending on whether channel is reliable */
2031 	switch (ldcp->mode) {
2032 	case LDC_MODE_RAW:
2033 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
2034 		ldcp->read_p = i_ldc_read_raw;
2035 		ldcp->write_p = i_ldc_write_raw;
2036 		ldcp->mtu = 0;
2037 		break;
2038 	case LDC_MODE_UNRELIABLE:
2039 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
2040 		ldcp->read_p = i_ldc_read_packet;
2041 		ldcp->write_p = i_ldc_write_packet;
2042 		ldcp->mtu = 0;
2043 		break;
2044 	case LDC_MODE_RELIABLE:
2045 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2046 		ldcp->read_p = i_ldc_read_packet;
2047 		ldcp->write_p = i_ldc_write_packet;
2048 		ldcp->mtu = 0;
2049 		break;
2050 	case LDC_MODE_STREAM:
2051 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
2052 
2053 		ldcp->stream_remains = 0;
2054 		ldcp->stream_offset = 0;
2055 		ldcp->mtu = LDC_STREAM_MTU;
2056 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
2057 		ldcp->read_p = i_ldc_read_stream;
2058 		ldcp->write_p = i_ldc_write_stream;
2059 		break;
2060 	default:
2061 		exit_val = EINVAL;
2062 		goto cleanup_on_exit;
2063 	}
2064 
2065 	/* Create a transmit queue */
2066 	ldcp->tx_q_va = (uint64_t)
2067 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2068 	if (ldcp->tx_q_va == NULL) {
2069 		cmn_err(CE_WARN,
2070 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
2071 		    ldcp->id);
2072 		exit_val = ENOMEM;
2073 		goto cleanup_on_exit;
2074 	}
2075 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
2076 
2077 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
2078 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
2079 
2080 	ldcp->tstate |= TS_TXQ_RDY;
2081 
2082 	/* Create a receive queue */
2083 	ldcp->rx_q_va = (uint64_t)
2084 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
2085 	if (ldcp->rx_q_va == NULL) {
2086 		cmn_err(CE_WARN,
2087 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
2088 		    ldcp->id);
2089 		exit_val = ENOMEM;
2090 		goto cleanup_on_exit;
2091 	}
2092 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
2093 
2094 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
2095 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
2096 
2097 	ldcp->tstate |= TS_RXQ_RDY;
2098 
2099 	/* Init descriptor ring and memory handle list lock */
2100 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2101 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
2102 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
2103 
2104 	/* mark status as INITialized */
2105 	ldcp->status = LDC_INIT;
2106 
2107 	/* Add to channel list */
2108 	mutex_enter(&ldcssp->lock);
2109 	ldcp->next = ldcssp->chan_list;
2110 	ldcssp->chan_list = ldcp;
2111 	ldcssp->channel_count++;
2112 	mutex_exit(&ldcssp->lock);
2113 
2114 	/* set the handle */
2115 	*handle = (ldc_handle_t)ldcp;
2116 
2117 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2118 
2119 	return (0);
2120 
2121 cleanup_on_exit:
2122 
2123 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2124 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2125 
2126 	if (ldcp->tstate & TS_TXQ_RDY)
2127 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2128 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2129 
2130 	if (ldcp->tstate & TS_RXQ_RDY)
2131 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2132 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2133 
2134 	mutex_destroy(&ldcp->tx_lock);
2135 	mutex_destroy(&ldcp->lock);
2136 
2137 	if (ldcp)
2138 		kmem_free(ldcp, sizeof (ldc_chan_t));
2139 
2140 	return (exit_val);
2141 }
2142 
2143 /*
2144  * Finalizes the LDC connection. It will return EBUSY if the
2145  * channel is open. A ldc_close() has to be done prior to
2146  * a ldc_fini operation. It frees TX/RX queues, associated
2147  * with the channel
2148  */
2149 int
2150 ldc_fini(ldc_handle_t handle)
2151 {
2152 	ldc_chan_t 	*ldcp;
2153 	ldc_chan_t 	*tmp_ldcp;
2154 	uint64_t 	id;
2155 
2156 	if (handle == NULL) {
2157 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2158 		return (EINVAL);
2159 	}
2160 	ldcp = (ldc_chan_t *)handle;
2161 	id = ldcp->id;
2162 
2163 	mutex_enter(&ldcp->lock);
2164 
2165 	if (ldcp->tstate > TS_INIT) {
2166 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2167 		    ldcp->id);
2168 		mutex_exit(&ldcp->lock);
2169 		return (EBUSY);
2170 	}
2171 
2172 	/* Remove from the channel list */
2173 	mutex_enter(&ldcssp->lock);
2174 	tmp_ldcp = ldcssp->chan_list;
2175 	if (tmp_ldcp == ldcp) {
2176 		ldcssp->chan_list = ldcp->next;
2177 		ldcp->next = NULL;
2178 	} else {
2179 		while (tmp_ldcp != NULL) {
2180 			if (tmp_ldcp->next == ldcp) {
2181 				tmp_ldcp->next = ldcp->next;
2182 				ldcp->next = NULL;
2183 				break;
2184 			}
2185 			tmp_ldcp = tmp_ldcp->next;
2186 		}
2187 		if (tmp_ldcp == NULL) {
2188 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2189 			mutex_exit(&ldcssp->lock);
2190 			mutex_exit(&ldcp->lock);
2191 			return (EINVAL);
2192 		}
2193 	}
2194 
2195 	ldcssp->channel_count--;
2196 
2197 	mutex_exit(&ldcssp->lock);
2198 
2199 	/* Free the map table for this channel */
2200 	if (ldcp->mtbl) {
2201 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2202 		contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2203 		mutex_destroy(&ldcp->mtbl->lock);
2204 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2205 	}
2206 
2207 	/* Destroy descriptor ring and memory handle list lock */
2208 	mutex_destroy(&ldcp->exp_dlist_lock);
2209 	mutex_destroy(&ldcp->imp_dlist_lock);
2210 	mutex_destroy(&ldcp->mlist_lock);
2211 
2212 	/* Free the stream buffer for STREAM_MODE */
2213 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2214 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2215 
2216 	/* Free the RX queue */
2217 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2218 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2219 	ldcp->tstate &= ~TS_RXQ_RDY;
2220 
2221 	/* Free the TX queue */
2222 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2223 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2224 	ldcp->tstate &= ~TS_TXQ_RDY;
2225 
2226 
2227 	mutex_exit(&ldcp->lock);
2228 
2229 	/* Destroy mutex */
2230 	mutex_destroy(&ldcp->tx_lock);
2231 	mutex_destroy(&ldcp->lock);
2232 
2233 	/* free channel structure */
2234 	kmem_free(ldcp, sizeof (ldc_chan_t));
2235 
2236 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2237 
2238 	return (0);
2239 }
2240 
2241 /*
2242  * Open the LDC channel for use. It registers the TX/RX queues
2243  * with the Hypervisor. It also specifies the interrupt number
2244  * and target CPU for this channel
2245  */
2246 int
2247 ldc_open(ldc_handle_t handle)
2248 {
2249 	ldc_chan_t 	*ldcp;
2250 	int 		rv;
2251 
2252 	if (handle == NULL) {
2253 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2254 		return (EINVAL);
2255 	}
2256 
2257 	ldcp = (ldc_chan_t *)handle;
2258 
2259 	mutex_enter(&ldcp->lock);
2260 
2261 	if (ldcp->tstate < TS_INIT) {
2262 		DWARN(ldcp->id,
2263 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2264 		mutex_exit(&ldcp->lock);
2265 		return (EFAULT);
2266 	}
2267 	if (ldcp->tstate >= TS_OPEN) {
2268 		DWARN(ldcp->id,
2269 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2270 		mutex_exit(&ldcp->lock);
2271 		return (EFAULT);
2272 	}
2273 
2274 	/*
2275 	 * Unregister/Register the tx queue with the hypervisor
2276 	 */
2277 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2278 	if (rv) {
2279 		cmn_err(CE_WARN,
2280 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2281 		    ldcp->id);
2282 		mutex_exit(&ldcp->lock);
2283 		return (EIO);
2284 	}
2285 
2286 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2287 	if (rv) {
2288 		cmn_err(CE_WARN,
2289 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2290 		    ldcp->id);
2291 		mutex_exit(&ldcp->lock);
2292 		return (EIO);
2293 	}
2294 
2295 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2296 	    ldcp->id);
2297 
2298 	/*
2299 	 * Unregister/Register the rx queue with the hypervisor
2300 	 */
2301 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2302 	if (rv) {
2303 		cmn_err(CE_WARN,
2304 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2305 		    ldcp->id);
2306 		mutex_exit(&ldcp->lock);
2307 		return (EIO);
2308 	}
2309 
2310 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2311 	if (rv) {
2312 		cmn_err(CE_WARN,
2313 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2314 		    ldcp->id);
2315 		mutex_exit(&ldcp->lock);
2316 		return (EIO);
2317 	}
2318 
2319 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2320 	    ldcp->id);
2321 
2322 	ldcp->tstate |= TS_QCONF_RDY;
2323 
2324 	/* Register the channel with the channel nexus */
2325 	rv = i_ldc_register_channel(ldcp);
2326 	if (rv && rv != EAGAIN) {
2327 		cmn_err(CE_WARN,
2328 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2329 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2330 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2331 		mutex_exit(&ldcp->lock);
2332 		return (EIO);
2333 	}
2334 
2335 	/* mark channel in OPEN state */
2336 	ldcp->status = LDC_OPEN;
2337 
2338 	/* Read channel state */
2339 	rv = hv_ldc_tx_get_state(ldcp->id,
2340 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2341 	if (rv) {
2342 		cmn_err(CE_WARN,
2343 		    "ldc_open: (0x%lx) cannot read channel state\n",
2344 		    ldcp->id);
2345 		(void) i_ldc_unregister_channel(ldcp);
2346 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2347 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2348 		mutex_exit(&ldcp->lock);
2349 		return (EIO);
2350 	}
2351 
2352 	/*
2353 	 * set the ACKd head to current head location for reliable &
2354 	 * streaming mode
2355 	 */
2356 	ldcp->tx_ackd_head = ldcp->tx_head;
2357 
2358 	/* mark channel ready if HV report link is UP (peer alloc'd Rx queue) */
2359 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2360 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2361 		ldcp->tstate |= TS_LINK_READY;
2362 		ldcp->status = LDC_READY;
2363 	}
2364 
2365 	/*
2366 	 * if channel is being opened in RAW mode - no handshake is needed
2367 	 * switch the channel READY and UP state
2368 	 */
2369 	if (ldcp->mode == LDC_MODE_RAW) {
2370 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2371 		ldcp->status = LDC_UP;
2372 	}
2373 
2374 	mutex_exit(&ldcp->lock);
2375 
2376 	/*
2377 	 * Increment number of open channels
2378 	 */
2379 	mutex_enter(&ldcssp->lock);
2380 	ldcssp->channels_open++;
2381 	mutex_exit(&ldcssp->lock);
2382 
2383 	D1(ldcp->id,
2384 	    "ldc_open: (0x%llx) channel (0x%p) open for use (tstate=0x%x)\n",
2385 	    ldcp->id, ldcp, ldcp->tstate);
2386 
2387 	return (0);
2388 }
2389 
2390 /*
2391  * Close the LDC connection. It will return EBUSY if there
2392  * are memory segments or descriptor rings either bound to or
2393  * mapped over the channel
2394  */
2395 int
2396 ldc_close(ldc_handle_t handle)
2397 {
2398 	ldc_chan_t 	*ldcp;
2399 	int		rv = 0, retries = 0;
2400 	boolean_t	chk_done = B_FALSE;
2401 
2402 	if (handle == NULL) {
2403 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2404 		return (EINVAL);
2405 	}
2406 	ldcp = (ldc_chan_t *)handle;
2407 
2408 	mutex_enter(&ldcp->lock);
2409 
2410 	/* return error if channel is not open */
2411 	if (ldcp->tstate < TS_OPEN) {
2412 		DWARN(ldcp->id,
2413 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2414 		mutex_exit(&ldcp->lock);
2415 		return (EFAULT);
2416 	}
2417 
2418 	/* if any memory handles, drings, are bound or mapped cannot close */
2419 	if (ldcp->mhdl_list != NULL) {
2420 		DWARN(ldcp->id,
2421 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2422 		    ldcp->id);
2423 		mutex_exit(&ldcp->lock);
2424 		return (EBUSY);
2425 	}
2426 	if (ldcp->exp_dring_list != NULL) {
2427 		DWARN(ldcp->id,
2428 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2429 		    ldcp->id);
2430 		mutex_exit(&ldcp->lock);
2431 		return (EBUSY);
2432 	}
2433 	if (ldcp->imp_dring_list != NULL) {
2434 		DWARN(ldcp->id,
2435 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2436 		    ldcp->id);
2437 		mutex_exit(&ldcp->lock);
2438 		return (EBUSY);
2439 	}
2440 
2441 	/* Obtain Tx lock */
2442 	mutex_enter(&ldcp->tx_lock);
2443 
2444 	/*
2445 	 * Wait for pending transmits to complete i.e Tx queue to drain
2446 	 * if there are pending pkts - wait 1 ms and retry again
2447 	 */
2448 	for (;;) {
2449 
2450 		rv = hv_ldc_tx_get_state(ldcp->id,
2451 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2452 		if (rv) {
2453 			cmn_err(CE_WARN,
2454 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2455 			mutex_exit(&ldcp->tx_lock);
2456 			mutex_exit(&ldcp->lock);
2457 			return (EIO);
2458 		}
2459 
2460 		if (ldcp->tx_head == ldcp->tx_tail ||
2461 		    ldcp->link_state != LDC_CHANNEL_UP) {
2462 			break;
2463 		}
2464 
2465 		if (chk_done) {
2466 			DWARN(ldcp->id,
2467 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2468 			    ldcp->id);
2469 			break;
2470 		}
2471 
2472 		/* wait for one ms and try again */
2473 		delay(drv_usectohz(1000));
2474 		chk_done = B_TRUE;
2475 	}
2476 
2477 	/*
2478 	 * Unregister the channel with the nexus
2479 	 */
2480 	while ((rv = i_ldc_unregister_channel(ldcp)) != 0) {
2481 
2482 		mutex_exit(&ldcp->tx_lock);
2483 		mutex_exit(&ldcp->lock);
2484 
2485 		/* if any error other than EAGAIN return back */
2486 		if (rv != EAGAIN || retries >= LDC_MAX_RETRIES) {
2487 			cmn_err(CE_WARN,
2488 			    "ldc_close: (0x%lx) unregister failed, %d\n",
2489 			    ldcp->id, rv);
2490 			return (rv);
2491 		}
2492 
2493 		/*
2494 		 * As there could be pending interrupts we need
2495 		 * to wait and try again
2496 		 */
2497 		drv_usecwait(LDC_DELAY);
2498 		mutex_enter(&ldcp->lock);
2499 		mutex_enter(&ldcp->tx_lock);
2500 		retries++;
2501 	}
2502 
2503 	/*
2504 	 * Unregister queues
2505 	 */
2506 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2507 	if (rv) {
2508 		cmn_err(CE_WARN,
2509 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2510 		    ldcp->id);
2511 		mutex_exit(&ldcp->tx_lock);
2512 		mutex_exit(&ldcp->lock);
2513 		return (EIO);
2514 	}
2515 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2516 	if (rv) {
2517 		cmn_err(CE_WARN,
2518 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2519 		    ldcp->id);
2520 		mutex_exit(&ldcp->tx_lock);
2521 		mutex_exit(&ldcp->lock);
2522 		return (EIO);
2523 	}
2524 
2525 	ldcp->tstate &= ~TS_QCONF_RDY;
2526 
2527 	/* Reset channel state information */
2528 	i_ldc_reset_state(ldcp);
2529 
2530 	/* Mark channel as down and in initialized state */
2531 	ldcp->tx_ackd_head = 0;
2532 	ldcp->tx_head = 0;
2533 	ldcp->tstate = TS_INIT;
2534 	ldcp->status = LDC_INIT;
2535 
2536 	mutex_exit(&ldcp->tx_lock);
2537 	mutex_exit(&ldcp->lock);
2538 
2539 	/* Decrement number of open channels */
2540 	mutex_enter(&ldcssp->lock);
2541 	ldcssp->channels_open--;
2542 	mutex_exit(&ldcssp->lock);
2543 
2544 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2545 
2546 	return (0);
2547 }
2548 
2549 /*
2550  * Register channel callback
2551  */
2552 int
2553 ldc_reg_callback(ldc_handle_t handle,
2554     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2555 {
2556 	ldc_chan_t *ldcp;
2557 
2558 	if (handle == NULL) {
2559 		DWARN(DBG_ALL_LDCS,
2560 		    "ldc_reg_callback: invalid channel handle\n");
2561 		return (EINVAL);
2562 	}
2563 	if (((uint64_t)cb) < KERNELBASE) {
2564 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2565 		return (EINVAL);
2566 	}
2567 	ldcp = (ldc_chan_t *)handle;
2568 
2569 	mutex_enter(&ldcp->lock);
2570 
2571 	if (ldcp->cb) {
2572 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2573 		    ldcp->id);
2574 		mutex_exit(&ldcp->lock);
2575 		return (EIO);
2576 	}
2577 	if (ldcp->cb_inprogress) {
2578 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2579 		    ldcp->id);
2580 		mutex_exit(&ldcp->lock);
2581 		return (EWOULDBLOCK);
2582 	}
2583 
2584 	ldcp->cb = cb;
2585 	ldcp->cb_arg = arg;
2586 	ldcp->cb_enabled = B_TRUE;
2587 
2588 	D1(ldcp->id,
2589 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2590 	    ldcp->id);
2591 
2592 	mutex_exit(&ldcp->lock);
2593 
2594 	return (0);
2595 }
2596 
2597 /*
2598  * Unregister channel callback
2599  */
2600 int
2601 ldc_unreg_callback(ldc_handle_t handle)
2602 {
2603 	ldc_chan_t *ldcp;
2604 
2605 	if (handle == NULL) {
2606 		DWARN(DBG_ALL_LDCS,
2607 		    "ldc_unreg_callback: invalid channel handle\n");
2608 		return (EINVAL);
2609 	}
2610 	ldcp = (ldc_chan_t *)handle;
2611 
2612 	mutex_enter(&ldcp->lock);
2613 
2614 	if (ldcp->cb == NULL) {
2615 		DWARN(ldcp->id,
2616 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2617 		    ldcp->id);
2618 		mutex_exit(&ldcp->lock);
2619 		return (EIO);
2620 	}
2621 	if (ldcp->cb_inprogress) {
2622 		DWARN(ldcp->id,
2623 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2624 		    ldcp->id);
2625 		mutex_exit(&ldcp->lock);
2626 		return (EWOULDBLOCK);
2627 	}
2628 
2629 	ldcp->cb = NULL;
2630 	ldcp->cb_arg = NULL;
2631 	ldcp->cb_enabled = B_FALSE;
2632 
2633 	D1(ldcp->id,
2634 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2635 	    ldcp->id);
2636 
2637 	mutex_exit(&ldcp->lock);
2638 
2639 	return (0);
2640 }
2641 
2642 
2643 /*
2644  * Bring a channel up by initiating a handshake with the peer
2645  * This call is asynchronous. It will complete at a later point
2646  * in time when the peer responds back with an RTR.
2647  */
2648 int
2649 ldc_up(ldc_handle_t handle)
2650 {
2651 	int 		rv;
2652 	ldc_chan_t 	*ldcp;
2653 	ldc_msg_t 	*ldcmsg;
2654 	uint64_t 	tx_tail;
2655 
2656 	if (handle == NULL) {
2657 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2658 		return (EINVAL);
2659 	}
2660 	ldcp = (ldc_chan_t *)handle;
2661 
2662 	mutex_enter(&ldcp->lock);
2663 
2664 	if (ldcp->tstate == TS_UP) {
2665 		D2(ldcp->id,
2666 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2667 		    ldcp->id);
2668 		mutex_exit(&ldcp->lock);
2669 		return (0);
2670 	}
2671 
2672 	/* if the channel is in RAW mode - mark it as UP, if READY */
2673 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2674 		ldcp->tstate = TS_UP;
2675 		mutex_exit(&ldcp->lock);
2676 		return (0);
2677 	}
2678 
2679 	/* Don't start another handshake if there is one in progress */
2680 	if (ldcp->hstate) {
2681 		D2(ldcp->id,
2682 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2683 		    ldcp->id);
2684 		mutex_exit(&ldcp->lock);
2685 		return (0);
2686 	}
2687 
2688 	mutex_enter(&ldcp->tx_lock);
2689 
2690 	/* get the current tail for the LDC msg */
2691 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2692 	if (rv) {
2693 		DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2694 		    ldcp->id);
2695 		mutex_exit(&ldcp->tx_lock);
2696 		mutex_exit(&ldcp->lock);
2697 		return (ECONNREFUSED);
2698 	}
2699 
2700 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2701 	ZERO_PKT(ldcmsg);
2702 
2703 	ldcmsg->type = LDC_CTRL;
2704 	ldcmsg->stype = LDC_INFO;
2705 	ldcmsg->ctrl = LDC_VER;
2706 	ldcp->next_vidx = 0;
2707 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2708 
2709 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2710 
2711 	/* initiate the send by calling into HV and set the new tail */
2712 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2713 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2714 
2715 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2716 	if (rv) {
2717 		DWARN(ldcp->id,
2718 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2719 		    ldcp->id, rv);
2720 		mutex_exit(&ldcp->tx_lock);
2721 		mutex_exit(&ldcp->lock);
2722 		return (rv);
2723 	}
2724 
2725 	ldcp->hstate |= TS_SENT_VER;
2726 	ldcp->tx_tail = tx_tail;
2727 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2728 
2729 	mutex_exit(&ldcp->tx_lock);
2730 	mutex_exit(&ldcp->lock);
2731 
2732 	return (rv);
2733 }
2734 
2735 
2736 /*
2737  * Reset a channel by re-registering the Rx queues
2738  */
2739 int
2740 ldc_reset(ldc_handle_t handle)
2741 {
2742 	ldc_chan_t 	*ldcp;
2743 
2744 	if (handle == NULL) {
2745 		DWARN(DBG_ALL_LDCS, "ldc_reset: invalid channel handle\n");
2746 		return (EINVAL);
2747 	}
2748 	ldcp = (ldc_chan_t *)handle;
2749 
2750 	mutex_enter(&ldcp->lock);
2751 	mutex_enter(&ldcp->tx_lock);
2752 	i_ldc_reset(ldcp);
2753 	mutex_exit(&ldcp->tx_lock);
2754 	mutex_exit(&ldcp->lock);
2755 
2756 	return (0);
2757 }
2758 
2759 /*
2760  * Get the current channel status
2761  */
2762 int
2763 ldc_status(ldc_handle_t handle, ldc_status_t *status)
2764 {
2765 	ldc_chan_t *ldcp;
2766 
2767 	if (handle == NULL || status == NULL) {
2768 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
2769 		return (EINVAL);
2770 	}
2771 	ldcp = (ldc_chan_t *)handle;
2772 
2773 	*status = ((ldc_chan_t *)handle)->status;
2774 
2775 	D1(ldcp->id,
2776 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
2777 	return (0);
2778 }
2779 
2780 
2781 /*
2782  * Set the channel's callback mode - enable/disable callbacks
2783  */
2784 int
2785 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
2786 {
2787 	ldc_chan_t 	*ldcp;
2788 
2789 	if (handle == NULL) {
2790 		DWARN(DBG_ALL_LDCS,
2791 		    "ldc_set_intr_mode: invalid channel handle\n");
2792 		return (EINVAL);
2793 	}
2794 	ldcp = (ldc_chan_t *)handle;
2795 
2796 	/*
2797 	 * Record no callbacks should be invoked
2798 	 */
2799 	mutex_enter(&ldcp->lock);
2800 
2801 	switch (cmode) {
2802 	case LDC_CB_DISABLE:
2803 		if (!ldcp->cb_enabled) {
2804 			DWARN(ldcp->id,
2805 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
2806 			    ldcp->id);
2807 			break;
2808 		}
2809 		ldcp->cb_enabled = B_FALSE;
2810 
2811 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
2812 		    ldcp->id);
2813 		break;
2814 
2815 	case LDC_CB_ENABLE:
2816 		if (ldcp->cb_enabled) {
2817 			DWARN(ldcp->id,
2818 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
2819 			    ldcp->id);
2820 			break;
2821 		}
2822 		ldcp->cb_enabled = B_TRUE;
2823 
2824 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
2825 		    ldcp->id);
2826 		break;
2827 	}
2828 
2829 	mutex_exit(&ldcp->lock);
2830 
2831 	return (0);
2832 }
2833 
2834 /*
2835  * Check to see if there are packets on the incoming queue
2836  * Will return isempty = B_FALSE if there are  packets
2837  */
2838 int
2839 ldc_chkq(ldc_handle_t handle, boolean_t *isempty)
2840 {
2841 	int 		rv;
2842 	uint64_t 	rx_head, rx_tail;
2843 	ldc_chan_t 	*ldcp;
2844 
2845 	if (handle == NULL) {
2846 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
2847 		return (EINVAL);
2848 	}
2849 	ldcp = (ldc_chan_t *)handle;
2850 
2851 	*isempty = B_TRUE;
2852 
2853 	mutex_enter(&ldcp->lock);
2854 
2855 	if (ldcp->tstate != TS_UP) {
2856 		D1(ldcp->id,
2857 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
2858 		mutex_exit(&ldcp->lock);
2859 		return (ECONNRESET);
2860 	}
2861 
2862 	/* Read packet(s) from the queue */
2863 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2864 	    &ldcp->link_state);
2865 	if (rv != 0) {
2866 		cmn_err(CE_WARN,
2867 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
2868 		mutex_exit(&ldcp->lock);
2869 		return (EIO);
2870 	}
2871 	/* reset the channel state if the channel went down */
2872 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
2873 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2874 		mutex_enter(&ldcp->tx_lock);
2875 		i_ldc_reset(ldcp);
2876 		mutex_exit(&ldcp->tx_lock);
2877 		mutex_exit(&ldcp->lock);
2878 		return (ECONNRESET);
2879 	}
2880 
2881 	if (rx_head != rx_tail) {
2882 		D1(ldcp->id, "ldc_chkq: (0x%llx) queue has pkt(s)\n", ldcp->id);
2883 		*isempty = B_FALSE;
2884 	}
2885 
2886 	mutex_exit(&ldcp->lock);
2887 
2888 	return (0);
2889 }
2890 
2891 
2892 /*
2893  * Read 'size' amount of bytes or less. If incoming buffer
2894  * is more than 'size', ENOBUFS is returned.
2895  *
2896  * On return, size contains the number of bytes read.
2897  */
2898 int
2899 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
2900 {
2901 	ldc_chan_t 	*ldcp;
2902 	uint64_t 	rx_head = 0, rx_tail = 0;
2903 	int		rv = 0, exit_val;
2904 
2905 	if (handle == NULL) {
2906 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
2907 		return (EINVAL);
2908 	}
2909 
2910 	ldcp = (ldc_chan_t *)handle;
2911 
2912 	/* channel lock */
2913 	mutex_enter(&ldcp->lock);
2914 
2915 	if (ldcp->tstate != TS_UP) {
2916 		DWARN(ldcp->id,
2917 		    "ldc_read: (0x%llx) channel is not in UP state\n",
2918 		    ldcp->id);
2919 		exit_val = ECONNRESET;
2920 	} else {
2921 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
2922 	}
2923 
2924 	/*
2925 	 * if queue has been drained - clear interrupt
2926 	 */
2927 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2928 	    &ldcp->link_state);
2929 	if (exit_val == 0 && rv == 0 && rx_head == rx_tail) {
2930 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2931 	}
2932 
2933 	mutex_exit(&ldcp->lock);
2934 	return (exit_val);
2935 }
2936 
2937 /*
2938  * Basic raw mondo read -
2939  * no interpretation of mondo contents at all.
2940  *
2941  * Enter and exit with ldcp->lock held by caller
2942  */
2943 static int
2944 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
2945 {
2946 	uint64_t 	q_size_mask;
2947 	ldc_msg_t 	*msgp;
2948 	uint8_t		*msgbufp;
2949 	int		rv = 0, space;
2950 	uint64_t 	rx_head, rx_tail;
2951 
2952 	space = *sizep;
2953 
2954 	if (space < LDC_PAYLOAD_SIZE_RAW)
2955 		return (ENOBUFS);
2956 
2957 	ASSERT(mutex_owned(&ldcp->lock));
2958 
2959 	/* compute mask for increment */
2960 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
2961 
2962 	/*
2963 	 * Read packet(s) from the queue
2964 	 */
2965 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2966 	    &ldcp->link_state);
2967 	if (rv != 0) {
2968 		cmn_err(CE_WARN,
2969 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
2970 		    ldcp->id);
2971 		return (EIO);
2972 	}
2973 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
2974 		" rxt=0x%llx, st=0x%llx\n",
2975 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
2976 
2977 	/* reset the channel state if the channel went down */
2978 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2979 		mutex_enter(&ldcp->tx_lock);
2980 		i_ldc_reset(ldcp);
2981 		mutex_exit(&ldcp->tx_lock);
2982 		return (ECONNRESET);
2983 	}
2984 
2985 	/*
2986 	 * Check for empty queue
2987 	 */
2988 	if (rx_head == rx_tail) {
2989 		*sizep = 0;
2990 		return (0);
2991 	}
2992 
2993 	/* get the message */
2994 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2995 
2996 	/* if channel is in RAW mode, copy data and return */
2997 	msgbufp = (uint8_t *)&(msgp->raw[0]);
2998 
2999 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
3000 
3001 	DUMP_PAYLOAD(ldcp->id, msgbufp);
3002 
3003 	*sizep = LDC_PAYLOAD_SIZE_RAW;
3004 
3005 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
3006 	rv = i_ldc_set_rx_head(ldcp, rx_head);
3007 
3008 	return (rv);
3009 }
3010 
3011 /*
3012  * Process LDC mondos to build larger packets
3013  * with either un-reliable or reliable delivery.
3014  *
3015  * Enter and exit with ldcp->lock held by caller
3016  */
3017 static int
3018 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3019 {
3020 	int		rv = 0;
3021 	uint64_t 	rx_head = 0, rx_tail = 0;
3022 	uint64_t 	curr_head = 0;
3023 	ldc_msg_t 	*msg;
3024 	caddr_t 	target;
3025 	size_t 		len = 0, bytes_read = 0;
3026 	int 		retries = 0;
3027 	uint64_t 	q_size_mask;
3028 	uint64_t	first_fragment = 0;
3029 
3030 	target = target_bufp;
3031 
3032 	ASSERT(mutex_owned(&ldcp->lock));
3033 
3034 	/* compute mask for increment */
3035 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
3036 
3037 	/*
3038 	 * Read packet(s) from the queue
3039 	 */
3040 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
3041 	    &ldcp->link_state);
3042 	if (rv != 0) {
3043 		cmn_err(CE_WARN,
3044 		    "ldc_read: (0x%lx) unable to read queue ptrs",
3045 		    ldcp->id);
3046 		return (EIO);
3047 	}
3048 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
3049 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
3050 
3051 	/* reset the channel state if the channel went down */
3052 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3053 		mutex_enter(&ldcp->tx_lock);
3054 		i_ldc_reset(ldcp);
3055 		mutex_exit(&ldcp->tx_lock);
3056 		return (ECONNRESET);
3057 	}
3058 
3059 	for (;;) {
3060 
3061 		if (curr_head == rx_tail) {
3062 			rv = hv_ldc_rx_get_state(ldcp->id,
3063 			    &rx_head, &rx_tail, &ldcp->link_state);
3064 			if (rv != 0) {
3065 				cmn_err(CE_WARN,
3066 				    "ldc_read: (0x%lx) cannot read queue ptrs",
3067 				    ldcp->id);
3068 				return (EIO);
3069 			}
3070 			/* reset the channel state if the channel went down */
3071 			if (ldcp->link_state == LDC_CHANNEL_DOWN) {
3072 				mutex_enter(&ldcp->tx_lock);
3073 				i_ldc_reset(ldcp);
3074 				mutex_exit(&ldcp->tx_lock);
3075 				return (ECONNRESET);
3076 			}
3077 		}
3078 
3079 		if (curr_head == rx_tail) {
3080 
3081 			/* If in the middle of a fragmented xfer */
3082 			if (first_fragment != 0) {
3083 
3084 				/* wait for ldc_delay usecs */
3085 				drv_usecwait(ldc_delay);
3086 
3087 				if (++retries < ldc_max_retries)
3088 					continue;
3089 
3090 				*sizep = 0;
3091 				ldcp->last_msg_rcd = first_fragment - 1;
3092 				DWARN(DBG_ALL_LDCS,
3093 					"ldc_read: (0x%llx) read timeout",
3094 					ldcp->id);
3095 				return (ETIMEDOUT);
3096 			}
3097 			*sizep = 0;
3098 			break;
3099 		}
3100 		retries = 0;
3101 
3102 		D2(ldcp->id,
3103 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
3104 		    ldcp->id, curr_head, rx_head, rx_tail);
3105 
3106 		/* get the message */
3107 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
3108 
3109 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
3110 		    ldcp->rx_q_va + curr_head);
3111 
3112 		/* Check the message ID for the message received */
3113 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
3114 
3115 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
3116 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
3117 
3118 			/* throw away data */
3119 			bytes_read = 0;
3120 
3121 			/* Reset last_msg_rcd to start of message */
3122 			if (first_fragment != 0) {
3123 				ldcp->last_msg_rcd = first_fragment - 1;
3124 				first_fragment = 0;
3125 			}
3126 			/*
3127 			 * Send a NACK -- invalid seqid
3128 			 * get the current tail for the response
3129 			 */
3130 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
3131 			    (msg->ctrl & LDC_CTRL_MASK));
3132 			if (rv) {
3133 				cmn_err(CE_NOTE,
3134 				    "ldc_read: (0x%lx) err sending "
3135 				    "NACK msg\n", ldcp->id);
3136 
3137 				/* if cannot send NACK - reset channel */
3138 				mutex_enter(&ldcp->tx_lock);
3139 				i_ldc_reset(ldcp);
3140 				mutex_exit(&ldcp->tx_lock);
3141 				rv = ECONNRESET;
3142 				break;
3143 			}
3144 
3145 			/* purge receive queue */
3146 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3147 
3148 			break;
3149 		}
3150 
3151 		/*
3152 		 * Process any messages of type CTRL messages
3153 		 * Future implementations should try to pass these to
3154 		 * LDC transport by resetting the intr state.
3155 		 *
3156 		 * NOTE: not done as a switch() as type can be both ctrl+data
3157 		 */
3158 		if (msg->type & LDC_CTRL) {
3159 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3160 				if (rv == EAGAIN)
3161 					continue;
3162 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3163 				*sizep = 0;
3164 				bytes_read = 0;
3165 				break;
3166 			}
3167 		}
3168 
3169 		/* process data ACKs */
3170 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3171 			if (rv = i_ldc_process_data_ACK(ldcp, msg)) {
3172 				*sizep = 0;
3173 				bytes_read = 0;
3174 				break;
3175 			}
3176 		}
3177 
3178 		/* process data messages */
3179 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3180 
3181 			uint8_t *msgbuf = (uint8_t *)(
3182 				(ldcp->mode == LDC_MODE_RELIABLE ||
3183 				ldcp->mode == LDC_MODE_STREAM)
3184 				? msg->rdata : msg->udata);
3185 
3186 			D2(ldcp->id,
3187 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3188 
3189 			/* get the packet length */
3190 			len = (msg->env & LDC_LEN_MASK);
3191 
3192 				/*
3193 				 * FUTURE OPTIMIZATION:
3194 				 * dont need to set q head for every
3195 				 * packet we read just need to do this when
3196 				 * we are done or need to wait for more
3197 				 * mondos to make a full packet - this is
3198 				 * currently expensive.
3199 				 */
3200 
3201 			if (first_fragment == 0) {
3202 
3203 				/*
3204 				 * first packets should always have the start
3205 				 * bit set (even for a single packet). If not
3206 				 * throw away the packet
3207 				 */
3208 				if (!(msg->env & LDC_FRAG_START)) {
3209 
3210 					DWARN(DBG_ALL_LDCS,
3211 					    "ldc_read: (0x%llx) not start - "
3212 					    "frag=%x\n", ldcp->id,
3213 					    (msg->env) & LDC_FRAG_MASK);
3214 
3215 					/* toss pkt, inc head, cont reading */
3216 					bytes_read = 0;
3217 					target = target_bufp;
3218 					curr_head =
3219 						(curr_head + LDC_PACKET_SIZE)
3220 						& q_size_mask;
3221 					if (rv = i_ldc_set_rx_head(ldcp,
3222 						curr_head))
3223 						break;
3224 
3225 					continue;
3226 				}
3227 
3228 				first_fragment = msg->seqid;
3229 			} else {
3230 				/* check to see if this is a pkt w/ START bit */
3231 				if (msg->env & LDC_FRAG_START) {
3232 					DWARN(DBG_ALL_LDCS,
3233 					    "ldc_read:(0x%llx) unexpected pkt"
3234 					    " env=0x%x discarding %d bytes,"
3235 					    " lastmsg=%d, currentmsg=%d\n",
3236 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3237 					    bytes_read, ldcp->last_msg_rcd,
3238 					    msg->seqid);
3239 
3240 					/* throw data we have read so far */
3241 					bytes_read = 0;
3242 					target = target_bufp;
3243 					first_fragment = msg->seqid;
3244 
3245 					if (rv = i_ldc_set_rx_head(ldcp,
3246 						curr_head))
3247 						break;
3248 				}
3249 			}
3250 
3251 			/* copy (next) pkt into buffer */
3252 			if (len <= (*sizep - bytes_read)) {
3253 				bcopy(msgbuf, target, len);
3254 				target += len;
3255 				bytes_read += len;
3256 			} else {
3257 				/*
3258 				 * there is not enough space in the buffer to
3259 				 * read this pkt. throw message away & continue
3260 				 * reading data from queue
3261 				 */
3262 				DWARN(DBG_ALL_LDCS,
3263 				    "ldc_read: (0x%llx) buffer too small, "
3264 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3265 				    curr_head, *sizep, bytes_read+len);
3266 
3267 				first_fragment = 0;
3268 				target = target_bufp;
3269 				bytes_read = 0;
3270 
3271 				/* throw away everything received so far */
3272 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3273 					break;
3274 
3275 				/* continue reading remaining pkts */
3276 				continue;
3277 			}
3278 		}
3279 
3280 		/* set the message id */
3281 		ldcp->last_msg_rcd = msg->seqid;
3282 
3283 		/* move the head one position */
3284 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3285 
3286 		if (msg->env & LDC_FRAG_STOP) {
3287 
3288 			/*
3289 			 * All pkts that are part of this fragmented transfer
3290 			 * have been read or this was a single pkt read
3291 			 * or there was an error
3292 			 */
3293 
3294 			/* set the queue head */
3295 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3296 				bytes_read = 0;
3297 
3298 			*sizep = bytes_read;
3299 
3300 			break;
3301 		}
3302 
3303 		/* advance head if it is a DATA ACK */
3304 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3305 
3306 			/* set the queue head */
3307 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3308 				bytes_read = 0;
3309 				break;
3310 			}
3311 
3312 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3313 			    ldcp->id, curr_head);
3314 		}
3315 
3316 	} /* for (;;) */
3317 
3318 
3319 	/*
3320 	 * If useful data was read - Send msg ACK
3321 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3322 	 */
3323 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3324 		ldcp->mode == LDC_MODE_STREAM)) {
3325 
3326 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3327 		if (rv) {
3328 			cmn_err(CE_NOTE,
3329 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3330 
3331 			/* if cannot send ACK - reset channel */
3332 			mutex_enter(&ldcp->tx_lock);
3333 			i_ldc_reset(ldcp);
3334 			mutex_exit(&ldcp->tx_lock);
3335 			rv = ECONNRESET;
3336 		}
3337 	}
3338 
3339 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3340 
3341 	return (rv);
3342 }
3343 
3344 /*
3345  * Use underlying reliable packet mechanism to fetch
3346  * and buffer incoming packets so we can hand them back as
3347  * a basic byte stream.
3348  *
3349  * Enter and exit with ldcp->lock held by caller
3350  */
3351 static int
3352 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3353 {
3354 	int	rv;
3355 	size_t	size;
3356 
3357 	ASSERT(mutex_owned(&ldcp->lock));
3358 
3359 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3360 		ldcp->id, *sizep);
3361 
3362 	if (ldcp->stream_remains == 0) {
3363 		size = ldcp->mtu;
3364 		rv = i_ldc_read_packet(ldcp,
3365 			(caddr_t)ldcp->stream_bufferp, &size);
3366 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3367 			ldcp->id, size);
3368 
3369 		if (rv != 0)
3370 			return (rv);
3371 
3372 		ldcp->stream_remains = size;
3373 		ldcp->stream_offset = 0;
3374 	}
3375 
3376 	size = MIN(ldcp->stream_remains, *sizep);
3377 
3378 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3379 	ldcp->stream_offset += size;
3380 	ldcp->stream_remains -= size;
3381 
3382 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3383 		ldcp->id, size);
3384 
3385 	*sizep = size;
3386 	return (0);
3387 }
3388 
3389 /*
3390  * Write specified amount of bytes to the channel
3391  * in multiple pkts of pkt_payload size. Each
3392  * packet is tagged with an unique packet ID in
3393  * the case of a reliable transport.
3394  *
3395  * On return, size contains the number of bytes written.
3396  */
3397 int
3398 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3399 {
3400 	ldc_chan_t	*ldcp;
3401 	int		rv = 0;
3402 
3403 	if (handle == NULL) {
3404 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3405 		return (EINVAL);
3406 	}
3407 	ldcp = (ldc_chan_t *)handle;
3408 
3409 	/* check if writes can occur */
3410 	if (!mutex_tryenter(&ldcp->tx_lock)) {
3411 		/*
3412 		 * Could not get the lock - channel could
3413 		 * be in the process of being unconfigured
3414 		 * or reader has encountered an error
3415 		 */
3416 		return (EAGAIN);
3417 	}
3418 
3419 	/* check if non-zero data to write */
3420 	if (buf == NULL || sizep == NULL) {
3421 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3422 		    ldcp->id);
3423 		mutex_exit(&ldcp->tx_lock);
3424 		return (EINVAL);
3425 	}
3426 
3427 	if (*sizep == 0) {
3428 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3429 		    ldcp->id);
3430 		mutex_exit(&ldcp->tx_lock);
3431 		return (0);
3432 	}
3433 
3434 	/* Check if channel is UP for data exchange */
3435 	if (ldcp->tstate != TS_UP) {
3436 		DWARN(ldcp->id,
3437 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3438 		    ldcp->id);
3439 		*sizep = 0;
3440 		rv = ECONNRESET;
3441 	} else {
3442 		rv = ldcp->write_p(ldcp, buf, sizep);
3443 	}
3444 
3445 	mutex_exit(&ldcp->tx_lock);
3446 
3447 	return (rv);
3448 }
3449 
3450 /*
3451  * Write a raw packet to the channel
3452  * On return, size contains the number of bytes written.
3453  */
3454 static int
3455 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3456 {
3457 	ldc_msg_t 	*ldcmsg;
3458 	uint64_t 	tx_head, tx_tail, new_tail;
3459 	int		rv = 0;
3460 	size_t		size;
3461 
3462 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3463 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3464 
3465 	size = *sizep;
3466 
3467 	/*
3468 	 * Check to see if the packet size is less than or
3469 	 * equal to packet size support in raw mode
3470 	 */
3471 	if (size > ldcp->pkt_payload) {
3472 		DWARN(ldcp->id,
3473 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3474 		    ldcp->id, *sizep);
3475 		*sizep = 0;
3476 		return (EMSGSIZE);
3477 	}
3478 
3479 	/* get the qptrs for the tx queue */
3480 	rv = hv_ldc_tx_get_state(ldcp->id,
3481 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3482 	if (rv != 0) {
3483 		cmn_err(CE_WARN,
3484 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3485 		*sizep = 0;
3486 		return (EIO);
3487 	}
3488 
3489 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3490 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3491 		DWARN(ldcp->id,
3492 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3493 
3494 		*sizep = 0;
3495 		if (mutex_tryenter(&ldcp->lock)) {
3496 			i_ldc_reset(ldcp);
3497 			mutex_exit(&ldcp->lock);
3498 		} else {
3499 			/*
3500 			 * Release Tx lock, and then reacquire channel
3501 			 * and Tx lock in correct order
3502 			 */
3503 			mutex_exit(&ldcp->tx_lock);
3504 			mutex_enter(&ldcp->lock);
3505 			mutex_enter(&ldcp->tx_lock);
3506 			i_ldc_reset(ldcp);
3507 			mutex_exit(&ldcp->lock);
3508 		}
3509 		return (ECONNRESET);
3510 	}
3511 
3512 	tx_tail = ldcp->tx_tail;
3513 	tx_head = ldcp->tx_head;
3514 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3515 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3516 
3517 	if (new_tail == tx_head) {
3518 		DWARN(DBG_ALL_LDCS,
3519 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3520 		*sizep = 0;
3521 		return (EWOULDBLOCK);
3522 	}
3523 
3524 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3525 	    ldcp->id, size);
3526 
3527 	/* Send the data now */
3528 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3529 
3530 	/* copy the data into pkt */
3531 	bcopy((uint8_t *)buf, ldcmsg, size);
3532 
3533 	/* increment tail */
3534 	tx_tail = new_tail;
3535 
3536 	/*
3537 	 * All packets have been copied into the TX queue
3538 	 * update the tail ptr in the HV
3539 	 */
3540 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3541 	if (rv) {
3542 		if (rv == EWOULDBLOCK) {
3543 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3544 			    ldcp->id);
3545 			*sizep = 0;
3546 			return (EWOULDBLOCK);
3547 		}
3548 
3549 		*sizep = 0;
3550 		if (mutex_tryenter(&ldcp->lock)) {
3551 			i_ldc_reset(ldcp);
3552 			mutex_exit(&ldcp->lock);
3553 		} else {
3554 			/*
3555 			 * Release Tx lock, and then reacquire channel
3556 			 * and Tx lock in correct order
3557 			 */
3558 			mutex_exit(&ldcp->tx_lock);
3559 			mutex_enter(&ldcp->lock);
3560 			mutex_enter(&ldcp->tx_lock);
3561 			i_ldc_reset(ldcp);
3562 			mutex_exit(&ldcp->lock);
3563 		}
3564 		return (ECONNRESET);
3565 	}
3566 
3567 	ldcp->tx_tail = tx_tail;
3568 	*sizep = size;
3569 
3570 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3571 
3572 	return (rv);
3573 }
3574 
3575 
3576 /*
3577  * Write specified amount of bytes to the channel
3578  * in multiple pkts of pkt_payload size. Each
3579  * packet is tagged with an unique packet ID in
3580  * the case of a reliable transport.
3581  *
3582  * On return, size contains the number of bytes written.
3583  * This function needs to ensure that the write size is < MTU size
3584  */
3585 static int
3586 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3587 {
3588 	ldc_msg_t 	*ldcmsg;
3589 	uint64_t 	tx_head, tx_tail, new_tail, start;
3590 	uint64_t	txq_size_mask, numavail;
3591 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3592 	size_t 		len, bytes_written = 0, remaining;
3593 	int		rv;
3594 	uint32_t	curr_seqid;
3595 
3596 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3597 
3598 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3599 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3600 		ldcp->mode == LDC_MODE_STREAM);
3601 
3602 	/* compute mask for increment */
3603 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3604 
3605 	/* get the qptrs for the tx queue */
3606 	rv = hv_ldc_tx_get_state(ldcp->id,
3607 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3608 	if (rv != 0) {
3609 		cmn_err(CE_WARN,
3610 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3611 		*size = 0;
3612 		return (EIO);
3613 	}
3614 
3615 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3616 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3617 		DWARN(ldcp->id,
3618 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3619 		*size = 0;
3620 		if (mutex_tryenter(&ldcp->lock)) {
3621 			i_ldc_reset(ldcp);
3622 			mutex_exit(&ldcp->lock);
3623 		} else {
3624 			/*
3625 			 * Release Tx lock, and then reacquire channel
3626 			 * and Tx lock in correct order
3627 			 */
3628 			mutex_exit(&ldcp->tx_lock);
3629 			mutex_enter(&ldcp->lock);
3630 			mutex_enter(&ldcp->tx_lock);
3631 			i_ldc_reset(ldcp);
3632 			mutex_exit(&ldcp->lock);
3633 		}
3634 		return (ECONNRESET);
3635 	}
3636 
3637 	tx_tail = ldcp->tx_tail;
3638 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3639 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3640 
3641 	/*
3642 	 * Transport mode determines whether we use HV Tx head or the
3643 	 * private protocol head (corresponding to last ACKd pkt) for
3644 	 * determining how much we can write
3645 	 */
3646 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3647 		ldcp->mode == LDC_MODE_STREAM)
3648 		? ldcp->tx_ackd_head : ldcp->tx_head;
3649 	if (new_tail == tx_head) {
3650 		DWARN(DBG_ALL_LDCS,
3651 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3652 		*size = 0;
3653 		return (EWOULDBLOCK);
3654 	}
3655 
3656 	/*
3657 	 * Make sure that the LDC Tx queue has enough space
3658 	 */
3659 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3660 		+ ldcp->tx_q_entries - 1;
3661 	numavail %= ldcp->tx_q_entries;
3662 
3663 	if (*size > (numavail * ldcp->pkt_payload)) {
3664 		DWARN(DBG_ALL_LDCS,
3665 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3666 		return (EWOULDBLOCK);
3667 	}
3668 
3669 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3670 	    ldcp->id, *size);
3671 
3672 	/* Send the data now */
3673 	bytes_written = 0;
3674 	curr_seqid = ldcp->last_msg_snt;
3675 	start = tx_tail;
3676 
3677 	while (*size > bytes_written) {
3678 
3679 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3680 
3681 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3682 			ldcp->mode == LDC_MODE_STREAM)
3683 			? ldcmsg->rdata : ldcmsg->udata);
3684 
3685 		ldcmsg->type = LDC_DATA;
3686 		ldcmsg->stype = LDC_INFO;
3687 		ldcmsg->ctrl = 0;
3688 
3689 		remaining = *size - bytes_written;
3690 		len = min(ldcp->pkt_payload, remaining);
3691 		ldcmsg->env = (uint8_t)len;
3692 
3693 		curr_seqid++;
3694 		ldcmsg->seqid = curr_seqid;
3695 
3696 		DUMP_LDC_PKT(ldcp, "ldc_write snd data", (uint64_t)ldcmsg);
3697 
3698 		/* copy the data into pkt */
3699 		bcopy(source, msgbuf, len);
3700 
3701 		source += len;
3702 		bytes_written += len;
3703 
3704 		/* increment tail */
3705 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3706 
3707 		ASSERT(tx_tail != tx_head);
3708 	}
3709 
3710 	/* Set the start and stop bits */
3711 	ldcmsg->env |= LDC_FRAG_STOP;
3712 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3713 	ldcmsg->env |= LDC_FRAG_START;
3714 
3715 	/*
3716 	 * All packets have been copied into the TX queue
3717 	 * update the tail ptr in the HV
3718 	 */
3719 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3720 	if (rv == 0) {
3721 		ldcp->tx_tail = tx_tail;
3722 		ldcp->last_msg_snt = curr_seqid;
3723 		*size = bytes_written;
3724 	} else {
3725 		int rv2;
3726 
3727 		if (rv != EWOULDBLOCK) {
3728 			*size = 0;
3729 			if (mutex_tryenter(&ldcp->lock)) {
3730 				i_ldc_reset(ldcp);
3731 				mutex_exit(&ldcp->lock);
3732 			} else {
3733 				/*
3734 				 * Release Tx lock, and then reacquire channel
3735 				 * and Tx lock in correct order
3736 				 */
3737 				mutex_exit(&ldcp->tx_lock);
3738 				mutex_enter(&ldcp->lock);
3739 				mutex_enter(&ldcp->tx_lock);
3740 				i_ldc_reset(ldcp);
3741 				mutex_exit(&ldcp->lock);
3742 			}
3743 			return (ECONNRESET);
3744 		}
3745 
3746 		DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
3747 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
3748 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
3749 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
3750 
3751 		rv2 = hv_ldc_tx_get_state(ldcp->id,
3752 		    &tx_head, &tx_tail, &ldcp->link_state);
3753 
3754 		DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
3755 			"(head 0x%x, tail 0x%x state 0x%x)\n",
3756 			rv2, tx_head, tx_tail, ldcp->link_state);
3757 
3758 		*size = 0;
3759 	}
3760 
3761 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
3762 
3763 	return (rv);
3764 }
3765 
3766 /*
3767  * Write specified amount of bytes to the channel
3768  * in multiple pkts of pkt_payload size. Each
3769  * packet is tagged with an unique packet ID in
3770  * the case of a reliable transport.
3771  *
3772  * On return, size contains the number of bytes written.
3773  * This function needs to ensure that the write size is < MTU size
3774  */
3775 static int
3776 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3777 {
3778 	ASSERT(MUTEX_HELD(&ldcp->tx_lock));
3779 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
3780 
3781 	/* Truncate packet to max of MTU size */
3782 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
3783 	return (i_ldc_write_packet(ldcp, buf, sizep));
3784 }
3785 
3786 
3787 /*
3788  * Interfaces for channel nexus to register/unregister with LDC module
3789  * The nexus will register functions to be used to register individual
3790  * channels with the nexus and enable interrupts for the channels
3791  */
3792 int
3793 ldc_register(ldc_cnex_t *cinfo)
3794 {
3795 	ldc_chan_t	*ldcp;
3796 
3797 	if (cinfo == NULL || cinfo->dip == NULL ||
3798 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
3799 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
3800 	    cinfo->clr_intr == NULL) {
3801 
3802 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
3803 		return (EINVAL);
3804 	}
3805 
3806 	mutex_enter(&ldcssp->lock);
3807 
3808 	/* nexus registration */
3809 	ldcssp->cinfo.dip = cinfo->dip;
3810 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
3811 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
3812 	ldcssp->cinfo.add_intr = cinfo->add_intr;
3813 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
3814 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
3815 
3816 	/* register any channels that might have been previously initialized */
3817 	ldcp = ldcssp->chan_list;
3818 	while (ldcp) {
3819 		if ((ldcp->tstate & TS_QCONF_RDY) &&
3820 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
3821 			(void) i_ldc_register_channel(ldcp);
3822 
3823 		ldcp = ldcp->next;
3824 	}
3825 
3826 	mutex_exit(&ldcssp->lock);
3827 
3828 	return (0);
3829 }
3830 
3831 int
3832 ldc_unregister(ldc_cnex_t *cinfo)
3833 {
3834 	if (cinfo == NULL || cinfo->dip == NULL) {
3835 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
3836 		return (EINVAL);
3837 	}
3838 
3839 	mutex_enter(&ldcssp->lock);
3840 
3841 	if (cinfo->dip != ldcssp->cinfo.dip) {
3842 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
3843 		mutex_exit(&ldcssp->lock);
3844 		return (EINVAL);
3845 	}
3846 
3847 	/* nexus unregister */
3848 	ldcssp->cinfo.dip = NULL;
3849 	ldcssp->cinfo.reg_chan = NULL;
3850 	ldcssp->cinfo.unreg_chan = NULL;
3851 	ldcssp->cinfo.add_intr = NULL;
3852 	ldcssp->cinfo.rem_intr = NULL;
3853 	ldcssp->cinfo.clr_intr = NULL;
3854 
3855 	mutex_exit(&ldcssp->lock);
3856 
3857 	return (0);
3858 }
3859 
3860 
3861 /* ------------------------------------------------------------------------- */
3862 
3863 /*
3864  * Allocate a memory handle for the channel and link it into the list
3865  * Also choose which memory table to use if this is the first handle
3866  * being assigned to this channel
3867  */
3868 int
3869 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
3870 {
3871 	ldc_chan_t 	*ldcp;
3872 	ldc_mhdl_t	*mhdl;
3873 	int 		rv;
3874 
3875 	if (handle == NULL) {
3876 		DWARN(DBG_ALL_LDCS,
3877 		    "ldc_mem_alloc_handle: invalid channel handle\n");
3878 		return (EINVAL);
3879 	}
3880 	ldcp = (ldc_chan_t *)handle;
3881 
3882 	mutex_enter(&ldcp->lock);
3883 
3884 	/* check to see if channel is initalized */
3885 	if (ldcp->tstate < TS_INIT) {
3886 		DWARN(ldcp->id,
3887 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
3888 		    ldcp->id);
3889 		mutex_exit(&ldcp->lock);
3890 		return (EINVAL);
3891 	}
3892 
3893 	/*
3894 	 * If this channel is allocating a mem handle for the
3895 	 * first time allocate it a memory map table and initialize it
3896 	 */
3897 	if (ldcp->mtbl == NULL) {
3898 
3899 		ldc_mtbl_t *mtbl;
3900 
3901 		/* Allocate and initialize the map table structure */
3902 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
3903 		mtbl->size = MTBL_MAX_SIZE;
3904 		mtbl->num_entries = mtbl->num_avail =
3905 			(MTBL_MAX_SIZE/sizeof (ldc_mte_slot_t));
3906 		mtbl->next_entry = NULL;
3907 
3908 		/* Allocate the table itself */
3909 		mtbl->table = (ldc_mte_slot_t *)
3910 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
3911 		if (mtbl->table == NULL) {
3912 			cmn_err(CE_WARN,
3913 			    "ldc_mem_alloc_handle: (0x%lx) error allocating "
3914 			    "table memory", ldcp->id);
3915 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3916 			mutex_exit(&ldcp->lock);
3917 			return (ENOMEM);
3918 		}
3919 
3920 		/* zero out the memory */
3921 		bzero(mtbl->table, mtbl->size);
3922 
3923 		/* initialize the lock */
3924 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
3925 
3926 		/* register table for this channel */
3927 		rv = hv_ldc_set_map_table(ldcp->id,
3928 		    va_to_pa(mtbl->table), mtbl->num_entries);
3929 		if (rv != 0) {
3930 			cmn_err(CE_WARN,
3931 			    "ldc_mem_alloc_handle: (0x%lx) err %d mapping tbl",
3932 			    ldcp->id, rv);
3933 			contig_mem_free(mtbl->table, mtbl->size);
3934 			mutex_destroy(&mtbl->lock);
3935 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3936 			mutex_exit(&ldcp->lock);
3937 			return (EIO);
3938 		}
3939 
3940 		ldcp->mtbl = mtbl;
3941 
3942 		D1(ldcp->id,
3943 		    "ldc_mem_alloc_handle: (0x%llx) alloc'd map table 0x%llx\n",
3944 		    ldcp->id, ldcp->mtbl->table);
3945 	}
3946 
3947 	/* allocate handle for channel */
3948 	mhdl = kmem_zalloc(sizeof (ldc_mhdl_t), KM_SLEEP);
3949 
3950 	/* initialize the lock */
3951 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
3952 
3953 	mhdl->status = LDC_UNBOUND;
3954 	mhdl->ldcp = ldcp;
3955 
3956 	/* insert memory handle (@ head) into list */
3957 	if (ldcp->mhdl_list == NULL) {
3958 		ldcp->mhdl_list = mhdl;
3959 		mhdl->next = NULL;
3960 	} else {
3961 		/* insert @ head */
3962 		mhdl->next = ldcp->mhdl_list;
3963 		ldcp->mhdl_list = mhdl;
3964 	}
3965 
3966 	/* return the handle */
3967 	*mhandle = (ldc_mem_handle_t)mhdl;
3968 
3969 	mutex_exit(&ldcp->lock);
3970 
3971 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
3972 	    ldcp->id, mhdl);
3973 
3974 	return (0);
3975 }
3976 
3977 /*
3978  * Free memory handle for the channel and unlink it from the list
3979  */
3980 int
3981 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
3982 {
3983 	ldc_mhdl_t 	*mhdl, *phdl;
3984 	ldc_chan_t 	*ldcp;
3985 
3986 	if (mhandle == NULL) {
3987 		DWARN(DBG_ALL_LDCS,
3988 		    "ldc_mem_free_handle: invalid memory handle\n");
3989 		return (EINVAL);
3990 	}
3991 	mhdl = (ldc_mhdl_t *)mhandle;
3992 
3993 	mutex_enter(&mhdl->lock);
3994 
3995 	ldcp = mhdl->ldcp;
3996 
3997 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
3998 		DWARN(ldcp->id,
3999 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
4000 		    mhdl);
4001 		mutex_exit(&mhdl->lock);
4002 		return (EINVAL);
4003 	}
4004 	mutex_exit(&mhdl->lock);
4005 
4006 	mutex_enter(&ldcp->mlist_lock);
4007 
4008 	phdl = ldcp->mhdl_list;
4009 
4010 	/* first handle */
4011 	if (phdl == mhdl) {
4012 		ldcp->mhdl_list = mhdl->next;
4013 		mutex_destroy(&mhdl->lock);
4014 		kmem_free(mhdl, sizeof (ldc_mhdl_t));
4015 		D1(ldcp->id,
4016 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
4017 		    ldcp->id, mhdl);
4018 	} else {
4019 		/* walk the list - unlink and free */
4020 		while (phdl != NULL) {
4021 			if (phdl->next == mhdl) {
4022 				phdl->next = mhdl->next;
4023 				mutex_destroy(&mhdl->lock);
4024 				kmem_free(mhdl, sizeof (ldc_mhdl_t));
4025 				D1(ldcp->id,
4026 				    "ldc_mem_free_handle: (0x%llx) freed "
4027 				    "handle 0x%llx\n", ldcp->id, mhdl);
4028 				break;
4029 			}
4030 			phdl = phdl->next;
4031 		}
4032 	}
4033 
4034 	if (phdl == NULL) {
4035 		DWARN(ldcp->id,
4036 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
4037 		mutex_exit(&ldcp->mlist_lock);
4038 		return (EINVAL);
4039 	}
4040 
4041 	mutex_exit(&ldcp->mlist_lock);
4042 
4043 	return (0);
4044 }
4045 
4046 /*
4047  * Bind a memory handle to a virtual address.
4048  * The virtual address is converted to the corresponding real addresses.
4049  * Returns pointer to the first ldc_mem_cookie and the total number
4050  * of cookies for this virtual address. Other cookies can be obtained
4051  * using the ldc_mem_nextcookie() call. If the pages are stored in
4052  * consecutive locations in the table, a single cookie corresponding to
4053  * the first location is returned. The cookie size spans all the entries.
4054  *
4055  * If the VA corresponds to a page that is already being exported, reuse
4056  * the page and do not export it again. Bump the page's use count.
4057  */
4058 int
4059 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
4060     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
4061 {
4062 	ldc_mhdl_t	*mhdl;
4063 	ldc_chan_t 	*ldcp;
4064 	ldc_mtbl_t	*mtbl;
4065 	ldc_memseg_t	*memseg;
4066 	ldc_mte_t	tmp_mte;
4067 	uint64_t	index, prev_index = 0;
4068 	int64_t		cookie_idx;
4069 	uintptr_t	raddr, ra_aligned;
4070 	uint64_t	psize, poffset, v_offset;
4071 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
4072 	pgcnt_t		npages;
4073 	caddr_t		v_align, addr;
4074 	int 		i;
4075 
4076 	if (mhandle == NULL) {
4077 		DWARN(DBG_ALL_LDCS,
4078 		    "ldc_mem_bind_handle: invalid memory handle\n");
4079 		return (EINVAL);
4080 	}
4081 	mhdl = (ldc_mhdl_t *)mhandle;
4082 	ldcp = mhdl->ldcp;
4083 	mtbl = ldcp->mtbl;
4084 
4085 	/* clear count */
4086 	*ccount = 0;
4087 
4088 	mutex_enter(&mhdl->lock);
4089 
4090 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
4091 		DWARN(ldcp->id,
4092 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
4093 		    mhandle);
4094 		mutex_exit(&mhdl->lock);
4095 		return (EINVAL);
4096 	}
4097 
4098 	/* Force address and size to be 8-byte aligned */
4099 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4100 		DWARN(ldcp->id,
4101 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
4102 		mutex_exit(&mhdl->lock);
4103 		return (EINVAL);
4104 	}
4105 
4106 	/* FUTURE: get the page size, pgsz code, and shift */
4107 	pg_size = MMU_PAGESIZE;
4108 	pg_size_code = page_szc(pg_size);
4109 	pg_shift = page_get_shift(pg_size_code);
4110 	pg_mask = ~(pg_size - 1);
4111 
4112 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
4113 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4114 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4115 
4116 	/* aligned VA and its offset */
4117 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
4118 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
4119 
4120 	npages = (len+v_offset)/pg_size;
4121 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
4122 
4123 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
4124 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4125 	    ldcp->id, vaddr, v_align, v_offset, npages);
4126 
4127 	/* lock the memory table - exclusive access to channel */
4128 	mutex_enter(&mtbl->lock);
4129 
4130 	if (npages > mtbl->num_avail) {
4131 		DWARN(ldcp->id,
4132 		    "ldc_mem_bind_handle: (0x%llx) no table entries\n",
4133 		    ldcp->id);
4134 		mutex_exit(&mtbl->lock);
4135 		mutex_exit(&mhdl->lock);
4136 		return (ENOMEM);
4137 	}
4138 
4139 	/* Allocate a memseg structure */
4140 	memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
4141 
4142 	/* Allocate memory to store all pages and cookies */
4143 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4144 	memseg->cookies =
4145 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
4146 
4147 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
4148 	    ldcp->id, npages);
4149 
4150 	addr = v_align;
4151 
4152 	/*
4153 	 * Table slots are used in a round-robin manner. The algorithm permits
4154 	 * inserting duplicate entries. Slots allocated earlier will typically
4155 	 * get freed before we get back to reusing the slot.Inserting duplicate
4156 	 * entries should be OK as we only lookup entries using the cookie addr
4157 	 * i.e. tbl index, during export, unexport and copy operation.
4158 	 *
4159 	 * One implementation what was tried was to search for a duplicate
4160 	 * page entry first and reuse it. The search overhead is very high and
4161 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
4162 	 * So it does make sense to avoid searching for duplicates.
4163 	 *
4164 	 * But during the process of searching for a free slot, if we find a
4165 	 * duplicate entry we will go ahead and use it, and bump its use count.
4166 	 */
4167 
4168 	/* index to start searching from */
4169 	index = mtbl->next_entry;
4170 	cookie_idx = -1;
4171 
4172 	tmp_mte.ll = 0;	/* initialise fields to 0 */
4173 
4174 	if (mtype & LDC_DIRECT_MAP) {
4175 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
4176 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
4177 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
4178 	}
4179 
4180 	if (mtype & LDC_SHADOW_MAP) {
4181 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
4182 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
4183 	}
4184 
4185 	if (mtype & LDC_IO_MAP) {
4186 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
4187 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
4188 	}
4189 
4190 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4191 
4192 	tmp_mte.mte_pgszc = pg_size_code;
4193 
4194 	/* initialize each mem table entry */
4195 	for (i = 0; i < npages; i++) {
4196 
4197 		/* check if slot is available in the table */
4198 		while (mtbl->table[index].entry.ll != 0) {
4199 
4200 			index = (index + 1) % mtbl->num_entries;
4201 
4202 			if (index == mtbl->next_entry) {
4203 				/* we have looped around */
4204 				DWARN(DBG_ALL_LDCS,
4205 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
4206 				    "entry\n", ldcp->id);
4207 				*ccount = 0;
4208 
4209 				/* NOTE: free memory, remove previous entries */
4210 				/* this shouldnt happen as num_avail was ok */
4211 
4212 				mutex_exit(&mtbl->lock);
4213 				mutex_exit(&mhdl->lock);
4214 				return (ENOMEM);
4215 			}
4216 		}
4217 
4218 		/* get the real address */
4219 		raddr = va_to_pa((void *)addr);
4220 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4221 
4222 		/* build the mte */
4223 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4224 
4225 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4226 
4227 		/* update entry in table */
4228 		mtbl->table[index].entry = tmp_mte;
4229 
4230 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4231 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4232 
4233 		/* calculate the size and offset for this export range */
4234 		if (i == 0) {
4235 			/* first page */
4236 			psize = min((pg_size - v_offset), len);
4237 			poffset = v_offset;
4238 
4239 		} else if (i == (npages - 1)) {
4240 			/* last page */
4241 			psize =	(((uintptr_t)(vaddr + len)) &
4242 				    ((uint64_t)(pg_size-1)));
4243 			if (psize == 0)
4244 				psize = pg_size;
4245 			poffset = 0;
4246 
4247 		} else {
4248 			/* middle pages */
4249 			psize = pg_size;
4250 			poffset = 0;
4251 		}
4252 
4253 		/* store entry for this page */
4254 		memseg->pages[i].index = index;
4255 		memseg->pages[i].raddr = raddr;
4256 		memseg->pages[i].offset = poffset;
4257 		memseg->pages[i].size = psize;
4258 		memseg->pages[i].mte = &(mtbl->table[index]);
4259 
4260 		/* create the cookie */
4261 		if (i == 0 || (index != prev_index + 1)) {
4262 			cookie_idx++;
4263 			memseg->cookies[cookie_idx].addr =
4264 				IDX2COOKIE(index, pg_size_code, pg_shift);
4265 			memseg->cookies[cookie_idx].addr |= poffset;
4266 			memseg->cookies[cookie_idx].size = psize;
4267 
4268 		} else {
4269 			memseg->cookies[cookie_idx].size += psize;
4270 		}
4271 
4272 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4273 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4274 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4275 		    ldcp->id, addr, index, raddr, psize, poffset);
4276 
4277 		/* decrement number of available entries */
4278 		mtbl->num_avail--;
4279 
4280 		/* increment va by page size */
4281 		addr += pg_size;
4282 
4283 		/* increment index */
4284 		prev_index = index;
4285 		index = (index + 1) % mtbl->num_entries;
4286 
4287 		/* save the next slot */
4288 		mtbl->next_entry = index;
4289 	}
4290 
4291 	mutex_exit(&mtbl->lock);
4292 
4293 	/* memory handle = bound */
4294 	mhdl->mtype = mtype;
4295 	mhdl->perm = perm;
4296 	mhdl->status = LDC_BOUND;
4297 
4298 	/* update memseg_t */
4299 	memseg->vaddr = vaddr;
4300 	memseg->raddr = memseg->pages[0].raddr;
4301 	memseg->size = len;
4302 	memseg->npages = npages;
4303 	memseg->ncookies = cookie_idx + 1;
4304 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4305 
4306 	/* return count and first cookie */
4307 	*ccount = memseg->ncookies;
4308 	cookie->addr = memseg->cookies[0].addr;
4309 	cookie->size = memseg->cookies[0].size;
4310 
4311 	D1(ldcp->id,
4312 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4313 	    "pgs=0x%llx cookies=0x%llx\n",
4314 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4315 
4316 	mutex_exit(&mhdl->lock);
4317 	return (0);
4318 }
4319 
4320 /*
4321  * Return the next cookie associated with the specified memory handle
4322  */
4323 int
4324 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4325 {
4326 	ldc_mhdl_t	*mhdl;
4327 	ldc_chan_t 	*ldcp;
4328 	ldc_memseg_t	*memseg;
4329 
4330 	if (mhandle == NULL) {
4331 		DWARN(DBG_ALL_LDCS,
4332 		    "ldc_mem_nextcookie: invalid memory handle\n");
4333 		return (EINVAL);
4334 	}
4335 	mhdl = (ldc_mhdl_t *)mhandle;
4336 
4337 	mutex_enter(&mhdl->lock);
4338 
4339 	ldcp = mhdl->ldcp;
4340 	memseg = mhdl->memseg;
4341 
4342 	if (cookie == 0) {
4343 		DWARN(ldcp->id,
4344 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4345 		    ldcp->id);
4346 		mutex_exit(&mhdl->lock);
4347 		return (EINVAL);
4348 	}
4349 
4350 	if (memseg->next_cookie != 0) {
4351 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4352 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4353 		memseg->next_cookie++;
4354 		if (memseg->next_cookie == memseg->ncookies)
4355 			memseg->next_cookie = 0;
4356 
4357 	} else {
4358 		DWARN(ldcp->id,
4359 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4360 		cookie->addr = 0;
4361 		cookie->size = 0;
4362 		mutex_exit(&mhdl->lock);
4363 		return (EINVAL);
4364 	}
4365 
4366 	D1(ldcp->id,
4367 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4368 	    ldcp->id, cookie->addr, cookie->size);
4369 
4370 	mutex_exit(&mhdl->lock);
4371 	return (0);
4372 }
4373 
4374 /*
4375  * Unbind the virtual memory region associated with the specified
4376  * memory handle. Allassociated cookies are freed and the corresponding
4377  * RA space is no longer exported.
4378  */
4379 int
4380 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4381 {
4382 	ldc_mhdl_t	*mhdl;
4383 	ldc_chan_t 	*ldcp;
4384 	ldc_mtbl_t	*mtbl;
4385 	ldc_memseg_t	*memseg;
4386 	int		i;
4387 
4388 	if (mhandle == NULL) {
4389 		DWARN(DBG_ALL_LDCS,
4390 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4391 		return (EINVAL);
4392 	}
4393 	mhdl = (ldc_mhdl_t *)mhandle;
4394 
4395 	mutex_enter(&mhdl->lock);
4396 
4397 	if (mhdl->status == LDC_UNBOUND) {
4398 		DWARN(DBG_ALL_LDCS,
4399 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4400 		    mhandle);
4401 		mutex_exit(&mhdl->lock);
4402 		return (EINVAL);
4403 	}
4404 
4405 	ldcp = mhdl->ldcp;
4406 	mtbl = ldcp->mtbl;
4407 
4408 	memseg = mhdl->memseg;
4409 
4410 	/* lock the memory table - exclusive access to channel */
4411 	mutex_enter(&mtbl->lock);
4412 
4413 	/* undo the pages exported */
4414 	for (i = 0; i < memseg->npages; i++) {
4415 
4416 		/* FUTURE: check for mapped pages */
4417 		if (memseg->pages[i].mte->cookie) {
4418 			_NOTE(EMPTY)
4419 		}
4420 
4421 		/* clear the entry from the table */
4422 		memseg->pages[i].mte->entry.ll = 0;
4423 		mtbl->num_avail++;
4424 	}
4425 	mutex_exit(&mtbl->lock);
4426 
4427 	/* free the allocated memseg and page structures */
4428 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4429 	kmem_free(memseg->cookies,
4430 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4431 	kmem_free(memseg, sizeof (ldc_memseg_t));
4432 
4433 	/* uninitialize the memory handle */
4434 	mhdl->memseg = NULL;
4435 	mhdl->status = LDC_UNBOUND;
4436 
4437 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4438 	    ldcp->id, mhdl);
4439 
4440 	mutex_exit(&mhdl->lock);
4441 	return (0);
4442 }
4443 
4444 /*
4445  * Get information about the dring. The base address of the descriptor
4446  * ring along with the type and permission are returned back.
4447  */
4448 int
4449 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4450 {
4451 	ldc_mhdl_t	*mhdl;
4452 
4453 	if (mhandle == NULL) {
4454 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4455 		return (EINVAL);
4456 	}
4457 	mhdl = (ldc_mhdl_t *)mhandle;
4458 
4459 	if (minfo == NULL) {
4460 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4461 		return (EINVAL);
4462 	}
4463 
4464 	mutex_enter(&mhdl->lock);
4465 
4466 	minfo->status = mhdl->status;
4467 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4468 		minfo->vaddr = mhdl->memseg->vaddr;
4469 		minfo->raddr = mhdl->memseg->raddr;
4470 		minfo->mtype = mhdl->mtype;
4471 		minfo->perm = mhdl->perm;
4472 	}
4473 	mutex_exit(&mhdl->lock);
4474 
4475 	return (0);
4476 }
4477 
4478 /*
4479  * Copy data either from or to the client specified virtual address
4480  * space to or from the exported memory associated with the cookies.
4481  * The direction argument determines whether the data is read from or
4482  * written to exported memory.
4483  */
4484 int
4485 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4486     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4487 {
4488 	ldc_chan_t 	*ldcp;
4489 	uint64_t	local_voff, local_valign;
4490 	uint64_t	cookie_addr, cookie_size;
4491 	uint64_t	pg_shift, pg_size, pg_size_code;
4492 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4493 	uint64_t	local_ra, local_poff, local_psize;
4494 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4495 	pgcnt_t		npages;
4496 	size_t		len = *size;
4497 	int 		i, rv = 0;
4498 
4499 	if (handle == NULL) {
4500 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4501 		return (EINVAL);
4502 	}
4503 	ldcp = (ldc_chan_t *)handle;
4504 
4505 	mutex_enter(&ldcp->lock);
4506 
4507 	/* check to see if channel is UP */
4508 	if (ldcp->tstate != TS_UP) {
4509 		DWARN(ldcp->id, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4510 		    ldcp->id);
4511 		mutex_exit(&ldcp->lock);
4512 		return (EINVAL);
4513 	}
4514 
4515 	/* Force address and size to be 8-byte aligned */
4516 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4517 		DWARN(ldcp->id,
4518 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4519 		mutex_exit(&ldcp->lock);
4520 		return (EINVAL);
4521 	}
4522 
4523 	/* Find the size of the exported memory */
4524 	export_size = 0;
4525 	for (i = 0; i < ccount; i++)
4526 		export_size += cookies[i].size;
4527 
4528 	/* check to see if offset is valid */
4529 	if (off > export_size) {
4530 		DWARN(ldcp->id,
4531 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4532 		    ldcp->id);
4533 		mutex_exit(&ldcp->lock);
4534 		return (EINVAL);
4535 	}
4536 
4537 	/*
4538 	 * Check to see if the export size is smaller than the size we
4539 	 * are requesting to copy - if so flag an error
4540 	 */
4541 	if ((export_size - off) < *size) {
4542 		DWARN(ldcp->id,
4543 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4544 		    ldcp->id);
4545 		mutex_exit(&ldcp->lock);
4546 		return (EINVAL);
4547 	}
4548 
4549 	total_bal = min(export_size, *size);
4550 
4551 	/* FUTURE: get the page size, pgsz code, and shift */
4552 	pg_size = MMU_PAGESIZE;
4553 	pg_size_code = page_szc(pg_size);
4554 	pg_shift = page_get_shift(pg_size_code);
4555 
4556 	D1(ldcp->id, "ldc_mem_copy: copying data "
4557 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4558 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4559 
4560 	/* aligned VA and its offset */
4561 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4562 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4563 
4564 	npages = (len+local_voff)/pg_size;
4565 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4566 
4567 	D1(ldcp->id,
4568 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4569 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4570 
4571 	local_ra = va_to_pa((void *)local_valign);
4572 	local_poff = local_voff;
4573 	local_psize = min(len, (pg_size - local_voff));
4574 
4575 	len -= local_psize;
4576 
4577 	/*
4578 	 * find the first cookie in the list of cookies
4579 	 * if the offset passed in is not zero
4580 	 */
4581 	for (idx = 0; idx < ccount; idx++) {
4582 		cookie_size = cookies[idx].size;
4583 		if (off < cookie_size)
4584 			break;
4585 		off -= cookie_size;
4586 	}
4587 
4588 	cookie_addr = cookies[idx].addr + off;
4589 	cookie_size = cookies[idx].size - off;
4590 
4591 	export_caddr = cookie_addr & ~(pg_size - 1);
4592 	export_poff = cookie_addr & (pg_size - 1);
4593 	export_psize = min(cookie_size, (pg_size - export_poff));
4594 
4595 	for (;;) {
4596 
4597 		copy_size = min(export_psize, local_psize);
4598 
4599 		D1(ldcp->id,
4600 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4601 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4602 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4603 		    " total_bal=0x%llx\n",
4604 		    ldcp->id, direction, export_caddr, local_ra, export_poff,
4605 		    local_poff, export_psize, local_psize, copy_size,
4606 		    total_bal);
4607 
4608 		rv = hv_ldc_copy(ldcp->id, direction,
4609 		    (export_caddr + export_poff), (local_ra + local_poff),
4610 		    copy_size, &copied_len);
4611 
4612 		if (rv != 0) {
4613 			cmn_err(CE_WARN,
4614 			    "ldc_mem_copy: (0x%lx) err %d during copy\n",
4615 			    ldcp->id, rv);
4616 			DWARN(DBG_ALL_LDCS,
4617 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%llx, "
4618 			    "loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4619 			    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4620 			    " copied_len=0x%llx, total_bal=0x%llx\n",
4621 			    ldcp->id, direction, export_caddr, local_ra,
4622 			    export_poff, local_poff, export_psize, local_psize,
4623 			    copy_size, copied_len, total_bal);
4624 
4625 			*size = *size - total_bal;
4626 			mutex_exit(&ldcp->lock);
4627 			return (EIO);
4628 		}
4629 
4630 		ASSERT(copied_len <= copy_size);
4631 
4632 		D2(ldcp->id, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4633 		export_poff += copied_len;
4634 		local_poff += copied_len;
4635 		export_psize -= copied_len;
4636 		local_psize -= copied_len;
4637 		cookie_size -= copied_len;
4638 
4639 		total_bal -= copied_len;
4640 
4641 		if (copy_size != copied_len)
4642 			continue;
4643 
4644 		if (export_psize == 0 && total_bal != 0) {
4645 
4646 			if (cookie_size == 0) {
4647 				idx++;
4648 				cookie_addr = cookies[idx].addr;
4649 				cookie_size = cookies[idx].size;
4650 
4651 				export_caddr = cookie_addr & ~(pg_size - 1);
4652 				export_poff = cookie_addr & (pg_size - 1);
4653 				export_psize =
4654 					min(cookie_size, (pg_size-export_poff));
4655 			} else {
4656 				export_caddr += pg_size;
4657 				export_poff = 0;
4658 				export_psize = min(cookie_size, pg_size);
4659 			}
4660 		}
4661 
4662 		if (local_psize == 0 && total_bal != 0) {
4663 			local_valign += pg_size;
4664 			local_ra = va_to_pa((void *)local_valign);
4665 			local_poff = 0;
4666 			local_psize = min(pg_size, len);
4667 			len -= local_psize;
4668 		}
4669 
4670 		/* check if we are all done */
4671 		if (total_bal == 0)
4672 			break;
4673 	}
4674 
4675 	mutex_exit(&ldcp->lock);
4676 
4677 	D1(ldcp->id,
4678 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
4679 	    ldcp->id, *size);
4680 
4681 	return (0);
4682 }
4683 
4684 /*
4685  * Copy data either from or to the client specified virtual address
4686  * space to or from HV physical memory.
4687  *
4688  * The direction argument determines whether the data is read from or
4689  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
4690  * to the ldc_mem_copy interface
4691  */
4692 int
4693 ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
4694     caddr_t paddr, uint8_t direction)
4695 {
4696 	ldc_chan_t 	*ldcp;
4697 	uint64_t	local_voff, local_valign;
4698 	uint64_t	pg_shift, pg_size, pg_size_code;
4699 	uint64_t 	target_pa, target_poff, target_psize, target_size;
4700 	uint64_t	local_ra, local_poff, local_psize;
4701 	uint64_t	copy_size, copied_len = 0;
4702 	pgcnt_t		npages;
4703 	size_t		len = *size;
4704 	int 		rv = 0;
4705 
4706 	if (handle == NULL) {
4707 		DWARN(DBG_ALL_LDCS,
4708 		    "ldc_mem_rdwr_pa: invalid channel handle\n");
4709 		return (EINVAL);
4710 	}
4711 	ldcp = (ldc_chan_t *)handle;
4712 
4713 	mutex_enter(&ldcp->lock);
4714 
4715 	/* check to see if channel is UP */
4716 	if (ldcp->tstate != TS_UP) {
4717 		DWARN(ldcp->id,
4718 		    "ldc_mem_rdwr_pa: (0x%llx) channel is not UP\n",
4719 		    ldcp->id);
4720 		mutex_exit(&ldcp->lock);
4721 		return (EINVAL);
4722 	}
4723 
4724 	/* Force address and size to be 8-byte aligned */
4725 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4726 		DWARN(ldcp->id,
4727 		    "ldc_mem_rdwr_pa: addr/size is not 8-byte aligned\n");
4728 		mutex_exit(&ldcp->lock);
4729 		return (EINVAL);
4730 	}
4731 
4732 	target_size = *size;
4733 
4734 	/* FUTURE: get the page size, pgsz code, and shift */
4735 	pg_size = MMU_PAGESIZE;
4736 	pg_size_code = page_szc(pg_size);
4737 	pg_shift = page_get_shift(pg_size_code);
4738 
4739 	D1(ldcp->id, "ldc_mem_rdwr_pa: copying data "
4740 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4741 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4742 
4743 	/* aligned VA and its offset */
4744 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
4745 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4746 
4747 	npages = (len + local_voff) / pg_size;
4748 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
4749 
4750 	D1(ldcp->id,
4751 	    "ldc_mem_rdwr_pa: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4752 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4753 
4754 	local_ra = va_to_pa((void *)local_valign);
4755 	local_poff = local_voff;
4756 	local_psize = min(len, (pg_size - local_voff));
4757 
4758 	len -= local_psize;
4759 
4760 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
4761 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
4762 	target_psize = pg_size - target_poff;
4763 
4764 	for (;;) {
4765 
4766 		copy_size = min(target_psize, local_psize);
4767 
4768 		D1(ldcp->id,
4769 		    "ldc_mem_rdwr_pa: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
4770 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4771 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4772 		    " total_bal=0x%llx\n",
4773 		    ldcp->id, direction, target_pa, local_ra, target_poff,
4774 		    local_poff, target_psize, local_psize, copy_size,
4775 		    target_size);
4776 
4777 		rv = hv_ldc_copy(ldcp->id, direction,
4778 		    (target_pa + target_poff), (local_ra + local_poff),
4779 		    copy_size, &copied_len);
4780 
4781 		if (rv != 0) {
4782 			cmn_err(CE_WARN,
4783 			    "ldc_mem_rdwr_pa: (0x%lx) err %d during copy\n",
4784 			    ldcp->id, rv);
4785 			DWARN(DBG_ALL_LDCS,
4786 			    "ldc_mem_rdwr_pa: (0x%llx) dir=%lld,tar_pa=0x%llx, "
4787 			    "loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4788 			    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4789 			    " total_bal=0x%llx\n",
4790 			    ldcp->id, direction, target_pa, local_ra,
4791 			    target_poff, local_poff, target_psize, local_psize,
4792 			    copy_size, target_size);
4793 
4794 			*size = *size - target_size;
4795 			mutex_exit(&ldcp->lock);
4796 			return (i_ldc_h2v_error(rv));
4797 		}
4798 
4799 		D2(ldcp->id, "ldc_mem_rdwr_pa: copied=0x%llx\n", copied_len);
4800 		target_poff += copied_len;
4801 		local_poff += copied_len;
4802 		target_psize -= copied_len;
4803 		local_psize -= copied_len;
4804 
4805 		target_size -= copied_len;
4806 
4807 		if (copy_size != copied_len)
4808 			continue;
4809 
4810 		if (target_psize == 0 && target_size != 0) {
4811 			target_pa += pg_size;
4812 			target_poff = 0;
4813 			target_psize = min(pg_size, target_size);
4814 		}
4815 
4816 		if (local_psize == 0 && target_size != 0) {
4817 			local_valign += pg_size;
4818 			local_ra = va_to_pa((void *)local_valign);
4819 			local_poff = 0;
4820 			local_psize = min(pg_size, len);
4821 			len -= local_psize;
4822 		}
4823 
4824 		/* check if we are all done */
4825 		if (target_size == 0)
4826 			break;
4827 	}
4828 
4829 	mutex_exit(&ldcp->lock);
4830 
4831 	D1(ldcp->id, "ldc_mem_rdwr_pa: (0x%llx) done copying sz=0x%llx\n",
4832 	    ldcp->id, *size);
4833 
4834 	return (0);
4835 }
4836 
4837 /*
4838  * Map an exported memory segment into the local address space. If the
4839  * memory range was exported for direct map access, a HV call is made
4840  * to allocate a RA range. If the map is done via a shadow copy, local
4841  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
4842  * the mapping is a direct map then the RA is returned in 'raddr'.
4843  */
4844 int
4845 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
4846     uint8_t mtype, caddr_t *vaddr, caddr_t *raddr)
4847 {
4848 	int		i, idx;
4849 	ldc_chan_t 	*ldcp;
4850 	ldc_mhdl_t	*mhdl;
4851 	ldc_memseg_t	*memseg;
4852 	caddr_t		shadow_base = NULL, tmpaddr;
4853 	uint64_t	pg_size, pg_shift, pg_size_code;
4854 	uint64_t	exp_size = 0, npages;
4855 
4856 	if (mhandle == NULL) {
4857 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
4858 		return (EINVAL);
4859 	}
4860 	mhdl = (ldc_mhdl_t *)mhandle;
4861 
4862 	mutex_enter(&mhdl->lock);
4863 
4864 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
4865 	    mhdl->memseg != NULL) {
4866 		DWARN(DBG_ALL_LDCS,
4867 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
4868 		mutex_exit(&mhdl->lock);
4869 		return (EINVAL);
4870 	}
4871 
4872 	ldcp = mhdl->ldcp;
4873 
4874 	mutex_enter(&ldcp->lock);
4875 
4876 	if (ldcp->tstate != TS_UP) {
4877 		DWARN(ldcp->id,
4878 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
4879 		    ldcp->id);
4880 		mutex_exit(&ldcp->lock);
4881 		mutex_exit(&mhdl->lock);
4882 		return (EINVAL);
4883 	}
4884 
4885 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
4886 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
4887 		mutex_exit(&ldcp->lock);
4888 		mutex_exit(&mhdl->lock);
4889 		return (EINVAL);
4890 	}
4891 
4892 	if (mtype == LDC_SHADOW_MAP && vaddr == NULL) {
4893 		DWARN(ldcp->id,
4894 		    "ldc_mem_map: invalid vaddr arg0x%llx\n", vaddr);
4895 		mutex_exit(&ldcp->lock);
4896 		mutex_exit(&mhdl->lock);
4897 		return (EINVAL);
4898 	}
4899 
4900 	if (mtype == LDC_SHADOW_MAP &&
4901 	    (vaddr) && ((uintptr_t)(*vaddr) & MMU_PAGEOFFSET)) {
4902 		DWARN(ldcp->id,
4903 		    "ldc_mem_map: vaddr not page aligned, 0x%llx\n", *vaddr);
4904 		mutex_exit(&ldcp->lock);
4905 		mutex_exit(&mhdl->lock);
4906 		return (EINVAL);
4907 	}
4908 
4909 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
4910 	    ldcp->id, cookie->addr, cookie->size);
4911 
4912 	/* FUTURE: get the page size, pgsz code, and shift */
4913 	pg_size = MMU_PAGESIZE;
4914 	pg_size_code = page_szc(pg_size);
4915 	pg_shift = page_get_shift(pg_size_code);
4916 
4917 	/* calculate the number of pages in the exported cookie */
4918 	for (idx = 0; idx < ccount; idx++) {
4919 		if (cookie[idx].addr & MMU_PAGEOFFSET ||
4920 			cookie[idx].size & MMU_PAGEOFFSET) {
4921 			DWARN(ldcp->id,
4922 			    "ldc_mem_map: cookie addr/size not page aligned, "
4923 			    "0x%llx\n", cookie[idx].addr);
4924 			mutex_exit(&ldcp->lock);
4925 			mutex_exit(&mhdl->lock);
4926 			return (EINVAL);
4927 		}
4928 		exp_size += cookie[idx].size;
4929 	}
4930 	npages = (exp_size >> pg_shift);
4931 
4932 	/* Allocate memseg structure */
4933 	memseg = mhdl->memseg =	kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
4934 
4935 	/* Allocate memory to store all pages and cookies */
4936 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4937 	memseg->cookies =
4938 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
4939 
4940 	D2(ldcp->id, "ldc_mem_map: (0x%llx) processing 0x%llx pages\n",
4941 	    ldcp->id, npages);
4942 
4943 	/* Check to see if the client is requesting direct or shadow map */
4944 	if (mtype == LDC_SHADOW_MAP) {
4945 		if (*vaddr == NULL) {
4946 			shadow_base =
4947 				contig_mem_alloc_align(exp_size, PAGESIZE);
4948 			if (shadow_base == NULL) {
4949 				cmn_err(CE_WARN, "ldc_mem_map: shadow memory "
4950 				    "allocation failed\n");
4951 				kmem_free(memseg->cookies,
4952 				    (sizeof (ldc_mem_cookie_t) * ccount));
4953 				kmem_free(memseg->pages,
4954 				    (sizeof (ldc_page_t) * npages));
4955 				kmem_free(memseg, sizeof (ldc_memseg_t));
4956 				mutex_exit(&ldcp->lock);
4957 				mutex_exit(&mhdl->lock);
4958 				return (ENOMEM);
4959 			}
4960 
4961 			bzero(shadow_base, exp_size);
4962 			mhdl->myshadow = B_TRUE;
4963 
4964 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
4965 			    "shadow page va=0x%llx\n", ldcp->id, shadow_base);
4966 		} else {
4967 			/*
4968 			 * Use client supplied memory for shadow_base
4969 			 * WARNING: assuming that client mem is >= exp_size
4970 			 */
4971 			shadow_base = *vaddr;
4972 		}
4973 	} else if (mtype == LDC_DIRECT_MAP) {
4974 		/* FUTURE: Do a direct map by calling into HV */
4975 		_NOTE(EMPTY)
4976 	}
4977 
4978 	/* Save all page and cookie information */
4979 	for (i = 0, tmpaddr = shadow_base; i < npages; i++) {
4980 		memseg->pages[i].raddr = va_to_pa(tmpaddr);
4981 		memseg->pages[i].size = pg_size;
4982 		memseg->pages[i].index = 0;
4983 		memseg->pages[i].offset = 0;
4984 		memseg->pages[i].mte = NULL;
4985 		tmpaddr += pg_size;
4986 	}
4987 	for (i = 0; i < ccount; i++) {
4988 		memseg->cookies[i].addr = cookie[i].addr;
4989 		memseg->cookies[i].size = cookie[i].size;
4990 	}
4991 
4992 	/* update memseg_t */
4993 	memseg->vaddr = shadow_base;
4994 	memseg->raddr = memseg->pages[0].raddr;
4995 	memseg->size = exp_size;
4996 	memseg->npages = npages;
4997 	memseg->ncookies = ccount;
4998 	memseg->next_cookie = 0;
4999 
5000 	/* memory handle = mapped */
5001 	mhdl->mtype = mtype;
5002 	mhdl->perm = 0;
5003 	mhdl->status = LDC_MAPPED;
5004 
5005 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
5006 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
5007 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
5008 	    memseg->npages, memseg->ncookies);
5009 
5010 	if (raddr)
5011 		*raddr = (caddr_t)memseg->raddr;
5012 	if (vaddr)
5013 		*vaddr = memseg->vaddr;
5014 
5015 	mutex_exit(&ldcp->lock);
5016 	mutex_exit(&mhdl->lock);
5017 	return (0);
5018 }
5019 
5020 /*
5021  * Unmap a memory segment. Free shadow memory (if any).
5022  */
5023 int
5024 ldc_mem_unmap(ldc_mem_handle_t mhandle)
5025 {
5026 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
5027 	ldc_chan_t 	*ldcp;
5028 	ldc_memseg_t	*memseg;
5029 
5030 	if (mhdl == 0 || mhdl->status != LDC_MAPPED) {
5031 		DWARN(DBG_ALL_LDCS,
5032 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
5033 		    mhandle);
5034 		return (EINVAL);
5035 	}
5036 
5037 	mutex_enter(&mhdl->lock);
5038 
5039 	ldcp = mhdl->ldcp;
5040 	memseg = mhdl->memseg;
5041 
5042 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
5043 	    ldcp->id, mhdl);
5044 
5045 	/* if we allocated shadow memory - free it */
5046 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
5047 		contig_mem_free(memseg->vaddr, memseg->size);
5048 	}
5049 
5050 	/* free the allocated memseg and page structures */
5051 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
5052 	kmem_free(memseg->cookies,
5053 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
5054 	kmem_free(memseg, sizeof (ldc_memseg_t));
5055 
5056 	/* uninitialize the memory handle */
5057 	mhdl->memseg = NULL;
5058 	mhdl->status = LDC_UNBOUND;
5059 
5060 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
5061 	    ldcp->id, mhdl);
5062 
5063 	mutex_exit(&mhdl->lock);
5064 	return (0);
5065 }
5066 
5067 /*
5068  * Internal entry point for LDC mapped memory entry consistency
5069  * semantics. Acquire copies the contents of the remote memory
5070  * into the local shadow copy. The release operation copies the local
5071  * contents into the remote memory. The offset and size specify the
5072  * bounds for the memory range being synchronized.
5073  */
5074 static int
5075 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
5076     uint64_t offset, size_t size)
5077 {
5078 	int 		err;
5079 	ldc_mhdl_t	*mhdl;
5080 	ldc_chan_t	*ldcp;
5081 	ldc_memseg_t	*memseg;
5082 	caddr_t		local_vaddr;
5083 	size_t		copy_size;
5084 
5085 	if (mhandle == NULL) {
5086 		DWARN(DBG_ALL_LDCS,
5087 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
5088 		return (EINVAL);
5089 	}
5090 	mhdl = (ldc_mhdl_t *)mhandle;
5091 
5092 	mutex_enter(&mhdl->lock);
5093 
5094 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
5095 		DWARN(DBG_ALL_LDCS,
5096 		    "i_ldc_mem_acquire_release: not mapped memory\n");
5097 		mutex_exit(&mhdl->lock);
5098 		return (EINVAL);
5099 	}
5100 
5101 	if (offset >= mhdl->memseg->size ||
5102 	    (offset + size) > mhdl->memseg->size) {
5103 		DWARN(DBG_ALL_LDCS,
5104 		    "i_ldc_mem_acquire_release: memory out of range\n");
5105 		mutex_exit(&mhdl->lock);
5106 		return (EINVAL);
5107 	}
5108 
5109 	/* get the channel handle and memory segment */
5110 	ldcp = mhdl->ldcp;
5111 	memseg = mhdl->memseg;
5112 
5113 	if (mhdl->mtype == LDC_SHADOW_MAP) {
5114 
5115 		local_vaddr = memseg->vaddr + offset;
5116 		copy_size = size;
5117 
5118 		/* copy to/from remote from/to local memory */
5119 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
5120 		    &copy_size, memseg->cookies, memseg->ncookies,
5121 		    direction);
5122 		if (err || copy_size != size) {
5123 			cmn_err(CE_WARN,
5124 			    "i_ldc_mem_acquire_release: copy failed\n");
5125 			mutex_exit(&mhdl->lock);
5126 			return (err);
5127 		}
5128 	}
5129 
5130 	mutex_exit(&mhdl->lock);
5131 
5132 	return (0);
5133 }
5134 
5135 /*
5136  * Ensure that the contents in the remote memory seg are consistent
5137  * with the contents if of local segment
5138  */
5139 int
5140 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5141 {
5142 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
5143 }
5144 
5145 
5146 /*
5147  * Ensure that the contents in the local memory seg are consistent
5148  * with the contents if of remote segment
5149  */
5150 int
5151 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
5152 {
5153 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
5154 }
5155 
5156 /*
5157  * Allocate a descriptor ring. The size of each each descriptor
5158  * must be 8-byte aligned and the entire ring should be a multiple
5159  * of MMU_PAGESIZE.
5160  */
5161 int
5162 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
5163 {
5164 	ldc_dring_t *dringp;
5165 	size_t size = (dsize * len);
5166 
5167 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
5168 	    len, dsize);
5169 
5170 	if (dhandle == NULL) {
5171 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
5172 		return (EINVAL);
5173 	}
5174 
5175 	if (len == 0) {
5176 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
5177 		return (EINVAL);
5178 	}
5179 
5180 	/* descriptor size should be 8-byte aligned */
5181 	if (dsize == 0 || (dsize & 0x7)) {
5182 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
5183 		return (EINVAL);
5184 	}
5185 
5186 	*dhandle = 0;
5187 
5188 	/* Allocate a desc ring structure */
5189 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5190 
5191 	/* Initialize dring */
5192 	dringp->length = len;
5193 	dringp->dsize = dsize;
5194 
5195 	/* round off to multiple of pagesize */
5196 	dringp->size = (size & MMU_PAGEMASK);
5197 	if (size & MMU_PAGEOFFSET)
5198 		dringp->size += MMU_PAGESIZE;
5199 
5200 	dringp->status = LDC_UNBOUND;
5201 
5202 	/* allocate descriptor ring memory */
5203 	dringp->base = contig_mem_alloc_align(dringp->size, PAGESIZE);
5204 	if (dringp->base == NULL) {
5205 		cmn_err(CE_WARN,
5206 		    "ldc_mem_dring_create: unable to alloc desc\n");
5207 		kmem_free(dringp, sizeof (ldc_dring_t));
5208 		return (ENOMEM);
5209 	}
5210 
5211 	bzero(dringp->base, dringp->size);
5212 
5213 	/* initialize the desc ring lock */
5214 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5215 
5216 	/* Add descriptor ring to the head of global list */
5217 	mutex_enter(&ldcssp->lock);
5218 	dringp->next = ldcssp->dring_list;
5219 	ldcssp->dring_list = dringp;
5220 	mutex_exit(&ldcssp->lock);
5221 
5222 	*dhandle = (ldc_dring_handle_t)dringp;
5223 
5224 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5225 
5226 	return (0);
5227 }
5228 
5229 
5230 /*
5231  * Destroy a descriptor ring.
5232  */
5233 int
5234 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5235 {
5236 	ldc_dring_t *dringp;
5237 	ldc_dring_t *tmp_dringp;
5238 
5239 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5240 
5241 	if (dhandle == NULL) {
5242 		DWARN(DBG_ALL_LDCS,
5243 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5244 		return (EINVAL);
5245 	}
5246 	dringp = (ldc_dring_t *)dhandle;
5247 
5248 	if (dringp->status == LDC_BOUND) {
5249 		DWARN(DBG_ALL_LDCS,
5250 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5251 		return (EACCES);
5252 	}
5253 
5254 	mutex_enter(&dringp->lock);
5255 	mutex_enter(&ldcssp->lock);
5256 
5257 	/* remove from linked list - if not bound */
5258 	tmp_dringp = ldcssp->dring_list;
5259 	if (tmp_dringp == dringp) {
5260 		ldcssp->dring_list = dringp->next;
5261 		dringp->next = NULL;
5262 
5263 	} else {
5264 		while (tmp_dringp != NULL) {
5265 			if (tmp_dringp->next == dringp) {
5266 				tmp_dringp->next = dringp->next;
5267 				dringp->next = NULL;
5268 				break;
5269 			}
5270 			tmp_dringp = tmp_dringp->next;
5271 		}
5272 		if (tmp_dringp == NULL) {
5273 			DWARN(DBG_ALL_LDCS,
5274 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5275 			mutex_exit(&ldcssp->lock);
5276 			mutex_exit(&dringp->lock);
5277 			return (EINVAL);
5278 		}
5279 	}
5280 
5281 	mutex_exit(&ldcssp->lock);
5282 
5283 	/* free the descriptor ring */
5284 	contig_mem_free((caddr_t)dringp->base, dringp->size);
5285 
5286 	mutex_exit(&dringp->lock);
5287 
5288 	/* destroy dring lock */
5289 	mutex_destroy(&dringp->lock);
5290 
5291 	/* free desc ring object */
5292 	kmem_free(dringp, sizeof (ldc_dring_t));
5293 
5294 	return (0);
5295 }
5296 
5297 /*
5298  * Bind a previously allocated dring to a channel. The channel should
5299  * be OPEN in order to bind the ring to the channel. Returns back a
5300  * descriptor ring cookie. The descriptor ring is exported for remote
5301  * access by the client at the other end of the channel. An entry for
5302  * dring pages is stored in map table (via call to ldc_mem_bind_handle).
5303  */
5304 int
5305 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5306     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5307 {
5308 	int		err;
5309 	ldc_chan_t 	*ldcp;
5310 	ldc_dring_t	*dringp;
5311 	ldc_mem_handle_t mhandle;
5312 
5313 	/* check to see if channel is initalized */
5314 	if (handle == NULL) {
5315 		DWARN(DBG_ALL_LDCS,
5316 		    "ldc_mem_dring_bind: invalid channel handle\n");
5317 		return (EINVAL);
5318 	}
5319 	ldcp = (ldc_chan_t *)handle;
5320 
5321 	if (dhandle == NULL) {
5322 		DWARN(DBG_ALL_LDCS,
5323 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5324 		return (EINVAL);
5325 	}
5326 	dringp = (ldc_dring_t *)dhandle;
5327 
5328 	if (cookie == NULL) {
5329 		DWARN(ldcp->id,
5330 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5331 		return (EINVAL);
5332 	}
5333 
5334 	mutex_enter(&dringp->lock);
5335 
5336 	if (dringp->status == LDC_BOUND) {
5337 		DWARN(DBG_ALL_LDCS,
5338 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5339 		    ldcp->id);
5340 		mutex_exit(&dringp->lock);
5341 		return (EINVAL);
5342 	}
5343 
5344 	if ((perm & LDC_MEM_RW) == 0) {
5345 		DWARN(DBG_ALL_LDCS,
5346 		    "ldc_mem_dring_bind: invalid permissions\n");
5347 		mutex_exit(&dringp->lock);
5348 		return (EINVAL);
5349 	}
5350 
5351 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5352 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5353 		mutex_exit(&dringp->lock);
5354 		return (EINVAL);
5355 	}
5356 
5357 	dringp->ldcp = ldcp;
5358 
5359 	/* create an memory handle */
5360 	err = ldc_mem_alloc_handle(handle, &mhandle);
5361 	if (err || mhandle == NULL) {
5362 		DWARN(DBG_ALL_LDCS,
5363 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5364 		    ldcp->id);
5365 		mutex_exit(&dringp->lock);
5366 		return (err);
5367 	}
5368 	dringp->mhdl = mhandle;
5369 
5370 	/* bind the descriptor ring to channel */
5371 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5372 	    mtype, perm, cookie, ccount);
5373 	if (err) {
5374 		DWARN(ldcp->id,
5375 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5376 		    ldcp->id);
5377 		mutex_exit(&dringp->lock);
5378 		return (err);
5379 	}
5380 
5381 	/*
5382 	 * For now return error if we get more than one cookie
5383 	 * FUTURE: Return multiple cookies ..
5384 	 */
5385 	if (*ccount > 1) {
5386 		(void) ldc_mem_unbind_handle(mhandle);
5387 		(void) ldc_mem_free_handle(mhandle);
5388 
5389 		dringp->ldcp = NULL;
5390 		dringp->mhdl = NULL;
5391 		*ccount = 0;
5392 
5393 		mutex_exit(&dringp->lock);
5394 		return (EAGAIN);
5395 	}
5396 
5397 	/* Add descriptor ring to channel's exported dring list */
5398 	mutex_enter(&ldcp->exp_dlist_lock);
5399 	dringp->ch_next = ldcp->exp_dring_list;
5400 	ldcp->exp_dring_list = dringp;
5401 	mutex_exit(&ldcp->exp_dlist_lock);
5402 
5403 	dringp->status = LDC_BOUND;
5404 
5405 	mutex_exit(&dringp->lock);
5406 
5407 	return (0);
5408 }
5409 
5410 /*
5411  * Return the next cookie associated with the specified dring handle
5412  */
5413 int
5414 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5415 {
5416 	int		rv = 0;
5417 	ldc_dring_t 	*dringp;
5418 	ldc_chan_t	*ldcp;
5419 
5420 	if (dhandle == NULL) {
5421 		DWARN(DBG_ALL_LDCS,
5422 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5423 		return (EINVAL);
5424 	}
5425 	dringp = (ldc_dring_t *)dhandle;
5426 	mutex_enter(&dringp->lock);
5427 
5428 	if (dringp->status != LDC_BOUND) {
5429 		DWARN(DBG_ALL_LDCS,
5430 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5431 		    "is not bound\n", dringp);
5432 		mutex_exit(&dringp->lock);
5433 		return (EINVAL);
5434 	}
5435 
5436 	ldcp = dringp->ldcp;
5437 
5438 	if (cookie == NULL) {
5439 		DWARN(ldcp->id,
5440 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5441 		    ldcp->id);
5442 		mutex_exit(&dringp->lock);
5443 		return (EINVAL);
5444 	}
5445 
5446 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5447 	mutex_exit(&dringp->lock);
5448 
5449 	return (rv);
5450 }
5451 /*
5452  * Unbind a previously bound dring from a channel.
5453  */
5454 int
5455 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5456 {
5457 	ldc_dring_t 	*dringp;
5458 	ldc_dring_t	*tmp_dringp;
5459 	ldc_chan_t	*ldcp;
5460 
5461 	if (dhandle == NULL) {
5462 		DWARN(DBG_ALL_LDCS,
5463 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5464 		return (EINVAL);
5465 	}
5466 	dringp = (ldc_dring_t *)dhandle;
5467 
5468 	mutex_enter(&dringp->lock);
5469 
5470 	if (dringp->status == LDC_UNBOUND) {
5471 		DWARN(DBG_ALL_LDCS,
5472 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5473 		    dringp);
5474 		mutex_exit(&dringp->lock);
5475 		return (EINVAL);
5476 	}
5477 	ldcp = dringp->ldcp;
5478 
5479 	mutex_enter(&ldcp->exp_dlist_lock);
5480 
5481 	tmp_dringp = ldcp->exp_dring_list;
5482 	if (tmp_dringp == dringp) {
5483 		ldcp->exp_dring_list = dringp->ch_next;
5484 		dringp->ch_next = NULL;
5485 
5486 	} else {
5487 		while (tmp_dringp != NULL) {
5488 			if (tmp_dringp->ch_next == dringp) {
5489 				tmp_dringp->ch_next = dringp->ch_next;
5490 				dringp->ch_next = NULL;
5491 				break;
5492 			}
5493 			tmp_dringp = tmp_dringp->ch_next;
5494 		}
5495 		if (tmp_dringp == NULL) {
5496 			DWARN(DBG_ALL_LDCS,
5497 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5498 			mutex_exit(&ldcp->exp_dlist_lock);
5499 			mutex_exit(&dringp->lock);
5500 			return (EINVAL);
5501 		}
5502 	}
5503 
5504 	mutex_exit(&ldcp->exp_dlist_lock);
5505 
5506 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5507 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5508 
5509 	dringp->ldcp = NULL;
5510 	dringp->mhdl = NULL;
5511 	dringp->status = LDC_UNBOUND;
5512 
5513 	mutex_exit(&dringp->lock);
5514 
5515 	return (0);
5516 }
5517 
5518 /*
5519  * Get information about the dring. The base address of the descriptor
5520  * ring along with the type and permission are returned back.
5521  */
5522 int
5523 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5524 {
5525 	ldc_dring_t	*dringp;
5526 	int		rv;
5527 
5528 	if (dhandle == NULL) {
5529 		DWARN(DBG_ALL_LDCS,
5530 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5531 		return (EINVAL);
5532 	}
5533 	dringp = (ldc_dring_t *)dhandle;
5534 
5535 	mutex_enter(&dringp->lock);
5536 
5537 	if (dringp->mhdl) {
5538 		rv = ldc_mem_info(dringp->mhdl, minfo);
5539 		if (rv) {
5540 			DWARN(DBG_ALL_LDCS,
5541 			    "ldc_mem_dring_info: error reading mem info\n");
5542 			mutex_exit(&dringp->lock);
5543 			return (rv);
5544 		}
5545 	} else {
5546 		minfo->vaddr = dringp->base;
5547 		minfo->raddr = NULL;
5548 		minfo->status = dringp->status;
5549 	}
5550 
5551 	mutex_exit(&dringp->lock);
5552 
5553 	return (0);
5554 }
5555 
5556 /*
5557  * Map an exported descriptor ring into the local address space. If the
5558  * descriptor ring was exported for direct map access, a HV call is made
5559  * to allocate a RA range. If the map is done via a shadow copy, local
5560  * shadow memory is allocated.
5561  */
5562 int
5563 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
5564     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
5565     ldc_dring_handle_t *dhandle)
5566 {
5567 	int		err;
5568 	ldc_chan_t 	*ldcp = (ldc_chan_t *)handle;
5569 	ldc_mem_handle_t mhandle;
5570 	ldc_dring_t	*dringp;
5571 	size_t		dring_size;
5572 
5573 	if (dhandle == NULL) {
5574 		DWARN(DBG_ALL_LDCS,
5575 		    "ldc_mem_dring_map: invalid dhandle\n");
5576 		return (EINVAL);
5577 	}
5578 
5579 	/* check to see if channel is initalized */
5580 	if (handle == NULL) {
5581 		DWARN(DBG_ALL_LDCS,
5582 		    "ldc_mem_dring_map: invalid channel handle\n");
5583 		return (EINVAL);
5584 	}
5585 	ldcp = (ldc_chan_t *)handle;
5586 
5587 	if (cookie == NULL) {
5588 		DWARN(ldcp->id,
5589 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
5590 		    ldcp->id);
5591 		return (EINVAL);
5592 	}
5593 
5594 	/* FUTURE: For now we support only one cookie per dring */
5595 	ASSERT(ccount == 1);
5596 
5597 	if (cookie->size < (dsize * len)) {
5598 		DWARN(ldcp->id,
5599 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
5600 		    ldcp->id);
5601 		return (EINVAL);
5602 	}
5603 
5604 	*dhandle = 0;
5605 
5606 	/* Allocate an dring structure */
5607 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5608 
5609 	D1(ldcp->id,
5610 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
5611 	    mtype, len, dsize, cookie->addr, cookie->size);
5612 
5613 	/* Initialize dring */
5614 	dringp->length = len;
5615 	dringp->dsize = dsize;
5616 
5617 	/* round of to multiple of page size */
5618 	dring_size = len * dsize;
5619 	dringp->size = (dring_size & MMU_PAGEMASK);
5620 	if (dring_size & MMU_PAGEOFFSET)
5621 		dringp->size += MMU_PAGESIZE;
5622 
5623 	dringp->ldcp = ldcp;
5624 
5625 	/* create an memory handle */
5626 	err = ldc_mem_alloc_handle(handle, &mhandle);
5627 	if (err || mhandle == NULL) {
5628 		DWARN(DBG_ALL_LDCS,
5629 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
5630 		    err);
5631 		kmem_free(dringp, sizeof (ldc_dring_t));
5632 		return (ENOMEM);
5633 	}
5634 
5635 	dringp->mhdl = mhandle;
5636 	dringp->base = NULL;
5637 
5638 	/* map the dring into local memory */
5639 	err = ldc_mem_map(mhandle, cookie, ccount, mtype,
5640 	    &(dringp->base), NULL);
5641 	if (err || dringp->base == NULL) {
5642 		cmn_err(CE_WARN,
5643 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
5644 		(void) ldc_mem_free_handle(mhandle);
5645 		kmem_free(dringp, sizeof (ldc_dring_t));
5646 		return (ENOMEM);
5647 	}
5648 
5649 	/* initialize the desc ring lock */
5650 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5651 
5652 	/* Add descriptor ring to channel's imported dring list */
5653 	mutex_enter(&ldcp->imp_dlist_lock);
5654 	dringp->ch_next = ldcp->imp_dring_list;
5655 	ldcp->imp_dring_list = dringp;
5656 	mutex_exit(&ldcp->imp_dlist_lock);
5657 
5658 	dringp->status = LDC_MAPPED;
5659 
5660 	*dhandle = (ldc_dring_handle_t)dringp;
5661 
5662 	return (0);
5663 }
5664 
5665 /*
5666  * Unmap a descriptor ring. Free shadow memory (if any).
5667  */
5668 int
5669 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
5670 {
5671 	ldc_dring_t 	*dringp;
5672 	ldc_dring_t	*tmp_dringp;
5673 	ldc_chan_t	*ldcp;
5674 
5675 	if (dhandle == NULL) {
5676 		DWARN(DBG_ALL_LDCS,
5677 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
5678 		return (EINVAL);
5679 	}
5680 	dringp = (ldc_dring_t *)dhandle;
5681 
5682 	if (dringp->status != LDC_MAPPED) {
5683 		DWARN(DBG_ALL_LDCS,
5684 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
5685 		return (EINVAL);
5686 	}
5687 
5688 	mutex_enter(&dringp->lock);
5689 
5690 	ldcp = dringp->ldcp;
5691 
5692 	mutex_enter(&ldcp->imp_dlist_lock);
5693 
5694 	/* find and unlink the desc ring from channel import list */
5695 	tmp_dringp = ldcp->imp_dring_list;
5696 	if (tmp_dringp == dringp) {
5697 		ldcp->imp_dring_list = dringp->ch_next;
5698 		dringp->ch_next = NULL;
5699 
5700 	} else {
5701 		while (tmp_dringp != NULL) {
5702 			if (tmp_dringp->ch_next == dringp) {
5703 				tmp_dringp->ch_next = dringp->ch_next;
5704 				dringp->ch_next = NULL;
5705 				break;
5706 			}
5707 			tmp_dringp = tmp_dringp->ch_next;
5708 		}
5709 		if (tmp_dringp == NULL) {
5710 			DWARN(DBG_ALL_LDCS,
5711 			    "ldc_mem_dring_unmap: invalid descriptor\n");
5712 			mutex_exit(&ldcp->imp_dlist_lock);
5713 			mutex_exit(&dringp->lock);
5714 			return (EINVAL);
5715 		}
5716 	}
5717 
5718 	mutex_exit(&ldcp->imp_dlist_lock);
5719 
5720 	/* do a LDC memory handle unmap and free */
5721 	(void) ldc_mem_unmap(dringp->mhdl);
5722 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5723 
5724 	dringp->status = 0;
5725 	dringp->ldcp = NULL;
5726 
5727 	mutex_exit(&dringp->lock);
5728 
5729 	/* destroy dring lock */
5730 	mutex_destroy(&dringp->lock);
5731 
5732 	/* free desc ring object */
5733 	kmem_free(dringp, sizeof (ldc_dring_t));
5734 
5735 	return (0);
5736 }
5737 
5738 /*
5739  * Internal entry point for descriptor ring access entry consistency
5740  * semantics. Acquire copies the contents of the remote descriptor ring
5741  * into the local shadow copy. The release operation copies the local
5742  * contents into the remote dring. The start and end locations specify
5743  * bounds for the entries being synchronized.
5744  */
5745 static int
5746 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
5747     uint8_t direction, uint64_t start, uint64_t end)
5748 {
5749 	int 			err;
5750 	ldc_dring_t		*dringp;
5751 	ldc_chan_t		*ldcp;
5752 	uint64_t		soff;
5753 	size_t			copy_size;
5754 
5755 	if (dhandle == NULL) {
5756 		DWARN(DBG_ALL_LDCS,
5757 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
5758 		return (EINVAL);
5759 	}
5760 	dringp = (ldc_dring_t *)dhandle;
5761 	mutex_enter(&dringp->lock);
5762 
5763 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
5764 		DWARN(DBG_ALL_LDCS,
5765 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
5766 		mutex_exit(&dringp->lock);
5767 		return (EINVAL);
5768 	}
5769 
5770 	if (start >= dringp->length || end >= dringp->length) {
5771 		DWARN(DBG_ALL_LDCS,
5772 		    "i_ldc_dring_acquire_release: index out of range\n");
5773 		mutex_exit(&dringp->lock);
5774 		return (EINVAL);
5775 	}
5776 
5777 	/* get the channel handle */
5778 	ldcp = dringp->ldcp;
5779 
5780 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
5781 		((dringp->length - start) * dringp->dsize);
5782 
5783 	/* Calculate the relative offset for the first desc */
5784 	soff = (start * dringp->dsize);
5785 
5786 	/* copy to/from remote from/to local memory */
5787 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
5788 	    soff, copy_size);
5789 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5790 	    direction, soff, copy_size);
5791 	if (err) {
5792 		DWARN(ldcp->id,
5793 		    "i_ldc_dring_acquire_release: copy failed\n");
5794 		mutex_exit(&dringp->lock);
5795 		return (err);
5796 	}
5797 
5798 	/* do the balance */
5799 	if (start > end) {
5800 		copy_size = ((end + 1) * dringp->dsize);
5801 		soff = 0;
5802 
5803 		/* copy to/from remote from/to local memory */
5804 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
5805 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
5806 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5807 		    direction, soff, copy_size);
5808 		if (err) {
5809 			DWARN(ldcp->id,
5810 			    "i_ldc_dring_acquire_release: copy failed\n");
5811 			mutex_exit(&dringp->lock);
5812 			return (err);
5813 		}
5814 	}
5815 
5816 	mutex_exit(&dringp->lock);
5817 
5818 	return (0);
5819 }
5820 
5821 /*
5822  * Ensure that the contents in the local dring are consistent
5823  * with the contents if of remote dring
5824  */
5825 int
5826 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5827 {
5828 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
5829 }
5830 
5831 /*
5832  * Ensure that the contents in the remote dring are consistent
5833  * with the contents if of local dring
5834  */
5835 int
5836 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5837 {
5838 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
5839 }
5840 
5841 
5842 /* ------------------------------------------------------------------------- */
5843