xref: /titanic_50/usr/src/uts/sun4v/io/ldc.c (revision 355b4669e025ff377602b6fc7caaf30dbc218371)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Transport Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/note.h>
56 #include <sys/ivintr.h>
57 #include <sys/hypervisor_api.h>
58 #include <sys/ldc.h>
59 #include <sys/ldc_impl.h>
60 #include <sys/cnex.h>
61 #include <sys/hsvc.h>
62 
63 /* Core internal functions */
64 static int i_ldc_h2v_error(int h_error);
65 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
66 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp);
67 static void i_ldc_reset_state(ldc_chan_t *ldcp);
68 static void i_ldc_reset(ldc_chan_t *ldcp);
69 
70 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
71 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
72 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
73 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
74     uint8_t ctrlmsg);
75 
76 /* Interrupt handling functions */
77 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
78 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
79 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
80 
81 /* Read method functions */
82 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
83 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
84 	size_t *sizep);
85 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
86 	size_t *sizep);
87 
88 /* Write method functions */
89 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
90 	size_t *sizep);
91 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
92 	size_t *sizep);
93 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 
96 /* Pkt processing internal functions */
97 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
98 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
99 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
100 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
101 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
102 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
103 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
104 
105 /* Memory synchronization internal functions */
106 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
107     uint8_t direction, uint64_t offset, size_t size);
108 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
109     uint8_t direction, uint64_t start, uint64_t end);
110 
111 /* LDC Version */
112 static ldc_ver_t ldc_versions[] = { {1, 0} };
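/*
 * Note: ldc_versions[] is assumed to be sorted in decreasing order of
 * major version; i_ldc_process_VER() below walks it from the highest
 * supported version to the lowest when negotiating with the peer.
 */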
113 
114 /* number of supported versions */
115 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
116 
117 /* Module State Pointer */
118 static ldc_soft_state_t *ldcssp;
119 
120 static struct modldrv md = {
121 	&mod_miscops,			/* This is a misc module */
122 	"sun4v LDC module v%I%",	/* Name of the module */
123 };
124 
125 static struct modlinkage ml = {
126 	MODREV_1,
127 	&md,
128 	NULL
129 };
130 
131 static uint64_t ldc_sup_minor;		/* Supported minor number */
132 static hsvc_info_t ldc_hsvc = {
133 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
134 };
135 
136 static uint64_t intr_sup_minor;		/* Supported minor number */
137 static hsvc_info_t intr_hsvc = {
138 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
139 };
140 
141 /*
142  * LDC retry count and delay
143  */
144 int ldc_max_retries = LDC_MAX_RETRIES;
145 clock_t ldc_delay = LDC_DELAY;
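/*
 * Both are patchable tunables. As an illustrative example (not part of
 * the original source), they could be set at boot via /etc/system:
 *
 *	set ldc:ldc_max_retries = 10
 *	set ldc:ldc_delay = 100
 *
 * ldc_delay is the per-retry wait in microseconds (see the
 * drv_usecwait() calls below).
 */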
146 
147 #ifdef DEBUG
148 
/*
 * Print debug messages
 *
 * set ldcdbg to 0x7 to enable all msgs
 * 0x4 - Warnings
 * 0x2 - All debug messages
 * 0x1 - Minimal debug messages
 *
 * set ldcdbgchan to the channel number you want to debug;
 * setting it to -1 prints debug messages for all channels
 * NOTE: ldcdbgchan has no effect on error messages
 */
161 
162 #define	DBG_ALL_LDCS -1
163 
164 int ldcdbg = 0x0;
165 int64_t ldcdbgchan = DBG_ALL_LDCS;
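/*
 * Illustrative example (DEBUG builds only; the values are hypothetical):
 * to trace just channel 3 with all message levels enabled, one could
 * set in /etc/system:
 *
 *	set ldc:ldcdbg = 0x7
 *	set ldc:ldcdbgchan = 3
 */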
166 
167 static void
168 ldcdebug(int64_t id, const char *fmt, ...)
169 {
170 	char buf[512];
171 	va_list ap;
172 
	/*
	 * Do not return if:
	 * - the caller wants it printed anyway (id == DBG_ALL_LDCS)
	 * - the debug channel is set to all LDCs (ldcdbgchan == DBG_ALL_LDCS)
	 * - the debug channel matches the caller-specified channel
	 */
179 	if ((id != DBG_ALL_LDCS) &&
180 	    (ldcdbgchan != DBG_ALL_LDCS) &&
181 	    (ldcdbgchan != id)) {
182 		return;
183 	}
184 
185 	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof (buf), fmt, ap); /* bound output to buf */
187 	va_end(ap);
188 
189 	cmn_err(CE_CONT, "?%s\n", buf);
190 }
191 
192 #define	D1		\
193 if (ldcdbg & 0x01)	\
194 	ldcdebug
195 
196 #define	D2		\
197 if (ldcdbg & 0x02)	\
198 	ldcdebug
199 
200 #define	DWARN		\
201 if (ldcdbg & 0x04)	\
202 	ldcdebug
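/*
 * Usage sketch: D1/D2/DWARN expand to a guarded ldcdebug() call, so an
 * invocation such as
 *
 *	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
 *
 * prints only when the corresponding bit is set in ldcdbg and the
 * channel matches ldcdbgchan (or DBG_ALL_LDCS is in effect).
 */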
203 
204 #define	DUMP_PAYLOAD(id, addr)						\
205 {									\
206 	char buf[65*3];							\
207 	int i;								\
208 	uint8_t *src = (uint8_t *)addr;					\
209 	for (i = 0; i < 64; i++, src++)					\
210 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
211 	(void) sprintf(&buf[i * 3], "|\n");				\
212 	D2((id), "payload: %s", buf);					\
213 }
214 
215 #define	DUMP_LDC_PKT(c, s, addr)					\
216 {									\
217 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
218 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
219 	if (msg->type == LDC_DATA) {                                    \
220 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
221 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
222 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
223 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
224 	    (msg->env & LDC_LEN_MASK));					\
225 	} else { 							\
226 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
227 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
228 	} 								\
229 }
230 
231 #else
232 
233 #define	DBG_ALL_LDCS -1
234 
235 #define	D1
236 #define	D2
237 #define	DWARN
238 
239 #define	DUMP_PAYLOAD(id, addr)
240 #define	DUMP_LDC_PKT(c, s, addr)
241 
242 #endif
243 
244 #define	ZERO_PKT(p)			\
245 	bzero((p), sizeof (ldc_msg_t));
246 
247 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
248 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
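/*
 * A cookie thus carries the page size code in its upper bits and the
 * page-scaled map table index below it. Illustrative example, assuming
 * an 8K page (pg_shift = 13, pg_szc = 0): map table index 2 yields
 * IDX2COOKIE(2, 0, 13) == 0x4000.
 */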
249 
250 
251 int
252 _init(void)
253 {
254 	int status;
255 
256 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
257 	if (status != 0) {
258 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
259 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
260 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
261 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
262 		return (-1);
263 	}
264 
265 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
266 	if (status != 0) {
267 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
268 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
269 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
270 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
271 		(void) hsvc_unregister(&ldc_hsvc);
272 		return (-1);
273 	}
274 
275 	/* allocate soft state structure */
276 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
277 
278 	/* Link the module into the system */
279 	status = mod_install(&ml);
280 	if (status != 0) {
281 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
282 		return (status);
283 	}
284 
285 	/* Initialize the LDC state structure */
286 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
287 
288 	mutex_enter(&ldcssp->lock);
289 
290 	ldcssp->channel_count = 0;
291 	ldcssp->channels_open = 0;
292 	ldcssp->chan_list = NULL;
293 	ldcssp->dring_list = NULL;
294 
295 	mutex_exit(&ldcssp->lock);
296 
297 	return (0);
298 }
299 
300 int
301 _info(struct modinfo *modinfop)
302 {
303 	/* Report status of the dynamically loadable driver module */
304 	return (mod_info(&ml, modinfop));
305 }
306 
307 int
308 _fini(void)
309 {
310 	int 		rv, status;
	ldc_chan_t 	*ldcp, *tmp_ldcp;
	ldc_dring_t 	*dringp, *tmp_dringp;
313 	ldc_mem_info_t 	minfo;
314 
315 	/* Unlink the driver module from the system */
316 	status = mod_remove(&ml);
317 	if (status) {
318 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
319 		return (EIO);
320 	}
321 
	/* close and finalize channels */
	ldcp = ldcssp->chan_list;
	while (ldcp != NULL) {
		tmp_ldcp = ldcp->next;	/* save; ldc_fini() frees ldcp */
		(void) ldc_close((ldc_handle_t)ldcp);
		(void) ldc_fini((ldc_handle_t)ldcp);

		ldcp = tmp_ldcp;
	}
330 
	/* Free descriptor rings */
	dringp = ldcssp->dring_list;
	while (dringp != NULL) {
		tmp_dringp = dringp->next;	/* dringp is destroyed below */

		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
		if (rv == 0 && minfo.status != LDC_UNBOUND) {
			if (minfo.status == LDC_BOUND) {
				(void) ldc_mem_dring_unbind(
				    (ldc_dring_handle_t)dringp);
			}
			if (minfo.status == LDC_MAPPED) {
				(void) ldc_mem_dring_unmap(
				    (ldc_dring_handle_t)dringp);
			}
		}

		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
		dringp = tmp_dringp;
	}
350 	ldcssp->dring_list = NULL;
351 
	/*
	 * We have successfully "removed" the driver;
	 * destroy the soft state.
	 */
356 	mutex_destroy(&ldcssp->lock);
357 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
358 
359 	(void) hsvc_unregister(&ldc_hsvc);
360 	(void) hsvc_unregister(&intr_hsvc);
361 
362 	return (status);
363 }
364 
365 /* -------------------------------------------------------------------------- */
366 
367 /*
368  * LDC Transport Internal Functions
369  */
370 
371 /*
372  * Translate HV Errors to sun4v error codes
373  */
374 static int
375 i_ldc_h2v_error(int h_error)
376 {
377 	switch (h_error) {
378 
379 	case	H_EOK:
380 		return (0);
381 
382 	case	H_ENORADDR:
383 		return (EFAULT);
384 
385 	case	H_EBADPGSZ:
386 	case	H_EINVAL:
387 		return (EINVAL);
388 
389 	case	H_EWOULDBLOCK:
390 		return (EWOULDBLOCK);
391 
392 	case	H_ENOACCESS:
393 	case	H_ENOMAP:
394 		return (EACCES);
395 
396 	case	H_EIO:
397 	case	H_ECPUERROR:
398 		return (EIO);
399 
400 	case	H_ENOTSUPPORTED:
401 		return (ENOTSUP);
402 
403 	case 	H_ETOOMANY:
404 		return (ENOSPC);
405 
406 	case	H_ECHANNEL:
407 		return (ECHRNG);
408 	default:
409 		break;
410 	}
411 
412 	return (EIO);
413 }
414 
415 /*
416  * Reconfigure the transmit queue
417  */
418 static int
419 i_ldc_txq_reconf(ldc_chan_t *ldcp)
420 {
421 	int rv;
422 
423 	ASSERT(MUTEX_HELD(&ldcp->lock));
424 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
425 	if (rv) {
426 		cmn_err(CE_WARN,
427 		    "ldc_tx_qconf: (0x%lx) cannot set qconf", ldcp->id);
428 		return (EIO);
429 	}
430 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
431 	    &(ldcp->tx_tail), &(ldcp->link_state));
432 	if (rv) {
433 		cmn_err(CE_WARN,
434 		    "ldc_tx_get_state: (0x%lx) cannot get qptrs", ldcp->id);
435 		return (EIO);
436 	}
437 	D1(ldcp->id, "ldc_tx_get_state: (0x%llx) h=0x%llx,t=0x%llx,"
438 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
439 	    ldcp->link_state);
440 
441 	return (0);
442 }
443 
444 /*
445  * Reconfigure the receive queue
446  */
447 static int
448 i_ldc_rxq_reconf(ldc_chan_t *ldcp)
449 {
450 	int rv;
451 	uint64_t rx_head, rx_tail;
452 
453 	ASSERT(MUTEX_HELD(&ldcp->lock));
454 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
455 	    &(ldcp->link_state));
456 	if (rv) {
457 		cmn_err(CE_WARN,
458 		    "ldc_rx_getstate: (0x%lx) cannot get state",
459 		    ldcp->id);
460 		return (EIO);
461 	}
462 
463 	if (rx_head != rx_tail || ldcp->tstate > TS_READY) {
464 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
465 			ldcp->rx_q_entries);
466 		if (rv) {
467 			cmn_err(CE_WARN,
468 			    "ldc_rx_qconf: (0x%lx) cannot set qconf",
469 			    ldcp->id);
470 			return (EIO);
471 		}
472 		D1(ldcp->id, "ldc_rx_qconf: (0x%llx) completed qconf",
473 		    ldcp->id);
474 	}
475 
476 	return (0);
477 }
478 
479 /*
480  * Reset LDC state structure and its contents
481  */
482 static void
483 i_ldc_reset_state(ldc_chan_t *ldcp)
484 {
485 	ASSERT(MUTEX_HELD(&ldcp->lock));
486 	ldcp->last_msg_snt = LDC_INIT_SEQID;
487 	ldcp->last_ack_rcd = 0;
488 	ldcp->last_msg_rcd = 0;
489 	ldcp->tx_ackd_head = ldcp->tx_head;
490 	ldcp->next_vidx = 0;
491 	ldcp->hstate = 0;
492 	ldcp->tstate = TS_OPEN;
493 	ldcp->status = LDC_OPEN;
494 
495 	if (ldcp->link_state == LDC_CHANNEL_UP ||
496 	    ldcp->link_state == LDC_CHANNEL_RESET) {
497 
498 		if (ldcp->mode == LDC_MODE_RAW) {
499 			ldcp->status = LDC_UP;
500 			ldcp->tstate = TS_UP;
501 		} else {
502 			ldcp->status = LDC_READY;
503 			ldcp->tstate |= TS_LINK_READY;
504 		}
505 	}
506 }
507 
508 /*
 * Reset an LDC channel
510  */
511 static void
512 i_ldc_reset(ldc_chan_t *ldcp)
513 {
514 	D2(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
515 
516 	(void) i_ldc_txq_reconf(ldcp);
517 	(void) i_ldc_rxq_reconf(ldcp);
518 	i_ldc_reset_state(ldcp);
519 }
520 
521 /*
522  * Clear pending interrupts
523  */
524 static void
525 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
526 {
527 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
528 
529 	ASSERT(MUTEX_HELD(&ldcp->lock));
530 	if (cinfo->dip && ldcp->intr_pending) {
531 		ldcp->intr_pending = B_FALSE;
532 		(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
533 	}
534 }
535 
536 /*
537  * Set the receive queue head
 * Resets the connection and returns ECONNRESET if it fails.
539  */
540 static int
541 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
542 {
543 	int 	rv;
544 	int 	retries;
545 
546 	ASSERT(MUTEX_HELD(&ldcp->lock));
547 	for (retries = 0; retries < ldc_max_retries; retries++) {
548 
549 		if ((rv = hv_ldc_rx_set_qhead(ldcp->id, head)) == 0)
550 			return (0);
551 
552 		if (rv != H_EWOULDBLOCK)
553 			break;
554 
555 		/* wait for ldc_delay usecs */
556 		drv_usecwait(ldc_delay);
557 	}
558 
559 	cmn_err(CE_WARN, "ldc_rx_set_qhead: (0x%lx) cannot set qhead 0x%lx",
560 		ldcp->id, head);
561 	i_ldc_reset(ldcp);
562 
563 	return (ECONNRESET);
564 }
565 
566 
/*
 * Returns the tx_tail to be used for the next transfer.
 * Re-reads the TX queue ptrs from the HV and fails with
 * EWOULDBLOCK if the queue is full.
 */
572 static int
573 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
574 {
575 	int 		rv;
576 	uint64_t 	current_head, new_tail;
577 
578 	ASSERT(MUTEX_HELD(&ldcp->lock));
579 	/* Read the head and tail ptrs from HV */
580 	rv = hv_ldc_tx_get_state(ldcp->id,
581 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
582 	if (rv) {
583 		cmn_err(CE_WARN,
584 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
585 		    ldcp->id);
586 		return (EIO);
587 	}
588 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
589 		DWARN(DBG_ALL_LDCS,
590 		    "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
591 		    ldcp->id);
592 		return (ECONNRESET);
593 	}
594 
595 	/* In reliable mode, check against last ACKd msg */
596 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
597 		ldcp->mode == LDC_MODE_STREAM)
598 		? ldcp->tx_ackd_head : ldcp->tx_head;
599 
600 	/* increment the tail */
601 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
602 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
603 
604 	if (new_tail == current_head) {
605 		DWARN(ldcp->id,
606 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
607 		    ldcp->id);
608 		return (EWOULDBLOCK);
609 	}
610 
611 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
612 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
613 
614 	*tail = ldcp->tx_tail;
615 	return (0);
616 }
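/*
 * Queue geometry note: head and tail are byte offsets into the queue,
 * which is (tx_q_entries << LDC_PACKET_SHIFT) bytes long, so pointers
 * advance modulo the queue size as above. Illustrative example,
 * assuming 64-byte packets (LDC_PACKET_SHIFT == 6) and 512 entries:
 * offsets wrap around at 512 * 64 = 0x8000.
 */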
617 
/*
 * Set the tail pointer. If the HV returns EWOULDBLOCK, back off
 * and retry up to ldc_max_retries times before returning an error.
 * Returns 0, EWOULDBLOCK or EIO.
 */
623 static int
624 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
625 {
626 	int		rv, retval = EWOULDBLOCK;
627 	int 		retries;
628 
629 	ASSERT(MUTEX_HELD(&ldcp->lock));
630 	for (retries = 0; retries < ldc_max_retries; retries++) {
631 
632 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
633 			retval = 0;
634 			break;
635 		}
636 		if (rv != H_EWOULDBLOCK) {
637 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
638 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
639 			retval = EIO;
640 			break;
641 		}
642 
643 		/* wait for ldc_delay usecs */
644 		drv_usecwait(ldc_delay);
645 	}
646 	return (retval);
647 }
648 
649 /*
 * Send an LDC message
651  */
652 static int
653 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
654     uint8_t ctrlmsg)
655 {
656 	int		rv;
657 	ldc_msg_t 	*pkt;
658 	uint64_t	tx_tail;
659 	uint32_t	curr_seqid = ldcp->last_msg_snt;
660 
661 	ASSERT(MUTEX_HELD(&ldcp->lock));
662 	/* get the current tail for the message */
663 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
664 	if (rv) {
665 		DWARN(ldcp->id,
666 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
667 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
668 		    ldcp->id, pkttype, subtype, ctrlmsg);
669 		return (rv);
670 	}
671 
672 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
673 	ZERO_PKT(pkt);
674 
675 	/* Initialize the packet */
676 	pkt->type = pkttype;
677 	pkt->stype = subtype;
678 	pkt->ctrl = ctrlmsg;
679 
	/*
	 * Unless this is an RTS/RTR message, bump the seqid and, for all
	 * modes other than RAW, store the seqid/ackid in the packet.
	 */
681 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
682 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
683 		curr_seqid++;
684 		if (ldcp->mode != LDC_MODE_RAW) {
685 			pkt->seqid = curr_seqid;
686 			pkt->ackid = ldcp->last_msg_rcd;
687 		}
688 	}
689 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
690 
691 	/* initiate the send by calling into HV and set the new tail */
692 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
693 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
694 
695 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
696 	if (rv) {
697 		DWARN(ldcp->id,
698 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
699 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
700 		    ldcp->id, pkttype, subtype, ctrlmsg);
701 		return (EIO);
702 	}
703 
704 	ldcp->last_msg_snt = curr_seqid;
705 	ldcp->tx_tail = tx_tail;
706 
707 	return (0);
708 }
709 
/*
 * Checks whether a packet was received in the right order
 * in the case of a reliable transport.
 * Returns 0 if in order, else EIO.
 */
715 static int
716 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
717 {
718 	/* No seqid checking for RAW mode */
719 	if (ldcp->mode == LDC_MODE_RAW)
720 		return (0);
721 
722 	/* No seqid checking for version, RTS, RTR message */
723 	if (msg->ctrl == LDC_VER ||
724 	    msg->ctrl == LDC_RTS ||
725 	    msg->ctrl == LDC_RTR)
726 		return (0);
727 
728 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
729 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
730 		DWARN(ldcp->id,
731 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
732 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
733 		    (ldcp->last_msg_rcd + 1));
734 		return (EIO);
735 	}
736 
737 	return (0);
738 }
739 
740 
741 /*
742  * Process an incoming version ctrl message
743  */
744 static int
745 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
746 {
747 	int 		rv = 0, idx = ldcp->next_vidx;
748 	ldc_msg_t 	*pkt;
749 	uint64_t	tx_tail;
750 	ldc_ver_t	*rcvd_ver;
751 
752 	/* get the received version */
753 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
754 
755 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
756 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
757 
758 	switch (msg->stype) {
759 	case LDC_INFO:
760 
761 		/* get the current tail and pkt for the response */
762 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
763 		if (rv != 0) {
764 			DWARN(ldcp->id,
765 			    "i_ldc_process_VER: (0x%llx) err sending "
766 			    "version ACK/NACK\n", ldcp->id);
767 			i_ldc_reset(ldcp);
768 			return (ECONNRESET);
769 		}
770 
771 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
772 		ZERO_PKT(pkt);
773 
774 		/* initialize the packet */
775 		pkt->type = LDC_CTRL;
776 		pkt->ctrl = LDC_VER;
777 
778 		for (;;) {
779 
780 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
781 			    rcvd_ver->major, rcvd_ver->minor,
782 			    ldc_versions[idx].major, ldc_versions[idx].minor);
783 
784 			if (rcvd_ver->major == ldc_versions[idx].major) {
785 				/* major version match - ACK version */
786 				pkt->stype = LDC_ACK;
787 
788 				/*
789 				 * lower minor version to the one this endpt
790 				 * supports, if necessary
791 				 */
792 				if (rcvd_ver->minor > ldc_versions[idx].minor)
793 					rcvd_ver->minor =
794 						ldc_versions[idx].minor;
795 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
796 
797 				break;
798 			}
799 
800 			if (rcvd_ver->major > ldc_versions[idx].major) {
801 
802 				D1(ldcp->id, "i_ldc_process_VER: using next"
803 				    " lower idx=%d, v%u.%u\n", idx,
804 				    ldc_versions[idx].major,
805 				    ldc_versions[idx].minor);
806 
807 				/* nack with next lower version */
808 				pkt->stype = LDC_NACK;
809 				bcopy(&ldc_versions[idx], pkt->udata,
810 				    sizeof (ldc_versions[idx]));
811 				ldcp->next_vidx = idx;
812 				break;
813 			}
814 
815 			/* next major version */
816 			idx++;
817 
818 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
819 
820 			if (idx == LDC_NUM_VERS) {
821 				/* no version match - send NACK */
822 				pkt->stype = LDC_NACK;
823 				bzero(pkt->udata, sizeof (ldc_ver_t));
824 				ldcp->next_vidx = 0;
825 				break;
826 			}
827 		}
828 
829 		/* initiate the send by calling into HV and set the new tail */
830 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
831 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
832 
833 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
834 		if (rv == 0) {
835 			ldcp->tx_tail = tx_tail;
836 			if (pkt->stype == LDC_ACK) {
837 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
838 				    " version ACK\n", ldcp->id);
839 				/* Save the ACK'd version */
840 				ldcp->version.major = rcvd_ver->major;
841 				ldcp->version.minor = rcvd_ver->minor;
842 				ldcp->hstate |= TS_RCVD_VER;
843 				ldcp->tstate |= TS_VER_DONE;
844 				DWARN(DBG_ALL_LDCS,
845 				    "(0x%llx) Agreed on version v%u.%u\n",
846 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
847 			}
848 		} else {
849 			DWARN(ldcp->id,
850 			    "i_ldc_process_VER: (0x%llx) error sending "
851 			    "ACK/NACK\n", ldcp->id);
852 			i_ldc_reset(ldcp);
853 			return (ECONNRESET);
854 		}
855 
856 		break;
857 
858 	case LDC_ACK:
859 		/* SUCCESS - we have agreed on a version */
860 		ldcp->version.major = rcvd_ver->major;
861 		ldcp->version.minor = rcvd_ver->minor;
862 		ldcp->tstate |= TS_VER_DONE;
863 
864 		D1(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n",
865 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
866 
867 		/* initiate RTS-RTR-RDX handshake */
868 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
869 		if (rv) {
870 			DWARN(ldcp->id,
871 			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
872 			    ldcp->id);
873 			i_ldc_reset(ldcp);
874 			return (ECONNRESET);
875 		}
876 
877 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
878 		ZERO_PKT(pkt);
879 
880 		pkt->type = LDC_CTRL;
881 		pkt->stype = LDC_INFO;
882 		pkt->ctrl = LDC_RTS;
883 		pkt->env = ldcp->mode;
884 		if (ldcp->mode != LDC_MODE_RAW)
885 			pkt->seqid = LDC_INIT_SEQID;
886 
887 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
888 
889 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
890 
891 		/* initiate the send by calling into HV and set the new tail */
892 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
893 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
894 
895 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
896 		if (rv) {
897 			D2(ldcp->id,
898 			    "i_ldc_process_VER: (0x%llx) no listener\n",
899 			    ldcp->id);
900 			i_ldc_reset(ldcp);
901 			return (ECONNRESET);
902 		}
903 
904 		ldcp->tx_tail = tx_tail;
905 		ldcp->hstate |= TS_SENT_RTS;
906 
907 		break;
908 
909 	case LDC_NACK:
910 		/* check if version in NACK is zero */
911 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
912 			/* version handshake failure */
913 			DWARN(DBG_ALL_LDCS,
914 			    "i_ldc_process_VER: (0x%llx) no version match\n",
915 			    ldcp->id);
916 			i_ldc_reset(ldcp);
917 			return (ECONNRESET);
918 		}
919 
920 		/* get the current tail and pkt for the response */
921 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
922 		if (rv != 0) {
923 			cmn_err(CE_NOTE,
924 			    "i_ldc_process_VER: (0x%lx) err sending "
925 			    "version ACK/NACK\n", ldcp->id);
926 			i_ldc_reset(ldcp);
927 			return (ECONNRESET);
928 		}
929 
930 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
931 		ZERO_PKT(pkt);
932 
933 		/* initialize the packet */
934 		pkt->type = LDC_CTRL;
935 		pkt->ctrl = LDC_VER;
936 		pkt->stype = LDC_INFO;
937 
		/* check whether the ver in the NACK msg has a match */
		for (;;) {
			if (rcvd_ver->major == ldc_versions[idx].major) {
				/*
				 * major version match - resubmit the request,
				 * lowering the minor version to the one this
				 * endpt supports, if necessary
				 */
946 				if (rcvd_ver->minor > ldc_versions[idx].minor)
947 					rcvd_ver->minor =
948 						ldc_versions[idx].minor;
949 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
950 				break;
951 
952 			}
953 
954 			if (rcvd_ver->major > ldc_versions[idx].major) {
955 
956 				D1(ldcp->id, "i_ldc_process_VER: using next"
957 				    " lower idx=%d, v%u.%u\n", idx,
958 				    ldc_versions[idx].major,
959 				    ldc_versions[idx].minor);
960 
961 				/* send next lower version */
962 				bcopy(&ldc_versions[idx], pkt->udata,
963 				    sizeof (ldc_versions[idx]));
964 				ldcp->next_vidx = idx;
965 				break;
966 			}
967 
968 			/* next version */
969 			idx++;
970 
971 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
972 
973 			if (idx == LDC_NUM_VERS) {
974 				/* no version match - terminate */
975 				ldcp->next_vidx = 0;
976 				return (ECONNRESET);
977 			}
978 		}
979 
980 		/* initiate the send by calling into HV and set the new tail */
981 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
982 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
983 
984 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
985 		if (rv == 0) {
			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version"
			    " INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
988 			    ldc_versions[idx].minor);
989 			ldcp->tx_tail = tx_tail;
990 		} else {
991 			cmn_err(CE_NOTE,
			    "i_ldc_process_VER: (0x%lx) error sending version"
			    " INFO\n", ldcp->id);
994 			i_ldc_reset(ldcp);
995 			return (ECONNRESET);
996 		}
997 
998 		break;
999 	}
1000 
1001 	return (rv);
1002 }
1003 
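/*
 * Handshake summary (as implemented by the routines below): once the
 * VER exchange agrees on a version, the initiating endpoint sends RTS,
 * the peer answers with RTR, and the initiator completes with RDX.
 * When RDX is sent/received, both sides set TS_HSHAKE_DONE and mark
 * the channel status LDC_UP.
 */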
1004 
1005 /*
1006  * Process an incoming RTS ctrl message
1007  */
1008 static int
1009 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
1010 {
1011 	int 		rv = 0;
1012 	ldc_msg_t 	*pkt;
1013 	uint64_t	tx_tail;
1014 	boolean_t	sent_NACK = B_FALSE;
1015 
1016 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1017 
1018 	switch (msg->stype) {
1019 	case LDC_NACK:
1020 		DWARN(ldcp->id,
1021 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1022 		    ldcp->id);
1023 
1024 		/* Reset the channel -- as we cannot continue */
1025 		i_ldc_reset(ldcp);
1026 		rv = ECONNRESET;
1027 		break;
1028 
1029 	case LDC_INFO:
1030 
1031 		/* check mode */
1032 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1033 			cmn_err(CE_NOTE,
1034 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1035 			    ldcp->id);
			/*
			 * send a NACK in response to the mode mismatch;
			 * i_ldc_send_pkt() picks up the current tail itself
			 */
1040 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1041 			if (rv) {
1042 				/* if cannot send NACK - reset channel */
1043 				i_ldc_reset(ldcp);
1044 				rv = ECONNRESET;
1045 				break;
1046 			}
1047 			sent_NACK = B_TRUE;
1048 		}
1049 		break;
1050 	default:
1051 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1052 		    ldcp->id);
1053 		i_ldc_reset(ldcp);
1054 		rv = ECONNRESET;
1055 		break;
1056 	}
1057 
	/*
	 * Return if either the connection was reset (rv != 0) or
	 * a NACK was sent. In the NACK case the packet is still
	 * consumed, but receipt of the RTS is not recorded.
	 */
1064 	if (rv || sent_NACK)
1065 		return (rv);
1066 
1067 	/* record RTS received */
1068 	ldcp->hstate |= TS_RCVD_RTS;
1069 
1070 	/* store initial SEQID info */
1071 	ldcp->last_msg_snt = msg->seqid;
1072 
1073 	/* get the current tail for the response */
1074 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1075 	if (rv != 0) {
1076 		cmn_err(CE_NOTE,
1077 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1078 		    ldcp->id);
1079 		i_ldc_reset(ldcp);
1080 		return (ECONNRESET);
1081 	}
1082 
1083 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1084 	ZERO_PKT(pkt);
1085 
1086 	/* initialize the packet */
1087 	pkt->type = LDC_CTRL;
1088 	pkt->stype = LDC_INFO;
1089 	pkt->ctrl = LDC_RTR;
1090 	pkt->env = ldcp->mode;
1091 	if (ldcp->mode != LDC_MODE_RAW)
1092 		pkt->seqid = LDC_INIT_SEQID;
1093 
1094 	ldcp->last_msg_rcd = msg->seqid;
1095 
1096 	/* initiate the send by calling into HV and set the new tail */
1097 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1098 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1099 
1100 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1101 	if (rv == 0) {
1102 		D2(ldcp->id,
1103 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1104 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1105 
1106 		ldcp->tx_tail = tx_tail;
1107 		ldcp->hstate |= TS_SENT_RTR;
1108 
1109 	} else {
1110 		cmn_err(CE_NOTE,
1111 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1112 		    ldcp->id);
1113 		i_ldc_reset(ldcp);
1114 		return (ECONNRESET);
1115 	}
1116 
1117 	return (0);
1118 }
1119 
1120 /*
1121  * Process an incoming RTR ctrl message
1122  */
1123 static int
1124 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1125 {
1126 	int 		rv = 0;
1127 	boolean_t	sent_NACK = B_FALSE;
1128 
1129 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1130 
1131 	switch (msg->stype) {
1132 	case LDC_NACK:
1133 		/* RTR NACK received */
1134 		DWARN(ldcp->id,
1135 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1136 		    ldcp->id);
1137 
1138 		/* Reset the channel -- as we cannot continue */
1139 		i_ldc_reset(ldcp);
1140 		rv = ECONNRESET;
1141 
1142 		break;
1143 
1144 	case LDC_INFO:
1145 
1146 		/* check mode */
1147 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1148 			DWARN(ldcp->id,
1149 			    "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
1150 			    ldcp->id);
			/*
			 * send a NACK in response to the mode mismatch;
			 * i_ldc_send_pkt() picks up the current tail itself
			 */
1155 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1156 			if (rv) {
1157 				/* if cannot send NACK - reset channel */
1158 				i_ldc_reset(ldcp);
1159 				rv = ECONNRESET;
1160 				break;
1161 			}
1162 			sent_NACK = B_TRUE;
1163 		}
1164 		break;
1165 
1166 	default:
1167 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1168 		    ldcp->id);
1169 
1170 		/* Reset the channel -- as we cannot continue */
1171 		i_ldc_reset(ldcp);
1172 		rv = ECONNRESET;
1173 		break;
1174 	}
1175 
	/*
	 * Return if either the connection was reset (rv != 0) or
	 * a NACK was sent. In the NACK case the packet is still
	 * consumed, but receipt of the RTR is not recorded.
	 */
1182 	if (rv || sent_NACK)
1183 		return (rv);
1184 
1185 	ldcp->last_msg_snt = msg->seqid;
1186 	ldcp->hstate |= TS_RCVD_RTR;
1187 
1188 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1189 	if (rv) {
1190 		cmn_err(CE_NOTE,
1191 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1192 		    ldcp->id);
1193 		i_ldc_reset(ldcp);
1194 		return (ECONNRESET);
1195 	}
1196 	D2(ldcp->id,
1197 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1198 
1199 	ldcp->hstate |= TS_SENT_RDX;
1200 	ldcp->tstate |= TS_HSHAKE_DONE;
1201 	ldcp->status = LDC_UP;
1202 
1203 	DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1204 
1205 	return (0);
1206 }
1207 
1208 
1209 /*
1210  * Process an incoming RDX ctrl message
1211  */
1212 static int
1213 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1214 {
1215 	int	rv = 0;
1216 
1217 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1218 
1219 	switch (msg->stype) {
1220 	case LDC_NACK:
1221 		/* RDX NACK received */
1222 		DWARN(ldcp->id,
1223 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1224 		    ldcp->id);
1225 
1226 		/* Reset the channel -- as we cannot continue */
1227 		i_ldc_reset(ldcp);
1228 		rv = ECONNRESET;
1229 
1230 		break;
1231 
1232 	case LDC_INFO:
1233 
		/*
		 * if the channel is UP and an RDX is received after data
		 * transmission has commenced, it is an error
		 */
1238 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1239 			DWARN(DBG_ALL_LDCS,
1240 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1241 			    " - LDC reset\n", ldcp->id);
1242 			i_ldc_reset(ldcp);
1243 			return (ECONNRESET);
1244 		}
1245 
1246 		ldcp->hstate |= TS_RCVD_RDX;
1247 		ldcp->tstate |= TS_HSHAKE_DONE;
1248 		ldcp->status = LDC_UP;
1249 
1250 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1251 		break;
1252 
1253 	default:
1254 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1255 		    ldcp->id);
1256 
1257 		/* Reset the channel -- as we cannot continue */
1258 		i_ldc_reset(ldcp);
1259 		rv = ECONNRESET;
1260 		break;
1261 	}
1262 
1263 	return (rv);
1264 }
1265 
1266 /*
1267  * Process an incoming ACK for a data packet
1268  */
1269 static int
1270 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1271 {
1272 	int		rv;
1273 	uint64_t 	tx_head;
1274 	ldc_msg_t	*pkt;
1275 
1276 	/*
	 * Read the current Tx head and tail
1278 	 */
1279 	rv = hv_ldc_tx_get_state(ldcp->id,
1280 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1281 	if (rv != 0) {
1282 		cmn_err(CE_WARN,
1283 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1284 		    ldcp->id);
1285 		return (0);
1286 	}
1287 
	/*
	 * Loop from where the previous ACK location was to the
	 * current head location. This is how far the HV has
	 * actually sent pkts; pkts between head and tail are
	 * yet to be sent by the HV.
	 */
1294 	tx_head = ldcp->tx_ackd_head;
1295 	for (;;) {
1296 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1297 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1298 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1299 
1300 		if (pkt->seqid == msg->ackid) {
1301 			D2(ldcp->id,
1302 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1303 			    ldcp->id);
1304 			ldcp->last_ack_rcd = msg->ackid;
1305 			ldcp->tx_ackd_head = tx_head;
1306 			break;
1307 		}
1308 		if (tx_head == ldcp->tx_head) {
1309 			/* could not find packet */
1310 			DWARN(ldcp->id,
1311 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1312 			    ldcp->id);
1313 			break;
1314 		}
1315 	}
1316 
1317 	return (0);
1318 }
1319 
1320 /*
1321  * Process incoming control message
1322  * Return 0 - session can continue
1323  *        EAGAIN - reprocess packet - state was changed
1324  *	  ECONNRESET - channel was reset
1325  */
1326 static int
1327 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1328 {
1329 	int 		rv = 0;
1330 
1331 	switch (ldcp->tstate) {
1332 
1333 	case TS_OPEN:
1334 	case TS_READY:
1335 
1336 		switch (msg->ctrl & LDC_CTRL_MASK) {
1337 		case LDC_VER:
1338 			/* process version message */
1339 			rv = i_ldc_process_VER(ldcp, msg);
1340 			break;
1341 		default:
1342 			DWARN(ldcp->id,
1343 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1344 			    "tstate=0x%x\n", ldcp->id,
1345 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1346 			break;
1347 		}
1348 
1349 		break;
1350 
1351 	case TS_VREADY:
1352 
1353 		switch (msg->ctrl & LDC_CTRL_MASK) {
1354 		case LDC_VER:
1355 			/* peer is redoing version negotiation */
1356 			(void) i_ldc_txq_reconf(ldcp);
1357 			i_ldc_reset_state(ldcp);
1358 			rv = EAGAIN;
1359 			break;
1360 		case LDC_RTS:
1361 			/* process RTS message */
1362 			rv = i_ldc_process_RTS(ldcp, msg);
1363 			break;
1364 		case LDC_RTR:
1365 			/* process RTR message */
1366 			rv = i_ldc_process_RTR(ldcp, msg);
1367 			break;
1368 		case LDC_RDX:
1369 			/* process RDX message */
1370 			rv = i_ldc_process_RDX(ldcp, msg);
1371 			break;
1372 		default:
1373 			DWARN(ldcp->id,
1374 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1375 			    "tstate=0x%x\n", ldcp->id,
1376 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1377 			break;
1378 		}
1379 
1380 		break;
1381 
1382 	case TS_UP:
1383 
1384 		switch (msg->ctrl & LDC_CTRL_MASK) {
1385 		case LDC_VER:
1386 			DWARN(ldcp->id,
1387 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1388 			    "- LDC reset\n", ldcp->id);
1389 			/* peer is redoing version negotiation */
1390 			(void) i_ldc_txq_reconf(ldcp);
1391 			i_ldc_reset_state(ldcp);
1392 			rv = EAGAIN;
1393 			break;
1394 
1395 		case LDC_RDX:
1396 			/* process RDX message */
1397 			rv = i_ldc_process_RDX(ldcp, msg);
1398 			break;
1399 
1400 		default:
1401 			DWARN(ldcp->id,
1402 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1403 			    "tstate=0x%x\n", ldcp->id,
1404 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1405 			break;
1406 		}
1407 	}
1408 
1409 	return (rv);
1410 }
1411 
1412 /*
1413  * Register channel with the channel nexus
1414  */
1415 static int
1416 i_ldc_register_channel(ldc_chan_t *ldcp)
1417 {
1418 	int		rv = 0;
1419 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1420 
1421 	if (cinfo->dip == NULL) {
1422 		DWARN(ldcp->id,
1423 		    "i_ldc_register_channel: cnex has not registered\n");
1424 		return (EAGAIN);
1425 	}
1426 
1427 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1428 	if (rv) {
1429 		DWARN(ldcp->id,
1430 		    "i_ldc_register_channel: cannot register channel\n");
1431 		return (rv);
1432 	}
1433 
1434 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1435 	    i_ldc_tx_hdlr, ldcp, NULL);
1436 	if (rv) {
1437 		DWARN(ldcp->id,
1438 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1439 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1440 		return (rv);
1441 	}
1442 
1443 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1444 	    i_ldc_rx_hdlr, ldcp, NULL);
1445 	if (rv) {
1446 		DWARN(ldcp->id,
1447 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1448 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1449 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1450 		return (rv);
1451 	}
1452 
1453 	ldcp->tstate |= TS_CNEX_RDY;
1454 
1455 	return (0);
1456 }
1457 
1458 /*
1459  * Unregister a channel with the channel nexus
1460  */
1461 static int
1462 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1463 {
1464 	int		rv = 0;
1465 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1466 
1467 	if (cinfo->dip == NULL) {
1468 		DWARN(ldcp->id,
1469 		    "i_ldc_unregister_channel: cnex has not registered\n");
1470 		return (EAGAIN);
1471 	}
1472 
1473 	if (ldcp->tstate & TS_CNEX_RDY) {
1474 
1475 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1476 		if (rv) {
1477 			DWARN(ldcp->id,
1478 			    "i_ldc_unregister_channel: err removing Rx intr\n");
1479 		}
1480 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1481 		if (rv) {
1482 			DWARN(ldcp->id,
1483 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1484 		}
1485 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1486 		if (rv) {
1487 			DWARN(ldcp->id,
1488 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1489 		}
1490 
1491 		ldcp->tstate &= ~TS_CNEX_RDY;
1492 	}
1493 
1494 	return (0);
1495 }
1496 
1497 
1498 /*
1499  * LDC transmit interrupt handler
 *    triggered for channel up/down/reset events
1501  *    and Tx queue content changes
1502  */
1503 static uint_t
1504 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1505 {
1506 	_NOTE(ARGUNUSED(arg2))
1507 
1508 	int 		rv;
1509 	ldc_chan_t 	*ldcp;
1510 	boolean_t 	notify_client = B_FALSE;
1511 	uint64_t	notify_event = 0;
1512 
1513 	/* Get the channel for which interrupt was received */
1514 	ASSERT(arg1 != NULL);
1515 	ldcp = (ldc_chan_t *)arg1;
1516 
1517 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1518 	    ldcp->id, ldcp);
1519 
1520 	/* Lock channel */
1521 	mutex_enter(&ldcp->lock);
1522 
1523 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1524 	    &ldcp->link_state);
1525 	if (rv) {
1526 		cmn_err(CE_WARN,
1527 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1528 		    ldcp->id, rv);
1529 		mutex_exit(&ldcp->lock);
1530 		return (DDI_INTR_CLAIMED);
1531 	}
1532 
1533 	/*
1534 	 * reset the channel state if the channel went down
1535 	 * (other side unconfigured queue) or channel was reset
1536 	 * (other side reconfigured its queue)
1537 	 */
1538 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) channel link down\n",
		    ldcp->id);
1540 		i_ldc_reset(ldcp);
1541 		notify_client = B_TRUE;
1542 		notify_event = LDC_EVT_DOWN;
1543 	}
1544 
1545 	if (ldcp->link_state == LDC_CHANNEL_RESET) {
		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) channel link reset\n",
		    ldcp->id);
1547 		i_ldc_reset(ldcp);
1548 		notify_client = B_TRUE;
1549 		notify_event = LDC_EVT_RESET;
1550 	}
1551 
1552 	if (ldcp->tstate == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) {
		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) channel link up\n",
		    ldcp->id);
1554 		notify_client = B_TRUE;
1555 		notify_event = LDC_EVT_RESET;
1556 		ldcp->tstate |= TS_LINK_READY;
1557 		ldcp->status = LDC_READY;
1558 	}
1559 
1560 	/* if callbacks are disabled, do not notify */
1561 	if (!ldcp->cb_enabled)
1562 		notify_client = B_FALSE;
1563 
1564 	if (notify_client)
1565 		ldcp->cb_inprogress = B_TRUE;
1566 
1567 	/* Unlock channel */
1568 	mutex_exit(&ldcp->lock);
1569 
1570 	if (notify_client) {
1571 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1572 		if (rv) {
1573 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1574 			    "failure", ldcp->id);
1575 		}
1576 		mutex_enter(&ldcp->lock);
1577 		ldcp->cb_inprogress = B_FALSE;
1578 		mutex_exit(&ldcp->lock);
1579 	}
1580 
1581 	mutex_enter(&ldcp->lock);
1582 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1583 	mutex_exit(&ldcp->lock);
1584 
1585 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1586 
1587 	return (DDI_INTR_CLAIMED);
1588 }
1589 
1590 /*
1591  * LDC receive interrupt handler
1592  *    triggered for channel with data pending to read
1593  *    i.e. Rx queue content changes
1594  */
1595 static uint_t
1596 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1597 {
1598 	_NOTE(ARGUNUSED(arg2))
1599 
1600 	int		rv;
1601 	uint64_t 	rx_head, rx_tail;
1602 	ldc_msg_t 	*msg;
1603 	ldc_chan_t 	*ldcp;
1604 	boolean_t 	notify_client = B_FALSE;
1605 	uint64_t	notify_event = 0;
1606 
1607 	/* Get the channel for which interrupt was received */
1608 	if (arg1 == NULL) {
1609 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1610 		return (DDI_INTR_UNCLAIMED);
1611 	}
1612 
1613 	ldcp = (ldc_chan_t *)arg1;
1614 
1615 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1616 	    ldcp->id, ldcp);
1617 
1618 	/* Lock channel */
1619 	mutex_enter(&ldcp->lock);
1620 
1621 	/* mark interrupt as pending */
1622 	ldcp->intr_pending = B_TRUE;
1623 
1624 	/*
1625 	 * Read packet(s) from the queue
1626 	 */
1627 	for (;;) {
1628 
1629 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1630 		    &ldcp->link_state);
1631 		if (rv) {
1632 			cmn_err(CE_WARN,
1633 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1634 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1635 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1636 			mutex_exit(&ldcp->lock);
1637 			return (DDI_INTR_CLAIMED);
1638 		}
1639 
1640 		/*
1641 		 * reset the channel state if the channel went down
1642 		 * (other side unconfigured queue) or channel was reset
		 * (other side reconfigured its queue)
1644 		 */
1645 		if (ldcp->link_state == LDC_CHANNEL_DOWN) {
			D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
			    "link down\n", ldcp->id);
1648 			i_ldc_reset(ldcp);
1649 			notify_client = B_TRUE;
1650 			notify_event = LDC_EVT_DOWN;
1651 			break;
1652 		}
1653 		if (ldcp->link_state == LDC_CHANNEL_RESET) {
			D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
			    "link reset\n", ldcp->id);
1656 			i_ldc_reset(ldcp);
1657 			notify_client = B_TRUE;
1658 			notify_event = LDC_EVT_RESET;
1659 		}
1660 
1661 		if (ldcp->tstate == TS_OPEN &&
1662 		    ldcp->link_state == LDC_CHANNEL_UP) {
			D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
			    "link up\n", ldcp->id);
1665 			notify_client = B_TRUE;
1666 			notify_event = LDC_EVT_RESET;
1667 			ldcp->tstate |= TS_LINK_READY;
1668 			ldcp->status = LDC_READY;
1669 		}
1670 
1671 		if (rx_head == rx_tail) {
1672 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1673 			    ldcp->id);
1674 			break;
1675 		}
1676 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1677 		    rx_head, rx_tail);
1678 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1679 		    ldcp->rx_q_va + rx_head);
1680 
1681 		/* get the message */
1682 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1683 
		/* if channel is in RAW mode, or this is a data pkt, notify */
1685 		if (ldcp->mode == LDC_MODE_RAW) {
1686 			notify_client = B_TRUE;
1687 			notify_event |= LDC_EVT_READ;
1688 			break;
1689 		}
1690 
1691 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1692 
1693 			/* discard packet if channel is not up */
1694 			if (ldcp->tstate != TS_UP) {
1695 
1696 				/* move the head one position */
1697 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1698 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1699 
1700 				if (rv = i_ldc_set_rx_head(ldcp, rx_head))
1701 					break;
1702 
1703 				continue;
1704 			} else {
1705 				notify_client = B_TRUE;
1706 				notify_event |= LDC_EVT_READ;
1707 				break;
1708 			}
1709 		}
1710 
1711 		/* Check the sequence ID for the message received */
1712 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
1713 
1714 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
1715 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
1716 
1717 			/* Reset last_msg_rcd to start of message */
1718 			if (ldcp->first_fragment != 0) {
1719 				ldcp->last_msg_rcd =
1720 					ldcp->first_fragment - 1;
1721 				ldcp->first_fragment = 0;
1722 			}
1723 			/*
1724 			 * Send a NACK due to seqid mismatch
1725 			 */
1726 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
1727 			    (msg->ctrl & LDC_CTRL_MASK));
1728 
1729 			if (rv) {
1730 				cmn_err(CE_NOTE,
1731 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
1732 				    "CTRL/NACK msg\n", ldcp->id);
1733 			}
1734 
1735 			/* purge receive queue */
1736 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
1737 			break;
1738 		}
1739 
1740 		/* record the message ID */
1741 		ldcp->last_msg_rcd = msg->seqid;
1742 
1743 		/* process control messages */
1744 		if (msg->type & LDC_CTRL) {
1745 			/* save current internal state */
1746 			uint64_t tstate = ldcp->tstate;
1747 
1748 			rv = i_ldc_ctrlmsg(ldcp, msg);
1749 			if (rv == EAGAIN) {
1750 				/* re-process pkt - state was adjusted */
1751 				continue;
1752 			}
1753 			if (rv == ECONNRESET) {
1754 				notify_client = B_TRUE;
1755 				notify_event = LDC_EVT_RESET;
1756 				break;
1757 			}
1758 
1759 			/*
1760 			 * control message processing was successful
1761 			 * channel transitioned to ready for communication
1762 			 */
1763 			if (rv == 0 && ldcp->tstate == TS_UP &&
1764 			    tstate != ldcp->tstate) {
1765 				notify_client = B_TRUE;
1766 				notify_event = LDC_EVT_UP;
1767 			}
1768 		}
1769 
1770 		/* process data ACKs */
1771 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
1772 			(void) i_ldc_process_data_ACK(ldcp, msg);
1773 		}
1774 
1775 		/* move the head one position */
1776 		rx_head = (rx_head + LDC_PACKET_SIZE) %
1777 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1778 		if (rv = i_ldc_set_rx_head(ldcp, rx_head)) {
1779 			notify_client = B_TRUE;
1780 			notify_event = LDC_EVT_RESET;
1781 			break;
1782 		}
1783 
1784 	} /* for */
1785 
1786 	/* if callbacks are disabled, do not notify */
1787 	if (!ldcp->cb_enabled)
1788 		notify_client = B_FALSE;
1789 
1790 	if (notify_client)
1791 		ldcp->cb_inprogress = B_TRUE;
1792 
1793 	/* Unlock channel */
1794 	mutex_exit(&ldcp->lock);
1795 
1796 	if (notify_client) {
1797 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1798 		if (rv) {
1799 			DWARN(ldcp->id,
1800 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1801 			    ldcp->id);
1802 		}
1803 		mutex_enter(&ldcp->lock);
1804 		ldcp->cb_inprogress = B_FALSE;
1805 		mutex_exit(&ldcp->lock);
1806 	}
1807 
1808 	mutex_enter(&ldcp->lock);
1809 
	/*
	 * If there are data packets in the queue, ldc_read will clear
	 * the interrupt after draining the queue; otherwise clear it here.
	 */
1814 	if ((notify_event & LDC_EVT_READ) == 0) {
1815 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1816 	}
1817 
1818 	mutex_exit(&ldcp->lock);
1819 
1820 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1821 	return (DDI_INTR_CLAIMED);
1822 }
1823 
1824 
1825 /* -------------------------------------------------------------------------- */
1826 
1827 /*
1828  * LDC API functions
1829  */
1830 
1831 /*
1832  * Initialize the channel. Allocate internal structure and memory for
1833  * TX/RX queues, and initialize locks.
1834  */
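/*
 * Illustrative usage sketch (not part of the original source; the
 * LDC_DEV_GENERIC device class shown here is an assumption):
 *
 *	ldc_attr_t attr;
 *	ldc_handle_t handle;
 *
 *	attr.devclass = LDC_DEV_GENERIC;
 *	attr.instance = 0;
 *	attr.mode = LDC_MODE_UNRELIABLE;
 *	attr.qlen = 0;		(zero selects the LDC_QUEUE_ENTRIES default)
 *
 *	if (ldc_init(channel_id, &attr, &handle) != 0)
 *		(handle the error)
 */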
1835 int
1836 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
1837 {
1838 	ldc_chan_t 	*ldcp;
1839 	int		rv, exit_val;
1840 	uint64_t	ra_base, nentries;
1841 
	exit_val = EINVAL;	/* guarantee an error value on a failure exit */
1843 
1844 	if (attr == NULL) {
1845 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
1846 		return (EINVAL);
1847 	}
1848 	if (handle == NULL) {
1849 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
1850 		return (EINVAL);
1851 	}
1852 
1853 	/* check if channel is valid */
1854 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
1855 	if (rv == H_ECHANNEL) {
1856 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
1857 		return (EINVAL);
1858 	}
1859 
1860 	/* check if the channel has already been initialized */
1861 	mutex_enter(&ldcssp->lock);
1862 	ldcp = ldcssp->chan_list;
1863 	while (ldcp != NULL) {
1864 		if (ldcp->id == id) {
1865 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
1866 			    id);
1867 			mutex_exit(&ldcssp->lock);
1868 			return (EADDRINUSE);
1869 		}
1870 		ldcp = ldcp->next;
1871 	}
1872 	mutex_exit(&ldcssp->lock);
1873 
1874 	ASSERT(ldcp == NULL);
1875 
1876 	*handle = 0;
1877 
1878 	/* Allocate an ldcp structure */
1879 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
1880 
1881 	/* Initialize the channel lock */
1882 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
1883 
1884 	/* Channel specific processing */
1885 	mutex_enter(&ldcp->lock);
1886 
1887 	/* Initialize the channel */
1888 	ldcp->id = id;
1889 	ldcp->cb = NULL;
1890 	ldcp->cb_arg = NULL;
1891 	ldcp->cb_inprogress = B_FALSE;
1892 	ldcp->cb_enabled = B_FALSE;
1893 	ldcp->next = NULL;
1894 
1895 	/* Read attributes */
1896 	ldcp->mode = attr->mode;
1897 	ldcp->devclass = attr->devclass;
1898 	ldcp->devinst = attr->instance;
1899 
1900 	ldcp->rx_q_entries =
1901 		(attr->qlen > 0) ? attr->qlen : LDC_QUEUE_ENTRIES;
1902 	ldcp->tx_q_entries = ldcp->rx_q_entries;
1903 
1904 	D1(ldcp->id,
1905 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
1906 	    "instance=0x%llx,mode=%d, qlen=%d\n",
1907 	    ldcp->id, ldcp->devclass, ldcp->devinst,
1908 	    ldcp->mode, ldcp->rx_q_entries);
1909 
1910 	ldcp->next_vidx = 0;
1911 	ldcp->tstate = 0;
1912 	ldcp->hstate = 0;
1913 	ldcp->last_msg_snt = LDC_INIT_SEQID;
1914 	ldcp->last_ack_rcd = 0;
1915 	ldcp->last_msg_rcd = 0;
1916 
1917 	ldcp->stream_bufferp = NULL;
1918 	ldcp->exp_dring_list = NULL;
1919 	ldcp->imp_dring_list = NULL;
1920 	ldcp->mhdl_list = NULL;
1921 
1922 	/* Initialize payload size depending on whether channel is reliable */
1923 	switch (ldcp->mode) {
1924 	case LDC_MODE_RAW:
1925 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
1926 		ldcp->read_p = i_ldc_read_raw;
1927 		ldcp->write_p = i_ldc_write_raw;
1928 		ldcp->mtu = 0;
1929 		break;
1930 	case LDC_MODE_UNRELIABLE:
1931 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
1932 		ldcp->read_p = i_ldc_read_packet;
1933 		ldcp->write_p = i_ldc_write_packet;
1934 		ldcp->mtu = 0;
1935 		break;
1936 	case LDC_MODE_RELIABLE:
1937 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
1938 		ldcp->read_p = i_ldc_read_packet;
1939 		ldcp->write_p = i_ldc_write_packet;
1940 		ldcp->mtu = 0;
1941 		break;
1942 	case LDC_MODE_STREAM:
1943 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
1944 
1945 		ldcp->stream_remains = 0;
1946 		ldcp->stream_offset = 0;
1947 		ldcp->mtu = LDC_STREAM_MTU;
1948 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
1949 		ldcp->read_p = i_ldc_read_stream;
1950 		ldcp->write_p = i_ldc_write_stream;
1951 		break;
1952 	default:
1953 		exit_val = EINVAL;
1954 		goto cleanup_on_exit;
1955 	}
1956 
1957 	/* Create a transmit queue */
1958 	ldcp->tx_q_va = (uint64_t)
1959 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1960 	if (ldcp->tx_q_va == NULL) {
1961 		cmn_err(CE_WARN,
1962 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
1963 		    ldcp->id);
1964 		exit_val = ENOMEM;
1965 		goto cleanup_on_exit;
1966 	}
1967 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
1968 
1969 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
1970 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
1971 
1972 	ldcp->tstate |= TS_TXQ_RDY;
1973 
1974 	/* Create a receive queue */
1975 	ldcp->rx_q_va = (uint64_t)
1976 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1977 	if (ldcp->rx_q_va == NULL) {
1978 		cmn_err(CE_WARN,
1979 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
1980 		    ldcp->id);
1981 		exit_val = ENOMEM;
1982 		goto cleanup_on_exit;
1983 	}
1984 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
1985 
1986 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
1987 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
1988 
1989 	ldcp->tstate |= TS_RXQ_RDY;
1990 
1991 	/* Init descriptor ring and memory handle list lock */
1992 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
1993 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
1994 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
1995 
1996 	/* mark status as INITialized */
1997 	ldcp->status = LDC_INIT;
1998 
1999 	mutex_exit(&ldcp->lock);
2000 
2001 	/* Add to channel list */
2002 	mutex_enter(&ldcssp->lock);
2003 	ldcp->next = ldcssp->chan_list;
2004 	ldcssp->chan_list = ldcp;
2005 	ldcssp->channel_count++;
2006 	mutex_exit(&ldcssp->lock);
2007 
2008 	/* set the handle */
2009 	*handle = (ldc_handle_t)ldcp;
2010 
2011 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
2012 
2013 	return (0);
2014 
2015 cleanup_on_exit:
2016 
2017 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2018 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2019 
2020 	if (ldcp->tstate & TS_TXQ_RDY)
2021 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2022 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2023 
2024 	if (ldcp->tstate & TS_RXQ_RDY)
2025 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2026 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2027 
2028 	mutex_exit(&ldcp->lock);
2029 	mutex_destroy(&ldcp->lock);
2030 
2031 	if (ldcp)
2032 		kmem_free(ldcp, sizeof (ldc_chan_t));
2033 
2034 	return (exit_val);
2035 }
2036 
2037 /*
2038  * Finalizes the LDC connection. It returns EBUSY if the
2039  * channel is still open; an ldc_close() has to be done
2040  * prior to an ldc_fini() operation. It frees the TX/RX
2041  * queues associated with the channel.
2042  */
2043 int
2044 ldc_fini(ldc_handle_t handle)
2045 {
2046 	ldc_chan_t 	*ldcp;
2047 	ldc_chan_t 	*tmp_ldcp;
2048 	uint64_t 	id;
2049 
2050 	if (handle == NULL) {
2051 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2052 		return (EINVAL);
2053 	}
2054 	ldcp = (ldc_chan_t *)handle;
2055 	id = ldcp->id;
2056 
2057 	mutex_enter(&ldcp->lock);
2058 
2059 	if (ldcp->tstate > TS_INIT) {
2060 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2061 		    ldcp->id);
2062 		mutex_exit(&ldcp->lock);
2063 		return (EBUSY);
2064 	}
2065 
2066 	/* Remove from the channel list */
2067 	mutex_enter(&ldcssp->lock);
2068 	tmp_ldcp = ldcssp->chan_list;
2069 	if (tmp_ldcp == ldcp) {
2070 		ldcssp->chan_list = ldcp->next;
2071 		ldcp->next = NULL;
2072 	} else {
2073 		while (tmp_ldcp != NULL) {
2074 			if (tmp_ldcp->next == ldcp) {
2075 				tmp_ldcp->next = ldcp->next;
2076 				ldcp->next = NULL;
2077 				break;
2078 			}
2079 			tmp_ldcp = tmp_ldcp->next;
2080 		}
2081 		if (tmp_ldcp == NULL) {
2082 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2083 			mutex_exit(&ldcssp->lock);
2084 			mutex_exit(&ldcp->lock);
2085 			return (EINVAL);
2086 		}
2087 	}
2088 
2089 	ldcssp->channel_count--;
2090 
2091 	mutex_exit(&ldcssp->lock);
2092 
2093 	/* Free the map table for this channel */
2094 	if (ldcp->mtbl) {
2095 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2096 		contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2097 		mutex_destroy(&ldcp->mtbl->lock);
2098 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2099 	}
2100 
2101 	/* Destroy descriptor ring and memory handle list lock */
2102 	mutex_destroy(&ldcp->exp_dlist_lock);
2103 	mutex_destroy(&ldcp->imp_dlist_lock);
2104 	mutex_destroy(&ldcp->mlist_lock);
2105 
2106 	/* Free the stream buffer for STREAM_MODE */
2107 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2108 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2109 
2110 	/* Free the RX queue */
2111 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2112 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2113 	ldcp->tstate &= ~TS_RXQ_RDY;
2114 
2115 	/* Free the TX queue */
2116 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2117 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2118 	ldcp->tstate &= ~TS_TXQ_RDY;
2119 
2120 
2121 	mutex_exit(&ldcp->lock);
2122 
2123 	/* Destroy mutex */
2124 	mutex_destroy(&ldcp->lock);
2125 
2126 	/* free channel structure */
2127 	kmem_free(ldcp, sizeof (ldc_chan_t));
2128 
2129 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2130 
2131 	return (0);
2132 }
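
/*
 * Example (illustrative sketch, not part of this driver): teardown
 * order for a client. ldc_fini() returns EBUSY while the channel is
 * still open, so the channel is closed first. 'hdl' is a hypothetical
 * handle obtained from a prior ldc_init().
 *
 *	(void) ldc_close(hdl);
 *	if (ldc_fini(hdl) == EBUSY) {
 *		... close failed or channel was reopened; retry later
 *	}
 */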
2133 
2134 /*
2135  * Open the LDC channel for use. It registers the TX/RX queues
2136  * with the Hypervisor. It also specifies the interrupt number
2137  * and target CPU for this channel
2138  */
2139 int
2140 ldc_open(ldc_handle_t handle)
2141 {
2142 	ldc_chan_t 	*ldcp;
2143 	int 		rv;
2144 
2145 	if (handle == NULL) {
2146 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2147 		return (EINVAL);
2148 	}
2149 
2150 	ldcp = (ldc_chan_t *)handle;
2151 
2152 	mutex_enter(&ldcp->lock);
2153 
2154 	if (ldcp->tstate < TS_INIT) {
2155 		DWARN(ldcp->id,
2156 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2157 		mutex_exit(&ldcp->lock);
2158 		return (EFAULT);
2159 	}
2160 	if (ldcp->tstate >= TS_OPEN) {
2161 		DWARN(ldcp->id,
2162 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2163 		mutex_exit(&ldcp->lock);
2164 		return (EFAULT);
2165 	}
2166 
2167 	/*
2168 	 * Unregister/Register the tx queue with the hypervisor
2169 	 */
2170 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2171 	if (rv) {
2172 		cmn_err(CE_WARN,
2173 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2174 		    ldcp->id);
2175 		mutex_exit(&ldcp->lock);
2176 		return (EIO);
2177 	}
2178 
2179 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2180 	if (rv) {
2181 		cmn_err(CE_WARN,
2182 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2183 		    ldcp->id);
2184 		mutex_exit(&ldcp->lock);
2185 		return (EIO);
2186 	}
2187 
2188 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2189 	    ldcp->id);
2190 
2191 	/*
2192 	 * Unregister/Register the rx queue with the hypervisor
2193 	 */
2194 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2195 	if (rv) {
2196 		cmn_err(CE_WARN,
2197 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2198 		    ldcp->id);
2199 		mutex_exit(&ldcp->lock);
2200 		return (EIO);
2201 	}
2202 
2203 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2204 	if (rv) {
2205 		cmn_err(CE_WARN,
2206 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2207 		    ldcp->id);
2208 		mutex_exit(&ldcp->lock);
2209 		return (EIO);
2210 	}
2211 
2212 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2213 	    ldcp->id);
2214 
2215 	ldcp->tstate |= TS_QCONF_RDY;
2216 
2217 	/* Register the channel with the channel nexus */
2218 	rv = i_ldc_register_channel(ldcp);
2219 	if (rv && rv != EAGAIN) {
2220 		cmn_err(CE_WARN,
2221 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2222 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2223 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2224 		mutex_exit(&ldcp->lock);
2225 		return (EIO);
2226 	}
2227 
2228 	/* mark channel in OPEN state */
2229 	ldcp->status = LDC_OPEN;
2230 
2231 	/* Read channel state */
2232 	rv = hv_ldc_tx_get_state(ldcp->id,
2233 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2234 	if (rv) {
2235 		cmn_err(CE_WARN,
2236 		    "ldc_open: (0x%lx) cannot read channel state\n",
2237 		    ldcp->id);
2238 		(void) i_ldc_unregister_channel(ldcp);
2239 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2240 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2241 		mutex_exit(&ldcp->lock);
2242 		return (EIO);
2243 	}
2244 
2245 	/*
2246 	 * set the ACKd head to current head location for reliable &
2247 	 * streaming mode
2248 	 */
2249 	ldcp->tx_ackd_head = ldcp->tx_head;
2250 
2251 	/* mark channel ready if HV reports link is UP (peer alloc'd Rx queue) */
2252 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2253 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2254 		ldcp->tstate |= TS_LINK_READY;
2255 		ldcp->status = LDC_READY;
2256 	}
2257 
2258 	/*
2259 	 * if channel is being opened in RAW mode - no handshake is needed
2260 	 * switch the channel to the READY and UP states
2261 	 */
2262 	if (ldcp->mode == LDC_MODE_RAW) {
2263 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2264 		ldcp->status = LDC_UP;
2265 	}
2266 
2267 	mutex_exit(&ldcp->lock);
2268 
2269 	/*
2270 	 * Increment number of open channels
2271 	 */
2272 	mutex_enter(&ldcssp->lock);
2273 	ldcssp->channels_open++;
2274 	mutex_exit(&ldcssp->lock);
2275 
2276 	D1(ldcp->id,
2277 	    "ldc_open: (0x%llx) channel (0x%p) open for use (tstate=0x%x)\n",
2278 	    ldcp->id, ldcp, ldcp->tstate);
2279 
2280 	return (0);
2281 }
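
/*
 * Example (illustrative sketch): a client opens the channel and then
 * checks the resulting status; LDC_READY or LDC_UP indicates the peer
 * side had already configured its queues when the link state was read.
 * 'hdl' is a hypothetical handle.
 *
 *	ldc_status_t st;
 *
 *	if (ldc_open(hdl) != 0)
 *		return (EIO);
 *	(void) ldc_status(hdl, &st);
 *	if (st == LDC_READY || st == LDC_UP) {
 *		... peer is up; the handshake may be initiated
 *	}
 */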
2282 
2283 /*
2284  * Close the LDC connection. It will return EBUSY if there
2285  * are memory segments or descriptor rings either bound to or
2286  * mapped over the channel
2287  */
2288 int
2289 ldc_close(ldc_handle_t handle)
2290 {
2291 	ldc_chan_t 	*ldcp;
2292 	int		rv = 0;
2293 	boolean_t	chk_done = B_FALSE;
2294 
2295 	if (handle == NULL) {
2296 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2297 		return (EINVAL);
2298 	}
2299 	ldcp = (ldc_chan_t *)handle;
2300 
2301 	mutex_enter(&ldcp->lock);
2302 
2303 	/* return error if channel is not open */
2304 	if (ldcp->tstate < TS_OPEN) {
2305 		DWARN(ldcp->id,
2306 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2307 		mutex_exit(&ldcp->lock);
2308 		return (EFAULT);
2309 	}
2310 
2311 	/* cannot close if any memory handles or drings are bound or mapped */
2312 	if (ldcp->mhdl_list != NULL) {
2313 		DWARN(ldcp->id,
2314 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2315 		    ldcp->id);
2316 		mutex_exit(&ldcp->lock);
2317 		return (EBUSY);
2318 	}
2319 	if (ldcp->exp_dring_list != NULL) {
2320 		DWARN(ldcp->id,
2321 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2322 		    ldcp->id);
2323 		mutex_exit(&ldcp->lock);
2324 		return (EBUSY);
2325 	}
2326 	if (ldcp->imp_dring_list != NULL) {
2327 		DWARN(ldcp->id,
2328 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2329 		    ldcp->id);
2330 		mutex_exit(&ldcp->lock);
2331 		return (EBUSY);
2332 	}
2333 
2334 	/*
2335 	 * Wait for pending transmits to complete, i.e., for the Tx queue
2336 	 * to drain; if there are pending pkts, wait 1 ms and retry once
2337 	 */
2338 	for (;;) {
2339 
2340 		rv = hv_ldc_tx_get_state(ldcp->id,
2341 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2342 		if (rv) {
2343 			cmn_err(CE_WARN,
2344 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2345 			mutex_exit(&ldcp->lock);
2346 			return (EIO);
2347 		}
2348 
2349 		if (ldcp->tx_head == ldcp->tx_tail ||
2350 		    ldcp->link_state != LDC_CHANNEL_UP) {
2351 			break;
2352 		}
2353 
2354 		if (chk_done) {
2355 			DWARN(ldcp->id,
2356 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2357 			    ldcp->id);
2358 			break;
2359 		}
2360 
2361 		/* wait for one ms and try again */
2362 		delay(drv_usectohz(1000));
2363 		chk_done = B_TRUE;
2364 	}
2365 
2366 	/*
2367 	 * Unregister the channel with the nexus
2368 	 */
2369 	rv = i_ldc_unregister_channel(ldcp);
2370 	if (rv && rv != EAGAIN) {
2371 		cmn_err(CE_WARN,
2372 		    "ldc_close: (0x%lx) channel unregister failed\n",
2373 		    ldcp->id);
2374 		mutex_exit(&ldcp->lock);
2375 		return (rv);
2376 	}
2377 
2378 	/*
2379 	 * Unregister queues
2380 	 */
2381 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2382 	if (rv) {
2383 		cmn_err(CE_WARN,
2384 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2385 		    ldcp->id);
2386 		mutex_exit(&ldcp->lock);
2387 		return (EIO);
2388 	}
2389 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2390 	if (rv) {
2391 		cmn_err(CE_WARN,
2392 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2393 		    ldcp->id);
2394 		mutex_exit(&ldcp->lock);
2395 		return (EIO);
2396 	}
2397 
2398 	ldcp->tstate &= ~TS_QCONF_RDY;
2399 
2400 	/* Reset channel state information */
2401 	i_ldc_reset_state(ldcp);
2402 
2403 	/* Mark channel as down and in initialized state */
2404 	ldcp->tx_ackd_head = 0;
2405 	ldcp->tx_head = 0;
2406 	ldcp->tstate = TS_INIT;
2407 	ldcp->status = LDC_INIT;
2408 
2409 	mutex_exit(&ldcp->lock);
2410 
2411 	/* Decrement number of open channels */
2412 	mutex_enter(&ldcssp->lock);
2413 	ldcssp->channels_open--;
2414 	mutex_exit(&ldcssp->lock);
2415 
2416 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2417 
2418 	return (0);
2419 }
2420 
2421 /*
2422  * Register channel callback
2423  */
2424 int
2425 ldc_reg_callback(ldc_handle_t handle,
2426     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2427 {
2428 	ldc_chan_t *ldcp;
2429 
2430 	if (handle == NULL) {
2431 		DWARN(DBG_ALL_LDCS,
2432 		    "ldc_reg_callback: invalid channel handle\n");
2433 		return (EINVAL);
2434 	}
2435 	if (((uint64_t)cb) < KERNELBASE) {
2436 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2437 		return (EINVAL);
2438 	}
2439 	ldcp = (ldc_chan_t *)handle;
2440 
2441 	mutex_enter(&ldcp->lock);
2442 
2443 	if (ldcp->cb) {
2444 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2445 		    ldcp->id);
2446 		mutex_exit(&ldcp->lock);
2447 		return (EIO);
2448 	}
2449 	if (ldcp->cb_inprogress) {
2450 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2451 		    ldcp->id);
2452 		mutex_exit(&ldcp->lock);
2453 		return (EWOULDBLOCK);
2454 	}
2455 
2456 	ldcp->cb = cb;
2457 	ldcp->cb_arg = arg;
2458 	ldcp->cb_enabled = B_TRUE;
2459 
2460 	D1(ldcp->id,
2461 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2462 	    ldcp->id);
2463 
2464 	mutex_exit(&ldcp->lock);
2465 
2466 	return (0);
2467 }
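
/*
 * Example (illustrative sketch): registering a notification callback.
 * The callback address must be a kernel address (>= KERNELBASE) and
 * only one callback may be registered per channel. my_ldc_cb() and
 * my_state_t are hypothetical client constructs.
 *
 *	static uint_t
 *	my_ldc_cb(uint64_t event, caddr_t arg)
 *	{
 *		my_state_t *sp = (my_state_t *)arg;
 *
 *		... service the event bits posted for sp's channel
 *		return (0);
 *	}
 *
 *	rv = ldc_reg_callback(hdl, my_ldc_cb, (caddr_t)sp);
 */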
2468 
2469 /*
2470  * Unregister channel callback
2471  */
2472 int
2473 ldc_unreg_callback(ldc_handle_t handle)
2474 {
2475 	ldc_chan_t *ldcp;
2476 
2477 	if (handle == NULL) {
2478 		DWARN(DBG_ALL_LDCS,
2479 		    "ldc_unreg_callback: invalid channel handle\n");
2480 		return (EINVAL);
2481 	}
2482 	ldcp = (ldc_chan_t *)handle;
2483 
2484 	mutex_enter(&ldcp->lock);
2485 
2486 	if (ldcp->cb == NULL) {
2487 		DWARN(ldcp->id,
2488 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2489 		    ldcp->id);
2490 		mutex_exit(&ldcp->lock);
2491 		return (EIO);
2492 	}
2493 	if (ldcp->cb_inprogress) {
2494 		DWARN(ldcp->id,
2495 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2496 		    ldcp->id);
2497 		mutex_exit(&ldcp->lock);
2498 		return (EWOULDBLOCK);
2499 	}
2500 
2501 	ldcp->cb = NULL;
2502 	ldcp->cb_arg = NULL;
2503 	ldcp->cb_enabled = B_FALSE;
2504 
2505 	D1(ldcp->id,
2506 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2507 	    ldcp->id);
2508 
2509 	mutex_exit(&ldcp->lock);
2510 
2511 	return (0);
2512 }
2513 
2514 
2515 /*
2516  * Bring a channel up by initiating a handshake with the peer
2517  * This call is asynchronous. It will complete at a later point
2518  * in time when the peer responds with an RTR.
2519  */
2520 int
2521 ldc_up(ldc_handle_t handle)
2522 {
2523 	int 		rv;
2524 	ldc_chan_t 	*ldcp;
2525 	ldc_msg_t 	*ldcmsg;
2526 	uint64_t 	tx_tail;
2527 
2528 	if (handle == NULL) {
2529 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2530 		return (EINVAL);
2531 	}
2532 	ldcp = (ldc_chan_t *)handle;
2533 
2534 	mutex_enter(&ldcp->lock);
2535 
2536 	if (ldcp->tstate == TS_UP) {
2537 		D2(ldcp->id,
2538 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2539 		    ldcp->id);
2540 		mutex_exit(&ldcp->lock);
2541 		return (0);
2542 	}
2543 
2544 	/* if the channel is in RAW mode - mark it as UP, if READY */
2545 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2546 		ldcp->tstate = TS_UP;
2547 		mutex_exit(&ldcp->lock);
2548 		return (0);
2549 	}
2550 
2551 	/* Don't start another handshake if there is one in progress */
2552 	if (ldcp->hstate) {
2553 		D2(ldcp->id,
2554 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2555 		    ldcp->id);
2556 		mutex_exit(&ldcp->lock);
2557 		return (0);
2558 	}
2559 
2560 	/* get the current tail for the LDC msg */
2561 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2562 	if (rv) {
2563 		DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2564 		    ldcp->id);
2565 		mutex_exit(&ldcp->lock);
2566 		return (ECONNREFUSED);
2567 	}
2568 
2569 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2570 	ZERO_PKT(ldcmsg);
2571 
2572 	ldcmsg->type = LDC_CTRL;
2573 	ldcmsg->stype = LDC_INFO;
2574 	ldcmsg->ctrl = LDC_VER;
2575 	ldcp->next_vidx = 0;
2576 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2577 
2578 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2579 
2580 	/* initiate the send by calling into HV and set the new tail */
2581 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2582 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2583 
2584 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2585 	if (rv) {
2586 		DWARN(ldcp->id,
2587 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2588 		    ldcp->id, rv);
2589 		mutex_exit(&ldcp->lock);
2590 		return (rv);
2591 	}
2592 
2593 	ldcp->hstate |= TS_SENT_VER;
2594 	ldcp->tx_tail = tx_tail;
2595 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2596 
2597 	mutex_exit(&ldcp->lock);
2598 
2599 	return (rv);
2600 }
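
/*
 * Example (illustrative sketch): since ldc_up() only initiates the
 * version handshake, a client either waits for the channel-up event
 * in its registered callback or polls ldc_status() until LDC_UP is
 * reported. 'hdl' is a hypothetical handle.
 *
 *	ldc_status_t st;
 *
 *	(void) ldc_up(hdl);
 *	do {
 *		delay(drv_usectohz(1000));
 *		(void) ldc_status(hdl, &st);
 *	} while (st != LDC_UP);
 */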
2601 
2602 
2603 /*
2604  * Reset a channel by re-registering the TX/RX queues
2605  */
2606 int
2607 ldc_reset(ldc_handle_t handle)
2608 {
2609 	ldc_chan_t 	*ldcp;
2610 
2611 	if (handle == NULL) {
2612 		DWARN(DBG_ALL_LDCS, "ldc_reset: invalid channel handle\n");
2613 		return (EINVAL);
2614 	}
2615 	ldcp = (ldc_chan_t *)handle;
2616 
2617 	mutex_enter(&ldcp->lock);
2618 	i_ldc_reset(ldcp);
2619 	mutex_exit(&ldcp->lock);
2620 
2621 	return (0);
2622 }
2623 
2624 /*
2625  * Get the current channel status
2626  */
2627 int
2628 ldc_status(ldc_handle_t handle, ldc_status_t *status)
2629 {
2630 	ldc_chan_t *ldcp;
2631 
2632 	if (handle == NULL || status == NULL) {
2633 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
2634 		return (EINVAL);
2635 	}
2636 	ldcp = (ldc_chan_t *)handle;
2637 
2638 	*status = ((ldc_chan_t *)handle)->status;
2639 
2640 	D1(ldcp->id,
2641 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
2642 	return (0);
2643 }
2644 
2645 
2646 /*
2647  * Set the channel's callback mode - enable/disable callbacks
2648  */
2649 int
2650 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
2651 {
2652 	ldc_chan_t 	*ldcp;
2653 
2654 	if (handle == NULL) {
2655 		DWARN(DBG_ALL_LDCS,
2656 		    "ldc_set_cb_mode: invalid channel handle\n");
2657 		return (EINVAL);
2658 	}
2659 	ldcp = (ldc_chan_t *)handle;
2660 
2661 	/*
2662 	 * Record whether callbacks should be invoked
2663 	 */
2664 	mutex_enter(&ldcp->lock);
2665 
2666 	switch (cmode) {
2667 	case LDC_CB_DISABLE:
2668 		if (!ldcp->cb_enabled) {
2669 			DWARN(ldcp->id,
2670 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
2671 			    ldcp->id);
2672 			break;
2673 		}
2674 		ldcp->cb_enabled = B_FALSE;
2675 
2676 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
2677 		    ldcp->id);
2678 		break;
2679 
2680 	case LDC_CB_ENABLE:
2681 		if (ldcp->cb_enabled) {
2682 			DWARN(ldcp->id,
2683 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
2684 			    ldcp->id);
2685 			break;
2686 		}
2687 		ldcp->cb_enabled = B_TRUE;
2688 
2689 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
2690 		    ldcp->id);
2691 		break;
2692 	}
2693 
2694 	mutex_exit(&ldcp->lock);
2695 
2696 	return (0);
2697 }
2698 
2699 /*
2700  * Check to see if there are packets on the incoming queue.
2701  * Returns isempty = B_FALSE if there are packets.
2702  */
2703 int
2704 ldc_chkq(ldc_handle_t handle, boolean_t *isempty)
2705 {
2706 	int 		rv;
2707 	uint64_t 	rx_head, rx_tail;
2708 	ldc_chan_t 	*ldcp;
2709 
2710 	if (handle == NULL) {
2711 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
2712 		return (EINVAL);
2713 	}
2714 	ldcp = (ldc_chan_t *)handle;
2715 
2716 	*isempty = B_TRUE;
2717 
2718 	mutex_enter(&ldcp->lock);
2719 
2720 	if (ldcp->tstate != TS_UP) {
2721 		D1(ldcp->id,
2722 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
2723 		mutex_exit(&ldcp->lock);
2724 		return (ECONNRESET);
2725 	}
2726 
2727 	/* Read packet(s) from the queue */
2728 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2729 	    &ldcp->link_state);
2730 	if (rv != 0) {
2731 		cmn_err(CE_WARN,
2732 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
2733 		mutex_exit(&ldcp->lock);
2734 		return (EIO);
2735 	}
2736 	/* reset the channel state if the channel went down */
2737 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
2738 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2739 		i_ldc_reset(ldcp);
2740 		mutex_exit(&ldcp->lock);
2741 		return (ECONNRESET);
2742 	}
2743 
2744 	if (rx_head != rx_tail) {
2745 		D1(ldcp->id, "ldc_chkq: (0x%llx) queue has pkt(s)\n", ldcp->id);
2746 		*isempty = B_FALSE;
2747 	}
2748 
2749 	mutex_exit(&ldcp->lock);
2750 
2751 	return (0);
2752 }
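
/*
 * Example (illustrative sketch): using ldc_chkq() to skip a read call
 * when the receive queue is empty, e.g. from a polling thread. 'hdl',
 * 'buf' and 'size' are hypothetical.
 *
 *	boolean_t empty;
 *
 *	if (ldc_chkq(hdl, &empty) == 0 && !empty) {
 *		size = sizeof (buf);
 *		rv = ldc_read(hdl, (caddr_t)buf, &size);
 *	}
 */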
2753 
2754 
2755 /*
2756  * Read up to 'size' bytes. If the incoming message is larger
2757  * than 'size', ENOBUFS is returned.
2758  *
2759  * On return, size contains the number of bytes read.
2760  */
2761 int
2762 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
2763 {
2764 	ldc_chan_t 	*ldcp;
2765 	uint64_t 	rx_head = 0, rx_tail = 0;
2766 	int		rv = 0, exit_val;
2767 
2768 	if (handle == NULL) {
2769 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
2770 		return (EINVAL);
2771 	}
2772 
2773 	ldcp = (ldc_chan_t *)handle;
2774 
2775 	/* channel lock */
2776 	mutex_enter(&ldcp->lock);
2777 
2778 	if (ldcp->tstate != TS_UP) {
2779 		DWARN(ldcp->id,
2780 		    "ldc_read: (0x%llx) channel is not in UP state\n",
2781 		    ldcp->id);
2782 		exit_val = ECONNRESET;
2783 	} else {
2784 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
2785 	}
2786 
2787 	/*
2788 	 * if queue has been drained - clear interrupt
2789 	 */
2790 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2791 	    &ldcp->link_state);
2792 	if (exit_val == 0 && rv == 0 && rx_head == rx_tail) {
2793 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2794 	}
2795 
2796 	mutex_exit(&ldcp->lock);
2797 	return (exit_val);
2798 }
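
/*
 * Example (illustrative sketch): a read that distinguishes the main
 * return codes. A zero-byte success means the queue was empty; ENOBUFS
 * means the caller's buffer is too small for the incoming message;
 * ECONNRESET means the channel was reset and must come back up before
 * a retry. 'hdl' and 'buf' are hypothetical.
 *
 *	size = sizeof (buf);
 *	rv = ldc_read(hdl, (caddr_t)buf, &size);
 *	if (rv == 0 && size > 0) {
 *		... consume 'size' bytes from buf
 *	} else if (rv == ENOBUFS) {
 *		... retry with a larger buffer
 *	} else if (rv == ECONNRESET) {
 *		... wait for the channel to come back up
 *	}
 */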
2799 
2800 /*
2801  * Basic raw mondo read -
2802  * no interpretation of mondo contents at all.
2803  *
2804  * Enter and exit with ldcp->lock held by caller
2805  */
2806 static int
2807 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
2808 {
2809 	uint64_t 	q_size_mask;
2810 	ldc_msg_t 	*msgp;
2811 	uint8_t		*msgbufp;
2812 	int		rv = 0, space;
2813 	uint64_t 	rx_head, rx_tail;
2814 
2815 	space = *sizep;
2816 
2817 	if (space < LDC_PAYLOAD_SIZE_RAW)
2818 		return (ENOBUFS);
2819 
2820 	ASSERT(mutex_owned(&ldcp->lock));
2821 
2822 	/* compute mask for increment */
2823 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
2824 
2825 	/*
2826 	 * Read packet(s) from the queue
2827 	 */
2828 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2829 	    &ldcp->link_state);
2830 	if (rv != 0) {
2831 		cmn_err(CE_WARN,
2832 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
2833 		    ldcp->id);
2834 		return (EIO);
2835 	}
2836 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
2837 		" rxt=0x%llx, st=0x%llx\n",
2838 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
2839 
2840 	/* reset the channel state if the channel went down */
2841 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2842 		i_ldc_reset(ldcp);
2843 		return (ECONNRESET);
2844 	}
2845 
2846 	/*
2847 	 * Check for empty queue
2848 	 */
2849 	if (rx_head == rx_tail) {
2850 		*sizep = 0;
2851 		return (0);
2852 	}
2853 
2854 	/* get the message */
2855 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2856 
2857 	/* if channel is in RAW mode, copy data and return */
2858 	msgbufp = (uint8_t *)&(msgp->raw[0]);
2859 
2860 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
2861 
2862 	DUMP_PAYLOAD(ldcp->id, msgbufp);
2863 
2864 	*sizep = LDC_PAYLOAD_SIZE_RAW;
2865 
2866 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
2867 	rv = i_ldc_set_rx_head(ldcp, rx_head);
2868 
2869 	return (rv);
2870 }
2871 
2872 /*
2873  * Process LDC mondos to build larger packets
2874  * with either unreliable or reliable delivery.
2875  *
2876  * Enter and exit with ldcp->lock held by caller
2877  */
2878 static int
2879 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
2880 {
2881 	int		rv = 0;
2882 	uint64_t 	rx_head = 0, rx_tail = 0;
2883 	uint64_t 	curr_head = 0;
2884 	ldc_msg_t 	*msg;
2885 	caddr_t 	target;
2886 	size_t 		len = 0, bytes_read = 0;
2887 	int 		retries = 0;
2888 	uint64_t 	q_size_mask;
2889 
2890 	target = target_bufp;
2891 
2892 	ASSERT(mutex_owned(&ldcp->lock));
2893 
2894 	/* reset first frag to 0 */
2895 	ldcp->first_fragment = 0;
2896 
2897 	/* compute mask for increment */
2898 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
2899 
2900 	/*
2901 	 * Read packet(s) from the queue
2902 	 */
2903 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
2904 	    &ldcp->link_state);
2905 	if (rv != 0) {
2906 		cmn_err(CE_WARN,
2907 		    "ldc_read: (0x%lx) unable to read queue ptrs",
2908 		    ldcp->id);
2909 		return (EIO);
2910 	}
2911 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
2912 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
2913 
2914 	/* reset the channel state if the channel went down */
2915 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2916 		i_ldc_reset(ldcp);
2917 		return (ECONNRESET);
2918 	}
2919 
2920 	for (;;) {
2921 
2922 		if (curr_head == rx_tail) {
2923 			rv = hv_ldc_rx_get_state(ldcp->id,
2924 			    &rx_head, &rx_tail, &ldcp->link_state);
2925 			if (rv != 0) {
2926 				cmn_err(CE_WARN,
2927 				    "ldc_read: (0x%lx) cannot read queue ptrs",
2928 				    ldcp->id);
2929 				return (EIO);
2930 			}
2931 			/* reset the channel state if the channel went down */
2932 			if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2933 				i_ldc_reset(ldcp);
2934 				return (ECONNRESET);
2935 			}
2936 		}
2937 
2938 		if (curr_head == rx_tail) {
2939 
2940 			/* If in the middle of a fragmented xfer */
2941 			if (ldcp->first_fragment != 0) {
2942 
2943 				/* wait for ldc_delay usecs */
2944 				drv_usecwait(ldc_delay);
2945 
2946 				if (++retries < ldc_max_retries)
2947 					continue;
2948 
2949 				*sizep = 0;
2950 				ldcp->last_msg_rcd = ldcp->first_fragment - 1;
2951 				DWARN(DBG_ALL_LDCS,
2952 					"ldc_read: (0x%llx) read timeout",
2953 					ldcp->id);
2954 				return (ETIMEDOUT);
2955 			}
2956 			*sizep = 0;
2957 			break;
2958 		}
2959 		retries = 0;
2960 
2961 		D2(ldcp->id,
2962 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
2963 		    ldcp->id, curr_head, rx_head, rx_tail);
2964 
2965 		/* get the message */
2966 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
2967 
2968 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
2969 		    ldcp->rx_q_va + curr_head);
2970 
2971 		/* Check the message ID for the message received */
2972 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
2973 
2974 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
2975 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
2976 
2977 			/* throw away data */
2978 			bytes_read = 0;
2979 
2980 			/* Reset last_msg_rcd to start of message */
2981 			if (ldcp->first_fragment != 0) {
2982 				ldcp->last_msg_rcd =
2983 					ldcp->first_fragment - 1;
2984 				ldcp->first_fragment = 0;
2985 			}
2986 			/*
2987 			 * Send a NACK -- invalid seqid
2988 			 * get the current tail for the response
2989 			 */
2990 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2991 			    (msg->ctrl & LDC_CTRL_MASK));
2992 			if (rv) {
2993 				cmn_err(CE_NOTE,
2994 				    "ldc_read: (0x%lx) err sending "
2995 				    "NACK msg\n", ldcp->id);
2996 			}
2997 
2998 			/* purge receive queue */
2999 			rv = i_ldc_set_rx_head(ldcp, rx_tail);
3000 
3001 			break;
3002 		}
3003 
3004 		/*
3005 		 * Process any CTRL messages
3006 		 * Future implementations should try to pass these to
3007 		 * LDC transport by resetting the intr state.
3008 		 *
3009 		 * NOTE: not done as a switch() as type can be both ctrl+data
3010 		 */
3011 		if (msg->type & LDC_CTRL) {
3012 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
3013 				if (rv == EAGAIN)
3014 					continue;
3015 				rv = i_ldc_set_rx_head(ldcp, rx_tail);
3016 				*sizep = 0;
3017 				bytes_read = 0;
3018 				break;
3019 			}
3020 		}
3021 
3022 		/* process data ACKs */
3023 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3024 			(void) i_ldc_process_data_ACK(ldcp, msg);
3025 		}
3026 
3027 		/* process data messages */
3028 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3029 
3030 			uint8_t *msgbuf = (uint8_t *)(
3031 				(ldcp->mode == LDC_MODE_RELIABLE ||
3032 				ldcp->mode == LDC_MODE_STREAM)
3033 				? msg->rdata : msg->udata);
3034 
3035 			D2(ldcp->id,
3036 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3037 
3038 			/* get the packet length */
3039 			len = (msg->env & LDC_LEN_MASK);
3040 
3041 			/*
3042 			 * FUTURE OPTIMIZATION:
3043 			 * we don't need to set the q head for every
3044 			 * packet we read; we only need to do this when
3045 			 * we are done or need to wait for more
3046 			 * mondos to make a full packet - this is
3047 			 * currently expensive.
3048 			 */
3049 
3050 			if (ldcp->first_fragment == 0) {
3051 
3052 				/*
3053 				 * first packets should always have the start
3054 				 * bit set (even for a single packet). If not,
3055 				 * throw away the packet
3056 				 */
3057 				if (!(msg->env & LDC_FRAG_START)) {
3058 
3059 					DWARN(DBG_ALL_LDCS,
3060 					    "ldc_read: (0x%llx) not start - "
3061 					    "frag=%x\n", ldcp->id,
3062 					    (msg->env) & LDC_FRAG_MASK);
3063 
3064 					/* toss pkt, inc head, cont reading */
3065 					bytes_read = 0;
3066 					target = target_bufp;
3067 					curr_head =
3068 						(curr_head + LDC_PACKET_SIZE)
3069 						& q_size_mask;
3070 					if (rv = i_ldc_set_rx_head(ldcp,
3071 						curr_head))
3072 						break;
3073 
3074 					continue;
3075 				}
3076 
3077 				ldcp->first_fragment = msg->seqid;
3078 			} else {
3079 				/* check to see if this is a pkt w/ START bit */
3080 				if (msg->env & LDC_FRAG_START) {
3081 					DWARN(DBG_ALL_LDCS,
3082 					    "ldc_read:(0x%llx) unexpected pkt"
3083 					    " env=0x%x discarding %d bytes,"
3084 					    " lastmsg=%d, currentmsg=%d\n",
3085 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3086 					    bytes_read, ldcp->last_msg_rcd,
3087 					    msg->seqid);
3088 
3089 					/* throw away data we have read so far */
3090 					bytes_read = 0;
3091 					target = target_bufp;
3092 					ldcp->first_fragment = msg->seqid;
3093 
3094 					if (rv = i_ldc_set_rx_head(ldcp,
3095 						curr_head))
3096 						break;
3097 				}
3098 			}
3099 
3100 			/* copy (next) pkt into buffer */
3101 			if (len <= (*sizep - bytes_read)) {
3102 				bcopy(msgbuf, target, len);
3103 				target += len;
3104 				bytes_read += len;
3105 			} else {
3106 				/*
3107 				 * there is not enough space in the buffer to
3108 				 * read this pkt. throw message away & continue
3109 				 * reading data from queue
3110 				 */
3111 				DWARN(DBG_ALL_LDCS,
3112 				    "ldc_read: (0x%llx) buffer too small, "
3113 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3114 				    curr_head, *sizep, bytes_read+len);
3115 
3116 				ldcp->first_fragment = 0;
3117 				target = target_bufp;
3118 				bytes_read = 0;
3119 
3120 				/* throw away everything received so far */
3121 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3122 					break;
3123 
3124 				/* continue reading remaining pkts */
3125 				continue;
3126 			}
3127 		}
3128 
3129 		/* set the message id */
3130 		ldcp->last_msg_rcd = msg->seqid;
3131 
3132 		/* move the head one position */
3133 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3134 
3135 		if (msg->env & LDC_FRAG_STOP) {
3136 
3137 			/*
3138 			 * All pkts that are part of this fragmented transfer
3139 			 * have been read or this was a single pkt read
3140 			 * or there was an error
3141 			 */
3142 
3143 			/* set the queue head */
3144 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3145 				bytes_read = 0;
3146 
3147 			*sizep = bytes_read;
3148 
3149 			break;
3150 		}
3151 
3152 		/* advance head if it is a DATA ACK */
3153 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3154 
3155 			/* set the queue head */
3156 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3157 				bytes_read = 0;
3158 				break;
3159 			}
3160 
3161 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3162 			    ldcp->id, curr_head);
3163 		}
3164 
3165 	} /* for (;;) */
3166 
3167 
3168 	/*
3169 	 * If useful data was read - send a msg ACK
3170 	 * OPTIMIZE: do not send an ACK for every msg - ACK at some frequency
3171 	 */
3172 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3173 		ldcp->mode == LDC_MODE_STREAM)) {
3174 
3175 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3176 		if (rv != 0) {
3177 			cmn_err(CE_NOTE,
3178 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3179 			return (0);
3180 		}
3181 	}
3182 
3183 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3184 
3185 	return (rv);
3186 }
3187 
3188 /*
3189  * Use underlying reliable packet mechanism to fetch
3190  * and buffer incoming packets so we can hand them back as
3191  * a basic byte stream.
3192  *
3193  * Enter and exit with ldcp->lock held by caller
3194  */
3195 static int
3196 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3197 {
3198 	int	rv;
3199 	size_t	size;
3200 
3201 	ASSERT(mutex_owned(&ldcp->lock));
3202 
3203 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3204 		ldcp->id, *sizep);
3205 
3206 	if (ldcp->stream_remains == 0) {
3207 		size = ldcp->mtu;
3208 		rv = i_ldc_read_packet(ldcp,
3209 			(caddr_t)ldcp->stream_bufferp, &size);
3210 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3211 			ldcp->id, size);
3212 
3213 		if (rv != 0)
3214 			return (rv);
3215 
3216 		ldcp->stream_remains = size;
3217 		ldcp->stream_offset = 0;
3218 	}
3219 
3220 	size = MIN(ldcp->stream_remains, *sizep);
3221 
3222 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3223 	ldcp->stream_offset += size;
3224 	ldcp->stream_remains -= size;
3225 
3226 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3227 		ldcp->id, size);
3228 
3229 	*sizep = size;
3230 	return (0);
3231 }
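
/*
 * Example (illustrative sketch): in stream mode one underlying packet
 * read can satisfy several ldc_read() calls, so a client accumulates
 * bytes until it has a full message (a real caller would also back
 * off when a call returns zero bytes). 'hdl', 'buf' and 'want' are
 * hypothetical.
 *
 *	size_t got = 0, n;
 *
 *	while (got < want) {
 *		n = want - got;
 *		if ((rv = ldc_read(hdl, buf + got, &n)) != 0)
 *			break;
 *		got += n;
 *	}
 */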
3232 
3233 /*
3234  * Write the specified number of bytes to the channel
3235  * in multiple pkts of pkt_payload size. Each
3236  * packet is tagged with a unique packet ID in
3237  * the case of a reliable transport.
3238  *
3239  * On return, size contains the number of bytes written.
3240  */
3241 int
3242 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3243 {
3244 	ldc_chan_t	*ldcp;
3245 	int		rv = 0;
3246 
3247 	if (handle == NULL) {
3248 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3249 		return (EINVAL);
3250 	}
3251 	ldcp = (ldc_chan_t *)handle;
3252 
3253 	mutex_enter(&ldcp->lock);
3254 
3255 	/* check if non-zero data to write */
3256 	if (buf == NULL || sizep == NULL) {
3257 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3258 		    ldcp->id);
3259 		mutex_exit(&ldcp->lock);
3260 		return (EINVAL);
3261 	}
3262 
3263 	if (*sizep == 0) {
3264 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3265 		    ldcp->id);
3266 		mutex_exit(&ldcp->lock);
3267 		return (0);
3268 	}
3269 
3270 	/* Check if channel is UP for data exchange */
3271 	if (ldcp->tstate != TS_UP) {
3272 		DWARN(ldcp->id,
3273 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3274 		    ldcp->id);
3275 		*sizep = 0;
3276 		rv = ECONNRESET;
3277 	} else {
3278 		rv = ldcp->write_p(ldcp, buf, sizep);
3279 	}
3280 
3281 	mutex_exit(&ldcp->lock);
3282 
3283 	return (rv);
3284 }
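
/*
 * Example (illustrative sketch): EWOULDBLOCK from ldc_write() means
 * the transmit queue was full and nothing was sent, so the caller can
 * back off and retry the same buffer. 'hdl', 'buf' and 'len' are
 * hypothetical.
 *
 *	size = len;
 *	while ((rv = ldc_write(hdl, (caddr_t)buf, &size)) == EWOULDBLOCK) {
 *		delay(drv_usectohz(1000));
 *		size = len;
 *	}
 */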
3285 
3286 /*
3287  * Write a raw packet to the channel
3288  * On return, size contains the number of bytes written.
3289  */
3290 static int
3291 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3292 {
3293 	ldc_msg_t 	*ldcmsg;
3294 	uint64_t 	tx_head, tx_tail, new_tail;
3295 	int		rv = 0;
3296 	size_t		size;
3297 
3298 	ASSERT(mutex_owned(&ldcp->lock));
3299 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3300 
3301 	size = *sizep;
3302 
3303 	/*
3304 	 * Check to see if the packet size is less than or
3305 	 * equal to the packet size supported in raw mode
3306 	 */
3307 	if (size > ldcp->pkt_payload) {
3308 		DWARN(ldcp->id,
3309 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3310 		    ldcp->id, *sizep);
3311 		*sizep = 0;
3312 		return (EMSGSIZE);
3313 	}
3314 
3315 	/* get the qptrs for the tx queue */
3316 	rv = hv_ldc_tx_get_state(ldcp->id,
3317 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3318 	if (rv != 0) {
3319 		cmn_err(CE_WARN,
3320 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3321 		*sizep = 0;
3322 		return (EIO);
3323 	}
3324 
3325 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3326 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3327 		DWARN(ldcp->id,
3328 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3329 		i_ldc_reset(ldcp);
3330 		*sizep = 0;
3331 		return (ECONNRESET);
3332 	}
3333 
3334 	tx_tail = ldcp->tx_tail;
3335 	tx_head = ldcp->tx_head;
3336 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3337 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3338 
3339 	if (new_tail == tx_head) {
3340 		DWARN(DBG_ALL_LDCS,
3341 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3342 		*sizep = 0;
3343 		return (EWOULDBLOCK);
3344 	}
3345 
3346 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3347 	    ldcp->id, size);
3348 
3349 	/* Send the data now */
3350 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3351 
3352 	/* copy the data into pkt */
3353 	bcopy((uint8_t *)buf, ldcmsg, size);
3354 
3355 	/* increment tail */
3356 	tx_tail = new_tail;
3357 
3358 	/*
3359 	 * All packets have been copied into the TX queue;
3360 	 * update the tail ptr in the HV
3361 	 */
3362 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3363 	if (rv) {
3364 		if (rv == EWOULDBLOCK) {
3365 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3366 			    ldcp->id);
3367 			*sizep = 0;
3368 			return (EWOULDBLOCK);
3369 		}
3370 
3371 		/* cannot write data - reset channel */
3372 		i_ldc_reset(ldcp);
3373 		*sizep = 0;
3374 		return (ECONNRESET);
3375 	}
3376 
3377 	ldcp->tx_tail = tx_tail;
3378 	*sizep = size;
3379 
3380 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3381 
3382 	return (rv);
3383 }
3384 
3385 
3386 /*
3387  * Write the specified number of bytes to the channel
3388  * in multiple pkts of pkt_payload size. Each
3389  * packet is tagged with a unique packet ID in
3390  * the case of a reliable transport.
3391  *
3392  * On return, size contains the number of bytes written.
3393  * This function needs to ensure that the write size is < MTU size
3394  */
3395 static int
3396 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3397 {
3398 	ldc_msg_t 	*ldcmsg;
3399 	uint64_t 	tx_head, tx_tail, new_tail, start;
3400 	uint64_t	txq_size_mask, numavail;
3401 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3402 	size_t 		len, bytes_written = 0, remaining;
3403 	int		rv;
3404 	uint32_t	curr_seqid;
3405 
3406 	ASSERT(mutex_owned(&ldcp->lock));
3407 
3408 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3409 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3410 		ldcp->mode == LDC_MODE_STREAM);
3411 
3412 	/* compute mask for increment */
3413 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3414 
3415 	/* get the qptrs for the tx queue */
3416 	rv = hv_ldc_tx_get_state(ldcp->id,
3417 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3418 	if (rv != 0) {
3419 		cmn_err(CE_WARN,
3420 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3421 		*size = 0;
3422 		return (EIO);
3423 	}
3424 
3425 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3426 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3427 		DWARN(ldcp->id,
3428 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3429 		*size = 0;
3430 		i_ldc_reset(ldcp);
3431 		return (ECONNRESET);
3432 	}
3433 
3434 	tx_tail = ldcp->tx_tail;
3435 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3436 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3437 
3438 	/*
3439 	 * Transport mode determines whether we use HV Tx head or the
3440 	 * private protocol head (corresponding to last ACKd pkt) for
3441 	 * determining how much we can write
3442 	 */
3443 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3444 		ldcp->mode == LDC_MODE_STREAM)
3445 		? ldcp->tx_ackd_head : ldcp->tx_head;
3446 	if (new_tail == tx_head) {
3447 		DWARN(DBG_ALL_LDCS,
3448 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3449 		*size = 0;
3450 		return (EWOULDBLOCK);
3451 	}
3452 
3453 	/*
3454 	 * Make sure that the LDC Tx queue has enough space
3455 	 */
3456 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3457 		+ ldcp->tx_q_entries - 1;
3458 	numavail %= ldcp->tx_q_entries;
3459 
3460 	if (*size > (numavail * ldcp->pkt_payload)) {
3461 		DWARN(DBG_ALL_LDCS,
3462 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3463 		return (EWOULDBLOCK);
3464 	}
3465 
3466 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3467 	    ldcp->id, *size);
3468 
3469 	/* Send the data now */
3470 	bytes_written = 0;
3471 	curr_seqid = ldcp->last_msg_snt;
3472 	start = tx_tail;
3473 
3474 	while (*size > bytes_written) {
3475 
3476 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3477 
3478 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3479 			ldcp->mode == LDC_MODE_STREAM)
3480 			? ldcmsg->rdata : ldcmsg->udata);
3481 
3482 		ldcmsg->type = LDC_DATA;
3483 		ldcmsg->stype = LDC_INFO;
3484 		ldcmsg->ctrl = 0;
3485 
3486 		remaining = *size - bytes_written;
3487 		len = min(ldcp->pkt_payload, remaining);
3488 		ldcmsg->env = (uint8_t)len;
3489 
3490 		curr_seqid++;
3491 		ldcmsg->seqid = curr_seqid;
3492 
3493 		DUMP_LDC_PKT(ldcp, "ldc_write snd data", (uint64_t)ldcmsg);
3494 
3495 		/* copy the data into pkt */
3496 		bcopy(source, msgbuf, len);
3497 
3498 		source += len;
3499 		bytes_written += len;
3500 
3501 		/* increment tail */
3502 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3503 
3504 		ASSERT(tx_tail != tx_head);
3505 	}
3506 
3507 	/* Set the start and stop bits */
3508 	ldcmsg->env |= LDC_FRAG_STOP;
3509 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3510 	ldcmsg->env |= LDC_FRAG_START;
3511 
3512 	/*
3513 	 * All packets have been copied into the TX queue;
3514 	 * update the tail ptr in the HV
3515 	 */
3516 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3517 	if (rv == 0) {
3518 		ldcp->tx_tail = tx_tail;
3519 		ldcp->last_msg_snt = curr_seqid;
3520 		*size = bytes_written;
3521 	} else {
3522 		int rv2;
3523 
3524 		if (rv != EWOULDBLOCK) {
3525 			/* cannot write data - reset channel */
3526 			i_ldc_reset(ldcp);
3527 			*size = 0;
3528 			return (ECONNRESET);
3529 		}
3530 
3531 		DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
3532 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
3533 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
3534 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
3535 
3536 		rv2 = hv_ldc_tx_get_state(ldcp->id,
3537 		    &tx_head, &tx_tail, &ldcp->link_state);
3538 
3539 		DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
3540 			"(head 0x%x, tail 0x%x state 0x%x)\n",
3541 			rv2, tx_head, tx_tail, ldcp->link_state);
3542 
3543 		*size = 0;
3544 	}
3545 
3546 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
3547 
3548 	return (rv);
3549 }
3550 
3551 /*
3552  * Write a stream of bytes to the channel, truncating
3553  * the request to the channel MTU if necessary, and
3554  * sending the data using the underlying reliable
3555  * packet transport.
3556  *
3557  * On return, size contains the number of bytes written,
3558  * which is at most the channel MTU.
3559  */
3560 static int
3561 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3562 {
3563 	ASSERT(mutex_owned(&ldcp->lock));
3564 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
3565 
3566 	/* Truncate packet to max of MTU size */
3567 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
3568 	return (i_ldc_write_packet(ldcp, buf, sizep));
3569 }
3570 
3571 
3572 /*
3573  * Interfaces for channel nexus to register/unregister with LDC module
3574  * The nexus will register functions to be used to register individual
3575  * channels with the nexus and enable interrupts for the channels
3576  */
3577 int
3578 ldc_register(ldc_cnex_t *cinfo)
3579 {
3580 	ldc_chan_t	*ldcp;
3581 
3582 	if (cinfo == NULL || cinfo->dip == NULL ||
3583 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
3584 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
3585 	    cinfo->clr_intr == NULL) {
3586 
3587 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
3588 		return (EINVAL);
3589 	}
3590 
3591 	mutex_enter(&ldcssp->lock);
3592 
3593 	/* nexus registration */
3594 	ldcssp->cinfo.dip = cinfo->dip;
3595 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
3596 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
3597 	ldcssp->cinfo.add_intr = cinfo->add_intr;
3598 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
3599 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
3600 
3601 	/* register any channels that might have been previously initialized */
3602 	ldcp = ldcssp->chan_list;
3603 	while (ldcp) {
3604 		if ((ldcp->tstate & TS_QCONF_RDY) &&
3605 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
3606 			(void) i_ldc_register_channel(ldcp);
3607 
3608 		ldcp = ldcp->next;
3609 	}
3610 
3611 	mutex_exit(&ldcssp->lock);
3612 
3613 	return (0);
3614 }
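
/*
 * Example (illustrative sketch): a channel nexus driver registers its
 * entry points once at attach time. The cnex_* functions named here
 * are hypothetical nexus-side implementations.
 *
 *	ldc_cnex_t cinfo;
 *
 *	cinfo.dip = dip;
 *	cinfo.reg_chan = cnex_reg_chan;
 *	cinfo.unreg_chan = cnex_unreg_chan;
 *	cinfo.add_intr = cnex_add_intr;
 *	cinfo.rem_intr = cnex_rem_intr;
 *	cinfo.clr_intr = cnex_clr_intr;
 *	(void) ldc_register(&cinfo);
 */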
3615 
3616 int
3617 ldc_unregister(ldc_cnex_t *cinfo)
3618 {
3619 	if (cinfo == NULL || cinfo->dip == NULL) {
3620 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
3621 		return (EINVAL);
3622 	}
3623 
3624 	mutex_enter(&ldcssp->lock);
3625 
3626 	if (cinfo->dip != ldcssp->cinfo.dip) {
3627 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
3628 		mutex_exit(&ldcssp->lock);
3629 		return (EINVAL);
3630 	}
3631 
3632 	/* nexus unregister */
3633 	ldcssp->cinfo.dip = NULL;
3634 	ldcssp->cinfo.reg_chan = NULL;
3635 	ldcssp->cinfo.unreg_chan = NULL;
3636 	ldcssp->cinfo.add_intr = NULL;
3637 	ldcssp->cinfo.rem_intr = NULL;
3638 	ldcssp->cinfo.clr_intr = NULL;
3639 
3640 	mutex_exit(&ldcssp->lock);
3641 
3642 	return (0);
3643 }
3644 
3645 
3646 /* ------------------------------------------------------------------------- */
3647 
3648 /*
3649  * Allocate a memory handle for the channel and link it into the list
3650  * Also choose which memory table to use if this is the first handle
3651  * being assigned to this channel
3652  */
3653 int
3654 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
3655 {
3656 	ldc_chan_t 	*ldcp;
3657 	ldc_mhdl_t	*mhdl;
3658 	int 		rv;
3659 
3660 	if (handle == NULL) {
3661 		DWARN(DBG_ALL_LDCS,
3662 		    "ldc_mem_alloc_handle: invalid channel handle\n");
3663 		return (EINVAL);
3664 	}
3665 	ldcp = (ldc_chan_t *)handle;
3666 
3667 	mutex_enter(&ldcp->lock);
3668 
3669 	/* check to see if channel is initialized */
3670 	if (ldcp->tstate < TS_INIT) {
3671 		DWARN(ldcp->id,
3672 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
3673 		    ldcp->id);
3674 		mutex_exit(&ldcp->lock);
3675 		return (EINVAL);
3676 	}
3677 
3678 	/*
3679 	 * If this channel is allocating a mem handle for the
3680 	 * first time, allocate it a memory map table and initialize it
3681 	 */
3682 	if (ldcp->mtbl == NULL) {
3683 
3684 		ldc_mtbl_t *mtbl;
3685 
3686 		/* Allocate and initialize the map table structure */
3687 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
3688 		mtbl->size = MTBL_MAX_SIZE;
3689 		mtbl->num_entries = mtbl->num_avail =
3690 			(MTBL_MAX_SIZE/sizeof (ldc_mte_slot_t));
3691 		mtbl->next_entry = NULL;
3692 
3693 		/* Allocate the table itself */
3694 		mtbl->table = (ldc_mte_slot_t *)
3695 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
3696 		if (mtbl->table == NULL) {
3697 			cmn_err(CE_WARN,
3698 			    "ldc_mem_alloc_handle: (0x%lx) error allocating "
3699 			    "table memory", ldcp->id);
3700 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3701 			mutex_exit(&ldcp->lock);
3702 			return (ENOMEM);
3703 		}
3704 
3705 		/* zero out the memory */
3706 		bzero(mtbl->table, mtbl->size);
3707 
3708 		/* initialize the lock */
3709 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
3710 
3711 		/* register table for this channel */
3712 		rv = hv_ldc_set_map_table(ldcp->id,
3713 		    va_to_pa(mtbl->table), mtbl->num_entries);
3714 		if (rv != 0) {
3715 			cmn_err(CE_WARN,
3716 			    "ldc_mem_alloc_handle: (0x%lx) err %d mapping tbl",
3717 			    ldcp->id, rv);
3718 			contig_mem_free(mtbl->table, mtbl->size);
3719 			mutex_destroy(&mtbl->lock);
3720 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3721 			mutex_exit(&ldcp->lock);
3722 			return (EIO);
3723 		}
3724 
3725 		ldcp->mtbl = mtbl;
3726 
3727 		D1(ldcp->id,
3728 		    "ldc_mem_alloc_handle: (0x%llx) alloc'd map table 0x%llx\n",
3729 		    ldcp->id, ldcp->mtbl->table);
3730 	}
3731 
3732 	/* allocate handle for channel */
3733 	mhdl = kmem_zalloc(sizeof (ldc_mhdl_t), KM_SLEEP);
3734 
3735 	/* initialize the lock */
3736 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
3737 
3738 	mhdl->status = LDC_UNBOUND;
3739 	mhdl->ldcp = ldcp;
3740 
3741 	/* insert memory handle (@ head) into list */
3742 	if (ldcp->mhdl_list == NULL) {
3743 		ldcp->mhdl_list = mhdl;
3744 		mhdl->next = NULL;
3745 	} else {
3746 		/* insert @ head */
3747 		mhdl->next = ldcp->mhdl_list;
3748 		ldcp->mhdl_list = mhdl;
3749 	}
3750 
3751 	/* return the handle */
3752 	*mhandle = (ldc_mem_handle_t)mhdl;
3753 
3754 	mutex_exit(&ldcp->lock);
3755 
3756 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
3757 	    ldcp->id, mhdl);
3758 
3759 	return (0);
3760 }
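
/*
 * Example (illustrative sketch): exporting a buffer to the peer. A
 * handle is allocated once, bound to an 8-byte-aligned buffer, and the
 * returned cookie(s) are then handed to the peer out of band. 'hdl',
 * 'buf' and 'size' are hypothetical.
 *
 *	ldc_mem_handle_t mh;
 *	ldc_mem_cookie_t cookie;
 *	uint32_t ccount;
 *
 *	(void) ldc_mem_alloc_handle(hdl, &mh);
 *	rv = ldc_mem_bind_handle(mh, (caddr_t)buf, size,
 *	    LDC_SHADOW_MAP, LDC_MEM_R | LDC_MEM_W, &cookie, &ccount);
 *	if (rv == 0) {
 *		... send 'cookie' (and ccount) to the peer
 *	}
 */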
3761 
3762 /*
3763  * Free memory handle for the channel and unlink it from the list
3764  */
3765 int
3766 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
3767 {
3768 	ldc_mhdl_t 	*mhdl, *phdl;
3769 	ldc_chan_t 	*ldcp;
3770 
3771 	if (mhandle == NULL) {
3772 		DWARN(DBG_ALL_LDCS,
3773 		    "ldc_mem_free_handle: invalid memory handle\n");
3774 		return (EINVAL);
3775 	}
3776 	mhdl = (ldc_mhdl_t *)mhandle;
3777 
3778 	mutex_enter(&mhdl->lock);
3779 
3780 	ldcp = mhdl->ldcp;
3781 
3782 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
3783 		DWARN(ldcp->id,
3784 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
3785 		    mhdl);
3786 		mutex_exit(&mhdl->lock);
3787 		return (EINVAL);
3788 	}
3789 	mutex_exit(&mhdl->lock);
3790 
3791 	mutex_enter(&ldcp->mlist_lock);
3792 
3793 	phdl = ldcp->mhdl_list;
3794 
3795 	/* first handle */
3796 	if (phdl == mhdl) {
3797 		ldcp->mhdl_list = mhdl->next;
3798 		mutex_destroy(&mhdl->lock);
3799 		kmem_free(mhdl, sizeof (ldc_mhdl_t));
3800 		D1(ldcp->id,
3801 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
3802 		    ldcp->id, mhdl);
3803 	} else {
3804 		/* walk the list - unlink and free */
3805 		while (phdl != NULL) {
3806 			if (phdl->next == mhdl) {
3807 				phdl->next = mhdl->next;
3808 				mutex_destroy(&mhdl->lock);
3809 				kmem_free(mhdl, sizeof (ldc_mhdl_t));
3810 				D1(ldcp->id,
3811 				    "ldc_mem_free_handle: (0x%llx) freed "
3812 				    "handle 0x%llx\n", ldcp->id, mhdl);
3813 				break;
3814 			}
3815 			phdl = phdl->next;
3816 		}
3817 	}
3818 
3819 	if (phdl == NULL) {
3820 		DWARN(ldcp->id,
3821 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
3822 		mutex_exit(&ldcp->mlist_lock);
3823 		return (EINVAL);
3824 	}
3825 
3826 	mutex_exit(&ldcp->mlist_lock);
3827 
3828 	return (0);
3829 }
3830 
3831 /*
3832  * Bind a memory handle to a virtual address.
3833  * The virtual address is converted to the corresponding real addresses.
3834  * Returns pointer to the first ldc_mem_cookie and the total number
3835  * of cookies for this virtual address. Other cookies can be obtained
3836  * using the ldc_mem_nextcookie() call. If the pages are stored in
3837  * consecutive locations in the table, a single cookie corresponding to
3838  * the first location is returned. The cookie size spans all the entries.
3839  *
3840  * If the VA corresponds to a page that is already being exported, reuse
3841  * the page and do not export it again. Bump the page's use count.
3842  */
3843 int
3844 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
3845     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
3846 {
3847 	ldc_mhdl_t	*mhdl;
3848 	ldc_chan_t 	*ldcp;
3849 	ldc_mtbl_t	*mtbl;
3850 	ldc_memseg_t	*memseg;
3851 	ldc_mte_t	tmp_mte;
3852 	uint64_t	index, prev_index = 0;
3853 	int64_t		cookie_idx;
3854 	uintptr_t	raddr, ra_aligned;
3855 	uint64_t	psize, poffset, v_offset;
3856 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
3857 	pgcnt_t		npages;
3858 	caddr_t		v_align, addr;
3859 	int 		i;
3860 
3861 	if (mhandle == NULL) {
3862 		DWARN(DBG_ALL_LDCS,
3863 		    "ldc_mem_bind_handle: invalid memory handle\n");
3864 		return (EINVAL);
3865 	}
3866 	mhdl = (ldc_mhdl_t *)mhandle;
3867 	ldcp = mhdl->ldcp;
3868 	mtbl = ldcp->mtbl;
3869 
3870 	/* clear count */
3871 	*ccount = 0;
3872 
3873 	mutex_enter(&mhdl->lock);
3874 
3875 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
3876 		DWARN(ldcp->id,
3877 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
3878 		    mhandle);
3879 		mutex_exit(&mhdl->lock);
3880 		return (EINVAL);
3881 	}
3882 
3883 	/* Force address and size to be 8-byte aligned */
3884 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
3885 		DWARN(ldcp->id,
3886 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
3887 		mutex_exit(&mhdl->lock);
3888 		return (EINVAL);
3889 	}
3890 
3891 	/* FUTURE: get the page size, pgsz code, and shift */
3892 	pg_size = MMU_PAGESIZE;
3893 	pg_size_code = page_szc(pg_size);
3894 	pg_shift = page_get_shift(pg_size_code);
3895 	pg_mask = ~(pg_size - 1);
3896 
3897 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
3898 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
3899 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
3900 
3901 	/* aligned VA and its offset */
3902 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
3903 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
3904 
3905 	npages = (len+v_offset)/pg_size;
3906 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
3907 
3908 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
3909 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
3910 	    ldcp->id, vaddr, v_align, v_offset, npages);
3911 
3912 	/* lock the memory table - exclusive access to channel */
3913 	mutex_enter(&mtbl->lock);
3914 
3915 	if (npages > mtbl->num_avail) {
3916 		DWARN(ldcp->id,
3917 		    "ldc_mem_bind_handle: (0x%llx) no table entries\n",
3918 		    ldcp->id);
3919 		mutex_exit(&mtbl->lock);
3920 		mutex_exit(&mhdl->lock);
3921 		return (ENOMEM);
3922 	}
3923 
3924 	/* Allocate a memseg structure */
3925 	memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
3926 
3927 	/* Allocate memory to store all pages and cookies */
3928 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
3929 	memseg->cookies =
3930 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
3931 
3932 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
3933 	    ldcp->id, npages);
3934 
3935 	addr = v_align;
3936 
3937 	/*
3938 	 * Table slots are used in a round-robin manner. The algorithm permits
3939 	 * inserting duplicate entries. Slots allocated earlier will typically
3940 	 * get freed before we get back to reusing the slot. Inserting duplicate
3941 	 * entries should be OK, as we only look up entries using the cookie
3942 	 * addr, i.e., the tbl index, during export, unexport and copy operations.
3943 	 *
3944 	 * One implementation that was tried was to search for a duplicate
3945 	 * page entry first and reuse it. The search overhead is very high and
3946 	 * in the vnet case dropped the perf by almost half, from 50 to 24 mbps.
3947 	 * So it makes sense to avoid searching for duplicates.
3948 	 *
3949 	 * But during the process of searching for a free slot, if we find a
3950 	 * duplicate entry we will go ahead and use it, and bump its use count.
3951 	 */
3952 
3953 	/* index to start searching from */
3954 	index = mtbl->next_entry;
3955 	cookie_idx = -1;
3956 
3957 	tmp_mte.ll = 0;	/* initialise fields to 0 */
3958 
3959 	if (mtype & LDC_DIRECT_MAP) {
3960 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
3961 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
3962 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
3963 	}
3964 
3965 	if (mtype & LDC_SHADOW_MAP) {
3966 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
3967 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
3968 	}
3969 
3970 	if (mtype & LDC_IO_MAP) {
3971 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
3972 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
3973 	}
3974 
3975 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
3976 
3977 	tmp_mte.mte_pgszc = pg_size_code;
3978 
3979 	/* initialize each mem table entry */
3980 	for (i = 0; i < npages; i++) {
3981 
3982 		/* check if slot is available in the table */
3983 		while (mtbl->table[index].entry.ll != 0) {
3984 
3985 			index = (index + 1) % mtbl->num_entries;
3986 
3987 			if (index == mtbl->next_entry) {
3988 				/* we have looped around */
3989 				DWARN(DBG_ALL_LDCS,
3990 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
3991 				    "entry\n", ldcp->id);
3992 				*ccount = 0;
3993 
3994 				/* NOTE: free memory, remove previous entries */
3995 				/* this shouldn't happen, as num_avail was OK */
3996 
3997 				mutex_exit(&mtbl->lock);
3998 				mutex_exit(&mhdl->lock);
3999 				return (ENOMEM);
4000 			}
4001 		}
4002 
4003 		/* get the real address */
4004 		raddr = va_to_pa((void *)addr);
4005 		ra_aligned = ((uintptr_t)raddr & pg_mask);
4006 
4007 		/* build the mte */
4008 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
4009 
4010 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
4011 
4012 		/* update entry in table */
4013 		mtbl->table[index].entry = tmp_mte;
4014 
4015 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
4016 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
4017 
4018 		/* calculate the size and offset for this export range */
4019 		if (i == 0) {
4020 			/* first page */
4021 			psize = min((pg_size - v_offset), len);
4022 			poffset = v_offset;
4023 
4024 		} else if (i == (npages - 1)) {
4025 			/* last page */
4026 			psize = (((uintptr_t)(vaddr + len)) &
4027 				    ((uint64_t)(pg_size-1)));
4028 			if (psize == 0)
4029 				psize = pg_size;
4030 			poffset = 0;
4031 
4032 		} else {
4033 			/* middle pages */
4034 			psize = pg_size;
4035 			poffset = 0;
4036 		}
4037 
4038 		/* store entry for this page */
4039 		memseg->pages[i].index = index;
4040 		memseg->pages[i].raddr = raddr;
4041 		memseg->pages[i].offset = poffset;
4042 		memseg->pages[i].size = psize;
4043 		memseg->pages[i].mte = &(mtbl->table[index]);
4044 
4045 		/* create the cookie */
4046 		if (i == 0 || (index != prev_index + 1)) {
4047 			cookie_idx++;
4048 			memseg->cookies[cookie_idx].addr =
4049 				IDX2COOKIE(index, pg_size_code, pg_shift);
4050 			memseg->cookies[cookie_idx].addr |= poffset;
4051 			memseg->cookies[cookie_idx].size = psize;
4052 
4053 		} else {
4054 			memseg->cookies[cookie_idx].size += psize;
4055 		}
4056 
4057 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4058 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4059 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4060 		    ldcp->id, addr, index, raddr, psize, poffset);
4061 
4062 		/* decrement number of available entries */
4063 		mtbl->num_avail--;
4064 
4065 		/* increment va by page size */
4066 		addr += pg_size;
4067 
4068 		/* increment index */
4069 		prev_index = index;
4070 		index = (index + 1) % mtbl->num_entries;
4071 
4072 		/* save the next slot */
4073 		mtbl->next_entry = index;
4074 	}
4075 
4076 	mutex_exit(&mtbl->lock);
4077 
4078 	/* memory handle = bound */
4079 	mhdl->mtype = mtype;
4080 	mhdl->perm = perm;
4081 	mhdl->status = LDC_BOUND;
4082 
4083 	/* update memseg_t */
4084 	memseg->vaddr = vaddr;
4085 	memseg->raddr = memseg->pages[0].raddr;
4086 	memseg->size = len;
4087 	memseg->npages = npages;
4088 	memseg->ncookies = cookie_idx + 1;
4089 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4090 
4091 	/* return count and first cookie */
4092 	*ccount = memseg->ncookies;
4093 	cookie->addr = memseg->cookies[0].addr;
4094 	cookie->size = memseg->cookies[0].size;
4095 
4096 	D1(ldcp->id,
4097 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4098 	    "pgs=0x%llx cookies=0x%llx\n",
4099 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4100 
4101 	mutex_exit(&mhdl->lock);
4102 	return (0);
4103 }
4104 
4105 /*
4106  * Return the next cookie associated with the specified memory handle
4107  */
4108 int
4109 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4110 {
4111 	ldc_mhdl_t	*mhdl;
4112 	ldc_chan_t 	*ldcp;
4113 	ldc_memseg_t	*memseg;
4114 
4115 	if (mhandle == NULL) {
4116 		DWARN(DBG_ALL_LDCS,
4117 		    "ldc_mem_nextcookie: invalid memory handle\n");
4118 		return (EINVAL);
4119 	}
4120 	mhdl = (ldc_mhdl_t *)mhandle;
4121 
4122 	mutex_enter(&mhdl->lock);
4123 
4124 	ldcp = mhdl->ldcp;
4125 	memseg = mhdl->memseg;
4126 
4127 	if (cookie == NULL) {
4128 		DWARN(ldcp->id,
4129 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4130 		    ldcp->id);
4131 		mutex_exit(&mhdl->lock);
4132 		return (EINVAL);
4133 	}
4134 
4135 	if (memseg->next_cookie != 0) {
4136 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4137 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4138 		memseg->next_cookie++;
4139 		if (memseg->next_cookie == memseg->ncookies)
4140 			memseg->next_cookie = 0;
4141 
4142 	} else {
4143 		DWARN(ldcp->id,
4144 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4145 		cookie->addr = 0;
4146 		cookie->size = 0;
4147 		mutex_exit(&mhdl->lock);
4148 		return (EINVAL);
4149 	}
4150 
4151 	D1(ldcp->id,
4152 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4153 	    ldcp->id, cookie->addr, cookie->size);
4154 
4155 	mutex_exit(&mhdl->lock);
4156 	return (0);
4157 }
4158 
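/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): a client that binds a buffer and forwards every cookie to
 * its peer might iterate as below. chan_hdl, buf, buflen and
 * send_cookie_to_peer() are placeholders.
 *
 *	ldc_mem_handle_t mhdl;
 *	ldc_mem_cookie_t cookie;
 *	uint32_t	 ccount, i;
 *
 *	(void) ldc_mem_alloc_handle(chan_hdl, &mhdl);
 *	(void) ldc_mem_bind_handle(mhdl, buf, buflen,
 *	    LDC_SHADOW_MAP, LDC_MEM_RW, &cookie, &ccount);
 *	send_cookie_to_peer(&cookie);		(first cookie)
 *	for (i = 1; i < ccount; i++) {
 *		(void) ldc_mem_nextcookie(mhdl, &cookie);
 *		send_cookie_to_peer(&cookie);	(remaining cookies)
 *	}
 */
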
4159 /*
4160  * Unbind the virtual memory region associated with the specified
4161  * memory handle. All associated cookies are freed and the corresponding
4162  * RA space is no longer exported.
4163  */
4164 int
4165 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4166 {
4167 	ldc_mhdl_t	*mhdl;
4168 	ldc_chan_t 	*ldcp;
4169 	ldc_mtbl_t	*mtbl;
4170 	ldc_memseg_t	*memseg;
4171 	int		i;
4172 
4173 	if (mhandle == NULL) {
4174 		DWARN(DBG_ALL_LDCS,
4175 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4176 		return (EINVAL);
4177 	}
4178 	mhdl = (ldc_mhdl_t *)mhandle;
4179 
4180 	mutex_enter(&mhdl->lock);
4181 
4182 	if (mhdl->status == LDC_UNBOUND) {
4183 		DWARN(DBG_ALL_LDCS,
4184 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4185 		    mhandle);
4186 		mutex_exit(&mhdl->lock);
4187 		return (EINVAL);
4188 	}
4189 
4190 	ldcp = mhdl->ldcp;
4191 	mtbl = ldcp->mtbl;
4192 
4193 	memseg = mhdl->memseg;
4194 
4195 	/* lock the memory table - exclusive access to channel */
4196 	mutex_enter(&mtbl->lock);
4197 
4198 	/* undo the pages exported */
4199 	for (i = 0; i < memseg->npages; i++) {
4200 
4201 		/* FUTURE: check for mapped pages */
4202 		if (memseg->pages[i].mte->cookie) {
4203 			_NOTE(EMPTY)
4204 		}
4205 
4206 		/* clear the entry from the table */
4207 		memseg->pages[i].mte->entry.ll = 0;
4208 		mtbl->num_avail++;
4209 	}
4210 	mutex_exit(&mtbl->lock);
4211 
4212 	/* free the allocated memseg and page structures */
4213 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4214 	kmem_free(memseg->cookies,
4215 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4216 	kmem_free(memseg, sizeof (ldc_memseg_t));
4217 
4218 	/* uninitialize the memory handle */
4219 	mhdl->memseg = NULL;
4220 	mhdl->status = LDC_UNBOUND;
4221 
4222 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4223 	    ldcp->id, mhdl);
4224 
4225 	mutex_exit(&mhdl->lock);
4226 	return (0);
4227 }
4228 
4229 /*
4230  * Get information about the memory handle. The base VA and RA of the
4231  * segment, along with its map type, permissions and status, are returned.
4232  */
4233 int
4234 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4235 {
4236 	ldc_mhdl_t	*mhdl;
4237 
4238 	if (mhandle == NULL) {
4239 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4240 		return (EINVAL);
4241 	}
4242 	mhdl = (ldc_mhdl_t *)mhandle;
4243 
4244 	if (minfo == NULL) {
4245 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4246 		return (EINVAL);
4247 	}
4248 
4249 	mutex_enter(&mhdl->lock);
4250 
4251 	minfo->status = mhdl->status;
4252 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4253 		minfo->vaddr = mhdl->memseg->vaddr;
4254 		minfo->raddr = mhdl->memseg->raddr;
4255 		minfo->mtype = mhdl->mtype;
4256 		minfo->perm = mhdl->perm;
4257 	}
4258 	mutex_exit(&mhdl->lock);
4259 
4260 	return (0);
4261 }
4262 
4263 /*
4264  * Copy data either from or to the client specified virtual address
4265  * space to or from the exported memory associated with the cookies.
4266  * The direction argument determines whether the data is read from or
4267  * written to exported memory.
4268  */
4269 int
4270 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4271     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4272 {
4273 	ldc_chan_t 	*ldcp;
4274 	uint64_t	local_voff, local_valign;
4275 	uint64_t	cookie_addr, cookie_size;
4276 	uint64_t	pg_shift, pg_size, pg_size_code;
4277 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4278 	uint64_t	local_ra, local_poff, local_psize;
4279 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4280 	pgcnt_t		npages;
4281 	size_t		len = *size;
4282 	int 		i, rv = 0;
4283 
4284 	if (handle == NULL) {
4285 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4286 		return (EINVAL);
4287 	}
4288 	ldcp = (ldc_chan_t *)handle;
4289 
4290 	mutex_enter(&ldcp->lock);
4291 
4292 	/* check to see if channel is UP */
4293 	if (ldcp->tstate != TS_UP) {
4294 		DWARN(ldcp->id, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4295 		    ldcp->id);
4296 		mutex_exit(&ldcp->lock);
4297 		return (EINVAL);
4298 	}
4299 
4300 	/* Force address and size to be 8-byte aligned */
4301 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4302 		DWARN(ldcp->id,
4303 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4304 		mutex_exit(&ldcp->lock);
4305 		return (EINVAL);
4306 	}
4307 
4308 	/* Find the size of the exported memory */
4309 	export_size = 0;
4310 	for (i = 0; i < ccount; i++)
4311 		export_size += cookies[i].size;
4312 
4313 	/* check to see if offset is valid */
4314 	if (off > export_size) {
4315 		DWARN(ldcp->id,
4316 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4317 		    ldcp->id);
4318 		mutex_exit(&ldcp->lock);
4319 		return (EINVAL);
4320 	}
4321 
4322 	/*
4323 	 * Check to see if the export size is smaller than the size we
4324 	 * are requesting to copy - if so flag an error
4325 	 */
4326 	if ((export_size - off) < *size) {
4327 		DWARN(ldcp->id,
4328 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4329 		    ldcp->id);
4330 		mutex_exit(&ldcp->lock);
4331 		return (EINVAL);
4332 	}
4333 
4334 	total_bal = min(export_size, *size);
4335 
4336 	/* FUTURE: get the page size, pgsz code, and shift */
4337 	pg_size = MMU_PAGESIZE;
4338 	pg_size_code = page_szc(pg_size);
4339 	pg_shift = page_get_shift(pg_size_code);
4340 
4341 	D1(ldcp->id, "ldc_mem_copy: copying data "
4342 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4343 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4344 
4345 	/* aligned VA and its offset */
4346 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4347 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4348 
4349 	npages = (len+local_voff)/pg_size;
4350 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4351 
4352 	D1(ldcp->id,
4353 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4354 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4355 
4356 	local_ra = va_to_pa((void *)local_valign);
4357 	local_poff = local_voff;
4358 	local_psize = min(len, (pg_size - local_voff));
4359 
4360 	len -= local_psize;
4361 
4362 	/*
4363 	 * find the first cookie in the list of cookies
4364 	 * if the offset passed in is not zero
4365 	 */
4366 	for (idx = 0; idx < ccount; idx++) {
4367 		cookie_size = cookies[idx].size;
4368 		if (off < cookie_size)
4369 			break;
4370 		off -= cookie_size;
4371 	}
4372 
4373 	cookie_addr = cookies[idx].addr + off;
4374 	cookie_size = cookies[idx].size - off;
4375 
4376 	export_caddr = cookie_addr & ~(pg_size - 1);
4377 	export_poff = cookie_addr & (pg_size - 1);
4378 	export_psize = min(cookie_size, (pg_size - export_poff));
4379 
4380 	for (;;) {
4381 
4382 		copy_size = min(export_psize, local_psize);
4383 
4384 		D1(ldcp->id,
4385 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4386 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4387 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4388 		    " total_bal=0x%llx\n",
4389 		    ldcp->id, direction, export_caddr, local_ra, export_poff,
4390 		    local_poff, export_psize, local_psize, copy_size,
4391 		    total_bal);
4392 
4393 		rv = hv_ldc_copy(ldcp->id, direction,
4394 		    (export_caddr + export_poff), (local_ra + local_poff),
4395 		    copy_size, &copied_len);
4396 
4397 		if (rv != 0) {
4398 			cmn_err(CE_WARN,
4399 			    "ldc_mem_copy: (0x%lx) err %d during copy\n",
4400 			    ldcp->id, rv);
4401 			DWARN(DBG_ALL_LDCS,
4402 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%llx, "
4403 			    "loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4404 			    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4405 			    " copied_len=0x%llx, total_bal=0x%llx\n",
4406 			    ldcp->id, direction, export_caddr, local_ra,
4407 			    export_poff, local_poff, export_psize, local_psize,
4408 			    copy_size, copied_len, total_bal);
4409 
4410 			*size = *size - total_bal;
4411 			mutex_exit(&ldcp->lock);
4412 			return (EIO);
4413 		}
4414 
4415 		ASSERT(copied_len <= copy_size);
4416 
4417 		D2(ldcp->id, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4418 		export_poff += copied_len;
4419 		local_poff += copied_len;
4420 		export_psize -= copied_len;
4421 		local_psize -= copied_len;
4422 		cookie_size -= copied_len;
4423 
4424 		total_bal -= copied_len;
4425 
4426 		if (copy_size != copied_len)
4427 			continue;
4428 
4429 		if (export_psize == 0 && total_bal != 0) {
4430 
4431 			if (cookie_size == 0) {
4432 				idx++;
4433 				cookie_addr = cookies[idx].addr;
4434 				cookie_size = cookies[idx].size;
4435 
4436 				export_caddr = cookie_addr & ~(pg_size - 1);
4437 				export_poff = cookie_addr & (pg_size - 1);
4438 				export_psize =
4439 					min(cookie_size, (pg_size-export_poff));
4440 			} else {
4441 				export_caddr += pg_size;
4442 				export_poff = 0;
4443 				export_psize = min(cookie_size, pg_size);
4444 			}
4445 		}
4446 
4447 		if (local_psize == 0 && total_bal != 0) {
4448 			local_valign += pg_size;
4449 			local_ra = va_to_pa((void *)local_valign);
4450 			local_poff = 0;
4451 			local_psize = min(pg_size, len);
4452 			len -= local_psize;
4453 		}
4454 
4455 		/* check if we are all done */
4456 		if (total_bal == 0)
4457 			break;
4458 	}
4459 
4460 	mutex_exit(&ldcp->lock);
4461 
4462 	D1(ldcp->id,
4463 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
4464 	    ldcp->id, *size);
4465 
4466 	return (0);
4467 }
4468 
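/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): an 8-byte-aligned write of len bytes from a local buffer
 * into the peer's exported memory described by cookies/ccount.
 * chan_hdl, buf, len, cookies and ccount are placeholders.
 *
 *	size_t size = len;
 *	int rv = ldc_mem_copy(chan_hdl, buf, 0, &size,
 *	    cookies, ccount, LDC_COPY_OUT);
 *
 * On success *size is left at the requested length; on EIO it is
 * updated to the number of bytes copied before the failure.
 */
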
4469 /*
4470  * Copy data either from or to the client specified virtual address
4471  * space to or from HV physical memory.
4472  *
4473  * The direction argument determines whether the data is read from or
4474  * written to HV memory. The direction values are LDC_COPY_IN/OUT,
4475  * as in the ldc_mem_copy interface.
4476  */
4477 int
4478 ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
4479     caddr_t paddr, uint8_t direction)
4480 {
4481 	ldc_chan_t 	*ldcp;
4482 	uint64_t	local_voff, local_valign;
4483 	uint64_t	pg_shift, pg_size, pg_size_code;
4484 	uint64_t 	target_pa, target_poff, target_psize, target_size;
4485 	uint64_t	local_ra, local_poff, local_psize;
4486 	uint64_t	copy_size, copied_len = 0;
4487 	pgcnt_t		npages;
4488 	size_t		len = *size;
4489 	int 		rv = 0;
4490 
4491 	if (handle == NULL) {
4492 		DWARN(DBG_ALL_LDCS,
4493 		    "ldc_mem_rdwr_pa: invalid channel handle\n");
4494 		return (EINVAL);
4495 	}
4496 	ldcp = (ldc_chan_t *)handle;
4497 
4498 	mutex_enter(&ldcp->lock);
4499 
4500 	/* check to see if channel is UP */
4501 	if (ldcp->tstate != TS_UP) {
4502 		DWARN(ldcp->id,
4503 		    "ldc_mem_rdwr_pa: (0x%llx) channel is not UP\n",
4504 		    ldcp->id);
4505 		mutex_exit(&ldcp->lock);
4506 		return (EINVAL);
4507 	}
4508 
4509 	/* Force address and size to be 8-byte aligned */
4510 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4511 		DWARN(ldcp->id,
4512 		    "ldc_mem_rdwr_pa: addr/size is not 8-byte aligned\n");
4513 		mutex_exit(&ldcp->lock);
4514 		return (EINVAL);
4515 	}
4516 
4517 	target_size = *size;
4518 
4519 	/* FUTURE: get the page size, pgsz code, and shift */
4520 	pg_size = MMU_PAGESIZE;
4521 	pg_size_code = page_szc(pg_size);
4522 	pg_shift = page_get_shift(pg_size_code);
4523 
4524 	D1(ldcp->id, "ldc_mem_rdwr_pa: copying data "
4525 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4526 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4527 
4528 	/* aligned VA and its offset */
4529 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
4530 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4531 
4532 	npages = (len + local_voff) / pg_size;
4533 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
4534 
4535 	D1(ldcp->id,
4536 	    "ldc_mem_rdwr_pa: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4537 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4538 
4539 	local_ra = va_to_pa((void *)local_valign);
4540 	local_poff = local_voff;
4541 	local_psize = min(len, (pg_size - local_voff));
4542 
4543 	len -= local_psize;
4544 
4545 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
4546 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
4547 	target_psize = pg_size - target_poff;
4548 
4549 	for (;;) {
4550 
4551 		copy_size = min(target_psize, local_psize);
4552 
4553 		D1(ldcp->id,
4554 		    "ldc_mem_rdwr_pa: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
4555 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4556 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4557 		    " total_bal=0x%llx\n",
4558 		    ldcp->id, direction, target_pa, local_ra, target_poff,
4559 		    local_poff, target_psize, local_psize, copy_size,
4560 		    target_size);
4561 
4562 		rv = hv_ldc_copy(ldcp->id, direction,
4563 		    (target_pa + target_poff), (local_ra + local_poff),
4564 		    copy_size, &copied_len);
4565 
4566 		if (rv != 0) {
4567 			cmn_err(CE_WARN,
4568 			    "ldc_mem_rdwr_pa: (0x%lx) err %d during copy\n",
4569 			    ldcp->id, rv);
4570 			DWARN(DBG_ALL_LDCS,
4571 			    "ldc_mem_rdwr_pa: (0x%llx) dir=%lld,tar_pa=0x%llx, "
4572 			    "loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4573 			    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4574 			    " total_bal=0x%llx\n",
4575 			    ldcp->id, direction, target_pa, local_ra,
4576 			    target_poff, local_poff, target_psize, local_psize,
4577 			    copy_size, target_size);
4578 
4579 			*size = *size - target_size;
4580 			mutex_exit(&ldcp->lock);
4581 			return (i_ldc_h2v_error(rv));
4582 		}
4583 
4584 		D2(ldcp->id, "ldc_mem_rdwr_pa: copied=0x%llx\n", copied_len);
4585 		target_poff += copied_len;
4586 		local_poff += copied_len;
4587 		target_psize -= copied_len;
4588 		local_psize -= copied_len;
4589 
4590 		target_size -= copied_len;
4591 
4592 		if (copy_size != copied_len)
4593 			continue;
4594 
4595 		if (target_psize == 0 && target_size != 0) {
4596 			target_pa += pg_size;
4597 			target_poff = 0;
4598 			target_psize = min(pg_size, target_size);
4599 		}
4600 
4601 		if (local_psize == 0 && target_size != 0) {
4602 			local_valign += pg_size;
4603 			local_ra = va_to_pa((void *)local_valign);
4604 			local_poff = 0;
4605 			local_psize = min(pg_size, len);
4606 			len -= local_psize;
4607 		}
4608 
4609 		/* check if we are all done */
4610 		if (target_size == 0)
4611 			break;
4612 	}
4613 
4614 	mutex_exit(&ldcp->lock);
4615 
4616 	D1(ldcp->id, "ldc_mem_rdwr_pa: (0x%llx) done copying sz=0x%llx\n",
4617 	    ldcp->id, *size);
4618 
4619 	return (0);
4620 }
4621 
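/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): reading len bytes of hypervisor physical memory at pa into
 * a local buffer. chan_hdl, buf, len and pa are placeholders.
 *
 *	size_t size = len;
 *	int rv = ldc_mem_rdwr_pa(chan_hdl, buf, &size,
 *	    (caddr_t)pa, LDC_COPY_IN);
 */
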
4622 /*
4623  * Map an exported memory segment into the local address space. If the
4624  * memory range was exported for direct map access, an HV call is made
4625  * to allocate a RA range. If the map is done via a shadow copy, local
4626  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
4627  * the mapping is a direct map then the RA is returned in 'raddr'.
4628  */
4629 int
4630 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
4631     uint8_t mtype, caddr_t *vaddr, caddr_t *raddr)
4632 {
4633 	int		i, idx;
4634 	ldc_chan_t 	*ldcp;
4635 	ldc_mhdl_t	*mhdl;
4636 	ldc_memseg_t	*memseg;
4637 	caddr_t		shadow_base = NULL, tmpaddr;
4638 	uint64_t	pg_size, pg_shift, pg_size_code;
4639 	uint64_t	exp_size = 0, npages;
4640 
4641 	if (mhandle == NULL) {
4642 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
4643 		return (EINVAL);
4644 	}
4645 	mhdl = (ldc_mhdl_t *)mhandle;
4646 
4647 	mutex_enter(&mhdl->lock);
4648 
4649 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
4650 	    mhdl->memseg != NULL) {
4651 		DWARN(DBG_ALL_LDCS,
4652 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
4653 		mutex_exit(&mhdl->lock);
4654 		return (EINVAL);
4655 	}
4656 
4657 	ldcp = mhdl->ldcp;
4658 
4659 	mutex_enter(&ldcp->lock);
4660 
4661 	if (ldcp->tstate != TS_UP) {
4662 		DWARN(ldcp->id,
4663 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
4664 		    ldcp->id);
4665 		mutex_exit(&ldcp->lock);
4666 		mutex_exit(&mhdl->lock);
4667 		return (EINVAL);
4668 	}
4669 
4670 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
4671 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
4672 		mutex_exit(&ldcp->lock);
4673 		mutex_exit(&mhdl->lock);
4674 		return (EINVAL);
4675 	}
4676 
4677 	if (mtype == LDC_SHADOW_MAP && vaddr == NULL) {
4678 		DWARN(ldcp->id,
4679 		    "ldc_mem_map: invalid vaddr arg0x%llx\n", vaddr);
4680 		mutex_exit(&ldcp->lock);
4681 		mutex_exit(&mhdl->lock);
4682 		return (EINVAL);
4683 	}
4684 
4685 	if (mtype == LDC_SHADOW_MAP &&
4686 	    (vaddr) && ((uintptr_t)(*vaddr) & MMU_PAGEOFFSET)) {
4687 		DWARN(ldcp->id,
4688 		    "ldc_mem_map: vaddr not page aligned, 0x%llx\n", *vaddr);
4689 		mutex_exit(&ldcp->lock);
4690 		mutex_exit(&mhdl->lock);
4691 		return (EINVAL);
4692 	}
4693 
4694 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
4695 	    mhandle, cookie->addr, cookie->size);
4696 
4697 	/* FUTURE: get the page size, pgsz code, and shift */
4698 	pg_size = MMU_PAGESIZE;
4699 	pg_size_code = page_szc(pg_size);
4700 	pg_shift = page_get_shift(pg_size_code);
4701 
4702 	/* calculate the number of pages in the exported cookie */
4703 	for (idx = 0; idx < ccount; idx++) {
4704 		if (cookie[idx].addr & MMU_PAGEOFFSET ||
4705 			cookie[idx].size & MMU_PAGEOFFSET) {
4706 			DWARN(ldcp->id,
4707 			    "ldc_mem_map: cookie addr/size not page aligned, "
4708 			    "0x%llx\n", cookie[idx].addr);
4709 			mutex_exit(&ldcp->lock);
4710 			mutex_exit(&mhdl->lock);
4711 			return (EINVAL);
4712 		}
4713 		exp_size += cookie[idx].size;
4714 	}
4715 	npages = (exp_size >> pg_shift);
4716 
4717 	/* Allocate memseg structure */
4718 	memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
4719 
4720 	/* Allocate memory to store all pages and cookies */
4721 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4722 	memseg->cookies =
4723 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
4724 
4725 	D2(ldcp->id, "ldc_mem_map: (0x%llx) processing 0x%llx pages\n",
4726 	    ldcp->id, npages);
4727 
4728 	/* Check to see if the client is requesting direct or shadow map */
4729 	if (mtype == LDC_SHADOW_MAP) {
4730 		if (*vaddr == NULL) {
4731 			shadow_base =
4732 				contig_mem_alloc_align(exp_size, PAGESIZE);
4733 			if (shadow_base == NULL) {
4734 				cmn_err(CE_WARN, "ldc_mem_map: shadow memory "
4735 				    "allocation failed\n");
4736 				kmem_free(memseg->cookies,
4737 				    (sizeof (ldc_mem_cookie_t) * ccount));
4738 				kmem_free(memseg->pages,
4739 				    (sizeof (ldc_page_t) * npages));
4740 				kmem_free(memseg, sizeof (ldc_memseg_t));
4741 				mutex_exit(&ldcp->lock);
4742 				mutex_exit(&mhdl->lock);
4743 				return (ENOMEM);
4744 			}
4745 
4746 			bzero(shadow_base, exp_size);
4747 			mhdl->myshadow = B_TRUE;
4748 
4749 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
4750 			    "shadow page va=0x%llx\n", ldcp->id, shadow_base);
4751 		} else {
4752 			/*
4753 			 * Use client supplied memory for shadow_base
4754 			 * WARNING: assuming that client mem is >= exp_size
4755 			 */
4756 			shadow_base = *vaddr;
4757 		}
4758 	} else if (mtype == LDC_DIRECT_MAP) {
4759 		/* FUTURE: Do a direct map by calling into HV */
4760 		_NOTE(EMPTY)
4761 	}
4762 
4763 	/* Save all page and cookie information */
4764 	for (i = 0, tmpaddr = shadow_base; i < npages; i++) {
4765 		memseg->pages[i].raddr = va_to_pa(tmpaddr);
4766 		memseg->pages[i].size = pg_size;
4767 		memseg->pages[i].index = 0;
4768 		memseg->pages[i].offset = 0;
4769 		memseg->pages[i].mte = NULL;
4770 		tmpaddr += pg_size;
4771 	}
4772 	for (i = 0; i < ccount; i++) {
4773 		memseg->cookies[i].addr = cookie[i].addr;
4774 		memseg->cookies[i].size = cookie[i].size;
4775 	}
4776 
4777 	/* update memseg_t */
4778 	memseg->vaddr = shadow_base;
4779 	memseg->raddr = memseg->pages[0].raddr;
4780 	memseg->size = exp_size;
4781 	memseg->npages = npages;
4782 	memseg->ncookies = ccount;
4783 	memseg->next_cookie = 0;
4784 
4785 	/* memory handle = mapped */
4786 	mhdl->mtype = mtype;
4787 	mhdl->perm = 0;
4788 	mhdl->status = LDC_MAPPED;
4789 
4790 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
4791 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
4792 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
4793 	    memseg->npages, memseg->ncookies);
4794 
4795 	if (raddr)
4796 		*raddr = (caddr_t)memseg->raddr;
4797 	if (vaddr)
4798 		*vaddr = memseg->vaddr;
4799 
4800 	mutex_exit(&ldcp->lock);
4801 	mutex_exit(&mhdl->lock);
4802 	return (0);
4803 }
4804 
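/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): an importer mapping a peer's exported segment through a
 * driver-allocated shadow buffer. chan_hdl and cookie are
 * placeholders.
 *
 *	caddr_t va = NULL;
 *	ldc_mem_handle_t mhdl;
 *
 *	(void) ldc_mem_alloc_handle(chan_hdl, &mhdl);
 *	int rv = ldc_mem_map(mhdl, &cookie, 1, LDC_SHADOW_MAP,
 *	    &va, NULL);
 *
 * Passing *vaddr == NULL asks ldc_mem_map() to allocate the shadow
 * memory itself; alternatively a page-aligned buffer of at least the
 * exported size may be supplied by the caller.
 */
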
4805 /*
4806  * Unmap a memory segment. Free shadow memory (if any).
4807  */
4808 int
4809 ldc_mem_unmap(ldc_mem_handle_t mhandle)
4810 {
4811 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
4812 	ldc_chan_t 	*ldcp;
4813 	ldc_memseg_t	*memseg;
4814 
4815 	if (mhdl == NULL || mhdl->status != LDC_MAPPED) {
4816 		DWARN(DBG_ALL_LDCS,
4817 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
4818 		    mhandle);
4819 		return (EINVAL);
4820 	}
4821 
4822 	mutex_enter(&mhdl->lock);
4823 
4824 	ldcp = mhdl->ldcp;
4825 	memseg = mhdl->memseg;
4826 
4827 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
4828 	    ldcp->id, mhdl);
4829 
4830 	/* if we allocated shadow memory - free it */
4831 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
4832 		contig_mem_free(memseg->vaddr, memseg->size);
4833 	}
4834 
4835 	/* free the allocated memseg and page structures */
4836 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4837 	kmem_free(memseg->cookies,
4838 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
4839 	kmem_free(memseg, sizeof (ldc_memseg_t));
4840 
4841 	/* uninitialize the memory handle */
4842 	mhdl->memseg = NULL;
4843 	mhdl->status = LDC_UNBOUND;
4844 
4845 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
4846 	    ldcp->id, mhdl);
4847 
4848 	mutex_exit(&mhdl->lock);
4849 	return (0);
4850 }
4851 
4852 /*
4853  * Internal entry point for LDC mapped memory entry consistency
4854  * semantics. Acquire copies the contents of the remote memory
4855  * into the local shadow copy. The release operation copies the local
4856  * contents into the remote memory. The offset and size specify the
4857  * bounds for the memory range being synchronized.
4858  */
4859 static int
4860 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
4861     uint64_t offset, size_t size)
4862 {
4863 	int 		err;
4864 	ldc_mhdl_t	*mhdl;
4865 	ldc_chan_t	*ldcp;
4866 	ldc_memseg_t	*memseg;
4867 	caddr_t		local_vaddr;
4868 	size_t		copy_size;
4869 
4870 	if (mhandle == NULL) {
4871 		DWARN(DBG_ALL_LDCS,
4872 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
4873 		return (EINVAL);
4874 	}
4875 	mhdl = (ldc_mhdl_t *)mhandle;
4876 
4877 	mutex_enter(&mhdl->lock);
4878 
4879 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
4880 		DWARN(DBG_ALL_LDCS,
4881 		    "i_ldc_mem_acquire_release: not mapped memory\n");
4882 		mutex_exit(&mhdl->lock);
4883 		return (EINVAL);
4884 	}
4885 
4886 	if (offset >= mhdl->memseg->size ||
4887 	    (offset + size) > mhdl->memseg->size) {
4888 		DWARN(DBG_ALL_LDCS,
4889 		    "i_ldc_mem_acquire_release: memory out of range\n");
4890 		mutex_exit(&mhdl->lock);
4891 		return (EINVAL);
4892 	}
4893 
4894 	/* get the channel handle and memory segment */
4895 	ldcp = mhdl->ldcp;
4896 	memseg = mhdl->memseg;
4897 
4898 	if (mhdl->mtype == LDC_SHADOW_MAP) {
4899 
4900 		local_vaddr = memseg->vaddr + offset;
4901 		copy_size = size;
4902 
4903 		/* copy to/from remote from/to local memory */
4904 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
4905 		    &copy_size, memseg->cookies, memseg->ncookies,
4906 		    direction);
4907 		if (err || copy_size != size) {
4908 			cmn_err(CE_WARN,
4909 			    "i_ldc_mem_acquire_release: copy failed\n");
4910 			mutex_exit(&mhdl->lock);
4911 			return (err);
4912 		}
4913 	}
4914 
4915 	mutex_exit(&mhdl->lock);
4916 
4917 	return (0);
4918 }
4919 
4920 /*
4921  * Ensure that the contents of the local memory segment are consistent
4922  * with the contents of the remote memory segment
4923  */
4924 int
4925 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
4926 {
4927 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
4928 }
4929 
4930 
4931 /*
4932  * Ensure that the contents of the remote memory segment are consistent
4933  * with the contents of the local memory segment
4934  */
4935 int
4936 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
4937 {
4938 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
4939 }
4940 
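/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): with a shadow mapping, reads and writes go through the
 * local shadow copy and must be bracketed by the consistency calls
 * above. mhdl, va, off and sz are placeholders.
 *
 *	(void) ldc_mem_acquire(mhdl, off, sz);	(pull remote -> shadow)
 *	... read or modify the shadow at va + off ...
 *	(void) ldc_mem_release(mhdl, off, sz);	(push shadow -> remote)
 */
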
4941 /*
4942  * Allocate a descriptor ring. The size of each descriptor
4943  * must be 8-byte aligned and the entire ring should be a multiple
4944  * of MMU_PAGESIZE.
4945  */
4946 int
4947 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
4948 {
4949 	ldc_dring_t *dringp;
4950 	size_t size = (dsize * len);
4951 
4952 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, size=0x%x\n",
4953 	    len, dsize);
4954 
4955 	if (dhandle == NULL) {
4956 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
4957 		return (EINVAL);
4958 	}
4959 
4960 	if (len == 0) {
4961 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
4962 		return (EINVAL);
4963 	}
4964 
4965 	/* descriptor size should be 8-byte aligned */
4966 	if (dsize == 0 || (dsize & 0x7)) {
4967 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
4968 		return (EINVAL);
4969 	}
4970 
4971 	*dhandle = 0;
4972 
4973 	/* Allocate a desc ring structure */
4974 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
4975 
4976 	/* Initialize dring */
4977 	dringp->length = len;
4978 	dringp->dsize = dsize;
4979 
4980 	/* round off to a multiple of the page size */
4981 	dringp->size = (size & MMU_PAGEMASK);
4982 	if (size & MMU_PAGEOFFSET)
4983 		dringp->size += MMU_PAGESIZE;
4984 
4985 	dringp->status = LDC_UNBOUND;
4986 
4987 	/* allocate descriptor ring memory */
4988 	dringp->base = contig_mem_alloc_align(dringp->size, PAGESIZE);
4989 	if (dringp->base == NULL) {
4990 		cmn_err(CE_WARN,
4991 		    "ldc_mem_dring_create: unable to alloc desc\n");
4992 		kmem_free(dringp, sizeof (ldc_dring_t));
4993 		return (ENOMEM);
4994 	}
4995 
4996 	bzero(dringp->base, dringp->size);
4997 
4998 	/* initialize the desc ring lock */
4999 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5000 
5001 	/* Add descriptor ring to the head of global list */
5002 	mutex_enter(&ldcssp->lock);
5003 	dringp->next = ldcssp->dring_list;
5004 	ldcssp->dring_list = dringp;
5005 	mutex_exit(&ldcssp->lock);
5006 
5007 	*dhandle = (ldc_dring_handle_t)dringp;
5008 
5009 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
5010 
5011 	return (0);
5012 }
5013 
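/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): a ring of 128 entries of a hypothetical 64-byte my_desc_t;
 * dsize must be a multiple of 8 and the ring is rounded up to whole
 * MMU pages (128 * 64 = 8K, exactly one 8K page on sun4v).
 *
 *	ldc_dring_handle_t dhdl;
 *	int rv = ldc_mem_dring_create(128, sizeof (my_desc_t), &dhdl);
 */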
5014 
5015 /*
5016  * Destroy a descriptor ring.
5017  */
5018 int
5019 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5020 {
5021 	ldc_dring_t *dringp;
5022 	ldc_dring_t *tmp_dringp;
5023 
5024 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5025 
5026 	if (dhandle == NULL) {
5027 		DWARN(DBG_ALL_LDCS,
5028 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5029 		return (EINVAL);
5030 	}
5031 	dringp = (ldc_dring_t *)dhandle;
5032 
5033 	if (dringp->status == LDC_BOUND) {
5034 		DWARN(DBG_ALL_LDCS,
5035 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5036 		return (EACCES);
5037 	}
5038 
5039 	mutex_enter(&dringp->lock);
5040 	mutex_enter(&ldcssp->lock);
5041 
5042 	/* remove from linked list - if not bound */
5043 	tmp_dringp = ldcssp->dring_list;
5044 	if (tmp_dringp == dringp) {
5045 		ldcssp->dring_list = dringp->next;
5046 		dringp->next = NULL;
5047 
5048 	} else {
5049 		while (tmp_dringp != NULL) {
5050 			if (tmp_dringp->next == dringp) {
5051 				tmp_dringp->next = dringp->next;
5052 				dringp->next = NULL;
5053 				break;
5054 			}
5055 			tmp_dringp = tmp_dringp->next;
5056 		}
5057 		if (tmp_dringp == NULL) {
5058 			DWARN(DBG_ALL_LDCS,
5059 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5060 			mutex_exit(&ldcssp->lock);
5061 			mutex_exit(&dringp->lock);
5062 			return (EINVAL);
5063 		}
5064 	}
5065 
5066 	mutex_exit(&ldcssp->lock);
5067 
5068 	/* free the descriptor ring */
5069 	contig_mem_free((caddr_t)dringp->base, dringp->size);
5070 
5071 	mutex_exit(&dringp->lock);
5072 
5073 	/* destroy dring lock */
5074 	mutex_destroy(&dringp->lock);
5075 
5076 	/* free desc ring object */
5077 	kmem_free(dringp, sizeof (ldc_dring_t));
5078 
5079 	return (0);
5080 }
5081 
5082 /*
5083  * Bind a previously allocated dring to a channel. The channel should
5084  * be OPEN in order to bind the ring to the channel. Returns back a
5085  * descriptor ring cookie. The descriptor ring is exported for remote
5086  * access by the client at the other end of the channel. Entries for
5087  * the dring pages are stored in the map table (via ldc_mem_bind_handle).
5088  */
5089 int
5090 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5091     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5092 {
5093 	int		err;
5094 	ldc_chan_t 	*ldcp;
5095 	ldc_dring_t	*dringp;
5096 	ldc_mem_handle_t mhandle;
5097 
5098 	/* check to see if the channel is initialized */
5099 	if (handle == NULL) {
5100 		DWARN(DBG_ALL_LDCS,
5101 		    "ldc_mem_dring_bind: invalid channel handle\n");
5102 		return (EINVAL);
5103 	}
5104 	ldcp = (ldc_chan_t *)handle;
5105 
5106 	if (dhandle == NULL) {
5107 		DWARN(DBG_ALL_LDCS,
5108 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5109 		return (EINVAL);
5110 	}
5111 	dringp = (ldc_dring_t *)dhandle;
5112 
5113 	if (cookie == NULL) {
5114 		DWARN(ldcp->id,
5115 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5116 		return (EINVAL);
5117 	}
5118 
5119 	mutex_enter(&dringp->lock);
5120 
5121 	if (dringp->status == LDC_BOUND) {
5122 		DWARN(DBG_ALL_LDCS,
5123 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5124 		    ldcp->id);
5125 		mutex_exit(&dringp->lock);
5126 		return (EINVAL);
5127 	}
5128 
5129 	if ((perm & LDC_MEM_RW) == 0) {
5130 		DWARN(DBG_ALL_LDCS,
5131 		    "ldc_mem_dring_bind: invalid permissions\n");
5132 		mutex_exit(&dringp->lock);
5133 		return (EINVAL);
5134 	}
5135 
5136 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5137 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5138 		mutex_exit(&dringp->lock);
5139 		return (EINVAL);
5140 	}
5141 
5142 	dringp->ldcp = ldcp;
5143 
5144 	/* create a memory handle */
5145 	err = ldc_mem_alloc_handle(handle, &mhandle);
5146 	if (err || mhandle == NULL) {
5147 		DWARN(DBG_ALL_LDCS,
5148 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5149 		    ldcp->id);
5150 		mutex_exit(&dringp->lock);
5151 		return (err);
5152 	}
5153 	dringp->mhdl = mhandle;
5154 
5155 	/* bind the descriptor ring to channel */
5156 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5157 	    mtype, perm, cookie, ccount);
5158 	if (err) {
5159 		DWARN(ldcp->id,
5160 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5161 		    ldcp->id);
5162 		mutex_exit(&dringp->lock);
5163 		return (err);
5164 	}
5165 
5166 	/*
5167 	 * For now return error if we get more than one cookie
5168 	 * FUTURE: Return multiple cookies ..
5169 	 */
5170 	if (*ccount > 1) {
5171 		(void) ldc_mem_unbind_handle(mhandle);
5172 		(void) ldc_mem_free_handle(mhandle);
5173 
5174 		dringp->ldcp = NULL;
5175 		dringp->mhdl = NULL;
5176 		*ccount = 0;
5177 
5178 		mutex_exit(&dringp->lock);
5179 		return (EAGAIN);
5180 	}
5181 
5182 	/* Add descriptor ring to channel's exported dring list */
5183 	mutex_enter(&ldcp->exp_dlist_lock);
5184 	dringp->ch_next = ldcp->exp_dring_list;
5185 	ldcp->exp_dring_list = dringp;
5186 	mutex_exit(&ldcp->exp_dlist_lock);
5187 
5188 	dringp->status = LDC_BOUND;
5189 
5190 	mutex_exit(&dringp->lock);
5191 
5192 	return (0);
5193 }
5194 
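/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): exporting the ring created above over an OPEN channel;
 * since only one cookie per dring is currently supported, EAGAIN
 * indicates the ring spanned multiple cookies. chan_hdl and dhdl are
 * placeholders.
 *
 *	ldc_mem_cookie_t dcookie;
 *	uint32_t	 ccount;
 *
 *	int rv = ldc_mem_dring_bind(chan_hdl, dhdl, LDC_SHADOW_MAP,
 *	    LDC_MEM_RW, &dcookie, &ccount);
 */
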
5195 /*
5196  * Return the next cookie associated with the specified dring handle
5197  */
5198 int
5199 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5200 {
5201 	int		rv = 0;
5202 	ldc_dring_t 	*dringp;
5203 	ldc_chan_t	*ldcp;
5204 
5205 	if (dhandle == NULL) {
5206 		DWARN(DBG_ALL_LDCS,
5207 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5208 		return (EINVAL);
5209 	}
5210 	dringp = (ldc_dring_t *)dhandle;
5211 	mutex_enter(&dringp->lock);
5212 
5213 	if (dringp->status != LDC_BOUND) {
5214 		DWARN(DBG_ALL_LDCS,
5215 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5216 		    "is not bound\n", dringp);
5217 		mutex_exit(&dringp->lock);
5218 		return (EINVAL);
5219 	}
5220 
5221 	ldcp = dringp->ldcp;
5222 
5223 	if (cookie == NULL) {
5224 		DWARN(ldcp->id,
5225 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5226 		    ldcp->id);
5227 		mutex_exit(&dringp->lock);
5228 		return (EINVAL);
5229 	}
5230 
5231 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5232 	mutex_exit(&dringp->lock);
5233 
5234 	return (rv);
5235 }

5236 /*
5237  * Unbind a previously bound dring from a channel.
5238  */
5239 int
5240 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5241 {
5242 	ldc_dring_t 	*dringp;
5243 	ldc_dring_t	*tmp_dringp;
5244 	ldc_chan_t	*ldcp;
5245 
5246 	if (dhandle == NULL) {
5247 		DWARN(DBG_ALL_LDCS,
5248 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5249 		return (EINVAL);
5250 	}
5251 	dringp = (ldc_dring_t *)dhandle;
5252 
5253 	mutex_enter(&dringp->lock);
5254 
5255 	if (dringp->status == LDC_UNBOUND) {
5256 		DWARN(DBG_ALL_LDCS,
5257 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5258 		    dringp);
5259 		mutex_exit(&dringp->lock);
5260 		return (EINVAL);
5261 	}
5262 	ldcp = dringp->ldcp;
5263 
5264 	mutex_enter(&ldcp->exp_dlist_lock);
5265 
5266 	tmp_dringp = ldcp->exp_dring_list;
5267 	if (tmp_dringp == dringp) {
5268 		ldcp->exp_dring_list = dringp->ch_next;
5269 		dringp->ch_next = NULL;
5270 
5271 	} else {
5272 		while (tmp_dringp != NULL) {
5273 			if (tmp_dringp->ch_next == dringp) {
5274 				tmp_dringp->ch_next = dringp->ch_next;
5275 				dringp->ch_next = NULL;
5276 				break;
5277 			}
5278 			tmp_dringp = tmp_dringp->ch_next;
5279 		}
5280 		if (tmp_dringp == NULL) {
5281 			DWARN(DBG_ALL_LDCS,
5282 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5283 			mutex_exit(&ldcp->exp_dlist_lock);
5284 			mutex_exit(&dringp->lock);
5285 			return (EINVAL);
5286 		}
5287 	}
5288 
5289 	mutex_exit(&ldcp->exp_dlist_lock);
5290 
5291 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5292 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5293 
5294 	dringp->ldcp = NULL;
5295 	dringp->mhdl = NULL;
5296 	dringp->status = LDC_UNBOUND;
5297 
5298 	mutex_exit(&dringp->lock);
5299 
5300 	return (0);
5301 }
5302 
5303 /*
5304  * Get information about the dring. The base address of the descriptor
5305  * ring along with the type and permission are returned back.
5306  */
5307 int
5308 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5309 {
5310 	ldc_dring_t	*dringp;
5311 	int		rv;
5312 
5313 	if (dhandle == NULL) {
5314 		DWARN(DBG_ALL_LDCS,
5315 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5316 		return (EINVAL);
5317 	}
5318 	dringp = (ldc_dring_t *)dhandle;
5319 
5320 	mutex_enter(&dringp->lock);
5321 
5322 	if (dringp->mhdl) {
5323 		rv = ldc_mem_info(dringp->mhdl, minfo);
5324 		if (rv) {
5325 			DWARN(DBG_ALL_LDCS,
5326 			    "ldc_mem_dring_info: error reading mem info\n");
5327 			mutex_exit(&dringp->lock);
5328 			return (rv);
5329 		}
5330 	} else {
5331 		minfo->vaddr = dringp->base;
5332 		minfo->raddr = NULL;
5333 		minfo->status = dringp->status;
5334 	}
5335 
5336 	mutex_exit(&dringp->lock);
5337 
5338 	return (0);
5339 }
5340 
5341 /*
5342  * Map an exported descriptor ring into the local address space. If the
5343  * descriptor ring was exported for direct map access, a HV call is made
5344  * to allocate a RA range. If the map is done via a shadow copy, local
5345  * shadow memory is allocated.
5346  */
5347 int
5348 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
5349     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
5350     ldc_dring_handle_t *dhandle)
5351 {
5352 	int		err;
5353 	ldc_chan_t 	*ldcp;
5354 	ldc_mem_handle_t mhandle;
5355 	ldc_dring_t	*dringp;
5356 	size_t		dring_size;
5357 
5358 	if (dhandle == NULL) {
5359 		DWARN(DBG_ALL_LDCS,
5360 		    "ldc_mem_dring_map: invalid dhandle\n");
5361 		return (EINVAL);
5362 	}
5363 
5364 	/* check to see if the channel is initialized */
5365 	if (handle == NULL) {
5366 		DWARN(DBG_ALL_LDCS,
5367 		    "ldc_mem_dring_map: invalid channel handle\n");
5368 		return (EINVAL);
5369 	}
5370 	ldcp = (ldc_chan_t *)handle;
5371 
5372 	if (cookie == NULL) {
5373 		DWARN(ldcp->id,
5374 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
5375 		    ldcp->id);
5376 		return (EINVAL);
5377 	}
5378 
5379 	/* FUTURE: For now we support only one cookie per dring */
5380 	ASSERT(ccount == 1);
5381 
5382 	if (cookie->size < (dsize * len)) {
5383 		DWARN(ldcp->id,
5384 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
5385 		    ldcp->id);
5386 		return (EINVAL);
5387 	}
5388 
5389 	*dhandle = 0;
5390 
5391 	/* Allocate a dring structure */
5392 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5393 
5394 	D1(ldcp->id,
5395 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
5396 	    mtype, len, dsize, cookie->addr, cookie->size);
5397 
5398 	/* Initialize dring */
5399 	dringp->length = len;
5400 	dringp->dsize = dsize;
5401 
5402 	/* round off to a multiple of the page size */
5403 	dring_size = len * dsize;
5404 	dringp->size = (dring_size & MMU_PAGEMASK);
5405 	if (dring_size & MMU_PAGEOFFSET)
5406 		dringp->size += MMU_PAGESIZE;
5407 
5408 	dringp->ldcp = ldcp;
5409 
5410 	/* create a memory handle */
5411 	err = ldc_mem_alloc_handle(handle, &mhandle);
5412 	if (err || mhandle == NULL) {
5413 		DWARN(DBG_ALL_LDCS,
5414 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
5415 		    err);
5416 		kmem_free(dringp, sizeof (ldc_dring_t));
5417 		return (ENOMEM);
5418 	}
5419 
5420 	dringp->mhdl = mhandle;
5421 	dringp->base = NULL;
5422 
5423 	/* map the dring into local memory */
5424 	err = ldc_mem_map(mhandle, cookie, ccount, mtype,
5425 	    &(dringp->base), NULL);
5426 	if (err || dringp->base == NULL) {
5427 		cmn_err(CE_WARN,
5428 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
5429 		(void) ldc_mem_free_handle(mhandle);
5430 		kmem_free(dringp, sizeof (ldc_dring_t));
5431 		return (ENOMEM);
5432 	}
5433 
5434 	/* initialize the desc ring lock */
5435 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5436 
5437 	/* Add descriptor ring to channel's imported dring list */
5438 	mutex_enter(&ldcp->imp_dlist_lock);
5439 	dringp->ch_next = ldcp->imp_dring_list;
5440 	ldcp->imp_dring_list = dringp;
5441 	mutex_exit(&ldcp->imp_dlist_lock);
5442 
5443 	dringp->status = LDC_MAPPED;
5444 
5445 	*dhandle = (ldc_dring_handle_t)dringp;
5446 
5447 	return (0);
5448 }
5449 
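/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): the importer maps the peer's ring from the single cookie
 * it received; len and dsize must match the exporter's values.
 * chan_hdl, dcookie and my_desc_t are placeholders.
 *
 *	ldc_dring_handle_t dhdl;
 *
 *	int rv = ldc_mem_dring_map(chan_hdl, &dcookie, 1, 128,
 *	    sizeof (my_desc_t), LDC_SHADOW_MAP, &dhdl);
 */
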
5450 /*
5451  * Unmap a descriptor ring. Free shadow memory (if any).
5452  */
5453 int
5454 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
5455 {
5456 	ldc_dring_t 	*dringp;
5457 	ldc_dring_t	*tmp_dringp;
5458 	ldc_chan_t	*ldcp;
5459 
5460 	if (dhandle == NULL) {
5461 		DWARN(DBG_ALL_LDCS,
5462 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
5463 		return (EINVAL);
5464 	}
5465 	dringp = (ldc_dring_t *)dhandle;
5466 
5467 	if (dringp->status != LDC_MAPPED) {
5468 		DWARN(DBG_ALL_LDCS,
5469 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
5470 		return (EINVAL);
5471 	}
5472 
5473 	mutex_enter(&dringp->lock);
5474 
5475 	ldcp = dringp->ldcp;
5476 
5477 	mutex_enter(&ldcp->imp_dlist_lock);
5478 
5479 	/* find and unlink the desc ring from channel import list */
5480 	tmp_dringp = ldcp->imp_dring_list;
5481 	if (tmp_dringp == dringp) {
5482 		ldcp->imp_dring_list = dringp->ch_next;
5483 		dringp->ch_next = NULL;
5484 
5485 	} else {
5486 		while (tmp_dringp != NULL) {
5487 			if (tmp_dringp->ch_next == dringp) {
5488 				tmp_dringp->ch_next = dringp->ch_next;
5489 				dringp->ch_next = NULL;
5490 				break;
5491 			}
5492 			tmp_dringp = tmp_dringp->ch_next;
5493 		}
5494 		if (tmp_dringp == NULL) {
5495 			DWARN(DBG_ALL_LDCS,
5496 			    "ldc_mem_dring_unmap: invalid descriptor\n");
5497 			mutex_exit(&ldcp->imp_dlist_lock);
5498 			mutex_exit(&dringp->lock);
5499 			return (EINVAL);
5500 		}
5501 	}
5502 
5503 	mutex_exit(&ldcp->imp_dlist_lock);
5504 
5505 	/* do a LDC memory handle unmap and free */
5506 	(void) ldc_mem_unmap(dringp->mhdl);
5507 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5508 
5509 	dringp->status = 0;
5510 	dringp->ldcp = NULL;
5511 
5512 	mutex_exit(&dringp->lock);
5513 
5514 	/* destroy dring lock */
5515 	mutex_destroy(&dringp->lock);
5516 
5517 	/* free desc ring object */
5518 	kmem_free(dringp, sizeof (ldc_dring_t));
5519 
5520 	return (0);
5521 }
5522 
5523 /*
5524  * Internal entry point for descriptor ring access entry consistency
5525  * semantics. Acquire copies the contents of the remote descriptor ring
5526  * into the local shadow copy. The release operation copies the local
5527  * contents into the remote dring. The start and end locations specify
5528  * bounds for the entries being synchronized.
5529  */
5530 static int
5531 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
5532     uint8_t direction, uint64_t start, uint64_t end)
5533 {
5534 	int 			err;
5535 	ldc_dring_t		*dringp;
5536 	ldc_chan_t		*ldcp;
5537 	uint64_t		soff;
5538 	size_t			copy_size;
5539 
5540 	if (dhandle == NULL) {
5541 		DWARN(DBG_ALL_LDCS,
5542 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
5543 		return (EINVAL);
5544 	}
5545 	dringp = (ldc_dring_t *)dhandle;
5546 	mutex_enter(&dringp->lock);
5547 
5548 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
5549 		DWARN(DBG_ALL_LDCS,
5550 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
5551 		mutex_exit(&dringp->lock);
5552 		return (EINVAL);
5553 	}
5554 
5555 	if (start >= dringp->length || end >= dringp->length) {
5556 		DWARN(DBG_ALL_LDCS,
5557 		    "i_ldc_dring_acquire_release: index out of range\n");
5558 		mutex_exit(&dringp->lock);
5559 		return (EINVAL);
5560 	}
5561 
5562 	/* get the channel handle */
5563 	ldcp = dringp->ldcp;
5564 
5565 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
5566 		((dringp->length - start) * dringp->dsize);
5567 
5568 	/* Calculate the relative offset for the first desc */
5569 	soff = (start * dringp->dsize);
5570 
5571 	/* copy to/from remote from/to local memory */
5572 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
5573 	    soff, copy_size);
5574 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5575 	    direction, soff, copy_size);
5576 	if (err) {
5577 		DWARN(ldcp->id,
5578 		    "i_ldc_dring_acquire_release: copy failed\n");
5579 		mutex_exit(&dringp->lock);
5580 		return (err);
5581 	}
5582 
5583 	/* do the balance */
5584 	if (start > end) {
5585 		copy_size = ((end + 1) * dringp->dsize);
5586 		soff = 0;
5587 
5588 		/* copy to/from remote from/to local memory */
5589 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
5590 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
5591 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5592 		    direction, soff, copy_size);
5593 		if (err) {
5594 			DWARN(ldcp->id,
5595 			    "i_ldc_dring_acquire_release: copy failed\n");
5596 			mutex_exit(&dringp->lock);
5597 			return (err);
5598 		}
5599 	}
5600 
5601 	mutex_exit(&dringp->lock);
5602 
5603 	return (0);
5604 }
5605 
5606 /*
5607  * Ensure that the contents in the local dring are consistent
5608  * with the contents of the remote dring
5609  */
5610 int
5611 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5612 {
5613 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
5614 }
5615 
5616 /*
5617  * Ensure that the contents in the remote dring are consistent
5618  * with the contents of the local dring
5619  */
5620 int
5621 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5622 {
5623 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
5624 }
5625 
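/*
 * Usage sketch (illustrative annotation, not part of the original
 * source): start/end indices are inclusive and may wrap, in which
 * case the sync is split into two copies. With a 128-entry ring,
 * acquiring descriptors 120 through 7 syncs entries 120-127 and
 * then 0-7. dhdl is a placeholder.
 *
 *	(void) ldc_mem_dring_acquire(dhdl, 120, 7);
 */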
5626 
5627 /* ------------------------------------------------------------------------- */
5628