1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4v LDC Transport Layer
31  */
32 #include <sys/types.h>
33 #include <sys/file.h>
34 #include <sys/errno.h>
35 #include <sys/open.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/cmn_err.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/stat.h> /* needed for S_IFBLK and S_IFCHR */
43 #include <sys/debug.h>
46 #include <sys/promif.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/cyclic.h>
50 #include <sys/machsystm.h>
51 #include <sys/vm.h>
52 #include <sys/cpu.h>
53 #include <sys/intreg.h>
54 #include <sys/machcpuvar.h>
55 #include <sys/note.h>
56 #include <sys/ivintr.h>
57 #include <sys/hypervisor_api.h>
58 #include <sys/ldc.h>
59 #include <sys/ldc_impl.h>
60 #include <sys/cnex.h>
61 #include <sys/hsvc.h>
62 
63 /* Core internal functions */
64 static int i_ldc_h2v_error(int h_error);
65 static int i_ldc_txq_reconf(ldc_chan_t *ldcp);
66 static int i_ldc_rxq_reconf(ldc_chan_t *ldcp);
67 static void i_ldc_reset_state(ldc_chan_t *ldcp);
68 static void i_ldc_reset(ldc_chan_t *ldcp);
69 
70 static int i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail);
71 static int i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail);
72 static int i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head);
73 static int i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
74     uint8_t ctrlmsg);
75 
76 /* Interrupt handling functions */
77 static uint_t i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2);
78 static uint_t i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2);
79 static void i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype);
80 
81 /* Read method functions */
82 static int i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep);
83 static int i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
84 	size_t *sizep);
85 static int i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
86 	size_t *sizep);
87 
88 /* Write method functions */
89 static int i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t target_bufp,
90 	size_t *sizep);
91 static int i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t target_bufp,
92 	size_t *sizep);
93 static int i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t target_bufp,
94 	size_t *sizep);
95 
96 /* Pkt processing internal functions */
97 static int i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
98 static int i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *ldcmsg);
99 static int i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg);
100 static int i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg);
101 static int i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg);
102 static int i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg);
103 static int i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg);
104 
105 /* Memory synchronization internal functions */
106 static int i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle,
107     uint8_t direction, uint64_t offset, size_t size);
108 static int i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
109     uint8_t direction, uint64_t start, uint64_t end);
110 
111 /* LDC Version */
112 static ldc_ver_t ldc_versions[] = { {1, 0} };
113 
114 /* number of supported versions */
115 #define	LDC_NUM_VERS	(sizeof (ldc_versions) / sizeof (ldc_versions[0]))
116 
117 /* Module State Pointer */
118 static ldc_soft_state_t *ldcssp;
119 
120 static struct modldrv md = {
121 	&mod_miscops,			/* This is a misc module */
122 	"sun4v LDC module v%I%",	/* Name of the module */
123 };
124 
125 static struct modlinkage ml = {
126 	MODREV_1,
127 	&md,
128 	NULL
129 };
130 
131 static uint64_t ldc_sup_minor;		/* Supported minor number */
132 static hsvc_info_t ldc_hsvc = {
133 	HSVC_REV_1, NULL, HSVC_GROUP_LDC, 1, 0, "ldc"
134 };
135 
136 static uint64_t intr_sup_minor;		/* Supported minor number */
137 static hsvc_info_t intr_hsvc = {
138 	HSVC_REV_1, NULL, HSVC_GROUP_INTR, 1, 0, "ldc"
139 };
140 
141 #ifdef DEBUG
142 
143 /*
144  * Print debug messages
145  *
146  * set ldcdbg to 0x7 to enable all messages
147  * 0x4 - Warnings
148  * 0x2 - All debug messages
149  * 0x1 - Minimal debug messages
150  *
151  * set ldcdbgchan to the channel number you want to debug
152  * setting it to -1 prints debug messages for all channels
153  * NOTE: ldcdbgchan has no effect on error messages
154  */
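
/*
 * For example (illustrative only), both tunables can be set at boot
 * via /etc/system:
 *
 *	set ldc:ldcdbg = 0x7
 *	set ldc:ldcdbgchan = 0x3
 *
 * or ldcdbg can be patched in a running kernel with mdb -kw
 * (e.g. "ldcdbg/W 7").
 */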
155 
156 #define	DBG_ALL_LDCS -1
157 
158 int ldcdbg = 0x0;
159 int64_t ldcdbgchan = DBG_ALL_LDCS;
160 
161 static void
162 ldcdebug(int64_t id, const char *fmt, ...)
163 {
164 	char buf[512];
165 	va_list ap;
166 
167 	/*
168 	 * Print the message if any of the following holds:
169 	 * the caller wants it printed regardless - (id == DBG_ALL_LDCS)
170 	 * the debug channel is set to all LDCs - (ldcdbgchan == DBG_ALL_LDCS)
171 	 * the debug channel matches the caller-specified channel
172 	 */
173 	if ((id != DBG_ALL_LDCS) &&
174 	    (ldcdbgchan != DBG_ALL_LDCS) &&
175 	    (ldcdbgchan != id)) {
176 		return;
177 	}
178 
179 	va_start(ap, fmt);
180 	(void) vsprintf(buf, fmt, ap);
181 	va_end(ap);
182 
183 	cmn_err(CE_CONT, "?%s\n", buf);
184 }
185 
186 #define	D1		\
187 if (ldcdbg & 0x01)	\
188 	ldcdebug
189 
190 #define	D2		\
191 if (ldcdbg & 0x02)	\
192 	ldcdebug
193 
194 #define	DWARN		\
195 if (ldcdbg & 0x04)	\
196 	ldcdebug
197 
198 #define	DUMP_PAYLOAD(id, addr)						\
199 {									\
200 	char buf[65*3];							\
201 	int i;								\
202 	uint8_t *src = (uint8_t *)addr;					\
203 	for (i = 0; i < 64; i++, src++)					\
204 		(void) sprintf(&buf[i * 3], "|%02x", *src);		\
205 	(void) sprintf(&buf[i * 3], "|\n");				\
206 	D2((id), "payload: %s", buf);					\
207 }
208 
209 #define	DUMP_LDC_PKT(c, s, addr)					\
210 {									\
211 	ldc_msg_t *msg = (ldc_msg_t *)(addr);				\
212 	uint32_t mid = ((c)->mode != LDC_MODE_RAW) ? msg->seqid : 0;	\
213 	if (msg->type == LDC_DATA) {                                    \
214 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env[%c%c,sz=%d])",	\
215 	    (s), mid, msg->type, msg->stype, msg->ctrl,			\
216 	    (msg->env & LDC_FRAG_START) ? 'B' : ' ',                    \
217 	    (msg->env & LDC_FRAG_STOP) ? 'E' : ' ',                     \
218 	    (msg->env & LDC_LEN_MASK));					\
219 	} else { 							\
220 	    D2((c)->id, "%s: msg%d (/%x/%x/%x/,env=%x)", (s),		\
221 	    mid, msg->type, msg->stype, msg->ctrl, msg->env);		\
222 	} 								\
223 }
224 
225 #else
226 
227 #define	DBG_ALL_LDCS -1
228 
229 #define	D1
230 #define	D2
231 #define	DWARN
232 
233 #define	DUMP_PAYLOAD(id, addr)
234 #define	DUMP_LDC_PKT(c, s, addr)
235 
236 #endif
237 
238 #define	ZERO_PKT(p)			\
239 	bzero((p), sizeof (ldc_msg_t));
240 
241 #define	IDX2COOKIE(idx, pg_szc, pg_shift)				\
242 	(((pg_szc) << LDC_COOKIE_PGSZC_SHIFT) | ((idx) << (pg_shift)))
243 
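/*
 * For illustration (the particular shift values here are
 * assumptions): with 64K pages, pg_shift would be 16 and pg_szc 1,
 * so IDX2COOKIE(2, 1, 16) yields
 * (1 << LDC_COOKIE_PGSZC_SHIFT) | (2 << 16), i.e. the page size
 * class packed into the cookie's high bits and the byte offset of
 * map table entry 2 in the low bits.
 */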
244 
245 int
246 _init(void)
247 {
248 	int status;
249 
250 	status = hsvc_register(&ldc_hsvc, &ldc_sup_minor);
251 	if (status != 0) {
252 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor LDC services"
253 		    " group: 0x%lx major: %ld minor: %ld errno: %d",
254 		    ldc_hsvc.hsvc_modname, ldc_hsvc.hsvc_group,
255 		    ldc_hsvc.hsvc_major, ldc_hsvc.hsvc_minor, status);
256 		return (-1);
257 	}
258 
259 	status = hsvc_register(&intr_hsvc, &intr_sup_minor);
260 	if (status != 0) {
261 		cmn_err(CE_WARN, "%s: cannot negotiate hypervisor interrupt "
262 		    "services group: 0x%lx major: %ld minor: %ld errno: %d",
263 		    intr_hsvc.hsvc_modname, intr_hsvc.hsvc_group,
264 		    intr_hsvc.hsvc_major, intr_hsvc.hsvc_minor, status);
265 		(void) hsvc_unregister(&ldc_hsvc);
266 		return (-1);
267 	}
268 
269 	/* allocate soft state structure */
270 	ldcssp = kmem_zalloc(sizeof (ldc_soft_state_t), KM_SLEEP);
271 
272 	/* Link the module into the system */
273 	status = mod_install(&ml);
274 	if (status != 0) {
275 		kmem_free(ldcssp, sizeof (ldc_soft_state_t));
276 		return (status);
277 	}
278 
279 	/* Initialize the LDC state structure */
280 	mutex_init(&ldcssp->lock, NULL, MUTEX_DRIVER, NULL);
281 
282 	mutex_enter(&ldcssp->lock);
283 
284 	ldcssp->channel_count = 0;
285 	ldcssp->channels_open = 0;
286 	ldcssp->chan_list = NULL;
287 	ldcssp->dring_list = NULL;
288 
289 	mutex_exit(&ldcssp->lock);
290 
291 	return (0);
292 }
293 
294 int
295 _info(struct modinfo *modinfop)
296 {
297 	/* Report status of the dynamically loadable driver module */
298 	return (mod_info(&ml, modinfop));
299 }
300 
301 int
302 _fini(void)
303 {
304 	int 		rv, status;
305 	ldc_chan_t 	*ldcp, *tmp_ldcp;
306 	ldc_dring_t 	*dringp, *tmp_dringp;
307 	ldc_mem_info_t 	minfo;
308 
309 	/* Unlink the driver module from the system */
310 	status = mod_remove(&ml);
311 	if (status) {
312 		DWARN(DBG_ALL_LDCS, "_fini: mod_remove failed\n");
313 		return (EIO);
314 	}
315 
316 	/* close and finalize channels */
317 	ldcp = ldcssp->chan_list;
318 	while (ldcp != NULL) {
319 		/* ldc_fini() frees the channel, so save the next ptr */
320 		tmp_ldcp = ldcp->next;
321 
322 		(void) ldc_close((ldc_handle_t)ldcp);
323 		(void) ldc_fini((ldc_handle_t)ldcp);
324 
325 		ldcp = tmp_ldcp;
326 	}
324 
325 	/* Free descriptor rings */
326 	dringp = ldcssp->dring_list;
327 	while (dringp != NULL) {
328 		/* save the next ptr; the ring is destroyed below */
329 		tmp_dringp = dringp->next;
330 
330 		rv = ldc_mem_dring_info((ldc_dring_handle_t)dringp, &minfo);
331 		if (rv == 0 && minfo.status != LDC_UNBOUND) {
332 			if (minfo.status == LDC_BOUND) {
333 				(void) ldc_mem_dring_unbind(
334 						(ldc_dring_handle_t)dringp);
335 			}
336 			if (minfo.status == LDC_MAPPED) {
337 				(void) ldc_mem_dring_unmap(
338 						(ldc_dring_handle_t)dringp);
339 			}
340 		}
341 
342 		(void) ldc_mem_dring_destroy((ldc_dring_handle_t)dringp);
343 
344 		dringp = tmp_dringp;
345 	}
344 	ldcssp->dring_list = NULL;
345 
346 	/*
347 	 * We have successfully "removed" the driver.
348 	 * Destroy the soft state
349 	 */
350 	mutex_destroy(&ldcssp->lock);
351 	kmem_free(ldcssp, sizeof (ldc_soft_state_t));
352 
353 	(void) hsvc_unregister(&ldc_hsvc);
354 	(void) hsvc_unregister(&intr_hsvc);
355 
356 	return (status);
357 }
358 
359 /* -------------------------------------------------------------------------- */
360 
361 /*
362  * LDC Transport Internal Functions
363  */
364 
365 /*
366  * Translate HV Errors to sun4v error codes
367  */
368 static int
369 i_ldc_h2v_error(int h_error)
370 {
371 	switch (h_error) {
372 
373 	case	H_EOK:
374 		return (0);
375 
376 	case	H_ENORADDR:
377 		return (EFAULT);
378 
379 	case	H_EBADPGSZ:
380 	case	H_EINVAL:
381 		return (EINVAL);
382 
383 	case	H_EWOULDBLOCK:
384 		return (EWOULDBLOCK);
385 
386 	case	H_ENOACCESS:
387 	case	H_ENOMAP:
388 		return (EACCES);
389 
390 	case	H_EIO:
391 	case	H_ECPUERROR:
392 		return (EIO);
393 
394 	case	H_ENOTSUPPORTED:
395 		return (ENOTSUP);
396 
397 	case 	H_ETOOMANY:
398 		return (ENOSPC);
399 
400 	case	H_ECHANNEL:
401 		return (ECHRNG);
402 	default:
403 		break;
404 	}
405 
406 	return (EIO);
407 }
408 
409 /*
410  * Reconfigure the transmit queue
411  */
412 static int
413 i_ldc_txq_reconf(ldc_chan_t *ldcp)
414 {
415 	int rv;
416 
417 	ASSERT(MUTEX_HELD(&ldcp->lock));
418 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
419 	if (rv) {
420 		cmn_err(CE_WARN,
421 		    "ldc_tx_qconf: (0x%lx) cannot set qconf", ldcp->id);
422 		return (EIO);
423 	}
424 	rv = hv_ldc_tx_get_state(ldcp->id, &(ldcp->tx_head),
425 	    &(ldcp->tx_tail), &(ldcp->link_state));
426 	if (rv) {
427 		cmn_err(CE_WARN,
428 		    "ldc_tx_get_state: (0x%lx) cannot get qptrs", ldcp->id);
429 		return (EIO);
430 	}
431 	D1(ldcp->id, "ldc_tx_get_state: (0x%llx) h=0x%llx,t=0x%llx,"
432 	    "s=0x%llx\n", ldcp->id, ldcp->tx_head, ldcp->tx_tail,
433 	    ldcp->link_state);
434 
435 	return (0);
436 }
437 
438 /*
439  * Reconfigure the receive queue
440  */
441 static int
442 i_ldc_rxq_reconf(ldc_chan_t *ldcp)
443 {
444 	int rv;
445 	uint64_t rx_head, rx_tail;
446 
447 	ASSERT(MUTEX_HELD(&ldcp->lock));
448 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
449 	    &(ldcp->link_state));
450 	if (rv) {
451 		cmn_err(CE_WARN,
452 		    "ldc_rx_getstate: (0x%lx) cannot get state",
453 		    ldcp->id);
454 		return (EIO);
455 	}
456 
457 	if (rx_head != rx_tail || ldcp->tstate > TS_READY) {
458 		rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra,
459 			ldcp->rx_q_entries);
460 		if (rv) {
461 			cmn_err(CE_WARN,
462 			    "ldc_rx_qconf: (0x%lx) cannot set qconf",
463 			    ldcp->id);
464 			return (EIO);
465 		}
466 		D1(ldcp->id, "ldc_rx_qconf: (0x%llx) completed qconf",
467 		    ldcp->id);
468 	}
469 
470 	return (0);
471 }
472 
473 /*
474  * Reset LDC state structure and its contents
475  */
476 static void
477 i_ldc_reset_state(ldc_chan_t *ldcp)
478 {
479 	ASSERT(MUTEX_HELD(&ldcp->lock));
480 	ldcp->last_msg_snt = LDC_INIT_SEQID;
481 	ldcp->last_ack_rcd = 0;
482 	ldcp->last_msg_rcd = 0;
483 	ldcp->tx_ackd_head = ldcp->tx_head;
484 	ldcp->next_vidx = 0;
485 	ldcp->hstate = 0;
486 	ldcp->tstate = TS_OPEN;
487 	ldcp->status = LDC_OPEN;
488 
489 	if (ldcp->link_state == LDC_CHANNEL_UP ||
490 	    ldcp->link_state == LDC_CHANNEL_RESET) {
491 
492 		if (ldcp->mode == LDC_MODE_RAW) {
493 			ldcp->status = LDC_UP;
494 			ldcp->tstate = TS_UP;
495 		} else {
496 			ldcp->status = LDC_READY;
497 			ldcp->tstate |= TS_LINK_READY;
498 		}
499 	}
500 }
501 
502 /*
503  * Reset an LDC channel
504  */
505 static void
506 i_ldc_reset(ldc_chan_t *ldcp)
507 {
508 	D2(ldcp->id, "i_ldc_reset: (0x%llx) channel reset\n", ldcp->id);
509 
510 	(void) i_ldc_txq_reconf(ldcp);
511 	(void) i_ldc_rxq_reconf(ldcp);
512 	i_ldc_reset_state(ldcp);
513 }
514 
515 /*
516  * Clear pending interrupts
517  */
518 static void
519 i_ldc_clear_intr(ldc_chan_t *ldcp, cnex_intrtype_t itype)
520 {
521 	ldc_cnex_t *cinfo = &ldcssp->cinfo;
522 
523 	ASSERT(MUTEX_HELD(&ldcp->lock));
524 	if (cinfo->dip && ldcp->intr_pending) {
525 		ldcp->intr_pending = B_FALSE;
526 		(void) cinfo->clr_intr(cinfo->dip, ldcp->id, itype);
527 	}
528 }
529 
530 /*
531  * Set the receive queue head
532  * Returns an error if it fails
533  */
534 static int
535 i_ldc_set_rx_head(ldc_chan_t *ldcp, uint64_t head)
536 {
537 	int rv;
538 
539 	ASSERT(MUTEX_HELD(&ldcp->lock));
540 	rv = hv_ldc_rx_set_qhead(ldcp->id, head);
541 	if (rv && rv != H_EWOULDBLOCK) {
542 		cmn_err(CE_WARN,
543 		    "ldc_rx_set_qhead: (0x%lx) cannot set qhead", ldcp->id);
544 		i_ldc_reset(ldcp);
545 		return (ECONNRESET);
546 	}
547 
548 	return (0);
549 }
550 
551 
552 /*
553  * Returns the tx_tail to be used for the next transfer.
554  * Reads the TX queue pointers and link state from the HV and
555  * fails if the channel is down or the queue is full.
556  */
557 static int
558 i_ldc_get_tx_tail(ldc_chan_t *ldcp, uint64_t *tail)
559 {
560 	int 		rv;
561 	uint64_t 	current_head, new_tail;
562 
563 	ASSERT(MUTEX_HELD(&ldcp->lock));
564 	/* Read the head and tail ptrs from HV */
565 	rv = hv_ldc_tx_get_state(ldcp->id,
566 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
567 	if (rv) {
568 		cmn_err(CE_WARN,
569 		    "i_ldc_get_tx_tail: (0x%lx) cannot read qptrs\n",
570 		    ldcp->id);
571 		return (EIO);
572 	}
573 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
574 		DWARN(DBG_ALL_LDCS,
575 		    "i_ldc_get_tx_tail: (0x%llx) channel not ready\n",
576 		    ldcp->id);
577 		return (ECONNRESET);
578 	}
579 
580 	/* In reliable mode, check against last ACKd msg */
581 	current_head = (ldcp->mode == LDC_MODE_RELIABLE ||
582 		ldcp->mode == LDC_MODE_STREAM)
583 		? ldcp->tx_ackd_head : ldcp->tx_head;
584 
585 	/* increment the tail */
586 	new_tail = (ldcp->tx_tail + LDC_PACKET_SIZE) %
587 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
588 
589 	if (new_tail == current_head) {
590 		DWARN(ldcp->id,
591 		    "i_ldc_get_tx_tail: (0x%llx) TX queue is full\n",
592 		    ldcp->id);
593 		return (EWOULDBLOCK);
594 	}
595 
596 	D2(ldcp->id, "i_ldc_get_tx_tail: (0x%llx) head=0x%llx, tail=0x%llx\n",
597 	    ldcp->id, ldcp->tx_head, ldcp->tx_tail);
598 
599 	*tail = ldcp->tx_tail;
600 	return (0);
601 }
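
/*
 * A worked example of the wraparound arithmetic above (the packet
 * size values are assumptions): with LDC_PACKET_SHIFT of 6 (64-byte
 * packets) and a 512-entry queue, the queue spans 512 << 6 = 32768
 * bytes, so a tail of 0x7fc0 advances to (0x7fc0 + 64) % 32768 = 0
 * and wraps to the start of the queue.  Since a full queue is
 * detected when the incremented tail equals the head, one packet
 * slot is always left unused to distinguish full from empty.
 */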
602 
603 /*
604  * Set the tail pointer. If the HV returns EWOULDBLOCK, back off
605  * and retry up to LDC_CHK_CNT times before returning an error.
606  * Returns 0, EWOULDBLOCK or EIO
607  */
608 static int
609 i_ldc_set_tx_tail(ldc_chan_t *ldcp, uint64_t tail)
610 {
611 	int		rv, retval = EWOULDBLOCK;
612 	int 		loop_cnt, chk_cnt;
613 
614 	ASSERT(MUTEX_HELD(&ldcp->lock));
615 	for (chk_cnt = 0; chk_cnt < LDC_CHK_CNT; chk_cnt++) {
616 
617 		if ((rv = hv_ldc_tx_set_qtail(ldcp->id, tail)) == 0) {
618 			retval = 0;
619 			break;
620 		}
621 		if (rv != H_EWOULDBLOCK) {
622 			DWARN(ldcp->id, "i_ldc_set_tx_tail: (0x%llx) set "
623 			    "qtail=0x%llx failed, rv=%d\n", ldcp->id, tail, rv);
624 			retval = EIO;
625 			break;
626 		}
627 
628 		/* spin for LDC_LOOP_CNT iterations and then try again */
629 		for (loop_cnt = 0; loop_cnt < LDC_LOOP_CNT; loop_cnt++);
630 	}
631 	return (retval);
632 }
633 
634 /*
635  * Send a LDC message
636  */
637 static int
638 i_ldc_send_pkt(ldc_chan_t *ldcp, uint8_t pkttype, uint8_t subtype,
639     uint8_t ctrlmsg)
640 {
641 	int		rv;
642 	ldc_msg_t 	*pkt;
643 	uint64_t	tx_tail;
644 	uint32_t	curr_seqid = ldcp->last_msg_snt;
645 
646 	ASSERT(MUTEX_HELD(&ldcp->lock));
647 	/* get the current tail for the message */
648 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
649 	if (rv) {
650 		DWARN(ldcp->id,
651 		    "i_ldc_send_pkt: (0x%llx) error sending pkt, "
652 		    "type=0x%x,subtype=0x%x,ctrl=0x%x\n",
653 		    ldcp->id, pkttype, subtype, ctrlmsg);
654 		return (rv);
655 	}
656 
657 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
658 	ZERO_PKT(pkt);
659 
660 	/* Initialize the packet */
661 	pkt->type = pkttype;
662 	pkt->stype = subtype;
663 	pkt->ctrl = ctrlmsg;
664 
665 	/* Store seqid/ackid iff not in RAW mode and not an RTS/RTR msg */
666 	if (((ctrlmsg & LDC_CTRL_MASK) != LDC_RTS) &&
667 	    ((ctrlmsg & LDC_CTRL_MASK) != LDC_RTR)) {
668 		curr_seqid++;
669 		if (ldcp->mode != LDC_MODE_RAW) {
670 			pkt->seqid = curr_seqid;
671 			pkt->ackid = ldcp->last_msg_rcd;
672 		}
673 	}
674 	DUMP_LDC_PKT(ldcp, "i_ldc_send_pkt", (uint64_t)pkt);
675 
676 	/* initiate the send by calling into HV and set the new tail */
677 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
678 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
679 
680 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
681 	if (rv) {
682 		DWARN(ldcp->id,
683 		    "i_ldc_send_pkt:(0x%llx) error sending pkt, "
684 		    "type=0x%x,stype=0x%x,ctrl=0x%x\n",
685 		    ldcp->id, pkttype, subtype, ctrlmsg);
686 		return (EIO);
687 	}
688 
689 	ldcp->last_msg_snt = curr_seqid;
690 	ldcp->tx_tail = tx_tail;
691 
692 	return (0);
693 }
694 
695 /*
696  * Checks if a packet was received in the right order
697  * in the case of a reliable transport.
698  * Returns 0 if in order, else EIO
699  */
700 static int
701 i_ldc_check_seqid(ldc_chan_t *ldcp, ldc_msg_t *msg)
702 {
703 	/* No seqid checking for RAW mode */
704 	if (ldcp->mode == LDC_MODE_RAW)
705 		return (0);
706 
707 	/* No seqid checking for version, RTS, RTR message */
708 	if (msg->ctrl == LDC_VER ||
709 	    msg->ctrl == LDC_RTS ||
710 	    msg->ctrl == LDC_RTR)
711 		return (0);
712 
713 	/* Initial seqid to use is sent in RTS/RTR and saved in last_msg_rcd */
714 	if (msg->seqid != (ldcp->last_msg_rcd + 1)) {
715 		DWARN(ldcp->id,
716 		    "i_ldc_check_seqid: (0x%llx) out-of-order pkt, got 0x%x, "
717 		    "expecting 0x%x\n", ldcp->id, msg->seqid,
718 		    (ldcp->last_msg_rcd + 1));
719 		return (EIO);
720 	}
721 
722 	return (0);
723 }
724 
725 
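/*
 * Overview of the control-message handshake implemented by the
 * routines below (as coded in this file):
 *
 *	initiator			peer
 *	---------			----
 *	VER (INFO)	---->
 *			<----		VER (ACK, or NACK w/ alt version)
 *	RTS		---->
 *			<----		RTR
 *	RDX		---->
 *
 * Version negotiation may iterate over ldc_versions[] until a major
 * version matches.  Once RDX has been exchanged, both sides set
 * TS_HSHAKE_DONE and the channel status becomes LDC_UP.
 */
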
726 /*
727  * Process an incoming version ctrl message
728  */
729 static int
730 i_ldc_process_VER(ldc_chan_t *ldcp, ldc_msg_t *msg)
731 {
732 	int 		rv = 0, idx = ldcp->next_vidx;
733 	ldc_msg_t 	*pkt;
734 	uint64_t	tx_tail;
735 	ldc_ver_t	*rcvd_ver;
736 
737 	/* get the received version */
738 	rcvd_ver = (ldc_ver_t *)((uint64_t)msg + LDC_PAYLOAD_VER_OFF);
739 
740 	D2(ldcp->id, "i_ldc_process_VER: (0x%llx) received VER v%u.%u\n",
741 	    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
742 
743 	switch (msg->stype) {
744 	case LDC_INFO:
745 
746 		/* get the current tail and pkt for the response */
747 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
748 		if (rv != 0) {
749 			DWARN(ldcp->id,
750 			    "i_ldc_process_VER: (0x%llx) err sending "
751 			    "version ACK/NACK\n", ldcp->id);
752 			i_ldc_reset(ldcp);
753 			return (ECONNRESET);
754 		}
755 
756 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
757 		ZERO_PKT(pkt);
758 
759 		/* initialize the packet */
760 		pkt->type = LDC_CTRL;
761 		pkt->ctrl = LDC_VER;
762 
763 		for (;;) {
764 
765 			D1(ldcp->id, "i_ldc_process_VER: got %u.%u chk %u.%u\n",
766 			    rcvd_ver->major, rcvd_ver->minor,
767 			    ldc_versions[idx].major, ldc_versions[idx].minor);
768 
769 			if (rcvd_ver->major == ldc_versions[idx].major) {
770 				/* major version match - ACK version */
771 				pkt->stype = LDC_ACK;
772 
773 				/*
774 				 * lower minor version to the one this endpt
775 				 * supports, if necessary
776 				 */
777 				if (rcvd_ver->minor > ldc_versions[idx].minor)
778 					rcvd_ver->minor =
779 						ldc_versions[idx].minor;
780 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
781 
782 				break;
783 			}
784 
785 			if (rcvd_ver->major > ldc_versions[idx].major) {
786 
787 				D1(ldcp->id, "i_ldc_process_VER: using next"
788 				    " lower idx=%d, v%u.%u\n", idx,
789 				    ldc_versions[idx].major,
790 				    ldc_versions[idx].minor);
791 
792 				/* nack with next lower version */
793 				pkt->stype = LDC_NACK;
794 				bcopy(&ldc_versions[idx], pkt->udata,
795 				    sizeof (ldc_versions[idx]));
796 				ldcp->next_vidx = idx;
797 				break;
798 			}
799 
800 			/* next major version */
801 			idx++;
802 
803 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
804 
805 			if (idx == LDC_NUM_VERS) {
806 				/* no version match - send NACK */
807 				pkt->stype = LDC_NACK;
808 				bzero(pkt->udata, sizeof (ldc_ver_t));
809 				ldcp->next_vidx = 0;
810 				break;
811 			}
812 		}
813 
814 		/* initiate the send by calling into HV and set the new tail */
815 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
816 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
817 
818 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
819 		if (rv == 0) {
820 			ldcp->tx_tail = tx_tail;
821 			if (pkt->stype == LDC_ACK) {
822 				D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent"
823 				    " version ACK\n", ldcp->id);
824 				/* Save the ACK'd version */
825 				ldcp->version.major = rcvd_ver->major;
826 				ldcp->version.minor = rcvd_ver->minor;
827 				ldcp->tstate |= TS_VER_DONE;
828 				DWARN(DBG_ALL_LDCS,
829 				    "(0x%llx) Agreed on version v%u.%u\n",
830 				    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
831 			}
832 		} else {
833 			DWARN(ldcp->id,
834 			    "i_ldc_process_VER: (0x%llx) error sending "
835 			    "ACK/NACK\n", ldcp->id);
836 			i_ldc_reset(ldcp);
837 			return (ECONNRESET);
838 		}
839 
840 		break;
841 
842 	case LDC_ACK:
843 		/* SUCCESS - we have agreed on a version */
844 		ldcp->version.major = rcvd_ver->major;
845 		ldcp->version.minor = rcvd_ver->minor;
846 		ldcp->tstate |= TS_VER_DONE;
847 
848 		D1(DBG_ALL_LDCS, "(0x%llx) Agreed on version v%u.%u\n",
849 		    ldcp->id, rcvd_ver->major, rcvd_ver->minor);
850 
851 		/* initiate RTS-RTR-RDX handshake */
852 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
853 		if (rv) {
854 			DWARN(ldcp->id,
855 			    "i_ldc_process_VER: (0x%llx) cannot send RTS\n",
856 			    ldcp->id);
857 			i_ldc_reset(ldcp);
858 			return (ECONNRESET);
859 		}
860 
861 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
862 		ZERO_PKT(pkt);
863 
864 		pkt->type = LDC_CTRL;
865 		pkt->stype = LDC_INFO;
866 		pkt->ctrl = LDC_RTS;
867 		pkt->env = ldcp->mode;
868 		if (ldcp->mode != LDC_MODE_RAW)
869 			pkt->seqid = LDC_INIT_SEQID;
870 
871 		ldcp->last_msg_rcd = LDC_INIT_SEQID;
872 
873 		DUMP_LDC_PKT(ldcp, "i_ldc_process_VER snd rts", (uint64_t)pkt);
874 
875 		/* initiate the send by calling into HV and set the new tail */
876 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
877 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
878 
879 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
880 		if (rv) {
881 			D2(ldcp->id,
882 			    "i_ldc_process_VER: (0x%llx) no listener\n",
883 			    ldcp->id);
884 			i_ldc_reset(ldcp);
885 			return (ECONNRESET);
886 		}
887 
888 		ldcp->last_msg_snt++;
889 		ldcp->tx_tail = tx_tail;
890 		ldcp->hstate |= TS_SENT_RTS;
891 
892 		break;
893 
894 	case LDC_NACK:
895 		/* check if version in NACK is zero */
896 		if (rcvd_ver->major == 0 && rcvd_ver->minor == 0) {
897 			/* version handshake failure */
898 			DWARN(DBG_ALL_LDCS,
899 			    "i_ldc_process_VER: (0x%llx) no version match\n",
900 			    ldcp->id);
901 			i_ldc_reset(ldcp);
902 			return (ECONNRESET);
903 		}
904 
905 		/* get the current tail and pkt for the response */
906 		rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
907 		if (rv != 0) {
908 			cmn_err(CE_NOTE,
909 			    "i_ldc_process_VER: (0x%lx) err sending "
910 			    "version ACK/NACK\n", ldcp->id);
911 			i_ldc_reset(ldcp);
912 			return (ECONNRESET);
913 		}
914 
915 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
916 		ZERO_PKT(pkt);
917 
918 		/* initialize the packet */
919 		pkt->type = LDC_CTRL;
920 		pkt->ctrl = LDC_VER;
921 		pkt->stype = LDC_INFO;
922 
923 		/* check ver in NACK msg has a match */
924 		for (;;) {
925 			if (rcvd_ver->major == ldc_versions[idx].major) {
926 				/*
927 				 * major version match - resubmit request,
928 				 * lowering the minor version to the one
929 				 * this endpt supports, if necessary
930 				 */
931 				if (rcvd_ver->minor > ldc_versions[idx].minor)
932 					rcvd_ver->minor =
933 						ldc_versions[idx].minor;
934 				bcopy(rcvd_ver, pkt->udata, sizeof (*rcvd_ver));
935 				break;
936 
937 			}
938 
939 			if (rcvd_ver->major > ldc_versions[idx].major) {
940 
941 				D1(ldcp->id, "i_ldc_process_VER: using next"
942 				    " lower idx=%d, v%u.%u\n", idx,
943 				    ldc_versions[idx].major,
944 				    ldc_versions[idx].minor);
945 
946 				/* send next lower version */
947 				bcopy(&ldc_versions[idx], pkt->udata,
948 				    sizeof (ldc_versions[idx]));
949 				ldcp->next_vidx = idx;
950 				break;
951 			}
952 
953 			/* next version */
954 			idx++;
955 
956 			D1(ldcp->id, "i_ldc_process_VER: inc idx %x\n", idx);
957 
958 			if (idx == LDC_NUM_VERS) {
959 				/* no version match - terminate */
960 				ldcp->next_vidx = 0;
961 				return (ECONNRESET);
962 			}
963 		}
964 
965 		/* initiate the send by calling into HV and set the new tail */
966 		tx_tail = (tx_tail + LDC_PACKET_SIZE) %
967 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
968 
969 		rv = i_ldc_set_tx_tail(ldcp, tx_tail);
970 		if (rv == 0) {
971 			D2(ldcp->id, "i_ldc_process_VER: (0x%llx) sent version "
972 			    "INFO v%u.%u\n", ldcp->id, ldc_versions[idx].major,
973 			    ldc_versions[idx].minor);
974 			ldcp->tx_tail = tx_tail;
975 		} else {
976 			cmn_err(CE_NOTE,
977 			    "i_ldc_process_VER: (0x%lx) error sending version "
978 			    "INFO\n", ldcp->id);
979 			i_ldc_reset(ldcp);
980 			return (ECONNRESET);
981 		}
982 
983 		break;
984 	}
985 
986 	return (rv);
987 }
988 
989 
990 /*
991  * Process an incoming RTS ctrl message
992  */
993 static int
994 i_ldc_process_RTS(ldc_chan_t *ldcp, ldc_msg_t *msg)
995 {
996 	int 		rv = 0;
997 	ldc_msg_t 	*pkt;
998 	uint64_t	tx_tail;
999 	boolean_t	sent_NACK = B_FALSE;
1000 
1001 	D2(ldcp->id, "i_ldc_process_RTS: (0x%llx) received RTS\n", ldcp->id);
1002 
1003 	switch (msg->stype) {
1004 	case LDC_NACK:
1005 		DWARN(ldcp->id,
1006 		    "i_ldc_process_RTS: (0x%llx) RTS NACK received\n",
1007 		    ldcp->id);
1008 
1009 		/* Reset the channel -- as we cannot continue */
1010 		i_ldc_reset(ldcp);
1011 		rv = ECONNRESET;
1012 		break;
1013 
1014 	case LDC_INFO:
1015 
1016 		/* check mode */
1017 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1018 			cmn_err(CE_NOTE,
1019 			    "i_ldc_process_RTS: (0x%lx) mode mismatch\n",
1020 			    ldcp->id);
1021 			/*
1022 			 * send NACK in response to MODE message
1023 			 * get the current tail for the response
1024 			 */
1025 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTS);
1026 			if (rv) {
1027 				/* if cannot send NACK - reset channel */
1028 				i_ldc_reset(ldcp);
1029 				rv = ECONNRESET;
1030 				break;
1031 			}
1032 			sent_NACK = B_TRUE;
1033 		}
1034 		break;
1035 	default:
1036 		DWARN(ldcp->id, "i_ldc_process_RTS: (0x%llx) unexp ACK\n",
1037 		    ldcp->id);
1038 		i_ldc_reset(ldcp);
1039 		rv = ECONNRESET;
1040 		break;
1041 	}
1042 
1043 	/*
1044 	 * If either the connection was reset (when rv != 0) or
1045 	 * a NACK was sent, we return. In the case of a NACK
1046 	 * we consume the packet that came in, but we do not
1047 	 * record that we received the RTS
1048 	 */
1049 	if (rv || sent_NACK)
1050 		return (rv);
1051 
1052 	/* record RTS received */
1053 	ldcp->hstate |= TS_RCVD_RTS;
1054 
1055 	/* store initial SEQID info */
1056 	ldcp->last_msg_snt = msg->seqid;
1057 
1058 	/* get the current tail for the response */
1059 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
1060 	if (rv != 0) {
1061 		cmn_err(CE_NOTE,
1062 		    "i_ldc_process_RTS: (0x%lx) err sending RTR\n",
1063 		    ldcp->id);
1064 		i_ldc_reset(ldcp);
1065 		return (ECONNRESET);
1066 	}
1067 
1068 	pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
1069 	ZERO_PKT(pkt);
1070 
1071 	/* initialize the packet */
1072 	pkt->type = LDC_CTRL;
1073 	pkt->stype = LDC_INFO;
1074 	pkt->ctrl = LDC_RTR;
1075 	pkt->env = ldcp->mode;
1076 	if (ldcp->mode != LDC_MODE_RAW)
1077 		pkt->seqid = LDC_INIT_SEQID;
1078 
1079 	ldcp->last_msg_rcd = msg->seqid;
1080 
1081 	/* initiate the send by calling into HV and set the new tail */
1082 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
1083 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1084 
1085 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
1086 	if (rv == 0) {
1087 		D2(ldcp->id,
1088 		    "i_ldc_process_RTS: (0x%llx) sent RTR\n", ldcp->id);
1089 		DUMP_LDC_PKT(ldcp, "i_ldc_process_RTS sent rtr", (uint64_t)pkt);
1090 
1091 		ldcp->tx_tail = tx_tail;
1092 		ldcp->hstate |= TS_SENT_RTR;
1093 
1094 	} else {
1095 		cmn_err(CE_NOTE,
1096 		    "i_ldc_process_RTS: (0x%lx) error sending RTR\n",
1097 		    ldcp->id);
1098 		i_ldc_reset(ldcp);
1099 		return (ECONNRESET);
1100 	}
1101 
1102 	return (0);
1103 }
1104 
1105 /*
1106  * Process an incoming RTR ctrl message
1107  */
1108 static int
1109 i_ldc_process_RTR(ldc_chan_t *ldcp, ldc_msg_t *msg)
1110 {
1111 	int 		rv = 0;
1112 	boolean_t	sent_NACK = B_FALSE;
1113 
1114 	D2(ldcp->id, "i_ldc_process_RTR: (0x%llx) received RTR\n", ldcp->id);
1115 
1116 	switch (msg->stype) {
1117 	case LDC_NACK:
1118 		/* RTR NACK received */
1119 		DWARN(ldcp->id,
1120 		    "i_ldc_process_RTR: (0x%llx) RTR NACK received\n",
1121 		    ldcp->id);
1122 
1123 		/* Reset the channel -- as we cannot continue */
1124 		i_ldc_reset(ldcp);
1125 		rv = ECONNRESET;
1126 
1127 		break;
1128 
1129 	case LDC_INFO:
1130 
1131 		/* check mode */
1132 		if (ldcp->mode != (ldc_mode_t)msg->env) {
1133 			DWARN(ldcp->id,
1134 			    "i_ldc_process_RTR: (0x%llx) mode mismatch\n",
1135 			    ldcp->id);
1136 			/*
1137 			 * send NACK in response to MODE message
1138 			 * get the current tail for the response
1139 			 */
1140 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK, LDC_RTR);
1141 			if (rv) {
1142 				/* if cannot send NACK - reset channel */
1143 				i_ldc_reset(ldcp);
1144 				rv = ECONNRESET;
1145 				break;
1146 			}
1147 			sent_NACK = B_TRUE;
1148 		}
1149 		break;
1150 
1151 	default:
1152 		DWARN(ldcp->id, "i_ldc_process_RTR: (0x%llx) unexp ACK\n",
1153 		    ldcp->id);
1154 
1155 		/* Reset the channel -- as we cannot continue */
1156 		i_ldc_reset(ldcp);
1157 		rv = ECONNRESET;
1158 		break;
1159 	}
1160 
1161 	/*
1162 	 * If either the connection was reset (when rv != 0) or
1163 	 * a NACK was sent, we return. In the case of a NACK
1164 	 * we consume the packet that came in, but we do not
1165 	 * record that we received the RTR
1166 	 */
1167 	if (rv || sent_NACK)
1168 		return (rv);
1169 
1170 	ldcp->last_msg_snt = msg->seqid;
1171 	ldcp->hstate |= TS_RCVD_RTR;
1172 
1173 	rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_INFO, LDC_RDX);
1174 	if (rv) {
1175 		cmn_err(CE_NOTE,
1176 		    "i_ldc_process_RTR: (0x%lx) cannot send RDX\n",
1177 		    ldcp->id);
1178 		i_ldc_reset(ldcp);
1179 		return (ECONNRESET);
1180 	}
1181 	D2(ldcp->id,
1182 	    "i_ldc_process_RTR: (0x%llx) sent RDX\n", ldcp->id);
1183 
1184 	ldcp->hstate |= TS_SENT_RDX;
1185 	ldcp->tstate |= TS_HSHAKE_DONE;
1186 	ldcp->status = LDC_UP;
1187 
1188 	DWARN(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1189 
1190 	return (0);
1191 }
1192 
1193 
1194 /*
1195  * Process an incoming RDX ctrl message
1196  */
1197 static int
1198 i_ldc_process_RDX(ldc_chan_t *ldcp, ldc_msg_t *msg)
1199 {
1200 	int	rv = 0;
1201 
1202 	D2(ldcp->id, "i_ldc_process_RDX: (0x%llx) received RDX\n", ldcp->id);
1203 
1204 	switch (msg->stype) {
1205 	case LDC_NACK:
1206 		/* RDX NACK received */
1207 		DWARN(ldcp->id,
1208 		    "i_ldc_process_RDX: (0x%llx) RDX NACK received\n",
1209 		    ldcp->id);
1210 
1211 		/* Reset the channel -- as we cannot continue */
1212 		i_ldc_reset(ldcp);
1213 		rv = ECONNRESET;
1214 
1215 		break;
1216 
1217 	case LDC_INFO:
1218 
1219 		/*
1220 		 * if the channel is UP and an RDX is received after
1221 		 * data transmission has commenced, it is an error
1222 		 */
1223 		if ((ldcp->tstate == TS_UP) && (ldcp->hstate & TS_RCVD_RDX)) {
1224 			DWARN(DBG_ALL_LDCS,
1225 			    "i_ldc_process_RDX: (0x%llx) unexpected RDX"
1226 			    " - LDC reset\n", ldcp->id);
1227 			i_ldc_reset(ldcp);
1228 			return (ECONNRESET);
1229 		}
1230 
1231 		ldcp->hstate |= TS_RCVD_RDX;
1232 		ldcp->tstate |= TS_HSHAKE_DONE;
1233 		ldcp->status = LDC_UP;
1234 
1235 		D1(DBG_ALL_LDCS, "(0x%llx) Handshake Complete\n", ldcp->id);
1236 		break;
1237 
1238 	default:
1239 		DWARN(ldcp->id, "i_ldc_process_RDX: (0x%llx) unexp ACK\n",
1240 		    ldcp->id);
1241 
1242 		/* Reset the channel -- as we cannot continue */
1243 		i_ldc_reset(ldcp);
1244 		rv = ECONNRESET;
1245 		break;
1246 	}
1247 
1248 	return (rv);
1249 }
1250 
1251 /*
1252  * Process an incoming ACK for a data packet
1253  */
1254 static int
1255 i_ldc_process_data_ACK(ldc_chan_t *ldcp, ldc_msg_t *msg)
1256 {
1257 	int		rv;
1258 	uint64_t 	tx_head;
1259 	ldc_msg_t	*pkt;
1260 
1261 	/*
1262 	 * Read the current Tx head and tail
1263 	 */
1264 	rv = hv_ldc_tx_get_state(ldcp->id,
1265 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
1266 	if (rv != 0) {
1267 		cmn_err(CE_WARN,
1268 		    "i_ldc_process_data_ACK: (0x%lx) cannot read qptrs\n",
1269 		    ldcp->id);
1270 		return (0);
1271 	}
1272 
1273 	/*
1274 	 * loop from the previous ACK location to the current
1275 	 * head location. This is how far the HV has actually
1276 	 * sent pkts. Pkts between head and tail are yet to be
1277 	 * sent by the HV.
1278 	 */
1279 	tx_head = ldcp->tx_ackd_head;
1280 	for (;;) {
1281 		pkt = (ldc_msg_t *)(ldcp->tx_q_va + tx_head);
1282 		tx_head = (tx_head + LDC_PACKET_SIZE) %
1283 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1284 
1285 		if (pkt->seqid == msg->ackid) {
1286 			D2(ldcp->id,
1287 			    "i_ldc_process_data_ACK: (0x%llx) found packet\n",
1288 			    ldcp->id);
1289 			ldcp->last_ack_rcd = msg->ackid;
1290 			ldcp->tx_ackd_head = tx_head;
1291 			break;
1292 		}
1293 		if (tx_head == ldcp->tx_head) {
1294 			/* could not find packet */
1295 			DWARN(ldcp->id,
1296 			    "i_ldc_process_data_ACK: (0x%llx) invalid ACKid\n",
1297 			    ldcp->id);
1298 			break;
1299 		}
1300 	}
1301 
1302 	return (0);
1303 }
1304 
1305 /*
1306  * Process incoming control message
1307  * Return 0 - session can continue
1308  *        EAGAIN - reprocess packet - state was changed
1309  *	  ECONNRESET - channel was reset
1310  */
1311 static int
1312 i_ldc_ctrlmsg(ldc_chan_t *ldcp, ldc_msg_t *msg)
1313 {
1314 	int 		rv = 0;
1315 
1316 	switch (ldcp->tstate) {
1317 
1318 	case TS_OPEN:
1319 	case TS_READY:
1320 
1321 		switch (msg->ctrl & LDC_CTRL_MASK) {
1322 		case LDC_VER:
1323 			/* process version message */
1324 			rv = i_ldc_process_VER(ldcp, msg);
1325 			break;
1326 		default:
1327 			DWARN(ldcp->id,
1328 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1329 			    "tstate=0x%x\n", ldcp->id,
1330 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1331 			break;
1332 		}
1333 
1334 		break;
1335 
1336 	case TS_VREADY:
1337 
1338 		switch (msg->ctrl & LDC_CTRL_MASK) {
1339 		case LDC_VER:
1340 			/* peer is redoing version negotiation */
1341 			(void) i_ldc_txq_reconf(ldcp);
1342 			i_ldc_reset_state(ldcp);
1343 			rv = EAGAIN;
1344 			break;
1345 		case LDC_RTS:
1346 			/* process RTS message */
1347 			rv = i_ldc_process_RTS(ldcp, msg);
1348 			break;
1349 		case LDC_RTR:
1350 			/* process RTR message */
1351 			rv = i_ldc_process_RTR(ldcp, msg);
1352 			break;
1353 		case LDC_RDX:
1354 			/* process RDX message */
1355 			rv = i_ldc_process_RDX(ldcp, msg);
1356 			break;
1357 		default:
1358 			DWARN(ldcp->id,
1359 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1360 			    "tstate=0x%x\n", ldcp->id,
1361 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1362 			break;
1363 		}
1364 
1365 		break;
1366 
1367 	case TS_UP:
1368 
1369 		switch (msg->ctrl & LDC_CTRL_MASK) {
1370 		case LDC_VER:
1371 			DWARN(ldcp->id,
1372 			    "i_ldc_ctrlmsg: (0x%llx) unexpected VER "
1373 			    "- LDC reset\n", ldcp->id);
1374 			/* peer is redoing version negotiation */
1375 			(void) i_ldc_txq_reconf(ldcp);
1376 			i_ldc_reset_state(ldcp);
1377 			rv = EAGAIN;
1378 			break;
1379 
1380 		case LDC_RDX:
1381 			/* process RDX message */
1382 			rv = i_ldc_process_RDX(ldcp, msg);
1383 			break;
1384 
1385 		default:
1386 			DWARN(ldcp->id,
1387 			    "i_ldc_ctrlmsg: (0x%llx) unexp ctrl 0x%x "
1388 			    "tstate=0x%x\n", ldcp->id,
1389 			    (msg->ctrl & LDC_CTRL_MASK), ldcp->tstate);
1390 			break;
1391 		}
1392 	}
1393 
1394 	return (rv);
1395 }
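
/*
 * Summary of the dispatch above - the ctrl messages accepted in
 * each transport state (anything else is logged and ignored):
 *
 *	tstate			accepted ctrl messages
 *	------			----------------------
 *	TS_OPEN, TS_READY	VER
 *	TS_VREADY		VER (renegotiate), RTS, RTR, RDX
 *	TS_UP			VER (renegotiate), RDX
 */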
1396 
1397 /*
1398  * Register channel with the channel nexus
1399  */
1400 static int
1401 i_ldc_register_channel(ldc_chan_t *ldcp)
1402 {
1403 	int		rv = 0;
1404 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1405 
1406 	if (cinfo->dip == NULL) {
1407 		DWARN(ldcp->id,
1408 		    "i_ldc_register_channel: cnex has not registered\n");
1409 		return (EAGAIN);
1410 	}
1411 
1412 	rv = cinfo->reg_chan(cinfo->dip, ldcp->id, ldcp->devclass);
1413 	if (rv) {
1414 		DWARN(ldcp->id,
1415 		    "i_ldc_register_channel: cannot register channel\n");
1416 		return (rv);
1417 	}
1418 
1419 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR,
1420 	    i_ldc_tx_hdlr, ldcp, NULL);
1421 	if (rv) {
1422 		DWARN(ldcp->id,
1423 		    "i_ldc_register_channel: cannot add Tx interrupt\n");
1424 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1425 		return (rv);
1426 	}
1427 
1428 	rv = cinfo->add_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR,
1429 	    i_ldc_rx_hdlr, ldcp, NULL);
1430 	if (rv) {
1431 		DWARN(ldcp->id,
1432 		    "i_ldc_register_channel: cannot add Rx interrupt\n");
1433 		(void) cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1434 		(void) cinfo->unreg_chan(cinfo->dip, ldcp->id);
1435 		return (rv);
1436 	}
1437 
1438 	ldcp->tstate |= TS_CNEX_RDY;
1439 
1440 	return (0);
1441 }
1442 
1443 /*
1444  * Unregister a channel with the channel nexus
1445  */
1446 static int
1447 i_ldc_unregister_channel(ldc_chan_t *ldcp)
1448 {
1449 	int		rv = 0;
1450 	ldc_cnex_t	*cinfo = &ldcssp->cinfo;
1451 
1452 	if (cinfo->dip == NULL) {
1453 		DWARN(ldcp->id,
1454 		    "i_ldc_unregister_channel: cnex has not registered\n");
1455 		return (EAGAIN);
1456 	}
1457 
1458 	if (ldcp->tstate & TS_CNEX_RDY) {
1459 
1460 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_RX_INTR);
1461 		if (rv) {
1462 			DWARN(ldcp->id,
1463 			    "i_ldc_unregister_channel: err removing Rx intr\n");
1464 		}
1465 		rv = cinfo->rem_intr(cinfo->dip, ldcp->id, CNEX_TX_INTR);
1466 		if (rv) {
1467 			DWARN(ldcp->id,
1468 			    "i_ldc_unregister_channel: err removing Tx intr\n");
1469 		}
1470 		rv = cinfo->unreg_chan(ldcssp->cinfo.dip, ldcp->id);
1471 		if (rv) {
1472 			DWARN(ldcp->id,
1473 			    "i_ldc_unregister_channel: cannot unreg channel\n");
1474 		}
1475 
1476 		ldcp->tstate &= ~TS_CNEX_RDY;
1477 	}
1478 
1479 	return (0);
1480 }
1481 
1482 
1483 /*
1484  * LDC transmit interrupt handler
1485  *    triggered for channel up/down/reset events
1486  *    and Tx queue content changes
1487  */
1488 static uint_t
1489 i_ldc_tx_hdlr(caddr_t arg1, caddr_t arg2)
1490 {
1491 	_NOTE(ARGUNUSED(arg2))
1492 
1493 	int 		rv;
1494 	ldc_chan_t 	*ldcp;
1495 	boolean_t 	notify_client = B_FALSE;
1496 	uint64_t	notify_event = 0;
1497 
1498 	/* Get the channel for which interrupt was received */
1499 	ASSERT(arg1 != NULL);
1500 	ldcp = (ldc_chan_t *)arg1;
1501 
1502 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1503 	    ldcp->id, ldcp);
1504 
1505 	/* Lock channel */
1506 	mutex_enter(&ldcp->lock);
1507 
1508 	rv = hv_ldc_tx_get_state(ldcp->id, &ldcp->tx_head, &ldcp->tx_tail,
1509 	    &ldcp->link_state);
1510 	if (rv) {
1511 		cmn_err(CE_WARN,
1512 		    "i_ldc_tx_hdlr: (0x%lx) cannot read queue ptrs rv=0x%d\n",
1513 		    ldcp->id, rv);
1514 		mutex_exit(&ldcp->lock);
1515 		return (DDI_INTR_CLAIMED);
1516 	}
1517 
1518 	/*
1519 	 * reset the channel state if the channel went down
1520 	 * (other side unconfigured queue) or channel was reset
1521 	 * (other side reconfigured its queue)
1522 	 */
1523 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1524 		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) channel link down\n",
1525 		    ldcp->id);
1525 		i_ldc_reset(ldcp);
1526 		notify_client = B_TRUE;
1527 		notify_event = LDC_EVT_DOWN;
1528 	}
1529 
1530 	if (ldcp->link_state == LDC_CHANNEL_RESET) {
1531 		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) channel link reset\n",
1532 		    ldcp->id);
1532 		i_ldc_reset(ldcp);
1533 		notify_client = B_TRUE;
1534 		notify_event = LDC_EVT_RESET;
1535 	}
1536 
1537 	if (ldcp->tstate == TS_OPEN && ldcp->link_state == LDC_CHANNEL_UP) {
1538 		D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) channel link up\n",
1539 		    ldcp->id);
1539 		notify_client = B_TRUE;
1540 		notify_event = LDC_EVT_RESET;
1541 		ldcp->tstate |= TS_LINK_READY;
1542 		ldcp->status = LDC_READY;
1543 	}
1544 
1545 	/* if callbacks are disabled, do not notify */
1546 	if (!ldcp->cb_enabled)
1547 		notify_client = B_FALSE;
1548 
1549 	if (notify_client)
1550 		ldcp->cb_inprogress = B_TRUE;
1551 
1552 	/* Unlock channel */
1553 	mutex_exit(&ldcp->lock);
1554 
1555 	if (notify_client) {
1556 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1557 		if (rv) {
1558 			DWARN(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) callback "
1559 			    "failure", ldcp->id);
1560 		}
1561 		mutex_enter(&ldcp->lock);
1562 		ldcp->cb_inprogress = B_FALSE;
1563 		mutex_exit(&ldcp->lock);
1564 	}
1565 
1566 	mutex_enter(&ldcp->lock);
1567 	i_ldc_clear_intr(ldcp, CNEX_TX_INTR);
1568 	mutex_exit(&ldcp->lock);
1569 
1570 	D1(ldcp->id, "i_ldc_tx_hdlr: (0x%llx) exiting handler", ldcp->id);
1571 
1572 	return (DDI_INTR_CLAIMED);
1573 }
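
/*
 * Note on the locking pattern above: the channel lock is dropped
 * before the client callback is invoked, presumably so the callback
 * can call back into the LDC API without deadlocking, and is then
 * re-taken to clear cb_inprogress and the pending interrupt.
 */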
1574 
1575 /*
1576  * LDC receive interrupt handler
1577  *    triggered for a channel with data pending to read
1578  *    i.e. Rx queue content changes
1579  */
1580 static uint_t
1581 i_ldc_rx_hdlr(caddr_t arg1, caddr_t arg2)
1582 {
1583 	_NOTE(ARGUNUSED(arg2))
1584 
1585 	int		rv;
1586 	uint64_t 	rx_head, rx_tail;
1587 	ldc_msg_t 	*msg;
1588 	ldc_chan_t 	*ldcp;
1589 	boolean_t 	notify_client = B_FALSE;
1590 	uint64_t	notify_event = 0;
1591 
1592 	/* Get the channel for which interrupt was received */
1593 	if (arg1 == NULL) {
1594 		cmn_err(CE_WARN, "i_ldc_rx_hdlr: invalid arg\n");
1595 		return (DDI_INTR_UNCLAIMED);
1596 	}
1597 
1598 	ldcp = (ldc_chan_t *)arg1;
1599 
1600 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) Received intr, ldcp=0x%p\n",
1601 	    ldcp->id, ldcp);
1602 
1603 	/* Lock channel */
1604 	mutex_enter(&ldcp->lock);
1605 
1606 	/* mark interrupt as pending */
1607 	ldcp->intr_pending = B_TRUE;
1608 
1609 	/*
1610 	 * Read packet(s) from the queue
1611 	 */
1612 	for (;;) {
1613 
1614 		rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
1615 		    &ldcp->link_state);
1616 		if (rv) {
1617 			cmn_err(CE_WARN,
1618 			    "i_ldc_rx_hdlr: (0x%lx) cannot read "
1619 			    "queue ptrs, rv=0x%d\n", ldcp->id, rv);
1620 			i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1621 			mutex_exit(&ldcp->lock);
1622 			return (DDI_INTR_CLAIMED);
1623 		}
1624 
1625 		/*
1626 		 * reset the channel state if the channel went down
1627 		 * (other side unconfigured queue) or channel was reset
1628 		 * (other side reconfigured its queue)
1629 		 */
1630 		if (ldcp->link_state == LDC_CHANNEL_DOWN) {
1631 			D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
1632 			    "link down\n", ldcp->id);
1633 			i_ldc_reset(ldcp);
1634 			notify_client = B_TRUE;
1635 			notify_event = LDC_EVT_DOWN;
1636 			break;
1637 		}
1638 		if (ldcp->link_state == LDC_CHANNEL_RESET) {
1639 			D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
1640 			    "link reset\n", ldcp->id);
1641 			i_ldc_reset(ldcp);
1642 			notify_client = B_TRUE;
1643 			notify_event = LDC_EVT_RESET;
1644 		}
1645 
1646 		if (ldcp->tstate == TS_OPEN &&
1647 		    ldcp->link_state == LDC_CHANNEL_UP) {
1648 			D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) channel "
1649 			    "link up\n", ldcp->id);
1650 			notify_client = B_TRUE;
1651 			notify_event = LDC_EVT_RESET;
1652 			ldcp->tstate |= TS_LINK_READY;
1653 			ldcp->status = LDC_READY;
1654 		}
1655 
1656 		if (rx_head == rx_tail) {
1657 			D2(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) No packets\n",
1658 			    ldcp->id);
1659 			break;
1660 		}
1661 		D2(ldcp->id, "i_ldc_rx_hdlr: head=0x%llx, tail=0x%llx\n",
1662 		    rx_head, rx_tail);
1663 		DUMP_LDC_PKT(ldcp, "i_ldc_rx_hdlr rcd",
1664 		    ldcp->rx_q_va + rx_head);
1665 
1666 		/* get the message */
1667 		msg = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
1668 
1669 		/* if channel is in RAW mode or data pkt, notify and return */
1670 		if (ldcp->mode == LDC_MODE_RAW) {
1671 			notify_client = B_TRUE;
1672 			notify_event |= LDC_EVT_READ;
1673 			break;
1674 		}
1675 
1676 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
1677 
1678 			/* discard packet if channel is not up */
1679 			if (ldcp->tstate != TS_UP) {
1680 
1681 				/* move the head one position */
1682 				rx_head = (rx_head + LDC_PACKET_SIZE) %
1683 				(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1684 
1685 				if ((rv = i_ldc_set_rx_head(ldcp,
1686 				    rx_head)) != 0)
1686 					break;
1687 
1688 				continue;
1689 			} else {
1690 				notify_client = B_TRUE;
1691 				notify_event |= LDC_EVT_READ;
1692 				break;
1693 			}
1694 		}
1695 
1696 		/* Check the sequence ID for the message received */
1697 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
1698 
1699 			DWARN(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) seqid error, "
1700 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
1701 
1702 			/* Reset last_msg_rcd to start of message */
1703 			if (ldcp->first_fragment != 0) {
1704 				ldcp->last_msg_rcd =
1705 					ldcp->first_fragment - 1;
1706 				ldcp->first_fragment = 0;
1707 			}
1708 			/*
1709 			 * Send a NACK due to seqid mismatch
1710 			 */
1711 			rv = i_ldc_send_pkt(ldcp, LDC_CTRL, LDC_NACK,
1712 			    (msg->ctrl & LDC_CTRL_MASK));
1713 
1714 			if (rv) {
1715 				cmn_err(CE_NOTE,
1716 				    "i_ldc_rx_hdlr: (0x%lx) err sending "
1717 				    "CTRL/NACK msg\n", ldcp->id);
1718 			}
1719 
1720 			/* purge receive queue */
1721 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
1722 			break;
1723 		}
1724 
1725 		/* record the message ID */
1726 		ldcp->last_msg_rcd = msg->seqid;
1727 
1728 		/* process control messages */
1729 		if (msg->type & LDC_CTRL) {
1730 			/* save current internal state */
1731 			uint64_t tstate = ldcp->tstate;
1732 
1733 			rv = i_ldc_ctrlmsg(ldcp, msg);
1734 			if (rv == EAGAIN) {
1735 				/* re-process pkt - state was adjusted */
1736 				continue;
1737 			}
1738 			if (rv == ECONNRESET) {
1739 				notify_client = B_TRUE;
1740 				notify_event = LDC_EVT_RESET;
1741 				break;
1742 			}
1743 
1744 			/*
1745 			 * control message processing was successful
1746 			 * channel transitioned to ready for communication
1747 			 */
1748 			if (rv == 0 && ldcp->tstate == TS_UP &&
1749 			    tstate != ldcp->tstate) {
1750 				notify_client = B_TRUE;
1751 				notify_event = LDC_EVT_UP;
1752 			}
1753 		}
1754 
1755 		/* process data ACKs */
1756 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
1757 			(void) i_ldc_process_data_ACK(ldcp, msg);
1758 		}
1759 
1760 		/* move the head one position */
1761 		rx_head = (rx_head + LDC_PACKET_SIZE) %
1762 			(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1763 		if ((rv = i_ldc_set_rx_head(ldcp, rx_head)) != 0)
1764 			break;
1765 
1766 	} /* for */
1767 
1768 	/* if callbacks are disabled, do not notify */
1769 	if (!ldcp->cb_enabled)
1770 		notify_client = B_FALSE;
1771 
1772 	if (notify_client)
1773 		ldcp->cb_inprogress = B_TRUE;
1774 
1775 	/* Unlock channel */
1776 	mutex_exit(&ldcp->lock);
1777 
1778 	if (notify_client) {
1779 		rv = ldcp->cb(notify_event, ldcp->cb_arg);
1780 		if (rv) {
1781 			DWARN(ldcp->id,
1782 			    "i_ldc_rx_hdlr: (0x%llx) callback failure",
1783 			    ldcp->id);
1784 		}
1785 		mutex_enter(&ldcp->lock);
1786 		ldcp->cb_inprogress = B_FALSE;
1787 		mutex_exit(&ldcp->lock);
1788 	}
1789 
1790 	mutex_enter(&ldcp->lock);
1791 
1792 	/*
1793 	 * If there are data packets in the queue, ldc_read will clear
1794 	 * the interrupt after draining the queue; else clear it here
1795 	 */
1796 	if ((notify_event & LDC_EVT_READ) == 0) {
1797 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
1798 	}
1799 
1800 	mutex_exit(&ldcp->lock);
1801 
1802 	D1(ldcp->id, "i_ldc_rx_hdlr: (0x%llx) exiting handler", ldcp->id);
1803 	return (DDI_INTR_CLAIMED);
1804 }
1805 
1806 
1807 /* -------------------------------------------------------------------------- */
1808 
1809 /*
1810  * LDC API functions
1811  */
1812 
1813 /*
1814  * Initialize the channel. Allocate internal structure and memory for
1815  * TX/RX queues, and initialize locks.
1816  */
1817 int
1818 ldc_init(uint64_t id, ldc_attr_t *attr, ldc_handle_t *handle)
1819 {
1820 	ldc_chan_t 	*ldcp;
1821 	int		rv, exit_val;
1822 	uint64_t	ra_base, nentries;
1823 
1824 	exit_val = EINVAL;	/* guarantee an error if exit on failure */
1825 
1826 	if (attr == NULL) {
1827 		DWARN(id, "ldc_init: (0x%llx) invalid attr\n", id);
1828 		return (EINVAL);
1829 	}
1830 	if (handle == NULL) {
1831 		DWARN(id, "ldc_init: (0x%llx) invalid handle\n", id);
1832 		return (EINVAL);
1833 	}
1834 
1835 	/* check if channel is valid */
1836 	rv = hv_ldc_tx_qinfo(id, &ra_base, &nentries);
1837 	if (rv == H_ECHANNEL) {
1838 		DWARN(id, "ldc_init: (0x%llx) invalid channel id\n", id);
1839 		return (EINVAL);
1840 	}
1841 
1842 	/* check if the channel has already been initialized */
1843 	mutex_enter(&ldcssp->lock);
1844 	ldcp = ldcssp->chan_list;
1845 	while (ldcp != NULL) {
1846 		if (ldcp->id == id) {
1847 			DWARN(id, "ldc_init: (0x%llx) already initialized\n",
1848 			    id);
1849 			mutex_exit(&ldcssp->lock);
1850 			return (EADDRINUSE);
1851 		}
1852 		ldcp = ldcp->next;
1853 	}
1854 	mutex_exit(&ldcssp->lock);
1855 
1856 	ASSERT(ldcp == NULL);
1857 
1858 	*handle = 0;
1859 
1860 	/* Allocate an ldcp structure */
1861 	ldcp = kmem_zalloc(sizeof (ldc_chan_t), KM_SLEEP);
1862 
1863 	/* Initialize the channel lock */
1864 	mutex_init(&ldcp->lock, NULL, MUTEX_DRIVER, NULL);
1865 
1866 	/* Channel specific processing */
1867 	mutex_enter(&ldcp->lock);
1868 
1869 	/* Initialize the channel */
1870 	ldcp->id = id;
1871 	ldcp->cb = NULL;
1872 	ldcp->cb_arg = NULL;
1873 	ldcp->cb_inprogress = B_FALSE;
1874 	ldcp->cb_enabled = B_FALSE;
1875 	ldcp->next = NULL;
1876 
1877 	/* Read attributes */
1878 	ldcp->mode = attr->mode;
1879 	ldcp->devclass = attr->devclass;
1880 	ldcp->devinst = attr->instance;
1881 
1882 	ldcp->rx_q_entries =
1883 		(attr->qlen > 0) ? attr->qlen : LDC_QUEUE_ENTRIES;
1884 	ldcp->tx_q_entries = ldcp->rx_q_entries;
1885 
1886 	D1(ldcp->id,
1887 	    "ldc_init: (0x%llx) channel attributes, class=0x%x, "
1888 	    "instance=0x%llx, mode=%d, qlen=%d\n",
1889 	    ldcp->id, ldcp->devclass, ldcp->devinst,
1890 	    ldcp->mode, ldcp->rx_q_entries);
1891 
1892 	ldcp->next_vidx = 0;
1893 	ldcp->tstate = 0;
1894 	ldcp->hstate = 0;
1895 	ldcp->last_msg_snt = LDC_INIT_SEQID;
1896 	ldcp->last_ack_rcd = 0;
1897 	ldcp->last_msg_rcd = 0;
1898 
1899 	ldcp->stream_bufferp = NULL;
1900 	ldcp->exp_dring_list = NULL;
1901 	ldcp->imp_dring_list = NULL;
1902 	ldcp->mhdl_list = NULL;
1903 
1904 	/* Initialize payload size depending on whether channel is reliable */
1905 	switch (ldcp->mode) {
1906 	case LDC_MODE_RAW:
1907 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RAW;
1908 		ldcp->read_p = i_ldc_read_raw;
1909 		ldcp->write_p = i_ldc_write_raw;
1910 		ldcp->mtu = 0;
1911 		break;
1912 	case LDC_MODE_UNRELIABLE:
1913 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_UNRELIABLE;
1914 		ldcp->read_p = i_ldc_read_packet;
1915 		ldcp->write_p = i_ldc_write_packet;
1916 		ldcp->mtu = 0;
1917 		break;
1918 	case LDC_MODE_RELIABLE:
1919 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
1920 		ldcp->read_p = i_ldc_read_packet;
1921 		ldcp->write_p = i_ldc_write_packet;
1922 		ldcp->mtu = 0;
1923 		break;
1924 	case LDC_MODE_STREAM:
1925 		ldcp->pkt_payload = LDC_PAYLOAD_SIZE_RELIABLE;
1926 
1927 		ldcp->stream_remains = 0;
1928 		ldcp->stream_offset = 0;
1929 		ldcp->mtu = LDC_STREAM_MTU;
1930 		ldcp->stream_bufferp = kmem_alloc(ldcp->mtu, KM_SLEEP);
1931 		ldcp->read_p = i_ldc_read_stream;
1932 		ldcp->write_p = i_ldc_write_stream;
1933 		break;
1934 	default:
1935 		exit_val = EINVAL;
1936 		goto cleanup_on_exit;
1937 	}
1938 
1939 	/* Create a transmit queue */
1940 	ldcp->tx_q_va = (uint64_t)
1941 		contig_mem_alloc(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
1942 	if (ldcp->tx_q_va == NULL) {
1943 		cmn_err(CE_WARN,
1944 		    "ldc_init: (0x%lx) TX queue allocation failed\n",
1945 		    ldcp->id);
1946 		exit_val = ENOMEM;
1947 		goto cleanup_on_exit;
1948 	}
1949 	ldcp->tx_q_ra = va_to_pa((caddr_t)ldcp->tx_q_va);
1950 
1951 	D2(ldcp->id, "ldc_init: txq_va=0x%llx, txq_ra=0x%llx, entries=0x%llx\n",
1952 	    ldcp->tx_q_va, ldcp->tx_q_ra, ldcp->tx_q_entries);
1953 
1954 	ldcp->tstate |= TS_TXQ_RDY;
1955 
1956 	/* Create a receive queue */
1957 	ldcp->rx_q_va = (uint64_t)
1958 		contig_mem_alloc(ldcp->rx_q_entries << LDC_PACKET_SHIFT);
1959 	if (ldcp->rx_q_va == NULL) {
1960 		cmn_err(CE_WARN,
1961 		    "ldc_init: (0x%lx) RX queue allocation failed\n",
1962 		    ldcp->id);
1963 		exit_val = ENOMEM;
1964 		goto cleanup_on_exit;
1965 	}
1966 	ldcp->rx_q_ra = va_to_pa((caddr_t)ldcp->rx_q_va);
1967 
1968 	D2(ldcp->id, "ldc_init: rxq_va=0x%llx, rxq_ra=0x%llx, entries=0x%llx\n",
1969 	    ldcp->rx_q_va, ldcp->rx_q_ra, ldcp->rx_q_entries);
1970 
1971 	ldcp->tstate |= TS_RXQ_RDY;
1972 
1973 	/* Init descriptor ring and memory handle list lock */
1974 	mutex_init(&ldcp->exp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
1975 	mutex_init(&ldcp->imp_dlist_lock, NULL, MUTEX_DRIVER, NULL);
1976 	mutex_init(&ldcp->mlist_lock, NULL, MUTEX_DRIVER, NULL);
1977 
1978 	/* mark status as INITialized */
1979 	ldcp->status = LDC_INIT;
1980 
1981 	mutex_exit(&ldcp->lock);
1982 
1983 	/* Add to channel list */
1984 	mutex_enter(&ldcssp->lock);
1985 	ldcp->next = ldcssp->chan_list;
1986 	ldcssp->chan_list = ldcp;
1987 	ldcssp->channel_count++;
1988 	mutex_exit(&ldcssp->lock);
1989 
1990 	/* set the handle */
1991 	*handle = (ldc_handle_t)ldcp;
1992 
1993 	D1(ldcp->id, "ldc_init: (0x%llx) channel initialized\n", ldcp->id);
1994 
1995 	return (0);
1996 
1997 cleanup_on_exit:
1998 
1999 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2000 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2001 
2002 	if (ldcp->tstate & TS_TXQ_RDY)
2003 		contig_mem_free((caddr_t)ldcp->tx_q_va,
2004 		    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2005 
2006 	if (ldcp->tstate & TS_RXQ_RDY)
2007 		contig_mem_free((caddr_t)ldcp->rx_q_va,
2008 		    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2009 
2010 	mutex_exit(&ldcp->lock);
2011 	mutex_destroy(&ldcp->lock);
2012 
2013 	if (ldcp)
2014 		kmem_free(ldcp, sizeof (ldc_chan_t));
2015 
2016 	return (exit_val);
2017 }
2018 
2019 /*
2020  * Finalizes the LDC connection. It will return EBUSY if the
2021  * channel is still open; ldc_close() has to be done prior to
2022  * the ldc_fini operation. It frees the TX/RX queues associated
2023  * with the channel
2024  */
2025 int
2026 ldc_fini(ldc_handle_t handle)
2027 {
2028 	ldc_chan_t 	*ldcp;
2029 	ldc_chan_t 	*tmp_ldcp;
2030 	uint64_t 	id;
2031 
2032 	if (handle == NULL) {
2033 		DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel handle\n");
2034 		return (EINVAL);
2035 	}
2036 	ldcp = (ldc_chan_t *)handle;
2037 	id = ldcp->id;
2038 
2039 	mutex_enter(&ldcp->lock);
2040 
2041 	if (ldcp->tstate > TS_INIT) {
2042 		DWARN(ldcp->id, "ldc_fini: (0x%llx) channel is open\n",
2043 		    ldcp->id);
2044 		mutex_exit(&ldcp->lock);
2045 		return (EBUSY);
2046 	}
2047 
2048 	/* Remove from the channel list */
2049 	mutex_enter(&ldcssp->lock);
2050 	tmp_ldcp = ldcssp->chan_list;
2051 	if (tmp_ldcp == ldcp) {
2052 		ldcssp->chan_list = ldcp->next;
2053 		ldcp->next = NULL;
2054 	} else {
2055 		while (tmp_ldcp != NULL) {
2056 			if (tmp_ldcp->next == ldcp) {
2057 				tmp_ldcp->next = ldcp->next;
2058 				ldcp->next = NULL;
2059 				break;
2060 			}
2061 			tmp_ldcp = tmp_ldcp->next;
2062 		}
2063 		if (tmp_ldcp == NULL) {
2064 			DWARN(DBG_ALL_LDCS, "ldc_fini: invalid channel hdl\n");
2065 			mutex_exit(&ldcssp->lock);
2066 			mutex_exit(&ldcp->lock);
2067 			return (EINVAL);
2068 		}
2069 	}
2070 
2071 	ldcssp->channel_count--;
2072 
2073 	mutex_exit(&ldcssp->lock);
2074 
2075 	/* Free the map table for this channel */
2076 	if (ldcp->mtbl) {
2077 		(void) hv_ldc_set_map_table(ldcp->id, NULL, NULL);
2078 		contig_mem_free(ldcp->mtbl->table, ldcp->mtbl->size);
2079 		mutex_destroy(&ldcp->mtbl->lock);
2080 		kmem_free(ldcp->mtbl, sizeof (ldc_mtbl_t));
2081 	}
2082 
2083 	/* Destroy descriptor ring and memory handle list lock */
2084 	mutex_destroy(&ldcp->exp_dlist_lock);
2085 	mutex_destroy(&ldcp->imp_dlist_lock);
2086 	mutex_destroy(&ldcp->mlist_lock);
2087 
2088 	/* Free the stream buffer for STREAM_MODE */
2089 	if (ldcp->mode == LDC_MODE_STREAM && ldcp->stream_bufferp)
2090 		kmem_free(ldcp->stream_bufferp, ldcp->mtu);
2091 
2092 	/* Free the RX queue */
2093 	contig_mem_free((caddr_t)ldcp->rx_q_va,
2094 	    (ldcp->rx_q_entries << LDC_PACKET_SHIFT));
2095 	ldcp->tstate &= ~TS_RXQ_RDY;
2096 
2097 	/* Free the TX queue */
2098 	contig_mem_free((caddr_t)ldcp->tx_q_va,
2099 	    (ldcp->tx_q_entries << LDC_PACKET_SHIFT));
2100 	ldcp->tstate &= ~TS_TXQ_RDY;
2101 
2102 
2103 	mutex_exit(&ldcp->lock);
2104 
2105 	/* Destroy mutex */
2106 	mutex_destroy(&ldcp->lock);
2107 
2108 	/* free channel structure */
2109 	kmem_free(ldcp, sizeof (ldc_chan_t));
2110 
2111 	D1(id, "ldc_fini: (0x%llx) channel finalized\n", id);
2112 
2113 	return (0);
2114 }
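
/*
 * Typical teardown sequence (illustrative sketch; "hdl" is a channel
 * handle obtained from ldc_init() and error handling is elided):
 *
 *	(void) ldc_close(hdl);		returns EBUSY while handles or
 *					drings are still bound/mapped
 *	(void) ldc_fini(hdl);		returns EBUSY unless closed first
 */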
2115 
2116 /*
2117  * Open the LDC channel for use. It registers the TX/RX queues
2118  * with the Hypervisor. It also specifies the interrupt number
2119  * and target CPU for this channel
2120  */
2121 int
2122 ldc_open(ldc_handle_t handle)
2123 {
2124 	ldc_chan_t 	*ldcp;
2125 	int 		rv;
2126 
2127 	if (handle == NULL) {
2128 		DWARN(DBG_ALL_LDCS, "ldc_open: invalid channel handle\n");
2129 		return (EINVAL);
2130 	}
2131 
2132 	ldcp = (ldc_chan_t *)handle;
2133 
2134 	mutex_enter(&ldcp->lock);
2135 
2136 	if (ldcp->tstate < TS_INIT) {
2137 		DWARN(ldcp->id,
2138 		    "ldc_open: (0x%llx) channel not initialized\n", ldcp->id);
2139 		mutex_exit(&ldcp->lock);
2140 		return (EFAULT);
2141 	}
2142 	if (ldcp->tstate >= TS_OPEN) {
2143 		DWARN(ldcp->id,
2144 		    "ldc_open: (0x%llx) channel is already open\n", ldcp->id);
2145 		mutex_exit(&ldcp->lock);
2146 		return (EFAULT);
2147 	}
2148 
2149 	/*
2150 	 * Unregister/Register the tx queue with the hypervisor
2151 	 */
2152 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2153 	if (rv) {
2154 		cmn_err(CE_WARN,
2155 		    "ldc_open: (0x%lx) channel tx queue unconf failed\n",
2156 		    ldcp->id);
2157 		mutex_exit(&ldcp->lock);
2158 		return (EIO);
2159 	}
2160 
2161 	rv = hv_ldc_tx_qconf(ldcp->id, ldcp->tx_q_ra, ldcp->tx_q_entries);
2162 	if (rv) {
2163 		cmn_err(CE_WARN,
2164 		    "ldc_open: (0x%lx) channel tx queue conf failed\n",
2165 		    ldcp->id);
2166 		mutex_exit(&ldcp->lock);
2167 		return (EIO);
2168 	}
2169 
2170 	D2(ldcp->id, "ldc_open: (0x%llx) registered tx queue with LDC\n",
2171 	    ldcp->id);
2172 
2173 	/*
2174 	 * Unregister/Register the rx queue with the hypervisor
2175 	 */
2176 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2177 	if (rv) {
2178 		cmn_err(CE_WARN,
2179 		    "ldc_open: (0x%lx) channel rx queue unconf failed\n",
2180 		    ldcp->id);
2181 		mutex_exit(&ldcp->lock);
2182 		return (EIO);
2183 	}
2184 
2185 	rv = hv_ldc_rx_qconf(ldcp->id, ldcp->rx_q_ra, ldcp->rx_q_entries);
2186 	if (rv) {
2187 		cmn_err(CE_WARN,
2188 		    "ldc_open: (0x%lx) channel rx queue conf failed\n",
2189 		    ldcp->id);
2190 		mutex_exit(&ldcp->lock);
2191 		return (EIO);
2192 	}
2193 
2194 	D2(ldcp->id, "ldc_open: (0x%llx) registered rx queue with LDC\n",
2195 	    ldcp->id);
2196 
2197 	ldcp->tstate |= TS_QCONF_RDY;
2198 
2199 	/* Register the channel with the channel nexus */
2200 	rv = i_ldc_register_channel(ldcp);
2201 	if (rv && rv != EAGAIN) {
2202 		cmn_err(CE_WARN,
2203 		    "ldc_open: (0x%lx) channel register failed\n", ldcp->id);
2204 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2205 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2206 		mutex_exit(&ldcp->lock);
2207 		return (EIO);
2208 	}
2209 
2210 	/* mark channel in OPEN state */
2211 	ldcp->status = LDC_OPEN;
2212 
2213 	/* Read channel state */
2214 	rv = hv_ldc_tx_get_state(ldcp->id,
2215 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2216 	if (rv) {
2217 		cmn_err(CE_WARN,
2218 		    "ldc_open: (0x%lx) cannot read channel state\n",
2219 		    ldcp->id);
2220 		(void) i_ldc_unregister_channel(ldcp);
2221 		(void) hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2222 		(void) hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2223 		mutex_exit(&ldcp->lock);
2224 		return (EIO);
2225 	}
2226 
2227 	/*
2228 	 * set the ACKd head to current head location for reliable &
2229 	 * streaming mode
2230 	 */
2231 	ldcp->tx_ackd_head = ldcp->tx_head;
2232 
2233 	/* mark channel ready if HV reports link UP (peer alloc'd Rx queue) */
2234 	if (ldcp->link_state == LDC_CHANNEL_UP ||
2235 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2236 		ldcp->tstate |= TS_LINK_READY;
2237 		ldcp->status = LDC_READY;
2238 	}
2239 
2240 	/*
2241 	 * if channel is being opened in RAW mode - no handshake is needed
2242 	 * switch the channel READY and UP state
2243 	 */
2244 	if (ldcp->mode == LDC_MODE_RAW) {
2245 		ldcp->tstate = TS_UP;	/* set bits associated with LDC UP */
2246 		ldcp->status = LDC_UP;
2247 	}
2248 
2249 	mutex_exit(&ldcp->lock);
2250 
2251 	/*
2252 	 * Increment number of open channels
2253 	 */
2254 	mutex_enter(&ldcssp->lock);
2255 	ldcssp->channels_open++;
2256 	mutex_exit(&ldcssp->lock);
2257 
2258 	D1(ldcp->id,
2259 	    "ldc_open: (0x%llx) channel (0x%p) open for use (tstate=0x%x)\n",
2260 	    ldcp->id, ldcp, ldcp->tstate);
2261 
2262 	return (0);
2263 }
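
/*
 * Typical bring-up sequence (illustrative sketch; "chan_id", "attr",
 * "my_cb" and "my_arg" are hypothetical client-supplied values, and
 * error checks are elided - each call returns 0 on success):
 *
 *	ldc_handle_t hdl;
 *
 *	(void) ldc_init(chan_id, &attr, &hdl);
 *	(void) ldc_reg_callback(hdl, my_cb, (caddr_t)my_arg);
 *	(void) ldc_open(hdl);
 *	(void) ldc_up(hdl);	handshake completes asynchronously
 */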
2264 
2265 /*
2266  * Close the LDC connection. It will return EBUSY if there
2267  * are memory segments or descriptor rings either bound to or
2268  * mapped over the channel
2269  */
2270 int
2271 ldc_close(ldc_handle_t handle)
2272 {
2273 	ldc_chan_t 	*ldcp;
2274 	int		rv = 0;
2275 	boolean_t	chk_done = B_FALSE;
2276 
2277 	if (handle == NULL) {
2278 		DWARN(DBG_ALL_LDCS, "ldc_close: invalid channel handle\n");
2279 		return (EINVAL);
2280 	}
2281 	ldcp = (ldc_chan_t *)handle;
2282 
2283 	mutex_enter(&ldcp->lock);
2284 
2285 	/* return error if channel is not open */
2286 	if (ldcp->tstate < TS_OPEN) {
2287 		DWARN(ldcp->id,
2288 		    "ldc_close: (0x%llx) channel is not open\n", ldcp->id);
2289 		mutex_exit(&ldcp->lock);
2290 		return (EFAULT);
2291 	}
2292 
2293 	/* cannot close if any memory handles or drings are bound or mapped */
2294 	if (ldcp->mhdl_list != NULL) {
2295 		DWARN(ldcp->id,
2296 		    "ldc_close: (0x%llx) channel has bound memory handles\n",
2297 		    ldcp->id);
2298 		mutex_exit(&ldcp->lock);
2299 		return (EBUSY);
2300 	}
2301 	if (ldcp->exp_dring_list != NULL) {
2302 		DWARN(ldcp->id,
2303 		    "ldc_close: (0x%llx) channel has bound descriptor rings\n",
2304 		    ldcp->id);
2305 		mutex_exit(&ldcp->lock);
2306 		return (EBUSY);
2307 	}
2308 	if (ldcp->imp_dring_list != NULL) {
2309 		DWARN(ldcp->id,
2310 		    "ldc_close: (0x%llx) channel has mapped descriptor rings\n",
2311 		    ldcp->id);
2312 		mutex_exit(&ldcp->lock);
2313 		return (EBUSY);
2314 	}
2315 
2316 	/*
2317 	 * Wait for pending transmits to complete, i.e. for the Tx queue
2318 	 * to drain; if there are pending pkts, wait 1 ms and retry once
2319 	 */
2320 	for (;;) {
2321 
2322 		rv = hv_ldc_tx_get_state(ldcp->id,
2323 		    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
2324 		if (rv) {
2325 			cmn_err(CE_WARN,
2326 			    "ldc_close: (0x%lx) cannot read qptrs\n", ldcp->id);
2327 			mutex_exit(&ldcp->lock);
2328 			return (EIO);
2329 		}
2330 
2331 		if (ldcp->tx_head == ldcp->tx_tail ||
2332 		    ldcp->link_state != LDC_CHANNEL_UP) {
2333 			break;
2334 		}
2335 
2336 		if (chk_done) {
2337 			DWARN(ldcp->id,
2338 			    "ldc_close: (0x%llx) Tx queue drain timeout\n",
2339 			    ldcp->id);
2340 			break;
2341 		}
2342 
2343 		/* wait for one ms and try again */
2344 		delay(drv_usectohz(1000));
2345 		chk_done = B_TRUE;
2346 	}
2347 
2348 	/*
2349 	 * Unregister the channel with the nexus
2350 	 */
2351 	rv = i_ldc_unregister_channel(ldcp);
2352 	if (rv && rv != EAGAIN) {
2353 		cmn_err(CE_WARN,
2354 		    "ldc_close: (0x%lx) channel unregister failed\n",
2355 		    ldcp->id);
2356 		mutex_exit(&ldcp->lock);
2357 		return (rv);
2358 	}
2359 
2360 	/*
2361 	 * Unregister queues
2362 	 */
2363 	rv = hv_ldc_tx_qconf(ldcp->id, NULL, NULL);
2364 	if (rv) {
2365 		cmn_err(CE_WARN,
2366 		    "ldc_close: (0x%lx) channel TX queue unconf failed\n",
2367 		    ldcp->id);
2368 		mutex_exit(&ldcp->lock);
2369 		return (EIO);
2370 	}
2371 	rv = hv_ldc_rx_qconf(ldcp->id, NULL, NULL);
2372 	if (rv) {
2373 		cmn_err(CE_WARN,
2374 		    "ldc_close: (0x%lx) channel RX queue unconf failed\n",
2375 		    ldcp->id);
2376 		mutex_exit(&ldcp->lock);
2377 		return (EIO);
2378 	}
2379 
2380 	ldcp->tstate &= ~TS_QCONF_RDY;
2381 
2382 	/* Reset channel state information */
2383 	i_ldc_reset_state(ldcp);
2384 
2385 	/* Mark channel as down and in initialized state */
2386 	ldcp->tx_ackd_head = 0;
2387 	ldcp->tx_head = 0;
2388 	ldcp->tstate = TS_INIT;
2389 	ldcp->status = LDC_INIT;
2390 
2391 	mutex_exit(&ldcp->lock);
2392 
2393 	/* Decrement number of open channels */
2394 	mutex_enter(&ldcssp->lock);
2395 	ldcssp->channels_open--;
2396 	mutex_exit(&ldcssp->lock);
2397 
2398 	D1(ldcp->id, "ldc_close: (0x%llx) channel closed\n", ldcp->id);
2399 
2400 	return (0);
2401 }
2402 
2403 /*
2404  * Register channel callback
2405  */
2406 int
2407 ldc_reg_callback(ldc_handle_t handle,
2408     uint_t(*cb)(uint64_t event, caddr_t arg), caddr_t arg)
2409 {
2410 	ldc_chan_t *ldcp;
2411 
2412 	if (handle == NULL) {
2413 		DWARN(DBG_ALL_LDCS,
2414 		    "ldc_reg_callback: invalid channel handle\n");
2415 		return (EINVAL);
2416 	}
2417 	if (((uint64_t)cb) < KERNELBASE) {
2418 		DWARN(DBG_ALL_LDCS, "ldc_reg_callback: invalid callback\n");
2419 		return (EINVAL);
2420 	}
2421 	ldcp = (ldc_chan_t *)handle;
2422 
2423 	mutex_enter(&ldcp->lock);
2424 
2425 	if (ldcp->cb) {
2426 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback exists\n",
2427 		    ldcp->id);
2428 		mutex_exit(&ldcp->lock);
2429 		return (EIO);
2430 	}
2431 	if (ldcp->cb_inprogress) {
2432 		DWARN(ldcp->id, "ldc_reg_callback: (0x%llx) callback active\n",
2433 		    ldcp->id);
2434 		mutex_exit(&ldcp->lock);
2435 		return (EWOULDBLOCK);
2436 	}
2437 
2438 	ldcp->cb = cb;
2439 	ldcp->cb_arg = arg;
2440 	ldcp->cb_enabled = B_TRUE;
2441 
2442 	D1(ldcp->id,
2443 	    "ldc_reg_callback: (0x%llx) registered callback for channel\n",
2444 	    ldcp->id);
2445 
2446 	mutex_exit(&ldcp->lock);
2447 
2448 	return (0);
2449 }
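
/*
 * Example callback (illustrative sketch; "my_state_t" is a
 * hypothetical client structure and the LDC_EVT_* bits are the
 * event values defined in <sys/ldc.h>):
 *
 *	static uint_t
 *	my_cb(uint64_t event, caddr_t arg)
 *	{
 *		my_state_t *sp = (my_state_t *)arg;
 *
 *		if (event & LDC_EVT_UP)
 *			... handshake done, channel is up ...
 *		if (event & LDC_EVT_READ)
 *			... data pending, schedule ldc_read() ...
 *		return (0);
 *	}
 */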
2450 
2451 /*
2452  * Unregister channel callback
2453  */
2454 int
2455 ldc_unreg_callback(ldc_handle_t handle)
2456 {
2457 	ldc_chan_t *ldcp;
2458 
2459 	if (handle == NULL) {
2460 		DWARN(DBG_ALL_LDCS,
2461 		    "ldc_unreg_callback: invalid channel handle\n");
2462 		return (EINVAL);
2463 	}
2464 	ldcp = (ldc_chan_t *)handle;
2465 
2466 	mutex_enter(&ldcp->lock);
2467 
2468 	if (ldcp->cb == NULL) {
2469 		DWARN(ldcp->id,
2470 		    "ldc_unreg_callback: (0x%llx) no callback exists\n",
2471 		    ldcp->id);
2472 		mutex_exit(&ldcp->lock);
2473 		return (EIO);
2474 	}
2475 	if (ldcp->cb_inprogress) {
2476 		DWARN(ldcp->id,
2477 		    "ldc_unreg_callback: (0x%llx) callback active\n",
2478 		    ldcp->id);
2479 		mutex_exit(&ldcp->lock);
2480 		return (EWOULDBLOCK);
2481 	}
2482 
2483 	ldcp->cb = NULL;
2484 	ldcp->cb_arg = NULL;
2485 	ldcp->cb_enabled = B_FALSE;
2486 
2487 	D1(ldcp->id,
2488 	    "ldc_unreg_callback: (0x%llx) unregistered callback for channel\n",
2489 	    ldcp->id);
2490 
2491 	mutex_exit(&ldcp->lock);
2492 
2493 	return (0);
2494 }
2495 
2496 
2497 /*
2498  * Bring a channel up by initiating a handshake with the peer.
2499  * This call is asynchronous; it completes at a later point
2500  * in time when the peer responds with an RTR.
2501  */
2502 int
2503 ldc_up(ldc_handle_t handle)
2504 {
2505 	int 		rv;
2506 	ldc_chan_t 	*ldcp;
2507 	ldc_msg_t 	*ldcmsg;
2508 	uint64_t 	tx_tail;
2509 
2510 	if (handle == NULL) {
2511 		DWARN(DBG_ALL_LDCS, "ldc_up: invalid channel handle\n");
2512 		return (EINVAL);
2513 	}
2514 	ldcp = (ldc_chan_t *)handle;
2515 
2516 	mutex_enter(&ldcp->lock);
2517 
2518 	if (ldcp->tstate == TS_UP) {
2519 		D2(ldcp->id,
2520 		    "ldc_up: (0x%llx) channel is already in UP state\n",
2521 		    ldcp->id);
2522 		mutex_exit(&ldcp->lock);
2523 		return (0);
2524 	}
2525 
2526 	/* if the channel is in RAW mode - mark it as UP, if READY */
2527 	if (ldcp->mode == LDC_MODE_RAW && ldcp->tstate >= TS_READY) {
2528 		ldcp->tstate = TS_UP;
2529 		mutex_exit(&ldcp->lock);
2530 		return (0);
2531 	}
2532 
2533 	/* Don't start another handshake if there is one in progress */
2534 	if (ldcp->hstate) {
2535 		D2(ldcp->id,
2536 		    "ldc_up: (0x%llx) channel handshake in progress\n",
2537 		    ldcp->id);
2538 		mutex_exit(&ldcp->lock);
2539 		return (0);
2540 	}
2541 
2542 	/* get the current tail for the LDC msg */
2543 	rv = i_ldc_get_tx_tail(ldcp, &tx_tail);
2544 	if (rv) {
2545 		DWARN(ldcp->id, "ldc_up: (0x%llx) cannot initiate handshake\n",
2546 		    ldcp->id);
2547 		mutex_exit(&ldcp->lock);
2548 		return (ECONNREFUSED);
2549 	}
2550 
2551 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
2552 	ZERO_PKT(ldcmsg);
2553 
2554 	ldcmsg->type = LDC_CTRL;
2555 	ldcmsg->stype = LDC_INFO;
2556 	ldcmsg->ctrl = LDC_VER;
2557 	ldcp->next_vidx = 0;
2558 	bcopy(&ldc_versions[0], ldcmsg->udata, sizeof (ldc_versions[0]));
2559 
2560 	DUMP_LDC_PKT(ldcp, "ldc_up snd ver", (uint64_t)ldcmsg);
2561 
2562 	/* initiate the send by calling into HV and set the new tail */
2563 	tx_tail = (tx_tail + LDC_PACKET_SIZE) %
2564 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
2565 
2566 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
2567 	if (rv) {
2568 		DWARN(ldcp->id,
2569 		    "ldc_up: (0x%llx) cannot initiate handshake rv=%d\n",
2570 		    ldcp->id, rv);
2571 		mutex_exit(&ldcp->lock);
2572 		return (rv);
2573 	}
2574 
2575 	ldcp->tx_tail = tx_tail;
2576 	D1(ldcp->id, "ldc_up: (0x%llx) channel up initiated\n", ldcp->id);
2577 
2578 	mutex_exit(&ldcp->lock);
2579 
2580 	return (rv);
2581 }
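
/*
 * Since ldc_up() only initiates the handshake, a client that does
 * not rely on the LDC_EVT_UP callback can poll for completion
 * (illustrative sketch; the unbounded loop is hypothetical and a
 * real client would add a timeout):
 *
 *	ldc_status_t st;
 *
 *	(void) ldc_up(hdl);
 *	do {
 *		delay(drv_usectohz(1000));
 *		(void) ldc_status(hdl, &st);
 *	} while (st != LDC_UP);
 */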
2582 
2583 
2584 /*
2585  * Reset a channel by re-registering the Tx/Rx queues
2586  */
2587 int
2588 ldc_reset(ldc_handle_t handle)
2589 {
2590 	ldc_chan_t 	*ldcp;
2591 
2592 	if (handle == NULL) {
2593 		DWARN(DBG_ALL_LDCS, "ldc_reset: invalid channel handle\n");
2594 		return (EINVAL);
2595 	}
2596 	ldcp = (ldc_chan_t *)handle;
2597 
2598 	mutex_enter(&ldcp->lock);
2599 	i_ldc_reset(ldcp);
2600 	mutex_exit(&ldcp->lock);
2601 
2602 	return (0);
2603 }
2604 
2605 /*
2606  * Get the current channel status
2607  */
2608 int
2609 ldc_status(ldc_handle_t handle, ldc_status_t *status)
2610 {
2611 	ldc_chan_t *ldcp;
2612 
2613 	if (handle == NULL || status == NULL) {
2614 		DWARN(DBG_ALL_LDCS, "ldc_status: invalid argument\n");
2615 		return (EINVAL);
2616 	}
2617 	ldcp = (ldc_chan_t *)handle;
2618 
2619 	*status = ((ldc_chan_t *)handle)->status;
2620 
2621 	D1(ldcp->id,
2622 	    "ldc_status: (0x%llx) returned status %d\n", ldcp->id, *status);
2623 	return (0);
2624 }
2625 
2626 
2627 /*
2628  * Set the channel's callback mode - enable/disable callbacks
2629  */
2630 int
2631 ldc_set_cb_mode(ldc_handle_t handle, ldc_cb_mode_t cmode)
2632 {
2633 	ldc_chan_t 	*ldcp;
2634 
2635 	if (handle == NULL) {
2636 		DWARN(DBG_ALL_LDCS,
2637 		    "ldc_set_intr_mode: invalid channel handle\n");
2638 		return (EINVAL);
2639 	}
2640 	ldcp = (ldc_chan_t *)handle;
2641 
2642 	/*
2643 	 * Record whether callbacks should be invoked
2644 	 */
2645 	mutex_enter(&ldcp->lock);
2646 
2647 	switch (cmode) {
2648 	case LDC_CB_DISABLE:
2649 		if (!ldcp->cb_enabled) {
2650 			DWARN(ldcp->id,
2651 			    "ldc_set_cb_mode: (0x%llx) callbacks disabled\n",
2652 			    ldcp->id);
2653 			break;
2654 		}
2655 		ldcp->cb_enabled = B_FALSE;
2656 
2657 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) disabled callbacks\n",
2658 		    ldcp->id);
2659 		break;
2660 
2661 	case LDC_CB_ENABLE:
2662 		if (ldcp->cb_enabled) {
2663 			DWARN(ldcp->id,
2664 			    "ldc_set_cb_mode: (0x%llx) callbacks enabled\n",
2665 			    ldcp->id);
2666 			break;
2667 		}
2668 		ldcp->cb_enabled = B_TRUE;
2669 
2670 		D1(ldcp->id, "ldc_set_cb_mode: (0x%llx) enabled callbacks\n",
2671 		    ldcp->id);
2672 		break;
2673 	}
2674 
2675 	mutex_exit(&ldcp->lock);
2676 
2677 	return (0);
2678 }
2679 
2680 /*
2681  * Check to see if there are packets on the incoming queue;
2682  * will return isempty = B_FALSE if there are packets
2683  */
2684 int
2685 ldc_chkq(ldc_handle_t handle, boolean_t *isempty)
2686 {
2687 	int 		rv;
2688 	uint64_t 	rx_head, rx_tail;
2689 	ldc_chan_t 	*ldcp;
2690 
2691 	if (handle == NULL) {
2692 		DWARN(DBG_ALL_LDCS, "ldc_chkq: invalid channel handle\n");
2693 		return (EINVAL);
2694 	}
2695 	ldcp = (ldc_chan_t *)handle;
2696 
2697 	*isempty = B_TRUE;
2698 
2699 	mutex_enter(&ldcp->lock);
2700 
2701 	if (ldcp->tstate != TS_UP) {
2702 		D1(ldcp->id,
2703 		    "ldc_chkq: (0x%llx) channel is not up\n", ldcp->id);
2704 		mutex_exit(&ldcp->lock);
2705 		return (ECONNRESET);
2706 	}
2707 
2708 	/* Read the queue head/tail ptrs */
2709 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2710 	    &ldcp->link_state);
2711 	if (rv != 0) {
2712 		cmn_err(CE_WARN,
2713 		    "ldc_chkq: (0x%lx) unable to read queue ptrs", ldcp->id);
2714 		mutex_exit(&ldcp->lock);
2715 		return (EIO);
2716 	}
2717 	/* reset the channel state if the channel went down */
2718 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
2719 	    ldcp->link_state == LDC_CHANNEL_RESET) {
2720 		i_ldc_reset(ldcp);
2721 		mutex_exit(&ldcp->lock);
2722 		return (ECONNRESET);
2723 	}
2724 
2725 	if (rx_head != rx_tail) {
2726 		D1(ldcp->id, "ldc_chkq: (0x%llx) queue has pkt(s)\n", ldcp->id);
2727 		*isempty = B_FALSE;
2728 	}
2729 
2730 	mutex_exit(&ldcp->lock);
2731 
2732 	return (0);
2733 }
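
/*
 * Example poll-and-read loop (illustrative sketch; "buf" and its
 * sizing are hypothetical, and error handling is elided):
 *
 *	boolean_t empty;
 *	size_t len;
 *
 *	while (ldc_chkq(hdl, &empty) == 0 && !empty) {
 *		len = sizeof (buf);
 *		if (ldc_read(hdl, (caddr_t)buf, &len) != 0)
 *			break;
 *		... consume len bytes ...
 *	}
 */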
2734 
2735 
2736 /*
2737  * Read at most 'size' bytes. If the incoming message is
2738  * larger than 'size', ENOBUFS is returned.
2739  *
2740  * On return, size contains the number of bytes read.
2741  */
2742 int
2743 ldc_read(ldc_handle_t handle, caddr_t bufp, size_t *sizep)
2744 {
2745 	ldc_chan_t 	*ldcp;
2746 	uint64_t 	rx_head = 0, rx_tail = 0;
2747 	int		rv = 0, exit_val;
2748 
2749 	if (handle == NULL) {
2750 		DWARN(DBG_ALL_LDCS, "ldc_read: invalid channel handle\n");
2751 		return (EINVAL);
2752 	}
2753 
2754 	ldcp = (ldc_chan_t *)handle;
2755 
2756 	/* channel lock */
2757 	mutex_enter(&ldcp->lock);
2758 
2759 	if (ldcp->tstate != TS_UP) {
2760 		DWARN(ldcp->id,
2761 		    "ldc_read: (0x%llx) channel is not in UP state\n",
2762 		    ldcp->id);
2763 		exit_val = ECONNRESET;
2764 	} else {
2765 		exit_val = ldcp->read_p(ldcp, bufp, sizep);
2766 	}
2767 
2768 	/*
2769 	 * if queue has been drained - clear interrupt
2770 	 */
2771 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2772 	    &ldcp->link_state);
2773 	if (exit_val == 0 && rv == 0 && rx_head == rx_tail) {
2774 		i_ldc_clear_intr(ldcp, CNEX_RX_INTR);
2775 	}
2776 
2777 	mutex_exit(&ldcp->lock);
2778 	return (exit_val);
2779 }
2780 
2781 /*
2782  * Basic raw mondo read -
2783  * no interpretation of mondo contents at all.
2784  *
2785  * Enter and exit with ldcp->lock held by caller
2786  */
2787 static int
2788 i_ldc_read_raw(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
2789 {
2790 	uint64_t 	q_size_mask;
2791 	ldc_msg_t 	*msgp;
2792 	uint8_t		*msgbufp;
2793 	int		rv = 0, space;
2794 	uint64_t 	rx_head, rx_tail;
2795 
2796 	space = *sizep;
2797 
2798 	if (space < LDC_PAYLOAD_SIZE_RAW)
2799 		return (ENOBUFS);
2800 
2801 	ASSERT(mutex_owned(&ldcp->lock));
2802 
2803 	/* compute mask for increment */
2804 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
2805 
2806 	/*
2807 	 * Read packet(s) from the queue
2808 	 */
2809 	rv = hv_ldc_rx_get_state(ldcp->id, &rx_head, &rx_tail,
2810 	    &ldcp->link_state);
2811 	if (rv != 0) {
2812 		cmn_err(CE_WARN,
2813 		    "ldc_read_raw: (0x%lx) unable to read queue ptrs",
2814 		    ldcp->id);
2815 		return (EIO);
2816 	}
2817 	D1(ldcp->id, "ldc_read_raw: (0x%llx) rxh=0x%llx,"
2818 		" rxt=0x%llx, st=0x%llx\n",
2819 		ldcp->id, rx_head, rx_tail, ldcp->link_state);
2820 
2821 	/* reset the channel state if the channel went down */
2822 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2823 		i_ldc_reset(ldcp);
2824 		return (ECONNRESET);
2825 	}
2826 
2827 	/*
2828 	 * Check for empty queue
2829 	 */
2830 	if (rx_head == rx_tail) {
2831 		*sizep = 0;
2832 		return (0);
2833 	}
2834 
2835 	/* get the message */
2836 	msgp = (ldc_msg_t *)(ldcp->rx_q_va + rx_head);
2837 
2838 	/* if channel is in RAW mode, copy data and return */
2839 	msgbufp = (uint8_t *)&(msgp->raw[0]);
2840 
2841 	bcopy(msgbufp, target_bufp, LDC_PAYLOAD_SIZE_RAW);
2842 
2843 	DUMP_PAYLOAD(ldcp->id, msgbufp);
2844 
2845 	*sizep = LDC_PAYLOAD_SIZE_RAW;
2846 
2847 	rx_head = (rx_head + LDC_PACKET_SIZE) & q_size_mask;
2848 	(void) i_ldc_set_rx_head(ldcp, rx_head);
2849 
2850 	return (rv);
2851 }
2852 
2853 /*
2854  * Process LDC mondos to build larger packets
2855  * with either unreliable or reliable delivery.
2856  *
2857  * Enter and exit with ldcp->lock held by caller
2858  */
2859 static int
2860 i_ldc_read_packet(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
2861 {
2862 	int		rv = 0;
2863 	uint64_t 	rx_head = 0, rx_tail = 0;
2864 	uint64_t 	curr_head = 0;
2865 	ldc_msg_t 	*msg;
2866 	caddr_t 	target;
2867 	size_t 		len = 0, bytes_read = 0;
2868 	int 		loop_cnt = 0, chk_cnt = 0;
2869 	uint64_t 	q_size_mask;
2870 
2871 	target = target_bufp;
2872 
2873 	ASSERT(mutex_owned(&ldcp->lock));
2874 
2875 	/* reset first frag to 0 */
2876 	ldcp->first_fragment = 0;
2877 
2878 	/* compute mask for increment */
2879 	q_size_mask = (ldcp->rx_q_entries-1)<<LDC_PACKET_SHIFT;
2880 
2881 	/*
2882 	 * Read packet(s) from the queue
2883 	 */
2884 	rv = hv_ldc_rx_get_state(ldcp->id, &curr_head, &rx_tail,
2885 	    &ldcp->link_state);
2886 	if (rv != 0) {
2887 		cmn_err(CE_WARN,
2888 		    "ldc_read: (0x%lx) unable to read queue ptrs",
2889 		    ldcp->id);
2890 		return (EIO);
2891 	}
2892 	D1(ldcp->id, "ldc_read: (0x%llx) chd=0x%llx, tl=0x%llx, st=0x%llx\n",
2893 	    ldcp->id, curr_head, rx_tail, ldcp->link_state);
2894 
2895 	/* reset the channel state if the channel went down */
2896 	if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2897 		i_ldc_reset(ldcp);
2898 		return (ECONNRESET);
2899 	}
2900 
2901 	for (;;) {
2902 
2903 		if (curr_head == rx_tail) {
2904 			rv = hv_ldc_rx_get_state(ldcp->id,
2905 			    &rx_head, &rx_tail, &ldcp->link_state);
2906 			if (rv != 0) {
2907 				cmn_err(CE_WARN,
2908 				    "ldc_read: (0x%lx) cannot read queue ptrs",
2909 				    ldcp->id);
2910 				return (EIO);
2911 			}
2912 			/* reset the channel state if the channel went down */
2913 			if (ldcp->link_state == LDC_CHANNEL_DOWN) {
2914 				i_ldc_reset(ldcp);
2915 				return (ECONNRESET);
2916 			}
2917 		}
2918 
2919 		if (curr_head == rx_tail) {
2920 
2921 			/* If in the middle of a fragmented xfer */
2922 			if (ldcp->first_fragment != 0) {
2923 				if (++loop_cnt > LDC_LOOP_CNT) {
2924 					loop_cnt = 0;
2925 					++chk_cnt;
2926 				}
2927 				if (chk_cnt < LDC_CHK_CNT) {
2928 					continue;
2929 				} else {
2930 					*sizep = 0;
2931 					ldcp->last_msg_rcd =
2932 						ldcp->first_fragment - 1;
2933 					DWARN(DBG_ALL_LDCS,
2934 					    "ldc_read: (0x%llx) read timeout",
2935 					    ldcp->id);
2936 					return (ETIMEDOUT);
2937 				}
2938 			}
2939 			*sizep = 0;
2940 			break;
2941 		}
2942 		loop_cnt = 0;
2943 		chk_cnt = 0;
2944 
2945 		D2(ldcp->id,
2946 		    "ldc_read: (0x%llx) chd=0x%llx, rxhd=0x%llx, rxtl=0x%llx\n",
2947 		    ldcp->id, curr_head, rx_head, rx_tail);
2948 
2949 		/* get the message */
2950 		msg = (ldc_msg_t *)(ldcp->rx_q_va + curr_head);
2951 
2952 		DUMP_LDC_PKT(ldcp, "ldc_read received pkt",
2953 		    ldcp->rx_q_va + curr_head);
2954 
2955 		/* Check the message ID for the message received */
2956 		if ((rv = i_ldc_check_seqid(ldcp, msg)) != 0) {
2957 
2958 			DWARN(ldcp->id, "ldc_read: (0x%llx) seqid error, "
2959 			    "q_ptrs=0x%lx,0x%lx", ldcp->id, rx_head, rx_tail);
2960 
2961 			/* Reset last_msg_rcd to start of message */
2962 			if (ldcp->first_fragment != 0) {
2963 				ldcp->last_msg_rcd =
2964 					ldcp->first_fragment - 1;
2965 				ldcp->first_fragment = 0;
2966 			}
2967 			/*
2968 			 * Send a NACK -- invalid seqid
2969 			 * get the current tail for the response
2970 			 */
2971 			rv = i_ldc_send_pkt(ldcp, msg->type, LDC_NACK,
2972 			    (msg->ctrl & LDC_CTRL_MASK));
2973 			if (rv) {
2974 				cmn_err(CE_NOTE,
2975 				    "ldc_read: (0x%lx) err sending "
2976 				    "NACK msg\n", ldcp->id);
2977 			}
2978 
2979 			/* purge receive queue */
2980 			(void) i_ldc_set_rx_head(ldcp, rx_tail);
2981 
2982 			break;
2983 		}
2984 
2985 		/*
2986 		 * Process any messages of type CTRL messages
2987 		 * Future implementations should try to pass these to
2988 		 * LDC transport by resetting the intr state.
2989 		 *
2990 		 * NOTE: not done as a switch() as type can be both ctrl+data
2991 		 */
2992 		if (msg->type & LDC_CTRL) {
2993 			if (rv = i_ldc_ctrlmsg(ldcp, msg)) {
2994 				if (rv == EAGAIN)
2995 					continue;
2996 				(void) i_ldc_set_rx_head(ldcp, rx_tail);
2997 				*sizep = 0;
2998 				bytes_read = 0;
2999 				rv = ECONNRESET;
3000 				break;
3001 			}
3002 		}
3003 
3004 		/* process data ACKs */
3005 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3006 			(void) i_ldc_process_data_ACK(ldcp, msg);
3007 		}
3008 
3009 		/* process data messages */
3010 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_INFO)) {
3011 
3012 			uint8_t *msgbuf = (uint8_t *)(
3013 				(ldcp->mode == LDC_MODE_RELIABLE ||
3014 				ldcp->mode == LDC_MODE_STREAM)
3015 				? msg->rdata : msg->udata);
3016 
3017 			D2(ldcp->id,
3018 			    "ldc_read: (0x%llx) received data msg\n", ldcp->id);
3019 
3020 			/* get the packet length */
3021 			len = (msg->env & LDC_LEN_MASK);
3022 
3023 			/*
3024 			 * FUTURE OPTIMIZATION:
3025 			 * don't need to set q head for every
3026 			 * packet we read; just need to do this when
3027 			 * we are done or need to wait for more
3028 			 * mondos to make a full packet - this is
3029 			 * currently expensive.
3030 			 */
3031 
3032 			if (ldcp->first_fragment == 0) {
3033 
3034 				/*
3035 				 * first packets should always have the start
3036 				 * bit set (even for a single packet). If not
3037 				 * throw away the packet
3038 				 */
3039 				if (!(msg->env & LDC_FRAG_START)) {
3040 
3041 					DWARN(DBG_ALL_LDCS,
3042 					    "ldc_read: (0x%llx) not start - "
3043 					    "frag=%x\n", ldcp->id,
3044 					    (msg->env) & LDC_FRAG_MASK);
3045 
3046 					/* toss pkt, inc head, cont reading */
3047 					bytes_read = 0;
3048 					target = target_bufp;
3049 					curr_head =
3050 						(curr_head + LDC_PACKET_SIZE)
3051 						& q_size_mask;
3052 					if (rv = i_ldc_set_rx_head(ldcp,
3053 						curr_head))
3054 						break;
3055 
3056 					continue;
3057 				}
3058 
3059 				ldcp->first_fragment = msg->seqid;
3060 			} else {
3061 				/* check to see if this is a pkt w/ START bit */
3062 				if (msg->env & LDC_FRAG_START) {
3063 					DWARN(DBG_ALL_LDCS,
3064 					    "ldc_read:(0x%llx) unexpected pkt"
3065 					    " env=0x%x discarding %d bytes,"
3066 					    " lastmsg=%d, currentmsg=%d\n",
3067 					    ldcp->id, msg->env&LDC_FRAG_MASK,
3068 					    bytes_read, ldcp->last_msg_rcd,
3069 					    msg->seqid);
3070 
3071 					/* throw data we have read so far */
3072 					bytes_read = 0;
3073 					target = target_bufp;
3074 					ldcp->first_fragment = msg->seqid;
3075 
3076 					if (rv = i_ldc_set_rx_head(ldcp,
3077 						curr_head))
3078 						break;
3079 				}
3080 			}
3081 
3082 			/* copy (next) pkt into buffer */
3083 			if (len <= (*sizep - bytes_read)) {
3084 				bcopy(msgbuf, target, len);
3085 				target += len;
3086 				bytes_read += len;
3087 			} else {
3088 				/*
3089 				 * there is not enough space in the buffer to
3090 				 * read this pkt. throw message away & continue
3091 				 * reading data from queue
3092 				 */
3093 				DWARN(DBG_ALL_LDCS,
3094 				    "ldc_read: (0x%llx) buffer too small, "
3095 				    "head=0x%lx, expect=%d, got=%d\n", ldcp->id,
3096 				    curr_head, *sizep, bytes_read+len);
3097 
3098 				ldcp->first_fragment = 0;
3099 				target = target_bufp;
3100 				bytes_read = 0;
3101 
3102 				/* throw away everything received so far */
3103 				if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3104 					break;
3105 
3106 				/* continue reading remaining pkts */
3107 				continue;
3108 			}
3109 		}
3110 
3111 		/* set the message id */
3112 		ldcp->last_msg_rcd = msg->seqid;
3113 
3114 		/* move the head one position */
3115 		curr_head = (curr_head + LDC_PACKET_SIZE) & q_size_mask;
3116 
3117 		if (msg->env & LDC_FRAG_STOP) {
3118 
3119 			/*
3120 			 * All pkts that are part of this fragmented transfer
3121 			 * have been read or this was a single pkt read
3122 			 * or there was an error
3123 			 */
3124 
3125 			/* set the queue head */
3126 			if (rv = i_ldc_set_rx_head(ldcp, curr_head))
3127 				bytes_read = 0;
3128 
3129 			*sizep = bytes_read;
3130 
3131 			break;
3132 		}
3133 
3134 		/* advance head if it is a DATA ACK */
3135 		if ((msg->type & LDC_DATA) && (msg->stype & LDC_ACK)) {
3136 
3137 			/* set the queue head */
3138 			if (rv = i_ldc_set_rx_head(ldcp, curr_head)) {
3139 				bytes_read = 0;
3140 				break;
3141 			}
3142 
3143 			D2(ldcp->id, "ldc_read: (0x%llx) set ACK qhead 0x%llx",
3144 			    ldcp->id, curr_head);
3145 		}
3146 
3147 	} /* for (;;) */
3148 
3149 
3150 	/*
3151 	 * If useful data was read - Send msg ACK
3152 	 * OPTIMIZE: do not send ACK for all msgs - use some frequency
3153 	 */
3154 	if ((bytes_read > 0) && (ldcp->mode == LDC_MODE_RELIABLE ||
3155 		ldcp->mode == LDC_MODE_STREAM)) {
3156 
3157 		rv = i_ldc_send_pkt(ldcp, LDC_DATA, LDC_ACK, 0);
3158 		if (rv != 0) {
3159 			cmn_err(CE_NOTE,
3160 			    "ldc_read: (0x%lx) cannot send ACK\n", ldcp->id);
3161 			return (0);
3162 		}
3163 	}
3164 
3165 	D2(ldcp->id, "ldc_read: (0x%llx) end size=%d", ldcp->id, *sizep);
3166 
3167 	return (rv);
3168 }
3169 
3170 /*
3171  * Use underlying reliable packet mechanism to fetch
3172  * and buffer incoming packets so we can hand them back as
3173  * a basic byte stream.
3174  *
3175  * Enter and exit with ldcp->lock held by caller
3176  */
3177 static int
3178 i_ldc_read_stream(ldc_chan_t *ldcp, caddr_t target_bufp, size_t *sizep)
3179 {
3180 	int	rv;
3181 	size_t	size;
3182 
3183 	ASSERT(mutex_owned(&ldcp->lock));
3184 
3185 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) buffer size=%d",
3186 		ldcp->id, *sizep);
3187 
3188 	if (ldcp->stream_remains == 0) {
3189 		size = ldcp->mtu;
3190 		rv = i_ldc_read_packet(ldcp,
3191 			(caddr_t)ldcp->stream_bufferp, &size);
3192 		D2(ldcp->id, "i_ldc_read_stream: read packet (0x%llx) size=%d",
3193 			ldcp->id, size);
3194 
3195 		if (rv != 0)
3196 			return (rv);
3197 
3198 		ldcp->stream_remains = size;
3199 		ldcp->stream_offset = 0;
3200 	}
3201 
3202 	size = MIN(ldcp->stream_remains, *sizep);
3203 
3204 	bcopy(ldcp->stream_bufferp + ldcp->stream_offset, target_bufp, size);
3205 	ldcp->stream_offset += size;
3206 	ldcp->stream_remains -= size;
3207 
3208 	D2(ldcp->id, "i_ldc_read_stream: (0x%llx) fill from buffer size=%d",
3209 		ldcp->id, size);
3210 
3211 	*sizep = size;
3212 	return (0);
3213 }
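
/*
 * Worked example of the buffering above (sizes are illustrative):
 * if a 5000-byte packet is buffered (stream_remains = 5000,
 * stream_offset = 0) and the caller asks for 4096 bytes, this call
 * returns 4096 (leaving stream_remains = 904, stream_offset = 4096);
 * the next call returns the remaining 904 bytes without calling back
 * into i_ldc_read_packet().
 */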
3214 
3215 /*
3216  * Write the specified number of bytes to the channel
3217  * in multiple pkts of pkt_payload size. Each
3218  * packet is tagged with a unique packet ID in
3219  * the case of a reliable transport.
3220  *
3221  * On return, size contains the number of bytes written.
3222  */
3223 int
3224 ldc_write(ldc_handle_t handle, caddr_t buf, size_t *sizep)
3225 {
3226 	ldc_chan_t	*ldcp;
3227 	int		rv = 0;
3228 
3229 	if (handle == NULL) {
3230 		DWARN(DBG_ALL_LDCS, "ldc_write: invalid channel handle\n");
3231 		return (EINVAL);
3232 	}
3233 	ldcp = (ldc_chan_t *)handle;
3234 
3235 	mutex_enter(&ldcp->lock);
3236 
3237 	/* check if non-zero data to write */
3238 	if (buf == NULL || sizep == NULL) {
3239 		DWARN(ldcp->id, "ldc_write: (0x%llx) invalid data write\n",
3240 		    ldcp->id);
3241 		mutex_exit(&ldcp->lock);
3242 		return (EINVAL);
3243 	}
3244 
3245 	if (*sizep == 0) {
3246 		DWARN(ldcp->id, "ldc_write: (0x%llx) write size of zero\n",
3247 		    ldcp->id);
3248 		mutex_exit(&ldcp->lock);
3249 		return (0);
3250 	}
3251 
3252 	/* Check if channel is UP for data exchange */
3253 	if (ldcp->tstate != TS_UP) {
3254 		DWARN(ldcp->id,
3255 		    "ldc_write: (0x%llx) channel is not in UP state\n",
3256 		    ldcp->id);
3257 		*sizep = 0;
3258 		rv = ECONNRESET;
3259 	} else {
3260 		rv = ldcp->write_p(ldcp, buf, sizep);
3261 	}
3262 
3263 	mutex_exit(&ldcp->lock);
3264 
3265 	return (rv);
3266 }
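
/*
 * Example write with retry (illustrative sketch): the write paths
 * below return EWOULDBLOCK when the Tx queue is full, so a client
 * may back off and retry ("msg"/"msglen" are hypothetical):
 *
 *	size_t len;
 *	int rv;
 *
 *	do {
 *		len = msglen;
 *		rv = ldc_write(hdl, (caddr_t)msg, &len);
 *		if (rv == EWOULDBLOCK)
 *			delay(drv_usectohz(1000));
 *	} while (rv == EWOULDBLOCK);
 */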
3267 
3268 /*
3269  * Write a raw packet to the channel
3270  * On return, size contains the number of bytes written.
3271  */
3272 static int
3273 i_ldc_write_raw(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3274 {
3275 	ldc_msg_t 	*ldcmsg;
3276 	uint64_t 	tx_head, tx_tail, new_tail;
3277 	int		rv = 0;
3278 	size_t		size;
3279 
3280 	ASSERT(mutex_owned(&ldcp->lock));
3281 	ASSERT(ldcp->mode == LDC_MODE_RAW);
3282 
3283 	size = *sizep;
3284 
3285 	/*
3286 	 * Check to see if the packet size is less than or
3287 	 * equal to packet size support in raw mode
3288 	 */
3289 	if (size > ldcp->pkt_payload) {
3290 		DWARN(ldcp->id,
3291 		    "ldc_write: (0x%llx) invalid size (0x%llx) for RAW mode\n",
3292 		    ldcp->id, *sizep);
3293 		*sizep = 0;
3294 		return (EMSGSIZE);
3295 	}
3296 
3297 	/* get the qptrs for the tx queue */
3298 	rv = hv_ldc_tx_get_state(ldcp->id,
3299 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3300 	if (rv != 0) {
3301 		cmn_err(CE_WARN,
3302 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3303 		*sizep = 0;
3304 		return (EIO);
3305 	}
3306 
3307 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3308 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3309 		DWARN(ldcp->id,
3310 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3311 		i_ldc_reset(ldcp);
3312 		*sizep = 0;
3313 		return (ECONNRESET);
3314 	}
3315 
3316 	tx_tail = ldcp->tx_tail;
3317 	tx_head = ldcp->tx_head;
3318 	new_tail = (tx_tail + LDC_PACKET_SIZE) &
3319 		((ldcp->tx_q_entries-1) << LDC_PACKET_SHIFT);
3320 
3321 	if (new_tail == tx_head) {
3322 		DWARN(DBG_ALL_LDCS,
3323 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3324 		*sizep = 0;
3325 		return (EWOULDBLOCK);
3326 	}
3327 
3328 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3329 	    ldcp->id, size);
3330 
3331 	/* Send the data now */
3332 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3333 
3334 	/* copy the data into pkt */
3335 	bcopy((uint8_t *)buf, ldcmsg, size);
3336 
3337 	/* increment tail */
3338 	tx_tail = new_tail;
3339 
3340 	/*
3341 	 * All packets have been copied into the TX queue
3342 	 * update the tail ptr in the HV
3343 	 */
3344 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3345 	if (rv) {
3346 		if (rv == EWOULDBLOCK) {
3347 			DWARN(ldcp->id, "ldc_write: (0x%llx) write timed out\n",
3348 			    ldcp->id);
3349 			*sizep = 0;
3350 			return (EWOULDBLOCK);
3351 		}
3352 
3353 		/* cannot write data - reset channel */
3354 		i_ldc_reset(ldcp);
3355 		*sizep = 0;
3356 		return (ECONNRESET);
3357 	}
3358 
3359 	ldcp->tx_tail = tx_tail;
3360 	*sizep = size;
3361 
3362 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, size);
3363 
3364 	return (rv);
3365 }
3366 
3367 
3368 /*
3369  * Write the specified number of bytes to the channel
3370  * in multiple pkts of pkt_payload size. Each
3371  * packet is tagged with a unique packet ID in
3372  * the case of a reliable transport.
3373  *
3374  * On return, size contains the number of bytes written.
3375  * This function needs to ensure that the write size is < MTU size
3376  */
3377 static int
3378 i_ldc_write_packet(ldc_chan_t *ldcp, caddr_t buf, size_t *size)
3379 {
3380 	ldc_msg_t 	*ldcmsg;
3381 	uint64_t 	tx_head, tx_tail, new_tail, start;
3382 	uint64_t	txq_size_mask, numavail;
3383 	uint8_t 	*msgbuf, *source = (uint8_t *)buf;
3384 	size_t 		len, bytes_written = 0, remaining;
3385 	int		rv;
3386 	uint32_t	curr_seqid;
3387 
3388 	ASSERT(mutex_owned(&ldcp->lock));
3389 
3390 	ASSERT(ldcp->mode == LDC_MODE_RELIABLE ||
3391 		ldcp->mode == LDC_MODE_UNRELIABLE ||
3392 		ldcp->mode == LDC_MODE_STREAM);
3393 
3394 	/* compute mask for increment */
3395 	txq_size_mask = (ldcp->tx_q_entries - 1) << LDC_PACKET_SHIFT;
3396 
3397 	/* get the qptrs for the tx queue */
3398 	rv = hv_ldc_tx_get_state(ldcp->id,
3399 	    &ldcp->tx_head, &ldcp->tx_tail, &ldcp->link_state);
3400 	if (rv != 0) {
3401 		cmn_err(CE_WARN,
3402 		    "ldc_write: (0x%lx) cannot read queue ptrs\n", ldcp->id);
3403 		*size = 0;
3404 		return (EIO);
3405 	}
3406 
3407 	if (ldcp->link_state == LDC_CHANNEL_DOWN ||
3408 	    ldcp->link_state == LDC_CHANNEL_RESET) {
3409 		DWARN(ldcp->id,
3410 		    "ldc_write: (0x%llx) channel down/reset\n", ldcp->id);
3411 		*size = 0;
3412 		i_ldc_reset(ldcp);
3413 		return (ECONNRESET);
3414 	}
3415 
3416 	tx_tail = ldcp->tx_tail;
3417 	new_tail = (tx_tail + LDC_PACKET_SIZE) %
3418 		(ldcp->tx_q_entries << LDC_PACKET_SHIFT);
3419 
3420 	/*
3421 	 * Transport mode determines whether we use HV Tx head or the
3422 	 * private protocol head (corresponding to last ACKd pkt) for
3423 	 * determining how much we can write
3424 	 */
3425 	tx_head = (ldcp->mode == LDC_MODE_RELIABLE ||
3426 		ldcp->mode == LDC_MODE_STREAM)
3427 		? ldcp->tx_ackd_head : ldcp->tx_head;
3428 	if (new_tail == tx_head) {
3429 		DWARN(DBG_ALL_LDCS,
3430 		    "ldc_write: (0x%llx) TX queue is full\n", ldcp->id);
3431 		*size = 0;
3432 		return (EWOULDBLOCK);
3433 	}
3434 
3435 	/*
3436 	 * Make sure that the LDC Tx queue has enough space
3437 	 */
3438 	numavail = (tx_head >> LDC_PACKET_SHIFT) - (tx_tail >> LDC_PACKET_SHIFT)
3439 		+ ldcp->tx_q_entries - 1;
3440 	numavail %= ldcp->tx_q_entries;
3441 
3442 	if (*size > (numavail * ldcp->pkt_payload)) {
3443 		DWARN(DBG_ALL_LDCS,
3444 		    "ldc_write: (0x%llx) TX queue has no space\n", ldcp->id);
3445 		return (EWOULDBLOCK);
3446 	}
3447 
3448 	D2(ldcp->id, "ldc_write: (0x%llx) start xfer size=%d",
3449 	    ldcp->id, *size);
3450 
3451 	/* Send the data now */
3452 	bytes_written = 0;
3453 	curr_seqid = ldcp->last_msg_snt;
3454 	start = tx_tail;
3455 
3456 	while (*size > bytes_written) {
3457 
3458 		ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + tx_tail);
3459 
3460 		msgbuf = (uint8_t *)((ldcp->mode == LDC_MODE_RELIABLE ||
3461 			ldcp->mode == LDC_MODE_STREAM)
3462 			? ldcmsg->rdata : ldcmsg->udata);
3463 
3464 		ldcmsg->type = LDC_DATA;
3465 		ldcmsg->stype = LDC_INFO;
3466 		ldcmsg->ctrl = 0;
3467 
3468 		remaining = *size - bytes_written;
3469 		len = min(ldcp->pkt_payload, remaining);
3470 		ldcmsg->env = (uint8_t)len;
3471 
3472 		curr_seqid++;
3473 		ldcmsg->seqid = curr_seqid;
3474 
3475 		DUMP_LDC_PKT(ldcp, "ldc_write snd data", (uint64_t)ldcmsg);
3476 
3477 		/* copy the data into pkt */
3478 		bcopy(source, msgbuf, len);
3479 
3480 		source += len;
3481 		bytes_written += len;
3482 
3483 		/* increment tail */
3484 		tx_tail = (tx_tail + LDC_PACKET_SIZE) & txq_size_mask;
3485 
3486 		ASSERT(tx_tail != tx_head);
3487 	}
3488 
3489 	/* Set the start and stop bits */
3490 	ldcmsg->env |= LDC_FRAG_STOP;
3491 	ldcmsg = (ldc_msg_t *)(ldcp->tx_q_va + start);
3492 	ldcmsg->env |= LDC_FRAG_START;
3493 
3494 	/*
3495 	 * All packets have been copied into the TX queue
3496 	 * update the tail ptr in the HV
3497 	 */
3498 	rv = i_ldc_set_tx_tail(ldcp, tx_tail);
3499 	if (rv == 0) {
3500 		ldcp->tx_tail = tx_tail;
3501 		ldcp->last_msg_snt = curr_seqid;
3502 		*size = bytes_written;
3503 	} else {
3504 		int rv2;
3505 
3506 		if (rv != EWOULDBLOCK) {
3507 			/* cannot write data - reset channel */
3508 			i_ldc_reset(ldcp);
3509 			*size = 0;
3510 			return (ECONNRESET);
3511 		}
3512 
3513 		DWARN(ldcp->id, "hv_tx_set_tail returns 0x%x (head 0x%x, "
3514 			"old tail 0x%x, new tail 0x%x, qsize=0x%x)\n",
3515 			rv, ldcp->tx_head, ldcp->tx_tail, tx_tail,
3516 			(ldcp->tx_q_entries << LDC_PACKET_SHIFT));
3517 
3518 		rv2 = hv_ldc_tx_get_state(ldcp->id,
3519 		    &tx_head, &tx_tail, &ldcp->link_state);
3520 
3521 		DWARN(ldcp->id, "hv_ldc_tx_get_state returns 0x%x "
3522 			"(head 0x%x, tail 0x%x state 0x%x)\n",
3523 			rv2, tx_head, tx_tail, ldcp->link_state);
3524 
3525 		*size = 0;
3526 	}
3527 
3528 	D2(ldcp->id, "ldc_write: (0x%llx) end xfer size=%d", ldcp->id, *size);
3529 
3530 	return (rv);
3531 }
3532 
3533 /*
3534  * Write the specified number of bytes to the channel
3535  * as a byte stream, truncating the request to the
3536  * channel MTU if necessary, and hand off to the
3537  * underlying reliable packet write routine.
3538  *
3539  * On return, size contains the number of bytes written.
3540  * This function ensures that the write size is <= the MTU size
3541  */
3542 static int
3543 i_ldc_write_stream(ldc_chan_t *ldcp, caddr_t buf, size_t *sizep)
3544 {
3545 	ASSERT(mutex_owned(&ldcp->lock));
3546 	ASSERT(ldcp->mode == LDC_MODE_STREAM);
3547 
3548 	/* Truncate packet to max of MTU size */
3549 	if (*sizep > ldcp->mtu) *sizep = ldcp->mtu;
3550 	return (i_ldc_write_packet(ldcp, buf, sizep));
3551 }
3552 
3553 
3554 /*
3555  * Interfaces for channel nexus to register/unregister with LDC module
3556  * The nexus will register functions to be used to register individual
3557  * channels with the nexus and enable interrupts for the channels
3558  */
3559 int
3560 ldc_register(ldc_cnex_t *cinfo)
3561 {
3562 	ldc_chan_t	*ldcp;
3563 
3564 	if (cinfo == NULL || cinfo->dip == NULL ||
3565 	    cinfo->reg_chan == NULL || cinfo->unreg_chan == NULL ||
3566 	    cinfo->add_intr == NULL || cinfo->rem_intr == NULL ||
3567 	    cinfo->clr_intr == NULL) {
3568 
3569 		DWARN(DBG_ALL_LDCS, "ldc_register: invalid nexus info\n");
3570 		return (EINVAL);
3571 	}
3572 
3573 	mutex_enter(&ldcssp->lock);
3574 
3575 	/* nexus registration */
3576 	ldcssp->cinfo.dip = cinfo->dip;
3577 	ldcssp->cinfo.reg_chan = cinfo->reg_chan;
3578 	ldcssp->cinfo.unreg_chan = cinfo->unreg_chan;
3579 	ldcssp->cinfo.add_intr = cinfo->add_intr;
3580 	ldcssp->cinfo.rem_intr = cinfo->rem_intr;
3581 	ldcssp->cinfo.clr_intr = cinfo->clr_intr;
3582 
3583 	/* register any channels that might have been previously initialized */
3584 	ldcp = ldcssp->chan_list;
3585 	while (ldcp) {
3586 		if ((ldcp->tstate & TS_QCONF_RDY) &&
3587 		    (ldcp->tstate & TS_CNEX_RDY) == 0)
3588 			(void) i_ldc_register_channel(ldcp);
3589 
3590 		ldcp = ldcp->next;
3591 	}
3592 
3593 	mutex_exit(&ldcssp->lock);
3594 
3595 	return (0);
3596 }
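
/*
 * Example nexus registration (illustrative sketch; the cnex_*
 * entry points named below are hypothetical):
 *
 *	ldc_cnex_t cinfo;
 *
 *	cinfo.dip = dip;
 *	cinfo.reg_chan = cnex_reg_chan;
 *	cinfo.unreg_chan = cnex_unreg_chan;
 *	cinfo.add_intr = cnex_add_intr;
 *	cinfo.rem_intr = cnex_rem_intr;
 *	cinfo.clr_intr = cnex_clr_intr;
 *	(void) ldc_register(&cinfo);
 */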
3597 
3598 int
3599 ldc_unregister(ldc_cnex_t *cinfo)
3600 {
3601 	if (cinfo == NULL || cinfo->dip == NULL) {
3602 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid nexus info\n");
3603 		return (EINVAL);
3604 	}
3605 
3606 	mutex_enter(&ldcssp->lock);
3607 
3608 	if (cinfo->dip != ldcssp->cinfo.dip) {
3609 		DWARN(DBG_ALL_LDCS, "ldc_unregister: invalid dip\n");
3610 		mutex_exit(&ldcssp->lock);
3611 		return (EINVAL);
3612 	}
3613 
3614 	/* nexus unregister */
3615 	ldcssp->cinfo.dip = NULL;
3616 	ldcssp->cinfo.reg_chan = NULL;
3617 	ldcssp->cinfo.unreg_chan = NULL;
3618 	ldcssp->cinfo.add_intr = NULL;
3619 	ldcssp->cinfo.rem_intr = NULL;
3620 	ldcssp->cinfo.clr_intr = NULL;
3621 
3622 	mutex_exit(&ldcssp->lock);
3623 
3624 	return (0);
3625 }
3626 
3627 
3628 /* ------------------------------------------------------------------------- */
3629 
3630 /*
3631  * Allocate a memory handle for the channel and link it into the list
3632  * Also choose which memory table to use if this is the first handle
3633  * being assigned to this channel
3634  */
3635 int
3636 ldc_mem_alloc_handle(ldc_handle_t handle, ldc_mem_handle_t *mhandle)
3637 {
3638 	ldc_chan_t 	*ldcp;
3639 	ldc_mhdl_t	*mhdl;
3640 	int 		rv;
3641 
3642 	if (handle == NULL) {
3643 		DWARN(DBG_ALL_LDCS,
3644 		    "ldc_mem_alloc_handle: invalid channel handle\n");
3645 		return (EINVAL);
3646 	}
3647 	ldcp = (ldc_chan_t *)handle;
3648 
3649 	mutex_enter(&ldcp->lock);
3650 
3651 	/* check to see if channel is initialized */
3652 	if (ldcp->tstate < TS_INIT) {
3653 		DWARN(ldcp->id,
3654 		    "ldc_mem_alloc_handle: (0x%llx) channel not initialized\n",
3655 		    ldcp->id);
3656 		mutex_exit(&ldcp->lock);
3657 		return (EINVAL);
3658 	}
3659 
3660 	/*
3661 	 * If this channel is allocating a mem handle for the
3662 	 * first time allocate it a memory map table and initialize it
3663 	 */
3664 	if (ldcp->mtbl == NULL) {
3665 
3666 		ldc_mtbl_t *mtbl;
3667 
3668 		/* Allocate and initialize the map table structure */
3669 		mtbl = kmem_zalloc(sizeof (ldc_mtbl_t), KM_SLEEP);
3670 		mtbl->size = MTBL_MAX_SIZE;
3671 		mtbl->num_entries = mtbl->num_avail =
3672 			(MTBL_MAX_SIZE/sizeof (ldc_mte_slot_t));
3673 		mtbl->next_entry = NULL;
3674 
3675 		/* Allocate the table itself */
3676 		mtbl->table = (ldc_mte_slot_t *)
3677 			contig_mem_alloc_align(mtbl->size, MMU_PAGESIZE);
3678 		if (mtbl->table == NULL) {
3679 			cmn_err(CE_WARN,
3680 			    "ldc_mem_alloc_handle: (0x%lx) error allocating "
3681 			    "table memory", ldcp->id);
3682 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3683 			mutex_exit(&ldcp->lock);
3684 			return (ENOMEM);
3685 		}
3686 
3687 		/* zero out the memory */
3688 		bzero(mtbl->table, mtbl->size);
3689 
3690 		/* initialize the lock */
3691 		mutex_init(&mtbl->lock, NULL, MUTEX_DRIVER, NULL);
3692 
3693 		/* register table for this channel */
3694 		rv = hv_ldc_set_map_table(ldcp->id,
3695 		    va_to_pa(mtbl->table), mtbl->num_entries);
3696 		if (rv != 0) {
3697 			cmn_err(CE_WARN,
3698 			    "ldc_mem_alloc_handle: (0x%lx) err %d mapping tbl",
3699 			    ldcp->id, rv);
3700 			contig_mem_free(mtbl->table, mtbl->size);
3701 			mutex_destroy(&mtbl->lock);
3702 			kmem_free(mtbl, sizeof (ldc_mtbl_t));
3703 			mutex_exit(&ldcp->lock);
3704 			return (EIO);
3705 		}
3706 
3707 		ldcp->mtbl = mtbl;
3708 
3709 		D1(ldcp->id,
3710 		    "ldc_mem_alloc_handle: (0x%llx) alloc'd map table 0x%llx\n",
3711 		    ldcp->id, ldcp->mtbl->table);
3712 	}
3713 
3714 	/* allocate handle for channel */
3715 	mhdl = kmem_zalloc(sizeof (ldc_mhdl_t), KM_SLEEP);
3716 
3717 	/* initialize the lock */
3718 	mutex_init(&mhdl->lock, NULL, MUTEX_DRIVER, NULL);
3719 
3720 	mhdl->status = LDC_UNBOUND;
3721 	mhdl->ldcp = ldcp;
3722 
3723 	/* insert memory handle (@ head) into list */
3724 	if (ldcp->mhdl_list == NULL) {
3725 		ldcp->mhdl_list = mhdl;
3726 		mhdl->next = NULL;
3727 	} else {
3728 		/* insert @ head */
3729 		mhdl->next = ldcp->mhdl_list;
3730 		ldcp->mhdl_list = mhdl;
3731 	}
3732 
3733 	/* return the handle */
3734 	*mhandle = (ldc_mem_handle_t)mhdl;
3735 
3736 	mutex_exit(&ldcp->lock);
3737 
3738 	D1(ldcp->id, "ldc_mem_alloc_handle: (0x%llx) allocated handle 0x%llx\n",
3739 	    ldcp->id, mhdl);
3740 
3741 	return (0);
3742 }
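
/*
 * Example handle lifecycle (illustrative sketch; error handling
 * elided): a handle is allocated against an initialized channel
 * and must be freed with ldc_mem_free_handle() once unbound:
 *
 *	ldc_mem_handle_t mh;
 *
 *	(void) ldc_mem_alloc_handle(hdl, &mh);
 *	... bind, use and unbind the handle ...
 *	(void) ldc_mem_free_handle(mh);
 */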
3743 
3744 /*
3745  * Free memory handle for the channel and unlink it from the list
3746  */
3747 int
3748 ldc_mem_free_handle(ldc_mem_handle_t mhandle)
3749 {
3750 	ldc_mhdl_t 	*mhdl, *phdl;
3751 	ldc_chan_t 	*ldcp;
3752 
3753 	if (mhandle == NULL) {
3754 		DWARN(DBG_ALL_LDCS,
3755 		    "ldc_mem_free_handle: invalid memory handle\n");
3756 		return (EINVAL);
3757 	}
3758 	mhdl = (ldc_mhdl_t *)mhandle;
3759 
3760 	mutex_enter(&mhdl->lock);
3761 
3762 	ldcp = mhdl->ldcp;
3763 
3764 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
3765 		DWARN(ldcp->id,
3766 		    "ldc_mem_free_handle: cannot free, 0x%llx hdl bound\n",
3767 		    mhdl);
3768 		mutex_exit(&mhdl->lock);
3769 		return (EINVAL);
3770 	}
3771 	mutex_exit(&mhdl->lock);
3772 
3773 	mutex_enter(&ldcp->mlist_lock);
3774 
3775 	phdl = ldcp->mhdl_list;
3776 
3777 	/* first handle */
3778 	if (phdl == mhdl) {
3779 		ldcp->mhdl_list = mhdl->next;
3780 		mutex_destroy(&mhdl->lock);
3781 		kmem_free(mhdl, sizeof (ldc_mhdl_t));
3782 		D1(ldcp->id,
3783 		    "ldc_mem_free_handle: (0x%llx) freed handle 0x%llx\n",
3784 		    ldcp->id, mhdl);
3785 	} else {
3786 		/* walk the list - unlink and free */
3787 		while (phdl != NULL) {
3788 			if (phdl->next == mhdl) {
3789 				phdl->next = mhdl->next;
3790 				mutex_destroy(&mhdl->lock);
3791 				kmem_free(mhdl, sizeof (ldc_mhdl_t));
3792 				D1(ldcp->id,
3793 				    "ldc_mem_free_handle: (0x%llx) freed "
3794 				    "handle 0x%llx\n", ldcp->id, mhdl);
3795 				break;
3796 			}
3797 			phdl = phdl->next;
3798 		}
3799 	}
3800 
3801 	if (phdl == NULL) {
3802 		DWARN(ldcp->id,
3803 		    "ldc_mem_free_handle: invalid handle 0x%llx\n", mhdl);
3804 		mutex_exit(&ldcp->mlist_lock);
3805 		return (EINVAL);
3806 	}
3807 
3808 	mutex_exit(&ldcp->mlist_lock);
3809 
3810 	return (0);
3811 }
3812 
3813 /*
3814  * Bind a memory handle to a virtual address.
3815  * The virtual address is converted to the corresponding real addresses.
3816  * Returns pointer to the first ldc_mem_cookie and the total number
3817  * of cookies for this virtual address. Other cookies can be obtained
3818  * using the ldc_mem_nextcookie() call. If the pages are stored in
3819  * consecutive locations in the table, a single cookie corresponding to
3820  * the first location is returned. The cookie size spans all the entries.
3821  *
3822  * If the VA corresponds to a page that is already being exported, reuse
3823  * the page and do not export it again. Bump the page's use count.
3824  */
3825 int
3826 ldc_mem_bind_handle(ldc_mem_handle_t mhandle, caddr_t vaddr, size_t len,
3827     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
3828 {
3829 	ldc_mhdl_t	*mhdl;
3830 	ldc_chan_t 	*ldcp;
3831 	ldc_mtbl_t	*mtbl;
3832 	ldc_memseg_t	*memseg;
3833 	ldc_mte_t	tmp_mte;
3834 	uint64_t	index, prev_index = 0;
3835 	int64_t		cookie_idx;
3836 	uintptr_t	raddr, ra_aligned;
3837 	uint64_t	psize, poffset, v_offset;
3838 	uint64_t	pg_shift, pg_size, pg_size_code, pg_mask;
3839 	pgcnt_t		npages;
3840 	caddr_t		v_align, addr;
3841 	int 		i;
3842 
3843 	if (mhandle == NULL) {
3844 		DWARN(DBG_ALL_LDCS,
3845 		    "ldc_mem_bind_handle: invalid memory handle\n");
3846 		return (EINVAL);
3847 	}
3848 	mhdl = (ldc_mhdl_t *)mhandle;
3849 	ldcp = mhdl->ldcp;
3850 	mtbl = ldcp->mtbl;
3851 
3852 	/* clear count */
3853 	*ccount = 0;
3854 
3855 	mutex_enter(&mhdl->lock);
3856 
3857 	if (mhdl->status == LDC_BOUND || mhdl->memseg != NULL) {
3858 		DWARN(ldcp->id,
3859 		    "ldc_mem_bind_handle: (0x%x) handle already bound\n",
3860 		    mhandle);
3861 		mutex_exit(&mhdl->lock);
3862 		return (EINVAL);
3863 	}
3864 
3865 	/* Force address and size to be 8-byte aligned */
3866 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
3867 		DWARN(ldcp->id,
3868 		    "ldc_mem_bind_handle: addr/size is not 8-byte aligned\n");
3869 		mutex_exit(&mhdl->lock);
3870 		return (EINVAL);
3871 	}
3872 
3873 	/* FUTURE: get the page size, pgsz code, and shift */
3874 	pg_size = MMU_PAGESIZE;
3875 	pg_size_code = page_szc(pg_size);
3876 	pg_shift = page_get_shift(pg_size_code);
3877 	pg_mask = ~(pg_size - 1);
3878 
3879 	D1(ldcp->id, "ldc_mem_bind_handle: (0x%llx) binding "
3880 	    "va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
3881 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
3882 
3883 	/* aligned VA and its offset */
3884 	v_align = (caddr_t)(((uintptr_t)vaddr) & ~(pg_size - 1));
3885 	v_offset = ((uintptr_t)vaddr) & (pg_size - 1);
3886 
3887 	npages = (len+v_offset)/pg_size;
3888 	npages = ((len+v_offset)%pg_size == 0) ? npages : npages+1;
3889 
3890 	D1(ldcp->id, "ldc_mem_bind_handle: binding "
3891 	    "(0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
3892 	    ldcp->id, vaddr, v_align, v_offset, npages);
3893 
3894 	/* lock the memory table - exclusive access to channel */
3895 	mutex_enter(&mtbl->lock);
3896 
3897 	if (npages > mtbl->num_avail) {
3898 		DWARN(ldcp->id,
3899 		    "ldc_mem_bind_handle: (0x%llx) no table entries\n",
3900 		    ldcp->id);
3901 		mutex_exit(&mtbl->lock);
3902 		mutex_exit(&mhdl->lock);
3903 		return (ENOMEM);
3904 	}
3905 
3906 	/* Allocate a memseg structure */
3907 	memseg = mhdl->memseg = kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
3908 
3909 	/* Allocate memory to store all pages and cookies */
3910 	memseg->pages = kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
3911 	memseg->cookies =
3912 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * npages), KM_SLEEP);
3913 
3914 	D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) processing 0x%llx pages\n",
3915 	    ldcp->id, npages);
3916 
3917 	addr = v_align;
3918 
3919 	/*
3920 	 * Table slots are used in a round-robin manner. The algorithm permits
3921 	 * inserting duplicate entries. Slots allocated earlier will typically
3922 	 * get freed before we get back to reusing the slot. Inserting duplicate
3923 	 * entries should be OK as we only look up entries using the cookie addr,
3924 	 * i.e. tbl index, during export, unexport and copy operations.
3925 	 *
3926 	 * One implementation that was tried was to search for a duplicate
3927 	 * page entry first and reuse it. The search overhead is very high and
3928 	 * in the vnet case dropped the perf by almost half, 50 to 24 mbps.
3929 	 * So it does make sense to avoid searching for duplicates.
3930 	 *
3931 	 * But during the process of searching for a free slot, if we find a
3932 	 * duplicate entry we will go ahead and use it, and bump its use count.
3933 	 */
3934 
3935 	/* index to start searching from */
3936 	index = mtbl->next_entry;
3937 	cookie_idx = -1;
3938 
3939 	tmp_mte.ll = 0;	/* initialise fields to 0 */
3940 
3941 	if (mtype & LDC_DIRECT_MAP) {
3942 		tmp_mte.mte_r = (perm & LDC_MEM_R) ? 1 : 0;
3943 		tmp_mte.mte_w = (perm & LDC_MEM_W) ? 1 : 0;
3944 		tmp_mte.mte_x = (perm & LDC_MEM_X) ? 1 : 0;
3945 	}
3946 
3947 	if (mtype & LDC_SHADOW_MAP) {
3948 		tmp_mte.mte_cr = (perm & LDC_MEM_R) ? 1 : 0;
3949 		tmp_mte.mte_cw = (perm & LDC_MEM_W) ? 1 : 0;
3950 	}
3951 
3952 	if (mtype & LDC_IO_MAP) {
3953 		tmp_mte.mte_ir = (perm & LDC_MEM_R) ? 1 : 0;
3954 		tmp_mte.mte_iw = (perm & LDC_MEM_W) ? 1 : 0;
3955 	}
3956 
3957 	D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
3958 
3959 	tmp_mte.mte_pgszc = pg_size_code;
3960 
3961 	/* initialize each mem table entry */
3962 	for (i = 0; i < npages; i++) {
3963 
3964 		/* check if slot is available in the table */
3965 		while (mtbl->table[index].entry.ll != 0) {
3966 
3967 			index = (index + 1) % mtbl->num_entries;
3968 
3969 			if (index == mtbl->next_entry) {
3970 				/* we have looped around */
3971 				DWARN(DBG_ALL_LDCS,
3972 				    "ldc_mem_bind_handle: (0x%llx) cannot find "
3973 				    "entry\n", ldcp->id);
3974 				*ccount = 0;
3975 
3976 				/* NOTE: free memory, remove previous entries */
3977 				/* this shouldn't happen as num_avail was ok */
3978 
3979 				mutex_exit(&mtbl->lock);
3980 				mutex_exit(&mhdl->lock);
3981 				return (ENOMEM);
3982 			}
3983 		}
3984 
3985 		/* get the real address */
3986 		raddr = va_to_pa((void *)addr);
3987 		ra_aligned = ((uintptr_t)raddr & pg_mask);
3988 
3989 		/* build the mte */
3990 		tmp_mte.mte_rpfn = ra_aligned >> pg_shift;
3991 
3992 		D1(ldcp->id, "ldc_mem_bind_handle mte=0x%llx\n", tmp_mte.ll);
3993 
3994 		/* update entry in table */
3995 		mtbl->table[index].entry = tmp_mte;
3996 
3997 		D2(ldcp->id, "ldc_mem_bind_handle: (0x%llx) stored MTE 0x%llx"
3998 		    " into loc 0x%llx\n", ldcp->id, tmp_mte.ll, index);
3999 
4000 		/* calculate the size and offset for this export range */
4001 		if (i == 0) {
4002 			/* first page */
4003 			psize = min((pg_size - v_offset), len);
4004 			poffset = v_offset;
4005 
4006 		} else if (i == (npages - 1)) {
4007 			/* last page */
4008 			psize =	(((uintptr_t)(vaddr + len)) &
4009 				    ((uint64_t)(pg_size-1)));
4010 			if (psize == 0)
4011 				psize = pg_size;
4012 			poffset = 0;
4013 
4014 		} else {
4015 			/* middle pages */
4016 			psize = pg_size;
4017 			poffset = 0;
4018 		}
4019 
4020 		/* store entry for this page */
4021 		memseg->pages[i].index = index;
4022 		memseg->pages[i].raddr = raddr;
4023 		memseg->pages[i].offset = poffset;
4024 		memseg->pages[i].size = psize;
4025 		memseg->pages[i].mte = &(mtbl->table[index]);
4026 
4027 		/* create the cookie */
4028 		if (i == 0 || (index != prev_index + 1)) {
4029 			cookie_idx++;
4030 			memseg->cookies[cookie_idx].addr =
4031 				IDX2COOKIE(index, pg_size_code, pg_shift);
4032 			memseg->cookies[cookie_idx].addr |= poffset;
4033 			memseg->cookies[cookie_idx].size = psize;
4034 
4035 		} else {
4036 			memseg->cookies[cookie_idx].size += psize;
4037 		}
4038 
4039 		D1(ldcp->id, "ldc_mem_bind_handle: bound "
4040 		    "(0x%llx) va=0x%llx, idx=0x%llx, "
4041 		    "ra=0x%llx(sz=0x%x,off=0x%x)\n",
4042 		    ldcp->id, addr, index, raddr, psize, poffset);
4043 
4044 		/* decrement number of available entries */
4045 		mtbl->num_avail--;
4046 
4047 		/* increment va by page size */
4048 		addr += pg_size;
4049 
4050 		/* increment index */
4051 		prev_index = index;
4052 		index = (index + 1) % mtbl->num_entries;
4053 
4054 		/* save the next slot */
4055 		mtbl->next_entry = index;
4056 	}
4057 
4058 	mutex_exit(&mtbl->lock);
4059 
4060 	/* memory handle = bound */
4061 	mhdl->mtype = mtype;
4062 	mhdl->perm = perm;
4063 	mhdl->status = LDC_BOUND;
4064 
4065 	/* update memseg_t */
4066 	memseg->vaddr = vaddr;
4067 	memseg->raddr = memseg->pages[0].raddr;
4068 	memseg->size = len;
4069 	memseg->npages = npages;
4070 	memseg->ncookies = cookie_idx + 1;
4071 	memseg->next_cookie = (memseg->ncookies > 1) ? 1 : 0;
4072 
4073 	/* return count and first cookie */
4074 	*ccount = memseg->ncookies;
4075 	cookie->addr = memseg->cookies[0].addr;
4076 	cookie->size = memseg->cookies[0].size;
4077 
4078 	D1(ldcp->id,
4079 	    "ldc_mem_bind_handle: (0x%llx) bound 0x%llx, va=0x%llx, "
4080 	    "pgs=0x%llx cookies=0x%llx\n",
4081 	    ldcp->id, mhdl, vaddr, npages, memseg->ncookies);
4082 
4083 	mutex_exit(&mhdl->lock);
4084 	return (0);
4085 }
4086 
4087 /*
4088  * Return the next cookie associated with the specified memory handle
4089  */
4090 int
4091 ldc_mem_nextcookie(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie)
4092 {
4093 	ldc_mhdl_t	*mhdl;
4094 	ldc_chan_t 	*ldcp;
4095 	ldc_memseg_t	*memseg;
4096 
4097 	if (mhandle == NULL) {
4098 		DWARN(DBG_ALL_LDCS,
4099 		    "ldc_mem_nextcookie: invalid memory handle\n");
4100 		return (EINVAL);
4101 	}
4102 	mhdl = (ldc_mhdl_t *)mhandle;
4103 
4104 	mutex_enter(&mhdl->lock);
4105 
4106 	ldcp = mhdl->ldcp;
4107 	memseg = mhdl->memseg;
4108 
4109 	if (cookie == NULL) {
4110 		DWARN(ldcp->id,
4111 		    "ldc_mem_nextcookie:(0x%llx) invalid cookie arg\n",
4112 		    ldcp->id);
4113 		mutex_exit(&mhdl->lock);
4114 		return (EINVAL);
4115 	}
4116 
4117 	if (memseg->next_cookie != 0) {
4118 		cookie->addr = memseg->cookies[memseg->next_cookie].addr;
4119 		cookie->size = memseg->cookies[memseg->next_cookie].size;
4120 		memseg->next_cookie++;
4121 		if (memseg->next_cookie == memseg->ncookies)
4122 			memseg->next_cookie = 0;
4123 
4124 	} else {
4125 		DWARN(ldcp->id,
4126 		    "ldc_mem_nextcookie:(0x%llx) no more cookies\n", ldcp->id);
4127 		cookie->addr = 0;
4128 		cookie->size = 0;
4129 		mutex_exit(&mhdl->lock);
4130 		return (EINVAL);
4131 	}
4132 
4133 	D1(ldcp->id,
4134 	    "ldc_mem_nextcookie: (0x%llx) cookie addr=0x%llx,sz=0x%llx\n",
4135 	    ldcp->id, cookie->addr, cookie->size);
4136 
4137 	mutex_exit(&mhdl->lock);
4138 	return (0);
4139 }
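
/*
 * Usage sketch (illustrative only, not part of this file): an exporter
 * binds a buffer and forwards every cookie to its peer. 'chan', 'buf'
 * and 'len' are assumed to come from the caller, send_cookie_to_peer()
 * is a hypothetical transport routine, and error handling is trimmed.
 *
 *	ldc_mem_handle_t mh;
 *	ldc_mem_cookie_t ck;
 *	uint32_t ccount, i;
 *
 *	(void) ldc_mem_alloc_handle(chan, &mh);
 *	(void) ldc_mem_bind_handle(mh, buf, len, LDC_SHADOW_MAP,
 *	    LDC_MEM_RW, &ck, &ccount);
 *	send_cookie_to_peer(&ck);
 *	for (i = 1; i < ccount; i++) {
 *		(void) ldc_mem_nextcookie(mh, &ck);
 *		send_cookie_to_peer(&ck);
 *	}
 */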
4140 
4141 /*
4142  * Unbind the virtual memory region associated with the specified
4143  * memory handle. All associated cookies are freed and the corresponding
4144  * RA space is no longer exported.
4145  */
4146 int
4147 ldc_mem_unbind_handle(ldc_mem_handle_t mhandle)
4148 {
4149 	ldc_mhdl_t	*mhdl;
4150 	ldc_chan_t 	*ldcp;
4151 	ldc_mtbl_t	*mtbl;
4152 	ldc_memseg_t	*memseg;
4153 	int		i;
4154 
4155 	if (mhandle == NULL) {
4156 		DWARN(DBG_ALL_LDCS,
4157 		    "ldc_mem_unbind_handle: invalid memory handle\n");
4158 		return (EINVAL);
4159 	}
4160 	mhdl = (ldc_mhdl_t *)mhandle;
4161 
4162 	mutex_enter(&mhdl->lock);
4163 
4164 	if (mhdl->status == LDC_UNBOUND) {
4165 		DWARN(DBG_ALL_LDCS,
4166 		    "ldc_mem_unbind_handle: (0x%x) handle is not bound\n",
4167 		    mhandle);
4168 		mutex_exit(&mhdl->lock);
4169 		return (EINVAL);
4170 	}
4171 
4172 	ldcp = mhdl->ldcp;
4173 	mtbl = ldcp->mtbl;
4174 
4175 	memseg = mhdl->memseg;
4176 
4177 	/* lock the memory table - exclusive access to channel */
4178 	mutex_enter(&mtbl->lock);
4179 
4180 	/* undo the pages exported */
4181 	for (i = 0; i < memseg->npages; i++) {
4182 
4183 		/* FUTURE: check for mapped pages */
4184 		if (memseg->pages[i].mte->cookie) {
4185 			_NOTE(EMPTY)
4186 		}
4187 
4188 		/* clear the entry from the table */
4189 		memseg->pages[i].mte->entry.ll = 0;
4190 		mtbl->num_avail++;
4191 	}
4192 	mutex_exit(&mtbl->lock);
4193 
4194 	/* free the allocated memseg and page structures */
4195 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4196 	kmem_free(memseg->cookies,
4197 	    (sizeof (ldc_mem_cookie_t) * memseg->npages));
4198 	kmem_free(memseg, sizeof (ldc_memseg_t));
4199 
4200 	/* uninitialize the memory handle */
4201 	mhdl->memseg = NULL;
4202 	mhdl->status = LDC_UNBOUND;
4203 
4204 	D1(ldcp->id, "ldc_mem_unbind_handle: (0x%llx) unbound handle 0x%llx\n",
4205 	    ldcp->id, mhdl);
4206 
4207 	mutex_exit(&mhdl->lock);
4208 	return (0);
4209 }
4210 
4211 /*
4212  * Get information about the memory handle. The base address of the
4213  * memory segment along with the type and permission are returned back.
4214  */
4215 int
4216 ldc_mem_info(ldc_mem_handle_t mhandle, ldc_mem_info_t *minfo)
4217 {
4218 	ldc_mhdl_t	*mhdl;
4219 
4220 	if (mhandle == NULL) {
4221 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid memory handle\n");
4222 		return (EINVAL);
4223 	}
4224 	mhdl = (ldc_mhdl_t *)mhandle;
4225 
4226 	if (minfo == NULL) {
4227 		DWARN(DBG_ALL_LDCS, "ldc_mem_info: invalid args\n");
4228 		return (EINVAL);
4229 	}
4230 
4231 	mutex_enter(&mhdl->lock);
4232 
4233 	minfo->status = mhdl->status;
4234 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED) {
4235 		minfo->vaddr = mhdl->memseg->vaddr;
4236 		minfo->raddr = mhdl->memseg->raddr;
4237 		minfo->mtype = mhdl->mtype;
4238 		minfo->perm = mhdl->perm;
4239 	}
4240 	mutex_exit(&mhdl->lock);
4241 
4242 	return (0);
4243 }
4244 
4245 /*
4246  * Copy data between the client-specified virtual address space and
4247  * the exported memory associated with the cookies.
4248  * The direction argument determines whether the data is read from or
4249  * written to exported memory.
4250  */
4251 int
4252 ldc_mem_copy(ldc_handle_t handle, caddr_t vaddr, uint64_t off, size_t *size,
4253     ldc_mem_cookie_t *cookies, uint32_t ccount, uint8_t direction)
4254 {
4255 	ldc_chan_t 	*ldcp;
4256 	uint64_t	local_voff, local_valign;
4257 	uint64_t	cookie_addr, cookie_size;
4258 	uint64_t	pg_shift, pg_size, pg_size_code;
4259 	uint64_t 	export_caddr, export_poff, export_psize, export_size;
4260 	uint64_t	local_ra, local_poff, local_psize;
4261 	uint64_t	copy_size, copied_len = 0, total_bal = 0, idx = 0;
4262 	pgcnt_t		npages;
4263 	size_t		len = *size;
4264 	int 		i, rv = 0;
4265 
4266 	if (handle == NULL) {
4267 		DWARN(DBG_ALL_LDCS, "ldc_mem_copy: invalid channel handle\n");
4268 		return (EINVAL);
4269 	}
4270 	ldcp = (ldc_chan_t *)handle;
4271 
4272 	mutex_enter(&ldcp->lock);
4273 
4274 	/* check to see if channel is UP */
4275 	if (ldcp->tstate != TS_UP) {
4276 		DWARN(ldcp->id, "ldc_mem_copy: (0x%llx) channel is not UP\n",
4277 		    ldcp->id);
4278 		mutex_exit(&ldcp->lock);
4279 		return (EINVAL);
4280 	}
4281 
4282 	/* Ensure the address and size are 8-byte aligned */
4283 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4284 		DWARN(ldcp->id,
4285 		    "ldc_mem_copy: addr/sz is not 8-byte aligned\n");
4286 		mutex_exit(&ldcp->lock);
4287 		return (EINVAL);
4288 	}
4289 
4290 	/* Find the size of the exported memory */
4291 	export_size = 0;
4292 	for (i = 0; i < ccount; i++)
4293 		export_size += cookies[i].size;
4294 
4295 	/* check to see if offset is valid */
4296 	if (off > export_size) {
4297 		DWARN(ldcp->id,
4298 		    "ldc_mem_copy: (0x%llx) start offset > export mem size\n",
4299 		    ldcp->id);
4300 		mutex_exit(&ldcp->lock);
4301 		return (EINVAL);
4302 	}
4303 
4304 	/*
4305 	 * Check to see if the export size is smaller than the size we
4306 	 * are requesting to copy - if so flag an error
4307 	 */
4308 	if ((export_size - off) < *size) {
4309 		DWARN(ldcp->id,
4310 		    "ldc_mem_copy: (0x%llx) copy size > export mem size\n",
4311 		    ldcp->id);
4312 		mutex_exit(&ldcp->lock);
4313 		return (EINVAL);
4314 	}
4315 
4316 	total_bal = min(export_size, *size);
4317 
4318 	/* FUTURE: get the page size, pgsz code, and shift */
4319 	pg_size = MMU_PAGESIZE;
4320 	pg_size_code = page_szc(pg_size);
4321 	pg_shift = page_get_shift(pg_size_code);
4322 
4323 	D1(ldcp->id, "ldc_mem_copy: copying data "
4324 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4325 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4326 
4327 	/* aligned VA and its offset */
4328 	local_valign = (((uintptr_t)vaddr) & ~(pg_size - 1));
4329 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4330 
4331 	npages = (len+local_voff)/pg_size;
4332 	npages = ((len+local_voff)%pg_size == 0) ? npages : npages+1;
4333 
4334 	D1(ldcp->id,
4335 	    "ldc_mem_copy: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4336 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4337 
4338 	local_ra = va_to_pa((void *)local_valign);
4339 	local_poff = local_voff;
4340 	local_psize = min(len, (pg_size - local_voff));
4341 
4342 	len -= local_psize;
4343 
4344 	/*
4345 	 * find the first cookie in the list of cookies
4346 	 * if the offset passed in is not zero
4347 	 */
4348 	for (idx = 0; idx < ccount; idx++) {
4349 		cookie_size = cookies[idx].size;
4350 		if (off < cookie_size)
4351 			break;
4352 		off -= cookie_size;
4353 	}
4354 
4355 	cookie_addr = cookies[idx].addr + off;
4356 	cookie_size = cookies[idx].size - off;
4357 
4358 	export_caddr = cookie_addr & ~(pg_size - 1);
4359 	export_poff = cookie_addr & (pg_size - 1);
4360 	export_psize = min(cookie_size, (pg_size - export_poff));
4361 
4362 	for (;;) {
4363 
4364 		copy_size = min(export_psize, local_psize);
4365 
4366 		D1(ldcp->id,
4367 		    "ldc_mem_copy:(0x%llx) dir=0x%x, caddr=0x%llx,"
4368 		    " loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4369 		    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4370 		    " total_bal=0x%llx\n",
4371 		    ldcp->id, direction, export_caddr, local_ra, export_poff,
4372 		    local_poff, export_psize, local_psize, copy_size,
4373 		    total_bal);
4374 
4375 		rv = hv_ldc_copy(ldcp->id, direction,
4376 		    (export_caddr + export_poff), (local_ra + local_poff),
4377 		    copy_size, &copied_len);
4378 
4379 		if (rv != 0) {
4380 			cmn_err(CE_WARN,
4381 			    "ldc_mem_copy: (0x%lx) err %d during copy\n",
4382 			    ldcp->id, rv);
4383 			DWARN(DBG_ALL_LDCS,
4384 			    "ldc_mem_copy: (0x%llx) dir=0x%x, caddr=0x%llx, "
4385 			    "loc_ra=0x%llx, exp_poff=0x%llx, loc_poff=0x%llx,"
4386 			    " exp_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4387 			    " copied_len=0x%llx, total_bal=0x%llx\n",
4388 			    ldcp->id, direction, export_caddr, local_ra,
4389 			    export_poff, local_poff, export_psize, local_psize,
4390 			    copy_size, copied_len, total_bal);
4391 
4392 			*size = *size - total_bal;
4393 			mutex_exit(&ldcp->lock);
4394 			return (EIO);
4395 		}
4396 
4397 		ASSERT(copied_len <= copy_size);
4398 
4399 		D2(ldcp->id, "ldc_mem_copy: copied=0x%llx\n", copied_len);
4400 		export_poff += copied_len;
4401 		local_poff += copied_len;
4402 		export_psize -= copied_len;
4403 		local_psize -= copied_len;
4404 		cookie_size -= copied_len;
4405 
4406 		total_bal -= copied_len;
4407 
4408 		if (copy_size != copied_len)
4409 			continue;
4410 
4411 		if (export_psize == 0 && total_bal != 0) {
4412 
4413 			if (cookie_size == 0) {
4414 				idx++;
4415 				cookie_addr = cookies[idx].addr;
4416 				cookie_size = cookies[idx].size;
4417 
4418 				export_caddr = cookie_addr & ~(pg_size - 1);
4419 				export_poff = cookie_addr & (pg_size - 1);
4420 				export_psize =
4421 					min(cookie_size, (pg_size-export_poff));
4422 			} else {
4423 				export_caddr += pg_size;
4424 				export_poff = 0;
4425 				export_psize = min(cookie_size, pg_size);
4426 			}
4427 		}
4428 
4429 		if (local_psize == 0 && total_bal != 0) {
4430 			local_valign += pg_size;
4431 			local_ra = va_to_pa((void *)local_valign);
4432 			local_poff = 0;
4433 			local_psize = min(pg_size, len);
4434 			len -= local_psize;
4435 		}
4436 
4437 		/* check if we are all done */
4438 		if (total_bal == 0)
4439 			break;
4440 	}
4441 
4442 	mutex_exit(&ldcp->lock);
4443 
4444 	D1(ldcp->id,
4445 	    "ldc_mem_copy: (0x%llx) done copying sz=0x%llx\n",
4446 	    ldcp->id, *size);
4447 
4448 	return (0);
4449 }
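
/*
 * Usage sketch (illustrative only): given the 'ccount' cookies received
 * from the peer, push a local buffer out into the exported memory. The
 * buffer address and size must be 8-byte aligned; on an EIO return,
 * 'sz' is updated to the number of bytes actually copied. 'chan',
 * 'mybuf' and 'cookies' are assumed to exist in the caller.
 *
 *	size_t sz = sizeof (mybuf);
 *
 *	rv = ldc_mem_copy(chan, mybuf, 0, &sz, cookies, ccount,
 *	    LDC_COPY_OUT);
 */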
4450 
4451 /*
4452  * Copy data between the client-specified virtual address space and
4453  * HV physical memory.
4454  *
4455  * The direction argument determines whether the data is read from or
4456  * written to HV memory. direction values are LDC_COPY_IN/OUT similar
4457  * to the ldc_mem_copy interface
4458  */
4459 int
4460 ldc_mem_rdwr_pa(ldc_handle_t handle, caddr_t vaddr, size_t *size,
4461     caddr_t paddr, uint8_t direction)
4462 {
4463 	ldc_chan_t 	*ldcp;
4464 	uint64_t	local_voff, local_valign;
4465 	uint64_t	pg_shift, pg_size, pg_size_code;
4466 	uint64_t 	target_pa, target_poff, target_psize, target_size;
4467 	uint64_t	local_ra, local_poff, local_psize;
4468 	uint64_t	copy_size, copied_len = 0;
4469 	pgcnt_t		npages;
4470 	size_t		len = *size;
4471 	int 		rv = 0;
4472 
4473 	if (handle == NULL) {
4474 		DWARN(DBG_ALL_LDCS,
4475 		    "ldc_mem_rdwr_pa: invalid channel handle\n");
4476 		return (EINVAL);
4477 	}
4478 	ldcp = (ldc_chan_t *)handle;
4479 
4480 	mutex_enter(&ldcp->lock);
4481 
4482 	/* check to see if channel is UP */
4483 	if (ldcp->tstate != TS_UP) {
4484 		DWARN(ldcp->id,
4485 		    "ldc_mem_rdwr_pa: (0x%llx) channel is not UP\n",
4486 		    ldcp->id);
4487 		mutex_exit(&ldcp->lock);
4488 		return (EINVAL);
4489 	}
4490 
4491 	/* Ensure the address and size are 8-byte aligned */
4492 	if ((((uintptr_t)vaddr | len) & 0x7) != 0) {
4493 		DWARN(ldcp->id,
4494 		    "ldc_mem_rdwr_pa: addr/size is not 8-byte aligned\n");
4495 		mutex_exit(&ldcp->lock);
4496 		return (EINVAL);
4497 	}
4498 
4499 	target_size = *size;
4500 
4501 	/* FUTURE: get the page size, pgsz code, and shift */
4502 	pg_size = MMU_PAGESIZE;
4503 	pg_size_code = page_szc(pg_size);
4504 	pg_shift = page_get_shift(pg_size_code);
4505 
4506 	D1(ldcp->id, "ldc_mem_rdwr_pa: copying data "
4507 	    "(0x%llx) va 0x%llx pgsz=0x%llx, pgszc=0x%llx, pg_shift=0x%llx\n",
4508 	    ldcp->id, vaddr, pg_size, pg_size_code, pg_shift);
4509 
4510 	/* aligned VA and its offset */
4511 	local_valign = ((uintptr_t)vaddr) & ~(pg_size - 1);
4512 	local_voff = ((uintptr_t)vaddr) & (pg_size - 1);
4513 
4514 	npages = (len + local_voff) / pg_size;
4515 	npages = ((len + local_voff) % pg_size == 0) ? npages : npages+1;
4516 
4517 	D1(ldcp->id,
4518 	    "ldc_mem_rdwr_pa: (0x%llx) v=0x%llx,val=0x%llx,off=0x%x,pgs=0x%x\n",
4519 	    ldcp->id, vaddr, local_valign, local_voff, npages);
4520 
4521 	local_ra = va_to_pa((void *)local_valign);
4522 	local_poff = local_voff;
4523 	local_psize = min(len, (pg_size - local_voff));
4524 
4525 	len -= local_psize;
4526 
4527 	target_pa = ((uintptr_t)paddr) & ~(pg_size - 1);
4528 	target_poff = ((uintptr_t)paddr) & (pg_size - 1);
4529 	target_psize = pg_size - target_poff;
4530 
4531 	for (;;) {
4532 
4533 		copy_size = min(target_psize, local_psize);
4534 
4535 		D1(ldcp->id,
4536 		    "ldc_mem_rdwr_pa: (0x%llx) dir=0x%x, tar_pa=0x%llx,"
4537 		    " loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4538 		    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4539 		    " total_bal=0x%llx\n",
4540 		    ldcp->id, direction, target_pa, local_ra, target_poff,
4541 		    local_poff, target_psize, local_psize, copy_size,
4542 		    target_size);
4543 
4544 		rv = hv_ldc_copy(ldcp->id, direction,
4545 		    (target_pa + target_poff), (local_ra + local_poff),
4546 		    copy_size, &copied_len);
4547 
4548 		if (rv != 0) {
4549 			cmn_err(CE_WARN,
4550 			    "ldc_mem_rdwr_pa: (0x%lx) err %d during copy\n",
4551 			    ldcp->id, rv);
4552 			DWARN(DBG_ALL_LDCS,
4553 			    "ldc_mem_rdwr_pa: (0x%llx) dir=%lld,tar_pa=0x%llx, "
4554 			    "loc_ra=0x%llx, tar_poff=0x%llx, loc_poff=0x%llx,"
4555 			    " tar_psz=0x%llx, loc_psz=0x%llx, copy_sz=0x%llx,"
4556 			    " total_bal=0x%llx\n",
4557 			    ldcp->id, direction, target_pa, local_ra,
4558 			    target_poff, local_poff, target_psize, local_psize,
4559 			    copy_size, target_size);
4560 
4561 			*size = *size - target_size;
4562 			mutex_exit(&ldcp->lock);
4563 			return (i_ldc_h2v_error(rv));
4564 		}
4565 
4566 		D2(ldcp->id, "ldc_mem_rdwr_pa: copied=0x%llx\n", copied_len);
4567 		target_poff += copied_len;
4568 		local_poff += copied_len;
4569 		target_psize -= copied_len;
4570 		local_psize -= copied_len;
4571 
4572 		target_size -= copied_len;
4573 
4574 		if (copy_size != copied_len)
4575 			continue;
4576 
4577 		if (target_psize == 0 && target_size != 0) {
4578 			target_pa += pg_size;
4579 			target_poff = 0;
4580 			target_psize = min(pg_size, target_size);
4581 		}
4582 
4583 		if (local_psize == 0 && target_size != 0) {
4584 			local_valign += pg_size;
4585 			local_ra = va_to_pa((void *)local_valign);
4586 			local_poff = 0;
4587 			local_psize = min(pg_size, len);
4588 			len -= local_psize;
4589 		}
4590 
4591 		/* check if we are all done */
4592 		if (target_size == 0)
4593 			break;
4594 	}
4595 
4596 	mutex_exit(&ldcp->lock);
4597 
4598 	D1(ldcp->id, "ldc_mem_rdwr_pa: (0x%llx) done copying sz=0x%llx\n",
4599 	    ldcp->id, *size);
4600 
4601 	return (0);
4602 }
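
/*
 * Usage sketch (illustrative only): read 'sz' bytes from the
 * hypervisor physical address 'pa' into the local buffer 'buf'.
 * Both the buffer address and the size are assumed to be 8-byte
 * aligned and supplied by the caller.
 *
 *	size_t sz = len;
 *
 *	rv = ldc_mem_rdwr_pa(chan, buf, &sz, (caddr_t)pa, LDC_COPY_IN);
 */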
4603 
4604 /*
4605  * Map an exported memory segment into the local address space. If the
4606  * memory range was exported for direct map access, a HV call is made
4607  * to allocate a RA range. If the map is done via a shadow copy, local
4608  * shadow memory is allocated and the base VA is returned in 'vaddr'. If
4609  * the mapping is a direct map then the RA is returned in 'raddr'.
4610  */
4611 int
4612 ldc_mem_map(ldc_mem_handle_t mhandle, ldc_mem_cookie_t *cookie, uint32_t ccount,
4613     uint8_t mtype, caddr_t *vaddr, caddr_t *raddr)
4614 {
4615 	int		i, idx;
4616 	ldc_chan_t 	*ldcp;
4617 	ldc_mhdl_t	*mhdl;
4618 	ldc_memseg_t	*memseg;
4619 	caddr_t		shadow_base = NULL, tmpaddr;
4620 	uint64_t	pg_size, pg_shift, pg_size_code;
4621 	uint64_t	exp_size = 0, npages;
4622 
4623 	if (mhandle == NULL) {
4624 		DWARN(DBG_ALL_LDCS, "ldc_mem_map: invalid memory handle\n");
4625 		return (EINVAL);
4626 	}
4627 	mhdl = (ldc_mhdl_t *)mhandle;
4628 
4629 	mutex_enter(&mhdl->lock);
4630 
4631 	if (mhdl->status == LDC_BOUND || mhdl->status == LDC_MAPPED ||
4632 	    mhdl->memseg != NULL) {
4633 		DWARN(DBG_ALL_LDCS,
4634 		    "ldc_mem_map: (0x%llx) handle bound/mapped\n", mhandle);
4635 		mutex_exit(&mhdl->lock);
4636 		return (EINVAL);
4637 	}
4638 
4639 	ldcp = mhdl->ldcp;
4640 
4641 	mutex_enter(&ldcp->lock);
4642 
4643 	if (ldcp->tstate != TS_UP) {
4644 		DWARN(ldcp->id,
4645 		    "ldc_mem_dring_map: (0x%llx) channel is not UP\n",
4646 		    ldcp->id);
4647 		mutex_exit(&ldcp->lock);
4648 		mutex_exit(&mhdl->lock);
4649 		return (EINVAL);
4650 	}
4651 
4652 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
4653 		DWARN(ldcp->id, "ldc_mem_map: invalid map type\n");
4654 		mutex_exit(&ldcp->lock);
4655 		mutex_exit(&mhdl->lock);
4656 		return (EINVAL);
4657 	}
4658 
4659 	if (mtype == LDC_SHADOW_MAP && vaddr == NULL) {
4660 		DWARN(ldcp->id,
4661 		    "ldc_mem_map: invalid vaddr arg0x%llx\n", vaddr);
4662 		mutex_exit(&ldcp->lock);
4663 		mutex_exit(&mhdl->lock);
4664 		return (EINVAL);
4665 	}
4666 
4667 	if (mtype == LDC_SHADOW_MAP &&
4668 	    (vaddr) && ((uintptr_t)(*vaddr) & MMU_PAGEOFFSET)) {
4669 		DWARN(ldcp->id,
4670 		    "ldc_mem_map: vaddr not page aligned, 0x%llx\n", *vaddr);
4671 		mutex_exit(&ldcp->lock);
4672 		mutex_exit(&mhdl->lock);
4673 		return (EINVAL);
4674 	}
4675 
4676 	D1(ldcp->id, "ldc_mem_map: (0x%llx) cookie = 0x%llx,0x%llx\n",
4677 	    mhandle, cookie->addr, cookie->size);
4678 
4679 	/* FUTURE: get the page size, pgsz code, and shift */
4680 	pg_size = MMU_PAGESIZE;
4681 	pg_size_code = page_szc(pg_size);
4682 	pg_shift = page_get_shift(pg_size_code);
4683 
4684 	/* calculate the number of pages in the exported cookie */
4685 	for (idx = 0; idx < ccount; idx++) {
4686 		if (cookie[idx].addr & MMU_PAGEOFFSET ||
4687 			cookie[idx].size & MMU_PAGEOFFSET) {
4688 			DWARN(ldcp->id,
4689 			    "ldc_mem_map: cookie addr/size not page aligned, "
4690 			    "0x%llx\n", cookie[idx].addr);
4691 			mutex_exit(&ldcp->lock);
4692 			mutex_exit(&mhdl->lock);
4693 			return (EINVAL);
4694 		}
4695 		exp_size += cookie[idx].size;
4696 	}
4697 	npages = (exp_size >> pg_shift);
4698 
4699 	/* Allocate memseg structure */
4700 	memseg = mhdl->memseg =	kmem_zalloc(sizeof (ldc_memseg_t), KM_SLEEP);
4701 
4702 	/* Allocate memory to store all pages and cookies */
4703 	memseg->pages =	kmem_zalloc((sizeof (ldc_page_t) * npages), KM_SLEEP);
4704 	memseg->cookies =
4705 		kmem_zalloc((sizeof (ldc_mem_cookie_t) * ccount), KM_SLEEP);
4706 
4707 	D2(ldcp->id, "ldc_mem_map: (0x%llx) processing 0x%llx pages\n",
4708 	    ldcp->id, npages);
4709 
4710 	/* Check to see if the client is requesting direct or shadow map */
4711 	if (mtype == LDC_SHADOW_MAP) {
4712 		if (*vaddr == NULL) {
4713 			shadow_base =
4714 				contig_mem_alloc_align(exp_size, PAGESIZE);
4715 			if (shadow_base == NULL) {
4716 				cmn_err(CE_WARN, "ldc_mem_map: shadow memory "
4717 				    "allocation failed\n");
4718 				kmem_free(memseg->cookies,
4719 				    (sizeof (ldc_mem_cookie_t) * ccount));
4720 				kmem_free(memseg->pages,
4721 				    (sizeof (ldc_page_t) * npages));
4722 				kmem_free(memseg, sizeof (ldc_memseg_t));
4723 				mutex_exit(&ldcp->lock);
4724 				mutex_exit(&mhdl->lock);
4725 				return (ENOMEM);
4726 			}
4727 
4728 			bzero(shadow_base, exp_size);
4729 			mhdl->myshadow = B_TRUE;
4730 
4731 			D1(ldcp->id, "ldc_mem_map: (0x%llx) allocated "
4732 			    "shadow page va=0x%llx\n", ldcp->id, shadow_base);
4733 		} else {
4734 			/*
4735 			 * Use client supplied memory for shadow_base
4736 			 * WARNING: assuming that client mem is >= exp_size
4737 			 */
4738 			shadow_base = *vaddr;
4739 		}
4740 	} else if (mtype == LDC_DIRECT_MAP) {
4741 		/* FUTURE: Do a direct map by calling into HV */
4742 		_NOTE(EMPTY)
4743 	}
4744 
4745 	/* Save all page and cookie information */
4746 	for (i = 0, tmpaddr = shadow_base; i < npages; i++) {
4747 		memseg->pages[i].raddr = va_to_pa(tmpaddr);
4748 		memseg->pages[i].size = pg_size;
4749 		memseg->pages[i].index = 0;
4750 		memseg->pages[i].offset = 0;
4751 		memseg->pages[i].mte = NULL;
4752 		tmpaddr += pg_size;
4753 	}
4754 	for (i = 0; i < ccount; i++) {
4755 		memseg->cookies[i].addr = cookie[i].addr;
4756 		memseg->cookies[i].size = cookie[i].size;
4757 	}
4758 
4759 	/* update memseg_t */
4760 	memseg->vaddr = shadow_base;
4761 	memseg->raddr = memseg->pages[0].raddr;
4762 	memseg->size = exp_size;
4763 	memseg->npages = npages;
4764 	memseg->ncookies = ccount;
4765 	memseg->next_cookie = 0;
4766 
4767 	/* memory handle = mapped */
4768 	mhdl->mtype = mtype;
4769 	mhdl->perm = 0;
4770 	mhdl->status = LDC_MAPPED;
4771 
4772 	D1(ldcp->id, "ldc_mem_map: (0x%llx) mapped 0x%llx, ra=0x%llx, "
4773 	    "va=0x%llx, pgs=0x%llx cookies=0x%llx\n",
4774 	    ldcp->id, mhdl, memseg->raddr, memseg->vaddr,
4775 	    memseg->npages, memseg->ncookies);
4776 
4777 	if (raddr)
4778 		*raddr = (caddr_t)memseg->raddr;
4779 	if (vaddr)
4780 		*vaddr = memseg->vaddr;
4781 
4782 	mutex_exit(&ldcp->lock);
4783 	mutex_exit(&mhdl->lock);
4784 	return (0);
4785 }
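
/*
 * Usage sketch (illustrative only): an importer maps the peer's
 * cookies via a shadow copy and lets ldc_mem_map() allocate the
 * shadow memory by passing in a NULL base VA. 'chan', 'cookies' and
 * 'ccount' are assumed to have been obtained from the peer.
 *
 *	ldc_mem_handle_t mh;
 *	caddr_t va = NULL;
 *
 *	(void) ldc_mem_alloc_handle(chan, &mh);
 *	rv = ldc_mem_map(mh, cookies, ccount, LDC_SHADOW_MAP, &va, NULL);
 */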
4786 
4787 /*
4788  * Unmap a memory segment. Free shadow memory (if any).
4789  */
4790 int
4791 ldc_mem_unmap(ldc_mem_handle_t mhandle)
4792 {
4793 	ldc_mhdl_t	*mhdl = (ldc_mhdl_t *)mhandle;
4794 	ldc_chan_t 	*ldcp;
4795 	ldc_memseg_t	*memseg;
4796 
4797 	if (mhdl == NULL || mhdl->status != LDC_MAPPED) {
4798 		DWARN(DBG_ALL_LDCS,
4799 		    "ldc_mem_unmap: (0x%llx) handle is not mapped\n",
4800 		    mhandle);
4801 		return (EINVAL);
4802 	}
4803 
4804 	mutex_enter(&mhdl->lock);
4805 
4806 	ldcp = mhdl->ldcp;
4807 	memseg = mhdl->memseg;
4808 
4809 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapping handle 0x%llx\n",
4810 	    ldcp->id, mhdl);
4811 
4812 	/* if we allocated shadow memory - free it */
4813 	if (mhdl->mtype == LDC_SHADOW_MAP && mhdl->myshadow) {
4814 		contig_mem_free(memseg->vaddr, memseg->size);
4815 	}
4816 
4817 	/* free the allocated memseg and page structures */
4818 	kmem_free(memseg->pages, (sizeof (ldc_page_t) * memseg->npages));
4819 	kmem_free(memseg->cookies,
4820 	    (sizeof (ldc_mem_cookie_t) * memseg->ncookies));
4821 	kmem_free(memseg, sizeof (ldc_memseg_t));
4822 
4823 	/* uninitialize the memory handle */
4824 	mhdl->memseg = NULL;
4825 	mhdl->status = LDC_UNBOUND;
4826 
4827 	D1(ldcp->id, "ldc_mem_unmap: (0x%llx) unmapped handle 0x%llx\n",
4828 	    ldcp->id, mhdl);
4829 
4830 	mutex_exit(&mhdl->lock);
4831 	return (0);
4832 }
4833 
4834 /*
4835  * Internal entry point for LDC mapped memory entry consistency
4836  * semantics. Acquire copies the contents of the remote memory
4837  * into the local shadow copy. The release operation copies the local
4838  * contents into the remote memory. The offset and size specify the
4839  * bounds for the memory range being synchronized.
4840  */
4841 static int
4842 i_ldc_mem_acquire_release(ldc_mem_handle_t mhandle, uint8_t direction,
4843     uint64_t offset, size_t size)
4844 {
4845 	int 		err;
4846 	ldc_mhdl_t	*mhdl;
4847 	ldc_chan_t	*ldcp;
4848 	ldc_memseg_t	*memseg;
4849 	caddr_t		local_vaddr;
4850 	size_t		copy_size;
4851 
4852 	if (mhandle == NULL) {
4853 		DWARN(DBG_ALL_LDCS,
4854 		    "i_ldc_mem_acquire_release: invalid memory handle\n");
4855 		return (EINVAL);
4856 	}
4857 	mhdl = (ldc_mhdl_t *)mhandle;
4858 
4859 	mutex_enter(&mhdl->lock);
4860 
4861 	if (mhdl->status != LDC_MAPPED || mhdl->ldcp == NULL) {
4862 		DWARN(DBG_ALL_LDCS,
4863 		    "i_ldc_mem_acquire_release: not mapped memory\n");
4864 		mutex_exit(&mhdl->lock);
4865 		return (EINVAL);
4866 	}
4867 
4868 	if (offset >= mhdl->memseg->size ||
4869 	    (offset + size) > mhdl->memseg->size) {
4870 		DWARN(DBG_ALL_LDCS,
4871 		    "i_ldc_mem_acquire_release: memory out of range\n");
4872 		mutex_exit(&mhdl->lock);
4873 		return (EINVAL);
4874 	}
4875 
4876 	/* get the channel handle and memory segment */
4877 	ldcp = mhdl->ldcp;
4878 	memseg = mhdl->memseg;
4879 
4880 	if (mhdl->mtype == LDC_SHADOW_MAP) {
4881 
4882 		local_vaddr = memseg->vaddr + offset;
4883 		copy_size = size;
4884 
4885 		/* copy to/from remote from/to local memory */
4886 		err = ldc_mem_copy((ldc_handle_t)ldcp, local_vaddr, offset,
4887 		    &copy_size, memseg->cookies, memseg->ncookies,
4888 		    direction);
4889 		if (err || copy_size != size) {
4890 			cmn_err(CE_WARN,
4891 			    "i_ldc_mem_acquire_release: copy failed\n");
4892 			mutex_exit(&mhdl->lock);
4893 			return (err);
4894 		}
4895 	}
4896 
4897 	mutex_exit(&mhdl->lock);
4898 
4899 	return (0);
4900 }
4901 
4902 /*
4903  * Ensure that the contents of the local memory seg are consistent
4904  * with the contents of the remote segment
4905  */
4906 int
4907 ldc_mem_acquire(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
4908 {
4909 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_IN, offset, size));
4910 }
4911 
4912 
4913 /*
4914  * Ensure that the contents of the remote memory seg are consistent
4915  * with the contents of the local segment
4916  */
4917 int
4918 ldc_mem_release(ldc_mem_handle_t mhandle, uint64_t offset, uint64_t size)
4919 {
4920 	return (i_ldc_mem_acquire_release(mhandle, LDC_COPY_OUT, offset, size));
4921 }
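
/*
 * Usage sketch (illustrative only): with a shadow mapping such as the
 * one above, a client pulls in the current remote contents before
 * reading the shadow and pushes its updates back out afterwards;
 * 'mh', 'va' and the segment size 'size' are assumed.
 *
 *	(void) ldc_mem_acquire(mh, 0, size);	remote -> local shadow
 *	... read or modify the shadow at 'va' ...
 *	(void) ldc_mem_release(mh, 0, size);	local shadow -> remote
 */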
4922 
4923 /*
4924  * Allocate a descriptor ring. The size of each descriptor
4925  * must be 8-byte aligned and the entire ring should be a multiple
4926  * of MMU_PAGESIZE.
4927  */
4928 int
4929 ldc_mem_dring_create(uint32_t len, uint32_t dsize, ldc_dring_handle_t *dhandle)
4930 {
4931 	ldc_dring_t *dringp;
4932 	size_t size = (dsize * len);
4933 
4934 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: len=0x%x, dsize=0x%x\n",
4935 	    len, dsize);
4936 
4937 	if (dhandle == NULL) {
4938 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid dhandle\n");
4939 		return (EINVAL);
4940 	}
4941 
4942 	if (len == 0) {
4943 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid length\n");
4944 		return (EINVAL);
4945 	}
4946 
4947 	/* descriptor size should be 8-byte aligned */
4948 	if (dsize == 0 || (dsize & 0x7)) {
4949 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_create: invalid size\n");
4950 		return (EINVAL);
4951 	}
4952 
4953 	*dhandle = 0;
4954 
4955 	/* Allocate a desc ring structure */
4956 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
4957 
4958 	/* Initialize dring */
4959 	dringp->length = len;
4960 	dringp->dsize = dsize;
4961 
4962 	/* round off to multiple of pagesize */
4963 	dringp->size = (size & MMU_PAGEMASK);
4964 	if (size & MMU_PAGEOFFSET)
4965 		dringp->size += MMU_PAGESIZE;
4966 
4967 	dringp->status = LDC_UNBOUND;
4968 
4969 	/* allocate descriptor ring memory */
4970 	dringp->base = contig_mem_alloc_align(dringp->size, PAGESIZE);
4971 	if (dringp->base == NULL) {
4972 		cmn_err(CE_WARN,
4973 		    "ldc_mem_dring_create: unable to alloc desc\n");
4974 		kmem_free(dringp, sizeof (ldc_dring_t));
4975 		return (ENOMEM);
4976 	}
4977 
4978 	bzero(dringp->base, dringp->size);
4979 
4980 	/* initialize the desc ring lock */
4981 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
4982 
4983 	/* Add descriptor ring to the head of global list */
4984 	mutex_enter(&ldcssp->lock);
4985 	dringp->next = ldcssp->dring_list;
4986 	ldcssp->dring_list = dringp;
4987 	mutex_exit(&ldcssp->lock);
4988 
4989 	*dhandle = (ldc_dring_handle_t)dringp;
4990 
4991 	D1(DBG_ALL_LDCS, "ldc_mem_dring_create: dring allocated\n");
4992 
4993 	return (0);
4994 }
4995 
4996 
4997 /*
4998  * Destroy a descriptor ring.
4999  */
5000 int
5001 ldc_mem_dring_destroy(ldc_dring_handle_t dhandle)
5002 {
5003 	ldc_dring_t *dringp;
5004 	ldc_dring_t *tmp_dringp;
5005 
5006 	D1(DBG_ALL_LDCS, "ldc_mem_dring_destroy: entered\n");
5007 
5008 	if (dhandle == NULL) {
5009 		DWARN(DBG_ALL_LDCS,
5010 		    "ldc_mem_dring_destroy: invalid desc ring handle\n");
5011 		return (EINVAL);
5012 	}
5013 	dringp = (ldc_dring_t *)dhandle;
5014 
5015 	if (dringp->status == LDC_BOUND) {
5016 		DWARN(DBG_ALL_LDCS,
5017 		    "ldc_mem_dring_destroy: desc ring is bound\n");
5018 		return (EACCES);
5019 	}
5020 
5021 	mutex_enter(&dringp->lock);
5022 	mutex_enter(&ldcssp->lock);
5023 
5024 	/* remove from linked list - if not bound */
5025 	tmp_dringp = ldcssp->dring_list;
5026 	if (tmp_dringp == dringp) {
5027 		ldcssp->dring_list = dringp->next;
5028 		dringp->next = NULL;
5029 
5030 	} else {
5031 		while (tmp_dringp != NULL) {
5032 			if (tmp_dringp->next == dringp) {
5033 				tmp_dringp->next = dringp->next;
5034 				dringp->next = NULL;
5035 				break;
5036 			}
5037 			tmp_dringp = tmp_dringp->next;
5038 		}
5039 		if (tmp_dringp == NULL) {
5040 			DWARN(DBG_ALL_LDCS,
5041 			    "ldc_mem_dring_destroy: invalid descriptor\n");
5042 			mutex_exit(&ldcssp->lock);
5043 			mutex_exit(&dringp->lock);
5044 			return (EINVAL);
5045 		}
5046 	}
5047 
5048 	mutex_exit(&ldcssp->lock);
5049 
5050 	/* free the descriptor ring */
5051 	contig_mem_free((caddr_t)dringp->base, dringp->size);
5052 
5053 	mutex_exit(&dringp->lock);
5054 
5055 	/* destroy dring lock */
5056 	mutex_destroy(&dringp->lock);
5057 
5058 	/* free desc ring object */
5059 	kmem_free(dringp, sizeof (ldc_dring_t));
5060 
5061 	return (0);
5062 }
5063 
5064 /*
5065  * Bind a previously allocated dring to a channel. The channel should
5066  * be OPEN in order to bind the ring to the channel. Returns back a
5067  * descriptor ring cookie. The descriptor ring is exported for remote
5068  * access by the client at the other end of the channel. Entries for
5069  * the dring pages are stored in the map table (via ldc_mem_bind_handle).
5070  */
5071 int
5072 ldc_mem_dring_bind(ldc_handle_t handle, ldc_dring_handle_t dhandle,
5073     uint8_t mtype, uint8_t perm, ldc_mem_cookie_t *cookie, uint32_t *ccount)
5074 {
5075 	int		err;
5076 	ldc_chan_t 	*ldcp;
5077 	ldc_dring_t	*dringp;
5078 	ldc_mem_handle_t mhandle;
5079 
5080 	/* check to see if channel is initialized */
5081 	if (handle == NULL) {
5082 		DWARN(DBG_ALL_LDCS,
5083 		    "ldc_mem_dring_bind: invalid channel handle\n");
5084 		return (EINVAL);
5085 	}
5086 	ldcp = (ldc_chan_t *)handle;
5087 
5088 	if (dhandle == NULL) {
5089 		DWARN(DBG_ALL_LDCS,
5090 		    "ldc_mem_dring_bind: invalid desc ring handle\n");
5091 		return (EINVAL);
5092 	}
5093 	dringp = (ldc_dring_t *)dhandle;
5094 
5095 	if (cookie == NULL) {
5096 		DWARN(ldcp->id,
5097 		    "ldc_mem_dring_bind: invalid cookie arg\n");
5098 		return (EINVAL);
5099 	}
5100 
5101 	mutex_enter(&dringp->lock);
5102 
5103 	if (dringp->status == LDC_BOUND) {
5104 		DWARN(DBG_ALL_LDCS,
5105 		    "ldc_mem_dring_bind: (0x%llx) descriptor ring is bound\n",
5106 		    ldcp->id);
5107 		mutex_exit(&dringp->lock);
5108 		return (EINVAL);
5109 	}
5110 
5111 	if ((perm & LDC_MEM_RW) == 0) {
5112 		DWARN(DBG_ALL_LDCS,
5113 		    "ldc_mem_dring_bind: invalid permissions\n");
5114 		mutex_exit(&dringp->lock);
5115 		return (EINVAL);
5116 	}
5117 
5118 	if ((mtype & (LDC_SHADOW_MAP|LDC_DIRECT_MAP|LDC_IO_MAP)) == 0) {
5119 		DWARN(DBG_ALL_LDCS, "ldc_mem_dring_bind: invalid type\n");
5120 		mutex_exit(&dringp->lock);
5121 		return (EINVAL);
5122 	}
5123 
5124 	dringp->ldcp = ldcp;
5125 
5126 	/* create a memory handle */
5127 	err = ldc_mem_alloc_handle(handle, &mhandle);
5128 	if (err || mhandle == NULL) {
5129 		DWARN(DBG_ALL_LDCS,
5130 		    "ldc_mem_dring_bind: (0x%llx) error allocating mhandle\n",
5131 		    ldcp->id);
5132 		mutex_exit(&dringp->lock);
5133 		return (err);
5134 	}
5135 	dringp->mhdl = mhandle;
5136 
5137 	/* bind the descriptor ring to channel */
5138 	err = ldc_mem_bind_handle(mhandle, dringp->base, dringp->size,
5139 	    mtype, perm, cookie, ccount);
5140 	if (err) {
5141 		DWARN(ldcp->id,
5142 		    "ldc_mem_dring_bind: (0x%llx) error binding mhandle\n",
5143 		    ldcp->id);
5144 		mutex_exit(&dringp->lock);
5145 		return (err);
5146 	}
5147 
5148 	/*
5149 	 * For now return error if we get more than one cookie
5150 	 * FUTURE: Return multiple cookies ..
5151 	 */
5152 	if (*ccount > 1) {
5153 		(void) ldc_mem_unbind_handle(mhandle);
5154 		(void) ldc_mem_free_handle(mhandle);
5155 
5156 		dringp->ldcp = NULL;
5157 		dringp->mhdl = NULL;
5158 		*ccount = 0;
5159 
5160 		mutex_exit(&dringp->lock);
5161 		return (EAGAIN);
5162 	}
5163 
5164 	/* Add descriptor ring to channel's exported dring list */
5165 	mutex_enter(&ldcp->exp_dlist_lock);
5166 	dringp->ch_next = ldcp->exp_dring_list;
5167 	ldcp->exp_dring_list = dringp;
5168 	mutex_exit(&ldcp->exp_dlist_lock);
5169 
5170 	dringp->status = LDC_BOUND;
5171 
5172 	mutex_exit(&dringp->lock);
5173 
5174 	return (0);
5175 }
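
/*
 * Usage sketch (illustrative only): an exporter creates a ring of 128
 * descriptors of 64 bytes each and binds it to an OPEN channel 'chan'.
 * A single cookie is expected back (see the EAGAIN case above).
 *
 *	ldc_dring_handle_t dh;
 *	ldc_mem_cookie_t ck;
 *	uint32_t ccount;
 *
 *	(void) ldc_mem_dring_create(128, 64, &dh);
 *	rv = ldc_mem_dring_bind(chan, dh, LDC_SHADOW_MAP, LDC_MEM_RW,
 *	    &ck, &ccount);
 */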
5176 
5177 /*
5178  * Return the next cookie associated with the specified dring handle
5179  */
5180 int
5181 ldc_mem_dring_nextcookie(ldc_dring_handle_t dhandle, ldc_mem_cookie_t *cookie)
5182 {
5183 	int		rv = 0;
5184 	ldc_dring_t 	*dringp;
5185 	ldc_chan_t	*ldcp;
5186 
5187 	if (dhandle == NULL) {
5188 		DWARN(DBG_ALL_LDCS,
5189 		    "ldc_mem_dring_nextcookie: invalid desc ring handle\n");
5190 		return (EINVAL);
5191 	}
5192 	dringp = (ldc_dring_t *)dhandle;
5193 	mutex_enter(&dringp->lock);
5194 
5195 	if (dringp->status != LDC_BOUND) {
5196 		DWARN(DBG_ALL_LDCS,
5197 		    "ldc_mem_dring_nextcookie: descriptor ring 0x%llx "
5198 		    "is not bound\n", dringp);
5199 		mutex_exit(&dringp->lock);
5200 		return (EINVAL);
5201 	}
5202 
5203 	ldcp = dringp->ldcp;
5204 
5205 	if (cookie == NULL) {
5206 		DWARN(ldcp->id,
5207 		    "ldc_mem_dring_nextcookie:(0x%llx) invalid cookie arg\n",
5208 		    ldcp->id);
5209 		mutex_exit(&dringp->lock);
5210 		return (EINVAL);
5211 	}
5212 
5213 	rv = ldc_mem_nextcookie((ldc_mem_handle_t)dringp->mhdl, cookie);
5214 	mutex_exit(&dringp->lock);
5215 
5216 	return (rv);
5217 }

5218 /*
5219  * Unbind a previously bound dring from a channel.
5220  */
5221 int
5222 ldc_mem_dring_unbind(ldc_dring_handle_t dhandle)
5223 {
5224 	ldc_dring_t 	*dringp;
5225 	ldc_dring_t	*tmp_dringp;
5226 	ldc_chan_t	*ldcp;
5227 
5228 	if (dhandle == NULL) {
5229 		DWARN(DBG_ALL_LDCS,
5230 		    "ldc_mem_dring_unbind: invalid desc ring handle\n");
5231 		return (EINVAL);
5232 	}
5233 	dringp = (ldc_dring_t *)dhandle;
5234 
5235 	mutex_enter(&dringp->lock);
5236 
5237 	if (dringp->status == LDC_UNBOUND) {
5238 		DWARN(DBG_ALL_LDCS,
5239 		    "ldc_mem_dring_bind: descriptor ring 0x%llx is unbound\n",
5240 		    dringp);
5241 		mutex_exit(&dringp->lock);
5242 		return (EINVAL);
5243 	}
5244 	ldcp = dringp->ldcp;
5245 
5246 	mutex_enter(&ldcp->exp_dlist_lock);
5247 
5248 	tmp_dringp = ldcp->exp_dring_list;
5249 	if (tmp_dringp == dringp) {
5250 		ldcp->exp_dring_list = dringp->ch_next;
5251 		dringp->ch_next = NULL;
5252 
5253 	} else {
5254 		while (tmp_dringp != NULL) {
5255 			if (tmp_dringp->ch_next == dringp) {
5256 				tmp_dringp->ch_next = dringp->ch_next;
5257 				dringp->ch_next = NULL;
5258 				break;
5259 			}
5260 			tmp_dringp = tmp_dringp->ch_next;
5261 		}
5262 		if (tmp_dringp == NULL) {
5263 			DWARN(DBG_ALL_LDCS,
5264 			    "ldc_mem_dring_unbind: invalid descriptor\n");
5265 			mutex_exit(&ldcp->exp_dlist_lock);
5266 			mutex_exit(&dringp->lock);
5267 			return (EINVAL);
5268 		}
5269 	}
5270 
5271 	mutex_exit(&ldcp->exp_dlist_lock);
5272 
5273 	(void) ldc_mem_unbind_handle((ldc_mem_handle_t)dringp->mhdl);
5274 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5275 
5276 	dringp->ldcp = NULL;
5277 	dringp->mhdl = NULL;
5278 	dringp->status = LDC_UNBOUND;
5279 
5280 	mutex_exit(&dringp->lock);
5281 
5282 	return (0);
5283 }
5284 
5285 /*
5286  * Get information about the dring. The base address of the descriptor
5287  * ring along with the type and permission are returned back.
5288  */
5289 int
5290 ldc_mem_dring_info(ldc_dring_handle_t dhandle, ldc_mem_info_t *minfo)
5291 {
5292 	ldc_dring_t	*dringp;
5293 	int		rv;
5294 
5295 	if (dhandle == NULL) {
5296 		DWARN(DBG_ALL_LDCS,
5297 		    "ldc_mem_dring_info: invalid desc ring handle\n");
5298 		return (EINVAL);
5299 	}
5300 	dringp = (ldc_dring_t *)dhandle;
5301 
5302 	mutex_enter(&dringp->lock);
5303 
5304 	if (dringp->mhdl) {
5305 		rv = ldc_mem_info(dringp->mhdl, minfo);
5306 		if (rv) {
5307 			DWARN(DBG_ALL_LDCS,
5308 			    "ldc_mem_dring_info: error reading mem info\n");
5309 			mutex_exit(&dringp->lock);
5310 			return (rv);
5311 		}
5312 	} else {
5313 		minfo->vaddr = dringp->base;
5314 		minfo->raddr = NULL;
5315 		minfo->status = dringp->status;
5316 	}
5317 
5318 	mutex_exit(&dringp->lock);
5319 
5320 	return (0);
5321 }
5322 
5323 /*
5324  * Map an exported descriptor ring into the local address space. If the
5325  * descriptor ring was exported for direct map access, a HV call is made
5326  * to allocate a RA range. If the map is done via a shadow copy, local
5327  * shadow memory is allocated.
5328  */
5329 int
5330 ldc_mem_dring_map(ldc_handle_t handle, ldc_mem_cookie_t *cookie,
5331     uint32_t ccount, uint32_t len, uint32_t dsize, uint8_t mtype,
5332     ldc_dring_handle_t *dhandle)
5333 {
5334 	int		err;
5335 	ldc_chan_t 	*ldcp;
5336 	ldc_mem_handle_t mhandle;
5337 	ldc_dring_t	*dringp;
5338 	size_t		dring_size;
5339 
5340 	if (dhandle == NULL) {
5341 		DWARN(DBG_ALL_LDCS,
5342 		    "ldc_mem_dring_map: invalid dhandle\n");
5343 		return (EINVAL);
5344 	}
5345 
5346 	/* check to see if channel is initialized */
5347 	if (handle == NULL) {
5348 		DWARN(DBG_ALL_LDCS,
5349 		    "ldc_mem_dring_map: invalid channel handle\n");
5350 		return (EINVAL);
5351 	}
5352 	ldcp = (ldc_chan_t *)handle;
5353 
5354 	if (cookie == NULL) {
5355 		DWARN(ldcp->id,
5356 		    "ldc_mem_dring_map: (0x%llx) invalid cookie\n",
5357 		    ldcp->id);
5358 		return (EINVAL);
5359 	}
5360 
5361 	/* FUTURE: For now we support only one cookie per dring */
5362 	ASSERT(ccount == 1);
5363 
5364 	if (cookie->size < (dsize * len)) {
5365 		DWARN(ldcp->id,
5366 		    "ldc_mem_dring_map: (0x%llx) invalid dsize/len\n",
5367 		    ldcp->id);
5368 		return (EINVAL);
5369 	}
5370 
5371 	*dhandle = 0;
5372 
5373 	/* Allocate a dring structure */
5374 	dringp = kmem_zalloc(sizeof (ldc_dring_t), KM_SLEEP);
5375 
5376 	D1(ldcp->id,
5377 	    "ldc_mem_dring_map: 0x%x,0x%x,0x%x,0x%llx,0x%llx\n",
5378 	    mtype, len, dsize, cookie->addr, cookie->size);
5379 
5380 	/* Initialize dring */
5381 	dringp->length = len;
5382 	dringp->dsize = dsize;
5383 
5384 	/* round off to multiple of page size */
5385 	dring_size = len * dsize;
5386 	dringp->size = (dring_size & MMU_PAGEMASK);
5387 	if (dring_size & MMU_PAGEOFFSET)
5388 		dringp->size += MMU_PAGESIZE;
5389 
5390 	dringp->ldcp = ldcp;
5391 
5392 	/* create a memory handle */
5393 	err = ldc_mem_alloc_handle(handle, &mhandle);
5394 	if (err || mhandle == NULL) {
5395 		DWARN(DBG_ALL_LDCS,
5396 		    "ldc_mem_dring_map: cannot alloc hdl err=%d\n",
5397 		    err);
5398 		kmem_free(dringp, sizeof (ldc_dring_t));
5399 		return (ENOMEM);
5400 	}
5401 
5402 	dringp->mhdl = mhandle;
5403 	dringp->base = NULL;
5404 
5405 	/* map the dring into local memory */
5406 	err = ldc_mem_map(mhandle, cookie, ccount, mtype,
5407 	    &(dringp->base), NULL);
5408 	if (err || dringp->base == NULL) {
5409 		cmn_err(CE_WARN,
5410 		    "ldc_mem_dring_map: cannot map desc ring err=%d\n", err);
5411 		(void) ldc_mem_free_handle(mhandle);
5412 		kmem_free(dringp, sizeof (ldc_dring_t));
5413 		return (ENOMEM);
5414 	}
5415 
5416 	/* initialize the desc ring lock */
5417 	mutex_init(&dringp->lock, NULL, MUTEX_DRIVER, NULL);
5418 
5419 	/* Add descriptor ring to channel's imported dring list */
5420 	mutex_enter(&ldcp->imp_dlist_lock);
5421 	dringp->ch_next = ldcp->imp_dring_list;
5422 	ldcp->imp_dring_list = dringp;
5423 	mutex_exit(&ldcp->imp_dlist_lock);
5424 
5425 	dringp->status = LDC_MAPPED;
5426 
5427 	*dhandle = (ldc_dring_handle_t)dringp;
5428 
5429 	return (0);
5430 }
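
/*
 * Usage sketch (illustrative only): the importer maps the exporter's
 * ring using the single cookie 'ck' received from its peer; the ring
 * geometry (here 128 descriptors of 64 bytes) must match the
 * exporter's, and the channel must be UP.
 *
 *	ldc_dring_handle_t dh;
 *
 *	rv = ldc_mem_dring_map(chan, &ck, 1, 128, 64, LDC_SHADOW_MAP,
 *	    &dh);
 */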
5431 
5432 /*
5433  * Unmap a descriptor ring. Free shadow memory (if any).
5434  */
5435 int
5436 ldc_mem_dring_unmap(ldc_dring_handle_t dhandle)
5437 {
5438 	ldc_dring_t 	*dringp;
5439 	ldc_dring_t	*tmp_dringp;
5440 	ldc_chan_t	*ldcp;
5441 
5442 	if (dhandle == NULL) {
5443 		DWARN(DBG_ALL_LDCS,
5444 		    "ldc_mem_dring_unmap: invalid desc ring handle\n");
5445 		return (EINVAL);
5446 	}
5447 	dringp = (ldc_dring_t *)dhandle;
5448 
5449 	if (dringp->status != LDC_MAPPED) {
5450 		DWARN(DBG_ALL_LDCS,
5451 		    "ldc_mem_dring_unmap: not a mapped desc ring\n");
5452 		return (EINVAL);
5453 	}
5454 
5455 	mutex_enter(&dringp->lock);
5456 
5457 	ldcp = dringp->ldcp;
5458 
5459 	mutex_enter(&ldcp->imp_dlist_lock);
5460 
5461 	/* find and unlink the desc ring from channel import list */
5462 	tmp_dringp = ldcp->imp_dring_list;
5463 	if (tmp_dringp == dringp) {
5464 		ldcp->imp_dring_list = dringp->ch_next;
5465 		dringp->ch_next = NULL;
5466 
5467 	} else {
5468 		while (tmp_dringp != NULL) {
5469 			if (tmp_dringp->ch_next == dringp) {
5470 				tmp_dringp->ch_next = dringp->ch_next;
5471 				dringp->ch_next = NULL;
5472 				break;
5473 			}
5474 			tmp_dringp = tmp_dringp->ch_next;
5475 		}
5476 		if (tmp_dringp == NULL) {
5477 			DWARN(DBG_ALL_LDCS,
5478 			    "ldc_mem_dring_unmap: invalid descriptor\n");
5479 			mutex_exit(&ldcp->imp_dlist_lock);
5480 			mutex_exit(&dringp->lock);
5481 			return (EINVAL);
5482 		}
5483 	}
5484 
5485 	mutex_exit(&ldcp->imp_dlist_lock);
5486 
5487 	/* do a LDC memory handle unmap and free */
5488 	(void) ldc_mem_unmap(dringp->mhdl);
5489 	(void) ldc_mem_free_handle((ldc_mem_handle_t)dringp->mhdl);
5490 
5491 	dringp->status = 0;
5492 	dringp->ldcp = NULL;
5493 
5494 	mutex_exit(&dringp->lock);
5495 
5496 	/* destroy dring lock */
5497 	mutex_destroy(&dringp->lock);
5498 
5499 	/* free desc ring object */
5500 	kmem_free(dringp, sizeof (ldc_dring_t));
5501 
5502 	return (0);
5503 }
5504 
5505 /*
5506  * Internal entry point for descriptor ring access entry consistency
5507  * semantics. Acquire copies the contents of the remote descriptor ring
5508  * into the local shadow copy. The release operation copies the local
5509  * contents into the remote dring. The start and end locations specify
5510  * bounds for the entries being synchronized.
5511  */
5512 static int
5513 i_ldc_dring_acquire_release(ldc_dring_handle_t dhandle,
5514     uint8_t direction, uint64_t start, uint64_t end)
5515 {
5516 	int 			err;
5517 	ldc_dring_t		*dringp;
5518 	ldc_chan_t		*ldcp;
5519 	uint64_t		soff;
5520 	size_t			copy_size;
5521 
5522 	if (dhandle == NULL) {
5523 		DWARN(DBG_ALL_LDCS,
5524 		    "i_ldc_dring_acquire_release: invalid desc ring handle\n");
5525 		return (EINVAL);
5526 	}
5527 	dringp = (ldc_dring_t *)dhandle;
5528 	mutex_enter(&dringp->lock);
5529 
5530 	if (dringp->status != LDC_MAPPED || dringp->ldcp == NULL) {
5531 		DWARN(DBG_ALL_LDCS,
5532 		    "i_ldc_dring_acquire_release: not a mapped desc ring\n");
5533 		mutex_exit(&dringp->lock);
5534 		return (EINVAL);
5535 	}
5536 
5537 	if (start >= dringp->length || end >= dringp->length) {
5538 		DWARN(DBG_ALL_LDCS,
5539 		    "i_ldc_dring_acquire_release: index out of range\n");
5540 		mutex_exit(&dringp->lock);
5541 		return (EINVAL);
5542 	}
5543 
5544 	/* get the channel handle */
5545 	ldcp = dringp->ldcp;
5546 
5547 	copy_size = (start <= end) ? (((end - start) + 1) * dringp->dsize) :
5548 		((dringp->length - start) * dringp->dsize);
5549 
5550 	/* Calculate the relative offset for the first desc */
5551 	soff = (start * dringp->dsize);
5552 
5553 	/* copy to/from remote from/to local memory */
5554 	D1(ldcp->id, "i_ldc_dring_acquire_release: c1 off=0x%llx sz=0x%llx\n",
5555 	    soff, copy_size);
5556 	err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5557 	    direction, soff, copy_size);
5558 	if (err) {
5559 		DWARN(ldcp->id,
5560 		    "i_ldc_dring_acquire_release: copy failed\n");
5561 		mutex_exit(&dringp->lock);
5562 		return (err);
5563 	}
5564 
5565 	/* do the balance */
5566 	if (start > end) {
5567 		copy_size = ((end + 1) * dringp->dsize);
5568 		soff = 0;
5569 
5570 		/* copy to/from remote from/to local memory */
5571 		D1(ldcp->id, "i_ldc_dring_acquire_release: c2 "
5572 		    "off=0x%llx sz=0x%llx\n", soff, copy_size);
5573 		err = i_ldc_mem_acquire_release((ldc_mem_handle_t)dringp->mhdl,
5574 		    direction, soff, copy_size);
5575 		if (err) {
5576 			DWARN(ldcp->id,
5577 			    "i_ldc_dring_acquire_release: copy failed\n");
5578 			mutex_exit(&dringp->lock);
5579 			return (err);
5580 		}
5581 	}
5582 
5583 	mutex_exit(&dringp->lock);
5584 
5585 	return (0);
5586 }
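
/*
 * For example, for a mapped ring of length 8, a call with start = 6
 * and end = 1 is split into two copies: entries 6..7 first (soff =
 * 6 * dsize, size 2 * dsize), then entries 0..1 (soff = 0, size
 * 2 * dsize).
 */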
5587 
5588 /*
5589  * Ensure that the contents in the local dring are consistent
5590  * with the contents of the remote dring
5591  */
5592 int
5593 ldc_mem_dring_acquire(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5594 {
5595 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_IN, start, end));
5596 }
5597 
5598 /*
5599  * Ensure that the contents in the remote dring are consistent
5600  * with the contents of the local dring
5601  */
5602 int
5603 ldc_mem_dring_release(ldc_dring_handle_t dhandle, uint64_t start, uint64_t end)
5604 {
5605 	return (i_ldc_dring_acquire_release(dhandle, LDC_COPY_OUT, start, end));
5606 }
5607 
5608 
5609 /* ------------------------------------------------------------------------- */
5610